diff options
| author | James Morris <james.l.morris@oracle.com> | 2017-07-24 20:44:18 -0400 |
|---|---|---|
| committer | James Morris <james.l.morris@oracle.com> | 2017-07-24 20:44:18 -0400 |
| commit | 53a2ebaaabc1eb8458796fec3bc1e0e80746b642 (patch) | |
| tree | 9d1f9227b49392cdd2edcc01057517da4f4b09c2 /drivers/gpu/drm/amd/amdgpu | |
| parent | 3cf29931453215536916d0c4da953fce1911ced3 (diff) | |
| parent | 520eccdfe187591a51ea9ab4c1a024ae4d0f68d9 (diff) | |
sync to Linus v4.13-rc2 for subsystem developers to work against
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
103 files changed, 9975 insertions, 4921 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 61360e27715f..26682454a446 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig | |||
| @@ -5,15 +5,23 @@ config DRM_AMDGPU_SI | |||
| 5 | Choose this option if you want to enable experimental support | 5 | Choose this option if you want to enable experimental support |
| 6 | for SI asics. | 6 | for SI asics. |
| 7 | 7 | ||
| 8 | SI is already supported in radeon. Experimental support for SI | ||
| 9 | in amdgpu will be disabled by default and is still provided by | ||
| 10 | radeon. Use module options to override this: | ||
| 11 | |||
| 12 | radeon.si_support=0 amdgpu.si_support=1 | ||
| 13 | |||
| 8 | config DRM_AMDGPU_CIK | 14 | config DRM_AMDGPU_CIK |
| 9 | bool "Enable amdgpu support for CIK parts" | 15 | bool "Enable amdgpu support for CIK parts" |
| 10 | depends on DRM_AMDGPU | 16 | depends on DRM_AMDGPU |
| 11 | help | 17 | help |
| 12 | Choose this option if you want to enable experimental support | 18 | Choose this option if you want to enable support for CIK asics. |
| 13 | for CIK asics. | 19 | |
| 20 | CIK is already supported in radeon. Support for CIK in amdgpu | ||
| 21 | will be disabled by default and is still provided by radeon. | ||
| 22 | Use module options to override this: | ||
| 14 | 23 | ||
| 15 | CIK is already supported in radeon. CIK support in amdgpu | 24 | radeon.cik_support=0 amdgpu.cik_support=1 |
| 16 | is for experimentation and testing. | ||
| 17 | 25 | ||
| 18 | config DRM_AMDGPU_USERPTR | 26 | config DRM_AMDGPU_USERPTR |
| 19 | bool "Always enable userptr write support" | 27 | bool "Always enable userptr write support" |
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 660786aba7d2..faea6349228f 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | FULL_AMD_PATH=$(src)/.. | 5 | FULL_AMD_PATH=$(src)/.. |
| 6 | 6 | ||
| 7 | ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \ | 7 | ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ |
| 8 | -I$(FULL_AMD_PATH)/include \ | 8 | -I$(FULL_AMD_PATH)/include \ |
| 9 | -I$(FULL_AMD_PATH)/amdgpu \ | 9 | -I$(FULL_AMD_PATH)/amdgpu \ |
| 10 | -I$(FULL_AMD_PATH)/scheduler \ | 10 | -I$(FULL_AMD_PATH)/scheduler \ |
| @@ -24,7 +24,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ | |||
| 24 | atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ | 24 | atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ |
| 25 | amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ | 25 | amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ |
| 26 | amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ | 26 | amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ |
| 27 | amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o | 27 | amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ |
| 28 | amdgpu_queue_mgr.o | ||
| 28 | 29 | ||
| 29 | # add asic specific block | 30 | # add asic specific block |
| 30 | amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ | 31 | amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ |
| @@ -34,7 +35,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ | |||
| 34 | amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o | 35 | amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o |
| 35 | 36 | ||
| 36 | amdgpu-y += \ | 37 | amdgpu-y += \ |
| 37 | vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o | 38 | vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o |
| 38 | 39 | ||
| 39 | # add GMC block | 40 | # add GMC block |
| 40 | amdgpu-y += \ | 41 | amdgpu-y += \ |
| @@ -54,7 +55,8 @@ amdgpu-y += \ | |||
| 54 | # add PSP block | 55 | # add PSP block |
| 55 | amdgpu-y += \ | 56 | amdgpu-y += \ |
| 56 | amdgpu_psp.o \ | 57 | amdgpu_psp.o \ |
| 57 | psp_v3_1.o | 58 | psp_v3_1.o \ |
| 59 | psp_v10_0.o | ||
| 58 | 60 | ||
| 59 | # add SMC block | 61 | # add SMC block |
| 60 | amdgpu-y += \ | 62 | amdgpu-y += \ |
| @@ -92,6 +94,11 @@ amdgpu-y += \ | |||
| 92 | vce_v3_0.o \ | 94 | vce_v3_0.o \ |
| 93 | vce_v4_0.o | 95 | vce_v4_0.o |
| 94 | 96 | ||
| 97 | # add VCN block | ||
| 98 | amdgpu-y += \ | ||
| 99 | amdgpu_vcn.o \ | ||
| 100 | vcn_v1_0.o | ||
| 101 | |||
| 95 | # add amdkfd interfaces | 102 | # add amdkfd interfaces |
| 96 | amdgpu-y += \ | 103 | amdgpu-y += \ |
| 97 | amdgpu_amdkfd.o \ | 104 | amdgpu_amdkfd.o \ |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 833c3c16501a..ff7bf1a9f967 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h | |||
| @@ -36,16 +36,18 @@ | |||
| 36 | #include <linux/hashtable.h> | 36 | #include <linux/hashtable.h> |
| 37 | #include <linux/dma-fence.h> | 37 | #include <linux/dma-fence.h> |
| 38 | 38 | ||
| 39 | #include <ttm/ttm_bo_api.h> | 39 | #include <drm/ttm/ttm_bo_api.h> |
| 40 | #include <ttm/ttm_bo_driver.h> | 40 | #include <drm/ttm/ttm_bo_driver.h> |
| 41 | #include <ttm/ttm_placement.h> | 41 | #include <drm/ttm/ttm_placement.h> |
| 42 | #include <ttm/ttm_module.h> | 42 | #include <drm/ttm/ttm_module.h> |
| 43 | #include <ttm/ttm_execbuf_util.h> | 43 | #include <drm/ttm/ttm_execbuf_util.h> |
| 44 | 44 | ||
| 45 | #include <drm/drmP.h> | 45 | #include <drm/drmP.h> |
| 46 | #include <drm/drm_gem.h> | 46 | #include <drm/drm_gem.h> |
| 47 | #include <drm/amdgpu_drm.h> | 47 | #include <drm/amdgpu_drm.h> |
| 48 | 48 | ||
| 49 | #include <kgd_kfd_interface.h> | ||
| 50 | |||
| 49 | #include "amd_shared.h" | 51 | #include "amd_shared.h" |
| 50 | #include "amdgpu_mode.h" | 52 | #include "amdgpu_mode.h" |
| 51 | #include "amdgpu_ih.h" | 53 | #include "amdgpu_ih.h" |
| @@ -62,6 +64,7 @@ | |||
| 62 | #include "amdgpu_acp.h" | 64 | #include "amdgpu_acp.h" |
| 63 | #include "amdgpu_uvd.h" | 65 | #include "amdgpu_uvd.h" |
| 64 | #include "amdgpu_vce.h" | 66 | #include "amdgpu_vce.h" |
| 67 | #include "amdgpu_vcn.h" | ||
| 65 | 68 | ||
| 66 | #include "gpu_scheduler.h" | 69 | #include "gpu_scheduler.h" |
| 67 | #include "amdgpu_virt.h" | 70 | #include "amdgpu_virt.h" |
| @@ -92,6 +95,7 @@ extern int amdgpu_vm_size; | |||
| 92 | extern int amdgpu_vm_block_size; | 95 | extern int amdgpu_vm_block_size; |
| 93 | extern int amdgpu_vm_fault_stop; | 96 | extern int amdgpu_vm_fault_stop; |
| 94 | extern int amdgpu_vm_debug; | 97 | extern int amdgpu_vm_debug; |
| 98 | extern int amdgpu_vm_update_mode; | ||
| 95 | extern int amdgpu_sched_jobs; | 99 | extern int amdgpu_sched_jobs; |
| 96 | extern int amdgpu_sched_hw_submission; | 100 | extern int amdgpu_sched_hw_submission; |
| 97 | extern int amdgpu_no_evict; | 101 | extern int amdgpu_no_evict; |
| @@ -109,6 +113,15 @@ extern int amdgpu_prim_buf_per_se; | |||
| 109 | extern int amdgpu_pos_buf_per_se; | 113 | extern int amdgpu_pos_buf_per_se; |
| 110 | extern int amdgpu_cntl_sb_buf_per_se; | 114 | extern int amdgpu_cntl_sb_buf_per_se; |
| 111 | extern int amdgpu_param_buf_per_se; | 115 | extern int amdgpu_param_buf_per_se; |
| 116 | extern int amdgpu_job_hang_limit; | ||
| 117 | extern int amdgpu_lbpw; | ||
| 118 | |||
| 119 | #ifdef CONFIG_DRM_AMDGPU_SI | ||
| 120 | extern int amdgpu_si_support; | ||
| 121 | #endif | ||
| 122 | #ifdef CONFIG_DRM_AMDGPU_CIK | ||
| 123 | extern int amdgpu_cik_support; | ||
| 124 | #endif | ||
| 112 | 125 | ||
| 113 | #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ | 126 | #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ |
| 114 | #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 | 127 | #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 |
| @@ -305,8 +318,8 @@ struct amdgpu_gart_funcs { | |||
| 305 | /* set pte flags based per asic */ | 318 | /* set pte flags based per asic */ |
| 306 | uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, | 319 | uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, |
| 307 | uint32_t flags); | 320 | uint32_t flags); |
| 308 | /* adjust mc addr in fb for APU case */ | 321 | /* get the pde for a given mc addr */ |
| 309 | u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr); | 322 | u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr); |
| 310 | uint32_t (*get_invalidate_req)(unsigned int vm_id); | 323 | uint32_t (*get_invalidate_req)(unsigned int vm_id); |
| 311 | }; | 324 | }; |
| 312 | 325 | ||
| @@ -554,7 +567,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); | |||
| 554 | void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); | 567 | void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); |
| 555 | int amdgpu_gart_init(struct amdgpu_device *adev); | 568 | int amdgpu_gart_init(struct amdgpu_device *adev); |
| 556 | void amdgpu_gart_fini(struct amdgpu_device *adev); | 569 | void amdgpu_gart_fini(struct amdgpu_device *adev); |
| 557 | void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, | 570 | int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, |
| 558 | int pages); | 571 | int pages); |
| 559 | int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, | 572 | int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, |
| 560 | int pages, struct page **pagelist, | 573 | int pages, struct page **pagelist, |
| @@ -602,6 +615,7 @@ struct amdgpu_mc { | |||
| 602 | uint32_t srbm_soft_reset; | 615 | uint32_t srbm_soft_reset; |
| 603 | struct amdgpu_mode_mc_save save; | 616 | struct amdgpu_mode_mc_save save; |
| 604 | bool prt_warning; | 617 | bool prt_warning; |
| 618 | uint64_t stolen_size; | ||
| 605 | /* apertures */ | 619 | /* apertures */ |
| 606 | u64 shared_aperture_start; | 620 | u64 shared_aperture_start; |
| 607 | u64 shared_aperture_end; | 621 | u64 shared_aperture_end; |
| @@ -772,6 +786,29 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, | |||
| 772 | struct dma_fence **f); | 786 | struct dma_fence **f); |
| 773 | 787 | ||
| 774 | /* | 788 | /* |
| 789 | * Queue manager | ||
| 790 | */ | ||
| 791 | struct amdgpu_queue_mapper { | ||
| 792 | int hw_ip; | ||
| 793 | struct mutex lock; | ||
| 794 | /* protected by lock */ | ||
| 795 | struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS]; | ||
| 796 | }; | ||
| 797 | |||
| 798 | struct amdgpu_queue_mgr { | ||
| 799 | struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM]; | ||
| 800 | }; | ||
| 801 | |||
| 802 | int amdgpu_queue_mgr_init(struct amdgpu_device *adev, | ||
| 803 | struct amdgpu_queue_mgr *mgr); | ||
| 804 | int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, | ||
| 805 | struct amdgpu_queue_mgr *mgr); | ||
| 806 | int amdgpu_queue_mgr_map(struct amdgpu_device *adev, | ||
| 807 | struct amdgpu_queue_mgr *mgr, | ||
| 808 | int hw_ip, int instance, int ring, | ||
| 809 | struct amdgpu_ring **out_ring); | ||
| 810 | |||
| 811 | /* | ||
| 775 | * context related structures | 812 | * context related structures |
| 776 | */ | 813 | */ |
| 777 | 814 | ||
| @@ -784,6 +821,7 @@ struct amdgpu_ctx_ring { | |||
| 784 | struct amdgpu_ctx { | 821 | struct amdgpu_ctx { |
| 785 | struct kref refcount; | 822 | struct kref refcount; |
| 786 | struct amdgpu_device *adev; | 823 | struct amdgpu_device *adev; |
| 824 | struct amdgpu_queue_mgr queue_mgr; | ||
| 787 | unsigned reset_counter; | 825 | unsigned reset_counter; |
| 788 | spinlock_t ring_lock; | 826 | spinlock_t ring_lock; |
| 789 | struct dma_fence **fences; | 827 | struct dma_fence **fences; |
| @@ -822,6 +860,7 @@ struct amdgpu_fpriv { | |||
| 822 | struct mutex bo_list_lock; | 860 | struct mutex bo_list_lock; |
| 823 | struct idr bo_list_handles; | 861 | struct idr bo_list_handles; |
| 824 | struct amdgpu_ctx_mgr ctx_mgr; | 862 | struct amdgpu_ctx_mgr ctx_mgr; |
| 863 | u32 vram_lost_counter; | ||
| 825 | }; | 864 | }; |
| 826 | 865 | ||
| 827 | /* | 866 | /* |
| @@ -830,6 +869,8 @@ struct amdgpu_fpriv { | |||
| 830 | 869 | ||
| 831 | struct amdgpu_bo_list { | 870 | struct amdgpu_bo_list { |
| 832 | struct mutex lock; | 871 | struct mutex lock; |
| 872 | struct rcu_head rhead; | ||
| 873 | struct kref refcount; | ||
| 833 | struct amdgpu_bo *gds_obj; | 874 | struct amdgpu_bo *gds_obj; |
| 834 | struct amdgpu_bo *gws_obj; | 875 | struct amdgpu_bo *gws_obj; |
| 835 | struct amdgpu_bo *oa_obj; | 876 | struct amdgpu_bo *oa_obj; |
| @@ -893,20 +934,26 @@ struct amdgpu_rlc { | |||
| 893 | u32 *register_restore; | 934 | u32 *register_restore; |
| 894 | }; | 935 | }; |
| 895 | 936 | ||
| 937 | #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES | ||
| 938 | |||
| 896 | struct amdgpu_mec { | 939 | struct amdgpu_mec { |
| 897 | struct amdgpu_bo *hpd_eop_obj; | 940 | struct amdgpu_bo *hpd_eop_obj; |
| 898 | u64 hpd_eop_gpu_addr; | 941 | u64 hpd_eop_gpu_addr; |
| 899 | struct amdgpu_bo *mec_fw_obj; | 942 | struct amdgpu_bo *mec_fw_obj; |
| 900 | u64 mec_fw_gpu_addr; | 943 | u64 mec_fw_gpu_addr; |
| 901 | u32 num_pipe; | ||
| 902 | u32 num_mec; | 944 | u32 num_mec; |
| 903 | u32 num_queue; | 945 | u32 num_pipe_per_mec; |
| 946 | u32 num_queue_per_pipe; | ||
| 904 | void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; | 947 | void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; |
| 948 | |||
| 949 | /* These are the resources for which amdgpu takes ownership */ | ||
| 950 | DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); | ||
| 905 | }; | 951 | }; |
| 906 | 952 | ||
| 907 | struct amdgpu_kiq { | 953 | struct amdgpu_kiq { |
| 908 | u64 eop_gpu_addr; | 954 | u64 eop_gpu_addr; |
| 909 | struct amdgpu_bo *eop_obj; | 955 | struct amdgpu_bo *eop_obj; |
| 956 | struct mutex ring_mutex; | ||
| 910 | struct amdgpu_ring ring; | 957 | struct amdgpu_ring ring; |
| 911 | struct amdgpu_irq_src irq; | 958 | struct amdgpu_irq_src irq; |
| 912 | }; | 959 | }; |
| @@ -981,9 +1028,15 @@ struct amdgpu_gfx_config { | |||
| 981 | }; | 1028 | }; |
| 982 | 1029 | ||
| 983 | struct amdgpu_cu_info { | 1030 | struct amdgpu_cu_info { |
| 984 | uint32_t number; /* total active CU number */ | 1031 | uint32_t max_waves_per_simd; |
| 985 | uint32_t ao_cu_mask; | ||
| 986 | uint32_t wave_front_size; | 1032 | uint32_t wave_front_size; |
| 1033 | uint32_t max_scratch_slots_per_cu; | ||
| 1034 | uint32_t lds_size; | ||
| 1035 | |||
| 1036 | /* total active CU number */ | ||
| 1037 | uint32_t number; | ||
| 1038 | uint32_t ao_cu_mask; | ||
| 1039 | uint32_t ao_cu_bitmap[4][4]; | ||
| 987 | uint32_t bitmap[4][4]; | 1040 | uint32_t bitmap[4][4]; |
| 988 | }; | 1041 | }; |
| 989 | 1042 | ||
| @@ -1061,6 +1114,8 @@ struct amdgpu_gfx { | |||
| 1061 | uint32_t grbm_soft_reset; | 1114 | uint32_t grbm_soft_reset; |
| 1062 | uint32_t srbm_soft_reset; | 1115 | uint32_t srbm_soft_reset; |
| 1063 | bool in_reset; | 1116 | bool in_reset; |
| 1117 | /* s3/s4 mask */ | ||
| 1118 | bool in_suspend; | ||
| 1064 | /* NGG */ | 1119 | /* NGG */ |
| 1065 | struct amdgpu_ngg ngg; | 1120 | struct amdgpu_ngg ngg; |
| 1066 | }; | 1121 | }; |
| @@ -1109,12 +1164,14 @@ struct amdgpu_cs_parser { | |||
| 1109 | 1164 | ||
| 1110 | /* user fence */ | 1165 | /* user fence */ |
| 1111 | struct amdgpu_bo_list_entry uf_entry; | 1166 | struct amdgpu_bo_list_entry uf_entry; |
| 1167 | |||
| 1168 | unsigned num_post_dep_syncobjs; | ||
| 1169 | struct drm_syncobj **post_dep_syncobjs; | ||
| 1112 | }; | 1170 | }; |
| 1113 | 1171 | ||
| 1114 | #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ | 1172 | #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ |
| 1115 | #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ | 1173 | #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ |
| 1116 | #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ | 1174 | #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ |
| 1117 | #define AMDGPU_VM_DOMAIN (1 << 3) /* bit set means in virtual memory context */ | ||
| 1118 | 1175 | ||
| 1119 | struct amdgpu_job { | 1176 | struct amdgpu_job { |
| 1120 | struct amd_sched_job base; | 1177 | struct amd_sched_job base; |
| @@ -1122,6 +1179,8 @@ struct amdgpu_job { | |||
| 1122 | struct amdgpu_vm *vm; | 1179 | struct amdgpu_vm *vm; |
| 1123 | struct amdgpu_ring *ring; | 1180 | struct amdgpu_ring *ring; |
| 1124 | struct amdgpu_sync sync; | 1181 | struct amdgpu_sync sync; |
| 1182 | struct amdgpu_sync dep_sync; | ||
| 1183 | struct amdgpu_sync sched_sync; | ||
| 1125 | struct amdgpu_ib *ibs; | 1184 | struct amdgpu_ib *ibs; |
| 1126 | struct dma_fence *fence; /* the hw fence */ | 1185 | struct dma_fence *fence; /* the hw fence */ |
| 1127 | uint32_t preamble_status; | 1186 | uint32_t preamble_status; |
| @@ -1129,7 +1188,6 @@ struct amdgpu_job { | |||
| 1129 | void *owner; | 1188 | void *owner; |
| 1130 | uint64_t fence_ctx; /* the fence_context this job uses */ | 1189 | uint64_t fence_ctx; /* the fence_context this job uses */ |
| 1131 | bool vm_needs_flush; | 1190 | bool vm_needs_flush; |
| 1132 | bool need_pipeline_sync; | ||
| 1133 | unsigned vm_id; | 1191 | unsigned vm_id; |
| 1134 | uint64_t vm_pd_addr; | 1192 | uint64_t vm_pd_addr; |
| 1135 | uint32_t gds_base, gds_size; | 1193 | uint32_t gds_base, gds_size; |
| @@ -1221,6 +1279,9 @@ struct amdgpu_firmware { | |||
| 1221 | const struct amdgpu_psp_funcs *funcs; | 1279 | const struct amdgpu_psp_funcs *funcs; |
| 1222 | struct amdgpu_bo *rbuf; | 1280 | struct amdgpu_bo *rbuf; |
| 1223 | struct mutex mutex; | 1281 | struct mutex mutex; |
| 1282 | |||
| 1283 | /* gpu info firmware data pointer */ | ||
| 1284 | const struct firmware *gpu_info_fw; | ||
| 1224 | }; | 1285 | }; |
| 1225 | 1286 | ||
| 1226 | /* | 1287 | /* |
| @@ -1296,7 +1357,6 @@ struct amdgpu_smumgr { | |||
| 1296 | */ | 1357 | */ |
| 1297 | struct amdgpu_allowed_register_entry { | 1358 | struct amdgpu_allowed_register_entry { |
| 1298 | uint32_t reg_offset; | 1359 | uint32_t reg_offset; |
| 1299 | bool untouched; | ||
| 1300 | bool grbm_indexed; | 1360 | bool grbm_indexed; |
| 1301 | }; | 1361 | }; |
| 1302 | 1362 | ||
| @@ -1424,6 +1484,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); | |||
| 1424 | typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); | 1484 | typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); |
| 1425 | typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); | 1485 | typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); |
| 1426 | 1486 | ||
| 1487 | #define AMDGPU_RESET_MAGIC_NUM 64 | ||
| 1427 | struct amdgpu_device { | 1488 | struct amdgpu_device { |
| 1428 | struct device *dev; | 1489 | struct device *dev; |
| 1429 | struct drm_device *ddev; | 1490 | struct drm_device *ddev; |
| @@ -1523,7 +1584,9 @@ struct amdgpu_device { | |||
| 1523 | atomic64_t gtt_usage; | 1584 | atomic64_t gtt_usage; |
| 1524 | atomic64_t num_bytes_moved; | 1585 | atomic64_t num_bytes_moved; |
| 1525 | atomic64_t num_evictions; | 1586 | atomic64_t num_evictions; |
| 1587 | atomic64_t num_vram_cpu_page_faults; | ||
| 1526 | atomic_t gpu_reset_counter; | 1588 | atomic_t gpu_reset_counter; |
| 1589 | atomic_t vram_lost_counter; | ||
| 1527 | 1590 | ||
| 1528 | /* data for buffer migration throttling */ | 1591 | /* data for buffer migration throttling */ |
| 1529 | struct { | 1592 | struct { |
| @@ -1570,11 +1633,18 @@ struct amdgpu_device { | |||
| 1570 | /* sdma */ | 1633 | /* sdma */ |
| 1571 | struct amdgpu_sdma sdma; | 1634 | struct amdgpu_sdma sdma; |
| 1572 | 1635 | ||
| 1573 | /* uvd */ | 1636 | union { |
| 1574 | struct amdgpu_uvd uvd; | 1637 | struct { |
| 1638 | /* uvd */ | ||
| 1639 | struct amdgpu_uvd uvd; | ||
| 1640 | |||
| 1641 | /* vce */ | ||
| 1642 | struct amdgpu_vce vce; | ||
| 1643 | }; | ||
| 1575 | 1644 | ||
| 1576 | /* vce */ | 1645 | /* vcn */ |
| 1577 | struct amdgpu_vce vce; | 1646 | struct amdgpu_vcn vcn; |
| 1647 | }; | ||
| 1578 | 1648 | ||
| 1579 | /* firmwares */ | 1649 | /* firmwares */ |
| 1580 | struct amdgpu_firmware firmware; | 1650 | struct amdgpu_firmware firmware; |
| @@ -1598,6 +1668,9 @@ struct amdgpu_device { | |||
| 1598 | /* amdkfd interface */ | 1668 | /* amdkfd interface */ |
| 1599 | struct kfd_dev *kfd; | 1669 | struct kfd_dev *kfd; |
| 1600 | 1670 | ||
| 1671 | /* delayed work_func for deferring clockgating during resume */ | ||
| 1672 | struct delayed_work late_init_work; | ||
| 1673 | |||
| 1601 | struct amdgpu_virt virt; | 1674 | struct amdgpu_virt virt; |
| 1602 | 1675 | ||
| 1603 | /* link all shadow bo */ | 1676 | /* link all shadow bo */ |
| @@ -1606,9 +1679,13 @@ struct amdgpu_device { | |||
| 1606 | /* link all gtt */ | 1679 | /* link all gtt */ |
| 1607 | spinlock_t gtt_list_lock; | 1680 | spinlock_t gtt_list_lock; |
| 1608 | struct list_head gtt_list; | 1681 | struct list_head gtt_list; |
| 1682 | /* keep an lru list of rings by HW IP */ | ||
| 1683 | struct list_head ring_lru_list; | ||
| 1684 | spinlock_t ring_lru_list_lock; | ||
| 1609 | 1685 | ||
| 1610 | /* record hw reset is performed */ | 1686 | /* record hw reset is performed */ |
| 1611 | bool has_hw_reset; | 1687 | bool has_hw_reset; |
| 1688 | u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; | ||
| 1612 | 1689 | ||
| 1613 | }; | 1690 | }; |
| 1614 | 1691 | ||
| @@ -1617,7 +1694,6 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) | |||
| 1617 | return container_of(bdev, struct amdgpu_device, mman.bdev); | 1694 | return container_of(bdev, struct amdgpu_device, mman.bdev); |
| 1618 | } | 1695 | } |
| 1619 | 1696 | ||
| 1620 | bool amdgpu_device_is_px(struct drm_device *dev); | ||
| 1621 | int amdgpu_device_init(struct amdgpu_device *adev, | 1697 | int amdgpu_device_init(struct amdgpu_device *adev, |
| 1622 | struct drm_device *ddev, | 1698 | struct drm_device *ddev, |
| 1623 | struct pci_dev *pdev, | 1699 | struct pci_dev *pdev, |
| @@ -1733,30 +1809,31 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr | |||
| 1733 | unsigned occupied, chunk1, chunk2; | 1809 | unsigned occupied, chunk1, chunk2; |
| 1734 | void *dst; | 1810 | void *dst; |
| 1735 | 1811 | ||
| 1736 | if (ring->count_dw < count_dw) { | 1812 | if (unlikely(ring->count_dw < count_dw)) { |
| 1737 | DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); | 1813 | DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); |
| 1738 | } else { | 1814 | return; |
| 1739 | occupied = ring->wptr & ring->buf_mask; | 1815 | } |
| 1740 | dst = (void *)&ring->ring[occupied]; | 1816 | |
| 1741 | chunk1 = ring->buf_mask + 1 - occupied; | 1817 | occupied = ring->wptr & ring->buf_mask; |
| 1742 | chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; | 1818 | dst = (void *)&ring->ring[occupied]; |
| 1743 | chunk2 = count_dw - chunk1; | 1819 | chunk1 = ring->buf_mask + 1 - occupied; |
| 1744 | chunk1 <<= 2; | 1820 | chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; |
| 1745 | chunk2 <<= 2; | 1821 | chunk2 = count_dw - chunk1; |
| 1746 | 1822 | chunk1 <<= 2; | |
| 1747 | if (chunk1) | 1823 | chunk2 <<= 2; |
| 1748 | memcpy(dst, src, chunk1); | 1824 | |
| 1749 | 1825 | if (chunk1) | |
| 1750 | if (chunk2) { | 1826 | memcpy(dst, src, chunk1); |
| 1751 | src += chunk1; | 1827 | |
| 1752 | dst = (void *)ring->ring; | 1828 | if (chunk2) { |
| 1753 | memcpy(dst, src, chunk2); | 1829 | src += chunk1; |
| 1754 | } | 1830 | dst = (void *)ring->ring; |
| 1755 | 1831 | memcpy(dst, src, chunk2); | |
| 1756 | ring->wptr += count_dw; | ||
| 1757 | ring->wptr &= ring->ptr_mask; | ||
| 1758 | ring->count_dw -= count_dw; | ||
| 1759 | } | 1832 | } |
| 1833 | |||
| 1834 | ring->wptr += count_dw; | ||
| 1835 | ring->wptr &= ring->ptr_mask; | ||
| 1836 | ring->count_dw -= count_dw; | ||
| 1760 | } | 1837 | } |
| 1761 | 1838 | ||
| 1762 | static inline struct amdgpu_sdma_instance * | 1839 | static inline struct amdgpu_sdma_instance * |
| @@ -1792,6 +1869,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) | |||
| 1792 | #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) | 1869 | #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) |
| 1793 | #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) | 1870 | #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) |
| 1794 | #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) | 1871 | #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) |
| 1872 | #define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr)) | ||
| 1795 | #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) | 1873 | #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) |
| 1796 | #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) | 1874 | #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) |
| 1797 | #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) | 1875 | #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) |
| @@ -1813,6 +1891,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) | |||
| 1813 | #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) | 1891 | #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) |
| 1814 | #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) | 1892 | #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) |
| 1815 | #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) | 1893 | #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) |
| 1894 | #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) | ||
| 1816 | #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) | 1895 | #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) |
| 1817 | #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) | 1896 | #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) |
| 1818 | #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) | 1897 | #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) |
| @@ -1848,10 +1927,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev); | |||
| 1848 | bool amdgpu_need_post(struct amdgpu_device *adev); | 1927 | bool amdgpu_need_post(struct amdgpu_device *adev); |
| 1849 | void amdgpu_update_display_priority(struct amdgpu_device *adev); | 1928 | void amdgpu_update_display_priority(struct amdgpu_device *adev); |
| 1850 | 1929 | ||
| 1851 | int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data); | ||
| 1852 | int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, | ||
| 1853 | u32 ip_instance, u32 ring, | ||
| 1854 | struct amdgpu_ring **out_ring); | ||
| 1855 | void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes); | 1930 | void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes); |
| 1856 | void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); | 1931 | void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); |
| 1857 | bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); | 1932 | bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); |
| @@ -1900,6 +1975,8 @@ static inline bool amdgpu_has_atpx(void) { return false; } | |||
| 1900 | extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; | 1975 | extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; |
| 1901 | extern const int amdgpu_max_kms_ioctl; | 1976 | extern const int amdgpu_max_kms_ioctl; |
| 1902 | 1977 | ||
| 1978 | bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, | ||
| 1979 | struct amdgpu_fpriv *fpriv); | ||
| 1903 | int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); | 1980 | int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); |
| 1904 | void amdgpu_driver_unload_kms(struct drm_device *dev); | 1981 | void amdgpu_driver_unload_kms(struct drm_device *dev); |
| 1905 | void amdgpu_driver_lastclose_kms(struct drm_device *dev); | 1982 | void amdgpu_driver_lastclose_kms(struct drm_device *dev); |
| @@ -1912,10 +1989,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon); | |||
| 1912 | u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe); | 1989 | u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe); |
| 1913 | int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe); | 1990 | int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe); |
| 1914 | void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe); | 1991 | void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe); |
| 1915 | int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, | ||
| 1916 | int *max_error, | ||
| 1917 | struct timeval *vblank_time, | ||
| 1918 | unsigned flags); | ||
| 1919 | long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, | 1992 | long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, |
| 1920 | unsigned long arg); | 1993 | unsigned long arg); |
| 1921 | 1994 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index dba8a5b25e66..37971d9402e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "amd_shared.h" | 24 | #include "amd_shared.h" |
| 25 | #include <drm/drmP.h> | 25 | #include <drm/drmP.h> |
| 26 | #include "amdgpu.h" | 26 | #include "amdgpu.h" |
| 27 | #include "amdgpu_gfx.h" | ||
| 27 | #include <linux/module.h> | 28 | #include <linux/module.h> |
| 28 | 29 | ||
| 29 | const struct kfd2kgd_calls *kfd2kgd; | 30 | const struct kfd2kgd_calls *kfd2kgd; |
| @@ -60,9 +61,9 @@ int amdgpu_amdkfd_init(void) | |||
| 60 | return ret; | 61 | return ret; |
| 61 | } | 62 | } |
| 62 | 63 | ||
| 63 | bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev) | 64 | bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev) |
| 64 | { | 65 | { |
| 65 | switch (rdev->asic_type) { | 66 | switch (adev->asic_type) { |
| 66 | #ifdef CONFIG_DRM_AMDGPU_CIK | 67 | #ifdef CONFIG_DRM_AMDGPU_CIK |
| 67 | case CHIP_KAVERI: | 68 | case CHIP_KAVERI: |
| 68 | kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); | 69 | kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); |
| @@ -86,59 +87,82 @@ void amdgpu_amdkfd_fini(void) | |||
| 86 | } | 87 | } |
| 87 | } | 88 | } |
| 88 | 89 | ||
| 89 | void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev) | 90 | void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) |
| 90 | { | 91 | { |
| 91 | if (kgd2kfd) | 92 | if (kgd2kfd) |
| 92 | rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, | 93 | adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, |
| 93 | rdev->pdev, kfd2kgd); | 94 | adev->pdev, kfd2kgd); |
| 94 | } | 95 | } |
| 95 | 96 | ||
| 96 | void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev) | 97 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) |
| 97 | { | 98 | { |
| 98 | if (rdev->kfd) { | 99 | int i; |
| 100 | int last_valid_bit; | ||
| 101 | if (adev->kfd) { | ||
| 99 | struct kgd2kfd_shared_resources gpu_resources = { | 102 | struct kgd2kfd_shared_resources gpu_resources = { |
| 100 | .compute_vmid_bitmap = 0xFF00, | 103 | .compute_vmid_bitmap = 0xFF00, |
| 101 | 104 | .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, | |
| 102 | .first_compute_pipe = 1, | 105 | .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe |
| 103 | .compute_pipe_count = 4 - 1, | ||
| 104 | }; | 106 | }; |
| 105 | 107 | ||
| 106 | amdgpu_doorbell_get_kfd_info(rdev, | 108 | /* this is going to have a few of the MSBs set that we need to |
| 109 | * clear */ | ||
| 110 | bitmap_complement(gpu_resources.queue_bitmap, | ||
| 111 | adev->gfx.mec.queue_bitmap, | ||
| 112 | KGD_MAX_QUEUES); | ||
| 113 | |||
| 114 | /* remove the KIQ bit as well */ | ||
| 115 | if (adev->gfx.kiq.ring.ready) | ||
| 116 | clear_bit(amdgpu_gfx_queue_to_bit(adev, | ||
| 117 | adev->gfx.kiq.ring.me - 1, | ||
| 118 | adev->gfx.kiq.ring.pipe, | ||
| 119 | adev->gfx.kiq.ring.queue), | ||
| 120 | gpu_resources.queue_bitmap); | ||
| 121 | |||
| 122 | /* According to linux/bitmap.h we shouldn't use bitmap_clear if | ||
| 123 | * nbits is not compile time constant */ | ||
| 124 | last_valid_bit = 1 /* only first MEC can have compute queues */ | ||
| 125 | * adev->gfx.mec.num_pipe_per_mec | ||
| 126 | * adev->gfx.mec.num_queue_per_pipe; | ||
| 127 | for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) | ||
| 128 | clear_bit(i, gpu_resources.queue_bitmap); | ||
| 129 | |||
| 130 | amdgpu_doorbell_get_kfd_info(adev, | ||
| 107 | &gpu_resources.doorbell_physical_address, | 131 | &gpu_resources.doorbell_physical_address, |
| 108 | &gpu_resources.doorbell_aperture_size, | 132 | &gpu_resources.doorbell_aperture_size, |
| 109 | &gpu_resources.doorbell_start_offset); | 133 | &gpu_resources.doorbell_start_offset); |
| 110 | 134 | ||
| 111 | kgd2kfd->device_init(rdev->kfd, &gpu_resources); | 135 | kgd2kfd->device_init(adev->kfd, &gpu_resources); |
| 112 | } | 136 | } |
| 113 | } | 137 | } |
| 114 | 138 | ||
| 115 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev) | 139 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) |
| 116 | { | 140 | { |
| 117 | if (rdev->kfd) { | 141 | if (adev->kfd) { |
| 118 | kgd2kfd->device_exit(rdev->kfd); | 142 | kgd2kfd->device_exit(adev->kfd); |
| 119 | rdev->kfd = NULL; | 143 | adev->kfd = NULL; |
| 120 | } | 144 | } |
| 121 | } | 145 | } |
| 122 | 146 | ||
| 123 | void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, | 147 | void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, |
| 124 | const void *ih_ring_entry) | 148 | const void *ih_ring_entry) |
| 125 | { | 149 | { |
| 126 | if (rdev->kfd) | 150 | if (adev->kfd) |
| 127 | kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); | 151 | kgd2kfd->interrupt(adev->kfd, ih_ring_entry); |
| 128 | } | 152 | } |
| 129 | 153 | ||
| 130 | void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev) | 154 | void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) |
| 131 | { | 155 | { |
| 132 | if (rdev->kfd) | 156 | if (adev->kfd) |
| 133 | kgd2kfd->suspend(rdev->kfd); | 157 | kgd2kfd->suspend(adev->kfd); |
| 134 | } | 158 | } |
| 135 | 159 | ||
| 136 | int amdgpu_amdkfd_resume(struct amdgpu_device *rdev) | 160 | int amdgpu_amdkfd_resume(struct amdgpu_device *adev) |
| 137 | { | 161 | { |
| 138 | int r = 0; | 162 | int r = 0; |
| 139 | 163 | ||
| 140 | if (rdev->kfd) | 164 | if (adev->kfd) |
| 141 | r = kgd2kfd->resume(rdev->kfd); | 165 | r = kgd2kfd->resume(adev->kfd); |
| 142 | 166 | ||
| 143 | return r; | 167 | return r; |
| 144 | } | 168 | } |
| @@ -147,7 +171,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | |||
| 147 | void **mem_obj, uint64_t *gpu_addr, | 171 | void **mem_obj, uint64_t *gpu_addr, |
| 148 | void **cpu_ptr) | 172 | void **cpu_ptr) |
| 149 | { | 173 | { |
| 150 | struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; | 174 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
| 151 | struct kgd_mem **mem = (struct kgd_mem **) mem_obj; | 175 | struct kgd_mem **mem = (struct kgd_mem **) mem_obj; |
| 152 | int r; | 176 | int r; |
| 153 | 177 | ||
| @@ -159,10 +183,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | |||
| 159 | if ((*mem) == NULL) | 183 | if ((*mem) == NULL) |
| 160 | return -ENOMEM; | 184 | return -ENOMEM; |
| 161 | 185 | ||
| 162 | r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, | 186 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, |
| 163 | AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); | 187 | AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); |
| 164 | if (r) { | 188 | if (r) { |
| 165 | dev_err(rdev->dev, | 189 | dev_err(adev->dev, |
| 166 | "failed to allocate BO for amdkfd (%d)\n", r); | 190 | "failed to allocate BO for amdkfd (%d)\n", r); |
| 167 | return r; | 191 | return r; |
| 168 | } | 192 | } |
| @@ -170,21 +194,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | |||
| 170 | /* map the buffer */ | 194 | /* map the buffer */ |
| 171 | r = amdgpu_bo_reserve((*mem)->bo, true); | 195 | r = amdgpu_bo_reserve((*mem)->bo, true); |
| 172 | if (r) { | 196 | if (r) { |
| 173 | dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); | 197 | dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); |
| 174 | goto allocate_mem_reserve_bo_failed; | 198 | goto allocate_mem_reserve_bo_failed; |
| 175 | } | 199 | } |
| 176 | 200 | ||
| 177 | r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, | 201 | r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, |
| 178 | &(*mem)->gpu_addr); | 202 | &(*mem)->gpu_addr); |
| 179 | if (r) { | 203 | if (r) { |
| 180 | dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); | 204 | dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); |
| 181 | goto allocate_mem_pin_bo_failed; | 205 | goto allocate_mem_pin_bo_failed; |
| 182 | } | 206 | } |
| 183 | *gpu_addr = (*mem)->gpu_addr; | 207 | *gpu_addr = (*mem)->gpu_addr; |
| 184 | 208 | ||
| 185 | r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); | 209 | r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); |
| 186 | if (r) { | 210 | if (r) { |
| 187 | dev_err(rdev->dev, | 211 | dev_err(adev->dev, |
| 188 | "(%d) failed to map bo to kernel for amdkfd\n", r); | 212 | "(%d) failed to map bo to kernel for amdkfd\n", r); |
| 189 | goto allocate_mem_kmap_bo_failed; | 213 | goto allocate_mem_kmap_bo_failed; |
| 190 | } | 214 | } |
| @@ -220,27 +244,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) | |||
| 220 | 244 | ||
| 221 | uint64_t get_vmem_size(struct kgd_dev *kgd) | 245 | uint64_t get_vmem_size(struct kgd_dev *kgd) |
| 222 | { | 246 | { |
| 223 | struct amdgpu_device *rdev = | 247 | struct amdgpu_device *adev = |
| 224 | (struct amdgpu_device *)kgd; | 248 | (struct amdgpu_device *)kgd; |
| 225 | 249 | ||
| 226 | BUG_ON(kgd == NULL); | 250 | BUG_ON(kgd == NULL); |
| 227 | 251 | ||
| 228 | return rdev->mc.real_vram_size; | 252 | return adev->mc.real_vram_size; |
| 229 | } | 253 | } |
| 230 | 254 | ||
| 231 | uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) | 255 | uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) |
| 232 | { | 256 | { |
| 233 | struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; | 257 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
| 234 | 258 | ||
| 235 | if (rdev->gfx.funcs->get_gpu_clock_counter) | 259 | if (adev->gfx.funcs->get_gpu_clock_counter) |
| 236 | return rdev->gfx.funcs->get_gpu_clock_counter(rdev); | 260 | return adev->gfx.funcs->get_gpu_clock_counter(adev); |
| 237 | return 0; | 261 | return 0; |
| 238 | } | 262 | } |
| 239 | 263 | ||
| 240 | uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) | 264 | uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) |
| 241 | { | 265 | { |
| 242 | struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; | 266 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
| 243 | 267 | ||
| 244 | /* The sclk is in quantas of 10kHz */ | 268 | /* The sclk is in quantas of 10kHz */ |
| 245 | return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; | 269 | return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; |
| 246 | } | 270 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index de530f68d4e3..73f83a10ae14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | |||
| @@ -39,15 +39,15 @@ struct kgd_mem { | |||
| 39 | int amdgpu_amdkfd_init(void); | 39 | int amdgpu_amdkfd_init(void); |
| 40 | void amdgpu_amdkfd_fini(void); | 40 | void amdgpu_amdkfd_fini(void); |
| 41 | 41 | ||
| 42 | bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev); | 42 | bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev); |
| 43 | 43 | ||
| 44 | void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev); | 44 | void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); |
| 45 | int amdgpu_amdkfd_resume(struct amdgpu_device *rdev); | 45 | int amdgpu_amdkfd_resume(struct amdgpu_device *adev); |
| 46 | void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, | 46 | void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, |
| 47 | const void *ih_ring_entry); | 47 | const void *ih_ring_entry); |
| 48 | void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev); | 48 | void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); |
| 49 | void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev); | 49 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); |
| 50 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev); | 50 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); |
| 51 | 51 | ||
| 52 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); | 52 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); |
| 53 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); | 53 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1a0a5f7cccbc..5254562fd0f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include "cikd.h" | 29 | #include "cikd.h" |
| 30 | #include "cik_sdma.h" | 30 | #include "cik_sdma.h" |
| 31 | #include "amdgpu_ucode.h" | 31 | #include "amdgpu_ucode.h" |
| 32 | #include "gfx_v7_0.h" | ||
| 32 | #include "gca/gfx_7_2_d.h" | 33 | #include "gca/gfx_7_2_d.h" |
| 33 | #include "gca/gfx_7_2_enum.h" | 34 | #include "gca/gfx_7_2_enum.h" |
| 34 | #include "gca/gfx_7_2_sh_mask.h" | 35 | #include "gca/gfx_7_2_sh_mask.h" |
| @@ -38,8 +39,6 @@ | |||
| 38 | #include "gmc/gmc_7_1_sh_mask.h" | 39 | #include "gmc/gmc_7_1_sh_mask.h" |
| 39 | #include "cik_structs.h" | 40 | #include "cik_structs.h" |
| 40 | 41 | ||
| 41 | #define CIK_PIPE_PER_MEC (4) | ||
| 42 | |||
| 43 | enum { | 42 | enum { |
| 44 | MAX_TRAPID = 8, /* 3 bits in the bitfield. */ | 43 | MAX_TRAPID = 8, /* 3 bits in the bitfield. */ |
| 45 | MAX_WATCH_ADDRESSES = 4 | 44 | MAX_WATCH_ADDRESSES = 4 |
| @@ -185,8 +184,10 @@ static void unlock_srbm(struct kgd_dev *kgd) | |||
| 185 | static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, | 184 | static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, |
| 186 | uint32_t queue_id) | 185 | uint32_t queue_id) |
| 187 | { | 186 | { |
| 188 | uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; | 187 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
| 189 | uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); | 188 | |
| 189 | uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | ||
| 190 | uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | ||
| 190 | 191 | ||
| 191 | lock_srbm(kgd, mec, pipe, queue_id, 0); | 192 | lock_srbm(kgd, mec, pipe, queue_id, 0); |
| 192 | } | 193 | } |
| @@ -243,18 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | |||
| 243 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, | 244 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, |
| 244 | uint32_t hpd_size, uint64_t hpd_gpu_addr) | 245 | uint32_t hpd_size, uint64_t hpd_gpu_addr) |
| 245 | { | 246 | { |
| 246 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | 247 | /* amdgpu owns the per-pipe state */ |
| 247 | |||
| 248 | uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; | ||
| 249 | uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); | ||
| 250 | |||
| 251 | lock_srbm(kgd, mec, pipe, 0, 0); | ||
| 252 | WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8)); | ||
| 253 | WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8)); | ||
| 254 | WREG32(mmCP_HPD_EOP_VMID, 0); | ||
| 255 | WREG32(mmCP_HPD_EOP_CONTROL, hpd_size); | ||
| 256 | unlock_srbm(kgd); | ||
| 257 | |||
| 258 | return 0; | 248 | return 0; |
| 259 | } | 249 | } |
| 260 | 250 | ||
| @@ -264,8 +254,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) | |||
| 264 | uint32_t mec; | 254 | uint32_t mec; |
| 265 | uint32_t pipe; | 255 | uint32_t pipe; |
| 266 | 256 | ||
| 267 | mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; | 257 | mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; |
| 268 | pipe = (pipe_id % CIK_PIPE_PER_MEC); | 258 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); |
| 269 | 259 | ||
| 270 | lock_srbm(kgd, mec, pipe, 0, 0); | 260 | lock_srbm(kgd, mec, pipe, 0, 0); |
| 271 | 261 | ||
| @@ -309,55 +299,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | |||
| 309 | m = get_mqd(mqd); | 299 | m = get_mqd(mqd); |
| 310 | 300 | ||
| 311 | is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); | 301 | is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); |
| 312 | |||
| 313 | acquire_queue(kgd, pipe_id, queue_id); | ||
| 314 | WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); | ||
| 315 | WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); | ||
| 316 | WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); | ||
| 317 | |||
| 318 | WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); | ||
| 319 | WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); | ||
| 320 | WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); | ||
| 321 | |||
| 322 | WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); | ||
| 323 | WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); | ||
| 324 | WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); | ||
| 325 | |||
| 326 | WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); | ||
| 327 | |||
| 328 | WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); | ||
| 329 | WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); | ||
| 330 | WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type); | ||
| 331 | |||
| 332 | WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo); | ||
| 333 | WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi); | ||
| 334 | WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo); | ||
| 335 | WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi); | ||
| 336 | |||
| 337 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); | ||
| 338 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, | ||
| 339 | m->cp_hqd_pq_rptr_report_addr_hi); | ||
| 340 | |||
| 341 | WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); | ||
| 342 | |||
| 343 | WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo); | ||
| 344 | WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi); | ||
| 345 | |||
| 346 | WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); | ||
| 347 | |||
| 348 | WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); | ||
| 349 | |||
| 350 | WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); | ||
| 351 | |||
| 352 | WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); | ||
| 353 | WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); | ||
| 354 | |||
| 355 | WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); | ||
| 356 | |||
| 357 | if (is_wptr_shadow_valid) | 302 | if (is_wptr_shadow_valid) |
| 358 | WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow); | 303 | m->cp_hqd_pq_wptr = wptr_shadow; |
| 359 | 304 | ||
| 360 | WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); | 305 | acquire_queue(kgd, pipe_id, queue_id); |
| 306 | gfx_v7_0_mqd_commit(adev, m); | ||
| 361 | release_queue(kgd); | 307 | release_queue(kgd); |
| 362 | 308 | ||
| 363 | return 0; | 309 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 6697612239c2..133d06671e46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include "amdgpu.h" | 28 | #include "amdgpu.h" |
| 29 | #include "amdgpu_amdkfd.h" | 29 | #include "amdgpu_amdkfd.h" |
| 30 | #include "amdgpu_ucode.h" | 30 | #include "amdgpu_ucode.h" |
| 31 | #include "gfx_v8_0.h" | ||
| 31 | #include "gca/gfx_8_0_sh_mask.h" | 32 | #include "gca/gfx_8_0_sh_mask.h" |
| 32 | #include "gca/gfx_8_0_d.h" | 33 | #include "gca/gfx_8_0_d.h" |
| 33 | #include "gca/gfx_8_0_enum.h" | 34 | #include "gca/gfx_8_0_enum.h" |
| @@ -38,8 +39,6 @@ | |||
| 38 | #include "vi_structs.h" | 39 | #include "vi_structs.h" |
| 39 | #include "vid.h" | 40 | #include "vid.h" |
| 40 | 41 | ||
| 41 | #define VI_PIPE_PER_MEC (4) | ||
| 42 | |||
| 43 | struct cik_sdma_rlc_registers; | 42 | struct cik_sdma_rlc_registers; |
| 44 | 43 | ||
| 45 | /* | 44 | /* |
| @@ -146,8 +145,10 @@ static void unlock_srbm(struct kgd_dev *kgd) | |||
| 146 | static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, | 145 | static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, |
| 147 | uint32_t queue_id) | 146 | uint32_t queue_id) |
| 148 | { | 147 | { |
| 149 | uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; | 148 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
| 150 | uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC); | 149 | |
| 150 | uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | ||
| 151 | uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | ||
| 151 | 152 | ||
| 152 | lock_srbm(kgd, mec, pipe, queue_id, 0); | 153 | lock_srbm(kgd, mec, pipe, queue_id, 0); |
| 153 | } | 154 | } |
| @@ -205,6 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | |||
| 205 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, | 206 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, |
| 206 | uint32_t hpd_size, uint64_t hpd_gpu_addr) | 207 | uint32_t hpd_size, uint64_t hpd_gpu_addr) |
| 207 | { | 208 | { |
| 209 | /* amdgpu owns the per-pipe state */ | ||
| 208 | return 0; | 210 | return 0; |
| 209 | } | 211 | } |
| 210 | 212 | ||
| @@ -214,8 +216,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) | |||
| 214 | uint32_t mec; | 216 | uint32_t mec; |
| 215 | uint32_t pipe; | 217 | uint32_t pipe; |
| 216 | 218 | ||
| 217 | mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; | 219 | mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; |
| 218 | pipe = (pipe_id % VI_PIPE_PER_MEC); | 220 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); |
| 219 | 221 | ||
| 220 | lock_srbm(kgd, mec, pipe, 0, 0); | 222 | lock_srbm(kgd, mec, pipe, 0, 0); |
| 221 | 223 | ||
| @@ -251,53 +253,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | |||
| 251 | m = get_mqd(mqd); | 253 | m = get_mqd(mqd); |
| 252 | 254 | ||
| 253 | valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); | 255 | valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); |
| 254 | acquire_queue(kgd, pipe_id, queue_id); | 256 | if (valid_wptr == 0) |
| 255 | 257 | m->cp_hqd_pq_wptr = shadow_wptr; | |
| 256 | WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); | ||
| 257 | WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); | ||
| 258 | WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); | ||
| 259 | |||
| 260 | WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); | ||
| 261 | WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); | ||
| 262 | WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); | ||
| 263 | WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); | ||
| 264 | WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); | ||
| 265 | WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); | ||
| 266 | WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); | ||
| 267 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); | ||
| 268 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, | ||
| 269 | m->cp_hqd_pq_rptr_report_addr_hi); | ||
| 270 | |||
| 271 | if (valid_wptr > 0) | ||
| 272 | WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr); | ||
| 273 | |||
| 274 | WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); | ||
| 275 | WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); | ||
| 276 | |||
| 277 | WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo); | ||
| 278 | WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi); | ||
| 279 | WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control); | ||
| 280 | WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); | ||
| 281 | WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); | ||
| 282 | WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events); | ||
| 283 | |||
| 284 | WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo); | ||
| 285 | WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi); | ||
| 286 | WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control); | ||
| 287 | WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset); | ||
| 288 | WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size); | ||
| 289 | WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset); | ||
| 290 | WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size); | ||
| 291 | |||
| 292 | WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); | ||
| 293 | |||
| 294 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request); | ||
| 295 | WREG32(mmCP_HQD_ERROR, m->cp_hqd_error); | ||
| 296 | WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); | ||
| 297 | WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones); | ||
| 298 | |||
| 299 | WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); | ||
| 300 | 258 | ||
| 259 | acquire_queue(kgd, pipe_id, queue_id); | ||
| 260 | gfx_v8_0_mqd_commit(adev, mqd); | ||
| 301 | release_queue(kgd); | 261 | release_queue(kgd); |
| 302 | 262 | ||
| 303 | return 0; | 263 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 1cf78f4dd339..1e8e1123ddf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | |||
| @@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev) | |||
| 693 | DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n", | 693 | DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n", |
| 694 | adev->clock.default_dispclk / 100); | 694 | adev->clock.default_dispclk / 100); |
| 695 | adev->clock.default_dispclk = 60000; | 695 | adev->clock.default_dispclk = 60000; |
| 696 | } else if (adev->clock.default_dispclk <= 60000) { | ||
| 697 | DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n", | ||
| 698 | adev->clock.default_dispclk / 100); | ||
| 699 | adev->clock.default_dispclk = 62500; | ||
| 696 | } | 700 | } |
| 697 | adev->clock.dp_extclk = | 701 | adev->clock.dp_extclk = |
| 698 | le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq); | 702 | le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index a6649874e6ce..f621ee115c98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | |||
| @@ -35,33 +35,59 @@ | |||
| 35 | #define AMDGPU_BO_LIST_MAX_PRIORITY 32u | 35 | #define AMDGPU_BO_LIST_MAX_PRIORITY 32u |
| 36 | #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) | 36 | #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) |
| 37 | 37 | ||
| 38 | static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, | 38 | static int amdgpu_bo_list_set(struct amdgpu_device *adev, |
| 39 | struct amdgpu_bo_list **result, | 39 | struct drm_file *filp, |
| 40 | struct amdgpu_bo_list *list, | ||
| 41 | struct drm_amdgpu_bo_list_entry *info, | ||
| 42 | unsigned num_entries); | ||
| 43 | |||
| 44 | static void amdgpu_bo_list_release_rcu(struct kref *ref) | ||
| 45 | { | ||
| 46 | unsigned i; | ||
| 47 | struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list, | ||
| 48 | refcount); | ||
| 49 | |||
| 50 | for (i = 0; i < list->num_entries; ++i) | ||
| 51 | amdgpu_bo_unref(&list->array[i].robj); | ||
| 52 | |||
| 53 | mutex_destroy(&list->lock); | ||
| 54 | kvfree(list->array); | ||
| 55 | kfree_rcu(list, rhead); | ||
| 56 | } | ||
| 57 | |||
| 58 | static int amdgpu_bo_list_create(struct amdgpu_device *adev, | ||
| 59 | struct drm_file *filp, | ||
| 60 | struct drm_amdgpu_bo_list_entry *info, | ||
| 61 | unsigned num_entries, | ||
| 40 | int *id) | 62 | int *id) |
| 41 | { | 63 | { |
| 42 | int r; | 64 | int r; |
| 65 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
| 66 | struct amdgpu_bo_list *list; | ||
| 43 | 67 | ||
| 44 | *result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); | 68 | list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); |
| 45 | if (!*result) | 69 | if (!list) |
| 46 | return -ENOMEM; | 70 | return -ENOMEM; |
| 47 | 71 | ||
| 72 | /* initialize bo list*/ | ||
| 73 | mutex_init(&list->lock); | ||
| 74 | kref_init(&list->refcount); | ||
| 75 | r = amdgpu_bo_list_set(adev, filp, list, info, num_entries); | ||
| 76 | if (r) { | ||
| 77 | kfree(list); | ||
| 78 | return r; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* idr alloc should be called only after initialization of bo list. */ | ||
| 48 | mutex_lock(&fpriv->bo_list_lock); | 82 | mutex_lock(&fpriv->bo_list_lock); |
| 49 | r = idr_alloc(&fpriv->bo_list_handles, *result, | 83 | r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); |
| 50 | 1, 0, GFP_KERNEL); | 84 | mutex_unlock(&fpriv->bo_list_lock); |
| 51 | if (r < 0) { | 85 | if (r < 0) { |
| 52 | mutex_unlock(&fpriv->bo_list_lock); | 86 | kfree(list); |
| 53 | kfree(*result); | ||
| 54 | return r; | 87 | return r; |
| 55 | } | 88 | } |
| 56 | *id = r; | 89 | *id = r; |
| 57 | 90 | ||
| 58 | mutex_init(&(*result)->lock); | ||
| 59 | (*result)->num_entries = 0; | ||
| 60 | (*result)->array = NULL; | ||
| 61 | |||
| 62 | mutex_lock(&(*result)->lock); | ||
| 63 | mutex_unlock(&fpriv->bo_list_lock); | ||
| 64 | |||
| 65 | return 0; | 91 | return 0; |
| 66 | } | 92 | } |
| 67 | 93 | ||
| @@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) | |||
| 71 | 97 | ||
| 72 | mutex_lock(&fpriv->bo_list_lock); | 98 | mutex_lock(&fpriv->bo_list_lock); |
| 73 | list = idr_remove(&fpriv->bo_list_handles, id); | 99 | list = idr_remove(&fpriv->bo_list_handles, id); |
| 74 | if (list) { | ||
| 75 | /* Another user may have a reference to this list still */ | ||
| 76 | mutex_lock(&list->lock); | ||
| 77 | mutex_unlock(&list->lock); | ||
| 78 | amdgpu_bo_list_free(list); | ||
| 79 | } | ||
| 80 | mutex_unlock(&fpriv->bo_list_lock); | 100 | mutex_unlock(&fpriv->bo_list_lock); |
| 101 | if (list) | ||
| 102 | kref_put(&list->refcount, amdgpu_bo_list_release_rcu); | ||
| 81 | } | 103 | } |
| 82 | 104 | ||
| 83 | static int amdgpu_bo_list_set(struct amdgpu_device *adev, | 105 | static int amdgpu_bo_list_set(struct amdgpu_device *adev, |
| @@ -96,7 +118,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, | |||
| 96 | int r; | 118 | int r; |
| 97 | unsigned long total_size = 0; | 119 | unsigned long total_size = 0; |
| 98 | 120 | ||
| 99 | array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry)); | 121 | array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL); |
| 100 | if (!array) | 122 | if (!array) |
| 101 | return -ENOMEM; | 123 | return -ENOMEM; |
| 102 | memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); | 124 | memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); |
| @@ -148,7 +170,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, | |||
| 148 | for (i = 0; i < list->num_entries; ++i) | 170 | for (i = 0; i < list->num_entries; ++i) |
| 149 | amdgpu_bo_unref(&list->array[i].robj); | 171 | amdgpu_bo_unref(&list->array[i].robj); |
| 150 | 172 | ||
| 151 | drm_free_large(list->array); | 173 | kvfree(list->array); |
| 152 | 174 | ||
| 153 | list->gds_obj = gds_obj; | 175 | list->gds_obj = gds_obj; |
| 154 | list->gws_obj = gws_obj; | 176 | list->gws_obj = gws_obj; |
| @@ -163,7 +185,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, | |||
| 163 | error_free: | 185 | error_free: |
| 164 | while (i--) | 186 | while (i--) |
| 165 | amdgpu_bo_unref(&array[i].robj); | 187 | amdgpu_bo_unref(&array[i].robj); |
| 166 | drm_free_large(array); | 188 | kvfree(array); |
| 167 | return r; | 189 | return r; |
| 168 | } | 190 | } |
| 169 | 191 | ||
| @@ -172,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id) | |||
| 172 | { | 194 | { |
| 173 | struct amdgpu_bo_list *result; | 195 | struct amdgpu_bo_list *result; |
| 174 | 196 | ||
| 175 | mutex_lock(&fpriv->bo_list_lock); | 197 | rcu_read_lock(); |
| 176 | result = idr_find(&fpriv->bo_list_handles, id); | 198 | result = idr_find(&fpriv->bo_list_handles, id); |
| 177 | if (result) | 199 | |
| 178 | mutex_lock(&result->lock); | 200 | if (result) { |
| 179 | mutex_unlock(&fpriv->bo_list_lock); | 201 | if (kref_get_unless_zero(&result->refcount)) |
| 202 | mutex_lock(&result->lock); | ||
| 203 | else | ||
| 204 | result = NULL; | ||
| 205 | } | ||
| 206 | rcu_read_unlock(); | ||
| 207 | |||
| 180 | return result; | 208 | return result; |
| 181 | } | 209 | } |
| 182 | 210 | ||
| @@ -214,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, | |||
| 214 | void amdgpu_bo_list_put(struct amdgpu_bo_list *list) | 242 | void amdgpu_bo_list_put(struct amdgpu_bo_list *list) |
| 215 | { | 243 | { |
| 216 | mutex_unlock(&list->lock); | 244 | mutex_unlock(&list->lock); |
| 245 | kref_put(&list->refcount, amdgpu_bo_list_release_rcu); | ||
| 217 | } | 246 | } |
| 218 | 247 | ||
| 219 | void amdgpu_bo_list_free(struct amdgpu_bo_list *list) | 248 | void amdgpu_bo_list_free(struct amdgpu_bo_list *list) |
| @@ -224,7 +253,7 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list) | |||
| 224 | amdgpu_bo_unref(&list->array[i].robj); | 253 | amdgpu_bo_unref(&list->array[i].robj); |
| 225 | 254 | ||
| 226 | mutex_destroy(&list->lock); | 255 | mutex_destroy(&list->lock); |
| 227 | drm_free_large(list->array); | 256 | kvfree(list->array); |
| 228 | kfree(list); | 257 | kfree(list); |
| 229 | } | 258 | } |
| 230 | 259 | ||
| @@ -244,8 +273,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, | |||
| 244 | 273 | ||
| 245 | int r; | 274 | int r; |
| 246 | 275 | ||
| 247 | info = drm_malloc_ab(args->in.bo_number, | 276 | info = kvmalloc_array(args->in.bo_number, |
| 248 | sizeof(struct drm_amdgpu_bo_list_entry)); | 277 | sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL); |
| 249 | if (!info) | 278 | if (!info) |
| 250 | return -ENOMEM; | 279 | return -ENOMEM; |
| 251 | 280 | ||
| @@ -273,16 +302,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, | |||
| 273 | 302 | ||
| 274 | switch (args->in.operation) { | 303 | switch (args->in.operation) { |
| 275 | case AMDGPU_BO_LIST_OP_CREATE: | 304 | case AMDGPU_BO_LIST_OP_CREATE: |
| 276 | r = amdgpu_bo_list_create(fpriv, &list, &handle); | 305 | r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, |
| 306 | &handle); | ||
| 277 | if (r) | 307 | if (r) |
| 278 | goto error_free; | 308 | goto error_free; |
| 279 | |||
| 280 | r = amdgpu_bo_list_set(adev, filp, list, info, | ||
| 281 | args->in.bo_number); | ||
| 282 | amdgpu_bo_list_put(list); | ||
| 283 | if (r) | ||
| 284 | goto error_free; | ||
| 285 | |||
| 286 | break; | 309 | break; |
| 287 | 310 | ||
| 288 | case AMDGPU_BO_LIST_OP_DESTROY: | 311 | case AMDGPU_BO_LIST_OP_DESTROY: |
| @@ -311,11 +334,11 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, | |||
| 311 | 334 | ||
| 312 | memset(args, 0, sizeof(*args)); | 335 | memset(args, 0, sizeof(*args)); |
| 313 | args->out.list_handle = handle; | 336 | args->out.list_handle = handle; |
| 314 | drm_free_large(info); | 337 | kvfree(info); |
| 315 | 338 | ||
| 316 | return 0; | 339 | return 0; |
| 317 | 340 | ||
| 318 | error_free: | 341 | error_free: |
| 319 | drm_free_large(info); | 342 | kvfree(info); |
| 320 | return r; | 343 | return r; |
| 321 | } | 344 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index c6dba1eaefbd..c0a806280257 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | |||
| @@ -838,6 +838,12 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, | |||
| 838 | return -EINVAL; | 838 | return -EINVAL; |
| 839 | 839 | ||
| 840 | mode_info = info->mode_info; | 840 | mode_info = info->mode_info; |
| 841 | if (mode_info) { | ||
| 842 | /* if the displays are off, vblank time is max */ | ||
| 843 | mode_info->vblank_time_us = 0xffffffff; | ||
| 844 | /* always set the reference clock */ | ||
| 845 | mode_info->ref_clock = adev->clock.spll.reference_freq; | ||
| 846 | } | ||
| 841 | 847 | ||
| 842 | if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { | 848 | if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { |
| 843 | list_for_each_entry(crtc, | 849 | list_for_each_entry(crtc, |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4e6b9501ab0a..5599c01b265d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | |||
| @@ -27,81 +27,10 @@ | |||
| 27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
| 28 | #include <drm/drmP.h> | 28 | #include <drm/drmP.h> |
| 29 | #include <drm/amdgpu_drm.h> | 29 | #include <drm/amdgpu_drm.h> |
| 30 | #include <drm/drm_syncobj.h> | ||
| 30 | #include "amdgpu.h" | 31 | #include "amdgpu.h" |
| 31 | #include "amdgpu_trace.h" | 32 | #include "amdgpu_trace.h" |
| 32 | 33 | ||
| 33 | int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, | ||
| 34 | u32 ip_instance, u32 ring, | ||
| 35 | struct amdgpu_ring **out_ring) | ||
| 36 | { | ||
| 37 | /* Right now all IPs have only one instance - multiple rings. */ | ||
| 38 | if (ip_instance != 0) { | ||
| 39 | DRM_ERROR("invalid ip instance: %d\n", ip_instance); | ||
| 40 | return -EINVAL; | ||
| 41 | } | ||
| 42 | |||
| 43 | switch (ip_type) { | ||
| 44 | default: | ||
| 45 | DRM_ERROR("unknown ip type: %d\n", ip_type); | ||
| 46 | return -EINVAL; | ||
| 47 | case AMDGPU_HW_IP_GFX: | ||
| 48 | if (ring < adev->gfx.num_gfx_rings) { | ||
| 49 | *out_ring = &adev->gfx.gfx_ring[ring]; | ||
| 50 | } else { | ||
| 51 | DRM_ERROR("only %d gfx rings are supported now\n", | ||
| 52 | adev->gfx.num_gfx_rings); | ||
| 53 | return -EINVAL; | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | case AMDGPU_HW_IP_COMPUTE: | ||
| 57 | if (ring < adev->gfx.num_compute_rings) { | ||
| 58 | *out_ring = &adev->gfx.compute_ring[ring]; | ||
| 59 | } else { | ||
| 60 | DRM_ERROR("only %d compute rings are supported now\n", | ||
| 61 | adev->gfx.num_compute_rings); | ||
| 62 | return -EINVAL; | ||
| 63 | } | ||
| 64 | break; | ||
| 65 | case AMDGPU_HW_IP_DMA: | ||
| 66 | if (ring < adev->sdma.num_instances) { | ||
| 67 | *out_ring = &adev->sdma.instance[ring].ring; | ||
| 68 | } else { | ||
| 69 | DRM_ERROR("only %d SDMA rings are supported\n", | ||
| 70 | adev->sdma.num_instances); | ||
| 71 | return -EINVAL; | ||
| 72 | } | ||
| 73 | break; | ||
| 74 | case AMDGPU_HW_IP_UVD: | ||
| 75 | *out_ring = &adev->uvd.ring; | ||
| 76 | break; | ||
| 77 | case AMDGPU_HW_IP_VCE: | ||
| 78 | if (ring < adev->vce.num_rings){ | ||
| 79 | *out_ring = &adev->vce.ring[ring]; | ||
| 80 | } else { | ||
| 81 | DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings); | ||
| 82 | return -EINVAL; | ||
| 83 | } | ||
| 84 | break; | ||
| 85 | case AMDGPU_HW_IP_UVD_ENC: | ||
| 86 | if (ring < adev->uvd.num_enc_rings){ | ||
| 87 | *out_ring = &adev->uvd.ring_enc[ring]; | ||
| 88 | } else { | ||
| 89 | DRM_ERROR("only %d UVD ENC rings are supported\n", | ||
| 90 | adev->uvd.num_enc_rings); | ||
| 91 | return -EINVAL; | ||
| 92 | } | ||
| 93 | break; | ||
| 94 | } | ||
| 95 | |||
| 96 | if (!(*out_ring && (*out_ring)->adev)) { | ||
| 97 | DRM_ERROR("Ring %d is not initialized on IP %d\n", | ||
| 98 | ring, ip_type); | ||
| 99 | return -EINVAL; | ||
| 100 | } | ||
| 101 | |||
| 102 | return 0; | ||
| 103 | } | ||
| 104 | |||
| 105 | static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, | 34 | static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, |
| 106 | struct drm_amdgpu_cs_chunk_fence *data, | 35 | struct drm_amdgpu_cs_chunk_fence *data, |
| 107 | uint32_t *offset) | 36 | uint32_t *offset) |
| @@ -135,7 +64,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, | |||
| 135 | return 0; | 64 | return 0; |
| 136 | } | 65 | } |
| 137 | 66 | ||
| 138 | int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | 67 | static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) |
| 139 | { | 68 | { |
| 140 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | 69 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; |
| 141 | struct amdgpu_vm *vm = &fpriv->vm; | 70 | struct amdgpu_vm *vm = &fpriv->vm; |
| @@ -194,7 +123,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
| 194 | size = p->chunks[i].length_dw; | 123 | size = p->chunks[i].length_dw; |
| 195 | cdata = (void __user *)(uintptr_t)user_chunk.chunk_data; | 124 | cdata = (void __user *)(uintptr_t)user_chunk.chunk_data; |
| 196 | 125 | ||
| 197 | p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); | 126 | p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); |
| 198 | if (p->chunks[i].kdata == NULL) { | 127 | if (p->chunks[i].kdata == NULL) { |
| 199 | ret = -ENOMEM; | 128 | ret = -ENOMEM; |
| 200 | i--; | 129 | i--; |
| @@ -226,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
| 226 | break; | 155 | break; |
| 227 | 156 | ||
| 228 | case AMDGPU_CHUNK_ID_DEPENDENCIES: | 157 | case AMDGPU_CHUNK_ID_DEPENDENCIES: |
| 158 | case AMDGPU_CHUNK_ID_SYNCOBJ_IN: | ||
| 159 | case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: | ||
| 229 | break; | 160 | break; |
| 230 | 161 | ||
| 231 | default: | 162 | default: |
| @@ -247,7 +178,7 @@ free_all_kdata: | |||
| 247 | i = p->nchunks - 1; | 178 | i = p->nchunks - 1; |
| 248 | free_partial_kdata: | 179 | free_partial_kdata: |
| 249 | for (; i >= 0; i--) | 180 | for (; i >= 0; i--) |
| 250 | drm_free_large(p->chunks[i].kdata); | 181 | kvfree(p->chunks[i].kdata); |
| 251 | kfree(p->chunks); | 182 | kfree(p->chunks); |
| 252 | p->chunks = NULL; | 183 | p->chunks = NULL; |
| 253 | p->nchunks = 0; | 184 | p->nchunks = 0; |
| @@ -505,7 +436,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, | |||
| 505 | return r; | 436 | return r; |
| 506 | 437 | ||
| 507 | if (binding_userptr) { | 438 | if (binding_userptr) { |
| 508 | drm_free_large(lobj->user_pages); | 439 | kvfree(lobj->user_pages); |
| 509 | lobj->user_pages = NULL; | 440 | lobj->user_pages = NULL; |
| 510 | } | 441 | } |
| 511 | } | 442 | } |
| @@ -566,12 +497,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
| 566 | &e->user_invalidated) && e->user_pages) { | 497 | &e->user_invalidated) && e->user_pages) { |
| 567 | 498 | ||
| 568 | /* We acquired a page array, but somebody | 499 | /* We acquired a page array, but somebody |
| 569 | * invalidated it. Free it an try again | 500 | * invalidated it. Free it and try again |
| 570 | */ | 501 | */ |
| 571 | release_pages(e->user_pages, | 502 | release_pages(e->user_pages, |
| 572 | e->robj->tbo.ttm->num_pages, | 503 | e->robj->tbo.ttm->num_pages, |
| 573 | false); | 504 | false); |
| 574 | drm_free_large(e->user_pages); | 505 | kvfree(e->user_pages); |
| 575 | e->user_pages = NULL; | 506 | e->user_pages = NULL; |
| 576 | } | 507 | } |
| 577 | 508 | ||
| @@ -597,12 +528,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
| 597 | goto error_free_pages; | 528 | goto error_free_pages; |
| 598 | } | 529 | } |
| 599 | 530 | ||
| 600 | /* Fill the page arrays for all useptrs. */ | 531 | /* Fill the page arrays for all userptrs. */ |
| 601 | list_for_each_entry(e, &need_pages, tv.head) { | 532 | list_for_each_entry(e, &need_pages, tv.head) { |
| 602 | struct ttm_tt *ttm = e->robj->tbo.ttm; | 533 | struct ttm_tt *ttm = e->robj->tbo.ttm; |
| 603 | 534 | ||
| 604 | e->user_pages = drm_calloc_large(ttm->num_pages, | 535 | e->user_pages = kvmalloc_array(ttm->num_pages, |
| 605 | sizeof(struct page*)); | 536 | sizeof(struct page*), |
| 537 | GFP_KERNEL | __GFP_ZERO); | ||
| 606 | if (!e->user_pages) { | 538 | if (!e->user_pages) { |
| 607 | r = -ENOMEM; | 539 | r = -ENOMEM; |
| 608 | DRM_ERROR("calloc failure in %s\n", __func__); | 540 | DRM_ERROR("calloc failure in %s\n", __func__); |
| @@ -612,7 +544,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
| 612 | r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); | 544 | r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); |
| 613 | if (r) { | 545 | if (r) { |
| 614 | DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); | 546 | DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); |
| 615 | drm_free_large(e->user_pages); | 547 | kvfree(e->user_pages); |
| 616 | e->user_pages = NULL; | 548 | e->user_pages = NULL; |
| 617 | goto error_free_pages; | 549 | goto error_free_pages; |
| 618 | } | 550 | } |
| @@ -708,7 +640,7 @@ error_free_pages: | |||
| 708 | release_pages(e->user_pages, | 640 | release_pages(e->user_pages, |
| 709 | e->robj->tbo.ttm->num_pages, | 641 | e->robj->tbo.ttm->num_pages, |
| 710 | false); | 642 | false); |
| 711 | drm_free_large(e->user_pages); | 643 | kvfree(e->user_pages); |
| 712 | } | 644 | } |
| 713 | } | 645 | } |
| 714 | 646 | ||
| @@ -753,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo | |||
| 753 | ttm_eu_backoff_reservation(&parser->ticket, | 685 | ttm_eu_backoff_reservation(&parser->ticket, |
| 754 | &parser->validated); | 686 | &parser->validated); |
| 755 | } | 687 | } |
| 688 | |||
| 689 | for (i = 0; i < parser->num_post_dep_syncobjs; i++) | ||
| 690 | drm_syncobj_put(parser->post_dep_syncobjs[i]); | ||
| 691 | kfree(parser->post_dep_syncobjs); | ||
| 692 | |||
| 756 | dma_fence_put(parser->fence); | 693 | dma_fence_put(parser->fence); |
| 757 | 694 | ||
| 758 | if (parser->ctx) | 695 | if (parser->ctx) |
| @@ -761,7 +698,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo | |||
| 761 | amdgpu_bo_list_put(parser->bo_list); | 698 | amdgpu_bo_list_put(parser->bo_list); |
| 762 | 699 | ||
| 763 | for (i = 0; i < parser->nchunks; i++) | 700 | for (i = 0; i < parser->nchunks; i++) |
| 764 | drm_free_large(parser->chunks[i].kdata); | 701 | kvfree(parser->chunks[i].kdata); |
| 765 | kfree(parser->chunks); | 702 | kfree(parser->chunks); |
| 766 | if (parser->job) | 703 | if (parser->job) |
| 767 | amdgpu_job_free(parser->job); | 704 | amdgpu_job_free(parser->job); |
| @@ -916,9 +853,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
| 916 | return -EINVAL; | 853 | return -EINVAL; |
| 917 | } | 854 | } |
| 918 | 855 | ||
| 919 | r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, | 856 | r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type, |
| 920 | chunk_ib->ip_instance, chunk_ib->ring, | 857 | chunk_ib->ip_instance, chunk_ib->ring, &ring); |
| 921 | &ring); | ||
| 922 | if (r) | 858 | if (r) |
| 923 | return r; | 859 | return r; |
| 924 | 860 | ||
| @@ -995,62 +931,148 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
| 995 | return 0; | 931 | return 0; |
| 996 | } | 932 | } |
| 997 | 933 | ||
| 998 | static int amdgpu_cs_dependencies(struct amdgpu_device *adev, | 934 | static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, |
| 999 | struct amdgpu_cs_parser *p) | 935 | struct amdgpu_cs_chunk *chunk) |
| 1000 | { | 936 | { |
| 1001 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | 937 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; |
| 1002 | int i, j, r; | 938 | unsigned num_deps; |
| 1003 | 939 | int i, r; | |
| 1004 | for (i = 0; i < p->nchunks; ++i) { | 940 | struct drm_amdgpu_cs_chunk_dep *deps; |
| 1005 | struct drm_amdgpu_cs_chunk_dep *deps; | ||
| 1006 | struct amdgpu_cs_chunk *chunk; | ||
| 1007 | unsigned num_deps; | ||
| 1008 | 941 | ||
| 1009 | chunk = &p->chunks[i]; | 942 | deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; |
| 943 | num_deps = chunk->length_dw * 4 / | ||
| 944 | sizeof(struct drm_amdgpu_cs_chunk_dep); | ||
| 1010 | 945 | ||
| 1011 | if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES) | 946 | for (i = 0; i < num_deps; ++i) { |
| 1012 | continue; | 947 | struct amdgpu_ring *ring; |
| 948 | struct amdgpu_ctx *ctx; | ||
| 949 | struct dma_fence *fence; | ||
| 1013 | 950 | ||
| 1014 | deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; | 951 | ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); |
| 1015 | num_deps = chunk->length_dw * 4 / | 952 | if (ctx == NULL) |
| 1016 | sizeof(struct drm_amdgpu_cs_chunk_dep); | 953 | return -EINVAL; |
| 1017 | 954 | ||
| 1018 | for (j = 0; j < num_deps; ++j) { | 955 | r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr, |
| 1019 | struct amdgpu_ring *ring; | 956 | deps[i].ip_type, |
| 1020 | struct amdgpu_ctx *ctx; | 957 | deps[i].ip_instance, |
| 1021 | struct dma_fence *fence; | 958 | deps[i].ring, &ring); |
| 959 | if (r) { | ||
| 960 | amdgpu_ctx_put(ctx); | ||
| 961 | return r; | ||
| 962 | } | ||
| 1022 | 963 | ||
| 1023 | r = amdgpu_cs_get_ring(adev, deps[j].ip_type, | 964 | fence = amdgpu_ctx_get_fence(ctx, ring, |
| 1024 | deps[j].ip_instance, | 965 | deps[i].handle); |
| 1025 | deps[j].ring, &ring); | 966 | if (IS_ERR(fence)) { |
| 967 | r = PTR_ERR(fence); | ||
| 968 | amdgpu_ctx_put(ctx); | ||
| 969 | return r; | ||
| 970 | } else if (fence) { | ||
| 971 | r = amdgpu_sync_fence(p->adev, &p->job->sync, | ||
| 972 | fence); | ||
| 973 | dma_fence_put(fence); | ||
| 974 | amdgpu_ctx_put(ctx); | ||
| 1026 | if (r) | 975 | if (r) |
| 1027 | return r; | 976 | return r; |
| 977 | } | ||
| 978 | } | ||
| 979 | return 0; | ||
| 980 | } | ||
| 1028 | 981 | ||
| 1029 | ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id); | 982 | static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, |
| 1030 | if (ctx == NULL) | 983 | uint32_t handle) |
| 1031 | return -EINVAL; | 984 | { |
| 985 | int r; | ||
| 986 | struct dma_fence *fence; | ||
| 987 | r = drm_syncobj_fence_get(p->filp, handle, &fence); | ||
| 988 | if (r) | ||
| 989 | return r; | ||
| 1032 | 990 | ||
| 1033 | fence = amdgpu_ctx_get_fence(ctx, ring, | 991 | r = amdgpu_sync_fence(p->adev, &p->job->sync, fence); |
| 1034 | deps[j].handle); | 992 | dma_fence_put(fence); |
| 1035 | if (IS_ERR(fence)) { | ||
| 1036 | r = PTR_ERR(fence); | ||
| 1037 | amdgpu_ctx_put(ctx); | ||
| 1038 | return r; | ||
| 1039 | 993 | ||
| 1040 | } else if (fence) { | 994 | return r; |
| 1041 | r = amdgpu_sync_fence(adev, &p->job->sync, | 995 | } |
| 1042 | fence); | 996 | |
| 1043 | dma_fence_put(fence); | 997 | static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, |
| 1044 | amdgpu_ctx_put(ctx); | 998 | struct amdgpu_cs_chunk *chunk) |
| 1045 | if (r) | 999 | { |
| 1046 | return r; | 1000 | unsigned num_deps; |
| 1047 | } | 1001 | int i, r; |
| 1002 | struct drm_amdgpu_cs_chunk_sem *deps; | ||
| 1003 | |||
| 1004 | deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; | ||
| 1005 | num_deps = chunk->length_dw * 4 / | ||
| 1006 | sizeof(struct drm_amdgpu_cs_chunk_sem); | ||
| 1007 | |||
| 1008 | for (i = 0; i < num_deps; ++i) { | ||
| 1009 | r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); | ||
| 1010 | if (r) | ||
| 1011 | return r; | ||
| 1012 | } | ||
| 1013 | return 0; | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, | ||
| 1017 | struct amdgpu_cs_chunk *chunk) | ||
| 1018 | { | ||
| 1019 | unsigned num_deps; | ||
| 1020 | int i; | ||
| 1021 | struct drm_amdgpu_cs_chunk_sem *deps; | ||
| 1022 | deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; | ||
| 1023 | num_deps = chunk->length_dw * 4 / | ||
| 1024 | sizeof(struct drm_amdgpu_cs_chunk_sem); | ||
| 1025 | |||
| 1026 | p->post_dep_syncobjs = kmalloc_array(num_deps, | ||
| 1027 | sizeof(struct drm_syncobj *), | ||
| 1028 | GFP_KERNEL); | ||
| 1029 | p->num_post_dep_syncobjs = 0; | ||
| 1030 | |||
| 1031 | for (i = 0; i < num_deps; ++i) { | ||
| 1032 | p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); | ||
| 1033 | if (!p->post_dep_syncobjs[i]) | ||
| 1034 | return -EINVAL; | ||
| 1035 | p->num_post_dep_syncobjs++; | ||
| 1036 | } | ||
| 1037 | return 0; | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | static int amdgpu_cs_dependencies(struct amdgpu_device *adev, | ||
| 1041 | struct amdgpu_cs_parser *p) | ||
| 1042 | { | ||
| 1043 | int i, r; | ||
| 1044 | |||
| 1045 | for (i = 0; i < p->nchunks; ++i) { | ||
| 1046 | struct amdgpu_cs_chunk *chunk; | ||
| 1047 | |||
| 1048 | chunk = &p->chunks[i]; | ||
| 1049 | |||
| 1050 | if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) { | ||
| 1051 | r = amdgpu_cs_process_fence_dep(p, chunk); | ||
| 1052 | if (r) | ||
| 1053 | return r; | ||
| 1054 | } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { | ||
| 1055 | r = amdgpu_cs_process_syncobj_in_dep(p, chunk); | ||
| 1056 | if (r) | ||
| 1057 | return r; | ||
| 1058 | } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { | ||
| 1059 | r = amdgpu_cs_process_syncobj_out_dep(p, chunk); | ||
| 1060 | if (r) | ||
| 1061 | return r; | ||
| 1048 | } | 1062 | } |
| 1049 | } | 1063 | } |
| 1050 | 1064 | ||
| 1051 | return 0; | 1065 | return 0; |
| 1052 | } | 1066 | } |
| 1053 | 1067 | ||
| 1068 | static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) | ||
| 1069 | { | ||
| 1070 | int i; | ||
| 1071 | |||
| 1072 | for (i = 0; i < p->num_post_dep_syncobjs; ++i) | ||
| 1073 | drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence); | ||
| 1074 | } | ||
| 1075 | |||
| 1054 | static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, | 1076 | static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, |
| 1055 | union drm_amdgpu_cs *cs) | 1077 | union drm_amdgpu_cs *cs) |
| 1056 | { | 1078 | { |
| @@ -1071,6 +1093,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, | |||
| 1071 | job->owner = p->filp; | 1093 | job->owner = p->filp; |
| 1072 | job->fence_ctx = entity->fence_context; | 1094 | job->fence_ctx = entity->fence_context; |
| 1073 | p->fence = dma_fence_get(&job->base.s_fence->finished); | 1095 | p->fence = dma_fence_get(&job->base.s_fence->finished); |
| 1096 | |||
| 1097 | amdgpu_cs_post_dependencies(p); | ||
| 1098 | |||
| 1074 | cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); | 1099 | cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); |
| 1075 | job->uf_sequence = cs->out.handle; | 1100 | job->uf_sequence = cs->out.handle; |
| 1076 | amdgpu_job_free_resources(job); | 1101 | amdgpu_job_free_resources(job); |
| @@ -1078,13 +1103,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, | |||
| 1078 | 1103 | ||
| 1079 | trace_amdgpu_cs_ioctl(job); | 1104 | trace_amdgpu_cs_ioctl(job); |
| 1080 | amd_sched_entity_push_job(&job->base); | 1105 | amd_sched_entity_push_job(&job->base); |
| 1081 | |||
| 1082 | return 0; | 1106 | return 0; |
| 1083 | } | 1107 | } |
| 1084 | 1108 | ||
| 1085 | int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | 1109 | int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) |
| 1086 | { | 1110 | { |
| 1087 | struct amdgpu_device *adev = dev->dev_private; | 1111 | struct amdgpu_device *adev = dev->dev_private; |
| 1112 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
| 1088 | union drm_amdgpu_cs *cs = data; | 1113 | union drm_amdgpu_cs *cs = data; |
| 1089 | struct amdgpu_cs_parser parser = {}; | 1114 | struct amdgpu_cs_parser parser = {}; |
| 1090 | bool reserved_buffers = false; | 1115 | bool reserved_buffers = false; |
| @@ -1092,6 +1117,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
| 1092 | 1117 | ||
| 1093 | if (!adev->accel_working) | 1118 | if (!adev->accel_working) |
| 1094 | return -EBUSY; | 1119 | return -EBUSY; |
| 1120 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
| 1121 | return -ENODEV; | ||
| 1095 | 1122 | ||
| 1096 | parser.adev = adev; | 1123 | parser.adev = adev; |
| 1097 | parser.filp = filp; | 1124 | parser.filp = filp; |
| @@ -1153,21 +1180,28 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, | |||
| 1153 | { | 1180 | { |
| 1154 | union drm_amdgpu_wait_cs *wait = data; | 1181 | union drm_amdgpu_wait_cs *wait = data; |
| 1155 | struct amdgpu_device *adev = dev->dev_private; | 1182 | struct amdgpu_device *adev = dev->dev_private; |
| 1183 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
| 1156 | unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); | 1184 | unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); |
| 1157 | struct amdgpu_ring *ring = NULL; | 1185 | struct amdgpu_ring *ring = NULL; |
| 1158 | struct amdgpu_ctx *ctx; | 1186 | struct amdgpu_ctx *ctx; |
| 1159 | struct dma_fence *fence; | 1187 | struct dma_fence *fence; |
| 1160 | long r; | 1188 | long r; |
| 1161 | 1189 | ||
| 1162 | r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, | 1190 | if (amdgpu_kms_vram_lost(adev, fpriv)) |
| 1163 | wait->in.ring, &ring); | 1191 | return -ENODEV; |
| 1164 | if (r) | ||
| 1165 | return r; | ||
| 1166 | 1192 | ||
| 1167 | ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); | 1193 | ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); |
| 1168 | if (ctx == NULL) | 1194 | if (ctx == NULL) |
| 1169 | return -EINVAL; | 1195 | return -EINVAL; |
| 1170 | 1196 | ||
| 1197 | r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, | ||
| 1198 | wait->in.ip_type, wait->in.ip_instance, | ||
| 1199 | wait->in.ring, &ring); | ||
| 1200 | if (r) { | ||
| 1201 | amdgpu_ctx_put(ctx); | ||
| 1202 | return r; | ||
| 1203 | } | ||
| 1204 | |||
| 1171 | fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); | 1205 | fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); |
| 1172 | if (IS_ERR(fence)) | 1206 | if (IS_ERR(fence)) |
| 1173 | r = PTR_ERR(fence); | 1207 | r = PTR_ERR(fence); |
| @@ -1203,15 +1237,17 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, | |||
| 1203 | struct dma_fence *fence; | 1237 | struct dma_fence *fence; |
| 1204 | int r; | 1238 | int r; |
| 1205 | 1239 | ||
| 1206 | r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance, | ||
| 1207 | user->ring, &ring); | ||
| 1208 | if (r) | ||
| 1209 | return ERR_PTR(r); | ||
| 1210 | |||
| 1211 | ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id); | 1240 | ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id); |
| 1212 | if (ctx == NULL) | 1241 | if (ctx == NULL) |
| 1213 | return ERR_PTR(-EINVAL); | 1242 | return ERR_PTR(-EINVAL); |
| 1214 | 1243 | ||
| 1244 | r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type, | ||
| 1245 | user->ip_instance, user->ring, &ring); | ||
| 1246 | if (r) { | ||
| 1247 | amdgpu_ctx_put(ctx); | ||
| 1248 | return ERR_PTR(r); | ||
| 1249 | } | ||
| 1250 | |||
| 1215 | fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no); | 1251 | fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no); |
| 1216 | amdgpu_ctx_put(ctx); | 1252 | amdgpu_ctx_put(ctx); |
| 1217 | 1253 | ||
| @@ -1332,12 +1368,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, | |||
| 1332 | struct drm_file *filp) | 1368 | struct drm_file *filp) |
| 1333 | { | 1369 | { |
| 1334 | struct amdgpu_device *adev = dev->dev_private; | 1370 | struct amdgpu_device *adev = dev->dev_private; |
| 1371 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
| 1335 | union drm_amdgpu_wait_fences *wait = data; | 1372 | union drm_amdgpu_wait_fences *wait = data; |
| 1336 | uint32_t fence_count = wait->in.fence_count; | 1373 | uint32_t fence_count = wait->in.fence_count; |
| 1337 | struct drm_amdgpu_fence *fences_user; | 1374 | struct drm_amdgpu_fence *fences_user; |
| 1338 | struct drm_amdgpu_fence *fences; | 1375 | struct drm_amdgpu_fence *fences; |
| 1339 | int r; | 1376 | int r; |
| 1340 | 1377 | ||
| 1378 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
| 1379 | return -ENODEV; | ||
| 1341 | /* Get the fences from userspace */ | 1380 | /* Get the fences from userspace */ |
| 1342 | fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), | 1381 | fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), |
| 1343 | GFP_KERNEL); | 1382 | GFP_KERNEL); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 90d1ac8a80f8..a11e44340b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | |||
| @@ -52,12 +52,20 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) | |||
| 52 | struct amd_sched_rq *rq; | 52 | struct amd_sched_rq *rq; |
| 53 | 53 | ||
| 54 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | 54 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; |
| 55 | |||
| 56 | if (ring == &adev->gfx.kiq.ring) | ||
| 57 | continue; | ||
| 58 | |||
| 55 | r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, | 59 | r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, |
| 56 | rq, amdgpu_sched_jobs); | 60 | rq, amdgpu_sched_jobs); |
| 57 | if (r) | 61 | if (r) |
| 58 | goto failed; | 62 | goto failed; |
| 59 | } | 63 | } |
| 60 | 64 | ||
| 65 | r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr); | ||
| 66 | if (r) | ||
| 67 | goto failed; | ||
| 68 | |||
| 61 | return 0; | 69 | return 0; |
| 62 | 70 | ||
| 63 | failed: | 71 | failed: |
| @@ -86,6 +94,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) | |||
| 86 | for (i = 0; i < adev->num_rings; i++) | 94 | for (i = 0; i < adev->num_rings; i++) |
| 87 | amd_sched_entity_fini(&adev->rings[i]->sched, | 95 | amd_sched_entity_fini(&adev->rings[i]->sched, |
| 88 | &ctx->rings[i].entity); | 96 | &ctx->rings[i].entity); |
| 97 | |||
| 98 | amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); | ||
| 89 | } | 99 | } |
| 90 | 100 | ||
| 91 | static int amdgpu_ctx_alloc(struct amdgpu_device *adev, | 101 | static int amdgpu_ctx_alloc(struct amdgpu_device *adev, |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 43ca16b6eee2..4a8fc15467cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
| @@ -54,8 +54,14 @@ | |||
| 54 | #include <linux/pci.h> | 54 | #include <linux/pci.h> |
| 55 | #include <linux/firmware.h> | 55 | #include <linux/firmware.h> |
| 56 | 56 | ||
| 57 | MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); | ||
| 58 | MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); | ||
| 59 | |||
| 60 | #define AMDGPU_RESUME_MS 2000 | ||
| 61 | |||
| 57 | static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); | 62 | static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); |
| 58 | static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); | 63 | static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); |
| 64 | static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); | ||
| 59 | 65 | ||
| 60 | static const char *amdgpu_asic_name[] = { | 66 | static const char *amdgpu_asic_name[] = { |
| 61 | "TAHITI", | 67 | "TAHITI", |
| @@ -77,6 +83,7 @@ static const char *amdgpu_asic_name[] = { | |||
| 77 | "POLARIS11", | 83 | "POLARIS11", |
| 78 | "POLARIS12", | 84 | "POLARIS12", |
| 79 | "VEGA10", | 85 | "VEGA10", |
| 86 | "RAVEN", | ||
| 80 | "LAST", | 87 | "LAST", |
| 81 | }; | 88 | }; |
| 82 | 89 | ||
| @@ -478,9 +485,8 @@ void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, | |||
| 478 | 485 | ||
| 479 | /* | 486 | /* |
| 480 | * amdgpu_wb_*() | 487 | * amdgpu_wb_*() |
| 481 | * Writeback is the the method by which the the GPU updates special pages | 488 | * Writeback is the method by which the GPU updates special pages in memory |
| 482 | * in memory with the status of certain GPU events (fences, ring pointers, | 489 | * with the status of certain GPU events (fences, ring pointers,etc.). |
| 483 | * etc.). | ||
| 484 | */ | 490 | */ |
| 485 | 491 | ||
| 486 | /** | 492 | /** |
| @@ -506,7 +512,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev) | |||
| 506 | * | 512 | * |
| 507 | * @adev: amdgpu_device pointer | 513 | * @adev: amdgpu_device pointer |
| 508 | * | 514 | * |
| 509 | * Disables Writeback and frees the Writeback memory (all asics). | 515 | * Initializes writeback and allocates writeback memory (all asics). |
| 510 | * Used at driver startup. | 516 | * Used at driver startup. |
| 511 | * Returns 0 on success or an -error on failure. | 517 | * Returns 0 on success or an -error on failure. |
| 512 | */ | 518 | */ |
| @@ -614,7 +620,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) | |||
| 614 | * @mc: memory controller structure holding memory informations | 620 | * @mc: memory controller structure holding memory informations |
| 615 | * @base: base address at which to put VRAM | 621 | * @base: base address at which to put VRAM |
| 616 | * | 622 | * |
| 617 | * Function will place try to place VRAM at base address provided | 623 | * Function will try to place VRAM at base address provided |
| 618 | * as parameter (which is so far either PCI aperture address or | 624 | * as parameter (which is so far either PCI aperture address or |
| 619 | * for IGP TOM base address). | 625 | * for IGP TOM base address). |
| 620 | * | 626 | * |
| @@ -636,7 +642,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) | |||
| 636 | * ones) | 642 | * ones) |
| 637 | * | 643 | * |
| 638 | * Note: IGP TOM addr should be the same as the aperture addr, we don't | 644 | * Note: IGP TOM addr should be the same as the aperture addr, we don't |
| 639 | * explicitly check for that thought. | 645 | * explicitly check for that though. |
| 640 | * | 646 | * |
| 641 | * FIXME: when reducing VRAM size align new size on power of 2. | 647 | * FIXME: when reducing VRAM size align new size on power of 2. |
| 642 | */ | 648 | */ |
| @@ -1067,6 +1073,10 @@ def_value: | |||
| 1067 | 1073 | ||
| 1068 | static void amdgpu_check_vm_size(struct amdgpu_device *adev) | 1074 | static void amdgpu_check_vm_size(struct amdgpu_device *adev) |
| 1069 | { | 1075 | { |
| 1076 | /* no need to check the default value */ | ||
| 1077 | if (amdgpu_vm_size == -1) | ||
| 1078 | return; | ||
| 1079 | |||
| 1070 | if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { | 1080 | if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { |
| 1071 | dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", | 1081 | dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", |
| 1072 | amdgpu_vm_size); | 1082 | amdgpu_vm_size); |
| @@ -1152,16 +1162,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero | |||
| 1152 | return; | 1162 | return; |
| 1153 | 1163 | ||
| 1154 | if (state == VGA_SWITCHEROO_ON) { | 1164 | if (state == VGA_SWITCHEROO_ON) { |
| 1155 | unsigned d3_delay = dev->pdev->d3_delay; | ||
| 1156 | |||
| 1157 | pr_info("amdgpu: switched on\n"); | 1165 | pr_info("amdgpu: switched on\n"); |
| 1158 | /* don't suspend or resume card normally */ | 1166 | /* don't suspend or resume card normally */ |
| 1159 | dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; | 1167 | dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; |
| 1160 | 1168 | ||
| 1161 | amdgpu_device_resume(dev, true, true); | 1169 | amdgpu_device_resume(dev, true, true); |
| 1162 | 1170 | ||
| 1163 | dev->pdev->d3_delay = d3_delay; | ||
| 1164 | |||
| 1165 | dev->switch_power_state = DRM_SWITCH_POWER_ON; | 1171 | dev->switch_power_state = DRM_SWITCH_POWER_ON; |
| 1166 | drm_kms_helper_poll_enable(dev); | 1172 | drm_kms_helper_poll_enable(dev); |
| 1167 | } else { | 1173 | } else { |
| @@ -1342,6 +1348,9 @@ int amdgpu_ip_block_add(struct amdgpu_device *adev, | |||
| 1342 | if (!ip_block_version) | 1348 | if (!ip_block_version) |
| 1343 | return -EINVAL; | 1349 | return -EINVAL; |
| 1344 | 1350 | ||
| 1351 | DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks, | ||
| 1352 | ip_block_version->funcs->name); | ||
| 1353 | |||
| 1345 | adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; | 1354 | adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; |
| 1346 | 1355 | ||
| 1347 | return 0; | 1356 | return 0; |
| @@ -1392,6 +1401,104 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) | |||
| 1392 | } | 1401 | } |
| 1393 | } | 1402 | } |
| 1394 | 1403 | ||
| 1404 | static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) | ||
| 1405 | { | ||
| 1406 | const char *chip_name; | ||
| 1407 | char fw_name[30]; | ||
| 1408 | int err; | ||
| 1409 | const struct gpu_info_firmware_header_v1_0 *hdr; | ||
| 1410 | |||
| 1411 | adev->firmware.gpu_info_fw = NULL; | ||
| 1412 | |||
| 1413 | switch (adev->asic_type) { | ||
| 1414 | case CHIP_TOPAZ: | ||
| 1415 | case CHIP_TONGA: | ||
| 1416 | case CHIP_FIJI: | ||
| 1417 | case CHIP_POLARIS11: | ||
| 1418 | case CHIP_POLARIS10: | ||
| 1419 | case CHIP_POLARIS12: | ||
| 1420 | case CHIP_CARRIZO: | ||
| 1421 | case CHIP_STONEY: | ||
| 1422 | #ifdef CONFIG_DRM_AMDGPU_SI | ||
| 1423 | case CHIP_VERDE: | ||
| 1424 | case CHIP_TAHITI: | ||
| 1425 | case CHIP_PITCAIRN: | ||
| 1426 | case CHIP_OLAND: | ||
| 1427 | case CHIP_HAINAN: | ||
| 1428 | #endif | ||
| 1429 | #ifdef CONFIG_DRM_AMDGPU_CIK | ||
| 1430 | case CHIP_BONAIRE: | ||
| 1431 | case CHIP_HAWAII: | ||
| 1432 | case CHIP_KAVERI: | ||
| 1433 | case CHIP_KABINI: | ||
| 1434 | case CHIP_MULLINS: | ||
| 1435 | #endif | ||
| 1436 | default: | ||
| 1437 | return 0; | ||
| 1438 | case CHIP_VEGA10: | ||
| 1439 | chip_name = "vega10"; | ||
| 1440 | break; | ||
| 1441 | case CHIP_RAVEN: | ||
| 1442 | chip_name = "raven"; | ||
| 1443 | break; | ||
| 1444 | } | ||
| 1445 | |||
| 1446 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); | ||
| 1447 | err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev); | ||
| 1448 | if (err) { | ||
| 1449 | dev_err(adev->dev, | ||
| 1450 | "Failed to load gpu_info firmware \"%s\"\n", | ||
| 1451 | fw_name); | ||
| 1452 | goto out; | ||
| 1453 | } | ||
| 1454 | err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw); | ||
| 1455 | if (err) { | ||
| 1456 | dev_err(adev->dev, | ||
| 1457 | "Failed to validate gpu_info firmware \"%s\"\n", | ||
| 1458 | fw_name); | ||
| 1459 | goto out; | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; | ||
| 1463 | amdgpu_ucode_print_gpu_info_hdr(&hdr->header); | ||
| 1464 | |||
| 1465 | switch (hdr->version_major) { | ||
| 1466 | case 1: | ||
| 1467 | { | ||
| 1468 | const struct gpu_info_firmware_v1_0 *gpu_info_fw = | ||
| 1469 | (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + | ||
| 1470 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
| 1471 | |||
| 1472 | adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); | ||
| 1473 | adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); | ||
| 1474 | adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); | ||
| 1475 | adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); | ||
| 1476 | adev->gfx.config.max_texture_channel_caches = | ||
| 1477 | le32_to_cpu(gpu_info_fw->gc_num_tccs); | ||
| 1478 | adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); | ||
| 1479 | adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); | ||
| 1480 | adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); | ||
| 1481 | adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); | ||
| 1482 | adev->gfx.config.double_offchip_lds_buf = | ||
| 1483 | le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); | ||
| 1484 | adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); | ||
| 1485 | adev->gfx.cu_info.max_waves_per_simd = | ||
| 1486 | le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); | ||
| 1487 | adev->gfx.cu_info.max_scratch_slots_per_cu = | ||
| 1488 | le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); | ||
| 1489 | adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); | ||
| 1490 | break; | ||
| 1491 | } | ||
| 1492 | default: | ||
| 1493 | dev_err(adev->dev, | ||
| 1494 | "Unsupported gpu_info table %d\n", hdr->header.ucode_version); | ||
| 1495 | err = -EINVAL; | ||
| 1496 | goto out; | ||
| 1497 | } | ||
| 1498 | out: | ||
| 1499 | return err; | ||
| 1500 | } | ||
| 1501 | |||
| 1395 | static int amdgpu_early_init(struct amdgpu_device *adev) | 1502 | static int amdgpu_early_init(struct amdgpu_device *adev) |
| 1396 | { | 1503 | { |
| 1397 | int i, r; | 1504 | int i, r; |
| @@ -1444,8 +1551,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev) | |||
| 1444 | return r; | 1551 | return r; |
| 1445 | break; | 1552 | break; |
| 1446 | #endif | 1553 | #endif |
| 1447 | case CHIP_VEGA10: | 1554 | case CHIP_VEGA10: |
| 1448 | adev->family = AMDGPU_FAMILY_AI; | 1555 | case CHIP_RAVEN: |
| 1556 | if (adev->asic_type == CHIP_RAVEN) | ||
| 1557 | adev->family = AMDGPU_FAMILY_RV; | ||
| 1558 | else | ||
| 1559 | adev->family = AMDGPU_FAMILY_AI; | ||
| 1449 | 1560 | ||
| 1450 | r = soc15_set_ip_blocks(adev); | 1561 | r = soc15_set_ip_blocks(adev); |
| 1451 | if (r) | 1562 | if (r) |
| @@ -1456,6 +1567,10 @@ static int amdgpu_early_init(struct amdgpu_device *adev) | |||
| 1456 | return -EINVAL; | 1567 | return -EINVAL; |
| 1457 | } | 1568 | } |
| 1458 | 1569 | ||
| 1570 | r = amdgpu_device_parse_gpu_info_fw(adev); | ||
| 1571 | if (r) | ||
| 1572 | return r; | ||
| 1573 | |||
| 1459 | if (amdgpu_sriov_vf(adev)) { | 1574 | if (amdgpu_sriov_vf(adev)) { |
| 1460 | r = amdgpu_virt_request_full_gpu(adev, true); | 1575 | r = amdgpu_virt_request_full_gpu(adev, true); |
| 1461 | if (r) | 1576 | if (r) |
| @@ -1464,7 +1579,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev) | |||
| 1464 | 1579 | ||
| 1465 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1580 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| 1466 | if ((amdgpu_ip_block_mask & (1 << i)) == 0) { | 1581 | if ((amdgpu_ip_block_mask & (1 << i)) == 0) { |
| 1467 | DRM_ERROR("disabled ip block: %d\n", i); | 1582 | DRM_ERROR("disabled ip block: %d <%s>\n", |
| 1583 | i, adev->ip_blocks[i].version->funcs->name); | ||
| 1468 | adev->ip_blocks[i].status.valid = false; | 1584 | adev->ip_blocks[i].status.valid = false; |
| 1469 | } else { | 1585 | } else { |
| 1470 | if (adev->ip_blocks[i].version->funcs->early_init) { | 1586 | if (adev->ip_blocks[i].version->funcs->early_init) { |
| @@ -1552,22 +1668,24 @@ static int amdgpu_init(struct amdgpu_device *adev) | |||
| 1552 | return 0; | 1668 | return 0; |
| 1553 | } | 1669 | } |
| 1554 | 1670 | ||
| 1555 | static int amdgpu_late_init(struct amdgpu_device *adev) | 1671 | static void amdgpu_fill_reset_magic(struct amdgpu_device *adev) |
| 1672 | { | ||
| 1673 | memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | static bool amdgpu_check_vram_lost(struct amdgpu_device *adev) | ||
| 1677 | { | ||
| 1678 | return !!memcmp(adev->gart.ptr, adev->reset_magic, | ||
| 1679 | AMDGPU_RESET_MAGIC_NUM); | ||
| 1680 | } | ||
| 1681 | |||
| 1682 | static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) | ||
| 1556 | { | 1683 | { |
| 1557 | int i = 0, r; | 1684 | int i = 0, r; |
| 1558 | 1685 | ||
| 1559 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1686 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| 1560 | if (!adev->ip_blocks[i].status.valid) | 1687 | if (!adev->ip_blocks[i].status.valid) |
| 1561 | continue; | 1688 | continue; |
| 1562 | if (adev->ip_blocks[i].version->funcs->late_init) { | ||
| 1563 | r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); | ||
| 1564 | if (r) { | ||
| 1565 | DRM_ERROR("late_init of IP block <%s> failed %d\n", | ||
| 1566 | adev->ip_blocks[i].version->funcs->name, r); | ||
| 1567 | return r; | ||
| 1568 | } | ||
| 1569 | adev->ip_blocks[i].status.late_initialized = true; | ||
| 1570 | } | ||
| 1571 | /* skip CG for VCE/UVD, it's handled specially */ | 1689 | /* skip CG for VCE/UVD, it's handled specially */ |
| 1572 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && | 1690 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && |
| 1573 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { | 1691 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { |
| @@ -1581,6 +1699,31 @@ static int amdgpu_late_init(struct amdgpu_device *adev) | |||
| 1581 | } | 1699 | } |
| 1582 | } | 1700 | } |
| 1583 | } | 1701 | } |
| 1702 | return 0; | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | static int amdgpu_late_init(struct amdgpu_device *adev) | ||
| 1706 | { | ||
| 1707 | int i = 0, r; | ||
| 1708 | |||
| 1709 | for (i = 0; i < adev->num_ip_blocks; i++) { | ||
| 1710 | if (!adev->ip_blocks[i].status.valid) | ||
| 1711 | continue; | ||
| 1712 | if (adev->ip_blocks[i].version->funcs->late_init) { | ||
| 1713 | r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); | ||
| 1714 | if (r) { | ||
| 1715 | DRM_ERROR("late_init of IP block <%s> failed %d\n", | ||
| 1716 | adev->ip_blocks[i].version->funcs->name, r); | ||
| 1717 | return r; | ||
| 1718 | } | ||
| 1719 | adev->ip_blocks[i].status.late_initialized = true; | ||
| 1720 | } | ||
| 1721 | } | ||
| 1722 | |||
| 1723 | mod_delayed_work(system_wq, &adev->late_init_work, | ||
| 1724 | msecs_to_jiffies(AMDGPU_RESUME_MS)); | ||
| 1725 | |||
| 1726 | amdgpu_fill_reset_magic(adev); | ||
| 1584 | 1727 | ||
| 1585 | return 0; | 1728 | return 0; |
| 1586 | } | 1729 | } |
| @@ -1672,6 +1815,13 @@ static int amdgpu_fini(struct amdgpu_device *adev) | |||
| 1672 | return 0; | 1815 | return 0; |
| 1673 | } | 1816 | } |
| 1674 | 1817 | ||
| 1818 | static void amdgpu_late_init_func_handler(struct work_struct *work) | ||
| 1819 | { | ||
| 1820 | struct amdgpu_device *adev = | ||
| 1821 | container_of(work, struct amdgpu_device, late_init_work.work); | ||
| 1822 | amdgpu_late_set_cg_state(adev); | ||
| 1823 | } | ||
| 1824 | |||
| 1675 | int amdgpu_suspend(struct amdgpu_device *adev) | 1825 | int amdgpu_suspend(struct amdgpu_device *adev) |
| 1676 | { | 1826 | { |
| 1677 | int i, r; | 1827 | int i, r; |
| @@ -1717,19 +1867,25 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) | |||
| 1717 | { | 1867 | { |
| 1718 | int i, r; | 1868 | int i, r; |
| 1719 | 1869 | ||
| 1720 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1870 | static enum amd_ip_block_type ip_order[] = { |
| 1721 | if (!adev->ip_blocks[i].status.valid) | 1871 | AMD_IP_BLOCK_TYPE_GMC, |
| 1722 | continue; | 1872 | AMD_IP_BLOCK_TYPE_COMMON, |
| 1873 | AMD_IP_BLOCK_TYPE_IH, | ||
| 1874 | }; | ||
| 1723 | 1875 | ||
| 1724 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || | 1876 | for (i = 0; i < ARRAY_SIZE(ip_order); i++) { |
| 1725 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || | 1877 | int j; |
| 1726 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) | 1878 | struct amdgpu_ip_block *block; |
| 1727 | r = adev->ip_blocks[i].version->funcs->hw_init(adev); | ||
| 1728 | 1879 | ||
| 1729 | if (r) { | 1880 | for (j = 0; j < adev->num_ip_blocks; j++) { |
| 1730 | DRM_ERROR("resume of IP block <%s> failed %d\n", | 1881 | block = &adev->ip_blocks[j]; |
| 1731 | adev->ip_blocks[i].version->funcs->name, r); | 1882 | |
| 1732 | return r; | 1883 | if (block->version->type != ip_order[i] || |
| 1884 | !block->status.valid) | ||
| 1885 | continue; | ||
| 1886 | |||
| 1887 | r = block->version->funcs->hw_init(adev); | ||
| 1888 | DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); | ||
| 1733 | } | 1889 | } |
| 1734 | } | 1890 | } |
| 1735 | 1891 | ||
| @@ -1740,33 +1896,67 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) | |||
| 1740 | { | 1896 | { |
| 1741 | int i, r; | 1897 | int i, r; |
| 1742 | 1898 | ||
| 1899 | static enum amd_ip_block_type ip_order[] = { | ||
| 1900 | AMD_IP_BLOCK_TYPE_SMC, | ||
| 1901 | AMD_IP_BLOCK_TYPE_DCE, | ||
| 1902 | AMD_IP_BLOCK_TYPE_GFX, | ||
| 1903 | AMD_IP_BLOCK_TYPE_SDMA, | ||
| 1904 | AMD_IP_BLOCK_TYPE_VCE, | ||
| 1905 | }; | ||
| 1906 | |||
| 1907 | for (i = 0; i < ARRAY_SIZE(ip_order); i++) { | ||
| 1908 | int j; | ||
| 1909 | struct amdgpu_ip_block *block; | ||
| 1910 | |||
| 1911 | for (j = 0; j < adev->num_ip_blocks; j++) { | ||
| 1912 | block = &adev->ip_blocks[j]; | ||
| 1913 | |||
| 1914 | if (block->version->type != ip_order[i] || | ||
| 1915 | !block->status.valid) | ||
| 1916 | continue; | ||
| 1917 | |||
| 1918 | r = block->version->funcs->hw_init(adev); | ||
| 1919 | DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); | ||
| 1920 | } | ||
| 1921 | } | ||
| 1922 | |||
| 1923 | return 0; | ||
| 1924 | } | ||
| 1925 | |||
| 1926 | static int amdgpu_resume_phase1(struct amdgpu_device *adev) | ||
| 1927 | { | ||
| 1928 | int i, r; | ||
| 1929 | |||
| 1743 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1930 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| 1744 | if (!adev->ip_blocks[i].status.valid) | 1931 | if (!adev->ip_blocks[i].status.valid) |
| 1745 | continue; | 1932 | continue; |
| 1746 | |||
| 1747 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || | 1933 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || |
| 1748 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || | 1934 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || |
| 1749 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) | 1935 | adev->ip_blocks[i].version->type == |
| 1750 | continue; | 1936 | AMD_IP_BLOCK_TYPE_IH) { |
| 1751 | 1937 | r = adev->ip_blocks[i].version->funcs->resume(adev); | |
| 1752 | r = adev->ip_blocks[i].version->funcs->hw_init(adev); | 1938 | if (r) { |
| 1753 | if (r) { | 1939 | DRM_ERROR("resume of IP block <%s> failed %d\n", |
| 1754 | DRM_ERROR("resume of IP block <%s> failed %d\n", | 1940 | adev->ip_blocks[i].version->funcs->name, r); |
| 1755 | adev->ip_blocks[i].version->funcs->name, r); | 1941 | return r; |
| 1756 | return r; | 1942 | } |
| 1757 | } | 1943 | } |
| 1758 | } | 1944 | } |
| 1759 | 1945 | ||
| 1760 | return 0; | 1946 | return 0; |
| 1761 | } | 1947 | } |
| 1762 | 1948 | ||
| 1763 | static int amdgpu_resume(struct amdgpu_device *adev) | 1949 | static int amdgpu_resume_phase2(struct amdgpu_device *adev) |
| 1764 | { | 1950 | { |
| 1765 | int i, r; | 1951 | int i, r; |
| 1766 | 1952 | ||
| 1767 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1953 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| 1768 | if (!adev->ip_blocks[i].status.valid) | 1954 | if (!adev->ip_blocks[i].status.valid) |
| 1769 | continue; | 1955 | continue; |
| 1956 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || | ||
| 1957 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || | ||
| 1958 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) | ||
| 1959 | continue; | ||
| 1770 | r = adev->ip_blocks[i].version->funcs->resume(adev); | 1960 | r = adev->ip_blocks[i].version->funcs->resume(adev); |
| 1771 | if (r) { | 1961 | if (r) { |
| 1772 | DRM_ERROR("resume of IP block <%s> failed %d\n", | 1962 | DRM_ERROR("resume of IP block <%s> failed %d\n", |
| @@ -1778,6 +1968,18 @@ static int amdgpu_resume(struct amdgpu_device *adev) | |||
| 1778 | return 0; | 1968 | return 0; |
| 1779 | } | 1969 | } |
| 1780 | 1970 | ||
| 1971 | static int amdgpu_resume(struct amdgpu_device *adev) | ||
| 1972 | { | ||
| 1973 | int r; | ||
| 1974 | |||
| 1975 | r = amdgpu_resume_phase1(adev); | ||
| 1976 | if (r) | ||
| 1977 | return r; | ||
| 1978 | r = amdgpu_resume_phase2(adev); | ||
| 1979 | |||
| 1980 | return r; | ||
| 1981 | } | ||
| 1982 | |||
| 1781 | static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) | 1983 | static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) |
| 1782 | { | 1984 | { |
| 1783 | if (adev->is_atom_fw) { | 1985 | if (adev->is_atom_fw) { |
| @@ -1860,8 +2062,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
| 1860 | 2062 | ||
| 1861 | amdgpu_check_arguments(adev); | 2063 | amdgpu_check_arguments(adev); |
| 1862 | 2064 | ||
| 1863 | /* Registers mapping */ | ||
| 1864 | /* TODO: block userspace mapping of io register */ | ||
| 1865 | spin_lock_init(&adev->mmio_idx_lock); | 2065 | spin_lock_init(&adev->mmio_idx_lock); |
| 1866 | spin_lock_init(&adev->smc_idx_lock); | 2066 | spin_lock_init(&adev->smc_idx_lock); |
| 1867 | spin_lock_init(&adev->pcie_idx_lock); | 2067 | spin_lock_init(&adev->pcie_idx_lock); |
| @@ -1877,6 +2077,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
| 1877 | INIT_LIST_HEAD(&adev->gtt_list); | 2077 | INIT_LIST_HEAD(&adev->gtt_list); |
| 1878 | spin_lock_init(&adev->gtt_list_lock); | 2078 | spin_lock_init(&adev->gtt_list_lock); |
| 1879 | 2079 | ||
| 2080 | INIT_LIST_HEAD(&adev->ring_lru_list); | ||
| 2081 | spin_lock_init(&adev->ring_lru_list_lock); | ||
| 2082 | |||
| 2083 | INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler); | ||
| 2084 | |||
| 2085 | /* Registers mapping */ | ||
| 2086 | /* TODO: block userspace mapping of io register */ | ||
| 1880 | if (adev->asic_type >= CHIP_BONAIRE) { | 2087 | if (adev->asic_type >= CHIP_BONAIRE) { |
| 1881 | adev->rmmio_base = pci_resource_start(adev->pdev, 5); | 2088 | adev->rmmio_base = pci_resource_start(adev->pdev, 5); |
| 1882 | adev->rmmio_size = pci_resource_len(adev->pdev, 5); | 2089 | adev->rmmio_size = pci_resource_len(adev->pdev, 5); |
| @@ -1989,6 +2196,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
| 1989 | 2196 | ||
| 1990 | adev->accel_working = true; | 2197 | adev->accel_working = true; |
| 1991 | 2198 | ||
| 2199 | amdgpu_vm_check_compute_bug(adev); | ||
| 2200 | |||
| 1992 | /* Initialize the buffer migration limit. */ | 2201 | /* Initialize the buffer migration limit. */ |
| 1993 | if (amdgpu_moverate >= 0) | 2202 | if (amdgpu_moverate >= 0) |
| 1994 | max_MBps = amdgpu_moverate; | 2203 | max_MBps = amdgpu_moverate; |
| @@ -2017,6 +2226,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
| 2017 | if (r) | 2226 | if (r) |
| 2018 | DRM_ERROR("registering register debugfs failed (%d).\n", r); | 2227 | DRM_ERROR("registering register debugfs failed (%d).\n", r); |
| 2019 | 2228 | ||
| 2229 | r = amdgpu_debugfs_test_ib_ring_init(adev); | ||
| 2230 | if (r) | ||
| 2231 | DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r); | ||
| 2232 | |||
| 2020 | r = amdgpu_debugfs_firmware_init(adev); | 2233 | r = amdgpu_debugfs_firmware_init(adev); |
| 2021 | if (r) | 2234 | if (r) |
| 2022 | DRM_ERROR("registering firmware debugfs failed (%d).\n", r); | 2235 | DRM_ERROR("registering firmware debugfs failed (%d).\n", r); |
| @@ -2073,7 +2286,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev) | |||
| 2073 | amdgpu_fence_driver_fini(adev); | 2286 | amdgpu_fence_driver_fini(adev); |
| 2074 | amdgpu_fbdev_fini(adev); | 2287 | amdgpu_fbdev_fini(adev); |
| 2075 | r = amdgpu_fini(adev); | 2288 | r = amdgpu_fini(adev); |
| 2289 | if (adev->firmware.gpu_info_fw) { | ||
| 2290 | release_firmware(adev->firmware.gpu_info_fw); | ||
| 2291 | adev->firmware.gpu_info_fw = NULL; | ||
| 2292 | } | ||
| 2076 | adev->accel_working = false; | 2293 | adev->accel_working = false; |
| 2294 | cancel_delayed_work_sync(&adev->late_init_work); | ||
| 2077 | /* free i2c buses */ | 2295 | /* free i2c buses */ |
| 2078 | amdgpu_i2c_fini(adev); | 2296 | amdgpu_i2c_fini(adev); |
| 2079 | amdgpu_atombios_fini(adev); | 2297 | amdgpu_atombios_fini(adev); |
| @@ -2458,16 +2676,15 @@ err: | |||
| 2458 | * amdgpu_sriov_gpu_reset - reset the asic | 2676 | * amdgpu_sriov_gpu_reset - reset the asic |
| 2459 | * | 2677 | * |
| 2460 | * @adev: amdgpu device pointer | 2678 | * @adev: amdgpu device pointer |
| 2461 | * @voluntary: if this reset is requested by guest. | 2679 | * @job: which job trigger hang |
| 2462 | * (true means by guest and false means by HYPERVISOR ) | ||
| 2463 | * | 2680 | * |
| 2464 | * Attempt the reset the GPU if it has hung (all asics). | 2681 | * Attempt the reset the GPU if it has hung (all asics). |
| 2465 | * for SRIOV case. | 2682 | * for SRIOV case. |
| 2466 | * Returns 0 for success or an error on failure. | 2683 | * Returns 0 for success or an error on failure. |
| 2467 | */ | 2684 | */ |
| 2468 | int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) | 2685 | int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) |
| 2469 | { | 2686 | { |
| 2470 | int i, r = 0; | 2687 | int i, j, r = 0; |
| 2471 | int resched; | 2688 | int resched; |
| 2472 | struct amdgpu_bo *bo, *tmp; | 2689 | struct amdgpu_bo *bo, *tmp; |
| 2473 | struct amdgpu_ring *ring; | 2690 | struct amdgpu_ring *ring; |
| @@ -2480,22 +2697,39 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) | |||
| 2480 | /* block TTM */ | 2697 | /* block TTM */ |
| 2481 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); | 2698 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); |
| 2482 | 2699 | ||
| 2483 | /* block scheduler */ | 2700 | /* we start from the ring trigger GPU hang */ |
| 2484 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 2701 | j = job ? job->ring->idx : 0; |
| 2485 | ring = adev->rings[i]; | ||
| 2486 | 2702 | ||
| 2703 | /* block scheduler */ | ||
| 2704 | for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { | ||
| 2705 | ring = adev->rings[i % AMDGPU_MAX_RINGS]; | ||
| 2487 | if (!ring || !ring->sched.thread) | 2706 | if (!ring || !ring->sched.thread) |
| 2488 | continue; | 2707 | continue; |
| 2489 | 2708 | ||
| 2490 | kthread_park(ring->sched.thread); | 2709 | kthread_park(ring->sched.thread); |
| 2710 | |||
| 2711 | if (job && j != i) | ||
| 2712 | continue; | ||
| 2713 | |||
| 2714 | /* here give the last chance to check if job removed from mirror-list | ||
| 2715 | * since we already pay some time on kthread_park */ | ||
| 2716 | if (job && list_empty(&job->base.node)) { | ||
| 2717 | kthread_unpark(ring->sched.thread); | ||
| 2718 | goto give_up_reset; | ||
| 2719 | } | ||
| 2720 | |||
| 2721 | if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit)) | ||
| 2722 | amd_sched_job_kickout(&job->base); | ||
| 2723 | |||
| 2724 | /* only do job_reset on the hang ring if @job not NULL */ | ||
| 2491 | amd_sched_hw_job_reset(&ring->sched); | 2725 | amd_sched_hw_job_reset(&ring->sched); |
| 2492 | } | ||
| 2493 | 2726 | ||
| 2494 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ | 2727 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ |
| 2495 | amdgpu_fence_driver_force_completion(adev); | 2728 | amdgpu_fence_driver_force_completion_ring(ring); |
| 2729 | } | ||
| 2496 | 2730 | ||
| 2497 | /* request to take full control of GPU before re-initialization */ | 2731 | /* request to take full control of GPU before re-initialization */ |
| 2498 | if (voluntary) | 2732 | if (job) |
| 2499 | amdgpu_virt_reset_gpu(adev); | 2733 | amdgpu_virt_reset_gpu(adev); |
| 2500 | else | 2734 | else |
| 2501 | amdgpu_virt_request_full_gpu(adev, true); | 2735 | amdgpu_virt_request_full_gpu(adev, true); |
| @@ -2545,20 +2779,28 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) | |||
| 2545 | } | 2779 | } |
| 2546 | dma_fence_put(fence); | 2780 | dma_fence_put(fence); |
| 2547 | 2781 | ||
| 2548 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 2782 | for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { |
| 2549 | struct amdgpu_ring *ring = adev->rings[i]; | 2783 | ring = adev->rings[i % AMDGPU_MAX_RINGS]; |
| 2550 | if (!ring || !ring->sched.thread) | 2784 | if (!ring || !ring->sched.thread) |
| 2551 | continue; | 2785 | continue; |
| 2552 | 2786 | ||
| 2787 | if (job && j != i) { | ||
| 2788 | kthread_unpark(ring->sched.thread); | ||
| 2789 | continue; | ||
| 2790 | } | ||
| 2791 | |||
| 2553 | amd_sched_job_recovery(&ring->sched); | 2792 | amd_sched_job_recovery(&ring->sched); |
| 2554 | kthread_unpark(ring->sched.thread); | 2793 | kthread_unpark(ring->sched.thread); |
| 2555 | } | 2794 | } |
| 2556 | 2795 | ||
| 2557 | drm_helper_resume_force_mode(adev->ddev); | 2796 | drm_helper_resume_force_mode(adev->ddev); |
| 2797 | give_up_reset: | ||
| 2558 | ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); | 2798 | ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); |
| 2559 | if (r) { | 2799 | if (r) { |
| 2560 | /* bad news, how to tell it to userspace ? */ | 2800 | /* bad news, how to tell it to userspace ? */ |
| 2561 | dev_info(adev->dev, "GPU reset failed\n"); | 2801 | dev_info(adev->dev, "GPU reset failed\n"); |
| 2802 | } else { | ||
| 2803 | dev_info(adev->dev, "GPU reset successed!\n"); | ||
| 2562 | } | 2804 | } |
| 2563 | 2805 | ||
| 2564 | adev->gfx.in_reset = false; | 2806 | adev->gfx.in_reset = false; |
| @@ -2578,10 +2820,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) | |||
| 2578 | { | 2820 | { |
| 2579 | int i, r; | 2821 | int i, r; |
| 2580 | int resched; | 2822 | int resched; |
| 2581 | bool need_full_reset; | 2823 | bool need_full_reset, vram_lost = false; |
| 2582 | |||
| 2583 | if (amdgpu_sriov_vf(adev)) | ||
| 2584 | return amdgpu_sriov_gpu_reset(adev, true); | ||
| 2585 | 2824 | ||
| 2586 | if (!amdgpu_check_soft_reset(adev)) { | 2825 | if (!amdgpu_check_soft_reset(adev)) { |
| 2587 | DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); | 2826 | DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); |
| @@ -2641,16 +2880,27 @@ retry: | |||
| 2641 | 2880 | ||
| 2642 | if (!r) { | 2881 | if (!r) { |
| 2643 | dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); | 2882 | dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); |
| 2644 | r = amdgpu_resume(adev); | 2883 | r = amdgpu_resume_phase1(adev); |
| 2884 | if (r) | ||
| 2885 | goto out; | ||
| 2886 | vram_lost = amdgpu_check_vram_lost(adev); | ||
| 2887 | if (vram_lost) { | ||
| 2888 | DRM_ERROR("VRAM is lost!\n"); | ||
| 2889 | atomic_inc(&adev->vram_lost_counter); | ||
| 2890 | } | ||
| 2891 | r = amdgpu_ttm_recover_gart(adev); | ||
| 2892 | if (r) | ||
| 2893 | goto out; | ||
| 2894 | r = amdgpu_resume_phase2(adev); | ||
| 2895 | if (r) | ||
| 2896 | goto out; | ||
| 2897 | if (vram_lost) | ||
| 2898 | amdgpu_fill_reset_magic(adev); | ||
| 2645 | } | 2899 | } |
| 2646 | } | 2900 | } |
| 2901 | out: | ||
| 2647 | if (!r) { | 2902 | if (!r) { |
| 2648 | amdgpu_irq_gpu_reset_resume_helper(adev); | 2903 | amdgpu_irq_gpu_reset_resume_helper(adev); |
| 2649 | if (need_full_reset && amdgpu_need_backup(adev)) { | ||
| 2650 | r = amdgpu_ttm_recover_gart(adev); | ||
| 2651 | if (r) | ||
| 2652 | DRM_ERROR("gart recovery failed!!!\n"); | ||
| 2653 | } | ||
| 2654 | r = amdgpu_ib_ring_tests(adev); | 2904 | r = amdgpu_ib_ring_tests(adev); |
| 2655 | if (r) { | 2905 | if (r) { |
| 2656 | dev_err(adev->dev, "ib ring test failed (%d).\n", r); | 2906 | dev_err(adev->dev, "ib ring test failed (%d).\n", r); |
| @@ -2712,10 +2962,11 @@ retry: | |||
| 2712 | drm_helper_resume_force_mode(adev->ddev); | 2962 | drm_helper_resume_force_mode(adev->ddev); |
| 2713 | 2963 | ||
| 2714 | ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); | 2964 | ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); |
| 2715 | if (r) { | 2965 | if (r) |
| 2716 | /* bad news, how to tell it to userspace ? */ | 2966 | /* bad news, how to tell it to userspace ? */ |
| 2717 | dev_info(adev->dev, "GPU reset failed\n"); | 2967 | dev_info(adev->dev, "GPU reset failed\n"); |
| 2718 | } | 2968 | else |
| 2969 | dev_info(adev->dev, "GPU reset successed!\n"); | ||
| 2719 | 2970 | ||
| 2720 | return r; | 2971 | return r; |
| 2721 | } | 2972 | } |
| @@ -3499,11 +3750,60 @@ static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) | |||
| 3499 | } | 3750 | } |
| 3500 | } | 3751 | } |
| 3501 | 3752 | ||
| 3753 | static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) | ||
| 3754 | { | ||
| 3755 | struct drm_info_node *node = (struct drm_info_node *) m->private; | ||
| 3756 | struct drm_device *dev = node->minor->dev; | ||
| 3757 | struct amdgpu_device *adev = dev->dev_private; | ||
| 3758 | int r = 0, i; | ||
| 3759 | |||
| 3760 | /* hold on the scheduler */ | ||
| 3761 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { | ||
| 3762 | struct amdgpu_ring *ring = adev->rings[i]; | ||
| 3763 | |||
| 3764 | if (!ring || !ring->sched.thread) | ||
| 3765 | continue; | ||
| 3766 | kthread_park(ring->sched.thread); | ||
| 3767 | } | ||
| 3768 | |||
| 3769 | seq_printf(m, "run ib test:\n"); | ||
| 3770 | r = amdgpu_ib_ring_tests(adev); | ||
| 3771 | if (r) | ||
| 3772 | seq_printf(m, "ib ring tests failed (%d).\n", r); | ||
| 3773 | else | ||
| 3774 | seq_printf(m, "ib ring tests passed.\n"); | ||
| 3775 | |||
| 3776 | /* go on the scheduler */ | ||
| 3777 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { | ||
| 3778 | struct amdgpu_ring *ring = adev->rings[i]; | ||
| 3779 | |||
| 3780 | if (!ring || !ring->sched.thread) | ||
| 3781 | continue; | ||
| 3782 | kthread_unpark(ring->sched.thread); | ||
| 3783 | } | ||
| 3784 | |||
| 3785 | return 0; | ||
| 3786 | } | ||
| 3787 | |||
| 3788 | static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = { | ||
| 3789 | {"amdgpu_test_ib", &amdgpu_debugfs_test_ib} | ||
| 3790 | }; | ||
| 3791 | |||
| 3792 | static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) | ||
| 3793 | { | ||
| 3794 | return amdgpu_debugfs_add_files(adev, | ||
| 3795 | amdgpu_debugfs_test_ib_ring_list, 1); | ||
| 3796 | } | ||
| 3797 | |||
| 3502 | int amdgpu_debugfs_init(struct drm_minor *minor) | 3798 | int amdgpu_debugfs_init(struct drm_minor *minor) |
| 3503 | { | 3799 | { |
| 3504 | return 0; | 3800 | return 0; |
| 3505 | } | 3801 | } |
| 3506 | #else | 3802 | #else |
| 3803 | static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) | ||
| 3804 | { | ||
| 3805 | return 0; | ||
| 3806 | } | ||
| 3507 | static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) | 3807 | static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) |
| 3508 | { | 3808 | { |
| 3509 | return 0; | 3809 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 38e9b0d3659a..1cb52fd19060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | * Authors: Alex Deucher | 22 | * Authors: Alex Deucher |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include "drmP.h" | 25 | #include <drm/drmP.h> |
| 26 | #include "amdgpu.h" | 26 | #include "amdgpu.h" |
| 27 | #include "amdgpu_atombios.h" | 27 | #include "amdgpu_atombios.h" |
| 28 | #include "amdgpu_i2c.h" | 28 | #include "amdgpu_i2c.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f2d705e6a75a..b59f37c83fa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | |||
| @@ -39,7 +39,7 @@ | |||
| 39 | #include <linux/module.h> | 39 | #include <linux/module.h> |
| 40 | #include <linux/pm_runtime.h> | 40 | #include <linux/pm_runtime.h> |
| 41 | #include <linux/vga_switcheroo.h> | 41 | #include <linux/vga_switcheroo.h> |
| 42 | #include "drm_crtc_helper.h" | 42 | #include <drm/drm_crtc_helper.h> |
| 43 | 43 | ||
| 44 | #include "amdgpu.h" | 44 | #include "amdgpu.h" |
| 45 | #include "amdgpu_irq.h" | 45 | #include "amdgpu_irq.h" |
| @@ -65,9 +65,12 @@ | |||
| 65 | * - 3.13.0 - Add PRT support | 65 | * - 3.13.0 - Add PRT support |
| 66 | * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality | 66 | * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality |
| 67 | * - 3.15.0 - Export more gpu info for gfx9 | 67 | * - 3.15.0 - Export more gpu info for gfx9 |
| 68 | * - 3.16.0 - Add reserved vmid support | ||
| 69 | * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. | ||
| 70 | * - 3.18.0 - Export gpu always on cu bitmap | ||
| 68 | */ | 71 | */ |
| 69 | #define KMS_DRIVER_MAJOR 3 | 72 | #define KMS_DRIVER_MAJOR 3 |
| 70 | #define KMS_DRIVER_MINOR 15 | 73 | #define KMS_DRIVER_MINOR 18 |
| 71 | #define KMS_DRIVER_PATCHLEVEL 0 | 74 | #define KMS_DRIVER_PATCHLEVEL 0 |
| 72 | 75 | ||
| 73 | int amdgpu_vram_limit = 0; | 76 | int amdgpu_vram_limit = 0; |
| @@ -92,7 +95,8 @@ int amdgpu_vm_size = -1; | |||
| 92 | int amdgpu_vm_block_size = -1; | 95 | int amdgpu_vm_block_size = -1; |
| 93 | int amdgpu_vm_fault_stop = 0; | 96 | int amdgpu_vm_fault_stop = 0; |
| 94 | int amdgpu_vm_debug = 0; | 97 | int amdgpu_vm_debug = 0; |
| 95 | int amdgpu_vram_page_split = 1024; | 98 | int amdgpu_vram_page_split = 512; |
| 99 | int amdgpu_vm_update_mode = -1; | ||
| 96 | int amdgpu_exp_hw_support = 0; | 100 | int amdgpu_exp_hw_support = 0; |
| 97 | int amdgpu_sched_jobs = 32; | 101 | int amdgpu_sched_jobs = 32; |
| 98 | int amdgpu_sched_hw_submission = 2; | 102 | int amdgpu_sched_hw_submission = 2; |
| @@ -110,6 +114,8 @@ int amdgpu_prim_buf_per_se = 0; | |||
| 110 | int amdgpu_pos_buf_per_se = 0; | 114 | int amdgpu_pos_buf_per_se = 0; |
| 111 | int amdgpu_cntl_sb_buf_per_se = 0; | 115 | int amdgpu_cntl_sb_buf_per_se = 0; |
| 112 | int amdgpu_param_buf_per_se = 0; | 116 | int amdgpu_param_buf_per_se = 0; |
| 117 | int amdgpu_job_hang_limit = 0; | ||
| 118 | int amdgpu_lbpw = -1; | ||
| 113 | 119 | ||
| 114 | MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); | 120 | MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); |
| 115 | module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); | 121 | module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); |
| @@ -177,6 +183,9 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); | |||
| 177 | MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); | 183 | MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); |
| 178 | module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); | 184 | module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); |
| 179 | 185 | ||
| 186 | MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both"); | ||
| 187 | module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); | ||
| 188 | |||
| 180 | MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)"); | 189 | MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)"); |
| 181 | module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); | 190 | module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); |
| 182 | 191 | ||
| @@ -232,6 +241,38 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); | |||
| 232 | MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)"); | 241 | MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)"); |
| 233 | module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); | 242 | module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); |
| 234 | 243 | ||
| 244 | MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)"); | ||
| 245 | module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444); | ||
| 246 | |||
| 247 | MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)"); | ||
| 248 | module_param_named(lbpw, amdgpu_lbpw, int, 0444); | ||
| 249 | |||
| 250 | #ifdef CONFIG_DRM_AMDGPU_SI | ||
| 251 | |||
| 252 | #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) | ||
| 253 | int amdgpu_si_support = 0; | ||
| 254 | MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); | ||
| 255 | #else | ||
| 256 | int amdgpu_si_support = 1; | ||
| 257 | MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)"); | ||
| 258 | #endif | ||
| 259 | |||
| 260 | module_param_named(si_support, amdgpu_si_support, int, 0444); | ||
| 261 | #endif | ||
| 262 | |||
| 263 | #ifdef CONFIG_DRM_AMDGPU_CIK | ||
| 264 | |||
| 265 | #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) | ||
| 266 | int amdgpu_cik_support = 0; | ||
| 267 | MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); | ||
| 268 | #else | ||
| 269 | int amdgpu_cik_support = 1; | ||
| 270 | MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); | ||
| 271 | #endif | ||
| 272 | |||
| 273 | module_param_named(cik_support, amdgpu_cik_support, int, 0444); | ||
| 274 | #endif | ||
| 275 | |||
| 235 | 276 | ||
| 236 | static const struct pci_device_id pciidlist[] = { | 277 | static const struct pci_device_id pciidlist[] = { |
| 237 | #ifdef CONFIG_DRM_AMDGPU_SI | 278 | #ifdef CONFIG_DRM_AMDGPU_SI |
| @@ -449,6 +490,7 @@ static const struct pci_device_id pciidlist[] = { | |||
| 449 | {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, | 490 | {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, |
| 450 | {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, | 491 | {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, |
| 451 | {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, | 492 | {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, |
| 493 | {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, | ||
| 452 | {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, | 494 | {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, |
| 453 | /* Vega 10 */ | 495 | /* Vega 10 */ |
| 454 | {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, | 496 | {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, |
| @@ -460,6 +502,9 @@ static const struct pci_device_id pciidlist[] = { | |||
| 460 | {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, | 502 | {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, |
| 461 | {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, | 503 | {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, |
| 462 | {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, | 504 | {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, |
| 505 | /* Raven */ | ||
| 506 | {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, | ||
| 507 | |||
| 463 | {0, 0, 0} | 508 | {0, 0, 0} |
| 464 | }; | 509 | }; |
| 465 | 510 | ||
| @@ -491,6 +536,7 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev) | |||
| 491 | static int amdgpu_pci_probe(struct pci_dev *pdev, | 536 | static int amdgpu_pci_probe(struct pci_dev *pdev, |
| 492 | const struct pci_device_id *ent) | 537 | const struct pci_device_id *ent) |
| 493 | { | 538 | { |
| 539 | struct drm_device *dev; | ||
| 494 | unsigned long flags = ent->driver_data; | 540 | unsigned long flags = ent->driver_data; |
| 495 | int ret; | 541 | int ret; |
| 496 | 542 | ||
| @@ -513,7 +559,29 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, | |||
| 513 | if (ret) | 559 | if (ret) |
| 514 | return ret; | 560 | return ret; |
| 515 | 561 | ||
| 516 | return drm_get_pci_dev(pdev, ent, &kms_driver); | 562 | dev = drm_dev_alloc(&kms_driver, &pdev->dev); |
| 563 | if (IS_ERR(dev)) | ||
| 564 | return PTR_ERR(dev); | ||
| 565 | |||
| 566 | ret = pci_enable_device(pdev); | ||
| 567 | if (ret) | ||
| 568 | goto err_free; | ||
| 569 | |||
| 570 | dev->pdev = pdev; | ||
| 571 | |||
| 572 | pci_set_drvdata(pdev, dev); | ||
| 573 | |||
| 574 | ret = drm_dev_register(dev, ent->driver_data); | ||
| 575 | if (ret) | ||
| 576 | goto err_pci; | ||
| 577 | |||
| 578 | return 0; | ||
| 579 | |||
| 580 | err_pci: | ||
| 581 | pci_disable_device(pdev); | ||
| 582 | err_free: | ||
| 583 | drm_dev_unref(dev); | ||
| 584 | return ret; | ||
| 517 | } | 585 | } |
| 518 | 586 | ||
| 519 | static void | 587 | static void |
| @@ -521,7 +589,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) | |||
| 521 | { | 589 | { |
| 522 | struct drm_device *dev = pci_get_drvdata(pdev); | 590 | struct drm_device *dev = pci_get_drvdata(pdev); |
| 523 | 591 | ||
| 524 | drm_put_dev(dev); | 592 | drm_dev_unregister(dev); |
| 593 | drm_dev_unref(dev); | ||
| 525 | } | 594 | } |
| 526 | 595 | ||
| 527 | static void | 596 | static void |
| @@ -715,11 +784,21 @@ static const struct file_operations amdgpu_driver_kms_fops = { | |||
| 715 | #endif | 784 | #endif |
| 716 | }; | 785 | }; |
| 717 | 786 | ||
| 787 | static bool | ||
| 788 | amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, | ||
| 789 | bool in_vblank_irq, int *vpos, int *hpos, | ||
| 790 | ktime_t *stime, ktime_t *etime, | ||
| 791 | const struct drm_display_mode *mode) | ||
| 792 | { | ||
| 793 | return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, | ||
| 794 | stime, etime, mode); | ||
| 795 | } | ||
| 796 | |||
| 718 | static struct drm_driver kms_driver = { | 797 | static struct drm_driver kms_driver = { |
| 719 | .driver_features = | 798 | .driver_features = |
| 720 | DRIVER_USE_AGP | | 799 | DRIVER_USE_AGP | |
| 721 | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | | 800 | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | |
| 722 | DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, | 801 | DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, |
| 723 | .load = amdgpu_driver_load_kms, | 802 | .load = amdgpu_driver_load_kms, |
| 724 | .open = amdgpu_driver_open_kms, | 803 | .open = amdgpu_driver_open_kms, |
| 725 | .postclose = amdgpu_driver_postclose_kms, | 804 | .postclose = amdgpu_driver_postclose_kms, |
| @@ -729,8 +808,8 @@ static struct drm_driver kms_driver = { | |||
| 729 | .get_vblank_counter = amdgpu_get_vblank_counter_kms, | 808 | .get_vblank_counter = amdgpu_get_vblank_counter_kms, |
| 730 | .enable_vblank = amdgpu_enable_vblank_kms, | 809 | .enable_vblank = amdgpu_enable_vblank_kms, |
| 731 | .disable_vblank = amdgpu_disable_vblank_kms, | 810 | .disable_vblank = amdgpu_disable_vblank_kms, |
| 732 | .get_vblank_timestamp = amdgpu_get_vblank_timestamp_kms, | 811 | .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, |
| 733 | .get_scanout_position = amdgpu_get_crtc_scanoutpos, | 812 | .get_scanout_position = amdgpu_get_crtc_scanout_position, |
| 734 | #if defined(CONFIG_DEBUG_FS) | 813 | #if defined(CONFIG_DEBUG_FS) |
| 735 | .debugfs_init = amdgpu_debugfs_init, | 814 | .debugfs_init = amdgpu_debugfs_init, |
| 736 | #endif | 815 | #endif |
| @@ -807,7 +886,7 @@ static int __init amdgpu_init(void) | |||
| 807 | driver->num_ioctls = amdgpu_max_kms_ioctl; | 886 | driver->num_ioctls = amdgpu_max_kms_ioctl; |
| 808 | amdgpu_register_atpx_handler(); | 887 | amdgpu_register_atpx_handler(); |
| 809 | /* let modprobe override vga console setting */ | 888 | /* let modprobe override vga console setting */ |
| 810 | return drm_pci_init(driver, pdriver); | 889 | return pci_register_driver(pdriver); |
| 811 | 890 | ||
| 812 | error_sched: | 891 | error_sched: |
| 813 | amdgpu_fence_slab_fini(); | 892 | amdgpu_fence_slab_fini(); |
| @@ -822,7 +901,7 @@ error_sync: | |||
| 822 | static void __exit amdgpu_exit(void) | 901 | static void __exit amdgpu_exit(void) |
| 823 | { | 902 | { |
| 824 | amdgpu_amdkfd_fini(); | 903 | amdgpu_amdkfd_fini(); |
| 825 | drm_pci_exit(driver, pdriver); | 904 | pci_unregister_driver(pdriver); |
| 826 | amdgpu_unregister_atpx_handler(); | 905 | amdgpu_unregister_atpx_handler(); |
| 827 | amdgpu_sync_fini(); | 906 | amdgpu_sync_fini(); |
| 828 | amd_sched_fence_slab_fini(); | 907 | amd_sched_fence_slab_fini(); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 236d9950221b..c0d8c6ff6380 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | |||
| @@ -425,10 +425,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) | |||
| 425 | 425 | ||
| 426 | void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev) | 426 | void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev) |
| 427 | { | 427 | { |
| 428 | struct amdgpu_fbdev *afbdev = adev->mode_info.rfbdev; | 428 | struct amdgpu_fbdev *afbdev; |
| 429 | struct drm_fb_helper *fb_helper; | 429 | struct drm_fb_helper *fb_helper; |
| 430 | int ret; | 430 | int ret; |
| 431 | 431 | ||
| 432 | if (!adev) | ||
| 433 | return; | ||
| 434 | |||
| 435 | afbdev = adev->mode_info.rfbdev; | ||
| 436 | |||
| 432 | if (!afbdev) | 437 | if (!afbdev) |
| 433 | return; | 438 | return; |
| 434 | 439 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7b60fb79c3a6..333bad749067 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | |||
| @@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) | |||
| 541 | } | 541 | } |
| 542 | } | 542 | } |
| 543 | 543 | ||
| 544 | void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring) | ||
| 545 | { | ||
| 546 | if (ring) | ||
| 547 | amdgpu_fence_write(ring, ring->fence_drv.sync_seq); | ||
| 548 | } | ||
| 549 | |||
| 544 | /* | 550 | /* |
| 545 | * Common fence implementation | 551 | * Common fence implementation |
| 546 | */ | 552 | */ |
| @@ -660,11 +666,17 @@ static const struct drm_info_list amdgpu_debugfs_fence_list[] = { | |||
| 660 | {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, | 666 | {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, |
| 661 | {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} | 667 | {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} |
| 662 | }; | 668 | }; |
| 669 | |||
| 670 | static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = { | ||
| 671 | {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, | ||
| 672 | }; | ||
| 663 | #endif | 673 | #endif |
| 664 | 674 | ||
| 665 | int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) | 675 | int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) |
| 666 | { | 676 | { |
| 667 | #if defined(CONFIG_DEBUG_FS) | 677 | #if defined(CONFIG_DEBUG_FS) |
| 678 | if (amdgpu_sriov_vf(adev)) | ||
| 679 | return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1); | ||
| 668 | return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); | 680 | return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); |
| 669 | #else | 681 | #else |
| 670 | return 0; | 682 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 902e6015abca..a57abc1a25fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | |||
| @@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) | |||
| 224 | * | 224 | * |
| 225 | * Unbinds the requested pages from the gart page table and | 225 | * Unbinds the requested pages from the gart page table and |
| 226 | * replaces them with the dummy page (all asics). | 226 | * replaces them with the dummy page (all asics). |
| 227 | * Returns 0 for success, -EINVAL for failure. | ||
| 227 | */ | 228 | */ |
| 228 | void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, | 229 | int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, |
| 229 | int pages) | 230 | int pages) |
| 230 | { | 231 | { |
| 231 | unsigned t; | 232 | unsigned t; |
| @@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, | |||
| 237 | 238 | ||
| 238 | if (!adev->gart.ready) { | 239 | if (!adev->gart.ready) { |
| 239 | WARN(1, "trying to unbind memory from uninitialized GART !\n"); | 240 | WARN(1, "trying to unbind memory from uninitialized GART !\n"); |
| 240 | return; | 241 | return -EINVAL; |
| 241 | } | 242 | } |
| 242 | 243 | ||
| 243 | t = offset / AMDGPU_GPU_PAGE_SIZE; | 244 | t = offset / AMDGPU_GPU_PAGE_SIZE; |
| @@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, | |||
| 258 | } | 259 | } |
| 259 | mb(); | 260 | mb(); |
| 260 | amdgpu_gart_flush_gpu_tlb(adev, 0); | 261 | amdgpu_gart_flush_gpu_tlb(adev, 0); |
| 262 | return 0; | ||
| 261 | } | 263 | } |
| 262 | 264 | ||
| 263 | /** | 265 | /** |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 94cb91cf93eb..621f739103a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | |||
| @@ -219,16 +219,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, | |||
| 219 | ttm_eu_backoff_reservation(&ticket, &list); | 219 | ttm_eu_backoff_reservation(&ticket, &list); |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r) | ||
| 223 | { | ||
| 224 | if (r == -EDEADLK) { | ||
| 225 | r = amdgpu_gpu_reset(adev); | ||
| 226 | if (!r) | ||
| 227 | r = -EAGAIN; | ||
| 228 | } | ||
| 229 | return r; | ||
| 230 | } | ||
| 231 | |||
| 232 | /* | 222 | /* |
| 233 | * GEM ioctls. | 223 | * GEM ioctls. |
| 234 | */ | 224 | */ |
| @@ -249,20 +239,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, | |||
| 249 | AMDGPU_GEM_CREATE_CPU_GTT_USWC | | 239 | AMDGPU_GEM_CREATE_CPU_GTT_USWC | |
| 250 | AMDGPU_GEM_CREATE_VRAM_CLEARED| | 240 | AMDGPU_GEM_CREATE_VRAM_CLEARED| |
| 251 | AMDGPU_GEM_CREATE_SHADOW | | 241 | AMDGPU_GEM_CREATE_SHADOW | |
| 252 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { | 242 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) |
| 253 | r = -EINVAL; | 243 | return -EINVAL; |
| 254 | goto error_unlock; | 244 | |
| 255 | } | ||
| 256 | /* reject invalid gem domains */ | 245 | /* reject invalid gem domains */ |
| 257 | if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | | 246 | if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | |
| 258 | AMDGPU_GEM_DOMAIN_GTT | | 247 | AMDGPU_GEM_DOMAIN_GTT | |
| 259 | AMDGPU_GEM_DOMAIN_VRAM | | 248 | AMDGPU_GEM_DOMAIN_VRAM | |
| 260 | AMDGPU_GEM_DOMAIN_GDS | | 249 | AMDGPU_GEM_DOMAIN_GDS | |
| 261 | AMDGPU_GEM_DOMAIN_GWS | | 250 | AMDGPU_GEM_DOMAIN_GWS | |
| 262 | AMDGPU_GEM_DOMAIN_OA)) { | 251 | AMDGPU_GEM_DOMAIN_OA)) |
| 263 | r = -EINVAL; | 252 | return -EINVAL; |
| 264 | goto error_unlock; | ||
| 265 | } | ||
| 266 | 253 | ||
| 267 | /* create a gem object to contain this object in */ | 254 | /* create a gem object to contain this object in */ |
| 268 | if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | | 255 | if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | |
| @@ -274,10 +261,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, | |||
| 274 | size = size << AMDGPU_GWS_SHIFT; | 261 | size = size << AMDGPU_GWS_SHIFT; |
| 275 | else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) | 262 | else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) |
| 276 | size = size << AMDGPU_OA_SHIFT; | 263 | size = size << AMDGPU_OA_SHIFT; |
| 277 | else { | 264 | else |
| 278 | r = -EINVAL; | 265 | return -EINVAL; |
| 279 | goto error_unlock; | ||
| 280 | } | ||
| 281 | } | 266 | } |
| 282 | size = roundup(size, PAGE_SIZE); | 267 | size = roundup(size, PAGE_SIZE); |
| 283 | 268 | ||
| @@ -286,21 +271,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, | |||
| 286 | args->in.domain_flags, | 271 | args->in.domain_flags, |
| 287 | kernel, &gobj); | 272 | kernel, &gobj); |
| 288 | if (r) | 273 | if (r) |
| 289 | goto error_unlock; | 274 | return r; |
| 290 | 275 | ||
| 291 | r = drm_gem_handle_create(filp, gobj, &handle); | 276 | r = drm_gem_handle_create(filp, gobj, &handle); |
| 292 | /* drop reference from allocate - handle holds it now */ | 277 | /* drop reference from allocate - handle holds it now */ |
| 293 | drm_gem_object_unreference_unlocked(gobj); | 278 | drm_gem_object_unreference_unlocked(gobj); |
| 294 | if (r) | 279 | if (r) |
| 295 | goto error_unlock; | 280 | return r; |
| 296 | 281 | ||
| 297 | memset(args, 0, sizeof(*args)); | 282 | memset(args, 0, sizeof(*args)); |
| 298 | args->out.handle = handle; | 283 | args->out.handle = handle; |
| 299 | return 0; | 284 | return 0; |
| 300 | |||
| 301 | error_unlock: | ||
| 302 | r = amdgpu_gem_handle_lockup(adev, r); | ||
| 303 | return r; | ||
| 304 | } | 285 | } |
| 305 | 286 | ||
| 306 | int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, | 287 | int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, |
| @@ -334,7 +315,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, | |||
| 334 | AMDGPU_GEM_DOMAIN_CPU, 0, | 315 | AMDGPU_GEM_DOMAIN_CPU, 0, |
| 335 | 0, &gobj); | 316 | 0, &gobj); |
| 336 | if (r) | 317 | if (r) |
| 337 | goto handle_lockup; | 318 | return r; |
| 338 | 319 | ||
| 339 | bo = gem_to_amdgpu_bo(gobj); | 320 | bo = gem_to_amdgpu_bo(gobj); |
| 340 | bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; | 321 | bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; |
| @@ -374,7 +355,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, | |||
| 374 | /* drop reference from allocate - handle holds it now */ | 355 | /* drop reference from allocate - handle holds it now */ |
| 375 | drm_gem_object_unreference_unlocked(gobj); | 356 | drm_gem_object_unreference_unlocked(gobj); |
| 376 | if (r) | 357 | if (r) |
| 377 | goto handle_lockup; | 358 | return r; |
| 378 | 359 | ||
| 379 | args->handle = handle; | 360 | args->handle = handle; |
| 380 | return 0; | 361 | return 0; |
| @@ -388,9 +369,6 @@ unlock_mmap_sem: | |||
| 388 | release_object: | 369 | release_object: |
| 389 | drm_gem_object_unreference_unlocked(gobj); | 370 | drm_gem_object_unreference_unlocked(gobj); |
| 390 | 371 | ||
| 391 | handle_lockup: | ||
| 392 | r = amdgpu_gem_handle_lockup(adev, r); | ||
| 393 | |||
| 394 | return r; | 372 | return r; |
| 395 | } | 373 | } |
| 396 | 374 | ||
| @@ -456,7 +434,6 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) | |||
| 456 | int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, | 434 | int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, |
| 457 | struct drm_file *filp) | 435 | struct drm_file *filp) |
| 458 | { | 436 | { |
| 459 | struct amdgpu_device *adev = dev->dev_private; | ||
| 460 | union drm_amdgpu_gem_wait_idle *args = data; | 437 | union drm_amdgpu_gem_wait_idle *args = data; |
| 461 | struct drm_gem_object *gobj; | 438 | struct drm_gem_object *gobj; |
| 462 | struct amdgpu_bo *robj; | 439 | struct amdgpu_bo *robj; |
| @@ -484,7 +461,6 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, | |||
| 484 | r = ret; | 461 | r = ret; |
| 485 | 462 | ||
| 486 | drm_gem_object_unreference_unlocked(gobj); | 463 | drm_gem_object_unreference_unlocked(gobj); |
| 487 | r = amdgpu_gem_handle_lockup(adev, r); | ||
| 488 | return r; | 464 | return r; |
| 489 | } | 465 | } |
| 490 | 466 | ||
| @@ -593,9 +569,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, | |||
| 593 | uint64_t va_flags; | 569 | uint64_t va_flags; |
| 594 | int r = 0; | 570 | int r = 0; |
| 595 | 571 | ||
| 596 | if (!adev->vm_manager.enabled) | ||
| 597 | return -ENOTTY; | ||
| 598 | |||
| 599 | if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { | 572 | if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { |
| 600 | dev_err(&dev->pdev->dev, | 573 | dev_err(&dev->pdev->dev, |
| 601 | "va_address 0x%lX is in reserved area 0x%X\n", | 574 | "va_address 0x%lX is in reserved area 0x%X\n", |
| @@ -621,6 +594,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, | |||
| 621 | args->operation); | 594 | args->operation); |
| 622 | return -EINVAL; | 595 | return -EINVAL; |
| 623 | } | 596 | } |
| 597 | if ((args->operation == AMDGPU_VA_OP_MAP) || | ||
| 598 | (args->operation == AMDGPU_VA_OP_REPLACE)) { | ||
| 599 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
| 600 | return -ENODEV; | ||
| 601 | } | ||
| 624 | 602 | ||
| 625 | INIT_LIST_HEAD(&list); | 603 | INIT_LIST_HEAD(&list); |
| 626 | if ((args->operation != AMDGPU_VA_OP_CLEAR) && | 604 | if ((args->operation != AMDGPU_VA_OP_CLEAR) && |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 19943356cca7..e26108aad3fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | |||
| @@ -108,3 +108,209 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s | |||
| 108 | p = next + 1; | 108 | p = next + 1; |
| 109 | } | 109 | } |
| 110 | } | 110 | } |
| 111 | |||
| 112 | void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) | ||
| 113 | { | ||
| 114 | int i, queue, pipe, mec; | ||
| 115 | |||
| 116 | /* policy for amdgpu compute queue ownership */ | ||
| 117 | for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { | ||
| 118 | queue = i % adev->gfx.mec.num_queue_per_pipe; | ||
| 119 | pipe = (i / adev->gfx.mec.num_queue_per_pipe) | ||
| 120 | % adev->gfx.mec.num_pipe_per_mec; | ||
| 121 | mec = (i / adev->gfx.mec.num_queue_per_pipe) | ||
| 122 | / adev->gfx.mec.num_pipe_per_mec; | ||
| 123 | |||
| 124 | /* we've run out of HW */ | ||
| 125 | if (mec >= adev->gfx.mec.num_mec) | ||
| 126 | break; | ||
| 127 | |||
| 128 | if (adev->gfx.mec.num_mec > 1) { | ||
| 129 | /* policy: amdgpu owns the first two queues of the first MEC */ | ||
| 130 | if (mec == 0 && queue < 2) | ||
| 131 | set_bit(i, adev->gfx.mec.queue_bitmap); | ||
| 132 | } else { | ||
| 133 | /* policy: amdgpu owns all queues in the first pipe */ | ||
| 134 | if (mec == 0 && pipe == 0) | ||
| 135 | set_bit(i, adev->gfx.mec.queue_bitmap); | ||
| 136 | } | ||
| 137 | } | ||
| 138 | |||
| 139 | /* update the number of active compute rings */ | ||
| 140 | adev->gfx.num_compute_rings = | ||
| 141 | bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); | ||
| 142 | |||
| 143 | /* If you hit this case and edited the policy, you probably just | ||
| 144 | * need to increase AMDGPU_MAX_COMPUTE_RINGS */ | ||
| 145 | if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) | ||
| 146 | adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; | ||
| 147 | } | ||
| 148 | |||
| 149 | static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, | ||
| 150 | struct amdgpu_ring *ring) | ||
| 151 | { | ||
| 152 | int queue_bit; | ||
| 153 | int mec, pipe, queue; | ||
| 154 | |||
| 155 | queue_bit = adev->gfx.mec.num_mec | ||
| 156 | * adev->gfx.mec.num_pipe_per_mec | ||
| 157 | * adev->gfx.mec.num_queue_per_pipe; | ||
| 158 | |||
| 159 | while (queue_bit-- >= 0) { | ||
| 160 | if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) | ||
| 161 | continue; | ||
| 162 | |||
| 163 | amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); | ||
| 164 | |||
| 165 | /* Using pipes 2/3 from MEC 2 seems cause problems */ | ||
| 166 | if (mec == 1 && pipe > 1) | ||
| 167 | continue; | ||
| 168 | |||
| 169 | ring->me = mec + 1; | ||
| 170 | ring->pipe = pipe; | ||
| 171 | ring->queue = queue; | ||
| 172 | |||
| 173 | return 0; | ||
| 174 | } | ||
| 175 | |||
| 176 | dev_err(adev->dev, "Failed to find a queue for KIQ\n"); | ||
| 177 | return -EINVAL; | ||
| 178 | } | ||
| 179 | |||
| 180 | int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, | ||
| 181 | struct amdgpu_ring *ring, | ||
| 182 | struct amdgpu_irq_src *irq) | ||
| 183 | { | ||
| 184 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 185 | int r = 0; | ||
| 186 | |||
| 187 | mutex_init(&kiq->ring_mutex); | ||
| 188 | |||
| 189 | r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); | ||
| 190 | if (r) | ||
| 191 | return r; | ||
| 192 | |||
| 193 | ring->adev = NULL; | ||
| 194 | ring->ring_obj = NULL; | ||
| 195 | ring->use_doorbell = true; | ||
| 196 | ring->doorbell_index = AMDGPU_DOORBELL_KIQ; | ||
| 197 | |||
| 198 | r = amdgpu_gfx_kiq_acquire(adev, ring); | ||
| 199 | if (r) | ||
| 200 | return r; | ||
| 201 | |||
| 202 | ring->eop_gpu_addr = kiq->eop_gpu_addr; | ||
| 203 | sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 204 | r = amdgpu_ring_init(adev, ring, 1024, | ||
| 205 | irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); | ||
| 206 | if (r) | ||
| 207 | dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); | ||
| 208 | |||
| 209 | return r; | ||
| 210 | } | ||
| 211 | |||
| 212 | void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, | ||
| 213 | struct amdgpu_irq_src *irq) | ||
| 214 | { | ||
| 215 | amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); | ||
| 216 | amdgpu_ring_fini(ring); | ||
| 217 | } | ||
| 218 | |||
| 219 | void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev) | ||
| 220 | { | ||
| 221 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 222 | |||
| 223 | amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); | ||
| 224 | } | ||
| 225 | |||
| 226 | int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, | ||
| 227 | unsigned hpd_size) | ||
| 228 | { | ||
| 229 | int r; | ||
| 230 | u32 *hpd; | ||
| 231 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 232 | |||
| 233 | r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE, | ||
| 234 | AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, | ||
| 235 | &kiq->eop_gpu_addr, (void **)&hpd); | ||
| 236 | if (r) { | ||
| 237 | dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); | ||
| 238 | return r; | ||
| 239 | } | ||
| 240 | |||
| 241 | memset(hpd, 0, hpd_size); | ||
| 242 | |||
| 243 | r = amdgpu_bo_reserve(kiq->eop_obj, true); | ||
| 244 | if (unlikely(r != 0)) | ||
| 245 | dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); | ||
| 246 | amdgpu_bo_kunmap(kiq->eop_obj); | ||
| 247 | amdgpu_bo_unreserve(kiq->eop_obj); | ||
| 248 | |||
| 249 | return 0; | ||
| 250 | } | ||
| 251 | |||
| 252 | /* create MQD for each compute queue */ | ||
| 253 | int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, | ||
| 254 | unsigned mqd_size) | ||
| 255 | { | ||
| 256 | struct amdgpu_ring *ring = NULL; | ||
| 257 | int r, i; | ||
| 258 | |||
| 259 | /* create MQD for KIQ */ | ||
| 260 | ring = &adev->gfx.kiq.ring; | ||
| 261 | if (!ring->mqd_obj) { | ||
| 262 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, | ||
| 263 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, | ||
| 264 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | ||
| 265 | if (r) { | ||
| 266 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | ||
| 267 | return r; | ||
| 268 | } | ||
| 269 | |||
| 270 | /* prepare MQD backup */ | ||
| 271 | adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL); | ||
| 272 | if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) | ||
| 273 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); | ||
| 274 | } | ||
| 275 | |||
| 276 | /* create MQD for each KCQ */ | ||
| 277 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 278 | ring = &adev->gfx.compute_ring[i]; | ||
| 279 | if (!ring->mqd_obj) { | ||
| 280 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, | ||
| 281 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, | ||
| 282 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | ||
| 283 | if (r) { | ||
| 284 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | ||
| 285 | return r; | ||
| 286 | } | ||
| 287 | |||
| 288 | /* prepare MQD backup */ | ||
| 289 | adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); | ||
| 290 | if (!adev->gfx.mec.mqd_backup[i]) | ||
| 291 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | return 0; | ||
| 296 | } | ||
| 297 | |||
| 298 | void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) | ||
| 299 | { | ||
| 300 | struct amdgpu_ring *ring = NULL; | ||
| 301 | int i; | ||
| 302 | |||
| 303 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 304 | ring = &adev->gfx.compute_ring[i]; | ||
| 305 | kfree(adev->gfx.mec.mqd_backup[i]); | ||
| 306 | amdgpu_bo_free_kernel(&ring->mqd_obj, | ||
| 307 | &ring->mqd_gpu_addr, | ||
| 308 | &ring->mqd_ptr); | ||
| 309 | } | ||
| 310 | |||
| 311 | ring = &adev->gfx.kiq.ring; | ||
| 312 | kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); | ||
| 313 | amdgpu_bo_free_kernel(&ring->mqd_obj, | ||
| 314 | &ring->mqd_gpu_addr, | ||
| 315 | &ring->mqd_ptr); | ||
| 316 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index e02044086445..1f279050d334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | |||
| @@ -30,4 +30,64 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg); | |||
| 30 | void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, | 30 | void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, |
| 31 | unsigned max_sh); | 31 | unsigned max_sh); |
| 32 | 32 | ||
| 33 | void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); | ||
| 34 | |||
| 35 | int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, | ||
| 36 | struct amdgpu_ring *ring, | ||
| 37 | struct amdgpu_irq_src *irq); | ||
| 38 | |||
| 39 | void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, | ||
| 40 | struct amdgpu_irq_src *irq); | ||
| 41 | |||
| 42 | void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); | ||
| 43 | int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, | ||
| 44 | unsigned hpd_size); | ||
| 45 | |||
| 46 | int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, | ||
| 47 | unsigned mqd_size); | ||
| 48 | void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); | ||
| 49 | |||
| 50 | /** | ||
| 51 | * amdgpu_gfx_create_bitmask - create a bitmask | ||
| 52 | * | ||
| 53 | * @bit_width: length of the mask | ||
| 54 | * | ||
| 55 | * create a variable length bit mask. | ||
| 56 | * Returns the bitmask. | ||
| 57 | */ | ||
| 58 | static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) | ||
| 59 | { | ||
| 60 | return (u32)((1ULL << bit_width) - 1); | ||
| 61 | } | ||
| 62 | |||
| 63 | static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, | ||
| 64 | int mec, int pipe, int queue) | ||
| 65 | { | ||
| 66 | int bit = 0; | ||
| 67 | |||
| 68 | bit += mec * adev->gfx.mec.num_pipe_per_mec | ||
| 69 | * adev->gfx.mec.num_queue_per_pipe; | ||
| 70 | bit += pipe * adev->gfx.mec.num_queue_per_pipe; | ||
| 71 | bit += queue; | ||
| 72 | |||
| 73 | return bit; | ||
| 74 | } | ||
| 75 | |||
| 76 | static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, | ||
| 77 | int *mec, int *pipe, int *queue) | ||
| 78 | { | ||
| 79 | *queue = bit % adev->gfx.mec.num_queue_per_pipe; | ||
| 80 | *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) | ||
| 81 | % adev->gfx.mec.num_pipe_per_mec; | ||
| 82 | *mec = (bit / adev->gfx.mec.num_queue_per_pipe) | ||
| 83 | / adev->gfx.mec.num_pipe_per_mec; | ||
| 84 | |||
| 85 | } | ||
| 86 | static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, | ||
| 87 | int mec, int pipe, int queue) | ||
| 88 | { | ||
| 89 | return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue), | ||
| 90 | adev->gfx.mec.queue_bitmap); | ||
| 91 | } | ||
| 92 | |||
| 33 | #endif | 93 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6e4ae0d983c2..f774b3f497d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | |||
| @@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
| 121 | { | 121 | { |
| 122 | struct amdgpu_device *adev = ring->adev; | 122 | struct amdgpu_device *adev = ring->adev; |
| 123 | struct amdgpu_ib *ib = &ibs[0]; | 123 | struct amdgpu_ib *ib = &ibs[0]; |
| 124 | struct dma_fence *tmp = NULL; | ||
| 124 | bool skip_preamble, need_ctx_switch; | 125 | bool skip_preamble, need_ctx_switch; |
| 125 | unsigned patch_offset = ~0; | 126 | unsigned patch_offset = ~0; |
| 126 | struct amdgpu_vm *vm; | 127 | struct amdgpu_vm *vm; |
| @@ -160,8 +161,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
| 160 | dev_err(adev->dev, "scheduling IB failed (%d).\n", r); | 161 | dev_err(adev->dev, "scheduling IB failed (%d).\n", r); |
| 161 | return r; | 162 | return r; |
| 162 | } | 163 | } |
| 163 | if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) | 164 | |
| 165 | if (ring->funcs->emit_pipeline_sync && job && | ||
| 166 | ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || | ||
| 167 | amdgpu_vm_need_pipeline_sync(ring, job))) { | ||
| 164 | amdgpu_ring_emit_pipeline_sync(ring); | 168 | amdgpu_ring_emit_pipeline_sync(ring); |
| 169 | dma_fence_put(tmp); | ||
| 170 | } | ||
| 171 | |||
| 172 | if (ring->funcs->insert_start) | ||
| 173 | ring->funcs->insert_start(ring); | ||
| 165 | 174 | ||
| 166 | if (vm) { | 175 | if (vm) { |
| 167 | r = amdgpu_vm_flush(ring, job); | 176 | r = amdgpu_vm_flush(ring, job); |
| @@ -188,8 +197,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
| 188 | status |= AMDGPU_HAVE_CTX_SWITCH; | 197 | status |= AMDGPU_HAVE_CTX_SWITCH; |
| 189 | status |= job->preamble_status; | 198 | status |= job->preamble_status; |
| 190 | 199 | ||
| 191 | if (vm) | ||
| 192 | status |= AMDGPU_VM_DOMAIN; | ||
| 193 | amdgpu_ring_emit_cntxcntl(ring, status); | 200 | amdgpu_ring_emit_cntxcntl(ring, status); |
| 194 | } | 201 | } |
| 195 | 202 | ||
| @@ -208,6 +215,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
| 208 | need_ctx_switch = false; | 215 | need_ctx_switch = false; |
| 209 | } | 216 | } |
| 210 | 217 | ||
| 218 | if (ring->funcs->emit_tmz) | ||
| 219 | amdgpu_ring_emit_tmz(ring, false); | ||
| 220 | |||
| 211 | if (ring->funcs->emit_hdp_invalidate | 221 | if (ring->funcs->emit_hdp_invalidate |
| 212 | #ifdef CONFIG_X86_64 | 222 | #ifdef CONFIG_X86_64 |
| 213 | && !(adev->flags & AMD_IS_APU) | 223 | && !(adev->flags & AMD_IS_APU) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index a3da1a122fc8..3de8e74e5b3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | |||
| @@ -62,8 +62,9 @@ enum amdgpu_ih_clientid | |||
| 62 | AMDGPU_IH_CLIENTID_MP0 = 0x1e, | 62 | AMDGPU_IH_CLIENTID_MP0 = 0x1e, |
| 63 | AMDGPU_IH_CLIENTID_MP1 = 0x1f, | 63 | AMDGPU_IH_CLIENTID_MP1 = 0x1f, |
| 64 | 64 | ||
| 65 | AMDGPU_IH_CLIENTID_MAX | 65 | AMDGPU_IH_CLIENTID_MAX, |
| 66 | 66 | ||
| 67 | AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD | ||
| 67 | }; | 68 | }; |
| 68 | 69 | ||
| 69 | #define AMDGPU_IH_CLIENTID_LEGACY 0 | 70 | #define AMDGPU_IH_CLIENTID_LEGACY 0 |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index a6b7e367a860..62da6c5c6095 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | |||
| @@ -83,7 +83,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) | |||
| 83 | struct amdgpu_device *adev = container_of(work, struct amdgpu_device, | 83 | struct amdgpu_device *adev = container_of(work, struct amdgpu_device, |
| 84 | reset_work); | 84 | reset_work); |
| 85 | 85 | ||
| 86 | amdgpu_gpu_reset(adev); | 86 | if (!amdgpu_sriov_vf(adev)) |
| 87 | amdgpu_gpu_reset(adev); | ||
| 87 | } | 88 | } |
| 88 | 89 | ||
| 89 | /* Disable *all* interrupts */ | 90 | /* Disable *all* interrupts */ |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 7570f2439a11..3d641e10e6b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | |||
| @@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) | |||
| 36 | job->base.sched->name, | 36 | job->base.sched->name, |
| 37 | atomic_read(&job->ring->fence_drv.last_seq), | 37 | atomic_read(&job->ring->fence_drv.last_seq), |
| 38 | job->ring->fence_drv.sync_seq); | 38 | job->ring->fence_drv.sync_seq); |
| 39 | amdgpu_gpu_reset(job->adev); | 39 | |
| 40 | if (amdgpu_sriov_vf(job->adev)) | ||
| 41 | amdgpu_sriov_gpu_reset(job->adev, job); | ||
| 42 | else | ||
| 43 | amdgpu_gpu_reset(job->adev); | ||
| 40 | } | 44 | } |
| 41 | 45 | ||
| 42 | int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, | 46 | int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, |
| @@ -57,9 +61,10 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, | |||
| 57 | (*job)->vm = vm; | 61 | (*job)->vm = vm; |
| 58 | (*job)->ibs = (void *)&(*job)[1]; | 62 | (*job)->ibs = (void *)&(*job)[1]; |
| 59 | (*job)->num_ibs = num_ibs; | 63 | (*job)->num_ibs = num_ibs; |
| 60 | (*job)->need_pipeline_sync = false; | ||
| 61 | 64 | ||
| 62 | amdgpu_sync_create(&(*job)->sync); | 65 | amdgpu_sync_create(&(*job)->sync); |
| 66 | amdgpu_sync_create(&(*job)->dep_sync); | ||
| 67 | amdgpu_sync_create(&(*job)->sched_sync); | ||
| 63 | 68 | ||
| 64 | return 0; | 69 | return 0; |
| 65 | } | 70 | } |
| @@ -98,6 +103,8 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) | |||
| 98 | 103 | ||
| 99 | dma_fence_put(job->fence); | 104 | dma_fence_put(job->fence); |
| 100 | amdgpu_sync_free(&job->sync); | 105 | amdgpu_sync_free(&job->sync); |
| 106 | amdgpu_sync_free(&job->dep_sync); | ||
| 107 | amdgpu_sync_free(&job->sched_sync); | ||
| 101 | kfree(job); | 108 | kfree(job); |
| 102 | } | 109 | } |
| 103 | 110 | ||
| @@ -107,6 +114,8 @@ void amdgpu_job_free(struct amdgpu_job *job) | |||
| 107 | 114 | ||
| 108 | dma_fence_put(job->fence); | 115 | dma_fence_put(job->fence); |
| 109 | amdgpu_sync_free(&job->sync); | 116 | amdgpu_sync_free(&job->sync); |
| 117 | amdgpu_sync_free(&job->dep_sync); | ||
| 118 | amdgpu_sync_free(&job->sched_sync); | ||
| 110 | kfree(job); | 119 | kfree(job); |
| 111 | } | 120 | } |
| 112 | 121 | ||
| @@ -138,11 +147,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) | |||
| 138 | struct amdgpu_job *job = to_amdgpu_job(sched_job); | 147 | struct amdgpu_job *job = to_amdgpu_job(sched_job); |
| 139 | struct amdgpu_vm *vm = job->vm; | 148 | struct amdgpu_vm *vm = job->vm; |
| 140 | 149 | ||
| 141 | struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); | 150 | struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync); |
| 151 | int r; | ||
| 142 | 152 | ||
| 153 | if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) { | ||
| 154 | r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence); | ||
| 155 | if (r) | ||
| 156 | DRM_ERROR("Error adding fence to sync (%d)\n", r); | ||
| 157 | } | ||
| 158 | if (!fence) | ||
| 159 | fence = amdgpu_sync_get_fence(&job->sync); | ||
| 143 | while (fence == NULL && vm && !job->vm_id) { | 160 | while (fence == NULL && vm && !job->vm_id) { |
| 144 | struct amdgpu_ring *ring = job->ring; | 161 | struct amdgpu_ring *ring = job->ring; |
| 145 | int r; | ||
| 146 | 162 | ||
| 147 | r = amdgpu_vm_grab_id(vm, ring, &job->sync, | 163 | r = amdgpu_vm_grab_id(vm, ring, &job->sync, |
| 148 | &job->base.s_fence->finished, | 164 | &job->base.s_fence->finished, |
| @@ -153,9 +169,6 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) | |||
| 153 | fence = amdgpu_sync_get_fence(&job->sync); | 169 | fence = amdgpu_sync_get_fence(&job->sync); |
| 154 | } | 170 | } |
| 155 | 171 | ||
| 156 | if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) | ||
| 157 | job->need_pipeline_sync = true; | ||
| 158 | |||
| 159 | return fence; | 172 | return fence; |
| 160 | } | 173 | } |
| 161 | 174 | ||
| @@ -163,6 +176,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) | |||
| 163 | { | 176 | { |
| 164 | struct dma_fence *fence = NULL; | 177 | struct dma_fence *fence = NULL; |
| 165 | struct amdgpu_job *job; | 178 | struct amdgpu_job *job; |
| 179 | struct amdgpu_fpriv *fpriv = NULL; | ||
| 166 | int r; | 180 | int r; |
| 167 | 181 | ||
| 168 | if (!sched_job) { | 182 | if (!sched_job) { |
| @@ -174,10 +188,16 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) | |||
| 174 | BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); | 188 | BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); |
| 175 | 189 | ||
| 176 | trace_amdgpu_sched_run_job(job); | 190 | trace_amdgpu_sched_run_job(job); |
| 177 | r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); | 191 | if (job->vm) |
| 178 | if (r) | 192 | fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); |
| 179 | DRM_ERROR("Error scheduling IBs (%d)\n", r); | 193 | /* skip ib schedule when vram is lost */ |
| 180 | 194 | if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) | |
| 195 | DRM_ERROR("Skip scheduling IBs!\n"); | ||
| 196 | else { | ||
| 197 | r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); | ||
| 198 | if (r) | ||
| 199 | DRM_ERROR("Error scheduling IBs (%d)\n", r); | ||
| 200 | } | ||
| 181 | /* if gpu reset, hw fence will be replaced here */ | 201 | /* if gpu reset, hw fence will be replaced here */ |
| 182 | dma_fence_put(job->fence); | 202 | dma_fence_put(job->fence); |
| 183 | job->fence = dma_fence_get(fence); | 203 | job->fence = dma_fence_get(fence); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 96c341670782..b0b23101d1c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | |||
| @@ -87,6 +87,41 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) | |||
| 87 | struct amdgpu_device *adev; | 87 | struct amdgpu_device *adev; |
| 88 | int r, acpi_status; | 88 | int r, acpi_status; |
| 89 | 89 | ||
| 90 | #ifdef CONFIG_DRM_AMDGPU_SI | ||
| 91 | if (!amdgpu_si_support) { | ||
| 92 | switch (flags & AMD_ASIC_MASK) { | ||
| 93 | case CHIP_TAHITI: | ||
| 94 | case CHIP_PITCAIRN: | ||
| 95 | case CHIP_VERDE: | ||
| 96 | case CHIP_OLAND: | ||
| 97 | case CHIP_HAINAN: | ||
| 98 | dev_info(dev->dev, | ||
| 99 | "SI support provided by radeon.\n"); | ||
| 100 | dev_info(dev->dev, | ||
| 101 | "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" | ||
| 102 | ); | ||
| 103 | return -ENODEV; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | #endif | ||
| 107 | #ifdef CONFIG_DRM_AMDGPU_CIK | ||
| 108 | if (!amdgpu_cik_support) { | ||
| 109 | switch (flags & AMD_ASIC_MASK) { | ||
| 110 | case CHIP_KAVERI: | ||
| 111 | case CHIP_BONAIRE: | ||
| 112 | case CHIP_HAWAII: | ||
| 113 | case CHIP_KABINI: | ||
| 114 | case CHIP_MULLINS: | ||
| 115 | dev_info(dev->dev, | ||
| 116 | "CIK support provided by radeon.\n"); | ||
| 117 | dev_info(dev->dev, | ||
| 118 | "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" | ||
| 119 | ); | ||
| 120 | return -ENODEV; | ||
| 121 | } | ||
| 122 | } | ||
| 123 | #endif | ||
| 124 | |||
| 90 | adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); | 125 | adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); |
| 91 | if (adev == NULL) { | 126 | if (adev == NULL) { |
| 92 | return -ENOMEM; | 127 | return -ENOMEM; |
| @@ -235,6 +270,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, | |||
| 235 | static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | 270 | static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) |
| 236 | { | 271 | { |
| 237 | struct amdgpu_device *adev = dev->dev_private; | 272 | struct amdgpu_device *adev = dev->dev_private; |
| 273 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
| 238 | struct drm_amdgpu_info *info = data; | 274 | struct drm_amdgpu_info *info = data; |
| 239 | struct amdgpu_mode_info *minfo = &adev->mode_info; | 275 | struct amdgpu_mode_info *minfo = &adev->mode_info; |
| 240 | void __user *out = (void __user *)(uintptr_t)info->return_pointer; | 276 | void __user *out = (void __user *)(uintptr_t)info->return_pointer; |
| @@ -247,6 +283,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 247 | 283 | ||
| 248 | if (!info->return_size || !info->return_pointer) | 284 | if (!info->return_size || !info->return_pointer) |
| 249 | return -EINVAL; | 285 | return -EINVAL; |
| 286 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
| 287 | return -ENODEV; | ||
| 250 | 288 | ||
| 251 | switch (info->query) { | 289 | switch (info->query) { |
| 252 | case AMDGPU_INFO_ACCEL_WORKING: | 290 | case AMDGPU_INFO_ACCEL_WORKING: |
| @@ -319,6 +357,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 319 | ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; | 357 | ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; |
| 320 | ib_size_alignment = 1; | 358 | ib_size_alignment = 1; |
| 321 | break; | 359 | break; |
| 360 | case AMDGPU_HW_IP_VCN_DEC: | ||
| 361 | type = AMD_IP_BLOCK_TYPE_VCN; | ||
| 362 | ring_mask = adev->vcn.ring_dec.ready ? 1 : 0; | ||
| 363 | ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; | ||
| 364 | ib_size_alignment = 16; | ||
| 365 | break; | ||
| 366 | case AMDGPU_HW_IP_VCN_ENC: | ||
| 367 | type = AMD_IP_BLOCK_TYPE_VCN; | ||
| 368 | for (i = 0; i < adev->vcn.num_enc_rings; i++) | ||
| 369 | ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i); | ||
| 370 | ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; | ||
| 371 | ib_size_alignment = 1; | ||
| 372 | break; | ||
| 322 | default: | 373 | default: |
| 323 | return -EINVAL; | 374 | return -EINVAL; |
| 324 | } | 375 | } |
| @@ -361,6 +412,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 361 | case AMDGPU_HW_IP_UVD_ENC: | 412 | case AMDGPU_HW_IP_UVD_ENC: |
| 362 | type = AMD_IP_BLOCK_TYPE_UVD; | 413 | type = AMD_IP_BLOCK_TYPE_UVD; |
| 363 | break; | 414 | break; |
| 415 | case AMDGPU_HW_IP_VCN_DEC: | ||
| 416 | case AMDGPU_HW_IP_VCN_ENC: | ||
| 417 | type = AMD_IP_BLOCK_TYPE_VCN; | ||
| 418 | break; | ||
| 364 | default: | 419 | default: |
| 365 | return -EINVAL; | 420 | return -EINVAL; |
| 366 | } | 421 | } |
| @@ -397,6 +452,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 397 | case AMDGPU_INFO_NUM_EVICTIONS: | 452 | case AMDGPU_INFO_NUM_EVICTIONS: |
| 398 | ui64 = atomic64_read(&adev->num_evictions); | 453 | ui64 = atomic64_read(&adev->num_evictions); |
| 399 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; | 454 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; |
| 455 | case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS: | ||
| 456 | ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); | ||
| 457 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; | ||
| 400 | case AMDGPU_INFO_VRAM_USAGE: | 458 | case AMDGPU_INFO_VRAM_USAGE: |
| 401 | ui64 = atomic64_read(&adev->vram_usage); | 459 | ui64 = atomic64_read(&adev->vram_usage); |
| 402 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; | 460 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; |
| @@ -536,6 +594,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 536 | dev_info.cu_active_number = adev->gfx.cu_info.number; | 594 | dev_info.cu_active_number = adev->gfx.cu_info.number; |
| 537 | dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; | 595 | dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; |
| 538 | dev_info.ce_ram_size = adev->gfx.ce_ram_size; | 596 | dev_info.ce_ram_size = adev->gfx.ce_ram_size; |
| 597 | memcpy(&dev_info.cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], | ||
| 598 | sizeof(adev->gfx.cu_info.ao_cu_bitmap)); | ||
| 539 | memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], | 599 | memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], |
| 540 | sizeof(adev->gfx.cu_info.bitmap)); | 600 | sizeof(adev->gfx.cu_info.bitmap)); |
| 541 | dev_info.vram_type = adev->mc.vram_type; | 601 | dev_info.vram_type = adev->mc.vram_type; |
| @@ -730,6 +790,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) | |||
| 730 | vga_switcheroo_process_delayed_switch(); | 790 | vga_switcheroo_process_delayed_switch(); |
| 731 | } | 791 | } |
| 732 | 792 | ||
| 793 | bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, | ||
| 794 | struct amdgpu_fpriv *fpriv) | ||
| 795 | { | ||
| 796 | return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); | ||
| 797 | } | ||
| 798 | |||
| 733 | /** | 799 | /** |
| 734 | * amdgpu_driver_open_kms - drm callback for open | 800 | * amdgpu_driver_open_kms - drm callback for open |
| 735 | * | 801 | * |
| @@ -757,7 +823,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) | |||
| 757 | goto out_suspend; | 823 | goto out_suspend; |
| 758 | } | 824 | } |
| 759 | 825 | ||
| 760 | r = amdgpu_vm_init(adev, &fpriv->vm); | 826 | r = amdgpu_vm_init(adev, &fpriv->vm, |
| 827 | AMDGPU_VM_CONTEXT_GFX); | ||
| 761 | if (r) { | 828 | if (r) { |
| 762 | kfree(fpriv); | 829 | kfree(fpriv); |
| 763 | goto out_suspend; | 830 | goto out_suspend; |
| @@ -782,6 +849,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) | |||
| 782 | 849 | ||
| 783 | amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); | 850 | amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); |
| 784 | 851 | ||
| 852 | fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); | ||
| 785 | file_priv->driver_priv = fpriv; | 853 | file_priv->driver_priv = fpriv; |
| 786 | 854 | ||
| 787 | out_suspend: | 855 | out_suspend: |
| @@ -814,8 +882,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, | |||
| 814 | 882 | ||
| 815 | amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); | 883 | amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); |
| 816 | 884 | ||
| 817 | amdgpu_uvd_free_handles(adev, file_priv); | 885 | if (adev->asic_type != CHIP_RAVEN) { |
| 818 | amdgpu_vce_free_handles(adev, file_priv); | 886 | amdgpu_uvd_free_handles(adev, file_priv); |
| 887 | amdgpu_vce_free_handles(adev, file_priv); | ||
| 888 | } | ||
| 819 | 889 | ||
| 820 | amdgpu_vm_bo_rmv(adev, fpriv->prt_va); | 890 | amdgpu_vm_bo_rmv(adev, fpriv->prt_va); |
| 821 | 891 | ||
| @@ -945,50 +1015,10 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) | |||
| 945 | amdgpu_irq_put(adev, &adev->crtc_irq, idx); | 1015 | amdgpu_irq_put(adev, &adev->crtc_irq, idx); |
| 946 | } | 1016 | } |
| 947 | 1017 | ||
| 948 | /** | ||
| 949 | * amdgpu_get_vblank_timestamp_kms - get vblank timestamp | ||
| 950 | * | ||
| 951 | * @dev: drm dev pointer | ||
| 952 | * @crtc: crtc to get the timestamp for | ||
| 953 | * @max_error: max error | ||
| 954 | * @vblank_time: time value | ||
| 955 | * @flags: flags passed to the driver | ||
| 956 | * | ||
| 957 | * Gets the timestamp on the requested crtc based on the | ||
| 958 | * scanout position. (all asics). | ||
| 959 | * Returns postive status flags on success, negative error on failure. | ||
| 960 | */ | ||
| 961 | int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, | ||
| 962 | int *max_error, | ||
| 963 | struct timeval *vblank_time, | ||
| 964 | unsigned flags) | ||
| 965 | { | ||
| 966 | struct drm_crtc *crtc; | ||
| 967 | struct amdgpu_device *adev = dev->dev_private; | ||
| 968 | |||
| 969 | if (pipe >= dev->num_crtcs) { | ||
| 970 | DRM_ERROR("Invalid crtc %u\n", pipe); | ||
| 971 | return -EINVAL; | ||
| 972 | } | ||
| 973 | |||
| 974 | /* Get associated drm_crtc: */ | ||
| 975 | crtc = &adev->mode_info.crtcs[pipe]->base; | ||
| 976 | if (!crtc) { | ||
| 977 | /* This can occur on driver load if some component fails to | ||
| 978 | * initialize completely and driver is unloaded */ | ||
| 979 | DRM_ERROR("Uninitialized crtc %d\n", pipe); | ||
| 980 | return -EINVAL; | ||
| 981 | } | ||
| 982 | |||
| 983 | /* Helper routine in DRM core does all the work: */ | ||
| 984 | return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, | ||
| 985 | vblank_time, flags, | ||
| 986 | &crtc->hwmode); | ||
| 987 | } | ||
| 988 | |||
| 989 | const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { | 1018 | const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { |
| 990 | DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1019 | DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
| 991 | DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1020 | DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
| 1021 | DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | ||
| 992 | DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1022 | DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
| 993 | /* KMS */ | 1023 | /* KMS */ |
| 994 | DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1024 | DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index dbd10618ec20..43a9d3aec6c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | |||
| @@ -534,6 +534,9 @@ struct amdgpu_framebuffer { | |||
| 534 | ((em) == ATOM_ENCODER_MODE_DP_MST)) | 534 | ((em) == ATOM_ENCODER_MODE_DP_MST)) |
| 535 | 535 | ||
| 536 | /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ | 536 | /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ |
| 537 | #define DRM_SCANOUTPOS_VALID (1 << 0) | ||
| 538 | #define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) | ||
| 539 | #define DRM_SCANOUTPOS_ACCURATE (1 << 2) | ||
| 537 | #define USE_REAL_VBLANKSTART (1 << 30) | 540 | #define USE_REAL_VBLANKSTART (1 << 30) |
| 538 | #define GET_DISTANCE_TO_VBLANKSTART (1 << 31) | 541 | #define GET_DISTANCE_TO_VBLANKSTART (1 << 31) |
| 539 | 542 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 365883d7948d..8ee69652be8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | |||
| @@ -960,6 +960,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) | |||
| 960 | return -EINVAL; | 960 | return -EINVAL; |
| 961 | 961 | ||
| 962 | /* hurrah the memory is not visible ! */ | 962 | /* hurrah the memory is not visible ! */ |
| 963 | atomic64_inc(&adev->num_vram_cpu_page_faults); | ||
| 963 | amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); | 964 | amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); |
| 964 | lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; | 965 | lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; |
| 965 | for (i = 0; i < abo->placement.num_placement; i++) { | 966 | for (i = 0; i < abo->placement.num_placement; i++) { |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index f5ae871aa11c..b7e1c026c0c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | |||
| @@ -72,6 +72,7 @@ static int amdgpu_pp_early_init(void *handle) | |||
| 72 | case CHIP_CARRIZO: | 72 | case CHIP_CARRIZO: |
| 73 | case CHIP_STONEY: | 73 | case CHIP_STONEY: |
| 74 | case CHIP_VEGA10: | 74 | case CHIP_VEGA10: |
| 75 | case CHIP_RAVEN: | ||
| 75 | adev->pp_enabled = true; | 76 | adev->pp_enabled = true; |
| 76 | if (amdgpu_create_pp_handle(adev)) | 77 | if (amdgpu_create_pp_handle(adev)) |
| 77 | return -EINVAL; | 78 | return -EINVAL; |
| @@ -187,6 +188,9 @@ static int amdgpu_pp_hw_fini(void *handle) | |||
| 187 | int ret = 0; | 188 | int ret = 0; |
| 188 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 189 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 189 | 190 | ||
| 191 | if (adev->pp_enabled && adev->pm.dpm_enabled) | ||
| 192 | amdgpu_pm_sysfs_fini(adev); | ||
| 193 | |||
| 190 | if (adev->powerplay.ip_funcs->hw_fini) | 194 | if (adev->powerplay.ip_funcs->hw_fini) |
| 191 | ret = adev->powerplay.ip_funcs->hw_fini( | 195 | ret = adev->powerplay.ip_funcs->hw_fini( |
| 192 | adev->powerplay.pp_handle); | 196 | adev->powerplay.pp_handle); |
| @@ -205,10 +209,9 @@ static void amdgpu_pp_late_fini(void *handle) | |||
| 205 | adev->powerplay.ip_funcs->late_fini( | 209 | adev->powerplay.ip_funcs->late_fini( |
| 206 | adev->powerplay.pp_handle); | 210 | adev->powerplay.pp_handle); |
| 207 | 211 | ||
| 208 | if (adev->pp_enabled && adev->pm.dpm_enabled) | ||
| 209 | amdgpu_pm_sysfs_fini(adev); | ||
| 210 | 212 | ||
| 211 | amd_powerplay_destroy(adev->powerplay.pp_handle); | 213 | if (adev->pp_enabled) |
| 214 | amd_powerplay_destroy(adev->powerplay.pp_handle); | ||
| 212 | } | 215 | } |
| 213 | 216 | ||
| 214 | static int amdgpu_pp_suspend(void *handle) | 217 | static int amdgpu_pp_suspend(void *handle) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index ac5e92e5d59d..4083be61b328 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | |||
| @@ -24,12 +24,13 @@ | |||
| 24 | */ | 24 | */ |
| 25 | 25 | ||
| 26 | #include <linux/firmware.h> | 26 | #include <linux/firmware.h> |
| 27 | #include "drmP.h" | 27 | #include <drm/drmP.h> |
| 28 | #include "amdgpu.h" | 28 | #include "amdgpu.h" |
| 29 | #include "amdgpu_psp.h" | 29 | #include "amdgpu_psp.h" |
| 30 | #include "amdgpu_ucode.h" | 30 | #include "amdgpu_ucode.h" |
| 31 | #include "soc15_common.h" | 31 | #include "soc15_common.h" |
| 32 | #include "psp_v3_1.h" | 32 | #include "psp_v3_1.h" |
| 33 | #include "psp_v10_0.h" | ||
| 33 | 34 | ||
| 34 | static void psp_set_funcs(struct amdgpu_device *adev); | 35 | static void psp_set_funcs(struct amdgpu_device *adev); |
| 35 | 36 | ||
| @@ -61,6 +62,12 @@ static int psp_sw_init(void *handle) | |||
| 61 | psp->compare_sram_data = psp_v3_1_compare_sram_data; | 62 | psp->compare_sram_data = psp_v3_1_compare_sram_data; |
| 62 | psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; | 63 | psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; |
| 63 | break; | 64 | break; |
| 65 | case CHIP_RAVEN: | ||
| 66 | psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; | ||
| 67 | psp->ring_init = psp_v10_0_ring_init; | ||
| 68 | psp->cmd_submit = psp_v10_0_cmd_submit; | ||
| 69 | psp->compare_sram_data = psp_v10_0_compare_sram_data; | ||
| 70 | break; | ||
| 64 | default: | 71 | default: |
| 65 | return -EINVAL; | 72 | return -EINVAL; |
| 66 | } | 73 | } |
| @@ -145,8 +152,8 @@ static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, | |||
| 145 | uint64_t tmr_mc, uint32_t size) | 152 | uint64_t tmr_mc, uint32_t size) |
| 146 | { | 153 | { |
| 147 | cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; | 154 | cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; |
| 148 | cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = (uint32_t)tmr_mc; | 155 | cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); |
| 149 | cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = (uint32_t)(tmr_mc >> 32); | 156 | cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc); |
| 150 | cmd->cmd.cmd_setup_tmr.buf_size = size; | 157 | cmd->cmd.cmd_setup_tmr.buf_size = size; |
| 151 | } | 158 | } |
| 152 | 159 | ||
| @@ -230,6 +237,13 @@ static int psp_asd_load(struct psp_context *psp) | |||
| 230 | int ret; | 237 | int ret; |
| 231 | struct psp_gfx_cmd_resp *cmd; | 238 | struct psp_gfx_cmd_resp *cmd; |
| 232 | 239 | ||
| 240 | /* If PSP version doesn't match ASD version, asd loading will be failed. | ||
| 241 | * add workaround to bypass it for sriov now. | ||
| 242 | * TODO: add version check to make it common | ||
| 243 | */ | ||
| 244 | if (amdgpu_sriov_vf(psp->adev)) | ||
| 245 | return 0; | ||
| 246 | |||
| 233 | cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); | 247 | cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); |
| 234 | if (!cmd) | 248 | if (!cmd) |
| 235 | return -ENOMEM; | 249 | return -ENOMEM; |
| @@ -319,14 +333,11 @@ static int psp_load_fw(struct amdgpu_device *adev) | |||
| 319 | { | 333 | { |
| 320 | int ret; | 334 | int ret; |
| 321 | struct psp_context *psp = &adev->psp; | 335 | struct psp_context *psp = &adev->psp; |
| 322 | struct psp_gfx_cmd_resp *cmd; | ||
| 323 | 336 | ||
| 324 | cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); | 337 | psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); |
| 325 | if (!cmd) | 338 | if (!psp->cmd) |
| 326 | return -ENOMEM; | 339 | return -ENOMEM; |
| 327 | 340 | ||
| 328 | psp->cmd = cmd; | ||
| 329 | |||
| 330 | ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, | 341 | ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, |
| 331 | AMDGPU_GEM_DOMAIN_GTT, | 342 | AMDGPU_GEM_DOMAIN_GTT, |
| 332 | &psp->fw_pri_bo, | 343 | &psp->fw_pri_bo, |
| @@ -365,8 +376,6 @@ static int psp_load_fw(struct amdgpu_device *adev) | |||
| 365 | if (ret) | 376 | if (ret) |
| 366 | goto failed_mem; | 377 | goto failed_mem; |
| 367 | 378 | ||
| 368 | kfree(cmd); | ||
| 369 | |||
| 370 | return 0; | 379 | return 0; |
| 371 | 380 | ||
| 372 | failed_mem: | 381 | failed_mem: |
| @@ -376,7 +385,8 @@ failed_mem1: | |||
| 376 | amdgpu_bo_free_kernel(&psp->fw_pri_bo, | 385 | amdgpu_bo_free_kernel(&psp->fw_pri_bo, |
| 377 | &psp->fw_pri_mc_addr, &psp->fw_pri_buf); | 386 | &psp->fw_pri_mc_addr, &psp->fw_pri_buf); |
| 378 | failed: | 387 | failed: |
| 379 | kfree(cmd); | 388 | kfree(psp->cmd); |
| 389 | psp->cmd = NULL; | ||
| 380 | return ret; | 390 | return ret; |
| 381 | } | 391 | } |
| 382 | 392 | ||
| @@ -436,6 +446,9 @@ static int psp_hw_fini(void *handle) | |||
| 436 | amdgpu_bo_free_kernel(&psp->fence_buf_bo, | 446 | amdgpu_bo_free_kernel(&psp->fence_buf_bo, |
| 437 | &psp->fence_buf_mc_addr, &psp->fence_buf); | 447 | &psp->fence_buf_mc_addr, &psp->fence_buf); |
| 438 | 448 | ||
| 449 | kfree(psp->cmd); | ||
| 450 | psp->cmd = NULL; | ||
| 451 | |||
| 439 | return 0; | 452 | return 0; |
| 440 | } | 453 | } |
| 441 | 454 | ||
| @@ -542,3 +555,12 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block = | |||
| 542 | .rev = 0, | 555 | .rev = 0, |
| 543 | .funcs = &psp_ip_funcs, | 556 | .funcs = &psp_ip_funcs, |
| 544 | }; | 557 | }; |
| 558 | |||
| 559 | const struct amdgpu_ip_block_version psp_v10_0_ip_block = | ||
| 560 | { | ||
| 561 | .type = AMD_IP_BLOCK_TYPE_PSP, | ||
| 562 | .major = 10, | ||
| 563 | .minor = 0, | ||
| 564 | .rev = 0, | ||
| 565 | .funcs = &psp_ip_funcs, | ||
| 566 | }; | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 0301e4e0b297..1a1c8b469f93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | |||
| @@ -138,4 +138,6 @@ extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; | |||
| 138 | extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, | 138 | extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, |
| 139 | uint32_t field_val, uint32_t mask, bool check_changed); | 139 | uint32_t field_val, uint32_t mask, bool check_changed); |
| 140 | 140 | ||
| 141 | extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; | ||
| 142 | |||
| 141 | #endif | 143 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c new file mode 100644 index 000000000000..befc09b68543 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | |||
| @@ -0,0 +1,299 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2017 Valve Corporation | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | * Authors: Andres Rodriguez | ||
| 23 | */ | ||
| 24 | |||
| 25 | #include "amdgpu.h" | ||
| 26 | #include "amdgpu_ring.h" | ||
| 27 | |||
| 28 | static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper, | ||
| 29 | int hw_ip) | ||
| 30 | { | ||
| 31 | if (!mapper) | ||
| 32 | return -EINVAL; | ||
| 33 | |||
| 34 | if (hw_ip > AMDGPU_MAX_IP_NUM) | ||
| 35 | return -EINVAL; | ||
| 36 | |||
| 37 | mapper->hw_ip = hw_ip; | ||
| 38 | mutex_init(&mapper->lock); | ||
| 39 | |||
| 40 | memset(mapper->queue_map, 0, sizeof(mapper->queue_map)); | ||
| 41 | |||
| 42 | return 0; | ||
| 43 | } | ||
| 44 | |||
| 45 | static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper, | ||
| 46 | int ring) | ||
| 47 | { | ||
| 48 | return mapper->queue_map[ring]; | ||
| 49 | } | ||
| 50 | |||
| 51 | static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, | ||
| 52 | int ring, struct amdgpu_ring *pring) | ||
| 53 | { | ||
| 54 | if (WARN_ON(mapper->queue_map[ring])) { | ||
| 55 | DRM_ERROR("Un-expected ring re-map\n"); | ||
| 56 | return -EINVAL; | ||
| 57 | } | ||
| 58 | |||
| 59 | mapper->queue_map[ring] = pring; | ||
| 60 | |||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | static int amdgpu_identity_map(struct amdgpu_device *adev, | ||
| 65 | struct amdgpu_queue_mapper *mapper, | ||
| 66 | int ring, | ||
| 67 | struct amdgpu_ring **out_ring) | ||
| 68 | { | ||
| 69 | switch (mapper->hw_ip) { | ||
| 70 | case AMDGPU_HW_IP_GFX: | ||
| 71 | *out_ring = &adev->gfx.gfx_ring[ring]; | ||
| 72 | break; | ||
| 73 | case AMDGPU_HW_IP_COMPUTE: | ||
| 74 | *out_ring = &adev->gfx.compute_ring[ring]; | ||
| 75 | break; | ||
| 76 | case AMDGPU_HW_IP_DMA: | ||
| 77 | *out_ring = &adev->sdma.instance[ring].ring; | ||
| 78 | break; | ||
| 79 | case AMDGPU_HW_IP_UVD: | ||
| 80 | *out_ring = &adev->uvd.ring; | ||
| 81 | break; | ||
| 82 | case AMDGPU_HW_IP_VCE: | ||
| 83 | *out_ring = &adev->vce.ring[ring]; | ||
| 84 | break; | ||
| 85 | case AMDGPU_HW_IP_UVD_ENC: | ||
| 86 | *out_ring = &adev->uvd.ring_enc[ring]; | ||
| 87 | break; | ||
| 88 | case AMDGPU_HW_IP_VCN_DEC: | ||
| 89 | *out_ring = &adev->vcn.ring_dec; | ||
| 90 | break; | ||
| 91 | case AMDGPU_HW_IP_VCN_ENC: | ||
| 92 | *out_ring = &adev->vcn.ring_enc[ring]; | ||
| 93 | break; | ||
| 94 | default: | ||
| 95 | *out_ring = NULL; | ||
| 96 | DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); | ||
| 97 | return -EINVAL; | ||
| 98 | } | ||
| 99 | |||
| 100 | return amdgpu_update_cached_map(mapper, ring, *out_ring); | ||
| 101 | } | ||
| 102 | |||
| 103 | static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) | ||
| 104 | { | ||
| 105 | switch (hw_ip) { | ||
| 106 | case AMDGPU_HW_IP_GFX: | ||
| 107 | return AMDGPU_RING_TYPE_GFX; | ||
| 108 | case AMDGPU_HW_IP_COMPUTE: | ||
| 109 | return AMDGPU_RING_TYPE_COMPUTE; | ||
| 110 | case AMDGPU_HW_IP_DMA: | ||
| 111 | return AMDGPU_RING_TYPE_SDMA; | ||
| 112 | case AMDGPU_HW_IP_UVD: | ||
| 113 | return AMDGPU_RING_TYPE_UVD; | ||
| 114 | case AMDGPU_HW_IP_VCE: | ||
| 115 | return AMDGPU_RING_TYPE_VCE; | ||
| 116 | default: | ||
| 117 | DRM_ERROR("Invalid HW IP specified %d\n", hw_ip); | ||
| 118 | return -1; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | static int amdgpu_lru_map(struct amdgpu_device *adev, | ||
| 123 | struct amdgpu_queue_mapper *mapper, | ||
| 124 | int user_ring, | ||
| 125 | struct amdgpu_ring **out_ring) | ||
| 126 | { | ||
| 127 | int r, i, j; | ||
| 128 | int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip); | ||
| 129 | int ring_blacklist[AMDGPU_MAX_RINGS]; | ||
| 130 | struct amdgpu_ring *ring; | ||
| 131 | |||
| 132 | /* 0 is a valid ring index, so initialize to -1 */ | ||
| 133 | memset(ring_blacklist, 0xff, sizeof(ring_blacklist)); | ||
| 134 | |||
| 135 | for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) { | ||
| 136 | ring = mapper->queue_map[i]; | ||
| 137 | if (ring) | ||
| 138 | ring_blacklist[j++] = ring->idx; | ||
| 139 | } | ||
| 140 | |||
| 141 | r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist, | ||
| 142 | j, out_ring); | ||
| 143 | if (r) | ||
| 144 | return r; | ||
| 145 | |||
| 146 | return amdgpu_update_cached_map(mapper, user_ring, *out_ring); | ||
| 147 | } | ||
| 148 | |||
| 149 | /** | ||
| 150 | * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct | ||
| 151 | * | ||
| 152 | * @adev: amdgpu_device pointer | ||
| 153 | * @mgr: amdgpu_queue_mgr structure holding queue information | ||
| 154 | * | ||
| 155 | * Initialize the the selected @mgr (all asics). | ||
| 156 | * | ||
| 157 | * Returns 0 on success, error on failure. | ||
| 158 | */ | ||
| 159 | int amdgpu_queue_mgr_init(struct amdgpu_device *adev, | ||
| 160 | struct amdgpu_queue_mgr *mgr) | ||
| 161 | { | ||
| 162 | int i, r; | ||
| 163 | |||
| 164 | if (!adev || !mgr) | ||
| 165 | return -EINVAL; | ||
| 166 | |||
| 167 | memset(mgr, 0, sizeof(*mgr)); | ||
| 168 | |||
| 169 | for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) { | ||
| 170 | r = amdgpu_queue_mapper_init(&mgr->mapper[i], i); | ||
| 171 | if (r) | ||
| 172 | return r; | ||
| 173 | } | ||
| 174 | |||
| 175 | return 0; | ||
| 176 | } | ||
| 177 | |||
| 178 | /** | ||
| 179 | * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct | ||
| 180 | * | ||
| 181 | * @adev: amdgpu_device pointer | ||
| 182 | * @mgr: amdgpu_queue_mgr structure holding queue information | ||
| 183 | * | ||
| 184 | * De-initialize the the selected @mgr (all asics). | ||
| 185 | * | ||
| 186 | * Returns 0 on success, error on failure. | ||
| 187 | */ | ||
| 188 | int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, | ||
| 189 | struct amdgpu_queue_mgr *mgr) | ||
| 190 | { | ||
| 191 | return 0; | ||
| 192 | } | ||
| 193 | |||
| 194 | /** | ||
| 195 | * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring | ||
| 196 | * | ||
| 197 | * @adev: amdgpu_device pointer | ||
| 198 | * @mgr: amdgpu_queue_mgr structure holding queue information | ||
| 199 | * @hw_ip: HW IP enum | ||
| 200 | * @instance: HW instance | ||
| 201 | * @ring: user ring id | ||
| 202 | * @our_ring: pointer to mapped amdgpu_ring | ||
| 203 | * | ||
| 204 | * Map a userspace ring id to an appropriate kernel ring. Different | ||
| 205 | * policies are configurable at a HW IP level. | ||
| 206 | * | ||
| 207 | * Returns 0 on success, error on failure. | ||
| 208 | */ | ||
| 209 | int amdgpu_queue_mgr_map(struct amdgpu_device *adev, | ||
| 210 | struct amdgpu_queue_mgr *mgr, | ||
| 211 | int hw_ip, int instance, int ring, | ||
| 212 | struct amdgpu_ring **out_ring) | ||
| 213 | { | ||
| 214 | int r, ip_num_rings; | ||
| 215 | struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip]; | ||
| 216 | |||
| 217 | if (!adev || !mgr || !out_ring) | ||
| 218 | return -EINVAL; | ||
| 219 | |||
| 220 | if (hw_ip >= AMDGPU_MAX_IP_NUM) | ||
| 221 | return -EINVAL; | ||
| 222 | |||
| 223 | if (ring >= AMDGPU_MAX_RINGS) | ||
| 224 | return -EINVAL; | ||
| 225 | |||
| 226 | /* Right now all IPs have only one instance - multiple rings. */ | ||
| 227 | if (instance != 0) { | ||
| 228 | DRM_ERROR("invalid ip instance: %d\n", instance); | ||
| 229 | return -EINVAL; | ||
| 230 | } | ||
| 231 | |||
| 232 | switch (hw_ip) { | ||
| 233 | case AMDGPU_HW_IP_GFX: | ||
| 234 | ip_num_rings = adev->gfx.num_gfx_rings; | ||
| 235 | break; | ||
| 236 | case AMDGPU_HW_IP_COMPUTE: | ||
| 237 | ip_num_rings = adev->gfx.num_compute_rings; | ||
| 238 | break; | ||
| 239 | case AMDGPU_HW_IP_DMA: | ||
| 240 | ip_num_rings = adev->sdma.num_instances; | ||
| 241 | break; | ||
| 242 | case AMDGPU_HW_IP_UVD: | ||
| 243 | ip_num_rings = 1; | ||
| 244 | break; | ||
| 245 | case AMDGPU_HW_IP_VCE: | ||
| 246 | ip_num_rings = adev->vce.num_rings; | ||
| 247 | break; | ||
| 248 | case AMDGPU_HW_IP_UVD_ENC: | ||
| 249 | ip_num_rings = adev->uvd.num_enc_rings; | ||
| 250 | break; | ||
| 251 | case AMDGPU_HW_IP_VCN_DEC: | ||
| 252 | ip_num_rings = 1; | ||
| 253 | break; | ||
| 254 | case AMDGPU_HW_IP_VCN_ENC: | ||
| 255 | ip_num_rings = adev->vcn.num_enc_rings; | ||
| 256 | break; | ||
| 257 | default: | ||
| 258 | DRM_ERROR("unknown ip type: %d\n", hw_ip); | ||
| 259 | return -EINVAL; | ||
| 260 | } | ||
| 261 | |||
| 262 | if (ring >= ip_num_rings) { | ||
| 263 | DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n", | ||
| 264 | ring, ip_num_rings, hw_ip); | ||
| 265 | return -EINVAL; | ||
| 266 | } | ||
| 267 | |||
| 268 | mutex_lock(&mapper->lock); | ||
| 269 | |||
| 270 | *out_ring = amdgpu_get_cached_map(mapper, ring); | ||
| 271 | if (*out_ring) { | ||
| 272 | /* cache hit */ | ||
| 273 | r = 0; | ||
| 274 | goto out_unlock; | ||
| 275 | } | ||
| 276 | |||
| 277 | switch (mapper->hw_ip) { | ||
| 278 | case AMDGPU_HW_IP_GFX: | ||
| 279 | case AMDGPU_HW_IP_UVD: | ||
| 280 | case AMDGPU_HW_IP_VCE: | ||
| 281 | case AMDGPU_HW_IP_UVD_ENC: | ||
| 282 | case AMDGPU_HW_IP_VCN_DEC: | ||
| 283 | case AMDGPU_HW_IP_VCN_ENC: | ||
| 284 | r = amdgpu_identity_map(adev, mapper, ring, out_ring); | ||
| 285 | break; | ||
| 286 | case AMDGPU_HW_IP_DMA: | ||
| 287 | case AMDGPU_HW_IP_COMPUTE: | ||
| 288 | r = amdgpu_lru_map(adev, mapper, ring, out_ring); | ||
| 289 | break; | ||
| 290 | default: | ||
| 291 | *out_ring = NULL; | ||
| 292 | r = -EINVAL; | ||
| 293 | DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); | ||
| 294 | } | ||
| 295 | |||
| 296 | out_unlock: | ||
| 297 | mutex_unlock(&mapper->lock); | ||
| 298 | return r; | ||
| 299 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6a85db0c0bc3..75165e07b1cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | |||
| @@ -135,6 +135,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) | |||
| 135 | 135 | ||
| 136 | if (ring->funcs->end_use) | 136 | if (ring->funcs->end_use) |
| 137 | ring->funcs->end_use(ring); | 137 | ring->funcs->end_use(ring); |
| 138 | |||
| 139 | amdgpu_ring_lru_touch(ring->adev, ring); | ||
| 138 | } | 140 | } |
| 139 | 141 | ||
| 140 | /** | 142 | /** |
| @@ -253,10 +255,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
| 253 | } | 255 | } |
| 254 | 256 | ||
| 255 | ring->max_dw = max_dw; | 257 | ring->max_dw = max_dw; |
| 258 | INIT_LIST_HEAD(&ring->lru_list); | ||
| 259 | amdgpu_ring_lru_touch(adev, ring); | ||
| 256 | 260 | ||
| 257 | if (amdgpu_debugfs_ring_init(adev, ring)) { | 261 | if (amdgpu_debugfs_ring_init(adev, ring)) { |
| 258 | DRM_ERROR("Failed to register debugfs file for rings !\n"); | 262 | DRM_ERROR("Failed to register debugfs file for rings !\n"); |
| 259 | } | 263 | } |
| 264 | |||
| 260 | return 0; | 265 | return 0; |
| 261 | } | 266 | } |
| 262 | 267 | ||
| @@ -294,6 +299,84 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) | |||
| 294 | ring->adev->rings[ring->idx] = NULL; | 299 | ring->adev->rings[ring->idx] = NULL; |
| 295 | } | 300 | } |
| 296 | 301 | ||
| 302 | static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev, | ||
| 303 | struct amdgpu_ring *ring) | ||
| 304 | { | ||
| 305 | /* list_move_tail handles the case where ring isn't part of the list */ | ||
| 306 | list_move_tail(&ring->lru_list, &adev->ring_lru_list); | ||
| 307 | } | ||
| 308 | |||
| 309 | static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring, | ||
| 310 | int *blacklist, int num_blacklist) | ||
| 311 | { | ||
| 312 | int i; | ||
| 313 | |||
| 314 | for (i = 0; i < num_blacklist; i++) { | ||
| 315 | if (ring->idx == blacklist[i]) | ||
| 316 | return true; | ||
| 317 | } | ||
| 318 | |||
| 319 | return false; | ||
| 320 | } | ||
| 321 | |||
| 322 | /** | ||
| 323 | * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block | ||
| 324 | * | ||
| 325 | * @adev: amdgpu_device pointer | ||
| 326 | * @type: amdgpu_ring_type enum | ||
| 327 | * @blacklist: blacklisted ring ids array | ||
| 328 | * @num_blacklist: number of entries in @blacklist | ||
| 329 | * @ring: output ring | ||
| 330 | * | ||
| 331 | * Retrieve the amdgpu_ring structure for the least recently used ring of | ||
| 332 | * a specific IP block (all asics). | ||
| 333 | * Returns 0 on success, error on failure. | ||
| 334 | */ | ||
| 335 | int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, | ||
| 336 | int num_blacklist, struct amdgpu_ring **ring) | ||
| 337 | { | ||
| 338 | struct amdgpu_ring *entry; | ||
| 339 | |||
| 340 | /* List is sorted in LRU order, find first entry corresponding | ||
| 341 | * to the desired HW IP */ | ||
| 342 | *ring = NULL; | ||
| 343 | spin_lock(&adev->ring_lru_list_lock); | ||
| 344 | list_for_each_entry(entry, &adev->ring_lru_list, lru_list) { | ||
| 345 | if (entry->funcs->type != type) | ||
| 346 | continue; | ||
| 347 | |||
| 348 | if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist)) | ||
| 349 | continue; | ||
| 350 | |||
| 351 | *ring = entry; | ||
| 352 | amdgpu_ring_lru_touch_locked(adev, *ring); | ||
| 353 | break; | ||
| 354 | } | ||
| 355 | spin_unlock(&adev->ring_lru_list_lock); | ||
| 356 | |||
| 357 | if (!*ring) { | ||
| 358 | DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type); | ||
| 359 | return -EINVAL; | ||
| 360 | } | ||
| 361 | |||
| 362 | return 0; | ||
| 363 | } | ||
| 364 | |||
| 365 | /** | ||
| 366 | * amdgpu_ring_lru_touch - mark a ring as recently being used | ||
| 367 | * | ||
| 368 | * @adev: amdgpu_device pointer | ||
| 369 | * @ring: ring to touch | ||
| 370 | * | ||
| 371 | * Move @ring to the tail of the lru list | ||
| 372 | */ | ||
| 373 | void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) | ||
| 374 | { | ||
| 375 | spin_lock(&adev->ring_lru_list_lock); | ||
| 376 | amdgpu_ring_lru_touch_locked(adev, ring); | ||
| 377 | spin_unlock(&adev->ring_lru_list_lock); | ||
| 378 | } | ||
| 379 | |||
| 297 | /* | 380 | /* |
| 298 | * Debugfs info | 381 | * Debugfs info |
| 299 | */ | 382 | */ |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 944443c5b90a..bc8dec992f73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | |||
| @@ -47,7 +47,9 @@ enum amdgpu_ring_type { | |||
| 47 | AMDGPU_RING_TYPE_UVD, | 47 | AMDGPU_RING_TYPE_UVD, |
| 48 | AMDGPU_RING_TYPE_VCE, | 48 | AMDGPU_RING_TYPE_VCE, |
| 49 | AMDGPU_RING_TYPE_KIQ, | 49 | AMDGPU_RING_TYPE_KIQ, |
| 50 | AMDGPU_RING_TYPE_UVD_ENC | 50 | AMDGPU_RING_TYPE_UVD_ENC, |
| 51 | AMDGPU_RING_TYPE_VCN_DEC, | ||
| 52 | AMDGPU_RING_TYPE_VCN_ENC | ||
| 51 | }; | 53 | }; |
| 52 | 54 | ||
| 53 | struct amdgpu_device; | 55 | struct amdgpu_device; |
| @@ -76,6 +78,7 @@ struct amdgpu_fence_driver { | |||
| 76 | int amdgpu_fence_driver_init(struct amdgpu_device *adev); | 78 | int amdgpu_fence_driver_init(struct amdgpu_device *adev); |
| 77 | void amdgpu_fence_driver_fini(struct amdgpu_device *adev); | 79 | void amdgpu_fence_driver_fini(struct amdgpu_device *adev); |
| 78 | void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); | 80 | void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); |
| 81 | void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring); | ||
| 79 | 82 | ||
| 80 | int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, | 83 | int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, |
| 81 | unsigned num_hw_submission); | 84 | unsigned num_hw_submission); |
| @@ -130,6 +133,7 @@ struct amdgpu_ring_funcs { | |||
| 130 | int (*test_ib)(struct amdgpu_ring *ring, long timeout); | 133 | int (*test_ib)(struct amdgpu_ring *ring, long timeout); |
| 131 | /* insert NOP packets */ | 134 | /* insert NOP packets */ |
| 132 | void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); | 135 | void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); |
| 136 | void (*insert_start)(struct amdgpu_ring *ring); | ||
| 133 | void (*insert_end)(struct amdgpu_ring *ring); | 137 | void (*insert_end)(struct amdgpu_ring *ring); |
| 134 | /* pad the indirect buffer to the necessary number of dw */ | 138 | /* pad the indirect buffer to the necessary number of dw */ |
| 135 | void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); | 139 | void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); |
| @@ -142,6 +146,7 @@ struct amdgpu_ring_funcs { | |||
| 142 | void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); | 146 | void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); |
| 143 | void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); | 147 | void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); |
| 144 | void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); | 148 | void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); |
| 149 | void (*emit_tmz)(struct amdgpu_ring *ring, bool start); | ||
| 145 | }; | 150 | }; |
| 146 | 151 | ||
| 147 | struct amdgpu_ring { | 152 | struct amdgpu_ring { |
| @@ -149,6 +154,7 @@ struct amdgpu_ring { | |||
| 149 | const struct amdgpu_ring_funcs *funcs; | 154 | const struct amdgpu_ring_funcs *funcs; |
| 150 | struct amdgpu_fence_driver fence_drv; | 155 | struct amdgpu_fence_driver fence_drv; |
| 151 | struct amd_gpu_scheduler sched; | 156 | struct amd_gpu_scheduler sched; |
| 157 | struct list_head lru_list; | ||
| 152 | 158 | ||
| 153 | struct amdgpu_bo *ring_obj; | 159 | struct amdgpu_bo *ring_obj; |
| 154 | volatile uint32_t *ring; | 160 | volatile uint32_t *ring; |
| @@ -180,6 +186,7 @@ struct amdgpu_ring { | |||
| 180 | u64 cond_exe_gpu_addr; | 186 | u64 cond_exe_gpu_addr; |
| 181 | volatile u32 *cond_exe_cpu_addr; | 187 | volatile u32 *cond_exe_cpu_addr; |
| 182 | unsigned vm_inv_eng; | 188 | unsigned vm_inv_eng; |
| 189 | bool has_compute_vm_bug; | ||
| 183 | #if defined(CONFIG_DEBUG_FS) | 190 | #if defined(CONFIG_DEBUG_FS) |
| 184 | struct dentry *ent; | 191 | struct dentry *ent; |
| 185 | #endif | 192 | #endif |
| @@ -194,6 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
| 194 | unsigned ring_size, struct amdgpu_irq_src *irq_src, | 201 | unsigned ring_size, struct amdgpu_irq_src *irq_src, |
| 195 | unsigned irq_type); | 202 | unsigned irq_type); |
| 196 | void amdgpu_ring_fini(struct amdgpu_ring *ring); | 203 | void amdgpu_ring_fini(struct amdgpu_ring *ring); |
| 204 | int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, | ||
| 205 | int num_blacklist, struct amdgpu_ring **ring); | ||
| 206 | void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); | ||
| 197 | static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) | 207 | static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) |
| 198 | { | 208 | { |
| 199 | int i = 0; | 209 | int i = 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index ed814e6d0207..a6899180b265 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | |||
| @@ -298,6 +298,25 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) | |||
| 298 | return NULL; | 298 | return NULL; |
| 299 | } | 299 | } |
| 300 | 300 | ||
| 301 | int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) | ||
| 302 | { | ||
| 303 | struct amdgpu_sync_entry *e; | ||
| 304 | struct hlist_node *tmp; | ||
| 305 | int i, r; | ||
| 306 | |||
| 307 | hash_for_each_safe(sync->fences, i, tmp, e, node) { | ||
| 308 | r = dma_fence_wait(e->fence, intr); | ||
| 309 | if (r) | ||
| 310 | return r; | ||
| 311 | |||
| 312 | hash_del(&e->node); | ||
| 313 | dma_fence_put(e->fence); | ||
| 314 | kmem_cache_free(amdgpu_sync_slab, e); | ||
| 315 | } | ||
| 316 | |||
| 317 | return 0; | ||
| 318 | } | ||
| 319 | |||
| 301 | /** | 320 | /** |
| 302 | * amdgpu_sync_free - free the sync object | 321 | * amdgpu_sync_free - free the sync object |
| 303 | * | 322 | * |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 605be266e07f..dc7687993317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | |||
| @@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, | |||
| 49 | struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, | 49 | struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, |
| 50 | struct amdgpu_ring *ring); | 50 | struct amdgpu_ring *ring); |
| 51 | struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); | 51 | struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); |
| 52 | int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); | ||
| 52 | void amdgpu_sync_free(struct amdgpu_sync *sync); | 53 | void amdgpu_sync_free(struct amdgpu_sync *sync); |
| 53 | int amdgpu_sync_init(void); | 54 | int amdgpu_sync_init(void); |
| 54 | void amdgpu_sync_fini(void); | 55 | void amdgpu_sync_fini(void); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5db0230e45c6..c9b131b13ef7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
| @@ -29,11 +29,11 @@ | |||
| 29 | * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> | 29 | * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> |
| 30 | * Dave Airlie | 30 | * Dave Airlie |
| 31 | */ | 31 | */ |
| 32 | #include <ttm/ttm_bo_api.h> | 32 | #include <drm/ttm/ttm_bo_api.h> |
| 33 | #include <ttm/ttm_bo_driver.h> | 33 | #include <drm/ttm/ttm_bo_driver.h> |
| 34 | #include <ttm/ttm_placement.h> | 34 | #include <drm/ttm/ttm_placement.h> |
| 35 | #include <ttm/ttm_module.h> | 35 | #include <drm/ttm/ttm_module.h> |
| 36 | #include <ttm/ttm_page_alloc.h> | 36 | #include <drm/ttm/ttm_page_alloc.h> |
| 37 | #include <drm/drmP.h> | 37 | #include <drm/drmP.h> |
| 38 | #include <drm/amdgpu_drm.h> | 38 | #include <drm/amdgpu_drm.h> |
| 39 | #include <linux/seq_file.h> | 39 | #include <linux/seq_file.h> |
| @@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) | |||
| 745 | return r; | 745 | return r; |
| 746 | } | 746 | } |
| 747 | 747 | ||
| 748 | spin_lock(>t->adev->gtt_list_lock); | ||
| 748 | flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); | 749 | flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); |
| 749 | gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; | 750 | gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; |
| 750 | r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, | 751 | r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, |
| @@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) | |||
| 753 | if (r) { | 754 | if (r) { |
| 754 | DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", | 755 | DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", |
| 755 | ttm->num_pages, gtt->offset); | 756 | ttm->num_pages, gtt->offset); |
| 756 | return r; | 757 | goto error_gart_bind; |
| 757 | } | 758 | } |
| 758 | spin_lock(>t->adev->gtt_list_lock); | 759 | |
| 759 | list_add_tail(>t->list, >t->adev->gtt_list); | 760 | list_add_tail(>t->list, >t->adev->gtt_list); |
| 761 | error_gart_bind: | ||
| 760 | spin_unlock(>t->adev->gtt_list_lock); | 762 | spin_unlock(>t->adev->gtt_list_lock); |
| 761 | return 0; | 763 | return r; |
| 762 | } | 764 | } |
| 763 | 765 | ||
| 764 | int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) | 766 | int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) |
| @@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) | |||
| 789 | static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) | 791 | static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) |
| 790 | { | 792 | { |
| 791 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 793 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| 794 | int r; | ||
| 792 | 795 | ||
| 793 | if (gtt->userptr) | 796 | if (gtt->userptr) |
| 794 | amdgpu_ttm_tt_unpin_userptr(ttm); | 797 | amdgpu_ttm_tt_unpin_userptr(ttm); |
| @@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) | |||
| 797 | return 0; | 800 | return 0; |
| 798 | 801 | ||
| 799 | /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ | 802 | /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ |
| 800 | if (gtt->adev->gart.ready) | ||
| 801 | amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); | ||
| 802 | |||
| 803 | spin_lock(>t->adev->gtt_list_lock); | 803 | spin_lock(>t->adev->gtt_list_lock); |
| 804 | r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); | ||
| 805 | if (r) { | ||
| 806 | DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", | ||
| 807 | gtt->ttm.ttm.num_pages, gtt->offset); | ||
| 808 | goto error_unbind; | ||
| 809 | } | ||
| 804 | list_del_init(&gtt->list); | 810 | |
| 811 | error_unbind: | ||
| 805 | spin_unlock(&gtt->adev->gtt_list_lock); | 812 | |
| 806 | 813 | return r; | |
| 807 | return 0; | ||
| 808 | } | 814 | } |
| 809 | 815 | ||
| 810 | static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) | 816 | static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) |
| @@ -1115,7 +1121,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
| 1115 | /* Change the size here instead of the init above so only lpfn is affected */ | 1121 | /* Change the size here instead of the init above so only lpfn is affected */ |
| 1116 | amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); | 1122 | amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); |
| 1117 | 1123 | ||
| 1118 | r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true, | 1124 | r = amdgpu_bo_create(adev, adev->mc.stolen_size, PAGE_SIZE, true, |
| 1119 | AMDGPU_GEM_DOMAIN_VRAM, | 1125 | AMDGPU_GEM_DOMAIN_VRAM, |
| 1120 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | 1126 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | |
| 1121 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | 1127 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, |
| @@ -1462,6 +1468,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, | |||
| 1462 | if (size & 0x3 || *pos & 0x3) | 1468 | if (size & 0x3 || *pos & 0x3) |
| 1463 | return -EINVAL; | 1469 | return -EINVAL; |
| 1464 | 1470 | ||
| 1471 | if (*pos >= adev->mc.mc_vram_size) | ||
| 1472 | return -ENXIO; | ||
| 1473 | |||
| 1465 | while (size) { | 1474 | while (size) { |
| 1466 | unsigned long flags; | 1475 | unsigned long flags; |
| 1467 | uint32_t value; | 1476 | uint32_t value; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index dfd1c98efa7c..4f50eeb65855 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | |||
| @@ -197,6 +197,27 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr) | |||
| 197 | } | 197 | } |
| 198 | } | 198 | } |
| 199 | 199 | ||
| 200 | void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr) | ||
| 201 | { | ||
| 202 | uint16_t version_major = le16_to_cpu(hdr->header_version_major); | ||
| 203 | uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); | ||
| 204 | |||
| 205 | DRM_DEBUG("GPU_INFO\n"); | ||
| 206 | amdgpu_ucode_print_common_hdr(hdr); | ||
| 207 | |||
| 208 | if (version_major == 1) { | ||
| 209 | const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr = | ||
| 210 | container_of(hdr, struct gpu_info_firmware_header_v1_0, header); | ||
| 211 | |||
| 212 | DRM_DEBUG("version_major: %u\n", | ||
| 213 | le16_to_cpu(gpu_info_hdr->version_major)); | ||
| 214 | DRM_DEBUG("version_minor: %u\n", | ||
| 215 | le16_to_cpu(gpu_info_hdr->version_minor)); | ||
| 216 | } else { | ||
| 217 | DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor); | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 200 | int amdgpu_ucode_validate(const struct firmware *fw) | 221 | int amdgpu_ucode_validate(const struct firmware *fw) |
| 201 | { | 222 | { |
| 202 | const struct common_firmware_header *hdr = | 223 | const struct common_firmware_header *hdr = |
| @@ -253,6 +274,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) | |||
| 253 | return AMDGPU_FW_LOAD_DIRECT; | 274 | return AMDGPU_FW_LOAD_DIRECT; |
| 254 | else | 275 | else |
| 255 | return AMDGPU_FW_LOAD_PSP; | 276 | return AMDGPU_FW_LOAD_PSP; |
| 277 | case CHIP_RAVEN: | ||
| 278 | #if 0 | ||
| 279 | if (!load_type) | ||
| 280 | return AMDGPU_FW_LOAD_DIRECT; | ||
| 281 | else | ||
| 282 | return AMDGPU_FW_LOAD_PSP; | ||
| 283 | #else | ||
| 284 | return AMDGPU_FW_LOAD_DIRECT; | ||
| 285 | #endif | ||
| 256 | default: | 286 | default: |
| 257 | DRM_ERROR("Unknow firmware load type\n"); | 287 | DRM_ERROR("Unknow firmware load type\n"); |
| 258 | } | 288 | } |
| @@ -349,7 +379,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) | |||
| 349 | 379 | ||
| 350 | err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, | 380 | err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, |
| 351 | amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, | 381 | amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, |
| 352 | 0, NULL, NULL, bo); | 382 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, |
| 383 | NULL, NULL, bo); | ||
| 353 | if (err) { | 384 | if (err) { |
| 354 | dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); | 385 | dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); |
| 355 | goto failed; | 386 | goto failed; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 758f03a1770d..30b5500dc152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | |||
| @@ -113,6 +113,32 @@ struct sdma_firmware_header_v1_1 { | |||
| 113 | uint32_t digest_size; | 113 | uint32_t digest_size; |
| 114 | }; | 114 | }; |
| 115 | 115 | ||
| 116 | /* gpu info payload */ | ||
| 117 | struct gpu_info_firmware_v1_0 { | ||
| 118 | uint32_t gc_num_se; | ||
| 119 | uint32_t gc_num_cu_per_sh; | ||
| 120 | uint32_t gc_num_sh_per_se; | ||
| 121 | uint32_t gc_num_rb_per_se; | ||
| 122 | uint32_t gc_num_tccs; | ||
| 123 | uint32_t gc_num_gprs; | ||
| 124 | uint32_t gc_num_max_gs_thds; | ||
| 125 | uint32_t gc_gs_table_depth; | ||
| 126 | uint32_t gc_gsprim_buff_depth; | ||
| 127 | uint32_t gc_parameter_cache_depth; | ||
| 128 | uint32_t gc_double_offchip_lds_buffer; | ||
| 129 | uint32_t gc_wave_size; | ||
| 130 | uint32_t gc_max_waves_per_simd; | ||
| 131 | uint32_t gc_max_scratch_slots_per_cu; | ||
| 132 | uint32_t gc_lds_size; | ||
| 133 | }; | ||
| 134 | |||
| 135 | /* version_major=1, version_minor=0 */ | ||
| 136 | struct gpu_info_firmware_header_v1_0 { | ||
| 137 | struct common_firmware_header header; | ||
| 138 | uint16_t version_major; /* version */ | ||
| 139 | uint16_t version_minor; /* version */ | ||
| 140 | }; | ||
| 141 | |||
| 116 | /* header is fixed size */ | 142 | /* header is fixed size */ |
| 117 | union amdgpu_firmware_header { | 143 | union amdgpu_firmware_header { |
| 118 | struct common_firmware_header common; | 144 | struct common_firmware_header common; |
| @@ -124,6 +150,7 @@ union amdgpu_firmware_header { | |||
| 124 | struct rlc_firmware_header_v2_0 rlc_v2_0; | 150 | struct rlc_firmware_header_v2_0 rlc_v2_0; |
| 125 | struct sdma_firmware_header_v1_0 sdma; | 151 | struct sdma_firmware_header_v1_0 sdma; |
| 126 | struct sdma_firmware_header_v1_1 sdma_v1_1; | 152 | struct sdma_firmware_header_v1_1 sdma_v1_1; |
| 153 | struct gpu_info_firmware_header_v1_0 gpu_info; | ||
| 127 | uint8_t raw[0x100]; | 154 | uint8_t raw[0x100]; |
| 128 | }; | 155 | }; |
| 129 | 156 | ||
| @@ -184,6 +211,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); | |||
| 184 | void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); | 211 | void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); |
| 185 | void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); | 212 | void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); |
| 186 | void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); | 213 | void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); |
| 214 | void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); | ||
| 187 | int amdgpu_ucode_validate(const struct firmware *fw); | 215 | int amdgpu_ucode_validate(const struct firmware *fw); |
| 188 | bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, | 216 | bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, |
| 189 | uint16_t hdr_major, uint16_t hdr_minor); | 217 | uint16_t hdr_major, uint16_t hdr_minor); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 735c38d7db0d..b692ad402252 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | |||
| @@ -165,35 +165,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) | |||
| 165 | adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | | 165 | adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | |
| 166 | (binary_id << 8)); | 166 | (binary_id << 8)); |
| 167 | 167 | ||
| 168 | /* allocate firmware, stack and heap BO */ | 168 | r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, |
| 169 | 169 | AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo, | |
| 170 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, | 170 | &adev->vce.gpu_addr, &adev->vce.cpu_addr); |
| 171 | AMDGPU_GEM_DOMAIN_VRAM, | ||
| 172 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
| 173 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | ||
| 174 | NULL, NULL, &adev->vce.vcpu_bo); | ||
| 175 | if (r) { | 171 | if (r) { |
| 176 | dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); | 172 | dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); |
| 177 | return r; | 173 | return r; |
| 178 | } | 174 | } |
| 179 | 175 | ||
| 180 | r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); | ||
| 181 | if (r) { | ||
| 182 | amdgpu_bo_unref(&adev->vce.vcpu_bo); | ||
| 183 | dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); | ||
| 184 | return r; | ||
| 185 | } | ||
| 186 | |||
| 187 | r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM, | ||
| 188 | &adev->vce.gpu_addr); | ||
| 189 | amdgpu_bo_unreserve(adev->vce.vcpu_bo); | ||
| 190 | if (r) { | ||
| 191 | amdgpu_bo_unref(&adev->vce.vcpu_bo); | ||
| 192 | dev_err(adev->dev, "(%d) VCE bo pin failed\n", r); | ||
| 193 | return r; | ||
| 194 | } | ||
| 195 | |||
| 196 | |||
| 197 | ring = &adev->vce.ring[0]; | 176 | ring = &adev->vce.ring[0]; |
| 198 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | 177 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; |
| 199 | r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, | 178 | r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, |
| @@ -230,7 +209,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) | |||
| 230 | 209 | ||
| 231 | amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); | 210 | amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); |
| 232 | 211 | ||
| 233 | amdgpu_bo_unref(&adev->vce.vcpu_bo); | 212 | amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, |
| 213 | (void **)&adev->vce.cpu_addr); | ||
| 234 | 214 | ||
| 235 | for (i = 0; i < adev->vce.num_rings; i++) | 215 | for (i = 0; i < adev->vce.num_rings; i++) |
| 236 | amdgpu_ring_fini(&adev->vce.ring[i]); | 216 | amdgpu_ring_fini(&adev->vce.ring[i]); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 0a7f18c461e4..5ce54cde472d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | |||
| @@ -33,6 +33,8 @@ | |||
| 33 | struct amdgpu_vce { | 33 | struct amdgpu_vce { |
| 34 | struct amdgpu_bo *vcpu_bo; | 34 | struct amdgpu_bo *vcpu_bo; |
| 35 | uint64_t gpu_addr; | 35 | uint64_t gpu_addr; |
| 36 | void *cpu_addr; | ||
| 37 | void *saved_bo; | ||
| 36 | unsigned fw_version; | 38 | unsigned fw_version; |
| 37 | unsigned fb_version; | 39 | unsigned fb_version; |
| 38 | atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; | 40 | atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c new file mode 100644 index 000000000000..09190fadd228 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | |||
| @@ -0,0 +1,654 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2016 Advanced Micro Devices, Inc. | ||
| 3 | * All Rights Reserved. | ||
| 4 | * | ||
| 5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 6 | * copy of this software and associated documentation files (the | ||
| 7 | * "Software"), to deal in the Software without restriction, including | ||
| 8 | * without limitation the rights to use, copy, modify, merge, publish, | ||
| 9 | * distribute, sub license, and/or sell copies of the Software, and to | ||
| 10 | * permit persons to whom the Software is furnished to do so, subject to | ||
| 11 | * the following conditions: | ||
| 12 | * | ||
| 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
| 16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
| 17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
| 18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
| 19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 20 | * | ||
| 21 | * The above copyright notice and this permission notice (including the | ||
| 22 | * next paragraph) shall be included in all copies or substantial portions | ||
| 23 | * of the Software. | ||
| 24 | * | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/firmware.h> | ||
| 28 | #include <linux/module.h> | ||
| 29 | #include <drm/drmP.h> | ||
| 30 | #include <drm/drm.h> | ||
| 31 | |||
| 32 | #include "amdgpu.h" | ||
| 33 | #include "amdgpu_pm.h" | ||
| 34 | #include "amdgpu_vcn.h" | ||
| 35 | #include "soc15d.h" | ||
| 36 | #include "soc15_common.h" | ||
| 37 | |||
| 38 | #include "vega10/soc15ip.h" | ||
| 39 | #include "raven1/VCN/vcn_1_0_offset.h" | ||
| 40 | |||
| 41 | /* 1 second timeout */ | ||
| 42 | #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) | ||
| 43 | |||
| 44 | /* Firmware Names */ | ||
| 45 | #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" | ||
| 46 | |||
| 47 | MODULE_FIRMWARE(FIRMWARE_RAVEN); | ||
| 48 | |||
| 49 | static void amdgpu_vcn_idle_work_handler(struct work_struct *work); | ||
| 50 | |||
| 51 | int amdgpu_vcn_sw_init(struct amdgpu_device *adev) | ||
| 52 | { | ||
| 53 | struct amdgpu_ring *ring; | ||
| 54 | struct amd_sched_rq *rq; | ||
| 55 | unsigned long bo_size; | ||
| 56 | const char *fw_name; | ||
| 57 | const struct common_firmware_header *hdr; | ||
| 58 | unsigned version_major, version_minor, family_id; | ||
| 59 | int r; | ||
| 60 | |||
| 61 | INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); | ||
| 62 | |||
| 63 | switch (adev->asic_type) { | ||
| 64 | case CHIP_RAVEN: | ||
| 65 | fw_name = FIRMWARE_RAVEN; | ||
| 66 | break; | ||
| 67 | default: | ||
| 68 | return -EINVAL; | ||
| 69 | } | ||
| 70 | |||
| 71 | r = request_firmware(&adev->vcn.fw, fw_name, adev->dev); | ||
| 72 | if (r) { | ||
| 73 | dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n", | ||
| 74 | fw_name); | ||
| 75 | return r; | ||
| 76 | } | ||
| 77 | |||
| 78 | r = amdgpu_ucode_validate(adev->vcn.fw); | ||
| 79 | if (r) { | ||
| 80 | dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n", | ||
| 81 | fw_name); | ||
| 82 | release_firmware(adev->vcn.fw); | ||
| 83 | adev->vcn.fw = NULL; | ||
| 84 | return r; | ||
| 85 | } | ||
| 86 | |||
| 87 | hdr = (const struct common_firmware_header *)adev->vcn.fw->data; | ||
| 88 | family_id = le32_to_cpu(hdr->ucode_version) & 0xff; | ||
| 89 | version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; | ||
| 90 | version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; | ||
| 91 | DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", | ||
| 92 | version_major, version_minor, family_id); | ||
| 93 | |||
| 94 | |||
| 95 | bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) | ||
| 96 | + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE | ||
| 97 | + AMDGPU_VCN_SESSION_SIZE * 40; | ||
| 98 | r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, | ||
| 99 | AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, | ||
| 100 | &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); | ||
| 101 | if (r) { | ||
| 102 | dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); | ||
| 103 | return r; | ||
| 104 | } | ||
| 105 | |||
| 106 | ring = &adev->vcn.ring_dec; | ||
| 107 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | ||
| 108 | r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, | ||
| 109 | rq, amdgpu_sched_jobs); | ||
| 110 | if (r != 0) { | ||
| 111 | DRM_ERROR("Failed setting up VCN dec run queue.\n"); | ||
| 112 | return r; | ||
| 113 | } | ||
| 114 | |||
| 115 | ring = &adev->vcn.ring_enc[0]; | ||
| 116 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | ||
| 117 | r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, | ||
| 118 | rq, amdgpu_sched_jobs); | ||
| 119 | if (r != 0) { | ||
| 120 | DRM_ERROR("Failed setting up VCN enc run queue.\n"); | ||
| 121 | return r; | ||
| 122 | } | ||
| 123 | |||
| 124 | return 0; | ||
| 125 | } | ||
| 126 | |||
| 127 | int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) | ||
| 128 | { | ||
| 129 | int i; | ||
| 130 | |||
| 131 | kfree(adev->vcn.saved_bo); | ||
| 132 | |||
| 133 | amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); | ||
| 134 | |||
| 135 | amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); | ||
| 136 | |||
| 137 | amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, | ||
| 138 | &adev->vcn.gpu_addr, | ||
| 139 | (void **)&adev->vcn.cpu_addr); | ||
| 140 | |||
| 141 | amdgpu_ring_fini(&adev->vcn.ring_dec); | ||
| 142 | |||
| 143 | for (i = 0; i < adev->vcn.num_enc_rings; ++i) | ||
| 144 | amdgpu_ring_fini(&adev->vcn.ring_enc[i]); | ||
| 145 | |||
| 146 | release_firmware(adev->vcn.fw); | ||
| 147 | |||
| 148 | return 0; | ||
| 149 | } | ||
| 150 | |||
| 151 | int amdgpu_vcn_suspend(struct amdgpu_device *adev) | ||
| 152 | { | ||
| 153 | unsigned size; | ||
| 154 | void *ptr; | ||
| 155 | |||
| 156 | if (adev->vcn.vcpu_bo == NULL) | ||
| 157 | return 0; | ||
| 158 | |||
| 159 | cancel_delayed_work_sync(&adev->vcn.idle_work); | ||
| 160 | |||
| 161 | size = amdgpu_bo_size(adev->vcn.vcpu_bo); | ||
| 162 | ptr = adev->vcn.cpu_addr; | ||
| 163 | |||
| 164 | adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL); | ||
| 165 | if (!adev->vcn.saved_bo) | ||
| 166 | return -ENOMEM; | ||
| 167 | |||
| 168 | memcpy_fromio(adev->vcn.saved_bo, ptr, size); | ||
| 169 | |||
| 170 | return 0; | ||
| 171 | } | ||
| 172 | |||
| 173 | int amdgpu_vcn_resume(struct amdgpu_device *adev) | ||
| 174 | { | ||
| 175 | unsigned size; | ||
| 176 | void *ptr; | ||
| 177 | |||
| 178 | if (adev->vcn.vcpu_bo == NULL) | ||
| 179 | return -EINVAL; | ||
| 180 | |||
| 181 | size = amdgpu_bo_size(adev->vcn.vcpu_bo); | ||
| 182 | ptr = adev->vcn.cpu_addr; | ||
| 183 | |||
| 184 | if (adev->vcn.saved_bo != NULL) { | ||
| 185 | memcpy_toio(ptr, adev->vcn.saved_bo, size); | ||
| 186 | kfree(adev->vcn.saved_bo); | ||
| 187 | adev->vcn.saved_bo = NULL; | ||
| 188 | } else { | ||
| 189 | const struct common_firmware_header *hdr; | ||
| 190 | unsigned offset; | ||
| 191 | |||
| 192 | hdr = (const struct common_firmware_header *)adev->vcn.fw->data; | ||
| 193 | offset = le32_to_cpu(hdr->ucode_array_offset_bytes); | ||
| 194 | memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, | ||
| 195 | le32_to_cpu(hdr->ucode_size_bytes)); | ||
| 196 | size -= le32_to_cpu(hdr->ucode_size_bytes); | ||
| 197 | ptr += le32_to_cpu(hdr->ucode_size_bytes); | ||
| 198 | memset_io(ptr, 0, size); | ||
| 199 | } | ||
| 200 | |||
| 201 | return 0; | ||
| 202 | } | ||
| 203 | |||
| 204 | static void amdgpu_vcn_idle_work_handler(struct work_struct *work) | ||
| 205 | { | ||
| 206 | struct amdgpu_device *adev = | ||
| 207 | container_of(work, struct amdgpu_device, vcn.idle_work.work); | ||
| 208 | unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); | ||
| 209 | |||
| 210 | if (fences == 0) { | ||
| 211 | if (adev->pm.dpm_enabled) { | ||
| 212 | amdgpu_dpm_enable_uvd(adev, false); | ||
| 213 | } else { | ||
| 214 | amdgpu_asic_set_uvd_clocks(adev, 0, 0); | ||
| 215 | } | ||
| 216 | } else { | ||
| 217 | schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 221 | void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) | ||
| 222 | { | ||
| 223 | struct amdgpu_device *adev = ring->adev; | ||
| 224 | bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); | ||
| 225 | |||
| 226 | if (set_clocks) { | ||
| 227 | if (adev->pm.dpm_enabled) { | ||
| 228 | amdgpu_dpm_enable_uvd(adev, true); | ||
| 229 | } else { | ||
| 230 | amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); | ||
| 231 | } | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) | ||
| 236 | { | ||
| 237 | schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); | ||
| 238 | } | ||
| 239 | |||
| 240 | int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) | ||
| 241 | { | ||
| 242 | struct amdgpu_device *adev = ring->adev; | ||
| 243 | uint32_t tmp = 0; | ||
| 244 | unsigned i; | ||
| 245 | int r; | ||
| 246 | |||
| 247 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); | ||
| 248 | r = amdgpu_ring_alloc(ring, 3); | ||
| 249 | if (r) { | ||
| 250 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | ||
| 251 | ring->idx, r); | ||
| 252 | return r; | ||
| 253 | } | ||
| 254 | amdgpu_ring_write(ring, | ||
| 255 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); | ||
| 256 | amdgpu_ring_write(ring, 0xDEADBEEF); | ||
| 257 | amdgpu_ring_commit(ring); | ||
| 258 | for (i = 0; i < adev->usec_timeout; i++) { | ||
| 259 | tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); | ||
| 260 | if (tmp == 0xDEADBEEF) | ||
| 261 | break; | ||
| 262 | DRM_UDELAY(1); | ||
| 263 | } | ||
| 264 | |||
| 265 | if (i < adev->usec_timeout) { | ||
| 266 | DRM_INFO("ring test on %d succeeded in %d usecs\n", | ||
| 267 | ring->idx, i); | ||
| 268 | } else { | ||
| 269 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
| 270 | ring->idx, tmp); | ||
| 271 | r = -EINVAL; | ||
| 272 | } | ||
| 273 | return r; | ||
| 274 | } | ||
| 275 | |||
| 276 | static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, | ||
| 277 | bool direct, struct dma_fence **fence) | ||
| 278 | { | ||
| 279 | struct ttm_validate_buffer tv; | ||
| 280 | struct ww_acquire_ctx ticket; | ||
| 281 | struct list_head head; | ||
| 282 | struct amdgpu_job *job; | ||
| 283 | struct amdgpu_ib *ib; | ||
| 284 | struct dma_fence *f = NULL; | ||
| 285 | struct amdgpu_device *adev = ring->adev; | ||
| 286 | uint64_t addr; | ||
| 287 | int i, r; | ||
| 288 | |||
| 289 | memset(&tv, 0, sizeof(tv)); | ||
| 290 | tv.bo = &bo->tbo; | ||
| 291 | |||
| 292 | INIT_LIST_HEAD(&head); | ||
| 293 | list_add(&tv.head, &head); | ||
| 294 | |||
| 295 | r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); | ||
| 296 | if (r) | ||
| 297 | return r; | ||
| 298 | |||
| 299 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); | ||
| 300 | if (r) | ||
| 301 | goto err; | ||
| 302 | |||
| 303 | r = amdgpu_job_alloc_with_ib(adev, 64, &job); | ||
| 304 | if (r) | ||
| 305 | goto err; | ||
| 306 | |||
| 307 | ib = &job->ibs[0]; | ||
| 308 | addr = amdgpu_bo_gpu_offset(bo); | ||
| 309 | ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0); | ||
| 310 | ib->ptr[1] = addr; | ||
| 311 | ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0); | ||
| 312 | ib->ptr[3] = addr >> 32; | ||
| 313 | ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0); | ||
| 314 | ib->ptr[5] = 0; | ||
| 315 | for (i = 6; i < 16; i += 2) { | ||
| 316 | ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0); | ||
| 317 | ib->ptr[i+1] = 0; | ||
| 318 | } | ||
| 319 | ib->length_dw = 16; | ||
| 320 | |||
| 321 | if (direct) { | ||
| 322 | r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); | ||
| 323 | job->fence = dma_fence_get(f); | ||
| 324 | if (r) | ||
| 325 | goto err_free; | ||
| 326 | |||
| 327 | amdgpu_job_free(job); | ||
| 328 | } else { | ||
| 329 | r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec, | ||
| 330 | AMDGPU_FENCE_OWNER_UNDEFINED, &f); | ||
| 331 | if (r) | ||
| 332 | goto err_free; | ||
| 333 | } | ||
| 334 | |||
| 335 | ttm_eu_fence_buffer_objects(&ticket, &head, f); | ||
| 336 | |||
| 337 | if (fence) | ||
| 338 | *fence = dma_fence_get(f); | ||
| 339 | amdgpu_bo_unref(&bo); | ||
| 340 | dma_fence_put(f); | ||
| 341 | |||
| 342 | return 0; | ||
| 343 | |||
| 344 | err_free: | ||
| 345 | amdgpu_job_free(job); | ||
| 346 | |||
| 347 | err: | ||
| 348 | ttm_eu_backoff_reservation(&ticket, &head); | ||
| 349 | return r; | ||
| 350 | } | ||
| 351 | |||
| 352 | static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | ||
| 353 | struct dma_fence **fence) | ||
| 354 | { | ||
| 355 | struct amdgpu_device *adev = ring->adev; | ||
| 356 | struct amdgpu_bo *bo; | ||
| 357 | uint32_t *msg; | ||
| 358 | int r, i; | ||
| 359 | |||
| 360 | r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, | ||
| 361 | AMDGPU_GEM_DOMAIN_VRAM, | ||
| 362 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
| 363 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | ||
| 364 | NULL, NULL, &bo); | ||
| 365 | if (r) | ||
| 366 | return r; | ||
| 367 | |||
| 368 | r = amdgpu_bo_reserve(bo, false); | ||
| 369 | if (r) { | ||
| 370 | amdgpu_bo_unref(&bo); | ||
| 371 | return r; | ||
| 372 | } | ||
| 373 | |||
| 374 | r = amdgpu_bo_kmap(bo, (void **)&msg); | ||
| 375 | if (r) { | ||
| 376 | amdgpu_bo_unreserve(bo); | ||
| 377 | amdgpu_bo_unref(&bo); | ||
| 378 | return r; | ||
| 379 | } | ||
| 380 | |||
| 381 | msg[0] = cpu_to_le32(0x00000028); | ||
| 382 | msg[1] = cpu_to_le32(0x00000038); | ||
| 383 | msg[2] = cpu_to_le32(0x00000001); | ||
| 384 | msg[3] = cpu_to_le32(0x00000000); | ||
| 385 | msg[4] = cpu_to_le32(handle); | ||
| 386 | msg[5] = cpu_to_le32(0x00000000); | ||
| 387 | msg[6] = cpu_to_le32(0x00000001); | ||
| 388 | msg[7] = cpu_to_le32(0x00000028); | ||
| 389 | msg[8] = cpu_to_le32(0x00000010); | ||
| 390 | msg[9] = cpu_to_le32(0x00000000); | ||
| 391 | msg[10] = cpu_to_le32(0x00000007); | ||
| 392 | msg[11] = cpu_to_le32(0x00000000); | ||
| 393 | msg[12] = cpu_to_le32(0x00000780); | ||
| 394 | msg[13] = cpu_to_le32(0x00000440); | ||
| 395 | for (i = 14; i < 1024; ++i) | ||
| 396 | msg[i] = cpu_to_le32(0x0); | ||
| 397 | |||
| 398 | amdgpu_bo_kunmap(bo); | ||
| 399 | amdgpu_bo_unreserve(bo); | ||
| 400 | |||
| 401 | return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); | ||
| 402 | } | ||
| 403 | |||
| 404 | static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | ||
| 405 | bool direct, struct dma_fence **fence) | ||
| 406 | { | ||
| 407 | struct amdgpu_device *adev = ring->adev; | ||
| 408 | struct amdgpu_bo *bo; | ||
| 409 | uint32_t *msg; | ||
| 410 | int r, i; | ||
| 411 | |||
| 412 | r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, | ||
| 413 | AMDGPU_GEM_DOMAIN_VRAM, | ||
| 414 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
| 415 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | ||
| 416 | NULL, NULL, &bo); | ||
| 417 | if (r) | ||
| 418 | return r; | ||
| 419 | |||
| 420 | r = amdgpu_bo_reserve(bo, false); | ||
| 421 | if (r) { | ||
| 422 | amdgpu_bo_unref(&bo); | ||
| 423 | return r; | ||
| 424 | } | ||
| 425 | |||
| 426 | r = amdgpu_bo_kmap(bo, (void **)&msg); | ||
| 427 | if (r) { | ||
| 428 | amdgpu_bo_unreserve(bo); | ||
| 429 | amdgpu_bo_unref(&bo); | ||
| 430 | return r; | ||
| 431 | } | ||
| 432 | |||
| 433 | msg[0] = cpu_to_le32(0x00000028); | ||
| 434 | msg[1] = cpu_to_le32(0x00000018); | ||
| 435 | msg[2] = cpu_to_le32(0x00000000); | ||
| 436 | msg[3] = cpu_to_le32(0x00000002); | ||
| 437 | msg[4] = cpu_to_le32(handle); | ||
| 438 | msg[5] = cpu_to_le32(0x00000000); | ||
| 439 | for (i = 6; i < 1024; ++i) | ||
| 440 | msg[i] = cpu_to_le32(0x0); | ||
| 441 | |||
| 442 | amdgpu_bo_kunmap(bo); | ||
| 443 | amdgpu_bo_unreserve(bo); | ||
| 444 | |||
| 445 | return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); | ||
| 446 | } | ||
| 447 | |||
| 448 | int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) | ||
| 449 | { | ||
| 450 | struct dma_fence *fence; | ||
| 451 | long r; | ||
| 452 | |||
| 453 | r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); | ||
| 454 | if (r) { | ||
| 455 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); | ||
| 456 | goto error; | ||
| 457 | } | ||
| 458 | |||
| 459 | r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence); | ||
| 460 | if (r) { | ||
| 461 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); | ||
| 462 | goto error; | ||
| 463 | } | ||
| 464 | |||
| 465 | r = dma_fence_wait_timeout(fence, false, timeout); | ||
| 466 | if (r == 0) { | ||
| 467 | DRM_ERROR("amdgpu: IB test timed out.\n"); | ||
| 468 | r = -ETIMEDOUT; | ||
| 469 | } else if (r < 0) { | ||
| 470 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
| 471 | } else { | ||
| 472 | DRM_INFO("ib test on ring %d succeeded\n", ring->idx); | ||
| 473 | r = 0; | ||
| 474 | } | ||
| 475 | |||
| 476 | dma_fence_put(fence); | ||
| 477 | |||
| 478 | error: | ||
| 479 | return r; | ||
| 480 | } | ||
| 481 | |||
| 482 | int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) | ||
| 483 | { | ||
| 484 | struct amdgpu_device *adev = ring->adev; | ||
| 485 | uint32_t rptr = amdgpu_ring_get_rptr(ring); | ||
| 486 | unsigned i; | ||
| 487 | int r; | ||
| 488 | |||
| 489 | r = amdgpu_ring_alloc(ring, 16); | ||
| 490 | if (r) { | ||
| 491 | DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n", | ||
| 492 | ring->idx, r); | ||
| 493 | return r; | ||
| 494 | } | ||
| 495 | amdgpu_ring_write(ring, VCN_ENC_CMD_END); | ||
| 496 | amdgpu_ring_commit(ring); | ||
| 497 | |||
| 498 | for (i = 0; i < adev->usec_timeout; i++) { | ||
| 499 | if (amdgpu_ring_get_rptr(ring) != rptr) | ||
| 500 | break; | ||
| 501 | DRM_UDELAY(1); | ||
| 502 | } | ||
| 503 | |||
| 504 | if (i < adev->usec_timeout) { | ||
| 505 | DRM_INFO("ring test on %d succeeded in %d usecs\n", | ||
| 506 | ring->idx, i); | ||
| 507 | } else { | ||
| 508 | DRM_ERROR("amdgpu: ring %d test failed\n", | ||
| 509 | ring->idx); | ||
| 510 | r = -ETIMEDOUT; | ||
| 511 | } | ||
| 512 | |||
| 513 | return r; | ||
| 514 | } | ||
| 515 | |||
| 516 | static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | ||
| 517 | struct dma_fence **fence) | ||
| 518 | { | ||
| 519 | const unsigned ib_size_dw = 16; | ||
| 520 | struct amdgpu_job *job; | ||
| 521 | struct amdgpu_ib *ib; | ||
| 522 | struct dma_fence *f = NULL; | ||
| 523 | uint64_t dummy; | ||
| 524 | int i, r; | ||
| 525 | |||
| 526 | r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); | ||
| 527 | if (r) | ||
| 528 | return r; | ||
| 529 | |||
| 530 | ib = &job->ibs[0]; | ||
| 531 | dummy = ib->gpu_addr + 1024; | ||
| 532 | |||
| 533 | ib->length_dw = 0; | ||
| 534 | ib->ptr[ib->length_dw++] = 0x00000018; | ||
| 535 | ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ | ||
| 536 | ib->ptr[ib->length_dw++] = handle; | ||
| 537 | ib->ptr[ib->length_dw++] = upper_32_bits(dummy); | ||
| 538 | ib->ptr[ib->length_dw++] = dummy; | ||
| 539 | ib->ptr[ib->length_dw++] = 0x0000000b; | ||
| 540 | |||
| 541 | ib->ptr[ib->length_dw++] = 0x00000014; | ||
| 542 | ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ | ||
| 543 | ib->ptr[ib->length_dw++] = 0x0000001c; | ||
| 544 | ib->ptr[ib->length_dw++] = 0x00000000; | ||
| 545 | ib->ptr[ib->length_dw++] = 0x00000000; | ||
| 546 | |||
| 547 | ib->ptr[ib->length_dw++] = 0x00000008; | ||
| 548 | ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */ | ||
| 549 | |||
| 550 | for (i = ib->length_dw; i < ib_size_dw; ++i) | ||
| 551 | ib->ptr[i] = 0x0; | ||
| 552 | |||
| 553 | r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); | ||
| 554 | job->fence = dma_fence_get(f); | ||
| 555 | if (r) | ||
| 556 | goto err; | ||
| 557 | |||
| 558 | amdgpu_job_free(job); | ||
| 559 | if (fence) | ||
| 560 | *fence = dma_fence_get(f); | ||
| 561 | dma_fence_put(f); | ||
| 562 | |||
| 563 | return 0; | ||
| 564 | |||
| 565 | err: | ||
| 566 | amdgpu_job_free(job); | ||
| 567 | return r; | ||
| 568 | } | ||
| 569 | |||
| 570 | static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | ||
| 571 | struct dma_fence **fence) | ||
| 572 | { | ||
| 573 | const unsigned ib_size_dw = 16; | ||
| 574 | struct amdgpu_job *job; | ||
| 575 | struct amdgpu_ib *ib; | ||
| 576 | struct dma_fence *f = NULL; | ||
| 577 | uint64_t dummy; | ||
| 578 | int i, r; | ||
| 579 | |||
| 580 | r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); | ||
| 581 | if (r) | ||
| 582 | return r; | ||
| 583 | |||
| 584 | ib = &job->ibs[0]; | ||
| 585 | dummy = ib->gpu_addr + 1024; | ||
| 586 | |||
| 587 | ib->length_dw = 0; | ||
| 588 | ib->ptr[ib->length_dw++] = 0x00000018; | ||
| 589 | ib->ptr[ib->length_dw++] = 0x00000001; | ||
| 590 | ib->ptr[ib->length_dw++] = handle; | ||
| 591 | ib->ptr[ib->length_dw++] = upper_32_bits(dummy); | ||
| 592 | ib->ptr[ib->length_dw++] = dummy; | ||
| 593 | ib->ptr[ib->length_dw++] = 0x0000000b; | ||
| 594 | |||
| 595 | ib->ptr[ib->length_dw++] = 0x00000014; | ||
| 596 | ib->ptr[ib->length_dw++] = 0x00000002; | ||
| 597 | ib->ptr[ib->length_dw++] = 0x0000001c; | ||
| 598 | ib->ptr[ib->length_dw++] = 0x00000000; | ||
| 599 | ib->ptr[ib->length_dw++] = 0x00000000; | ||
| 600 | |||
| 601 | ib->ptr[ib->length_dw++] = 0x00000008; | ||
| 602 | ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */ | ||
| 603 | |||
| 604 | for (i = ib->length_dw; i < ib_size_dw; ++i) | ||
| 605 | ib->ptr[i] = 0x0; | ||
| 606 | |||
| 607 | r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); | ||
| 608 | job->fence = dma_fence_get(f); | ||
| 609 | if (r) | ||
| 610 | goto err; | ||
| 611 | |||
| 612 | amdgpu_job_free(job); | ||
| 613 | if (fence) | ||
| 614 | *fence = dma_fence_get(f); | ||
| 615 | dma_fence_put(f); | ||
| 616 | |||
| 617 | return 0; | ||
| 618 | |||
| 619 | err: | ||
| 620 | amdgpu_job_free(job); | ||
| 621 | return r; | ||
| 622 | } | ||
| 623 | |||
| 624 | int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) | ||
| 625 | { | ||
| 626 | struct dma_fence *fence = NULL; | ||
| 627 | long r; | ||
| 628 | |||
| 629 | r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); | ||
| 630 | if (r) { | ||
| 631 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); | ||
| 632 | goto error; | ||
| 633 | } | ||
| 634 | |||
| 635 | r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); | ||
| 636 | if (r) { | ||
| 637 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); | ||
| 638 | goto error; | ||
| 639 | } | ||
| 640 | |||
| 641 | r = dma_fence_wait_timeout(fence, false, timeout); | ||
| 642 | if (r == 0) { | ||
| 643 | DRM_ERROR("amdgpu: IB test timed out.\n"); | ||
| 644 | r = -ETIMEDOUT; | ||
| 645 | } else if (r < 0) { | ||
| 646 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
| 647 | } else { | ||
| 648 | DRM_INFO("ib test on ring %d succeeded\n", ring->idx); | ||
| 649 | r = 0; | ||
| 650 | } | ||
| 651 | error: | ||
| 652 | dma_fence_put(fence); | ||
| 653 | return r; | ||
| 654 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h new file mode 100644 index 000000000000..d50ba0657854 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | |||
| @@ -0,0 +1,77 @@ | |||
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __AMDGPU_VCN_H__
#define __AMDGPU_VCN_H__

/* sizes of the memory regions carved out after the firmware image
 * (NOTE(review): presumably consumed by the VCN VCPU — confirm against
 * the layout set up in amdgpu_vcn.c)
 */
#define AMDGPU_VCN_STACK_SIZE (200*1024)
#define AMDGPU_VCN_HEAP_SIZE (256*1024)
#define AMDGPU_VCN_SESSION_SIZE (50*1024)
/* byte offset of the firmware image inside the VCPU BO */
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3

/* decode ring command opcodes */
#define VCN_DEC_CMD_FENCE 0x00000000
#define VCN_DEC_CMD_TRAP 0x00000001
#define VCN_DEC_CMD_WRITE_REG 0x00000004
#define VCN_DEC_CMD_REG_READ_COND_WAIT 0x00000006
#define VCN_DEC_CMD_PACKET_START 0x0000000a
#define VCN_DEC_CMD_PACKET_END 0x0000000b

/* encode ring command opcodes */
#define VCN_ENC_CMD_NO_OP 0x00000000
#define VCN_ENC_CMD_END 0x00000001
#define VCN_ENC_CMD_IB 0x00000002
#define VCN_ENC_CMD_FENCE 0x00000003
#define VCN_ENC_CMD_TRAP 0x00000004
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c

/* per-device state of the VCN (video core next) block */
struct amdgpu_vcn {
	struct amdgpu_bo *vcpu_bo;	/* BO holding firmware + VCPU memory */
	void *cpu_addr;			/* CPU mapping of vcpu_bo */
	uint64_t gpu_addr;		/* GPU address of vcpu_bo */
	unsigned fw_version;
	void *saved_bo;			/* CPU copy of vcpu_bo across suspend */
	struct delayed_work idle_work;	/* delayed power-down when idle */
	const struct firmware *fw;	/* VCN firmware */
	struct amdgpu_ring ring_dec;	/* single decode ring */
	struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
	struct amdgpu_irq_src irq;
	struct amd_sched_entity entity_dec;	/* scheduler entity for decode */
	struct amd_sched_entity entity_enc;	/* scheduler entity for encode */
	unsigned num_enc_rings;		/* number of ring_enc[] entries in use */
};

int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
int amdgpu_vcn_suspend(struct amdgpu_device *adev);
int amdgpu_vcn_resume(struct amdgpu_device *adev);
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);

int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);

#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 6bf5cea294f2..8a081e162d13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #define MAX_KIQ_REG_WAIT 100000 | ||
| 25 | 26 | ||
| 26 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev) | 27 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev) |
| 27 | { | 28 | { |
| @@ -105,8 +106,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) | |||
| 105 | /* enable virtual display */ | 106 | /* enable virtual display */ |
| 106 | adev->mode_info.num_crtc = 1; | 107 | adev->mode_info.num_crtc = 1; |
| 107 | adev->enable_virtual_display = true; | 108 | adev->enable_virtual_display = true; |
| 109 | adev->cg_flags = 0; | ||
| 110 | adev->pg_flags = 0; | ||
| 108 | 111 | ||
| 109 | mutex_init(&adev->virt.lock_kiq); | ||
| 110 | mutex_init(&adev->virt.lock_reset); | 112 | mutex_init(&adev->virt.lock_reset); |
| 111 | } | 113 | } |
| 112 | 114 | ||
| @@ -120,17 +122,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) | |||
| 120 | 122 | ||
| 121 | BUG_ON(!ring->funcs->emit_rreg); | 123 | BUG_ON(!ring->funcs->emit_rreg); |
| 122 | 124 | ||
| 123 | mutex_lock(&adev->virt.lock_kiq); | 125 | mutex_lock(&kiq->ring_mutex); |
| 124 | amdgpu_ring_alloc(ring, 32); | 126 | amdgpu_ring_alloc(ring, 32); |
| 125 | amdgpu_ring_emit_rreg(ring, reg); | 127 | amdgpu_ring_emit_rreg(ring, reg); |
| 126 | amdgpu_fence_emit(ring, &f); | 128 | amdgpu_fence_emit(ring, &f); |
| 127 | amdgpu_ring_commit(ring); | 129 | amdgpu_ring_commit(ring); |
| 128 | mutex_unlock(&adev->virt.lock_kiq); | 130 | mutex_unlock(&kiq->ring_mutex); |
| 129 | 131 | ||
| 130 | r = dma_fence_wait(f, false); | 132 | r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); |
| 131 | if (r) | ||
| 132 | DRM_ERROR("wait for kiq fence error: %ld.\n", r); | ||
| 133 | dma_fence_put(f); | 133 | dma_fence_put(f); |
| 134 | if (r < 1) { | ||
| 135 | DRM_ERROR("wait for kiq fence error: %ld.\n", r); | ||
| 136 | return ~0; | ||
| 137 | } | ||
| 134 | 138 | ||
| 135 | val = adev->wb.wb[adev->virt.reg_val_offs]; | 139 | val = adev->wb.wb[adev->virt.reg_val_offs]; |
| 136 | 140 | ||
| @@ -146,15 +150,15 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) | |||
| 146 | 150 | ||
| 147 | BUG_ON(!ring->funcs->emit_wreg); | 151 | BUG_ON(!ring->funcs->emit_wreg); |
| 148 | 152 | ||
| 149 | mutex_lock(&adev->virt.lock_kiq); | 153 | mutex_lock(&kiq->ring_mutex); |
| 150 | amdgpu_ring_alloc(ring, 32); | 154 | amdgpu_ring_alloc(ring, 32); |
| 151 | amdgpu_ring_emit_wreg(ring, reg, v); | 155 | amdgpu_ring_emit_wreg(ring, reg, v); |
| 152 | amdgpu_fence_emit(ring, &f); | 156 | amdgpu_fence_emit(ring, &f); |
| 153 | amdgpu_ring_commit(ring); | 157 | amdgpu_ring_commit(ring); |
| 154 | mutex_unlock(&adev->virt.lock_kiq); | 158 | mutex_unlock(&kiq->ring_mutex); |
| 155 | 159 | ||
| 156 | r = dma_fence_wait(f, false); | 160 | r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); |
| 157 | if (r) | 161 | if (r < 1) |
| 158 | DRM_ERROR("wait for kiq fence error: %ld.\n", r); | 162 | DRM_ERROR("wait for kiq fence error: %ld.\n", r); |
| 159 | dma_fence_put(f); | 163 | dma_fence_put(f); |
| 160 | } | 164 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index a8ed162cc0bc..9e1062edb76e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | |||
| @@ -52,7 +52,6 @@ struct amdgpu_virt { | |||
| 52 | uint64_t csa_vmid0_addr; | 52 | uint64_t csa_vmid0_addr; |
| 53 | bool chained_ib_support; | 53 | bool chained_ib_support; |
| 54 | uint32_t reg_val_offs; | 54 | uint32_t reg_val_offs; |
| 55 | struct mutex lock_kiq; | ||
| 56 | struct mutex lock_reset; | 55 | struct mutex lock_reset; |
| 57 | struct amdgpu_irq_src ack_irq; | 56 | struct amdgpu_irq_src ack_irq; |
| 58 | struct amdgpu_irq_src rcv_irq; | 57 | struct amdgpu_irq_src rcv_irq; |
| @@ -97,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); | |||
| 97 | int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); | 96 | int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); |
| 98 | int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); | 97 | int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); |
| 99 | int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); | 98 | int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); |
| 100 | int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); | 99 | int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); |
| 101 | int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); | 100 | int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); |
| 102 | void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); | 101 | void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); |
| 103 | 102 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 07ff3b1514f1..5795f81369f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
| @@ -79,6 +79,12 @@ struct amdgpu_pte_update_params { | |||
| 79 | uint64_t flags); | 79 | uint64_t flags); |
| 80 | /* indicate update pt or its shadow */ | 80 | /* indicate update pt or its shadow */ |
| 81 | bool shadow; | 81 | bool shadow; |
| 82 | /* The next two are used during VM update by CPU | ||
| 83 | * DMA addresses to use for mapping | ||
| 84 | * Kernel pointer of PD/PT BO that needs to be updated | ||
| 85 | */ | ||
| 86 | dma_addr_t *pages_addr; | ||
| 87 | void *kptr; | ||
| 82 | }; | 88 | }; |
| 83 | 89 | ||
| 84 | /* Helper to disable partial resident texture feature from a fence callback */ | 90 | /* Helper to disable partial resident texture feature from a fence callback */ |
| @@ -275,12 +281,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
| 275 | adev->vm_manager.block_size; | 281 | adev->vm_manager.block_size; |
| 276 | unsigned pt_idx, from, to; | 282 | unsigned pt_idx, from, to; |
| 277 | int r; | 283 | int r; |
| 284 | u64 flags; | ||
| 278 | 285 | ||
| 279 | if (!parent->entries) { | 286 | if (!parent->entries) { |
| 280 | unsigned num_entries = amdgpu_vm_num_entries(adev, level); | 287 | unsigned num_entries = amdgpu_vm_num_entries(adev, level); |
| 281 | 288 | ||
| 282 | parent->entries = drm_calloc_large(num_entries, | 289 | parent->entries = kvmalloc_array(num_entries, |
| 283 | sizeof(struct amdgpu_vm_pt)); | 290 | sizeof(struct amdgpu_vm_pt), |
| 291 | GFP_KERNEL | __GFP_ZERO); | ||
| 284 | if (!parent->entries) | 292 | if (!parent->entries) |
| 285 | return -ENOMEM; | 293 | return -ENOMEM; |
| 286 | memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); | 294 | memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); |
| @@ -299,6 +307,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
| 299 | saddr = saddr & ((1 << shift) - 1); | 307 | saddr = saddr & ((1 << shift) - 1); |
| 300 | eaddr = eaddr & ((1 << shift) - 1); | 308 | eaddr = eaddr & ((1 << shift) - 1); |
| 301 | 309 | ||
| 310 | flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | | ||
| 311 | AMDGPU_GEM_CREATE_VRAM_CLEARED; | ||
| 312 | if (vm->use_cpu_for_update) | ||
| 313 | flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; | ||
| 314 | else | ||
| 315 | flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | | ||
| 316 | AMDGPU_GEM_CREATE_SHADOW); | ||
| 317 | |||
| 302 | /* walk over the address space and allocate the page tables */ | 318 | /* walk over the address space and allocate the page tables */ |
| 303 | for (pt_idx = from; pt_idx <= to; ++pt_idx) { | 319 | for (pt_idx = from; pt_idx <= to; ++pt_idx) { |
| 304 | struct reservation_object *resv = vm->root.bo->tbo.resv; | 320 | struct reservation_object *resv = vm->root.bo->tbo.resv; |
| @@ -310,10 +326,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
| 310 | amdgpu_vm_bo_size(adev, level), | 326 | amdgpu_vm_bo_size(adev, level), |
| 311 | AMDGPU_GPU_PAGE_SIZE, true, | 327 | AMDGPU_GPU_PAGE_SIZE, true, |
| 312 | AMDGPU_GEM_DOMAIN_VRAM, | 328 | AMDGPU_GEM_DOMAIN_VRAM, |
| 313 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | | 329 | flags, |
| 314 | AMDGPU_GEM_CREATE_SHADOW | | ||
| 315 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | | ||
| 316 | AMDGPU_GEM_CREATE_VRAM_CLEARED, | ||
| 317 | NULL, resv, &pt); | 330 | NULL, resv, &pt); |
| 318 | if (r) | 331 | if (r) |
| 319 | return r; | 332 | return r; |
| @@ -391,6 +404,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, | |||
| 391 | atomic_read(&adev->gpu_reset_counter); | 404 | atomic_read(&adev->gpu_reset_counter); |
| 392 | } | 405 | } |
| 393 | 406 | ||
| 407 | static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) | ||
| 408 | { | ||
| 409 | return !!vm->reserved_vmid[vmhub]; | ||
| 410 | } | ||
| 411 | |||
/* idr_mgr->lock must be held */
/*
 * Assign this VM's reserved VMID to @job, deciding whether a VM flush is
 * required first.  A flush is needed when the ID was touched by a GPU reset,
 * was last owned by another client, points at a different page directory,
 * has stale PD/PT updates, or its last flush may not have completed on this
 * ring's fence context.
 *
 * Returns 0 on success or a negative error code from fence bookkeeping.
 */
static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
					       struct amdgpu_ring *ring,
					       struct amdgpu_sync *sync,
					       struct dma_fence *fence,
					       struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	uint64_t fence_context = adev->fence_context + ring->idx;
	struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct dma_fence *updates = sync->last_vm_update;
	int r = 0;
	struct dma_fence *flushed, *tmp;
	bool needs_flush = false;

	flushed = id->flushed_updates;
	if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
	    (atomic64_read(&id->owner) != vm->client_id) ||
	    (job->vm_pd_addr != id->pd_gpu_addr) ||
	    (updates && (!flushed || updates->context != flushed->context ||
		dma_fence_is_later(updates, flushed))) ||
	    (!id->last_flush || (id->last_flush->context != fence_context &&
				 !dma_fence_is_signaled(id->last_flush)))) {
		needs_flush = true;
		/* to prevent one context starved by another context */
		id->pd_gpu_addr = 0;
		tmp = amdgpu_sync_peek_fence(&id->active, ring);
		if (tmp) {
			/* ID still busy on this ring: make the job wait on the
			 * active fence instead of grabbing the ID now.
			 */
			r = amdgpu_sync_fence(adev, sync, tmp);
			return r;
		}
	}

	/* Good we can use this VMID. Remember this submission as
	 * user of the VMID.
	 */
	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
	if (r)
		goto out;

	/* remember the newest PD/PT updates that this ID has seen */
	if (updates && (!flushed || updates->context != flushed->context ||
		dma_fence_is_later(updates, flushed))) {
		dma_fence_put(id->flushed_updates);
		id->flushed_updates = dma_fence_get(updates);
	}
	id->pd_gpu_addr = job->vm_pd_addr;
	atomic64_set(&id->owner, vm->client_id);
	job->vm_needs_flush = needs_flush;
	if (needs_flush) {
		/* the old last_flush no longer describes this ID's state */
		dma_fence_put(id->last_flush);
		id->last_flush = NULL;
	}
	job->vm_id = id - id_mgr->ids;
	trace_amdgpu_vm_grab_id(vm, ring, job);
out:
	return r;
}
| 471 | |||
| 394 | /** | 472 | /** |
| 395 | * amdgpu_vm_grab_id - allocate the next free VMID | 473 | * amdgpu_vm_grab_id - allocate the next free VMID |
| 396 | * | 474 | * |
| @@ -415,12 +493,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | |||
| 415 | unsigned i; | 493 | unsigned i; |
| 416 | int r = 0; | 494 | int r = 0; |
| 417 | 495 | ||
| 496 | mutex_lock(&id_mgr->lock); | ||
| 497 | if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { | ||
| 498 | r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); | ||
| 499 | mutex_unlock(&id_mgr->lock); | ||
| 500 | return r; | ||
| 501 | } | ||
| 418 | fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); | 502 | fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); |
| 419 | if (!fences) | 503 | if (!fences) { |
| 504 | mutex_unlock(&id_mgr->lock); | ||
| 420 | return -ENOMEM; | 505 | return -ENOMEM; |
| 421 | 506 | } | |
| 422 | mutex_lock(&id_mgr->lock); | ||
| 423 | |||
| 424 | /* Check if we have an idle VMID */ | 507 | /* Check if we have an idle VMID */ |
| 425 | i = 0; | 508 | i = 0; |
| 426 | list_for_each_entry(idle, &id_mgr->ids_lru, list) { | 509 | list_for_each_entry(idle, &id_mgr->ids_lru, list) { |
| @@ -521,7 +604,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | |||
| 521 | id->pd_gpu_addr = job->vm_pd_addr; | 604 | id->pd_gpu_addr = job->vm_pd_addr; |
| 522 | dma_fence_put(id->flushed_updates); | 605 | dma_fence_put(id->flushed_updates); |
| 523 | id->flushed_updates = dma_fence_get(updates); | 606 | id->flushed_updates = dma_fence_get(updates); |
| 524 | id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); | ||
| 525 | atomic64_set(&id->owner, vm->client_id); | 607 | atomic64_set(&id->owner, vm->client_id); |
| 526 | 608 | ||
| 527 | needs_flush: | 609 | needs_flush: |
| @@ -540,40 +622,118 @@ error: | |||
| 540 | return r; | 622 | return r; |
| 541 | } | 623 | } |
| 542 | 624 | ||
| 543 | static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) | 625 | static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, |
| 626 | struct amdgpu_vm *vm, | ||
| 627 | unsigned vmhub) | ||
| 628 | { | ||
| 629 | struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; | ||
| 630 | |||
| 631 | mutex_lock(&id_mgr->lock); | ||
| 632 | if (vm->reserved_vmid[vmhub]) { | ||
| 633 | list_add(&vm->reserved_vmid[vmhub]->list, | ||
| 634 | &id_mgr->ids_lru); | ||
| 635 | vm->reserved_vmid[vmhub] = NULL; | ||
| 636 | atomic_dec(&id_mgr->reserved_vmid_num); | ||
| 637 | } | ||
| 638 | mutex_unlock(&id_mgr->lock); | ||
| 639 | } | ||
| 640 | |||
/*
 * Reserve a dedicated VMID on hub @vmhub for @vm by taking the first entry
 * off the ID manager's LRU list.  Idempotent: if the VM already holds a
 * reserved ID, returns 0 without taking another.  Fails with -EINVAL when
 * the per-hub reservation limit (AMDGPU_VM_MAX_RESERVED_VMID) is exceeded.
 */
static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
					 struct amdgpu_vm *vm,
					 unsigned vmhub)
{
	struct amdgpu_vm_id_manager *id_mgr;
	struct amdgpu_vm_id *idle;
	int r = 0;

	id_mgr = &adev->vm_manager.id_mgr[vmhub];
	mutex_lock(&id_mgr->lock);
	if (vm->reserved_vmid[vmhub])
		goto unlock;
	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
	    AMDGPU_VM_MAX_RESERVED_VMID) {
		DRM_ERROR("Over limitation of reserved vmid\n");
		/* undo the optimistic increment before bailing out */
		atomic_dec(&id_mgr->reserved_vmid_num);
		r = -EINVAL;
		goto unlock;
	}
	/* Select the first entry VMID */
	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
	/* removing it from the LRU makes it unavailable to the normal grab path */
	list_del_init(&idle->list);
	vm->reserved_vmid[vmhub] = idle;
	mutex_unlock(&id_mgr->lock);

	return 0;
unlock:
	mutex_unlock(&id_mgr->lock);
	return r;
}
| 671 | |||
| 672 | /** | ||
| 673 | * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug | ||
| 674 | * | ||
| 675 | * @adev: amdgpu_device pointer | ||
| 676 | */ | ||
| 677 | void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) | ||
| 544 | { | 678 | { |
| 545 | struct amdgpu_device *adev = ring->adev; | ||
| 546 | const struct amdgpu_ip_block *ip_block; | 679 | const struct amdgpu_ip_block *ip_block; |
| 680 | bool has_compute_vm_bug; | ||
| 681 | struct amdgpu_ring *ring; | ||
| 682 | int i; | ||
| 547 | 683 | ||
| 548 | if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) | 684 | has_compute_vm_bug = false; |
| 549 | /* only compute rings */ | ||
| 550 | return false; | ||
| 551 | 685 | ||
| 552 | ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); | 686 | ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); |
| 553 | if (!ip_block) | 687 | if (ip_block) { |
| 554 | return false; | 688 | /* Compute has a VM bug for GFX version < 7. |
| 689 | Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ | ||
| 690 | if (ip_block->version->major <= 7) | ||
| 691 | has_compute_vm_bug = true; | ||
| 692 | else if (ip_block->version->major == 8) | ||
| 693 | if (adev->gfx.mec_fw_version < 673) | ||
| 694 | has_compute_vm_bug = true; | ||
| 695 | } | ||
| 555 | 696 | ||
| 556 | if (ip_block->version->major <= 7) { | 697 | for (i = 0; i < adev->num_rings; i++) { |
| 557 | /* gfx7 has no workaround */ | 698 | ring = adev->rings[i]; |
| 558 | return true; | 699 | if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) |
| 559 | } else if (ip_block->version->major == 8) { | 700 | /* only compute rings */ |
| 560 | if (adev->gfx.mec_fw_version >= 673) | 701 | ring->has_compute_vm_bug = has_compute_vm_bug; |
| 561 | /* gfx8 is fixed in MEC firmware 673 */ | ||
| 562 | return false; | ||
| 563 | else | 702 | else |
| 564 | return true; | 703 | ring->has_compute_vm_bug = false; |
| 565 | } | 704 | } |
| 566 | return false; | ||
| 567 | } | 705 | } |
| 568 | 706 | ||
| 569 | static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) | 707 | bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, |
| 708 | struct amdgpu_job *job) | ||
| 570 | { | 709 | { |
| 571 | u64 addr = mc_addr; | 710 | struct amdgpu_device *adev = ring->adev; |
| 711 | unsigned vmhub = ring->funcs->vmhub; | ||
| 712 | struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; | ||
| 713 | struct amdgpu_vm_id *id; | ||
| 714 | bool gds_switch_needed; | ||
| 715 | bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; | ||
| 716 | |||
| 717 | if (job->vm_id == 0) | ||
| 718 | return false; | ||
| 719 | id = &id_mgr->ids[job->vm_id]; | ||
| 720 | gds_switch_needed = ring->funcs->emit_gds_switch && ( | ||
| 721 | id->gds_base != job->gds_base || | ||
| 722 | id->gds_size != job->gds_size || | ||
| 723 | id->gws_base != job->gws_base || | ||
| 724 | id->gws_size != job->gws_size || | ||
| 725 | id->oa_base != job->oa_base || | ||
| 726 | id->oa_size != job->oa_size); | ||
| 572 | 727 | ||
| 573 | if (adev->gart.gart_funcs->adjust_mc_addr) | 728 | if (amdgpu_vm_had_gpu_reset(adev, id)) |
| 574 | addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr); | 729 | return true; |
| 575 | 730 | ||
| 576 | return addr; | 731 | return vm_flush_needed || gds_switch_needed; |
| 732 | } | ||
| 733 | |||
| 734 | static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) | ||
| 735 | { | ||
| 736 | return (adev->mc.real_vram_size == adev->mc.visible_vram_size); | ||
| 577 | } | 737 | } |
| 578 | 738 | ||
| 579 | /** | 739 | /** |
| @@ -598,8 +758,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) | |||
| 598 | id->gws_size != job->gws_size || | 758 | id->gws_size != job->gws_size || |
| 599 | id->oa_base != job->oa_base || | 759 | id->oa_base != job->oa_base || |
| 600 | id->oa_size != job->oa_size); | 760 | id->oa_size != job->oa_size); |
| 601 | bool vm_flush_needed = job->vm_needs_flush || | 761 | bool vm_flush_needed = job->vm_needs_flush; |
| 602 | amdgpu_vm_ring_has_compute_vm_bug(ring); | ||
| 603 | unsigned patch_offset = 0; | 762 | unsigned patch_offset = 0; |
| 604 | int r; | 763 | int r; |
| 605 | 764 | ||
| @@ -614,15 +773,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) | |||
| 614 | if (ring->funcs->init_cond_exec) | 773 | if (ring->funcs->init_cond_exec) |
| 615 | patch_offset = amdgpu_ring_init_cond_exec(ring); | 774 | patch_offset = amdgpu_ring_init_cond_exec(ring); |
| 616 | 775 | ||
| 617 | if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) | ||
| 618 | amdgpu_ring_emit_pipeline_sync(ring); | ||
| 619 | |||
| 620 | if (ring->funcs->emit_vm_flush && vm_flush_needed) { | 776 | if (ring->funcs->emit_vm_flush && vm_flush_needed) { |
| 621 | u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); | ||
| 622 | struct dma_fence *fence; | 777 | struct dma_fence *fence; |
| 623 | 778 | ||
| 624 | trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); | 779 | trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); |
| 625 | amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); | 780 | amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); |
| 626 | 781 | ||
| 627 | r = amdgpu_fence_emit(ring, &fence); | 782 | r = amdgpu_fence_emit(ring, &fence); |
| 628 | if (r) | 783 | if (r) |
| @@ -631,10 +786,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) | |||
| 631 | mutex_lock(&id_mgr->lock); | 786 | mutex_lock(&id_mgr->lock); |
| 632 | dma_fence_put(id->last_flush); | 787 | dma_fence_put(id->last_flush); |
| 633 | id->last_flush = fence; | 788 | id->last_flush = fence; |
| 789 | id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); | ||
| 634 | mutex_unlock(&id_mgr->lock); | 790 | mutex_unlock(&id_mgr->lock); |
| 635 | } | 791 | } |
| 636 | 792 | ||
| 637 | if (gds_switch_needed) { | 793 | if (ring->funcs->emit_gds_switch && gds_switch_needed) { |
| 638 | id->gds_base = job->gds_base; | 794 | id->gds_base = job->gds_base; |
| 639 | id->gds_size = job->gds_size; | 795 | id->gds_size = job->gds_size; |
| 640 | id->gws_base = job->gws_base; | 796 | id->gws_base = job->gws_base; |
| @@ -672,6 +828,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, | |||
| 672 | struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; | 828 | struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; |
| 673 | struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; | 829 | struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; |
| 674 | 830 | ||
| 831 | atomic64_set(&id->owner, 0); | ||
| 675 | id->gds_base = 0; | 832 | id->gds_base = 0; |
| 676 | id->gds_size = 0; | 833 | id->gds_size = 0; |
| 677 | id->gws_base = 0; | 834 | id->gws_base = 0; |
| @@ -681,6 +838,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, | |||
| 681 | } | 838 | } |
| 682 | 839 | ||
/**
 * amdgpu_vm_reset_all_ids - reset VMID to zero
 *
 * @adev: amdgpu device structure
 *
 * Reset VMID to force flush on next use
 */
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vm_id_manager *id_mgr =
			&adev->vm_manager.id_mgr[i];

		/* ID 0 is skipped — presumably reserved for kernel use; confirm */
		for (j = 1; j < id_mgr->num_ids; ++j)
			amdgpu_vm_reset_id(adev, i, j);
	}
}
| 859 | |||
| 860 | /** | ||
| 684 | * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo | 861 | * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo |
| 685 | * | 862 | * |
| 686 | * @vm: requested vm | 863 | * @vm: requested vm |
| @@ -784,6 +961,53 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) | |||
| 784 | return result; | 961 | return result; |
| 785 | } | 962 | } |
| 786 | 963 | ||
| 964 | /** | ||
| 965 | * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU | ||
| 966 | * | ||
| 967 | * @params: see amdgpu_pte_update_params definition | ||
| 968 | * @pe: kmap addr of the page entry | ||
| 969 | * @addr: dst addr to write into pe | ||
| 970 | * @count: number of page entries to update | ||
| 971 | * @incr: increase next addr by incr bytes | ||
| 972 | * @flags: hw access flags | ||
| 973 | * | ||
| 974 | * Write count number of PT/PD entries directly. | ||
| 975 | */ | ||
| 976 | static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, | ||
| 977 | uint64_t pe, uint64_t addr, | ||
| 978 | unsigned count, uint32_t incr, | ||
| 979 | uint64_t flags) | ||
| 980 | { | ||
| 981 | unsigned int i; | ||
| 982 | uint64_t value; | ||
| 983 | |||
| 984 | for (i = 0; i < count; i++) { | ||
| 985 | value = params->pages_addr ? | ||
| 986 | amdgpu_vm_map_gart(params->pages_addr, addr) : | ||
| 987 | addr; | ||
| 988 | amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, | ||
| 989 | i, value, flags); | ||
| 990 | addr += incr; | ||
| 991 | } | ||
| 992 | |||
| 993 | /* Flush HDP */ | ||
| 994 | mb(); | ||
| 995 | amdgpu_gart_flush_gpu_tlb(params->adev, 0); | ||
| 996 | } | ||
| 997 | |||
| 998 | static int amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo) | ||
| 999 | { | ||
| 1000 | struct amdgpu_sync sync; | ||
| 1001 | int r; | ||
| 1002 | |||
| 1003 | amdgpu_sync_create(&sync); | ||
| 1004 | amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); | ||
| 1005 | r = amdgpu_sync_wait(&sync, true); | ||
| 1006 | amdgpu_sync_free(&sync); | ||
| 1007 | |||
| 1008 | return r; | ||
| 1009 | } | ||
| 1010 | |||
| 787 | /* | 1011 | /* |
| 788 | * amdgpu_vm_update_level - update a single level in the hierarchy | 1012 | * amdgpu_vm_update_level - update a single level in the hierarchy |
| 789 | * | 1013 | * |
| @@ -800,11 +1024,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
| 800 | unsigned level) | 1024 | unsigned level) |
| 801 | { | 1025 | { |
| 802 | struct amdgpu_bo *shadow; | 1026 | struct amdgpu_bo *shadow; |
| 803 | struct amdgpu_ring *ring; | 1027 | struct amdgpu_ring *ring = NULL; |
| 804 | uint64_t pd_addr, shadow_addr; | 1028 | uint64_t pd_addr, shadow_addr = 0; |
| 805 | uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); | 1029 | uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); |
| 806 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; | 1030 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; |
| 807 | unsigned count = 0, pt_idx, ndw; | 1031 | unsigned count = 0, pt_idx, ndw = 0; |
| 808 | struct amdgpu_job *job; | 1032 | struct amdgpu_job *job; |
| 809 | struct amdgpu_pte_update_params params; | 1033 | struct amdgpu_pte_update_params params; |
| 810 | struct dma_fence *fence = NULL; | 1034 | struct dma_fence *fence = NULL; |
| @@ -813,34 +1037,54 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
| 813 | 1037 | ||
| 814 | if (!parent->entries) | 1038 | if (!parent->entries) |
| 815 | return 0; | 1039 | return 0; |
| 816 | ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); | ||
| 817 | 1040 | ||
| 818 | /* padding, etc. */ | 1041 | memset(¶ms, 0, sizeof(params)); |
| 819 | ndw = 64; | 1042 | params.adev = adev; |
| 1043 | shadow = parent->bo->shadow; | ||
| 1044 | |||
| 1045 | WARN_ON(vm->use_cpu_for_update && shadow); | ||
| 1046 | if (vm->use_cpu_for_update && !shadow) { | ||
| 1047 | r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr); | ||
| 1048 | if (r) | ||
| 1049 | return r; | ||
| 1050 | r = amdgpu_vm_bo_wait(adev, parent->bo); | ||
| 1051 | if (unlikely(r)) { | ||
| 1052 | amdgpu_bo_kunmap(parent->bo); | ||
| 1053 | return r; | ||
| 1054 | } | ||
| 1055 | params.func = amdgpu_vm_cpu_set_ptes; | ||
| 1056 | } else { | ||
| 1057 | if (shadow) { | ||
| 1058 | r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); | ||
| 1059 | if (r) | ||
| 1060 | return r; | ||
| 1061 | } | ||
| 1062 | ring = container_of(vm->entity.sched, struct amdgpu_ring, | ||
| 1063 | sched); | ||
| 820 | 1064 | ||
| 821 | /* assume the worst case */ | 1065 | /* padding, etc. */ |
| 822 | ndw += parent->last_entry_used * 6; | 1066 | ndw = 64; |
| 823 | 1067 | ||
| 824 | pd_addr = amdgpu_bo_gpu_offset(parent->bo); | 1068 | /* assume the worst case */ |
| 1069 | ndw += parent->last_entry_used * 6; | ||
| 825 | 1070 | ||
| 826 | shadow = parent->bo->shadow; | 1071 | pd_addr = amdgpu_bo_gpu_offset(parent->bo); |
| 827 | if (shadow) { | 1072 | |
| 828 | r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); | 1073 | if (shadow) { |
| 1074 | shadow_addr = amdgpu_bo_gpu_offset(shadow); | ||
| 1075 | ndw *= 2; | ||
| 1076 | } else { | ||
| 1077 | shadow_addr = 0; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); | ||
| 829 | if (r) | 1081 | if (r) |
| 830 | return r; | 1082 | return r; |
| 831 | shadow_addr = amdgpu_bo_gpu_offset(shadow); | ||
| 832 | ndw *= 2; | ||
| 833 | } else { | ||
| 834 | shadow_addr = 0; | ||
| 835 | } | ||
| 836 | 1083 | ||
| 837 | r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); | 1084 | params.ib = &job->ibs[0]; |
| 838 | if (r) | 1085 | params.func = amdgpu_vm_do_set_ptes; |
| 839 | return r; | 1086 | } |
| 840 | 1087 | ||
| 841 | memset(¶ms, 0, sizeof(params)); | ||
| 842 | params.adev = adev; | ||
| 843 | params.ib = &job->ibs[0]; | ||
| 844 | 1088 | ||
| 845 | /* walk over the address space and update the directory */ | 1089 | /* walk over the address space and update the directory */ |
| 846 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | 1090 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { |
| @@ -860,6 +1104,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
| 860 | } | 1104 | } |
| 861 | 1105 | ||
| 862 | pt = amdgpu_bo_gpu_offset(bo); | 1106 | pt = amdgpu_bo_gpu_offset(bo); |
| 1107 | pt = amdgpu_gart_get_vm_pde(adev, pt); | ||
| 863 | if (parent->entries[pt_idx].addr == pt) | 1108 | if (parent->entries[pt_idx].addr == pt) |
| 864 | continue; | 1109 | continue; |
| 865 | 1110 | ||
| @@ -871,19 +1116,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
| 871 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { | 1116 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { |
| 872 | 1117 | ||
| 873 | if (count) { | 1118 | if (count) { |
| 874 | uint64_t pt_addr = | ||
| 875 | amdgpu_vm_adjust_mc_addr(adev, last_pt); | ||
| 876 | |||
| 877 | if (shadow) | 1119 | if (shadow) |
| 878 | amdgpu_vm_do_set_ptes(¶ms, | 1120 | params.func(¶ms, |
| 879 | last_shadow, | 1121 | last_shadow, |
| 880 | pt_addr, count, | 1122 | last_pt, count, |
| 881 | incr, | 1123 | incr, |
| 882 | AMDGPU_PTE_VALID); | 1124 | AMDGPU_PTE_VALID); |
| 883 | 1125 | ||
| 884 | amdgpu_vm_do_set_ptes(¶ms, last_pde, | 1126 | params.func(¶ms, last_pde, |
| 885 | pt_addr, count, incr, | 1127 | last_pt, count, incr, |
| 886 | AMDGPU_PTE_VALID); | 1128 | AMDGPU_PTE_VALID); |
| 887 | } | 1129 | } |
| 888 | 1130 | ||
| 889 | count = 1; | 1131 | count = 1; |
| @@ -896,17 +1138,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
| 896 | } | 1138 | } |
| 897 | 1139 | ||
| 898 | if (count) { | 1140 | if (count) { |
| 899 | uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); | ||
| 900 | |||
| 901 | if (vm->root.bo->shadow) | 1141 | if (vm->root.bo->shadow) |
| 902 | amdgpu_vm_do_set_ptes(¶ms, last_shadow, pt_addr, | 1142 | params.func(¶ms, last_shadow, last_pt, |
| 903 | count, incr, AMDGPU_PTE_VALID); | 1143 | count, incr, AMDGPU_PTE_VALID); |
| 904 | 1144 | ||
| 905 | amdgpu_vm_do_set_ptes(¶ms, last_pde, pt_addr, | 1145 | params.func(¶ms, last_pde, last_pt, |
| 906 | count, incr, AMDGPU_PTE_VALID); | 1146 | count, incr, AMDGPU_PTE_VALID); |
| 907 | } | 1147 | } |
| 908 | 1148 | ||
| 909 | if (params.ib->length_dw == 0) { | 1149 | if (params.func == amdgpu_vm_cpu_set_ptes) |
| 1150 | amdgpu_bo_kunmap(parent->bo); | ||
| 1151 | else if (params.ib->length_dw == 0) { | ||
| 910 | amdgpu_job_free(job); | 1152 | amdgpu_job_free(job); |
| 911 | } else { | 1153 | } else { |
| 912 | amdgpu_ring_pad_ib(ring, params.ib); | 1154 | amdgpu_ring_pad_ib(ring, params.ib); |
| @@ -950,6 +1192,32 @@ error_free: | |||
| 950 | } | 1192 | } |
| 951 | 1193 | ||
| 952 | /* | 1194 | /* |
| 1195 | * amdgpu_vm_invalidate_level - mark all PD levels as invalid | ||
| 1196 | * | ||
| 1197 | * @parent: parent PD | ||
| 1198 | * | ||
| 1199 | * Mark all PD level as invalid after an error. | ||
| 1200 | */ | ||
| 1201 | static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) | ||
| 1202 | { | ||
| 1203 | unsigned pt_idx; | ||
| 1204 | |||
| 1205 | /* | ||
| 1206 | * Recurse into the subdirectories. This recursion is harmless because | ||
| 1207 | * we only have a maximum of 5 layers. | ||
| 1208 | */ | ||
| 1209 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | ||
| 1210 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; | ||
| 1211 | |||
| 1212 | if (!entry->bo) | ||
| 1213 | continue; | ||
| 1214 | |||
| 1215 | entry->addr = ~0ULL; | ||
| 1216 | amdgpu_vm_invalidate_level(entry); | ||
| 1217 | } | ||
| 1218 | } | ||
| 1219 | |||
| 1220 | /* | ||
| 953 | * amdgpu_vm_update_directories - make sure that all directories are valid | 1221 | * amdgpu_vm_update_directories - make sure that all directories are valid |
| 954 | * | 1222 | * |
| 955 | * @adev: amdgpu_device pointer | 1223 | * @adev: amdgpu_device pointer |
| @@ -961,7 +1229,13 @@ error_free: | |||
| 961 | int amdgpu_vm_update_directories(struct amdgpu_device *adev, | 1229 | int amdgpu_vm_update_directories(struct amdgpu_device *adev, |
| 962 | struct amdgpu_vm *vm) | 1230 | struct amdgpu_vm *vm) |
| 963 | { | 1231 | { |
| 964 | return amdgpu_vm_update_level(adev, vm, &vm->root, 0); | 1232 | int r; |
| 1233 | |||
| 1234 | r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); | ||
| 1235 | if (r) | ||
| 1236 | amdgpu_vm_invalidate_level(&vm->root); | ||
| 1237 | |||
| 1238 | return r; | ||
| 965 | } | 1239 | } |
| 966 | 1240 | ||
| 967 | /** | 1241 | /** |
| @@ -1001,58 +1275,37 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, | |||
| 1001 | * @flags: mapping flags | 1275 | * @flags: mapping flags |
| 1002 | * | 1276 | * |
| 1003 | * Update the page tables in the range @start - @end. | 1277 | * Update the page tables in the range @start - @end. |
| 1278 | * Returns 0 for success, -EINVAL for failure. | ||
| 1004 | */ | 1279 | */ |
| 1005 | static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | 1280 | static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, |
| 1006 | uint64_t start, uint64_t end, | 1281 | uint64_t start, uint64_t end, |
| 1007 | uint64_t dst, uint64_t flags) | 1282 | uint64_t dst, uint64_t flags) |
| 1008 | { | 1283 | { |
| 1009 | struct amdgpu_device *adev = params->adev; | 1284 | struct amdgpu_device *adev = params->adev; |
| 1010 | const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; | 1285 | const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; |
| 1011 | 1286 | ||
| 1012 | uint64_t cur_pe_start, cur_nptes, cur_dst; | 1287 | uint64_t addr, pe_start; |
| 1013 | uint64_t addr; /* next GPU address to be updated */ | ||
| 1014 | struct amdgpu_bo *pt; | 1288 | struct amdgpu_bo *pt; |
| 1015 | unsigned nptes; /* next number of ptes to be updated */ | 1289 | unsigned nptes; |
| 1016 | uint64_t next_pe_start; | 1290 | int r; |
| 1017 | 1291 | bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); | |
| 1018 | /* initialize the variables */ | ||
| 1019 | addr = start; | ||
| 1020 | pt = amdgpu_vm_get_pt(params, addr); | ||
| 1021 | if (!pt) { | ||
| 1022 | pr_err("PT not found, aborting update_ptes\n"); | ||
| 1023 | return; | ||
| 1024 | } | ||
| 1025 | |||
| 1026 | if (params->shadow) { | ||
| 1027 | if (!pt->shadow) | ||
| 1028 | return; | ||
| 1029 | pt = pt->shadow; | ||
| 1030 | } | ||
| 1031 | if ((addr & ~mask) == (end & ~mask)) | ||
| 1032 | nptes = end - addr; | ||
| 1033 | else | ||
| 1034 | nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); | ||
| 1035 | |||
| 1036 | cur_pe_start = amdgpu_bo_gpu_offset(pt); | ||
| 1037 | cur_pe_start += (addr & mask) * 8; | ||
| 1038 | cur_nptes = nptes; | ||
| 1039 | cur_dst = dst; | ||
| 1040 | 1292 | ||
| 1041 | /* for next ptb*/ | ||
| 1042 | addr += nptes; | ||
| 1043 | dst += nptes * AMDGPU_GPU_PAGE_SIZE; | ||
| 1044 | 1293 | ||
| 1045 | /* walk over the address space and update the page tables */ | 1294 | /* walk over the address space and update the page tables */ |
| 1046 | while (addr < end) { | 1295 | for (addr = start; addr < end; addr += nptes) { |
| 1047 | pt = amdgpu_vm_get_pt(params, addr); | 1296 | pt = amdgpu_vm_get_pt(params, addr); |
| 1048 | if (!pt) { | 1297 | if (!pt) { |
| 1049 | pr_err("PT not found, aborting update_ptes\n"); | 1298 | pr_err("PT not found, aborting update_ptes\n"); |
| 1050 | return; | 1299 | return -EINVAL; |
| 1051 | } | 1300 | } |
| 1052 | 1301 | ||
| 1053 | if (params->shadow) { | 1302 | if (params->shadow) { |
| 1303 | if (WARN_ONCE(use_cpu_update, | ||
| 1304 | "CPU VM update doesn't support shadow pages")) | ||
| 1305 | return 0; | ||
| 1306 | |||
| 1054 | if (!pt->shadow) | 1307 | if (!pt->shadow) |
| 1055 | return; | 1308 | return 0; |
| 1056 | pt = pt->shadow; | 1309 | pt = pt->shadow; |
| 1057 | } | 1310 | } |
| 1058 | 1311 | ||
| @@ -1061,32 +1314,25 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
| 1061 | else | 1314 | else |
| 1062 | nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); | 1315 | nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); |
| 1063 | 1316 | ||
| 1064 | next_pe_start = amdgpu_bo_gpu_offset(pt); | 1317 | if (use_cpu_update) { |
| 1065 | next_pe_start += (addr & mask) * 8; | 1318 | r = amdgpu_bo_kmap(pt, (void *)&pe_start); |
| 1319 | if (r) | ||
| 1320 | return r; | ||
| 1321 | } else | ||
| 1322 | pe_start = amdgpu_bo_gpu_offset(pt); | ||
| 1066 | 1323 | ||
| 1067 | if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && | 1324 | pe_start += (addr & mask) * 8; |
| 1068 | ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { | ||
| 1069 | /* The next ptb is consecutive to current ptb. | ||
| 1070 | * Don't call the update function now. | ||
| 1071 | * Will update two ptbs together in future. | ||
| 1072 | */ | ||
| 1073 | cur_nptes += nptes; | ||
| 1074 | } else { | ||
| 1075 | params->func(params, cur_pe_start, cur_dst, cur_nptes, | ||
| 1076 | AMDGPU_GPU_PAGE_SIZE, flags); | ||
| 1077 | 1325 | ||
| 1078 | cur_pe_start = next_pe_start; | 1326 | params->func(params, pe_start, dst, nptes, |
| 1079 | cur_nptes = nptes; | 1327 | AMDGPU_GPU_PAGE_SIZE, flags); |
| 1080 | cur_dst = dst; | ||
| 1081 | } | ||
| 1082 | 1328 | ||
| 1083 | /* for next ptb*/ | ||
| 1084 | addr += nptes; | ||
| 1085 | dst += nptes * AMDGPU_GPU_PAGE_SIZE; | 1329 | dst += nptes * AMDGPU_GPU_PAGE_SIZE; |
| 1330 | |||
| 1331 | if (use_cpu_update) | ||
| 1332 | amdgpu_bo_kunmap(pt); | ||
| 1086 | } | 1333 | } |
| 1087 | 1334 | ||
| 1088 | params->func(params, cur_pe_start, cur_dst, cur_nptes, | 1335 | return 0; |
| 1089 | AMDGPU_GPU_PAGE_SIZE, flags); | ||
| 1090 | } | 1336 | } |
| 1091 | 1337 | ||
| 1092 | /* | 1338 | /* |
| @@ -1098,11 +1344,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
| 1098 | * @end: last PTE to handle | 1344 | * @end: last PTE to handle |
| 1099 | * @dst: addr those PTEs should point to | 1345 | * @dst: addr those PTEs should point to |
| 1100 | * @flags: hw mapping flags | 1346 | * @flags: hw mapping flags |
| 1347 | * Returns 0 for success, -EINVAL for failure. | ||
| 1101 | */ | 1348 | */ |
| 1102 | static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | 1349 | static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, |
| 1103 | uint64_t start, uint64_t end, | 1350 | uint64_t start, uint64_t end, |
| 1104 | uint64_t dst, uint64_t flags) | 1351 | uint64_t dst, uint64_t flags) |
| 1105 | { | 1352 | { |
| 1353 | int r; | ||
| 1354 | |||
| 1106 | /** | 1355 | /** |
| 1107 | * The MC L1 TLB supports variable sized pages, based on a fragment | 1356 | * The MC L1 TLB supports variable sized pages, based on a fragment |
| 1108 | * field in the PTE. When this field is set to a non-zero value, page | 1357 | * field in the PTE. When this field is set to a non-zero value, page |
| @@ -1131,28 +1380,30 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
| 1131 | 1380 | ||
| 1132 | /* system pages are non continuously */ | 1381 | /* system pages are non continuously */ |
| 1133 | if (params->src || !(flags & AMDGPU_PTE_VALID) || | 1382 | if (params->src || !(flags & AMDGPU_PTE_VALID) || |
| 1134 | (frag_start >= frag_end)) { | 1383 | (frag_start >= frag_end)) |
| 1135 | 1384 | return amdgpu_vm_update_ptes(params, start, end, dst, flags); | |
| 1136 | amdgpu_vm_update_ptes(params, start, end, dst, flags); | ||
| 1137 | return; | ||
| 1138 | } | ||
| 1139 | 1385 | ||
| 1140 | /* handle the 4K area at the beginning */ | 1386 | /* handle the 4K area at the beginning */ |
| 1141 | if (start != frag_start) { | 1387 | if (start != frag_start) { |
| 1142 | amdgpu_vm_update_ptes(params, start, frag_start, | 1388 | r = amdgpu_vm_update_ptes(params, start, frag_start, |
| 1143 | dst, flags); | 1389 | dst, flags); |
| 1390 | if (r) | ||
| 1391 | return r; | ||
| 1144 | dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; | 1392 | dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; |
| 1145 | } | 1393 | } |
| 1146 | 1394 | ||
| 1147 | /* handle the area in the middle */ | 1395 | /* handle the area in the middle */ |
| 1148 | amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, | 1396 | r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, |
| 1149 | flags | frag_flags); | 1397 | flags | frag_flags); |
| 1398 | if (r) | ||
| 1399 | return r; | ||
| 1150 | 1400 | ||
| 1151 | /* handle the 4K area at the end */ | 1401 | /* handle the 4K area at the end */ |
| 1152 | if (frag_end != end) { | 1402 | if (frag_end != end) { |
| 1153 | dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; | 1403 | dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; |
| 1154 | amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); | 1404 | r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); |
| 1155 | } | 1405 | } |
| 1406 | return r; | ||
| 1156 | } | 1407 | } |
| 1157 | 1408 | ||
| 1158 | /** | 1409 | /** |
| @@ -1194,6 +1445,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
| 1194 | params.vm = vm; | 1445 | params.vm = vm; |
| 1195 | params.src = src; | 1446 | params.src = src; |
| 1196 | 1447 | ||
| 1448 | if (vm->use_cpu_for_update) { | ||
| 1449 | /* params.src is used as flag to indicate system Memory */ | ||
| 1450 | if (pages_addr) | ||
| 1451 | params.src = ~0; | ||
| 1452 | |||
| 1453 | /* Wait for PT BOs to be free. PTs share the same resv. object | ||
| 1454 | * as the root PD BO | ||
| 1455 | */ | ||
| 1456 | r = amdgpu_vm_bo_wait(adev, vm->root.bo); | ||
| 1457 | if (unlikely(r)) | ||
| 1458 | return r; | ||
| 1459 | |||
| 1460 | params.func = amdgpu_vm_cpu_set_ptes; | ||
| 1461 | params.pages_addr = pages_addr; | ||
| 1462 | params.shadow = false; | ||
| 1463 | return amdgpu_vm_frag_ptes(¶ms, start, last + 1, | ||
| 1464 | addr, flags); | ||
| 1465 | } | ||
| 1466 | |||
| 1197 | ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); | 1467 | ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); |
| 1198 | 1468 | ||
| 1199 | /* sync to everything on unmapping */ | 1469 | /* sync to everything on unmapping */ |
| @@ -1273,9 +1543,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
| 1273 | goto error_free; | 1543 | goto error_free; |
| 1274 | 1544 | ||
| 1275 | params.shadow = true; | 1545 | params.shadow = true; |
| 1276 | amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); | 1546 | r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); |
| 1547 | if (r) | ||
| 1548 | goto error_free; | ||
| 1277 | params.shadow = false; | 1549 | params.shadow = false; |
| 1278 | amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); | 1550 | r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); |
| 1551 | if (r) | ||
| 1552 | goto error_free; | ||
| 1279 | 1553 | ||
| 1280 | amdgpu_ring_pad_ib(ring, params.ib); | 1554 | amdgpu_ring_pad_ib(ring, params.ib); |
| 1281 | WARN_ON(params.ib->length_dw > ndw); | 1555 | WARN_ON(params.ib->length_dw > ndw); |
| @@ -2116,20 +2390,25 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) | |||
| 2116 | * | 2390 | * |
| 2117 | * @adev: amdgpu_device pointer | 2391 | * @adev: amdgpu_device pointer |
| 2118 | * @vm: requested vm | 2392 | * @vm: requested vm |
| 2393 | * @vm_context: Indicates if it is a GFX or Compute context | ||
| 2119 | * | 2394 | * |
| 2120 | * Init @vm fields. | 2395 | * Init @vm fields. |
| 2121 | */ | 2396 | */ |
| 2122 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | 2397 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
| 2398 | int vm_context) | ||
| 2123 | { | 2399 | { |
| 2124 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, | 2400 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, |
| 2125 | AMDGPU_VM_PTE_COUNT(adev) * 8); | 2401 | AMDGPU_VM_PTE_COUNT(adev) * 8); |
| 2126 | unsigned ring_instance; | 2402 | unsigned ring_instance; |
| 2127 | struct amdgpu_ring *ring; | 2403 | struct amdgpu_ring *ring; |
| 2128 | struct amd_sched_rq *rq; | 2404 | struct amd_sched_rq *rq; |
| 2129 | int r; | 2405 | int r, i; |
| 2406 | u64 flags; | ||
| 2130 | 2407 | ||
| 2131 | vm->va = RB_ROOT; | 2408 | vm->va = RB_ROOT; |
| 2132 | vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); | 2409 | vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); |
| 2410 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | ||
| 2411 | vm->reserved_vmid[i] = NULL; | ||
| 2133 | spin_lock_init(&vm->status_lock); | 2412 | spin_lock_init(&vm->status_lock); |
| 2134 | INIT_LIST_HEAD(&vm->invalidated); | 2413 | INIT_LIST_HEAD(&vm->invalidated); |
| 2135 | INIT_LIST_HEAD(&vm->cleared); | 2414 | INIT_LIST_HEAD(&vm->cleared); |
| @@ -2146,14 +2425,29 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
| 2146 | if (r) | 2425 | if (r) |
| 2147 | return r; | 2426 | return r; |
| 2148 | 2427 | ||
| 2428 | if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) | ||
| 2429 | vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & | ||
| 2430 | AMDGPU_VM_USE_CPU_FOR_COMPUTE); | ||
| 2431 | else | ||
| 2432 | vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & | ||
| 2433 | AMDGPU_VM_USE_CPU_FOR_GFX); | ||
| 2434 | DRM_DEBUG_DRIVER("VM update mode is %s\n", | ||
| 2435 | vm->use_cpu_for_update ? "CPU" : "SDMA"); | ||
| 2436 | WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), | ||
| 2437 | "CPU update of VM recommended only for large BAR system\n"); | ||
| 2149 | vm->last_dir_update = NULL; | 2438 | vm->last_dir_update = NULL; |
| 2150 | 2439 | ||
| 2440 | flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | | ||
| 2441 | AMDGPU_GEM_CREATE_VRAM_CLEARED; | ||
| 2442 | if (vm->use_cpu_for_update) | ||
| 2443 | flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; | ||
| 2444 | else | ||
| 2445 | flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | | ||
| 2446 | AMDGPU_GEM_CREATE_SHADOW); | ||
| 2447 | |||
| 2151 | r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, | 2448 | r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, |
| 2152 | AMDGPU_GEM_DOMAIN_VRAM, | 2449 | AMDGPU_GEM_DOMAIN_VRAM, |
| 2153 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | | 2450 | flags, |
| 2154 | AMDGPU_GEM_CREATE_SHADOW | | ||
| 2155 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | | ||
| 2156 | AMDGPU_GEM_CREATE_VRAM_CLEARED, | ||
| 2157 | NULL, NULL, &vm->root.bo); | 2451 | NULL, NULL, &vm->root.bo); |
| 2158 | if (r) | 2452 | if (r) |
| 2159 | goto error_free_sched_entity; | 2453 | goto error_free_sched_entity; |
| @@ -2198,7 +2492,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) | |||
| 2198 | for (i = 0; i <= level->last_entry_used; i++) | 2492 | for (i = 0; i <= level->last_entry_used; i++) |
| 2199 | amdgpu_vm_free_levels(&level->entries[i]); | 2493 | amdgpu_vm_free_levels(&level->entries[i]); |
| 2200 | 2494 | ||
| 2201 | drm_free_large(level->entries); | 2495 | kvfree(level->entries); |
| 2202 | } | 2496 | } |
| 2203 | 2497 | ||
| 2204 | /** | 2498 | /** |
| @@ -2214,6 +2508,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
| 2214 | { | 2508 | { |
| 2215 | struct amdgpu_bo_va_mapping *mapping, *tmp; | 2509 | struct amdgpu_bo_va_mapping *mapping, *tmp; |
| 2216 | bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; | 2510 | bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; |
| 2511 | int i; | ||
| 2217 | 2512 | ||
| 2218 | amd_sched_entity_fini(vm->entity.sched, &vm->entity); | 2513 | amd_sched_entity_fini(vm->entity.sched, &vm->entity); |
| 2219 | 2514 | ||
| @@ -2237,6 +2532,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
| 2237 | 2532 | ||
| 2238 | amdgpu_vm_free_levels(&vm->root); | 2533 | amdgpu_vm_free_levels(&vm->root); |
| 2239 | dma_fence_put(vm->last_dir_update); | 2534 | dma_fence_put(vm->last_dir_update); |
| 2535 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | ||
| 2536 | amdgpu_vm_free_reserved_vmid(adev, vm, i); | ||
| 2240 | } | 2537 | } |
| 2241 | 2538 | ||
| 2242 | /** | 2539 | /** |
| @@ -2256,6 +2553,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) | |||
| 2256 | 2553 | ||
| 2257 | mutex_init(&id_mgr->lock); | 2554 | mutex_init(&id_mgr->lock); |
| 2258 | INIT_LIST_HEAD(&id_mgr->ids_lru); | 2555 | INIT_LIST_HEAD(&id_mgr->ids_lru); |
| 2556 | atomic_set(&id_mgr->reserved_vmid_num, 0); | ||
| 2259 | 2557 | ||
| 2260 | /* skip over VMID 0, since it is the system VM */ | 2558 | /* skip over VMID 0, since it is the system VM */ |
| 2261 | for (j = 1; j < id_mgr->num_ids; ++j) { | 2559 | for (j = 1; j < id_mgr->num_ids; ++j) { |
| @@ -2270,11 +2568,27 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) | |||
| 2270 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | 2568 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) |
| 2271 | adev->vm_manager.seqno[i] = 0; | 2569 | adev->vm_manager.seqno[i] = 0; |
| 2272 | 2570 | ||
| 2273 | |||
| 2274 | atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); | 2571 | atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); |
| 2275 | atomic64_set(&adev->vm_manager.client_counter, 0); | 2572 | atomic64_set(&adev->vm_manager.client_counter, 0); |
| 2276 | spin_lock_init(&adev->vm_manager.prt_lock); | 2573 | spin_lock_init(&adev->vm_manager.prt_lock); |
| 2277 | atomic_set(&adev->vm_manager.num_prt_users, 0); | 2574 | atomic_set(&adev->vm_manager.num_prt_users, 0); |
| 2575 | |||
| 2576 | /* If not overridden by the user, by default, only in large BAR systems | ||
| 2577 | * Compute VM tables will be updated by CPU | ||
| 2578 | */ | ||
| 2579 | #ifdef CONFIG_X86_64 | ||
| 2580 | if (amdgpu_vm_update_mode == -1) { | ||
| 2581 | if (amdgpu_vm_is_large_bar(adev)) | ||
| 2582 | adev->vm_manager.vm_update_mode = | ||
| 2583 | AMDGPU_VM_USE_CPU_FOR_COMPUTE; | ||
| 2584 | else | ||
| 2585 | adev->vm_manager.vm_update_mode = 0; | ||
| 2586 | } else | ||
| 2587 | adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; | ||
| 2588 | #else | ||
| 2589 | adev->vm_manager.vm_update_mode = 0; | ||
| 2590 | #endif | ||
| 2591 | |||
| 2278 | } | 2592 | } |
| 2279 | 2593 | ||
| 2280 | /** | 2594 | /** |
| @@ -2302,3 +2616,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) | |||
| 2302 | } | 2616 | } |
| 2303 | } | 2617 | } |
| 2304 | } | 2618 | } |
| 2619 | |||
| 2620 | int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | ||
| 2621 | { | ||
| 2622 | union drm_amdgpu_vm *args = data; | ||
| 2623 | struct amdgpu_device *adev = dev->dev_private; | ||
| 2624 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
| 2625 | int r; | ||
| 2626 | |||
| 2627 | switch (args->in.op) { | ||
| 2628 | case AMDGPU_VM_OP_RESERVE_VMID: | ||
| 2629 | /* currently, we only have a requirement to reserve vmid from gfxhub */ | ||
| 2630 | r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm, | ||
| 2631 | AMDGPU_GFXHUB); | ||
| 2632 | if (r) | ||
| 2633 | return r; | ||
| 2634 | break; | ||
| 2635 | case AMDGPU_VM_OP_UNRESERVE_VMID: | ||
| 2636 | amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB); | ||
| 2637 | break; | ||
| 2638 | default: | ||
| 2639 | return -EINVAL; | ||
| 2640 | } | ||
| 2641 | |||
| 2642 | return 0; | ||
| 2643 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index d97e28b4bdc4..936f158bc5ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | |||
| @@ -84,6 +84,16 @@ struct amdgpu_bo_list_entry; | |||
| 84 | 84 | ||
| 85 | /* hardcode that limit for now */ | 85 | /* hardcode that limit for now */ |
| 86 | #define AMDGPU_VA_RESERVED_SIZE (8 << 20) | 86 | #define AMDGPU_VA_RESERVED_SIZE (8 << 20) |
| 87 | /* max vmids dedicated for process */ | ||
| 88 | #define AMDGPU_VM_MAX_RESERVED_VMID 1 | ||
| 89 | |||
| 90 | #define AMDGPU_VM_CONTEXT_GFX 0 | ||
| 91 | #define AMDGPU_VM_CONTEXT_COMPUTE 1 | ||
| 92 | |||
| 93 | /* See vm_update_mode */ | ||
| 94 | #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) | ||
| 95 | #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) | ||
| 96 | |||
| 87 | 97 | ||
| 88 | struct amdgpu_vm_pt { | 98 | struct amdgpu_vm_pt { |
| 89 | struct amdgpu_bo *bo; | 99 | struct amdgpu_bo *bo; |
| @@ -123,8 +133,13 @@ struct amdgpu_vm { | |||
| 123 | 133 | ||
| 124 | /* client id */ | 134 | /* client id */ |
| 125 | u64 client_id; | 135 | u64 client_id; |
| 136 | /* dedicated to vm */ | ||
| 137 | struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; | ||
| 126 | /* each VM will map on CSA */ | 138 | /* each VM will map on CSA */ |
| 127 | struct amdgpu_bo_va *csa_bo_va; | 139 | struct amdgpu_bo_va *csa_bo_va; |
| 140 | |||
| 141 | /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ | ||
| 142 | bool use_cpu_for_update; | ||
| 128 | }; | 143 | }; |
| 129 | 144 | ||
| 130 | struct amdgpu_vm_id { | 145 | struct amdgpu_vm_id { |
| @@ -152,6 +167,7 @@ struct amdgpu_vm_id_manager { | |||
| 152 | unsigned num_ids; | 167 | unsigned num_ids; |
| 153 | struct list_head ids_lru; | 168 | struct list_head ids_lru; |
| 154 | struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; | 169 | struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; |
| 170 | atomic_t reserved_vmid_num; | ||
| 155 | }; | 171 | }; |
| 156 | 172 | ||
| 157 | struct amdgpu_vm_manager { | 173 | struct amdgpu_vm_manager { |
| @@ -168,8 +184,6 @@ struct amdgpu_vm_manager { | |||
| 168 | uint32_t block_size; | 184 | uint32_t block_size; |
| 169 | /* vram base address for page table entry */ | 185 | /* vram base address for page table entry */ |
| 170 | u64 vram_base_offset; | 186 | u64 vram_base_offset; |
| 171 | /* is vm enabled? */ | ||
| 172 | bool enabled; | ||
| 173 | /* vm pte handling */ | 187 | /* vm pte handling */ |
| 174 | const struct amdgpu_vm_pte_funcs *vm_pte_funcs; | 188 | const struct amdgpu_vm_pte_funcs *vm_pte_funcs; |
| 175 | struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; | 189 | struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; |
| @@ -181,11 +195,18 @@ struct amdgpu_vm_manager { | |||
| 181 | /* partial resident texture handling */ | 195 | /* partial resident texture handling */ |
| 182 | spinlock_t prt_lock; | 196 | spinlock_t prt_lock; |
| 183 | atomic_t num_prt_users; | 197 | atomic_t num_prt_users; |
| 198 | |||
| 199 | /* controls how VM page tables are updated for Graphics and Compute. | ||
| 200 | * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU | ||
| 201 | * BIT1[= 0] Compute updated by SDMA [= 1] by CPU | ||
| 202 | */ | ||
| 203 | int vm_update_mode; | ||
| 184 | }; | 204 | }; |
| 185 | 205 | ||
| 186 | void amdgpu_vm_manager_init(struct amdgpu_device *adev); | 206 | void amdgpu_vm_manager_init(struct amdgpu_device *adev); |
| 187 | void amdgpu_vm_manager_fini(struct amdgpu_device *adev); | 207 | void amdgpu_vm_manager_fini(struct amdgpu_device *adev); |
| 188 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); | 208 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
| 209 | int vm_context); | ||
| 189 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); | 210 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); |
| 190 | void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | 211 | void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, |
| 191 | struct list_head *validated, | 212 | struct list_head *validated, |
| @@ -204,6 +225,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | |||
| 204 | int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); | 225 | int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); |
| 205 | void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, | 226 | void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, |
| 206 | unsigned vmid); | 227 | unsigned vmid); |
| 228 | void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); | ||
| 207 | int amdgpu_vm_update_directories(struct amdgpu_device *adev, | 229 | int amdgpu_vm_update_directories(struct amdgpu_device *adev, |
| 208 | struct amdgpu_vm *vm); | 230 | struct amdgpu_vm *vm); |
| 209 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | 231 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, |
| @@ -238,5 +260,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, | |||
| 238 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | 260 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, |
| 239 | struct amdgpu_bo_va *bo_va); | 261 | struct amdgpu_bo_va *bo_va); |
| 240 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); | 262 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); |
| 263 | int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); | ||
| 264 | bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, | ||
| 265 | struct amdgpu_job *job); | ||
| 266 | void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); | ||
| 241 | 267 | ||
| 242 | #endif | 268 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a4831fe0223b..a2c59a08b2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | |||
| @@ -220,9 +220,9 @@ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, | |||
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { | 222 | const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { |
| 223 | amdgpu_vram_mgr_init, | 223 | .init = amdgpu_vram_mgr_init, |
| 224 | amdgpu_vram_mgr_fini, | 224 | .takedown = amdgpu_vram_mgr_fini, |
| 225 | amdgpu_vram_mgr_new, | 225 | .get_node = amdgpu_vram_mgr_new, |
| 226 | amdgpu_vram_mgr_del, | 226 | .put_node = amdgpu_vram_mgr_del, |
| 227 | amdgpu_vram_mgr_debug | 227 | .debug = amdgpu_vram_mgr_debug |
| 228 | }; | 228 | }; |
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 8c9bc75a9c2d..8a0818b23ea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c | |||
| @@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) | |||
| 165 | struct drm_device *dev = crtc->dev; | 165 | struct drm_device *dev = crtc->dev; |
| 166 | struct amdgpu_device *adev = dev->dev_private; | 166 | struct amdgpu_device *adev = dev->dev_private; |
| 167 | int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); | 167 | int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); |
| 168 | ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; | 168 | ENABLE_DISP_POWER_GATING_PS_ALLOCATION args; |
| 169 | 169 | ||
| 170 | memset(&args, 0, sizeof(args)); | 170 | memset(&args, 0, sizeof(args)); |
| 171 | 171 | ||
| @@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) | |||
| 178 | void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) | 178 | void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) |
| 179 | { | 179 | { |
| 180 | int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); | 180 | int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); |
| 181 | ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; | 181 | ENABLE_DISP_POWER_GATING_PS_ALLOCATION args; |
| 182 | 182 | ||
| 183 | memset(&args, 0, sizeof(args)); | 183 | memset(&args, 0, sizeof(args)); |
| 184 | 184 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 6dc1410b380f..cb508a211b2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include <linux/firmware.h> | 24 | #include <linux/firmware.h> |
| 25 | #include "drmP.h" | 25 | #include <drm/drmP.h> |
| 26 | #include "amdgpu.h" | 26 | #include "amdgpu.h" |
| 27 | #include "amdgpu_pm.h" | 27 | #include "amdgpu_pm.h" |
| 28 | #include "amdgpu_ucode.h" | 28 | #include "amdgpu_ucode.h" |
| @@ -906,6 +906,12 @@ static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev) | |||
| 906 | u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); | 906 | u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); |
| 907 | u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; | 907 | u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; |
| 908 | 908 | ||
| 909 | /* disable mclk switching if the refresh is >120Hz, even if the | ||
| 910 | * blanking period would allow it | ||
| 911 | */ | ||
| 912 | if (amdgpu_dpm_get_vrefresh(adev) > 120) | ||
| 913 | return true; | ||
| 914 | |||
| 909 | if (vblank_time < switch_limit) | 915 | if (vblank_time < switch_limit) |
| 910 | return true; | 916 | return true; |
| 911 | else | 917 | else |
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_smc.c b/drivers/gpu/drm/amd/amdgpu/ci_smc.c index 7eb9069db8e3..b8ba51e045b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_smc.c | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include <linux/firmware.h> | 25 | #include <linux/firmware.h> |
| 26 | #include "drmP.h" | 26 | #include <drm/drmP.h> |
| 27 | #include "amdgpu.h" | 27 | #include "amdgpu.h" |
| 28 | #include "cikd.h" | 28 | #include "cikd.h" |
| 29 | #include "ppsmc.h" | 29 | #include "ppsmc.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 9d33e5641419..37a499ab30eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | #include <linux/firmware.h> | 24 | #include <linux/firmware.h> |
| 25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
| 27 | #include "drmP.h" | 27 | #include <drm/drmP.h> |
| 28 | #include "amdgpu.h" | 28 | #include "amdgpu.h" |
| 29 | #include "amdgpu_atombios.h" | 29 | #include "amdgpu_atombios.h" |
| 30 | #include "amdgpu_ih.h" | 30 | #include "amdgpu_ih.h" |
| @@ -964,62 +964,62 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, | |||
| 964 | } | 964 | } |
| 965 | 965 | ||
| 966 | static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { | 966 | static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { |
| 967 | {mmGRBM_STATUS, false}, | 967 | {mmGRBM_STATUS}, |
| 968 | {mmGB_ADDR_CONFIG, false}, | 968 | {mmGB_ADDR_CONFIG}, |
| 969 | {mmMC_ARB_RAMCFG, false}, | 969 | {mmMC_ARB_RAMCFG}, |
| 970 | {mmGB_TILE_MODE0, false}, | 970 | {mmGB_TILE_MODE0}, |
| 971 | {mmGB_TILE_MODE1, false}, | 971 | {mmGB_TILE_MODE1}, |
| 972 | {mmGB_TILE_MODE2, false}, | 972 | {mmGB_TILE_MODE2}, |
| 973 | {mmGB_TILE_MODE3, false}, | 973 | {mmGB_TILE_MODE3}, |
| 974 | {mmGB_TILE_MODE4, false}, | 974 | {mmGB_TILE_MODE4}, |
| 975 | {mmGB_TILE_MODE5, false}, | 975 | {mmGB_TILE_MODE5}, |
| 976 | {mmGB_TILE_MODE6, false}, | 976 | {mmGB_TILE_MODE6}, |
| 977 | {mmGB_TILE_MODE7, false}, | 977 | {mmGB_TILE_MODE7}, |
| 978 | {mmGB_TILE_MODE8, false}, | 978 | {mmGB_TILE_MODE8}, |
| 979 | {mmGB_TILE_MODE9, false}, | 979 | {mmGB_TILE_MODE9}, |
| 980 | {mmGB_TILE_MODE10, false}, | 980 | {mmGB_TILE_MODE10}, |
| 981 | {mmGB_TILE_MODE11, false}, | 981 | {mmGB_TILE_MODE11}, |
| 982 | {mmGB_TILE_MODE12, false}, | 982 | {mmGB_TILE_MODE12}, |
| 983 | {mmGB_TILE_MODE13, false}, | 983 | {mmGB_TILE_MODE13}, |
| 984 | {mmGB_TILE_MODE14, false}, | 984 | {mmGB_TILE_MODE14}, |
| 985 | {mmGB_TILE_MODE15, false}, | 985 | {mmGB_TILE_MODE15}, |
| 986 | {mmGB_TILE_MODE16, false}, | 986 | {mmGB_TILE_MODE16}, |
| 987 | {mmGB_TILE_MODE17, false}, | 987 | {mmGB_TILE_MODE17}, |
| 988 | {mmGB_TILE_MODE18, false}, | 988 | {mmGB_TILE_MODE18}, |
| 989 | {mmGB_TILE_MODE19, false}, | 989 | {mmGB_TILE_MODE19}, |
| 990 | {mmGB_TILE_MODE20, false}, | 990 | {mmGB_TILE_MODE20}, |
| 991 | {mmGB_TILE_MODE21, false}, | 991 | {mmGB_TILE_MODE21}, |
| 992 | {mmGB_TILE_MODE22, false}, | 992 | {mmGB_TILE_MODE22}, |
| 993 | {mmGB_TILE_MODE23, false}, | 993 | {mmGB_TILE_MODE23}, |
| 994 | {mmGB_TILE_MODE24, false}, | 994 | {mmGB_TILE_MODE24}, |
| 995 | {mmGB_TILE_MODE25, false}, | 995 | {mmGB_TILE_MODE25}, |
| 996 | {mmGB_TILE_MODE26, false}, | 996 | {mmGB_TILE_MODE26}, |
| 997 | {mmGB_TILE_MODE27, false}, | 997 | {mmGB_TILE_MODE27}, |
| 998 | {mmGB_TILE_MODE28, false}, | 998 | {mmGB_TILE_MODE28}, |
| 999 | {mmGB_TILE_MODE29, false}, | 999 | {mmGB_TILE_MODE29}, |
| 1000 | {mmGB_TILE_MODE30, false}, | 1000 | {mmGB_TILE_MODE30}, |
| 1001 | {mmGB_TILE_MODE31, false}, | 1001 | {mmGB_TILE_MODE31}, |
| 1002 | {mmGB_MACROTILE_MODE0, false}, | 1002 | {mmGB_MACROTILE_MODE0}, |
| 1003 | {mmGB_MACROTILE_MODE1, false}, | 1003 | {mmGB_MACROTILE_MODE1}, |
| 1004 | {mmGB_MACROTILE_MODE2, false}, | 1004 | {mmGB_MACROTILE_MODE2}, |
| 1005 | {mmGB_MACROTILE_MODE3, false}, | 1005 | {mmGB_MACROTILE_MODE3}, |
| 1006 | {mmGB_MACROTILE_MODE4, false}, | 1006 | {mmGB_MACROTILE_MODE4}, |
| 1007 | {mmGB_MACROTILE_MODE5, false}, | 1007 | {mmGB_MACROTILE_MODE5}, |
| 1008 | {mmGB_MACROTILE_MODE6, false}, | 1008 | {mmGB_MACROTILE_MODE6}, |
| 1009 | {mmGB_MACROTILE_MODE7, false}, | 1009 | {mmGB_MACROTILE_MODE7}, |
| 1010 | {mmGB_MACROTILE_MODE8, false}, | 1010 | {mmGB_MACROTILE_MODE8}, |
| 1011 | {mmGB_MACROTILE_MODE9, false}, | 1011 | {mmGB_MACROTILE_MODE9}, |
| 1012 | {mmGB_MACROTILE_MODE10, false}, | 1012 | {mmGB_MACROTILE_MODE10}, |
| 1013 | {mmGB_MACROTILE_MODE11, false}, | 1013 | {mmGB_MACROTILE_MODE11}, |
| 1014 | {mmGB_MACROTILE_MODE12, false}, | 1014 | {mmGB_MACROTILE_MODE12}, |
| 1015 | {mmGB_MACROTILE_MODE13, false}, | 1015 | {mmGB_MACROTILE_MODE13}, |
| 1016 | {mmGB_MACROTILE_MODE14, false}, | 1016 | {mmGB_MACROTILE_MODE14}, |
| 1017 | {mmGB_MACROTILE_MODE15, false}, | 1017 | {mmGB_MACROTILE_MODE15}, |
| 1018 | {mmCC_RB_BACKEND_DISABLE, false, true}, | 1018 | {mmCC_RB_BACKEND_DISABLE, true}, |
| 1019 | {mmGC_USER_RB_BACKEND_DISABLE, false, true}, | 1019 | {mmGC_USER_RB_BACKEND_DISABLE, true}, |
| 1020 | {mmGB_BACKEND_MAP, false, false}, | 1020 | {mmGB_BACKEND_MAP, false}, |
| 1021 | {mmPA_SC_RASTER_CONFIG, false, true}, | 1021 | {mmPA_SC_RASTER_CONFIG, true}, |
| 1022 | {mmPA_SC_RASTER_CONFIG_1, false, true}, | 1022 | {mmPA_SC_RASTER_CONFIG_1, true}, |
| 1023 | }; | 1023 | }; |
| 1024 | 1024 | ||
| 1025 | static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, | 1025 | static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, |
| @@ -1050,11 +1050,10 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, | |||
| 1050 | if (reg_offset != cik_allowed_read_registers[i].reg_offset) | 1050 | if (reg_offset != cik_allowed_read_registers[i].reg_offset) |
| 1051 | continue; | 1051 | continue; |
| 1052 | 1052 | ||
| 1053 | if (!cik_allowed_read_registers[i].untouched) | 1053 | *value = cik_allowed_read_registers[i].grbm_indexed ? |
| 1054 | *value = cik_allowed_read_registers[i].grbm_indexed ? | 1054 | cik_read_indexed_register(adev, se_num, |
| 1055 | cik_read_indexed_register(adev, se_num, | 1055 | sh_num, reg_offset) : |
| 1056 | sh_num, reg_offset) : | 1056 | RREG32(reg_offset); |
| 1057 | RREG32(reg_offset); | ||
| 1058 | return 0; | 1057 | return 0; |
| 1059 | } | 1058 | } |
| 1060 | return -EINVAL; | 1059 | return -EINVAL; |
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index c57c3f18af01..b8918432c572 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_ih.h" | 25 | #include "amdgpu_ih.h" |
| 26 | #include "cikd.h" | 26 | #include "cikd.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index a5f294ebff5c..0c1209cdd1cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_ih.h" | 25 | #include "amdgpu_ih.h" |
| 26 | #include "vid.h" | 26 | #include "vid.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 0cdeb6a2e4a0..9f78c03a2e31 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_pm.h" | 25 | #include "amdgpu_pm.h" |
| 26 | #include "amdgpu_i2c.h" | 26 | #include "amdgpu_i2c.h" |
| @@ -1207,8 +1207,11 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, | |||
| 1207 | u32 tmp, wm_mask, lb_vblank_lead_lines = 0; | 1207 | u32 tmp, wm_mask, lb_vblank_lead_lines = 0; |
| 1208 | 1208 | ||
| 1209 | if (amdgpu_crtc->base.enabled && num_heads && mode) { | 1209 | if (amdgpu_crtc->base.enabled && num_heads && mode) { |
| 1210 | active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; | 1210 | active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000, |
| 1211 | line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); | 1211 | (u32)mode->clock); |
| 1212 | line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, | ||
| 1213 | (u32)mode->clock); | ||
| 1214 | line_time = min(line_time, (u32)65535); | ||
| 1212 | 1215 | ||
| 1213 | /* watermark for high clocks */ | 1216 | /* watermark for high clocks */ |
| 1214 | if (adev->pm.dpm_enabled) { | 1217 | if (adev->pm.dpm_enabled) { |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 773654a19749..4bcf01dc567a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_pm.h" | 25 | #include "amdgpu_pm.h" |
| 26 | #include "amdgpu_i2c.h" | 26 | #include "amdgpu_i2c.h" |
| @@ -1176,8 +1176,11 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, | |||
| 1176 | u32 tmp, wm_mask, lb_vblank_lead_lines = 0; | 1176 | u32 tmp, wm_mask, lb_vblank_lead_lines = 0; |
| 1177 | 1177 | ||
| 1178 | if (amdgpu_crtc->base.enabled && num_heads && mode) { | 1178 | if (amdgpu_crtc->base.enabled && num_heads && mode) { |
| 1179 | active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; | 1179 | active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000, |
| 1180 | line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); | 1180 | (u32)mode->clock); |
| 1181 | line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, | ||
| 1182 | (u32)mode->clock); | ||
| 1183 | line_time = min(line_time, (u32)65535); | ||
| 1181 | 1184 | ||
| 1182 | /* watermark for high clocks */ | 1185 | /* watermark for high clocks */ |
| 1183 | if (adev->pm.dpm_enabled) { | 1186 | if (adev->pm.dpm_enabled) { |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 1f3552967ba3..fd134a4629d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_pm.h" | 25 | #include "amdgpu_pm.h" |
| 26 | #include "amdgpu_i2c.h" | 26 | #include "amdgpu_i2c.h" |
| @@ -118,14 +118,27 @@ static const struct { | |||
| 118 | static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev, | 118 | static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev, |
| 119 | u32 block_offset, u32 reg) | 119 | u32 block_offset, u32 reg) |
| 120 | { | 120 | { |
| 121 | DRM_INFO("xxxx: dce_v6_0_audio_endpt_rreg ----no impl!!!!\n"); | 121 | unsigned long flags; |
| 122 | return 0; | 122 | u32 r; |
| 123 | |||
| 124 | spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); | ||
| 125 | WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); | ||
| 126 | r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); | ||
| 127 | spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); | ||
| 128 | |||
| 129 | return r; | ||
| 123 | } | 130 | } |
| 124 | 131 | ||
| 125 | static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev, | 132 | static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev, |
| 126 | u32 block_offset, u32 reg, u32 v) | 133 | u32 block_offset, u32 reg, u32 v) |
| 127 | { | 134 | { |
| 128 | DRM_INFO("xxxx: dce_v6_0_audio_endpt_wreg ----no impl!!!!\n"); | 135 | unsigned long flags; |
| 136 | |||
| 137 | spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); | ||
| 138 | WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, | ||
| 139 | reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK); | ||
| 140 | WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); | ||
| 141 | spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); | ||
| 129 | } | 142 | } |
| 130 | 143 | ||
| 131 | static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc) | 144 | static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc) |
| @@ -501,21 +514,16 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, | |||
| 501 | 514 | ||
| 502 | static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) | 515 | static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) |
| 503 | { | 516 | { |
| 504 | int num_crtc = 0; | ||
| 505 | |||
| 506 | switch (adev->asic_type) { | 517 | switch (adev->asic_type) { |
| 507 | case CHIP_TAHITI: | 518 | case CHIP_TAHITI: |
| 508 | case CHIP_PITCAIRN: | 519 | case CHIP_PITCAIRN: |
| 509 | case CHIP_VERDE: | 520 | case CHIP_VERDE: |
| 510 | num_crtc = 6; | 521 | return 6; |
| 511 | break; | ||
| 512 | case CHIP_OLAND: | 522 | case CHIP_OLAND: |
| 513 | num_crtc = 2; | 523 | return 2; |
| 514 | break; | ||
| 515 | default: | 524 | default: |
| 516 | num_crtc = 0; | 525 | return 0; |
| 517 | } | 526 | } |
| 518 | return num_crtc; | ||
| 519 | } | 527 | } |
| 520 | 528 | ||
| 521 | void dce_v6_0_disable_dce(struct amdgpu_device *adev) | 529 | void dce_v6_0_disable_dce(struct amdgpu_device *adev) |
| @@ -983,8 +991,11 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, | |||
| 983 | fixed20_12 a, b, c; | 991 | fixed20_12 a, b, c; |
| 984 | 992 | ||
| 985 | if (amdgpu_crtc->base.enabled && num_heads && mode) { | 993 | if (amdgpu_crtc->base.enabled && num_heads && mode) { |
| 986 | active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; | 994 | active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000, |
| 987 | line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); | 995 | (u32)mode->clock); |
| 996 | line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, | ||
| 997 | (u32)mode->clock); | ||
| 998 | line_time = min(line_time, (u32)65535); | ||
| 988 | priority_a_cnt = 0; | 999 | priority_a_cnt = 0; |
| 989 | priority_b_cnt = 0; | 1000 | priority_b_cnt = 0; |
| 990 | 1001 | ||
| @@ -1222,17 +1233,17 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev) | |||
| 1222 | dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads); | 1233 | dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads); |
| 1223 | } | 1234 | } |
| 1224 | } | 1235 | } |
| 1225 | /* | 1236 | |
| 1226 | static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev) | 1237 | static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev) |
| 1227 | { | 1238 | { |
| 1228 | int i; | 1239 | int i; |
| 1229 | u32 offset, tmp; | 1240 | u32 tmp; |
| 1230 | 1241 | ||
| 1231 | for (i = 0; i < adev->mode_info.audio.num_pins; i++) { | 1242 | for (i = 0; i < adev->mode_info.audio.num_pins; i++) { |
| 1232 | offset = adev->mode_info.audio.pin[i].offset; | 1243 | tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset, |
| 1233 | tmp = RREG32_AUDIO_ENDPT(offset, | 1244 | ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); |
| 1234 | AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); | 1245 | if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT, |
| 1235 | if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1) | 1246 | PORT_CONNECTIVITY)) |
| 1236 | adev->mode_info.audio.pin[i].connected = false; | 1247 | adev->mode_info.audio.pin[i].connected = false; |
| 1237 | else | 1248 | else |
| 1238 | adev->mode_info.audio.pin[i].connected = true; | 1249 | adev->mode_info.audio.pin[i].connected = true; |
| @@ -1254,45 +1265,206 @@ static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *ade | |||
| 1254 | return NULL; | 1265 | return NULL; |
| 1255 | } | 1266 | } |
| 1256 | 1267 | ||
| 1257 | static void dce_v6_0_afmt_audio_select_pin(struct drm_encoder *encoder) | 1268 | static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder) |
| 1258 | { | 1269 | { |
| 1259 | struct amdgpu_device *adev = encoder->dev->dev_private; | 1270 | struct amdgpu_device *adev = encoder->dev->dev_private; |
| 1260 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | 1271 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); |
| 1261 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | 1272 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; |
| 1262 | u32 offset; | ||
| 1263 | 1273 | ||
| 1264 | if (!dig || !dig->afmt || !dig->afmt->pin) | 1274 | if (!dig || !dig->afmt || !dig->afmt->pin) |
| 1265 | return; | 1275 | return; |
| 1266 | 1276 | ||
| 1267 | offset = dig->afmt->offset; | 1277 | WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, |
| 1268 | 1278 | REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, | |
| 1269 | WREG32(AFMT_AUDIO_SRC_CONTROL + offset, | 1279 | dig->afmt->pin->id)); |
| 1270 | AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); | ||
| 1271 | |||
| 1272 | } | 1280 | } |
| 1273 | 1281 | ||
| 1274 | static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, | 1282 | static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, |
| 1275 | struct drm_display_mode *mode) | 1283 | struct drm_display_mode *mode) |
| 1276 | { | 1284 | { |
| 1277 | DRM_INFO("xxxx: dce_v6_0_audio_write_latency_fields---no imp!!!!!\n"); | 1285 | struct amdgpu_device *adev = encoder->dev->dev_private; |
| 1286 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1287 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1288 | struct drm_connector *connector; | ||
| 1289 | struct amdgpu_connector *amdgpu_connector = NULL; | ||
| 1290 | int interlace = 0; | ||
| 1291 | u32 tmp; | ||
| 1292 | |||
| 1293 | list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { | ||
| 1294 | if (connector->encoder == encoder) { | ||
| 1295 | amdgpu_connector = to_amdgpu_connector(connector); | ||
| 1296 | break; | ||
| 1297 | } | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | if (!amdgpu_connector) { | ||
| 1301 | DRM_ERROR("Couldn't find encoder's connector\n"); | ||
| 1302 | return; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | if (mode->flags & DRM_MODE_FLAG_INTERLACE) | ||
| 1306 | interlace = 1; | ||
| 1307 | |||
| 1308 | if (connector->latency_present[interlace]) { | ||
| 1309 | tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, | ||
| 1310 | VIDEO_LIPSYNC, connector->video_latency[interlace]); | ||
| 1311 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, | ||
| 1312 | AUDIO_LIPSYNC, connector->audio_latency[interlace]); | ||
| 1313 | } else { | ||
| 1314 | tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, | ||
| 1315 | VIDEO_LIPSYNC, 0); | ||
| 1316 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, | ||
| 1317 | AUDIO_LIPSYNC, 0); | ||
| 1318 | } | ||
| 1319 | WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, | ||
| 1320 | ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); | ||
| 1278 | } | 1321 | } |
| 1279 | 1322 | ||
| 1280 | static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) | 1323 | static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) |
| 1281 | { | 1324 | { |
| 1282 | DRM_INFO("xxxx: dce_v6_0_audio_write_speaker_allocation---no imp!!!!!\n"); | 1325 | struct amdgpu_device *adev = encoder->dev->dev_private; |
| 1326 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1327 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1328 | struct drm_connector *connector; | ||
| 1329 | struct amdgpu_connector *amdgpu_connector = NULL; | ||
| 1330 | u8 *sadb = NULL; | ||
| 1331 | int sad_count; | ||
| 1332 | u32 tmp; | ||
| 1333 | |||
| 1334 | list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { | ||
| 1335 | if (connector->encoder == encoder) { | ||
| 1336 | amdgpu_connector = to_amdgpu_connector(connector); | ||
| 1337 | break; | ||
| 1338 | } | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | if (!amdgpu_connector) { | ||
| 1342 | DRM_ERROR("Couldn't find encoder's connector\n"); | ||
| 1343 | return; | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb); | ||
| 1347 | if (sad_count < 0) { | ||
| 1348 | DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); | ||
| 1349 | sad_count = 0; | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | /* program the speaker allocation */ | ||
| 1353 | tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset, | ||
| 1354 | ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); | ||
| 1355 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, | ||
| 1356 | HDMI_CONNECTION, 0); | ||
| 1357 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, | ||
| 1358 | DP_CONNECTION, 0); | ||
| 1359 | |||
| 1360 | if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) | ||
| 1361 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, | ||
| 1362 | DP_CONNECTION, 1); | ||
| 1363 | else | ||
| 1364 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, | ||
| 1365 | HDMI_CONNECTION, 1); | ||
| 1366 | |||
| 1367 | if (sad_count) | ||
| 1368 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, | ||
| 1369 | SPEAKER_ALLOCATION, sadb[0]); | ||
| 1370 | else | ||
| 1371 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, | ||
| 1372 | SPEAKER_ALLOCATION, 5); /* stereo */ | ||
| 1373 | |||
| 1374 | WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, | ||
| 1375 | ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); | ||
| 1376 | |||
| 1377 | kfree(sadb); | ||
| 1283 | } | 1378 | } |
| 1284 | 1379 | ||
| 1285 | static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) | 1380 | static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) |
| 1286 | { | 1381 | { |
| 1287 | DRM_INFO("xxxx: dce_v6_0_audio_write_sad_regs---no imp!!!!!\n"); | 1382 | struct amdgpu_device *adev = encoder->dev->dev_private; |
| 1383 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1384 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1385 | struct drm_connector *connector; | ||
| 1386 | struct amdgpu_connector *amdgpu_connector = NULL; | ||
| 1387 | struct cea_sad *sads; | ||
| 1388 | int i, sad_count; | ||
| 1389 | |||
| 1390 | static const u16 eld_reg_to_type[][2] = { | ||
| 1391 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, | ||
| 1392 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, | ||
| 1393 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, | ||
| 1394 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, | ||
| 1395 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, | ||
| 1396 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, | ||
| 1397 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, | ||
| 1398 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, | ||
| 1399 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, | ||
| 1400 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, | ||
| 1401 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, | ||
| 1402 | { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, | ||
| 1403 | }; | ||
| 1404 | |||
| 1405 | list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { | ||
| 1406 | if (connector->encoder == encoder) { | ||
| 1407 | amdgpu_connector = to_amdgpu_connector(connector); | ||
| 1408 | break; | ||
| 1409 | } | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | if (!amdgpu_connector) { | ||
| 1413 | DRM_ERROR("Couldn't find encoder's connector\n"); | ||
| 1414 | return; | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); | ||
| 1418 | if (sad_count <= 0) { | ||
| 1419 | DRM_ERROR("Couldn't read SADs: %d\n", sad_count); | ||
| 1420 | return; | ||
| 1421 | } | ||
| 1422 | |||
| 1423 | for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { | ||
| 1424 | u32 tmp = 0; | ||
| 1425 | u8 stereo_freqs = 0; | ||
| 1426 | int max_channels = -1; | ||
| 1427 | int j; | ||
| 1428 | |||
| 1429 | for (j = 0; j < sad_count; j++) { | ||
| 1430 | struct cea_sad *sad = &sads[j]; | ||
| 1431 | |||
| 1432 | if (sad->format == eld_reg_to_type[i][1]) { | ||
| 1433 | if (sad->channels > max_channels) { | ||
| 1434 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, | ||
| 1435 | MAX_CHANNELS, sad->channels); | ||
| 1436 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, | ||
| 1437 | DESCRIPTOR_BYTE_2, sad->byte2); | ||
| 1438 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, | ||
| 1439 | SUPPORTED_FREQUENCIES, sad->freq); | ||
| 1440 | max_channels = sad->channels; | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) | ||
| 1444 | stereo_freqs |= sad->freq; | ||
| 1445 | else | ||
| 1446 | break; | ||
| 1447 | } | ||
| 1448 | } | ||
| 1449 | |||
| 1450 | tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, | ||
| 1451 | SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); | ||
| 1452 | WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); | ||
| 1453 | } | ||
| 1454 | |||
| 1455 | kfree(sads); | ||
| 1288 | 1456 | ||
| 1289 | } | 1457 | } |
| 1290 | */ | 1458 | |
| 1291 | static void dce_v6_0_audio_enable(struct amdgpu_device *adev, | 1459 | static void dce_v6_0_audio_enable(struct amdgpu_device *adev, |
| 1292 | struct amdgpu_audio_pin *pin, | 1460 | struct amdgpu_audio_pin *pin, |
| 1293 | bool enable) | 1461 | bool enable) |
| 1294 | { | 1462 | { |
| 1295 | DRM_INFO("xxxx: dce_v6_0_audio_enable---no imp!!!!!\n"); | 1463 | if (!pin) |
| 1464 | return; | ||
| 1465 | |||
| 1466 | WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, | ||
| 1467 | enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0); | ||
| 1296 | } | 1468 | } |
| 1297 | 1469 | ||
| 1298 | static const u32 pin_offsets[7] = | 1470 | static const u32 pin_offsets[7] = |
| @@ -1308,42 +1480,372 @@ static const u32 pin_offsets[7] = | |||
| 1308 | 1480 | ||
| 1309 | static int dce_v6_0_audio_init(struct amdgpu_device *adev) | 1481 | static int dce_v6_0_audio_init(struct amdgpu_device *adev) |
| 1310 | { | 1482 | { |
| 1483 | int i; | ||
| 1484 | |||
| 1485 | if (!amdgpu_audio) | ||
| 1486 | return 0; | ||
| 1487 | |||
| 1488 | adev->mode_info.audio.enabled = true; | ||
| 1489 | |||
| 1490 | switch (adev->asic_type) { | ||
| 1491 | case CHIP_TAHITI: | ||
| 1492 | case CHIP_PITCAIRN: | ||
| 1493 | case CHIP_VERDE: | ||
| 1494 | default: | ||
| 1495 | adev->mode_info.audio.num_pins = 6; | ||
| 1496 | break; | ||
| 1497 | case CHIP_OLAND: | ||
| 1498 | adev->mode_info.audio.num_pins = 2; | ||
| 1499 | break; | ||
| 1500 | } | ||
| 1501 | |||
| 1502 | for (i = 0; i < adev->mode_info.audio.num_pins; i++) { | ||
| 1503 | adev->mode_info.audio.pin[i].channels = -1; | ||
| 1504 | adev->mode_info.audio.pin[i].rate = -1; | ||
| 1505 | adev->mode_info.audio.pin[i].bits_per_sample = -1; | ||
| 1506 | adev->mode_info.audio.pin[i].status_bits = 0; | ||
| 1507 | adev->mode_info.audio.pin[i].category_code = 0; | ||
| 1508 | adev->mode_info.audio.pin[i].connected = false; | ||
| 1509 | adev->mode_info.audio.pin[i].offset = pin_offsets[i]; | ||
| 1510 | adev->mode_info.audio.pin[i].id = i; | ||
| 1511 | dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); | ||
| 1512 | } | ||
| 1513 | |||
| 1311 | return 0; | 1514 | return 0; |
| 1312 | } | 1515 | } |
| 1313 | 1516 | ||
| 1314 | static void dce_v6_0_audio_fini(struct amdgpu_device *adev) | 1517 | static void dce_v6_0_audio_fini(struct amdgpu_device *adev) |
| 1315 | { | 1518 | { |
| 1519 | int i; | ||
| 1316 | 1520 | ||
| 1521 | if (!amdgpu_audio) | ||
| 1522 | return; | ||
| 1523 | |||
| 1524 | if (!adev->mode_info.audio.enabled) | ||
| 1525 | return; | ||
| 1526 | |||
| 1527 | for (i = 0; i < adev->mode_info.audio.num_pins; i++) | ||
| 1528 | dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); | ||
| 1529 | |||
| 1530 | adev->mode_info.audio.enabled = false; | ||
| 1317 | } | 1531 | } |
| 1318 | 1532 | ||
| 1319 | /* | 1533 | static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder) |
| 1320 | static void dce_v6_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock) | ||
| 1321 | { | 1534 | { |
| 1322 | DRM_INFO("xxxx: dce_v6_0_afmt_update_ACR---no imp!!!!!\n"); | 1535 | struct drm_device *dev = encoder->dev; |
| 1536 | struct amdgpu_device *adev = dev->dev_private; | ||
| 1537 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1538 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1539 | u32 tmp; | ||
| 1540 | |||
| 1541 | tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset); | ||
| 1542 | tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); | ||
| 1543 | tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); | ||
| 1544 | tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); | ||
| 1545 | WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); | ||
| 1323 | } | 1546 | } |
| 1324 | */ | 1547 | |
| 1325 | /* | 1548 | static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder, |
| 1326 | * build a HDMI Video Info Frame | 1549 | uint32_t clock, int bpc) |
| 1327 | */ | 1550 | { |
| 1328 | /* | 1551 | struct drm_device *dev = encoder->dev; |
| 1329 | static void dce_v6_0_afmt_update_avi_infoframe(struct drm_encoder *encoder, | 1552 | struct amdgpu_device *adev = dev->dev_private; |
| 1330 | void *buffer, size_t size) | 1553 | struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock); |
| 1554 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1555 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1556 | u32 tmp; | ||
| 1557 | |||
| 1558 | tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset); | ||
| 1559 | tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1); | ||
| 1560 | tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, | ||
| 1561 | bpc > 8 ? 0 : 1); | ||
| 1562 | WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp); | ||
| 1563 | |||
| 1564 | tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset); | ||
| 1565 | tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz); | ||
| 1566 | WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp); | ||
| 1567 | tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset); | ||
| 1568 | tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz); | ||
| 1569 | WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp); | ||
| 1570 | |||
| 1571 | tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset); | ||
| 1572 | tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz); | ||
| 1573 | WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp); | ||
| 1574 | tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset); | ||
| 1575 | tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz); | ||
| 1576 | WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp); | ||
| 1577 | |||
| 1578 | tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset); | ||
| 1579 | tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz); | ||
| 1580 | WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp); | ||
| 1581 | tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset); | ||
| 1582 | tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz); | ||
| 1583 | WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp); | ||
| 1584 | } | ||
| 1585 | |||
| 1586 | static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder, | ||
| 1587 | struct drm_display_mode *mode) | ||
| 1331 | { | 1588 | { |
| 1332 | DRM_INFO("xxxx: dce_v6_0_afmt_update_avi_infoframe---no imp!!!!!\n"); | 1589 | struct drm_device *dev = encoder->dev; |
| 1590 | struct amdgpu_device *adev = dev->dev_private; | ||
| 1591 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1592 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1593 | struct hdmi_avi_infoframe frame; | ||
| 1594 | u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; | ||
| 1595 | uint8_t *payload = buffer + 3; | ||
| 1596 | uint8_t *header = buffer; | ||
| 1597 | ssize_t err; | ||
| 1598 | u32 tmp; | ||
| 1599 | |||
| 1600 | err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); | ||
| 1601 | if (err < 0) { | ||
| 1602 | DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); | ||
| 1603 | return; | ||
| 1604 | } | ||
| 1605 | |||
| 1606 | err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer)); | ||
| 1607 | if (err < 0) { | ||
| 1608 | DRM_ERROR("failed to pack AVI infoframe: %zd\n", err); | ||
| 1609 | return; | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset, | ||
| 1613 | payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24)); | ||
| 1614 | WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset, | ||
| 1615 | payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24)); | ||
| 1616 | WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset, | ||
| 1617 | payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24)); | ||
| 1618 | WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset, | ||
| 1619 | payload[0xC] | (payload[0xD] << 8) | (header[1] << 24)); | ||
| 1620 | |||
| 1621 | tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); | ||
| 1622 | /* anything other than 0 */ | ||
| 1623 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, | ||
| 1624 | HDMI_AUDIO_INFO_LINE, 2); | ||
| 1625 | WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); | ||
| 1333 | } | 1626 | } |
| 1334 | 1627 | ||
| 1335 | static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock) | 1628 | static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock) |
| 1336 | { | 1629 | { |
| 1337 | DRM_INFO("xxxx: dce_v6_0_audio_set_dto---no imp!!!!!\n"); | 1630 | struct drm_device *dev = encoder->dev; |
| 1631 | struct amdgpu_device *adev = dev->dev_private; | ||
| 1632 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); | ||
| 1633 | int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); | ||
| 1634 | u32 tmp; | ||
| 1635 | |||
| 1636 | /* | ||
| 1637 | * Two dtos: generally use dto0 for hdmi, dto1 for dp. | ||
| 1638 | * Express [24MHz / target pixel clock] as an exact rational | ||
| 1639 | * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE | ||
| 1640 | * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator | ||
| 1641 | */ | ||
| 1642 | tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE); | ||
| 1643 | tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, | ||
| 1644 | DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id); | ||
| 1645 | if (em == ATOM_ENCODER_MODE_HDMI) { | ||
| 1646 | tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, | ||
| 1647 | DCCG_AUDIO_DTO_SEL, 0); | ||
| 1648 | } else if (ENCODER_MODE_IS_DP(em)) { | ||
| 1649 | tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, | ||
| 1650 | DCCG_AUDIO_DTO_SEL, 1); | ||
| 1651 | } | ||
| 1652 | WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp); | ||
| 1653 | if (em == ATOM_ENCODER_MODE_HDMI) { | ||
| 1654 | WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000); | ||
| 1655 | WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock); | ||
| 1656 | } else if (ENCODER_MODE_IS_DP(em)) { | ||
| 1657 | WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000); | ||
| 1658 | WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock); | ||
| 1659 | } | ||
| 1338 | } | 1660 | } |
| 1339 | */ | 1661 | |
| 1340 | /* | 1662 | static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder) |
| 1341 | * update the info frames with the data from the current display mode | 1663 | { |
| 1342 | */ | 1664 | struct drm_device *dev = encoder->dev; |
| 1665 | struct amdgpu_device *adev = dev->dev_private; | ||
| 1666 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1667 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1668 | u32 tmp; | ||
| 1669 | |||
| 1670 | tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset); | ||
| 1671 | tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); | ||
| 1672 | WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); | ||
| 1673 | |||
| 1674 | tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset); | ||
| 1675 | tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1); | ||
| 1676 | WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp); | ||
| 1677 | |||
| 1678 | tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset); | ||
| 1679 | tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2); | ||
| 1680 | WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp); | ||
| 1681 | |||
| 1682 | tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset); | ||
| 1683 | tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3); | ||
| 1684 | tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4); | ||
| 1685 | tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5); | ||
| 1686 | tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6); | ||
| 1687 | tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7); | ||
| 1688 | tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8); | ||
| 1689 | WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp); | ||
| 1690 | |||
| 1691 | tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset); | ||
| 1692 | tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff); | ||
| 1693 | WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp); | ||
| 1694 | |||
| 1695 | tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset); | ||
| 1696 | tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1); | ||
| 1697 | tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3); | ||
| 1698 | WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); | ||
| 1699 | |||
| 1700 | tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); | ||
| 1701 | tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1); | ||
| 1702 | tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1); | ||
| 1703 | WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute) | ||
| 1707 | { | ||
| 1708 | struct drm_device *dev = encoder->dev; | ||
| 1709 | struct amdgpu_device *adev = dev->dev_private; | ||
| 1710 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1711 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1712 | u32 tmp; | ||
| 1713 | |||
| 1714 | tmp = RREG32(mmHDMI_GC + dig->afmt->offset); | ||
| 1715 | tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 1 : 0); | ||
| 1716 | WREG32(mmHDMI_GC + dig->afmt->offset, tmp); | ||
| 1717 | } | ||
| 1718 | |||
| 1719 | static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable) | ||
| 1720 | { | ||
| 1721 | struct drm_device *dev = encoder->dev; | ||
| 1722 | struct amdgpu_device *adev = dev->dev_private; | ||
| 1723 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1724 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1725 | u32 tmp; | ||
| 1726 | |||
| 1727 | if (enable) { | ||
| 1728 | tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); | ||
| 1729 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1); | ||
| 1730 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1); | ||
| 1731 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); | ||
| 1732 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1); | ||
| 1733 | WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); | ||
| 1734 | |||
| 1735 | tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); | ||
| 1736 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2); | ||
| 1737 | WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); | ||
| 1738 | |||
| 1739 | tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); | ||
| 1740 | tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); | ||
| 1741 | WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); | ||
| 1742 | } else { | ||
| 1743 | tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); | ||
| 1744 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0); | ||
| 1745 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0); | ||
| 1746 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0); | ||
| 1747 | tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0); | ||
| 1748 | WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); | ||
| 1749 | |||
| 1750 | tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); | ||
| 1751 | tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0); | ||
| 1752 | WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); | ||
| 1753 | } | ||
| 1754 | } | ||
| 1755 | |||
| 1756 | static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable) | ||
| 1757 | { | ||
| 1758 | struct drm_device *dev = encoder->dev; | ||
| 1759 | struct amdgpu_device *adev = dev->dev_private; | ||
| 1760 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1761 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1762 | u32 tmp; | ||
| 1763 | |||
| 1764 | if (enable) { | ||
| 1765 | tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); | ||
| 1766 | tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); | ||
| 1767 | WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); | ||
| 1768 | |||
| 1769 | tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset); | ||
| 1770 | tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1); | ||
| 1771 | WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp); | ||
| 1772 | |||
| 1773 | tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset); | ||
| 1774 | tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1); | ||
| 1775 | tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1); | ||
| 1776 | tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1); | ||
| 1777 | tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); | ||
| 1778 | WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp); | ||
| 1779 | } else { | ||
| 1780 | WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0); | ||
| 1781 | } | ||
| 1782 | } | ||
| 1783 | |||
| 1343 | static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, | 1784 | static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, |
| 1344 | struct drm_display_mode *mode) | 1785 | struct drm_display_mode *mode) |
| 1345 | { | 1786 | { |
| 1346 | DRM_INFO("xxxx: dce_v6_0_afmt_setmode ----no impl !!!!!!!!\n"); | 1787 | struct drm_device *dev = encoder->dev; |
| 1788 | struct amdgpu_device *adev = dev->dev_private; | ||
| 1789 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | ||
| 1790 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | ||
| 1791 | struct drm_connector *connector; | ||
| 1792 | struct amdgpu_connector *amdgpu_connector = NULL; | ||
| 1793 | int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); | ||
| 1794 | int bpc = 8; | ||
| 1795 | |||
| 1796 | if (!dig || !dig->afmt) | ||
| 1797 | return; | ||
| 1798 | |||
| 1799 | list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { | ||
| 1800 | if (connector->encoder == encoder) { | ||
| 1801 | amdgpu_connector = to_amdgpu_connector(connector); | ||
| 1802 | break; | ||
| 1803 | } | ||
| 1804 | } | ||
| 1805 | |||
| 1806 | if (!amdgpu_connector) { | ||
| 1807 | DRM_ERROR("Couldn't find encoder's connector\n"); | ||
| 1808 | return; | ||
| 1809 | } | ||
| 1810 | |||
| 1811 | if (!dig->afmt->enabled) | ||
| 1812 | return; | ||
| 1813 | |||
| 1814 | dig->afmt->pin = dce_v6_0_audio_get_pin(adev); | ||
| 1815 | if (!dig->afmt->pin) | ||
| 1816 | return; | ||
| 1817 | |||
| 1818 | if (encoder->crtc) { | ||
| 1819 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); | ||
| 1820 | bpc = amdgpu_crtc->bpc; | ||
| 1821 | } | ||
| 1822 | |||
| 1823 | /* disable audio before setting up hw */ | ||
| 1824 | dce_v6_0_audio_enable(adev, dig->afmt->pin, false); | ||
| 1825 | |||
| 1826 | dce_v6_0_audio_set_mute(encoder, true); | ||
| 1827 | dce_v6_0_audio_write_speaker_allocation(encoder); | ||
| 1828 | dce_v6_0_audio_write_sad_regs(encoder); | ||
| 1829 | dce_v6_0_audio_write_latency_fields(encoder, mode); | ||
| 1830 | if (em == ATOM_ENCODER_MODE_HDMI) { | ||
| 1831 | dce_v6_0_audio_set_dto(encoder, mode->clock); | ||
| 1832 | dce_v6_0_audio_set_vbi_packet(encoder); | ||
| 1833 | dce_v6_0_audio_set_acr(encoder, mode->clock, bpc); | ||
| 1834 | } else if (ENCODER_MODE_IS_DP(em)) { | ||
| 1835 | dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10); | ||
| 1836 | } | ||
| 1837 | dce_v6_0_audio_set_packet(encoder); | ||
| 1838 | dce_v6_0_audio_select_pin(encoder); | ||
| 1839 | dce_v6_0_audio_set_avi_infoframe(encoder, mode); | ||
| 1840 | dce_v6_0_audio_set_mute(encoder, false); | ||
| 1841 | if (em == ATOM_ENCODER_MODE_HDMI) { | ||
| 1842 | dce_v6_0_audio_hdmi_enable(encoder, 1); | ||
| 1843 | } else if (ENCODER_MODE_IS_DP(em)) { | ||
| 1844 | dce_v6_0_audio_dp_enable(encoder, 1); | ||
| 1845 | } | ||
| 1846 | |||
| 1847 | /* enable audio after setting up hw */ | ||
| 1848 | dce_v6_0_audio_enable(adev, dig->afmt->pin, true); | ||
| 1347 | } | 1849 | } |
| 1348 | 1850 | ||
| 1349 | static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) | 1851 | static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) |
| @@ -1359,6 +1861,7 @@ static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) | |||
| 1359 | /* Silent, r600_hdmi_enable will raise WARN for us */ | 1861 | /* Silent, r600_hdmi_enable will raise WARN for us */ |
| 1360 | if (enable && dig->afmt->enabled) | 1862 | if (enable && dig->afmt->enabled) |
| 1361 | return; | 1863 | return; |
| 1864 | |||
| 1362 | if (!enable && !dig->afmt->enabled) | 1865 | if (!enable && !dig->afmt->enabled) |
| 1363 | return; | 1866 | return; |
| 1364 | 1867 | ||
| @@ -2753,6 +3256,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, | |||
| 2753 | { | 3256 | { |
| 2754 | 3257 | ||
| 2755 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | 3258 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); |
| 3259 | int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); | ||
| 2756 | 3260 | ||
| 2757 | amdgpu_encoder->pixel_clock = adjusted_mode->clock; | 3261 | amdgpu_encoder->pixel_clock = adjusted_mode->clock; |
| 2758 | 3262 | ||
| @@ -2762,7 +3266,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, | |||
| 2762 | /* set scaler clears this on some chips */ | 3266 | /* set scaler clears this on some chips */ |
| 2763 | dce_v6_0_set_interleave(encoder->crtc, mode); | 3267 | dce_v6_0_set_interleave(encoder->crtc, mode); |
| 2764 | 3268 | ||
| 2765 | if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) { | 3269 | if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) { |
| 2766 | dce_v6_0_afmt_enable(encoder, true); | 3270 | dce_v6_0_afmt_enable(encoder, true); |
| 2767 | dce_v6_0_afmt_setmode(encoder, adjusted_mode); | 3271 | dce_v6_0_afmt_setmode(encoder, adjusted_mode); |
| 2768 | } | 3272 | } |
| @@ -2824,11 +3328,12 @@ static void dce_v6_0_encoder_disable(struct drm_encoder *encoder) | |||
| 2824 | 3328 | ||
| 2825 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); | 3329 | struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); |
| 2826 | struct amdgpu_encoder_atom_dig *dig; | 3330 | struct amdgpu_encoder_atom_dig *dig; |
| 3331 | int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); | ||
| 2827 | 3332 | ||
| 2828 | amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); | 3333 | amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); |
| 2829 | 3334 | ||
| 2830 | if (amdgpu_atombios_encoder_is_digital(encoder)) { | 3335 | if (amdgpu_atombios_encoder_is_digital(encoder)) { |
| 2831 | if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) | 3336 | if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) |
| 2832 | dce_v6_0_afmt_enable(encoder, false); | 3337 | dce_v6_0_afmt_enable(encoder, false); |
| 2833 | dig = amdgpu_encoder->enc_priv; | 3338 | dig = amdgpu_encoder->enc_priv; |
| 2834 | dig->dig_encoder = -1; | 3339 | dig->dig_encoder = -1; |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 3c558c170e5e..a9e869554627 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_pm.h" | 25 | #include "amdgpu_pm.h" |
| 26 | #include "amdgpu_i2c.h" | 26 | #include "amdgpu_i2c.h" |
| @@ -1091,8 +1091,11 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, | |||
| 1091 | u32 tmp, wm_mask, lb_vblank_lead_lines = 0; | 1091 | u32 tmp, wm_mask, lb_vblank_lead_lines = 0; |
| 1092 | 1092 | ||
| 1093 | if (amdgpu_crtc->base.enabled && num_heads && mode) { | 1093 | if (amdgpu_crtc->base.enabled && num_heads && mode) { |
| 1094 | active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; | 1094 | active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000, |
| 1095 | line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); | 1095 | (u32)mode->clock); |
| 1096 | line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, | ||
| 1097 | (u32)mode->clock); | ||
| 1098 | line_time = min(line_time, (u32)65535); | ||
| 1096 | 1099 | ||
| 1097 | /* watermark for high clocks */ | 1100 | /* watermark for high clocks */ |
| 1098 | if (adev->pm.dpm_enabled) { | 1101 | if (adev->pm.dpm_enabled) { |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index f1b479b6ac98..90bb08309a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_pm.h" | 25 | #include "amdgpu_pm.h" |
| 26 | #include "amdgpu_i2c.h" | 26 | #include "amdgpu_i2c.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index a125f9d44577..5173ca1fd159 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | |||
| @@ -393,8 +393,11 @@ out: | |||
| 393 | 393 | ||
| 394 | static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) | 394 | static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) |
| 395 | { | 395 | { |
| 396 | const u32 num_tile_mode_states = 32; | 396 | const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); |
| 397 | u32 reg_offset, gb_tile_moden, split_equal_to_row_size; | 397 | u32 reg_offset, split_equal_to_row_size, *tilemode; |
| 398 | |||
| 399 | memset(adev->gfx.config.tile_mode_array, 0, sizeof(adev->gfx.config.tile_mode_array)); | ||
| 400 | tilemode = adev->gfx.config.tile_mode_array; | ||
| 398 | 401 | ||
| 399 | switch (adev->gfx.config.mem_row_size_in_kb) { | 402 | switch (adev->gfx.config.mem_row_size_in_kb) { |
| 400 | case 1: | 403 | case 1: |
| @@ -410,887 +413,680 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) | |||
| 410 | } | 413 | } |
| 411 | 414 | ||
| 412 | if (adev->asic_type == CHIP_VERDE) { | 415 | if (adev->asic_type == CHIP_VERDE) { |
| 413 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { | 416 | tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 414 | switch (reg_offset) { | 417 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 415 | case 0: | 418 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 416 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 419 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | |
| 417 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 420 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 418 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 421 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 419 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | 422 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
| 420 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 423 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 421 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 424 | tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 422 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | 425 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 423 | NUM_BANKS(ADDR_SURF_16_BANK)); | 426 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 424 | break; | 427 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | |
| 425 | case 1: | 428 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 426 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 429 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 427 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 430 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
| 428 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 431 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 429 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | 432 | tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 430 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 433 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 431 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 434 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 432 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | 435 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 433 | NUM_BANKS(ADDR_SURF_16_BANK)); | 436 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 434 | break; | 437 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 435 | case 2: | 438 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
| 436 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 439 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 437 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 440 | tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 438 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 441 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 439 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 442 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 440 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 443 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 441 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 444 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 442 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | 445 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 443 | NUM_BANKS(ADDR_SURF_16_BANK)); | 446 | NUM_BANKS(ADDR_SURF_8_BANK) | |
| 444 | break; | 447 | TILE_SPLIT(split_equal_to_row_size); |
| 445 | case 3: | 448 | tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 446 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 449 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 447 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 450 | PIPE_CONFIG(ADDR_SURF_P4_8x16); |
| 448 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 451 | tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 449 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 452 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 450 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 453 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 451 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 454 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 452 | NUM_BANKS(ADDR_SURF_8_BANK) | | 455 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 453 | TILE_SPLIT(split_equal_to_row_size)); | 456 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 454 | break; | 457 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 455 | case 4: | 458 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 456 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 459 | tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 457 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 460 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 458 | PIPE_CONFIG(ADDR_SURF_P4_8x16)); | 461 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 459 | break; | 462 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 460 | case 5: | 463 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 461 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 464 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 462 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 465 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 463 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 466 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 464 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 467 | tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 465 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 468 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 466 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 469 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 467 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 470 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 468 | NUM_BANKS(ADDR_SURF_4_BANK)); | 471 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 469 | break; | 472 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 470 | case 6: | 473 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 471 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 474 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 472 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 475 | tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); |
| 473 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 476 | tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 474 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 477 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 475 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 478 | PIPE_CONFIG(ADDR_SURF_P4_8x16); |
| 476 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 479 | tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 477 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 480 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 478 | NUM_BANKS(ADDR_SURF_4_BANK)); | 481 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 479 | break; | 482 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 480 | case 7: | 483 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 481 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 484 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 482 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 485 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
| 483 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 486 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 484 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | 487 | tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 485 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 488 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 486 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 489 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 487 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 490 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 488 | NUM_BANKS(ADDR_SURF_2_BANK)); | 491 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 489 | break; | 492 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 490 | case 8: | 493 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 491 | gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); | 494 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 492 | break; | 495 | tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 493 | case 9: | 496 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 494 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 497 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 495 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 498 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 496 | PIPE_CONFIG(ADDR_SURF_P4_8x16)); | 499 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 497 | break; | 500 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 498 | case 10: | 501 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 499 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 502 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 500 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 503 | tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 501 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 504 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 502 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 505 | PIPE_CONFIG(ADDR_SURF_P4_8x16); |
| 503 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 506 | tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 504 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 507 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 505 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | 508 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 506 | NUM_BANKS(ADDR_SURF_16_BANK)); | 509 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 507 | break; | 510 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 508 | case 11: | 511 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 509 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 512 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 510 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 513 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 511 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 514 | tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 512 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 515 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 513 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 516 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 514 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 517 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 515 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 518 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 516 | NUM_BANKS(ADDR_SURF_16_BANK)); | 519 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 517 | break; | 520 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 518 | case 12: | 521 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 519 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 522 | tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 520 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 523 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 521 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 524 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 522 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 525 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 523 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 526 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 524 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 527 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 525 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 528 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 526 | NUM_BANKS(ADDR_SURF_16_BANK)); | 529 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 527 | break; | 530 | tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 528 | case 13: | 531 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 529 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 532 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 530 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 533 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 531 | PIPE_CONFIG(ADDR_SURF_P4_8x16)); | 534 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 532 | break; | 535 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 533 | case 14: | 536 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 534 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 537 | TILE_SPLIT(split_equal_to_row_size); |
| 535 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 538 | tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 536 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 539 | ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
| 537 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 540 | PIPE_CONFIG(ADDR_SURF_P4_8x16); |
| 538 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 541 | tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 539 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 542 | ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | |
| 540 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 543 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 541 | NUM_BANKS(ADDR_SURF_16_BANK)); | 544 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 542 | break; | 545 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 543 | case 15: | 546 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 544 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 547 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 545 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 548 | TILE_SPLIT(split_equal_to_row_size); |
| 546 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 549 | tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 547 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 550 | ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
| 548 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 551 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 549 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 552 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 550 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 553 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 551 | NUM_BANKS(ADDR_SURF_16_BANK)); | 554 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 552 | break; | 555 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 553 | case 16: | 556 | TILE_SPLIT(split_equal_to_row_size); |
| 554 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 557 | tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 555 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 558 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 556 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 559 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 557 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 560 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 558 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 561 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 559 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 562 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 560 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 563 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 561 | NUM_BANKS(ADDR_SURF_16_BANK)); | 564 | NUM_BANKS(ADDR_SURF_8_BANK); |
| 562 | break; | 565 | tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 563 | case 17: | 566 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 564 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 567 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 565 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 568 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 566 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 569 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 567 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 570 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 568 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 571 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 569 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 572 | NUM_BANKS(ADDR_SURF_8_BANK); |
| 570 | NUM_BANKS(ADDR_SURF_16_BANK) | | 573 | tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 571 | TILE_SPLIT(split_equal_to_row_size)); | 574 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 572 | break; | 575 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 573 | case 18: | 576 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 574 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 577 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 575 | ARRAY_MODE(ARRAY_1D_TILED_THICK) | | 578 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 576 | PIPE_CONFIG(ADDR_SURF_P4_8x16)); | 579 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 577 | break; | 580 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 578 | case 19: | 581 | tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 579 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 582 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 580 | ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | | 583 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 581 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 584 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 582 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 585 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 583 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 586 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 584 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 587 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 585 | NUM_BANKS(ADDR_SURF_16_BANK) | | 588 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 586 | TILE_SPLIT(split_equal_to_row_size)); | 589 | tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 587 | break; | 590 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 588 | case 20: | 591 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 589 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 592 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 590 | ARRAY_MODE(ARRAY_2D_TILED_THICK) | | 593 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 591 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 594 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 592 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 595 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 593 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 596 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 594 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 597 | tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 595 | NUM_BANKS(ADDR_SURF_16_BANK) | | 598 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 596 | TILE_SPLIT(split_equal_to_row_size)); | 599 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 597 | break; | 600 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 598 | case 21: | 601 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 599 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 602 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 600 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 603 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 601 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 604 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 602 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 605 | tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 603 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 606 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 604 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 607 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 605 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 608 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 606 | NUM_BANKS(ADDR_SURF_8_BANK)); | 609 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 607 | break; | 610 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 608 | case 22: | 611 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 609 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 612 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 610 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 613 | tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 611 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 614 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 612 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 615 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 613 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 616 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 614 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 617 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 615 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 618 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 616 | NUM_BANKS(ADDR_SURF_8_BANK)); | 619 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 617 | break; | 620 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 618 | case 23: | 621 | tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 619 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 622 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 620 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 623 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 621 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 624 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 622 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 625 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 623 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 626 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 624 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 627 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 625 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 628 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 626 | NUM_BANKS(ADDR_SURF_4_BANK)); | 629 | tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 627 | break; | 630 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 628 | case 24: | 631 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 629 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 632 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | |
| 630 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 633 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 631 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 634 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 632 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 635 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 633 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 636 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 634 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 637 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) |
| 635 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 638 | WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); |
| 636 | NUM_BANKS(ADDR_SURF_4_BANK)); | 639 | } else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) { |
| 637 | break; | 640 | tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 638 | case 25: | 641 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 639 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 642 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 640 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 643 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | |
| 641 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 644 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 642 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | 645 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 643 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 646 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
| 644 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 647 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 645 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 648 | tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 646 | NUM_BANKS(ADDR_SURF_2_BANK)); | 649 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 647 | break; | 650 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 648 | case 26: | 651 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | |
| 649 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 652 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 650 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 653 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 651 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 654 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
| 652 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | 655 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 653 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 656 | tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 654 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 657 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 655 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 658 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 656 | NUM_BANKS(ADDR_SURF_2_BANK)); | 659 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 657 | break; | 660 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 658 | case 27: | 661 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 659 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 662 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
| 660 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 663 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 661 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 664 | tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 662 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | 665 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 663 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 666 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 664 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 667 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 665 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 668 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 666 | NUM_BANKS(ADDR_SURF_2_BANK)); | 669 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 667 | break; | 670 | NUM_BANKS(ADDR_SURF_8_BANK) | |
| 668 | case 28: | 671 | TILE_SPLIT(split_equal_to_row_size); |
| 669 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 672 | tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 670 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 673 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 671 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 674 | PIPE_CONFIG(ADDR_SURF_P2); |
| 672 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | 675 | tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 673 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 676 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 674 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 677 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 675 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 678 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 676 | NUM_BANKS(ADDR_SURF_2_BANK)); | 679 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 677 | break; | 680 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 678 | case 29: | 681 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 679 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 682 | NUM_BANKS(ADDR_SURF_8_BANK); |
| 680 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 683 | tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 681 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 684 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 682 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | 685 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 683 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 686 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 684 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 687 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 685 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 688 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 686 | NUM_BANKS(ADDR_SURF_2_BANK)); | 689 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 687 | break; | 690 | NUM_BANKS(ADDR_SURF_8_BANK); |
| 688 | case 30: | 691 | tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 689 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 692 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 690 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 693 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 691 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 694 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 692 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | | 695 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 693 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 696 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 694 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 697 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 695 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 698 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 696 | NUM_BANKS(ADDR_SURF_2_BANK)); | 699 | tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); |
| 697 | break; | 700 | tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 698 | default: | 701 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 699 | continue; | 702 | PIPE_CONFIG(ADDR_SURF_P2); |
| 700 | } | 703 | tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 701 | adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; | 704 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 702 | WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); | 705 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 703 | } | 706 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 704 | } else if (adev->asic_type == CHIP_OLAND || | 707 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 705 | adev->asic_type == CHIP_HAINAN) { | 708 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 706 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { | 709 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
| 707 | switch (reg_offset) { | 710 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 708 | case 0: | 711 | tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 709 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 712 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 710 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 713 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 711 | PIPE_CONFIG(ADDR_SURF_P2) | | 714 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 712 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | 715 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 713 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 716 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 714 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 717 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 715 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | 718 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 716 | NUM_BANKS(ADDR_SURF_16_BANK)); | 719 | tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 717 | break; | 720 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 718 | case 1: | 721 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 719 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 722 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 720 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 723 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 721 | PIPE_CONFIG(ADDR_SURF_P2) | | 724 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 722 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | 725 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 723 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 726 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 724 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 727 | tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 725 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | 728 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 726 | NUM_BANKS(ADDR_SURF_16_BANK)); | 729 | PIPE_CONFIG(ADDR_SURF_P2); |
| 727 | break; | 730 | tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 728 | case 2: | 731 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 729 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 732 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 730 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 733 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 731 | PIPE_CONFIG(ADDR_SURF_P2) | | 734 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 732 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 735 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 733 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 736 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 734 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 737 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 735 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | 738 | tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 736 | NUM_BANKS(ADDR_SURF_16_BANK)); | 739 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 737 | break; | 740 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 738 | case 3: | 741 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 739 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 742 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 740 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 743 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 741 | PIPE_CONFIG(ADDR_SURF_P2) | | 744 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 742 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 745 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 743 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 746 | tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 744 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 747 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 745 | NUM_BANKS(ADDR_SURF_8_BANK) | | 748 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 746 | TILE_SPLIT(split_equal_to_row_size)); | 749 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 747 | break; | 750 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 748 | case 4: | 751 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 749 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 752 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 750 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 753 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 751 | PIPE_CONFIG(ADDR_SURF_P2)); | 754 | tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 752 | break; | 755 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 753 | case 5: | 756 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 754 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 757 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 755 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 758 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 756 | PIPE_CONFIG(ADDR_SURF_P2) | | 759 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 757 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 760 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 758 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 761 | TILE_SPLIT(split_equal_to_row_size); |
| 759 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 762 | tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 760 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 763 | ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
| 761 | NUM_BANKS(ADDR_SURF_8_BANK)); | 764 | PIPE_CONFIG(ADDR_SURF_P2); |
| 762 | break; | 765 | tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 763 | case 6: | 766 | ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | |
| 764 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 767 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 765 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 768 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 766 | PIPE_CONFIG(ADDR_SURF_P2) | | 769 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 767 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 770 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 768 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 771 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 769 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 772 | TILE_SPLIT(split_equal_to_row_size); |
| 770 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 773 | tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 771 | NUM_BANKS(ADDR_SURF_8_BANK)); | 774 | ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
| 772 | break; | 775 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 773 | case 7: | 776 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 774 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 777 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 775 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 778 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 776 | PIPE_CONFIG(ADDR_SURF_P2) | | 779 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 777 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | 780 | TILE_SPLIT(split_equal_to_row_size); |
| 778 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 781 | tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 779 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 782 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 780 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 783 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 781 | NUM_BANKS(ADDR_SURF_4_BANK)); | 784 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 782 | break; | 785 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | |
| 783 | case 8: | 786 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 784 | gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); | 787 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 785 | break; | 788 | NUM_BANKS(ADDR_SURF_8_BANK); |
| 786 | case 9: | 789 | tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 787 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 790 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 788 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 791 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 789 | PIPE_CONFIG(ADDR_SURF_P2)); | 792 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 790 | break; | 793 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | |
| 791 | case 10: | 794 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 792 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 795 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 793 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 796 | NUM_BANKS(ADDR_SURF_8_BANK); |
| 794 | PIPE_CONFIG(ADDR_SURF_P2) | | 797 | tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 795 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 798 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 796 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 799 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 797 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 800 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 798 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | 801 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 799 | NUM_BANKS(ADDR_SURF_16_BANK)); | 802 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 800 | break; | 803 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 801 | case 11: | 804 | NUM_BANKS(ADDR_SURF_8_BANK); |
| 802 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 805 | tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 803 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 806 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 804 | PIPE_CONFIG(ADDR_SURF_P2) | | 807 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 805 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 808 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 806 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 809 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 807 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 810 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 808 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 811 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 809 | NUM_BANKS(ADDR_SURF_16_BANK)); | 812 | NUM_BANKS(ADDR_SURF_8_BANK); |
| 810 | break; | 813 | tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 811 | case 12: | 814 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 812 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 815 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 813 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 816 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 814 | PIPE_CONFIG(ADDR_SURF_P2) | | 817 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 815 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 818 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 816 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 819 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 817 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 820 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 818 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 821 | tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 819 | NUM_BANKS(ADDR_SURF_16_BANK)); | 822 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 820 | break; | 823 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 821 | case 13: | 824 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 822 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 825 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 823 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 826 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 824 | PIPE_CONFIG(ADDR_SURF_P2)); | 827 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 825 | break; | 828 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 826 | case 14: | 829 | tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 827 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 830 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 828 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 831 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 829 | PIPE_CONFIG(ADDR_SURF_P2) | | 832 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 830 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 833 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 831 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 834 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 832 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 835 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 833 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 836 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 834 | NUM_BANKS(ADDR_SURF_16_BANK)); | 837 | tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 835 | break; | 838 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 836 | case 15: | 839 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 837 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 840 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 838 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 841 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 839 | PIPE_CONFIG(ADDR_SURF_P2) | | 842 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 840 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 843 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 841 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 844 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 842 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 845 | tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 843 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 846 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 844 | NUM_BANKS(ADDR_SURF_16_BANK)); | 847 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 845 | break; | 848 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 846 | case 16: | 849 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 847 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 850 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 848 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 851 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 849 | PIPE_CONFIG(ADDR_SURF_P2) | | 852 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 850 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 853 | tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 851 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 854 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 852 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 855 | PIPE_CONFIG(ADDR_SURF_P2) | |
| 853 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 856 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | |
| 854 | NUM_BANKS(ADDR_SURF_16_BANK)); | 857 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 855 | break; | 858 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 856 | case 17: | 859 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 857 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 860 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 858 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 861 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) |
| 859 | PIPE_CONFIG(ADDR_SURF_P2) | | 862 | WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); |
| 860 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 861 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
| 862 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
| 863 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
| 864 | TILE_SPLIT(split_equal_to_row_size)); | ||
| 865 | break; | ||
| 866 | case 18: | ||
| 867 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 868 | ARRAY_MODE(ARRAY_1D_TILED_THICK) | | ||
| 869 | PIPE_CONFIG(ADDR_SURF_P2)); | ||
| 870 | break; | ||
| 871 | case 19: | ||
| 872 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 873 | ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | | ||
| 874 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 875 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 876 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
| 877 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
| 878 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
| 879 | TILE_SPLIT(split_equal_to_row_size)); | ||
| 880 | break; | ||
| 881 | case 20: | ||
| 882 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 883 | ARRAY_MODE(ARRAY_2D_TILED_THICK) | | ||
| 884 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 885 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 886 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
| 887 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
| 888 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
| 889 | TILE_SPLIT(split_equal_to_row_size)); | ||
| 890 | break; | ||
| 891 | case 21: | ||
| 892 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 893 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 894 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 895 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
| 896 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | | ||
| 897 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
| 898 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 899 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
| 900 | break; | ||
| 901 | case 22: | ||
| 902 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 903 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 904 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 905 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
| 906 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | | ||
| 907 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
| 908 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 909 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
| 910 | break; | ||
| 911 | case 23: | ||
| 912 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 913 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 914 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 915 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
| 916 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 917 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
| 918 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 919 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
| 920 | break; | ||
| 921 | case 24: | ||
| 922 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 923 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 924 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 925 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | ||
| 926 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 927 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
| 928 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 929 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
| 930 | break; | ||
| 931 | case 25: | ||
| 932 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 933 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 934 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 935 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 936 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 937 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
| 938 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 939 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
| 940 | break; | ||
| 941 | case 26: | ||
| 942 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 943 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 944 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 945 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 946 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 947 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
| 948 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 949 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
| 950 | break; | ||
| 951 | case 27: | ||
| 952 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 953 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 954 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 955 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 956 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 957 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
| 958 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 959 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
| 960 | break; | ||
| 961 | case 28: | ||
| 962 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 963 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 964 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 965 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 966 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 967 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
| 968 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 969 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
| 970 | break; | ||
| 971 | case 29: | ||
| 972 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 973 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 974 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 975 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 976 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 977 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
| 978 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 979 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
| 980 | break; | ||
| 981 | case 30: | ||
| 982 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 983 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 984 | PIPE_CONFIG(ADDR_SURF_P2) | | ||
| 985 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | | ||
| 986 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 987 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
| 988 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 989 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
| 990 | break; | ||
| 991 | default: | ||
| 992 | continue; | ||
| 993 | } | ||
| 994 | adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; | ||
| 995 | WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); | ||
| 996 | } | ||
| 997 | } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) { | 863 | } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) { |
| 998 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { | 864 | tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 999 | switch (reg_offset) { | 865 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1000 | case 0: | 866 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1001 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 867 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | |
| 1002 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 868 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1003 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 869 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1004 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | 870 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 1005 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 871 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1006 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 872 | tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 1007 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 873 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1008 | NUM_BANKS(ADDR_SURF_16_BANK)); | 874 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1009 | break; | 875 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | |
| 1010 | case 1: | 876 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1011 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 877 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1012 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 878 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 1013 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 879 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1014 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | 880 | tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 1015 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 881 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1016 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 882 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1017 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 883 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1018 | NUM_BANKS(ADDR_SURF_16_BANK)); | 884 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1019 | break; | 885 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1020 | case 2: | 886 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 1021 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 887 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1022 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 888 | tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 1023 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 889 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1024 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 890 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1025 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 891 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1026 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 892 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1027 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 893 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1028 | NUM_BANKS(ADDR_SURF_16_BANK)); | 894 | NUM_BANKS(ADDR_SURF_4_BANK) | |
| 1029 | break; | 895 | TILE_SPLIT(split_equal_to_row_size); |
| 1030 | case 3: | 896 | tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 1031 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 897 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 1032 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 898 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); |
| 1033 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 899 | tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 1034 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 900 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1035 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 901 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1036 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 902 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 1037 | NUM_BANKS(ADDR_SURF_4_BANK) | | 903 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1038 | TILE_SPLIT(split_equal_to_row_size)); | 904 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1039 | break; | 905 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1040 | case 4: | 906 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1041 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 907 | tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 1042 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 908 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1043 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); | 909 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1044 | break; | 910 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1045 | case 5: | 911 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1046 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 912 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | |
| 1047 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 913 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1048 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 914 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1049 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 915 | tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
| 1050 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 916 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1051 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 917 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 1052 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 918 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 1053 | NUM_BANKS(ADDR_SURF_2_BANK)); | 919 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1054 | break; | 920 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1055 | case 6: | 921 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1056 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 922 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1057 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 923 | tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); |
| 1058 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 924 | tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 1059 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 925 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 1060 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 926 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); |
| 1061 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | | 927 | tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 1062 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 928 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1063 | NUM_BANKS(ADDR_SURF_2_BANK)); | 929 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1064 | break; | 930 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1065 | case 7: | 931 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1066 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 932 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1067 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 933 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 1068 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | 934 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1069 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | 935 | tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 1070 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 936 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1071 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 937 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1072 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 938 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1073 | NUM_BANKS(ADDR_SURF_2_BANK)); | 939 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1074 | break; | 940 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 1075 | case 8: | 941 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
| 1076 | gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); | 942 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1077 | break; | 943 | tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | |
| 1078 | case 9: | 944 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1079 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 945 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1080 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 946 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 1081 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); | 947 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1082 | break; | 948 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 1083 | case 10: | 949 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1084 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 950 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1085 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 951 | tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1086 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 952 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
| 1087 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 953 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); |
| 1088 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 954 | tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1089 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 955 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1090 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 956 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1091 | NUM_BANKS(ADDR_SURF_16_BANK)); | 957 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1092 | break; | 958 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1093 | case 11: | 959 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1094 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 960 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1095 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 961 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1096 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 962 | tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1097 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 963 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1098 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 964 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1099 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 965 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1100 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | 966 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1101 | NUM_BANKS(ADDR_SURF_16_BANK)); | 967 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 1102 | break; | 968 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1103 | case 12: | 969 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1104 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | 970 | tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1105 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 971 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1106 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 972 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1107 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 973 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 1108 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 974 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1109 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 975 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 1110 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 976 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1111 | NUM_BANKS(ADDR_SURF_16_BANK)); | 977 | NUM_BANKS(ADDR_SURF_16_BANK); |
| 1112 | break; | 978 | tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1113 | case 13: | 979 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1114 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 980 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1115 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 981 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1116 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); | 982 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 1117 | break; | 983 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1118 | case 14: | 984 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 1119 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 985 | TILE_SPLIT(split_equal_to_row_size); |
| 1120 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 986 | tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1121 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 987 | ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
| 1122 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 988 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); |
| 1123 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 989 | tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1124 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 990 | ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | |
| 1125 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 991 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1126 | NUM_BANKS(ADDR_SURF_16_BANK)); | 992 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1127 | break; | 993 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 1128 | case 15: | 994 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1129 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 995 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 1130 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 996 | TILE_SPLIT(split_equal_to_row_size); |
| 1131 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 997 | tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1132 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 998 | ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
| 1133 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 999 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1134 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | 1000 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1135 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1001 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
| 1136 | NUM_BANKS(ADDR_SURF_16_BANK)); | 1002 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1137 | break; | 1003 | NUM_BANKS(ADDR_SURF_16_BANK) | |
| 1138 | case 16: | 1004 | TILE_SPLIT(split_equal_to_row_size); |
| 1139 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1005 | tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1140 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1006 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1141 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 1007 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1142 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 1008 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1143 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 1009 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1144 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 1010 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | |
| 1145 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1011 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1146 | NUM_BANKS(ADDR_SURF_16_BANK)); | 1012 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 1147 | break; | 1013 | tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1148 | case 17: | 1014 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1149 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1015 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1150 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1016 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1151 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 1017 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1152 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 1018 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1153 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 1019 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1154 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1020 | NUM_BANKS(ADDR_SURF_4_BANK); |
| 1155 | NUM_BANKS(ADDR_SURF_16_BANK) | | 1021 | tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1156 | TILE_SPLIT(split_equal_to_row_size)); | 1022 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1157 | break; | 1023 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1158 | case 18: | 1024 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
| 1159 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1025 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1160 | ARRAY_MODE(ARRAY_1D_TILED_THICK) | | 1026 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | |
| 1161 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); | 1027 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1162 | break; | 1028 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1163 | case 19: | 1029 | tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1164 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1030 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1165 | ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | | 1031 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | |
| 1166 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 1032 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
| 1167 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 1033 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1168 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 1034 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1169 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1035 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1170 | NUM_BANKS(ADDR_SURF_16_BANK) | | 1036 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1171 | TILE_SPLIT(split_equal_to_row_size)); | 1037 | tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1172 | break; | 1038 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1173 | case 20: | 1039 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 1174 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1040 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 1175 | ARRAY_MODE(ARRAY_2D_TILED_THICK) | | 1041 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1176 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 1042 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1177 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 1043 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1178 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | 1044 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1179 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1045 | tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1180 | NUM_BANKS(ADDR_SURF_16_BANK) | | 1046 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1181 | TILE_SPLIT(split_equal_to_row_size)); | 1047 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 1182 | break; | 1048 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 1183 | case 21: | 1049 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1184 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1050 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1185 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1051 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1186 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 1052 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1187 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 1053 | tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1188 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 1054 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1189 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | | 1055 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 1190 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1056 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 1191 | NUM_BANKS(ADDR_SURF_4_BANK)); | 1057 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1192 | break; | 1058 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1193 | case 22: | 1059 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1194 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1060 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1195 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1061 | tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1196 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 1062 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1197 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 1063 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 1198 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 1064 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 1199 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 1065 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1200 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1066 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1201 | NUM_BANKS(ADDR_SURF_4_BANK)); | 1067 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1202 | break; | 1068 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1203 | case 23: | 1069 | tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1204 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1070 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1205 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1071 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 1206 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 1072 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | |
| 1207 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 1073 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1208 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 1074 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
| 1209 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | | 1075 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1210 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1076 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1211 | NUM_BANKS(ADDR_SURF_2_BANK)); | 1077 | tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | |
| 1212 | break; | 1078 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 1213 | case 24: | 1079 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | |
| 1214 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | 1080 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | |
| 1215 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1081 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
| 1216 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | 1082 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
| 1217 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 1083 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
| 1218 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | 1084 | NUM_BANKS(ADDR_SURF_2_BANK); |
| 1219 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | 1085 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) |
| 1220 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | 1086 | WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); |
| 1221 | NUM_BANKS(ADDR_SURF_2_BANK)); | 1087 | } else { |
| 1222 | break; | ||
| 1223 | case 25: | ||
| 1224 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 1225 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 1226 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
| 1227 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 1228 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 1229 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
| 1230 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 1231 | NUM_BANKS(ADDR_SURF_2_BANK)); | ||
| 1232 | break; | ||
| 1233 | case 26: | ||
| 1234 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 1235 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 1236 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
| 1237 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 1238 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 1239 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
| 1240 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 1241 | NUM_BANKS(ADDR_SURF_2_BANK)); | ||
| 1242 | break; | ||
| 1243 | case 27: | ||
| 1244 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 1245 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 1246 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
| 1247 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 1248 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 1249 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
| 1250 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 1251 | NUM_BANKS(ADDR_SURF_2_BANK)); | ||
| 1252 | break; | ||
| 1253 | case 28: | ||
| 1254 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 1255 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 1256 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
| 1257 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 1258 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 1259 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
| 1260 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 1261 | NUM_BANKS(ADDR_SURF_2_BANK)); | ||
| 1262 | break; | ||
| 1263 | case 29: | ||
| 1264 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 1265 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 1266 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
| 1267 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
| 1268 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 1269 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
| 1270 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 1271 | NUM_BANKS(ADDR_SURF_2_BANK)); | ||
| 1272 | break; | ||
| 1273 | case 30: | ||
| 1274 | gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
| 1275 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
| 1276 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
| 1277 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | | ||
| 1278 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
| 1279 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
| 1280 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
| 1281 | NUM_BANKS(ADDR_SURF_2_BANK)); | ||
| 1282 | break; | ||
| 1283 | default: | ||
| 1284 | continue; | ||
| 1285 | } | ||
| 1286 | adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; | ||
| 1287 | WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); | ||
| 1288 | } | ||
| 1289 | } else{ | ||
| 1290 | |||
| 1291 | DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); | 1088 | DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); |
| 1292 | } | 1089 | } |
| 1293 | |||
| 1294 | } | 1090 | } |
| 1295 | 1091 | ||
| 1296 | static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, | 1092 | static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, |
| @@ -1318,11 +1114,6 @@ static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, | |||
| 1318 | WREG32(mmGRBM_GFX_INDEX, data); | 1114 | WREG32(mmGRBM_GFX_INDEX, data); |
| 1319 | } | 1115 | } |
| 1320 | 1116 | ||
| 1321 | static u32 gfx_v6_0_create_bitmask(u32 bit_width) | ||
| 1322 | { | ||
| 1323 | return (u32)(((u64)1 << bit_width) - 1); | ||
| 1324 | } | ||
| 1325 | |||
| 1326 | static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) | 1117 | static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) |
| 1327 | { | 1118 | { |
| 1328 | u32 data, mask; | 1119 | u32 data, mask; |
| @@ -1332,8 +1123,8 @@ static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) | |||
| 1332 | 1123 | ||
| 1333 | data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); | 1124 | data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); |
| 1334 | 1125 | ||
| 1335 | mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_backends_per_se/ | 1126 | mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se/ |
| 1336 | adev->gfx.config.max_sh_per_se); | 1127 | adev->gfx.config.max_sh_per_se); |
| 1337 | 1128 | ||
| 1338 | return ~data & mask; | 1129 | return ~data & mask; |
| 1339 | } | 1130 | } |
| @@ -1399,11 +1190,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, | |||
| 1399 | if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { | 1190 | if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { |
| 1400 | raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK; | 1191 | raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK; |
| 1401 | 1192 | ||
| 1402 | if (!se_mask[idx]) { | 1193 | if (!se_mask[idx]) |
| 1403 | raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; | 1194 | raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; |
| 1404 | } else { | 1195 | else |
| 1405 | raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; | 1196 | raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; |
| 1406 | } | ||
| 1407 | } | 1197 | } |
| 1408 | 1198 | ||
| 1409 | pkr0_mask &= rb_mask; | 1199 | pkr0_mask &= rb_mask; |
| @@ -1411,11 +1201,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, | |||
| 1411 | if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { | 1201 | if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { |
| 1412 | raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK; | 1202 | raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK; |
| 1413 | 1203 | ||
| 1414 | if (!pkr0_mask) { | 1204 | if (!pkr0_mask) |
| 1415 | raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; | 1205 | raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; |
| 1416 | } else { | 1206 | else |
| 1417 | raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; | 1207 | raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; |
| 1418 | } | ||
| 1419 | } | 1208 | } |
| 1420 | 1209 | ||
| 1421 | if (rb_per_se >= 2) { | 1210 | if (rb_per_se >= 2) { |
| @@ -1427,13 +1216,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, | |||
| 1427 | if (!rb0_mask || !rb1_mask) { | 1216 | if (!rb0_mask || !rb1_mask) { |
| 1428 | raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK; | 1217 | raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK; |
| 1429 | 1218 | ||
| 1430 | if (!rb0_mask) { | 1219 | if (!rb0_mask) |
| 1431 | raster_config_se |= | 1220 | raster_config_se |= |
| 1432 | RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; | 1221 | RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; |
| 1433 | } else { | 1222 | else |
| 1434 | raster_config_se |= | 1223 | raster_config_se |= |
| 1435 | RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; | 1224 | RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; |
| 1436 | } | ||
| 1437 | } | 1225 | } |
| 1438 | 1226 | ||
| 1439 | if (rb_per_se > 2) { | 1227 | if (rb_per_se > 2) { |
| @@ -1444,13 +1232,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, | |||
| 1444 | if (!rb0_mask || !rb1_mask) { | 1232 | if (!rb0_mask || !rb1_mask) { |
| 1445 | raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK; | 1233 | raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK; |
| 1446 | 1234 | ||
| 1447 | if (!rb0_mask) { | 1235 | if (!rb0_mask) |
| 1448 | raster_config_se |= | 1236 | raster_config_se |= |
| 1449 | RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; | 1237 | RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; |
| 1450 | } else { | 1238 | else |
| 1451 | raster_config_se |= | 1239 | raster_config_se |= |
| 1452 | RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; | 1240 | RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; |
| 1453 | } | ||
| 1454 | } | 1241 | } |
| 1455 | } | 1242 | } |
| 1456 | } | 1243 | } |
| @@ -1479,8 +1266,9 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) | |||
| 1479 | for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { | 1266 | for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { |
| 1480 | gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); | 1267 | gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); |
| 1481 | data = gfx_v6_0_get_rb_active_bitmap(adev); | 1268 | data = gfx_v6_0_get_rb_active_bitmap(adev); |
| 1482 | active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * | 1269 | active_rbs |= data << |
| 1483 | rb_bitmap_width_per_sh); | 1270 | ((i * adev->gfx.config.max_sh_per_se + j) * |
| 1271 | rb_bitmap_width_per_sh); | ||
| 1484 | } | 1272 | } |
| 1485 | } | 1273 | } |
| 1486 | gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); | 1274 | gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); |
| @@ -1494,13 +1282,12 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) | |||
| 1494 | gfx_v6_0_raster_config(adev, &raster_config); | 1282 | gfx_v6_0_raster_config(adev, &raster_config); |
| 1495 | 1283 | ||
| 1496 | if (!adev->gfx.config.backend_enable_mask || | 1284 | if (!adev->gfx.config.backend_enable_mask || |
| 1497 | adev->gfx.config.num_rbs >= num_rb_pipes) { | 1285 | adev->gfx.config.num_rbs >= num_rb_pipes) |
| 1498 | WREG32(mmPA_SC_RASTER_CONFIG, raster_config); | 1286 | WREG32(mmPA_SC_RASTER_CONFIG, raster_config); |
| 1499 | } else { | 1287 | else |
| 1500 | gfx_v6_0_write_harvested_raster_configs(adev, raster_config, | 1288 | gfx_v6_0_write_harvested_raster_configs(adev, raster_config, |
| 1501 | adev->gfx.config.backend_enable_mask, | 1289 | adev->gfx.config.backend_enable_mask, |
| 1502 | num_rb_pipes); | 1290 | num_rb_pipes); |
| 1503 | } | ||
| 1504 | 1291 | ||
| 1505 | /* cache the values for userspace */ | 1292 | /* cache the values for userspace */ |
| 1506 | for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { | 1293 | for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { |
| @@ -1517,11 +1304,6 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) | |||
| 1517 | gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); | 1304 | gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); |
| 1518 | mutex_unlock(&adev->grbm_idx_mutex); | 1305 | mutex_unlock(&adev->grbm_idx_mutex); |
| 1519 | } | 1306 | } |
| 1520 | /* | ||
| 1521 | static void gmc_v6_0_init_compute_vmid(struct amdgpu_device *adev) | ||
| 1522 | { | ||
| 1523 | } | ||
| 1524 | */ | ||
| 1525 | 1307 | ||
| 1526 | static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, | 1308 | static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, |
| 1527 | u32 bitmap) | 1309 | u32 bitmap) |
| @@ -1544,7 +1326,7 @@ static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev) | |||
| 1544 | data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | | 1326 | data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | |
| 1545 | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); | 1327 | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); |
| 1546 | 1328 | ||
| 1547 | mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_cu_per_sh); | 1329 | mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); |
| 1548 | return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; | 1330 | return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; |
| 1549 | } | 1331 | } |
| 1550 | 1332 | ||
| @@ -1688,7 +1470,8 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) | |||
| 1688 | WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); | 1470 | WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); |
| 1689 | 1471 | ||
| 1690 | mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); | 1472 | mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); |
| 1691 | mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); | 1473 | adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); |
| 1474 | mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; | ||
| 1692 | 1475 | ||
| 1693 | adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; | 1476 | adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; |
| 1694 | adev->gfx.config.mem_max_burst_length_bytes = 256; | 1477 | adev->gfx.config.mem_max_burst_length_bytes = 256; |
| @@ -3719,6 +3502,12 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) | |||
| 3719 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; | 3502 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; |
| 3720 | struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; | 3503 | struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; |
| 3721 | unsigned disable_masks[4 * 2]; | 3504 | unsigned disable_masks[4 * 2]; |
| 3505 | u32 ao_cu_num; | ||
| 3506 | |||
| 3507 | if (adev->flags & AMD_IS_APU) | ||
| 3508 | ao_cu_num = 2; | ||
| 3509 | else | ||
| 3510 | ao_cu_num = adev->gfx.config.max_cu_per_sh; | ||
| 3722 | 3511 | ||
| 3723 | memset(cu_info, 0, sizeof(*cu_info)); | 3512 | memset(cu_info, 0, sizeof(*cu_info)); |
| 3724 | 3513 | ||
| @@ -3737,16 +3526,18 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) | |||
| 3737 | bitmap = gfx_v6_0_get_cu_enabled(adev); | 3526 | bitmap = gfx_v6_0_get_cu_enabled(adev); |
| 3738 | cu_info->bitmap[i][j] = bitmap; | 3527 | cu_info->bitmap[i][j] = bitmap; |
| 3739 | 3528 | ||
| 3740 | for (k = 0; k < 16; k++) { | 3529 | for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { |
| 3741 | if (bitmap & mask) { | 3530 | if (bitmap & mask) { |
| 3742 | if (counter < 2) | 3531 | if (counter < ao_cu_num) |
| 3743 | ao_bitmap |= mask; | 3532 | ao_bitmap |= mask; |
| 3744 | counter ++; | 3533 | counter ++; |
| 3745 | } | 3534 | } |
| 3746 | mask <<= 1; | 3535 | mask <<= 1; |
| 3747 | } | 3536 | } |
| 3748 | active_cu_number += counter; | 3537 | active_cu_number += counter; |
| 3749 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | 3538 | if (i < 2 && j < 2) |
| 3539 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | ||
| 3540 | cu_info->ao_cu_bitmap[i][j] = ao_bitmap; | ||
| 3750 | } | 3541 | } |
| 3751 | } | 3542 | } |
| 3752 | 3543 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index ee2f2139e2eb..37b45e4403d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | |||
| @@ -21,12 +21,13 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include <linux/firmware.h> | 23 | #include <linux/firmware.h> |
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "amdgpu_ih.h" | 26 | #include "amdgpu_ih.h" |
| 27 | #include "amdgpu_gfx.h" | 27 | #include "amdgpu_gfx.h" |
| 28 | #include "cikd.h" | 28 | #include "cikd.h" |
| 29 | #include "cik.h" | 29 | #include "cik.h" |
| 30 | #include "cik_structs.h" | ||
| 30 | #include "atom.h" | 31 | #include "atom.h" |
| 31 | #include "amdgpu_ucode.h" | 32 | #include "amdgpu_ucode.h" |
| 32 | #include "clearstate_ci.h" | 33 | #include "clearstate_ci.h" |
| @@ -48,7 +49,7 @@ | |||
| 48 | #include "oss/oss_2_0_sh_mask.h" | 49 | #include "oss/oss_2_0_sh_mask.h" |
| 49 | 50 | ||
| 50 | #define GFX7_NUM_GFX_RINGS 1 | 51 | #define GFX7_NUM_GFX_RINGS 1 |
| 51 | #define GFX7_NUM_COMPUTE_RINGS 8 | 52 | #define GFX7_MEC_HPD_SIZE 2048 |
| 52 | 53 | ||
| 53 | static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); | 54 | static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); |
| 54 | static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); | 55 | static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); |
| @@ -1607,19 +1608,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, | |||
| 1607 | } | 1608 | } |
| 1608 | 1609 | ||
| 1609 | /** | 1610 | /** |
| 1610 | * gfx_v7_0_create_bitmask - create a bitmask | ||
| 1611 | * | ||
| 1612 | * @bit_width: length of the mask | ||
| 1613 | * | ||
| 1614 | * create a variable length bit mask (CIK). | ||
| 1615 | * Returns the bitmask. | ||
| 1616 | */ | ||
| 1617 | static u32 gfx_v7_0_create_bitmask(u32 bit_width) | ||
| 1618 | { | ||
| 1619 | return (u32)((1ULL << bit_width) - 1); | ||
| 1620 | } | ||
| 1621 | |||
| 1622 | /** | ||
| 1623 | * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs | 1611 | * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs |
| 1624 | * | 1612 | * |
| 1625 | * @adev: amdgpu_device pointer | 1613 | * @adev: amdgpu_device pointer |
| @@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev) | |||
| 1637 | data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; | 1625 | data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; |
| 1638 | data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; | 1626 | data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; |
| 1639 | 1627 | ||
| 1640 | mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / | 1628 | mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / |
| 1641 | adev->gfx.config.max_sh_per_se); | 1629 | adev->gfx.config.max_sh_per_se); |
| 1642 | 1630 | ||
| 1643 | return (~data) & mask; | 1631 | return (~data) & mask; |
| 1644 | } | 1632 | } |
| @@ -1837,7 +1825,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) | |||
| 1837 | /** | 1825 | /** |
| 1838 | * gmc_v7_0_init_compute_vmid - gart enable | 1826 | * gmc_v7_0_init_compute_vmid - gart enable |
| 1839 | * | 1827 | * |
| 1840 | * @rdev: amdgpu_device pointer | 1828 | * @adev: amdgpu_device pointer |
| 1841 | * | 1829 | * |
| 1842 | * Initialize compute vmid sh_mem registers | 1830 | * Initialize compute vmid sh_mem registers |
| 1843 | * | 1831 | * |
| @@ -2821,26 +2809,23 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) | |||
| 2821 | } | 2809 | } |
| 2822 | } | 2810 | } |
| 2823 | 2811 | ||
| 2824 | #define MEC_HPD_SIZE 2048 | ||
| 2825 | |||
| 2826 | static int gfx_v7_0_mec_init(struct amdgpu_device *adev) | 2812 | static int gfx_v7_0_mec_init(struct amdgpu_device *adev) |
| 2827 | { | 2813 | { |
| 2828 | int r; | 2814 | int r; |
| 2829 | u32 *hpd; | 2815 | u32 *hpd; |
| 2816 | size_t mec_hpd_size; | ||
| 2830 | 2817 | ||
| 2831 | /* | 2818 | bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); |
| 2832 | * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total | 2819 | |
| 2833 | * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total | 2820 | /* take ownership of the relevant compute queues */ |
| 2834 | * Nonetheless, we assign only 1 pipe because all other pipes will | 2821 | amdgpu_gfx_compute_queue_acquire(adev); |
| 2835 | * be handled by KFD | ||
| 2836 | */ | ||
| 2837 | adev->gfx.mec.num_mec = 1; | ||
| 2838 | adev->gfx.mec.num_pipe = 1; | ||
| 2839 | adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; | ||
| 2840 | 2822 | ||
| 2823 | /* allocate space for ALL pipes (even the ones we don't own) */ | ||
| 2824 | mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec | ||
| 2825 | * GFX7_MEC_HPD_SIZE * 2; | ||
| 2841 | if (adev->gfx.mec.hpd_eop_obj == NULL) { | 2826 | if (adev->gfx.mec.hpd_eop_obj == NULL) { |
| 2842 | r = amdgpu_bo_create(adev, | 2827 | r = amdgpu_bo_create(adev, |
| 2843 | adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, | 2828 | mec_hpd_size, |
| 2844 | PAGE_SIZE, true, | 2829 | PAGE_SIZE, true, |
| 2845 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, | 2830 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, |
| 2846 | &adev->gfx.mec.hpd_eop_obj); | 2831 | &adev->gfx.mec.hpd_eop_obj); |
| @@ -2870,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) | |||
| 2870 | } | 2855 | } |
| 2871 | 2856 | ||
| 2872 | /* clear memory. Not sure if this is required or not */ | 2857 | /* clear memory. Not sure if this is required or not */ |
| 2873 | memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); | 2858 | memset(hpd, 0, mec_hpd_size); |
| 2874 | 2859 | ||
| 2875 | amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); | 2860 | amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); |
| 2876 | amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); | 2861 | amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); |
| @@ -2917,275 +2902,296 @@ struct hqd_registers | |||
| 2917 | u32 cp_mqd_control; | 2902 | u32 cp_mqd_control; |
| 2918 | }; | 2903 | }; |
| 2919 | 2904 | ||
| 2920 | struct bonaire_mqd | 2905 | static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, |
| 2906 | int mec, int pipe) | ||
| 2921 | { | 2907 | { |
| 2922 | u32 header; | ||
| 2923 | u32 dispatch_initiator; | ||
| 2924 | u32 dimensions[3]; | ||
| 2925 | u32 start_idx[3]; | ||
| 2926 | u32 num_threads[3]; | ||
| 2927 | u32 pipeline_stat_enable; | ||
| 2928 | u32 perf_counter_enable; | ||
| 2929 | u32 pgm[2]; | ||
| 2930 | u32 tba[2]; | ||
| 2931 | u32 tma[2]; | ||
| 2932 | u32 pgm_rsrc[2]; | ||
| 2933 | u32 vmid; | ||
| 2934 | u32 resource_limits; | ||
| 2935 | u32 static_thread_mgmt01[2]; | ||
| 2936 | u32 tmp_ring_size; | ||
| 2937 | u32 static_thread_mgmt23[2]; | ||
| 2938 | u32 restart[3]; | ||
| 2939 | u32 thread_trace_enable; | ||
| 2940 | u32 reserved1; | ||
| 2941 | u32 user_data[16]; | ||
| 2942 | u32 vgtcs_invoke_count[2]; | ||
| 2943 | struct hqd_registers queue_state; | ||
| 2944 | u32 dequeue_cntr; | ||
| 2945 | u32 interrupt_queue[64]; | ||
| 2946 | }; | ||
| 2947 | |||
| 2948 | /** | ||
| 2949 | * gfx_v7_0_cp_compute_resume - setup the compute queue registers | ||
| 2950 | * | ||
| 2951 | * @adev: amdgpu_device pointer | ||
| 2952 | * | ||
| 2953 | * Program the compute queues and test them to make sure they | ||
| 2954 | * are working. | ||
| 2955 | * Returns 0 for success, error for failure. | ||
| 2956 | */ | ||
| 2957 | static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) | ||
| 2958 | { | ||
| 2959 | int r, i, j; | ||
| 2960 | u32 tmp; | ||
| 2961 | bool use_doorbell = true; | ||
| 2962 | u64 hqd_gpu_addr; | ||
| 2963 | u64 mqd_gpu_addr; | ||
| 2964 | u64 eop_gpu_addr; | 2908 | u64 eop_gpu_addr; |
| 2965 | u64 wb_gpu_addr; | 2909 | u32 tmp; |
| 2966 | u32 *buf; | 2910 | size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe) |
| 2967 | struct bonaire_mqd *mqd; | 2911 | * GFX7_MEC_HPD_SIZE * 2; |
| 2968 | struct amdgpu_ring *ring; | ||
| 2969 | |||
| 2970 | /* fix up chicken bits */ | ||
| 2971 | tmp = RREG32(mmCP_CPF_DEBUG); | ||
| 2972 | tmp |= (1 << 23); | ||
| 2973 | WREG32(mmCP_CPF_DEBUG, tmp); | ||
| 2974 | 2912 | ||
| 2975 | /* init the pipes */ | ||
| 2976 | mutex_lock(&adev->srbm_mutex); | 2913 | mutex_lock(&adev->srbm_mutex); |
| 2977 | for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { | 2914 | eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset; |
| 2978 | int me = (i < 4) ? 1 : 2; | ||
| 2979 | int pipe = (i < 4) ? i : (i - 4); | ||
| 2980 | 2915 | ||
| 2981 | eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); | 2916 | cik_srbm_select(adev, mec + 1, pipe, 0, 0); |
| 2982 | 2917 | ||
| 2983 | cik_srbm_select(adev, me, pipe, 0, 0); | 2918 | /* write the EOP addr */ |
| 2919 | WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); | ||
| 2920 | WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); | ||
| 2984 | 2921 | ||
| 2985 | /* write the EOP addr */ | 2922 | /* set the VMID assigned */ |
| 2986 | WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); | 2923 | WREG32(mmCP_HPD_EOP_VMID, 0); |
| 2987 | WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); | ||
| 2988 | 2924 | ||
| 2989 | /* set the VMID assigned */ | 2925 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ |
| 2990 | WREG32(mmCP_HPD_EOP_VMID, 0); | 2926 | tmp = RREG32(mmCP_HPD_EOP_CONTROL); |
| 2927 | tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; | ||
| 2928 | tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8); | ||
| 2929 | WREG32(mmCP_HPD_EOP_CONTROL, tmp); | ||
| 2991 | 2930 | ||
| 2992 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ | ||
| 2993 | tmp = RREG32(mmCP_HPD_EOP_CONTROL); | ||
| 2994 | tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; | ||
| 2995 | tmp |= order_base_2(MEC_HPD_SIZE / 8); | ||
| 2996 | WREG32(mmCP_HPD_EOP_CONTROL, tmp); | ||
| 2997 | } | ||
| 2998 | cik_srbm_select(adev, 0, 0, 0, 0); | 2931 | cik_srbm_select(adev, 0, 0, 0, 0); |
| 2999 | mutex_unlock(&adev->srbm_mutex); | 2932 | mutex_unlock(&adev->srbm_mutex); |
| 2933 | } | ||
| 3000 | 2934 | ||
| 3001 | /* init the queues. Just two for now. */ | 2935 | static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev) |
| 3002 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 2936 | { |
| 3003 | ring = &adev->gfx.compute_ring[i]; | 2937 | int i; |
| 3004 | 2938 | ||
| 3005 | if (ring->mqd_obj == NULL) { | 2939 | /* disable the queue if it's active */ |
| 3006 | r = amdgpu_bo_create(adev, | 2940 | if (RREG32(mmCP_HQD_ACTIVE) & 1) { |
| 3007 | sizeof(struct bonaire_mqd), | 2941 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); |
| 3008 | PAGE_SIZE, true, | 2942 | for (i = 0; i < adev->usec_timeout; i++) { |
| 3009 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, | 2943 | if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) |
| 3010 | &ring->mqd_obj); | 2944 | break; |
| 3011 | if (r) { | 2945 | udelay(1); |
| 3012 | dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); | ||
| 3013 | return r; | ||
| 3014 | } | ||
| 3015 | } | 2946 | } |
| 3016 | 2947 | ||
| 3017 | r = amdgpu_bo_reserve(ring->mqd_obj, false); | 2948 | if (i == adev->usec_timeout) |
| 3018 | if (unlikely(r != 0)) { | 2949 | return -ETIMEDOUT; |
| 3019 | gfx_v7_0_cp_compute_fini(adev); | ||
| 3020 | return r; | ||
| 3021 | } | ||
| 3022 | r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, | ||
| 3023 | &mqd_gpu_addr); | ||
| 3024 | if (r) { | ||
| 3025 | dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); | ||
| 3026 | gfx_v7_0_cp_compute_fini(adev); | ||
| 3027 | return r; | ||
| 3028 | } | ||
| 3029 | r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); | ||
| 3030 | if (r) { | ||
| 3031 | dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); | ||
| 3032 | gfx_v7_0_cp_compute_fini(adev); | ||
| 3033 | return r; | ||
| 3034 | } | ||
| 3035 | 2950 | ||
| 3036 | /* init the mqd struct */ | 2951 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); |
| 3037 | memset(buf, 0, sizeof(struct bonaire_mqd)); | 2952 | WREG32(mmCP_HQD_PQ_RPTR, 0); |
| 2953 | WREG32(mmCP_HQD_PQ_WPTR, 0); | ||
| 2954 | } | ||
| 3038 | 2955 | ||
| 3039 | mqd = (struct bonaire_mqd *)buf; | 2956 | return 0; |
| 3040 | mqd->header = 0xC0310800; | 2957 | } |
| 3041 | mqd->static_thread_mgmt01[0] = 0xffffffff; | ||
| 3042 | mqd->static_thread_mgmt01[1] = 0xffffffff; | ||
| 3043 | mqd->static_thread_mgmt23[0] = 0xffffffff; | ||
| 3044 | mqd->static_thread_mgmt23[1] = 0xffffffff; | ||
| 3045 | 2958 | ||
| 3046 | mutex_lock(&adev->srbm_mutex); | 2959 | static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, |
| 3047 | cik_srbm_select(adev, ring->me, | 2960 | struct cik_mqd *mqd, |
| 3048 | ring->pipe, | 2961 | uint64_t mqd_gpu_addr, |
| 3049 | ring->queue, 0); | 2962 | struct amdgpu_ring *ring) |
| 2963 | { | ||
| 2964 | u64 hqd_gpu_addr; | ||
| 2965 | u64 wb_gpu_addr; | ||
| 3050 | 2966 | ||
| 3051 | /* disable wptr polling */ | 2967 | /* init the mqd struct */ |
| 3052 | tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); | 2968 | memset(mqd, 0, sizeof(struct cik_mqd)); |
| 3053 | tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK; | ||
| 3054 | WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); | ||
| 3055 | 2969 | ||
| 3056 | /* enable doorbell? */ | 2970 | mqd->header = 0xC0310800; |
| 3057 | mqd->queue_state.cp_hqd_pq_doorbell_control = | 2971 | mqd->compute_static_thread_mgmt_se0 = 0xffffffff; |
| 3058 | RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); | 2972 | mqd->compute_static_thread_mgmt_se1 = 0xffffffff; |
| 3059 | if (use_doorbell) | 2973 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; |
| 3060 | mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; | 2974 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; |
| 3061 | else | ||
| 3062 | mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; | ||
| 3063 | WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, | ||
| 3064 | mqd->queue_state.cp_hqd_pq_doorbell_control); | ||
| 3065 | |||
| 3066 | /* disable the queue if it's active */ | ||
| 3067 | mqd->queue_state.cp_hqd_dequeue_request = 0; | ||
| 3068 | mqd->queue_state.cp_hqd_pq_rptr = 0; | ||
| 3069 | mqd->queue_state.cp_hqd_pq_wptr= 0; | ||
| 3070 | if (RREG32(mmCP_HQD_ACTIVE) & 1) { | ||
| 3071 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); | ||
| 3072 | for (j = 0; j < adev->usec_timeout; j++) { | ||
| 3073 | if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) | ||
| 3074 | break; | ||
| 3075 | udelay(1); | ||
| 3076 | } | ||
| 3077 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); | ||
| 3078 | WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); | ||
| 3079 | WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); | ||
| 3080 | } | ||
| 3081 | 2975 | ||
| 3082 | /* set the pointer to the MQD */ | 2976 | /* enable doorbell? */ |
| 3083 | mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; | 2977 | mqd->cp_hqd_pq_doorbell_control = |
| 3084 | mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); | 2978 | RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); |
| 3085 | WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); | 2979 | if (ring->use_doorbell) |
| 3086 | WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); | 2980 | mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; |
| 3087 | /* set MQD vmid to 0 */ | 2981 | else |
| 3088 | mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); | 2982 | mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; |
| 3089 | mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; | 2983 | |
| 3090 | WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); | 2984 | /* set the pointer to the MQD */ |
| 3091 | 2985 | mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; | |
| 3092 | /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ | 2986 | mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); |
| 3093 | hqd_gpu_addr = ring->gpu_addr >> 8; | 2987 | |
| 3094 | mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; | 2988 | /* set MQD vmid to 0 */ |
| 3095 | mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); | 2989 | mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL); |
| 3096 | WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); | 2990 | mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; |
| 3097 | WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); | 2991 | |
| 3098 | 2992 | /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ | |
| 3099 | /* set up the HQD, this is similar to CP_RB0_CNTL */ | 2993 | hqd_gpu_addr = ring->gpu_addr >> 8; |
| 3100 | mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); | 2994 | mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; |
| 3101 | mqd->queue_state.cp_hqd_pq_control &= | 2995 | mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); |
| 3102 | ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | | 2996 | |
| 3103 | CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); | 2997 | /* set up the HQD, this is similar to CP_RB0_CNTL */ |
| 3104 | 2998 | mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); | |
| 3105 | mqd->queue_state.cp_hqd_pq_control |= | 2999 | mqd->cp_hqd_pq_control &= |
| 3106 | order_base_2(ring->ring_size / 8); | 3000 | ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | |
| 3107 | mqd->queue_state.cp_hqd_pq_control |= | 3001 | CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); |
| 3108 | (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); | 3002 | |
| 3003 | mqd->cp_hqd_pq_control |= | ||
| 3004 | order_base_2(ring->ring_size / 8); | ||
| 3005 | mqd->cp_hqd_pq_control |= | ||
| 3006 | (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); | ||
| 3109 | #ifdef __BIG_ENDIAN | 3007 | #ifdef __BIG_ENDIAN |
| 3110 | mqd->queue_state.cp_hqd_pq_control |= | 3008 | mqd->cp_hqd_pq_control |= |
| 3111 | 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; | 3009 | 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; |
| 3112 | #endif | 3010 | #endif |
| 3113 | mqd->queue_state.cp_hqd_pq_control &= | 3011 | mqd->cp_hqd_pq_control &= |
| 3114 | ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | | 3012 | ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | |
| 3115 | CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK | | 3013 | CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK | |
| 3116 | CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK); | 3014 | CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK); |
| 3117 | mqd->queue_state.cp_hqd_pq_control |= | 3015 | mqd->cp_hqd_pq_control |= |
| 3118 | CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | | 3016 | CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | |
| 3119 | CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ | 3017 | CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ |
| 3120 | WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); | ||
| 3121 | |||
| 3122 | /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ | ||
| 3123 | wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | ||
| 3124 | mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; | ||
| 3125 | mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; | ||
| 3126 | WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); | ||
| 3127 | WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, | ||
| 3128 | mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); | ||
| 3129 | |||
| 3130 | /* set the wb address wether it's enabled or not */ | ||
| 3131 | wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); | ||
| 3132 | mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; | ||
| 3133 | mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = | ||
| 3134 | upper_32_bits(wb_gpu_addr) & 0xffff; | ||
| 3135 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, | ||
| 3136 | mqd->queue_state.cp_hqd_pq_rptr_report_addr); | ||
| 3137 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, | ||
| 3138 | mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); | ||
| 3139 | |||
| 3140 | /* enable the doorbell if requested */ | ||
| 3141 | if (use_doorbell) { | ||
| 3142 | mqd->queue_state.cp_hqd_pq_doorbell_control = | ||
| 3143 | RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); | ||
| 3144 | mqd->queue_state.cp_hqd_pq_doorbell_control &= | ||
| 3145 | ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; | ||
| 3146 | mqd->queue_state.cp_hqd_pq_doorbell_control |= | ||
| 3147 | (ring->doorbell_index << | ||
| 3148 | CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); | ||
| 3149 | mqd->queue_state.cp_hqd_pq_doorbell_control |= | ||
| 3150 | CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; | ||
| 3151 | mqd->queue_state.cp_hqd_pq_doorbell_control &= | ||
| 3152 | ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | | ||
| 3153 | CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); | ||
| 3154 | 3018 | ||
| 3155 | } else { | 3019 | /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ |
| 3156 | mqd->queue_state.cp_hqd_pq_doorbell_control = 0; | 3020 | wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); |
| 3021 | mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; | ||
| 3022 | mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; | ||
| 3023 | |||
| 3024 | /* set the wb address wether it's enabled or not */ | ||
| 3025 | wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); | ||
| 3026 | mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; | ||
| 3027 | mqd->cp_hqd_pq_rptr_report_addr_hi = | ||
| 3028 | upper_32_bits(wb_gpu_addr) & 0xffff; | ||
| 3029 | |||
| 3030 | /* enable the doorbell if requested */ | ||
| 3031 | if (ring->use_doorbell) { | ||
| 3032 | mqd->cp_hqd_pq_doorbell_control = | ||
| 3033 | RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); | ||
| 3034 | mqd->cp_hqd_pq_doorbell_control &= | ||
| 3035 | ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; | ||
| 3036 | mqd->cp_hqd_pq_doorbell_control |= | ||
| 3037 | (ring->doorbell_index << | ||
| 3038 | CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); | ||
| 3039 | mqd->cp_hqd_pq_doorbell_control |= | ||
| 3040 | CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; | ||
| 3041 | mqd->cp_hqd_pq_doorbell_control &= | ||
| 3042 | ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | | ||
| 3043 | CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); | ||
| 3044 | |||
| 3045 | } else { | ||
| 3046 | mqd->cp_hqd_pq_doorbell_control = 0; | ||
| 3047 | } | ||
| 3048 | |||
| 3049 | /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ | ||
| 3050 | ring->wptr = 0; | ||
| 3051 | mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); | ||
| 3052 | mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); | ||
| 3053 | |||
| 3054 | /* set the vmid for the queue */ | ||
| 3055 | mqd->cp_hqd_vmid = 0; | ||
| 3056 | |||
| 3057 | /* defaults */ | ||
| 3058 | mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL); | ||
| 3059 | mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR); | ||
| 3060 | mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI); | ||
| 3061 | mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR); | ||
| 3062 | mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE); | ||
| 3063 | mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD); | ||
| 3064 | mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE); | ||
| 3065 | mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO); | ||
| 3066 | mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI); | ||
| 3067 | mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO); | ||
| 3068 | mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI); | ||
| 3069 | mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); | ||
| 3070 | mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); | ||
| 3071 | mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); | ||
| 3072 | mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); | ||
| 3073 | mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR); | ||
| 3074 | |||
| 3075 | /* activate the queue */ | ||
| 3076 | mqd->cp_hqd_active = 1; | ||
| 3077 | } | ||
| 3078 | |||
| 3079 | int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) | ||
| 3080 | { | ||
| 3081 | uint32_t tmp; | ||
| 3082 | uint32_t mqd_reg; | ||
| 3083 | uint32_t *mqd_data; | ||
| 3084 | |||
| 3085 | /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */ | ||
| 3086 | mqd_data = &mqd->cp_mqd_base_addr_lo; | ||
| 3087 | |||
| 3088 | /* disable wptr polling */ | ||
| 3089 | tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); | ||
| 3090 | tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); | ||
| 3091 | WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); | ||
| 3092 | |||
| 3093 | /* program all HQD registers */ | ||
| 3094 | for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++) | ||
| 3095 | WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); | ||
| 3096 | |||
| 3097 | /* activate the HQD */ | ||
| 3098 | for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) | ||
| 3099 | WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); | ||
| 3100 | |||
| 3101 | return 0; | ||
| 3102 | } | ||
| 3103 | |||
| 3104 | static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) | ||
| 3105 | { | ||
| 3106 | int r; | ||
| 3107 | u64 mqd_gpu_addr; | ||
| 3108 | struct cik_mqd *mqd; | ||
| 3109 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; | ||
| 3110 | |||
| 3111 | if (ring->mqd_obj == NULL) { | ||
| 3112 | r = amdgpu_bo_create(adev, | ||
| 3113 | sizeof(struct cik_mqd), | ||
| 3114 | PAGE_SIZE, true, | ||
| 3115 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, | ||
| 3116 | &ring->mqd_obj); | ||
| 3117 | if (r) { | ||
| 3118 | dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); | ||
| 3119 | return r; | ||
| 3157 | } | 3120 | } |
| 3158 | WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, | 3121 | } |
| 3159 | mqd->queue_state.cp_hqd_pq_doorbell_control); | 3122 | |
| 3123 | r = amdgpu_bo_reserve(ring->mqd_obj, false); | ||
| 3124 | if (unlikely(r != 0)) | ||
| 3125 | goto out; | ||
| 3160 | 3126 | ||
| 3161 | /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ | 3127 | r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, |
| 3162 | ring->wptr = 0; | 3128 | &mqd_gpu_addr); |
| 3163 | mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); | 3129 | if (r) { |
| 3164 | WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); | 3130 | dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); |
| 3165 | mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); | 3131 | goto out_unreserve; |
| 3132 | } | ||
| 3133 | r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); | ||
| 3134 | if (r) { | ||
| 3135 | dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); | ||
| 3136 | goto out_unreserve; | ||
| 3137 | } | ||
| 3166 | 3138 | ||
| 3167 | /* set the vmid for the queue */ | 3139 | mutex_lock(&adev->srbm_mutex); |
| 3168 | mqd->queue_state.cp_hqd_vmid = 0; | 3140 | cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
| 3169 | WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); | ||
| 3170 | 3141 | ||
| 3171 | /* activate the queue */ | 3142 | gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); |
| 3172 | mqd->queue_state.cp_hqd_active = 1; | 3143 | gfx_v7_0_mqd_deactivate(adev); |
| 3173 | WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); | 3144 | gfx_v7_0_mqd_commit(adev, mqd); |
| 3174 | 3145 | ||
| 3175 | cik_srbm_select(adev, 0, 0, 0, 0); | 3146 | cik_srbm_select(adev, 0, 0, 0, 0); |
| 3176 | mutex_unlock(&adev->srbm_mutex); | 3147 | mutex_unlock(&adev->srbm_mutex); |
| 3177 | 3148 | ||
| 3178 | amdgpu_bo_kunmap(ring->mqd_obj); | 3149 | amdgpu_bo_kunmap(ring->mqd_obj); |
| 3179 | amdgpu_bo_unreserve(ring->mqd_obj); | 3150 | out_unreserve: |
| 3151 | amdgpu_bo_unreserve(ring->mqd_obj); | ||
| 3152 | out: | ||
| 3153 | return 0; | ||
| 3154 | } | ||
| 3180 | 3155 | ||
| 3181 | ring->ready = true; | 3156 | /** |
| 3157 | * gfx_v7_0_cp_compute_resume - setup the compute queue registers | ||
| 3158 | * | ||
| 3159 | * @adev: amdgpu_device pointer | ||
| 3160 | * | ||
| 3161 | * Program the compute queues and test them to make sure they | ||
| 3162 | * are working. | ||
| 3163 | * Returns 0 for success, error for failure. | ||
| 3164 | */ | ||
| 3165 | static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) | ||
| 3166 | { | ||
| 3167 | int r, i, j; | ||
| 3168 | u32 tmp; | ||
| 3169 | struct amdgpu_ring *ring; | ||
| 3170 | |||
| 3171 | /* fix up chicken bits */ | ||
| 3172 | tmp = RREG32(mmCP_CPF_DEBUG); | ||
| 3173 | tmp |= (1 << 23); | ||
| 3174 | WREG32(mmCP_CPF_DEBUG, tmp); | ||
| 3175 | |||
| 3176 | /* init all pipes (even the ones we don't own) */ | ||
| 3177 | for (i = 0; i < adev->gfx.mec.num_mec; i++) | ||
| 3178 | for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) | ||
| 3179 | gfx_v7_0_compute_pipe_init(adev, i, j); | ||
| 3180 | |||
| 3181 | /* init the queues */ | ||
| 3182 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 3183 | r = gfx_v7_0_compute_queue_init(adev, i); | ||
| 3184 | if (r) { | ||
| 3185 | gfx_v7_0_cp_compute_fini(adev); | ||
| 3186 | return r; | ||
| 3187 | } | ||
| 3182 | } | 3188 | } |
| 3183 | 3189 | ||
| 3184 | gfx_v7_0_cp_compute_enable(adev, true); | 3190 | gfx_v7_0_cp_compute_enable(adev, true); |
| 3185 | 3191 | ||
| 3186 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 3192 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
| 3187 | ring = &adev->gfx.compute_ring[i]; | 3193 | ring = &adev->gfx.compute_ring[i]; |
| 3188 | 3194 | ring->ready = true; | |
| 3189 | r = amdgpu_ring_test_ring(ring); | 3195 | r = amdgpu_ring_test_ring(ring); |
| 3190 | if (r) | 3196 | if (r) |
| 3191 | ring->ready = false; | 3197 | ring->ready = false; |
| @@ -3797,6 +3803,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) | |||
| 3797 | gfx_v7_0_update_rlc(adev, tmp); | 3803 | gfx_v7_0_update_rlc(adev, tmp); |
| 3798 | 3804 | ||
| 3799 | data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; | 3805 | data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; |
| 3806 | if (orig != data) | ||
| 3807 | WREG32(mmRLC_CGCG_CGLS_CTRL, data); | ||
| 3808 | |||
| 3800 | } else { | 3809 | } else { |
| 3801 | gfx_v7_0_enable_gui_idle_interrupt(adev, false); | 3810 | gfx_v7_0_enable_gui_idle_interrupt(adev, false); |
| 3802 | 3811 | ||
| @@ -3806,11 +3815,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) | |||
| 3806 | RREG32(mmCB_CGTT_SCLK_CTRL); | 3815 | RREG32(mmCB_CGTT_SCLK_CTRL); |
| 3807 | 3816 | ||
| 3808 | data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); | 3817 | data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); |
| 3809 | } | 3818 | if (orig != data) |
| 3810 | 3819 | WREG32(mmRLC_CGCG_CGLS_CTRL, data); | |
| 3811 | if (orig != data) | ||
| 3812 | WREG32(mmRLC_CGCG_CGLS_CTRL, data); | ||
| 3813 | 3820 | ||
| 3821 | gfx_v7_0_enable_gui_idle_interrupt(adev, true); | ||
| 3822 | } | ||
| 3814 | } | 3823 | } |
| 3815 | 3824 | ||
| 3816 | static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) | 3825 | static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) |
| @@ -4089,7 +4098,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) | |||
| 4089 | data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; | 4098 | data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; |
| 4090 | data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; | 4099 | data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; |
| 4091 | 4100 | ||
| 4092 | mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh); | 4101 | mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); |
| 4093 | 4102 | ||
| 4094 | return (~data) & mask; | 4103 | return (~data) & mask; |
| 4095 | } | 4104 | } |
| @@ -4470,7 +4479,7 @@ static int gfx_v7_0_early_init(void *handle) | |||
| 4470 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 4479 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 4471 | 4480 | ||
| 4472 | adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; | 4481 | adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; |
| 4473 | adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS; | 4482 | adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; |
| 4474 | adev->gfx.funcs = &gfx_v7_0_gfx_funcs; | 4483 | adev->gfx.funcs = &gfx_v7_0_gfx_funcs; |
| 4475 | adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs; | 4484 | adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs; |
| 4476 | gfx_v7_0_set_ring_funcs(adev); | 4485 | gfx_v7_0_set_ring_funcs(adev); |
| @@ -4662,11 +4671,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) | |||
| 4662 | adev->gfx.config.gb_addr_config = gb_addr_config; | 4671 | adev->gfx.config.gb_addr_config = gb_addr_config; |
| 4663 | } | 4672 | } |
| 4664 | 4673 | ||
| 4674 | static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, | ||
| 4675 | int mec, int pipe, int queue) | ||
| 4676 | { | ||
| 4677 | int r; | ||
| 4678 | unsigned irq_type; | ||
| 4679 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; | ||
| 4680 | |||
| 4681 | /* mec0 is me1 */ | ||
| 4682 | ring->me = mec + 1; | ||
| 4683 | ring->pipe = pipe; | ||
| 4684 | ring->queue = queue; | ||
| 4685 | |||
| 4686 | ring->ring_obj = NULL; | ||
| 4687 | ring->use_doorbell = true; | ||
| 4688 | ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; | ||
| 4689 | sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 4690 | |||
| 4691 | irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP | ||
| 4692 | + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) | ||
| 4693 | + ring->pipe; | ||
| 4694 | |||
| 4695 | /* type-2 packets are deprecated on MEC, use type-3 instead */ | ||
| 4696 | r = amdgpu_ring_init(adev, ring, 1024, | ||
| 4697 | &adev->gfx.eop_irq, irq_type); | ||
| 4698 | if (r) | ||
| 4699 | return r; | ||
| 4700 | |||
| 4701 | |||
| 4702 | return 0; | ||
| 4703 | } | ||
| 4704 | |||
| 4665 | static int gfx_v7_0_sw_init(void *handle) | 4705 | static int gfx_v7_0_sw_init(void *handle) |
| 4666 | { | 4706 | { |
| 4667 | struct amdgpu_ring *ring; | 4707 | struct amdgpu_ring *ring; |
| 4668 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 4708 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 4669 | int i, r; | 4709 | int i, j, k, r, ring_id; |
| 4710 | |||
| 4711 | switch (adev->asic_type) { | ||
| 4712 | case CHIP_KAVERI: | ||
| 4713 | adev->gfx.mec.num_mec = 2; | ||
| 4714 | break; | ||
| 4715 | case CHIP_BONAIRE: | ||
| 4716 | case CHIP_HAWAII: | ||
| 4717 | case CHIP_KABINI: | ||
| 4718 | case CHIP_MULLINS: | ||
| 4719 | default: | ||
| 4720 | adev->gfx.mec.num_mec = 1; | ||
| 4721 | break; | ||
| 4722 | } | ||
| 4723 | adev->gfx.mec.num_pipe_per_mec = 4; | ||
| 4724 | adev->gfx.mec.num_queue_per_pipe = 8; | ||
| 4670 | 4725 | ||
| 4671 | /* EOP Event */ | 4726 | /* EOP Event */ |
| 4672 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); | 4727 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); |
| @@ -4716,29 +4771,23 @@ static int gfx_v7_0_sw_init(void *handle) | |||
| 4716 | return r; | 4771 | return r; |
| 4717 | } | 4772 | } |
| 4718 | 4773 | ||
| 4719 | /* set up the compute queues */ | 4774 | /* set up the compute queues - allocate horizontally across pipes */ |
| 4720 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 4775 | ring_id = 0; |
| 4721 | unsigned irq_type; | 4776 | for (i = 0; i < adev->gfx.mec.num_mec; ++i) { |
| 4722 | 4777 | for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { | |
| 4723 | /* max 32 queues per MEC */ | 4778 | for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { |
| 4724 | if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { | 4779 | if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) |
| 4725 | DRM_ERROR("Too many (%d) compute rings!\n", i); | 4780 | continue; |
| 4726 | break; | 4781 | |
| 4782 | r = gfx_v7_0_compute_ring_init(adev, | ||
| 4783 | ring_id, | ||
| 4784 | i, k, j); | ||
| 4785 | if (r) | ||
| 4786 | return r; | ||
| 4787 | |||
| 4788 | ring_id++; | ||
| 4789 | } | ||
| 4727 | } | 4790 | } |
| 4728 | ring = &adev->gfx.compute_ring[i]; | ||
| 4729 | ring->ring_obj = NULL; | ||
| 4730 | ring->use_doorbell = true; | ||
| 4731 | ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; | ||
| 4732 | ring->me = 1; /* first MEC */ | ||
| 4733 | ring->pipe = i / 8; | ||
| 4734 | ring->queue = i % 8; | ||
| 4735 | sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 4736 | irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; | ||
| 4737 | /* type-2 packets are deprecated on MEC, use type-3 instead */ | ||
| 4738 | r = amdgpu_ring_init(adev, ring, 1024, | ||
| 4739 | &adev->gfx.eop_irq, irq_type); | ||
| 4740 | if (r) | ||
| 4741 | return r; | ||
| 4742 | } | 4791 | } |
| 4743 | 4792 | ||
| 4744 | /* reserve GDS, GWS and OA resource for gfx */ | 4793 | /* reserve GDS, GWS and OA resource for gfx */ |
| @@ -4969,8 +5018,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, | |||
| 4969 | u32 mec_int_cntl, mec_int_cntl_reg; | 5018 | u32 mec_int_cntl, mec_int_cntl_reg; |
| 4970 | 5019 | ||
| 4971 | /* | 5020 | /* |
| 4972 | * amdgpu controls only pipe 0 of MEC1. That's why this function only | 5021 | * amdgpu controls only the first MEC. That's why this function only |
| 4973 | * handles the setting of interrupts for this specific pipe. All other | 5022 | * handles the setting of interrupts for this specific MEC. All other |
| 4974 | * pipes' interrupts are set by amdkfd. | 5023 | * pipes' interrupts are set by amdkfd. |
| 4975 | */ | 5024 | */ |
| 4976 | 5025 | ||
| @@ -4979,6 +5028,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, | |||
| 4979 | case 0: | 5028 | case 0: |
| 4980 | mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; | 5029 | mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; |
| 4981 | break; | 5030 | break; |
| 5031 | case 1: | ||
| 5032 | mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; | ||
| 5033 | break; | ||
| 5034 | case 2: | ||
| 5035 | mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; | ||
| 5036 | break; | ||
| 5037 | case 3: | ||
| 5038 | mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; | ||
| 5039 | break; | ||
| 4982 | default: | 5040 | default: |
| 4983 | DRM_DEBUG("invalid pipe %d\n", pipe); | 5041 | DRM_DEBUG("invalid pipe %d\n", pipe); |
| 4984 | return; | 5042 | return; |
| @@ -5336,6 +5394,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) | |||
| 5336 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; | 5394 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; |
| 5337 | struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; | 5395 | struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; |
| 5338 | unsigned disable_masks[4 * 2]; | 5396 | unsigned disable_masks[4 * 2]; |
| 5397 | u32 ao_cu_num; | ||
| 5398 | |||
| 5399 | if (adev->flags & AMD_IS_APU) | ||
| 5400 | ao_cu_num = 2; | ||
| 5401 | else | ||
| 5402 | ao_cu_num = adev->gfx.config.max_cu_per_sh; | ||
| 5339 | 5403 | ||
| 5340 | memset(cu_info, 0, sizeof(*cu_info)); | 5404 | memset(cu_info, 0, sizeof(*cu_info)); |
| 5341 | 5405 | ||
| @@ -5354,16 +5418,18 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) | |||
| 5354 | bitmap = gfx_v7_0_get_cu_active_bitmap(adev); | 5418 | bitmap = gfx_v7_0_get_cu_active_bitmap(adev); |
| 5355 | cu_info->bitmap[i][j] = bitmap; | 5419 | cu_info->bitmap[i][j] = bitmap; |
| 5356 | 5420 | ||
| 5357 | for (k = 0; k < 16; k ++) { | 5421 | for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { |
| 5358 | if (bitmap & mask) { | 5422 | if (bitmap & mask) { |
| 5359 | if (counter < 2) | 5423 | if (counter < ao_cu_num) |
| 5360 | ao_bitmap |= mask; | 5424 | ao_bitmap |= mask; |
| 5361 | counter ++; | 5425 | counter ++; |
| 5362 | } | 5426 | } |
| 5363 | mask <<= 1; | 5427 | mask <<= 1; |
| 5364 | } | 5428 | } |
| 5365 | active_cu_number += counter; | 5429 | active_cu_number += counter; |
| 5366 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | 5430 | if (i < 2 && j < 2) |
| 5431 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | ||
| 5432 | cu_info->ao_cu_bitmap[i][j] = ao_bitmap; | ||
| 5367 | } | 5433 | } |
| 5368 | } | 5434 | } |
| 5369 | gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); | 5435 | gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index 2f5164cc0e53..6fb9c1524691 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h | |||
| @@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block; | |||
| 29 | extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block; | 29 | extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block; |
| 30 | extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; | 30 | extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; |
| 31 | 31 | ||
| 32 | struct amdgpu_device; | ||
| 33 | struct cik_mqd; | ||
| 34 | |||
| 35 | int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd); | ||
| 36 | |||
| 32 | #endif | 37 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 758d636a6f52..aa5a50f5eac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include <linux/firmware.h> | 23 | #include <linux/firmware.h> |
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "amdgpu_gfx.h" | 26 | #include "amdgpu_gfx.h" |
| 27 | #include "vi.h" | 27 | #include "vi.h" |
| @@ -40,7 +40,6 @@ | |||
| 40 | 40 | ||
| 41 | #include "bif/bif_5_0_d.h" | 41 | #include "bif/bif_5_0_d.h" |
| 42 | #include "bif/bif_5_0_sh_mask.h" | 42 | #include "bif/bif_5_0_sh_mask.h" |
| 43 | |||
| 44 | #include "gca/gfx_8_0_d.h" | 43 | #include "gca/gfx_8_0_d.h" |
| 45 | #include "gca/gfx_8_0_enum.h" | 44 | #include "gca/gfx_8_0_enum.h" |
| 46 | #include "gca/gfx_8_0_sh_mask.h" | 45 | #include "gca/gfx_8_0_sh_mask.h" |
| @@ -52,7 +51,7 @@ | |||
| 52 | #include "smu/smu_7_1_3_d.h" | 51 | #include "smu/smu_7_1_3_d.h" |
| 53 | 52 | ||
| 54 | #define GFX8_NUM_GFX_RINGS 1 | 53 | #define GFX8_NUM_GFX_RINGS 1 |
| 55 | #define GFX8_NUM_COMPUTE_RINGS 8 | 54 | #define GFX8_MEC_HPD_SIZE 2048 |
| 56 | 55 | ||
| 57 | #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 | 56 | #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 |
| 58 | #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 | 57 | #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 |
| @@ -657,10 +656,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); | |||
| 657 | static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); | 656 | static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); |
| 658 | static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); | 657 | static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); |
| 659 | static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); | 658 | static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); |
| 660 | static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); | 659 | static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); |
| 661 | static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); | 660 | static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); |
| 662 | static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev); | ||
| 663 | static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev); | ||
| 664 | 661 | ||
| 665 | static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) | 662 | static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) |
| 666 | { | 663 | { |
| @@ -859,7 +856,8 @@ err1: | |||
| 859 | } | 856 | } |
| 860 | 857 | ||
| 861 | 858 | ||
| 862 | static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { | 859 | static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) |
| 860 | { | ||
| 863 | release_firmware(adev->gfx.pfp_fw); | 861 | release_firmware(adev->gfx.pfp_fw); |
| 864 | adev->gfx.pfp_fw = NULL; | 862 | adev->gfx.pfp_fw = NULL; |
| 865 | release_firmware(adev->gfx.me_fw); | 863 | release_firmware(adev->gfx.me_fw); |
| @@ -941,12 +939,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) | |||
| 941 | cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; | 939 | cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; |
| 942 | adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); | 940 | adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); |
| 943 | 941 | ||
| 944 | /* chain ib ucode isn't formal released, just disable it by far | ||
| 945 | * TODO: when ucod ready we should use ucode version to judge if | ||
| 946 | * chain-ib support or not. | ||
| 947 | */ | ||
| 948 | adev->virt.chained_ib_support = false; | ||
| 949 | |||
| 950 | adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); | 942 | adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); |
| 951 | 943 | ||
| 952 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); | 944 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); |
| @@ -960,6 +952,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) | |||
| 960 | adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); | 952 | adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); |
| 961 | adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); | 953 | adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); |
| 962 | 954 | ||
| 955 | /* | ||
| 956 | * Support for MCBP/Virtualization in combination with chained IBs is | ||
| 957 | * formal released on feature version #46 | ||
| 958 | */ | ||
| 959 | if (adev->gfx.ce_feature_version >= 46 && | ||
| 960 | adev->gfx.pfp_feature_version >= 46) { | ||
| 961 | adev->virt.chained_ib_support = true; | ||
| 962 | DRM_INFO("Chained IB support enabled!\n"); | ||
| 963 | } else | ||
| 964 | adev->virt.chained_ib_support = false; | ||
| 965 | |||
| 963 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); | 966 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); |
| 964 | err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); | 967 | err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); |
| 965 | if (err) | 968 | if (err) |
| @@ -1373,64 +1376,22 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) | |||
| 1373 | } | 1376 | } |
| 1374 | } | 1377 | } |
| 1375 | 1378 | ||
| 1376 | static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, | ||
| 1377 | struct amdgpu_ring *ring, | ||
| 1378 | struct amdgpu_irq_src *irq) | ||
| 1379 | { | ||
| 1380 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 1381 | int r = 0; | ||
| 1382 | |||
| 1383 | r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); | ||
| 1384 | if (r) | ||
| 1385 | return r; | ||
| 1386 | |||
| 1387 | ring->adev = NULL; | ||
| 1388 | ring->ring_obj = NULL; | ||
| 1389 | ring->use_doorbell = true; | ||
| 1390 | ring->doorbell_index = AMDGPU_DOORBELL_KIQ; | ||
| 1391 | if (adev->gfx.mec2_fw) { | ||
| 1392 | ring->me = 2; | ||
| 1393 | ring->pipe = 0; | ||
| 1394 | } else { | ||
| 1395 | ring->me = 1; | ||
| 1396 | ring->pipe = 1; | ||
| 1397 | } | ||
| 1398 | |||
| 1399 | ring->queue = 0; | ||
| 1400 | ring->eop_gpu_addr = kiq->eop_gpu_addr; | ||
| 1401 | sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 1402 | r = amdgpu_ring_init(adev, ring, 1024, | ||
| 1403 | irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); | ||
| 1404 | if (r) | ||
| 1405 | dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); | ||
| 1406 | |||
| 1407 | return r; | ||
| 1408 | } | ||
| 1409 | static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, | ||
| 1410 | struct amdgpu_irq_src *irq) | ||
| 1411 | { | ||
| 1412 | amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); | ||
| 1413 | amdgpu_ring_fini(ring); | ||
| 1414 | } | ||
| 1415 | |||
| 1416 | #define MEC_HPD_SIZE 2048 | ||
| 1417 | |||
| 1418 | static int gfx_v8_0_mec_init(struct amdgpu_device *adev) | 1379 | static int gfx_v8_0_mec_init(struct amdgpu_device *adev) |
| 1419 | { | 1380 | { |
| 1420 | int r; | 1381 | int r; |
| 1421 | u32 *hpd; | 1382 | u32 *hpd; |
| 1383 | size_t mec_hpd_size; | ||
| 1422 | 1384 | ||
| 1423 | /* | 1385 | bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); |
| 1424 | * we assign only 1 pipe because all other pipes will | 1386 | |
| 1425 | * be handled by KFD | 1387 | /* take ownership of the relevant compute queues */ |
| 1426 | */ | 1388 | amdgpu_gfx_compute_queue_acquire(adev); |
| 1427 | adev->gfx.mec.num_mec = 1; | 1389 | |
| 1428 | adev->gfx.mec.num_pipe = 1; | 1390 | mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; |
| 1429 | adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; | ||
| 1430 | 1391 | ||
| 1431 | if (adev->gfx.mec.hpd_eop_obj == NULL) { | 1392 | if (adev->gfx.mec.hpd_eop_obj == NULL) { |
| 1432 | r = amdgpu_bo_create(adev, | 1393 | r = amdgpu_bo_create(adev, |
| 1433 | adev->gfx.mec.num_queue * MEC_HPD_SIZE, | 1394 | mec_hpd_size, |
| 1434 | PAGE_SIZE, true, | 1395 | PAGE_SIZE, true, |
| 1435 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, | 1396 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, |
| 1436 | &adev->gfx.mec.hpd_eop_obj); | 1397 | &adev->gfx.mec.hpd_eop_obj); |
| @@ -1459,7 +1420,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) | |||
| 1459 | return r; | 1420 | return r; |
| 1460 | } | 1421 | } |
| 1461 | 1422 | ||
| 1462 | memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); | 1423 | memset(hpd, 0, mec_hpd_size); |
| 1463 | 1424 | ||
| 1464 | amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); | 1425 | amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); |
| 1465 | amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); | 1426 | amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); |
| @@ -1467,38 +1428,6 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) | |||
| 1467 | return 0; | 1428 | return 0; |
| 1468 | } | 1429 | } |
| 1469 | 1430 | ||
| 1470 | static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) | ||
| 1471 | { | ||
| 1472 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 1473 | |||
| 1474 | amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); | ||
| 1475 | } | ||
| 1476 | |||
| 1477 | static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) | ||
| 1478 | { | ||
| 1479 | int r; | ||
| 1480 | u32 *hpd; | ||
| 1481 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 1482 | |||
| 1483 | r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, | ||
| 1484 | AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, | ||
| 1485 | &kiq->eop_gpu_addr, (void **)&hpd); | ||
| 1486 | if (r) { | ||
| 1487 | dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); | ||
| 1488 | return r; | ||
| 1489 | } | ||
| 1490 | |||
| 1491 | memset(hpd, 0, MEC_HPD_SIZE); | ||
| 1492 | |||
| 1493 | r = amdgpu_bo_reserve(kiq->eop_obj, true); | ||
| 1494 | if (unlikely(r != 0)) | ||
| 1495 | dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); | ||
| 1496 | amdgpu_bo_kunmap(kiq->eop_obj); | ||
| 1497 | amdgpu_bo_unreserve(kiq->eop_obj); | ||
| 1498 | |||
| 1499 | return 0; | ||
| 1500 | } | ||
| 1501 | |||
| 1502 | static const u32 vgpr_init_compute_shader[] = | 1431 | static const u32 vgpr_init_compute_shader[] = |
| 1503 | { | 1432 | { |
| 1504 | 0x7e000209, 0x7e020208, | 1433 | 0x7e000209, 0x7e020208, |
| @@ -1907,46 +1836,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) | |||
| 1907 | adev->gfx.config.max_tile_pipes = 2; | 1836 | adev->gfx.config.max_tile_pipes = 2; |
| 1908 | adev->gfx.config.max_sh_per_se = 1; | 1837 | adev->gfx.config.max_sh_per_se = 1; |
| 1909 | adev->gfx.config.max_backends_per_se = 2; | 1838 | adev->gfx.config.max_backends_per_se = 2; |
| 1910 | 1839 | adev->gfx.config.max_cu_per_sh = 8; | |
| 1911 | switch (adev->pdev->revision) { | ||
| 1912 | case 0xc4: | ||
| 1913 | case 0x84: | ||
| 1914 | case 0xc8: | ||
| 1915 | case 0xcc: | ||
| 1916 | case 0xe1: | ||
| 1917 | case 0xe3: | ||
| 1918 | /* B10 */ | ||
| 1919 | adev->gfx.config.max_cu_per_sh = 8; | ||
| 1920 | break; | ||
| 1921 | case 0xc5: | ||
| 1922 | case 0x81: | ||
| 1923 | case 0x85: | ||
| 1924 | case 0xc9: | ||
| 1925 | case 0xcd: | ||
| 1926 | case 0xe2: | ||
| 1927 | case 0xe4: | ||
| 1928 | /* B8 */ | ||
| 1929 | adev->gfx.config.max_cu_per_sh = 6; | ||
| 1930 | break; | ||
| 1931 | case 0xc6: | ||
| 1932 | case 0xca: | ||
| 1933 | case 0xce: | ||
| 1934 | case 0x88: | ||
| 1935 | case 0xe6: | ||
| 1936 | /* B6 */ | ||
| 1937 | adev->gfx.config.max_cu_per_sh = 6; | ||
| 1938 | break; | ||
| 1939 | case 0xc7: | ||
| 1940 | case 0x87: | ||
| 1941 | case 0xcb: | ||
| 1942 | case 0xe5: | ||
| 1943 | case 0x89: | ||
| 1944 | default: | ||
| 1945 | /* B4 */ | ||
| 1946 | adev->gfx.config.max_cu_per_sh = 4; | ||
| 1947 | break; | ||
| 1948 | } | ||
| 1949 | |||
| 1950 | adev->gfx.config.max_texture_channel_caches = 2; | 1840 | adev->gfx.config.max_texture_channel_caches = 2; |
| 1951 | adev->gfx.config.max_gprs = 256; | 1841 | adev->gfx.config.max_gprs = 256; |
| 1952 | adev->gfx.config.max_gs_threads = 32; | 1842 | adev->gfx.config.max_gs_threads = 32; |
| @@ -1963,35 +1853,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) | |||
| 1963 | adev->gfx.config.max_tile_pipes = 2; | 1853 | adev->gfx.config.max_tile_pipes = 2; |
| 1964 | adev->gfx.config.max_sh_per_se = 1; | 1854 | adev->gfx.config.max_sh_per_se = 1; |
| 1965 | adev->gfx.config.max_backends_per_se = 1; | 1855 | adev->gfx.config.max_backends_per_se = 1; |
| 1966 | 1856 | adev->gfx.config.max_cu_per_sh = 3; | |
| 1967 | switch (adev->pdev->revision) { | ||
| 1968 | case 0x80: | ||
| 1969 | case 0x81: | ||
| 1970 | case 0xc0: | ||
| 1971 | case 0xc1: | ||
| 1972 | case 0xc2: | ||
| 1973 | case 0xc4: | ||
| 1974 | case 0xc8: | ||
| 1975 | case 0xc9: | ||
| 1976 | case 0xd6: | ||
| 1977 | case 0xda: | ||
| 1978 | case 0xe9: | ||
| 1979 | case 0xea: | ||
| 1980 | adev->gfx.config.max_cu_per_sh = 3; | ||
| 1981 | break; | ||
| 1982 | case 0x83: | ||
| 1983 | case 0xd0: | ||
| 1984 | case 0xd1: | ||
| 1985 | case 0xd2: | ||
| 1986 | case 0xd4: | ||
| 1987 | case 0xdb: | ||
| 1988 | case 0xe1: | ||
| 1989 | case 0xe2: | ||
| 1990 | default: | ||
| 1991 | adev->gfx.config.max_cu_per_sh = 2; | ||
| 1992 | break; | ||
| 1993 | } | ||
| 1994 | |||
| 1995 | adev->gfx.config.max_texture_channel_caches = 2; | 1857 | adev->gfx.config.max_texture_channel_caches = 2; |
| 1996 | adev->gfx.config.max_gprs = 256; | 1858 | adev->gfx.config.max_gprs = 256; |
| 1997 | adev->gfx.config.max_gs_threads = 16; | 1859 | adev->gfx.config.max_gs_threads = 16; |
| @@ -2083,13 +1945,67 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) | |||
| 2083 | return 0; | 1945 | return 0; |
| 2084 | } | 1946 | } |
| 2085 | 1947 | ||
| 1948 | static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, | ||
| 1949 | int mec, int pipe, int queue) | ||
| 1950 | { | ||
| 1951 | int r; | ||
| 1952 | unsigned irq_type; | ||
| 1953 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; | ||
| 1954 | |||
| 1955 | ring = &adev->gfx.compute_ring[ring_id]; | ||
| 1956 | |||
| 1957 | /* mec0 is me1 */ | ||
| 1958 | ring->me = mec + 1; | ||
| 1959 | ring->pipe = pipe; | ||
| 1960 | ring->queue = queue; | ||
| 1961 | |||
| 1962 | ring->ring_obj = NULL; | ||
| 1963 | ring->use_doorbell = true; | ||
| 1964 | ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; | ||
| 1965 | ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr | ||
| 1966 | + (ring_id * GFX8_MEC_HPD_SIZE); | ||
| 1967 | sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 1968 | |||
| 1969 | irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP | ||
| 1970 | + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) | ||
| 1971 | + ring->pipe; | ||
| 1972 | |||
| 1973 | /* type-2 packets are deprecated on MEC, use type-3 instead */ | ||
| 1974 | r = amdgpu_ring_init(adev, ring, 1024, | ||
| 1975 | &adev->gfx.eop_irq, irq_type); | ||
| 1976 | if (r) | ||
| 1977 | return r; | ||
| 1978 | |||
| 1979 | |||
| 1980 | return 0; | ||
| 1981 | } | ||
| 1982 | |||
| 2086 | static int gfx_v8_0_sw_init(void *handle) | 1983 | static int gfx_v8_0_sw_init(void *handle) |
| 2087 | { | 1984 | { |
| 2088 | int i, r; | 1985 | int i, j, k, r, ring_id; |
| 2089 | struct amdgpu_ring *ring; | 1986 | struct amdgpu_ring *ring; |
| 2090 | struct amdgpu_kiq *kiq; | 1987 | struct amdgpu_kiq *kiq; |
| 2091 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1988 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 2092 | 1989 | ||
| 1990 | switch (adev->asic_type) { | ||
| 1991 | case CHIP_FIJI: | ||
| 1992 | case CHIP_TONGA: | ||
| 1993 | case CHIP_POLARIS11: | ||
| 1994 | case CHIP_POLARIS12: | ||
| 1995 | case CHIP_POLARIS10: | ||
| 1996 | case CHIP_CARRIZO: | ||
| 1997 | adev->gfx.mec.num_mec = 2; | ||
| 1998 | break; | ||
| 1999 | case CHIP_TOPAZ: | ||
| 2000 | case CHIP_STONEY: | ||
| 2001 | default: | ||
| 2002 | adev->gfx.mec.num_mec = 1; | ||
| 2003 | break; | ||
| 2004 | } | ||
| 2005 | |||
| 2006 | adev->gfx.mec.num_pipe_per_mec = 4; | ||
| 2007 | adev->gfx.mec.num_queue_per_pipe = 8; | ||
| 2008 | |||
| 2093 | /* KIQ event */ | 2009 | /* KIQ event */ |
| 2094 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); | 2010 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); |
| 2095 | if (r) | 2011 | if (r) |
| @@ -2151,49 +2067,41 @@ static int gfx_v8_0_sw_init(void *handle) | |||
| 2151 | return r; | 2067 | return r; |
| 2152 | } | 2068 | } |
| 2153 | 2069 | ||
| 2154 | /* set up the compute queues */ | ||
| 2155 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 2156 | unsigned irq_type; | ||
| 2157 | 2070 | ||
| 2158 | /* max 32 queues per MEC */ | 2071 | /* set up the compute queues - allocate horizontally across pipes */ |
| 2159 | if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { | 2072 | ring_id = 0; |
| 2160 | DRM_ERROR("Too many (%d) compute rings!\n", i); | 2073 | for (i = 0; i < adev->gfx.mec.num_mec; ++i) { |
| 2161 | break; | 2074 | for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { |
| 2075 | for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { | ||
| 2076 | if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) | ||
| 2077 | continue; | ||
| 2078 | |||
| 2079 | r = gfx_v8_0_compute_ring_init(adev, | ||
| 2080 | ring_id, | ||
| 2081 | i, k, j); | ||
| 2082 | if (r) | ||
| 2083 | return r; | ||
| 2084 | |||
| 2085 | ring_id++; | ||
| 2086 | } | ||
| 2162 | } | 2087 | } |
| 2163 | ring = &adev->gfx.compute_ring[i]; | ||
| 2164 | ring->ring_obj = NULL; | ||
| 2165 | ring->use_doorbell = true; | ||
| 2166 | ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; | ||
| 2167 | ring->me = 1; /* first MEC */ | ||
| 2168 | ring->pipe = i / 8; | ||
| 2169 | ring->queue = i % 8; | ||
| 2170 | ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); | ||
| 2171 | sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 2172 | irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; | ||
| 2173 | /* type-2 packets are deprecated on MEC, use type-3 instead */ | ||
| 2174 | r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, | ||
| 2175 | irq_type); | ||
| 2176 | if (r) | ||
| 2177 | return r; | ||
| 2178 | } | 2088 | } |
| 2179 | 2089 | ||
| 2180 | if (amdgpu_sriov_vf(adev)) { | 2090 | r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); |
| 2181 | r = gfx_v8_0_kiq_init(adev); | 2091 | if (r) { |
| 2182 | if (r) { | 2092 | DRM_ERROR("Failed to init KIQ BOs!\n"); |
| 2183 | DRM_ERROR("Failed to init KIQ BOs!\n"); | 2093 | return r; |
| 2184 | return r; | 2094 | } |
| 2185 | } | ||
| 2186 | 2095 | ||
| 2187 | kiq = &adev->gfx.kiq; | 2096 | kiq = &adev->gfx.kiq; |
| 2188 | r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); | 2097 | r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); |
| 2189 | if (r) | 2098 | if (r) |
| 2190 | return r; | 2099 | return r; |
| 2191 | 2100 | ||
| 2192 | /* create MQD for all compute queues as wel as KIQ for SRIOV case */ | 2101 | /* create MQD for all compute queues as well as KIQ for SRIOV case */ |
| 2193 | r = gfx_v8_0_compute_mqd_sw_init(adev); | 2102 | r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); |
| 2194 | if (r) | 2103 | if (r) |
| 2195 | return r; | 2104 | return r; |
| 2196 | } | ||
| 2197 | 2105 | ||
| 2198 | /* reserve GDS, GWS and OA resource for gfx */ | 2106 | /* reserve GDS, GWS and OA resource for gfx */ |
| 2199 | r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, | 2107 | r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, |
| @@ -2237,11 +2145,9 @@ static int gfx_v8_0_sw_fini(void *handle) | |||
| 2237 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 2145 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
| 2238 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); | 2146 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); |
| 2239 | 2147 | ||
| 2240 | if (amdgpu_sriov_vf(adev)) { | 2148 | amdgpu_gfx_compute_mqd_sw_fini(adev); |
| 2241 | gfx_v8_0_compute_mqd_sw_fini(adev); | 2149 | amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); |
| 2242 | gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); | 2150 | amdgpu_gfx_kiq_fini(adev); |
| 2243 | gfx_v8_0_kiq_fini(adev); | ||
| 2244 | } | ||
| 2245 | 2151 | ||
| 2246 | gfx_v8_0_mec_fini(adev); | 2152 | gfx_v8_0_mec_fini(adev); |
| 2247 | gfx_v8_0_rlc_fini(adev); | 2153 | gfx_v8_0_rlc_fini(adev); |
| @@ -3594,11 +3500,6 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, | |||
| 3594 | WREG32(mmGRBM_GFX_INDEX, data); | 3500 | WREG32(mmGRBM_GFX_INDEX, data); |
| 3595 | } | 3501 | } |
| 3596 | 3502 | ||
| 3597 | static u32 gfx_v8_0_create_bitmask(u32 bit_width) | ||
| 3598 | { | ||
| 3599 | return (u32)((1ULL << bit_width) - 1); | ||
| 3600 | } | ||
| 3601 | |||
| 3602 | static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) | 3503 | static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) |
| 3603 | { | 3504 | { |
| 3604 | u32 data, mask; | 3505 | u32 data, mask; |
| @@ -3608,8 +3509,8 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) | |||
| 3608 | 3509 | ||
| 3609 | data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); | 3510 | data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); |
| 3610 | 3511 | ||
| 3611 | mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / | 3512 | mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / |
| 3612 | adev->gfx.config.max_sh_per_se); | 3513 | adev->gfx.config.max_sh_per_se); |
| 3613 | 3514 | ||
| 3614 | return (~data) & mask; | 3515 | return (~data) & mask; |
| 3615 | } | 3516 | } |
| @@ -3823,7 +3724,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) | |||
| 3823 | /** | 3724 | /** |
| 3824 | * gfx_v8_0_init_compute_vmid - gart enable | 3725 | * gfx_v8_0_init_compute_vmid - gart enable |
| 3825 | * | 3726 | * |
| 3826 | * @rdev: amdgpu_device pointer | 3727 | * @adev: amdgpu_device pointer |
| 3827 | * | 3728 | * |
| 3828 | * Initialize compute vmid sh_mem registers | 3729 | * Initialize compute vmid sh_mem registers |
| 3829 | * | 3730 | * |
| @@ -4481,6 +4382,39 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) | |||
| 4481 | 4382 | ||
| 4482 | return 0; | 4383 | return 0; |
| 4483 | } | 4384 | } |
| 4385 | static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) | ||
| 4386 | { | ||
| 4387 | u32 tmp; | ||
| 4388 | /* no gfx doorbells on iceland */ | ||
| 4389 | if (adev->asic_type == CHIP_TOPAZ) | ||
| 4390 | return; | ||
| 4391 | |||
| 4392 | tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); | ||
| 4393 | |||
| 4394 | if (ring->use_doorbell) { | ||
| 4395 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, | ||
| 4396 | DOORBELL_OFFSET, ring->doorbell_index); | ||
| 4397 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, | ||
| 4398 | DOORBELL_HIT, 0); | ||
| 4399 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, | ||
| 4400 | DOORBELL_EN, 1); | ||
| 4401 | } else { | ||
| 4402 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); | ||
| 4403 | } | ||
| 4404 | |||
| 4405 | WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); | ||
| 4406 | |||
| 4407 | if (adev->flags & AMD_IS_APU) | ||
| 4408 | return; | ||
| 4409 | |||
| 4410 | tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, | ||
| 4411 | DOORBELL_RANGE_LOWER, | ||
| 4412 | AMDGPU_DOORBELL_GFX_RING0); | ||
| 4413 | WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); | ||
| 4414 | |||
| 4415 | WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, | ||
| 4416 | CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); | ||
| 4417 | } | ||
| 4484 | 4418 | ||
| 4485 | static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) | 4419 | static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) |
| 4486 | { | 4420 | { |
| @@ -4528,34 +4462,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) | |||
| 4528 | WREG32(mmCP_RB0_BASE, rb_addr); | 4462 | WREG32(mmCP_RB0_BASE, rb_addr); |
| 4529 | WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); | 4463 | WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); |
| 4530 | 4464 | ||
| 4531 | /* no gfx doorbells on iceland */ | 4465 | gfx_v8_0_set_cpg_door_bell(adev, ring); |
| 4532 | if (adev->asic_type != CHIP_TOPAZ) { | ||
| 4533 | tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); | ||
| 4534 | if (ring->use_doorbell) { | ||
| 4535 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, | ||
| 4536 | DOORBELL_OFFSET, ring->doorbell_index); | ||
| 4537 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, | ||
| 4538 | DOORBELL_HIT, 0); | ||
| 4539 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, | ||
| 4540 | DOORBELL_EN, 1); | ||
| 4541 | } else { | ||
| 4542 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, | ||
| 4543 | DOORBELL_EN, 0); | ||
| 4544 | } | ||
| 4545 | WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); | ||
| 4546 | |||
| 4547 | if (adev->asic_type == CHIP_TONGA) { | ||
| 4548 | tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, | ||
| 4549 | DOORBELL_RANGE_LOWER, | ||
| 4550 | AMDGPU_DOORBELL_GFX_RING0); | ||
| 4551 | WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); | ||
| 4552 | |||
| 4553 | WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, | ||
| 4554 | CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); | ||
| 4555 | } | ||
| 4556 | |||
| 4557 | } | ||
| 4558 | |||
| 4559 | /* start the ring */ | 4466 | /* start the ring */ |
| 4560 | amdgpu_ring_clear_ring(ring); | 4467 | amdgpu_ring_clear_ring(ring); |
| 4561 | gfx_v8_0_cp_gfx_start(adev); | 4468 | gfx_v8_0_cp_gfx_start(adev); |
| @@ -4628,29 +4535,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) | |||
| 4628 | return 0; | 4535 | return 0; |
| 4629 | } | 4536 | } |
| 4630 | 4537 | ||
| 4631 | static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) | ||
| 4632 | { | ||
| 4633 | int i, r; | ||
| 4634 | |||
| 4635 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 4636 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | ||
| 4637 | |||
| 4638 | if (ring->mqd_obj) { | ||
| 4639 | r = amdgpu_bo_reserve(ring->mqd_obj, false); | ||
| 4640 | if (unlikely(r != 0)) | ||
| 4641 | dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); | ||
| 4642 | |||
| 4643 | amdgpu_bo_unpin(ring->mqd_obj); | ||
| 4644 | amdgpu_bo_unreserve(ring->mqd_obj); | ||
| 4645 | |||
| 4646 | amdgpu_bo_unref(&ring->mqd_obj); | ||
| 4647 | ring->mqd_obj = NULL; | ||
| 4648 | ring->mqd_ptr = NULL; | ||
| 4649 | ring->mqd_gpu_addr = 0; | ||
| 4650 | } | ||
| 4651 | } | ||
| 4652 | } | ||
| 4653 | |||
| 4654 | /* KIQ functions */ | 4538 | /* KIQ functions */ |
| 4655 | static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) | 4539 | static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) |
| 4656 | { | 4540 | { |
| @@ -4666,45 +4550,111 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) | |||
| 4666 | WREG32(mmRLC_CP_SCHEDULERS, tmp); | 4550 | WREG32(mmRLC_CP_SCHEDULERS, tmp); |
| 4667 | } | 4551 | } |
| 4668 | 4552 | ||
| 4669 | static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring) | 4553 | static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) |
| 4670 | { | 4554 | { |
| 4671 | amdgpu_ring_alloc(ring, 8); | 4555 | struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; |
| 4556 | uint32_t scratch, tmp = 0; | ||
| 4557 | uint64_t queue_mask = 0; | ||
| 4558 | int r, i; | ||
| 4559 | |||
| 4560 | for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { | ||
| 4561 | if (!test_bit(i, adev->gfx.mec.queue_bitmap)) | ||
| 4562 | continue; | ||
| 4563 | |||
| 4564 | /* This situation may be hit in the future if a new HW | ||
| 4565 | * generation exposes more than 64 queues. If so, the | ||
| 4566 | * definition of queue_mask needs updating */ | ||
| 4567 | if (WARN_ON(i > (sizeof(queue_mask)*8))) { | ||
| 4568 | DRM_ERROR("Invalid KCQ enabled: %d\n", i); | ||
| 4569 | break; | ||
| 4570 | } | ||
| 4571 | |||
| 4572 | queue_mask |= (1ull << i); | ||
| 4573 | } | ||
| 4574 | |||
| 4575 | r = amdgpu_gfx_scratch_get(adev, &scratch); | ||
| 4576 | if (r) { | ||
| 4577 | DRM_ERROR("Failed to get scratch reg (%d).\n", r); | ||
| 4578 | return r; | ||
| 4579 | } | ||
| 4580 | WREG32(scratch, 0xCAFEDEAD); | ||
| 4581 | |||
| 4582 | r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); | ||
| 4583 | if (r) { | ||
| 4584 | DRM_ERROR("Failed to lock KIQ (%d).\n", r); | ||
| 4585 | amdgpu_gfx_scratch_free(adev, scratch); | ||
| 4586 | return r; | ||
| 4587 | } | ||
| 4672 | /* set resources */ | 4588 | /* set resources */ |
| 4673 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); | 4589 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); |
| 4674 | amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ | 4590 | amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ |
| 4675 | amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ | 4591 | amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ |
| 4676 | amdgpu_ring_write(ring, 0); /* queue mask hi */ | 4592 | amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ |
| 4677 | amdgpu_ring_write(ring, 0); /* gws mask lo */ | 4593 | amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ |
| 4678 | amdgpu_ring_write(ring, 0); /* gws mask hi */ | 4594 | amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ |
| 4679 | amdgpu_ring_write(ring, 0); /* oac mask */ | 4595 | amdgpu_ring_write(kiq_ring, 0); /* oac mask */ |
| 4680 | amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ | 4596 | amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ |
| 4681 | amdgpu_ring_commit(ring); | 4597 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
| 4682 | udelay(50); | 4598 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; |
| 4599 | uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); | ||
| 4600 | uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | ||
| 4601 | |||
| 4602 | /* map queues */ | ||
| 4603 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); | ||
| 4604 | /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ | ||
| 4605 | amdgpu_ring_write(kiq_ring, | ||
| 4606 | PACKET3_MAP_QUEUES_NUM_QUEUES(1)); | ||
| 4607 | amdgpu_ring_write(kiq_ring, | ||
| 4608 | PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | | ||
| 4609 | PACKET3_MAP_QUEUES_QUEUE(ring->queue) | | ||
| 4610 | PACKET3_MAP_QUEUES_PIPE(ring->pipe) | | ||
| 4611 | PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */ | ||
| 4612 | amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); | ||
| 4613 | amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); | ||
| 4614 | amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); | ||
| 4615 | amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); | ||
| 4616 | } | ||
| 4617 | /* write to scratch for completion */ | ||
| 4618 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | ||
| 4619 | amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | ||
| 4620 | amdgpu_ring_write(kiq_ring, 0xDEADBEEF); | ||
| 4621 | amdgpu_ring_commit(kiq_ring); | ||
| 4622 | |||
| 4623 | for (i = 0; i < adev->usec_timeout; i++) { | ||
| 4624 | tmp = RREG32(scratch); | ||
| 4625 | if (tmp == 0xDEADBEEF) | ||
| 4626 | break; | ||
| 4627 | DRM_UDELAY(1); | ||
| 4628 | } | ||
| 4629 | if (i >= adev->usec_timeout) { | ||
| 4630 | DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", | ||
| 4631 | scratch, tmp); | ||
| 4632 | r = -EINVAL; | ||
| 4633 | } | ||
| 4634 | amdgpu_gfx_scratch_free(adev, scratch); | ||
| 4635 | |||
| 4636 | return r; | ||
| 4683 | } | 4637 | } |
| 4684 | 4638 | ||
| 4685 | static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, | 4639 | static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) |
| 4686 | struct amdgpu_ring *ring) | ||
| 4687 | { | 4640 | { |
| 4688 | struct amdgpu_device *adev = kiq_ring->adev; | 4641 | int i, r = 0; |
| 4689 | uint64_t mqd_addr, wptr_addr; | ||
| 4690 | 4642 | ||
| 4691 | mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); | 4643 | if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { |
| 4692 | wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | 4644 | WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); |
| 4693 | amdgpu_ring_alloc(kiq_ring, 8); | 4645 | for (i = 0; i < adev->usec_timeout; i++) { |
| 4646 | if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) | ||
| 4647 | break; | ||
| 4648 | udelay(1); | ||
| 4649 | } | ||
| 4650 | if (i == adev->usec_timeout) | ||
| 4651 | r = -ETIMEDOUT; | ||
| 4652 | } | ||
| 4653 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); | ||
| 4654 | WREG32(mmCP_HQD_PQ_RPTR, 0); | ||
| 4655 | WREG32(mmCP_HQD_PQ_WPTR, 0); | ||
| 4694 | 4656 | ||
| 4695 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); | 4657 | return r; |
| 4696 | /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ | ||
| 4697 | amdgpu_ring_write(kiq_ring, 0x21010000); | ||
| 4698 | amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | | ||
| 4699 | (ring->queue << 26) | | ||
| 4700 | (ring->pipe << 29) | | ||
| 4701 | ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */ | ||
| 4702 | amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); | ||
| 4703 | amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); | ||
| 4704 | amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); | ||
| 4705 | amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); | ||
| 4706 | amdgpu_ring_commit(kiq_ring); | ||
| 4707 | udelay(50); | ||
| 4708 | } | 4658 | } |
| 4709 | 4659 | ||
| 4710 | static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) | 4660 | static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) |
| @@ -4721,7 +4671,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) | |||
| 4721 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; | 4671 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; |
| 4722 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; | 4672 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; |
| 4723 | mqd->compute_misc_reserved = 0x00000003; | 4673 | mqd->compute_misc_reserved = 0x00000003; |
| 4724 | 4674 | if (!(adev->flags & AMD_IS_APU)) { | |
| 4675 | mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr | ||
| 4676 | + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); | ||
| 4677 | mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr | ||
| 4678 | + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); | ||
| 4679 | } | ||
| 4725 | eop_base_addr = ring->eop_gpu_addr >> 8; | 4680 | eop_base_addr = ring->eop_gpu_addr >> 8; |
| 4726 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; | 4681 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; |
| 4727 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); | 4682 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); |
| @@ -4729,7 +4684,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) | |||
| 4729 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ | 4684 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ |
| 4730 | tmp = RREG32(mmCP_HQD_EOP_CONTROL); | 4685 | tmp = RREG32(mmCP_HQD_EOP_CONTROL); |
| 4731 | tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, | 4686 | tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, |
| 4732 | (order_base_2(MEC_HPD_SIZE / 4) - 1)); | 4687 | (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); |
| 4733 | 4688 | ||
| 4734 | mqd->cp_hqd_eop_control = tmp; | 4689 | mqd->cp_hqd_eop_control = tmp; |
| 4735 | 4690 | ||
| @@ -4741,11 +4696,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) | |||
| 4741 | 4696 | ||
| 4742 | mqd->cp_hqd_pq_doorbell_control = tmp; | 4697 | mqd->cp_hqd_pq_doorbell_control = tmp; |
| 4743 | 4698 | ||
| 4744 | /* disable the queue if it's active */ | ||
| 4745 | mqd->cp_hqd_dequeue_request = 0; | ||
| 4746 | mqd->cp_hqd_pq_rptr = 0; | ||
| 4747 | mqd->cp_hqd_pq_wptr = 0; | ||
| 4748 | |||
| 4749 | /* set the pointer to the MQD */ | 4699 | /* set the pointer to the MQD */ |
| 4750 | mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; | 4700 | mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; |
| 4751 | mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); | 4701 | mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); |
| @@ -4815,149 +4765,160 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) | |||
| 4815 | tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); | 4765 | tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); |
| 4816 | mqd->cp_hqd_persistent_state = tmp; | 4766 | mqd->cp_hqd_persistent_state = tmp; |
| 4817 | 4767 | ||
| 4768 | /* set MTYPE */ | ||
| 4769 | tmp = RREG32(mmCP_HQD_IB_CONTROL); | ||
| 4770 | tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); | ||
| 4771 | tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); | ||
| 4772 | mqd->cp_hqd_ib_control = tmp; | ||
| 4773 | |||
| 4774 | tmp = RREG32(mmCP_HQD_IQ_TIMER); | ||
| 4775 | tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); | ||
| 4776 | mqd->cp_hqd_iq_timer = tmp; | ||
| 4777 | |||
| 4778 | tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); | ||
| 4779 | tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); | ||
| 4780 | mqd->cp_hqd_ctx_save_control = tmp; | ||
| 4781 | |||
| 4782 | /* defaults */ | ||
| 4783 | mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); | ||
| 4784 | mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); | ||
| 4785 | mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); | ||
| 4786 | mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); | ||
| 4787 | mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); | ||
| 4788 | mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); | ||
| 4789 | mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); | ||
| 4790 | mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); | ||
| 4791 | mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); | ||
| 4792 | mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); | ||
| 4793 | mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); | ||
| 4794 | mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); | ||
| 4795 | mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); | ||
| 4796 | mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); | ||
| 4797 | mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); | ||
| 4798 | |||
| 4818 | /* activate the queue */ | 4799 | /* activate the queue */ |
| 4819 | mqd->cp_hqd_active = 1; | 4800 | mqd->cp_hqd_active = 1; |
| 4820 | 4801 | ||
| 4821 | return 0; | 4802 | return 0; |
| 4822 | } | 4803 | } |
| 4823 | 4804 | ||
| 4824 | static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) | 4805 | int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, |
| 4806 | struct vi_mqd *mqd) | ||
| 4825 | { | 4807 | { |
| 4826 | struct amdgpu_device *adev = ring->adev; | 4808 | uint32_t mqd_reg; |
| 4827 | struct vi_mqd *mqd = ring->mqd_ptr; | 4809 | uint32_t *mqd_data; |
| 4828 | int j; | 4810 | |
| 4811 | /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ | ||
| 4812 | mqd_data = &mqd->cp_mqd_base_addr_lo; | ||
| 4829 | 4813 | ||
| 4830 | /* disable wptr polling */ | 4814 | /* disable wptr polling */ |
| 4831 | WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); | 4815 | WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); |
| 4832 | 4816 | ||
| 4833 | WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); | 4817 | /* program all HQD registers */ |
| 4834 | WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); | 4818 | for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) |
| 4819 | WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); | ||
| 4835 | 4820 | ||
| 4836 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ | 4821 | /* Tonga errata: EOP RPTR/WPTR should be left unmodified. |
| 4837 | WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); | 4822 | * This is safe since EOP RPTR==WPTR for any inactive HQD |
| 4838 | 4823 | * on ASICs that do not support context-save. | |
| 4839 | /* enable doorbell? */ | 4824 | * EOP writes/reads can start anywhere in the ring. |
| 4840 | WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); | 4825 | */ |
| 4841 | 4826 | if (adev->asic_type != CHIP_TONGA) { | |
| 4842 | /* disable the queue if it's active */ | 4827 | WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); |
| 4843 | if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { | 4828 | WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); |
| 4844 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); | 4829 | WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); |
| 4845 | for (j = 0; j < adev->usec_timeout; j++) { | ||
| 4846 | if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) | ||
| 4847 | break; | ||
| 4848 | udelay(1); | ||
| 4849 | } | ||
| 4850 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); | ||
| 4851 | WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); | ||
| 4852 | WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); | ||
| 4853 | } | 4830 | } |
| 4854 | 4831 | ||
| 4855 | /* set the pointer to the MQD */ | 4832 | for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) |
| 4856 | WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); | 4833 | WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); |
| 4857 | WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); | ||
| 4858 | |||
| 4859 | /* set MQD vmid to 0 */ | ||
| 4860 | WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); | ||
| 4861 | 4834 | ||
| 4862 | /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ | 4835 | /* activate the HQD */ |
| 4863 | WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); | 4836 | for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) |
| 4864 | WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); | 4837 | WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); |
| 4865 | 4838 | ||
| 4866 | /* set up the HQD, this is similar to CP_RB0_CNTL */ | 4839 | return 0; |
| 4867 | WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); | 4840 | } |
| 4868 | |||
| 4869 | /* set the wb address whether it's enabled or not */ | ||
| 4870 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, | ||
| 4871 | mqd->cp_hqd_pq_rptr_report_addr_lo); | ||
| 4872 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, | ||
| 4873 | mqd->cp_hqd_pq_rptr_report_addr_hi); | ||
| 4874 | |||
| 4875 | /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ | ||
| 4876 | WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); | ||
| 4877 | WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); | ||
| 4878 | |||
| 4879 | /* enable the doorbell if requested */ | ||
| 4880 | if (ring->use_doorbell) { | ||
| 4881 | if ((adev->asic_type == CHIP_CARRIZO) || | ||
| 4882 | (adev->asic_type == CHIP_FIJI) || | ||
| 4883 | (adev->asic_type == CHIP_STONEY)) { | ||
| 4884 | WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, | ||
| 4885 | AMDGPU_DOORBELL_KIQ << 2); | ||
| 4886 | WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, | ||
| 4887 | AMDGPU_DOORBELL_MEC_RING7 << 2); | ||
| 4888 | } | ||
| 4889 | } | ||
| 4890 | WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); | ||
| 4891 | 4841 | ||
| 4892 | /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ | 4842 | static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) |
| 4893 | WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); | 4843 | { |
| 4844 | struct amdgpu_device *adev = ring->adev; | ||
| 4845 | struct vi_mqd *mqd = ring->mqd_ptr; | ||
| 4846 | int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; | ||
| 4894 | 4847 | ||
| 4895 | /* set the vmid for the queue */ | 4848 | gfx_v8_0_kiq_setting(ring); |
| 4896 | WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); | ||
| 4897 | 4849 | ||
| 4898 | WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); | 4850 | if (adev->gfx.in_reset) { /* for GPU_RESET case */ |
| 4851 | /* reset MQD to a clean status */ | ||
| 4852 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | ||
| 4853 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); | ||
| 4899 | 4854 | ||
| 4900 | /* activate the queue */ | 4855 | /* reset ring buffer */ |
| 4901 | WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); | 4856 | ring->wptr = 0; |
| 4857 | amdgpu_ring_clear_ring(ring); | ||
| 4858 | mutex_lock(&adev->srbm_mutex); | ||
| 4859 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 4860 | gfx_v8_0_mqd_commit(adev, mqd); | ||
| 4861 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
| 4862 | mutex_unlock(&adev->srbm_mutex); | ||
| 4863 | } else { | ||
| 4864 | memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); | ||
| 4865 | ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; | ||
| 4866 | ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; | ||
| 4867 | mutex_lock(&adev->srbm_mutex); | ||
| 4868 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 4869 | gfx_v8_0_mqd_init(ring); | ||
| 4870 | gfx_v8_0_mqd_commit(adev, mqd); | ||
| 4871 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
| 4872 | mutex_unlock(&adev->srbm_mutex); | ||
| 4902 | 4873 | ||
| 4903 | if (ring->use_doorbell) | 4874 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
| 4904 | WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); | 4875 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); |
| 4876 | } | ||
| 4905 | 4877 | ||
| 4906 | return 0; | 4878 | return 0; |
| 4907 | } | 4879 | } |
| 4908 | 4880 | ||
| 4909 | static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) | 4881 | static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) |
| 4910 | { | 4882 | { |
| 4911 | struct amdgpu_device *adev = ring->adev; | 4883 | struct amdgpu_device *adev = ring->adev; |
| 4912 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 4913 | struct vi_mqd *mqd = ring->mqd_ptr; | 4884 | struct vi_mqd *mqd = ring->mqd_ptr; |
| 4914 | bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); | 4885 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; |
| 4915 | int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; | ||
| 4916 | 4886 | ||
| 4917 | if (is_kiq) { | 4887 | if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { |
| 4918 | gfx_v8_0_kiq_setting(&kiq->ring); | 4888 | memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); |
| 4919 | } else { | 4889 | ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; |
| 4920 | mqd_idx = ring - &adev->gfx.compute_ring[0]; | 4890 | ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; |
| 4921 | } | ||
| 4922 | |||
| 4923 | if (!adev->gfx.in_reset) { | ||
| 4924 | memset((void *)mqd, 0, sizeof(*mqd)); | ||
| 4925 | mutex_lock(&adev->srbm_mutex); | 4891 | mutex_lock(&adev->srbm_mutex); |
| 4926 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | 4892 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
| 4927 | gfx_v8_0_mqd_init(ring); | 4893 | gfx_v8_0_mqd_init(ring); |
| 4928 | if (is_kiq) | ||
| 4929 | gfx_v8_0_kiq_init_register(ring); | ||
| 4930 | vi_srbm_select(adev, 0, 0, 0, 0); | 4894 | vi_srbm_select(adev, 0, 0, 0, 0); |
| 4931 | mutex_unlock(&adev->srbm_mutex); | 4895 | mutex_unlock(&adev->srbm_mutex); |
| 4932 | 4896 | ||
| 4933 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4897 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
| 4934 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); | 4898 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); |
| 4935 | } else { /* for GPU_RESET case */ | 4899 | } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ |
| 4936 | /* reset MQD to a clean status */ | 4900 | /* reset MQD to a clean status */ |
| 4937 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4901 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
| 4938 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); | 4902 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); |
| 4939 | |||
| 4940 | /* reset ring buffer */ | 4903 | /* reset ring buffer */ |
| 4941 | ring->wptr = 0; | 4904 | ring->wptr = 0; |
| 4942 | amdgpu_ring_clear_ring(ring); | 4905 | amdgpu_ring_clear_ring(ring); |
| 4943 | 4906 | } else { | |
| 4944 | if (is_kiq) { | 4907 | amdgpu_ring_clear_ring(ring); |
| 4945 | mutex_lock(&adev->srbm_mutex); | ||
| 4946 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 4947 | gfx_v8_0_kiq_init_register(ring); | ||
| 4948 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
| 4949 | mutex_unlock(&adev->srbm_mutex); | ||
| 4950 | } | ||
| 4951 | } | 4908 | } |
| 4952 | |||
| 4953 | if (is_kiq) | ||
| 4954 | gfx_v8_0_kiq_enable(ring); | ||
| 4955 | else | ||
| 4956 | gfx_v8_0_map_queue_enable(&kiq->ring, ring); | ||
| 4957 | |||
| 4958 | return 0; | 4909 | return 0; |
| 4959 | } | 4910 | } |
| 4960 | 4911 | ||
| 4912 | static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) | ||
| 4913 | { | ||
| 4914 | if (adev->asic_type > CHIP_TONGA) { | ||
| 4915 | WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); | ||
| 4916 | WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); | ||
| 4917 | } | ||
| 4918 | /* enable doorbells */ | ||
| 4919 | WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); | ||
| 4920 | } | ||
| 4921 | |||
| 4961 | static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) | 4922 | static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) |
| 4962 | { | 4923 | { |
| 4963 | struct amdgpu_ring *ring = NULL; | 4924 | struct amdgpu_ring *ring = NULL; |
| @@ -4981,13 +4942,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) | |||
| 4981 | if (r) | 4942 | if (r) |
| 4982 | goto done; | 4943 | goto done; |
| 4983 | 4944 | ||
| 4984 | ring->ready = true; | ||
| 4985 | r = amdgpu_ring_test_ring(ring); | ||
| 4986 | if (r) { | ||
| 4987 | ring->ready = false; | ||
| 4988 | goto done; | ||
| 4989 | } | ||
| 4990 | |||
| 4991 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 4945 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
| 4992 | ring = &adev->gfx.compute_ring[i]; | 4946 | ring = &adev->gfx.compute_ring[i]; |
| 4993 | 4947 | ||
| @@ -4996,272 +4950,41 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) | |||
| 4996 | goto done; | 4950 | goto done; |
| 4997 | r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); | 4951 | r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); |
| 4998 | if (!r) { | 4952 | if (!r) { |
| 4999 | r = gfx_v8_0_kiq_init_queue(ring); | 4953 | r = gfx_v8_0_kcq_init_queue(ring); |
| 5000 | amdgpu_bo_kunmap(ring->mqd_obj); | 4954 | amdgpu_bo_kunmap(ring->mqd_obj); |
| 5001 | ring->mqd_ptr = NULL; | 4955 | ring->mqd_ptr = NULL; |
| 5002 | } | 4956 | } |
| 5003 | amdgpu_bo_unreserve(ring->mqd_obj); | 4957 | amdgpu_bo_unreserve(ring->mqd_obj); |
| 5004 | if (r) | 4958 | if (r) |
| 5005 | goto done; | 4959 | goto done; |
| 5006 | |||
| 5007 | ring->ready = true; | ||
| 5008 | r = amdgpu_ring_test_ring(ring); | ||
| 5009 | if (r) | ||
| 5010 | ring->ready = false; | ||
| 5011 | } | 4960 | } |
| 5012 | 4961 | ||
| 5013 | done: | 4962 | gfx_v8_0_set_mec_doorbell_range(adev); |
| 5014 | return r; | ||
| 5015 | } | ||
| 5016 | |||
| 5017 | static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) | ||
| 5018 | { | ||
| 5019 | int r, i, j; | ||
| 5020 | u32 tmp; | ||
| 5021 | bool use_doorbell = true; | ||
| 5022 | u64 hqd_gpu_addr; | ||
| 5023 | u64 mqd_gpu_addr; | ||
| 5024 | u64 eop_gpu_addr; | ||
| 5025 | u64 wb_gpu_addr; | ||
| 5026 | u32 *buf; | ||
| 5027 | struct vi_mqd *mqd; | ||
| 5028 | |||
| 5029 | /* init the queues. */ | ||
| 5030 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 5031 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | ||
| 5032 | |||
| 5033 | if (ring->mqd_obj == NULL) { | ||
| 5034 | r = amdgpu_bo_create(adev, | ||
| 5035 | sizeof(struct vi_mqd), | ||
| 5036 | PAGE_SIZE, true, | ||
| 5037 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, | ||
| 5038 | NULL, &ring->mqd_obj); | ||
| 5039 | if (r) { | ||
| 5040 | dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); | ||
| 5041 | return r; | ||
| 5042 | } | ||
| 5043 | } | ||
| 5044 | |||
| 5045 | r = amdgpu_bo_reserve(ring->mqd_obj, false); | ||
| 5046 | if (unlikely(r != 0)) { | ||
| 5047 | gfx_v8_0_cp_compute_fini(adev); | ||
| 5048 | return r; | ||
| 5049 | } | ||
| 5050 | r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, | ||
| 5051 | &mqd_gpu_addr); | ||
| 5052 | if (r) { | ||
| 5053 | dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); | ||
| 5054 | gfx_v8_0_cp_compute_fini(adev); | ||
| 5055 | return r; | ||
| 5056 | } | ||
| 5057 | r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); | ||
| 5058 | if (r) { | ||
| 5059 | dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); | ||
| 5060 | gfx_v8_0_cp_compute_fini(adev); | ||
| 5061 | return r; | ||
| 5062 | } | ||
| 5063 | |||
| 5064 | /* init the mqd struct */ | ||
| 5065 | memset(buf, 0, sizeof(struct vi_mqd)); | ||
| 5066 | |||
| 5067 | mqd = (struct vi_mqd *)buf; | ||
| 5068 | mqd->header = 0xC0310800; | ||
| 5069 | mqd->compute_pipelinestat_enable = 0x00000001; | ||
| 5070 | mqd->compute_static_thread_mgmt_se0 = 0xffffffff; | ||
| 5071 | mqd->compute_static_thread_mgmt_se1 = 0xffffffff; | ||
| 5072 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; | ||
| 5073 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; | ||
| 5074 | mqd->compute_misc_reserved = 0x00000003; | ||
| 5075 | |||
| 5076 | mutex_lock(&adev->srbm_mutex); | ||
| 5077 | vi_srbm_select(adev, ring->me, | ||
| 5078 | ring->pipe, | ||
| 5079 | ring->queue, 0); | ||
| 5080 | |||
| 5081 | eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); | ||
| 5082 | eop_gpu_addr >>= 8; | ||
| 5083 | |||
| 5084 | /* write the EOP addr */ | ||
| 5085 | WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); | ||
| 5086 | WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); | ||
| 5087 | |||
| 5088 | /* set the VMID assigned */ | ||
| 5089 | WREG32(mmCP_HQD_VMID, 0); | ||
| 5090 | |||
| 5091 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ | ||
| 5092 | tmp = RREG32(mmCP_HQD_EOP_CONTROL); | ||
| 5093 | tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, | ||
| 5094 | (order_base_2(MEC_HPD_SIZE / 4) - 1)); | ||
| 5095 | WREG32(mmCP_HQD_EOP_CONTROL, tmp); | ||
| 5096 | |||
| 5097 | /* disable wptr polling */ | ||
| 5098 | tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); | ||
| 5099 | tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); | ||
| 5100 | WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); | ||
| 5101 | |||
| 5102 | mqd->cp_hqd_eop_base_addr_lo = | ||
| 5103 | RREG32(mmCP_HQD_EOP_BASE_ADDR); | ||
| 5104 | mqd->cp_hqd_eop_base_addr_hi = | ||
| 5105 | RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); | ||
| 5106 | |||
| 5107 | /* enable doorbell? */ | ||
| 5108 | tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); | ||
| 5109 | if (use_doorbell) { | ||
| 5110 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); | ||
| 5111 | } else { | ||
| 5112 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); | ||
| 5113 | } | ||
| 5114 | WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); | ||
| 5115 | mqd->cp_hqd_pq_doorbell_control = tmp; | ||
| 5116 | |||
| 5117 | /* disable the queue if it's active */ | ||
| 5118 | mqd->cp_hqd_dequeue_request = 0; | ||
| 5119 | mqd->cp_hqd_pq_rptr = 0; | ||
| 5120 | mqd->cp_hqd_pq_wptr= 0; | ||
| 5121 | if (RREG32(mmCP_HQD_ACTIVE) & 1) { | ||
| 5122 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); | ||
| 5123 | for (j = 0; j < adev->usec_timeout; j++) { | ||
| 5124 | if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) | ||
| 5125 | break; | ||
| 5126 | udelay(1); | ||
| 5127 | } | ||
| 5128 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); | ||
| 5129 | WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); | ||
| 5130 | WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); | ||
| 5131 | } | ||
| 5132 | |||
| 5133 | /* set the pointer to the MQD */ | ||
| 5134 | mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; | ||
| 5135 | mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); | ||
| 5136 | WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); | ||
| 5137 | WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); | ||
| 5138 | |||
| 5139 | /* set MQD vmid to 0 */ | ||
| 5140 | tmp = RREG32(mmCP_MQD_CONTROL); | ||
| 5141 | tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); | ||
| 5142 | WREG32(mmCP_MQD_CONTROL, tmp); | ||
| 5143 | mqd->cp_mqd_control = tmp; | ||
| 5144 | |||
| 5145 | /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ | ||
| 5146 | hqd_gpu_addr = ring->gpu_addr >> 8; | ||
| 5147 | mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; | ||
| 5148 | mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); | ||
| 5149 | WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); | ||
| 5150 | WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); | ||
| 5151 | |||
| 5152 | /* set up the HQD, this is similar to CP_RB0_CNTL */ | ||
| 5153 | tmp = RREG32(mmCP_HQD_PQ_CONTROL); | ||
| 5154 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, | ||
| 5155 | (order_base_2(ring->ring_size / 4) - 1)); | ||
| 5156 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, | ||
| 5157 | ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); | ||
| 5158 | #ifdef __BIG_ENDIAN | ||
| 5159 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); | ||
| 5160 | #endif | ||
| 5161 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); | ||
| 5162 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); | ||
| 5163 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); | ||
| 5164 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); | ||
| 5165 | WREG32(mmCP_HQD_PQ_CONTROL, tmp); | ||
| 5166 | mqd->cp_hqd_pq_control = tmp; | ||
| 5167 | |||
| 5168 | /* set the wb address wether it's enabled or not */ | ||
| 5169 | wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); | ||
| 5170 | mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; | ||
| 5171 | mqd->cp_hqd_pq_rptr_report_addr_hi = | ||
| 5172 | upper_32_bits(wb_gpu_addr) & 0xffff; | ||
| 5173 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, | ||
| 5174 | mqd->cp_hqd_pq_rptr_report_addr_lo); | ||
| 5175 | WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, | ||
| 5176 | mqd->cp_hqd_pq_rptr_report_addr_hi); | ||
| 5177 | |||
| 5178 | /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ | ||
| 5179 | wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | ||
| 5180 | mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; | ||
| 5181 | mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; | ||
| 5182 | WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); | ||
| 5183 | WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, | ||
| 5184 | mqd->cp_hqd_pq_wptr_poll_addr_hi); | ||
| 5185 | |||
| 5186 | /* enable the doorbell if requested */ | ||
| 5187 | if (use_doorbell) { | ||
| 5188 | if ((adev->asic_type == CHIP_CARRIZO) || | ||
| 5189 | (adev->asic_type == CHIP_FIJI) || | ||
| 5190 | (adev->asic_type == CHIP_STONEY) || | ||
| 5191 | (adev->asic_type == CHIP_POLARIS11) || | ||
| 5192 | (adev->asic_type == CHIP_POLARIS10) || | ||
| 5193 | (adev->asic_type == CHIP_POLARIS12)) { | ||
| 5194 | WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, | ||
| 5195 | AMDGPU_DOORBELL_KIQ << 2); | ||
| 5196 | WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, | ||
| 5197 | AMDGPU_DOORBELL_MEC_RING7 << 2); | ||
| 5198 | } | ||
| 5199 | tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); | ||
| 5200 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, | ||
| 5201 | DOORBELL_OFFSET, ring->doorbell_index); | ||
| 5202 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); | ||
| 5203 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); | ||
| 5204 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); | ||
| 5205 | mqd->cp_hqd_pq_doorbell_control = tmp; | ||
| 5206 | |||
| 5207 | } else { | ||
| 5208 | mqd->cp_hqd_pq_doorbell_control = 0; | ||
| 5209 | } | ||
| 5210 | WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, | ||
| 5211 | mqd->cp_hqd_pq_doorbell_control); | ||
| 5212 | 4963 | ||
| 5213 | /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ | 4964 | r = gfx_v8_0_kiq_kcq_enable(adev); |
| 5214 | ring->wptr = 0; | 4965 | if (r) |
| 5215 | mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); | 4966 | goto done; |
| 5216 | WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); | ||
| 5217 | mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); | ||
| 5218 | |||
| 5219 | /* set the vmid for the queue */ | ||
| 5220 | mqd->cp_hqd_vmid = 0; | ||
| 5221 | WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); | ||
| 5222 | |||
| 5223 | tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); | ||
| 5224 | tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); | ||
| 5225 | WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); | ||
| 5226 | mqd->cp_hqd_persistent_state = tmp; | ||
| 5227 | if (adev->asic_type == CHIP_STONEY || | ||
| 5228 | adev->asic_type == CHIP_POLARIS11 || | ||
| 5229 | adev->asic_type == CHIP_POLARIS10 || | ||
| 5230 | adev->asic_type == CHIP_POLARIS12) { | ||
| 5231 | tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); | ||
| 5232 | tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); | ||
| 5233 | WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); | ||
| 5234 | } | ||
| 5235 | |||
| 5236 | /* activate the queue */ | ||
| 5237 | mqd->cp_hqd_active = 1; | ||
| 5238 | WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); | ||
| 5239 | |||
| 5240 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
| 5241 | mutex_unlock(&adev->srbm_mutex); | ||
| 5242 | |||
| 5243 | amdgpu_bo_kunmap(ring->mqd_obj); | ||
| 5244 | amdgpu_bo_unreserve(ring->mqd_obj); | ||
| 5245 | } | ||
| 5246 | 4967 | ||
| 5247 | if (use_doorbell) { | 4968 | /* Test KIQ */ |
| 5248 | tmp = RREG32(mmCP_PQ_STATUS); | 4969 | ring = &adev->gfx.kiq.ring; |
| 5249 | tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); | 4970 | ring->ready = true; |
| 5250 | WREG32(mmCP_PQ_STATUS, tmp); | 4971 | r = amdgpu_ring_test_ring(ring); |
| 4972 | if (r) { | ||
| 4973 | ring->ready = false; | ||
| 4974 | goto done; | ||
| 5251 | } | 4975 | } |
| 5252 | 4976 | ||
| 5253 | gfx_v8_0_cp_compute_enable(adev, true); | 4977 | /* Test KCQs */ |
| 5254 | |||
| 5255 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 4978 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
| 5256 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | 4979 | ring = &adev->gfx.compute_ring[i]; |
| 5257 | |||
| 5258 | ring->ready = true; | 4980 | ring->ready = true; |
| 5259 | r = amdgpu_ring_test_ring(ring); | 4981 | r = amdgpu_ring_test_ring(ring); |
| 5260 | if (r) | 4982 | if (r) |
| 5261 | ring->ready = false; | 4983 | ring->ready = false; |
| 5262 | } | 4984 | } |
| 5263 | 4985 | ||
| 5264 | return 0; | 4986 | done: |
| 4987 | return r; | ||
| 5265 | } | 4988 | } |
| 5266 | 4989 | ||
| 5267 | static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) | 4990 | static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) |
| @@ -5314,10 +5037,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) | |||
| 5314 | if (r) | 5037 | if (r) |
| 5315 | return r; | 5038 | return r; |
| 5316 | 5039 | ||
| 5317 | if (amdgpu_sriov_vf(adev)) | 5040 | r = gfx_v8_0_kiq_resume(adev); |
| 5318 | r = gfx_v8_0_kiq_resume(adev); | ||
| 5319 | else | ||
| 5320 | r = gfx_v8_0_cp_compute_resume(adev); | ||
| 5321 | if (r) | 5041 | if (r) |
| 5322 | return r; | 5042 | return r; |
| 5323 | 5043 | ||
| @@ -5361,7 +5081,6 @@ static int gfx_v8_0_hw_fini(void *handle) | |||
| 5361 | } | 5081 | } |
| 5362 | gfx_v8_0_cp_enable(adev, false); | 5082 | gfx_v8_0_cp_enable(adev, false); |
| 5363 | gfx_v8_0_rlc_stop(adev); | 5083 | gfx_v8_0_rlc_stop(adev); |
| 5364 | gfx_v8_0_cp_compute_fini(adev); | ||
| 5365 | 5084 | ||
| 5366 | amdgpu_set_powergating_state(adev, | 5085 | amdgpu_set_powergating_state(adev, |
| 5367 | AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); | 5086 | AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); |
| @@ -5372,15 +5091,18 @@ static int gfx_v8_0_hw_fini(void *handle) | |||
| 5372 | static int gfx_v8_0_suspend(void *handle) | 5091 | static int gfx_v8_0_suspend(void *handle) |
| 5373 | { | 5092 | { |
| 5374 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 5093 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 5375 | 5094 | adev->gfx.in_suspend = true; | |
| 5376 | return gfx_v8_0_hw_fini(adev); | 5095 | return gfx_v8_0_hw_fini(adev); |
| 5377 | } | 5096 | } |
| 5378 | 5097 | ||
| 5379 | static int gfx_v8_0_resume(void *handle) | 5098 | static int gfx_v8_0_resume(void *handle) |
| 5380 | { | 5099 | { |
| 5100 | int r; | ||
| 5381 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 5101 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 5382 | 5102 | ||
| 5383 | return gfx_v8_0_hw_init(adev); | 5103 | r = gfx_v8_0_hw_init(adev); |
| 5104 | adev->gfx.in_suspend = false; | ||
| 5105 | return r; | ||
| 5384 | } | 5106 | } |
| 5385 | 5107 | ||
| 5386 | static bool gfx_v8_0_is_idle(void *handle) | 5108 | static bool gfx_v8_0_is_idle(void *handle) |
| @@ -5469,25 +5191,6 @@ static bool gfx_v8_0_check_soft_reset(void *handle) | |||
| 5469 | } | 5191 | } |
| 5470 | } | 5192 | } |
| 5471 | 5193 | ||
| 5472 | static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, | ||
| 5473 | struct amdgpu_ring *ring) | ||
| 5474 | { | ||
| 5475 | int i; | ||
| 5476 | |||
| 5477 | mutex_lock(&adev->srbm_mutex); | ||
| 5478 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 5479 | if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { | ||
| 5480 | WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2); | ||
| 5481 | for (i = 0; i < adev->usec_timeout; i++) { | ||
| 5482 | if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) | ||
| 5483 | break; | ||
| 5484 | udelay(1); | ||
| 5485 | } | ||
| 5486 | } | ||
| 5487 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
| 5488 | mutex_unlock(&adev->srbm_mutex); | ||
| 5489 | } | ||
| 5490 | |||
| 5491 | static int gfx_v8_0_pre_soft_reset(void *handle) | 5194 | static int gfx_v8_0_pre_soft_reset(void *handle) |
| 5492 | { | 5195 | { |
| 5493 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 5196 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| @@ -5517,7 +5220,11 @@ static int gfx_v8_0_pre_soft_reset(void *handle) | |||
| 5517 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 5220 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
| 5518 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | 5221 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; |
| 5519 | 5222 | ||
| 5520 | gfx_v8_0_inactive_hqd(adev, ring); | 5223 | mutex_lock(&adev->srbm_mutex); |
| 5224 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 5225 | gfx_v8_0_deactivate_hqd(adev, 2); | ||
| 5226 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
| 5227 | mutex_unlock(&adev->srbm_mutex); | ||
| 5521 | } | 5228 | } |
| 5522 | /* Disable MEC parsing/prefetching */ | 5229 | /* Disable MEC parsing/prefetching */ |
| 5523 | gfx_v8_0_cp_compute_enable(adev, false); | 5230 | gfx_v8_0_cp_compute_enable(adev, false); |
| @@ -5588,18 +5295,6 @@ static int gfx_v8_0_soft_reset(void *handle) | |||
| 5588 | return 0; | 5295 | return 0; |
| 5589 | } | 5296 | } |
| 5590 | 5297 | ||
| 5591 | static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, | ||
| 5592 | struct amdgpu_ring *ring) | ||
| 5593 | { | ||
| 5594 | mutex_lock(&adev->srbm_mutex); | ||
| 5595 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 5596 | WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); | ||
| 5597 | WREG32(mmCP_HQD_PQ_RPTR, 0); | ||
| 5598 | WREG32(mmCP_HQD_PQ_WPTR, 0); | ||
| 5599 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
| 5600 | mutex_unlock(&adev->srbm_mutex); | ||
| 5601 | } | ||
| 5602 | |||
| 5603 | static int gfx_v8_0_post_soft_reset(void *handle) | 5298 | static int gfx_v8_0_post_soft_reset(void *handle) |
| 5604 | { | 5299 | { |
| 5605 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 5300 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| @@ -5625,9 +5320,13 @@ static int gfx_v8_0_post_soft_reset(void *handle) | |||
| 5625 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 5320 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
| 5626 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | 5321 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; |
| 5627 | 5322 | ||
| 5628 | gfx_v8_0_init_hqd(adev, ring); | 5323 | mutex_lock(&adev->srbm_mutex); |
| 5324 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 5325 | gfx_v8_0_deactivate_hqd(adev, 2); | ||
| 5326 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
| 5327 | mutex_unlock(&adev->srbm_mutex); | ||
| 5629 | } | 5328 | } |
| 5630 | gfx_v8_0_cp_compute_resume(adev); | 5329 | gfx_v8_0_kiq_resume(adev); |
| 5631 | } | 5330 | } |
| 5632 | gfx_v8_0_rlc_start(adev); | 5331 | gfx_v8_0_rlc_start(adev); |
| 5633 | 5332 | ||
| @@ -5773,7 +5472,7 @@ static int gfx_v8_0_early_init(void *handle) | |||
| 5773 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 5472 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 5774 | 5473 | ||
| 5775 | adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; | 5474 | adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; |
| 5776 | adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; | 5475 | adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; |
| 5777 | adev->gfx.funcs = &gfx_v8_0_gfx_funcs; | 5476 | adev->gfx.funcs = &gfx_v8_0_gfx_funcs; |
| 5778 | gfx_v8_0_set_ring_funcs(adev); | 5477 | gfx_v8_0_set_ring_funcs(adev); |
| 5779 | gfx_v8_0_set_irq_funcs(adev); | 5478 | gfx_v8_0_set_irq_funcs(adev); |
| @@ -6265,6 +5964,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev | |||
| 6265 | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); | 5964 | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); |
| 6266 | if (temp != data) | 5965 | if (temp != data) |
| 6267 | WREG32(mmRLC_CGCG_CGLS_CTRL, data); | 5966 | WREG32(mmRLC_CGCG_CGLS_CTRL, data); |
| 5967 | /* enable interrupts again for PG */ | ||
| 5968 | gfx_v8_0_enable_gui_idle_interrupt(adev, true); | ||
| 6268 | } | 5969 | } |
| 6269 | 5970 | ||
| 6270 | gfx_v8_0_wait_for_rlc_serdes(adev); | 5971 | gfx_v8_0_wait_for_rlc_serdes(adev); |
| @@ -6568,9 +6269,13 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | |||
| 6568 | 6269 | ||
| 6569 | control |= ib->length_dw | (vm_id << 24); | 6270 | control |= ib->length_dw | (vm_id << 24); |
| 6570 | 6271 | ||
| 6571 | if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT) | 6272 | if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { |
| 6572 | control |= INDIRECT_BUFFER_PRE_ENB(1); | 6273 | control |= INDIRECT_BUFFER_PRE_ENB(1); |
| 6573 | 6274 | ||
| 6275 | if (!(ib->flags & AMDGPU_IB_FLAG_CE)) | ||
| 6276 | gfx_v8_0_ring_emit_de_meta(ring); | ||
| 6277 | } | ||
| 6278 | |||
| 6574 | amdgpu_ring_write(ring, header); | 6279 | amdgpu_ring_write(ring, header); |
| 6575 | amdgpu_ring_write(ring, | 6280 | amdgpu_ring_write(ring, |
| 6576 | #ifdef __BIG_ENDIAN | 6281 | #ifdef __BIG_ENDIAN |
| @@ -6753,8 +6458,7 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) | |||
| 6753 | uint32_t dw2 = 0; | 6458 | uint32_t dw2 = 0; |
| 6754 | 6459 | ||
| 6755 | if (amdgpu_sriov_vf(ring->adev)) | 6460 | if (amdgpu_sriov_vf(ring->adev)) |
| 6756 | gfx_v8_0_ring_emit_ce_meta_init(ring, | 6461 | gfx_v8_0_ring_emit_ce_meta(ring); |
| 6757 | (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); | ||
| 6758 | 6462 | ||
| 6759 | dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ | 6463 | dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ |
| 6760 | if (flags & AMDGPU_HAVE_CTX_SWITCH) { | 6464 | if (flags & AMDGPU_HAVE_CTX_SWITCH) { |
| @@ -6780,10 +6484,6 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) | |||
| 6780 | amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); | 6484 | amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); |
| 6781 | amdgpu_ring_write(ring, dw2); | 6485 | amdgpu_ring_write(ring, dw2); |
| 6782 | amdgpu_ring_write(ring, 0); | 6486 | amdgpu_ring_write(ring, 0); |
| 6783 | |||
| 6784 | if (amdgpu_sriov_vf(ring->adev)) | ||
| 6785 | gfx_v8_0_ring_emit_de_meta_init(ring, | ||
| 6786 | (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); | ||
| 6787 | } | 6487 | } |
| 6788 | 6488 | ||
| 6789 | static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) | 6489 | static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) |
| @@ -6813,7 +6513,6 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne | |||
| 6813 | ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; | 6513 | ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; |
| 6814 | } | 6514 | } |
| 6815 | 6515 | ||
| 6816 | |||
| 6817 | static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) | 6516 | static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) |
| 6818 | { | 6517 | { |
| 6819 | struct amdgpu_device *adev = ring->adev; | 6518 | struct amdgpu_device *adev = ring->adev; |
| @@ -6851,15 +6550,27 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, | |||
| 6851 | int me, int pipe, | 6550 | int me, int pipe, |
| 6852 | enum amdgpu_interrupt_state state) | 6551 | enum amdgpu_interrupt_state state) |
| 6853 | { | 6552 | { |
| 6553 | u32 mec_int_cntl, mec_int_cntl_reg; | ||
| 6554 | |||
| 6854 | /* | 6555 | /* |
| 6855 | * amdgpu controls only pipe 0 of MEC1. That's why this function only | 6556 | * amdgpu controls only the first MEC. That's why this function only |
| 6856 | * handles the setting of interrupts for this specific pipe. All other | 6557 | * handles the setting of interrupts for this specific MEC. All other |
| 6857 | * pipes' interrupts are set by amdkfd. | 6558 | * pipes' interrupts are set by amdkfd. |
| 6858 | */ | 6559 | */ |
| 6859 | 6560 | ||
| 6860 | if (me == 1) { | 6561 | if (me == 1) { |
| 6861 | switch (pipe) { | 6562 | switch (pipe) { |
| 6862 | case 0: | 6563 | case 0: |
| 6564 | mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; | ||
| 6565 | break; | ||
| 6566 | case 1: | ||
| 6567 | mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; | ||
| 6568 | break; | ||
| 6569 | case 2: | ||
| 6570 | mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; | ||
| 6571 | break; | ||
| 6572 | case 3: | ||
| 6573 | mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; | ||
| 6863 | break; | 6574 | break; |
| 6864 | default: | 6575 | default: |
| 6865 | DRM_DEBUG("invalid pipe %d\n", pipe); | 6576 | DRM_DEBUG("invalid pipe %d\n", pipe); |
| @@ -6870,8 +6581,20 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, | |||
| 6870 | return; | 6581 | return; |
| 6871 | } | 6582 | } |
| 6872 | 6583 | ||
| 6873 | WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, | 6584 | switch (state) { |
| 6874 | state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); | 6585 | case AMDGPU_IRQ_STATE_DISABLE: |
| 6586 | mec_int_cntl = RREG32(mec_int_cntl_reg); | ||
| 6587 | mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; | ||
| 6588 | WREG32(mec_int_cntl_reg, mec_int_cntl); | ||
| 6589 | break; | ||
| 6590 | case AMDGPU_IRQ_STATE_ENABLE: | ||
| 6591 | mec_int_cntl = RREG32(mec_int_cntl_reg); | ||
| 6592 | mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; | ||
| 6593 | WREG32(mec_int_cntl_reg, mec_int_cntl); | ||
| 6594 | break; | ||
| 6595 | default: | ||
| 6596 | break; | ||
| 6597 | } | ||
| 6875 | } | 6598 | } |
| 6876 | 6599 | ||
| 6877 | static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, | 6600 | static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, |
| @@ -6992,8 +6715,6 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, | |||
| 6992 | { | 6715 | { |
| 6993 | struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); | 6716 | struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); |
| 6994 | 6717 | ||
| 6995 | BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); | ||
| 6996 | |||
| 6997 | switch (type) { | 6718 | switch (type) { |
| 6998 | case AMDGPU_CP_KIQ_IRQ_DRIVER0: | 6719 | case AMDGPU_CP_KIQ_IRQ_DRIVER0: |
| 6999 | WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, | 6720 | WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, |
| @@ -7023,8 +6744,6 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, | |||
| 7023 | u8 me_id, pipe_id, queue_id; | 6744 | u8 me_id, pipe_id, queue_id; |
| 7024 | struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); | 6745 | struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); |
| 7025 | 6746 | ||
| 7026 | BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); | ||
| 7027 | |||
| 7028 | me_id = (entry->ring_id & 0x0c) >> 2; | 6747 | me_id = (entry->ring_id & 0x0c) >> 2; |
| 7029 | pipe_id = (entry->ring_id & 0x03) >> 0; | 6748 | pipe_id = (entry->ring_id & 0x03) >> 0; |
| 7030 | queue_id = (entry->ring_id & 0x70) >> 4; | 6749 | queue_id = (entry->ring_id & 0x70) >> 4; |
| @@ -7257,7 +6976,7 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) | |||
| 7257 | data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | | 6976 | data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | |
| 7258 | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); | 6977 | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); |
| 7259 | 6978 | ||
| 7260 | mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); | 6979 | mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); |
| 7261 | 6980 | ||
| 7262 | return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; | 6981 | return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; |
| 7263 | } | 6982 | } |
| @@ -7268,9 +6987,15 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) | |||
| 7268 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; | 6987 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; |
| 7269 | struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; | 6988 | struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; |
| 7270 | unsigned disable_masks[4 * 2]; | 6989 | unsigned disable_masks[4 * 2]; |
| 6990 | u32 ao_cu_num; | ||
| 7271 | 6991 | ||
| 7272 | memset(cu_info, 0, sizeof(*cu_info)); | 6992 | memset(cu_info, 0, sizeof(*cu_info)); |
| 7273 | 6993 | ||
| 6994 | if (adev->flags & AMD_IS_APU) | ||
| 6995 | ao_cu_num = 2; | ||
| 6996 | else | ||
| 6997 | ao_cu_num = adev->gfx.config.max_cu_per_sh; | ||
| 6998 | |||
| 7274 | amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); | 6999 | amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); |
| 7275 | 7000 | ||
| 7276 | mutex_lock(&adev->grbm_idx_mutex); | 7001 | mutex_lock(&adev->grbm_idx_mutex); |
| @@ -7286,16 +7011,18 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) | |||
| 7286 | bitmap = gfx_v8_0_get_cu_active_bitmap(adev); | 7011 | bitmap = gfx_v8_0_get_cu_active_bitmap(adev); |
| 7287 | cu_info->bitmap[i][j] = bitmap; | 7012 | cu_info->bitmap[i][j] = bitmap; |
| 7288 | 7013 | ||
| 7289 | for (k = 0; k < 16; k ++) { | 7014 | for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { |
| 7290 | if (bitmap & mask) { | 7015 | if (bitmap & mask) { |
| 7291 | if (counter < 2) | 7016 | if (counter < ao_cu_num) |
| 7292 | ao_bitmap |= mask; | 7017 | ao_bitmap |= mask; |
| 7293 | counter ++; | 7018 | counter ++; |
| 7294 | } | 7019 | } |
| 7295 | mask <<= 1; | 7020 | mask <<= 1; |
| 7296 | } | 7021 | } |
| 7297 | active_cu_number += counter; | 7022 | active_cu_number += counter; |
| 7298 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | 7023 | if (i < 2 && j < 2) |
| 7024 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | ||
| 7025 | cu_info->ao_cu_bitmap[i][j] = ao_bitmap; | ||
| 7299 | } | 7026 | } |
| 7300 | } | 7027 | } |
| 7301 | gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); | 7028 | gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); |
| @@ -7323,7 +7050,7 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block = | |||
| 7323 | .funcs = &gfx_v8_0_ip_funcs, | 7050 | .funcs = &gfx_v8_0_ip_funcs, |
| 7324 | }; | 7051 | }; |
| 7325 | 7052 | ||
| 7326 | static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) | 7053 | static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) |
| 7327 | { | 7054 | { |
| 7328 | uint64_t ce_payload_addr; | 7055 | uint64_t ce_payload_addr; |
| 7329 | int cnt_ce; | 7056 | int cnt_ce; |
| @@ -7333,10 +7060,12 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c | |||
| 7333 | } ce_payload = {}; | 7060 | } ce_payload = {}; |
| 7334 | 7061 | ||
| 7335 | if (ring->adev->virt.chained_ib_support) { | 7062 | if (ring->adev->virt.chained_ib_support) { |
| 7336 | ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); | 7063 | ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + |
| 7064 | offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); | ||
| 7337 | cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; | 7065 | cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; |
| 7338 | } else { | 7066 | } else { |
| 7339 | ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload); | 7067 | ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + |
| 7068 | offsetof(struct vi_gfx_meta_data, ce_payload); | ||
| 7340 | cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; | 7069 | cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; |
| 7341 | } | 7070 | } |
| 7342 | 7071 | ||
| @@ -7350,15 +7079,16 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c | |||
| 7350 | amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); | 7079 | amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); |
| 7351 | } | 7080 | } |
| 7352 | 7081 | ||
| 7353 | static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) | 7082 | static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) |
| 7354 | { | 7083 | { |
| 7355 | uint64_t de_payload_addr, gds_addr; | 7084 | uint64_t de_payload_addr, gds_addr, csa_addr; |
| 7356 | int cnt_de; | 7085 | int cnt_de; |
| 7357 | static union { | 7086 | static union { |
| 7358 | struct vi_de_ib_state regular; | 7087 | struct vi_de_ib_state regular; |
| 7359 | struct vi_de_ib_state_chained_ib chained; | 7088 | struct vi_de_ib_state_chained_ib chained; |
| 7360 | } de_payload = {}; | 7089 | } de_payload = {}; |
| 7361 | 7090 | ||
| 7091 | csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; | ||
| 7362 | gds_addr = csa_addr + 4096; | 7092 | gds_addr = csa_addr + 4096; |
| 7363 | if (ring->adev->virt.chained_ib_support) { | 7093 | if (ring->adev->virt.chained_ib_support) { |
| 7364 | de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); | 7094 | de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); |
| @@ -7381,68 +7111,3 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c | |||
| 7381 | amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); | 7111 | amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); |
| 7382 | amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); | 7112 | amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); |
| 7383 | } | 7113 | } |
| 7384 | |||
| 7385 | /* create MQD for each compute queue */ | ||
| 7386 | static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev) | ||
| 7387 | { | ||
| 7388 | struct amdgpu_ring *ring = NULL; | ||
| 7389 | int r, i; | ||
| 7390 | |||
| 7391 | /* create MQD for KIQ */ | ||
| 7392 | ring = &adev->gfx.kiq.ring; | ||
| 7393 | if (!ring->mqd_obj) { | ||
| 7394 | r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, | ||
| 7395 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, | ||
| 7396 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | ||
| 7397 | if (r) { | ||
| 7398 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | ||
| 7399 | return r; | ||
| 7400 | } | ||
| 7401 | |||
| 7402 | /* prepare MQD backup */ | ||
| 7403 | adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); | ||
| 7404 | if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) | ||
| 7405 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); | ||
| 7406 | } | ||
| 7407 | |||
| 7408 | /* create MQD for each KCQ */ | ||
| 7409 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 7410 | ring = &adev->gfx.compute_ring[i]; | ||
| 7411 | if (!ring->mqd_obj) { | ||
| 7412 | r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, | ||
| 7413 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, | ||
| 7414 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | ||
| 7415 | if (r) { | ||
| 7416 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | ||
| 7417 | return r; | ||
| 7418 | } | ||
| 7419 | |||
| 7420 | /* prepare MQD backup */ | ||
| 7421 | adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); | ||
| 7422 | if (!adev->gfx.mec.mqd_backup[i]) | ||
| 7423 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); | ||
| 7424 | } | ||
| 7425 | } | ||
| 7426 | |||
| 7427 | return 0; | ||
| 7428 | } | ||
| 7429 | |||
| 7430 | static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev) | ||
| 7431 | { | ||
| 7432 | struct amdgpu_ring *ring = NULL; | ||
| 7433 | int i; | ||
| 7434 | |||
| 7435 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 7436 | ring = &adev->gfx.compute_ring[i]; | ||
| 7437 | kfree(adev->gfx.mec.mqd_backup[i]); | ||
| 7438 | amdgpu_bo_free_kernel(&ring->mqd_obj, | ||
| 7439 | &ring->mqd_gpu_addr, | ||
| 7440 | &ring->mqd_ptr); | ||
| 7441 | } | ||
| 7442 | |||
| 7443 | ring = &adev->gfx.kiq.ring; | ||
| 7444 | kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); | ||
| 7445 | amdgpu_bo_free_kernel(&ring->mqd_obj, | ||
| 7446 | &ring->mqd_gpu_addr, | ||
| 7447 | &ring->mqd_ptr); | ||
| 7448 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h index 788cc3ab584b..ec3f11fa986c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h | |||
| @@ -27,4 +27,9 @@ | |||
| 27 | extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block; | 27 | extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block; |
| 28 | extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block; | 28 | extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block; |
| 29 | 29 | ||
| 30 | struct amdgpu_device; | ||
| 31 | struct vi_mqd; | ||
| 32 | |||
| 33 | int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd); | ||
| 34 | |||
| 30 | #endif | 35 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0c16b7563b73..3a0b69b09ed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include <linux/firmware.h> | 23 | #include <linux/firmware.h> |
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "amdgpu_gfx.h" | 26 | #include "amdgpu_gfx.h" |
| 27 | #include "soc15.h" | 27 | #include "soc15.h" |
| @@ -38,8 +38,17 @@ | |||
| 38 | #include "v9_structs.h" | 38 | #include "v9_structs.h" |
| 39 | 39 | ||
| 40 | #define GFX9_NUM_GFX_RINGS 1 | 40 | #define GFX9_NUM_GFX_RINGS 1 |
| 41 | #define GFX9_NUM_COMPUTE_RINGS 8 | 41 | #define GFX9_MEC_HPD_SIZE 2048 |
| 42 | #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 | 42 | #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L |
| 43 | #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L | ||
| 44 | #define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 | ||
| 45 | |||
| 46 | #define mmPWR_MISC_CNTL_STATUS 0x0183 | ||
| 47 | #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 | ||
| 48 | #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0 | ||
| 49 | #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1 | ||
| 50 | #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L | ||
| 51 | #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L | ||
| 43 | 52 | ||
| 44 | MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); | 53 | MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); |
| 45 | MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); | 54 | MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); |
| @@ -48,6 +57,13 @@ MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); | |||
| 48 | MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); | 57 | MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); |
| 49 | MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); | 58 | MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); |
| 50 | 59 | ||
| 60 | MODULE_FIRMWARE("amdgpu/raven_ce.bin"); | ||
| 61 | MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); | ||
| 62 | MODULE_FIRMWARE("amdgpu/raven_me.bin"); | ||
| 63 | MODULE_FIRMWARE("amdgpu/raven_mec.bin"); | ||
| 64 | MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); | ||
| 65 | MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); | ||
| 66 | |||
| 51 | static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = | 67 | static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = |
| 52 | { | 68 | { |
| 53 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), | 69 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), |
| @@ -86,14 +102,27 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = | |||
| 86 | 102 | ||
| 87 | static const u32 golden_settings_gc_9_0[] = | 103 | static const u32 golden_settings_gc_9_0[] = |
| 88 | { | 104 | { |
| 89 | SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400, | 105 | SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, |
| 106 | SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 107 | SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 108 | SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, | ||
| 109 | SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, | ||
| 110 | SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 90 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, | 111 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, |
| 91 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, | 112 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, |
| 92 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, | 113 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, |
| 114 | SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, | ||
| 115 | SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, | ||
| 116 | SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, | ||
| 117 | SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 118 | SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 119 | SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107, | ||
| 93 | SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, | 120 | SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, |
| 94 | SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, | 121 | SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, |
| 95 | SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, | 122 | SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, |
| 96 | SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff | 123 | SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, |
| 124 | SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff, | ||
| 125 | SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 | ||
| 97 | }; | 126 | }; |
| 98 | 127 | ||
| 99 | static const u32 golden_settings_gc_9_0_vg10[] = | 128 | static const u32 golden_settings_gc_9_0_vg10[] = |
| @@ -104,11 +133,47 @@ static const u32 golden_settings_gc_9_0_vg10[] = | |||
| 104 | SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, | 133 | SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, |
| 105 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, | 134 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, |
| 106 | SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, | 135 | SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, |
| 107 | SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800, | 136 | SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800 |
| 108 | SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1),0x0000000f, 0x00000007 | 137 | }; |
| 138 | |||
| 139 | static const u32 golden_settings_gc_9_1[] = | ||
| 140 | { | ||
| 141 | SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104, | ||
| 142 | SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 143 | SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 144 | SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 145 | SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, | ||
| 146 | SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, | ||
| 147 | SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 148 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, | ||
| 149 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, | ||
| 150 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, | ||
| 151 | SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, | ||
| 152 | SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, | ||
| 153 | SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, | ||
| 154 | SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 155 | SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, | ||
| 156 | SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, | ||
| 157 | SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000, | ||
| 158 | SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120, | ||
| 159 | SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, | ||
| 160 | SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff, | ||
| 161 | SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 | ||
| 162 | }; | ||
| 163 | |||
| 164 | static const u32 golden_settings_gc_9_1_rv1[] = | ||
| 165 | { | ||
| 166 | SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, | ||
| 167 | SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042, | ||
| 168 | SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042, | ||
| 169 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000, | ||
| 170 | SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000, | ||
| 171 | SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, | ||
| 172 | SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800 | ||
| 109 | }; | 173 | }; |
| 110 | 174 | ||
| 111 | #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 | 175 | #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 |
| 176 | #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 | ||
| 112 | 177 | ||
| 113 | static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); | 178 | static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); |
| 114 | static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); | 179 | static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); |
| @@ -118,6 +183,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, | |||
| 118 | struct amdgpu_cu_info *cu_info); | 183 | struct amdgpu_cu_info *cu_info); |
| 119 | static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); | 184 | static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); |
| 120 | static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); | 185 | static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); |
| 186 | static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); | ||
| 121 | 187 | ||
| 122 | static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) | 188 | static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) |
| 123 | { | 189 | { |
| @@ -130,6 +196,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 130 | golden_settings_gc_9_0_vg10, | 196 | golden_settings_gc_9_0_vg10, |
| 131 | (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10)); | 197 | (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10)); |
| 132 | break; | 198 | break; |
| 199 | case CHIP_RAVEN: | ||
| 200 | amdgpu_program_register_sequence(adev, | ||
| 201 | golden_settings_gc_9_1, | ||
| 202 | (const u32)ARRAY_SIZE(golden_settings_gc_9_1)); | ||
| 203 | amdgpu_program_register_sequence(adev, | ||
| 204 | golden_settings_gc_9_1_rv1, | ||
| 205 | (const u32)ARRAY_SIZE(golden_settings_gc_9_1_rv1)); | ||
| 206 | break; | ||
| 133 | default: | 207 | default: |
| 134 | break; | 208 | break; |
| 135 | } | 209 | } |
| @@ -284,6 +358,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | |||
| 284 | struct amdgpu_firmware_info *info = NULL; | 358 | struct amdgpu_firmware_info *info = NULL; |
| 285 | const struct common_firmware_header *header = NULL; | 359 | const struct common_firmware_header *header = NULL; |
| 286 | const struct gfx_firmware_header_v1_0 *cp_hdr; | 360 | const struct gfx_firmware_header_v1_0 *cp_hdr; |
| 361 | const struct rlc_firmware_header_v2_0 *rlc_hdr; | ||
| 362 | unsigned int *tmp = NULL; | ||
| 363 | unsigned int i = 0; | ||
| 287 | 364 | ||
| 288 | DRM_DEBUG("\n"); | 365 | DRM_DEBUG("\n"); |
| 289 | 366 | ||
| @@ -291,6 +368,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | |||
| 291 | case CHIP_VEGA10: | 368 | case CHIP_VEGA10: |
| 292 | chip_name = "vega10"; | 369 | chip_name = "vega10"; |
| 293 | break; | 370 | break; |
| 371 | case CHIP_RAVEN: | ||
| 372 | chip_name = "raven"; | ||
| 373 | break; | ||
| 294 | default: | 374 | default: |
| 295 | BUG(); | 375 | BUG(); |
| 296 | } | 376 | } |
| @@ -333,9 +413,46 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | |||
| 333 | if (err) | 413 | if (err) |
| 334 | goto out; | 414 | goto out; |
| 335 | err = amdgpu_ucode_validate(adev->gfx.rlc_fw); | 415 | err = amdgpu_ucode_validate(adev->gfx.rlc_fw); |
| 336 | cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; | 416 | rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; |
| 337 | adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); | 417 | adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); |
| 338 | adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); | 418 | adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); |
| 419 | adev->gfx.rlc.save_and_restore_offset = | ||
| 420 | le32_to_cpu(rlc_hdr->save_and_restore_offset); | ||
| 421 | adev->gfx.rlc.clear_state_descriptor_offset = | ||
| 422 | le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); | ||
| 423 | adev->gfx.rlc.avail_scratch_ram_locations = | ||
| 424 | le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); | ||
| 425 | adev->gfx.rlc.reg_restore_list_size = | ||
| 426 | le32_to_cpu(rlc_hdr->reg_restore_list_size); | ||
| 427 | adev->gfx.rlc.reg_list_format_start = | ||
| 428 | le32_to_cpu(rlc_hdr->reg_list_format_start); | ||
| 429 | adev->gfx.rlc.reg_list_format_separate_start = | ||
| 430 | le32_to_cpu(rlc_hdr->reg_list_format_separate_start); | ||
| 431 | adev->gfx.rlc.starting_offsets_start = | ||
| 432 | le32_to_cpu(rlc_hdr->starting_offsets_start); | ||
| 433 | adev->gfx.rlc.reg_list_format_size_bytes = | ||
| 434 | le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); | ||
| 435 | adev->gfx.rlc.reg_list_size_bytes = | ||
| 436 | le32_to_cpu(rlc_hdr->reg_list_size_bytes); | ||
| 437 | adev->gfx.rlc.register_list_format = | ||
| 438 | kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + | ||
| 439 | adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); | ||
| 440 | if (!adev->gfx.rlc.register_list_format) { | ||
| 441 | err = -ENOMEM; | ||
| 442 | goto out; | ||
| 443 | } | ||
| 444 | |||
| 445 | tmp = (unsigned int *)((uintptr_t)rlc_hdr + | ||
| 446 | le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); | ||
| 447 | for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) | ||
| 448 | adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); | ||
| 449 | |||
| 450 | adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; | ||
| 451 | |||
| 452 | tmp = (unsigned int *)((uintptr_t)rlc_hdr + | ||
| 453 | le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); | ||
| 454 | for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) | ||
| 455 | adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); | ||
| 339 | 456 | ||
| 340 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | 457 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); |
| 341 | err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); | 458 | err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); |
| @@ -447,6 +564,261 @@ out: | |||
| 447 | return err; | 564 | return err; |
| 448 | } | 565 | } |
| 449 | 566 | ||
| 567 | static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) | ||
| 568 | { | ||
| 569 | u32 count = 0; | ||
| 570 | const struct cs_section_def *sect = NULL; | ||
| 571 | const struct cs_extent_def *ext = NULL; | ||
| 572 | |||
| 573 | /* begin clear state */ | ||
| 574 | count += 2; | ||
| 575 | /* context control state */ | ||
| 576 | count += 3; | ||
| 577 | |||
| 578 | for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { | ||
| 579 | for (ext = sect->section; ext->extent != NULL; ++ext) { | ||
| 580 | if (sect->id == SECT_CONTEXT) | ||
| 581 | count += 2 + ext->reg_count; | ||
| 582 | else | ||
| 583 | return 0; | ||
| 584 | } | ||
| 585 | } | ||
| 586 | |||
| 587 | /* end clear state */ | ||
| 588 | count += 2; | ||
| 589 | /* clear state */ | ||
| 590 | count += 2; | ||
| 591 | |||
| 592 | return count; | ||
| 593 | } | ||
| 594 | |||
| 595 | static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, | ||
| 596 | volatile u32 *buffer) | ||
| 597 | { | ||
| 598 | u32 count = 0, i; | ||
| 599 | const struct cs_section_def *sect = NULL; | ||
| 600 | const struct cs_extent_def *ext = NULL; | ||
| 601 | |||
| 602 | if (adev->gfx.rlc.cs_data == NULL) | ||
| 603 | return; | ||
| 604 | if (buffer == NULL) | ||
| 605 | return; | ||
| 606 | |||
| 607 | buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); | ||
| 608 | buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); | ||
| 609 | |||
| 610 | buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); | ||
| 611 | buffer[count++] = cpu_to_le32(0x80000000); | ||
| 612 | buffer[count++] = cpu_to_le32(0x80000000); | ||
| 613 | |||
| 614 | for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { | ||
| 615 | for (ext = sect->section; ext->extent != NULL; ++ext) { | ||
| 616 | if (sect->id == SECT_CONTEXT) { | ||
| 617 | buffer[count++] = | ||
| 618 | cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); | ||
| 619 | buffer[count++] = cpu_to_le32(ext->reg_index - | ||
| 620 | PACKET3_SET_CONTEXT_REG_START); | ||
| 621 | for (i = 0; i < ext->reg_count; i++) | ||
| 622 | buffer[count++] = cpu_to_le32(ext->extent[i]); | ||
| 623 | } else { | ||
| 624 | return; | ||
| 625 | } | ||
| 626 | } | ||
| 627 | } | ||
| 628 | |||
| 629 | buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); | ||
| 630 | buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); | ||
| 631 | |||
| 632 | buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); | ||
| 633 | buffer[count++] = cpu_to_le32(0); | ||
| 634 | } | ||
| 635 | |||
| 636 | static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) | ||
| 637 | { | ||
| 638 | uint32_t data; | ||
| 639 | |||
| 640 | /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ | ||
| 641 | WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); | ||
| 642 | WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); | ||
| 643 | WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); | ||
| 644 | WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); | ||
| 645 | |||
| 646 | /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ | ||
| 647 | WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); | ||
| 648 | |||
| 649 | /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ | ||
| 650 | WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); | ||
| 651 | |||
| 652 | mutex_lock(&adev->grbm_idx_mutex); | ||
| 653 | /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ | ||
| 654 | gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); | ||
| 655 | WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); | ||
| 656 | |||
| 657 | /* set mmRLC_LB_PARAMS = 0x003F_1006 */ | ||
| 658 | data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); | ||
| 659 | data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); | ||
| 660 | data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); | ||
| 661 | WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); | ||
| 662 | |||
| 663 | /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ | ||
| 664 | data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); | ||
| 665 | data &= 0x0000FFFF; | ||
| 666 | data |= 0x00C00000; | ||
| 667 | WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); | ||
| 668 | |||
| 669 | /* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */ | ||
| 670 | WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF); | ||
| 671 | |||
| 672 | /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, | ||
| 673 | * but used for RLC_LB_CNTL configuration */ | ||
| 674 | data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; | ||
| 675 | data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); | ||
| 676 | data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); | ||
| 677 | WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); | ||
| 678 | mutex_unlock(&adev->grbm_idx_mutex); | ||
| 679 | } | ||
| 680 | |||
| 681 | static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) | ||
| 682 | { | ||
| 683 | WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); | ||
| 684 | } | ||
| 685 | |||
| 686 | static void rv_init_cp_jump_table(struct amdgpu_device *adev) | ||
| 687 | { | ||
| 688 | const __le32 *fw_data; | ||
| 689 | volatile u32 *dst_ptr; | ||
| 690 | int me, i, max_me = 5; | ||
| 691 | u32 bo_offset = 0; | ||
| 692 | u32 table_offset, table_size; | ||
| 693 | |||
| 694 | /* write the cp table buffer */ | ||
| 695 | dst_ptr = adev->gfx.rlc.cp_table_ptr; | ||
| 696 | for (me = 0; me < max_me; me++) { | ||
| 697 | if (me == 0) { | ||
| 698 | const struct gfx_firmware_header_v1_0 *hdr = | ||
| 699 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; | ||
| 700 | fw_data = (const __le32 *) | ||
| 701 | (adev->gfx.ce_fw->data + | ||
| 702 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
| 703 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
| 704 | table_size = le32_to_cpu(hdr->jt_size); | ||
| 705 | } else if (me == 1) { | ||
| 706 | const struct gfx_firmware_header_v1_0 *hdr = | ||
| 707 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; | ||
| 708 | fw_data = (const __le32 *) | ||
| 709 | (adev->gfx.pfp_fw->data + | ||
| 710 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
| 711 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
| 712 | table_size = le32_to_cpu(hdr->jt_size); | ||
| 713 | } else if (me == 2) { | ||
| 714 | const struct gfx_firmware_header_v1_0 *hdr = | ||
| 715 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; | ||
| 716 | fw_data = (const __le32 *) | ||
| 717 | (adev->gfx.me_fw->data + | ||
| 718 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
| 719 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
| 720 | table_size = le32_to_cpu(hdr->jt_size); | ||
| 721 | } else if (me == 3) { | ||
| 722 | const struct gfx_firmware_header_v1_0 *hdr = | ||
| 723 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; | ||
| 724 | fw_data = (const __le32 *) | ||
| 725 | (adev->gfx.mec_fw->data + | ||
| 726 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
| 727 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
| 728 | table_size = le32_to_cpu(hdr->jt_size); | ||
| 729 | } else if (me == 4) { | ||
| 730 | const struct gfx_firmware_header_v1_0 *hdr = | ||
| 731 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; | ||
| 732 | fw_data = (const __le32 *) | ||
| 733 | (adev->gfx.mec2_fw->data + | ||
| 734 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
| 735 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
| 736 | table_size = le32_to_cpu(hdr->jt_size); | ||
| 737 | } | ||
| 738 | |||
| 739 | for (i = 0; i < table_size; i ++) { | ||
| 740 | dst_ptr[bo_offset + i] = | ||
| 741 | cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); | ||
| 742 | } | ||
| 743 | |||
| 744 | bo_offset += table_size; | ||
| 745 | } | ||
| 746 | } | ||
| 747 | |||
| 748 | static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev) | ||
| 749 | { | ||
| 750 | /* clear state block */ | ||
| 751 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, | ||
| 752 | &adev->gfx.rlc.clear_state_gpu_addr, | ||
| 753 | (void **)&adev->gfx.rlc.cs_ptr); | ||
| 754 | |||
| 755 | /* jump table block */ | ||
| 756 | amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, | ||
| 757 | &adev->gfx.rlc.cp_table_gpu_addr, | ||
| 758 | (void **)&adev->gfx.rlc.cp_table_ptr); | ||
| 759 | } | ||
| 760 | |||
| 761 | static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) | ||
| 762 | { | ||
| 763 | volatile u32 *dst_ptr; | ||
| 764 | u32 dws; | ||
| 765 | const struct cs_section_def *cs_data; | ||
| 766 | int r; | ||
| 767 | |||
| 768 | adev->gfx.rlc.cs_data = gfx9_cs_data; | ||
| 769 | |||
| 770 | cs_data = adev->gfx.rlc.cs_data; | ||
| 771 | |||
| 772 | if (cs_data) { | ||
| 773 | /* clear state block */ | ||
| 774 | adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); | ||
| 775 | if (adev->gfx.rlc.clear_state_obj == NULL) { | ||
| 776 | r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE, | ||
| 777 | AMDGPU_GEM_DOMAIN_VRAM, | ||
| 778 | &adev->gfx.rlc.clear_state_obj, | ||
| 779 | &adev->gfx.rlc.clear_state_gpu_addr, | ||
| 780 | (void **)&adev->gfx.rlc.cs_ptr); | ||
| 781 | if (r) { | ||
| 782 | dev_err(adev->dev, | ||
| 783 | "(%d) failed to create rlc csb bo\n", r); | ||
| 784 | gfx_v9_0_rlc_fini(adev); | ||
| 785 | return r; | ||
| 786 | } | ||
| 787 | } | ||
| 788 | /* set up the cs buffer */ | ||
| 789 | dst_ptr = adev->gfx.rlc.cs_ptr; | ||
| 790 | gfx_v9_0_get_csb_buffer(adev, dst_ptr); | ||
| 791 | amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); | ||
| 792 | amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); | ||
| 793 | } | ||
| 794 | |||
| 795 | if (adev->asic_type == CHIP_RAVEN) { | ||
| 796 | /* TODO: double check the cp_table_size for RV */ | ||
| 797 | adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ | ||
| 798 | if (adev->gfx.rlc.cp_table_obj == NULL) { | ||
| 799 | r = amdgpu_bo_create_kernel(adev, adev->gfx.rlc.cp_table_size, | ||
| 800 | PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, | ||
| 801 | &adev->gfx.rlc.cp_table_obj, | ||
| 802 | &adev->gfx.rlc.cp_table_gpu_addr, | ||
| 803 | (void **)&adev->gfx.rlc.cp_table_ptr); | ||
| 804 | if (r) { | ||
| 805 | dev_err(adev->dev, | ||
| 806 | "(%d) failed to create cp table bo\n", r); | ||
| 807 | gfx_v9_0_rlc_fini(adev); | ||
| 808 | return r; | ||
| 809 | } | ||
| 810 | } | ||
| 811 | |||
| 812 | rv_init_cp_jump_table(adev); | ||
| 813 | amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); | ||
| 814 | amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); | ||
| 815 | |||
| 816 | gfx_v9_0_init_lbpw(adev); | ||
| 817 | } | ||
| 818 | |||
| 819 | return 0; | ||
| 820 | } | ||
| 821 | |||
| 450 | static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) | 822 | static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) |
| 451 | { | 823 | { |
| 452 | int r; | 824 | int r; |
| @@ -473,8 +845,6 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) | |||
| 473 | } | 845 | } |
| 474 | } | 846 | } |
| 475 | 847 | ||
| 476 | #define MEC_HPD_SIZE 2048 | ||
| 477 | |||
| 478 | static int gfx_v9_0_mec_init(struct amdgpu_device *adev) | 848 | static int gfx_v9_0_mec_init(struct amdgpu_device *adev) |
| 479 | { | 849 | { |
| 480 | int r; | 850 | int r; |
| @@ -482,20 +852,19 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) | |||
| 482 | const __le32 *fw_data; | 852 | const __le32 *fw_data; |
| 483 | unsigned fw_size; | 853 | unsigned fw_size; |
| 484 | u32 *fw; | 854 | u32 *fw; |
| 855 | size_t mec_hpd_size; | ||
| 485 | 856 | ||
| 486 | const struct gfx_firmware_header_v1_0 *mec_hdr; | 857 | const struct gfx_firmware_header_v1_0 *mec_hdr; |
| 487 | 858 | ||
| 488 | /* | 859 | bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); |
| 489 | * we assign only 1 pipe because all other pipes will | 860 | |
| 490 | * be handled by KFD | 861 | /* take ownership of the relevant compute queues */ |
| 491 | */ | 862 | amdgpu_gfx_compute_queue_acquire(adev); |
| 492 | adev->gfx.mec.num_mec = 1; | 863 | mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; |
| 493 | adev->gfx.mec.num_pipe = 1; | ||
| 494 | adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; | ||
| 495 | 864 | ||
| 496 | if (adev->gfx.mec.hpd_eop_obj == NULL) { | 865 | if (adev->gfx.mec.hpd_eop_obj == NULL) { |
| 497 | r = amdgpu_bo_create(adev, | 866 | r = amdgpu_bo_create(adev, |
| 498 | adev->gfx.mec.num_queue * MEC_HPD_SIZE, | 867 | mec_hpd_size, |
| 499 | PAGE_SIZE, true, | 868 | PAGE_SIZE, true, |
| 500 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, | 869 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, |
| 501 | &adev->gfx.mec.hpd_eop_obj); | 870 | &adev->gfx.mec.hpd_eop_obj); |
| @@ -575,131 +944,6 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) | |||
| 575 | return 0; | 944 | return 0; |
| 576 | } | 945 | } |
| 577 | 946 | ||
| 578 | static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev) | ||
| 579 | { | ||
| 580 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 581 | |||
| 582 | amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); | ||
| 583 | } | ||
| 584 | |||
| 585 | static int gfx_v9_0_kiq_init(struct amdgpu_device *adev) | ||
| 586 | { | ||
| 587 | int r; | ||
| 588 | u32 *hpd; | ||
| 589 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 590 | |||
| 591 | r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, | ||
| 592 | AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, | ||
| 593 | &kiq->eop_gpu_addr, (void **)&hpd); | ||
| 594 | if (r) { | ||
| 595 | dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); | ||
| 596 | return r; | ||
| 597 | } | ||
| 598 | |||
| 599 | memset(hpd, 0, MEC_HPD_SIZE); | ||
| 600 | |||
| 601 | r = amdgpu_bo_reserve(kiq->eop_obj, true); | ||
| 602 | if (unlikely(r != 0)) | ||
| 603 | dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); | ||
| 604 | amdgpu_bo_kunmap(kiq->eop_obj); | ||
| 605 | amdgpu_bo_unreserve(kiq->eop_obj); | ||
| 606 | |||
| 607 | return 0; | ||
| 608 | } | ||
| 609 | |||
| 610 | static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev, | ||
| 611 | struct amdgpu_ring *ring, | ||
| 612 | struct amdgpu_irq_src *irq) | ||
| 613 | { | ||
| 614 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 615 | int r = 0; | ||
| 616 | |||
| 617 | r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); | ||
| 618 | if (r) | ||
| 619 | return r; | ||
| 620 | |||
| 621 | ring->adev = NULL; | ||
| 622 | ring->ring_obj = NULL; | ||
| 623 | ring->use_doorbell = true; | ||
| 624 | ring->doorbell_index = AMDGPU_DOORBELL_KIQ; | ||
| 625 | if (adev->gfx.mec2_fw) { | ||
| 626 | ring->me = 2; | ||
| 627 | ring->pipe = 0; | ||
| 628 | } else { | ||
| 629 | ring->me = 1; | ||
| 630 | ring->pipe = 1; | ||
| 631 | } | ||
| 632 | |||
| 633 | ring->queue = 0; | ||
| 634 | ring->eop_gpu_addr = kiq->eop_gpu_addr; | ||
| 635 | sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 636 | r = amdgpu_ring_init(adev, ring, 1024, | ||
| 637 | irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); | ||
| 638 | if (r) | ||
| 639 | dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); | ||
| 640 | |||
| 641 | return r; | ||
| 642 | } | ||
| 643 | static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring, | ||
| 644 | struct amdgpu_irq_src *irq) | ||
| 645 | { | ||
| 646 | amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); | ||
| 647 | amdgpu_ring_fini(ring); | ||
| 648 | } | ||
| 649 | |||
| 650 | /* create MQD for each compute queue */ | ||
| 651 | static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev) | ||
| 652 | { | ||
| 653 | struct amdgpu_ring *ring = NULL; | ||
| 654 | int r, i; | ||
| 655 | |||
| 656 | /* create MQD for KIQ */ | ||
| 657 | ring = &adev->gfx.kiq.ring; | ||
| 658 | if (!ring->mqd_obj) { | ||
| 659 | r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE, | ||
| 660 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, | ||
| 661 | &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); | ||
| 662 | if (r) { | ||
| 663 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | ||
| 664 | return r; | ||
| 665 | } | ||
| 666 | |||
| 667 | /*TODO: prepare MQD backup */ | ||
| 668 | } | ||
| 669 | |||
| 670 | /* create MQD for each KCQ */ | ||
| 671 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 672 | ring = &adev->gfx.compute_ring[i]; | ||
| 673 | if (!ring->mqd_obj) { | ||
| 674 | r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE, | ||
| 675 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, | ||
| 676 | &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); | ||
| 677 | if (r) { | ||
| 678 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | ||
| 679 | return r; | ||
| 680 | } | ||
| 681 | |||
| 682 | /* TODO: prepare MQD backup */ | ||
| 683 | } | ||
| 684 | } | ||
| 685 | |||
| 686 | return 0; | ||
| 687 | } | ||
| 688 | |||
| 689 | static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev) | ||
| 690 | { | ||
| 691 | struct amdgpu_ring *ring = NULL; | ||
| 692 | int i; | ||
| 693 | |||
| 694 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 695 | ring = &adev->gfx.compute_ring[i]; | ||
| 696 | amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); | ||
| 697 | } | ||
| 698 | |||
| 699 | ring = &adev->gfx.kiq.ring; | ||
| 700 | amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); | ||
| 701 | } | ||
| 702 | |||
| 703 | static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) | 947 | static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) |
| 704 | { | 948 | { |
| 705 | WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, | 949 | WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, |
| @@ -770,23 +1014,21 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) | |||
| 770 | 1014 | ||
| 771 | switch (adev->asic_type) { | 1015 | switch (adev->asic_type) { |
| 772 | case CHIP_VEGA10: | 1016 | case CHIP_VEGA10: |
| 773 | adev->gfx.config.max_shader_engines = 4; | ||
| 774 | adev->gfx.config.max_cu_per_sh = 16; | ||
| 775 | adev->gfx.config.max_sh_per_se = 1; | ||
| 776 | adev->gfx.config.max_backends_per_se = 4; | ||
| 777 | adev->gfx.config.max_texture_channel_caches = 16; | ||
| 778 | adev->gfx.config.max_gprs = 256; | ||
| 779 | adev->gfx.config.max_gs_threads = 32; | ||
| 780 | adev->gfx.config.max_hw_contexts = 8; | 1017 | adev->gfx.config.max_hw_contexts = 8; |
| 781 | |||
| 782 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | 1018 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; |
| 783 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | 1019 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; |
| 784 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | 1020 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; |
| 785 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; | 1021 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; |
| 786 | adev->gfx.config.gs_vgt_table_depth = 32; | ||
| 787 | adev->gfx.config.gs_prim_buffer_depth = 1792; | ||
| 788 | gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; | 1022 | gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; |
| 789 | break; | 1023 | break; |
| 1024 | case CHIP_RAVEN: | ||
| 1025 | adev->gfx.config.max_hw_contexts = 8; | ||
| 1026 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
| 1027 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
| 1028 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
| 1029 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; | ||
| 1030 | gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; | ||
| 1031 | break; | ||
| 790 | default: | 1032 | default: |
| 791 | BUG(); | 1033 | BUG(); |
| 792 | break; | 1034 | break; |
| @@ -1023,13 +1265,61 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) | |||
| 1023 | return 0; | 1265 | return 0; |
| 1024 | } | 1266 | } |
| 1025 | 1267 | ||
| 1268 | static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, | ||
| 1269 | int mec, int pipe, int queue) | ||
| 1270 | { | ||
| 1271 | int r; | ||
| 1272 | unsigned irq_type; | ||
| 1273 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; | ||
| 1274 | |||
| 1275 | ring = &adev->gfx.compute_ring[ring_id]; | ||
| 1276 | |||
| 1277 | /* mec0 is me1 */ | ||
| 1278 | ring->me = mec + 1; | ||
| 1279 | ring->pipe = pipe; | ||
| 1280 | ring->queue = queue; | ||
| 1281 | |||
| 1282 | ring->ring_obj = NULL; | ||
| 1283 | ring->use_doorbell = true; | ||
| 1284 | ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1; | ||
| 1285 | ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr | ||
| 1286 | + (ring_id * GFX9_MEC_HPD_SIZE); | ||
| 1287 | sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 1288 | |||
| 1289 | irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP | ||
| 1290 | + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) | ||
| 1291 | + ring->pipe; | ||
| 1292 | |||
| 1293 | /* type-2 packets are deprecated on MEC, use type-3 instead */ | ||
| 1294 | r = amdgpu_ring_init(adev, ring, 1024, | ||
| 1295 | &adev->gfx.eop_irq, irq_type); | ||
| 1296 | if (r) | ||
| 1297 | return r; | ||
| 1298 | |||
| 1299 | |||
| 1300 | return 0; | ||
| 1301 | } | ||
| 1302 | |||
| 1026 | static int gfx_v9_0_sw_init(void *handle) | 1303 | static int gfx_v9_0_sw_init(void *handle) |
| 1027 | { | 1304 | { |
| 1028 | int i, r; | 1305 | int i, j, k, r, ring_id; |
| 1029 | struct amdgpu_ring *ring; | 1306 | struct amdgpu_ring *ring; |
| 1030 | struct amdgpu_kiq *kiq; | 1307 | struct amdgpu_kiq *kiq; |
| 1031 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1308 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1032 | 1309 | ||
| 1310 | switch (adev->asic_type) { | ||
| 1311 | case CHIP_VEGA10: | ||
| 1312 | case CHIP_RAVEN: | ||
| 1313 | adev->gfx.mec.num_mec = 2; | ||
| 1314 | break; | ||
| 1315 | default: | ||
| 1316 | adev->gfx.mec.num_mec = 1; | ||
| 1317 | break; | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | adev->gfx.mec.num_pipe_per_mec = 4; | ||
| 1321 | adev->gfx.mec.num_queue_per_pipe = 8; | ||
| 1322 | |||
| 1033 | /* KIQ event */ | 1323 | /* KIQ event */ |
| 1034 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); | 1324 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); |
| 1035 | if (r) | 1325 | if (r) |
| @@ -1062,6 +1352,12 @@ static int gfx_v9_0_sw_init(void *handle) | |||
| 1062 | return r; | 1352 | return r; |
| 1063 | } | 1353 | } |
| 1064 | 1354 | ||
| 1355 | r = gfx_v9_0_rlc_init(adev); | ||
| 1356 | if (r) { | ||
| 1357 | DRM_ERROR("Failed to init rlc BOs!\n"); | ||
| 1358 | return r; | ||
| 1359 | } | ||
| 1360 | |||
| 1065 | r = gfx_v9_0_mec_init(adev); | 1361 | r = gfx_v9_0_mec_init(adev); |
| 1066 | if (r) { | 1362 | if (r) { |
| 1067 | DRM_ERROR("Failed to init MEC BOs!\n"); | 1363 | DRM_ERROR("Failed to init MEC BOs!\n"); |
| @@ -1081,49 +1377,40 @@ static int gfx_v9_0_sw_init(void *handle) | |||
| 1081 | return r; | 1377 | return r; |
| 1082 | } | 1378 | } |
| 1083 | 1379 | ||
| 1084 | /* set up the compute queues */ | 1380 | /* set up the compute queues - allocate horizontally across pipes */ |
| 1085 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 1381 | ring_id = 0; |
| 1086 | unsigned irq_type; | 1382 | for (i = 0; i < adev->gfx.mec.num_mec; ++i) { |
| 1087 | 1383 | for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { | |
| 1088 | /* max 32 queues per MEC */ | 1384 | for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { |
| 1089 | if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { | 1385 | if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) |
| 1090 | DRM_ERROR("Too many (%d) compute rings!\n", i); | 1386 | continue; |
| 1091 | break; | 1387 | |
| 1388 | r = gfx_v9_0_compute_ring_init(adev, | ||
| 1389 | ring_id, | ||
| 1390 | i, k, j); | ||
| 1391 | if (r) | ||
| 1392 | return r; | ||
| 1393 | |||
| 1394 | ring_id++; | ||
| 1395 | } | ||
| 1092 | } | 1396 | } |
| 1093 | ring = &adev->gfx.compute_ring[i]; | ||
| 1094 | ring->ring_obj = NULL; | ||
| 1095 | ring->use_doorbell = true; | ||
| 1096 | ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1; | ||
| 1097 | ring->me = 1; /* first MEC */ | ||
| 1098 | ring->pipe = i / 8; | ||
| 1099 | ring->queue = i % 8; | ||
| 1100 | ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); | ||
| 1101 | sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); | ||
| 1102 | irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; | ||
| 1103 | /* type-2 packets are deprecated on MEC, use type-3 instead */ | ||
| 1104 | r = amdgpu_ring_init(adev, ring, 1024, | ||
| 1105 | &adev->gfx.eop_irq, irq_type); | ||
| 1106 | if (r) | ||
| 1107 | return r; | ||
| 1108 | } | 1397 | } |
| 1109 | 1398 | ||
| 1110 | if (amdgpu_sriov_vf(adev)) { | 1399 | r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); |
| 1111 | r = gfx_v9_0_kiq_init(adev); | 1400 | if (r) { |
| 1112 | if (r) { | 1401 | DRM_ERROR("Failed to init KIQ BOs!\n"); |
| 1113 | DRM_ERROR("Failed to init KIQ BOs!\n"); | 1402 | return r; |
| 1114 | return r; | 1403 | } |
| 1115 | } | ||
| 1116 | 1404 | ||
| 1117 | kiq = &adev->gfx.kiq; | 1405 | kiq = &adev->gfx.kiq; |
| 1118 | r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); | 1406 | r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); |
| 1119 | if (r) | 1407 | if (r) |
| 1120 | return r; | 1408 | return r; |
| 1121 | 1409 | ||
| 1122 | /* create MQD for all compute queues as wel as KIQ for SRIOV case */ | 1410 | /* create MQD for all compute queues as wel as KIQ for SRIOV case */ |
| 1123 | r = gfx_v9_0_compute_mqd_sw_init(adev); | 1411 | r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd)); |
| 1124 | if (r) | 1412 | if (r) |
| 1125 | return r; | 1413 | return r; |
| 1126 | } | ||
| 1127 | 1414 | ||
| 1128 | /* reserve GDS, GWS and OA resource for gfx */ | 1415 | /* reserve GDS, GWS and OA resource for gfx */ |
| 1129 | r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, | 1416 | r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, |
| @@ -1170,11 +1457,9 @@ static int gfx_v9_0_sw_fini(void *handle) | |||
| 1170 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 1457 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
| 1171 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); | 1458 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); |
| 1172 | 1459 | ||
| 1173 | if (amdgpu_sriov_vf(adev)) { | 1460 | amdgpu_gfx_compute_mqd_sw_fini(adev); |
| 1174 | gfx_v9_0_compute_mqd_sw_fini(adev); | 1461 | amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); |
| 1175 | gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); | 1462 | amdgpu_gfx_kiq_fini(adev); |
| 1176 | gfx_v9_0_kiq_fini(adev); | ||
| 1177 | } | ||
| 1178 | 1463 | ||
| 1179 | gfx_v9_0_mec_fini(adev); | 1464 | gfx_v9_0_mec_fini(adev); |
| 1180 | gfx_v9_0_ngg_fini(adev); | 1465 | gfx_v9_0_ngg_fini(adev); |
| @@ -1208,11 +1493,6 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh | |||
| 1208 | WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); | 1493 | WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); |
| 1209 | } | 1494 | } |
| 1210 | 1495 | ||
| 1211 | static u32 gfx_v9_0_create_bitmask(u32 bit_width) | ||
| 1212 | { | ||
| 1213 | return (u32)((1ULL << bit_width) - 1); | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) | 1496 | static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) |
| 1217 | { | 1497 | { |
| 1218 | u32 data, mask; | 1498 | u32 data, mask; |
| @@ -1223,8 +1503,8 @@ static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) | |||
| 1223 | data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; | 1503 | data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; |
| 1224 | data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; | 1504 | data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; |
| 1225 | 1505 | ||
| 1226 | mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se / | 1506 | mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / |
| 1227 | adev->gfx.config.max_sh_per_se); | 1507 | adev->gfx.config.max_sh_per_se); |
| 1228 | 1508 | ||
| 1229 | return (~data) & mask; | 1509 | return (~data) & mask; |
| 1230 | } | 1510 | } |
| @@ -1272,7 +1552,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) | |||
| 1272 | 1552 | ||
| 1273 | sh_mem_config = SH_MEM_ADDRESS_MODE_64 | | 1553 | sh_mem_config = SH_MEM_ADDRESS_MODE_64 | |
| 1274 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << | 1554 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << |
| 1275 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; | 1555 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; |
| 1276 | 1556 | ||
| 1277 | mutex_lock(&adev->srbm_mutex); | 1557 | mutex_lock(&adev->srbm_mutex); |
| 1278 | for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { | 1558 | for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { |
| @@ -1370,9 +1650,6 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, | |||
| 1370 | { | 1650 | { |
| 1371 | u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); | 1651 | u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); |
| 1372 | 1652 | ||
| 1373 | if (enable) | ||
| 1374 | return; | ||
| 1375 | |||
| 1376 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); | 1653 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); |
| 1377 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); | 1654 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); |
| 1378 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); | 1655 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); |
| @@ -1381,6 +1658,373 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, | |||
| 1381 | WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); | 1658 | WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); |
| 1382 | } | 1659 | } |
| 1383 | 1660 | ||
| 1661 | static void gfx_v9_0_init_csb(struct amdgpu_device *adev) | ||
| 1662 | { | ||
| 1663 | /* csib */ | ||
| 1664 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), | ||
| 1665 | adev->gfx.rlc.clear_state_gpu_addr >> 32); | ||
| 1666 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), | ||
| 1667 | adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); | ||
| 1668 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), | ||
| 1669 | adev->gfx.rlc.clear_state_size); | ||
| 1670 | } | ||
| 1671 | |||
| 1672 | static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, | ||
| 1673 | int indirect_offset, | ||
| 1674 | int list_size, | ||
| 1675 | int *unique_indirect_regs, | ||
| 1676 | int *unique_indirect_reg_count, | ||
| 1677 | int max_indirect_reg_count, | ||
| 1678 | int *indirect_start_offsets, | ||
| 1679 | int *indirect_start_offsets_count, | ||
| 1680 | int max_indirect_start_offsets_count) | ||
| 1681 | { | ||
| 1682 | int idx; | ||
| 1683 | bool new_entry = true; | ||
| 1684 | |||
| 1685 | for (; indirect_offset < list_size; indirect_offset++) { | ||
| 1686 | |||
| 1687 | if (new_entry) { | ||
| 1688 | new_entry = false; | ||
| 1689 | indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; | ||
| 1690 | *indirect_start_offsets_count = *indirect_start_offsets_count + 1; | ||
| 1691 | BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count); | ||
| 1692 | } | ||
| 1693 | |||
| 1694 | if (register_list_format[indirect_offset] == 0xFFFFFFFF) { | ||
| 1695 | new_entry = true; | ||
| 1696 | continue; | ||
| 1697 | } | ||
| 1698 | |||
| 1699 | indirect_offset += 2; | ||
| 1700 | |||
| 1701 | /* look for the matching indice */ | ||
| 1702 | for (idx = 0; idx < *unique_indirect_reg_count; idx++) { | ||
| 1703 | if (unique_indirect_regs[idx] == | ||
| 1704 | register_list_format[indirect_offset]) | ||
| 1705 | break; | ||
| 1706 | } | ||
| 1707 | |||
| 1708 | if (idx >= *unique_indirect_reg_count) { | ||
| 1709 | unique_indirect_regs[*unique_indirect_reg_count] = | ||
| 1710 | register_list_format[indirect_offset]; | ||
| 1711 | idx = *unique_indirect_reg_count; | ||
| 1712 | *unique_indirect_reg_count = *unique_indirect_reg_count + 1; | ||
| 1713 | BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count); | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | register_list_format[indirect_offset] = idx; | ||
| 1717 | } | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | ||
| 1721 | { | ||
| 1722 | int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; | ||
| 1723 | int unique_indirect_reg_count = 0; | ||
| 1724 | |||
| 1725 | int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; | ||
| 1726 | int indirect_start_offsets_count = 0; | ||
| 1727 | |||
| 1728 | int list_size = 0; | ||
| 1729 | int i = 0; | ||
| 1730 | u32 tmp = 0; | ||
| 1731 | |||
| 1732 | u32 *register_list_format = | ||
| 1733 | kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); | ||
| 1734 | if (!register_list_format) | ||
| 1735 | return -ENOMEM; | ||
| 1736 | memcpy(register_list_format, adev->gfx.rlc.register_list_format, | ||
| 1737 | adev->gfx.rlc.reg_list_format_size_bytes); | ||
| 1738 | |||
| 1739 | /* setup unique_indirect_regs array and indirect_start_offsets array */ | ||
| 1740 | gfx_v9_0_parse_ind_reg_list(register_list_format, | ||
| 1741 | GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, | ||
| 1742 | adev->gfx.rlc.reg_list_format_size_bytes >> 2, | ||
| 1743 | unique_indirect_regs, | ||
| 1744 | &unique_indirect_reg_count, | ||
| 1745 | sizeof(unique_indirect_regs)/sizeof(int), | ||
| 1746 | indirect_start_offsets, | ||
| 1747 | &indirect_start_offsets_count, | ||
| 1748 | sizeof(indirect_start_offsets)/sizeof(int)); | ||
| 1749 | |||
| 1750 | /* enable auto inc in case it is disabled */ | ||
| 1751 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); | ||
| 1752 | tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; | ||
| 1753 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); | ||
| 1754 | |||
| 1755 | /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ | ||
| 1756 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), | ||
| 1757 | RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); | ||
| 1758 | for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) | ||
| 1759 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), | ||
| 1760 | adev->gfx.rlc.register_restore[i]); | ||
| 1761 | |||
| 1762 | /* load direct register */ | ||
| 1763 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0); | ||
| 1764 | for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) | ||
| 1765 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), | ||
| 1766 | adev->gfx.rlc.register_restore[i]); | ||
| 1767 | |||
| 1768 | /* load indirect register */ | ||
| 1769 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), | ||
| 1770 | adev->gfx.rlc.reg_list_format_start); | ||
| 1771 | for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) | ||
| 1772 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), | ||
| 1773 | register_list_format[i]); | ||
| 1774 | |||
| 1775 | /* set save/restore list size */ | ||
| 1776 | list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; | ||
| 1777 | list_size = list_size >> 1; | ||
| 1778 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), | ||
| 1779 | adev->gfx.rlc.reg_restore_list_size); | ||
| 1780 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); | ||
| 1781 | |||
| 1782 | /* write the starting offsets to RLC scratch ram */ | ||
| 1783 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), | ||
| 1784 | adev->gfx.rlc.starting_offsets_start); | ||
| 1785 | for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) | ||
| 1786 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), | ||
| 1787 | indirect_start_offsets[i]); | ||
| 1788 | |||
| 1789 | /* load unique indirect regs*/ | ||
| 1790 | for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) { | ||
| 1791 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, | ||
| 1792 | unique_indirect_regs[i] & 0x3FFFF); | ||
| 1793 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, | ||
| 1794 | unique_indirect_regs[i] >> 20); | ||
| 1795 | } | ||
| 1796 | |||
| 1797 | kfree(register_list_format); | ||
| 1798 | return 0; | ||
| 1799 | } | ||
| 1800 | |||
| 1801 | static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) | ||
| 1802 | { | ||
| 1803 | u32 tmp = 0; | ||
| 1804 | |||
| 1805 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); | ||
| 1806 | tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; | ||
| 1807 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); | ||
| 1808 | } | ||
| 1809 | |||
| 1810 | static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, | ||
| 1811 | bool enable) | ||
| 1812 | { | ||
| 1813 | uint32_t data = 0; | ||
| 1814 | uint32_t default_data = 0; | ||
| 1815 | |||
| 1816 | default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); | ||
| 1817 | if (enable == true) { | ||
| 1818 | /* enable GFXIP control over CGPG */ | ||
| 1819 | data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; | ||
| 1820 | if(default_data != data) | ||
| 1821 | WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); | ||
| 1822 | |||
| 1823 | /* update status */ | ||
| 1824 | data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; | ||
| 1825 | data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); | ||
| 1826 | if(default_data != data) | ||
| 1827 | WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); | ||
| 1828 | } else { | ||
| 1829 | /* restore GFXIP control over GCPG */ | ||
| 1830 | data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; | ||
| 1831 | if(default_data != data) | ||
| 1832 | WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); | ||
| 1833 | } | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) | ||
| 1837 | { | ||
| 1838 | uint32_t data = 0; | ||
| 1839 | |||
| 1840 | if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | | ||
| 1841 | AMD_PG_SUPPORT_GFX_SMG | | ||
| 1842 | AMD_PG_SUPPORT_GFX_DMG)) { | ||
| 1843 | /* init IDLE_POLL_COUNT = 60 */ | ||
| 1844 | data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); | ||
| 1845 | data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; | ||
| 1846 | data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); | ||
| 1847 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); | ||
| 1848 | |||
| 1849 | /* init RLC PG Delay */ | ||
| 1850 | data = 0; | ||
| 1851 | data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); | ||
| 1852 | data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); | ||
| 1853 | data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); | ||
| 1854 | data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); | ||
| 1855 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); | ||
| 1856 | |||
| 1857 | data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); | ||
| 1858 | data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; | ||
| 1859 | data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); | ||
| 1860 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); | ||
| 1861 | |||
| 1862 | data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); | ||
| 1863 | data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; | ||
| 1864 | data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); | ||
| 1865 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); | ||
| 1866 | |||
| 1867 | data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); | ||
| 1868 | data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; | ||
| 1869 | |||
| 1870 | /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ | ||
| 1871 | data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); | ||
| 1872 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); | ||
| 1873 | |||
| 1874 | pwr_10_0_gfxip_control_over_cgpg(adev, true); | ||
| 1875 | } | ||
| 1876 | } | ||
| 1877 | |||
| 1878 | static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, | ||
| 1879 | bool enable) | ||
| 1880 | { | ||
| 1881 | uint32_t data = 0; | ||
| 1882 | uint32_t default_data = 0; | ||
| 1883 | |||
| 1884 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | ||
| 1885 | |||
| 1886 | if (enable == true) { | ||
| 1887 | data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; | ||
| 1888 | if (default_data != data) | ||
| 1889 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1890 | } else { | ||
| 1891 | data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; | ||
| 1892 | if(default_data != data) | ||
| 1893 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1894 | } | ||
| 1895 | } | ||
| 1896 | |||
| 1897 | static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, | ||
| 1898 | bool enable) | ||
| 1899 | { | ||
| 1900 | uint32_t data = 0; | ||
| 1901 | uint32_t default_data = 0; | ||
| 1902 | |||
| 1903 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | ||
| 1904 | |||
| 1905 | if (enable == true) { | ||
| 1906 | data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; | ||
| 1907 | if(default_data != data) | ||
| 1908 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1909 | } else { | ||
| 1910 | data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; | ||
| 1911 | if(default_data != data) | ||
| 1912 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1913 | } | ||
| 1914 | } | ||
| 1915 | |||
| 1916 | static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, | ||
| 1917 | bool enable) | ||
| 1918 | { | ||
| 1919 | uint32_t data = 0; | ||
| 1920 | uint32_t default_data = 0; | ||
| 1921 | |||
| 1922 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | ||
| 1923 | |||
| 1924 | if (enable == true) { | ||
| 1925 | data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; | ||
| 1926 | if(default_data != data) | ||
| 1927 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1928 | } else { | ||
| 1929 | data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK; | ||
| 1930 | if(default_data != data) | ||
| 1931 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1932 | } | ||
| 1933 | } | ||
| 1934 | |||
| 1935 | static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, | ||
| 1936 | bool enable) | ||
| 1937 | { | ||
| 1938 | uint32_t data, default_data; | ||
| 1939 | |||
| 1940 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | ||
| 1941 | if (enable == true) | ||
| 1942 | data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; | ||
| 1943 | else | ||
| 1944 | data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; | ||
| 1945 | if(default_data != data) | ||
| 1946 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, | ||
| 1950 | bool enable) | ||
| 1951 | { | ||
| 1952 | uint32_t data, default_data; | ||
| 1953 | |||
| 1954 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | ||
| 1955 | if (enable == true) | ||
| 1956 | data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; | ||
| 1957 | else | ||
| 1958 | data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; | ||
| 1959 | if(default_data != data) | ||
| 1960 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1961 | |||
| 1962 | if (!enable) | ||
| 1963 | /* read any GFX register to wake up GFX */ | ||
| 1964 | data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); | ||
| 1965 | } | ||
| 1966 | |||
| 1967 | static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, | ||
| 1968 | bool enable) | ||
| 1969 | { | ||
| 1970 | uint32_t data, default_data; | ||
| 1971 | |||
| 1972 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | ||
| 1973 | if (enable == true) | ||
| 1974 | data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; | ||
| 1975 | else | ||
| 1976 | data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; | ||
| 1977 | if(default_data != data) | ||
| 1978 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, | ||
| 1982 | bool enable) | ||
| 1983 | { | ||
| 1984 | uint32_t data, default_data; | ||
| 1985 | |||
| 1986 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | ||
| 1987 | if (enable == true) | ||
| 1988 | data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; | ||
| 1989 | else | ||
| 1990 | data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; | ||
| 1991 | if(default_data != data) | ||
| 1992 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
| 1993 | } | ||
| 1994 | |||
| 1995 | static void gfx_v9_0_init_pg(struct amdgpu_device *adev) | ||
| 1996 | { | ||
| 1997 | if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | | ||
| 1998 | AMD_PG_SUPPORT_GFX_SMG | | ||
| 1999 | AMD_PG_SUPPORT_GFX_DMG | | ||
| 2000 | AMD_PG_SUPPORT_CP | | ||
| 2001 | AMD_PG_SUPPORT_GDS | | ||
| 2002 | AMD_PG_SUPPORT_RLC_SMU_HS)) { | ||
| 2003 | gfx_v9_0_init_csb(adev); | ||
| 2004 | gfx_v9_0_init_rlc_save_restore_list(adev); | ||
| 2005 | gfx_v9_0_enable_save_restore_machine(adev); | ||
| 2006 | |||
| 2007 | if (adev->asic_type == CHIP_RAVEN) { | ||
| 2008 | WREG32(mmRLC_JUMP_TABLE_RESTORE, | ||
| 2009 | adev->gfx.rlc.cp_table_gpu_addr >> 8); | ||
| 2010 | gfx_v9_0_init_gfx_power_gating(adev); | ||
| 2011 | |||
| 2012 | if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { | ||
| 2013 | gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); | ||
| 2014 | gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); | ||
| 2015 | } else { | ||
| 2016 | gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); | ||
| 2017 | gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); | ||
| 2018 | } | ||
| 2019 | |||
| 2020 | if (adev->pg_flags & AMD_PG_SUPPORT_CP) | ||
| 2021 | gfx_v9_0_enable_cp_power_gating(adev, true); | ||
| 2022 | else | ||
| 2023 | gfx_v9_0_enable_cp_power_gating(adev, false); | ||
| 2024 | } | ||
| 2025 | } | ||
| 2026 | } | ||
| 2027 | |||
| 1384 | void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) | 2028 | void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) |
| 1385 | { | 2029 | { |
| 1386 | u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); | 2030 | u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); |
| @@ -1425,7 +2069,7 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) | |||
| 1425 | * default is 0x9C4 to create a 100us interval */ | 2069 | * default is 0x9C4 to create a 100us interval */ |
| 1426 | WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); | 2070 | WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); |
| 1427 | /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr | 2071 | /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr |
| 1428 | * to disable the page fault retry interrupts, default is | 2072 | * to disable the page fault retry interrupts, default is |
| 1429 | * 0x100 (256) */ | 2073 | * 0x100 (256) */ |
| 1430 | WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); | 2074 | WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); |
| 1431 | } | 2075 | } |
| @@ -1474,6 +2118,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) | |||
| 1474 | 2118 | ||
| 1475 | gfx_v9_0_rlc_reset(adev); | 2119 | gfx_v9_0_rlc_reset(adev); |
| 1476 | 2120 | ||
| 2121 | gfx_v9_0_init_pg(adev); | ||
| 2122 | |||
| 1477 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { | 2123 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { |
| 1478 | /* legacy rlc firmware loading */ | 2124 | /* legacy rlc firmware loading */ |
| 1479 | r = gfx_v9_0_rlc_load_microcode(adev); | 2125 | r = gfx_v9_0_rlc_load_microcode(adev); |
| @@ -1481,6 +2127,13 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) | |||
| 1481 | return r; | 2127 | return r; |
| 1482 | } | 2128 | } |
| 1483 | 2129 | ||
| 2130 | if (adev->asic_type == CHIP_RAVEN) { | ||
| 2131 | if (amdgpu_lbpw != 0) | ||
| 2132 | gfx_v9_0_enable_lbpw(adev, true); | ||
| 2133 | else | ||
| 2134 | gfx_v9_0_enable_lbpw(adev, false); | ||
| 2135 | } | ||
| 2136 | |||
| 1484 | gfx_v9_0_rlc_start(adev); | 2137 | gfx_v9_0_rlc_start(adev); |
| 1485 | 2138 | ||
| 1486 | return 0; | 2139 | return 0; |
| @@ -1559,35 +2212,6 @@ static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) | |||
| 1559 | return 0; | 2212 | return 0; |
| 1560 | } | 2213 | } |
| 1561 | 2214 | ||
| 1562 | static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) | ||
| 1563 | { | ||
| 1564 | u32 count = 0; | ||
| 1565 | const struct cs_section_def *sect = NULL; | ||
| 1566 | const struct cs_extent_def *ext = NULL; | ||
| 1567 | |||
| 1568 | /* begin clear state */ | ||
| 1569 | count += 2; | ||
| 1570 | /* context control state */ | ||
| 1571 | count += 3; | ||
| 1572 | |||
| 1573 | for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { | ||
| 1574 | for (ext = sect->section; ext->extent != NULL; ++ext) { | ||
| 1575 | if (sect->id == SECT_CONTEXT) | ||
| 1576 | count += 2 + ext->reg_count; | ||
| 1577 | else | ||
| 1578 | return 0; | ||
| 1579 | } | ||
| 1580 | } | ||
| 1581 | /* pa_sc_raster_config/pa_sc_raster_config1 */ | ||
| 1582 | count += 4; | ||
| 1583 | /* end clear state */ | ||
| 1584 | count += 2; | ||
| 1585 | /* clear state */ | ||
| 1586 | count += 2; | ||
| 1587 | |||
| 1588 | return count; | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) | 2215 | static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) |
| 1592 | { | 2216 | { |
| 1593 | struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; | 2217 | struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; |
| @@ -1730,13 +2354,6 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) | |||
| 1730 | udelay(50); | 2354 | udelay(50); |
| 1731 | } | 2355 | } |
| 1732 | 2356 | ||
| 1733 | static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev) | ||
| 1734 | { | ||
| 1735 | gfx_v9_0_cp_compute_enable(adev, true); | ||
| 1736 | |||
| 1737 | return 0; | ||
| 1738 | } | ||
| 1739 | |||
| 1740 | static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) | 2357 | static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) |
| 1741 | { | 2358 | { |
| 1742 | const struct gfx_firmware_header_v1_0 *mec_hdr; | 2359 | const struct gfx_firmware_header_v1_0 *mec_hdr; |
| @@ -1764,7 +2381,7 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) | |||
| 1764 | adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); | 2381 | adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); |
| 1765 | WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, | 2382 | WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, |
| 1766 | upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); | 2383 | upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); |
| 1767 | 2384 | ||
| 1768 | /* MEC1 */ | 2385 | /* MEC1 */ |
| 1769 | WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, | 2386 | WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, |
| 1770 | mec_hdr->jt_offset); | 2387 | mec_hdr->jt_offset); |
| @@ -1779,45 +2396,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) | |||
| 1779 | return 0; | 2396 | return 0; |
| 1780 | } | 2397 | } |
| 1781 | 2398 | ||
| 1782 | static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev) | ||
| 1783 | { | ||
| 1784 | int i, r; | ||
| 1785 | |||
| 1786 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 1787 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | ||
| 1788 | |||
| 1789 | if (ring->mqd_obj) { | ||
| 1790 | r = amdgpu_bo_reserve(ring->mqd_obj, true); | ||
| 1791 | if (unlikely(r != 0)) | ||
| 1792 | dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); | ||
| 1793 | |||
| 1794 | amdgpu_bo_unpin(ring->mqd_obj); | ||
| 1795 | amdgpu_bo_unreserve(ring->mqd_obj); | ||
| 1796 | |||
| 1797 | amdgpu_bo_unref(&ring->mqd_obj); | ||
| 1798 | ring->mqd_obj = NULL; | ||
| 1799 | } | ||
| 1800 | } | ||
| 1801 | } | ||
| 1802 | |||
| 1803 | static int gfx_v9_0_init_queue(struct amdgpu_ring *ring); | ||
| 1804 | |||
| 1805 | static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev) | ||
| 1806 | { | ||
| 1807 | int i, r; | ||
| 1808 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 1809 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | ||
| 1810 | if (gfx_v9_0_init_queue(ring)) | ||
| 1811 | dev_warn(adev->dev, "compute queue %d init failed!\n", i); | ||
| 1812 | } | ||
| 1813 | |||
| 1814 | r = gfx_v9_0_cp_compute_start(adev); | ||
| 1815 | if (r) | ||
| 1816 | return r; | ||
| 1817 | |||
| 1818 | return 0; | ||
| 1819 | } | ||
| 1820 | |||
| 1821 | /* KIQ functions */ | 2399 | /* KIQ functions */ |
| 1822 | static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) | 2400 | static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) |
| 1823 | { | 2401 | { |
| @@ -1833,51 +2411,95 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) | |||
| 1833 | WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); | 2411 | WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); |
| 1834 | } | 2412 | } |
| 1835 | 2413 | ||
| 1836 | static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring) | 2414 | static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) |
| 1837 | { | 2415 | { |
| 1838 | amdgpu_ring_alloc(ring, 8); | 2416 | struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; |
| 1839 | /* set resources */ | 2417 | uint32_t scratch, tmp = 0; |
| 1840 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); | 2418 | uint64_t queue_mask = 0; |
| 1841 | amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ | 2419 | int r, i; |
| 1842 | amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ | 2420 | |
| 1843 | amdgpu_ring_write(ring, 0); /* queue mask hi */ | 2421 | for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { |
| 1844 | amdgpu_ring_write(ring, 0); /* gws mask lo */ | 2422 | if (!test_bit(i, adev->gfx.mec.queue_bitmap)) |
| 1845 | amdgpu_ring_write(ring, 0); /* gws mask hi */ | 2423 | continue; |
| 1846 | amdgpu_ring_write(ring, 0); /* oac mask */ | 2424 | |
| 1847 | amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ | 2425 | /* This situation may be hit in the future if a new HW |
| 1848 | amdgpu_ring_commit(ring); | 2426 | * generation exposes more than 64 queues. If so, the |
| 1849 | udelay(50); | 2427 | * definition of queue_mask needs updating */ |
| 1850 | } | 2428 | if (WARN_ON(i > (sizeof(queue_mask)*8))) { |
| 2429 | DRM_ERROR("Invalid KCQ enabled: %d\n", i); | ||
| 2430 | break; | ||
| 2431 | } | ||
| 1851 | 2432 | ||
| 1852 | static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring, | 2433 | queue_mask |= (1ull << i); |
| 1853 | struct amdgpu_ring *ring) | 2434 | } |
| 1854 | { | 2435 | |
| 1855 | struct amdgpu_device *adev = kiq_ring->adev; | 2436 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
| 1856 | uint64_t mqd_addr, wptr_addr; | 2437 | if (r) { |
| 1857 | 2438 | DRM_ERROR("Failed to get scratch reg (%d).\n", r); | |
| 1858 | mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); | 2439 | return r; |
| 1859 | wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | 2440 | } |
| 1860 | amdgpu_ring_alloc(kiq_ring, 8); | 2441 | WREG32(scratch, 0xCAFEDEAD); |
| 1861 | 2442 | ||
| 1862 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); | 2443 | r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11); |
| 1863 | /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ | 2444 | if (r) { |
| 1864 | amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ | 2445 | DRM_ERROR("Failed to lock KIQ (%d).\n", r); |
| 1865 | (0 << 4) | /* Queue_Sel */ | 2446 | amdgpu_gfx_scratch_free(adev, scratch); |
| 1866 | (0 << 8) | /* VMID */ | 2447 | return r; |
| 1867 | (ring->queue << 13 ) | | 2448 | } |
| 1868 | (ring->pipe << 16) | | 2449 | |
| 1869 | ((ring->me == 1 ? 0 : 1) << 18) | | 2450 | /* set resources */ |
| 1870 | (0 << 21) | /*queue_type: normal compute queue */ | 2451 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); |
| 1871 | (1 << 24) | /* alloc format: all_on_one_pipe */ | 2452 | amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | |
| 1872 | (0 << 26) | /* engine_sel: compute */ | 2453 | PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ |
| 1873 | (1 << 29)); /* num_queues: must be 1 */ | 2454 | amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ |
| 1874 | amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2)); | 2455 | amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ |
| 1875 | amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); | 2456 | amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ |
| 1876 | amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); | 2457 | amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ |
| 1877 | amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); | 2458 | amdgpu_ring_write(kiq_ring, 0); /* oac mask */ |
| 1878 | amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); | 2459 | amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ |
| 2460 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
| 2461 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | ||
| 2462 | uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); | ||
| 2463 | uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | ||
| 2464 | |||
| 2465 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); | ||
| 2466 | /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ | ||
| 2467 | amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ | ||
| 2468 | PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ | ||
| 2469 | PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ | ||
| 2470 | PACKET3_MAP_QUEUES_QUEUE(ring->queue) | | ||
| 2471 | PACKET3_MAP_QUEUES_PIPE(ring->pipe) | | ||
| 2472 | PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | | ||
| 2473 | PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ | ||
| 2474 | PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ | ||
| 2475 | PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ | ||
| 2476 | PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ | ||
| 2477 | amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); | ||
| 2478 | amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); | ||
| 2479 | amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); | ||
| 2480 | amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); | ||
| 2481 | amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); | ||
| 2482 | } | ||
| 2483 | /* write to scratch for completion */ | ||
| 2484 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | ||
| 2485 | amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | ||
| 2486 | amdgpu_ring_write(kiq_ring, 0xDEADBEEF); | ||
| 1879 | amdgpu_ring_commit(kiq_ring); | 2487 | amdgpu_ring_commit(kiq_ring); |
| 1880 | udelay(50); | 2488 | |
| 2489 | for (i = 0; i < adev->usec_timeout; i++) { | ||
| 2490 | tmp = RREG32(scratch); | ||
| 2491 | if (tmp == 0xDEADBEEF) | ||
| 2492 | break; | ||
| 2493 | DRM_UDELAY(1); | ||
| 2494 | } | ||
| 2495 | if (i >= adev->usec_timeout) { | ||
| 2496 | DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", | ||
| 2497 | scratch, tmp); | ||
| 2498 | r = -EINVAL; | ||
| 2499 | } | ||
| 2500 | amdgpu_gfx_scratch_free(adev, scratch); | ||
| 2501 | |||
| 2502 | return r; | ||
| 1881 | } | 2503 | } |
| 1882 | 2504 | ||
| 1883 | static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) | 2505 | static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) |
| @@ -1902,7 +2524,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) | |||
| 1902 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ | 2524 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ |
| 1903 | tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); | 2525 | tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); |
| 1904 | tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, | 2526 | tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, |
| 1905 | (order_base_2(MEC_HPD_SIZE / 4) - 1)); | 2527 | (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); |
| 1906 | 2528 | ||
| 1907 | mqd->cp_hqd_eop_control = tmp; | 2529 | mqd->cp_hqd_eop_control = tmp; |
| 1908 | 2530 | ||
| @@ -2119,47 +2741,69 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) | |||
| 2119 | static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) | 2741 | static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) |
| 2120 | { | 2742 | { |
| 2121 | struct amdgpu_device *adev = ring->adev; | 2743 | struct amdgpu_device *adev = ring->adev; |
| 2122 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
| 2123 | struct v9_mqd *mqd = ring->mqd_ptr; | 2744 | struct v9_mqd *mqd = ring->mqd_ptr; |
| 2124 | bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); | ||
| 2125 | int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; | 2745 | int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; |
| 2126 | 2746 | ||
| 2127 | if (is_kiq) { | 2747 | gfx_v9_0_kiq_setting(ring); |
| 2128 | gfx_v9_0_kiq_setting(&kiq->ring); | 2748 | |
| 2749 | if (adev->gfx.in_reset) { /* for GPU_RESET case */ | ||
| 2750 | /* reset MQD to a clean status */ | ||
| 2751 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | ||
| 2752 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); | ||
| 2753 | |||
| 2754 | /* reset ring buffer */ | ||
| 2755 | ring->wptr = 0; | ||
| 2756 | amdgpu_ring_clear_ring(ring); | ||
| 2757 | |||
| 2758 | mutex_lock(&adev->srbm_mutex); | ||
| 2759 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 2760 | gfx_v9_0_kiq_init_register(ring); | ||
| 2761 | soc15_grbm_select(adev, 0, 0, 0, 0); | ||
| 2762 | mutex_unlock(&adev->srbm_mutex); | ||
| 2129 | } else { | 2763 | } else { |
| 2130 | mqd_idx = ring - &adev->gfx.compute_ring[0]; | 2764 | memset((void *)mqd, 0, sizeof(*mqd)); |
| 2765 | mutex_lock(&adev->srbm_mutex); | ||
| 2766 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 2767 | gfx_v9_0_mqd_init(ring); | ||
| 2768 | gfx_v9_0_kiq_init_register(ring); | ||
| 2769 | soc15_grbm_select(adev, 0, 0, 0, 0); | ||
| 2770 | mutex_unlock(&adev->srbm_mutex); | ||
| 2771 | |||
| 2772 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | ||
| 2773 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); | ||
| 2131 | } | 2774 | } |
| 2132 | 2775 | ||
| 2133 | if (!adev->gfx.in_reset) { | 2776 | return 0; |
| 2777 | } | ||
| 2778 | |||
| 2779 | static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) | ||
| 2780 | { | ||
| 2781 | struct amdgpu_device *adev = ring->adev; | ||
| 2782 | struct v9_mqd *mqd = ring->mqd_ptr; | ||
| 2783 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; | ||
| 2784 | |||
| 2785 | if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { | ||
| 2134 | memset((void *)mqd, 0, sizeof(*mqd)); | 2786 | memset((void *)mqd, 0, sizeof(*mqd)); |
| 2135 | mutex_lock(&adev->srbm_mutex); | 2787 | mutex_lock(&adev->srbm_mutex); |
| 2136 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | 2788 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
| 2137 | gfx_v9_0_mqd_init(ring); | 2789 | gfx_v9_0_mqd_init(ring); |
| 2138 | if (is_kiq) | ||
| 2139 | gfx_v9_0_kiq_init_register(ring); | ||
| 2140 | soc15_grbm_select(adev, 0, 0, 0, 0); | 2790 | soc15_grbm_select(adev, 0, 0, 0, 0); |
| 2141 | mutex_unlock(&adev->srbm_mutex); | 2791 | mutex_unlock(&adev->srbm_mutex); |
| 2142 | 2792 | ||
| 2143 | } else { /* for GPU_RESET case */ | 2793 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
| 2794 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); | ||
| 2795 | } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ | ||
| 2144 | /* reset MQD to a clean status */ | 2796 | /* reset MQD to a clean status */ |
| 2797 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | ||
| 2798 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); | ||
| 2145 | 2799 | ||
| 2146 | /* reset ring buffer */ | 2800 | /* reset ring buffer */ |
| 2147 | ring->wptr = 0; | 2801 | ring->wptr = 0; |
| 2148 | 2802 | amdgpu_ring_clear_ring(ring); | |
| 2149 | if (is_kiq) { | 2803 | } else { |
| 2150 | mutex_lock(&adev->srbm_mutex); | 2804 | amdgpu_ring_clear_ring(ring); |
| 2151 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
| 2152 | gfx_v9_0_kiq_init_register(ring); | ||
| 2153 | soc15_grbm_select(adev, 0, 0, 0, 0); | ||
| 2154 | mutex_unlock(&adev->srbm_mutex); | ||
| 2155 | } | ||
| 2156 | } | 2805 | } |
| 2157 | 2806 | ||
| 2158 | if (is_kiq) | ||
| 2159 | gfx_v9_0_kiq_enable(ring); | ||
| 2160 | else | ||
| 2161 | gfx_v9_0_map_queue_enable(&kiq->ring, ring); | ||
| 2162 | |||
| 2163 | return 0; | 2807 | return 0; |
| 2164 | } | 2808 | } |
| 2165 | 2809 | ||
| @@ -2194,7 +2838,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) | |||
| 2194 | goto done; | 2838 | goto done; |
| 2195 | r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); | 2839 | r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); |
| 2196 | if (!r) { | 2840 | if (!r) { |
| 2197 | r = gfx_v9_0_kiq_init_queue(ring); | 2841 | r = gfx_v9_0_kcq_init_queue(ring); |
| 2198 | amdgpu_bo_kunmap(ring->mqd_obj); | 2842 | amdgpu_bo_kunmap(ring->mqd_obj); |
| 2199 | ring->mqd_ptr = NULL; | 2843 | ring->mqd_ptr = NULL; |
| 2200 | } | 2844 | } |
| @@ -2203,13 +2847,14 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) | |||
| 2203 | goto done; | 2847 | goto done; |
| 2204 | } | 2848 | } |
| 2205 | 2849 | ||
| 2850 | r = gfx_v9_0_kiq_kcq_enable(adev); | ||
| 2206 | done: | 2851 | done: |
| 2207 | return r; | 2852 | return r; |
| 2208 | } | 2853 | } |
| 2209 | 2854 | ||
| 2210 | static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) | 2855 | static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) |
| 2211 | { | 2856 | { |
| 2212 | int r,i; | 2857 | int r, i; |
| 2213 | struct amdgpu_ring *ring; | 2858 | struct amdgpu_ring *ring; |
| 2214 | 2859 | ||
| 2215 | if (!(adev->flags & AMD_IS_APU)) | 2860 | if (!(adev->flags & AMD_IS_APU)) |
| @@ -2230,10 +2875,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) | |||
| 2230 | if (r) | 2875 | if (r) |
| 2231 | return r; | 2876 | return r; |
| 2232 | 2877 | ||
| 2233 | if (amdgpu_sriov_vf(adev)) | 2878 | r = gfx_v9_0_kiq_resume(adev); |
| 2234 | r = gfx_v9_0_kiq_resume(adev); | ||
| 2235 | else | ||
| 2236 | r = gfx_v9_0_cp_compute_resume(adev); | ||
| 2237 | if (r) | 2879 | if (r) |
| 2238 | return r; | 2880 | return r; |
| 2239 | 2881 | ||
| @@ -2243,6 +2885,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) | |||
| 2243 | ring->ready = false; | 2885 | ring->ready = false; |
| 2244 | return r; | 2886 | return r; |
| 2245 | } | 2887 | } |
| 2888 | |||
| 2889 | ring = &adev->gfx.kiq.ring; | ||
| 2890 | ring->ready = true; | ||
| 2891 | r = amdgpu_ring_test_ring(ring); | ||
| 2892 | if (r) | ||
| 2893 | ring->ready = false; | ||
| 2894 | |||
| 2246 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 2895 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
| 2247 | ring = &adev->gfx.compute_ring[i]; | 2896 | ring = &adev->gfx.compute_ring[i]; |
| 2248 | 2897 | ||
| @@ -2252,14 +2901,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) | |||
| 2252 | ring->ready = false; | 2901 | ring->ready = false; |
| 2253 | } | 2902 | } |
| 2254 | 2903 | ||
| 2255 | if (amdgpu_sriov_vf(adev)) { | ||
| 2256 | ring = &adev->gfx.kiq.ring; | ||
| 2257 | ring->ready = true; | ||
| 2258 | r = amdgpu_ring_test_ring(ring); | ||
| 2259 | if (r) | ||
| 2260 | ring->ready = false; | ||
| 2261 | } | ||
| 2262 | |||
| 2263 | gfx_v9_0_enable_gui_idle_interrupt(adev, true); | 2904 | gfx_v9_0_enable_gui_idle_interrupt(adev, true); |
| 2264 | 2905 | ||
| 2265 | return 0; | 2906 | return 0; |
| @@ -2307,7 +2948,6 @@ static int gfx_v9_0_hw_fini(void *handle) | |||
| 2307 | } | 2948 | } |
| 2308 | gfx_v9_0_cp_enable(adev, false); | 2949 | gfx_v9_0_cp_enable(adev, false); |
| 2309 | gfx_v9_0_rlc_stop(adev); | 2950 | gfx_v9_0_rlc_stop(adev); |
| 2310 | gfx_v9_0_cp_compute_fini(adev); | ||
| 2311 | 2951 | ||
| 2312 | return 0; | 2952 | return 0; |
| 2313 | } | 2953 | } |
| @@ -2316,14 +2956,18 @@ static int gfx_v9_0_suspend(void *handle) | |||
| 2316 | { | 2956 | { |
| 2317 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 2957 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 2318 | 2958 | ||
| 2959 | adev->gfx.in_suspend = true; | ||
| 2319 | return gfx_v9_0_hw_fini(adev); | 2960 | return gfx_v9_0_hw_fini(adev); |
| 2320 | } | 2961 | } |
| 2321 | 2962 | ||
| 2322 | static int gfx_v9_0_resume(void *handle) | 2963 | static int gfx_v9_0_resume(void *handle) |
| 2323 | { | 2964 | { |
| 2324 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 2965 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 2966 | int r; | ||
| 2325 | 2967 | ||
| 2326 | return gfx_v9_0_hw_init(adev); | 2968 | r = gfx_v9_0_hw_init(adev); |
| 2969 | adev->gfx.in_suspend = false; | ||
| 2970 | return r; | ||
| 2327 | } | 2971 | } |
| 2328 | 2972 | ||
| 2329 | static bool gfx_v9_0_is_idle(void *handle) | 2973 | static bool gfx_v9_0_is_idle(void *handle) |
| @@ -2470,7 +3114,7 @@ static int gfx_v9_0_early_init(void *handle) | |||
| 2470 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 3114 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 2471 | 3115 | ||
| 2472 | adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; | 3116 | adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; |
| 2473 | adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS; | 3117 | adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; |
| 2474 | gfx_v9_0_set_ring_funcs(adev); | 3118 | gfx_v9_0_set_ring_funcs(adev); |
| 2475 | gfx_v9_0_set_irq_funcs(adev); | 3119 | gfx_v9_0_set_irq_funcs(adev); |
| 2476 | gfx_v9_0_set_gds_init(adev); | 3120 | gfx_v9_0_set_gds_init(adev); |
| @@ -2549,6 +3193,43 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) | |||
| 2549 | } | 3193 | } |
| 2550 | } | 3194 | } |
| 2551 | 3195 | ||
| 3196 | static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, | ||
| 3197 | bool enable) | ||
| 3198 | { | ||
| 3199 | /* TODO: double check if we need to perform under safe mdoe */ | ||
| 3200 | /* gfx_v9_0_enter_rlc_safe_mode(adev); */ | ||
| 3201 | |||
| 3202 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { | ||
| 3203 | gfx_v9_0_enable_gfx_cg_power_gating(adev, true); | ||
| 3204 | if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) | ||
| 3205 | gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); | ||
| 3206 | } else { | ||
| 3207 | gfx_v9_0_enable_gfx_cg_power_gating(adev, false); | ||
| 3208 | gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); | ||
| 3209 | } | ||
| 3210 | |||
| 3211 | /* gfx_v9_0_exit_rlc_safe_mode(adev); */ | ||
| 3212 | } | ||
| 3213 | |||
| 3214 | static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, | ||
| 3215 | bool enable) | ||
| 3216 | { | ||
| 3217 | /* TODO: double check if we need to perform under safe mode */ | ||
| 3218 | /* gfx_v9_0_enter_rlc_safe_mode(adev); */ | ||
| 3219 | |||
| 3220 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) | ||
| 3221 | gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); | ||
| 3222 | else | ||
| 3223 | gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); | ||
| 3224 | |||
| 3225 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) | ||
| 3226 | gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); | ||
| 3227 | else | ||
| 3228 | gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); | ||
| 3229 | |||
| 3230 | /* gfx_v9_0_exit_rlc_safe_mode(adev); */ | ||
| 3231 | } | ||
| 3232 | |||
| 2552 | static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, | 3233 | static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, |
| 2553 | bool enable) | 3234 | bool enable) |
| 2554 | { | 3235 | { |
| @@ -2739,6 +3420,34 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { | |||
| 2739 | static int gfx_v9_0_set_powergating_state(void *handle, | 3420 | static int gfx_v9_0_set_powergating_state(void *handle, |
| 2740 | enum amd_powergating_state state) | 3421 | enum amd_powergating_state state) |
| 2741 | { | 3422 | { |
| 3423 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 3424 | bool enable = (state == AMD_PG_STATE_GATE) ? true : false; | ||
| 3425 | |||
| 3426 | switch (adev->asic_type) { | ||
| 3427 | case CHIP_RAVEN: | ||
| 3428 | if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { | ||
| 3429 | gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); | ||
| 3430 | gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); | ||
| 3431 | } else { | ||
| 3432 | gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); | ||
| 3433 | gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); | ||
| 3434 | } | ||
| 3435 | |||
| 3436 | if (adev->pg_flags & AMD_PG_SUPPORT_CP) | ||
| 3437 | gfx_v9_0_enable_cp_power_gating(adev, true); | ||
| 3438 | else | ||
| 3439 | gfx_v9_0_enable_cp_power_gating(adev, false); | ||
| 3440 | |||
| 3441 | /* update gfx cgpg state */ | ||
| 3442 | gfx_v9_0_update_gfx_cg_power_gating(adev, enable); | ||
| 3443 | |||
| 3444 | /* update mgcg state */ | ||
| 3445 | gfx_v9_0_update_gfx_mg_power_gating(adev, enable); | ||
| 3446 | break; | ||
| 3447 | default: | ||
| 3448 | break; | ||
| 3449 | } | ||
| 3450 | |||
| 2742 | return 0; | 3451 | return 0; |
| 2743 | } | 3452 | } |
| 2744 | 3453 | ||
| @@ -2752,6 +3461,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, | |||
| 2752 | 3461 | ||
| 2753 | switch (adev->asic_type) { | 3462 | switch (adev->asic_type) { |
| 2754 | case CHIP_VEGA10: | 3463 | case CHIP_VEGA10: |
| 3464 | case CHIP_RAVEN: | ||
| 2755 | gfx_v9_0_update_gfx_clock_gating(adev, | 3465 | gfx_v9_0_update_gfx_clock_gating(adev, |
| 2756 | state == AMD_CG_STATE_GATE ? true : false); | 3466 | state == AMD_CG_STATE_GATE ? true : false); |
| 2757 | break; | 3467 | break; |
| @@ -2879,31 +3589,33 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | |||
| 2879 | struct amdgpu_ib *ib, | 3589 | struct amdgpu_ib *ib, |
| 2880 | unsigned vm_id, bool ctx_switch) | 3590 | unsigned vm_id, bool ctx_switch) |
| 2881 | { | 3591 | { |
| 2882 | u32 header, control = 0; | 3592 | u32 header, control = 0; |
| 2883 | 3593 | ||
| 2884 | if (ib->flags & AMDGPU_IB_FLAG_CE) | 3594 | if (ib->flags & AMDGPU_IB_FLAG_CE) |
| 2885 | header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); | 3595 | header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); |
| 2886 | else | 3596 | else |
| 2887 | header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); | 3597 | header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); |
| 2888 | 3598 | ||
| 2889 | control |= ib->length_dw | (vm_id << 24); | 3599 | control |= ib->length_dw | (vm_id << 24); |
| 2890 | 3600 | ||
| 2891 | if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) | 3601 | if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { |
| 2892 | control |= INDIRECT_BUFFER_PRE_ENB(1); | 3602 | control |= INDIRECT_BUFFER_PRE_ENB(1); |
| 2893 | 3603 | ||
| 2894 | amdgpu_ring_write(ring, header); | 3604 | if (!(ib->flags & AMDGPU_IB_FLAG_CE)) |
| 2895 | BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ | 3605 | gfx_v9_0_ring_emit_de_meta(ring); |
| 2896 | amdgpu_ring_write(ring, | 3606 | } |
| 3607 | |||
| 3608 | amdgpu_ring_write(ring, header); | ||
| 3609 | BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ | ||
| 3610 | amdgpu_ring_write(ring, | ||
| 2897 | #ifdef __BIG_ENDIAN | 3611 | #ifdef __BIG_ENDIAN |
| 2898 | (2 << 0) | | 3612 | (2 << 0) | |
| 2899 | #endif | 3613 | #endif |
| 2900 | lower_32_bits(ib->gpu_addr)); | 3614 | lower_32_bits(ib->gpu_addr)); |
| 2901 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); | 3615 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
| 2902 | amdgpu_ring_write(ring, control); | 3616 | amdgpu_ring_write(ring, control); |
| 2903 | } | 3617 | } |
| 2904 | 3618 | ||
| 2905 | #define INDIRECT_BUFFER_VALID (1 << 23) | ||
| 2906 | |||
| 2907 | static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, | 3619 | static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, |
| 2908 | struct amdgpu_ib *ib, | 3620 | struct amdgpu_ib *ib, |
| 2909 | unsigned vm_id, bool ctx_switch) | 3621 | unsigned vm_id, bool ctx_switch) |
| @@ -2971,9 +3683,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
| 2971 | uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); | 3683 | uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); |
| 2972 | unsigned eng = ring->vm_inv_eng; | 3684 | unsigned eng = ring->vm_inv_eng; |
| 2973 | 3685 | ||
| 2974 | pd_addr = pd_addr | 0x1; /* valid bit */ | 3686 | pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); |
| 2975 | /* now only use physical base address of PDE and valid */ | 3687 | pd_addr |= AMDGPU_PTE_VALID; |
| 2976 | BUG_ON(pd_addr & 0xFFFF00000000003EULL); | ||
| 2977 | 3688 | ||
| 2978 | gfx_v9_0_write_data_to_reg(ring, usepfp, true, | 3689 | gfx_v9_0_write_data_to_reg(ring, usepfp, true, |
| 2979 | hub->ctx0_ptb_addr_lo32 + (2 * vm_id), | 3690 | hub->ctx0_ptb_addr_lo32 + (2 * vm_id), |
| @@ -3130,9 +3841,6 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) | |||
| 3130 | amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); | 3841 | amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); |
| 3131 | amdgpu_ring_write(ring, dw2); | 3842 | amdgpu_ring_write(ring, dw2); |
| 3132 | amdgpu_ring_write(ring, 0); | 3843 | amdgpu_ring_write(ring, 0); |
| 3133 | |||
| 3134 | if (amdgpu_sriov_vf(ring->adev)) | ||
| 3135 | gfx_v9_0_ring_emit_de_meta(ring); | ||
| 3136 | } | 3844 | } |
| 3137 | 3845 | ||
| 3138 | static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) | 3846 | static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) |
| @@ -3160,6 +3868,12 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne | |||
| 3160 | ring->ring[offset] = (ring->ring_size>>2) - offset + cur; | 3868 | ring->ring[offset] = (ring->ring_size>>2) - offset + cur; |
| 3161 | } | 3869 | } |
| 3162 | 3870 | ||
| 3871 | static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) | ||
| 3872 | { | ||
| 3873 | amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); | ||
| 3874 | amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ | ||
| 3875 | } | ||
| 3876 | |||
| 3163 | static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) | 3877 | static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) |
| 3164 | { | 3878 | { |
| 3165 | struct amdgpu_device *adev = ring->adev; | 3879 | struct amdgpu_device *adev = ring->adev; |
| @@ -3208,8 +3922,8 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, | |||
| 3208 | u32 mec_int_cntl, mec_int_cntl_reg; | 3922 | u32 mec_int_cntl, mec_int_cntl_reg; |
| 3209 | 3923 | ||
| 3210 | /* | 3924 | /* |
| 3211 | * amdgpu controls only pipe 0 of MEC1. That's why this function only | 3925 | * amdgpu controls only the first MEC. That's why this function only |
| 3212 | * handles the setting of interrupts for this specific pipe. All other | 3926 | * handles the setting of interrupts for this specific MEC. All other |
| 3213 | * pipes' interrupts are set by amdkfd. | 3927 | * pipes' interrupts are set by amdkfd. |
| 3214 | */ | 3928 | */ |
| 3215 | 3929 | ||
| @@ -3218,6 +3932,15 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, | |||
| 3218 | case 0: | 3932 | case 0: |
| 3219 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); | 3933 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); |
| 3220 | break; | 3934 | break; |
| 3935 | case 1: | ||
| 3936 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); | ||
| 3937 | break; | ||
| 3938 | case 2: | ||
| 3939 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); | ||
| 3940 | break; | ||
| 3941 | case 3: | ||
| 3942 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); | ||
| 3943 | break; | ||
| 3221 | default: | 3944 | default: |
| 3222 | DRM_DEBUG("invalid pipe %d\n", pipe); | 3945 | DRM_DEBUG("invalid pipe %d\n", pipe); |
| 3223 | return; | 3946 | return; |
| @@ -3494,6 +4217,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { | |||
| 3494 | .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, | 4217 | .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, |
| 3495 | .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, | 4218 | .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, |
| 3496 | .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, | 4219 | .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, |
| 4220 | .emit_tmz = gfx_v9_0_ring_emit_tmz, | ||
| 3497 | }; | 4221 | }; |
| 3498 | 4222 | ||
| 3499 | static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { | 4223 | static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { |
| @@ -3605,6 +4329,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) | |||
| 3605 | { | 4329 | { |
| 3606 | switch (adev->asic_type) { | 4330 | switch (adev->asic_type) { |
| 3607 | case CHIP_VEGA10: | 4331 | case CHIP_VEGA10: |
| 4332 | case CHIP_RAVEN: | ||
| 3608 | adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; | 4333 | adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; |
| 3609 | break; | 4334 | break; |
| 3610 | default: | 4335 | default: |
| @@ -3640,6 +4365,20 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) | |||
| 3640 | } | 4365 | } |
| 3641 | } | 4366 | } |
| 3642 | 4367 | ||
| 4368 | static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, | ||
| 4369 | u32 bitmap) | ||
| 4370 | { | ||
| 4371 | u32 data; | ||
| 4372 | |||
| 4373 | if (!bitmap) | ||
| 4374 | return; | ||
| 4375 | |||
| 4376 | data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; | ||
| 4377 | data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; | ||
| 4378 | |||
| 4379 | WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); | ||
| 4380 | } | ||
| 4381 | |||
| 3643 | static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) | 4382 | static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) |
| 3644 | { | 4383 | { |
| 3645 | u32 data, mask; | 4384 | u32 data, mask; |
| @@ -3650,7 +4389,7 @@ static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) | |||
| 3650 | data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; | 4389 | data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; |
| 3651 | data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; | 4390 | data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; |
| 3652 | 4391 | ||
| 3653 | mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh); | 4392 | mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); |
| 3654 | 4393 | ||
| 3655 | return (~data) & mask; | 4394 | return (~data) & mask; |
| 3656 | } | 4395 | } |
| @@ -3660,11 +4399,12 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, | |||
| 3660 | { | 4399 | { |
| 3661 | int i, j, k, counter, active_cu_number = 0; | 4400 | int i, j, k, counter, active_cu_number = 0; |
| 3662 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; | 4401 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; |
| 4402 | unsigned disable_masks[4 * 2]; | ||
| 3663 | 4403 | ||
| 3664 | if (!adev || !cu_info) | 4404 | if (!adev || !cu_info) |
| 3665 | return -EINVAL; | 4405 | return -EINVAL; |
| 3666 | 4406 | ||
| 3667 | memset(cu_info, 0, sizeof(*cu_info)); | 4407 | amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); |
| 3668 | 4408 | ||
| 3669 | mutex_lock(&adev->grbm_idx_mutex); | 4409 | mutex_lock(&adev->grbm_idx_mutex); |
| 3670 | for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { | 4410 | for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { |
| @@ -3673,19 +4413,24 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, | |||
| 3673 | ao_bitmap = 0; | 4413 | ao_bitmap = 0; |
| 3674 | counter = 0; | 4414 | counter = 0; |
| 3675 | gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); | 4415 | gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); |
| 4416 | if (i < 4 && j < 2) | ||
| 4417 | gfx_v9_0_set_user_cu_inactive_bitmap( | ||
| 4418 | adev, disable_masks[i * 2 + j]); | ||
| 3676 | bitmap = gfx_v9_0_get_cu_active_bitmap(adev); | 4419 | bitmap = gfx_v9_0_get_cu_active_bitmap(adev); |
| 3677 | cu_info->bitmap[i][j] = bitmap; | 4420 | cu_info->bitmap[i][j] = bitmap; |
| 3678 | 4421 | ||
| 3679 | for (k = 0; k < 16; k ++) { | 4422 | for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { |
| 3680 | if (bitmap & mask) { | 4423 | if (bitmap & mask) { |
| 3681 | if (counter < 2) | 4424 | if (counter < adev->gfx.config.max_cu_per_sh) |
| 3682 | ao_bitmap |= mask; | 4425 | ao_bitmap |= mask; |
| 3683 | counter ++; | 4426 | counter ++; |
| 3684 | } | 4427 | } |
| 3685 | mask <<= 1; | 4428 | mask <<= 1; |
| 3686 | } | 4429 | } |
| 3687 | active_cu_number += counter; | 4430 | active_cu_number += counter; |
| 3688 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | 4431 | if (i < 2 && j < 2) |
| 4432 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | ||
| 4433 | cu_info->ao_cu_bitmap[i][j] = ao_bitmap; | ||
| 3689 | } | 4434 | } |
| 3690 | } | 4435 | } |
| 3691 | gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); | 4436 | gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); |
| @@ -3697,218 +4442,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, | |||
| 3697 | return 0; | 4442 | return 0; |
| 3698 | } | 4443 | } |
| 3699 | 4444 | ||
| 3700 | static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) | ||
| 3701 | { | ||
| 3702 | int r, j; | ||
| 3703 | u32 tmp; | ||
| 3704 | bool use_doorbell = true; | ||
| 3705 | u64 hqd_gpu_addr; | ||
| 3706 | u64 mqd_gpu_addr; | ||
| 3707 | u64 eop_gpu_addr; | ||
| 3708 | u64 wb_gpu_addr; | ||
| 3709 | u32 *buf; | ||
| 3710 | struct v9_mqd *mqd; | ||
| 3711 | struct amdgpu_device *adev; | ||
| 3712 | |||
| 3713 | adev = ring->adev; | ||
| 3714 | if (ring->mqd_obj == NULL) { | ||
| 3715 | r = amdgpu_bo_create(adev, | ||
| 3716 | sizeof(struct v9_mqd), | ||
| 3717 | PAGE_SIZE,true, | ||
| 3718 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, | ||
| 3719 | NULL, &ring->mqd_obj); | ||
| 3720 | if (r) { | ||
| 3721 | dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); | ||
| 3722 | return r; | ||
| 3723 | } | ||
| 3724 | } | ||
| 3725 | |||
| 3726 | r = amdgpu_bo_reserve(ring->mqd_obj, false); | ||
| 3727 | if (unlikely(r != 0)) { | ||
| 3728 | gfx_v9_0_cp_compute_fini(adev); | ||
| 3729 | return r; | ||
| 3730 | } | ||
| 3731 | |||
| 3732 | r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, | ||
| 3733 | &mqd_gpu_addr); | ||
| 3734 | if (r) { | ||
| 3735 | dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); | ||
| 3736 | gfx_v9_0_cp_compute_fini(adev); | ||
| 3737 | return r; | ||
| 3738 | } | ||
| 3739 | r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); | ||
| 3740 | if (r) { | ||
| 3741 | dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); | ||
| 3742 | gfx_v9_0_cp_compute_fini(adev); | ||
| 3743 | return r; | ||
| 3744 | } | ||
| 3745 | |||
| 3746 | /* init the mqd struct */ | ||
| 3747 | memset(buf, 0, sizeof(struct v9_mqd)); | ||
| 3748 | |||
| 3749 | mqd = (struct v9_mqd *)buf; | ||
| 3750 | mqd->header = 0xC0310800; | ||
| 3751 | mqd->compute_pipelinestat_enable = 0x00000001; | ||
| 3752 | mqd->compute_static_thread_mgmt_se0 = 0xffffffff; | ||
| 3753 | mqd->compute_static_thread_mgmt_se1 = 0xffffffff; | ||
| 3754 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; | ||
| 3755 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; | ||
| 3756 | mqd->compute_misc_reserved = 0x00000003; | ||
| 3757 | mutex_lock(&adev->srbm_mutex); | ||
| 3758 | soc15_grbm_select(adev, ring->me, | ||
| 3759 | ring->pipe, | ||
| 3760 | ring->queue, 0); | ||
| 3761 | /* disable wptr polling */ | ||
| 3762 | WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); | ||
| 3763 | |||
| 3764 | /* write the EOP addr */ | ||
| 3765 | BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */ | ||
| 3766 | eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE); | ||
| 3767 | eop_gpu_addr >>= 8; | ||
| 3768 | |||
| 3769 | WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr)); | ||
| 3770 | WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); | ||
| 3771 | mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr); | ||
| 3772 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr); | ||
| 3773 | |||
| 3774 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ | ||
| 3775 | tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); | ||
| 3776 | tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, | ||
| 3777 | (order_base_2(MEC_HPD_SIZE / 4) - 1)); | ||
| 3778 | WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp); | ||
| 3779 | |||
| 3780 | /* enable doorbell? */ | ||
| 3781 | tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); | ||
| 3782 | if (use_doorbell) | ||
| 3783 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); | ||
| 3784 | else | ||
| 3785 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); | ||
| 3786 | |||
| 3787 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); | ||
| 3788 | mqd->cp_hqd_pq_doorbell_control = tmp; | ||
| 3789 | |||
| 3790 | /* disable the queue if it's active */ | ||
| 3791 | ring->wptr = 0; | ||
| 3792 | mqd->cp_hqd_dequeue_request = 0; | ||
| 3793 | mqd->cp_hqd_pq_rptr = 0; | ||
| 3794 | mqd->cp_hqd_pq_wptr_lo = 0; | ||
| 3795 | mqd->cp_hqd_pq_wptr_hi = 0; | ||
| 3796 | if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { | ||
| 3797 | WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); | ||
| 3798 | for (j = 0; j < adev->usec_timeout; j++) { | ||
| 3799 | if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) | ||
| 3800 | break; | ||
| 3801 | udelay(1); | ||
| 3802 | } | ||
| 3803 | WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); | ||
| 3804 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); | ||
| 3805 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); | ||
| 3806 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); | ||
| 3807 | } | ||
| 3808 | |||
| 3809 | /* set the pointer to the MQD */ | ||
| 3810 | mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; | ||
| 3811 | mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); | ||
| 3812 | WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); | ||
| 3813 | WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); | ||
| 3814 | |||
| 3815 | /* set MQD vmid to 0 */ | ||
| 3816 | tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); | ||
| 3817 | tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); | ||
| 3818 | WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp); | ||
| 3819 | mqd->cp_mqd_control = tmp; | ||
| 3820 | |||
| 3821 | /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ | ||
| 3822 | hqd_gpu_addr = ring->gpu_addr >> 8; | ||
| 3823 | mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; | ||
| 3824 | mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); | ||
| 3825 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); | ||
| 3826 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); | ||
| 3827 | |||
| 3828 | /* set up the HQD, this is similar to CP_RB0_CNTL */ | ||
| 3829 | tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); | ||
| 3830 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, | ||
| 3831 | (order_base_2(ring->ring_size / 4) - 1)); | ||
| 3832 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, | ||
| 3833 | ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); | ||
| 3834 | #ifdef __BIG_ENDIAN | ||
| 3835 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); | ||
| 3836 | #endif | ||
| 3837 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); | ||
| 3838 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); | ||
| 3839 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); | ||
| 3840 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); | ||
| 3841 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp); | ||
| 3842 | mqd->cp_hqd_pq_control = tmp; | ||
| 3843 | |||
| 3844 | /* set the wb address wether it's enabled or not */ | ||
| 3845 | wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); | ||
| 3846 | mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; | ||
| 3847 | mqd->cp_hqd_pq_rptr_report_addr_hi = | ||
| 3848 | upper_32_bits(wb_gpu_addr) & 0xffff; | ||
| 3849 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, | ||
| 3850 | mqd->cp_hqd_pq_rptr_report_addr_lo); | ||
| 3851 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, | ||
| 3852 | mqd->cp_hqd_pq_rptr_report_addr_hi); | ||
| 3853 | |||
| 3854 | /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ | ||
| 3855 | wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | ||
| 3856 | mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; | ||
| 3857 | mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; | ||
| 3858 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, | ||
| 3859 | mqd->cp_hqd_pq_wptr_poll_addr_lo); | ||
| 3860 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, | ||
| 3861 | mqd->cp_hqd_pq_wptr_poll_addr_hi); | ||
| 3862 | |||
| 3863 | /* enable the doorbell if requested */ | ||
| 3864 | if (use_doorbell) { | ||
| 3865 | WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, | ||
| 3866 | (AMDGPU_DOORBELL64_KIQ * 2) << 2); | ||
| 3867 | WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, | ||
| 3868 | (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2); | ||
| 3869 | tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); | ||
| 3870 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, | ||
| 3871 | DOORBELL_OFFSET, ring->doorbell_index); | ||
| 3872 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); | ||
| 3873 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); | ||
| 3874 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); | ||
| 3875 | mqd->cp_hqd_pq_doorbell_control = tmp; | ||
| 3876 | |||
| 3877 | } else { | ||
| 3878 | mqd->cp_hqd_pq_doorbell_control = 0; | ||
| 3879 | } | ||
| 3880 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, | ||
| 3881 | mqd->cp_hqd_pq_doorbell_control); | ||
| 3882 | |||
| 3883 | /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ | ||
| 3884 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); | ||
| 3885 | WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); | ||
| 3886 | |||
| 3887 | /* set the vmid for the queue */ | ||
| 3888 | mqd->cp_hqd_vmid = 0; | ||
| 3889 | WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); | ||
| 3890 | |||
| 3891 | tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); | ||
| 3892 | tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); | ||
| 3893 | WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp); | ||
| 3894 | mqd->cp_hqd_persistent_state = tmp; | ||
| 3895 | |||
| 3896 | /* activate the queue */ | ||
| 3897 | mqd->cp_hqd_active = 1; | ||
| 3898 | WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); | ||
| 3899 | |||
| 3900 | soc15_grbm_select(adev, 0, 0, 0, 0); | ||
| 3901 | mutex_unlock(&adev->srbm_mutex); | ||
| 3902 | |||
| 3903 | amdgpu_bo_kunmap(ring->mqd_obj); | ||
| 3904 | amdgpu_bo_unreserve(ring->mqd_obj); | ||
| 3905 | |||
| 3906 | if (use_doorbell) | ||
| 3907 | WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); | ||
| 3908 | |||
| 3909 | return 0; | ||
| 3910 | } | ||
| 3911 | |||
| 3912 | const struct amdgpu_ip_block_version gfx_v9_0_ip_block = | 4445 | const struct amdgpu_ip_block_version gfx_v9_0_ip_block = |
| 3913 | { | 4446 | { |
| 3914 | .type = AMD_IP_BLOCK_TYPE_GFX, | 4447 | .type = AMD_IP_BLOCK_TYPE_GFX, |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 005075ff00f7..a42f483767e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | |||
| @@ -31,178 +31,161 @@ | |||
| 31 | 31 | ||
| 32 | #include "soc15_common.h" | 32 | #include "soc15_common.h" |
| 33 | 33 | ||
| 34 | int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) | 34 | u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) |
| 35 | { | 35 | { |
| 36 | u32 tmp; | 36 | return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; |
| 37 | u64 value; | 37 | } |
| 38 | u32 i; | ||
| 39 | 38 | ||
| 40 | /* Program MC. */ | 39 | static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) |
| 41 | /* Update configuration */ | 40 | { |
| 42 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), | 41 | uint64_t value; |
| 43 | adev->mc.vram_start >> 18); | ||
| 44 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), | ||
| 45 | adev->mc.vram_end >> 18); | ||
| 46 | 42 | ||
| 47 | value = adev->vram_scratch.gpu_addr - adev->mc.vram_start | 43 | BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); |
| 44 | value = adev->gart.table_addr - adev->mc.vram_start | ||
| 48 | + adev->vm_manager.vram_base_offset; | 45 | + adev->vm_manager.vram_base_offset; |
| 49 | WREG32(SOC15_REG_OFFSET(GC, 0, | 46 | value &= 0x0000FFFFFFFFF000ULL; |
| 50 | mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), | 47 | value |= 0x1; /*valid bit*/ |
| 51 | (u32)(value >> 12)); | ||
| 52 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 53 | mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), | ||
| 54 | (u32)(value >> 44)); | ||
| 55 | 48 | ||
| 56 | if (amdgpu_sriov_vf(adev)) { | 49 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, |
| 57 | /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so | 50 | lower_32_bits(value)); |
| 58 | vbios post doesn't program them, for SRIOV driver need to program them */ | 51 | |
| 59 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE), | 52 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, |
| 60 | adev->mc.vram_start >> 24); | 53 | upper_32_bits(value)); |
| 61 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP), | 54 | } |
| 62 | adev->mc.vram_end >> 24); | 55 | |
| 63 | } | 56 | static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) |
| 57 | { | ||
| 58 | gfxhub_v1_0_init_gart_pt_regs(adev); | ||
| 59 | |||
| 60 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, | ||
| 61 | (u32)(adev->mc.gtt_start >> 12)); | ||
| 62 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, | ||
| 63 | (u32)(adev->mc.gtt_start >> 44)); | ||
| 64 | |||
| 65 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, | ||
| 66 | (u32)(adev->mc.gtt_end >> 12)); | ||
| 67 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, | ||
| 68 | (u32)(adev->mc.gtt_end >> 44)); | ||
| 69 | } | ||
| 70 | |||
| 71 | static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) | ||
| 72 | { | ||
| 73 | uint64_t value; | ||
| 64 | 74 | ||
| 65 | /* Disable AGP. */ | 75 | /* Disable AGP. */ |
| 66 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0); | 76 | WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); |
| 67 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0); | 77 | WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0); |
| 68 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF); | 78 | WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF); |
| 69 | 79 | ||
| 70 | /* GART Enable. */ | 80 | /* Program the system aperture low logical page number. */ |
| 81 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, | ||
| 82 | adev->mc.vram_start >> 18); | ||
| 83 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, | ||
| 84 | adev->mc.vram_end >> 18); | ||
| 85 | |||
| 86 | /* Set default page address. */ | ||
| 87 | value = adev->vram_scratch.gpu_addr - adev->mc.vram_start | ||
| 88 | + adev->vm_manager.vram_base_offset; | ||
| 89 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, | ||
| 90 | (u32)(value >> 12)); | ||
| 91 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, | ||
| 92 | (u32)(value >> 44)); | ||
| 93 | |||
| 94 | /* Program "protection fault". */ | ||
| 95 | WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, | ||
| 96 | (u32)(adev->dummy_page.addr >> 12)); | ||
| 97 | WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, | ||
| 98 | (u32)((u64)adev->dummy_page.addr >> 44)); | ||
| 99 | |||
| 100 | WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2, | ||
| 101 | ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); | ||
| 102 | } | ||
| 103 | |||
| 104 | static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) | ||
| 105 | { | ||
| 106 | uint32_t tmp; | ||
| 71 | 107 | ||
| 72 | /* Setup TLB control */ | 108 | /* Setup TLB control */ |
| 73 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); | 109 | tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); |
| 110 | |||
| 74 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); | 111 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); |
| 75 | tmp = REG_SET_FIELD(tmp, | 112 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); |
| 76 | MC_VM_MX_L1_TLB_CNTL, | 113 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, |
| 77 | SYSTEM_ACCESS_MODE, | 114 | ENABLE_ADVANCED_DRIVER_MODEL, 1); |
| 78 | 3); | 115 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, |
| 79 | tmp = REG_SET_FIELD(tmp, | 116 | SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); |
| 80 | MC_VM_MX_L1_TLB_CNTL, | 117 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); |
| 81 | ENABLE_ADVANCED_DRIVER_MODEL, | 118 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, |
| 82 | 1); | 119 | MTYPE, MTYPE_UC);/* XXX for emulation. */ |
| 83 | tmp = REG_SET_FIELD(tmp, | 120 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); |
| 84 | MC_VM_MX_L1_TLB_CNTL, | 121 | |
| 85 | SYSTEM_APERTURE_UNMAPPED_ACCESS, | 122 | WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); |
| 86 | 0); | 123 | } |
| 87 | tmp = REG_SET_FIELD(tmp, | 124 | |
| 88 | MC_VM_MX_L1_TLB_CNTL, | 125 | static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) |
| 89 | ECO_BITS, | 126 | { |
| 90 | 0); | 127 | uint32_t tmp; |
| 91 | tmp = REG_SET_FIELD(tmp, | ||
| 92 | MC_VM_MX_L1_TLB_CNTL, | ||
| 93 | MTYPE, | ||
| 94 | MTYPE_UC);/* XXX for emulation. */ | ||
| 95 | tmp = REG_SET_FIELD(tmp, | ||
| 96 | MC_VM_MX_L1_TLB_CNTL, | ||
| 97 | ATC_EN, | ||
| 98 | 1); | ||
| 99 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); | ||
| 100 | 128 | ||
| 101 | /* Setup L2 cache */ | 129 | /* Setup L2 cache */ |
| 102 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); | 130 | tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL); |
| 103 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); | 131 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); |
| 104 | tmp = REG_SET_FIELD(tmp, | 132 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); |
| 105 | VM_L2_CNTL, | 133 | /* XXX for emulation, Refer to closed source code.*/ |
| 106 | ENABLE_L2_FRAGMENT_PROCESSING, | 134 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, |
| 107 | 0); | 135 | 0); |
| 108 | tmp = REG_SET_FIELD(tmp, | ||
| 109 | VM_L2_CNTL, | ||
| 110 | L2_PDE0_CACHE_TAG_GENERATION_MODE, | ||
| 111 | 0);/* XXX for emulation, Refer to closed source code.*/ | ||
| 112 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); | 136 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); |
| 113 | tmp = REG_SET_FIELD(tmp, | 137 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); |
| 114 | VM_L2_CNTL, | 138 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); |
| 115 | CONTEXT1_IDENTITY_ACCESS_MODE, | 139 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp); |
| 116 | 1); | ||
| 117 | tmp = REG_SET_FIELD(tmp, | ||
| 118 | VM_L2_CNTL, | ||
| 119 | IDENTITY_MODE_FRAGMENT_SIZE, | ||
| 120 | 0); | ||
| 121 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); | ||
| 122 | 140 | ||
| 123 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2)); | 141 | tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2); |
| 124 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); | 142 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); |
| 125 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 143 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
| 126 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp); | 144 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); |
| 127 | 145 | ||
| 128 | tmp = mmVM_L2_CNTL3_DEFAULT; | 146 | tmp = mmVM_L2_CNTL3_DEFAULT; |
| 129 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp); | 147 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); |
| 130 | 148 | ||
| 131 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4)); | 149 | tmp = mmVM_L2_CNTL4_DEFAULT; |
| 132 | tmp = REG_SET_FIELD(tmp, | 150 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); |
| 133 | VM_L2_CNTL4, | 151 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); |
| 134 | VMC_TAP_PDE_REQUEST_PHYSICAL, | 152 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp); |
| 135 | 0); | 153 | } |
| 136 | tmp = REG_SET_FIELD(tmp, | ||
| 137 | VM_L2_CNTL4, | ||
| 138 | VMC_TAP_PTE_REQUEST_PHYSICAL, | ||
| 139 | 0); | ||
| 140 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4), tmp); | ||
| 141 | |||
| 142 | /* setup context0 */ | ||
| 143 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 144 | mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), | ||
| 145 | (u32)(adev->mc.gtt_start >> 12)); | ||
| 146 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 147 | mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), | ||
| 148 | (u32)(adev->mc.gtt_start >> 44)); | ||
| 149 | |||
| 150 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 151 | mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), | ||
| 152 | (u32)(adev->mc.gtt_end >> 12)); | ||
| 153 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 154 | mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), | ||
| 155 | (u32)(adev->mc.gtt_end >> 44)); | ||
| 156 | 154 | ||
| 157 | BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); | 155 | static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev) |
| 158 | value = adev->gart.table_addr - adev->mc.vram_start | 156 | { |
| 159 | + adev->vm_manager.vram_base_offset; | 157 | uint32_t tmp; |
| 160 | value &= 0x0000FFFFFFFFF000ULL; | ||
| 161 | value |= 0x1; /*valid bit*/ | ||
| 162 | 158 | ||
| 163 | WREG32(SOC15_REG_OFFSET(GC, 0, | 159 | tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL); |
| 164 | mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), | ||
| 165 | (u32)value); | ||
| 166 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 167 | mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), | ||
| 168 | (u32)(value >> 32)); | ||
| 169 | |||
| 170 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 171 | mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), | ||
| 172 | (u32)(adev->dummy_page.addr >> 12)); | ||
| 173 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 174 | mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), | ||
| 175 | (u32)((u64)adev->dummy_page.addr >> 44)); | ||
| 176 | |||
| 177 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); | ||
| 178 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, | ||
| 179 | ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, | ||
| 180 | 1); | ||
| 181 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); | ||
| 182 | |||
| 183 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL)); | ||
| 184 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); | 160 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); |
| 185 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); | 161 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); |
| 186 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp); | 162 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp); |
| 163 | } | ||
| 164 | |||
| 165 | static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) | ||
| 166 | { | ||
| 167 | WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, | ||
| 168 | 0XFFFFFFFF); | ||
| 169 | WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, | ||
| 170 | 0x0000000F); | ||
| 171 | |||
| 172 | WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, | ||
| 173 | 0); | ||
| 174 | WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, | ||
| 175 | 0); | ||
| 187 | 176 | ||
| 188 | /* Disable identity aperture.*/ | 177 | WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); |
| 189 | WREG32(SOC15_REG_OFFSET(GC, 0, | 178 | WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); |
| 190 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); | ||
| 191 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 192 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); | ||
| 193 | 179 | ||
| 194 | WREG32(SOC15_REG_OFFSET(GC, 0, | 180 | } |
| 195 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); | ||
| 196 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 197 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); | ||
| 198 | 181 | ||
| 199 | WREG32(SOC15_REG_OFFSET(GC, 0, | 182 | static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) |
| 200 | mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); | 183 | { |
| 201 | WREG32(SOC15_REG_OFFSET(GC, 0, | 184 | int i; |
| 202 | mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); | 185 | uint32_t tmp; |
| 203 | 186 | ||
| 204 | for (i = 0; i <= 14; i++) { | 187 | for (i = 0; i <= 14; i++) { |
| 205 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i); | 188 | tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i); |
| 206 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); | 189 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); |
| 207 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, | 190 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, |
| 208 | adev->vm_manager.num_level); | 191 | adev->vm_manager.num_level); |
| @@ -223,15 +206,52 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) | |||
| 223 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, | 206 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, |
| 224 | PAGE_TABLE_BLOCK_SIZE, | 207 | PAGE_TABLE_BLOCK_SIZE, |
| 225 | adev->vm_manager.block_size - 9); | 208 | adev->vm_manager.block_size - 9); |
| 226 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp); | 209 | WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); |
| 227 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); | 210 | WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); |
| 228 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); | 211 | WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); |
| 229 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, | 212 | WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, |
| 230 | lower_32_bits(adev->vm_manager.max_pfn - 1)); | 213 | lower_32_bits(adev->vm_manager.max_pfn - 1)); |
| 231 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, | 214 | WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, |
| 232 | upper_32_bits(adev->vm_manager.max_pfn - 1)); | 215 | upper_32_bits(adev->vm_manager.max_pfn - 1)); |
| 233 | } | 216 | } |
| 217 | } | ||
| 218 | |||
| 219 | static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) | ||
| 220 | { | ||
| 221 | unsigned i; | ||
| 222 | |||
| 223 | for (i = 0 ; i < 18; ++i) { | ||
| 224 | WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, | ||
| 225 | 2 * i, 0xffffffff); | ||
| 226 | WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, | ||
| 227 | 2 * i, 0x1f); | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) | ||
| 232 | { | ||
| 233 | if (amdgpu_sriov_vf(adev)) { | ||
| 234 | /* | ||
| 235 | * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are | ||
| 236 | * VF copy registers so vbios post doesn't program them, for | ||
| 237 | * SRIOV driver need to program them | ||
| 238 | */ | ||
| 239 | WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, | ||
| 240 | adev->mc.vram_start >> 24); | ||
| 241 | WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, | ||
| 242 | adev->mc.vram_end >> 24); | ||
| 243 | } | ||
| 234 | 244 | ||
| 245 | /* GART Enable. */ | ||
| 246 | gfxhub_v1_0_init_gart_aperture_regs(adev); | ||
| 247 | gfxhub_v1_0_init_system_aperture_regs(adev); | ||
| 248 | gfxhub_v1_0_init_tlb_regs(adev); | ||
| 249 | gfxhub_v1_0_init_cache_regs(adev); | ||
| 250 | |||
| 251 | gfxhub_v1_0_enable_system_domain(adev); | ||
| 252 | gfxhub_v1_0_disable_identity_aperture(adev); | ||
| 253 | gfxhub_v1_0_setup_vmid_config(adev); | ||
| 254 | gfxhub_v1_0_program_invalidation(adev); | ||
| 235 | 255 | ||
| 236 | return 0; | 256 | return 0; |
| 237 | } | 257 | } |
| @@ -243,22 +263,20 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) | |||
| 243 | 263 | ||
| 244 | /* Disable all tables */ | 264 | /* Disable all tables */ |
| 245 | for (i = 0; i < 16; i++) | 265 | for (i = 0; i < 16; i++) |
| 246 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0); | 266 | WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0); |
| 247 | 267 | ||
| 248 | /* Setup TLB control */ | 268 | /* Setup TLB control */ |
| 249 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); | 269 | tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); |
| 250 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); | 270 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); |
| 251 | tmp = REG_SET_FIELD(tmp, | 271 | tmp = REG_SET_FIELD(tmp, |
| 252 | MC_VM_MX_L1_TLB_CNTL, | 272 | MC_VM_MX_L1_TLB_CNTL, |
| 253 | ENABLE_ADVANCED_DRIVER_MODEL, | 273 | ENABLE_ADVANCED_DRIVER_MODEL, |
| 254 | 0); | 274 | 0); |
| 255 | WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); | 275 | WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); |
| 256 | 276 | ||
| 257 | /* Setup L2 cache */ | 277 | /* Setup L2 cache */ |
| 258 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); | 278 | WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); |
| 259 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); | 279 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0); |
| 260 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); | ||
| 261 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0); | ||
| 262 | } | 280 | } |
| 263 | 281 | ||
| 264 | /** | 282 | /** |
| @@ -271,7 +289,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, | |||
| 271 | bool value) | 289 | bool value) |
| 272 | { | 290 | { |
| 273 | u32 tmp; | 291 | u32 tmp; |
| 274 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); | 292 | tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); |
| 275 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | 293 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, |
| 276 | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 294 | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
| 277 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | 295 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, |
| @@ -296,22 +314,11 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, | |||
| 296 | WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 314 | WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
| 297 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | 315 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, |
| 298 | EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 316 | EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
| 299 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); | 317 | WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); |
| 300 | } | ||
| 301 | |||
| 302 | static int gfxhub_v1_0_early_init(void *handle) | ||
| 303 | { | ||
| 304 | return 0; | ||
| 305 | } | ||
| 306 | |||
| 307 | static int gfxhub_v1_0_late_init(void *handle) | ||
| 308 | { | ||
| 309 | return 0; | ||
| 310 | } | 318 | } |
| 311 | 319 | ||
| 312 | static int gfxhub_v1_0_sw_init(void *handle) | 320 | void gfxhub_v1_0_init(struct amdgpu_device *adev) |
| 313 | { | 321 | { |
| 314 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 315 | struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; | 322 | struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; |
| 316 | 323 | ||
| 317 | hub->ctx0_ptb_addr_lo32 = | 324 | hub->ctx0_ptb_addr_lo32 = |
| @@ -330,96 +337,4 @@ static int gfxhub_v1_0_sw_init(void *handle) | |||
| 330 | SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); | 337 | SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); |
| 331 | hub->vm_l2_pro_fault_cntl = | 338 | hub->vm_l2_pro_fault_cntl = |
| 332 | SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); | 339 | SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); |
| 333 | |||
| 334 | return 0; | ||
| 335 | } | ||
| 336 | |||
| 337 | static int gfxhub_v1_0_sw_fini(void *handle) | ||
| 338 | { | ||
| 339 | return 0; | ||
| 340 | } | 340 | } |
| 341 | |||
| 342 | static int gfxhub_v1_0_hw_init(void *handle) | ||
| 343 | { | ||
| 344 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 345 | unsigned i; | ||
| 346 | |||
| 347 | for (i = 0 ; i < 18; ++i) { | ||
| 348 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 349 | mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + | ||
| 350 | 2 * i, 0xffffffff); | ||
| 351 | WREG32(SOC15_REG_OFFSET(GC, 0, | ||
| 352 | mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + | ||
| 353 | 2 * i, 0x1f); | ||
| 354 | } | ||
| 355 | |||
| 356 | return 0; | ||
| 357 | } | ||
| 358 | |||
| 359 | static int gfxhub_v1_0_hw_fini(void *handle) | ||
| 360 | { | ||
| 361 | return 0; | ||
| 362 | } | ||
| 363 | |||
| 364 | static int gfxhub_v1_0_suspend(void *handle) | ||
| 365 | { | ||
| 366 | return 0; | ||
| 367 | } | ||
| 368 | |||
| 369 | static int gfxhub_v1_0_resume(void *handle) | ||
| 370 | { | ||
| 371 | return 0; | ||
| 372 | } | ||
| 373 | |||
| 374 | static bool gfxhub_v1_0_is_idle(void *handle) | ||
| 375 | { | ||
| 376 | return true; | ||
| 377 | } | ||
| 378 | |||
| 379 | static int gfxhub_v1_0_wait_for_idle(void *handle) | ||
| 380 | { | ||
| 381 | return 0; | ||
| 382 | } | ||
| 383 | |||
| 384 | static int gfxhub_v1_0_soft_reset(void *handle) | ||
| 385 | { | ||
| 386 | return 0; | ||
| 387 | } | ||
| 388 | |||
| 389 | static int gfxhub_v1_0_set_clockgating_state(void *handle, | ||
| 390 | enum amd_clockgating_state state) | ||
| 391 | { | ||
| 392 | return 0; | ||
| 393 | } | ||
| 394 | |||
| 395 | static int gfxhub_v1_0_set_powergating_state(void *handle, | ||
| 396 | enum amd_powergating_state state) | ||
| 397 | { | ||
| 398 | return 0; | ||
| 399 | } | ||
| 400 | |||
| 401 | const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = { | ||
| 402 | .name = "gfxhub_v1_0", | ||
| 403 | .early_init = gfxhub_v1_0_early_init, | ||
| 404 | .late_init = gfxhub_v1_0_late_init, | ||
| 405 | .sw_init = gfxhub_v1_0_sw_init, | ||
| 406 | .sw_fini = gfxhub_v1_0_sw_fini, | ||
| 407 | .hw_init = gfxhub_v1_0_hw_init, | ||
| 408 | .hw_fini = gfxhub_v1_0_hw_fini, | ||
| 409 | .suspend = gfxhub_v1_0_suspend, | ||
| 410 | .resume = gfxhub_v1_0_resume, | ||
| 411 | .is_idle = gfxhub_v1_0_is_idle, | ||
| 412 | .wait_for_idle = gfxhub_v1_0_wait_for_idle, | ||
| 413 | .soft_reset = gfxhub_v1_0_soft_reset, | ||
| 414 | .set_clockgating_state = gfxhub_v1_0_set_clockgating_state, | ||
| 415 | .set_powergating_state = gfxhub_v1_0_set_powergating_state, | ||
| 416 | }; | ||
| 417 | |||
| 418 | const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block = | ||
| 419 | { | ||
| 420 | .type = AMD_IP_BLOCK_TYPE_GFXHUB, | ||
| 421 | .major = 1, | ||
| 422 | .minor = 0, | ||
| 423 | .rev = 0, | ||
| 424 | .funcs = &gfxhub_v1_0_ip_funcs, | ||
| 425 | }; | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 5129a8ff0932..d2dbb085f480 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h | |||
| @@ -28,7 +28,8 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev); | |||
| 28 | void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); | 28 | void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); |
| 29 | void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, | 29 | void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, |
| 30 | bool value); | 30 | bool value); |
| 31 | 31 | void gfxhub_v1_0_init(struct amdgpu_device *adev); | |
| 32 | u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); | ||
| 32 | extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs; | 33 | extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs; |
| 33 | extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block; | 34 | extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block; |
| 34 | 35 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a572979f186c..d0214d942bfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include <linux/firmware.h> | 23 | #include <linux/firmware.h> |
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "gmc_v6_0.h" | 26 | #include "gmc_v6_0.h" |
| 27 | #include "amdgpu_ucode.h" | 27 | #include "amdgpu_ucode.h" |
| @@ -395,6 +395,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, | |||
| 395 | return pte_flag; | 395 | return pte_flag; |
| 396 | } | 396 | } |
| 397 | 397 | ||
| 398 | static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) | ||
| 399 | { | ||
| 400 | BUG_ON(addr & 0xFFFFFF0000000FFFULL); | ||
| 401 | return addr; | ||
| 402 | } | ||
| 403 | |||
| 398 | static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, | 404 | static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, |
| 399 | bool value) | 405 | bool value) |
| 400 | { | 406 | { |
| @@ -614,33 +620,6 @@ static void gmc_v6_0_gart_fini(struct amdgpu_device *adev) | |||
| 614 | amdgpu_gart_fini(adev); | 620 | amdgpu_gart_fini(adev); |
| 615 | } | 621 | } |
| 616 | 622 | ||
| 617 | static int gmc_v6_0_vm_init(struct amdgpu_device *adev) | ||
| 618 | { | ||
| 619 | /* | ||
| 620 | * number of VMs | ||
| 621 | * VMID 0 is reserved for System | ||
| 622 | * amdgpu graphics/compute will use VMIDs 1-7 | ||
| 623 | * amdkfd will use VMIDs 8-15 | ||
| 624 | */ | ||
| 625 | adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; | ||
| 626 | adev->vm_manager.num_level = 1; | ||
| 627 | amdgpu_vm_manager_init(adev); | ||
| 628 | |||
| 629 | /* base offset of vram pages */ | ||
| 630 | if (adev->flags & AMD_IS_APU) { | ||
| 631 | u64 tmp = RREG32(mmMC_VM_FB_OFFSET); | ||
| 632 | tmp <<= 22; | ||
| 633 | adev->vm_manager.vram_base_offset = tmp; | ||
| 634 | } else | ||
| 635 | adev->vm_manager.vram_base_offset = 0; | ||
| 636 | |||
| 637 | return 0; | ||
| 638 | } | ||
| 639 | |||
| 640 | static void gmc_v6_0_vm_fini(struct amdgpu_device *adev) | ||
| 641 | { | ||
| 642 | } | ||
| 643 | |||
| 644 | static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, | 623 | static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, |
| 645 | u32 status, u32 addr, u32 mc_client) | 624 | u32 status, u32 addr, u32 mc_client) |
| 646 | { | 625 | { |
| @@ -815,14 +794,6 @@ static int gmc_v6_0_early_init(void *handle) | |||
| 815 | gmc_v6_0_set_gart_funcs(adev); | 794 | gmc_v6_0_set_gart_funcs(adev); |
| 816 | gmc_v6_0_set_irq_funcs(adev); | 795 | gmc_v6_0_set_irq_funcs(adev); |
| 817 | 796 | ||
| 818 | if (adev->flags & AMD_IS_APU) { | ||
| 819 | adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; | ||
| 820 | } else { | ||
| 821 | u32 tmp = RREG32(mmMC_SEQ_MISC0); | ||
| 822 | tmp &= MC_SEQ_MISC0__MT__MASK; | ||
| 823 | adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp); | ||
| 824 | } | ||
| 825 | |||
| 826 | return 0; | 797 | return 0; |
| 827 | } | 798 | } |
| 828 | 799 | ||
| @@ -842,6 +813,14 @@ static int gmc_v6_0_sw_init(void *handle) | |||
| 842 | int dma_bits; | 813 | int dma_bits; |
| 843 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 814 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 844 | 815 | ||
| 816 | if (adev->flags & AMD_IS_APU) { | ||
| 817 | adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; | ||
| 818 | } else { | ||
| 819 | u32 tmp = RREG32(mmMC_SEQ_MISC0); | ||
| 820 | tmp &= MC_SEQ_MISC0__MT__MASK; | ||
| 821 | adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp); | ||
| 822 | } | ||
| 823 | |||
| 845 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); | 824 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); |
| 846 | if (r) | 825 | if (r) |
| 847 | return r; | 826 | return r; |
| @@ -855,6 +834,8 @@ static int gmc_v6_0_sw_init(void *handle) | |||
| 855 | 834 | ||
| 856 | adev->mc.mc_mask = 0xffffffffffULL; | 835 | adev->mc.mc_mask = 0xffffffffffULL; |
| 857 | 836 | ||
| 837 | adev->mc.stolen_size = 256 * 1024; | ||
| 838 | |||
| 858 | adev->need_dma32 = false; | 839 | adev->need_dma32 = false; |
| 859 | dma_bits = adev->need_dma32 ? 32 : 40; | 840 | dma_bits = adev->need_dma32 ? 32 : 40; |
| 860 | r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); | 841 | r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); |
| @@ -887,26 +868,34 @@ static int gmc_v6_0_sw_init(void *handle) | |||
| 887 | if (r) | 868 | if (r) |
| 888 | return r; | 869 | return r; |
| 889 | 870 | ||
| 890 | if (!adev->vm_manager.enabled) { | 871 | /* |
| 891 | r = gmc_v6_0_vm_init(adev); | 872 | * number of VMs |
| 892 | if (r) { | 873 | * VMID 0 is reserved for System |
| 893 | dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); | 874 | * amdgpu graphics/compute will use VMIDs 1-7 |
| 894 | return r; | 875 | * amdkfd will use VMIDs 8-15 |
| 895 | } | 876 | */ |
| 896 | adev->vm_manager.enabled = true; | 877 | adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; |
| 878 | adev->vm_manager.num_level = 1; | ||
| 879 | amdgpu_vm_manager_init(adev); | ||
| 880 | |||
| 881 | /* base offset of vram pages */ | ||
| 882 | if (adev->flags & AMD_IS_APU) { | ||
| 883 | u64 tmp = RREG32(mmMC_VM_FB_OFFSET); | ||
| 884 | |||
| 885 | tmp <<= 22; | ||
| 886 | adev->vm_manager.vram_base_offset = tmp; | ||
| 887 | } else { | ||
| 888 | adev->vm_manager.vram_base_offset = 0; | ||
| 897 | } | 889 | } |
| 898 | 890 | ||
| 899 | return r; | 891 | return 0; |
| 900 | } | 892 | } |
| 901 | 893 | ||
| 902 | static int gmc_v6_0_sw_fini(void *handle) | 894 | static int gmc_v6_0_sw_fini(void *handle) |
| 903 | { | 895 | { |
| 904 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 896 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 905 | 897 | ||
| 906 | if (adev->vm_manager.enabled) { | 898 | amdgpu_vm_manager_fini(adev); |
| 907 | gmc_v6_0_vm_fini(adev); | ||
| 908 | adev->vm_manager.enabled = false; | ||
| 909 | } | ||
| 910 | gmc_v6_0_gart_fini(adev); | 899 | gmc_v6_0_gart_fini(adev); |
| 911 | amdgpu_gem_force_release(adev); | 900 | amdgpu_gem_force_release(adev); |
| 912 | amdgpu_bo_fini(adev); | 901 | amdgpu_bo_fini(adev); |
| @@ -950,10 +939,6 @@ static int gmc_v6_0_suspend(void *handle) | |||
| 950 | { | 939 | { |
| 951 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 940 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 952 | 941 | ||
| 953 | if (adev->vm_manager.enabled) { | ||
| 954 | gmc_v6_0_vm_fini(adev); | ||
| 955 | adev->vm_manager.enabled = false; | ||
| 956 | } | ||
| 957 | gmc_v6_0_hw_fini(adev); | 942 | gmc_v6_0_hw_fini(adev); |
| 958 | 943 | ||
| 959 | return 0; | 944 | return 0; |
| @@ -968,16 +953,9 @@ static int gmc_v6_0_resume(void *handle) | |||
| 968 | if (r) | 953 | if (r) |
| 969 | return r; | 954 | return r; |
| 970 | 955 | ||
| 971 | if (!adev->vm_manager.enabled) { | 956 | amdgpu_vm_reset_all_ids(adev); |
| 972 | r = gmc_v6_0_vm_init(adev); | ||
| 973 | if (r) { | ||
| 974 | dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); | ||
| 975 | return r; | ||
| 976 | } | ||
| 977 | adev->vm_manager.enabled = true; | ||
| 978 | } | ||
| 979 | 957 | ||
| 980 | return r; | 958 | return 0; |
| 981 | } | 959 | } |
| 982 | 960 | ||
| 983 | static bool gmc_v6_0_is_idle(void *handle) | 961 | static bool gmc_v6_0_is_idle(void *handle) |
| @@ -995,16 +973,10 @@ static bool gmc_v6_0_is_idle(void *handle) | |||
| 995 | static int gmc_v6_0_wait_for_idle(void *handle) | 973 | static int gmc_v6_0_wait_for_idle(void *handle) |
| 996 | { | 974 | { |
| 997 | unsigned i; | 975 | unsigned i; |
| 998 | u32 tmp; | ||
| 999 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 976 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1000 | 977 | ||
| 1001 | for (i = 0; i < adev->usec_timeout; i++) { | 978 | for (i = 0; i < adev->usec_timeout; i++) { |
| 1002 | tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK | | 979 | if (gmc_v6_0_is_idle(handle)) |
| 1003 | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | | ||
| 1004 | SRBM_STATUS__MCC_BUSY_MASK | | ||
| 1005 | SRBM_STATUS__MCD_BUSY_MASK | | ||
| 1006 | SRBM_STATUS__VMC_BUSY_MASK); | ||
| 1007 | if (!tmp) | ||
| 1008 | return 0; | 980 | return 0; |
| 1009 | udelay(1); | 981 | udelay(1); |
| 1010 | } | 982 | } |
| @@ -1157,6 +1129,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = { | |||
| 1157 | .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, | 1129 | .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, |
| 1158 | .set_pte_pde = gmc_v6_0_gart_set_pte_pde, | 1130 | .set_pte_pde = gmc_v6_0_gart_set_pte_pde, |
| 1159 | .set_prt = gmc_v6_0_set_prt, | 1131 | .set_prt = gmc_v6_0_set_prt, |
| 1132 | .get_vm_pde = gmc_v6_0_get_vm_pde, | ||
| 1160 | .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags | 1133 | .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags |
| 1161 | }; | 1134 | }; |
| 1162 | 1135 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index a9083a16a250..7e9ea53edf8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include <linux/firmware.h> | 23 | #include <linux/firmware.h> |
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "cikd.h" | 26 | #include "cikd.h" |
| 27 | #include "cik.h" | 27 | #include "cik.h" |
| @@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, | |||
| 472 | return pte_flag; | 472 | return pte_flag; |
| 473 | } | 473 | } |
| 474 | 474 | ||
| 475 | static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) | ||
| 476 | { | ||
| 477 | BUG_ON(addr & 0xFFFFFF0000000FFFULL); | ||
| 478 | return addr; | ||
| 479 | } | ||
| 480 | |||
| 475 | /** | 481 | /** |
| 476 | * gmc_v8_0_set_fault_enable_default - update VM fault handling | 482 | * gmc_v8_0_set_fault_enable_default - update VM fault handling |
| 477 | * | 483 | * |
| @@ -724,55 +730,6 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev) | |||
| 724 | amdgpu_gart_fini(adev); | 730 | amdgpu_gart_fini(adev); |
| 725 | } | 731 | } |
| 726 | 732 | ||
| 727 | /* | ||
| 728 | * vm | ||
| 729 | * VMID 0 is the physical GPU addresses as used by the kernel. | ||
| 730 | * VMIDs 1-15 are used for userspace clients and are handled | ||
| 731 | * by the amdgpu vm/hsa code. | ||
| 732 | */ | ||
| 733 | /** | ||
| 734 | * gmc_v7_0_vm_init - cik vm init callback | ||
| 735 | * | ||
| 736 | * @adev: amdgpu_device pointer | ||
| 737 | * | ||
| 738 | * Inits cik specific vm parameters (number of VMs, base of vram for | ||
| 739 | * VMIDs 1-15) (CIK). | ||
| 740 | * Returns 0 for success. | ||
| 741 | */ | ||
| 742 | static int gmc_v7_0_vm_init(struct amdgpu_device *adev) | ||
| 743 | { | ||
| 744 | /* | ||
| 745 | * number of VMs | ||
| 746 | * VMID 0 is reserved for System | ||
| 747 | * amdgpu graphics/compute will use VMIDs 1-7 | ||
| 748 | * amdkfd will use VMIDs 8-15 | ||
| 749 | */ | ||
| 750 | adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; | ||
| 751 | adev->vm_manager.num_level = 1; | ||
| 752 | amdgpu_vm_manager_init(adev); | ||
| 753 | |||
| 754 | /* base offset of vram pages */ | ||
| 755 | if (adev->flags & AMD_IS_APU) { | ||
| 756 | u64 tmp = RREG32(mmMC_VM_FB_OFFSET); | ||
| 757 | tmp <<= 22; | ||
| 758 | adev->vm_manager.vram_base_offset = tmp; | ||
| 759 | } else | ||
| 760 | adev->vm_manager.vram_base_offset = 0; | ||
| 761 | |||
| 762 | return 0; | ||
| 763 | } | ||
| 764 | |||
| 765 | /** | ||
| 766 | * gmc_v7_0_vm_fini - cik vm fini callback | ||
| 767 | * | ||
| 768 | * @adev: amdgpu_device pointer | ||
| 769 | * | ||
| 770 | * Tear down any asic specific VM setup (CIK). | ||
| 771 | */ | ||
| 772 | static void gmc_v7_0_vm_fini(struct amdgpu_device *adev) | ||
| 773 | { | ||
| 774 | } | ||
| 775 | |||
| 776 | /** | 733 | /** |
| 777 | * gmc_v7_0_vm_decode_fault - print human readable fault info | 734 | * gmc_v7_0_vm_decode_fault - print human readable fault info |
| 778 | * | 735 | * |
| @@ -1013,6 +970,8 @@ static int gmc_v7_0_sw_init(void *handle) | |||
| 1013 | */ | 970 | */ |
| 1014 | adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ | 971 | adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ |
| 1015 | 972 | ||
| 973 | adev->mc.stolen_size = 256 * 1024; | ||
| 974 | |||
| 1016 | /* set DMA mask + need_dma32 flags. | 975 | /* set DMA mask + need_dma32 flags. |
| 1017 | * PCIE - can handle 40-bits. | 976 | * PCIE - can handle 40-bits. |
| 1018 | * IGP - can handle 40-bits | 977 | * IGP - can handle 40-bits |
| @@ -1051,27 +1010,34 @@ static int gmc_v7_0_sw_init(void *handle) | |||
| 1051 | if (r) | 1010 | if (r) |
| 1052 | return r; | 1011 | return r; |
| 1053 | 1012 | ||
| 1054 | if (!adev->vm_manager.enabled) { | 1013 | /* |
| 1055 | r = gmc_v7_0_vm_init(adev); | 1014 | * number of VMs |
| 1056 | if (r) { | 1015 | * VMID 0 is reserved for System |
| 1057 | dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); | 1016 | * amdgpu graphics/compute will use VMIDs 1-7 |
| 1058 | return r; | 1017 | * amdkfd will use VMIDs 8-15 |
| 1059 | } | 1018 | */ |
| 1060 | adev->vm_manager.enabled = true; | 1019 | adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; |
| 1020 | adev->vm_manager.num_level = 1; | ||
| 1021 | amdgpu_vm_manager_init(adev); | ||
| 1022 | |||
| 1023 | /* base offset of vram pages */ | ||
| 1024 | if (adev->flags & AMD_IS_APU) { | ||
| 1025 | u64 tmp = RREG32(mmMC_VM_FB_OFFSET); | ||
| 1026 | |||
| 1027 | tmp <<= 22; | ||
| 1028 | adev->vm_manager.vram_base_offset = tmp; | ||
| 1029 | } else { | ||
| 1030 | adev->vm_manager.vram_base_offset = 0; | ||
| 1061 | } | 1031 | } |
| 1062 | 1032 | ||
| 1063 | return r; | 1033 | return 0; |
| 1064 | } | 1034 | } |
| 1065 | 1035 | ||
| 1066 | static int gmc_v7_0_sw_fini(void *handle) | 1036 | static int gmc_v7_0_sw_fini(void *handle) |
| 1067 | { | 1037 | { |
| 1068 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1038 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1069 | 1039 | ||
| 1070 | if (adev->vm_manager.enabled) { | 1040 | amdgpu_vm_manager_fini(adev); |
| 1071 | amdgpu_vm_manager_fini(adev); | ||
| 1072 | gmc_v7_0_vm_fini(adev); | ||
| 1073 | adev->vm_manager.enabled = false; | ||
| 1074 | } | ||
| 1075 | gmc_v7_0_gart_fini(adev); | 1041 | gmc_v7_0_gart_fini(adev); |
| 1076 | amdgpu_gem_force_release(adev); | 1042 | amdgpu_gem_force_release(adev); |
| 1077 | amdgpu_bo_fini(adev); | 1043 | amdgpu_bo_fini(adev); |
| @@ -1117,10 +1083,6 @@ static int gmc_v7_0_suspend(void *handle) | |||
| 1117 | { | 1083 | { |
| 1118 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1084 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1119 | 1085 | ||
| 1120 | if (adev->vm_manager.enabled) { | ||
| 1121 | gmc_v7_0_vm_fini(adev); | ||
| 1122 | adev->vm_manager.enabled = false; | ||
| 1123 | } | ||
| 1124 | gmc_v7_0_hw_fini(adev); | 1086 | gmc_v7_0_hw_fini(adev); |
| 1125 | 1087 | ||
| 1126 | return 0; | 1088 | return 0; |
| @@ -1135,16 +1097,9 @@ static int gmc_v7_0_resume(void *handle) | |||
| 1135 | if (r) | 1097 | if (r) |
| 1136 | return r; | 1098 | return r; |
| 1137 | 1099 | ||
| 1138 | if (!adev->vm_manager.enabled) { | 1100 | amdgpu_vm_reset_all_ids(adev); |
| 1139 | r = gmc_v7_0_vm_init(adev); | ||
| 1140 | if (r) { | ||
| 1141 | dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); | ||
| 1142 | return r; | ||
| 1143 | } | ||
| 1144 | adev->vm_manager.enabled = true; | ||
| 1145 | } | ||
| 1146 | 1101 | ||
| 1147 | return r; | 1102 | return 0; |
| 1148 | } | 1103 | } |
| 1149 | 1104 | ||
| 1150 | static bool gmc_v7_0_is_idle(void *handle) | 1105 | static bool gmc_v7_0_is_idle(void *handle) |
| @@ -1346,7 +1301,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = { | |||
| 1346 | .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, | 1301 | .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, |
| 1347 | .set_pte_pde = gmc_v7_0_gart_set_pte_pde, | 1302 | .set_pte_pde = gmc_v7_0_gart_set_pte_pde, |
| 1348 | .set_prt = gmc_v7_0_set_prt, | 1303 | .set_prt = gmc_v7_0_set_prt, |
| 1349 | .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags | 1304 | .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, |
| 1305 | .get_vm_pde = gmc_v7_0_get_vm_pde | ||
| 1350 | }; | 1306 | }; |
| 1351 | 1307 | ||
| 1352 | static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { | 1308 | static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 4ac99784160a..cc9f88057cd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include <linux/firmware.h> | 23 | #include <linux/firmware.h> |
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "gmc_v8_0.h" | 26 | #include "gmc_v8_0.h" |
| 27 | #include "amdgpu_ucode.h" | 27 | #include "amdgpu_ucode.h" |
| @@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, | |||
| 656 | return pte_flag; | 656 | return pte_flag; |
| 657 | } | 657 | } |
| 658 | 658 | ||
| 659 | static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) | ||
| 660 | { | ||
| 661 | BUG_ON(addr & 0xFFFFFF0000000FFFULL); | ||
| 662 | return addr; | ||
| 663 | } | ||
| 664 | |||
| 659 | /** | 665 | /** |
| 660 | * gmc_v8_0_set_fault_enable_default - update VM fault handling | 666 | * gmc_v8_0_set_fault_enable_default - update VM fault handling |
| 661 | * | 667 | * |
| @@ -927,55 +933,6 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev) | |||
| 927 | amdgpu_gart_fini(adev); | 933 | amdgpu_gart_fini(adev); |
| 928 | } | 934 | } |
| 929 | 935 | ||
| 930 | /* | ||
| 931 | * vm | ||
| 932 | * VMID 0 is the physical GPU addresses as used by the kernel. | ||
| 933 | * VMIDs 1-15 are used for userspace clients and are handled | ||
| 934 | * by the amdgpu vm/hsa code. | ||
| 935 | */ | ||
| 936 | /** | ||
| 937 | * gmc_v8_0_vm_init - cik vm init callback | ||
| 938 | * | ||
| 939 | * @adev: amdgpu_device pointer | ||
| 940 | * | ||
| 941 | * Inits cik specific vm parameters (number of VMs, base of vram for | ||
| 942 | * VMIDs 1-15) (CIK). | ||
| 943 | * Returns 0 for success. | ||
| 944 | */ | ||
| 945 | static int gmc_v8_0_vm_init(struct amdgpu_device *adev) | ||
| 946 | { | ||
| 947 | /* | ||
| 948 | * number of VMs | ||
| 949 | * VMID 0 is reserved for System | ||
| 950 | * amdgpu graphics/compute will use VMIDs 1-7 | ||
| 951 | * amdkfd will use VMIDs 8-15 | ||
| 952 | */ | ||
| 953 | adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; | ||
| 954 | adev->vm_manager.num_level = 1; | ||
| 955 | amdgpu_vm_manager_init(adev); | ||
| 956 | |||
| 957 | /* base offset of vram pages */ | ||
| 958 | if (adev->flags & AMD_IS_APU) { | ||
| 959 | u64 tmp = RREG32(mmMC_VM_FB_OFFSET); | ||
| 960 | tmp <<= 22; | ||
| 961 | adev->vm_manager.vram_base_offset = tmp; | ||
| 962 | } else | ||
| 963 | adev->vm_manager.vram_base_offset = 0; | ||
| 964 | |||
| 965 | return 0; | ||
| 966 | } | ||
| 967 | |||
| 968 | /** | ||
| 969 | * gmc_v8_0_vm_fini - cik vm fini callback | ||
| 970 | * | ||
| 971 | * @adev: amdgpu_device pointer | ||
| 972 | * | ||
| 973 | * Tear down any asic specific VM setup (CIK). | ||
| 974 | */ | ||
| 975 | static void gmc_v8_0_vm_fini(struct amdgpu_device *adev) | ||
| 976 | { | ||
| 977 | } | ||
| 978 | |||
| 979 | /** | 936 | /** |
| 980 | * gmc_v8_0_vm_decode_fault - print human readable fault info | 937 | * gmc_v8_0_vm_decode_fault - print human readable fault info |
| 981 | * | 938 | * |
| @@ -1097,6 +1054,8 @@ static int gmc_v8_0_sw_init(void *handle) | |||
| 1097 | */ | 1054 | */ |
| 1098 | adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ | 1055 | adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ |
| 1099 | 1056 | ||
| 1057 | adev->mc.stolen_size = 256 * 1024; | ||
| 1058 | |||
| 1100 | /* set DMA mask + need_dma32 flags. | 1059 | /* set DMA mask + need_dma32 flags. |
| 1101 | * PCIE - can handle 40-bits. | 1060 | * PCIE - can handle 40-bits. |
| 1102 | * IGP - can handle 40-bits | 1061 | * IGP - can handle 40-bits |
| @@ -1135,27 +1094,34 @@ static int gmc_v8_0_sw_init(void *handle) | |||
| 1135 | if (r) | 1094 | if (r) |
| 1136 | return r; | 1095 | return r; |
| 1137 | 1096 | ||
| 1138 | if (!adev->vm_manager.enabled) { | 1097 | /* |
| 1139 | r = gmc_v8_0_vm_init(adev); | 1098 | * number of VMs |
| 1140 | if (r) { | 1099 | * VMID 0 is reserved for System |
| 1141 | dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); | 1100 | * amdgpu graphics/compute will use VMIDs 1-7 |
| 1142 | return r; | 1101 | * amdkfd will use VMIDs 8-15 |
| 1143 | } | 1102 | */ |
| 1144 | adev->vm_manager.enabled = true; | 1103 | adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; |
| 1104 | adev->vm_manager.num_level = 1; | ||
| 1105 | amdgpu_vm_manager_init(adev); | ||
| 1106 | |||
| 1107 | /* base offset of vram pages */ | ||
| 1108 | if (adev->flags & AMD_IS_APU) { | ||
| 1109 | u64 tmp = RREG32(mmMC_VM_FB_OFFSET); | ||
| 1110 | |||
| 1111 | tmp <<= 22; | ||
| 1112 | adev->vm_manager.vram_base_offset = tmp; | ||
| 1113 | } else { | ||
| 1114 | adev->vm_manager.vram_base_offset = 0; | ||
| 1145 | } | 1115 | } |
| 1146 | 1116 | ||
| 1147 | return r; | 1117 | return 0; |
| 1148 | } | 1118 | } |
| 1149 | 1119 | ||
| 1150 | static int gmc_v8_0_sw_fini(void *handle) | 1120 | static int gmc_v8_0_sw_fini(void *handle) |
| 1151 | { | 1121 | { |
| 1152 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1122 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1153 | 1123 | ||
| 1154 | if (adev->vm_manager.enabled) { | 1124 | amdgpu_vm_manager_fini(adev); |
| 1155 | amdgpu_vm_manager_fini(adev); | ||
| 1156 | gmc_v8_0_vm_fini(adev); | ||
| 1157 | adev->vm_manager.enabled = false; | ||
| 1158 | } | ||
| 1159 | gmc_v8_0_gart_fini(adev); | 1125 | gmc_v8_0_gart_fini(adev); |
| 1160 | amdgpu_gem_force_release(adev); | 1126 | amdgpu_gem_force_release(adev); |
| 1161 | amdgpu_bo_fini(adev); | 1127 | amdgpu_bo_fini(adev); |
| @@ -1209,10 +1175,6 @@ static int gmc_v8_0_suspend(void *handle) | |||
| 1209 | { | 1175 | { |
| 1210 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1176 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1211 | 1177 | ||
| 1212 | if (adev->vm_manager.enabled) { | ||
| 1213 | gmc_v8_0_vm_fini(adev); | ||
| 1214 | adev->vm_manager.enabled = false; | ||
| 1215 | } | ||
| 1216 | gmc_v8_0_hw_fini(adev); | 1178 | gmc_v8_0_hw_fini(adev); |
| 1217 | 1179 | ||
| 1218 | return 0; | 1180 | return 0; |
| @@ -1227,16 +1189,9 @@ static int gmc_v8_0_resume(void *handle) | |||
| 1227 | if (r) | 1189 | if (r) |
| 1228 | return r; | 1190 | return r; |
| 1229 | 1191 | ||
| 1230 | if (!adev->vm_manager.enabled) { | 1192 | amdgpu_vm_reset_all_ids(adev); |
| 1231 | r = gmc_v8_0_vm_init(adev); | ||
| 1232 | if (r) { | ||
| 1233 | dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); | ||
| 1234 | return r; | ||
| 1235 | } | ||
| 1236 | adev->vm_manager.enabled = true; | ||
| 1237 | } | ||
| 1238 | 1193 | ||
| 1239 | return r; | 1194 | return 0; |
| 1240 | } | 1195 | } |
| 1241 | 1196 | ||
| 1242 | static bool gmc_v8_0_is_idle(void *handle) | 1197 | static bool gmc_v8_0_is_idle(void *handle) |
| @@ -1665,7 +1620,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = { | |||
| 1665 | .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, | 1620 | .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, |
| 1666 | .set_pte_pde = gmc_v8_0_gart_set_pte_pde, | 1621 | .set_pte_pde = gmc_v8_0_gart_set_pte_pde, |
| 1667 | .set_prt = gmc_v8_0_set_prt, | 1622 | .set_prt = gmc_v8_0_set_prt, |
| 1668 | .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags | 1623 | .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, |
| 1624 | .get_vm_pde = gmc_v8_0_get_vm_pde | ||
| 1669 | }; | 1625 | }; |
| 1670 | 1626 | ||
| 1671 | static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { | 1627 | static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index dc1e1c1d6b24..175ba5f9691c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include "soc15_common.h" | 33 | #include "soc15_common.h" |
| 34 | 34 | ||
| 35 | #include "nbio_v6_1.h" | 35 | #include "nbio_v6_1.h" |
| 36 | #include "nbio_v7_0.h" | ||
| 36 | #include "gfxhub_v1_0.h" | 37 | #include "gfxhub_v1_0.h" |
| 37 | #include "mmhub_v1_0.h" | 38 | #include "mmhub_v1_0.h" |
| 38 | 39 | ||
| @@ -215,7 +216,10 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, | |||
| 215 | unsigned i, j; | 216 | unsigned i, j; |
| 216 | 217 | ||
| 217 | /* flush hdp cache */ | 218 | /* flush hdp cache */ |
| 218 | nbio_v6_1_hdp_flush(adev); | 219 | if (adev->flags & AMD_IS_APU) |
| 220 | nbio_v7_0_hdp_flush(adev); | ||
| 221 | else | ||
| 222 | nbio_v6_1_hdp_flush(adev); | ||
| 219 | 223 | ||
| 220 | spin_lock(&adev->mc.invalidate_lock); | 224 | spin_lock(&adev->mc.invalidate_lock); |
| 221 | 225 | ||
| @@ -354,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, | |||
| 354 | return pte_flag; | 358 | return pte_flag; |
| 355 | } | 359 | } |
| 356 | 360 | ||
| 357 | static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) | 361 | static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr) |
| 358 | { | 362 | { |
| 359 | return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start; | 363 | addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start; |
| 364 | BUG_ON(addr & 0xFFFF00000000003FULL); | ||
| 365 | return addr; | ||
| 360 | } | 366 | } |
| 361 | 367 | ||
| 362 | static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { | 368 | static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { |
| 363 | .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, | 369 | .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, |
| 364 | .set_pte_pde = gmc_v9_0_gart_set_pte_pde, | 370 | .set_pte_pde = gmc_v9_0_gart_set_pte_pde, |
| 365 | .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, | ||
| 366 | .adjust_mc_addr = gmc_v9_0_adjust_mc_addr, | ||
| 367 | .get_invalidate_req = gmc_v9_0_get_invalidate_req, | 371 | .get_invalidate_req = gmc_v9_0_get_invalidate_req, |
| 372 | .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, | ||
| 373 | .get_vm_pde = gmc_v9_0_get_vm_pde | ||
| 368 | }; | 374 | }; |
| 369 | 375 | ||
| 370 | static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) | 376 | static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) |
| @@ -415,6 +421,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, | |||
| 415 | amdgpu_vram_location(adev, &adev->mc, base); | 421 | amdgpu_vram_location(adev, &adev->mc, base); |
| 416 | adev->mc.gtt_base_align = 0; | 422 | adev->mc.gtt_base_align = 0; |
| 417 | amdgpu_gtt_location(adev, mc); | 423 | amdgpu_gtt_location(adev, mc); |
| 424 | /* base offset of vram pages */ | ||
| 425 | if (adev->flags & AMD_IS_APU) | ||
| 426 | adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); | ||
| 427 | else | ||
| 428 | adev->vm_manager.vram_base_offset = 0; | ||
| 418 | } | 429 | } |
| 419 | 430 | ||
| 420 | /** | 431 | /** |
| @@ -434,7 +445,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) | |||
| 434 | /* hbm memory channel size */ | 445 | /* hbm memory channel size */ |
| 435 | chansize = 128; | 446 | chansize = 128; |
| 436 | 447 | ||
| 437 | tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0)); | 448 | tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); |
| 438 | tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; | 449 | tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; |
| 439 | tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; | 450 | tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; |
| 440 | switch (tmp) { | 451 | switch (tmp) { |
| @@ -474,7 +485,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) | |||
| 474 | adev->mc.aper_size = pci_resource_len(adev->pdev, 0); | 485 | adev->mc.aper_size = pci_resource_len(adev->pdev, 0); |
| 475 | /* size in MB on si */ | 486 | /* size in MB on si */ |
| 476 | adev->mc.mc_vram_size = | 487 | adev->mc.mc_vram_size = |
| 477 | nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL; | 488 | ((adev->flags & AMD_IS_APU) ? nbio_v7_0_get_memsize(adev) : |
| 489 | nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL; | ||
| 478 | adev->mc.real_vram_size = adev->mc.mc_vram_size; | 490 | adev->mc.real_vram_size = adev->mc.mc_vram_size; |
| 479 | adev->mc.visible_vram_size = adev->mc.aper_size; | 491 | adev->mc.visible_vram_size = adev->mc.aper_size; |
| 480 | 492 | ||
| @@ -514,64 +526,15 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) | |||
| 514 | return amdgpu_gart_table_vram_alloc(adev); | 526 | return amdgpu_gart_table_vram_alloc(adev); |
| 515 | } | 527 | } |
| 516 | 528 | ||
| 517 | /* | ||
| 518 | * vm | ||
| 519 | * VMID 0 is the physical GPU addresses as used by the kernel. | ||
| 520 | * VMIDs 1-15 are used for userspace clients and are handled | ||
| 521 | * by the amdgpu vm/hsa code. | ||
| 522 | */ | ||
| 523 | /** | ||
| 524 | * gmc_v9_0_vm_init - vm init callback | ||
| 525 | * | ||
| 526 | * @adev: amdgpu_device pointer | ||
| 527 | * | ||
| 528 | * Inits vega10 specific vm parameters (number of VMs, base of vram for | ||
| 529 | * VMIDs 1-15) (vega10). | ||
| 530 | * Returns 0 for success. | ||
| 531 | */ | ||
| 532 | static int gmc_v9_0_vm_init(struct amdgpu_device *adev) | ||
| 533 | { | ||
| 534 | /* | ||
| 535 | * number of VMs | ||
| 536 | * VMID 0 is reserved for System | ||
| 537 | * amdgpu graphics/compute will use VMIDs 1-7 | ||
| 538 | * amdkfd will use VMIDs 8-15 | ||
| 539 | */ | ||
| 540 | adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; | ||
| 541 | adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; | ||
| 542 | |||
| 543 | /* TODO: fix num_level for APU when updating vm size and block size */ | ||
| 544 | if (adev->flags & AMD_IS_APU) | ||
| 545 | adev->vm_manager.num_level = 1; | ||
| 546 | else | ||
| 547 | adev->vm_manager.num_level = 3; | ||
| 548 | amdgpu_vm_manager_init(adev); | ||
| 549 | |||
| 550 | /* base offset of vram pages */ | ||
| 551 | /*XXX This value is not zero for APU*/ | ||
| 552 | adev->vm_manager.vram_base_offset = 0; | ||
| 553 | |||
| 554 | return 0; | ||
| 555 | } | ||
| 556 | |||
| 557 | /** | ||
| 558 | * gmc_v9_0_vm_fini - vm fini callback | ||
| 559 | * | ||
| 560 | * @adev: amdgpu_device pointer | ||
| 561 | * | ||
| 562 | * Tear down any asic specific VM setup. | ||
| 563 | */ | ||
| 564 | static void gmc_v9_0_vm_fini(struct amdgpu_device *adev) | ||
| 565 | { | ||
| 566 | return; | ||
| 567 | } | ||
| 568 | |||
| 569 | static int gmc_v9_0_sw_init(void *handle) | 529 | static int gmc_v9_0_sw_init(void *handle) |
| 570 | { | 530 | { |
| 571 | int r; | 531 | int r; |
| 572 | int dma_bits; | 532 | int dma_bits; |
| 573 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 533 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 574 | 534 | ||
| 535 | gfxhub_v1_0_init(adev); | ||
| 536 | mmhub_v1_0_init(adev); | ||
| 537 | |||
| 575 | spin_lock_init(&adev->mc.invalidate_lock); | 538 | spin_lock_init(&adev->mc.invalidate_lock); |
| 576 | 539 | ||
| 577 | if (adev->flags & AMD_IS_APU) { | 540 | if (adev->flags & AMD_IS_APU) { |
| @@ -609,6 +572,12 @@ static int gmc_v9_0_sw_init(void *handle) | |||
| 609 | */ | 572 | */ |
| 610 | adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ | 573 | adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ |
| 611 | 574 | ||
| 575 | /* | ||
| 576 | * It needs to reserve 8M stolen memory for vega10 | ||
| 577 | * TODO: Figure out how to avoid that... | ||
| 578 | */ | ||
| 579 | adev->mc.stolen_size = 8 * 1024 * 1024; | ||
| 580 | |||
| 612 | /* set DMA mask + need_dma32 flags. | 581 | /* set DMA mask + need_dma32 flags. |
| 613 | * PCIE - can handle 44-bits. | 582 | * PCIE - can handle 44-bits. |
| 614 | * IGP - can handle 44-bits | 583 | * IGP - can handle 44-bits |
| @@ -641,15 +610,23 @@ static int gmc_v9_0_sw_init(void *handle) | |||
| 641 | if (r) | 610 | if (r) |
| 642 | return r; | 611 | return r; |
| 643 | 612 | ||
| 644 | if (!adev->vm_manager.enabled) { | 613 | /* |
| 645 | r = gmc_v9_0_vm_init(adev); | 614 | * number of VMs |
| 646 | if (r) { | 615 | * VMID 0 is reserved for System |
| 647 | dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); | 616 | * amdgpu graphics/compute will use VMIDs 1-7 |
| 648 | return r; | 617 | * amdkfd will use VMIDs 8-15 |
| 649 | } | 618 | */ |
| 650 | adev->vm_manager.enabled = true; | 619 | adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; |
| 651 | } | 620 | adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; |
| 652 | return r; | 621 | |
| 622 | /* TODO: fix num_level for APU when updating vm size and block size */ | ||
| 623 | if (adev->flags & AMD_IS_APU) | ||
| 624 | adev->vm_manager.num_level = 1; | ||
| 625 | else | ||
| 626 | adev->vm_manager.num_level = 3; | ||
| 627 | amdgpu_vm_manager_init(adev); | ||
| 628 | |||
| 629 | return 0; | ||
| 653 | } | 630 | } |
| 654 | 631 | ||
| 655 | /** | 632 | /** |
| @@ -669,11 +646,7 @@ static int gmc_v9_0_sw_fini(void *handle) | |||
| 669 | { | 646 | { |
| 670 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 647 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 671 | 648 | ||
| 672 | if (adev->vm_manager.enabled) { | 649 | amdgpu_vm_manager_fini(adev); |
| 673 | amdgpu_vm_manager_fini(adev); | ||
| 674 | gmc_v9_0_vm_fini(adev); | ||
| 675 | adev->vm_manager.enabled = false; | ||
| 676 | } | ||
| 677 | gmc_v9_0_gart_fini(adev); | 650 | gmc_v9_0_gart_fini(adev); |
| 678 | amdgpu_gem_force_release(adev); | 651 | amdgpu_gem_force_release(adev); |
| 679 | amdgpu_bo_fini(adev); | 652 | amdgpu_bo_fini(adev); |
| @@ -686,6 +659,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 686 | switch (adev->asic_type) { | 659 | switch (adev->asic_type) { |
| 687 | case CHIP_VEGA10: | 660 | case CHIP_VEGA10: |
| 688 | break; | 661 | break; |
| 662 | case CHIP_RAVEN: | ||
| 663 | break; | ||
| 689 | default: | 664 | default: |
| 690 | break; | 665 | break; |
| 691 | } | 666 | } |
| @@ -715,7 +690,19 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) | |||
| 715 | return r; | 690 | return r; |
| 716 | 691 | ||
| 717 | /* After HDP is initialized, flush HDP.*/ | 692 | /* After HDP is initialized, flush HDP.*/ |
| 718 | nbio_v6_1_hdp_flush(adev); | 693 | if (adev->flags & AMD_IS_APU) |
| 694 | nbio_v7_0_hdp_flush(adev); | ||
| 695 | else | ||
| 696 | nbio_v6_1_hdp_flush(adev); | ||
| 697 | |||
| 698 | switch (adev->asic_type) { | ||
| 699 | case CHIP_RAVEN: | ||
| 700 | mmhub_v1_0_initialize_power_gating(adev); | ||
| 701 | mmhub_v1_0_update_power_gating(adev, true); | ||
| 702 | break; | ||
| 703 | default: | ||
| 704 | break; | ||
| 705 | } | ||
| 719 | 706 | ||
| 720 | r = gfxhub_v1_0_gart_enable(adev); | 707 | r = gfxhub_v1_0_gart_enable(adev); |
| 721 | if (r) | 708 | if (r) |
| @@ -725,12 +712,12 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) | |||
| 725 | if (r) | 712 | if (r) |
| 726 | return r; | 713 | return r; |
| 727 | 714 | ||
| 728 | tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL)); | 715 | tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); |
| 729 | tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; | 716 | tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; |
| 730 | WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp); | 717 | WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp); |
| 731 | 718 | ||
| 732 | tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL)); | 719 | tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); |
| 733 | WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp); | 720 | WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); |
| 734 | 721 | ||
| 735 | 722 | ||
| 736 | if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) | 723 | if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) |
| @@ -781,6 +768,12 @@ static int gmc_v9_0_hw_fini(void *handle) | |||
| 781 | { | 768 | { |
| 782 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 769 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 783 | 770 | ||
| 771 | if (amdgpu_sriov_vf(adev)) { | ||
| 772 | /* full access mode, so don't touch any GMC register */ | ||
| 773 | DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); | ||
| 774 | return 0; | ||
| 775 | } | ||
| 776 | |||
| 784 | amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); | 777 | amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); |
| 785 | gmc_v9_0_gart_disable(adev); | 778 | gmc_v9_0_gart_disable(adev); |
| 786 | 779 | ||
| @@ -791,10 +784,6 @@ static int gmc_v9_0_suspend(void *handle) | |||
| 791 | { | 784 | { |
| 792 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 785 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 793 | 786 | ||
| 794 | if (adev->vm_manager.enabled) { | ||
| 795 | gmc_v9_0_vm_fini(adev); | ||
| 796 | adev->vm_manager.enabled = false; | ||
| 797 | } | ||
| 798 | gmc_v9_0_hw_fini(adev); | 787 | gmc_v9_0_hw_fini(adev); |
| 799 | 788 | ||
| 800 | return 0; | 789 | return 0; |
| @@ -809,17 +798,9 @@ static int gmc_v9_0_resume(void *handle) | |||
| 809 | if (r) | 798 | if (r) |
| 810 | return r; | 799 | return r; |
| 811 | 800 | ||
| 812 | if (!adev->vm_manager.enabled) { | 801 | amdgpu_vm_reset_all_ids(adev); |
| 813 | r = gmc_v9_0_vm_init(adev); | ||
| 814 | if (r) { | ||
| 815 | dev_err(adev->dev, | ||
| 816 | "vm manager initialization failed (%d).\n", r); | ||
| 817 | return r; | ||
| 818 | } | ||
| 819 | adev->vm_manager.enabled = true; | ||
| 820 | } | ||
| 821 | 802 | ||
| 822 | return r; | 803 | return 0; |
| 823 | } | 804 | } |
| 824 | 805 | ||
| 825 | static bool gmc_v9_0_is_idle(void *handle) | 806 | static bool gmc_v9_0_is_idle(void *handle) |
| @@ -843,7 +824,16 @@ static int gmc_v9_0_soft_reset(void *handle) | |||
| 843 | static int gmc_v9_0_set_clockgating_state(void *handle, | 824 | static int gmc_v9_0_set_clockgating_state(void *handle, |
| 844 | enum amd_clockgating_state state) | 825 | enum amd_clockgating_state state) |
| 845 | { | 826 | { |
| 846 | return 0; | 827 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 828 | |||
| 829 | return mmhub_v1_0_set_clockgating(adev, state); | ||
| 830 | } | ||
| 831 | |||
| 832 | static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) | ||
| 833 | { | ||
| 834 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 835 | |||
| 836 | mmhub_v1_0_get_clockgating(adev, flags); | ||
| 847 | } | 837 | } |
| 848 | 838 | ||
| 849 | static int gmc_v9_0_set_powergating_state(void *handle, | 839 | static int gmc_v9_0_set_powergating_state(void *handle, |
| @@ -867,6 +857,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = { | |||
| 867 | .soft_reset = gmc_v9_0_soft_reset, | 857 | .soft_reset = gmc_v9_0_soft_reset, |
| 868 | .set_clockgating_state = gmc_v9_0_set_clockgating_state, | 858 | .set_clockgating_state = gmc_v9_0_set_clockgating_state, |
| 869 | .set_powergating_state = gmc_v9_0_set_powergating_state, | 859 | .set_powergating_state = gmc_v9_0_set_powergating_state, |
| 860 | .get_clockgating_state = gmc_v9_0_get_clockgating_state, | ||
| 870 | }; | 861 | }; |
| 871 | 862 | ||
| 872 | const struct amdgpu_ip_block_version gmc_v9_0_ip_block = | 863 | const struct amdgpu_ip_block_version gmc_v9_0_ip_block = |
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index cb622add99a7..7a0ea27ac429 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_ih.h" | 25 | #include "amdgpu_ih.h" |
| 26 | #include "vid.h" | 26 | #include "vid.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 79a52ad2c80d..3bbf2ccfca89 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "amdgpu_pm.h" | 26 | #include "amdgpu_pm.h" |
| 27 | #include "cikd.h" | 27 | #include "cikd.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_smc.c b/drivers/gpu/drm/amd/amdgpu/kv_smc.c index e6b7b42acfe1..b82e33c01571 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_smc.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | * Authors: Alex Deucher | 22 | * Authors: Alex Deucher |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include "drmP.h" | 25 | #include <drm/drmP.h> |
| 26 | #include "amdgpu.h" | 26 | #include "amdgpu.h" |
| 27 | #include "cikd.h" | 27 | #include "cikd.h" |
| 28 | #include "kv_dpm.h" | 28 | #include "kv_dpm.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index dbfe48d1207a..9804318f3488 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | |||
| @@ -34,9 +34,12 @@ | |||
| 34 | 34 | ||
| 35 | #include "soc15_common.h" | 35 | #include "soc15_common.h" |
| 36 | 36 | ||
| 37 | #define mmDAGB0_CNTL_MISC2_RV 0x008f | ||
| 38 | #define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 | ||
| 39 | |||
| 37 | u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) | 40 | u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) |
| 38 | { | 41 | { |
| 39 | u64 base = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE)); | 42 | u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); |
| 40 | 43 | ||
| 41 | base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; | 44 | base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; |
| 42 | base <<= 24; | 45 | base <<= 24; |
| @@ -44,184 +47,160 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) | |||
| 44 | return base; | 47 | return base; |
| 45 | } | 48 | } |
| 46 | 49 | ||
| 47 | int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) | 50 | static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) |
| 48 | { | 51 | { |
| 49 | u32 tmp; | 52 | uint64_t value; |
| 50 | u64 value; | ||
| 51 | uint64_t addr; | ||
| 52 | u32 i; | ||
| 53 | 53 | ||
| 54 | /* Program MC. */ | 54 | BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); |
| 55 | /* Update configuration */ | 55 | value = adev->gart.table_addr - adev->mc.vram_start + |
| 56 | DRM_INFO("%s -- in\n", __func__); | ||
| 57 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), | ||
| 58 | adev->mc.vram_start >> 18); | ||
| 59 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), | ||
| 60 | adev->mc.vram_end >> 18); | ||
| 61 | value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + | ||
| 62 | adev->vm_manager.vram_base_offset; | 56 | adev->vm_manager.vram_base_offset; |
| 63 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | 57 | value &= 0x0000FFFFFFFFF000ULL; |
| 64 | mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), | 58 | value |= 0x1; /* valid bit */ |
| 65 | (u32)(value >> 12)); | ||
| 66 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 67 | mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), | ||
| 68 | (u32)(value >> 44)); | ||
| 69 | 59 | ||
| 70 | if (amdgpu_sriov_vf(adev)) { | 60 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, |
| 71 | /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so | 61 | lower_32_bits(value)); |
| 72 | vbios post doesn't program them, for SRIOV driver need to program them */ | 62 | |
| 73 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE), | 63 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, |
| 74 | adev->mc.vram_start >> 24); | 64 | upper_32_bits(value)); |
| 75 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP), | 65 | } |
| 76 | adev->mc.vram_end >> 24); | 66 | |
| 77 | } | 67 | static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) |
| 68 | { | ||
| 69 | mmhub_v1_0_init_gart_pt_regs(adev); | ||
| 70 | |||
| 71 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, | ||
| 72 | (u32)(adev->mc.gtt_start >> 12)); | ||
| 73 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, | ||
| 74 | (u32)(adev->mc.gtt_start >> 44)); | ||
| 75 | |||
| 76 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, | ||
| 77 | (u32)(adev->mc.gtt_end >> 12)); | ||
| 78 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, | ||
| 79 | (u32)(adev->mc.gtt_end >> 44)); | ||
| 80 | } | ||
| 81 | |||
| 82 | static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) | ||
| 83 | { | ||
| 84 | uint64_t value; | ||
| 85 | uint32_t tmp; | ||
| 78 | 86 | ||
| 79 | /* Disable AGP. */ | 87 | /* Disable AGP. */ |
| 80 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE), 0); | 88 | WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0); |
| 81 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0); | 89 | WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0); |
| 82 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BOT), 0x00FFFFFF); | 90 | WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF); |
| 83 | 91 | ||
| 84 | /* GART Enable. */ | 92 | /* Program the system aperture low logical page number. */ |
| 93 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, | ||
| 94 | adev->mc.vram_start >> 18); | ||
| 95 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, | ||
| 96 | adev->mc.vram_end >> 18); | ||
| 97 | |||
| 98 | /* Set default page address. */ | ||
| 99 | value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + | ||
| 100 | adev->vm_manager.vram_base_offset; | ||
| 101 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, | ||
| 102 | (u32)(value >> 12)); | ||
| 103 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, | ||
| 104 | (u32)(value >> 44)); | ||
| 105 | |||
| 106 | /* Program "protection fault". */ | ||
| 107 | WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, | ||
| 108 | (u32)(adev->dummy_page.addr >> 12)); | ||
| 109 | WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, | ||
| 110 | (u32)((u64)adev->dummy_page.addr >> 44)); | ||
| 111 | |||
| 112 | tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2); | ||
| 113 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, | ||
| 114 | ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); | ||
| 115 | WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2, tmp); | ||
| 116 | } | ||
| 117 | |||
| 118 | static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) | ||
| 119 | { | ||
| 120 | uint32_t tmp; | ||
| 85 | 121 | ||
| 86 | /* Setup TLB control */ | 122 | /* Setup TLB control */ |
| 87 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); | 123 | tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL); |
| 124 | |||
| 88 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); | 125 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); |
| 89 | tmp = REG_SET_FIELD(tmp, | 126 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); |
| 90 | MC_VM_MX_L1_TLB_CNTL, | 127 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, |
| 91 | SYSTEM_ACCESS_MODE, | 128 | ENABLE_ADVANCED_DRIVER_MODEL, 1); |
| 92 | 3); | 129 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, |
| 93 | tmp = REG_SET_FIELD(tmp, | 130 | SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); |
| 94 | MC_VM_MX_L1_TLB_CNTL, | 131 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); |
| 95 | ENABLE_ADVANCED_DRIVER_MODEL, | 132 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, |
| 96 | 1); | 133 | MTYPE, MTYPE_UC);/* XXX for emulation. */ |
| 97 | tmp = REG_SET_FIELD(tmp, | 134 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); |
| 98 | MC_VM_MX_L1_TLB_CNTL, | 135 | |
| 99 | SYSTEM_APERTURE_UNMAPPED_ACCESS, | 136 | WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); |
| 100 | 0); | 137 | } |
| 101 | tmp = REG_SET_FIELD(tmp, | 138 | |
| 102 | MC_VM_MX_L1_TLB_CNTL, | 139 | static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) |
| 103 | ECO_BITS, | 140 | { |
| 104 | 0); | 141 | uint32_t tmp; |
| 105 | tmp = REG_SET_FIELD(tmp, | ||
| 106 | MC_VM_MX_L1_TLB_CNTL, | ||
| 107 | MTYPE, | ||
| 108 | MTYPE_UC);/* XXX for emulation. */ | ||
| 109 | tmp = REG_SET_FIELD(tmp, | ||
| 110 | MC_VM_MX_L1_TLB_CNTL, | ||
| 111 | ATC_EN, | ||
| 112 | 1); | ||
| 113 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); | ||
| 114 | 142 | ||
| 115 | /* Setup L2 cache */ | 143 | /* Setup L2 cache */ |
| 116 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); | 144 | tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); |
| 117 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); | 145 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); |
| 118 | tmp = REG_SET_FIELD(tmp, | 146 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); |
| 119 | VM_L2_CNTL, | 147 | /* XXX for emulation, Refer to closed source code.*/ |
| 120 | ENABLE_L2_FRAGMENT_PROCESSING, | 148 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, |
| 121 | 0); | 149 | 0); |
| 122 | tmp = REG_SET_FIELD(tmp, | ||
| 123 | VM_L2_CNTL, | ||
| 124 | L2_PDE0_CACHE_TAG_GENERATION_MODE, | ||
| 125 | 0);/* XXX for emulation, Refer to closed source code.*/ | ||
| 126 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); | 150 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); |
| 127 | tmp = REG_SET_FIELD(tmp, | 151 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); |
| 128 | VM_L2_CNTL, | 152 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); |
| 129 | CONTEXT1_IDENTITY_ACCESS_MODE, | 153 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); |
| 130 | 1); | ||
| 131 | tmp = REG_SET_FIELD(tmp, | ||
| 132 | VM_L2_CNTL, | ||
| 133 | IDENTITY_MODE_FRAGMENT_SIZE, | ||
| 134 | 0); | ||
| 135 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); | ||
| 136 | 154 | ||
| 137 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2)); | 155 | tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2); |
| 138 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); | 156 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); |
| 139 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 157 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
| 140 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2), tmp); | 158 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); |
| 141 | 159 | ||
| 142 | tmp = mmVM_L2_CNTL3_DEFAULT; | 160 | tmp = mmVM_L2_CNTL3_DEFAULT; |
| 143 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), tmp); | 161 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); |
| 144 | |||
| 145 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4)); | ||
| 146 | tmp = REG_SET_FIELD(tmp, | ||
| 147 | VM_L2_CNTL4, | ||
| 148 | VMC_TAP_PDE_REQUEST_PHYSICAL, | ||
| 149 | 0); | ||
| 150 | tmp = REG_SET_FIELD(tmp, | ||
| 151 | VM_L2_CNTL4, | ||
| 152 | VMC_TAP_PTE_REQUEST_PHYSICAL, | ||
| 153 | 0); | ||
| 154 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4), tmp); | ||
| 155 | |||
| 156 | /* setup context0 */ | ||
| 157 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 158 | mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), | ||
| 159 | (u32)(adev->mc.gtt_start >> 12)); | ||
| 160 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 161 | mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), | ||
| 162 | (u32)(adev->mc.gtt_start >> 44)); | ||
| 163 | |||
| 164 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 165 | mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), | ||
| 166 | (u32)(adev->mc.gtt_end >> 12)); | ||
| 167 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 168 | mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), | ||
| 169 | (u32)(adev->mc.gtt_end >> 44)); | ||
| 170 | |||
| 171 | BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); | ||
| 172 | value = adev->gart.table_addr - adev->mc.vram_start + | ||
| 173 | adev->vm_manager.vram_base_offset; | ||
| 174 | value &= 0x0000FFFFFFFFF000ULL; | ||
| 175 | value |= 0x1; /* valid bit */ | ||
| 176 | 162 | ||
| 177 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | 163 | tmp = mmVM_L2_CNTL4_DEFAULT; |
| 178 | mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), | 164 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); |
| 179 | (u32)value); | 165 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); |
| 180 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | 166 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL4, tmp); |
| 181 | mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), | 167 | } |
| 182 | (u32)(value >> 32)); | ||
| 183 | |||
| 184 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 185 | mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), | ||
| 186 | (u32)(adev->dummy_page.addr >> 12)); | ||
| 187 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 188 | mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), | ||
| 189 | (u32)((u64)adev->dummy_page.addr >> 44)); | ||
| 190 | |||
| 191 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); | ||
| 192 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, | ||
| 193 | ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, | ||
| 194 | 1); | ||
| 195 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); | ||
| 196 | 168 | ||
| 197 | addr = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL); | 169 | static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) |
| 198 | tmp = RREG32(addr); | 170 | { |
| 171 | uint32_t tmp; | ||
| 199 | 172 | ||
| 173 | tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL); | ||
| 200 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); | 174 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); |
| 201 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); | 175 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); |
| 202 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL), tmp); | 176 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp); |
| 203 | 177 | } | |
| 204 | tmp = RREG32(addr); | ||
| 205 | |||
| 206 | /* Disable identity aperture.*/ | ||
| 207 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 208 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); | ||
| 209 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 210 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); | ||
| 211 | 178 | ||
| 212 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | 179 | static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) |
| 213 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); | 180 | { |
| 214 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | 181 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, |
| 215 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); | 182 | 0XFFFFFFFF); |
| 183 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, | ||
| 184 | 0x0000000F); | ||
| 185 | |||
| 186 | WREG32_SOC15(MMHUB, 0, | ||
| 187 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); | ||
| 188 | WREG32_SOC15(MMHUB, 0, | ||
| 189 | mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); | ||
| 190 | |||
| 191 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, | ||
| 192 | 0); | ||
| 193 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, | ||
| 194 | 0); | ||
| 195 | } | ||
| 216 | 196 | ||
| 217 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | 197 | static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) |
| 218 | mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); | 198 | { |
| 219 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | 199 | int i; |
| 220 | mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); | 200 | uint32_t tmp; |
| 221 | 201 | ||
| 222 | for (i = 0; i <= 14; i++) { | 202 | for (i = 0; i <= 14; i++) { |
| 223 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) | 203 | tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i); |
| 224 | + i); | ||
| 225 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, | 204 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, |
| 226 | ENABLE_CONTEXT, 1); | 205 | ENABLE_CONTEXT, 1); |
| 227 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, | 206 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, |
| @@ -243,14 +222,270 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) | |||
| 243 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, | 222 | tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, |
| 244 | PAGE_TABLE_BLOCK_SIZE, | 223 | PAGE_TABLE_BLOCK_SIZE, |
| 245 | adev->vm_manager.block_size - 9); | 224 | adev->vm_manager.block_size - 9); |
| 246 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp); | 225 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); |
| 247 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); | 226 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); |
| 248 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); | 227 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); |
| 249 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, | 228 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, |
| 250 | lower_32_bits(adev->vm_manager.max_pfn - 1)); | 229 | lower_32_bits(adev->vm_manager.max_pfn - 1)); |
| 251 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, | 230 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, |
| 252 | upper_32_bits(adev->vm_manager.max_pfn - 1)); | 231 | upper_32_bits(adev->vm_manager.max_pfn - 1)); |
| 253 | } | 232 | } |
| 233 | } | ||
| 234 | |||
| 235 | static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) | ||
| 236 | { | ||
| 237 | unsigned i; | ||
| 238 | |||
| 239 | for (i = 0; i < 18; ++i) { | ||
| 240 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, | ||
| 241 | 2 * i, 0xffffffff); | ||
| 242 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, | ||
| 243 | 2 * i, 0x1f); | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | struct pctl_data { | ||
| 248 | uint32_t index; | ||
| 249 | uint32_t data; | ||
| 250 | }; | ||
| 251 | |||
| 252 | const struct pctl_data pctl0_data[] = { | ||
| 253 | {0x0, 0x7a640}, | ||
| 254 | {0x9, 0x2a64a}, | ||
| 255 | {0xd, 0x2a680}, | ||
| 256 | {0x11, 0x6a684}, | ||
| 257 | {0x19, 0xea68e}, | ||
| 258 | {0x29, 0xa69e}, | ||
| 259 | {0x2b, 0x34a6c0}, | ||
| 260 | {0x61, 0x83a707}, | ||
| 261 | {0xe6, 0x8a7a4}, | ||
| 262 | {0xf0, 0x1a7b8}, | ||
| 263 | {0xf3, 0xfa7cc}, | ||
| 264 | {0x104, 0x17a7dd}, | ||
| 265 | {0x11d, 0xa7dc}, | ||
| 266 | {0x11f, 0x12a7f5}, | ||
| 267 | {0x133, 0xa808}, | ||
| 268 | {0x135, 0x12a810}, | ||
| 269 | {0x149, 0x7a82c} | ||
| 270 | }; | ||
| 271 | #define PCTL0_DATA_LEN (sizeof(pctl0_data)/sizeof(pctl0_data[0])) | ||
| 272 | |||
| 273 | #define PCTL0_RENG_EXEC_END_PTR 0x151 | ||
| 274 | #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 | ||
| 275 | #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833 | ||
| 276 | |||
| 277 | const struct pctl_data pctl1_data[] = { | ||
| 278 | {0x0, 0x39a000}, | ||
| 279 | {0x3b, 0x44a040}, | ||
| 280 | {0x81, 0x2a08d}, | ||
| 281 | {0x85, 0x6ba094}, | ||
| 282 | {0xf2, 0x18a100}, | ||
| 283 | {0x10c, 0x4a132}, | ||
| 284 | {0x112, 0xca141}, | ||
| 285 | {0x120, 0x2fa158}, | ||
| 286 | {0x151, 0x17a1d0}, | ||
| 287 | {0x16a, 0x1a1e9}, | ||
| 288 | {0x16d, 0x13a1ec}, | ||
| 289 | {0x182, 0x7a201}, | ||
| 290 | {0x18b, 0x3a20a}, | ||
| 291 | {0x190, 0x7a580}, | ||
| 292 | {0x199, 0xa590}, | ||
| 293 | {0x19b, 0x4a594}, | ||
| 294 | {0x1a1, 0x1a59c}, | ||
| 295 | {0x1a4, 0x7a82c}, | ||
| 296 | {0x1ad, 0xfa7cc}, | ||
| 297 | {0x1be, 0x17a7dd}, | ||
| 298 | {0x1d7, 0x12a810} | ||
| 299 | }; | ||
| 300 | #define PCTL1_DATA_LEN (sizeof(pctl1_data)/sizeof(pctl1_data[0])) | ||
| 301 | |||
| 302 | #define PCTL1_RENG_EXEC_END_PTR 0x1ea | ||
| 303 | #define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000 | ||
| 304 | #define PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa20d | ||
| 305 | #define PCTL1_STCTRL_REG_SAVE_RANGE1_BASE 0xa580 | ||
| 306 | #define PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT 0xa59d | ||
| 307 | #define PCTL1_STCTRL_REG_SAVE_RANGE2_BASE 0xa82c | ||
| 308 | #define PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT 0xa833 | ||
| 309 | |||
| 310 | static void mmhub_v1_0_power_gating_write_save_ranges(struct amdgpu_device *adev) | ||
| 311 | { | ||
| 312 | uint32_t tmp = 0; | ||
| 313 | |||
| 314 | /* PCTL0_STCTRL_REGISTER_SAVE_RANGE0 */ | ||
| 315 | tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0, | ||
| 316 | STCTRL_REGISTER_SAVE_BASE, | ||
| 317 | PCTL0_STCTRL_REG_SAVE_RANGE0_BASE); | ||
| 318 | tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0, | ||
| 319 | STCTRL_REGISTER_SAVE_LIMIT, | ||
| 320 | PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT); | ||
| 321 | WREG32_SOC15(MMHUB, 0, mmPCTL0_STCTRL_REGISTER_SAVE_RANGE0, tmp); | ||
| 322 | |||
| 323 | /* PCTL1_STCTRL_REGISTER_SAVE_RANGE0 */ | ||
| 324 | tmp = 0; | ||
| 325 | tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0, | ||
| 326 | STCTRL_REGISTER_SAVE_BASE, | ||
| 327 | PCTL1_STCTRL_REG_SAVE_RANGE0_BASE); | ||
| 328 | tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0, | ||
| 329 | STCTRL_REGISTER_SAVE_LIMIT, | ||
| 330 | PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT); | ||
| 331 | WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE0, tmp); | ||
| 332 | |||
| 333 | /* PCTL1_STCTRL_REGISTER_SAVE_RANGE1 */ | ||
| 334 | tmp = 0; | ||
| 335 | tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1, | ||
| 336 | STCTRL_REGISTER_SAVE_BASE, | ||
| 337 | PCTL1_STCTRL_REG_SAVE_RANGE1_BASE); | ||
| 338 | tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1, | ||
| 339 | STCTRL_REGISTER_SAVE_LIMIT, | ||
| 340 | PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT); | ||
| 341 | WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE1, tmp); | ||
| 342 | |||
| 343 | /* PCTL1_STCTRL_REGISTER_SAVE_RANGE2 */ | ||
| 344 | tmp = 0; | ||
| 345 | tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2, | ||
| 346 | STCTRL_REGISTER_SAVE_BASE, | ||
| 347 | PCTL1_STCTRL_REG_SAVE_RANGE2_BASE); | ||
| 348 | tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2, | ||
| 349 | STCTRL_REGISTER_SAVE_LIMIT, | ||
| 350 | PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT); | ||
| 351 | WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE2, tmp); | ||
| 352 | } | ||
| 353 | |||
| 354 | void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) | ||
| 355 | { | ||
| 356 | uint32_t pctl0_misc = 0; | ||
| 357 | uint32_t pctl0_reng_execute = 0; | ||
| 358 | uint32_t pctl1_misc = 0; | ||
| 359 | uint32_t pctl1_reng_execute = 0; | ||
| 360 | int i = 0; | ||
| 361 | |||
| 362 | if (amdgpu_sriov_vf(adev)) | ||
| 363 | return; | ||
| 364 | |||
| 365 | pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC); | ||
| 366 | pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); | ||
| 367 | pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); | ||
| 368 | pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); | ||
| 369 | |||
| 370 | /* Light sleep must be disabled before writing to pctl0 registers */ | ||
| 371 | pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; | ||
| 372 | WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); | ||
| 373 | |||
| 374 | /* Write data used to access ram of register engine */ | ||
| 375 | for (i = 0; i < PCTL0_DATA_LEN; i++) { | ||
| 376 | WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_INDEX, | ||
| 377 | pctl0_data[i].index); | ||
| 378 | WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_DATA, | ||
| 379 | pctl0_data[i].data); | ||
| 380 | } | ||
| 381 | |||
| 382 | /* Set the reng execute end ptr for pctl0 */ | ||
| 383 | pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, | ||
| 384 | PCTL0_RENG_EXECUTE, | ||
| 385 | RENG_EXECUTE_END_PTR, | ||
| 386 | PCTL0_RENG_EXEC_END_PTR); | ||
| 387 | WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); | ||
| 388 | |||
| 389 | /* Light sleep must be disabled before writing to pctl1 registers */ | ||
| 390 | pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; | ||
| 391 | WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); | ||
| 392 | |||
| 393 | /* Write data used to access ram of register engine */ | ||
| 394 | for (i = 0; i < PCTL1_DATA_LEN; i++) { | ||
| 395 | WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_INDEX, | ||
| 396 | pctl1_data[i].index); | ||
| 397 | WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_DATA, | ||
| 398 | pctl1_data[i].data); | ||
| 399 | } | ||
| 400 | |||
| 401 | /* Set the reng execute end ptr for pctl1 */ | ||
| 402 | pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, | ||
| 403 | PCTL1_RENG_EXECUTE, | ||
| 404 | RENG_EXECUTE_END_PTR, | ||
| 405 | PCTL1_RENG_EXEC_END_PTR); | ||
| 406 | WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); | ||
| 407 | |||
| 408 | mmhub_v1_0_power_gating_write_save_ranges(adev); | ||
| 409 | |||
| 410 | /* Re-enable light sleep */ | ||
| 411 | pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; | ||
| 412 | WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); | ||
| 413 | pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; | ||
| 414 | WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); | ||
| 415 | } | ||
| 416 | |||
| 417 | void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, | ||
| 418 | bool enable) | ||
| 419 | { | ||
| 420 | uint32_t pctl0_reng_execute = 0; | ||
| 421 | uint32_t pctl1_reng_execute = 0; | ||
| 422 | |||
| 423 | if (amdgpu_sriov_vf(adev)) | ||
| 424 | return; | ||
| 425 | |||
| 426 | pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); | ||
| 427 | pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); | ||
| 428 | |||
| 429 | if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { | ||
| 430 | pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, | ||
| 431 | PCTL0_RENG_EXECUTE, | ||
| 432 | RENG_EXECUTE_ON_PWR_UP, 1); | ||
| 433 | pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, | ||
| 434 | PCTL0_RENG_EXECUTE, | ||
| 435 | RENG_EXECUTE_ON_REG_UPDATE, 1); | ||
| 436 | WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); | ||
| 437 | |||
| 438 | pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, | ||
| 439 | PCTL1_RENG_EXECUTE, | ||
| 440 | RENG_EXECUTE_ON_PWR_UP, 1); | ||
| 441 | pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, | ||
| 442 | PCTL1_RENG_EXECUTE, | ||
| 443 | RENG_EXECUTE_ON_REG_UPDATE, 1); | ||
| 444 | WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); | ||
| 445 | |||
| 446 | } else { | ||
| 447 | pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, | ||
| 448 | PCTL0_RENG_EXECUTE, | ||
| 449 | RENG_EXECUTE_ON_PWR_UP, 0); | ||
| 450 | pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, | ||
| 451 | PCTL0_RENG_EXECUTE, | ||
| 452 | RENG_EXECUTE_ON_REG_UPDATE, 0); | ||
| 453 | WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); | ||
| 454 | |||
| 455 | pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, | ||
| 456 | PCTL1_RENG_EXECUTE, | ||
| 457 | RENG_EXECUTE_ON_PWR_UP, 0); | ||
| 458 | pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, | ||
| 459 | PCTL1_RENG_EXECUTE, | ||
| 460 | RENG_EXECUTE_ON_REG_UPDATE, 0); | ||
| 461 | WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) | ||
| 466 | { | ||
| 467 | if (amdgpu_sriov_vf(adev)) { | ||
| 468 | /* | ||
| 469 | * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are | ||
| 470 | * VF copy registers so vbios post doesn't program them, for | ||
| 471 | * SRIOV driver need to program them | ||
| 472 | */ | ||
| 473 | WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE, | ||
| 474 | adev->mc.vram_start >> 24); | ||
| 475 | WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP, | ||
| 476 | adev->mc.vram_end >> 24); | ||
| 477 | } | ||
| 478 | |||
| 479 | /* GART Enable. */ | ||
| 480 | mmhub_v1_0_init_gart_aperture_regs(adev); | ||
| 481 | mmhub_v1_0_init_system_aperture_regs(adev); | ||
| 482 | mmhub_v1_0_init_tlb_regs(adev); | ||
| 483 | mmhub_v1_0_init_cache_regs(adev); | ||
| 484 | |||
| 485 | mmhub_v1_0_enable_system_domain(adev); | ||
| 486 | mmhub_v1_0_disable_identity_aperture(adev); | ||
| 487 | mmhub_v1_0_setup_vmid_config(adev); | ||
| 488 | mmhub_v1_0_program_invalidation(adev); | ||
| 254 | 489 | ||
| 255 | return 0; | 490 | return 0; |
| 256 | } | 491 | } |
| @@ -262,22 +497,22 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) | |||
| 262 | 497 | ||
| 263 | /* Disable all tables */ | 498 | /* Disable all tables */ |
| 264 | for (i = 0; i < 16; i++) | 499 | for (i = 0; i < 16; i++) |
| 265 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL) + i, 0); | 500 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0); |
| 266 | 501 | ||
| 267 | /* Setup TLB control */ | 502 | /* Setup TLB control */ |
| 268 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); | 503 | tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL); |
| 269 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); | 504 | tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); |
| 270 | tmp = REG_SET_FIELD(tmp, | 505 | tmp = REG_SET_FIELD(tmp, |
| 271 | MC_VM_MX_L1_TLB_CNTL, | 506 | MC_VM_MX_L1_TLB_CNTL, |
| 272 | ENABLE_ADVANCED_DRIVER_MODEL, | 507 | ENABLE_ADVANCED_DRIVER_MODEL, |
| 273 | 0); | 508 | 0); |
| 274 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); | 509 | WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); |
| 275 | 510 | ||
| 276 | /* Setup L2 cache */ | 511 | /* Setup L2 cache */ |
| 277 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); | 512 | tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); |
| 278 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); | 513 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); |
| 279 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); | 514 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); |
| 280 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), 0); | 515 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); |
| 281 | } | 516 | } |
| 282 | 517 | ||
| 283 | /** | 518 | /** |
| @@ -289,7 +524,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) | |||
| 289 | void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) | 524 | void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) |
| 290 | { | 525 | { |
| 291 | u32 tmp; | 526 | u32 tmp; |
| 292 | tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); | 527 | tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); |
| 293 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | 528 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, |
| 294 | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 529 | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
| 295 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | 530 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, |
| @@ -314,22 +549,11 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) | |||
| 314 | WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 549 | WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
| 315 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | 550 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, |
| 316 | EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 551 | EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
| 317 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); | 552 | WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); |
| 318 | } | 553 | } |
| 319 | 554 | ||
| 320 | static int mmhub_v1_0_early_init(void *handle) | 555 | void mmhub_v1_0_init(struct amdgpu_device *adev) |
| 321 | { | 556 | { |
| 322 | return 0; | ||
| 323 | } | ||
| 324 | |||
| 325 | static int mmhub_v1_0_late_init(void *handle) | ||
| 326 | { | ||
| 327 | return 0; | ||
| 328 | } | ||
| 329 | |||
| 330 | static int mmhub_v1_0_sw_init(void *handle) | ||
| 331 | { | ||
| 332 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 333 | struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; | 557 | struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; |
| 334 | 558 | ||
| 335 | hub->ctx0_ptb_addr_lo32 = | 559 | hub->ctx0_ptb_addr_lo32 = |
| @@ -349,69 +573,20 @@ static int mmhub_v1_0_sw_init(void *handle) | |||
| 349 | hub->vm_l2_pro_fault_cntl = | 573 | hub->vm_l2_pro_fault_cntl = |
| 350 | SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); | 574 | SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); |
| 351 | 575 | ||
| 352 | return 0; | ||
| 353 | } | ||
| 354 | |||
| 355 | static int mmhub_v1_0_sw_fini(void *handle) | ||
| 356 | { | ||
| 357 | return 0; | ||
| 358 | } | ||
| 359 | |||
| 360 | static int mmhub_v1_0_hw_init(void *handle) | ||
| 361 | { | ||
| 362 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 363 | unsigned i; | ||
| 364 | |||
| 365 | for (i = 0; i < 18; ++i) { | ||
| 366 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 367 | mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + | ||
| 368 | 2 * i, 0xffffffff); | ||
| 369 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 370 | mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + | ||
| 371 | 2 * i, 0x1f); | ||
| 372 | } | ||
| 373 | |||
| 374 | return 0; | ||
| 375 | } | ||
| 376 | |||
| 377 | static int mmhub_v1_0_hw_fini(void *handle) | ||
| 378 | { | ||
| 379 | return 0; | ||
| 380 | } | ||
| 381 | |||
| 382 | static int mmhub_v1_0_suspend(void *handle) | ||
| 383 | { | ||
| 384 | return 0; | ||
| 385 | } | ||
| 386 | |||
| 387 | static int mmhub_v1_0_resume(void *handle) | ||
| 388 | { | ||
| 389 | return 0; | ||
| 390 | } | ||
| 391 | |||
| 392 | static bool mmhub_v1_0_is_idle(void *handle) | ||
| 393 | { | ||
| 394 | return true; | ||
| 395 | } | ||
| 396 | |||
| 397 | static int mmhub_v1_0_wait_for_idle(void *handle) | ||
| 398 | { | ||
| 399 | return 0; | ||
| 400 | } | ||
| 401 | |||
| 402 | static int mmhub_v1_0_soft_reset(void *handle) | ||
| 403 | { | ||
| 404 | return 0; | ||
| 405 | } | 576 | } |
| 406 | 577 | ||
| 407 | static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, | 578 | static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, |
| 408 | bool enable) | 579 | bool enable) |
| 409 | { | 580 | { |
| 410 | uint32_t def, data, def1, data1, def2, data2; | 581 | uint32_t def, data, def1, data1, def2 = 0, data2 = 0; |
| 411 | 582 | ||
| 412 | def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); | 583 | def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); |
| 413 | def1 = data1 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2)); | 584 | |
| 414 | def2 = data2 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2)); | 585 | if (adev->asic_type != CHIP_RAVEN) { |
| 586 | def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); | ||
| 587 | def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2); | ||
| 588 | } else | ||
| 589 | def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV); | ||
| 415 | 590 | ||
| 416 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { | 591 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { |
| 417 | data |= ATC_L2_MISC_CG__ENABLE_MASK; | 592 | data |= ATC_L2_MISC_CG__ENABLE_MASK; |
| @@ -423,12 +598,13 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad | |||
| 423 | DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | | 598 | DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | |
| 424 | DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); | 599 | DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); |
| 425 | 600 | ||
| 426 | data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | | 601 | if (adev->asic_type != CHIP_RAVEN) |
| 427 | DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | | 602 | data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | |
| 428 | DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | | 603 | DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | |
| 429 | DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | | 604 | DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | |
| 430 | DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | | 605 | DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | |
| 431 | DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); | 606 | DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | |
| 607 | DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); | ||
| 432 | } else { | 608 | } else { |
| 433 | data &= ~ATC_L2_MISC_CG__ENABLE_MASK; | 609 | data &= ~ATC_L2_MISC_CG__ENABLE_MASK; |
| 434 | 610 | ||
| @@ -439,22 +615,27 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad | |||
| 439 | DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | | 615 | DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | |
| 440 | DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); | 616 | DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); |
| 441 | 617 | ||
| 442 | data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | | 618 | if (adev->asic_type != CHIP_RAVEN) |
| 443 | DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | | 619 | data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | |
| 444 | DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | | 620 | DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | |
| 445 | DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | | 621 | DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | |
| 446 | DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | | 622 | DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | |
| 447 | DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); | 623 | DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | |
| 624 | DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); | ||
| 448 | } | 625 | } |
| 449 | 626 | ||
| 450 | if (def != data) | 627 | if (def != data) |
| 451 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); | 628 | WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); |
| 452 | 629 | ||
| 453 | if (def1 != data1) | 630 | if (def1 != data1) { |
| 454 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2), data1); | 631 | if (adev->asic_type != CHIP_RAVEN) |
| 632 | WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); | ||
| 633 | else | ||
| 634 | WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV, data1); | ||
| 635 | } | ||
| 455 | 636 | ||
| 456 | if (def2 != data2) | 637 | if (adev->asic_type != CHIP_RAVEN && def2 != data2) |
| 457 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2), data2); | 638 | WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2); |
| 458 | } | 639 | } |
| 459 | 640 | ||
| 460 | static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, | 641 | static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, |
| @@ -462,7 +643,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, | |||
| 462 | { | 643 | { |
| 463 | uint32_t def, data; | 644 | uint32_t def, data; |
| 464 | 645 | ||
| 465 | def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); | 646 | def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); |
| 466 | 647 | ||
| 467 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) | 648 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) |
| 468 | data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; | 649 | data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; |
| @@ -470,7 +651,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, | |||
| 470 | data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; | 651 | data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; |
| 471 | 652 | ||
| 472 | if (def != data) | 653 | if (def != data) |
| 473 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); | 654 | WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); |
| 474 | } | 655 | } |
| 475 | 656 | ||
| 476 | static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, | 657 | static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, |
| @@ -478,7 +659,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade | |||
| 478 | { | 659 | { |
| 479 | uint32_t def, data; | 660 | uint32_t def, data; |
| 480 | 661 | ||
| 481 | def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); | 662 | def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); |
| 482 | 663 | ||
| 483 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) | 664 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) |
| 484 | data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; | 665 | data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; |
| @@ -486,7 +667,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade | |||
| 486 | data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; | 667 | data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; |
| 487 | 668 | ||
| 488 | if (def != data) | 669 | if (def != data) |
| 489 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); | 670 | WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); |
| 490 | } | 671 | } |
| 491 | 672 | ||
| 492 | static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, | 673 | static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, |
| @@ -494,7 +675,7 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, | |||
| 494 | { | 675 | { |
| 495 | uint32_t def, data; | 676 | uint32_t def, data; |
| 496 | 677 | ||
| 497 | def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); | 678 | def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); |
| 498 | 679 | ||
| 499 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && | 680 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && |
| 500 | (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) | 681 | (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) |
| @@ -503,19 +684,18 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, | |||
| 503 | data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; | 684 | data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; |
| 504 | 685 | ||
| 505 | if(def != data) | 686 | if(def != data) |
| 506 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); | 687 | WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); |
| 507 | } | 688 | } |
| 508 | 689 | ||
| 509 | static int mmhub_v1_0_set_clockgating_state(void *handle, | 690 | int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, |
| 510 | enum amd_clockgating_state state) | 691 | enum amd_clockgating_state state) |
| 511 | { | 692 | { |
| 512 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 513 | |||
| 514 | if (amdgpu_sriov_vf(adev)) | 693 | if (amdgpu_sriov_vf(adev)) |
| 515 | return 0; | 694 | return 0; |
| 516 | 695 | ||
| 517 | switch (adev->asic_type) { | 696 | switch (adev->asic_type) { |
| 518 | case CHIP_VEGA10: | 697 | case CHIP_VEGA10: |
| 698 | case CHIP_RAVEN: | ||
| 519 | mmhub_v1_0_update_medium_grain_clock_gating(adev, | 699 | mmhub_v1_0_update_medium_grain_clock_gating(adev, |
| 520 | state == AMD_CG_STATE_GATE ? true : false); | 700 | state == AMD_CG_STATE_GATE ? true : false); |
| 521 | athub_update_medium_grain_clock_gating(adev, | 701 | athub_update_medium_grain_clock_gating(adev, |
| @@ -532,54 +712,20 @@ static int mmhub_v1_0_set_clockgating_state(void *handle, | |||
| 532 | return 0; | 712 | return 0; |
| 533 | } | 713 | } |
| 534 | 714 | ||
| 535 | static void mmhub_v1_0_get_clockgating_state(void *handle, u32 *flags) | 715 | void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) |
| 536 | { | 716 | { |
| 537 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 538 | int data; | 717 | int data; |
| 539 | 718 | ||
| 540 | if (amdgpu_sriov_vf(adev)) | 719 | if (amdgpu_sriov_vf(adev)) |
| 541 | *flags = 0; | 720 | *flags = 0; |
| 542 | 721 | ||
| 543 | /* AMD_CG_SUPPORT_MC_MGCG */ | 722 | /* AMD_CG_SUPPORT_MC_MGCG */ |
| 544 | data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); | 723 | data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); |
| 545 | if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) | 724 | if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) |
| 546 | *flags |= AMD_CG_SUPPORT_MC_MGCG; | 725 | *flags |= AMD_CG_SUPPORT_MC_MGCG; |
| 547 | 726 | ||
| 548 | /* AMD_CG_SUPPORT_MC_LS */ | 727 | /* AMD_CG_SUPPORT_MC_LS */ |
| 549 | data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); | 728 | data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); |
| 550 | if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) | 729 | if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) |
| 551 | *flags |= AMD_CG_SUPPORT_MC_LS; | 730 | *flags |= AMD_CG_SUPPORT_MC_LS; |
| 552 | } | 731 | } |
| 553 | |||
| 554 | static int mmhub_v1_0_set_powergating_state(void *handle, | ||
| 555 | enum amd_powergating_state state) | ||
| 556 | { | ||
| 557 | return 0; | ||
| 558 | } | ||
| 559 | |||
| 560 | const struct amd_ip_funcs mmhub_v1_0_ip_funcs = { | ||
| 561 | .name = "mmhub_v1_0", | ||
| 562 | .early_init = mmhub_v1_0_early_init, | ||
| 563 | .late_init = mmhub_v1_0_late_init, | ||
| 564 | .sw_init = mmhub_v1_0_sw_init, | ||
| 565 | .sw_fini = mmhub_v1_0_sw_fini, | ||
| 566 | .hw_init = mmhub_v1_0_hw_init, | ||
| 567 | .hw_fini = mmhub_v1_0_hw_fini, | ||
| 568 | .suspend = mmhub_v1_0_suspend, | ||
| 569 | .resume = mmhub_v1_0_resume, | ||
| 570 | .is_idle = mmhub_v1_0_is_idle, | ||
| 571 | .wait_for_idle = mmhub_v1_0_wait_for_idle, | ||
| 572 | .soft_reset = mmhub_v1_0_soft_reset, | ||
| 573 | .set_clockgating_state = mmhub_v1_0_set_clockgating_state, | ||
| 574 | .set_powergating_state = mmhub_v1_0_set_powergating_state, | ||
| 575 | .get_clockgating_state = mmhub_v1_0_get_clockgating_state, | ||
| 576 | }; | ||
| 577 | |||
| 578 | const struct amdgpu_ip_block_version mmhub_v1_0_ip_block = | ||
| 579 | { | ||
| 580 | .type = AMD_IP_BLOCK_TYPE_MMHUB, | ||
| 581 | .major = 1, | ||
| 582 | .minor = 0, | ||
| 583 | .rev = 0, | ||
| 584 | .funcs = &mmhub_v1_0_ip_funcs, | ||
| 585 | }; | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index aadedf99c028..57bb940c0ecd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h | |||
| @@ -28,6 +28,13 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); | |||
| 28 | void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); | 28 | void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); |
| 29 | void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, | 29 | void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, |
| 30 | bool value); | 30 | bool value); |
| 31 | void mmhub_v1_0_init(struct amdgpu_device *adev); | ||
| 32 | int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, | ||
| 33 | enum amd_clockgating_state state); | ||
| 34 | void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); | ||
| 35 | void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev); | ||
| 36 | void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, | ||
| 37 | bool enable); | ||
| 31 | 38 | ||
| 32 | extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs; | 39 | extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs; |
| 33 | extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block; | 40 | extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block; |
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 1493301b6a94..bde3ca3c21c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | |||
| @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev) | |||
| 124 | r = -ETIME; | 124 | r = -ETIME; |
| 125 | break; | 125 | break; |
| 126 | } | 126 | } |
| 127 | msleep(1); | 127 | mdelay(5); |
| 128 | timeout -= 1; | 128 | timeout -= 5; |
| 129 | 129 | ||
| 130 | reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, | 130 | reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, |
| 131 | mmBIF_BX_PF0_MAILBOX_CONTROL)); | 131 | mmBIF_BX_PF0_MAILBOX_CONTROL)); |
| @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) | |||
| 141 | r = xgpu_ai_mailbox_rcv_msg(adev, event); | 141 | r = xgpu_ai_mailbox_rcv_msg(adev, event); |
| 142 | while (r) { | 142 | while (r) { |
| 143 | if (timeout <= 0) { | 143 | if (timeout <= 0) { |
| 144 | pr_err("Doesn't get ack from pf.\n"); | 144 | pr_err("Doesn't get msg:%d from pf.\n", event); |
| 145 | r = -ETIME; | 145 | r = -ETIME; |
| 146 | break; | 146 | break; |
| 147 | } | 147 | } |
| 148 | msleep(1); | 148 | mdelay(5); |
| 149 | timeout -= 1; | 149 | timeout -= 5; |
| 150 | 150 | ||
| 151 | r = xgpu_ai_mailbox_rcv_msg(adev, event); | 151 | r = xgpu_ai_mailbox_rcv_msg(adev, event); |
| 152 | } | 152 | } |
| @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, | |||
| 165 | /* start to poll ack */ | 165 | /* start to poll ack */ |
| 166 | r = xgpu_ai_poll_ack(adev); | 166 | r = xgpu_ai_poll_ack(adev); |
| 167 | if (r) | 167 | if (r) |
| 168 | return r; | 168 | pr_err("Doesn't get ack from pf, continue\n"); |
| 169 | 169 | ||
| 170 | xgpu_ai_mailbox_set_valid(adev, false); | 170 | xgpu_ai_mailbox_set_valid(adev, false); |
| 171 | 171 | ||
| @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, | |||
| 174 | req == IDH_REQ_GPU_FINI_ACCESS || | 174 | req == IDH_REQ_GPU_FINI_ACCESS || |
| 175 | req == IDH_REQ_GPU_RESET_ACCESS) { | 175 | req == IDH_REQ_GPU_RESET_ACCESS) { |
| 176 | r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); | 176 | r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); |
| 177 | if (r) | 177 | if (r) { |
| 178 | pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); | ||
| 178 | return r; | 179 | return r; |
| 180 | } | ||
| 179 | } | 181 | } |
| 180 | 182 | ||
| 181 | return 0; | 183 | return 0; |
| @@ -241,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) | |||
| 241 | } | 243 | } |
| 242 | 244 | ||
| 243 | /* Trigger recovery due to world switch failure */ | 245 | /* Trigger recovery due to world switch failure */ |
| 244 | amdgpu_sriov_gpu_reset(adev, false); | 246 | amdgpu_sriov_gpu_reset(adev, NULL); |
| 245 | } | 247 | } |
| 246 | 248 | ||
| 247 | static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, | 249 | static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, |
| @@ -264,12 +266,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, | |||
| 264 | { | 266 | { |
| 265 | int r; | 267 | int r; |
| 266 | 268 | ||
| 267 | /* see what event we get */ | 269 | /* trigger gpu-reset by hypervisor only if TDR disbaled */ |
| 268 | r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); | 270 | if (amdgpu_lockup_timeout == 0) { |
| 271 | /* see what event we get */ | ||
| 272 | r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); | ||
| 269 | 273 | ||
| 270 | /* only handle FLR_NOTIFY now */ | 274 | /* only handle FLR_NOTIFY now */ |
| 271 | if (!r) | 275 | if (!r) |
| 272 | schedule_work(&adev->virt.flr_work); | 276 | schedule_work(&adev->virt.flr_work); |
| 277 | } | ||
| 273 | 278 | ||
| 274 | return 0; | 279 | return 0; |
| 275 | } | 280 | } |
| @@ -296,11 +301,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev) | |||
| 296 | { | 301 | { |
| 297 | int r; | 302 | int r; |
| 298 | 303 | ||
| 299 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); | 304 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq); |
| 300 | if (r) | 305 | if (r) |
| 301 | return r; | 306 | return r; |
| 302 | 307 | ||
| 303 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); | 308 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq); |
| 304 | if (r) { | 309 | if (r) { |
| 305 | amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); | 310 | amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); |
| 306 | return r; | 311 | return r; |
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 7bdc51b02326..171a658135b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | |||
| @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) | |||
| 398 | r = -ETIME; | 398 | r = -ETIME; |
| 399 | break; | 399 | break; |
| 400 | } | 400 | } |
| 401 | msleep(1); | 401 | mdelay(5); |
| 402 | timeout -= 1; | 402 | timeout -= 5; |
| 403 | 403 | ||
| 404 | reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); | 404 | reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); |
| 405 | } | 405 | } |
| @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event) | |||
| 418 | r = -ETIME; | 418 | r = -ETIME; |
| 419 | break; | 419 | break; |
| 420 | } | 420 | } |
| 421 | msleep(1); | 421 | mdelay(5); |
| 422 | timeout -= 1; | 422 | timeout -= 5; |
| 423 | 423 | ||
| 424 | r = xgpu_vi_mailbox_rcv_msg(adev, event); | 424 | r = xgpu_vi_mailbox_rcv_msg(adev, event); |
| 425 | } | 425 | } |
| @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, | |||
| 447 | request == IDH_REQ_GPU_RESET_ACCESS) { | 447 | request == IDH_REQ_GPU_RESET_ACCESS) { |
| 448 | r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); | 448 | r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); |
| 449 | if (r) | 449 | if (r) |
| 450 | return r; | 450 | pr_err("Doesn't get ack from pf, continue\n"); |
| 451 | } | 451 | } |
| 452 | 452 | ||
| 453 | return 0; | 453 | return 0; |
| @@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) | |||
| 514 | } | 514 | } |
| 515 | 515 | ||
| 516 | /* Trigger recovery due to world switch failure */ | 516 | /* Trigger recovery due to world switch failure */ |
| 517 | amdgpu_sriov_gpu_reset(adev, false); | 517 | amdgpu_sriov_gpu_reset(adev, NULL); |
| 518 | } | 518 | } |
| 519 | 519 | ||
| 520 | static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, | 520 | static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, |
| @@ -537,12 +537,15 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev, | |||
| 537 | { | 537 | { |
| 538 | int r; | 538 | int r; |
| 539 | 539 | ||
| 540 | /* see what event we get */ | 540 | /* trigger gpu-reset by hypervisor only if TDR disbaled */ |
| 541 | r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); | 541 | if (amdgpu_lockup_timeout == 0) { |
| 542 | /* see what event we get */ | ||
| 543 | r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); | ||
| 542 | 544 | ||
| 543 | /* only handle FLR_NOTIFY now */ | 545 | /* only handle FLR_NOTIFY now */ |
| 544 | if (!r) | 546 | if (!r) |
| 545 | schedule_work(&adev->virt.flr_work); | 547 | schedule_work(&adev->virt.flr_work); |
| 548 | } | ||
| 546 | 549 | ||
| 547 | return 0; | 550 | return 0; |
| 548 | } | 551 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 97057f4a10de..1e272f785def 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | |||
| @@ -35,7 +35,7 @@ | |||
| 35 | 35 | ||
| 36 | u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) | 36 | u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) |
| 37 | { | 37 | { |
| 38 | u32 tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0)); | 38 | u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); |
| 39 | 39 | ||
| 40 | tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; | 40 | tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; |
| 41 | tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; | 41 | tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; |
| @@ -46,32 +46,33 @@ u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) | |||
| 46 | u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, | 46 | u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, |
| 47 | uint32_t idx) | 47 | uint32_t idx) |
| 48 | { | 48 | { |
| 49 | return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx); | 49 | return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, | 52 | void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, |
| 53 | uint32_t idx, uint32_t val) | 53 | uint32_t idx, uint32_t val) |
| 54 | { | 54 | { |
| 55 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx, val); | 55 | WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) | 58 | void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) |
| 59 | { | 59 | { |
| 60 | if (enable) | 60 | if (enable) |
| 61 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), | 61 | WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, |
| 62 | BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); | 62 | BIF_FB_EN__FB_READ_EN_MASK | |
| 63 | BIF_FB_EN__FB_WRITE_EN_MASK); | ||
| 63 | else | 64 | else |
| 64 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), 0); | 65 | WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); |
| 65 | } | 66 | } |
| 66 | 67 | ||
| 67 | void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) | 68 | void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) |
| 68 | { | 69 | { |
| 69 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); | 70 | WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); |
| 70 | } | 71 | } |
| 71 | 72 | ||
| 72 | u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) | 73 | u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) |
| 73 | { | 74 | { |
| 74 | return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE)); | 75 | return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE); |
| 75 | } | 76 | } |
| 76 | 77 | ||
| 77 | static const u32 nbio_sdma_doorbell_range_reg[] = | 78 | static const u32 nbio_sdma_doorbell_range_reg[] = |
| @@ -97,15 +98,7 @@ void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, | |||
| 97 | void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, | 98 | void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, |
| 98 | bool enable) | 99 | bool enable) |
| 99 | { | 100 | { |
| 100 | u32 tmp; | 101 | WREG32_FIELD15(NBIO, 0, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0); |
| 101 | |||
| 102 | tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN)); | ||
| 103 | if (enable) | ||
| 104 | tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 1); | ||
| 105 | else | ||
| 106 | tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 0); | ||
| 107 | |||
| 108 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN), tmp); | ||
| 109 | } | 102 | } |
| 110 | 103 | ||
| 111 | void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, | 104 | void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, |
| @@ -115,23 +108,23 @@ void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, | |||
| 115 | 108 | ||
| 116 | if (enable) { | 109 | if (enable) { |
| 117 | tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | | 110 | tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | |
| 118 | REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | | 111 | REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | |
| 119 | REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); | 112 | REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); |
| 120 | 113 | ||
| 121 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW), | 114 | WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, |
| 122 | lower_32_bits(adev->doorbell.base)); | 115 | lower_32_bits(adev->doorbell.base)); |
| 123 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH), | 116 | WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, |
| 124 | upper_32_bits(adev->doorbell.base)); | 117 | upper_32_bits(adev->doorbell.base)); |
| 125 | } | 118 | } |
| 126 | 119 | ||
| 127 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL), tmp); | 120 | WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp); |
| 128 | } | 121 | } |
| 129 | 122 | ||
| 130 | 123 | ||
| 131 | void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, | 124 | void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, |
| 132 | bool use_doorbell, int doorbell_index) | 125 | bool use_doorbell, int doorbell_index) |
| 133 | { | 126 | { |
| 134 | u32 ih_doorbell_range = RREG32(SOC15_REG_OFFSET(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE)); | 127 | u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); |
| 135 | 128 | ||
| 136 | if (use_doorbell) { | 129 | if (use_doorbell) { |
| 137 | ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); | 130 | ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); |
| @@ -139,7 +132,7 @@ void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, | |||
| 139 | } else | 132 | } else |
| 140 | ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); | 133 | ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); |
| 141 | 134 | ||
| 142 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_IH_DOORBELL_RANGE), ih_doorbell_range); | 135 | WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); |
| 143 | } | 136 | } |
| 144 | 137 | ||
| 145 | void nbio_v6_1_ih_control(struct amdgpu_device *adev) | 138 | void nbio_v6_1_ih_control(struct amdgpu_device *adev) |
| @@ -147,15 +140,15 @@ void nbio_v6_1_ih_control(struct amdgpu_device *adev) | |||
| 147 | u32 interrupt_cntl; | 140 | u32 interrupt_cntl; |
| 148 | 141 | ||
| 149 | /* setup interrupt control */ | 142 | /* setup interrupt control */ |
| 150 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL2), adev->dummy_page.addr >> 8); | 143 | WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); |
| 151 | interrupt_cntl = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL)); | 144 | interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); |
| 152 | /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi | 145 | /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi |
| 153 | * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN | 146 | * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN |
| 154 | */ | 147 | */ |
| 155 | interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); | 148 | interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); |
| 156 | /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ | 149 | /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ |
| 157 | interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); | 150 | interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); |
| 158 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL), interrupt_cntl); | 151 | WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); |
| 159 | } | 152 | } |
| 160 | 153 | ||
| 161 | void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, | 154 | void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, |
| @@ -251,8 +244,7 @@ void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) | |||
| 251 | { | 244 | { |
| 252 | uint32_t reg; | 245 | uint32_t reg; |
| 253 | 246 | ||
| 254 | reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, | 247 | reg = RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER); |
| 255 | mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER)); | ||
| 256 | if (reg & 1) | 248 | if (reg & 1) |
| 257 | adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; | 249 | adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; |
| 258 | 250 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c new file mode 100644 index 000000000000..aa04632523fa --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | |||
| @@ -0,0 +1,212 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2016 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | #include "amdgpu.h" | ||
| 24 | #include "amdgpu_atombios.h" | ||
| 25 | #include "nbio_v7_0.h" | ||
| 26 | |||
| 27 | #include "vega10/soc15ip.h" | ||
| 28 | #include "raven1/NBIO/nbio_7_0_default.h" | ||
| 29 | #include "raven1/NBIO/nbio_7_0_offset.h" | ||
| 30 | #include "raven1/NBIO/nbio_7_0_sh_mask.h" | ||
| 31 | #include "vega10/vega10_enum.h" | ||
| 32 | |||
| 33 | #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c | ||
| 34 | |||
| 35 | u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) | ||
| 36 | { | ||
| 37 | u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); | ||
| 38 | |||
| 39 | tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; | ||
| 40 | tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; | ||
| 41 | |||
| 42 | return tmp; | ||
| 43 | } | ||
| 44 | |||
/*
 * nbio_v7_0_get_atombios_scratch_regs - read one ATOM BIOS scratch register
 * @adev: amdgpu device handle
 * @idx: scratch register index, used as a dword offset from BIOS_SCRATCH_0
 *
 * Returns the current value of BIOS_SCRATCH_<idx>.
 */
u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev,
					uint32_t idx)
{
	return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx);
}
| 50 | |||
/*
 * nbio_v7_0_set_atombios_scratch_regs - write one ATOM BIOS scratch register
 * @adev: amdgpu device handle
 * @idx: scratch register index, used as a dword offset from BIOS_SCRATCH_0
 * @val: value to store in BIOS_SCRATCH_<idx>
 */
void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev,
					 uint32_t idx, uint32_t val)
{
	WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val);
}
| 56 | |||
| 57 | void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) | ||
| 58 | { | ||
| 59 | if (enable) | ||
| 60 | WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, | ||
| 61 | BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); | ||
| 62 | else | ||
| 63 | WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); | ||
| 64 | } | ||
| 65 | |||
/*
 * nbio_v7_0_hdp_flush - flush the Host Data Path cache
 * @adev: amdgpu device handle
 *
 * Writing 0 to HDP_MEM_COHERENCY_FLUSH_CNTL triggers the flush.
 */
void nbio_v7_0_hdp_flush(struct amdgpu_device *adev)
{
	WREG32_SOC15(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
}
| 70 | |||
/*
 * nbio_v7_0_get_memsize - read the VRAM size reported by the RCC
 * @adev: amdgpu device handle
 *
 * Returns the raw RCC_CONFIG_MEMSIZE register value.
 */
u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
{
	return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE);
}
| 75 | |||
/* MMIO offsets of the per-engine SDMA doorbell range registers,
 * indexed by SDMA instance. */
static const u32 nbio_sdma_doorbell_range_reg[] =
{
	SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE),
	SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE)
};

/*
 * nbio_v7_0_sdma_doorbell_range - program the doorbell aperture of one SDMA engine
 * @adev: amdgpu device handle
 * @instance: SDMA engine index.  NOTE(review): not bounds-checked here; callers
 *            must pass 0 or 1 or the table lookup reads out of bounds — confirm
 *            all call sites.
 * @use_doorbell: true to open the aperture (SIZE = 2), false to close it (SIZE = 0)
 * @doorbell_index: value programmed into the OFFSET field when enabling
 *
 * Read-modify-write of BIF_SDMA{0,1}_DOORBELL_RANGE.  Both registers share the
 * BIF_SDMA0_DOORBELL_RANGE field layout, so REG_SET_FIELD uses that name for
 * either instance.
 */
void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
				   bool use_doorbell, int doorbell_index)
{
	u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]);

	if (use_doorbell) {
		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
	} else
		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);

	WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range);
}
| 95 | |||
/*
 * nbio_v7_0_enable_doorbell_aperture - toggle the BIF doorbell aperture
 * @adev: amdgpu device handle
 * @enable: true to enable the aperture, false to disable it
 *
 * Single-field update of RCC_DOORBELL_APER_EN.BIF_DOORBELL_APER_EN.
 */
void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
					bool enable)
{
	WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
}
| 101 | |||
| 102 | void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, | ||
| 103 | bool use_doorbell, int doorbell_index) | ||
| 104 | { | ||
| 105 | u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); | ||
| 106 | |||
| 107 | if (use_doorbell) { | ||
| 108 | ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); | ||
| 109 | ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2); | ||
| 110 | } else | ||
| 111 | ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); | ||
| 112 | |||
| 113 | WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); | ||
| 114 | } | ||
| 115 | |||
/*
 * nbio_7_0_read_syshub_ind_mmr - read an indirect SYSHUB register
 * @adev: amdgpu device handle
 * @offset: indirect register index written to SYSHUB_INDEX
 *
 * Returns the value read back through SYSHUB_DATA.
 * NOTE(review): the INDEX/DATA pair is not protected by a lock here — verify
 * callers serialize access if this can race with another SYSHUB user.
 */
static uint32_t nbio_7_0_read_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset)
{
	uint32_t data;

	WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset);
	data = RREG32_SOC15(NBIO, 0, mmSYSHUB_DATA);

	return data;
}
| 125 | |||
/*
 * nbio_7_0_write_syshub_ind_mmr - write an indirect SYSHUB register
 * @adev: amdgpu device handle
 * @offset: indirect register index written to SYSHUB_INDEX
 * @data: value pushed through SYSHUB_DATA
 *
 * NOTE(review): same unlocked INDEX/DATA sequence as the read helper — confirm
 * callers serialize access.
 */
static void nbio_7_0_write_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset,
					  uint32_t data)
{
	WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset);
	WREG32_SOC15(NBIO, 0, mmSYSHUB_DATA, data);
}
| 132 | |||
/*
 * nbio_v7_0_update_medium_grain_clock_gating - toggle NBIO/SYSHUB MGCG
 * @adev: amdgpu device handle
 * @enable: requested gating state; only honored when AMD_CG_SUPPORT_BIF_MGCG
 *          is set in adev->cg_flags
 *
 * Updates three clock-gating enables, each via read-modify-write and only
 * written back when the value actually changed:
 *   - NBIF_MGCG_CTRL_LCLK (via the PCIE indirect space, smn address)
 *   - SYSHUB_MGCG_CTRL_SOCCLK (via the SYSHUB indirect helpers)
 *   - SYSHUB_MGCG_CTRL_SHUBCLK (via the SYSHUB indirect helpers)
 *
 * NOTE(review): the two SYSHUB clocks are also gated on AMD_CG_SUPPORT_BIF_MGCG
 * rather than a SYSHUB-specific flag — confirm that is intentional.
 */
void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						bool enable)
{
	uint32_t def, data;

	/* NBIF_MGCG_CTRL_LCLK */
	def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
		data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK;
	else
		data &= ~NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK;

	/* skip the MMIO write when nothing changed */
	if (def != data)
		WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data);

	/* SYSHUB_MGCG_CTRL_SOCCLK */
	def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
		data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK;
	else
		data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK;

	if (def != data)
		nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK, data);

	/* SYSHUB_MGCG_CTRL_SHUBCLK */
	def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
		data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK;
	else
		data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK;

	if (def != data)
		nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK, data);
}
| 171 | |||
| 172 | void nbio_v7_0_ih_control(struct amdgpu_device *adev) | ||
| 173 | { | ||
| 174 | u32 interrupt_cntl; | ||
| 175 | |||
| 176 | /* setup interrupt control */ | ||
| 177 | WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); | ||
| 178 | interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); | ||
| 179 | /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi | ||
| 180 | * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN | ||
| 181 | */ | ||
| 182 | interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); | ||
| 183 | /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ | ||
| 184 | interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); | ||
| 185 | WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); | ||
| 186 | } | ||
| 187 | |||
| 188 | struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; | ||
| 189 | struct nbio_pcie_index_data nbio_v7_0_pcie_index_data; | ||
| 190 | |||
| 191 | int nbio_v7_0_init(struct amdgpu_device *adev) | ||
| 192 | { | ||
| 193 | nbio_v7_0_hdp_flush_reg.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ); | ||
| 194 | nbio_v7_0_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE); | ||
| 195 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK; | ||
| 196 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK; | ||
| 197 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK; | ||
| 198 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK; | ||
| 199 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK; | ||
| 200 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK; | ||
| 201 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK; | ||
| 202 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK; | ||
| 203 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK; | ||
| 204 | nbio_v7_0_hdp_flush_reg.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK; | ||
| 205 | nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK; | ||
| 206 | nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK; | ||
| 207 | |||
| 208 | nbio_v7_0_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); | ||
| 209 | nbio_v7_0_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); | ||
| 210 | |||
| 211 | return 0; | ||
| 212 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h new file mode 100644 index 000000000000..054ff49427e6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2016 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | |||
#ifndef __NBIO_V7_0_H__
#define __NBIO_V7_0_H__

#include "soc15_common.h"

/* Register tables filled in by nbio_v7_0_init(). */
extern struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
extern struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
int nbio_v7_0_init(struct amdgpu_device *adev);
/* ATOM BIOS scratch register accessors (idx is a dword offset from BIOS_SCRATCH_0). */
u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev,
					uint32_t idx);
void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev,
					 uint32_t idx, uint32_t val);
/* Gate framebuffer access through the BIF. */
void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable);
/* Flush the host data path cache. */
void nbio_v7_0_hdp_flush(struct amdgpu_device *adev);
/* Read the VRAM size reported by RCC_CONFIG_MEMSIZE. */
u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev);
/* Doorbell aperture programming for SDMA engines and the IH ring. */
void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
				   bool use_doorbell, int doorbell_index);
void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
					bool enable);
void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev,
				 bool use_doorbell, int doorbell_index);
/* Program the interrupt handler control registers. */
void nbio_v7_0_ih_control(struct amdgpu_device *adev);
/* Read the ATI revision id strap. */
u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev);
/* Toggle NBIO/SYSHUB medium-grain clock gating. */
void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						bool enable);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c new file mode 100644 index 000000000000..2258323a3c26 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | |||
| @@ -0,0 +1,308 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2016 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | * Author: Huang Rui | ||
| 23 | * | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/firmware.h> | ||
| 27 | #include "amdgpu.h" | ||
| 28 | #include "amdgpu_psp.h" | ||
| 29 | #include "amdgpu_ucode.h" | ||
| 30 | #include "soc15_common.h" | ||
| 31 | #include "psp_v10_0.h" | ||
| 32 | |||
| 33 | #include "vega10/soc15ip.h" | ||
| 34 | #include "raven1/MP/mp_10_0_offset.h" | ||
| 35 | #include "raven1/GC/gc_9_1_offset.h" | ||
| 36 | #include "raven1/SDMA0/sdma0_4_1_offset.h" | ||
| 37 | |||
/*
 * psp_v10_0_get_fw_type - map an amdgpu firmware id to the PSP GFX firmware type
 * @ucode: firmware descriptor whose ucode_id is translated
 * @type: out parameter receiving the matching psp_gfx_fw_type
 *
 * Returns 0 on success, -EINVAL for ids the PSP loader does not handle.
 * Note: both MEC1 and MEC2 map to GFX_FW_TYPE_CP_MEC; only their JT
 * entries distinguish the two engines.
 */
static int
psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
{
	switch(ucode->ucode_id) {
	case AMDGPU_UCODE_ID_SDMA0:
		*type = GFX_FW_TYPE_SDMA0;
		break;
	case AMDGPU_UCODE_ID_SDMA1:
		*type = GFX_FW_TYPE_SDMA1;
		break;
	case AMDGPU_UCODE_ID_CP_CE:
		*type = GFX_FW_TYPE_CP_CE;
		break;
	case AMDGPU_UCODE_ID_CP_PFP:
		*type = GFX_FW_TYPE_CP_PFP;
		break;
	case AMDGPU_UCODE_ID_CP_ME:
		*type = GFX_FW_TYPE_CP_ME;
		break;
	case AMDGPU_UCODE_ID_CP_MEC1:
		*type = GFX_FW_TYPE_CP_MEC;
		break;
	case AMDGPU_UCODE_ID_CP_MEC1_JT:
		*type = GFX_FW_TYPE_CP_MEC_ME1;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2:
		*type = GFX_FW_TYPE_CP_MEC;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2_JT:
		*type = GFX_FW_TYPE_CP_MEC_ME2;
		break;
	case AMDGPU_UCODE_ID_RLC_G:
		*type = GFX_FW_TYPE_RLC_G;
		break;
	case AMDGPU_UCODE_ID_SMC:
		*type = GFX_FW_TYPE_SMU;
		break;
	case AMDGPU_UCODE_ID_UVD:
		*type = GFX_FW_TYPE_UVD;
		break;
	case AMDGPU_UCODE_ID_VCE:
		*type = GFX_FW_TYPE_VCE;
		break;
	case AMDGPU_UCODE_ID_MAXIMUM:
	default:
		return -EINVAL;
	}

	return 0;
}
| 88 | |||
/*
 * psp_v10_0_prep_cmd_buf - build a LOAD_IP_FW command for one firmware image
 * @ucode: firmware descriptor (GPU address, raw image, id)
 * @cmd: command buffer to fill; zeroed here first
 *
 * Populates the 64-bit firmware address (split low/high), the size taken from
 * the image's common header, and the PSP firmware type derived from the ucode
 * id.  Returns 0 on success or the -EINVAL from psp_v10_0_get_fw_type for
 * unsupported firmware ids (an error is also logged in that case).
 */
int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd)
{
	int ret;
	uint64_t fw_mem_mc_addr = ucode->mc_addr;
	struct common_firmware_header *header;

	memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
	header = (struct common_firmware_header *)ucode->fw;

	cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
	cmd->cmd.cmd_load_ip_fw.fw_size = le32_to_cpu(header->ucode_size_bytes);

	ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
	if (ret)
		DRM_ERROR("Unknown firmware type\n");

	return ret;
}
| 109 | |||
| 110 | int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) | ||
| 111 | { | ||
| 112 | int ret = 0; | ||
| 113 | unsigned int psp_ring_reg = 0; | ||
| 114 | struct psp_ring *ring; | ||
| 115 | struct amdgpu_device *adev = psp->adev; | ||
| 116 | |||
| 117 | ring = &psp->km_ring; | ||
| 118 | |||
| 119 | ring->ring_type = ring_type; | ||
| 120 | |||
| 121 | /* allocate 4k Page of Local Frame Buffer memory for ring */ | ||
| 122 | ring->ring_size = 0x1000; | ||
| 123 | ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, | ||
| 124 | AMDGPU_GEM_DOMAIN_VRAM, | ||
| 125 | &adev->firmware.rbuf, | ||
| 126 | &ring->ring_mem_mc_addr, | ||
| 127 | (void **)&ring->ring_mem); | ||
| 128 | if (ret) { | ||
| 129 | ring->ring_size = 0; | ||
| 130 | return ret; | ||
| 131 | } | ||
| 132 | |||
| 133 | /* Write low address of the ring to C2PMSG_69 */ | ||
| 134 | psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); | ||
| 135 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); | ||
| 136 | /* Write high address of the ring to C2PMSG_70 */ | ||
| 137 | psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); | ||
| 138 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); | ||
| 139 | /* Write size of ring to C2PMSG_71 */ | ||
| 140 | psp_ring_reg = ring->ring_size; | ||
| 141 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); | ||
| 142 | /* Write the ring initialization command to C2PMSG_64 */ | ||
| 143 | psp_ring_reg = ring_type; | ||
| 144 | psp_ring_reg = psp_ring_reg << 16; | ||
| 145 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); | ||
| 146 | /* Wait for response flag (bit 31) in C2PMSG_64 */ | ||
| 147 | psp_ring_reg = 0; | ||
| 148 | while ((psp_ring_reg & 0x80000000) == 0) { | ||
| 149 | psp_ring_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64); | ||
| 150 | } | ||
| 151 | |||
| 152 | return 0; | ||
| 153 | } | ||
| 154 | |||
/*
 * psp_v10_0_cmd_submit - queue one command frame on the PSP KM (GPCOM) ring
 * @psp: PSP context
 * @ucode: firmware descriptor associated with the command (unused here)
 * @cmd_buf_mc_addr: GPU address of the command buffer
 * @fence_mc_addr: GPU address the PSP writes the fence value to
 * @index: fence value signalled when this frame completes
 *
 * Reads the current write pointer from C2PMSG_67 (in dwords), fills the next
 * psp_gfx_rb_frame in the ring, then advances and publishes the write pointer.
 * Always returns 0.
 *
 * NOTE(review): ring->ring_size is in bytes (0x1000 from ring_init) while
 * psp_write_ptr_reg counts dwords, so both the "% ring->ring_size" wrap test
 * and the ">= ring->ring_size" reset compare mixed units — confirm intended
 * ring capacity / wrap behavior.
 */
int psp_v10_0_cmd_submit(struct psp_context *psp,
			 struct amdgpu_firmware_info *ucode,
			 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
			 int index)
{
	unsigned int psp_write_ptr_reg = 0;
	struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
	struct psp_ring *ring = &psp->km_ring;
	struct amdgpu_device *adev = psp->adev;

	/* KM (GPCOM) prepare write pointer */
	psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);

	/* Update KM RB frame pointer to new frame */
	if ((psp_write_ptr_reg % ring->ring_size) == 0)
		write_frame = ring->ring_mem;
	else
		write_frame = ring->ring_mem + (psp_write_ptr_reg / (sizeof(struct psp_gfx_rb_frame) / 4));

	/* Update KM RB frame */
	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
	write_frame->fence_value = index;

	/* Update the write Pointer in DWORDs */
	psp_write_ptr_reg += sizeof(struct psp_gfx_rb_frame) / 4;
	psp_write_ptr_reg = (psp_write_ptr_reg >= ring->ring_size) ? 0 : psp_write_ptr_reg;
	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);

	return 0;
}
| 188 | |||
/*
 * psp_v10_0_sram_map - look up the ucode SRAM window for a firmware id
 * @sram_offset: out: starting offset written to the ucode address register
 * @sram_addr_reg_offset: out: MMIO offset of the engine's UCODE_ADDR register
 * @sram_data_reg_offset: out: MMIO offset of the engine's UCODE_DATA register
 * @ucode_id: firmware id selecting the engine
 *
 * Returns 0 when the id has a known address/data register pair, -EINVAL
 * otherwise.  The #if 0 sections are intentionally kept as markers for
 * engines whose mapping has not been confirmed yet (SMC, SDMA1, UVD, VCE).
 */
static int
psp_v10_0_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
		   unsigned int *sram_data_reg_offset,
		   enum AMDGPU_UCODE_ID ucode_id)
{
	int ret = 0;

	switch(ucode_id) {
/* TODO: needs to confirm */
#if 0
	case AMDGPU_UCODE_ID_SMC:
		*sram_offset = 0;
		*sram_addr_reg_offset = 0;
		*sram_data_reg_offset = 0;
		break;
#endif

	case AMDGPU_UCODE_ID_CP_CE:
		*sram_offset = 0x0;
		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
		break;

	case AMDGPU_UCODE_ID_CP_PFP:
		*sram_offset = 0x0;
		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
		break;

	case AMDGPU_UCODE_ID_CP_ME:
		*sram_offset = 0x0;
		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
		break;

	case AMDGPU_UCODE_ID_CP_MEC1:
		*sram_offset = 0x10000;
		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
		break;

	case AMDGPU_UCODE_ID_CP_MEC2:
		*sram_offset = 0x10000;
		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
		break;

	case AMDGPU_UCODE_ID_RLC_G:
		*sram_offset = 0x2000;
		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
		break;

	case AMDGPU_UCODE_ID_SDMA0:
		*sram_offset = 0x0;
		*sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
		*sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
		break;

/* TODO: needs to confirm */
#if 0
	case AMDGPU_UCODE_ID_SDMA1:
		*sram_offset = ;
		*sram_addr_reg_offset = ;
		break;

	case AMDGPU_UCODE_ID_UVD:
		*sram_offset = ;
		*sram_addr_reg_offset = ;
		break;

	case AMDGPU_UCODE_ID_VCE:
		*sram_offset = ;
		*sram_addr_reg_offset = ;
		break;
#endif

	case AMDGPU_UCODE_ID_MAXIMUM:
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
| 274 | |||
| 275 | bool psp_v10_0_compare_sram_data(struct psp_context *psp, | ||
| 276 | struct amdgpu_firmware_info *ucode, | ||
| 277 | enum AMDGPU_UCODE_ID ucode_type) | ||
| 278 | { | ||
| 279 | int err = 0; | ||
| 280 | unsigned int fw_sram_reg_val = 0; | ||
| 281 | unsigned int fw_sram_addr_reg_offset = 0; | ||
| 282 | unsigned int fw_sram_data_reg_offset = 0; | ||
| 283 | unsigned int ucode_size; | ||
| 284 | uint32_t *ucode_mem = NULL; | ||
| 285 | struct amdgpu_device *adev = psp->adev; | ||
| 286 | |||
| 287 | err = psp_v10_0_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset, | ||
| 288 | &fw_sram_data_reg_offset, ucode_type); | ||
| 289 | if (err) | ||
| 290 | return false; | ||
| 291 | |||
| 292 | WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); | ||
| 293 | |||
| 294 | ucode_size = ucode->ucode_size; | ||
| 295 | ucode_mem = (uint32_t *)ucode->kaddr; | ||
| 296 | while (!ucode_size) { | ||
| 297 | fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); | ||
| 298 | |||
| 299 | if (*ucode_mem != fw_sram_reg_val) | ||
| 300 | return false; | ||
| 301 | |||
| 302 | ucode_mem++; | ||
| 303 | /* 4 bytes */ | ||
| 304 | ucode_size -= 4; | ||
| 305 | } | ||
| 306 | |||
| 307 | return true; | ||
| 308 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h new file mode 100644 index 000000000000..2022b7b7151e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2016 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | * Author: Huang Rui | ||
| 23 | * | ||
| 24 | */ | ||
#ifndef __PSP_V10_0_H__
#define __PSP_V10_0_H__

#include "amdgpu_psp.h"

/* Build a LOAD_IP_FW command for one firmware image. */
extern int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
				  struct psp_gfx_cmd_resp *cmd);
/* Allocate the PSP kernel-mode ring and issue the ring-init command. */
extern int psp_v10_0_ring_init(struct psp_context *psp,
			       enum psp_ring_type ring_type);
/* Queue one command frame on the KM (GPCOM) ring; index is the fence value. */
extern int psp_v10_0_cmd_submit(struct psp_context *psp,
				struct amdgpu_firmware_info *ucode,
				uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
				int index);
/* Read a loaded image back through the engine's SRAM data port and compare. */
extern bool psp_v10_0_compare_sram_data(struct psp_context *psp,
					struct amdgpu_firmware_info *ucode,
					enum AMDGPU_UCODE_ID ucode_type);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 60a6407ba267..c98d77d0c8f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | */ | 24 | */ |
| 25 | 25 | ||
| 26 | #include <linux/firmware.h> | 26 | #include <linux/firmware.h> |
| 27 | #include "drmP.h" | 27 | #include <drm/drmP.h> |
| 28 | #include "amdgpu.h" | 28 | #include "amdgpu.h" |
| 29 | #include "amdgpu_psp.h" | 29 | #include "amdgpu_psp.h" |
| 30 | #include "amdgpu_ucode.h" | 30 | #include "amdgpu_ucode.h" |
| @@ -172,7 +172,7 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) | |||
| 172 | /* Check sOS sign of life register to confirm sys driver and sOS | 172 | /* Check sOS sign of life register to confirm sys driver and sOS |
| 173 | * are already been loaded. | 173 | * are already been loaded. |
| 174 | */ | 174 | */ |
| 175 | sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)); | 175 | sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); |
| 176 | if (sol_reg) | 176 | if (sol_reg) |
| 177 | return 0; | 177 | return 0; |
| 178 | 178 | ||
| @@ -188,10 +188,10 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) | |||
| 188 | memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); | 188 | memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); |
| 189 | 189 | ||
| 190 | /* Provide the sys driver to bootrom */ | 190 | /* Provide the sys driver to bootrom */ |
| 191 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), | 191 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, |
| 192 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); | 192 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); |
| 193 | psp_gfxdrv_command_reg = 1 << 16; | 193 | psp_gfxdrv_command_reg = 1 << 16; |
| 194 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), | 194 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, |
| 195 | psp_gfxdrv_command_reg); | 195 | psp_gfxdrv_command_reg); |
| 196 | 196 | ||
| 197 | /* there might be handshake issue with hardware which needs delay */ | 197 | /* there might be handshake issue with hardware which needs delay */ |
| @@ -213,7 +213,7 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) | |||
| 213 | /* Check sOS sign of life register to confirm sys driver and sOS | 213 | /* Check sOS sign of life register to confirm sys driver and sOS |
| 214 | * are already been loaded. | 214 | * are already been loaded. |
| 215 | */ | 215 | */ |
| 216 | sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)); | 216 | sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); |
| 217 | if (sol_reg) | 217 | if (sol_reg) |
| 218 | return 0; | 218 | return 0; |
| 219 | 219 | ||
| @@ -229,17 +229,17 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) | |||
| 229 | memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); | 229 | memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); |
| 230 | 230 | ||
| 231 | /* Provide the PSP secure OS to bootrom */ | 231 | /* Provide the PSP secure OS to bootrom */ |
| 232 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), | 232 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, |
| 233 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); | 233 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); |
| 234 | psp_gfxdrv_command_reg = 2 << 16; | 234 | psp_gfxdrv_command_reg = 2 << 16; |
| 235 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), | 235 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, |
| 236 | psp_gfxdrv_command_reg); | 236 | psp_gfxdrv_command_reg); |
| 237 | 237 | ||
| 238 | /* there might be handshake issue with hardware which needs delay */ | 238 | /* there might be handshake issue with hardware which needs delay */ |
| 239 | mdelay(20); | 239 | mdelay(20); |
| 240 | #if 0 | 240 | #if 0 |
| 241 | ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), | 241 | ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), |
| 242 | RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)), | 242 | RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), |
| 243 | 0, true); | 243 | 0, true); |
| 244 | #endif | 244 | #endif |
| 245 | 245 | ||
| @@ -254,8 +254,8 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd | |||
| 254 | memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); | 254 | memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); |
| 255 | 255 | ||
| 256 | cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; | 256 | cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; |
| 257 | cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = (uint32_t)fw_mem_mc_addr; | 257 | cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); |
| 258 | cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = (uint32_t)((uint64_t)fw_mem_mc_addr >> 32); | 258 | cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); |
| 259 | cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; | 259 | cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; |
| 260 | 260 | ||
| 261 | ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); | 261 | ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); |
| @@ -299,17 +299,17 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) | |||
| 299 | 299 | ||
| 300 | /* Write low address of the ring to C2PMSG_69 */ | 300 | /* Write low address of the ring to C2PMSG_69 */ |
| 301 | psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); | 301 | psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); |
| 302 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg); | 302 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); |
| 303 | /* Write high address of the ring to C2PMSG_70 */ | 303 | /* Write high address of the ring to C2PMSG_70 */ |
| 304 | psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); | 304 | psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); |
| 305 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_70), psp_ring_reg); | 305 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); |
| 306 | /* Write size of ring to C2PMSG_71 */ | 306 | /* Write size of ring to C2PMSG_71 */ |
| 307 | psp_ring_reg = ring->ring_size; | 307 | psp_ring_reg = ring->ring_size; |
| 308 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_71), psp_ring_reg); | 308 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); |
| 309 | /* Write the ring initialization command to C2PMSG_64 */ | 309 | /* Write the ring initialization command to C2PMSG_64 */ |
| 310 | psp_ring_reg = ring_type; | 310 | psp_ring_reg = ring_type; |
| 311 | psp_ring_reg = psp_ring_reg << 16; | 311 | psp_ring_reg = psp_ring_reg << 16; |
| 312 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); | 312 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); |
| 313 | 313 | ||
| 314 | /* there might be handshake issue with hardware which needs delay */ | 314 | /* there might be handshake issue with hardware which needs delay */ |
| 315 | mdelay(20); | 315 | mdelay(20); |
| @@ -332,7 +332,7 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) | |||
| 332 | 332 | ||
| 333 | /* Write the ring destroy command to C2PMSG_64 */ | 333 | /* Write the ring destroy command to C2PMSG_64 */ |
| 334 | psp_ring_reg = 3 << 16; | 334 | psp_ring_reg = 3 << 16; |
| 335 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); | 335 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); |
| 336 | 336 | ||
| 337 | /* there might be handshake issue with hardware which needs delay */ | 337 | /* there might be handshake issue with hardware which needs delay */ |
| 338 | mdelay(20); | 338 | mdelay(20); |
| @@ -361,7 +361,7 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, | |||
| 361 | uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; | 361 | uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; |
| 362 | 362 | ||
| 363 | /* KM (GPCOM) prepare write pointer */ | 363 | /* KM (GPCOM) prepare write pointer */ |
| 364 | psp_write_ptr_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67)); | 364 | psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); |
| 365 | 365 | ||
| 366 | /* Update KM RB frame pointer to new frame */ | 366 | /* Update KM RB frame pointer to new frame */ |
| 367 | /* write_frame ptr increments by size of rb_frame in bytes */ | 367 | /* write_frame ptr increments by size of rb_frame in bytes */ |
| @@ -375,15 +375,15 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, | |||
| 375 | memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); | 375 | memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); |
| 376 | 376 | ||
| 377 | /* Update KM RB frame */ | 377 | /* Update KM RB frame */ |
| 378 | write_frame->cmd_buf_addr_hi = (unsigned int)(cmd_buf_mc_addr >> 32); | 378 | write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); |
| 379 | write_frame->cmd_buf_addr_lo = (unsigned int)(cmd_buf_mc_addr); | 379 | write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); |
| 380 | write_frame->fence_addr_hi = (unsigned int)(fence_mc_addr >> 32); | 380 | write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); |
| 381 | write_frame->fence_addr_lo = (unsigned int)(fence_mc_addr); | 381 | write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); |
| 382 | write_frame->fence_value = index; | 382 | write_frame->fence_value = index; |
| 383 | 383 | ||
| 384 | /* Update the write Pointer in DWORDs */ | 384 | /* Update the write Pointer in DWORDs */ |
| 385 | psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; | 385 | psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; |
| 386 | WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67), psp_write_ptr_reg); | 386 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); |
| 387 | 387 | ||
| 388 | return 0; | 388 | return 0; |
| 389 | } | 389 | } |
| @@ -515,7 +515,7 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) | |||
| 515 | uint32_t reg; | 515 | uint32_t reg; |
| 516 | 516 | ||
| 517 | reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000; | 517 | reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000; |
| 518 | WREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), reg); | 518 | WREG32_SOC15(NBIO, 0, mmPCIE_INDEX2, reg); |
| 519 | reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)); | 519 | reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2); |
| 520 | return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; | 520 | return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; |
| 521 | } | 521 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a69e5d4e1d2a..1d766ae98dc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | |||
| @@ -643,8 +643,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) | |||
| 643 | WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); | 643 | WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); |
| 644 | 644 | ||
| 645 | /* Initialize the ring buffer's read and write pointers */ | 645 | /* Initialize the ring buffer's read and write pointers */ |
| 646 | ring->wptr = 0; | ||
| 646 | WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); | 647 | WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); |
| 647 | WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); | 648 | sdma_v3_0_ring_set_wptr(ring); |
| 648 | WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); | 649 | WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); |
| 649 | WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); | 650 | WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); |
| 650 | 651 | ||
| @@ -659,9 +660,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) | |||
| 659 | WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); | 660 | WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); |
| 660 | WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); | 661 | WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); |
| 661 | 662 | ||
| 662 | ring->wptr = 0; | ||
| 663 | WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); | ||
| 664 | |||
| 665 | doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); | 663 | doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); |
| 666 | 664 | ||
| 667 | if (ring->use_doorbell) { | 665 | if (ring->use_doorbell) { |
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ecc70a730a54..4a65697ccc94 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | #include "vega10/MMHUB/mmhub_1_0_offset.h" | 35 | #include "vega10/MMHUB/mmhub_1_0_offset.h" |
| 36 | #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" | 36 | #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" |
| 37 | #include "vega10/HDP/hdp_4_0_offset.h" | 37 | #include "vega10/HDP/hdp_4_0_offset.h" |
| 38 | #include "raven1/SDMA0/sdma0_4_1_default.h" | ||
| 38 | 39 | ||
| 39 | #include "soc15_common.h" | 40 | #include "soc15_common.h" |
| 40 | #include "soc15.h" | 41 | #include "soc15.h" |
| @@ -42,6 +43,10 @@ | |||
| 42 | 43 | ||
| 43 | MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); | 44 | MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); |
| 44 | MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); | 45 | MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); |
| 46 | MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); | ||
| 47 | |||
| 48 | #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L | ||
| 49 | #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L | ||
| 45 | 50 | ||
| 46 | static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); | 51 | static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); |
| 47 | static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); | 52 | static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); |
| @@ -82,6 +87,26 @@ static const u32 golden_settings_sdma_vg10[] = { | |||
| 82 | SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 | 87 | SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 |
| 83 | }; | 88 | }; |
| 84 | 89 | ||
| 90 | static const u32 golden_settings_sdma_4_1[] = | ||
| 91 | { | ||
| 92 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, | ||
| 93 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, | ||
| 94 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, | ||
| 95 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, | ||
| 96 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0xfc3fffff, 0x40000051, | ||
| 97 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0111, 0x00000100, | ||
| 98 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, | ||
| 99 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0111, 0x00000100, | ||
| 100 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, | ||
| 101 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0 | ||
| 102 | }; | ||
| 103 | |||
| 104 | static const u32 golden_settings_sdma_rv1[] = | ||
| 105 | { | ||
| 106 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00000002, | ||
| 107 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00000002 | ||
| 108 | }; | ||
| 109 | |||
| 85 | static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) | 110 | static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) |
| 86 | { | 111 | { |
| 87 | u32 base = 0; | 112 | u32 base = 0; |
| @@ -112,25 +137,19 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 112 | golden_settings_sdma_vg10, | 137 | golden_settings_sdma_vg10, |
| 113 | (const u32)ARRAY_SIZE(golden_settings_sdma_vg10)); | 138 | (const u32)ARRAY_SIZE(golden_settings_sdma_vg10)); |
| 114 | break; | 139 | break; |
| 140 | case CHIP_RAVEN: | ||
| 141 | amdgpu_program_register_sequence(adev, | ||
| 142 | golden_settings_sdma_4_1, | ||
| 143 | (const u32)ARRAY_SIZE(golden_settings_sdma_4_1)); | ||
| 144 | amdgpu_program_register_sequence(adev, | ||
| 145 | golden_settings_sdma_rv1, | ||
| 146 | (const u32)ARRAY_SIZE(golden_settings_sdma_rv1)); | ||
| 147 | break; | ||
| 115 | default: | 148 | default: |
| 116 | break; | 149 | break; |
| 117 | } | 150 | } |
| 118 | } | 151 | } |
| 119 | 152 | ||
| 120 | static void sdma_v4_0_print_ucode_regs(void *handle) | ||
| 121 | { | ||
| 122 | int i; | ||
| 123 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 124 | |||
| 125 | dev_info(adev->dev, "VEGA10 SDMA ucode registers\n"); | ||
| 126 | for (i = 0; i < adev->sdma.num_instances; i++) { | ||
| 127 | dev_info(adev->dev, " SDMA%d_UCODE_ADDR=0x%08X\n", | ||
| 128 | i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR))); | ||
| 129 | dev_info(adev->dev, " SDMA%d_UCODE_CHECKSUM=0x%08X\n", | ||
| 130 | i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_CHECKSUM))); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | /** | 153 | /** |
| 135 | * sdma_v4_0_init_microcode - load ucode images from disk | 154 | * sdma_v4_0_init_microcode - load ucode images from disk |
| 136 | * | 155 | * |
| @@ -158,6 +177,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) | |||
| 158 | case CHIP_VEGA10: | 177 | case CHIP_VEGA10: |
| 159 | chip_name = "vega10"; | 178 | chip_name = "vega10"; |
| 160 | break; | 179 | break; |
| 180 | case CHIP_RAVEN: | ||
| 181 | chip_name = "raven"; | ||
| 182 | break; | ||
| 161 | default: | 183 | default: |
| 162 | BUG(); | 184 | BUG(); |
| 163 | } | 185 | } |
| @@ -350,7 +372,9 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) | |||
| 350 | u32 ref_and_mask = 0; | 372 | u32 ref_and_mask = 0; |
| 351 | struct nbio_hdp_flush_reg *nbio_hf_reg; | 373 | struct nbio_hdp_flush_reg *nbio_hf_reg; |
| 352 | 374 | ||
| 353 | if (ring->adev->asic_type == CHIP_VEGA10) | 375 | if (ring->adev->flags & AMD_IS_APU) |
| 376 | nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; | ||
| 377 | else | ||
| 354 | nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; | 378 | nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; |
| 355 | 379 | ||
| 356 | if (ring == &ring->adev->sdma.instance[0].ring) | 380 | if (ring == &ring->adev->sdma.instance[0].ring) |
| @@ -581,7 +605,10 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) | |||
| 581 | } | 605 | } |
| 582 | WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); | 606 | WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); |
| 583 | WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); | 607 | WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); |
| 584 | nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); | 608 | if (adev->flags & AMD_IS_APU) |
| 609 | nbio_v7_0_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); | ||
| 610 | else | ||
| 611 | nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); | ||
| 585 | 612 | ||
| 586 | if (amdgpu_sriov_vf(adev)) | 613 | if (amdgpu_sriov_vf(adev)) |
| 587 | sdma_v4_0_ring_set_wptr(ring); | 614 | sdma_v4_0_ring_set_wptr(ring); |
| @@ -633,6 +660,69 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) | |||
| 633 | return 0; | 660 | return 0; |
| 634 | } | 661 | } |
| 635 | 662 | ||
| 663 | static void | ||
| 664 | sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable) | ||
| 665 | { | ||
| 666 | uint32_t def, data; | ||
| 667 | |||
| 668 | if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) { | ||
| 669 | /* disable idle interrupt */ | ||
| 670 | def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); | ||
| 671 | data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; | ||
| 672 | |||
| 673 | if (data != def) | ||
| 674 | WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); | ||
| 675 | } else { | ||
| 676 | /* disable idle interrupt */ | ||
| 677 | def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); | ||
| 678 | data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; | ||
| 679 | if (data != def) | ||
| 680 | WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); | ||
| 681 | } | ||
| 682 | } | ||
| 683 | |||
| 684 | static void sdma_v4_1_init_power_gating(struct amdgpu_device *adev) | ||
| 685 | { | ||
| 686 | uint32_t def, data; | ||
| 687 | |||
| 688 | /* Enable HW based PG. */ | ||
| 689 | def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); | ||
| 690 | data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK; | ||
| 691 | if (data != def) | ||
| 692 | WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); | ||
| 693 | |||
| 694 | /* enable interrupt */ | ||
| 695 | def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); | ||
| 696 | data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; | ||
| 697 | if (data != def) | ||
| 698 | WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); | ||
| 699 | |||
| 700 | /* Configure hold time to filter in-valid power on/off request. Use default right now */ | ||
| 701 | def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); | ||
| 702 | data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK; | ||
| 703 | data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK); | ||
| 704 | /* Configure switch time for hysteresis purpose. Use default right now */ | ||
| 705 | data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK; | ||
| 706 | data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK); | ||
| 707 | if(data != def) | ||
| 708 | WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); | ||
| 709 | } | ||
| 710 | |||
| 711 | static void sdma_v4_0_init_pg(struct amdgpu_device *adev) | ||
| 712 | { | ||
| 713 | if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA)) | ||
| 714 | return; | ||
| 715 | |||
| 716 | switch (adev->asic_type) { | ||
| 717 | case CHIP_RAVEN: | ||
| 718 | sdma_v4_1_init_power_gating(adev); | ||
| 719 | sdma_v4_1_update_power_gating(adev, true); | ||
| 720 | break; | ||
| 721 | default: | ||
| 722 | break; | ||
| 723 | } | ||
| 724 | } | ||
| 725 | |||
| 636 | /** | 726 | /** |
| 637 | * sdma_v4_0_rlc_resume - setup and start the async dma engines | 727 | * sdma_v4_0_rlc_resume - setup and start the async dma engines |
| 638 | * | 728 | * |
| @@ -643,7 +733,8 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) | |||
| 643 | */ | 733 | */ |
| 644 | static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev) | 734 | static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev) |
| 645 | { | 735 | { |
| 646 | /* XXX todo */ | 736 | sdma_v4_0_init_pg(adev); |
| 737 | |||
| 647 | return 0; | 738 | return 0; |
| 648 | } | 739 | } |
| 649 | 740 | ||
| @@ -699,8 +790,6 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) | |||
| 699 | WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); | 790 | WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); |
| 700 | } | 791 | } |
| 701 | 792 | ||
| 702 | sdma_v4_0_print_ucode_regs(adev); | ||
| 703 | |||
| 704 | return 0; | 793 | return 0; |
| 705 | } | 794 | } |
| 706 | 795 | ||
| @@ -726,7 +815,6 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) | |||
| 726 | } | 815 | } |
| 727 | 816 | ||
| 728 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { | 817 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { |
| 729 | DRM_INFO("Loading via direct write\n"); | ||
| 730 | r = sdma_v4_0_load_microcode(adev); | 818 | r = sdma_v4_0_load_microcode(adev); |
| 731 | if (r) | 819 | if (r) |
| 732 | return r; | 820 | return r; |
| @@ -764,8 +852,6 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) | |||
| 764 | u32 tmp; | 852 | u32 tmp; |
| 765 | u64 gpu_addr; | 853 | u64 gpu_addr; |
| 766 | 854 | ||
| 767 | DRM_INFO("In Ring test func\n"); | ||
| 768 | |||
| 769 | r = amdgpu_wb_get(adev, &index); | 855 | r = amdgpu_wb_get(adev, &index); |
| 770 | if (r) { | 856 | if (r) { |
| 771 | dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); | 857 | dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); |
| @@ -1038,9 +1124,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
| 1038 | uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); | 1124 | uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); |
| 1039 | unsigned eng = ring->vm_inv_eng; | 1125 | unsigned eng = ring->vm_inv_eng; |
| 1040 | 1126 | ||
| 1041 | pd_addr = pd_addr | 0x1; /* valid bit */ | 1127 | pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); |
| 1042 | /* now only use physical base address of PDE and valid */ | 1128 | pd_addr |= AMDGPU_PTE_VALID; |
| 1043 | BUG_ON(pd_addr & 0xFFFF00000000003EULL); | ||
| 1044 | 1129 | ||
| 1045 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | | 1130 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | |
| 1046 | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); | 1131 | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); |
| @@ -1074,7 +1159,10 @@ static int sdma_v4_0_early_init(void *handle) | |||
| 1074 | { | 1159 | { |
| 1075 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1160 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1076 | 1161 | ||
| 1077 | adev->sdma.num_instances = 2; | 1162 | if (adev->asic_type == CHIP_RAVEN) |
| 1163 | adev->sdma.num_instances = 1; | ||
| 1164 | else | ||
| 1165 | adev->sdma.num_instances = 2; | ||
| 1078 | 1166 | ||
| 1079 | sdma_v4_0_set_ring_funcs(adev); | 1167 | sdma_v4_0_set_ring_funcs(adev); |
| 1080 | sdma_v4_0_set_buffer_funcs(adev); | 1168 | sdma_v4_0_set_buffer_funcs(adev); |
| @@ -1406,6 +1494,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle, | |||
| 1406 | 1494 | ||
| 1407 | switch (adev->asic_type) { | 1495 | switch (adev->asic_type) { |
| 1408 | case CHIP_VEGA10: | 1496 | case CHIP_VEGA10: |
| 1497 | case CHIP_RAVEN: | ||
| 1409 | sdma_v4_0_update_medium_grain_clock_gating(adev, | 1498 | sdma_v4_0_update_medium_grain_clock_gating(adev, |
| 1410 | state == AMD_CG_STATE_GATE ? true : false); | 1499 | state == AMD_CG_STATE_GATE ? true : false); |
| 1411 | sdma_v4_0_update_medium_grain_light_sleep(adev, | 1500 | sdma_v4_0_update_medium_grain_light_sleep(adev, |
| @@ -1420,6 +1509,17 @@ static int sdma_v4_0_set_clockgating_state(void *handle, | |||
| 1420 | static int sdma_v4_0_set_powergating_state(void *handle, | 1509 | static int sdma_v4_0_set_powergating_state(void *handle, |
| 1421 | enum amd_powergating_state state) | 1510 | enum amd_powergating_state state) |
| 1422 | { | 1511 | { |
| 1512 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 1513 | |||
| 1514 | switch (adev->asic_type) { | ||
| 1515 | case CHIP_RAVEN: | ||
| 1516 | sdma_v4_1_update_power_gating(adev, | ||
| 1517 | state == AMD_PG_STATE_GATE ? true : false); | ||
| 1518 | break; | ||
| 1519 | default: | ||
| 1520 | break; | ||
| 1521 | } | ||
| 1522 | |||
| 1423 | return 0; | 1523 | return 0; |
| 1424 | } | 1524 | } |
| 1425 | 1525 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index c0b1aabf282f..f45fb0f022b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | #include <linux/firmware.h> | 24 | #include <linux/firmware.h> |
| 25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
| 27 | #include "drmP.h" | 27 | #include <drm/drmP.h> |
| 28 | #include "amdgpu.h" | 28 | #include "amdgpu.h" |
| 29 | #include "amdgpu_atombios.h" | 29 | #include "amdgpu_atombios.h" |
| 30 | #include "amdgpu_ih.h" | 30 | #include "amdgpu_ih.h" |
| @@ -971,44 +971,44 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) | |||
| 971 | } | 971 | } |
| 972 | 972 | ||
| 973 | static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { | 973 | static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { |
| 974 | {GRBM_STATUS, false}, | 974 | {GRBM_STATUS}, |
| 975 | {GB_ADDR_CONFIG, false}, | 975 | {GB_ADDR_CONFIG}, |
| 976 | {MC_ARB_RAMCFG, false}, | 976 | {MC_ARB_RAMCFG}, |
| 977 | {GB_TILE_MODE0, false}, | 977 | {GB_TILE_MODE0}, |
| 978 | {GB_TILE_MODE1, false}, | 978 | {GB_TILE_MODE1}, |
| 979 | {GB_TILE_MODE2, false}, | 979 | {GB_TILE_MODE2}, |
| 980 | {GB_TILE_MODE3, false}, | 980 | {GB_TILE_MODE3}, |
| 981 | {GB_TILE_MODE4, false}, | 981 | {GB_TILE_MODE4}, |
| 982 | {GB_TILE_MODE5, false}, | 982 | {GB_TILE_MODE5}, |
| 983 | {GB_TILE_MODE6, false}, | 983 | {GB_TILE_MODE6}, |
| 984 | {GB_TILE_MODE7, false}, | 984 | {GB_TILE_MODE7}, |
| 985 | {GB_TILE_MODE8, false}, | 985 | {GB_TILE_MODE8}, |
| 986 | {GB_TILE_MODE9, false}, | 986 | {GB_TILE_MODE9}, |
| 987 | {GB_TILE_MODE10, false}, | 987 | {GB_TILE_MODE10}, |
| 988 | {GB_TILE_MODE11, false}, | 988 | {GB_TILE_MODE11}, |
| 989 | {GB_TILE_MODE12, false}, | 989 | {GB_TILE_MODE12}, |
| 990 | {GB_TILE_MODE13, false}, | 990 | {GB_TILE_MODE13}, |
| 991 | {GB_TILE_MODE14, false}, | 991 | {GB_TILE_MODE14}, |
| 992 | {GB_TILE_MODE15, false}, | 992 | {GB_TILE_MODE15}, |
| 993 | {GB_TILE_MODE16, false}, | 993 | {GB_TILE_MODE16}, |
| 994 | {GB_TILE_MODE17, false}, | 994 | {GB_TILE_MODE17}, |
| 995 | {GB_TILE_MODE18, false}, | 995 | {GB_TILE_MODE18}, |
| 996 | {GB_TILE_MODE19, false}, | 996 | {GB_TILE_MODE19}, |
| 997 | {GB_TILE_MODE20, false}, | 997 | {GB_TILE_MODE20}, |
| 998 | {GB_TILE_MODE21, false}, | 998 | {GB_TILE_MODE21}, |
| 999 | {GB_TILE_MODE22, false}, | 999 | {GB_TILE_MODE22}, |
| 1000 | {GB_TILE_MODE23, false}, | 1000 | {GB_TILE_MODE23}, |
| 1001 | {GB_TILE_MODE24, false}, | 1001 | {GB_TILE_MODE24}, |
| 1002 | {GB_TILE_MODE25, false}, | 1002 | {GB_TILE_MODE25}, |
| 1003 | {GB_TILE_MODE26, false}, | 1003 | {GB_TILE_MODE26}, |
| 1004 | {GB_TILE_MODE27, false}, | 1004 | {GB_TILE_MODE27}, |
| 1005 | {GB_TILE_MODE28, false}, | 1005 | {GB_TILE_MODE28}, |
| 1006 | {GB_TILE_MODE29, false}, | 1006 | {GB_TILE_MODE29}, |
| 1007 | {GB_TILE_MODE30, false}, | 1007 | {GB_TILE_MODE30}, |
| 1008 | {GB_TILE_MODE31, false}, | 1008 | {GB_TILE_MODE31}, |
| 1009 | {CC_RB_BACKEND_DISABLE, false, true}, | 1009 | {CC_RB_BACKEND_DISABLE, true}, |
| 1010 | {GC_USER_RB_BACKEND_DISABLE, false, true}, | 1010 | {GC_USER_RB_BACKEND_DISABLE, true}, |
| 1011 | {PA_SC_RASTER_CONFIG, false, true}, | 1011 | {PA_SC_RASTER_CONFIG, true}, |
| 1012 | }; | 1012 | }; |
| 1013 | 1013 | ||
| 1014 | static uint32_t si_get_register_value(struct amdgpu_device *adev, | 1014 | static uint32_t si_get_register_value(struct amdgpu_device *adev, |
| @@ -1093,13 +1093,13 @@ static int si_read_register(struct amdgpu_device *adev, u32 se_num, | |||
| 1093 | 1093 | ||
| 1094 | *value = 0; | 1094 | *value = 0; |
| 1095 | for (i = 0; i < ARRAY_SIZE(si_allowed_read_registers); i++) { | 1095 | for (i = 0; i < ARRAY_SIZE(si_allowed_read_registers); i++) { |
| 1096 | bool indexed = si_allowed_read_registers[i].grbm_indexed; | ||
| 1097 | |||
| 1096 | if (reg_offset != si_allowed_read_registers[i].reg_offset) | 1098 | if (reg_offset != si_allowed_read_registers[i].reg_offset) |
| 1097 | continue; | 1099 | continue; |
| 1098 | 1100 | ||
| 1099 | if (!si_allowed_read_registers[i].untouched) | 1101 | *value = si_get_register_value(adev, indexed, se_num, sh_num, |
| 1100 | *value = si_get_register_value(adev, | 1102 | reg_offset); |
| 1101 | si_allowed_read_registers[i].grbm_indexed, | ||
| 1102 | se_num, sh_num, reg_offset); | ||
| 1103 | return 0; | 1103 | return 0; |
| 1104 | } | 1104 | } |
| 1105 | return -EINVAL; | 1105 | return -EINVAL; |
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 7c1c5d127281..a7ad8390981c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "amdgpu_pm.h" | 26 | #include "amdgpu_pm.h" |
| 27 | #include "amdgpu_dpm.h" | 27 | #include "amdgpu_dpm.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index e66084211c74..ce25e03a077d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_ih.h" | 25 | #include "amdgpu_ih.h" |
| 26 | #include "sid.h" | 26 | #include "sid.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/si_smc.c b/drivers/gpu/drm/amd/amdgpu/si_smc.c index 0726bc3b6f90..4a2fd8b61940 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/si_smc.c | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include <linux/firmware.h> | 25 | #include <linux/firmware.h> |
| 26 | #include "drmP.h" | 26 | #include <drm/drmP.h> |
| 27 | #include "amdgpu.h" | 27 | #include "amdgpu.h" |
| 28 | #include "sid.h" | 28 | #include "sid.h" |
| 29 | #include "ppsmc.h" | 29 | #include "ppsmc.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6b55d451ae7f..a7341d88a320 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #include <linux/firmware.h> | 23 | #include <linux/firmware.h> |
| 24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
| 25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
| 26 | #include "drmP.h" | 26 | #include <drm/drmP.h> |
| 27 | #include "amdgpu.h" | 27 | #include "amdgpu.h" |
| 28 | #include "amdgpu_atomfirmware.h" | 28 | #include "amdgpu_atomfirmware.h" |
| 29 | #include "amdgpu_ih.h" | 29 | #include "amdgpu_ih.h" |
| @@ -57,6 +57,7 @@ | |||
| 57 | #include "sdma_v4_0.h" | 57 | #include "sdma_v4_0.h" |
| 58 | #include "uvd_v7_0.h" | 58 | #include "uvd_v7_0.h" |
| 59 | #include "vce_v4_0.h" | 59 | #include "vce_v4_0.h" |
| 60 | #include "vcn_v1_0.h" | ||
| 60 | #include "amdgpu_powerplay.h" | 61 | #include "amdgpu_powerplay.h" |
| 61 | #include "dce_virtual.h" | 62 | #include "dce_virtual.h" |
| 62 | #include "mxgpu_ai.h" | 63 | #include "mxgpu_ai.h" |
| @@ -104,10 +105,10 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) | |||
| 104 | u32 r; | 105 | u32 r; |
| 105 | struct nbio_pcie_index_data *nbio_pcie_id; | 106 | struct nbio_pcie_index_data *nbio_pcie_id; |
| 106 | 107 | ||
| 107 | if (adev->asic_type == CHIP_VEGA10) | 108 | if (adev->flags & AMD_IS_APU) |
| 108 | nbio_pcie_id = &nbio_v6_1_pcie_index_data; | 109 | nbio_pcie_id = &nbio_v7_0_pcie_index_data; |
| 109 | else | 110 | else |
| 110 | BUG(); | 111 | nbio_pcie_id = &nbio_v6_1_pcie_index_data; |
| 111 | 112 | ||
| 112 | address = nbio_pcie_id->index_offset; | 113 | address = nbio_pcie_id->index_offset; |
| 113 | data = nbio_pcie_id->data_offset; | 114 | data = nbio_pcie_id->data_offset; |
| @@ -125,10 +126,10 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) | |||
| 125 | unsigned long flags, address, data; | 126 | unsigned long flags, address, data; |
| 126 | struct nbio_pcie_index_data *nbio_pcie_id; | 127 | struct nbio_pcie_index_data *nbio_pcie_id; |
| 127 | 128 | ||
| 128 | if (adev->asic_type == CHIP_VEGA10) | 129 | if (adev->flags & AMD_IS_APU) |
| 129 | nbio_pcie_id = &nbio_v6_1_pcie_index_data; | 130 | nbio_pcie_id = &nbio_v7_0_pcie_index_data; |
| 130 | else | 131 | else |
| 131 | BUG(); | 132 | nbio_pcie_id = &nbio_v6_1_pcie_index_data; |
| 132 | 133 | ||
| 133 | address = nbio_pcie_id->index_offset; | 134 | address = nbio_pcie_id->index_offset; |
| 134 | data = nbio_pcie_id->data_offset; | 135 | data = nbio_pcie_id->data_offset; |
| @@ -199,13 +200,20 @@ static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) | |||
| 199 | 200 | ||
| 200 | static u32 soc15_get_config_memsize(struct amdgpu_device *adev) | 201 | static u32 soc15_get_config_memsize(struct amdgpu_device *adev) |
| 201 | { | 202 | { |
| 202 | return nbio_v6_1_get_memsize(adev); | 203 | if (adev->flags & AMD_IS_APU) |
| 204 | return nbio_v7_0_get_memsize(adev); | ||
| 205 | else | ||
| 206 | return nbio_v6_1_get_memsize(adev); | ||
| 203 | } | 207 | } |
| 204 | 208 | ||
| 205 | static const u32 vega10_golden_init[] = | 209 | static const u32 vega10_golden_init[] = |
| 206 | { | 210 | { |
| 207 | }; | 211 | }; |
| 208 | 212 | ||
| 213 | static const u32 raven_golden_init[] = | ||
| 214 | { | ||
| 215 | }; | ||
| 216 | |||
| 209 | static void soc15_init_golden_registers(struct amdgpu_device *adev) | 217 | static void soc15_init_golden_registers(struct amdgpu_device *adev) |
| 210 | { | 218 | { |
| 211 | /* Some of the registers might be dependent on GRBM_GFX_INDEX */ | 219 | /* Some of the registers might be dependent on GRBM_GFX_INDEX */ |
| @@ -217,6 +225,11 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) | |||
| 217 | vega10_golden_init, | 225 | vega10_golden_init, |
| 218 | (const u32)ARRAY_SIZE(vega10_golden_init)); | 226 | (const u32)ARRAY_SIZE(vega10_golden_init)); |
| 219 | break; | 227 | break; |
| 228 | case CHIP_RAVEN: | ||
| 229 | amdgpu_program_register_sequence(adev, | ||
| 230 | raven_golden_init, | ||
| 231 | (const u32)ARRAY_SIZE(raven_golden_init)); | ||
| 232 | break; | ||
| 220 | default: | 233 | default: |
| 221 | break; | 234 | break; |
| 222 | } | 235 | } |
| @@ -280,29 +293,25 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, | |||
| 280 | return true; | 293 | return true; |
| 281 | } | 294 | } |
| 282 | 295 | ||
| 283 | static struct amdgpu_allowed_register_entry vega10_allowed_read_registers[] = { | ||
| 284 | /* todo */ | ||
| 285 | }; | ||
| 286 | |||
| 287 | static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = { | 296 | static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = { |
| 288 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS), false}, | 297 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)}, |
| 289 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2), false}, | 298 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)}, |
| 290 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0), false}, | 299 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)}, |
| 291 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1), false}, | 300 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)}, |
| 292 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2), false}, | 301 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)}, |
| 293 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3), false}, | 302 | { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)}, |
| 294 | { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG), false}, | 303 | { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG)}, |
| 295 | { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG), false}, | 304 | { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG)}, |
| 296 | { SOC15_REG_OFFSET(GC, 0, mmCP_STAT), false}, | 305 | { SOC15_REG_OFFSET(GC, 0, mmCP_STAT)}, |
| 297 | { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1), false}, | 306 | { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)}, |
| 298 | { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2), false}, | 307 | { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)}, |
| 299 | { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3), false}, | 308 | { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)}, |
| 300 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT), false}, | 309 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)}, |
| 301 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1), false}, | 310 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)}, |
| 302 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS), false}, | 311 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)}, |
| 303 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1), false}, | 312 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)}, |
| 304 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS), false}, | 313 | { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)}, |
| 305 | { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), false}, | 314 | { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)}, |
| 306 | }; | 315 | }; |
| 307 | 316 | ||
| 308 | static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, | 317 | static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, |
| @@ -341,41 +350,16 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, | |||
| 341 | static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, | 350 | static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, |
| 342 | u32 sh_num, u32 reg_offset, u32 *value) | 351 | u32 sh_num, u32 reg_offset, u32 *value) |
| 343 | { | 352 | { |
| 344 | struct amdgpu_allowed_register_entry *asic_register_table = NULL; | 353 | uint32_t i; |
| 345 | struct amdgpu_allowed_register_entry *asic_register_entry; | ||
| 346 | uint32_t size, i; | ||
| 347 | 354 | ||
| 348 | *value = 0; | 355 | *value = 0; |
| 349 | switch (adev->asic_type) { | ||
| 350 | case CHIP_VEGA10: | ||
| 351 | asic_register_table = vega10_allowed_read_registers; | ||
| 352 | size = ARRAY_SIZE(vega10_allowed_read_registers); | ||
| 353 | break; | ||
| 354 | default: | ||
| 355 | return -EINVAL; | ||
| 356 | } | ||
| 357 | |||
| 358 | if (asic_register_table) { | ||
| 359 | for (i = 0; i < size; i++) { | ||
| 360 | asic_register_entry = asic_register_table + i; | ||
| 361 | if (reg_offset != asic_register_entry->reg_offset) | ||
| 362 | continue; | ||
| 363 | if (!asic_register_entry->untouched) | ||
| 364 | *value = soc15_get_register_value(adev, | ||
| 365 | asic_register_entry->grbm_indexed, | ||
| 366 | se_num, sh_num, reg_offset); | ||
| 367 | return 0; | ||
| 368 | } | ||
| 369 | } | ||
| 370 | |||
| 371 | for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { | 356 | for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { |
| 372 | if (reg_offset != soc15_allowed_read_registers[i].reg_offset) | 357 | if (reg_offset != soc15_allowed_read_registers[i].reg_offset) |
| 373 | continue; | 358 | continue; |
| 374 | 359 | ||
| 375 | if (!soc15_allowed_read_registers[i].untouched) | 360 | *value = soc15_get_register_value(adev, |
| 376 | *value = soc15_get_register_value(adev, | 361 | soc15_allowed_read_registers[i].grbm_indexed, |
| 377 | soc15_allowed_read_registers[i].grbm_indexed, | 362 | se_num, sh_num, reg_offset); |
| 378 | se_num, sh_num, reg_offset); | ||
| 379 | return 0; | 363 | return 0; |
| 380 | } | 364 | } |
| 381 | return -EINVAL; | 365 | return -EINVAL; |
| @@ -396,7 +380,10 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) | |||
| 396 | 380 | ||
| 397 | /* wait for asic to come out of reset */ | 381 | /* wait for asic to come out of reset */ |
| 398 | for (i = 0; i < adev->usec_timeout; i++) { | 382 | for (i = 0; i < adev->usec_timeout; i++) { |
| 399 | if (nbio_v6_1_get_memsize(adev) != 0xffffffff) | 383 | u32 memsize = (adev->flags & AMD_IS_APU) ? |
| 384 | nbio_v7_0_get_memsize(adev) : | ||
| 385 | nbio_v6_1_get_memsize(adev); | ||
| 386 | if (memsize != 0xffffffff) | ||
| 400 | break; | 387 | break; |
| 401 | udelay(1); | 388 | udelay(1); |
| 402 | } | 389 | } |
| @@ -470,8 +457,12 @@ static void soc15_program_aspm(struct amdgpu_device *adev) | |||
| 470 | static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, | 457 | static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, |
| 471 | bool enable) | 458 | bool enable) |
| 472 | { | 459 | { |
| 473 | nbio_v6_1_enable_doorbell_aperture(adev, enable); | 460 | if (adev->flags & AMD_IS_APU) { |
| 474 | nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); | 461 | nbio_v7_0_enable_doorbell_aperture(adev, enable); |
| 462 | } else { | ||
| 463 | nbio_v6_1_enable_doorbell_aperture(adev, enable); | ||
| 464 | nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); | ||
| 465 | } | ||
| 475 | } | 466 | } |
| 476 | 467 | ||
| 477 | static const struct amdgpu_ip_block_version vega10_common_ip_block = | 468 | static const struct amdgpu_ip_block_version vega10_common_ip_block = |
| @@ -493,8 +484,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) | |||
| 493 | switch (adev->asic_type) { | 484 | switch (adev->asic_type) { |
| 494 | case CHIP_VEGA10: | 485 | case CHIP_VEGA10: |
| 495 | amdgpu_ip_block_add(adev, &vega10_common_ip_block); | 486 | amdgpu_ip_block_add(adev, &vega10_common_ip_block); |
| 496 | amdgpu_ip_block_add(adev, &gfxhub_v1_0_ip_block); | ||
| 497 | amdgpu_ip_block_add(adev, &mmhub_v1_0_ip_block); | ||
| 498 | amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); | 487 | amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); |
| 499 | amdgpu_ip_block_add(adev, &vega10_ih_ip_block); | 488 | amdgpu_ip_block_add(adev, &vega10_ih_ip_block); |
| 500 | if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1) | 489 | if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1) |
| @@ -508,6 +497,18 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) | |||
| 508 | amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); | 497 | amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); |
| 509 | amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); | 498 | amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); |
| 510 | break; | 499 | break; |
| 500 | case CHIP_RAVEN: | ||
| 501 | amdgpu_ip_block_add(adev, &vega10_common_ip_block); | ||
| 502 | amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); | ||
| 503 | amdgpu_ip_block_add(adev, &vega10_ih_ip_block); | ||
| 504 | amdgpu_ip_block_add(adev, &psp_v10_0_ip_block); | ||
| 505 | amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); | ||
| 506 | if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) | ||
| 507 | amdgpu_ip_block_add(adev, &dce_virtual_ip_block); | ||
| 508 | amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); | ||
| 509 | amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); | ||
| 510 | amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block); | ||
| 511 | break; | ||
| 511 | default: | 512 | default: |
| 512 | return -EINVAL; | 513 | return -EINVAL; |
| 513 | } | 514 | } |
| @@ -517,7 +518,10 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) | |||
| 517 | 518 | ||
| 518 | static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) | 519 | static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) |
| 519 | { | 520 | { |
| 520 | return nbio_v6_1_get_rev_id(adev); | 521 | if (adev->flags & AMD_IS_APU) |
| 522 | return nbio_v7_0_get_rev_id(adev); | ||
| 523 | else | ||
| 524 | return nbio_v6_1_get_rev_id(adev); | ||
| 521 | } | 525 | } |
| 522 | 526 | ||
| 523 | 527 | ||
| @@ -560,11 +564,6 @@ static int soc15_common_early_init(void *handle) | |||
| 560 | (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) | 564 | (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) |
| 561 | psp_enabled = true; | 565 | psp_enabled = true; |
| 562 | 566 | ||
| 563 | if (amdgpu_sriov_vf(adev)) { | ||
| 564 | amdgpu_virt_init_setting(adev); | ||
| 565 | xgpu_ai_mailbox_set_irq_funcs(adev); | ||
| 566 | } | ||
| 567 | |||
| 568 | /* | 567 | /* |
| 569 | * nbio need be used for both sdma and gfx9, but only | 568 | * nbio need be used for both sdma and gfx9, but only |
| 570 | * initializes once | 569 | * initializes once |
| @@ -573,6 +572,9 @@ static int soc15_common_early_init(void *handle) | |||
| 573 | case CHIP_VEGA10: | 572 | case CHIP_VEGA10: |
| 574 | nbio_v6_1_init(adev); | 573 | nbio_v6_1_init(adev); |
| 575 | break; | 574 | break; |
| 575 | case CHIP_RAVEN: | ||
| 576 | nbio_v7_0_init(adev); | ||
| 577 | break; | ||
| 576 | default: | 578 | default: |
| 577 | return -EINVAL; | 579 | return -EINVAL; |
| 578 | } | 580 | } |
| @@ -603,11 +605,40 @@ static int soc15_common_early_init(void *handle) | |||
| 603 | adev->pg_flags = 0; | 605 | adev->pg_flags = 0; |
| 604 | adev->external_rev_id = 0x1; | 606 | adev->external_rev_id = 0x1; |
| 605 | break; | 607 | break; |
| 608 | case CHIP_RAVEN: | ||
| 609 | adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | | ||
| 610 | AMD_CG_SUPPORT_GFX_MGLS | | ||
| 611 | AMD_CG_SUPPORT_GFX_RLC_LS | | ||
| 612 | AMD_CG_SUPPORT_GFX_CP_LS | | ||
| 613 | AMD_CG_SUPPORT_GFX_3D_CGCG | | ||
| 614 | AMD_CG_SUPPORT_GFX_3D_CGLS | | ||
| 615 | AMD_CG_SUPPORT_GFX_CGCG | | ||
| 616 | AMD_CG_SUPPORT_GFX_CGLS | | ||
| 617 | AMD_CG_SUPPORT_BIF_MGCG | | ||
| 618 | AMD_CG_SUPPORT_BIF_LS | | ||
| 619 | AMD_CG_SUPPORT_HDP_MGCG | | ||
| 620 | AMD_CG_SUPPORT_HDP_LS | | ||
| 621 | AMD_CG_SUPPORT_DRM_MGCG | | ||
| 622 | AMD_CG_SUPPORT_DRM_LS | | ||
| 623 | AMD_CG_SUPPORT_ROM_MGCG | | ||
| 624 | AMD_CG_SUPPORT_MC_MGCG | | ||
| 625 | AMD_CG_SUPPORT_MC_LS | | ||
| 626 | AMD_CG_SUPPORT_SDMA_MGCG | | ||
| 627 | AMD_CG_SUPPORT_SDMA_LS; | ||
| 628 | adev->pg_flags = AMD_PG_SUPPORT_SDMA | | ||
| 629 | AMD_PG_SUPPORT_MMHUB; | ||
| 630 | adev->external_rev_id = 0x1; | ||
| 631 | break; | ||
| 606 | default: | 632 | default: |
| 607 | /* FIXME: not supported yet */ | 633 | /* FIXME: not supported yet */ |
| 608 | return -EINVAL; | 634 | return -EINVAL; |
| 609 | } | 635 | } |
| 610 | 636 | ||
| 637 | if (amdgpu_sriov_vf(adev)) { | ||
| 638 | amdgpu_virt_init_setting(adev); | ||
| 639 | xgpu_ai_mailbox_set_irq_funcs(adev); | ||
| 640 | } | ||
| 641 | |||
| 611 | adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); | 642 | adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); |
| 612 | 643 | ||
| 613 | amdgpu_get_pcie_info(adev); | 644 | amdgpu_get_pcie_info(adev); |
| @@ -825,6 +856,20 @@ static int soc15_common_set_clockgating_state(void *handle, | |||
| 825 | soc15_update_df_medium_grain_clock_gating(adev, | 856 | soc15_update_df_medium_grain_clock_gating(adev, |
| 826 | state == AMD_CG_STATE_GATE ? true : false); | 857 | state == AMD_CG_STATE_GATE ? true : false); |
| 827 | break; | 858 | break; |
| 859 | case CHIP_RAVEN: | ||
| 860 | nbio_v7_0_update_medium_grain_clock_gating(adev, | ||
| 861 | state == AMD_CG_STATE_GATE ? true : false); | ||
| 862 | nbio_v6_1_update_medium_grain_light_sleep(adev, | ||
| 863 | state == AMD_CG_STATE_GATE ? true : false); | ||
| 864 | soc15_update_hdp_light_sleep(adev, | ||
| 865 | state == AMD_CG_STATE_GATE ? true : false); | ||
| 866 | soc15_update_drm_clock_gating(adev, | ||
| 867 | state == AMD_CG_STATE_GATE ? true : false); | ||
| 868 | soc15_update_drm_light_sleep(adev, | ||
| 869 | state == AMD_CG_STATE_GATE ? true : false); | ||
| 870 | soc15_update_rom_medium_grain_clock_gating(adev, | ||
| 871 | state == AMD_CG_STATE_GATE ? true : false); | ||
| 872 | break; | ||
| 828 | default: | 873 | default: |
| 829 | break; | 874 | break; |
| 830 | } | 875 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 378a46da585a..acb3cdb119f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #define __SOC15_H__ | 25 | #define __SOC15_H__ |
| 26 | 26 | ||
| 27 | #include "nbio_v6_1.h" | 27 | #include "nbio_v6_1.h" |
| 28 | #include "nbio_v7_0.h" | ||
| 28 | 29 | ||
| 29 | extern const struct amd_ip_funcs soc15_common_ip_funcs; | 30 | extern const struct amd_ip_funcs soc15_common_ip_funcs; |
| 30 | 31 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index e8df6d820dbe..e2d330eed952 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h | |||
| @@ -63,6 +63,13 @@ struct nbio_pcie_index_data { | |||
| 63 | (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ | 63 | (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ |
| 64 | (ip##_BASE__INST##inst##_SEG4 + reg)))))) | 64 | (ip##_BASE__INST##inst##_SEG4 + reg)))))) |
| 65 | 65 | ||
| 66 | #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ | ||
| 67 | RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ | ||
| 68 | (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ | ||
| 69 | (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ | ||
| 70 | (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ | ||
| 71 | (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset) | ||
| 72 | |||
| 66 | #define WREG32_SOC15(ip, inst, reg, value) \ | 73 | #define WREG32_SOC15(ip, inst, reg, value) \ |
| 67 | WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ | 74 | WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ |
| 68 | (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ | 75 | (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ |
| @@ -70,6 +77,13 @@ struct nbio_pcie_index_data { | |||
| 70 | (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ | 77 | (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ |
| 71 | (ip##_BASE__INST##inst##_SEG4 + reg))))), value) | 78 | (ip##_BASE__INST##inst##_SEG4 + reg))))), value) |
| 72 | 79 | ||
| 80 | #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ | ||
| 81 | WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ | ||
| 82 | (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ | ||
| 83 | (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ | ||
| 84 | (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ | ||
| 85 | (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset, value) | ||
| 86 | |||
| 73 | #endif | 87 | #endif |
| 74 | 88 | ||
| 75 | 89 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 75403c7c8c9e..e79befd80eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h | |||
| @@ -132,6 +132,7 @@ | |||
| 132 | * 1 - pfp | 132 | * 1 - pfp |
| 133 | */ | 133 | */ |
| 134 | #define PACKET3_INDIRECT_BUFFER 0x3F | 134 | #define PACKET3_INDIRECT_BUFFER 0x3F |
| 135 | #define INDIRECT_BUFFER_VALID (1 << 23) | ||
| 135 | #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) | 136 | #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) |
| 136 | /* 0 - LRU | 137 | /* 0 - LRU |
| 137 | * 1 - Stream | 138 | * 1 - Stream |
| @@ -259,8 +260,97 @@ | |||
| 259 | #define PACKET3_WAIT_ON_CE_COUNTER 0x86 | 260 | #define PACKET3_WAIT_ON_CE_COUNTER 0x86 |
| 260 | #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 | 261 | #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 |
| 261 | #define PACKET3_SWITCH_BUFFER 0x8B | 262 | #define PACKET3_SWITCH_BUFFER 0x8B |
| 263 | #define PACKET3_FRAME_CONTROL 0x90 | ||
| 264 | # define FRAME_CMD(x) ((x) << 28) | ||
| 265 | /* | ||
| 266 | * x=0: tmz_begin | ||
| 267 | * x=1: tmz_end | ||
| 268 | */ | ||
| 269 | |||
| 262 | #define PACKET3_SET_RESOURCES 0xA0 | 270 | #define PACKET3_SET_RESOURCES 0xA0 |
| 271 | /* 1. header | ||
| 272 | * 2. CONTROL | ||
| 273 | * 3. QUEUE_MASK_LO [31:0] | ||
| 274 | * 4. QUEUE_MASK_HI [31:0] | ||
| 275 | * 5. GWS_MASK_LO [31:0] | ||
| 276 | * 6. GWS_MASK_HI [31:0] | ||
| 277 | * 7. OAC_MASK [15:0] | ||
| 278 | * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0] | ||
| 279 | */ | ||
| 280 | # define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) | ||
| 281 | # define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) | ||
| 282 | # define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) | ||
| 263 | #define PACKET3_MAP_QUEUES 0xA2 | 283 | #define PACKET3_MAP_QUEUES 0xA2 |
| 284 | /* 1. header | ||
| 285 | * 2. CONTROL | ||
| 286 | * 3. CONTROL2 | ||
| 287 | * 4. MQD_ADDR_LO [31:0] | ||
| 288 | * 5. MQD_ADDR_HI [31:0] | ||
| 289 | * 6. WPTR_ADDR_LO [31:0] | ||
| 290 | * 7. WPTR_ADDR_HI [31:0] | ||
| 291 | */ | ||
| 292 | /* CONTROL */ | ||
| 293 | # define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4) | ||
| 294 | # define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8) | ||
| 295 | # define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13) | ||
| 296 | # define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16) | ||
| 297 | # define PACKET3_MAP_QUEUES_ME(x) ((x) << 18) | ||
| 298 | # define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) | ||
| 299 | # define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) | ||
| 300 | # define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) | ||
| 301 | # define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) | ||
| 302 | /* CONTROL2 */ | ||
| 303 | # define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) | ||
| 304 | # define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) | ||
| 305 | #define PACKET3_UNMAP_QUEUES 0xA3 | ||
| 306 | /* 1. header | ||
| 307 | * 2. CONTROL | ||
| 308 | * 3. CONTROL2 | ||
| 309 | * 4. CONTROL3 | ||
| 310 | * 5. CONTROL4 | ||
| 311 | * 6. CONTROL5 | ||
| 312 | */ | ||
| 313 | /* CONTROL */ | ||
| 314 | # define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0) | ||
| 315 | /* 0 - PREEMPT_QUEUES | ||
| 316 | * 1 - RESET_QUEUES | ||
| 317 | * 2 - DISABLE_PROCESS_QUEUES | ||
| 318 | * 3 - PREEMPT_QUEUES_NO_UNMAP | ||
| 319 | */ | ||
| 320 | # define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4) | ||
| 321 | # define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26) | ||
| 322 | # define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29) | ||
| 323 | /* CONTROL2a */ | ||
| 324 | # define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0) | ||
| 325 | /* CONTROL2b */ | ||
| 326 | # define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2) | ||
| 327 | /* CONTROL3a */ | ||
| 328 | # define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2) | ||
| 329 | /* CONTROL3b */ | ||
| 330 | # define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0) | ||
| 331 | /* CONTROL4 */ | ||
| 332 | # define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2) | ||
| 333 | /* CONTROL5 */ | ||
| 334 | # define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2) | ||
| 335 | #define PACKET3_QUERY_STATUS 0xA4 | ||
| 336 | /* 1. header | ||
| 337 | * 2. CONTROL | ||
| 338 | * 3. CONTROL2 | ||
| 339 | * 4. ADDR_LO [31:0] | ||
| 340 | * 5. ADDR_HI [31:0] | ||
| 341 | * 6. DATA_LO [31:0] | ||
| 342 | * 7. DATA_HI [31:0] | ||
| 343 | */ | ||
| 344 | /* CONTROL */ | ||
| 345 | # define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0) | ||
| 346 | # define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28) | ||
| 347 | # define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30) | ||
| 348 | /* CONTROL2a */ | ||
| 349 | # define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) | ||
| 350 | /* CONTROL2b */ | ||
| 351 | # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) | ||
| 352 | # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) | ||
| 353 | |||
| 264 | 354 | ||
| 265 | #define VCE_CMD_NO_OP 0x00000000 | 355 | #define VCE_CMD_NO_OP 0x00000000 |
| 266 | #define VCE_CMD_END 0x00000001 | 356 | #define VCE_CMD_END 0x00000001 |
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 3a5097ac2bb4..923df2c0e535 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_ih.h" | 25 | #include "amdgpu_ih.h" |
| 26 | #include "vid.h" | 26 | #include "vid.h" |
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index eca8f6e01e97..987b958368ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | |||
| @@ -58,7 +58,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) | |||
| 58 | { | 58 | { |
| 59 | struct amdgpu_device *adev = ring->adev; | 59 | struct amdgpu_device *adev = ring->adev; |
| 60 | 60 | ||
| 61 | return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR)); | 61 | return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | /** | 64 | /** |
| @@ -73,9 +73,9 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) | |||
| 73 | struct amdgpu_device *adev = ring->adev; | 73 | struct amdgpu_device *adev = ring->adev; |
| 74 | 74 | ||
| 75 | if (ring == &adev->uvd.ring_enc[0]) | 75 | if (ring == &adev->uvd.ring_enc[0]) |
| 76 | return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR)); | 76 | return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); |
| 77 | else | 77 | else |
| 78 | return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2)); | 78 | return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | /** | 81 | /** |
| @@ -89,7 +89,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) | |||
| 89 | { | 89 | { |
| 90 | struct amdgpu_device *adev = ring->adev; | 90 | struct amdgpu_device *adev = ring->adev; |
| 91 | 91 | ||
| 92 | return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR)); | 92 | return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | /** | 95 | /** |
| @@ -107,9 +107,9 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) | |||
| 107 | return adev->wb.wb[ring->wptr_offs]; | 107 | return adev->wb.wb[ring->wptr_offs]; |
| 108 | 108 | ||
| 109 | if (ring == &adev->uvd.ring_enc[0]) | 109 | if (ring == &adev->uvd.ring_enc[0]) |
| 110 | return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR)); | 110 | return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); |
| 111 | else | 111 | else |
| 112 | return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2)); | 112 | return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); |
| 113 | } | 113 | } |
| 114 | 114 | ||
| 115 | /** | 115 | /** |
| @@ -123,7 +123,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) | |||
| 123 | { | 123 | { |
| 124 | struct amdgpu_device *adev = ring->adev; | 124 | struct amdgpu_device *adev = ring->adev; |
| 125 | 125 | ||
| 126 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), lower_32_bits(ring->wptr)); | 126 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); |
| 127 | } | 127 | } |
| 128 | 128 | ||
| 129 | /** | 129 | /** |
| @@ -145,10 +145,10 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) | |||
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | if (ring == &adev->uvd.ring_enc[0]) | 147 | if (ring == &adev->uvd.ring_enc[0]) |
| 148 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), | 148 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, |
| 149 | lower_32_bits(ring->wptr)); | 149 | lower_32_bits(ring->wptr)); |
| 150 | else | 150 | else |
| 151 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), | 151 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, |
| 152 | lower_32_bits(ring->wptr)); | 152 | lower_32_bits(ring->wptr)); |
| 153 | } | 153 | } |
| 154 | 154 | ||
| @@ -562,7 +562,13 @@ static int uvd_v7_0_hw_fini(void *handle) | |||
| 562 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 562 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 563 | struct amdgpu_ring *ring = &adev->uvd.ring; | 563 | struct amdgpu_ring *ring = &adev->uvd.ring; |
| 564 | 564 | ||
| 565 | uvd_v7_0_stop(adev); | 565 | if (!amdgpu_sriov_vf(adev)) |
| 566 | uvd_v7_0_stop(adev); | ||
| 567 | else { | ||
| 568 | /* full access mode, so don't touch any UVD register */ | ||
| 569 | DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); | ||
| 570 | } | ||
| 571 | |||
| 566 | ring->ready = false; | 572 | ring->ready = false; |
| 567 | 573 | ||
| 568 | return 0; | 574 | return 0; |
| @@ -611,46 +617,46 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) | |||
| 611 | uint32_t offset; | 617 | uint32_t offset; |
| 612 | 618 | ||
| 613 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { | 619 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 614 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), | 620 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, |
| 615 | lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); | 621 | lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); |
| 616 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), | 622 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, |
| 617 | upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); | 623 | upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); |
| 618 | offset = 0; | 624 | offset = 0; |
| 619 | } else { | 625 | } else { |
| 620 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), | 626 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, |
| 621 | lower_32_bits(adev->uvd.gpu_addr)); | 627 | lower_32_bits(adev->uvd.gpu_addr)); |
| 622 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), | 628 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, |
| 623 | upper_32_bits(adev->uvd.gpu_addr)); | 629 | upper_32_bits(adev->uvd.gpu_addr)); |
| 624 | offset = size; | 630 | offset = size; |
| 625 | } | 631 | } |
| 626 | 632 | ||
| 627 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), | 633 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, |
| 628 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); | 634 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); |
| 629 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); | 635 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); |
| 630 | 636 | ||
| 631 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), | 637 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, |
| 632 | lower_32_bits(adev->uvd.gpu_addr + offset)); | 638 | lower_32_bits(adev->uvd.gpu_addr + offset)); |
| 633 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), | 639 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, |
| 634 | upper_32_bits(adev->uvd.gpu_addr + offset)); | 640 | upper_32_bits(adev->uvd.gpu_addr + offset)); |
| 635 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); | 641 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); |
| 636 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); | 642 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); |
| 637 | 643 | ||
| 638 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), | 644 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, |
| 639 | lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); | 645 | lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); |
| 640 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), | 646 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, |
| 641 | upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); | 647 | upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); |
| 642 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); | 648 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); |
| 643 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), | 649 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, |
| 644 | AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); | 650 | AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); |
| 645 | 651 | ||
| 646 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG), | 652 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, |
| 647 | adev->gfx.config.gb_addr_config); | 653 | adev->gfx.config.gb_addr_config); |
| 648 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG), | 654 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, |
| 649 | adev->gfx.config.gb_addr_config); | 655 | adev->gfx.config.gb_addr_config); |
| 650 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG), | 656 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, |
| 651 | adev->gfx.config.gb_addr_config); | 657 | adev->gfx.config.gb_addr_config); |
| 652 | 658 | ||
| 653 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); | 659 | WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); |
| 654 | } | 660 | } |
| 655 | 661 | ||
| 656 | static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, | 662 | static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, |
| @@ -664,29 +670,29 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, | |||
| 664 | size = header->header_size + header->vce_table_size + header->uvd_table_size; | 670 | size = header->header_size + header->vce_table_size + header->uvd_table_size; |
| 665 | 671 | ||
| 666 | /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ | 672 | /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ |
| 667 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); | 673 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr)); |
| 668 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); | 674 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr)); |
| 669 | 675 | ||
| 670 | /* 2, update vmid of descriptor */ | 676 | /* 2, update vmid of descriptor */ |
| 671 | data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); | 677 | data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID); |
| 672 | data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; | 678 | data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; |
| 673 | data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ | 679 | data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ |
| 674 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); | 680 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID, data); |
| 675 | 681 | ||
| 676 | /* 3, notify mmsch about the size of this descriptor */ | 682 | /* 3, notify mmsch about the size of this descriptor */ |
| 677 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); | 683 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE, size); |
| 678 | 684 | ||
| 679 | /* 4, set resp to zero */ | 685 | /* 4, set resp to zero */ |
| 680 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); | 686 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); |
| 681 | 687 | ||
| 682 | /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ | 688 | /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ |
| 683 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); | 689 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); |
| 684 | 690 | ||
| 685 | data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); | 691 | data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP); |
| 686 | loop = 1000; | 692 | loop = 1000; |
| 687 | while ((data & 0x10000002) != 0x10000002) { | 693 | while ((data & 0x10000002) != 0x10000002) { |
| 688 | udelay(10); | 694 | udelay(10); |
| 689 | data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); | 695 | data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP); |
| 690 | loop--; | 696 | loop--; |
| 691 | if (!loop) | 697 | if (!loop) |
| 692 | break; | 698 | break; |
| @@ -696,6 +702,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, | |||
| 696 | dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); | 702 | dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); |
| 697 | return -EBUSY; | 703 | return -EBUSY; |
| 698 | } | 704 | } |
| 705 | WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); | ||
| 699 | 706 | ||
| 700 | return 0; | 707 | return 0; |
| 701 | } | 708 | } |
| @@ -928,7 +935,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) | |||
| 928 | mdelay(1); | 935 | mdelay(1); |
| 929 | 936 | ||
| 930 | /* put LMI, VCPU, RBC etc... into reset */ | 937 | /* put LMI, VCPU, RBC etc... into reset */ |
| 931 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), | 938 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, |
| 932 | UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | | 939 | UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | |
| 933 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | | 940 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | |
| 934 | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | | 941 | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | |
| @@ -940,7 +947,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) | |||
| 940 | mdelay(5); | 947 | mdelay(5); |
| 941 | 948 | ||
| 942 | /* initialize UVD memory controller */ | 949 | /* initialize UVD memory controller */ |
| 943 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), | 950 | WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, |
| 944 | (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | | 951 | (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | |
| 945 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | | 952 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | |
| 946 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | | 953 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | |
| @@ -953,23 +960,23 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) | |||
| 953 | lmi_swap_cntl = 0xa; | 960 | lmi_swap_cntl = 0xa; |
| 954 | mp_swap_cntl = 0; | 961 | mp_swap_cntl = 0; |
| 955 | #endif | 962 | #endif |
| 956 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), lmi_swap_cntl); | 963 | WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); |
| 957 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), mp_swap_cntl); | 964 | WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); |
| 958 | 965 | ||
| 959 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040); | 966 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); |
| 960 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0); | 967 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); |
| 961 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040); | 968 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); |
| 962 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0); | 969 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); |
| 963 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0); | 970 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); |
| 964 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88); | 971 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); |
| 965 | 972 | ||
| 966 | /* take all subblocks out of reset, except VCPU */ | 973 | /* take all subblocks out of reset, except VCPU */ |
| 967 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), | 974 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, |
| 968 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | 975 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); |
| 969 | mdelay(5); | 976 | mdelay(5); |
| 970 | 977 | ||
| 971 | /* enable VCPU clock */ | 978 | /* enable VCPU clock */ |
| 972 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), | 979 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, |
| 973 | UVD_VCPU_CNTL__CLK_EN_MASK); | 980 | UVD_VCPU_CNTL__CLK_EN_MASK); |
| 974 | 981 | ||
| 975 | /* enable UMC */ | 982 | /* enable UMC */ |
| @@ -977,14 +984,14 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) | |||
| 977 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | 984 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); |
| 978 | 985 | ||
| 979 | /* boot up the VCPU */ | 986 | /* boot up the VCPU */ |
| 980 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); | 987 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); |
| 981 | mdelay(10); | 988 | mdelay(10); |
| 982 | 989 | ||
| 983 | for (i = 0; i < 10; ++i) { | 990 | for (i = 0; i < 10; ++i) { |
| 984 | uint32_t status; | 991 | uint32_t status; |
| 985 | 992 | ||
| 986 | for (j = 0; j < 100; ++j) { | 993 | for (j = 0; j < 100; ++j) { |
| 987 | status = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS)); | 994 | status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); |
| 988 | if (status & 2) | 995 | if (status & 2) |
| 989 | break; | 996 | break; |
| 990 | mdelay(10); | 997 | mdelay(10); |
| @@ -1025,44 +1032,44 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) | |||
| 1025 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); | 1032 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); |
| 1026 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); | 1033 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); |
| 1027 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); | 1034 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); |
| 1028 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); | 1035 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); |
| 1029 | 1036 | ||
| 1030 | /* set the write pointer delay */ | 1037 | /* set the write pointer delay */ |
| 1031 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0); | 1038 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); |
| 1032 | 1039 | ||
| 1033 | /* set the wb address */ | 1040 | /* set the wb address */ |
| 1034 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR), | 1041 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, |
| 1035 | (upper_32_bits(ring->gpu_addr) >> 2)); | 1042 | (upper_32_bits(ring->gpu_addr) >> 2)); |
| 1036 | 1043 | ||
| 1037 | /* programm the RB_BASE for ring buffer */ | 1044 | /* programm the RB_BASE for ring buffer */ |
| 1038 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), | 1045 | WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, |
| 1039 | lower_32_bits(ring->gpu_addr)); | 1046 | lower_32_bits(ring->gpu_addr)); |
| 1040 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), | 1047 | WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, |
| 1041 | upper_32_bits(ring->gpu_addr)); | 1048 | upper_32_bits(ring->gpu_addr)); |
| 1042 | 1049 | ||
| 1043 | /* Initialize the ring buffer's read and write pointers */ | 1050 | /* Initialize the ring buffer's read and write pointers */ |
| 1044 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR), 0); | 1051 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); |
| 1045 | 1052 | ||
| 1046 | ring->wptr = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR)); | 1053 | ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); |
| 1047 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), | 1054 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, |
| 1048 | lower_32_bits(ring->wptr)); | 1055 | lower_32_bits(ring->wptr)); |
| 1049 | 1056 | ||
| 1050 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, | 1057 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, |
| 1051 | ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); | 1058 | ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); |
| 1052 | 1059 | ||
| 1053 | ring = &adev->uvd.ring_enc[0]; | 1060 | ring = &adev->uvd.ring_enc[0]; |
| 1054 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR), lower_32_bits(ring->wptr)); | 1061 | WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); |
| 1055 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), lower_32_bits(ring->wptr)); | 1062 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); |
| 1056 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); | 1063 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); |
| 1057 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); | 1064 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); |
| 1058 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); | 1065 | WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); |
| 1059 | 1066 | ||
| 1060 | ring = &adev->uvd.ring_enc[1]; | 1067 | ring = &adev->uvd.ring_enc[1]; |
| 1061 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2), lower_32_bits(ring->wptr)); | 1068 | WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); |
| 1062 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), lower_32_bits(ring->wptr)); | 1069 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); |
| 1063 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO2), ring->gpu_addr); | 1070 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); |
| 1064 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI2), upper_32_bits(ring->gpu_addr)); | 1071 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); |
| 1065 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE2), ring->ring_size / 4); | 1072 | WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); |
| 1066 | 1073 | ||
| 1067 | return 0; | 1074 | return 0; |
| 1068 | } | 1075 | } |
| @@ -1077,7 +1084,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) | |||
| 1077 | static void uvd_v7_0_stop(struct amdgpu_device *adev) | 1084 | static void uvd_v7_0_stop(struct amdgpu_device *adev) |
| 1078 | { | 1085 | { |
| 1079 | /* force RBC into idle state */ | 1086 | /* force RBC into idle state */ |
| 1080 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0x11010101); | 1087 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); |
| 1081 | 1088 | ||
| 1082 | /* Stall UMC and register bus before resetting VCPU */ | 1089 | /* Stall UMC and register bus before resetting VCPU */ |
| 1083 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), | 1090 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), |
| @@ -1086,12 +1093,12 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev) | |||
| 1086 | mdelay(1); | 1093 | mdelay(1); |
| 1087 | 1094 | ||
| 1088 | /* put VCPU into reset */ | 1095 | /* put VCPU into reset */ |
| 1089 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), | 1096 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, |
| 1090 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | 1097 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); |
| 1091 | mdelay(5); | 1098 | mdelay(5); |
| 1092 | 1099 | ||
| 1093 | /* disable VCPU clock */ | 1100 | /* disable VCPU clock */ |
| 1094 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0x0); | 1101 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); |
| 1095 | 1102 | ||
| 1096 | /* Unstall UMC and register bus */ | 1103 | /* Unstall UMC and register bus */ |
| 1097 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, | 1104 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, |
| @@ -1196,7 +1203,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
| 1196 | unsigned i; | 1203 | unsigned i; |
| 1197 | int r; | 1204 | int r; |
| 1198 | 1205 | ||
| 1199 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); | 1206 | WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
| 1200 | r = amdgpu_ring_alloc(ring, 3); | 1207 | r = amdgpu_ring_alloc(ring, 3); |
| 1201 | if (r) { | 1208 | if (r) { |
| 1202 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | 1209 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", |
| @@ -1208,7 +1215,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
| 1208 | amdgpu_ring_write(ring, 0xDEADBEEF); | 1215 | amdgpu_ring_write(ring, 0xDEADBEEF); |
| 1209 | amdgpu_ring_commit(ring); | 1216 | amdgpu_ring_commit(ring); |
| 1210 | for (i = 0; i < adev->usec_timeout; i++) { | 1217 | for (i = 0; i < adev->usec_timeout; i++) { |
| 1211 | tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); | 1218 | tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID); |
| 1212 | if (tmp == 0xDEADBEEF) | 1219 | if (tmp == 0xDEADBEEF) |
| 1213 | break; | 1220 | break; |
| 1214 | DRM_UDELAY(1); | 1221 | DRM_UDELAY(1); |
| @@ -1309,9 +1316,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
| 1309 | uint32_t data0, data1, mask; | 1316 | uint32_t data0, data1, mask; |
| 1310 | unsigned eng = ring->vm_inv_eng; | 1317 | unsigned eng = ring->vm_inv_eng; |
| 1311 | 1318 | ||
| 1312 | pd_addr = pd_addr | 0x1; /* valid bit */ | 1319 | pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); |
| 1313 | /* now only use physical base address of PDE and valid */ | 1320 | pd_addr |= AMDGPU_PTE_VALID; |
| 1314 | BUG_ON(pd_addr & 0xFFFF00000000003EULL); | ||
| 1315 | 1321 | ||
| 1316 | data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; | 1322 | data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; |
| 1317 | data1 = upper_32_bits(pd_addr); | 1323 | data1 = upper_32_bits(pd_addr); |
| @@ -1350,9 +1356,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
| 1350 | uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); | 1356 | uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); |
| 1351 | unsigned eng = ring->vm_inv_eng; | 1357 | unsigned eng = ring->vm_inv_eng; |
| 1352 | 1358 | ||
| 1353 | pd_addr = pd_addr | 0x1; /* valid bit */ | 1359 | pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); |
| 1354 | /* now only use physical base address of PDE and valid */ | 1360 | pd_addr |= AMDGPU_PTE_VALID; |
| 1355 | BUG_ON(pd_addr & 0xFFFF00000000003EULL); | ||
| 1356 | 1361 | ||
| 1357 | amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); | 1362 | amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); |
| 1358 | amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); | 1363 | amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); |
| @@ -1408,8 +1413,8 @@ static bool uvd_v7_0_check_soft_reset(void *handle) | |||
| 1408 | 1413 | ||
| 1409 | if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || | 1414 | if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || |
| 1410 | REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || | 1415 | REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || |
| 1411 | (RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS) & | 1416 | (RREG32_SOC15(UVD, 0, mmUVD_STATUS) & |
| 1412 | AMDGPU_UVD_STATUS_BUSY_MASK))) | 1417 | AMDGPU_UVD_STATUS_BUSY_MASK)) |
| 1413 | srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, | 1418 | srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, |
| 1414 | SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); | 1419 | SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); |
| 1415 | 1420 | ||
| @@ -1516,9 +1521,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) | |||
| 1516 | { | 1521 | { |
| 1517 | uint32_t data, data1, data2, suvd_flags; | 1522 | uint32_t data, data1, data2, suvd_flags; |
| 1518 | 1523 | ||
| 1519 | data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL)); | 1524 | data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL); |
| 1520 | data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); | 1525 | data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); |
| 1521 | data2 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL)); | 1526 | data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL); |
| 1522 | 1527 | ||
| 1523 | data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | | 1528 | data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | |
| 1524 | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); | 1529 | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); |
| @@ -1562,18 +1567,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) | |||
| 1562 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); | 1567 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); |
| 1563 | data1 |= suvd_flags; | 1568 | data1 |= suvd_flags; |
| 1564 | 1569 | ||
| 1565 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), data); | 1570 | WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data); |
| 1566 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), 0); | 1571 | WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0); |
| 1567 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); | 1572 | WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); |
| 1568 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL), data2); | 1573 | WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2); |
| 1569 | } | 1574 | } |
| 1570 | 1575 | ||
| 1571 | static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) | 1576 | static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) |
| 1572 | { | 1577 | { |
| 1573 | uint32_t data, data1, cgc_flags, suvd_flags; | 1578 | uint32_t data, data1, cgc_flags, suvd_flags; |
| 1574 | 1579 | ||
| 1575 | data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE)); | 1580 | data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE); |
| 1576 | data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); | 1581 | data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); |
| 1577 | 1582 | ||
| 1578 | cgc_flags = UVD_CGC_GATE__SYS_MASK | | 1583 | cgc_flags = UVD_CGC_GATE__SYS_MASK | |
| 1579 | UVD_CGC_GATE__UDEC_MASK | | 1584 | UVD_CGC_GATE__UDEC_MASK | |
| @@ -1605,8 +1610,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) | |||
| 1605 | data |= cgc_flags; | 1610 | data |= cgc_flags; |
| 1606 | data1 |= suvd_flags; | 1611 | data1 |= suvd_flags; |
| 1607 | 1612 | ||
| 1608 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), data); | 1613 | WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data); |
| 1609 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); | 1614 | WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); |
| 1610 | } | 1615 | } |
| 1611 | 1616 | ||
| 1612 | static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) | 1617 | static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) |
| @@ -1665,7 +1670,7 @@ static int uvd_v7_0_set_powergating_state(void *handle, | |||
| 1665 | if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) | 1670 | if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) |
| 1666 | return 0; | 1671 | return 0; |
| 1667 | 1672 | ||
| 1668 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), UVD_POWER_STATUS__UVD_PG_EN_MASK); | 1673 | WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); |
| 1669 | 1674 | ||
| 1670 | if (state == AMD_PG_STATE_GATE) { | 1675 | if (state == AMD_PG_STATE_GATE) { |
| 1671 | uvd_v7_0_stop(adev); | 1676 | uvd_v7_0_stop(adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index fb0819359909..90332f55cfba 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | |||
| @@ -77,13 +77,26 @@ static int vce_v3_0_set_clockgating_state(void *handle, | |||
| 77 | static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) | 77 | static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) |
| 78 | { | 78 | { |
| 79 | struct amdgpu_device *adev = ring->adev; | 79 | struct amdgpu_device *adev = ring->adev; |
| 80 | u32 v; | ||
| 81 | |||
| 82 | mutex_lock(&adev->grbm_idx_mutex); | ||
| 83 | if (adev->vce.harvest_config == 0 || | ||
| 84 | adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) | ||
| 85 | WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); | ||
| 86 | else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) | ||
| 87 | WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); | ||
| 80 | 88 | ||
| 81 | if (ring == &adev->vce.ring[0]) | 89 | if (ring == &adev->vce.ring[0]) |
| 82 | return RREG32(mmVCE_RB_RPTR); | 90 | v = RREG32(mmVCE_RB_RPTR); |
| 83 | else if (ring == &adev->vce.ring[1]) | 91 | else if (ring == &adev->vce.ring[1]) |
| 84 | return RREG32(mmVCE_RB_RPTR2); | 92 | v = RREG32(mmVCE_RB_RPTR2); |
| 85 | else | 93 | else |
| 86 | return RREG32(mmVCE_RB_RPTR3); | 94 | v = RREG32(mmVCE_RB_RPTR3); |
| 95 | |||
| 96 | WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); | ||
| 97 | mutex_unlock(&adev->grbm_idx_mutex); | ||
| 98 | |||
| 99 | return v; | ||
| 87 | } | 100 | } |
| 88 | 101 | ||
| 89 | /** | 102 | /** |
| @@ -96,13 +109,26 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) | |||
| 96 | static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) | 109 | static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) |
| 97 | { | 110 | { |
| 98 | struct amdgpu_device *adev = ring->adev; | 111 | struct amdgpu_device *adev = ring->adev; |
| 112 | u32 v; | ||
| 113 | |||
| 114 | mutex_lock(&adev->grbm_idx_mutex); | ||
| 115 | if (adev->vce.harvest_config == 0 || | ||
| 116 | adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) | ||
| 117 | WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); | ||
| 118 | else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) | ||
| 119 | WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); | ||
| 99 | 120 | ||
| 100 | if (ring == &adev->vce.ring[0]) | 121 | if (ring == &adev->vce.ring[0]) |
| 101 | return RREG32(mmVCE_RB_WPTR); | 122 | v = RREG32(mmVCE_RB_WPTR); |
| 102 | else if (ring == &adev->vce.ring[1]) | 123 | else if (ring == &adev->vce.ring[1]) |
| 103 | return RREG32(mmVCE_RB_WPTR2); | 124 | v = RREG32(mmVCE_RB_WPTR2); |
| 104 | else | 125 | else |
| 105 | return RREG32(mmVCE_RB_WPTR3); | 126 | v = RREG32(mmVCE_RB_WPTR3); |
| 127 | |||
| 128 | WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); | ||
| 129 | mutex_unlock(&adev->grbm_idx_mutex); | ||
| 130 | |||
| 131 | return v; | ||
| 106 | } | 132 | } |
| 107 | 133 | ||
| 108 | /** | 134 | /** |
| @@ -116,12 +142,22 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) | |||
| 116 | { | 142 | { |
| 117 | struct amdgpu_device *adev = ring->adev; | 143 | struct amdgpu_device *adev = ring->adev; |
| 118 | 144 | ||
| 145 | mutex_lock(&adev->grbm_idx_mutex); | ||
| 146 | if (adev->vce.harvest_config == 0 || | ||
| 147 | adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) | ||
| 148 | WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); | ||
| 149 | else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) | ||
| 150 | WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); | ||
| 151 | |||
| 119 | if (ring == &adev->vce.ring[0]) | 152 | if (ring == &adev->vce.ring[0]) |
| 120 | WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); | 153 | WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); |
| 121 | else if (ring == &adev->vce.ring[1]) | 154 | else if (ring == &adev->vce.ring[1]) |
| 122 | WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); | 155 | WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); |
| 123 | else | 156 | else |
| 124 | WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); | 157 | WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); |
| 158 | |||
| 159 | WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); | ||
| 160 | mutex_unlock(&adev->grbm_idx_mutex); | ||
| 125 | } | 161 | } |
| 126 | 162 | ||
| 127 | static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) | 163 | static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) |
| @@ -231,33 +267,38 @@ static int vce_v3_0_start(struct amdgpu_device *adev) | |||
| 231 | struct amdgpu_ring *ring; | 267 | struct amdgpu_ring *ring; |
| 232 | int idx, r; | 268 | int idx, r; |
| 233 | 269 | ||
| 234 | ring = &adev->vce.ring[0]; | ||
| 235 | WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); | ||
| 236 | WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); | ||
| 237 | WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); | ||
| 238 | WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); | ||
| 239 | WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); | ||
| 240 | |||
| 241 | ring = &adev->vce.ring[1]; | ||
| 242 | WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); | ||
| 243 | WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); | ||
| 244 | WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); | ||
| 245 | WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); | ||
| 246 | WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); | ||
| 247 | |||
| 248 | ring = &adev->vce.ring[2]; | ||
| 249 | WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); | ||
| 250 | WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); | ||
| 251 | WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); | ||
| 252 | WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); | ||
| 253 | WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); | ||
| 254 | |||
| 255 | mutex_lock(&adev->grbm_idx_mutex); | 270 | mutex_lock(&adev->grbm_idx_mutex); |
| 256 | for (idx = 0; idx < 2; ++idx) { | 271 | for (idx = 0; idx < 2; ++idx) { |
| 257 | if (adev->vce.harvest_config & (1 << idx)) | 272 | if (adev->vce.harvest_config & (1 << idx)) |
| 258 | continue; | 273 | continue; |
| 259 | 274 | ||
| 260 | WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx)); | 275 | WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx)); |
| 276 | |||
| 277 | /* Program instance 0 reg space for two instances or instance 0 case | ||
| 278 | program instance 1 reg space for only instance 1 available case */ | ||
| 279 | if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) { | ||
| 280 | ring = &adev->vce.ring[0]; | ||
| 281 | WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); | ||
| 282 | WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); | ||
| 283 | WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); | ||
| 284 | WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); | ||
| 285 | WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); | ||
| 286 | |||
| 287 | ring = &adev->vce.ring[1]; | ||
| 288 | WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); | ||
| 289 | WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); | ||
| 290 | WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); | ||
| 291 | WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); | ||
| 292 | WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); | ||
| 293 | |||
| 294 | ring = &adev->vce.ring[2]; | ||
| 295 | WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); | ||
| 296 | WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); | ||
| 297 | WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); | ||
| 298 | WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); | ||
| 299 | WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); | ||
| 300 | } | ||
| 301 | |||
| 261 | vce_v3_0_mc_resume(adev, idx); | 302 | vce_v3_0_mc_resume(adev, idx); |
| 262 | WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1); | 303 | WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1); |
| 263 | 304 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 139f964196b4..1ecd6bb90c1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | |||
| @@ -190,6 +190,7 @@ static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, | |||
| 190 | dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); | 190 | dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); |
| 191 | return -EBUSY; | 191 | return -EBUSY; |
| 192 | } | 192 | } |
| 193 | WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); | ||
| 193 | 194 | ||
| 194 | return 0; | 195 | return 0; |
| 195 | } | 196 | } |
| @@ -418,15 +419,19 @@ static int vce_v4_0_sw_init(void *handle) | |||
| 418 | 419 | ||
| 419 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { | 420 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 420 | const struct common_firmware_header *hdr; | 421 | const struct common_firmware_header *hdr; |
| 422 | unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); | ||
| 423 | |||
| 424 | adev->vce.saved_bo = kmalloc(size, GFP_KERNEL); | ||
| 425 | if (!adev->vce.saved_bo) | ||
| 426 | return -ENOMEM; | ||
| 427 | |||
| 421 | hdr = (const struct common_firmware_header *)adev->vce.fw->data; | 428 | hdr = (const struct common_firmware_header *)adev->vce.fw->data; |
| 422 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; | 429 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; |
| 423 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; | 430 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; |
| 424 | adev->firmware.fw_size += | 431 | adev->firmware.fw_size += |
| 425 | ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); | 432 | ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); |
| 426 | DRM_INFO("PSP loading VCE firmware\n"); | 433 | DRM_INFO("PSP loading VCE firmware\n"); |
| 427 | } | 434 | } else { |
| 428 | |||
| 429 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { | ||
| 430 | r = amdgpu_vce_resume(adev); | 435 | r = amdgpu_vce_resume(adev); |
| 431 | if (r) | 436 | if (r) |
| 432 | return r; | 437 | return r; |
| @@ -465,6 +470,11 @@ static int vce_v4_0_sw_fini(void *handle) | |||
| 465 | /* free MM table */ | 470 | /* free MM table */ |
| 466 | amdgpu_virt_free_mm_table(adev); | 471 | amdgpu_virt_free_mm_table(adev); |
| 467 | 472 | ||
| 473 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { | ||
| 474 | kfree(adev->vce.saved_bo); | ||
| 475 | adev->vce.saved_bo = NULL; | ||
| 476 | } | ||
| 477 | |||
| 468 | r = amdgpu_vce_suspend(adev); | 478 | r = amdgpu_vce_suspend(adev); |
| 469 | if (r) | 479 | if (r) |
| 470 | return r; | 480 | return r; |
| @@ -505,8 +515,14 @@ static int vce_v4_0_hw_fini(void *handle) | |||
| 505 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 515 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 506 | int i; | 516 | int i; |
| 507 | 517 | ||
| 508 | /* vce_v4_0_wait_for_idle(handle); */ | 518 | if (!amdgpu_sriov_vf(adev)) { |
| 509 | vce_v4_0_stop(adev); | 519 | /* vce_v4_0_wait_for_idle(handle); */ |
| 520 | vce_v4_0_stop(adev); | ||
| 521 | } else { | ||
| 522 | /* full access mode, so don't touch any VCE register */ | ||
| 523 | DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); | ||
| 524 | } | ||
| 525 | |||
| 510 | for (i = 0; i < adev->vce.num_rings; i++) | 526 | for (i = 0; i < adev->vce.num_rings; i++) |
| 511 | adev->vce.ring[i].ready = false; | 527 | adev->vce.ring[i].ready = false; |
| 512 | 528 | ||
| @@ -515,8 +531,18 @@ static int vce_v4_0_hw_fini(void *handle) | |||
| 515 | 531 | ||
| 516 | static int vce_v4_0_suspend(void *handle) | 532 | static int vce_v4_0_suspend(void *handle) |
| 517 | { | 533 | { |
| 518 | int r; | ||
| 519 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 534 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 535 | int r; | ||
| 536 | |||
| 537 | if (adev->vce.vcpu_bo == NULL) | ||
| 538 | return 0; | ||
| 539 | |||
| 540 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { | ||
| 541 | unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); | ||
| 542 | void *ptr = adev->vce.cpu_addr; | ||
| 543 | |||
| 544 | memcpy_fromio(adev->vce.saved_bo, ptr, size); | ||
| 545 | } | ||
| 520 | 546 | ||
| 521 | r = vce_v4_0_hw_fini(adev); | 547 | r = vce_v4_0_hw_fini(adev); |
| 522 | if (r) | 548 | if (r) |
| @@ -527,12 +553,22 @@ static int vce_v4_0_suspend(void *handle) | |||
| 527 | 553 | ||
| 528 | static int vce_v4_0_resume(void *handle) | 554 | static int vce_v4_0_resume(void *handle) |
| 529 | { | 555 | { |
| 530 | int r; | ||
| 531 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 556 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 557 | int r; | ||
| 532 | 558 | ||
| 533 | r = amdgpu_vce_resume(adev); | 559 | if (adev->vce.vcpu_bo == NULL) |
| 534 | if (r) | 560 | return -EINVAL; |
| 535 | return r; | 561 | |
| 562 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { | ||
| 563 | unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); | ||
| 564 | void *ptr = adev->vce.cpu_addr; | ||
| 565 | |||
| 566 | memcpy_toio(ptr, adev->vce.saved_bo, size); | ||
| 567 | } else { | ||
| 568 | r = amdgpu_vce_resume(adev); | ||
| 569 | if (r) | ||
| 570 | return r; | ||
| 571 | } | ||
| 536 | 572 | ||
| 537 | return vce_v4_0_hw_init(adev); | 573 | return vce_v4_0_hw_init(adev); |
| 538 | } | 574 | } |
| @@ -919,9 +955,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, | |||
| 919 | uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); | 955 | uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); |
| 920 | unsigned eng = ring->vm_inv_eng; | 956 | unsigned eng = ring->vm_inv_eng; |
| 921 | 957 | ||
| 922 | pd_addr = pd_addr | 0x1; /* valid bit */ | 958 | pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); |
| 923 | /* now only use physical base address of PDE and valid */ | 959 | pd_addr |= AMDGPU_PTE_VALID; |
| 924 | BUG_ON(pd_addr & 0xFFFF00000000003EULL); | ||
| 925 | 960 | ||
| 926 | amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); | 961 | amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); |
| 927 | amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); | 962 | amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); |
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c new file mode 100644 index 000000000000..21e7b88401e1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | |||
| @@ -0,0 +1,1189 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2016 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | |||
| 24 | #include <linux/firmware.h> | ||
| 25 | #include <drm/drmP.h> | ||
| 26 | #include "amdgpu.h" | ||
| 27 | #include "amdgpu_vcn.h" | ||
| 28 | #include "soc15d.h" | ||
| 29 | #include "soc15_common.h" | ||
| 30 | |||
| 31 | #include "vega10/soc15ip.h" | ||
| 32 | #include "raven1/VCN/vcn_1_0_offset.h" | ||
| 33 | #include "raven1/VCN/vcn_1_0_sh_mask.h" | ||
| 34 | #include "vega10/HDP/hdp_4_0_offset.h" | ||
| 35 | #include "raven1/MMHUB/mmhub_9_1_offset.h" | ||
| 36 | #include "raven1/MMHUB/mmhub_9_1_sh_mask.h" | ||
| 37 | |||
| 38 | static int vcn_v1_0_start(struct amdgpu_device *adev); | ||
| 39 | static int vcn_v1_0_stop(struct amdgpu_device *adev); | ||
| 40 | static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); | ||
| 41 | static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); | ||
| 42 | static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); | ||
| 43 | |||
| 44 | /** | ||
| 45 | * vcn_v1_0_early_init - set function pointers | ||
| 46 | * | ||
| 47 | * @handle: amdgpu_device pointer | ||
| 48 | * | ||
| 49 | * Set ring and irq function pointers | ||
| 50 | */ | ||
| 51 | static int vcn_v1_0_early_init(void *handle) | ||
| 52 | { | ||
| 53 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 54 | |||
| 55 | adev->vcn.num_enc_rings = 2; | ||
| 56 | |||
| 57 | vcn_v1_0_set_dec_ring_funcs(adev); | ||
| 58 | vcn_v1_0_set_enc_ring_funcs(adev); | ||
| 59 | vcn_v1_0_set_irq_funcs(adev); | ||
| 60 | |||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | /** | ||
| 65 | * vcn_v1_0_sw_init - sw init for VCN block | ||
| 66 | * | ||
| 67 | * @handle: amdgpu_device pointer | ||
| 68 | * | ||
| 69 | * Load firmware and sw initialization | ||
| 70 | */ | ||
| 71 | static int vcn_v1_0_sw_init(void *handle) | ||
| 72 | { | ||
| 73 | struct amdgpu_ring *ring; | ||
| 74 | int i, r; | ||
| 75 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 76 | |||
| 77 | /* VCN DEC TRAP */ | ||
| 78 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, 124, &adev->vcn.irq); | ||
| 79 | if (r) | ||
| 80 | return r; | ||
| 81 | |||
| 82 | /* VCN ENC TRAP */ | ||
| 83 | for (i = 0; i < adev->vcn.num_enc_rings; ++i) { | ||
| 84 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, i + 119, | ||
| 85 | &adev->vcn.irq); | ||
| 86 | if (r) | ||
| 87 | return r; | ||
| 88 | } | ||
| 89 | |||
| 90 | r = amdgpu_vcn_sw_init(adev); | ||
| 91 | if (r) | ||
| 92 | return r; | ||
| 93 | |||
| 94 | r = amdgpu_vcn_resume(adev); | ||
| 95 | if (r) | ||
| 96 | return r; | ||
| 97 | |||
| 98 | ring = &adev->vcn.ring_dec; | ||
| 99 | sprintf(ring->name, "vcn_dec"); | ||
| 100 | r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); | ||
| 101 | if (r) | ||
| 102 | return r; | ||
| 103 | |||
| 104 | for (i = 0; i < adev->vcn.num_enc_rings; ++i) { | ||
| 105 | ring = &adev->vcn.ring_enc[i]; | ||
| 106 | sprintf(ring->name, "vcn_enc%d", i); | ||
| 107 | r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); | ||
| 108 | if (r) | ||
| 109 | return r; | ||
| 110 | } | ||
| 111 | |||
| 112 | return r; | ||
| 113 | } | ||
| 114 | |||
| 115 | /** | ||
| 116 | * vcn_v1_0_sw_fini - sw fini for VCN block | ||
| 117 | * | ||
| 118 | * @handle: amdgpu_device pointer | ||
| 119 | * | ||
| 120 | * VCN suspend and free up sw allocation | ||
| 121 | */ | ||
| 122 | static int vcn_v1_0_sw_fini(void *handle) | ||
| 123 | { | ||
| 124 | int r; | ||
| 125 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 126 | |||
| 127 | r = amdgpu_vcn_suspend(adev); | ||
| 128 | if (r) | ||
| 129 | return r; | ||
| 130 | |||
| 131 | r = amdgpu_vcn_sw_fini(adev); | ||
| 132 | |||
| 133 | return r; | ||
| 134 | } | ||
| 135 | |||
| 136 | /** | ||
| 137 | * vcn_v1_0_hw_init - start and test VCN block | ||
| 138 | * | ||
| 139 | * @handle: amdgpu_device pointer | ||
| 140 | * | ||
| 141 | * Initialize the hardware, boot up the VCPU and do some testing | ||
| 142 | */ | ||
| 143 | static int vcn_v1_0_hw_init(void *handle) | ||
| 144 | { | ||
| 145 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 146 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; | ||
| 147 | int i, r; | ||
| 148 | |||
| 149 | r = vcn_v1_0_start(adev); | ||
| 150 | if (r) | ||
| 151 | goto done; | ||
| 152 | |||
| 153 | ring->ready = true; | ||
| 154 | r = amdgpu_ring_test_ring(ring); | ||
| 155 | if (r) { | ||
| 156 | ring->ready = false; | ||
| 157 | goto done; | ||
| 158 | } | ||
| 159 | |||
| 160 | for (i = 0; i < adev->vcn.num_enc_rings; ++i) { | ||
| 161 | ring = &adev->vcn.ring_enc[i]; | ||
| 162 | ring->ready = true; | ||
| 163 | r = amdgpu_ring_test_ring(ring); | ||
| 164 | if (r) { | ||
| 165 | ring->ready = false; | ||
| 166 | goto done; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | done: | ||
| 171 | if (!r) | ||
| 172 | DRM_INFO("VCN decode and encode initialized successfully.\n"); | ||
| 173 | |||
| 174 | return r; | ||
| 175 | } | ||
| 176 | |||
| 177 | /** | ||
| 178 | * vcn_v1_0_hw_fini - stop the hardware block | ||
| 179 | * | ||
| 180 | * @handle: amdgpu_device pointer | ||
| 181 | * | ||
| 182 | * Stop the VCN block, mark ring as not ready any more | ||
| 183 | */ | ||
| 184 | static int vcn_v1_0_hw_fini(void *handle) | ||
| 185 | { | ||
| 186 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 187 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; | ||
| 188 | int r; | ||
| 189 | |||
| 190 | r = vcn_v1_0_stop(adev); | ||
| 191 | if (r) | ||
| 192 | return r; | ||
| 193 | |||
| 194 | ring->ready = false; | ||
| 195 | |||
| 196 | return 0; | ||
| 197 | } | ||
| 198 | |||
| 199 | /** | ||
| 200 | * vcn_v1_0_suspend - suspend VCN block | ||
| 201 | * | ||
| 202 | * @handle: amdgpu_device pointer | ||
| 203 | * | ||
| 204 | * HW fini and suspend VCN block | ||
| 205 | */ | ||
| 206 | static int vcn_v1_0_suspend(void *handle) | ||
| 207 | { | ||
| 208 | int r; | ||
| 209 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 210 | |||
| 211 | r = vcn_v1_0_hw_fini(adev); | ||
| 212 | if (r) | ||
| 213 | return r; | ||
| 214 | |||
| 215 | r = amdgpu_vcn_suspend(adev); | ||
| 216 | |||
| 217 | return r; | ||
| 218 | } | ||
| 219 | |||
| 220 | /** | ||
| 221 | * vcn_v1_0_resume - resume VCN block | ||
| 222 | * | ||
| 223 | * @handle: amdgpu_device pointer | ||
| 224 | * | ||
| 225 | * Resume firmware and hw init VCN block | ||
| 226 | */ | ||
| 227 | static int vcn_v1_0_resume(void *handle) | ||
| 228 | { | ||
| 229 | int r; | ||
| 230 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 231 | |||
| 232 | r = amdgpu_vcn_resume(adev); | ||
| 233 | if (r) | ||
| 234 | return r; | ||
| 235 | |||
| 236 | r = vcn_v1_0_hw_init(adev); | ||
| 237 | |||
| 238 | return r; | ||
| 239 | } | ||
| 240 | |||
| 241 | /** | ||
| 242 | * vcn_v1_0_mc_resume - memory controller programming | ||
| 243 | * | ||
| 244 | * @adev: amdgpu_device pointer | ||
| 245 | * | ||
| 246 | * Let the VCN memory controller know it's offsets | ||
| 247 | */ | ||
| 248 | static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) | ||
| 249 | { | ||
| 250 | uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); | ||
| 251 | |||
| 252 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, | ||
| 253 | lower_32_bits(adev->vcn.gpu_addr)); | ||
| 254 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, | ||
| 255 | upper_32_bits(adev->vcn.gpu_addr)); | ||
| 256 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, | ||
| 257 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); | ||
| 258 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); | ||
| 259 | |||
| 260 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, | ||
| 261 | lower_32_bits(adev->vcn.gpu_addr + size)); | ||
| 262 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, | ||
| 263 | upper_32_bits(adev->vcn.gpu_addr + size)); | ||
| 264 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); | ||
| 265 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE); | ||
| 266 | |||
| 267 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, | ||
| 268 | lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); | ||
| 269 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, | ||
| 270 | upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); | ||
| 271 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); | ||
| 272 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, | ||
| 273 | AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40)); | ||
| 274 | |||
| 275 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, | ||
| 276 | adev->gfx.config.gb_addr_config); | ||
| 277 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, | ||
| 278 | adev->gfx.config.gb_addr_config); | ||
| 279 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, | ||
| 280 | adev->gfx.config.gb_addr_config); | ||
| 281 | } | ||
| 282 | |||
| 283 | /** | ||
| 284 | * vcn_v1_0_disable_clock_gating - disable VCN clock gating | ||
| 285 | * | ||
| 286 | * @adev: amdgpu_device pointer | ||
| 287 | * @sw: enable SW clock gating | ||
| 288 | * | ||
| 289 | * Disable clock gating for VCN block | ||
| 290 | */ | ||
| 291 | static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) | ||
| 292 | { | ||
| 293 | uint32_t data; | ||
| 294 | |||
| 295 | /* JPEG disable CGC */ | ||
| 296 | data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); | ||
| 297 | |||
| 298 | if (sw) | ||
| 299 | data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | ||
| 300 | else | ||
| 301 | data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; | ||
| 302 | |||
| 303 | data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; | ||
| 304 | data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; | ||
| 305 | WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data); | ||
| 306 | |||
| 307 | data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); | ||
| 308 | data &= ~(JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK); | ||
| 309 | WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data); | ||
| 310 | |||
| 311 | /* UVD disable CGC */ | ||
| 312 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); | ||
| 313 | if (sw) | ||
| 314 | data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | ||
| 315 | else | ||
| 316 | data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; | ||
| 317 | |||
| 318 | data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; | ||
| 319 | data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; | ||
| 320 | WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); | ||
| 321 | |||
| 322 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE); | ||
| 323 | data &= ~(UVD_CGC_GATE__SYS_MASK | ||
| 324 | | UVD_CGC_GATE__UDEC_MASK | ||
| 325 | | UVD_CGC_GATE__MPEG2_MASK | ||
| 326 | | UVD_CGC_GATE__REGS_MASK | ||
| 327 | | UVD_CGC_GATE__RBC_MASK | ||
| 328 | | UVD_CGC_GATE__LMI_MC_MASK | ||
| 329 | | UVD_CGC_GATE__LMI_UMC_MASK | ||
| 330 | | UVD_CGC_GATE__IDCT_MASK | ||
| 331 | | UVD_CGC_GATE__MPRD_MASK | ||
| 332 | | UVD_CGC_GATE__MPC_MASK | ||
| 333 | | UVD_CGC_GATE__LBSI_MASK | ||
| 334 | | UVD_CGC_GATE__LRBBM_MASK | ||
| 335 | | UVD_CGC_GATE__UDEC_RE_MASK | ||
| 336 | | UVD_CGC_GATE__UDEC_CM_MASK | ||
| 337 | | UVD_CGC_GATE__UDEC_IT_MASK | ||
| 338 | | UVD_CGC_GATE__UDEC_DB_MASK | ||
| 339 | | UVD_CGC_GATE__UDEC_MP_MASK | ||
| 340 | | UVD_CGC_GATE__WCB_MASK | ||
| 341 | | UVD_CGC_GATE__VCPU_MASK | ||
| 342 | | UVD_CGC_GATE__SCPU_MASK); | ||
| 343 | WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data); | ||
| 344 | |||
| 345 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); | ||
| 346 | data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | ||
| 347 | | UVD_CGC_CTRL__UDEC_CM_MODE_MASK | ||
| 348 | | UVD_CGC_CTRL__UDEC_IT_MODE_MASK | ||
| 349 | | UVD_CGC_CTRL__UDEC_DB_MODE_MASK | ||
| 350 | | UVD_CGC_CTRL__UDEC_MP_MODE_MASK | ||
| 351 | | UVD_CGC_CTRL__SYS_MODE_MASK | ||
| 352 | | UVD_CGC_CTRL__UDEC_MODE_MASK | ||
| 353 | | UVD_CGC_CTRL__MPEG2_MODE_MASK | ||
| 354 | | UVD_CGC_CTRL__REGS_MODE_MASK | ||
| 355 | | UVD_CGC_CTRL__RBC_MODE_MASK | ||
| 356 | | UVD_CGC_CTRL__LMI_MC_MODE_MASK | ||
| 357 | | UVD_CGC_CTRL__LMI_UMC_MODE_MASK | ||
| 358 | | UVD_CGC_CTRL__IDCT_MODE_MASK | ||
| 359 | | UVD_CGC_CTRL__MPRD_MODE_MASK | ||
| 360 | | UVD_CGC_CTRL__MPC_MODE_MASK | ||
| 361 | | UVD_CGC_CTRL__LBSI_MODE_MASK | ||
| 362 | | UVD_CGC_CTRL__LRBBM_MODE_MASK | ||
| 363 | | UVD_CGC_CTRL__WCB_MODE_MASK | ||
| 364 | | UVD_CGC_CTRL__VCPU_MODE_MASK | ||
| 365 | | UVD_CGC_CTRL__SCPU_MODE_MASK); | ||
| 366 | WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); | ||
| 367 | |||
| 368 | /* turn on */ | ||
| 369 | data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE); | ||
| 370 | data |= (UVD_SUVD_CGC_GATE__SRE_MASK | ||
| 371 | | UVD_SUVD_CGC_GATE__SIT_MASK | ||
| 372 | | UVD_SUVD_CGC_GATE__SMP_MASK | ||
| 373 | | UVD_SUVD_CGC_GATE__SCM_MASK | ||
| 374 | | UVD_SUVD_CGC_GATE__SDB_MASK | ||
| 375 | | UVD_SUVD_CGC_GATE__SRE_H264_MASK | ||
| 376 | | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK | ||
| 377 | | UVD_SUVD_CGC_GATE__SIT_H264_MASK | ||
| 378 | | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK | ||
| 379 | | UVD_SUVD_CGC_GATE__SCM_H264_MASK | ||
| 380 | | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK | ||
| 381 | | UVD_SUVD_CGC_GATE__SDB_H264_MASK | ||
| 382 | | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK | ||
| 383 | | UVD_SUVD_CGC_GATE__SCLR_MASK | ||
| 384 | | UVD_SUVD_CGC_GATE__UVD_SC_MASK | ||
| 385 | | UVD_SUVD_CGC_GATE__ENT_MASK | ||
| 386 | | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK | ||
| 387 | | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK | ||
| 388 | | UVD_SUVD_CGC_GATE__SITE_MASK | ||
| 389 | | UVD_SUVD_CGC_GATE__SRE_VP9_MASK | ||
| 390 | | UVD_SUVD_CGC_GATE__SCM_VP9_MASK | ||
| 391 | | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK | ||
| 392 | | UVD_SUVD_CGC_GATE__SDB_VP9_MASK | ||
| 393 | | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); | ||
| 394 | WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data); | ||
| 395 | |||
| 396 | data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); | ||
| 397 | data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | ||
| 398 | | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | ||
| 399 | | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK | ||
| 400 | | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK | ||
| 401 | | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK | ||
| 402 | | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK | ||
| 403 | | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK | ||
| 404 | | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK | ||
| 405 | | UVD_SUVD_CGC_CTRL__IME_MODE_MASK | ||
| 406 | | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); | ||
| 407 | WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); | ||
| 408 | } | ||
| 409 | |||
| 410 | /** | ||
| 411 | * vcn_v1_0_enable_clock_gating - enable VCN clock gating | ||
| 412 | * | ||
| 413 | * @adev: amdgpu_device pointer | ||
| 414 | * @sw: enable SW clock gating | ||
| 415 | * | ||
| 416 | * Enable clock gating for VCN block | ||
| 417 | */ | ||
| 418 | static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) | ||
| 419 | { | ||
| 420 | uint32_t data = 0; | ||
| 421 | |||
| 422 | /* enable JPEG CGC */ | ||
| 423 | data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); | ||
| 424 | if (sw) | ||
| 425 | data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | ||
| 426 | else | ||
| 427 | data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | ||
| 428 | data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; | ||
| 429 | data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; | ||
| 430 | WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data); | ||
| 431 | |||
| 432 | data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); | ||
| 433 | data |= (JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK); | ||
| 434 | WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data); | ||
| 435 | |||
| 436 | /* enable UVD CGC */ | ||
| 437 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); | ||
| 438 | if (sw) | ||
| 439 | data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | ||
| 440 | else | ||
| 441 | data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | ||
| 442 | data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; | ||
| 443 | data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; | ||
| 444 | WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); | ||
| 445 | |||
| 446 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); | ||
| 447 | data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK | ||
| 448 | | UVD_CGC_CTRL__UDEC_CM_MODE_MASK | ||
| 449 | | UVD_CGC_CTRL__UDEC_IT_MODE_MASK | ||
| 450 | | UVD_CGC_CTRL__UDEC_DB_MODE_MASK | ||
| 451 | | UVD_CGC_CTRL__UDEC_MP_MODE_MASK | ||
| 452 | | UVD_CGC_CTRL__SYS_MODE_MASK | ||
| 453 | | UVD_CGC_CTRL__UDEC_MODE_MASK | ||
| 454 | | UVD_CGC_CTRL__MPEG2_MODE_MASK | ||
| 455 | | UVD_CGC_CTRL__REGS_MODE_MASK | ||
| 456 | | UVD_CGC_CTRL__RBC_MODE_MASK | ||
| 457 | | UVD_CGC_CTRL__LMI_MC_MODE_MASK | ||
| 458 | | UVD_CGC_CTRL__LMI_UMC_MODE_MASK | ||
| 459 | | UVD_CGC_CTRL__IDCT_MODE_MASK | ||
| 460 | | UVD_CGC_CTRL__MPRD_MODE_MASK | ||
| 461 | | UVD_CGC_CTRL__MPC_MODE_MASK | ||
| 462 | | UVD_CGC_CTRL__LBSI_MODE_MASK | ||
| 463 | | UVD_CGC_CTRL__LRBBM_MODE_MASK | ||
| 464 | | UVD_CGC_CTRL__WCB_MODE_MASK | ||
| 465 | | UVD_CGC_CTRL__VCPU_MODE_MASK | ||
| 466 | | UVD_CGC_CTRL__SCPU_MODE_MASK); | ||
| 467 | WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); | ||
| 468 | |||
| 469 | data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); | ||
| 470 | data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | ||
| 471 | | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | ||
| 472 | | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK | ||
| 473 | | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK | ||
| 474 | | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK | ||
| 475 | | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK | ||
| 476 | | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK | ||
| 477 | | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK | ||
| 478 | | UVD_SUVD_CGC_CTRL__IME_MODE_MASK | ||
| 479 | | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); | ||
| 480 | WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); | ||
| 481 | } | ||
| 482 | |||
| 483 | /** | ||
| 484 | * vcn_v1_0_start - start VCN block | ||
| 485 | * | ||
| 486 | * @adev: amdgpu_device pointer | ||
| 487 | * | ||
| 488 | * Setup and start the VCN block | ||
| 489 | */ | ||
| 490 | static int vcn_v1_0_start(struct amdgpu_device *adev) | ||
| 491 | { | ||
| 492 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; | ||
| 493 | uint32_t rb_bufsz, tmp; | ||
| 494 | uint32_t lmi_swap_cntl; | ||
| 495 | int i, j, r; | ||
| 496 | |||
| 497 | /* disable byte swapping */ | ||
| 498 | lmi_swap_cntl = 0; | ||
| 499 | |||
| 500 | vcn_v1_0_mc_resume(adev); | ||
| 501 | |||
| 502 | /* disable clock gating */ | ||
| 503 | vcn_v1_0_disable_clock_gating(adev, true); | ||
| 504 | |||
| 505 | /* disable interupt */ | ||
| 506 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, | ||
| 507 | ~UVD_MASTINT_EN__VCPU_EN_MASK); | ||
| 508 | |||
| 509 | /* stall UMC and register bus before resetting VCPU */ | ||
| 510 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), | ||
| 511 | UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, | ||
| 512 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 513 | mdelay(1); | ||
| 514 | |||
| 515 | /* put LMI, VCPU, RBC etc... into reset */ | ||
| 516 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, | ||
| 517 | UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | | ||
| 518 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | | ||
| 519 | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | | ||
| 520 | UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | | ||
| 521 | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | | ||
| 522 | UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | | ||
| 523 | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | | ||
| 524 | UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); | ||
| 525 | mdelay(5); | ||
| 526 | |||
| 527 | /* initialize VCN memory controller */ | ||
| 528 | WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, | ||
| 529 | (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | | ||
| 530 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | | ||
| 531 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | | ||
| 532 | UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | | ||
| 533 | UVD_LMI_CTRL__REQ_MODE_MASK | | ||
| 534 | 0x00100000L); | ||
| 535 | |||
| 536 | #ifdef __BIG_ENDIAN | ||
| 537 | /* swap (8 in 32) RB and IB */ | ||
| 538 | lmi_swap_cntl = 0xa; | ||
| 539 | #endif | ||
| 540 | WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); | ||
| 541 | |||
| 542 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); | ||
| 543 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); | ||
| 544 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); | ||
| 545 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); | ||
| 546 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); | ||
| 547 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); | ||
| 548 | |||
| 549 | /* take all subblocks out of reset, except VCPU */ | ||
| 550 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, | ||
| 551 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 552 | mdelay(5); | ||
| 553 | |||
| 554 | /* enable VCPU clock */ | ||
| 555 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, | ||
| 556 | UVD_VCPU_CNTL__CLK_EN_MASK); | ||
| 557 | |||
| 558 | /* enable UMC */ | ||
| 559 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, | ||
| 560 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 561 | |||
| 562 | /* boot up the VCPU */ | ||
| 563 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); | ||
| 564 | mdelay(10); | ||
| 565 | |||
| 566 | for (i = 0; i < 10; ++i) { | ||
| 567 | uint32_t status; | ||
| 568 | |||
| 569 | for (j = 0; j < 100; ++j) { | ||
| 570 | status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); | ||
| 571 | if (status & 2) | ||
| 572 | break; | ||
| 573 | mdelay(10); | ||
| 574 | } | ||
| 575 | r = 0; | ||
| 576 | if (status & 2) | ||
| 577 | break; | ||
| 578 | |||
| 579 | DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); | ||
| 580 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), | ||
| 581 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, | ||
| 582 | ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 583 | mdelay(10); | ||
| 584 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, | ||
| 585 | ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 586 | mdelay(10); | ||
| 587 | r = -1; | ||
| 588 | } | ||
| 589 | |||
| 590 | if (r) { | ||
| 591 | DRM_ERROR("VCN decode not responding, giving up!!!\n"); | ||
| 592 | return r; | ||
| 593 | } | ||
| 594 | /* enable master interrupt */ | ||
| 595 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), | ||
| 596 | (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), | ||
| 597 | ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); | ||
| 598 | |||
| 599 | /* clear the bit 4 of VCN_STATUS */ | ||
| 600 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, | ||
| 601 | ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); | ||
| 602 | |||
| 603 | /* force RBC into idle state */ | ||
| 604 | rb_bufsz = order_base_2(ring->ring_size); | ||
| 605 | tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); | ||
| 606 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); | ||
| 607 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); | ||
| 608 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); | ||
| 609 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); | ||
| 610 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); | ||
| 611 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); | ||
| 612 | |||
| 613 | /* set the write pointer delay */ | ||
| 614 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); | ||
| 615 | |||
| 616 | /* set the wb address */ | ||
| 617 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, | ||
| 618 | (upper_32_bits(ring->gpu_addr) >> 2)); | ||
| 619 | |||
| 620 | /* programm the RB_BASE for ring buffer */ | ||
| 621 | WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, | ||
| 622 | lower_32_bits(ring->gpu_addr)); | ||
| 623 | WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, | ||
| 624 | upper_32_bits(ring->gpu_addr)); | ||
| 625 | |||
| 626 | /* Initialize the ring buffer's read and write pointers */ | ||
| 627 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); | ||
| 628 | |||
| 629 | ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); | ||
| 630 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, | ||
| 631 | lower_32_bits(ring->wptr)); | ||
| 632 | |||
| 633 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, | ||
| 634 | ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); | ||
| 635 | |||
| 636 | ring = &adev->vcn.ring_enc[0]; | ||
| 637 | WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); | ||
| 638 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); | ||
| 639 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); | ||
| 640 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); | ||
| 641 | WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); | ||
| 642 | |||
| 643 | ring = &adev->vcn.ring_enc[1]; | ||
| 644 | WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); | ||
| 645 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); | ||
| 646 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); | ||
| 647 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); | ||
| 648 | WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); | ||
| 649 | |||
| 650 | return 0; | ||
| 651 | } | ||
| 652 | |||
/**
 * vcn_v1_0_stop - stop VCN block
 *
 * @adev: amdgpu_device pointer
 *
 * stop the VCN block
 *
 * Quiesces the ring buffer controller, stalls the memory arbiter,
 * resets and clock-gates the VCPU, then re-enables clock gating.
 * The ordering of the register writes below is part of the hardware
 * shutdown sequence and must not be changed.
 *
 * Returns 0 (the sequence cannot fail).
 */
static int vcn_v1_0_stop(struct amdgpu_device *adev)
{
	/* force RBC into idle state */
	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);

	/* Stall UMC and register bus before resetting VCPU */
	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
		UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
		~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
	mdelay(1);

	/* put VCPU into reset */
	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
	mdelay(5);

	/* disable VCPU clock */
	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);

	/* Unstall UMC and register bus */
	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
		~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

	/* enable clock gating */
	vcn_v1_0_enable_clock_gating(adev, true);

	return 0;
}
| 688 | |||
/**
 * vcn_v1_0_set_clockgating_state - set VCN clock gating state
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 * @state: requested clockgating state (ignored)
 *
 * Intentional stub: clock gating for VCN 1.0 is driven from the
 * start/stop paths instead.  The callback must still exist because
 * the core calls it during driver unload.
 */
static int vcn_v1_0_set_clockgating_state(void *handle,
		enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}
| 695 | |||
/**
 * vcn_v1_0_dec_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vcn_v1_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
	/* adev is referenced implicitly by the RREG32_SOC15() macro */
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
}
| 709 | |||
/**
 * vcn_v1_0_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vcn_v1_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	/* adev is referenced implicitly by the RREG32_SOC15() macro */
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
}
| 723 | |||
/**
 * vcn_v1_0_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware.  Only the low 32 bits
 * are written; this ring does not use 64-bit pointers (see
 * .support_64bit_ptrs in vcn_v1_0_dec_ring_vm_funcs).
 */
static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
| 737 | |||
/**
 * vcn_v1_0_dec_ring_insert_start - insert a start command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write a start command to the ring: a zero DATA0 payload followed
 * by a PACKET_START command written through GPCOM_VCPU_CMD.
 */
static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1);
}
| 754 | |||
/**
 * vcn_v1_0_dec_ring_insert_end - insert an end command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write an end command to the ring (PACKET_END via GPCOM_VCPU_CMD).
 */
static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1);
}
| 768 | |||
/**
 * vcn_v1_0_dec_ring_emit_fence - emit a fence & trap command
 *
 * @ring: amdgpu_ring pointer
 * @addr: GPU address the fence value is written to
 * @seq: sequence number of the fence
 * @flags: fence flags; 64-bit fences are not supported on this ring
 *
 * Write a fence and a trap command to the ring.  The sequence number
 * goes into CONTEXT_ID, the fence address into DATA0/DATA1 (only the
 * low 8 bits of the upper word are used), followed by a FENCE command
 * and then a TRAP command with zeroed data registers.
 */
static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				     unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
	amdgpu_ring_write(ring, addr & 0xffffffff);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
	amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1);

	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
	amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1);
}
| 805 | |||
/**
 * vcn_v1_0_dec_ring_emit_hdp_invalidate - emit an hdp invalidate
 *
 * @ring: amdgpu_ring pointer
 *
 * Emits an hdp invalidate by writing 1 to HDP_DEBUG0.
 */
static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0));
	amdgpu_ring_write(ring, 1);
}
| 818 | |||
/**
 * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VMID the IB executes under
 * @ctx_switch: unused on this ring
 *
 * Write ring commands to execute the indirect buffer: the VMID, the
 * 64-bit IB address split across the LMI BAR registers, and its size
 * in dwords.
 */
static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib,
				  unsigned vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
	amdgpu_ring_write(ring, vm_id);

	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
	amdgpu_ring_write(ring, ib->length_dw);
}
| 845 | |||
/**
 * vcn_v1_0_dec_vm_reg_write - emit a register write via GPCOM
 *
 * @ring: amdgpu_ring pointer
 * @data0: byte offset of the register to write (DATA0 payload)
 * @data1: value to write (DATA1 payload)
 *
 * Helper for vcn_v1_0_dec_ring_emit_vm_flush(): asks the VCPU to
 * perform a register write on our behalf.
 */
static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring,
				uint32_t data0, uint32_t data1)
{
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
	amdgpu_ring_write(ring, data0);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
	amdgpu_ring_write(ring, data1);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
	amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
}
| 859 | |||
/**
 * vcn_v1_0_dec_vm_reg_wait - emit a conditional register wait via GPCOM
 *
 * @ring: amdgpu_ring pointer
 * @data0: byte offset of the register to poll (DATA0 payload)
 * @data1: value to compare against (DATA1 payload)
 * @mask: bit mask applied before the comparison (GP_SCRATCH8)
 *
 * Helper for vcn_v1_0_dec_ring_emit_vm_flush(): stalls the ring until
 * (reg & mask) matches data1.
 */
static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
				uint32_t data0, uint32_t data1, uint32_t mask)
{
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
	amdgpu_ring_write(ring, data0);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
	amdgpu_ring_write(ring, data1);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
	amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1);
}
| 876 | |||
/**
 * vcn_v1_0_dec_ring_emit_vm_flush - emit a VM flush on the decode ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VMID whose page-table base is being updated
 * @pd_addr: new page-directory address
 *
 * Updates the per-VMID page-table base registers in the MMHUB, then
 * triggers a TLB flush on this ring's invalidation engine and waits
 * for its ack bit.  All register accesses go through the VCPU GPCOM
 * write/wait helpers since the ring cannot touch registers directly.
 */
static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	uint32_t data0, data1, mask;
	unsigned eng = ring->vm_inv_eng;

	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
	pd_addr |= AMDGPU_PTE_VALID;

	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
	data1 = upper_32_bits(pd_addr);
	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);

	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
	data1 = lower_32_bits(pd_addr);
	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);

	/* poll the low word we just wrote until it reads back */
	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
	data1 = lower_32_bits(pd_addr);
	mask = 0xffffffff;
	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);

	/* flush TLB */
	data0 = (hub->vm_inv_eng0_req + eng) << 2;
	data1 = req;
	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);

	/* wait for flush */
	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
	data1 = 1 << vm_id;
	mask = 1 << vm_id;
	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
}
| 912 | |||
| 913 | /** | ||
| 914 | * vcn_v1_0_enc_ring_get_rptr - get enc read pointer | ||
| 915 | * | ||
| 916 | * @ring: amdgpu_ring pointer | ||
| 917 | * | ||
| 918 | * Returns the current hardware enc read pointer | ||
| 919 | */ | ||
| 920 | static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring) | ||
| 921 | { | ||
| 922 | struct amdgpu_device *adev = ring->adev; | ||
| 923 | |||
| 924 | if (ring == &adev->vcn.ring_enc[0]) | ||
| 925 | return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); | ||
| 926 | else | ||
| 927 | return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); | ||
| 928 | } | ||
| 929 | |||
| 930 | /** | ||
| 931 | * vcn_v1_0_enc_ring_get_wptr - get enc write pointer | ||
| 932 | * | ||
| 933 | * @ring: amdgpu_ring pointer | ||
| 934 | * | ||
| 935 | * Returns the current hardware enc write pointer | ||
| 936 | */ | ||
| 937 | static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring) | ||
| 938 | { | ||
| 939 | struct amdgpu_device *adev = ring->adev; | ||
| 940 | |||
| 941 | if (ring == &adev->vcn.ring_enc[0]) | ||
| 942 | return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); | ||
| 943 | else | ||
| 944 | return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); | ||
| 945 | } | ||
| 946 | |||
| 947 | /** | ||
| 948 | * vcn_v1_0_enc_ring_set_wptr - set enc write pointer | ||
| 949 | * | ||
| 950 | * @ring: amdgpu_ring pointer | ||
| 951 | * | ||
| 952 | * Commits the enc write pointer to the hardware | ||
| 953 | */ | ||
| 954 | static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring) | ||
| 955 | { | ||
| 956 | struct amdgpu_device *adev = ring->adev; | ||
| 957 | |||
| 958 | if (ring == &adev->vcn.ring_enc[0]) | ||
| 959 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, | ||
| 960 | lower_32_bits(ring->wptr)); | ||
| 961 | else | ||
| 962 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, | ||
| 963 | lower_32_bits(ring->wptr)); | ||
| 964 | } | ||
| 965 | |||
/**
 * vcn_v1_0_enc_ring_emit_fence - emit an enc fence & trap command
 *
 * @ring: amdgpu_ring pointer
 * @addr: GPU address the fence value is written to
 * @seq: sequence number of the fence
 * @flags: fence flags; 64-bit fences are not supported on this ring
 *
 * Write an enc fence and a trap command to the ring.
 */
static void vcn_v1_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
}
| 985 | |||
/* Write an end command to the encode ring. */
static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
}
| 990 | |||
/**
 * vcn_v1_0_enc_ring_emit_ib - enc execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VMID the IB executes under
 * @ctx_switch: unused on this ring
 *
 * Write enc ring commands to execute the indirect buffer: IB command,
 * VMID, 64-bit IB address, and the size in dwords.
 */
static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}
| 1008 | |||
/**
 * vcn_v1_0_enc_ring_emit_vm_flush - emit a VM flush on an encode ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VMID whose page-table base is being updated
 * @pd_addr: new page-directory address
 *
 * Same sequence as the decode-ring variant but using the native enc
 * REG_WRITE/REG_WAIT commands: program the per-VMID page-table base,
 * wait for the low word to read back, request a TLB flush on this
 * ring's invalidation engine, and wait for the ack bit.
 */
static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
	pd_addr |= AMDGPU_PTE_VALID;

	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
	amdgpu_ring_write(ring,
			  (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
	amdgpu_ring_write(ring,
			  (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* poll the low word we just wrote until it reads back */
	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
	amdgpu_ring_write(ring,
			  (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vm_id);
	amdgpu_ring_write(ring, 1 << vm_id);
}
| 1046 | |||
/**
 * vcn_v1_0_set_interrupt_state - enable/disable a VCN interrupt source
 *
 * Intentional stub: VCN 1.0 trap interrupts are not gated from here;
 * the callback only exists to satisfy the amdgpu_irq_src_funcs
 * interface.
 */
static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	return 0;
}
| 1054 | |||
/**
 * vcn_v1_0_process_interrupt - dispatch a VCN trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source (unused)
 * @entry: decoded interrupt-vector entry
 *
 * Routes the trap to the ring it belongs to by IH source id:
 * 124 -> decode ring, 119 -> encode ring 0, 120 -> encode ring 1.
 * Unknown ids are logged and otherwise ignored.
 */
static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCN TRAP\n");

	switch (entry->src_id) {
	case 124:
		amdgpu_fence_process(&adev->vcn.ring_dec);
		break;
	case 119:
		amdgpu_fence_process(&adev->vcn.ring_enc[0]);
		break;
	case 120:
		amdgpu_fence_process(&adev->vcn.ring_enc[1]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}
| 1079 | |||
/* IP-block callbacks for VCN 1.0; idle/reset hooks are not yet wired up. */
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
	.name = "vcn_v1_0",
	.early_init = vcn_v1_0_early_init,
	.late_init = NULL,
	.sw_init = vcn_v1_0_sw_init,
	.sw_fini = vcn_v1_0_sw_fini,
	.hw_init = vcn_v1_0_hw_init,
	.hw_fini = vcn_v1_0_hw_fini,
	.suspend = vcn_v1_0_suspend,
	.resume = vcn_v1_0_resume,
	.is_idle = NULL /* vcn_v1_0_is_idle */,
	.wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */,
	.check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
	.soft_reset = NULL /* vcn_v1_0_soft_reset */,
	.post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
	.set_clockgating_state = vcn_v1_0_set_clockgating_state,
	.set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */,
};
| 1099 | |||
/* Ring callbacks for the VCN 1.0 decode ring (VM mode, MMHUB). */
static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.align_mask = 0xf,
	.nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vcn_v1_0_dec_ring_get_rptr,
	.get_wptr = vcn_v1_0_dec_ring_get_wptr,
	.set_wptr = vcn_v1_0_dec_ring_set_wptr,
	.emit_frame_size =
		2 + /* vcn_v1_0_dec_ring_emit_hdp_invalidate */
		34 + /* vcn_v1_0_dec_ring_emit_vm_flush */
		14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
		6,
	.emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */
	.emit_ib = vcn_v1_0_dec_ring_emit_ib,
	.emit_fence = vcn_v1_0_dec_ring_emit_fence,
	.emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
	.emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate,
	.test_ring = amdgpu_vcn_dec_ring_test_ring,
	.test_ib = amdgpu_vcn_dec_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_start = vcn_v1_0_dec_ring_insert_start,
	.insert_end = vcn_v1_0_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
};
| 1128 | |||
/* Ring callbacks shared by both VCN 1.0 encode rings (VM mode, MMHUB). */
static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_ENC,
	.align_mask = 0x3f,
	.nop = VCN_ENC_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vcn_v1_0_enc_ring_get_rptr,
	.get_wptr = vcn_v1_0_enc_ring_get_wptr,
	.set_wptr = vcn_v1_0_enc_ring_set_wptr,
	.emit_frame_size =
		17 + /* vcn_v1_0_enc_ring_emit_vm_flush */
		5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */
		1, /* vcn_v1_0_enc_ring_insert_end */
	.emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */
	.emit_ib = vcn_v1_0_enc_ring_emit_ib,
	.emit_fence = vcn_v1_0_enc_ring_emit_fence,
	.emit_vm_flush = vcn_v1_0_enc_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_enc_ring_test_ring,
	.test_ib = amdgpu_vcn_enc_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v1_0_enc_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
};
| 1154 | |||
/* Install the VM-mode callbacks on the decode ring. */
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
	adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
	DRM_INFO("VCN decode is enabled in VM mode\n");
}
| 1160 | |||
/* Install the VM-mode callbacks on every encode ring. */
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
		adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;

	DRM_INFO("VCN encode is enabled in VM mode\n");
}
| 1170 | |||
/* Interrupt-source callbacks for the VCN trap source. */
static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
	.set = vcn_v1_0_set_interrupt_state,
	.process = vcn_v1_0_process_interrupt,
};
| 1175 | |||
| 1176 | static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) | ||
| 1177 | { | ||
| 1178 | adev->uvd.irq.num_types = adev->vcn.num_enc_rings + 1; | ||
| 1179 | adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; | ||
| 1180 | } | ||
| 1181 | |||
/* Exported IP-block descriptor for VCN 1.0 (see vcn_v1_0.h). */
const struct amdgpu_ip_block_version vcn_v1_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCN,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &vcn_v1_0_ip_funcs,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h new file mode 100644 index 000000000000..2a497a7a4840 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h | |||
| @@ -0,0 +1,29 @@ | |||
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __VCN_V1_0_H__
#define __VCN_V1_0_H__

/* IP-block descriptor for VCN 1.0, defined in vcn_v1_0.c */
extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;

#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 071f56e439bb..56150e8d1ed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include "drmP.h" | 23 | #include <drm/drmP.h> |
| 24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_ih.h" | 25 | #include "amdgpu_ih.h" |
| 26 | #include "soc15.h" | 26 | #include "soc15.h" |
| @@ -97,7 +97,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) | |||
| 97 | /* disable irqs */ | 97 | /* disable irqs */ |
| 98 | vega10_ih_disable_interrupts(adev); | 98 | vega10_ih_disable_interrupts(adev); |
| 99 | 99 | ||
| 100 | nbio_v6_1_ih_control(adev); | 100 | if (adev->flags & AMD_IS_APU) |
| 101 | nbio_v7_0_ih_control(adev); | ||
| 102 | else | ||
| 103 | nbio_v6_1_ih_control(adev); | ||
| 101 | 104 | ||
| 102 | ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); | 105 | ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); |
| 103 | /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ | 106 | /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ |
| @@ -148,7 +151,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) | |||
| 148 | ENABLE, 0); | 151 | ENABLE, 0); |
| 149 | } | 152 | } |
| 150 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR), ih_doorbell_rtpr); | 153 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR), ih_doorbell_rtpr); |
| 151 | nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); | 154 | if (adev->flags & AMD_IS_APU) |
| 155 | nbio_v7_0_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); | ||
| 156 | else | ||
| 157 | nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); | ||
| 152 | 158 | ||
| 153 | tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL)); | 159 | tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL)); |
| 154 | tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, | 160 | tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, |
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index b1132f5e84fc..6cac291c96da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
| 24 | #include "drmP.h" | 24 | #include <drm/drmP.h> |
| 25 | #include "amdgpu.h" | 25 | #include "amdgpu.h" |
| 26 | #include "amdgpu_atombios.h" | 26 | #include "amdgpu_atombios.h" |
| 27 | #include "amdgpu_ih.h" | 27 | #include "amdgpu_ih.h" |
| @@ -463,89 +463,83 @@ static void vi_detect_hw_virtualization(struct amdgpu_device *adev) | |||
| 463 | } | 463 | } |
| 464 | } | 464 | } |
| 465 | 465 | ||
| 466 | static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = { | ||
| 467 | }; | ||
| 468 | |||
| 469 | static const struct amdgpu_allowed_register_entry cz_allowed_read_registers[] = { | ||
| 470 | }; | ||
| 471 | |||
| 472 | static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = { | 466 | static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = { |
| 473 | {mmGRBM_STATUS, false}, | 467 | {mmGRBM_STATUS}, |
| 474 | {mmGRBM_STATUS2, false}, | 468 | {mmGRBM_STATUS2}, |
| 475 | {mmGRBM_STATUS_SE0, false}, | 469 | {mmGRBM_STATUS_SE0}, |
| 476 | {mmGRBM_STATUS_SE1, false}, | 470 | {mmGRBM_STATUS_SE1}, |
| 477 | {mmGRBM_STATUS_SE2, false}, | 471 | {mmGRBM_STATUS_SE2}, |
| 478 | {mmGRBM_STATUS_SE3, false}, | 472 | {mmGRBM_STATUS_SE3}, |
| 479 | {mmSRBM_STATUS, false}, | 473 | {mmSRBM_STATUS}, |
| 480 | {mmSRBM_STATUS2, false}, | 474 | {mmSRBM_STATUS2}, |
| 481 | {mmSRBM_STATUS3, false}, | 475 | {mmSRBM_STATUS3}, |
| 482 | {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET, false}, | 476 | {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET}, |
| 483 | {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET, false}, | 477 | {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET}, |
| 484 | {mmCP_STAT, false}, | 478 | {mmCP_STAT}, |
| 485 | {mmCP_STALLED_STAT1, false}, | 479 | {mmCP_STALLED_STAT1}, |
| 486 | {mmCP_STALLED_STAT2, false}, | 480 | {mmCP_STALLED_STAT2}, |
| 487 | {mmCP_STALLED_STAT3, false}, | 481 | {mmCP_STALLED_STAT3}, |
| 488 | {mmCP_CPF_BUSY_STAT, false}, | 482 | {mmCP_CPF_BUSY_STAT}, |
| 489 | {mmCP_CPF_STALLED_STAT1, false}, | 483 | {mmCP_CPF_STALLED_STAT1}, |
| 490 | {mmCP_CPF_STATUS, false}, | 484 | {mmCP_CPF_STATUS}, |
| 491 | {mmCP_CPC_BUSY_STAT, false}, | 485 | {mmCP_CPC_BUSY_STAT}, |
| 492 | {mmCP_CPC_STALLED_STAT1, false}, | 486 | {mmCP_CPC_STALLED_STAT1}, |
| 493 | {mmCP_CPC_STATUS, false}, | 487 | {mmCP_CPC_STATUS}, |
| 494 | {mmGB_ADDR_CONFIG, false}, | 488 | {mmGB_ADDR_CONFIG}, |
| 495 | {mmMC_ARB_RAMCFG, false}, | 489 | {mmMC_ARB_RAMCFG}, |
| 496 | {mmGB_TILE_MODE0, false}, | 490 | {mmGB_TILE_MODE0}, |
| 497 | {mmGB_TILE_MODE1, false}, | 491 | {mmGB_TILE_MODE1}, |
| 498 | {mmGB_TILE_MODE2, false}, | 492 | {mmGB_TILE_MODE2}, |
| 499 | {mmGB_TILE_MODE3, false}, | 493 | {mmGB_TILE_MODE3}, |
| 500 | {mmGB_TILE_MODE4, false}, | 494 | {mmGB_TILE_MODE4}, |
| 501 | {mmGB_TILE_MODE5, false}, | 495 | {mmGB_TILE_MODE5}, |
| 502 | {mmGB_TILE_MODE6, false}, | 496 | {mmGB_TILE_MODE6}, |
| 503 | {mmGB_TILE_MODE7, false}, | 497 | {mmGB_TILE_MODE7}, |
| 504 | {mmGB_TILE_MODE8, false}, | 498 | {mmGB_TILE_MODE8}, |
| 505 | {mmGB_TILE_MODE9, false}, | 499 | {mmGB_TILE_MODE9}, |
| 506 | {mmGB_TILE_MODE10, false}, | 500 | {mmGB_TILE_MODE10}, |
| 507 | {mmGB_TILE_MODE11, false}, | 501 | {mmGB_TILE_MODE11}, |
| 508 | {mmGB_TILE_MODE12, false}, | 502 | {mmGB_TILE_MODE12}, |
| 509 | {mmGB_TILE_MODE13, false}, | 503 | {mmGB_TILE_MODE13}, |
| 510 | {mmGB_TILE_MODE14, false}, | 504 | {mmGB_TILE_MODE14}, |
| 511 | {mmGB_TILE_MODE15, false}, | 505 | {mmGB_TILE_MODE15}, |
| 512 | {mmGB_TILE_MODE16, false}, | 506 | {mmGB_TILE_MODE16}, |
| 513 | {mmGB_TILE_MODE17, false}, | 507 | {mmGB_TILE_MODE17}, |
| 514 | {mmGB_TILE_MODE18, false}, | 508 | {mmGB_TILE_MODE18}, |
| 515 | {mmGB_TILE_MODE19, false}, | 509 | {mmGB_TILE_MODE19}, |
| 516 | {mmGB_TILE_MODE20, false}, | 510 | {mmGB_TILE_MODE20}, |
| 517 | {mmGB_TILE_MODE21, false}, | 511 | {mmGB_TILE_MODE21}, |
| 518 | {mmGB_TILE_MODE22, false}, | 512 | {mmGB_TILE_MODE22}, |
| 519 | {mmGB_TILE_MODE23, false}, | 513 | {mmGB_TILE_MODE23}, |
| 520 | {mmGB_TILE_MODE24, false}, | 514 | {mmGB_TILE_MODE24}, |
| 521 | {mmGB_TILE_MODE25, false}, | 515 | {mmGB_TILE_MODE25}, |
| 522 | {mmGB_TILE_MODE26, false}, | 516 | {mmGB_TILE_MODE26}, |
| 523 | {mmGB_TILE_MODE27, false}, | 517 | {mmGB_TILE_MODE27}, |
| 524 | {mmGB_TILE_MODE28, false}, | 518 | {mmGB_TILE_MODE28}, |
| 525 | {mmGB_TILE_MODE29, false}, | 519 | {mmGB_TILE_MODE29}, |
| 526 | {mmGB_TILE_MODE30, false}, | 520 | {mmGB_TILE_MODE30}, |
| 527 | {mmGB_TILE_MODE31, false}, | 521 | {mmGB_TILE_MODE31}, |
| 528 | {mmGB_MACROTILE_MODE0, false}, | 522 | {mmGB_MACROTILE_MODE0}, |
| 529 | {mmGB_MACROTILE_MODE1, false}, | 523 | {mmGB_MACROTILE_MODE1}, |
| 530 | {mmGB_MACROTILE_MODE2, false}, | 524 | {mmGB_MACROTILE_MODE2}, |
| 531 | {mmGB_MACROTILE_MODE3, false}, | 525 | {mmGB_MACROTILE_MODE3}, |
| 532 | {mmGB_MACROTILE_MODE4, false}, | 526 | {mmGB_MACROTILE_MODE4}, |
| 533 | {mmGB_MACROTILE_MODE5, false}, | 527 | {mmGB_MACROTILE_MODE5}, |
| 534 | {mmGB_MACROTILE_MODE6, false}, | 528 | {mmGB_MACROTILE_MODE6}, |
| 535 | {mmGB_MACROTILE_MODE7, false}, | 529 | {mmGB_MACROTILE_MODE7}, |
| 536 | {mmGB_MACROTILE_MODE8, false}, | 530 | {mmGB_MACROTILE_MODE8}, |
| 537 | {mmGB_MACROTILE_MODE9, false}, | 531 | {mmGB_MACROTILE_MODE9}, |
| 538 | {mmGB_MACROTILE_MODE10, false}, | 532 | {mmGB_MACROTILE_MODE10}, |
| 539 | {mmGB_MACROTILE_MODE11, false}, | 533 | {mmGB_MACROTILE_MODE11}, |
| 540 | {mmGB_MACROTILE_MODE12, false}, | 534 | {mmGB_MACROTILE_MODE12}, |
| 541 | {mmGB_MACROTILE_MODE13, false}, | 535 | {mmGB_MACROTILE_MODE13}, |
| 542 | {mmGB_MACROTILE_MODE14, false}, | 536 | {mmGB_MACROTILE_MODE14}, |
| 543 | {mmGB_MACROTILE_MODE15, false}, | 537 | {mmGB_MACROTILE_MODE15}, |
| 544 | {mmCC_RB_BACKEND_DISABLE, false, true}, | 538 | {mmCC_RB_BACKEND_DISABLE, true}, |
| 545 | {mmGC_USER_RB_BACKEND_DISABLE, false, true}, | 539 | {mmGC_USER_RB_BACKEND_DISABLE, true}, |
| 546 | {mmGB_BACKEND_MAP, false, false}, | 540 | {mmGB_BACKEND_MAP, false}, |
| 547 | {mmPA_SC_RASTER_CONFIG, false, true}, | 541 | {mmPA_SC_RASTER_CONFIG, true}, |
| 548 | {mmPA_SC_RASTER_CONFIG_1, false, true}, | 542 | {mmPA_SC_RASTER_CONFIG_1, true}, |
| 549 | }; | 543 | }; |
| 550 | 544 | ||
| 551 | static uint32_t vi_get_register_value(struct amdgpu_device *adev, | 545 | static uint32_t vi_get_register_value(struct amdgpu_device *adev, |
| @@ -647,51 +641,17 @@ static uint32_t vi_get_register_value(struct amdgpu_device *adev, | |||
| 647 | static int vi_read_register(struct amdgpu_device *adev, u32 se_num, | 641 | static int vi_read_register(struct amdgpu_device *adev, u32 se_num, |
| 648 | u32 sh_num, u32 reg_offset, u32 *value) | 642 | u32 sh_num, u32 reg_offset, u32 *value) |
| 649 | { | 643 | { |
| 650 | const struct amdgpu_allowed_register_entry *asic_register_table = NULL; | 644 | uint32_t i; |
| 651 | const struct amdgpu_allowed_register_entry *asic_register_entry; | ||
| 652 | uint32_t size, i; | ||
| 653 | 645 | ||
| 654 | *value = 0; | 646 | *value = 0; |
| 655 | switch (adev->asic_type) { | ||
| 656 | case CHIP_TOPAZ: | ||
| 657 | asic_register_table = tonga_allowed_read_registers; | ||
| 658 | size = ARRAY_SIZE(tonga_allowed_read_registers); | ||
| 659 | break; | ||
| 660 | case CHIP_FIJI: | ||
| 661 | case CHIP_TONGA: | ||
| 662 | case CHIP_POLARIS11: | ||
| 663 | case CHIP_POLARIS10: | ||
| 664 | case CHIP_POLARIS12: | ||
| 665 | case CHIP_CARRIZO: | ||
| 666 | case CHIP_STONEY: | ||
| 667 | asic_register_table = cz_allowed_read_registers; | ||
| 668 | size = ARRAY_SIZE(cz_allowed_read_registers); | ||
| 669 | break; | ||
| 670 | default: | ||
| 671 | return -EINVAL; | ||
| 672 | } | ||
| 673 | |||
| 674 | if (asic_register_table) { | ||
| 675 | for (i = 0; i < size; i++) { | ||
| 676 | asic_register_entry = asic_register_table + i; | ||
| 677 | if (reg_offset != asic_register_entry->reg_offset) | ||
| 678 | continue; | ||
| 679 | if (!asic_register_entry->untouched) | ||
| 680 | *value = vi_get_register_value(adev, | ||
| 681 | asic_register_entry->grbm_indexed, | ||
| 682 | se_num, sh_num, reg_offset); | ||
| 683 | return 0; | ||
| 684 | } | ||
| 685 | } | ||
| 686 | |||
| 687 | for (i = 0; i < ARRAY_SIZE(vi_allowed_read_registers); i++) { | 647 | for (i = 0; i < ARRAY_SIZE(vi_allowed_read_registers); i++) { |
| 648 | bool indexed = vi_allowed_read_registers[i].grbm_indexed; | ||
| 649 | |||
| 688 | if (reg_offset != vi_allowed_read_registers[i].reg_offset) | 650 | if (reg_offset != vi_allowed_read_registers[i].reg_offset) |
| 689 | continue; | 651 | continue; |
| 690 | 652 | ||
| 691 | if (!vi_allowed_read_registers[i].untouched) | 653 | *value = vi_get_register_value(adev, indexed, se_num, sh_num, |
| 692 | *value = vi_get_register_value(adev, | 654 | reg_offset); |
| 693 | vi_allowed_read_registers[i].grbm_indexed, | ||
| 694 | se_num, sh_num, reg_offset); | ||
| 695 | return 0; | 655 | return 0; |
| 696 | } | 656 | } |
| 697 | return -EINVAL; | 657 | return -EINVAL; |
| @@ -934,11 +894,6 @@ static int vi_common_early_init(void *handle) | |||
| 934 | (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC))) | 894 | (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC))) |
| 935 | smc_enabled = true; | 895 | smc_enabled = true; |
| 936 | 896 | ||
| 937 | if (amdgpu_sriov_vf(adev)) { | ||
| 938 | amdgpu_virt_init_setting(adev); | ||
| 939 | xgpu_vi_mailbox_set_irq_funcs(adev); | ||
| 940 | } | ||
| 941 | |||
| 942 | adev->rev_id = vi_get_rev_id(adev); | 897 | adev->rev_id = vi_get_rev_id(adev); |
| 943 | adev->external_rev_id = 0xFF; | 898 | adev->external_rev_id = 0xFF; |
| 944 | switch (adev->asic_type) { | 899 | switch (adev->asic_type) { |
| @@ -1073,7 +1028,7 @@ static int vi_common_early_init(void *handle) | |||
| 1073 | /* rev0 hardware requires workarounds to support PG */ | 1028 | /* rev0 hardware requires workarounds to support PG */ |
| 1074 | adev->pg_flags = 0; | 1029 | adev->pg_flags = 0; |
| 1075 | if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) { | 1030 | if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) { |
| 1076 | adev->pg_flags |= | 1031 | adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | |
| 1077 | AMD_PG_SUPPORT_GFX_SMG | | 1032 | AMD_PG_SUPPORT_GFX_SMG | |
| 1078 | AMD_PG_SUPPORT_GFX_PIPELINE | | 1033 | AMD_PG_SUPPORT_GFX_PIPELINE | |
| 1079 | AMD_PG_SUPPORT_CP | | 1034 | AMD_PG_SUPPORT_CP | |
| @@ -1111,6 +1066,11 @@ static int vi_common_early_init(void *handle) | |||
| 1111 | return -EINVAL; | 1066 | return -EINVAL; |
| 1112 | } | 1067 | } |
| 1113 | 1068 | ||
| 1069 | if (amdgpu_sriov_vf(adev)) { | ||
| 1070 | amdgpu_virt_init_setting(adev); | ||
| 1071 | xgpu_vi_mailbox_set_irq_funcs(adev); | ||
| 1072 | } | ||
| 1073 | |||
| 1114 | /* vi use smc load by default */ | 1074 | /* vi use smc load by default */ |
| 1115 | adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); | 1075 | adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); |
| 1116 | 1076 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 5f2ab9c1609a..a6485254a169 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h | |||
| @@ -361,6 +361,12 @@ | |||
| 361 | #define PACKET3_WAIT_ON_CE_COUNTER 0x86 | 361 | #define PACKET3_WAIT_ON_CE_COUNTER 0x86 |
| 362 | #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 | 362 | #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 |
| 363 | #define PACKET3_SWITCH_BUFFER 0x8B | 363 | #define PACKET3_SWITCH_BUFFER 0x8B |
| 364 | #define PACKET3_FRAME_CONTROL 0x90 | ||
| 365 | # define FRAME_CMD(x) ((x) << 28) | ||
| 366 | /* | ||
| 367 | * x=0: tmz_begin | ||
| 368 | * x=1: tmz_end | ||
| 369 | */ | ||
| 364 | #define PACKET3_SET_RESOURCES 0xA0 | 370 | #define PACKET3_SET_RESOURCES 0xA0 |
| 365 | /* 1. header | 371 | /* 1. header |
| 366 | * 2. CONTROL | 372 | * 2. CONTROL |
