author     Daniel Vetter <daniel.vetter@ffwll.ch>   2017-10-03 05:09:16 -0400
committer  Daniel Vetter <daniel.vetter@ffwll.ch>   2017-10-03 05:09:16 -0400
commit     0d3c24e936feefeca854073ccb40613cd6eba9a9 (patch)
tree       1f675397b924846740b0931b066ddce6f3d7eb3d /drivers/gpu/drm/amd/amdgpu
parent     1af0838de60e723cb02253ecc9b555c30f8f6a6f (diff)
parent     ebec44a2456fbe5fe18aae88f6010f6878f0cb4a (diff)
Merge airlied/drm-next into drm-misc-next
Just catching up with upstream.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
87 files changed, 3549 insertions, 1904 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 658bac0cdc5e..25a95c95df14 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -133,5 +133,3 @@ include $(FULL_AMD_PATH)/powerplay/Makefile
 amdgpu-y += $(AMD_POWERPLAY_FILES)
 
 obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
-
-CFLAGS_amdgpu_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a5427cf4b19d..ebfc267467ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -65,6 +65,7 @@
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 #include "amdgpu_vcn.h"
+#include "amdgpu_mn.h"
 
 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
@@ -76,7 +77,7 @@
 extern int amdgpu_modeset;
 extern int amdgpu_vram_limit;
 extern int amdgpu_vis_vram_limit;
-extern unsigned amdgpu_gart_size;
+extern int amdgpu_gart_size;
 extern int amdgpu_gtt_size;
 extern int amdgpu_moverate;
 extern int amdgpu_benchmarking;
@@ -91,11 +92,12 @@ extern int amdgpu_dpm;
 extern int amdgpu_fw_load_type;
 extern int amdgpu_aspm;
 extern int amdgpu_runtime_pm;
-extern unsigned amdgpu_ip_block_mask;
+extern uint amdgpu_ip_block_mask;
 extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
 extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fragment_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
 extern int amdgpu_vm_update_mode;
@@ -103,14 +105,14 @@ extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_no_evict;
 extern int amdgpu_direct_gma_size;
-extern unsigned amdgpu_pcie_gen_cap;
-extern unsigned amdgpu_pcie_lane_cap;
-extern unsigned amdgpu_cg_mask;
-extern unsigned amdgpu_pg_mask;
-extern unsigned amdgpu_sdma_phase_quantum;
+extern uint amdgpu_pcie_gen_cap;
+extern uint amdgpu_pcie_lane_cap;
+extern uint amdgpu_cg_mask;
+extern uint amdgpu_pg_mask;
+extern uint amdgpu_sdma_phase_quantum;
 extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
-extern unsigned amdgpu_pp_feature_mask;
+extern uint amdgpu_pp_feature_mask;
 extern int amdgpu_vram_page_split;
 extern int amdgpu_ngg;
 extern int amdgpu_prim_buf_per_se;
@@ -177,6 +179,7 @@ struct amdgpu_cs_parser;
 struct amdgpu_job;
 struct amdgpu_irq_src;
 struct amdgpu_fpriv;
+struct amdgpu_bo_va_mapping;
 
 enum amdgpu_cp_irq {
 	AMDGPU_CP_IRQ_GFX_EOP = 0,
@@ -291,14 +294,25 @@ struct amdgpu_buffer_funcs {
 
 /* provided by hw blocks that can write ptes, e.g., sdma */
 struct amdgpu_vm_pte_funcs {
+	/* number of dw to reserve per operation */
+	unsigned	copy_pte_num_dw;
+
 	/* copy pte entries from GART */
 	void (*copy_pte)(struct amdgpu_ib *ib,
 			 uint64_t pe, uint64_t src,
 			 unsigned count);
+
 	/* write pte one entry at a time with addr mapping */
 	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 			  uint64_t value, unsigned count,
 			  uint32_t incr);
+
+	/* maximum nums of PTEs/PDEs in a single operation */
+	uint32_t	set_max_nums_pte_pde;
+
+	/* number of dw to reserve per operation */
+	unsigned	set_pte_pde_num_dw;
+
 	/* for linear pte/pde updates without addr mapping */
 	void (*set_pte_pde)(struct amdgpu_ib *ib,
 			    uint64_t pe,
@@ -331,6 +345,7 @@ struct amdgpu_gart_funcs {
 struct amdgpu_ih_funcs {
 	/* ring read/write ptr handling, called from interrupt context */
 	u32 (*get_wptr)(struct amdgpu_device *adev);
+	bool (*prescreen_iv)(struct amdgpu_device *adev);
 	void (*decode_iv)(struct amdgpu_device *adev,
 			  struct amdgpu_iv_entry *entry);
 	void (*set_rptr)(struct amdgpu_device *adev);
@@ -398,6 +413,7 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
 void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
 
 /* sub-allocation manager, it has to be protected by another lock.
@@ -454,9 +470,10 @@ struct amdgpu_sa_bo {
  */
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
 			     u64 flags, bool kernel,
-			     struct drm_gem_object **obj);
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj);
 
 int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
@@ -730,8 +747,8 @@ struct amdgpu_ctx_mgr {
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
-uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct dma_fence *fence);
+int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+			 struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				       struct amdgpu_ring *ring, uint64_t seq);
 
@@ -748,6 +765,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 struct amdgpu_fpriv {
 	struct amdgpu_vm	vm;
 	struct amdgpu_bo_va	*prt_va;
+	struct amdgpu_bo_va	*csa_va;
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
@@ -1012,7 +1030,6 @@ struct amdgpu_gfx {
 	/* reset mask */
 	uint32_t			grbm_soft_reset;
 	uint32_t			srbm_soft_reset;
-	bool				in_reset;
 	/* s3/s4 mask */
 	bool				in_suspend;
 	/* NGG */
@@ -1054,6 +1071,7 @@ struct amdgpu_cs_parser {
 	/* buffer objects */
 	struct ww_acquire_ctx		ticket;
 	struct amdgpu_bo_list		*bo_list;
+	struct amdgpu_mn		*mn;
 	struct amdgpu_bo_list_entry	vm_pd;
 	struct list_head		validated;
 	struct dma_fence		*fence;
@@ -1181,6 +1199,9 @@ struct amdgpu_firmware {
 
 	/* gpu info firmware data pointer */
 	const struct firmware *gpu_info_fw;
+
+	void *fw_buf_ptr;
+	uint64_t fw_buf_mc;
 };
 
 /*
@@ -1195,20 +1216,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
 void amdgpu_test_moves(struct amdgpu_device *adev);
 
 /*
- * MMU Notifier
- */
-#if defined(CONFIG_MMU_NOTIFIER)
-int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
-void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-#else
-static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
-{
-	return -ENODEV;
-}
-static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
-#endif
-
-/*
  * Debugfs
  */
 struct amdgpu_debugfs {
@@ -1482,9 +1489,6 @@ struct amdgpu_device {
 	struct amdgpu_mman		mman;
 	struct amdgpu_vram_scratch	vram_scratch;
 	struct amdgpu_wb		wb;
-	atomic64_t			vram_usage;
-	atomic64_t			vram_vis_usage;
-	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
 	atomic64_t			num_evictions;
 	atomic64_t			num_vram_cpu_page_faults;
@@ -1593,6 +1597,7 @@ struct amdgpu_device {
 
 	/* record last mm index being written through WREG32*/
 	unsigned long last_mm_index;
+	bool				in_sriov_reset;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1760,6 +1765,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
+#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
 #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
@@ -1792,18 +1798,6 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
-int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
-int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
-			      uint32_t flags);
-bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
-struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
-bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
-				  unsigned long end);
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated);
-bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
-uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
-				 struct ttm_mem_reg *mem);
 void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
 void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
@@ -1886,10 +1880,9 @@ static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
 static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
 #endif
 
-struct amdgpu_bo_va_mapping *
-amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
-		       uint64_t addr, struct amdgpu_bo **bo);
-int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+			   uint64_t addr, struct amdgpu_bo **bo,
+			   struct amdgpu_bo_va_mapping **mapping);
 
 #include "amdgpu_object.h"
 #endif
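Note: with the header change above, amdgpu_cs_find_mapping() now returns an error code and hands the mapping back through an out parameter instead of returning the mapping pointer directly. A minimal sketch of the new calling convention follows; the variable names are illustrative and not taken from this diff:

	struct amdgpu_bo *bo;
	struct amdgpu_bo_va_mapping *mapping;
	int r;

	r = amdgpu_cs_find_mapping(parser, addr, &bo, &mapping);
	if (r)
		return r;	/* lookup failed, no usable BO/VA mapping for addr */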
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c7bcf5207d79..5432af39a674 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -28,14 +28,14 @@
 #include <linux/module.h>
 
 const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 
 int amdgpu_amdkfd_init(void)
 {
 	int ret;
 
 #if defined(CONFIG_HSA_AMD_MODULE)
-	int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+	int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 
 	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b8802a561cbd..8d689ab7e429 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,6 +26,7 @@
 #define AMDGPU_AMDKFD_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>
 
 struct amdgpu_device;
@@ -60,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
 
+#define read_user_wptr(mmptr, wptr, dst)			\
+	({							\
+		bool valid = false;				\
+		if ((mmptr) && (wptr)) {			\
+			if ((mmptr) == current->mm) {		\
+				valid = !get_user((dst), (wptr));	\
+			} else if (current->mm == NULL) {	\
+				use_mm(mmptr);			\
+				valid = !get_user((dst), (wptr));	\
+				unuse_mm(mmptr);		\
+			}					\
+		}						\
+		valid;						\
+	})
+
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
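Note: the read_user_wptr() macro added above reads a user-space write pointer either directly, when called from the owning process, or from a kernel thread by temporarily adopting the owner's mm with use_mm()/unuse_mm(). A minimal usage sketch, mirroring how the hqd_load callbacks later in this diff consume it; the surrounding variables are illustrative:

	uint32_t wptr_val;

	/* mm is the queue owner's mm_struct, wptr the user-space pointer */
	if (read_user_wptr(mm, wptr, wptr_val))
		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);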
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 5254562fd0f9..dc7e25cce741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -39,6 +39,12 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
+enum hqd_dequeue_request_type {
+	NO_ACTION = 0,
+	DRAIN_PIPE,
+	RESET_WAVES
+};
+
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
@@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 		uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-		uint32_t queue_id, uint32_t __user *wptr);
+		uint32_t queue_id, uint32_t __user *wptr,
+		uint32_t wptr_shift, uint32_t wptr_mask,
+		struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 		uint32_t pipe_id, uint32_t queue_id);
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+		enum kfd_preempt_type reset_type,
 		unsigned int utimeout, uint32_t pipe_id,
 		uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -126,6 +135,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFBANK);
+	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFRANKS);
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}
 
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -133,6 +169,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_vm_alloc_pasid,
+	.free_pasid = amdgpu_vm_free_pasid,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
@@ -150,7 +188,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
-	.get_fw_version = get_fw_version
+	.get_fw_version = get_fw_version,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = get_tile_config,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -186,7 +226,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
-	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -290,20 +330,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr)
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	uint32_t wptr_shadow, is_wptr_shadow_valid;
 	struct cik_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg, wptr_val, data;
 
 	m = get_mqd(mqd);
 
-	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-	if (is_wptr_shadow_valid)
-		m->cp_hqd_pq_wptr = wptr_shadow;
-
 	acquire_queue(kgd, pipe_id, queue_id);
-	gfx_v7_0_mqd_commit(adev, m);
+
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Copy userspace write pointer value to register.
+	 * Activate doorbell logic to monitor subsequent changes.
+	 */
+	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+	if (read_user_wptr(mm, wptr, wptr_val))
+		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+	WREG32(mmCP_HQD_ACTIVE, data);
+
 	release_queue(kgd);
 
 	return 0;
@@ -382,30 +440,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 	return false;
 }
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+			enum kfd_preempt_type reset_type,
 			unsigned int utimeout, uint32_t pipe_id,
 			uint32_t queue_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t temp;
-	int timeout = utimeout;
+	enum hqd_dequeue_request_type type;
+	unsigned long flags, end_jiffies;
+	int retry;
 
 	acquire_queue(kgd, pipe_id, queue_id);
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
 
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}
+
+	/* Workaround: If IQ timer is active and the wait time is close to or
+	 * equal to 0, dequeueing is not safe. Wait until either the wait time
+	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+	 * cleared before continuing. Also, ensure wait times are set to at
+	 * least 0x3.
+	 */
+	local_irq_save(flags);
+	preempt_disable();
+	retry = 5000; /* wait for 500 usecs at maximum */
+	while (true) {
+		temp = RREG32(mmCP_HQD_IQ_TIMER);
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+			pr_debug("HW is processing IQ\n");
+			goto loop;
+		}
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+					== 3) /* SEM-rearm is safe */
+				break;
+			/* Wait time 3 is safe for CP, but our MMIO read/write
+			 * time is close to 1 microsecond, so check for 10 to
+			 * leave more buffer room
+			 */
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+					>= 10)
+				break;
+			pr_debug("IQ timer is active\n");
+		} else
+			break;
+loop:
+		if (!retry) {
+			pr_err("CP HQD IQ timer status time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	retry = 1000;
+	while (true) {
+		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+			break;
+		pr_debug("Dequeue request is pending\n");
 
+		if (!retry) {
+			pr_err("CP HQD dequeue request time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	local_irq_restore(flags);
+	preempt_enable();
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+	end_jiffies = (utimeout * HZ / 1000) + jiffies;
 	while (true) {
 		temp = RREG32(mmCP_HQD_ACTIVE);
-		if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 			break;
-		if (timeout <= 0) {
-			pr_err("kfd: cp queue preemption time out.\n");
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue preemption time out\n");
 			release_queue(kgd);
 			return -ETIME;
 		}
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	release_queue(kgd);
@@ -556,6 +683,16 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
 
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+	unlock_srbm(kgd);
+}
+
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -566,42 +703,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.pfp_fw->data;
 		break;
 
 	case KGD_ENGINE_ME:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.me_fw->data;
 		break;
 
 	case KGD_ENGINE_CE:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.ce_fw->data;
 		break;
 
 	case KGD_ENGINE_MEC1:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.mec_fw->data;
 		break;
 
 	case KGD_ENGINE_MEC2:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.mec2_fw->data;
 		break;
 
 	case KGD_ENGINE_RLC:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.rlc_fw->data;
 		break;
 
 	case KGD_ENGINE_SDMA1:
 		hdr = (const union amdgpu_firmware_header *)
			adev->sdma.instance[0].fw->data;
 		break;
 
 	case KGD_ENGINE_SDMA2:
 		hdr = (const union amdgpu_firmware_header *)
			adev->sdma.instance[1].fw->data;
 		break;
 
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 133d06671e46..c678c69936a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,6 +39,12 @@
 #include "vi_structs.h"
 #include "vid.h"
 
+enum hqd_dequeue_request_type {
+	NO_ACTION = 0,
+	DRAIN_PIPE,
+	RESET_WAVES
+};
+
 struct cik_sdma_rlc_registers;
 
 /*
@@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 		uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-		uint32_t queue_id, uint32_t __user *wptr);
+		uint32_t queue_id, uint32_t __user *wptr,
+		uint32_t wptr_shift, uint32_t wptr_mask,
+		struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 		uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+		enum kfd_preempt_type reset_type,
 		unsigned int utimeout, uint32_t pipe_id,
 		uint32_t queue_id);
 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
@@ -85,6 +94,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 		uint8_t vmid);
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFBANK);
+	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFRANKS);
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}
 
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -92,6 +128,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_vm_alloc_pasid,
+	.free_pasid = amdgpu_vm_free_pasid,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
@@ -111,7 +149,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
-	.get_fw_version = get_fw_version
+	.get_fw_version = get_fw_version,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = get_tile_config,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -147,7 +187,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
-	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -216,7 +256,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
@@ -244,20 +284,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr)
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
 {
-	struct vi_mqd *m;
-	uint32_t shadow_wptr, valid_wptr;
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct vi_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg, wptr_val, data;
 
 	m = get_mqd(mqd);
 
-	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	if (valid_wptr == 0)
-		m->cp_hqd_pq_wptr = shadow_wptr;
-
 	acquire_queue(kgd, pipe_id, queue_id);
-	gfx_v8_0_mqd_commit(adev, mqd);
+
+	/* HIQ is set during driver init period with vmid set to 0*/
+	if (m->cp_hqd_vmid == 0) {
+		uint32_t value, mec, pipe;
+
+		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+			mec, pipe, queue_id);
+		value = RREG32(mmRLC_CP_SCHEDULERS);
+		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+			((mec << 5) | (pipe << 3) | queue_id | 0x80));
+		WREG32(mmRLC_CP_SCHEDULERS, value);
+	}
+
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+	 * This is safe since EOP RPTR==WPTR for any inactive HQD
+	 * on ASICs that do not support context-save.
+	 * EOP writes/reads can start anywhere in the ring.
+	 */
+	if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+		WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+		WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+		WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+	}
+
+	for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Copy userspace write pointer value to register.
+	 * Activate doorbell logic to monitor subsequent changes.
+	 */
+	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+	if (read_user_wptr(mm, wptr, wptr_val))
+		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+	WREG32(mmCP_HQD_ACTIVE, data);
+
 	release_queue(kgd);
 
 	return 0;
@@ -308,29 +395,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 	return false;
 }
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+			enum kfd_preempt_type reset_type,
 			unsigned int utimeout, uint32_t pipe_id,
 			uint32_t queue_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t temp;
-	int timeout = utimeout;
+	enum hqd_dequeue_request_type type;
+	unsigned long flags, end_jiffies;
+	int retry;
+	struct vi_mqd *m = get_mqd(mqd);
 
 	acquire_queue(kgd, pipe_id, queue_id);
 
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+	if (m->cp_hqd_vmid == 0)
+		WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
 
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}
+
+	/* Workaround: If IQ timer is active and the wait time is close to or
+	 * equal to 0, dequeueing is not safe. Wait until either the wait time
+	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+	 * cleared before continuing. Also, ensure wait times are set to at
+	 * least 0x3.
+	 */
+	local_irq_save(flags);
+	preempt_disable();
+	retry = 5000; /* wait for 500 usecs at maximum */
+	while (true) {
+		temp = RREG32(mmCP_HQD_IQ_TIMER);
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+			pr_debug("HW is processing IQ\n");
+			goto loop;
+		}
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+					== 3) /* SEM-rearm is safe */
+				break;
+			/* Wait time 3 is safe for CP, but our MMIO read/write
+			 * time is close to 1 microsecond, so check for 10 to
+			 * leave more buffer room
+			 */
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+					>= 10)
+				break;
+			pr_debug("IQ timer is active\n");
+		} else
+			break;
+loop:
+		if (!retry) {
+			pr_err("CP HQD IQ timer status time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	retry = 1000;
+	while (true) {
+		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+			break;
+		pr_debug("Dequeue request is pending\n");
+
+		if (!retry) {
+			pr_err("CP HQD dequeue request time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	local_irq_restore(flags);
+	preempt_enable();
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+	end_jiffies = (utimeout * HZ / 1000) + jiffies;
 	while (true) {
 		temp = RREG32(mmCP_HQD_ACTIVE);
-		if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 			break;
-		if (timeout <= 0) {
-			pr_err("kfd: cp queue preemption time out.\n");
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue preemption time out.\n");
 			release_queue(kgd);
 			return -ETIME;
 		}
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	release_queue(kgd);
@@ -444,6 +604,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 	return 0;
 }
 
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+	unlock_srbm(kgd);
+}
+
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -454,42 +624,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.pfp_fw->data;
 		break;
 
 	case KGD_ENGINE_ME:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.me_fw->data;
 		break;
 
 	case KGD_ENGINE_CE:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.ce_fw->data;
 		break;
 
 	case KGD_ENGINE_MEC1:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.mec_fw->data;
 		break;
 
 	case KGD_ENGINE_MEC2:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.mec2_fw->data;
 		break;
 
 	case KGD_ENGINE_RLC:
 		hdr = (const union amdgpu_firmware_header *)
			adev->gfx.rlc_fw->data;
 		break;
 
 	case KGD_ENGINE_SDMA1:
 		hdr = (const union amdgpu_firmware_header *)
			adev->sdma.instance[0].fw->data;
 		break;
 
 	case KGD_ENGINE_SDMA2:
 		hdr = (const union amdgpu_firmware_header *)
			adev->sdma.instance[1].fw->data;
 		break;
 
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index fd435a96481c..383204e911a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -45,7 +45,6 @@ struct amdgpu_cgs_device {
 static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 				    enum cgs_gpu_mem_type type,
 				    uint64_t size, uint64_t align,
-				    uint64_t min_offset, uint64_t max_offset,
 				    cgs_handle_t *handle)
 {
 	CGS_FUNC_ADEV;
@@ -53,13 +52,6 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 	int ret = 0;
 	uint32_t domain = 0;
 	struct amdgpu_bo *obj;
-	struct ttm_placement placement;
-	struct ttm_place place;
-
-	if (min_offset > max_offset) {
-		BUG_ON(1);
-		return -EINVAL;
-	}
 
 	/* fail if the alignment is not a power of 2 */
 	if (((align != 1) && (align & (align - 1)))
@@ -73,41 +65,19 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 		flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 		domain = AMDGPU_GEM_DOMAIN_VRAM;
-		if (max_offset > adev->mc.real_vram_size)
-			return -EINVAL;
-		place.fpfn = min_offset >> PAGE_SHIFT;
-		place.lpfn = max_offset >> PAGE_SHIFT;
-		place.flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-			TTM_PL_FLAG_VRAM;
 		break;
 	case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
 	case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
 		flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 		domain = AMDGPU_GEM_DOMAIN_VRAM;
-		if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
-			place.fpfn =
-				max(min_offset, adev->mc.visible_vram_size) >> PAGE_SHIFT;
-			place.lpfn =
-				min(max_offset, adev->mc.real_vram_size) >> PAGE_SHIFT;
-			place.flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-				TTM_PL_FLAG_VRAM;
-		}
-
 		break;
 	case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
 		domain = AMDGPU_GEM_DOMAIN_GTT;
-		place.fpfn = min_offset >> PAGE_SHIFT;
-		place.lpfn = max_offset >> PAGE_SHIFT;
-		place.flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
 		break;
 	case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
 		flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 		domain = AMDGPU_GEM_DOMAIN_GTT;
-		place.fpfn = min_offset >> PAGE_SHIFT;
-		place.lpfn = max_offset >> PAGE_SHIFT;
-		place.flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT |
-			TTM_PL_FLAG_UNCACHED;
 		break;
 	default:
 		return -EINVAL;
@@ -116,15 +86,8 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, | |||
116 | 86 | ||
117 | *handle = 0; | 87 | *handle = 0; |
118 | 88 | ||
119 | placement.placement = &place; | 89 | ret = amdgpu_bo_create(adev, size, align, true, domain, flags, |
120 | placement.num_placement = 1; | 90 | NULL, NULL, 0, &obj); |
121 | placement.busy_placement = &place; | ||
122 | placement.num_busy_placement = 1; | ||
123 | |||
124 | ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE, | ||
125 | true, domain, flags, | ||
126 | NULL, &placement, NULL, | ||
127 | 0, &obj); | ||
128 | if (ret) { | 91 | if (ret) { |
129 | DRM_ERROR("(%d) bo create failed\n", ret); | 92 | DRM_ERROR("(%d) bo create failed\n", ret); |
130 | return ret; | 93 | return ret; |
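With the restricted placement removed, allocation reduces to a single amdgpu_bo_create() call. The annotation below is a sketch: the parameter meanings follow the amdgpu_bo_create() prototype of this kernel generation and are given for orientation only, not quoted from the patch.

	ret = amdgpu_bo_create(adev,
			       size,   /* allocation size in bytes */
			       align,  /* byte alignment, checked above to be a power of 2 */
			       true,   /* kernel-owned buffer object */
			       domain, /* VRAM or GTT, chosen by the mem-type switch */
			       flags,  /* e.g. AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS */
			       NULL,   /* no sg_table backing */
			       NULL,   /* no externally supplied reservation object */
			       0,      /* fill value, only relevant with the VRAM-cleared flag */
			       &obj);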
@@ -155,19 +118,14 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h | |||
155 | uint64_t *mcaddr) | 118 | uint64_t *mcaddr) |
156 | { | 119 | { |
157 | int r; | 120 | int r; |
158 | u64 min_offset, max_offset; | ||
159 | struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; | 121 | struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; |
160 | 122 | ||
161 | WARN_ON_ONCE(obj->placement.num_placement > 1); | 123 | WARN_ON_ONCE(obj->placement.num_placement > 1); |
162 | 124 | ||
163 | min_offset = obj->placements[0].fpfn << PAGE_SHIFT; | ||
164 | max_offset = obj->placements[0].lpfn << PAGE_SHIFT; | ||
165 | |||
166 | r = amdgpu_bo_reserve(obj, true); | 125 | r = amdgpu_bo_reserve(obj, true); |
167 | if (unlikely(r != 0)) | 126 | if (unlikely(r != 0)) |
168 | return r; | 127 | return r; |
169 | r = amdgpu_bo_pin_restricted(obj, obj->preferred_domains, | 128 | r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr); |
170 | min_offset, max_offset, mcaddr); | ||
171 | amdgpu_bo_unreserve(obj); | 129 | amdgpu_bo_unreserve(obj); |
172 | return r; | 130 | return r; |
173 | } | 131 | } |
@@ -675,6 +633,85 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | |||
675 | 633 | ||
676 | if (!adev->pm.fw) { | 634 | if (!adev->pm.fw) { |
677 | switch (adev->asic_type) { | 635 | switch (adev->asic_type) { |
636 | case CHIP_TAHITI: | ||
637 | strcpy(fw_name, "radeon/tahiti_smc.bin"); | ||
638 | break; | ||
639 | case CHIP_PITCAIRN: | ||
640 | if ((adev->pdev->revision == 0x81) && | ||
641 | ((adev->pdev->device == 0x6810) || | ||
642 | (adev->pdev->device == 0x6811))) { | ||
643 | info->is_kicker = true; | ||
644 | strcpy(fw_name, "radeon/pitcairn_k_smc.bin"); | ||
645 | } else { | ||
646 | strcpy(fw_name, "radeon/pitcairn_smc.bin"); | ||
647 | } | ||
648 | break; | ||
649 | case CHIP_VERDE: | ||
650 | if (((adev->pdev->device == 0x6820) && | ||
651 | ((adev->pdev->revision == 0x81) || | ||
652 | (adev->pdev->revision == 0x83))) || | ||
653 | ((adev->pdev->device == 0x6821) && | ||
654 | ((adev->pdev->revision == 0x83) || | ||
655 | (adev->pdev->revision == 0x87))) || | ||
656 | ((adev->pdev->revision == 0x87) && | ||
657 | ((adev->pdev->device == 0x6823) || | ||
658 | (adev->pdev->device == 0x682b)))) { | ||
659 | info->is_kicker = true; | ||
660 | strcpy(fw_name, "radeon/verde_k_smc.bin"); | ||
661 | } else { | ||
662 | strcpy(fw_name, "radeon/verde_smc.bin"); | ||
663 | } | ||
664 | break; | ||
665 | case CHIP_OLAND: | ||
666 | if (((adev->pdev->revision == 0x81) && | ||
667 | ((adev->pdev->device == 0x6600) || | ||
668 | (adev->pdev->device == 0x6604) || | ||
669 | (adev->pdev->device == 0x6605) || | ||
670 | (adev->pdev->device == 0x6610))) || | ||
671 | ((adev->pdev->revision == 0x83) && | ||
672 | (adev->pdev->device == 0x6610))) { | ||
673 | info->is_kicker = true; | ||
674 | strcpy(fw_name, "radeon/oland_k_smc.bin"); | ||
675 | } else { | ||
676 | strcpy(fw_name, "radeon/oland_smc.bin"); | ||
677 | } | ||
678 | break; | ||
679 | case CHIP_HAINAN: | ||
680 | if (((adev->pdev->revision == 0x81) && | ||
681 | (adev->pdev->device == 0x6660)) || | ||
682 | ((adev->pdev->revision == 0x83) && | ||
683 | ((adev->pdev->device == 0x6660) || | ||
684 | (adev->pdev->device == 0x6663) || | ||
685 | (adev->pdev->device == 0x6665) || | ||
686 | (adev->pdev->device == 0x6667)))) { | ||
687 | info->is_kicker = true; | ||
688 | strcpy(fw_name, "radeon/hainan_k_smc.bin"); | ||
689 | } else if ((adev->pdev->revision == 0xc3) && | ||
690 | (adev->pdev->device == 0x6665)) { | ||
691 | info->is_kicker = true; | ||
692 | strcpy(fw_name, "radeon/banks_k_2_smc.bin"); | ||
693 | } else { | ||
694 | strcpy(fw_name, "radeon/hainan_smc.bin"); | ||
695 | } | ||
696 | break; | ||
697 | case CHIP_BONAIRE: | ||
698 | if ((adev->pdev->revision == 0x80) || | ||
699 | (adev->pdev->revision == 0x81) || | ||
700 | (adev->pdev->device == 0x665f)) { | ||
701 | info->is_kicker = true; | ||
702 | strcpy(fw_name, "radeon/bonaire_k_smc.bin"); | ||
703 | } else { | ||
704 | strcpy(fw_name, "radeon/bonaire_smc.bin"); | ||
705 | } | ||
706 | break; | ||
707 | case CHIP_HAWAII: | ||
708 | if (adev->pdev->revision == 0x80) { | ||
709 | info->is_kicker = true; | ||
710 | strcpy(fw_name, "radeon/hawaii_k_smc.bin"); | ||
711 | } else { | ||
712 | strcpy(fw_name, "radeon/hawaii_smc.bin"); | ||
713 | } | ||
714 | break; | ||
678 | case CHIP_TOPAZ: | 715 | case CHIP_TOPAZ: |
679 | if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) || | 716 | if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) || |
680 | ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) || | 717 | ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) || |
@@ -838,6 +875,9 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device, | |||
838 | case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID: | 875 | case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID: |
839 | sys_info->value = adev->pdev->subsystem_vendor; | 876 | sys_info->value = adev->pdev->subsystem_vendor; |
840 | break; | 877 | break; |
878 | case CGS_SYSTEM_INFO_PCIE_BUS_DEVFN: | ||
879 | sys_info->value = adev->pdev->devfn; | ||
880 | break; | ||
841 | default: | 881 | default: |
842 | return -ENODEV; | 882 | return -ENODEV; |
843 | } | 883 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 8d1cf2d3e663..f51b41f094ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | |||
@@ -346,10 +346,8 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) | |||
346 | { | 346 | { |
347 | struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); | 347 | struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); |
348 | 348 | ||
349 | if (amdgpu_connector->edid) { | 349 | kfree(amdgpu_connector->edid); |
350 | kfree(amdgpu_connector->edid); | 350 | amdgpu_connector->edid = NULL; |
351 | amdgpu_connector->edid = NULL; | ||
352 | } | ||
353 | } | 351 | } |
354 | 352 | ||
355 | static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) | 353 | static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) |
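The simplification above relies on kfree() accepting a NULL pointer as a no-op, which makes the surrounding NULL check redundant. A minimal user-space analogue using the standard C free(), which gives the same guarantee (struct and helper are illustrative only):

	#include <stdlib.h>

	struct edid_holder {
		void *edid;
	};

	static void edid_holder_free(struct edid_holder *h)
	{
		free(h->edid);  /* free(NULL) is a no-op, so no NULL check is needed */
		h->edid = NULL; /* clear the pointer so repeated calls stay safe */
	}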
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c05479ec825a..c6a214f1e991 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | |||
@@ -246,7 +246,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, | |||
246 | } | 246 | } |
247 | 247 | ||
248 | total_vram = adev->mc.real_vram_size - adev->vram_pin_size; | 248 | total_vram = adev->mc.real_vram_size - adev->vram_pin_size; |
249 | used_vram = atomic64_read(&adev->vram_usage); | 249 | used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); |
250 | free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; | 250 | free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; |
251 | 251 | ||
252 | spin_lock(&adev->mm_stats.lock); | 252 | spin_lock(&adev->mm_stats.lock); |
@@ -292,7 +292,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, | |||
292 | /* Do the same for visible VRAM if half of it is free */ | 292 | /* Do the same for visible VRAM if half of it is free */ |
293 | if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { | 293 | if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { |
294 | u64 total_vis_vram = adev->mc.visible_vram_size; | 294 | u64 total_vis_vram = adev->mc.visible_vram_size; |
295 | u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage); | 295 | u64 used_vis_vram = |
296 | amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); | ||
296 | 297 | ||
297 | if (used_vis_vram < total_vis_vram) { | 298 | if (used_vis_vram < total_vis_vram) { |
298 | u64 free_vis_vram = total_vis_vram - used_vis_vram; | 299 | u64 free_vis_vram = total_vis_vram - used_vis_vram; |
@@ -472,11 +473,16 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, | |||
472 | return -EPERM; | 473 | return -EPERM; |
473 | 474 | ||
474 | /* Check if we have user pages and nobody bound the BO already */ | 475 | /* Check if we have user pages and nobody bound the BO already */ |
475 | if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) { | 476 | if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && |
476 | size_t size = sizeof(struct page *); | 477 | lobj->user_pages) { |
477 | 478 | amdgpu_ttm_placement_from_domain(bo, | |
478 | size *= bo->tbo.ttm->num_pages; | 479 | AMDGPU_GEM_DOMAIN_CPU); |
479 | memcpy(bo->tbo.ttm->pages, lobj->user_pages, size); | 480 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, |
481 | false); | ||
482 | if (r) | ||
483 | return r; | ||
484 | amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, | ||
485 | lobj->user_pages); | ||
480 | binding_userptr = true; | 486 | binding_userptr = true; |
481 | } | 487 | } |
482 | 488 | ||
@@ -501,7 +507,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
501 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | 507 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; |
502 | struct amdgpu_bo_list_entry *e; | 508 | struct amdgpu_bo_list_entry *e; |
503 | struct list_head duplicates; | 509 | struct list_head duplicates; |
504 | bool need_mmap_lock = false; | ||
505 | unsigned i, tries = 10; | 510 | unsigned i, tries = 10; |
506 | int r; | 511 | int r; |
507 | 512 | ||
@@ -509,9 +514,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
509 | 514 | ||
510 | p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); | 515 | p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); |
511 | if (p->bo_list) { | 516 | if (p->bo_list) { |
512 | need_mmap_lock = p->bo_list->first_userptr != | ||
513 | p->bo_list->num_entries; | ||
514 | amdgpu_bo_list_get_list(p->bo_list, &p->validated); | 517 | amdgpu_bo_list_get_list(p->bo_list, &p->validated); |
518 | if (p->bo_list->first_userptr != p->bo_list->num_entries) | ||
519 | p->mn = amdgpu_mn_get(p->adev); | ||
515 | } | 520 | } |
516 | 521 | ||
517 | INIT_LIST_HEAD(&duplicates); | 522 | INIT_LIST_HEAD(&duplicates); |
@@ -520,9 +525,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
520 | if (p->uf_entry.robj) | 525 | if (p->uf_entry.robj) |
521 | list_add(&p->uf_entry.tv.head, &p->validated); | 526 | list_add(&p->uf_entry.tv.head, &p->validated); |
522 | 527 | ||
523 | if (need_mmap_lock) | ||
524 | down_read(¤t->mm->mmap_sem); | ||
525 | |||
526 | while (1) { | 528 | while (1) { |
527 | struct list_head need_pages; | 529 | struct list_head need_pages; |
528 | unsigned i; | 530 | unsigned i; |
@@ -542,23 +544,25 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
542 | INIT_LIST_HEAD(&need_pages); | 544 | INIT_LIST_HEAD(&need_pages); |
543 | for (i = p->bo_list->first_userptr; | 545 | for (i = p->bo_list->first_userptr; |
544 | i < p->bo_list->num_entries; ++i) { | 546 | i < p->bo_list->num_entries; ++i) { |
547 | struct amdgpu_bo *bo; | ||
545 | 548 | ||
546 | e = &p->bo_list->array[i]; | 549 | e = &p->bo_list->array[i]; |
550 | bo = e->robj; | ||
547 | 551 | ||
548 | if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm, | 552 | if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, |
549 | &e->user_invalidated) && e->user_pages) { | 553 | &e->user_invalidated) && e->user_pages) { |
550 | 554 | ||
551 | /* We acquired a page array, but somebody | 555 | /* We acquired a page array, but somebody |
552 | * invalidated it. Free it and try again | 556 | * invalidated it. Free it and try again |
553 | */ | 557 | */ |
554 | release_pages(e->user_pages, | 558 | release_pages(e->user_pages, |
555 | e->robj->tbo.ttm->num_pages, | 559 | bo->tbo.ttm->num_pages, |
556 | false); | 560 | false); |
557 | kvfree(e->user_pages); | 561 | kvfree(e->user_pages); |
558 | e->user_pages = NULL; | 562 | e->user_pages = NULL; |
559 | } | 563 | } |
560 | 564 | ||
561 | if (e->robj->tbo.ttm->state != tt_bound && | 565 | if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && |
562 | !e->user_pages) { | 566 | !e->user_pages) { |
563 | list_del(&e->tv.head); | 567 | list_del(&e->tv.head); |
564 | list_add(&e->tv.head, &need_pages); | 568 | list_add(&e->tv.head, &need_pages); |
@@ -635,9 +639,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
635 | 639 | ||
636 | amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, | 640 | amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, |
637 | p->bytes_moved_vis); | 641 | p->bytes_moved_vis); |
638 | fpriv->vm.last_eviction_counter = | ||
639 | atomic64_read(&p->adev->num_evictions); | ||
640 | |||
641 | if (p->bo_list) { | 642 | if (p->bo_list) { |
642 | struct amdgpu_bo *gds = p->bo_list->gds_obj; | 643 | struct amdgpu_bo *gds = p->bo_list->gds_obj; |
643 | struct amdgpu_bo *gws = p->bo_list->gws_obj; | 644 | struct amdgpu_bo *gws = p->bo_list->gws_obj; |
@@ -673,16 +674,11 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
673 | } | 674 | } |
674 | 675 | ||
675 | error_validate: | 676 | error_validate: |
676 | if (r) { | 677 | if (r) |
677 | amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm); | ||
678 | ttm_eu_backoff_reservation(&p->ticket, &p->validated); | 678 | ttm_eu_backoff_reservation(&p->ticket, &p->validated); |
679 | } | ||
680 | 679 | ||
681 | error_free_pages: | 680 | error_free_pages: |
682 | 681 | ||
683 | if (need_mmap_lock) | ||
684 | up_read(¤t->mm->mmap_sem); | ||
685 | |||
686 | if (p->bo_list) { | 682 | if (p->bo_list) { |
687 | for (i = p->bo_list->first_userptr; | 683 | for (i = p->bo_list->first_userptr; |
688 | i < p->bo_list->num_entries; ++i) { | 684 | i < p->bo_list->num_entries; ++i) { |
@@ -724,21 +720,14 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) | |||
724 | * If error is set then unvalidate buffer, otherwise just free memory | 720 | * If error is set then unvalidate buffer, otherwise just free memory |
725 | * used by parsing context. | 721 | * used by parsing context. |
726 | **/ | 722 | **/ |
727 | static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) | 723 | static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, |
724 | bool backoff) | ||
728 | { | 725 | { |
729 | struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; | ||
730 | unsigned i; | 726 | unsigned i; |
731 | 727 | ||
732 | if (!error) { | 728 | if (error && backoff) |
733 | amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm); | ||
734 | |||
735 | ttm_eu_fence_buffer_objects(&parser->ticket, | ||
736 | &parser->validated, | ||
737 | parser->fence); | ||
738 | } else if (backoff) { | ||
739 | ttm_eu_backoff_reservation(&parser->ticket, | 729 | ttm_eu_backoff_reservation(&parser->ticket, |
740 | &parser->validated); | 730 | &parser->validated); |
741 | } | ||
742 | 731 | ||
743 | for (i = 0; i < parser->num_post_dep_syncobjs; i++) | 732 | for (i = 0; i < parser->num_post_dep_syncobjs; i++) |
744 | drm_syncobj_put(parser->post_dep_syncobjs[i]); | 733 | drm_syncobj_put(parser->post_dep_syncobjs[i]); |
@@ -772,10 +761,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) | |||
772 | if (r) | 761 | if (r) |
773 | return r; | 762 | return r; |
774 | 763 | ||
775 | r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update); | ||
776 | if (r) | ||
777 | return r; | ||
778 | |||
779 | r = amdgpu_vm_clear_freed(adev, vm, NULL); | 764 | r = amdgpu_vm_clear_freed(adev, vm, NULL); |
780 | if (r) | 765 | if (r) |
781 | return r; | 766 | return r; |
@@ -791,7 +776,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) | |||
791 | 776 | ||
792 | if (amdgpu_sriov_vf(adev)) { | 777 | if (amdgpu_sriov_vf(adev)) { |
793 | struct dma_fence *f; | 778 | struct dma_fence *f; |
794 | bo_va = vm->csa_bo_va; | 779 | |
780 | bo_va = fpriv->csa_va; | ||
795 | BUG_ON(!bo_va); | 781 | BUG_ON(!bo_va); |
796 | r = amdgpu_vm_bo_update(adev, bo_va, false); | 782 | r = amdgpu_vm_bo_update(adev, bo_va, false); |
797 | if (r) | 783 | if (r) |
@@ -828,7 +814,13 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) | |||
828 | 814 | ||
829 | } | 815 | } |
830 | 816 | ||
831 | r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync); | 817 | r = amdgpu_vm_handle_moved(adev, vm); |
818 | if (r) | ||
819 | return r; | ||
820 | |||
821 | r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update); | ||
822 | if (r) | ||
823 | return r; | ||
832 | 824 | ||
833 | if (amdgpu_vm_debug && p->bo_list) { | 825 | if (amdgpu_vm_debug && p->bo_list) { |
834 | /* Invalidate all BOs to test for userspace bugs */ | 826 | /* Invalidate all BOs to test for userspace bugs */ |
@@ -838,7 +830,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) | |||
838 | if (!bo) | 830 | if (!bo) |
839 | continue; | 831 | continue; |
840 | 832 | ||
841 | amdgpu_vm_bo_invalidate(adev, bo); | 833 | amdgpu_vm_bo_invalidate(adev, bo, false); |
842 | } | 834 | } |
843 | } | 835 | } |
844 | 836 | ||
@@ -863,7 +855,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, | |||
863 | } | 855 | } |
864 | 856 | ||
865 | if (p->job->vm) { | 857 | if (p->job->vm) { |
866 | p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo); | 858 | p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo); |
867 | 859 | ||
868 | r = amdgpu_bo_vm_update_pte(p); | 860 | r = amdgpu_bo_vm_update_pte(p); |
869 | if (r) | 861 | if (r) |
@@ -931,11 +923,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
931 | uint64_t offset; | 923 | uint64_t offset; |
932 | uint8_t *kptr; | 924 | uint8_t *kptr; |
933 | 925 | ||
934 | m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, | 926 | r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, |
935 | &aobj); | 927 | &aobj, &m); |
936 | if (!aobj) { | 928 | if (r) { |
937 | DRM_ERROR("IB va_start is invalid\n"); | 929 | DRM_ERROR("IB va_start is invalid\n"); |
938 | return -EINVAL; | 930 | return r; |
939 | } | 931 | } |
940 | 932 | ||
941 | if ((chunk_ib->va_start + chunk_ib->ib_bytes) > | 933 | if ((chunk_ib->va_start + chunk_ib->ib_bytes) > |
@@ -1038,7 +1030,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, | |||
1038 | { | 1030 | { |
1039 | int r; | 1031 | int r; |
1040 | struct dma_fence *fence; | 1032 | struct dma_fence *fence; |
1041 | r = drm_syncobj_fence_get(p->filp, handle, &fence); | 1033 | r = drm_syncobj_find_fence(p->filp, handle, &fence); |
1042 | if (r) | 1034 | if (r) |
1043 | return r; | 1035 | return r; |
1044 | 1036 | ||
@@ -1082,6 +1074,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, | |||
1082 | GFP_KERNEL); | 1074 | GFP_KERNEL); |
1083 | p->num_post_dep_syncobjs = 0; | 1075 | p->num_post_dep_syncobjs = 0; |
1084 | 1076 | ||
1077 | if (!p->post_dep_syncobjs) | ||
1078 | return -ENOMEM; | ||
1079 | |||
1085 | for (i = 0; i < num_deps; ++i) { | 1080 | for (i = 0; i < num_deps; ++i) { |
1086 | p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); | 1081 | p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); |
1087 | if (!p->post_dep_syncobjs[i]) | 1082 | if (!p->post_dep_syncobjs[i]) |
@@ -1133,14 +1128,31 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, | |||
1133 | struct amdgpu_ring *ring = p->job->ring; | 1128 | struct amdgpu_ring *ring = p->job->ring; |
1134 | struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; | 1129 | struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; |
1135 | struct amdgpu_job *job; | 1130 | struct amdgpu_job *job; |
1131 | unsigned i; | ||
1132 | uint64_t seq; | ||
1133 | |||
1136 | int r; | 1134 | int r; |
1137 | 1135 | ||
1136 | amdgpu_mn_lock(p->mn); | ||
1137 | if (p->bo_list) { | ||
1138 | for (i = p->bo_list->first_userptr; | ||
1139 | i < p->bo_list->num_entries; ++i) { | ||
1140 | struct amdgpu_bo *bo = p->bo_list->array[i].robj; | ||
1141 | |||
1142 | if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { | ||
1143 | amdgpu_mn_unlock(p->mn); | ||
1144 | return -ERESTARTSYS; | ||
1145 | } | ||
1146 | } | ||
1147 | } | ||
1148 | |||
1138 | job = p->job; | 1149 | job = p->job; |
1139 | p->job = NULL; | 1150 | p->job = NULL; |
1140 | 1151 | ||
1141 | r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp); | 1152 | r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp); |
1142 | if (r) { | 1153 | if (r) { |
1143 | amdgpu_job_free(job); | 1154 | amdgpu_job_free(job); |
1155 | amdgpu_mn_unlock(p->mn); | ||
1144 | return r; | 1156 | return r; |
1145 | } | 1157 | } |
1146 | 1158 | ||
@@ -1148,15 +1160,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, | |||
1148 | job->fence_ctx = entity->fence_context; | 1160 | job->fence_ctx = entity->fence_context; |
1149 | p->fence = dma_fence_get(&job->base.s_fence->finished); | 1161 | p->fence = dma_fence_get(&job->base.s_fence->finished); |
1150 | 1162 | ||
1163 | r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); | ||
1164 | if (r) { | ||
1165 | dma_fence_put(p->fence); | ||
1166 | dma_fence_put(&job->base.s_fence->finished); | ||
1167 | amdgpu_job_free(job); | ||
1168 | amdgpu_mn_unlock(p->mn); | ||
1169 | return r; | ||
1170 | } | ||
1171 | |||
1151 | amdgpu_cs_post_dependencies(p); | 1172 | amdgpu_cs_post_dependencies(p); |
1152 | 1173 | ||
1153 | cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); | 1174 | cs->out.handle = seq; |
1154 | job->uf_sequence = cs->out.handle; | 1175 | job->uf_sequence = seq; |
1176 | |||
1155 | amdgpu_job_free_resources(job); | 1177 | amdgpu_job_free_resources(job); |
1156 | amdgpu_cs_parser_fini(p, 0, true); | ||
1157 | 1178 | ||
1158 | trace_amdgpu_cs_ioctl(job); | 1179 | trace_amdgpu_cs_ioctl(job); |
1159 | amd_sched_entity_push_job(&job->base); | 1180 | amd_sched_entity_push_job(&job->base); |
1181 | |||
1182 | ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); | ||
1183 | amdgpu_mn_unlock(p->mn); | ||
1184 | |||
1160 | return 0; | 1185 | return 0; |
1161 | } | 1186 | } |
1162 | 1187 | ||
@@ -1211,10 +1236,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
1211 | goto out; | 1236 | goto out; |
1212 | 1237 | ||
1213 | r = amdgpu_cs_submit(&parser, cs); | 1238 | r = amdgpu_cs_submit(&parser, cs); |
1214 | if (r) | ||
1215 | goto out; | ||
1216 | 1239 | ||
1217 | return 0; | ||
1218 | out: | 1240 | out: |
1219 | amdgpu_cs_parser_fini(&parser, r, reserved_buffers); | 1241 | amdgpu_cs_parser_fini(&parser, r, reserved_buffers); |
1220 | return r; | 1242 | return r; |
@@ -1387,6 +1409,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, | |||
1387 | array[i] = fence; | 1409 | array[i] = fence; |
1388 | } else { /* NULL, the fence has been already signaled */ | 1410 | } else { /* NULL, the fence has been already signaled */ |
1389 | r = 1; | 1411 | r = 1; |
1412 | first = i; | ||
1390 | goto out; | 1413 | goto out; |
1391 | } | 1414 | } |
1392 | } | 1415 | } |
@@ -1466,78 +1489,36 @@ err_free_fences: | |||
1466 | * virtual memory address. Returns allocation structure when found, NULL | 1489 | * virtual memory address. Returns allocation structure when found, NULL |
1467 | * otherwise. | 1490 | * otherwise. |
1468 | */ | 1491 | */ |
1469 | struct amdgpu_bo_va_mapping * | 1492 | int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, |
1470 | amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, | 1493 | uint64_t addr, struct amdgpu_bo **bo, |
1471 | uint64_t addr, struct amdgpu_bo **bo) | 1494 | struct amdgpu_bo_va_mapping **map) |
1472 | { | 1495 | { |
1496 | struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; | ||
1497 | struct amdgpu_vm *vm = &fpriv->vm; | ||
1473 | struct amdgpu_bo_va_mapping *mapping; | 1498 | struct amdgpu_bo_va_mapping *mapping; |
1474 | unsigned i; | 1499 | int r; |
1475 | |||
1476 | if (!parser->bo_list) | ||
1477 | return NULL; | ||
1478 | 1500 | ||
1479 | addr /= AMDGPU_GPU_PAGE_SIZE; | 1501 | addr /= AMDGPU_GPU_PAGE_SIZE; |
1480 | 1502 | ||
1481 | for (i = 0; i < parser->bo_list->num_entries; i++) { | 1503 | mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); |
1482 | struct amdgpu_bo_list_entry *lobj; | 1504 | if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) |
1483 | 1505 | return -EINVAL; | |
1484 | lobj = &parser->bo_list->array[i]; | ||
1485 | if (!lobj->bo_va) | ||
1486 | continue; | ||
1487 | |||
1488 | list_for_each_entry(mapping, &lobj->bo_va->valids, list) { | ||
1489 | if (mapping->start > addr || | ||
1490 | addr > mapping->last) | ||
1491 | continue; | ||
1492 | |||
1493 | *bo = lobj->bo_va->bo; | ||
1494 | return mapping; | ||
1495 | } | ||
1496 | |||
1497 | list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { | ||
1498 | if (mapping->start > addr || | ||
1499 | addr > mapping->last) | ||
1500 | continue; | ||
1501 | 1506 | ||
1502 | *bo = lobj->bo_va->bo; | 1507 | *bo = mapping->bo_va->base.bo; |
1503 | return mapping; | 1508 | *map = mapping; |
1504 | } | ||
1505 | } | ||
1506 | 1509 | ||
1507 | return NULL; | 1510 | /* Double check that the BO is reserved by this CS */ |
1508 | } | 1511 | if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) |
1512 | return -EINVAL; | ||
1509 | 1513 | ||
1510 | /** | 1514 | r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); |
1511 | * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM | 1515 | if (unlikely(r)) |
1512 | * | 1516 | return r; |
1513 | * @parser: command submission parser context | ||
1514 | * | ||
1515 | * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM. | ||
1516 | */ | ||
1517 | int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser) | ||
1518 | { | ||
1519 | unsigned i; | ||
1520 | int r; | ||
1521 | 1517 | ||
1522 | if (!parser->bo_list) | 1518 | if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) |
1523 | return 0; | 1519 | return 0; |
1524 | 1520 | ||
1525 | for (i = 0; i < parser->bo_list->num_entries; i++) { | 1521 | (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; |
1526 | struct amdgpu_bo *bo = parser->bo_list->array[i].robj; | 1522 | amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); |
1527 | 1523 | return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false); | |
1528 | r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); | ||
1529 | if (unlikely(r)) | ||
1530 | return r; | ||
1531 | |||
1532 | if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) | ||
1533 | continue; | ||
1534 | |||
1535 | bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; | ||
1536 | amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains); | ||
1537 | r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); | ||
1538 | if (unlikely(r)) | ||
1539 | return r; | ||
1540 | } | ||
1541 | |||
1542 | return 0; | ||
1543 | } | 1524 | } |
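amdgpu_cs_find_mapping() now reports failure through its return code and hands back both the buffer object and the mapping via out-parameters, instead of returning a mapping pointer or NULL. The caller-side pattern, as already visible in the amdgpu_cs_ib_fill() hunk earlier in this file, is roughly:

	struct amdgpu_bo *aobj;
	struct amdgpu_bo_va_mapping *m;
	int r;

	r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, &aobj, &m);
	if (r) {
		DRM_ERROR("IB va_start is invalid\n");
		return r; /* propagate the error instead of testing for NULL */
	}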
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a11e44340b23..75c933b1a432 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | |||
@@ -246,8 +246,8 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) | |||
246 | return 0; | 246 | return 0; |
247 | } | 247 | } |
248 | 248 | ||
249 | uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | 249 | int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, |
250 | struct dma_fence *fence) | 250 | struct dma_fence *fence, uint64_t* handler) |
251 | { | 251 | { |
252 | struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; | 252 | struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; |
253 | uint64_t seq = cring->sequence; | 253 | uint64_t seq = cring->sequence; |
@@ -258,9 +258,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | |||
258 | other = cring->fences[idx]; | 258 | other = cring->fences[idx]; |
259 | if (other) { | 259 | if (other) { |
260 | signed long r; | 260 | signed long r; |
261 | r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); | 261 | r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); |
262 | if (r < 0) | 262 | if (r < 0) |
263 | DRM_ERROR("Error (%ld) waiting for fence!\n", r); | 263 | return r; |
264 | } | 264 | } |
265 | 265 | ||
266 | dma_fence_get(fence); | 266 | dma_fence_get(fence); |
@@ -271,8 +271,10 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | |||
271 | spin_unlock(&ctx->ring_lock); | 271 | spin_unlock(&ctx->ring_lock); |
272 | 272 | ||
273 | dma_fence_put(other); | 273 | dma_fence_put(other); |
274 | if (handler) | ||
275 | *handler = seq; | ||
274 | 276 | ||
275 | return seq; | 277 | return 0; |
276 | } | 278 | } |
277 | 279 | ||
278 | struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, | 280 | struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, |
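amdgpu_ctx_add_fence() likewise switches to returning an error code and writing the sequence number through its last argument; combined with the interruptible fence wait above, -ERESTARTSYS from a pending signal can now reach the caller instead of being swallowed. The updated call site in amdgpu_cs_submit(), shown earlier in this diff, follows this shape (sketch only):

	uint64_t seq;
	int r;

	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
	if (r) {
		/* unwind: drop the fence references, free the job, unlock the mn */
		return r;
	}
	cs->out.handle = seq;
	job->uf_sequence = seq;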
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a6f6cb0f2e02..3e84ddf9e3b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -65,6 +65,7 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); | |||
65 | static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); | 65 | static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); |
66 | static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); | 66 | static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); |
67 | static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); | 67 | static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); |
68 | static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev); | ||
68 | 69 | ||
69 | static const char *amdgpu_asic_name[] = { | 70 | static const char *amdgpu_asic_name[] = { |
70 | "TAHITI", | 71 | "TAHITI", |
@@ -402,6 +403,15 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev) | |||
402 | */ | 403 | */ |
403 | static int amdgpu_doorbell_init(struct amdgpu_device *adev) | 404 | static int amdgpu_doorbell_init(struct amdgpu_device *adev) |
404 | { | 405 | { |
406 | /* No doorbell on SI hardware generation */ | ||
407 | if (adev->asic_type < CHIP_BONAIRE) { | ||
408 | adev->doorbell.base = 0; | ||
409 | adev->doorbell.size = 0; | ||
410 | adev->doorbell.num_doorbells = 0; | ||
411 | adev->doorbell.ptr = NULL; | ||
412 | return 0; | ||
413 | } | ||
414 | |||
405 | /* doorbell bar mapping */ | 415 | /* doorbell bar mapping */ |
406 | adev->doorbell.base = pci_resource_start(adev->pdev, 2); | 416 | adev->doorbell.base = pci_resource_start(adev->pdev, 2); |
407 | adev->doorbell.size = pci_resource_len(adev->pdev, 2); | 417 | adev->doorbell.size = pci_resource_len(adev->pdev, 2); |
@@ -887,6 +897,20 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) | |||
887 | return r; | 897 | return r; |
888 | } | 898 | } |
889 | 899 | ||
900 | static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, | ||
901 | struct device_attribute *attr, | ||
902 | char *buf) | ||
903 | { | ||
904 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
905 | struct amdgpu_device *adev = ddev->dev_private; | ||
906 | struct atom_context *ctx = adev->mode_info.atom_context; | ||
907 | |||
908 | return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version); | ||
909 | } | ||
910 | |||
911 | static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, | ||
912 | NULL); | ||
913 | |||
890 | /** | 914 | /** |
891 | * amdgpu_atombios_fini - free the driver info and callbacks for atombios | 915 | * amdgpu_atombios_fini - free the driver info and callbacks for atombios |
892 | * | 916 | * |
@@ -906,6 +930,7 @@ static void amdgpu_atombios_fini(struct amdgpu_device *adev) | |||
906 | adev->mode_info.atom_context = NULL; | 930 | adev->mode_info.atom_context = NULL; |
907 | kfree(adev->mode_info.atom_card_info); | 931 | kfree(adev->mode_info.atom_card_info); |
908 | adev->mode_info.atom_card_info = NULL; | 932 | adev->mode_info.atom_card_info = NULL; |
933 | device_remove_file(adev->dev, &dev_attr_vbios_version); | ||
909 | } | 934 | } |
910 | 935 | ||
911 | /** | 936 | /** |
@@ -922,6 +947,7 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev) | |||
922 | { | 947 | { |
923 | struct card_info *atom_card_info = | 948 | struct card_info *atom_card_info = |
924 | kzalloc(sizeof(struct card_info), GFP_KERNEL); | 949 | kzalloc(sizeof(struct card_info), GFP_KERNEL); |
950 | int ret; | ||
925 | 951 | ||
926 | if (!atom_card_info) | 952 | if (!atom_card_info) |
927 | return -ENOMEM; | 953 | return -ENOMEM; |
@@ -958,6 +984,13 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev) | |||
958 | amdgpu_atombios_scratch_regs_init(adev); | 984 | amdgpu_atombios_scratch_regs_init(adev); |
959 | amdgpu_atombios_allocate_fb_scratch(adev); | 985 | amdgpu_atombios_allocate_fb_scratch(adev); |
960 | } | 986 | } |
987 | |||
988 | ret = device_create_file(adev->dev, &dev_attr_vbios_version); | ||
989 | if (ret) { | ||
990 | DRM_ERROR("Failed to create device file for VBIOS version\n"); | ||
991 | return ret; | ||
992 | } | ||
993 | |||
961 | return 0; | 994 | return 0; |
962 | } | 995 | } |
963 | 996 | ||
@@ -1062,11 +1095,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) | |||
1062 | amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); | 1095 | amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); |
1063 | } | 1096 | } |
1064 | 1097 | ||
1065 | if (amdgpu_gart_size < 32) { | 1098 | if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) { |
1066 | /* gart size must be greater or equal to 32M */ | 1099 | /* gart size must be greater or equal to 32M */ |
1067 | dev_warn(adev->dev, "gart size (%d) too small\n", | 1100 | dev_warn(adev->dev, "gart size (%d) too small\n", |
1068 | amdgpu_gart_size); | 1101 | amdgpu_gart_size); |
1069 | amdgpu_gart_size = 32; | 1102 | amdgpu_gart_size = -1; |
1070 | } | 1103 | } |
1071 | 1104 | ||
1072 | if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { | 1105 | if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { |
@@ -1076,6 +1109,13 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) | |||
1076 | amdgpu_gtt_size = -1; | 1109 | amdgpu_gtt_size = -1; |
1077 | } | 1110 | } |
1078 | 1111 | ||
1112 | /* valid range is between 4 and 9 inclusive */ | ||
1113 | if (amdgpu_vm_fragment_size != -1 && | ||
1114 | (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) { | ||
1115 | dev_warn(adev->dev, "valid range is between 4 and 9\n"); | ||
1116 | amdgpu_vm_fragment_size = -1; | ||
1117 | } | ||
1118 | |||
1079 | amdgpu_check_vm_size(adev); | 1119 | amdgpu_check_vm_size(adev); |
1080 | 1120 | ||
1081 | amdgpu_check_block_size(adev); | 1121 | amdgpu_check_block_size(adev); |
@@ -1750,10 +1790,8 @@ static int amdgpu_fini(struct amdgpu_device *adev) | |||
1750 | adev->ip_blocks[i].status.late_initialized = false; | 1790 | adev->ip_blocks[i].status.late_initialized = false; |
1751 | } | 1791 | } |
1752 | 1792 | ||
1753 | if (amdgpu_sriov_vf(adev)) { | 1793 | if (amdgpu_sriov_vf(adev)) |
1754 | amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL); | ||
1755 | amdgpu_virt_release_full_gpu(adev, false); | 1794 | amdgpu_virt_release_full_gpu(adev, false); |
1756 | } | ||
1757 | 1795 | ||
1758 | return 0; | 1796 | return 0; |
1759 | } | 1797 | } |
@@ -2044,9 +2082,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2044 | DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); | 2082 | DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); |
2045 | DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); | 2083 | DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); |
2046 | 2084 | ||
2047 | if (adev->asic_type >= CHIP_BONAIRE) | 2085 | /* doorbell bar mapping */ |
2048 | /* doorbell bar mapping */ | 2086 | amdgpu_doorbell_init(adev); |
2049 | amdgpu_doorbell_init(adev); | ||
2050 | 2087 | ||
2051 | /* io port mapping */ | 2088 | /* io port mapping */ |
2052 | for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { | 2089 | for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { |
@@ -2194,6 +2231,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2194 | if (r) | 2231 | if (r) |
2195 | DRM_ERROR("registering firmware debugfs failed (%d).\n", r); | 2232 | DRM_ERROR("registering firmware debugfs failed (%d).\n", r); |
2196 | 2233 | ||
2234 | r = amdgpu_debugfs_vbios_dump_init(adev); | ||
2235 | if (r) | ||
2236 | DRM_ERROR("Creating vbios dump debugfs failed (%d).\n", r); | ||
2237 | |||
2197 | if ((amdgpu_testing & 1)) { | 2238 | if ((amdgpu_testing & 1)) { |
2198 | if (adev->accel_working) | 2239 | if (adev->accel_working) |
2199 | amdgpu_test_moves(adev); | 2240 | amdgpu_test_moves(adev); |
@@ -2269,8 +2310,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) | |||
2269 | adev->rio_mem = NULL; | 2310 | adev->rio_mem = NULL; |
2270 | iounmap(adev->rmmio); | 2311 | iounmap(adev->rmmio); |
2271 | adev->rmmio = NULL; | 2312 | adev->rmmio = NULL; |
2272 | if (adev->asic_type >= CHIP_BONAIRE) | 2313 | amdgpu_doorbell_fini(adev); |
2273 | amdgpu_doorbell_fini(adev); | ||
2274 | amdgpu_debugfs_regs_cleanup(adev); | 2314 | amdgpu_debugfs_regs_cleanup(adev); |
2275 | } | 2315 | } |
2276 | 2316 | ||
@@ -2539,7 +2579,8 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev) | |||
2539 | if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || | 2579 | if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || |
2540 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || | 2580 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || |
2541 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || | 2581 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || |
2542 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)) { | 2582 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || |
2583 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { | ||
2543 | if (adev->ip_blocks[i].status.hang) { | 2584 | if (adev->ip_blocks[i].status.hang) { |
2544 | DRM_INFO("Some block need full reset!\n"); | 2585 | DRM_INFO("Some block need full reset!\n"); |
2545 | return true; | 2586 | return true; |
@@ -2615,12 +2656,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, | |||
2615 | goto err; | 2656 | goto err; |
2616 | } | 2657 | } |
2617 | 2658 | ||
2618 | r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem); | ||
2619 | if (r) { | ||
2620 | DRM_ERROR("%p bind failed\n", bo->shadow); | ||
2621 | goto err; | ||
2622 | } | ||
2623 | |||
2624 | r = amdgpu_bo_restore_from_shadow(adev, ring, bo, | 2659 | r = amdgpu_bo_restore_from_shadow(adev, ring, bo, |
2625 | NULL, fence, true); | 2660 | NULL, fence, true); |
2626 | if (r) { | 2661 | if (r) { |
@@ -2653,7 +2688,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) | |||
2653 | 2688 | ||
2654 | mutex_lock(&adev->virt.lock_reset); | 2689 | mutex_lock(&adev->virt.lock_reset); |
2655 | atomic_inc(&adev->gpu_reset_counter); | 2690 | atomic_inc(&adev->gpu_reset_counter); |
2656 | adev->gfx.in_reset = true; | 2691 | adev->in_sriov_reset = true; |
2657 | 2692 | ||
2658 | /* block TTM */ | 2693 | /* block TTM */ |
2659 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); | 2694 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); |
@@ -2764,7 +2799,7 @@ give_up_reset: | |||
2764 | dev_info(adev->dev, "GPU reset succeeded!\n"); | 2799 | dev_info(adev->dev, "GPU reset succeeded!\n"); |
2765 | } | 2800 | } |
2766 | 2801 | ||
2767 | adev->gfx.in_reset = false; | 2802 | adev->in_sriov_reset = false; |
2768 | mutex_unlock(&adev->virt.lock_reset); | 2803 | mutex_unlock(&adev->virt.lock_reset); |
2769 | return r; | 2804 | return r; |
2770 | } | 2805 | } |
@@ -3462,10 +3497,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, | |||
3462 | 3497 | ||
3463 | valuesize = sizeof(values); | 3498 | valuesize = sizeof(values); |
3464 | if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) | 3499 | if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) |
3465 | r = adev->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, idx, &values[0], &valuesize); | 3500 | r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); |
3466 | else if (adev->pm.funcs && adev->pm.funcs->read_sensor) | ||
3467 | r = adev->pm.funcs->read_sensor(adev, idx, &values[0], | ||
3468 | &valuesize); | ||
3469 | else | 3501 | else |
3470 | return -EINVAL; | 3502 | return -EINVAL; |
3471 | 3503 | ||
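The sensor read above now always goes through the amdgpu_dpm_read_sensor() macro. With the reworked definition from amdgpu_dpm.h later in this diff, the call expands roughly as below, with no pp_enabled branch left:

	/* amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize) becomes: */
	adev->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle,
					      idx, &values[0], &valuesize);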
@@ -3753,6 +3785,28 @@ int amdgpu_debugfs_init(struct drm_minor *minor) | |||
3753 | { | 3785 | { |
3754 | return 0; | 3786 | return 0; |
3755 | } | 3787 | } |
3788 | |||
3789 | static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data) | ||
3790 | { | ||
3791 | struct drm_info_node *node = (struct drm_info_node *) m->private; | ||
3792 | struct drm_device *dev = node->minor->dev; | ||
3793 | struct amdgpu_device *adev = dev->dev_private; | ||
3794 | |||
3795 | seq_write(m, adev->bios, adev->bios_size); | ||
3796 | return 0; | ||
3797 | } | ||
3798 | |||
3799 | static const struct drm_info_list amdgpu_vbios_dump_list[] = { | ||
3800 | {"amdgpu_vbios", | ||
3801 | amdgpu_debugfs_get_vbios_dump, | ||
3802 | 0, NULL}, | ||
3803 | }; | ||
3804 | |||
3805 | static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev) | ||
3806 | { | ||
3807 | return amdgpu_debugfs_add_files(adev, | ||
3808 | amdgpu_vbios_dump_list, 1); | ||
3809 | } | ||
3756 | #else | 3810 | #else |
3757 | static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) | 3811 | static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) |
3758 | { | 3812 | { |
@@ -3762,5 +3816,9 @@ static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) | |||
3762 | { | 3816 | { |
3763 | return 0; | 3817 | return 0; |
3764 | } | 3818 | } |
3819 | static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev) | ||
3820 | { | ||
3821 | return 0; | ||
3822 | } | ||
3765 | static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } | 3823 | static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } |
3766 | #endif | 3824 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 1cb52fd19060..e997ebbe43ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | |||
@@ -960,8 +960,10 @@ u8 amdgpu_encode_pci_lane_width(u32 lanes) | |||
960 | } | 960 | } |
961 | 961 | ||
962 | struct amd_vce_state* | 962 | struct amd_vce_state* |
963 | amdgpu_get_vce_clock_state(struct amdgpu_device *adev, unsigned idx) | 963 | amdgpu_get_vce_clock_state(void *handle, u32 idx) |
964 | { | 964 | { |
965 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
966 | |||
965 | if (idx < adev->pm.dpm.num_of_vce_states) | 967 | if (idx < adev->pm.dpm.num_of_vce_states) |
966 | return &adev->pm.dpm.vce_states[idx]; | 968 | return &adev->pm.dpm.vce_states[idx]; |
967 | 969 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 8c96a4caa715..f79f9ea58b17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | |||
@@ -241,177 +241,119 @@ enum amdgpu_pcie_gen { | |||
241 | AMDGPU_PCIE_GEN_INVALID = 0xffff | 241 | AMDGPU_PCIE_GEN_INVALID = 0xffff |
242 | }; | 242 | }; |
243 | 243 | ||
244 | struct amdgpu_dpm_funcs { | 244 | #define amdgpu_dpm_pre_set_power_state(adev) \ |
245 | int (*get_temperature)(struct amdgpu_device *adev); | 245 | ((adev)->powerplay.pp_funcs->pre_set_power_state((adev)->powerplay.pp_handle)) |
246 | int (*pre_set_power_state)(struct amdgpu_device *adev); | 246 | |
247 | int (*set_power_state)(struct amdgpu_device *adev); | 247 | #define amdgpu_dpm_set_power_state(adev) \ |
248 | void (*post_set_power_state)(struct amdgpu_device *adev); | 248 | ((adev)->powerplay.pp_funcs->set_power_state((adev)->powerplay.pp_handle)) |
249 | void (*display_configuration_changed)(struct amdgpu_device *adev); | 249 | |
250 | u32 (*get_sclk)(struct amdgpu_device *adev, bool low); | 250 | #define amdgpu_dpm_post_set_power_state(adev) \ |
251 | u32 (*get_mclk)(struct amdgpu_device *adev, bool low); | 251 | ((adev)->powerplay.pp_funcs->post_set_power_state((adev)->powerplay.pp_handle)) |
252 | void (*print_power_state)(struct amdgpu_device *adev, struct amdgpu_ps *ps); | 252 | |
253 | void (*debugfs_print_current_performance_level)(struct amdgpu_device *adev, struct seq_file *m); | 253 | #define amdgpu_dpm_display_configuration_changed(adev) \ |
254 | int (*force_performance_level)(struct amdgpu_device *adev, enum amd_dpm_forced_level level); | 254 | ((adev)->powerplay.pp_funcs->display_configuration_changed((adev)->powerplay.pp_handle)) |
255 | bool (*vblank_too_short)(struct amdgpu_device *adev); | ||
256 | void (*powergate_uvd)(struct amdgpu_device *adev, bool gate); | ||
257 | void (*powergate_vce)(struct amdgpu_device *adev, bool gate); | ||
258 | void (*enable_bapm)(struct amdgpu_device *adev, bool enable); | ||
259 | void (*set_fan_control_mode)(struct amdgpu_device *adev, u32 mode); | ||
260 | u32 (*get_fan_control_mode)(struct amdgpu_device *adev); | ||
261 | int (*set_fan_speed_percent)(struct amdgpu_device *adev, u32 speed); | ||
262 | int (*get_fan_speed_percent)(struct amdgpu_device *adev, u32 *speed); | ||
263 | int (*force_clock_level)(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t mask); | ||
264 | int (*print_clock_levels)(struct amdgpu_device *adev, enum pp_clock_type type, char *buf); | ||
265 | int (*get_sclk_od)(struct amdgpu_device *adev); | ||
266 | int (*set_sclk_od)(struct amdgpu_device *adev, uint32_t value); | ||
267 | int (*get_mclk_od)(struct amdgpu_device *adev); | ||
268 | int (*set_mclk_od)(struct amdgpu_device *adev, uint32_t value); | ||
269 | int (*check_state_equal)(struct amdgpu_device *adev, | ||
270 | struct amdgpu_ps *cps, | ||
271 | struct amdgpu_ps *rps, | ||
272 | bool *equal); | ||
273 | int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value, | ||
274 | int *size); | ||
275 | |||
276 | struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device *adev, unsigned idx); | ||
277 | int (*reset_power_profile_state)(struct amdgpu_device *adev, | ||
278 | struct amd_pp_profile *request); | ||
279 | int (*get_power_profile_state)(struct amdgpu_device *adev, | ||
280 | struct amd_pp_profile *query); | ||
281 | int (*set_power_profile_state)(struct amdgpu_device *adev, | ||
282 | struct amd_pp_profile *request); | ||
283 | int (*switch_power_profile)(struct amdgpu_device *adev, | ||
284 | enum amd_pp_profile_type type); | ||
285 | }; | ||
286 | 255 | ||
287 | #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) | 256 | #define amdgpu_dpm_print_power_state(adev, ps) \ |
288 | #define amdgpu_dpm_set_power_state(adev) (adev)->pm.funcs->set_power_state((adev)) | 257 | ((adev)->powerplay.pp_funcs->print_power_state((adev)->powerplay.pp_handle, (ps))) |
289 | #define amdgpu_dpm_post_set_power_state(adev) (adev)->pm.funcs->post_set_power_state((adev)) | 258 | |
290 | #define amdgpu_dpm_display_configuration_changed(adev) (adev)->pm.funcs->display_configuration_changed((adev)) | 259 | #define amdgpu_dpm_vblank_too_short(adev) \ |
291 | #define amdgpu_dpm_print_power_state(adev, ps) (adev)->pm.funcs->print_power_state((adev), (ps)) | 260 | ((adev)->powerplay.pp_funcs->vblank_too_short((adev)->powerplay.pp_handle)) |
292 | #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev)) | 261 | |
293 | #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e)) | 262 | #define amdgpu_dpm_enable_bapm(adev, e) \ |
263 | ((adev)->powerplay.pp_funcs->enable_bapm((adev)->powerplay.pp_handle, (e))) | ||
294 | 264 | ||
295 | #define amdgpu_dpm_read_sensor(adev, idx, value, size) \ | 265 | #define amdgpu_dpm_read_sensor(adev, idx, value, size) \ |
296 | ((adev)->pp_enabled ? \ | 266 | ((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size))) |
297 | (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value), (size)) : \ | ||
298 | (adev)->pm.funcs->read_sensor((adev), (idx), (value), (size))) | ||
299 | 267 | ||
300 | #define amdgpu_dpm_get_temperature(adev) \ | 268 | #define amdgpu_dpm_get_temperature(adev) \ |
301 | ((adev)->pp_enabled ? \ | 269 | ((adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle)) |
302 | (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \ | ||
303 | (adev)->pm.funcs->get_temperature((adev))) | ||
304 | 270 | ||
305 | #define amdgpu_dpm_set_fan_control_mode(adev, m) \ | 271 | #define amdgpu_dpm_set_fan_control_mode(adev, m) \ |
306 | ((adev)->pp_enabled ? \ | 272 | ((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m))) |
307 | (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \ | ||
308 | (adev)->pm.funcs->set_fan_control_mode((adev), (m))) | ||
309 | 273 | ||
310 | #define amdgpu_dpm_get_fan_control_mode(adev) \ | 274 | #define amdgpu_dpm_get_fan_control_mode(adev) \ |
311 | ((adev)->pp_enabled ? \ | 275 | ((adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle)) |
312 | (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \ | ||
313 | (adev)->pm.funcs->get_fan_control_mode((adev))) | ||
314 | 276 | ||
315 | #define amdgpu_dpm_set_fan_speed_percent(adev, s) \ | 277 | #define amdgpu_dpm_set_fan_speed_percent(adev, s) \ |
316 | ((adev)->pp_enabled ? \ | 278 | ((adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s))) |
317 | (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \ | ||
318 | (adev)->pm.funcs->set_fan_speed_percent((adev), (s))) | ||
319 | 279 | ||
320 | #define amdgpu_dpm_get_fan_speed_percent(adev, s) \ | 280 | #define amdgpu_dpm_get_fan_speed_percent(adev, s) \ |
321 | ((adev)->pp_enabled ? \ | 281 | ((adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s))) |
322 | (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \ | ||
323 | (adev)->pm.funcs->get_fan_speed_percent((adev), (s))) | ||
324 | 282 | ||
325 | #define amdgpu_dpm_get_fan_speed_rpm(adev, s) \ | 283 | #define amdgpu_dpm_get_fan_speed_rpm(adev, s) \ |
326 | ((adev)->pp_enabled ? \ | 284 | ((adev)->powerplay.pp_funcs->get_fan_speed_rpm)((adev)->powerplay.pp_handle, (s)) |
327 | (adev)->powerplay.pp_funcs->get_fan_speed_rpm((adev)->powerplay.pp_handle, (s)) : \ | ||
328 | -EINVAL) | ||
329 | 285 | ||
330 | #define amdgpu_dpm_get_sclk(adev, l) \ | 286 | #define amdgpu_dpm_get_sclk(adev, l) \ |
331 | ((adev)->pp_enabled ? \ | 287 | ((adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l))) |
332 | (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \ | ||
333 | (adev)->pm.funcs->get_sclk((adev), (l))) | ||
334 | 288 | ||
335 | #define amdgpu_dpm_get_mclk(adev, l) \ | 289 | #define amdgpu_dpm_get_mclk(adev, l) \ |
336 | ((adev)->pp_enabled ? \ | 290 | ((adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l))) |
337 | (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \ | ||
338 | (adev)->pm.funcs->get_mclk((adev), (l))) | ||
339 | |||
340 | 291 | ||
341 | #define amdgpu_dpm_force_performance_level(adev, l) \ | 292 | #define amdgpu_dpm_force_performance_level(adev, l) \ |
342 | ((adev)->pp_enabled ? \ | 293 | ((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l))) |
343 | (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \ | ||
344 | (adev)->pm.funcs->force_performance_level((adev), (l))) | ||
345 | 294 | ||
346 | #define amdgpu_dpm_powergate_uvd(adev, g) \ | 295 | #define amdgpu_dpm_powergate_uvd(adev, g) \ |
347 | ((adev)->pp_enabled ? \ | 296 | ((adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g))) |
348 | (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \ | ||
349 | (adev)->pm.funcs->powergate_uvd((adev), (g))) | ||
350 | 297 | ||
351 | #define amdgpu_dpm_powergate_vce(adev, g) \ | 298 | #define amdgpu_dpm_powergate_vce(adev, g) \ |
352 | ((adev)->pp_enabled ? \ | 299 | ((adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g))) |
353 | (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \ | ||
354 | (adev)->pm.funcs->powergate_vce((adev), (g))) | ||
355 | 300 | ||
356 | #define amdgpu_dpm_get_current_power_state(adev) \ | 301 | #define amdgpu_dpm_get_current_power_state(adev) \ |
357 | (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle) | 302 | ((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)) |
358 | 303 | ||
359 | #define amdgpu_dpm_get_pp_num_states(adev, data) \ | 304 | #define amdgpu_dpm_get_pp_num_states(adev, data) \ |
360 | (adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data) | 305 | ((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)) |
361 | 306 | ||
362 | #define amdgpu_dpm_get_pp_table(adev, table) \ | 307 | #define amdgpu_dpm_get_pp_table(adev, table) \ |
363 | (adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table) | 308 | ((adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table)) |
364 | 309 | ||
365 | #define amdgpu_dpm_set_pp_table(adev, buf, size) \ | 310 | #define amdgpu_dpm_set_pp_table(adev, buf, size) \ |
366 | (adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size) | 311 | ((adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size)) |
367 | 312 | ||
368 | #define amdgpu_dpm_print_clock_levels(adev, type, buf) \ | 313 | #define amdgpu_dpm_print_clock_levels(adev, type, buf) \ |
369 | (adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf) | 314 | ((adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf)) |
370 | 315 | ||
371 | #define amdgpu_dpm_force_clock_level(adev, type, level) \ | 316 | #define amdgpu_dpm_force_clock_level(adev, type, level) \ |
372 | (adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level) | 317 | ((adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)) |
373 | 318 | ||
374 | #define amdgpu_dpm_get_sclk_od(adev) \ | 319 | #define amdgpu_dpm_get_sclk_od(adev) \ |
375 | (adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle) | 320 | ((adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle)) |
376 | 321 | ||
377 | #define amdgpu_dpm_set_sclk_od(adev, value) \ | 322 | #define amdgpu_dpm_set_sclk_od(adev, value) \ |
378 | (adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value) | 323 | ((adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value)) |
379 | 324 | ||
380 | #define amdgpu_dpm_get_mclk_od(adev) \ | 325 | #define amdgpu_dpm_get_mclk_od(adev) \ |
381 | ((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle)) | 326 | ((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle)) |
382 | 327 | ||
383 | #define amdgpu_dpm_set_mclk_od(adev, value) \ | 328 | #define amdgpu_dpm_set_mclk_od(adev, value) \ |
384 | ((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value)) | 329 | ((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value)) |
385 | 330 | ||
386 | #define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \ | 331 | #define amdgpu_dpm_dispatch_task(adev, task_id, input, output) \ |
387 | (adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output)) | 332 | ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (input), (output)) |
388 | 333 | ||
389 | #define amgdpu_dpm_check_state_equal(adev, cps, rps, equal) (adev)->pm.funcs->check_state_equal((adev), (cps),(rps),(equal)) | 334 | #define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \ |
335 | ((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal))) | ||
390 | 336 | ||
391 | #define amdgpu_dpm_get_vce_clock_state(adev, i) \ | 337 | #define amdgpu_dpm_get_vce_clock_state(adev, i) \ |
392 | ((adev)->pp_enabled ? \ | 338 | ((adev)->powerplay.pp_funcs->get_vce_clock_state((adev)->powerplay.pp_handle, (i))) |
393 | (adev)->powerplay.pp_funcs->get_vce_clock_state((adev)->powerplay.pp_handle, (i)) : \ | ||
394 | (adev)->pm.funcs->get_vce_clock_state((adev), (i))) | ||
395 | 339 | ||
396 | #define amdgpu_dpm_get_performance_level(adev) \ | 340 | #define amdgpu_dpm_get_performance_level(adev) \ |
397 | ((adev)->pp_enabled ? \ | 341 | ((adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle)) |
398 | (adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle) : \ | ||
399 | (adev)->pm.dpm.forced_level) | ||
400 | 342 | ||
401 | #define amdgpu_dpm_reset_power_profile_state(adev, request) \ | 343 | #define amdgpu_dpm_reset_power_profile_state(adev, request) \ |
402 | ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ | 344 | ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ |
403 | (adev)->powerplay.pp_handle, request)) | 345 | (adev)->powerplay.pp_handle, request)) |
404 | 346 | ||
405 | #define amdgpu_dpm_get_power_profile_state(adev, query) \ | 347 | #define amdgpu_dpm_get_power_profile_state(adev, query) \ |
406 | ((adev)->powerplay.pp_funcs->get_power_profile_state(\ | 348 | ((adev)->powerplay.pp_funcs->get_power_profile_state(\ |
407 | (adev)->powerplay.pp_handle, query)) | 349 | (adev)->powerplay.pp_handle, query)) |
408 | 350 | ||
409 | #define amdgpu_dpm_set_power_profile_state(adev, request) \ | 351 | #define amdgpu_dpm_set_power_profile_state(adev, request) \ |
410 | ((adev)->powerplay.pp_funcs->set_power_profile_state(\ | 352 | ((adev)->powerplay.pp_funcs->set_power_profile_state(\ |
411 | (adev)->powerplay.pp_handle, request)) | 353 | (adev)->powerplay.pp_handle, request)) |
412 | 354 | ||
413 | #define amdgpu_dpm_switch_power_profile(adev, type) \ | 355 | #define amdgpu_dpm_switch_power_profile(adev, type) \ |
414 | ((adev)->powerplay.pp_funcs->switch_power_profile(\ | 356 | ((adev)->powerplay.pp_funcs->switch_power_profile(\ |
415 | (adev)->powerplay.pp_handle, type)) | 357 | (adev)->powerplay.pp_handle, type)) |
416 | 358 | ||
417 | struct amdgpu_dpm { | 359 | struct amdgpu_dpm { |
@@ -485,7 +427,6 @@ struct amdgpu_pm { | |||
485 | struct amdgpu_dpm dpm; | 427 | struct amdgpu_dpm dpm; |
486 | const struct firmware *fw; /* SMC firmware */ | 428 | const struct firmware *fw; /* SMC firmware */ |
487 | uint32_t fw_version; | 429 | uint32_t fw_version; |
488 | const struct amdgpu_dpm_funcs *funcs; | ||
489 | uint32_t pcie_gen_mask; | 430 | uint32_t pcie_gen_mask; |
490 | uint32_t pcie_mlw_mask; | 431 | uint32_t pcie_mlw_mask; |
491 | struct amd_pp_display_configuration pm_display_cfg;/* set by DAL */ | 432 | struct amd_pp_display_configuration pm_display_cfg;/* set by DAL */ |
@@ -551,6 +492,6 @@ u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev, | |||
551 | u8 amdgpu_encode_pci_lane_width(u32 lanes); | 492 | u8 amdgpu_encode_pci_lane_width(u32 lanes); |
552 | 493 | ||
553 | struct amd_vce_state* | 494 | struct amd_vce_state* |
554 | amdgpu_get_vce_clock_state(struct amdgpu_device *adev, unsigned idx); | 495 | amdgpu_get_vce_clock_state(void *handle, u32 idx); |
555 | 496 | ||
556 | #endif | 497 | #endif |
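
With the pp_enabled fallback paths dropped above, the DPM wrapper macros now call straight into the powerplay pp_funcs table. A minimal, hedged sketch of the caller-side pattern this implies is below; it assumes the usual amdgpu driver headers, and the explicit NULL checks are shown as the caller's responsibility rather than something the macros provide.

#include "amdgpu.h"

/* Illustrative only: read an engine clock through the simplified
 * amdgpu_dpm_get_sclk() macro.  The macro dereferences powerplay.pp_funcs
 * directly, so the callback is checked here first; whether a given caller
 * also needs the pp_funcs NULL check depends on the ASIC setup path and is
 * an assumption of this sketch.
 */
static u32 example_read_sclk(struct amdgpu_device *adev)
{
	if (!adev->powerplay.pp_funcs ||
	    !adev->powerplay.pp_funcs->get_sclk)
		return 0;

	/* second argument selects the low DPM level's clock when true */
	return amdgpu_dpm_get_sclk(adev, false);
}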
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5e9ce8a29669..4f98960e47f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | |||
@@ -68,14 +68,16 @@ | |||
68 | * - 3.16.0 - Add reserved vmid support | 68 | * - 3.16.0 - Add reserved vmid support |
69 | * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. | 69 | * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. |
70 | * - 3.18.0 - Export gpu always on cu bitmap | 70 | * - 3.18.0 - Export gpu always on cu bitmap |
71 | * - 3.19.0 - Add support for UVD MJPEG decode | ||
72 | * - 3.20.0 - Add support for local BOs | ||
71 | */ | 73 | */ |
72 | #define KMS_DRIVER_MAJOR 3 | 74 | #define KMS_DRIVER_MAJOR 3 |
73 | #define KMS_DRIVER_MINOR 18 | 75 | #define KMS_DRIVER_MINOR 20 |
74 | #define KMS_DRIVER_PATCHLEVEL 0 | 76 | #define KMS_DRIVER_PATCHLEVEL 0 |
75 | 77 | ||
76 | int amdgpu_vram_limit = 0; | 78 | int amdgpu_vram_limit = 0; |
77 | int amdgpu_vis_vram_limit = 0; | 79 | int amdgpu_vis_vram_limit = 0; |
78 | unsigned amdgpu_gart_size = 256; | 80 | int amdgpu_gart_size = -1; /* auto */ |
79 | int amdgpu_gtt_size = -1; /* auto */ | 81 | int amdgpu_gtt_size = -1; /* auto */ |
80 | int amdgpu_moverate = -1; /* auto */ | 82 | int amdgpu_moverate = -1; /* auto */ |
81 | int amdgpu_benchmarking = 0; | 83 | int amdgpu_benchmarking = 0; |
@@ -90,10 +92,11 @@ int amdgpu_dpm = -1; | |||
90 | int amdgpu_fw_load_type = -1; | 92 | int amdgpu_fw_load_type = -1; |
91 | int amdgpu_aspm = -1; | 93 | int amdgpu_aspm = -1; |
92 | int amdgpu_runtime_pm = -1; | 94 | int amdgpu_runtime_pm = -1; |
93 | unsigned amdgpu_ip_block_mask = 0xffffffff; | 95 | uint amdgpu_ip_block_mask = 0xffffffff; |
94 | int amdgpu_bapm = -1; | 96 | int amdgpu_bapm = -1; |
95 | int amdgpu_deep_color = 0; | 97 | int amdgpu_deep_color = 0; |
96 | int amdgpu_vm_size = -1; | 98 | int amdgpu_vm_size = -1; |
99 | int amdgpu_vm_fragment_size = -1; | ||
97 | int amdgpu_vm_block_size = -1; | 100 | int amdgpu_vm_block_size = -1; |
98 | int amdgpu_vm_fault_stop = 0; | 101 | int amdgpu_vm_fault_stop = 0; |
99 | int amdgpu_vm_debug = 0; | 102 | int amdgpu_vm_debug = 0; |
@@ -104,14 +107,14 @@ int amdgpu_sched_jobs = 32; | |||
104 | int amdgpu_sched_hw_submission = 2; | 107 | int amdgpu_sched_hw_submission = 2; |
105 | int amdgpu_no_evict = 0; | 108 | int amdgpu_no_evict = 0; |
106 | int amdgpu_direct_gma_size = 0; | 109 | int amdgpu_direct_gma_size = 0; |
107 | unsigned amdgpu_pcie_gen_cap = 0; | 110 | uint amdgpu_pcie_gen_cap = 0; |
108 | unsigned amdgpu_pcie_lane_cap = 0; | 111 | uint amdgpu_pcie_lane_cap = 0; |
109 | unsigned amdgpu_cg_mask = 0xffffffff; | 112 | uint amdgpu_cg_mask = 0xffffffff; |
110 | unsigned amdgpu_pg_mask = 0xffffffff; | 113 | uint amdgpu_pg_mask = 0xffffffff; |
111 | unsigned amdgpu_sdma_phase_quantum = 32; | 114 | uint amdgpu_sdma_phase_quantum = 32; |
112 | char *amdgpu_disable_cu = NULL; | 115 | char *amdgpu_disable_cu = NULL; |
113 | char *amdgpu_virtual_display = NULL; | 116 | char *amdgpu_virtual_display = NULL; |
114 | unsigned amdgpu_pp_feature_mask = 0xffffffff; | 117 | uint amdgpu_pp_feature_mask = 0xffffffff; |
115 | int amdgpu_ngg = 0; | 118 | int amdgpu_ngg = 0; |
116 | int amdgpu_prim_buf_per_se = 0; | 119 | int amdgpu_prim_buf_per_se = 0; |
117 | int amdgpu_pos_buf_per_se = 0; | 120 | int amdgpu_pos_buf_per_se = 0; |
@@ -126,7 +129,7 @@ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); | |||
126 | MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes"); | 129 | MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes"); |
127 | module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); | 130 | module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); |
128 | 131 | ||
129 | MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc.)"); | 132 | MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)"); |
130 | module_param_named(gartsize, amdgpu_gart_size, uint, 0600); | 133 | module_param_named(gartsize, amdgpu_gart_size, uint, 0600); |
131 | 134 | ||
132 | MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); | 135 | MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); |
@@ -183,6 +186,9 @@ module_param_named(deep_color, amdgpu_deep_color, int, 0444); | |||
183 | MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)"); | 186 | MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)"); |
184 | module_param_named(vm_size, amdgpu_vm_size, int, 0444); | 187 | module_param_named(vm_size, amdgpu_vm_size, int, 0444); |
185 | 188 | ||
189 | MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)"); | ||
190 | module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444); | ||
191 | |||
186 | MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)"); | 192 | MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)"); |
187 | module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); | 193 | module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); |
188 | 194 | ||
@@ -603,6 +609,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) | |||
603 | 609 | ||
604 | drm_dev_unregister(dev); | 610 | drm_dev_unregister(dev); |
605 | drm_dev_unref(dev); | 611 | drm_dev_unref(dev); |
612 | pci_disable_device(pdev); | ||
613 | pci_set_drvdata(pdev, NULL); | ||
606 | } | 614 | } |
607 | 615 | ||
608 | static void | 616 | static void |
@@ -847,6 +855,7 @@ static struct drm_driver kms_driver = { | |||
847 | .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, | 855 | .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, |
848 | .gem_prime_vmap = amdgpu_gem_prime_vmap, | 856 | .gem_prime_vmap = amdgpu_gem_prime_vmap, |
849 | .gem_prime_vunmap = amdgpu_gem_prime_vunmap, | 857 | .gem_prime_vunmap = amdgpu_gem_prime_vunmap, |
858 | .gem_prime_mmap = amdgpu_gem_prime_mmap, | ||
850 | 859 | ||
851 | .name = DRIVER_NAME, | 860 | .name = DRIVER_NAME, |
852 | .desc = DRIVER_DESC, | 861 | .desc = DRIVER_DESC, |
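
The new vm_fragment_size parameter above is given in bits of GPU pages, so each step doubles the fragment. As a quick illustration (not driver code; the names are made up), this is how the "4 = 64K, 9 = 2M" wording maps to bytes, assuming the usual 4 KiB GPU page size:

/* Illustrative helper, not part of the driver: convert a fragment size
 * given in bits of GPU pages into bytes.  A GPU page is assumed to be
 * 4096 bytes, so 4 -> 64 KiB and 9 -> 2 MiB, matching the
 * MODULE_PARM_DESC text for vm_fragment_size.
 */
#define EXAMPLE_GPU_PAGE_SIZE 4096ULL

static unsigned long long example_fragment_bytes(int fragment_size_bits)
{
	return (1ULL << fragment_size_bits) * EXAMPLE_GPU_PAGE_SIZE;
}

/* example_fragment_bytes(4) == 65536; example_fragment_bytes(9) == 2097152 */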
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 9afa9c097e1f..562930b17a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | |||
@@ -149,7 +149,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, | |||
149 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | 149 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | |
150 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | | 150 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | |
151 | AMDGPU_GEM_CREATE_VRAM_CLEARED, | 151 | AMDGPU_GEM_CREATE_VRAM_CLEARED, |
152 | true, &gobj); | 152 | true, NULL, &gobj); |
153 | if (ret) { | 153 | if (ret) { |
154 | pr_err("failed to allocate framebuffer (%d)\n", aligned_size); | 154 | pr_err("failed to allocate framebuffer (%d)\n", aligned_size); |
155 | return -ENOMEM; | 155 | return -ENOMEM; |
@@ -303,10 +303,10 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb | |||
303 | if (rfb->obj) { | 303 | if (rfb->obj) { |
304 | amdgpufb_destroy_pinned_object(rfb->obj); | 304 | amdgpufb_destroy_pinned_object(rfb->obj); |
305 | rfb->obj = NULL; | 305 | rfb->obj = NULL; |
306 | drm_framebuffer_unregister_private(&rfb->base); | ||
307 | drm_framebuffer_cleanup(&rfb->base); | ||
306 | } | 308 | } |
307 | drm_fb_helper_fini(&rfbdev->helper); | 309 | drm_fb_helper_fini(&rfbdev->helper); |
308 | drm_framebuffer_unregister_private(&rfb->base); | ||
309 | drm_framebuffer_cleanup(&rfb->base); | ||
310 | 310 | ||
311 | return 0; | 311 | return 0; |
312 | } | 312 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 94c1e2e8e34c..f4370081f6e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | |||
@@ -57,18 +57,6 @@ | |||
57 | */ | 57 | */ |
58 | 58 | ||
59 | /** | 59 | /** |
60 | * amdgpu_gart_set_defaults - set the default gart_size | ||
61 | * | ||
62 | * @adev: amdgpu_device pointer | ||
63 | * | ||
64 | * Set the default gart_size based on parameters and available VRAM. | ||
65 | */ | ||
66 | void amdgpu_gart_set_defaults(struct amdgpu_device *adev) | ||
67 | { | ||
68 | adev->mc.gart_size = (uint64_t)amdgpu_gart_size << 20; | ||
69 | } | ||
70 | |||
71 | /** | ||
72 | * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table | 60 | * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table |
73 | * | 61 | * |
74 | * @adev: amdgpu_device pointer | 62 | * @adev: amdgpu_device pointer |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index d4cce6936200..afbe803b1a13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | |||
@@ -56,7 +56,6 @@ struct amdgpu_gart { | |||
56 | const struct amdgpu_gart_funcs *gart_funcs; | 56 | const struct amdgpu_gart_funcs *gart_funcs; |
57 | }; | 57 | }; |
58 | 58 | ||
59 | void amdgpu_gart_set_defaults(struct amdgpu_device *adev); | ||
60 | int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); | 59 | int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); |
61 | void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); | 60 | void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); |
62 | int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); | 61 | int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 81127ffcefb2..b0d45c8e6bb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | |||
@@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) | |||
44 | } | 44 | } |
45 | 45 | ||
46 | int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, | 46 | int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, |
47 | int alignment, u32 initial_domain, | 47 | int alignment, u32 initial_domain, |
48 | u64 flags, bool kernel, | 48 | u64 flags, bool kernel, |
49 | struct drm_gem_object **obj) | 49 | struct reservation_object *resv, |
50 | struct drm_gem_object **obj) | ||
50 | { | 51 | { |
51 | struct amdgpu_bo *robj; | 52 | struct amdgpu_bo *bo; |
52 | int r; | 53 | int r; |
53 | 54 | ||
54 | *obj = NULL; | 55 | *obj = NULL; |
@@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, | |||
59 | 60 | ||
60 | retry: | 61 | retry: |
61 | r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, | 62 | r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, |
62 | flags, NULL, NULL, 0, &robj); | 63 | flags, NULL, resv, 0, &bo); |
63 | if (r) { | 64 | if (r) { |
64 | if (r != -ERESTARTSYS) { | 65 | if (r != -ERESTARTSYS) { |
65 | if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { | 66 | if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { |
@@ -71,7 +72,7 @@ retry: | |||
71 | } | 72 | } |
72 | return r; | 73 | return r; |
73 | } | 74 | } |
74 | *obj = &robj->gem_base; | 75 | *obj = &bo->gem_base; |
75 | 76 | ||
76 | return 0; | 77 | return 0; |
77 | } | 78 | } |
@@ -112,7 +113,17 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, | |||
112 | struct amdgpu_fpriv *fpriv = file_priv->driver_priv; | 113 | struct amdgpu_fpriv *fpriv = file_priv->driver_priv; |
113 | struct amdgpu_vm *vm = &fpriv->vm; | 114 | struct amdgpu_vm *vm = &fpriv->vm; |
114 | struct amdgpu_bo_va *bo_va; | 115 | struct amdgpu_bo_va *bo_va; |
116 | struct mm_struct *mm; | ||
115 | int r; | 117 | int r; |
118 | |||
119 | mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm); | ||
120 | if (mm && mm != current->mm) | ||
121 | return -EPERM; | ||
122 | |||
123 | if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && | ||
124 | abo->tbo.resv != vm->root.base.bo->tbo.resv) | ||
125 | return -EPERM; | ||
126 | |||
116 | r = amdgpu_bo_reserve(abo, false); | 127 | r = amdgpu_bo_reserve(abo, false); |
117 | if (r) | 128 | if (r) |
118 | return r; | 129 | return r; |
@@ -127,35 +138,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, | |||
127 | return 0; | 138 | return 0; |
128 | } | 139 | } |
129 | 140 | ||
130 | static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo) | ||
131 | { | ||
132 | /* if anything is swapped out don't swap it in here, | ||
133 | just abort and wait for the next CS */ | ||
134 | if (!amdgpu_bo_gpu_accessible(bo)) | ||
135 | return -ERESTARTSYS; | ||
136 | |||
137 | if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow)) | ||
138 | return -ERESTARTSYS; | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev, | ||
144 | struct amdgpu_vm *vm, | ||
145 | struct list_head *list) | ||
146 | { | ||
147 | struct ttm_validate_buffer *entry; | ||
148 | |||
149 | list_for_each_entry(entry, list, head) { | ||
150 | struct amdgpu_bo *bo = | ||
151 | container_of(entry->bo, struct amdgpu_bo, tbo); | ||
152 | if (amdgpu_gem_vm_check(NULL, bo)) | ||
153 | return false; | ||
154 | } | ||
155 | |||
156 | return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL); | ||
157 | } | ||
158 | |||
159 | void amdgpu_gem_object_close(struct drm_gem_object *obj, | 141 | void amdgpu_gem_object_close(struct drm_gem_object *obj, |
160 | struct drm_file *file_priv) | 142 | struct drm_file *file_priv) |
161 | { | 143 | { |
@@ -165,13 +147,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, | |||
165 | struct amdgpu_vm *vm = &fpriv->vm; | 147 | struct amdgpu_vm *vm = &fpriv->vm; |
166 | 148 | ||
167 | struct amdgpu_bo_list_entry vm_pd; | 149 | struct amdgpu_bo_list_entry vm_pd; |
168 | struct list_head list; | 150 | struct list_head list, duplicates; |
169 | struct ttm_validate_buffer tv; | 151 | struct ttm_validate_buffer tv; |
170 | struct ww_acquire_ctx ticket; | 152 | struct ww_acquire_ctx ticket; |
171 | struct amdgpu_bo_va *bo_va; | 153 | struct amdgpu_bo_va *bo_va; |
172 | int r; | 154 | int r; |
173 | 155 | ||
174 | INIT_LIST_HEAD(&list); | 156 | INIT_LIST_HEAD(&list); |
157 | INIT_LIST_HEAD(&duplicates); | ||
175 | 158 | ||
176 | tv.bo = &bo->tbo; | 159 | tv.bo = &bo->tbo; |
177 | tv.shared = true; | 160 | tv.shared = true; |
@@ -179,7 +162,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, | |||
179 | 162 | ||
180 | amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); | 163 | amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); |
181 | 164 | ||
182 | r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); | 165 | r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); |
183 | if (r) { | 166 | if (r) { |
184 | dev_err(adev->dev, "leaking bo va because " | 167 | dev_err(adev->dev, "leaking bo va because " |
185 | "we fail to reserve bo (%d)\n", r); | 168 | "we fail to reserve bo (%d)\n", r); |
@@ -189,7 +172,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, | |||
189 | if (bo_va && --bo_va->ref_count == 0) { | 172 | if (bo_va && --bo_va->ref_count == 0) { |
190 | amdgpu_vm_bo_rmv(adev, bo_va); | 173 | amdgpu_vm_bo_rmv(adev, bo_va); |
191 | 174 | ||
192 | if (amdgpu_gem_vm_ready(adev, vm, &list)) { | 175 | if (amdgpu_vm_ready(vm)) { |
193 | struct dma_fence *fence = NULL; | 176 | struct dma_fence *fence = NULL; |
194 | 177 | ||
195 | r = amdgpu_vm_clear_freed(adev, vm, &fence); | 178 | r = amdgpu_vm_clear_freed(adev, vm, &fence); |
@@ -214,20 +197,22 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, | |||
214 | struct drm_file *filp) | 197 | struct drm_file *filp) |
215 | { | 198 | { |
216 | struct amdgpu_device *adev = dev->dev_private; | 199 | struct amdgpu_device *adev = dev->dev_private; |
200 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
201 | struct amdgpu_vm *vm = &fpriv->vm; | ||
217 | union drm_amdgpu_gem_create *args = data; | 202 | union drm_amdgpu_gem_create *args = data; |
203 | uint64_t flags = args->in.domain_flags; | ||
218 | uint64_t size = args->in.bo_size; | 204 | uint64_t size = args->in.bo_size; |
205 | struct reservation_object *resv = NULL; | ||
219 | struct drm_gem_object *gobj; | 206 | struct drm_gem_object *gobj; |
220 | uint32_t handle; | 207 | uint32_t handle; |
221 | bool kernel = false; | ||
222 | int r; | 208 | int r; |
223 | 209 | ||
224 | /* reject invalid gem flags */ | 210 | /* reject invalid gem flags */ |
225 | if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | 211 | if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | |
226 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | | 212 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | |
227 | AMDGPU_GEM_CREATE_CPU_GTT_USWC | | 213 | AMDGPU_GEM_CREATE_CPU_GTT_USWC | |
228 | AMDGPU_GEM_CREATE_VRAM_CLEARED| | 214 | AMDGPU_GEM_CREATE_VRAM_CLEARED | |
229 | AMDGPU_GEM_CREATE_SHADOW | | 215 | AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) |
230 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) | ||
231 | return -EINVAL; | 216 | return -EINVAL; |
232 | 217 | ||
233 | /* reject invalid gem domains */ | 218 | /* reject invalid gem domains */ |
@@ -242,7 +227,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, | |||
242 | /* create a gem object to contain this object in */ | 227 | /* create a gem object to contain this object in */ |
243 | if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | | 228 | if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | |
244 | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { | 229 | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { |
245 | kernel = true; | 230 | flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; |
246 | if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) | 231 | if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) |
247 | size = size << AMDGPU_GDS_SHIFT; | 232 | size = size << AMDGPU_GDS_SHIFT; |
248 | else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) | 233 | else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) |
@@ -254,10 +239,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, | |||
254 | } | 239 | } |
255 | size = roundup(size, PAGE_SIZE); | 240 | size = roundup(size, PAGE_SIZE); |
256 | 241 | ||
242 | if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { | ||
243 | r = amdgpu_bo_reserve(vm->root.base.bo, false); | ||
244 | if (r) | ||
245 | return r; | ||
246 | |||
247 | resv = vm->root.base.bo->tbo.resv; | ||
248 | } | ||
249 | |||
257 | r = amdgpu_gem_object_create(adev, size, args->in.alignment, | 250 | r = amdgpu_gem_object_create(adev, size, args->in.alignment, |
258 | (u32)(0xffffffff & args->in.domains), | 251 | (u32)(0xffffffff & args->in.domains), |
259 | args->in.domain_flags, | 252 | flags, false, resv, &gobj); |
260 | kernel, &gobj); | 253 | if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { |
254 | if (!r) { | ||
255 | struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); | ||
256 | |||
257 | abo->parent = amdgpu_bo_ref(vm->root.base.bo); | ||
258 | } | ||
259 | amdgpu_bo_unreserve(vm->root.base.bo); | ||
260 | } | ||
261 | if (r) | 261 | if (r) |
262 | return r; | 262 | return r; |
263 | 263 | ||
@@ -299,9 +299,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, | |||
299 | } | 299 | } |
300 | 300 | ||
301 | /* create a gem object to contain this object in */ | 301 | /* create a gem object to contain this object in */ |
302 | r = amdgpu_gem_object_create(adev, args->size, 0, | 302 | r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU, |
303 | AMDGPU_GEM_DOMAIN_CPU, 0, | 303 | 0, 0, NULL, &gobj); |
304 | 0, &gobj); | ||
305 | if (r) | 304 | if (r) |
306 | return r; | 305 | return r; |
307 | 306 | ||
@@ -319,8 +318,6 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, | |||
319 | } | 318 | } |
320 | 319 | ||
321 | if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { | 320 | if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { |
322 | down_read(¤t->mm->mmap_sem); | ||
323 | |||
324 | r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, | 321 | r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, |
325 | bo->tbo.ttm->pages); | 322 | bo->tbo.ttm->pages); |
326 | if (r) | 323 | if (r) |
@@ -335,8 +332,6 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, | |||
335 | amdgpu_bo_unreserve(bo); | 332 | amdgpu_bo_unreserve(bo); |
336 | if (r) | 333 | if (r) |
337 | goto free_pages; | 334 | goto free_pages; |
338 | |||
339 | up_read(¤t->mm->mmap_sem); | ||
340 | } | 335 | } |
341 | 336 | ||
342 | r = drm_gem_handle_create(filp, gobj, &handle); | 337 | r = drm_gem_handle_create(filp, gobj, &handle); |
@@ -513,10 +508,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, | |||
513 | struct list_head *list, | 508 | struct list_head *list, |
514 | uint32_t operation) | 509 | uint32_t operation) |
515 | { | 510 | { |
516 | int r = -ERESTARTSYS; | 511 | int r; |
517 | 512 | ||
518 | if (!amdgpu_gem_vm_ready(adev, vm, list)) | 513 | if (!amdgpu_vm_ready(vm)) |
519 | goto error; | 514 | return; |
520 | 515 | ||
521 | r = amdgpu_vm_update_directories(adev, vm); | 516 | r = amdgpu_vm_update_directories(adev, vm); |
522 | if (r) | 517 | if (r) |
@@ -553,7 +548,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, | |||
553 | struct amdgpu_bo_list_entry vm_pd; | 548 | struct amdgpu_bo_list_entry vm_pd; |
554 | struct ttm_validate_buffer tv; | 549 | struct ttm_validate_buffer tv; |
555 | struct ww_acquire_ctx ticket; | 550 | struct ww_acquire_ctx ticket; |
556 | struct list_head list; | 551 | struct list_head list, duplicates; |
557 | uint64_t va_flags; | 552 | uint64_t va_flags; |
558 | int r = 0; | 553 | int r = 0; |
559 | 554 | ||
@@ -589,6 +584,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, | |||
589 | } | 584 | } |
590 | 585 | ||
591 | INIT_LIST_HEAD(&list); | 586 | INIT_LIST_HEAD(&list); |
587 | INIT_LIST_HEAD(&duplicates); | ||
592 | if ((args->operation != AMDGPU_VA_OP_CLEAR) && | 588 | if ((args->operation != AMDGPU_VA_OP_CLEAR) && |
593 | !(args->flags & AMDGPU_VM_PAGE_PRT)) { | 589 | !(args->flags & AMDGPU_VM_PAGE_PRT)) { |
594 | gobj = drm_gem_object_lookup(filp, args->handle); | 590 | gobj = drm_gem_object_lookup(filp, args->handle); |
@@ -605,7 +601,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, | |||
605 | 601 | ||
606 | amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); | 602 | amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); |
607 | 603 | ||
608 | r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); | 604 | r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); |
609 | if (r) | 605 | if (r) |
610 | goto error_unref; | 606 | goto error_unref; |
611 | 607 | ||
@@ -623,7 +619,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, | |||
623 | 619 | ||
624 | switch (args->operation) { | 620 | switch (args->operation) { |
625 | case AMDGPU_VA_OP_MAP: | 621 | case AMDGPU_VA_OP_MAP: |
626 | r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, | 622 | r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, |
627 | args->map_size); | 623 | args->map_size); |
628 | if (r) | 624 | if (r) |
629 | goto error_backoff; | 625 | goto error_backoff; |
@@ -643,7 +639,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, | |||
643 | args->map_size); | 639 | args->map_size); |
644 | break; | 640 | break; |
645 | case AMDGPU_VA_OP_REPLACE: | 641 | case AMDGPU_VA_OP_REPLACE: |
646 | r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, | 642 | r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, |
647 | args->map_size); | 643 | args->map_size); |
648 | if (r) | 644 | if (r) |
649 | goto error_backoff; | 645 | goto error_backoff; |
@@ -671,6 +667,7 @@ error_unref: | |||
671 | int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, | 667 | int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, |
672 | struct drm_file *filp) | 668 | struct drm_file *filp) |
673 | { | 669 | { |
670 | struct amdgpu_device *adev = dev->dev_private; | ||
674 | struct drm_amdgpu_gem_op *args = data; | 671 | struct drm_amdgpu_gem_op *args = data; |
675 | struct drm_gem_object *gobj; | 672 | struct drm_gem_object *gobj; |
676 | struct amdgpu_bo *robj; | 673 | struct amdgpu_bo *robj; |
@@ -718,6 +715,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, | |||
718 | if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) | 715 | if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) |
719 | robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; | 716 | robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; |
720 | 717 | ||
718 | if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) | ||
719 | amdgpu_vm_bo_invalidate(adev, robj, true); | ||
720 | |||
721 | amdgpu_bo_unreserve(robj); | 721 | amdgpu_bo_unreserve(robj); |
722 | break; | 722 | break; |
723 | default: | 723 | default: |
@@ -747,8 +747,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, | |||
747 | r = amdgpu_gem_object_create(adev, args->size, 0, | 747 | r = amdgpu_gem_object_create(adev, args->size, 0, |
748 | AMDGPU_GEM_DOMAIN_VRAM, | 748 | AMDGPU_GEM_DOMAIN_VRAM, |
749 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, | 749 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, |
750 | ttm_bo_type_device, | 750 | false, NULL, &gobj); |
751 | &gobj); | ||
752 | if (r) | 751 | if (r) |
753 | return -ENOMEM; | 752 | return -ENOMEM; |
754 | 753 | ||
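
The ioctl change above accepts AMDGPU_GEM_CREATE_VM_ALWAYS_VALID and, when the flag is set, creates the BO against the VM's root reservation object so it stays valid for the whole VM. A hedged userspace sketch of passing that flag through the GEM create ioctl follows; the uapi layout is taken from amdgpu_drm.h, while the fd handling, error paths, domain and alignment are assumptions for illustration.

#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

/* Sketch: create a per-VM ("local") buffer object.  Only the flag usage is
 * the point here; domain, alignment and error handling are placeholders.
 */
static int example_create_local_bo(int fd, unsigned long size,
				   unsigned int *handle)
{
	union drm_amdgpu_gem_create args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.bo_size = size;
	args.in.alignment = 4096;
	args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
	args.in.domain_flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			       AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;

	r = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args);
	if (r == 0)
		*handle = args.out.handle;
	return r;
}

Such a BO shares the root page-directory reservation, which is why amdgpu_gem_object_open() above rejects opening it from a different VM with -EPERM.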
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 4f6c68fc1dd9..4fcd98e65998 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | |||
@@ -260,8 +260,13 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, | |||
260 | /* create MQD for KIQ */ | 260 | /* create MQD for KIQ */ |
261 | ring = &adev->gfx.kiq.ring; | 261 | ring = &adev->gfx.kiq.ring; |
262 | if (!ring->mqd_obj) { | 262 | if (!ring->mqd_obj) { |
263 | /* Originally the KIQ MQD was placed in the GTT domain, but under SRIOV the VRAM | ||
264 | * domain is mandatory: otherwise the hypervisor's SAVE_VF step fails after the | ||
265 | * driver is unloaded, because the MQD has been freed and the GART unbound. To | ||
266 | * avoid that divergence, use the VRAM domain for the KIQ MQD on both SRIOV and bare metal. | ||
267 | */ | ||
263 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, | 268 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, |
264 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, | 269 | AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj, |
265 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | 270 | &ring->mqd_gpu_addr, &ring->mqd_ptr); |
266 | if (r) { | 271 | if (r) { |
267 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | 272 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 5e6b90c6794f..0d15eb7d31d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | |||
@@ -28,7 +28,7 @@ | |||
28 | struct amdgpu_gtt_mgr { | 28 | struct amdgpu_gtt_mgr { |
29 | struct drm_mm mm; | 29 | struct drm_mm mm; |
30 | spinlock_t lock; | 30 | spinlock_t lock; |
31 | uint64_t available; | 31 | atomic64_t available; |
32 | }; | 32 | }; |
33 | 33 | ||
34 | /** | 34 | /** |
@@ -54,7 +54,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, | |||
54 | size = (adev->mc.gart_size >> PAGE_SHIFT) - start; | 54 | size = (adev->mc.gart_size >> PAGE_SHIFT) - start; |
55 | drm_mm_init(&mgr->mm, start, size); | 55 | drm_mm_init(&mgr->mm, start, size); |
56 | spin_lock_init(&mgr->lock); | 56 | spin_lock_init(&mgr->lock); |
57 | mgr->available = p_size; | 57 | atomic64_set(&mgr->available, p_size); |
58 | man->priv = mgr; | 58 | man->priv = mgr; |
59 | return 0; | 59 | return 0; |
60 | } | 60 | } |
@@ -108,10 +108,10 @@ bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem) | |||
108 | * | 108 | * |
109 | * Allocate the address space for a node. | 109 | * Allocate the address space for a node. |
110 | */ | 110 | */ |
111 | int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, | 111 | static int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, |
112 | struct ttm_buffer_object *tbo, | 112 | struct ttm_buffer_object *tbo, |
113 | const struct ttm_place *place, | 113 | const struct ttm_place *place, |
114 | struct ttm_mem_reg *mem) | 114 | struct ttm_mem_reg *mem) |
115 | { | 115 | { |
116 | struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); | 116 | struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); |
117 | struct amdgpu_gtt_mgr *mgr = man->priv; | 117 | struct amdgpu_gtt_mgr *mgr = man->priv; |
@@ -143,25 +143,12 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, | |||
143 | fpfn, lpfn, mode); | 143 | fpfn, lpfn, mode); |
144 | spin_unlock(&mgr->lock); | 144 | spin_unlock(&mgr->lock); |
145 | 145 | ||
146 | if (!r) { | 146 | if (!r) |
147 | mem->start = node->start; | 147 | mem->start = node->start; |
148 | if (&tbo->mem == mem) | ||
149 | tbo->offset = (tbo->mem.start << PAGE_SHIFT) + | ||
150 | tbo->bdev->man[tbo->mem.mem_type].gpu_offset; | ||
151 | } | ||
152 | 148 | ||
153 | return r; | 149 | return r; |
154 | } | 150 | } |
155 | 151 | ||
156 | void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man) | ||
157 | { | ||
158 | struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); | ||
159 | struct amdgpu_gtt_mgr *mgr = man->priv; | ||
160 | |||
161 | seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", | ||
162 | man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20); | ||
163 | |||
164 | } | ||
165 | /** | 152 | /** |
166 | * amdgpu_gtt_mgr_new - allocate a new node | 153 | * amdgpu_gtt_mgr_new - allocate a new node |
167 | * | 154 | * |
@@ -182,11 +169,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, | |||
182 | int r; | 169 | int r; |
183 | 170 | ||
184 | spin_lock(&mgr->lock); | 171 | spin_lock(&mgr->lock); |
185 | if (mgr->available < mem->num_pages) { | 172 | if (atomic64_read(&mgr->available) < mem->num_pages) { |
186 | spin_unlock(&mgr->lock); | 173 | spin_unlock(&mgr->lock); |
187 | return 0; | 174 | return 0; |
188 | } | 175 | } |
189 | mgr->available -= mem->num_pages; | 176 | atomic64_sub(mem->num_pages, &mgr->available); |
190 | spin_unlock(&mgr->lock); | 177 | spin_unlock(&mgr->lock); |
191 | 178 | ||
192 | node = kzalloc(sizeof(*node), GFP_KERNEL); | 179 | node = kzalloc(sizeof(*node), GFP_KERNEL); |
@@ -213,9 +200,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, | |||
213 | 200 | ||
214 | return 0; | 201 | return 0; |
215 | err_out: | 202 | err_out: |
216 | spin_lock(&mgr->lock); | 203 | atomic64_add(mem->num_pages, &mgr->available); |
217 | mgr->available += mem->num_pages; | ||
218 | spin_unlock(&mgr->lock); | ||
219 | 204 | ||
220 | return r; | 205 | return r; |
221 | } | 206 | } |
@@ -242,30 +227,47 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, | |||
242 | spin_lock(&mgr->lock); | 227 | spin_lock(&mgr->lock); |
243 | if (node->start != AMDGPU_BO_INVALID_OFFSET) | 228 | if (node->start != AMDGPU_BO_INVALID_OFFSET) |
244 | drm_mm_remove_node(node); | 229 | drm_mm_remove_node(node); |
245 | mgr->available += mem->num_pages; | ||
246 | spin_unlock(&mgr->lock); | 230 | spin_unlock(&mgr->lock); |
231 | atomic64_add(mem->num_pages, &mgr->available); | ||
247 | 232 | ||
248 | kfree(node); | 233 | kfree(node); |
249 | mem->mm_node = NULL; | 234 | mem->mm_node = NULL; |
250 | } | 235 | } |
251 | 236 | ||
252 | /** | 237 | /** |
238 | * amdgpu_gtt_mgr_usage - return usage of GTT domain | ||
239 | * | ||
240 | * @man: TTM memory type manager | ||
241 | * | ||
242 | * Return how many bytes are used in the GTT domain | ||
243 | */ | ||
244 | uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) | ||
245 | { | ||
246 | struct amdgpu_gtt_mgr *mgr = man->priv; | ||
247 | |||
248 | return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE; | ||
249 | } | ||
250 | |||
251 | /** | ||
253 | * amdgpu_gtt_mgr_debug - dump GTT table | 252 | * amdgpu_gtt_mgr_debug - dump GTT table |
254 | * | 253 | * |
255 | * @man: TTM memory type manager | 254 | * @man: TTM memory type manager |
256 | * @prefix: text prefix | 255 | * @printer: DRM printer to use |
257 | * | 256 | * |
258 | * Dump the table content using printk. | 257 | * Dump the table content using printk. |
259 | */ | 258 | */ |
260 | static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, | 259 | static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, |
261 | const char *prefix) | 260 | struct drm_printer *printer) |
262 | { | 261 | { |
263 | struct amdgpu_gtt_mgr *mgr = man->priv; | 262 | struct amdgpu_gtt_mgr *mgr = man->priv; |
264 | struct drm_printer p = drm_debug_printer(prefix); | ||
265 | 263 | ||
266 | spin_lock(&mgr->lock); | 264 | spin_lock(&mgr->lock); |
267 | drm_mm_print(&mgr->mm, &p); | 265 | drm_mm_print(&mgr->mm, printer); |
268 | spin_unlock(&mgr->lock); | 266 | spin_unlock(&mgr->lock); |
267 | |||
268 | drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", | ||
269 | man->size, (u64)atomic64_read(&mgr->available), | ||
270 | amdgpu_gtt_mgr_usage(man) >> 20); | ||
269 | } | 271 | } |
270 | 272 | ||
271 | const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = { | 273 | const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = { |
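
The GTT manager rework above replaces the spinlock-protected "available" counter with an atomic64_t, so page accounting no longer needs mgr->lock and usage can be derived at any time as (size - available) pages. A minimal sketch of that accounting pattern, with illustrative names and assuming kernel headers:

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/mm.h>	/* PAGE_SIZE */

struct example_mgr {
	u64 total_pages;
	atomic64_t available_pages;
};

/* Reserve space; mirrors the check-then-sub sequence in amdgpu_gtt_mgr_new()
 * above (the small window between the read and the sub is tolerated there
 * as well).
 */
static bool example_reserve(struct example_mgr *mgr, u64 pages)
{
	if (atomic64_read(&mgr->available_pages) < pages)
		return false;
	atomic64_sub(pages, &mgr->available_pages);
	return true;
}

static void example_release(struct example_mgr *mgr, u64 pages)
{
	atomic64_add(pages, &mgr->available_pages);
}

/* Usage in bytes, the same formula as amdgpu_gtt_mgr_usage(). */
static u64 example_usage_bytes(struct example_mgr *mgr)
{
	return (mgr->total_pages -
		atomic64_read(&mgr->available_pages)) * PAGE_SIZE;
}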
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 3ab4c65ecc8b..f5f27e4f0f7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | |||
@@ -169,6 +169,12 @@ restart_ih: | |||
169 | while (adev->irq.ih.rptr != wptr) { | 169 | while (adev->irq.ih.rptr != wptr) { |
170 | u32 ring_index = adev->irq.ih.rptr >> 2; | 170 | u32 ring_index = adev->irq.ih.rptr >> 2; |
171 | 171 | ||
172 | /* Prescreening of high-frequency interrupts */ | ||
173 | if (!amdgpu_ih_prescreen_iv(adev)) { | ||
174 | adev->irq.ih.rptr &= adev->irq.ih.ptr_mask; | ||
175 | continue; | ||
176 | } | ||
177 | |||
172 | /* Before dispatching irq to IP blocks, send it to amdkfd */ | 178 | /* Before dispatching irq to IP blocks, send it to amdkfd */ |
173 | amdgpu_amdkfd_interrupt(adev, | 179 | amdgpu_amdkfd_interrupt(adev, |
174 | (const void *) &adev->irq.ih.ring[ring_index]); | 180 | (const void *) &adev->irq.ih.ring[ring_index]); |
@@ -190,3 +196,79 @@ restart_ih: | |||
190 | 196 | ||
191 | return IRQ_HANDLED; | 197 | return IRQ_HANDLED; |
192 | } | 198 | } |
199 | |||
200 | /** | ||
201 | * amdgpu_ih_add_fault - Add a page fault record | ||
202 | * | ||
203 | * @adev: amdgpu device pointer | ||
204 | * @key: 64-bit encoding of PASID and address | ||
205 | * | ||
206 | * This should be called when a retry page fault interrupt is | ||
207 | * received. If this is a new page fault, it will be added to a hash | ||
208 | * table. The return value indicates whether this is a new fault, or | ||
209 | * a fault that was already known and is already being handled. | ||
210 | * | ||
211 | * If there are too many pending page faults, this will fail. Retry | ||
212 | * interrupts should be ignored in this case until there is enough | ||
213 | * free space. | ||
214 | * | ||
215 | * Returns 0 if the fault was added, 1 if the fault was already known, | ||
216 | * -ENOSPC if there are too many pending faults. | ||
217 | */ | ||
218 | int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key) | ||
219 | { | ||
220 | unsigned long flags; | ||
221 | int r = -ENOSPC; | ||
222 | |||
223 | if (WARN_ON_ONCE(!adev->irq.ih.faults)) | ||
224 | /* Should be allocated in <IP>_ih_sw_init on GPUs that | ||
225 | * support retry faults and require retry filtering. | ||
226 | */ | ||
227 | return r; | ||
228 | |||
229 | spin_lock_irqsave(&adev->irq.ih.faults->lock, flags); | ||
230 | |||
231 | /* Only let the hash table fill up to 50% for best performance */ | ||
232 | if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1))) | ||
233 | goto unlock_out; | ||
234 | |||
235 | r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL); | ||
236 | if (!r) | ||
237 | adev->irq.ih.faults->count++; | ||
238 | |||
239 | /* chash_table_copy_in should never fail unless we're losing count */ | ||
240 | WARN_ON_ONCE(r < 0); | ||
241 | |||
242 | unlock_out: | ||
243 | spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags); | ||
244 | return r; | ||
245 | } | ||
246 | |||
247 | /** | ||
248 | * amdgpu_ih_clear_fault - Remove a page fault record | ||
249 | * | ||
250 | * @adev: amdgpu device pointer | ||
251 | * @key: 64-bit encoding of PASID and address | ||
252 | * | ||
253 | * This should be called when a page fault has been handled. Any | ||
254 | * future interrupt with this key will be processed as a new | ||
255 | * page fault. | ||
256 | */ | ||
257 | void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key) | ||
258 | { | ||
259 | unsigned long flags; | ||
260 | int r; | ||
261 | |||
262 | if (!adev->irq.ih.faults) | ||
263 | return; | ||
264 | |||
265 | spin_lock_irqsave(&adev->irq.ih.faults->lock, flags); | ||
266 | |||
267 | r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL); | ||
268 | if (!WARN_ON_ONCE(r < 0)) { | ||
269 | adev->irq.ih.faults->count--; | ||
270 | WARN_ON_ONCE(adev->irq.ih.faults->count < 0); | ||
271 | } | ||
272 | |||
273 | spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags); | ||
274 | } | ||
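
amdgpu_ih_add_fault()/amdgpu_ih_clear_fault() above give IH code a way to filter retry page-fault storms: the first interrupt for a (PASID, address) pair is processed, duplicates are dropped until the fault is cleared. A hedged sketch of how an IP-specific prescreen routine might use this follows; the key encoding and the function name are illustrative assumptions, only the return-value handling follows the kerneldoc above.

#include "amdgpu.h"
#include "amdgpu_ih.h"

/* Illustrative key: PASID in the high bits, faulting page in the low bits.
 * The real encoding is IP-specific and not defined here.
 */
static bool example_prescreen_retry_fault(struct amdgpu_device *adev,
					  u16 pasid, u64 addr)
{
	u64 key = ((u64)pasid << 48) | (addr >> PAGE_SHIFT);
	int r = amdgpu_ih_add_fault(adev, key);

	if (r == 1)		/* already known: drop the duplicate */
		return false;
	if (r == -ENOSPC)	/* hash too full: drop, the fault will retry */
		return false;
	return true;		/* new fault (r == 0): dispatch it */
}

Once the fault has actually been handled, calling amdgpu_ih_clear_fault(adev, key) with the same key removes the record so later interrupts for that page are processed again.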
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 3de8e74e5b3a..ada89358e220 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | |||
@@ -24,6 +24,8 @@ | |||
24 | #ifndef __AMDGPU_IH_H__ | 24 | #ifndef __AMDGPU_IH_H__ |
25 | #define __AMDGPU_IH_H__ | 25 | #define __AMDGPU_IH_H__ |
26 | 26 | ||
27 | #include <linux/chash.h> | ||
28 | |||
27 | struct amdgpu_device; | 29 | struct amdgpu_device; |
28 | /* | 30 | /* |
29 | * vega10+ IH clients | 31 | * vega10+ IH clients |
@@ -69,6 +71,13 @@ enum amdgpu_ih_clientid | |||
69 | 71 | ||
70 | #define AMDGPU_IH_CLIENTID_LEGACY 0 | 72 | #define AMDGPU_IH_CLIENTID_LEGACY 0 |
71 | 73 | ||
74 | #define AMDGPU_PAGEFAULT_HASH_BITS 8 | ||
75 | struct amdgpu_retryfault_hashtable { | ||
76 | DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); | ||
77 | spinlock_t lock; | ||
78 | int count; | ||
79 | }; | ||
80 | |||
72 | /* | 81 | /* |
73 | * R6xx+ IH ring | 82 | * R6xx+ IH ring |
74 | */ | 83 | */ |
@@ -87,6 +96,7 @@ struct amdgpu_ih_ring { | |||
87 | bool use_doorbell; | 96 | bool use_doorbell; |
88 | bool use_bus_addr; | 97 | bool use_bus_addr; |
89 | dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ | 98 | dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ |
99 | struct amdgpu_retryfault_hashtable *faults; | ||
90 | }; | 100 | }; |
91 | 101 | ||
92 | #define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4 | 102 | #define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4 |
@@ -109,5 +119,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, | |||
109 | bool use_bus_addr); | 119 | bool use_bus_addr); |
110 | void amdgpu_ih_ring_fini(struct amdgpu_device *adev); | 120 | void amdgpu_ih_ring_fini(struct amdgpu_device *adev); |
111 | int amdgpu_ih_process(struct amdgpu_device *adev); | 121 | int amdgpu_ih_process(struct amdgpu_device *adev); |
122 | int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key); | ||
123 | void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key); | ||
112 | 124 | ||
113 | #endif | 125 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 4bdd851f56d0..538e5f27d120 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | |||
@@ -221,8 +221,9 @@ int amdgpu_irq_init(struct amdgpu_device *adev) | |||
221 | 221 | ||
222 | spin_lock_init(&adev->irq.lock); | 222 | spin_lock_init(&adev->irq.lock); |
223 | 223 | ||
224 | /* Disable vblank irqs aggressively for power-saving */ | 224 | if (!adev->enable_virtual_display) |
225 | adev->ddev->vblank_disable_immediate = true; | 225 | /* Disable vblank irqs aggressively for power-saving */ |
226 | adev->ddev->vblank_disable_immediate = true; | ||
226 | 227 | ||
227 | r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc); | 228 | r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc); |
228 | if (r) { | 229 | if (r) { |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index c908f972283c..4fd06f8d9768 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | |||
@@ -455,13 +455,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
455 | ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); | 455 | ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); |
456 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; | 456 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; |
457 | case AMDGPU_INFO_VRAM_USAGE: | 457 | case AMDGPU_INFO_VRAM_USAGE: |
458 | ui64 = atomic64_read(&adev->vram_usage); | 458 | ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); |
459 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; | 459 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; |
460 | case AMDGPU_INFO_VIS_VRAM_USAGE: | 460 | case AMDGPU_INFO_VIS_VRAM_USAGE: |
461 | ui64 = atomic64_read(&adev->vram_vis_usage); | 461 | ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); |
462 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; | 462 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; |
463 | case AMDGPU_INFO_GTT_USAGE: | 463 | case AMDGPU_INFO_GTT_USAGE: |
464 | ui64 = atomic64_read(&adev->gtt_usage); | 464 | ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); |
465 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; | 465 | return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; |
466 | case AMDGPU_INFO_GDS_CONFIG: { | 466 | case AMDGPU_INFO_GDS_CONFIG: { |
467 | struct drm_amdgpu_info_gds gds_info; | 467 | struct drm_amdgpu_info_gds gds_info; |
@@ -497,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
497 | mem.vram.total_heap_size = adev->mc.real_vram_size; | 497 | mem.vram.total_heap_size = adev->mc.real_vram_size; |
498 | mem.vram.usable_heap_size = | 498 | mem.vram.usable_heap_size = |
499 | adev->mc.real_vram_size - adev->vram_pin_size; | 499 | adev->mc.real_vram_size - adev->vram_pin_size; |
500 | mem.vram.heap_usage = atomic64_read(&adev->vram_usage); | 500 | mem.vram.heap_usage = |
501 | amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); | ||
501 | mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; | 502 | mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; |
502 | 503 | ||
503 | mem.cpu_accessible_vram.total_heap_size = | 504 | mem.cpu_accessible_vram.total_heap_size = |
@@ -506,7 +507,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
506 | adev->mc.visible_vram_size - | 507 | adev->mc.visible_vram_size - |
507 | (adev->vram_pin_size - adev->invisible_pin_size); | 508 | (adev->vram_pin_size - adev->invisible_pin_size); |
508 | mem.cpu_accessible_vram.heap_usage = | 509 | mem.cpu_accessible_vram.heap_usage = |
509 | atomic64_read(&adev->vram_vis_usage); | 510 | amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); |
510 | mem.cpu_accessible_vram.max_allocation = | 511 | mem.cpu_accessible_vram.max_allocation = |
511 | mem.cpu_accessible_vram.usable_heap_size * 3 / 4; | 512 | mem.cpu_accessible_vram.usable_heap_size * 3 / 4; |
512 | 513 | ||
@@ -514,7 +515,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
514 | mem.gtt.total_heap_size *= PAGE_SIZE; | 515 | mem.gtt.total_heap_size *= PAGE_SIZE; |
515 | mem.gtt.usable_heap_size = mem.gtt.total_heap_size | 516 | mem.gtt.usable_heap_size = mem.gtt.total_heap_size |
516 | - adev->gart_pin_size; | 517 | - adev->gart_pin_size; |
517 | mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage); | 518 | mem.gtt.heap_usage = |
519 | amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); | ||
518 | mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; | 520 | mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; |
519 | 521 | ||
520 | return copy_to_user(out, &mem, | 522 | return copy_to_user(out, &mem, |
@@ -588,11 +590,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
588 | dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; | 590 | dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; |
589 | dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; | 591 | dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; |
590 | dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); | 592 | dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); |
591 | dev_info.pte_fragment_size = | 593 | dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; |
592 | (1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) * | ||
593 | AMDGPU_GPU_PAGE_SIZE; | ||
594 | dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; | 594 | dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; |
595 | |||
596 | dev_info.cu_active_number = adev->gfx.cu_info.number; | 595 | dev_info.cu_active_number = adev->gfx.cu_info.number; |
597 | dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; | 596 | dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; |
598 | dev_info.ce_ram_size = adev->gfx.ce_ram_size; | 597 | dev_info.ce_ram_size = adev->gfx.ce_ram_size; |
@@ -826,7 +825,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) | |||
826 | } | 825 | } |
827 | 826 | ||
828 | r = amdgpu_vm_init(adev, &fpriv->vm, | 827 | r = amdgpu_vm_init(adev, &fpriv->vm, |
829 | AMDGPU_VM_CONTEXT_GFX); | 828 | AMDGPU_VM_CONTEXT_GFX, 0); |
830 | if (r) { | 829 | if (r) { |
831 | kfree(fpriv); | 830 | kfree(fpriv); |
832 | goto out_suspend; | 831 | goto out_suspend; |
@@ -841,9 +840,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) | |||
841 | } | 840 | } |
842 | 841 | ||
843 | if (amdgpu_sriov_vf(adev)) { | 842 | if (amdgpu_sriov_vf(adev)) { |
844 | r = amdgpu_map_static_csa(adev, &fpriv->vm); | 843 | r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); |
845 | if (r) | 844 | if (r) { |
845 | amdgpu_vm_fini(adev, &fpriv->vm); | ||
846 | kfree(fpriv); | ||
846 | goto out_suspend; | 847 | goto out_suspend; |
848 | } | ||
847 | } | 849 | } |
848 | 850 | ||
849 | mutex_init(&fpriv->bo_list_lock); | 851 | mutex_init(&fpriv->bo_list_lock); |
@@ -894,8 +896,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, | |||
894 | if (amdgpu_sriov_vf(adev)) { | 896 | if (amdgpu_sriov_vf(adev)) { |
895 | /* TODO: how to handle reserve failure */ | 897 | /* TODO: how to handle reserve failure */ |
896 | BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); | 898 | BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); |
897 | amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va); | 899 | amdgpu_vm_bo_rmv(adev, fpriv->csa_va); |
898 | fpriv->vm.csa_bo_va = NULL; | 900 | fpriv->csa_va = NULL; |
899 | amdgpu_bo_unreserve(adev->virt.csa_obj); | 901 | amdgpu_bo_unreserve(adev->virt.csa_obj); |
900 | } | 902 | } |
901 | 903 | ||
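
With the accounting moved into the TTM managers, the info ioctl above now reports VRAM/GTT usage straight from amdgpu_vram_mgr_usage()/amdgpu_gtt_mgr_usage(). From userspace these remain the same AMDGPU_INFO_*_USAGE queries; a hedged sketch using the libdrm_amdgpu helper amdgpu_query_info(), with device initialisation and error handling omitted:

#include <stdio.h>
#include <stdint.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

/* Sketch: print the usage counters that amdgpu_info_ioctl() derives from
 * the VRAM/GTT managers.  "dev" is assumed to come from
 * amdgpu_device_initialize().
 */
static void example_print_memory_usage(amdgpu_device_handle dev)
{
	uint64_t vram = 0, vis_vram = 0, gtt = 0;

	amdgpu_query_info(dev, AMDGPU_INFO_VRAM_USAGE, sizeof(vram), &vram);
	amdgpu_query_info(dev, AMDGPU_INFO_VIS_VRAM_USAGE,
			  sizeof(vis_vram), &vis_vram);
	amdgpu_query_info(dev, AMDGPU_INFO_GTT_USAGE, sizeof(gtt), &gtt);

	printf("VRAM %llu MiB, visible VRAM %llu MiB, GTT %llu MiB\n",
	       (unsigned long long)(vram >> 20),
	       (unsigned long long)(vis_vram >> 20),
	       (unsigned long long)(gtt >> 20));
}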
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 6558a3ed57a7..bd67f4cb8e6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | |||
@@ -50,8 +50,10 @@ struct amdgpu_mn { | |||
50 | struct hlist_node node; | 50 | struct hlist_node node; |
51 | 51 | ||
52 | /* objects protected by lock */ | 52 | /* objects protected by lock */ |
53 | struct mutex lock; | 53 | struct rw_semaphore lock; |
54 | struct rb_root objects; | 54 | struct rb_root_cached objects; |
55 | struct mutex read_lock; | ||
56 | atomic_t recursion; | ||
55 | }; | 57 | }; |
56 | 58 | ||
57 | struct amdgpu_mn_node { | 59 | struct amdgpu_mn_node { |
@@ -74,17 +76,17 @@ static void amdgpu_mn_destroy(struct work_struct *work) | |||
74 | struct amdgpu_bo *bo, *next_bo; | 76 | struct amdgpu_bo *bo, *next_bo; |
75 | 77 | ||
76 | mutex_lock(&adev->mn_lock); | 78 | mutex_lock(&adev->mn_lock); |
77 | mutex_lock(&rmn->lock); | 79 | down_write(&rmn->lock); |
78 | hash_del(&rmn->node); | 80 | hash_del(&rmn->node); |
79 | rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, | 81 | rbtree_postorder_for_each_entry_safe(node, next_node, |
80 | it.rb) { | 82 | &rmn->objects.rb_root, it.rb) { |
81 | list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { | 83 | list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { |
82 | bo->mn = NULL; | 84 | bo->mn = NULL; |
83 | list_del_init(&bo->mn_list); | 85 | list_del_init(&bo->mn_list); |
84 | } | 86 | } |
85 | kfree(node); | 87 | kfree(node); |
86 | } | 88 | } |
87 | mutex_unlock(&rmn->lock); | 89 | up_write(&rmn->lock); |
88 | mutex_unlock(&adev->mn_lock); | 90 | mutex_unlock(&adev->mn_lock); |
89 | mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); | 91 | mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); |
90 | kfree(rmn); | 92 | kfree(rmn); |
@@ -106,6 +108,53 @@ static void amdgpu_mn_release(struct mmu_notifier *mn, | |||
106 | schedule_work(&rmn->work); | 108 | schedule_work(&rmn->work); |
107 | } | 109 | } |
108 | 110 | ||
111 | |||
112 | /** | ||
113 | * amdgpu_mn_lock - take the write side lock for this mn | ||
114 | */ | ||
115 | void amdgpu_mn_lock(struct amdgpu_mn *mn) | ||
116 | { | ||
117 | if (mn) | ||
118 | down_write(&mn->lock); | ||
119 | } | ||
120 | |||
121 | /** | ||
122 | * amdgpu_mn_unlock - drop the write side lock for this mn | ||
123 | */ | ||
124 | void amdgpu_mn_unlock(struct amdgpu_mn *mn) | ||
125 | { | ||
126 | if (mn) | ||
127 | up_write(&mn->lock); | ||
128 | } | ||
129 | |||
130 | /** | ||
131 | * amdgpu_mn_read_lock - take the rmn read lock | ||
132 | * | ||
133 | * @rmn: our notifier | ||
134 | * | ||
135 | * Take the rmn read side lock. | ||
136 | */ | ||
137 | static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn) | ||
138 | { | ||
139 | mutex_lock(&rmn->read_lock); | ||
140 | if (atomic_inc_return(&rmn->recursion) == 1) | ||
141 | down_read_non_owner(&rmn->lock); | ||
142 | mutex_unlock(&rmn->read_lock); | ||
143 | } | ||
144 | |||
145 | /** | ||
146 | * amdgpu_mn_read_unlock - drop the rmn read lock | ||
147 | * | ||
148 | * @rmn: our notifier | ||
149 | * | ||
150 | * Drop the rmn read side lock. | ||
151 | */ | ||
152 | static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn) | ||
153 | { | ||
154 | if (atomic_dec_return(&rmn->recursion) == 0) | ||
155 | up_read_non_owner(&rmn->lock); | ||
156 | } | ||
157 | |||
109 | /** | 158 | /** |
110 | * amdgpu_mn_invalidate_node - unmap all BOs of a node | 159 | * amdgpu_mn_invalidate_node - unmap all BOs of a node |
111 | * | 160 | * |
@@ -126,54 +175,13 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, | |||
126 | if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) | 175 | if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) |
127 | continue; | 176 | continue; |
128 | 177 | ||
129 | r = amdgpu_bo_reserve(bo, true); | ||
130 | if (r) { | ||
131 | DRM_ERROR("(%ld) failed to reserve user bo\n", r); | ||
132 | continue; | ||
133 | } | ||
134 | |||
135 | r = reservation_object_wait_timeout_rcu(bo->tbo.resv, | 178 | r = reservation_object_wait_timeout_rcu(bo->tbo.resv, |
136 | true, false, MAX_SCHEDULE_TIMEOUT); | 179 | true, false, MAX_SCHEDULE_TIMEOUT); |
137 | if (r <= 0) | 180 | if (r <= 0) |
138 | DRM_ERROR("(%ld) failed to wait for user bo\n", r); | 181 | DRM_ERROR("(%ld) failed to wait for user bo\n", r); |
139 | 182 | ||
140 | amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); | 183 | amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); |
141 | r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); | ||
142 | if (r) | ||
143 | DRM_ERROR("(%ld) failed to validate user bo\n", r); | ||
144 | |||
145 | amdgpu_bo_unreserve(bo); | ||
146 | } | ||
147 | } | ||
148 | |||
149 | /** | ||
150 | * amdgpu_mn_invalidate_page - callback to notify about mm change | ||
151 | * | ||
152 | * @mn: our notifier | ||
153 | * @mn: the mm this callback is about | ||
154 | * @address: address of invalidate page | ||
155 | * | ||
156 | * Invalidation of a single page. Blocks for all BOs mapping it | ||
157 | * and unmap them by move them into system domain again. | ||
158 | */ | ||
159 | static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn, | ||
160 | struct mm_struct *mm, | ||
161 | unsigned long address) | ||
162 | { | ||
163 | struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); | ||
164 | struct interval_tree_node *it; | ||
165 | |||
166 | mutex_lock(&rmn->lock); | ||
167 | |||
168 | it = interval_tree_iter_first(&rmn->objects, address, address); | ||
169 | if (it) { | ||
170 | struct amdgpu_mn_node *node; | ||
171 | |||
172 | node = container_of(it, struct amdgpu_mn_node, it); | ||
173 | amdgpu_mn_invalidate_node(node, address, address); | ||
174 | } | 184 | } |
175 | |||
176 | mutex_unlock(&rmn->lock); | ||
177 | } | 185 | } |
178 | 186 | ||
179 | /** | 187 | /** |
@@ -198,7 +206,7 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, | |||
198 | /* notification is exclusive, but interval is inclusive */ | 206 | /* notification is exclusive, but interval is inclusive */ |
199 | end -= 1; | 207 | end -= 1; |
200 | 208 | ||
201 | mutex_lock(&rmn->lock); | 209 | amdgpu_mn_read_lock(rmn); |
202 | 210 | ||
203 | it = interval_tree_iter_first(&rmn->objects, start, end); | 211 | it = interval_tree_iter_first(&rmn->objects, start, end); |
204 | while (it) { | 212 | while (it) { |
@@ -209,14 +217,32 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, | |||
209 | 217 | ||
210 | amdgpu_mn_invalidate_node(node, start, end); | 218 | amdgpu_mn_invalidate_node(node, start, end); |
211 | } | 219 | } |
220 | } | ||
212 | 221 | ||
213 | mutex_unlock(&rmn->lock); | 222 | /** |
223 | * amdgpu_mn_invalidate_range_end - callback to notify about mm change | ||
224 | * | ||
225 | * @mn: our notifier | ||
226 | * @mm: the mm this callback is about | ||
227 | * @start: start of updated range | ||
228 | * @end: end of updated range | ||
229 | * | ||
230 | * Release the lock again to allow new command submissions. | ||
231 | */ | ||
232 | static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, | ||
233 | struct mm_struct *mm, | ||
234 | unsigned long start, | ||
235 | unsigned long end) | ||
236 | { | ||
237 | struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); | ||
238 | |||
239 | amdgpu_mn_read_unlock(rmn); | ||
214 | } | 240 | } |
215 | 241 | ||
216 | static const struct mmu_notifier_ops amdgpu_mn_ops = { | 242 | static const struct mmu_notifier_ops amdgpu_mn_ops = { |
217 | .release = amdgpu_mn_release, | 243 | .release = amdgpu_mn_release, |
218 | .invalidate_page = amdgpu_mn_invalidate_page, | ||
219 | .invalidate_range_start = amdgpu_mn_invalidate_range_start, | 244 | .invalidate_range_start = amdgpu_mn_invalidate_range_start, |
245 | .invalidate_range_end = amdgpu_mn_invalidate_range_end, | ||
220 | }; | 246 | }; |
221 | 247 | ||
222 | /** | 248 | /** |
@@ -226,7 +252,7 @@ static const struct mmu_notifier_ops amdgpu_mn_ops = { | |||
226 | * | 252 | * |
227 | * Creates a notifier context for current->mm. | 253 | * Creates a notifier context for current->mm. |
228 | */ | 254 | */ |
229 | static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | 255 | struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) |
230 | { | 256 | { |
231 | struct mm_struct *mm = current->mm; | 257 | struct mm_struct *mm = current->mm; |
232 | struct amdgpu_mn *rmn; | 258 | struct amdgpu_mn *rmn; |
@@ -251,8 +277,10 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | |||
251 | rmn->adev = adev; | 277 | rmn->adev = adev; |
252 | rmn->mm = mm; | 278 | rmn->mm = mm; |
253 | rmn->mn.ops = &amdgpu_mn_ops; | 279 | rmn->mn.ops = &amdgpu_mn_ops; |
254 | mutex_init(&rmn->lock); | 280 | init_rwsem(&rmn->lock); |
255 | rmn->objects = RB_ROOT; | 281 | rmn->objects = RB_ROOT_CACHED; |
282 | mutex_init(&rmn->read_lock); | ||
283 | atomic_set(&rmn->recursion, 0); | ||
256 | 284 | ||
257 | r = __mmu_notifier_register(&rmn->mn, mm); | 285 | r = __mmu_notifier_register(&rmn->mn, mm); |
258 | if (r) | 286 | if (r) |
@@ -298,7 +326,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | |||
298 | 326 | ||
299 | INIT_LIST_HEAD(&bos); | 327 | INIT_LIST_HEAD(&bos); |
300 | 328 | ||
301 | mutex_lock(&rmn->lock); | 329 | down_write(&rmn->lock); |
302 | 330 | ||
303 | while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { | 331 | while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { |
304 | kfree(node); | 332 | kfree(node); |
@@ -312,7 +340,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | |||
312 | if (!node) { | 340 | if (!node) { |
313 | node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); | 341 | node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); |
314 | if (!node) { | 342 | if (!node) { |
315 | mutex_unlock(&rmn->lock); | 343 | up_write(&rmn->lock); |
316 | return -ENOMEM; | 344 | return -ENOMEM; |
317 | } | 345 | } |
318 | } | 346 | } |
@@ -327,7 +355,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | |||
327 | 355 | ||
328 | interval_tree_insert(&node->it, &rmn->objects); | 356 | interval_tree_insert(&node->it, &rmn->objects); |
329 | 357 | ||
330 | mutex_unlock(&rmn->lock); | 358 | up_write(&rmn->lock); |
331 | 359 | ||
332 | return 0; | 360 | return 0; |
333 | } | 361 | } |
@@ -353,7 +381,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) | |||
353 | return; | 381 | return; |
354 | } | 382 | } |
355 | 383 | ||
356 | mutex_lock(&rmn->lock); | 384 | down_write(&rmn->lock); |
357 | 385 | ||
358 | /* save the next list entry for later */ | 386 | /* save the next list entry for later */ |
359 | head = bo->mn_list.next; | 387 | head = bo->mn_list.next; |
@@ -368,6 +396,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) | |||
368 | kfree(node); | 396 | kfree(node); |
369 | } | 397 | } |
370 | 398 | ||
371 | mutex_unlock(&rmn->lock); | 399 | up_write(&rmn->lock); |
372 | mutex_unlock(&adev->mn_lock); | 400 | mutex_unlock(&adev->mn_lock); |
373 | } | 401 | } |
402 | |||
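Editor's note on the amdgpu_mn.c hunks above: the per-notifier mutex becomes an rw_semaphore plus a recursion counter, so nested invalidate_range_start calls take the shared side only once and the matching invalidate_range_end drops it again, while registration and command submission take the write side. Below is a minimal userspace sketch of that recursion-counted read lock, assuming pthread primitives as stand-ins; the mn_lock/mn_read_lock names are mine, not the kernel's rw_semaphore or down_read_non_owner().

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct mn_lock {
	pthread_rwlock_t lock;      /* stands in for rmn->lock (rw_semaphore) */
	pthread_mutex_t  read_lock; /* serializes the acquire path only */
	atomic_int       recursion; /* nested invalidate_range_start count */
};

static void mn_read_lock(struct mn_lock *m)
{
	/* only the outermost start call takes the shared lock */
	pthread_mutex_lock(&m->read_lock);
	if (atomic_fetch_add(&m->recursion, 1) == 0)
		pthread_rwlock_rdlock(&m->lock);
	pthread_mutex_unlock(&m->read_lock);
}

static void mn_read_unlock(struct mn_lock *m)
{
	/* the matching outermost end call releases it again */
	if (atomic_fetch_sub(&m->recursion, 1) == 1)
		pthread_rwlock_unlock(&m->lock);
}

int main(void)
{
	struct mn_lock m = {
		.lock = PTHREAD_RWLOCK_INITIALIZER,
		.read_lock = PTHREAD_MUTEX_INITIALIZER,
		.recursion = 0,
	};

	mn_read_lock(&m);   /* invalidate_range_start */
	mn_read_lock(&m);   /* nested notifier call */
	mn_read_unlock(&m); /* matching invalidate_range_end */
	mn_read_unlock(&m);
	printf("recursion back to %d\n", atomic_load(&m.recursion));
	return 0;
}

The extra read_lock mutex only serializes the acquire path, so the counter increment and the shared lock are taken together; the release path needs no such serialization because only the last unlock drops the shared lock.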
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h new file mode 100644 index 000000000000..d0095a3793b8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * Copyright 2017 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * Authors: Christian König | ||
23 | */ | ||
24 | #ifndef __AMDGPU_MN_H__ | ||
25 | #define __AMDGPU_MN_H__ | ||
26 | |||
27 | /* | ||
28 | * MMU Notifier | ||
29 | */ | ||
30 | struct amdgpu_mn; | ||
31 | |||
32 | #if defined(CONFIG_MMU_NOTIFIER) | ||
33 | void amdgpu_mn_lock(struct amdgpu_mn *mn); | ||
34 | void amdgpu_mn_unlock(struct amdgpu_mn *mn); | ||
35 | struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); | ||
36 | int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); | ||
37 | void amdgpu_mn_unregister(struct amdgpu_bo *bo); | ||
38 | #else | ||
39 | static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} | ||
40 | static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} | ||
41 | static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | ||
42 | { | ||
43 | return NULL; | ||
44 | } | ||
45 | static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | ||
46 | { | ||
47 | return -ENODEV; | ||
48 | } | ||
49 | static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} | ||
50 | #endif | ||
51 | |||
52 | #endif | ||
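Editor's note on the new amdgpu_mn.h: when CONFIG_MMU_NOTIFIER is disabled, static inline stubs replace the real functions so callers compile and link without sprinkling #ifdefs. A generic, self-contained illustration of that stub pattern follows; CONFIG_FOO, foo_dev and foo_register are made-up names for the example, not amdgpu code.

#include <errno.h>
#include <stdio.h>

struct foo_dev { int id; };

/* Toggle this define to mimic the Kconfig option being (un)set. */
/* #define CONFIG_FOO 1 */

#ifdef CONFIG_FOO
int foo_register(struct foo_dev *dev)
{
	printf("registered %d\n", dev->id);
	return 0;
}
#else
/* stub: same signature, trivial body, so callers need no #ifdef */
static inline int foo_register(struct foo_dev *dev)
{
	(void)dev;
	return -ENODEV;
}
#endif

int main(void)
{
	struct foo_dev dev = { .id = 1 };
	int r = foo_register(&dev);

	if (r == -ENODEV)
		printf("feature compiled out, falling back\n");
	return 0;
}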
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6e72fe7901ec..6982baeccd14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | |||
@@ -37,55 +37,6 @@ | |||
37 | #include "amdgpu.h" | 37 | #include "amdgpu.h" |
38 | #include "amdgpu_trace.h" | 38 | #include "amdgpu_trace.h" |
39 | 39 | ||
40 | |||
41 | |||
42 | static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev, | ||
43 | struct ttm_mem_reg *mem) | ||
44 | { | ||
45 | if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size) | ||
46 | return 0; | ||
47 | |||
48 | return ((mem->start << PAGE_SHIFT) + mem->size) > | ||
49 | adev->mc.visible_vram_size ? | ||
50 | adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) : | ||
51 | mem->size; | ||
52 | } | ||
53 | |||
54 | static void amdgpu_update_memory_usage(struct amdgpu_device *adev, | ||
55 | struct ttm_mem_reg *old_mem, | ||
56 | struct ttm_mem_reg *new_mem) | ||
57 | { | ||
58 | u64 vis_size; | ||
59 | if (!adev) | ||
60 | return; | ||
61 | |||
62 | if (new_mem) { | ||
63 | switch (new_mem->mem_type) { | ||
64 | case TTM_PL_TT: | ||
65 | atomic64_add(new_mem->size, &adev->gtt_usage); | ||
66 | break; | ||
67 | case TTM_PL_VRAM: | ||
68 | atomic64_add(new_mem->size, &adev->vram_usage); | ||
69 | vis_size = amdgpu_get_vis_part_size(adev, new_mem); | ||
70 | atomic64_add(vis_size, &adev->vram_vis_usage); | ||
71 | break; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | if (old_mem) { | ||
76 | switch (old_mem->mem_type) { | ||
77 | case TTM_PL_TT: | ||
78 | atomic64_sub(old_mem->size, &adev->gtt_usage); | ||
79 | break; | ||
80 | case TTM_PL_VRAM: | ||
81 | atomic64_sub(old_mem->size, &adev->vram_usage); | ||
82 | vis_size = amdgpu_get_vis_part_size(adev, old_mem); | ||
83 | atomic64_sub(vis_size, &adev->vram_vis_usage); | ||
84 | break; | ||
85 | } | ||
86 | } | ||
87 | } | ||
88 | |||
89 | static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) | 40 | static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) |
90 | { | 41 | { |
91 | struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); | 42 | struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); |
@@ -94,7 +45,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) | |||
94 | bo = container_of(tbo, struct amdgpu_bo, tbo); | 45 | bo = container_of(tbo, struct amdgpu_bo, tbo); |
95 | 46 | ||
96 | amdgpu_bo_kunmap(bo); | 47 | amdgpu_bo_kunmap(bo); |
97 | amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL); | ||
98 | 48 | ||
99 | drm_gem_object_release(&bo->gem_base); | 49 | drm_gem_object_release(&bo->gem_base); |
100 | amdgpu_bo_unref(&bo->parent); | 50 | amdgpu_bo_unref(&bo->parent); |
@@ -114,11 +64,12 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) | |||
114 | return false; | 64 | return false; |
115 | } | 65 | } |
116 | 66 | ||
117 | static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, | 67 | void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) |
118 | struct ttm_placement *placement, | ||
119 | struct ttm_place *places, | ||
120 | u32 domain, u64 flags) | ||
121 | { | 68 | { |
69 | struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); | ||
70 | struct ttm_placement *placement = &abo->placement; | ||
71 | struct ttm_place *places = abo->placements; | ||
72 | u64 flags = abo->flags; | ||
122 | u32 c = 0; | 73 | u32 c = 0; |
123 | 74 | ||
124 | if (domain & AMDGPU_GEM_DOMAIN_VRAM) { | 75 | if (domain & AMDGPU_GEM_DOMAIN_VRAM) { |
@@ -141,7 +92,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, | |||
141 | 92 | ||
142 | if (domain & AMDGPU_GEM_DOMAIN_GTT) { | 93 | if (domain & AMDGPU_GEM_DOMAIN_GTT) { |
143 | places[c].fpfn = 0; | 94 | places[c].fpfn = 0; |
144 | places[c].lpfn = 0; | 95 | if (flags & AMDGPU_GEM_CREATE_SHADOW) |
96 | places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT; | ||
97 | else | ||
98 | places[c].lpfn = 0; | ||
145 | places[c].flags = TTM_PL_FLAG_TT; | 99 | places[c].flags = TTM_PL_FLAG_TT; |
146 | if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) | 100 | if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) |
147 | places[c].flags |= TTM_PL_FLAG_WC | | 101 | places[c].flags |= TTM_PL_FLAG_WC | |
@@ -198,27 +152,6 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, | |||
198 | placement->busy_placement = places; | 152 | placement->busy_placement = places; |
199 | } | 153 | } |
200 | 154 | ||
201 | void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) | ||
202 | { | ||
203 | struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); | ||
204 | |||
205 | amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements, | ||
206 | domain, abo->flags); | ||
207 | } | ||
208 | |||
209 | static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, | ||
210 | struct ttm_placement *placement) | ||
211 | { | ||
212 | BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1)); | ||
213 | |||
214 | memcpy(bo->placements, placement->placement, | ||
215 | placement->num_placement * sizeof(struct ttm_place)); | ||
216 | bo->placement.num_placement = placement->num_placement; | ||
217 | bo->placement.num_busy_placement = placement->num_busy_placement; | ||
218 | bo->placement.placement = bo->placements; | ||
219 | bo->placement.busy_placement = bo->placements; | ||
220 | } | ||
221 | |||
222 | /** | 155 | /** |
223 | * amdgpu_bo_create_reserved - create reserved BO for kernel use | 156 | * amdgpu_bo_create_reserved - create reserved BO for kernel use |
224 | * | 157 | * |
@@ -350,14 +283,13 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, | |||
350 | *cpu_addr = NULL; | 283 | *cpu_addr = NULL; |
351 | } | 284 | } |
352 | 285 | ||
353 | int amdgpu_bo_create_restricted(struct amdgpu_device *adev, | 286 | static int amdgpu_bo_do_create(struct amdgpu_device *adev, |
354 | unsigned long size, int byte_align, | 287 | unsigned long size, int byte_align, |
355 | bool kernel, u32 domain, u64 flags, | 288 | bool kernel, u32 domain, u64 flags, |
356 | struct sg_table *sg, | 289 | struct sg_table *sg, |
357 | struct ttm_placement *placement, | 290 | struct reservation_object *resv, |
358 | struct reservation_object *resv, | 291 | uint64_t init_value, |
359 | uint64_t init_value, | 292 | struct amdgpu_bo **bo_ptr) |
360 | struct amdgpu_bo **bo_ptr) | ||
361 | { | 293 | { |
362 | struct amdgpu_bo *bo; | 294 | struct amdgpu_bo *bo; |
363 | enum ttm_bo_type type; | 295 | enum ttm_bo_type type; |
@@ -431,10 +363,11 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, | |||
431 | bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; | 363 | bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; |
432 | #endif | 364 | #endif |
433 | 365 | ||
434 | amdgpu_fill_placement_to_bo(bo, placement); | 366 | bo->tbo.bdev = &adev->mman.bdev; |
435 | /* Kernel allocation are uninterruptible */ | 367 | amdgpu_ttm_placement_from_domain(bo, domain); |
436 | 368 | ||
437 | initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); | 369 | initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); |
370 | /* Kernel allocation are uninterruptible */ | ||
438 | r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, | 371 | r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, |
439 | &bo->placement, page_align, !kernel, NULL, | 372 | &bo->placement, page_align, !kernel, NULL, |
440 | acc_size, sg, resv, &amdgpu_ttm_bo_destroy); | 373 | acc_size, sg, resv, &amdgpu_ttm_bo_destroy); |
@@ -489,28 +422,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, | |||
489 | unsigned long size, int byte_align, | 422 | unsigned long size, int byte_align, |
490 | struct amdgpu_bo *bo) | 423 | struct amdgpu_bo *bo) |
491 | { | 424 | { |
492 | struct ttm_placement placement = {0}; | ||
493 | struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; | ||
494 | int r; | 425 | int r; |
495 | 426 | ||
496 | if (bo->shadow) | 427 | if (bo->shadow) |
497 | return 0; | 428 | return 0; |
498 | 429 | ||
499 | bo->flags |= AMDGPU_GEM_CREATE_SHADOW; | 430 | r = amdgpu_bo_do_create(adev, size, byte_align, true, |
500 | memset(&placements, 0, | 431 | AMDGPU_GEM_DOMAIN_GTT, |
501 | (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); | 432 | AMDGPU_GEM_CREATE_CPU_GTT_USWC | |
502 | 433 | AMDGPU_GEM_CREATE_SHADOW, | |
503 | amdgpu_ttm_placement_init(adev, &placement, | 434 | NULL, bo->tbo.resv, 0, |
504 | placements, AMDGPU_GEM_DOMAIN_GTT, | 435 | &bo->shadow); |
505 | AMDGPU_GEM_CREATE_CPU_GTT_USWC); | ||
506 | |||
507 | r = amdgpu_bo_create_restricted(adev, size, byte_align, true, | ||
508 | AMDGPU_GEM_DOMAIN_GTT, | ||
509 | AMDGPU_GEM_CREATE_CPU_GTT_USWC, | ||
510 | NULL, &placement, | ||
511 | bo->tbo.resv, | ||
512 | 0, | ||
513 | &bo->shadow); | ||
514 | if (!r) { | 436 | if (!r) { |
515 | bo->shadow->parent = amdgpu_bo_ref(bo); | 437 | bo->shadow->parent = amdgpu_bo_ref(bo); |
516 | mutex_lock(&adev->shadow_list_lock); | 438 | mutex_lock(&adev->shadow_list_lock); |
@@ -532,32 +454,23 @@ int amdgpu_bo_create(struct amdgpu_device *adev, | |||
532 | uint64_t init_value, | 454 | uint64_t init_value, |
533 | struct amdgpu_bo **bo_ptr) | 455 | struct amdgpu_bo **bo_ptr) |
534 | { | 456 | { |
535 | struct ttm_placement placement = {0}; | 457 | uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; |
536 | struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; | ||
537 | int r; | 458 | int r; |
538 | 459 | ||
539 | memset(&placements, 0, | 460 | r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain, |
540 | (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); | 461 | parent_flags, sg, resv, init_value, bo_ptr); |
541 | |||
542 | amdgpu_ttm_placement_init(adev, &placement, | ||
543 | placements, domain, flags); | ||
544 | |||
545 | r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, | ||
546 | domain, flags, sg, &placement, | ||
547 | resv, init_value, bo_ptr); | ||
548 | if (r) | 462 | if (r) |
549 | return r; | 463 | return r; |
550 | 464 | ||
551 | if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) { | 465 | if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { |
552 | if (!resv) { | 466 | if (!resv) |
553 | r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL); | 467 | WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, |
554 | WARN_ON(r != 0); | 468 | NULL)); |
555 | } | ||
556 | 469 | ||
557 | r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); | 470 | r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); |
558 | 471 | ||
559 | if (!resv) | 472 | if (!resv) |
560 | ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock); | 473 | reservation_object_unlock((*bo_ptr)->tbo.resv); |
561 | 474 | ||
562 | if (r) | 475 | if (r) |
563 | amdgpu_bo_unref(bo_ptr); | 476 | amdgpu_bo_unref(bo_ptr); |
@@ -722,7 +635,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, | |||
722 | { | 635 | { |
723 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); | 636 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); |
724 | int r, i; | 637 | int r, i; |
725 | unsigned fpfn, lpfn; | ||
726 | 638 | ||
727 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) | 639 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) |
728 | return -EPERM; | 640 | return -EPERM; |
@@ -754,22 +666,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, | |||
754 | } | 666 | } |
755 | 667 | ||
756 | bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; | 668 | bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; |
669 | /* force to pin into visible video ram */ | ||
670 | if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) | ||
671 | bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; | ||
757 | amdgpu_ttm_placement_from_domain(bo, domain); | 672 | amdgpu_ttm_placement_from_domain(bo, domain); |
758 | for (i = 0; i < bo->placement.num_placement; i++) { | 673 | for (i = 0; i < bo->placement.num_placement; i++) { |
759 | /* force to pin into visible video ram */ | 674 | unsigned fpfn, lpfn; |
760 | if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && | 675 | |
761 | !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) && | 676 | fpfn = min_offset >> PAGE_SHIFT; |
762 | (!max_offset || max_offset > | 677 | lpfn = max_offset >> PAGE_SHIFT; |
763 | adev->mc.visible_vram_size)) { | 678 | |
764 | if (WARN_ON_ONCE(min_offset > | ||
765 | adev->mc.visible_vram_size)) | ||
766 | return -EINVAL; | ||
767 | fpfn = min_offset >> PAGE_SHIFT; | ||
768 | lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; | ||
769 | } else { | ||
770 | fpfn = min_offset >> PAGE_SHIFT; | ||
771 | lpfn = max_offset >> PAGE_SHIFT; | ||
772 | } | ||
773 | if (fpfn > bo->placements[i].fpfn) | 679 | if (fpfn > bo->placements[i].fpfn) |
774 | bo->placements[i].fpfn = fpfn; | 680 | bo->placements[i].fpfn = fpfn; |
775 | if (!bo->placements[i].lpfn || | 681 | if (!bo->placements[i].lpfn || |
@@ -979,7 +885,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, | |||
979 | return; | 885 | return; |
980 | 886 | ||
981 | abo = container_of(bo, struct amdgpu_bo, tbo); | 887 | abo = container_of(bo, struct amdgpu_bo, tbo); |
982 | amdgpu_vm_bo_invalidate(adev, abo); | 888 | amdgpu_vm_bo_invalidate(adev, abo, evict); |
983 | 889 | ||
984 | amdgpu_bo_kunmap(abo); | 890 | amdgpu_bo_kunmap(abo); |
985 | 891 | ||
@@ -992,8 +898,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, | |||
992 | return; | 898 | return; |
993 | 899 | ||
994 | /* move_notify is called before move happens */ | 900 | /* move_notify is called before move happens */ |
995 | amdgpu_update_memory_usage(adev, &bo->mem, new_mem); | ||
996 | |||
997 | trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); | 901 | trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); |
998 | } | 902 | } |
999 | 903 | ||
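Editor's note on the amdgpu_object.c hunks above: placement setup is centralized in amdgpu_ttm_placement_from_domain(), amdgpu_bo_create_restricted() collapses into amdgpu_bo_do_create(), and the pin path now forces CPU-visible VRAM via a flag and then simply converts min/max byte offsets to page frames and clamps each placement. A minimal sketch of that clamping arithmetic is below, assuming a 4 KiB page; PAGE_SHIFT and struct placement are local stand-ins, not the TTM types.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct placement {
	unsigned fpfn;   /* first allowed page frame, 0 == start of domain */
	unsigned lpfn;   /* last allowed page frame, 0 == unbounded */
};

static void clamp_placement(struct placement *p,
			    uint64_t min_offset, uint64_t max_offset)
{
	unsigned fpfn = min_offset >> PAGE_SHIFT;
	unsigned lpfn = max_offset >> PAGE_SHIFT;

	if (fpfn > p->fpfn)
		p->fpfn = fpfn;                     /* raise the lower bound */
	if (!p->lpfn || (lpfn && lpfn < p->lpfn))
		p->lpfn = lpfn;                     /* tighten the upper bound */
}

int main(void)
{
	struct placement p = { 0, 0 };

	clamp_placement(&p, 1ULL << 20, 256ULL << 20); /* pin within [1 MiB, 256 MiB) */
	printf("fpfn=%u lpfn=%u\n", p.fpfn, p.lpfn);
	return 0;
}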
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 9b7b4fcb047b..39b6bf6fb051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | |||
@@ -33,7 +33,9 @@ | |||
33 | 33 | ||
34 | #define AMDGPU_BO_INVALID_OFFSET LONG_MAX | 34 | #define AMDGPU_BO_INVALID_OFFSET LONG_MAX |
35 | 35 | ||
36 | /* bo virtual addresses in a vm */ | ||
36 | struct amdgpu_bo_va_mapping { | 37 | struct amdgpu_bo_va_mapping { |
38 | struct amdgpu_bo_va *bo_va; | ||
37 | struct list_head list; | 39 | struct list_head list; |
38 | struct rb_node rb; | 40 | struct rb_node rb; |
39 | uint64_t start; | 41 | uint64_t start; |
@@ -43,26 +45,24 @@ struct amdgpu_bo_va_mapping { | |||
43 | uint64_t flags; | 45 | uint64_t flags; |
44 | }; | 46 | }; |
45 | 47 | ||
46 | /* bo virtual addresses in a specific vm */ | 48 | /* User space allocated BO in a VM */ |
47 | struct amdgpu_bo_va { | 49 | struct amdgpu_bo_va { |
50 | struct amdgpu_vm_bo_base base; | ||
51 | |||
48 | /* protected by bo being reserved */ | 52 | /* protected by bo being reserved */ |
49 | struct list_head bo_list; | ||
50 | struct dma_fence *last_pt_update; | ||
51 | unsigned ref_count; | 53 | unsigned ref_count; |
52 | 54 | ||
53 | /* protected by vm mutex and spinlock */ | 55 | /* all other members protected by the VM PD being reserved */ |
54 | struct list_head vm_status; | 56 | struct dma_fence *last_pt_update; |
55 | 57 | ||
56 | /* mappings for this bo_va */ | 58 | /* mappings for this bo_va */ |
57 | struct list_head invalids; | 59 | struct list_head invalids; |
58 | struct list_head valids; | 60 | struct list_head valids; |
59 | 61 | ||
60 | /* constant after initialization */ | 62 | /* If the mappings are cleared or filled */ |
61 | struct amdgpu_vm *vm; | 63 | bool cleared; |
62 | struct amdgpu_bo *bo; | ||
63 | }; | 64 | }; |
64 | 65 | ||
65 | |||
66 | struct amdgpu_bo { | 66 | struct amdgpu_bo { |
67 | /* Protected by tbo.reserved */ | 67 | /* Protected by tbo.reserved */ |
68 | u32 preferred_domains; | 68 | u32 preferred_domains; |
@@ -195,14 +195,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, | |||
195 | struct reservation_object *resv, | 195 | struct reservation_object *resv, |
196 | uint64_t init_value, | 196 | uint64_t init_value, |
197 | struct amdgpu_bo **bo_ptr); | 197 | struct amdgpu_bo **bo_ptr); |
198 | int amdgpu_bo_create_restricted(struct amdgpu_device *adev, | ||
199 | unsigned long size, int byte_align, | ||
200 | bool kernel, u32 domain, u64 flags, | ||
201 | struct sg_table *sg, | ||
202 | struct ttm_placement *placement, | ||
203 | struct reservation_object *resv, | ||
204 | uint64_t init_value, | ||
205 | struct amdgpu_bo **bo_ptr); | ||
206 | int amdgpu_bo_create_reserved(struct amdgpu_device *adev, | 198 | int amdgpu_bo_create_reserved(struct amdgpu_device *adev, |
207 | unsigned long size, int align, | 199 | unsigned long size, int align, |
208 | u32 domain, struct amdgpu_bo **bo_ptr, | 200 | u32 domain, struct amdgpu_bo **bo_ptr, |
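Editor's note on the amdgpu_object.h hunk above: struct amdgpu_bo_va drops its private bo/vm back-pointers and list heads in favour of an embedded struct amdgpu_vm_bo_base, the usual kernel pattern of sharing bookkeeping in a base struct and recovering the derived type with container_of(). A generic sketch of that pattern follows; vm_bo_base and bo_va here are hypothetical stand-ins, not the amdgpu definitions.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct vm_bo_base {          /* shared bookkeeping, kept by generic code */
	int bo_id;
};

struct bo_va {               /* derived object embeds the base */
	struct vm_bo_base base;
	unsigned ref_count;
};

static void touch_base(struct vm_bo_base *base)
{
	/* generic code holds only the base; recover the derived struct */
	struct bo_va *va = container_of(base, struct bo_va, base);

	printf("bo %d has %u refs\n", base->bo_id, va->ref_count);
}

int main(void)
{
	struct bo_va va = { .base = { .bo_id = 7 }, .ref_count = 2 };

	touch_base(&va.base);
	return 0;
}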
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 7df503aedb69..f6ce52956e6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | |||
@@ -74,7 +74,7 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) | |||
74 | adev->pm.dpm.ac_power = true; | 74 | adev->pm.dpm.ac_power = true; |
75 | else | 75 | else |
76 | adev->pm.dpm.ac_power = false; | 76 | adev->pm.dpm.ac_power = false; |
77 | if (adev->pm.funcs->enable_bapm) | 77 | if (adev->powerplay.pp_funcs->enable_bapm) |
78 | amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power); | 78 | amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power); |
79 | mutex_unlock(&adev->pm.mutex); | 79 | mutex_unlock(&adev->pm.mutex); |
80 | } | 80 | } |
@@ -88,9 +88,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, | |||
88 | struct amdgpu_device *adev = ddev->dev_private; | 88 | struct amdgpu_device *adev = ddev->dev_private; |
89 | enum amd_pm_state_type pm; | 89 | enum amd_pm_state_type pm; |
90 | 90 | ||
91 | if (adev->pp_enabled) { | 91 | if (adev->powerplay.pp_funcs->get_current_power_state) |
92 | pm = amdgpu_dpm_get_current_power_state(adev); | 92 | pm = amdgpu_dpm_get_current_power_state(adev); |
93 | } else | 93 | else |
94 | pm = adev->pm.dpm.user_state; | 94 | pm = adev->pm.dpm.user_state; |
95 | 95 | ||
96 | return snprintf(buf, PAGE_SIZE, "%s\n", | 96 | return snprintf(buf, PAGE_SIZE, "%s\n", |
@@ -119,7 +119,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, | |||
119 | } | 119 | } |
120 | 120 | ||
121 | if (adev->pp_enabled) { | 121 | if (adev->pp_enabled) { |
122 | amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); | 122 | amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL); |
123 | } else { | 123 | } else { |
124 | mutex_lock(&adev->pm.mutex); | 124 | mutex_lock(&adev->pm.mutex); |
125 | adev->pm.dpm.user_state = state; | 125 | adev->pm.dpm.user_state = state; |
@@ -140,13 +140,17 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, | |||
140 | { | 140 | { |
141 | struct drm_device *ddev = dev_get_drvdata(dev); | 141 | struct drm_device *ddev = dev_get_drvdata(dev); |
142 | struct amdgpu_device *adev = ddev->dev_private; | 142 | struct amdgpu_device *adev = ddev->dev_private; |
143 | enum amd_dpm_forced_level level; | 143 | enum amd_dpm_forced_level level = 0xff; |
144 | 144 | ||
145 | if ((adev->flags & AMD_IS_PX) && | 145 | if ((adev->flags & AMD_IS_PX) && |
146 | (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) | 146 | (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) |
147 | return snprintf(buf, PAGE_SIZE, "off\n"); | 147 | return snprintf(buf, PAGE_SIZE, "off\n"); |
148 | 148 | ||
149 | level = amdgpu_dpm_get_performance_level(adev); | 149 | if (adev->powerplay.pp_funcs->get_performance_level) |
150 | level = amdgpu_dpm_get_performance_level(adev); | ||
151 | else | ||
152 | level = adev->pm.dpm.forced_level; | ||
153 | |||
150 | return snprintf(buf, PAGE_SIZE, "%s\n", | 154 | return snprintf(buf, PAGE_SIZE, "%s\n", |
151 | (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : | 155 | (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : |
152 | (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : | 156 | (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : |
@@ -167,7 +171,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, | |||
167 | struct drm_device *ddev = dev_get_drvdata(dev); | 171 | struct drm_device *ddev = dev_get_drvdata(dev); |
168 | struct amdgpu_device *adev = ddev->dev_private; | 172 | struct amdgpu_device *adev = ddev->dev_private; |
169 | enum amd_dpm_forced_level level; | 173 | enum amd_dpm_forced_level level; |
170 | enum amd_dpm_forced_level current_level; | 174 | enum amd_dpm_forced_level current_level = 0xff; |
171 | int ret = 0; | 175 | int ret = 0; |
172 | 176 | ||
173 | /* Can't force performance level when the card is off */ | 177 | /* Can't force performance level when the card is off */ |
@@ -175,7 +179,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, | |||
175 | (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) | 179 | (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) |
176 | return -EINVAL; | 180 | return -EINVAL; |
177 | 181 | ||
178 | current_level = amdgpu_dpm_get_performance_level(adev); | 182 | if (adev->powerplay.pp_funcs->get_performance_level) |
183 | current_level = amdgpu_dpm_get_performance_level(adev); | ||
179 | 184 | ||
180 | if (strncmp("low", buf, strlen("low")) == 0) { | 185 | if (strncmp("low", buf, strlen("low")) == 0) { |
181 | level = AMD_DPM_FORCED_LEVEL_LOW; | 186 | level = AMD_DPM_FORCED_LEVEL_LOW; |
@@ -203,9 +208,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, | |||
203 | if (current_level == level) | 208 | if (current_level == level) |
204 | return count; | 209 | return count; |
205 | 210 | ||
206 | if (adev->pp_enabled) | 211 | if (adev->powerplay.pp_funcs->force_performance_level) { |
207 | amdgpu_dpm_force_performance_level(adev, level); | ||
208 | else { | ||
209 | mutex_lock(&adev->pm.mutex); | 212 | mutex_lock(&adev->pm.mutex); |
210 | if (adev->pm.dpm.thermal_active) { | 213 | if (adev->pm.dpm.thermal_active) { |
211 | count = -EINVAL; | 214 | count = -EINVAL; |
@@ -233,7 +236,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, | |||
233 | struct pp_states_info data; | 236 | struct pp_states_info data; |
234 | int i, buf_len; | 237 | int i, buf_len; |
235 | 238 | ||
236 | if (adev->pp_enabled) | 239 | if (adev->powerplay.pp_funcs->get_pp_num_states) |
237 | amdgpu_dpm_get_pp_num_states(adev, &data); | 240 | amdgpu_dpm_get_pp_num_states(adev, &data); |
238 | 241 | ||
239 | buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); | 242 | buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); |
@@ -257,8 +260,8 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, | |||
257 | enum amd_pm_state_type pm = 0; | 260 | enum amd_pm_state_type pm = 0; |
258 | int i = 0; | 261 | int i = 0; |
259 | 262 | ||
260 | if (adev->pp_enabled) { | 263 | if (adev->powerplay.pp_funcs->get_current_power_state |
261 | 264 | && adev->powerplay.pp_funcs->get_pp_num_states) { | |
262 | pm = amdgpu_dpm_get_current_power_state(adev); | 265 | pm = amdgpu_dpm_get_current_power_state(adev); |
263 | amdgpu_dpm_get_pp_num_states(adev, &data); | 266 | amdgpu_dpm_get_pp_num_states(adev, &data); |
264 | 267 | ||
@@ -280,25 +283,10 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev, | |||
280 | { | 283 | { |
281 | struct drm_device *ddev = dev_get_drvdata(dev); | 284 | struct drm_device *ddev = dev_get_drvdata(dev); |
282 | struct amdgpu_device *adev = ddev->dev_private; | 285 | struct amdgpu_device *adev = ddev->dev_private; |
283 | struct pp_states_info data; | ||
284 | enum amd_pm_state_type pm = 0; | ||
285 | int i; | ||
286 | |||
287 | if (adev->pp_force_state_enabled && adev->pp_enabled) { | ||
288 | pm = amdgpu_dpm_get_current_power_state(adev); | ||
289 | amdgpu_dpm_get_pp_num_states(adev, &data); | ||
290 | |||
291 | for (i = 0; i < data.nums; i++) { | ||
292 | if (pm == data.states[i]) | ||
293 | break; | ||
294 | } | ||
295 | 286 | ||
296 | if (i == data.nums) | 287 | if (adev->pp_force_state_enabled) |
297 | i = -EINVAL; | 288 | return amdgpu_get_pp_cur_state(dev, attr, buf); |
298 | 289 | else | |
299 | return snprintf(buf, PAGE_SIZE, "%d\n", i); | ||
300 | |||
301 | } else | ||
302 | return snprintf(buf, PAGE_SIZE, "\n"); | 290 | return snprintf(buf, PAGE_SIZE, "\n"); |
303 | } | 291 | } |
304 | 292 | ||
@@ -330,7 +318,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, | |||
330 | if (state != POWER_STATE_TYPE_INTERNAL_BOOT && | 318 | if (state != POWER_STATE_TYPE_INTERNAL_BOOT && |
331 | state != POWER_STATE_TYPE_DEFAULT) { | 319 | state != POWER_STATE_TYPE_DEFAULT) { |
332 | amdgpu_dpm_dispatch_task(adev, | 320 | amdgpu_dpm_dispatch_task(adev, |
333 | AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); | 321 | AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL); |
334 | adev->pp_force_state_enabled = true; | 322 | adev->pp_force_state_enabled = true; |
335 | } | 323 | } |
336 | } | 324 | } |
@@ -347,7 +335,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, | |||
347 | char *table = NULL; | 335 | char *table = NULL; |
348 | int size; | 336 | int size; |
349 | 337 | ||
350 | if (adev->pp_enabled) | 338 | if (adev->powerplay.pp_funcs->get_pp_table) |
351 | size = amdgpu_dpm_get_pp_table(adev, &table); | 339 | size = amdgpu_dpm_get_pp_table(adev, &table); |
352 | else | 340 | else |
353 | return 0; | 341 | return 0; |
@@ -368,7 +356,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, | |||
368 | struct drm_device *ddev = dev_get_drvdata(dev); | 356 | struct drm_device *ddev = dev_get_drvdata(dev); |
369 | struct amdgpu_device *adev = ddev->dev_private; | 357 | struct amdgpu_device *adev = ddev->dev_private; |
370 | 358 | ||
371 | if (adev->pp_enabled) | 359 | if (adev->powerplay.pp_funcs->set_pp_table) |
372 | amdgpu_dpm_set_pp_table(adev, buf, count); | 360 | amdgpu_dpm_set_pp_table(adev, buf, count); |
373 | 361 | ||
374 | return count; | 362 | return count; |
@@ -380,14 +368,11 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, | |||
380 | { | 368 | { |
381 | struct drm_device *ddev = dev_get_drvdata(dev); | 369 | struct drm_device *ddev = dev_get_drvdata(dev); |
382 | struct amdgpu_device *adev = ddev->dev_private; | 370 | struct amdgpu_device *adev = ddev->dev_private; |
383 | ssize_t size = 0; | ||
384 | 371 | ||
385 | if (adev->pp_enabled) | 372 | if (adev->powerplay.pp_funcs->print_clock_levels) |
386 | size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); | 373 | return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); |
387 | else if (adev->pm.funcs->print_clock_levels) | 374 | else |
388 | size = adev->pm.funcs->print_clock_levels(adev, PP_SCLK, buf); | 375 | return snprintf(buf, PAGE_SIZE, "\n"); |
389 | |||
390 | return size; | ||
391 | } | 376 | } |
392 | 377 | ||
393 | static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, | 378 | static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, |
@@ -416,10 +401,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, | |||
416 | mask |= 1 << level; | 401 | mask |= 1 << level; |
417 | } | 402 | } |
418 | 403 | ||
419 | if (adev->pp_enabled) | 404 | if (adev->powerplay.pp_funcs->force_clock_level) |
420 | amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); | 405 | amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); |
421 | else if (adev->pm.funcs->force_clock_level) | 406 | |
422 | adev->pm.funcs->force_clock_level(adev, PP_SCLK, mask); | ||
423 | fail: | 407 | fail: |
424 | return count; | 408 | return count; |
425 | } | 409 | } |
@@ -430,14 +414,11 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, | |||
430 | { | 414 | { |
431 | struct drm_device *ddev = dev_get_drvdata(dev); | 415 | struct drm_device *ddev = dev_get_drvdata(dev); |
432 | struct amdgpu_device *adev = ddev->dev_private; | 416 | struct amdgpu_device *adev = ddev->dev_private; |
433 | ssize_t size = 0; | ||
434 | |||
435 | if (adev->pp_enabled) | ||
436 | size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); | ||
437 | else if (adev->pm.funcs->print_clock_levels) | ||
438 | size = adev->pm.funcs->print_clock_levels(adev, PP_MCLK, buf); | ||
439 | 417 | ||
440 | return size; | 418 | if (adev->powerplay.pp_funcs->print_clock_levels) |
419 | return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); | ||
420 | else | ||
421 | return snprintf(buf, PAGE_SIZE, "\n"); | ||
441 | } | 422 | } |
442 | 423 | ||
443 | static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, | 424 | static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, |
@@ -465,11 +446,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, | |||
465 | } | 446 | } |
466 | mask |= 1 << level; | 447 | mask |= 1 << level; |
467 | } | 448 | } |
468 | 449 | if (adev->powerplay.pp_funcs->force_clock_level) | |
469 | if (adev->pp_enabled) | ||
470 | amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); | 450 | amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); |
471 | else if (adev->pm.funcs->force_clock_level) | 451 | |
472 | adev->pm.funcs->force_clock_level(adev, PP_MCLK, mask); | ||
473 | fail: | 452 | fail: |
474 | return count; | 453 | return count; |
475 | } | 454 | } |
@@ -480,14 +459,11 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, | |||
480 | { | 459 | { |
481 | struct drm_device *ddev = dev_get_drvdata(dev); | 460 | struct drm_device *ddev = dev_get_drvdata(dev); |
482 | struct amdgpu_device *adev = ddev->dev_private; | 461 | struct amdgpu_device *adev = ddev->dev_private; |
483 | ssize_t size = 0; | ||
484 | |||
485 | if (adev->pp_enabled) | ||
486 | size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); | ||
487 | else if (adev->pm.funcs->print_clock_levels) | ||
488 | size = adev->pm.funcs->print_clock_levels(adev, PP_PCIE, buf); | ||
489 | 462 | ||
490 | return size; | 463 | if (adev->powerplay.pp_funcs->print_clock_levels) |
464 | return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); | ||
465 | else | ||
466 | return snprintf(buf, PAGE_SIZE, "\n"); | ||
491 | } | 467 | } |
492 | 468 | ||
493 | static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, | 469 | static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, |
@@ -515,11 +491,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, | |||
515 | } | 491 | } |
516 | mask |= 1 << level; | 492 | mask |= 1 << level; |
517 | } | 493 | } |
518 | 494 | if (adev->powerplay.pp_funcs->force_clock_level) | |
519 | if (adev->pp_enabled) | ||
520 | amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); | 495 | amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); |
521 | else if (adev->pm.funcs->force_clock_level) | 496 | |
522 | adev->pm.funcs->force_clock_level(adev, PP_PCIE, mask); | ||
523 | fail: | 497 | fail: |
524 | return count; | 498 | return count; |
525 | } | 499 | } |
@@ -532,10 +506,8 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, | |||
532 | struct amdgpu_device *adev = ddev->dev_private; | 506 | struct amdgpu_device *adev = ddev->dev_private; |
533 | uint32_t value = 0; | 507 | uint32_t value = 0; |
534 | 508 | ||
535 | if (adev->pp_enabled) | 509 | if (adev->powerplay.pp_funcs->get_sclk_od) |
536 | value = amdgpu_dpm_get_sclk_od(adev); | 510 | value = amdgpu_dpm_get_sclk_od(adev); |
537 | else if (adev->pm.funcs->get_sclk_od) | ||
538 | value = adev->pm.funcs->get_sclk_od(adev); | ||
539 | 511 | ||
540 | return snprintf(buf, PAGE_SIZE, "%d\n", value); | 512 | return snprintf(buf, PAGE_SIZE, "%d\n", value); |
541 | } | 513 | } |
@@ -556,12 +528,12 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, | |||
556 | count = -EINVAL; | 528 | count = -EINVAL; |
557 | goto fail; | 529 | goto fail; |
558 | } | 530 | } |
531 | if (adev->powerplay.pp_funcs->set_sclk_od) | ||
532 | amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); | ||
559 | 533 | ||
560 | if (adev->pp_enabled) { | 534 | if (adev->pp_enabled) { |
561 | amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); | 535 | amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); |
562 | amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL); | 536 | } else { |
563 | } else if (adev->pm.funcs->set_sclk_od) { | ||
564 | adev->pm.funcs->set_sclk_od(adev, (uint32_t)value); | ||
565 | adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; | 537 | adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; |
566 | amdgpu_pm_compute_clocks(adev); | 538 | amdgpu_pm_compute_clocks(adev); |
567 | } | 539 | } |
@@ -578,10 +550,8 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, | |||
578 | struct amdgpu_device *adev = ddev->dev_private; | 550 | struct amdgpu_device *adev = ddev->dev_private; |
579 | uint32_t value = 0; | 551 | uint32_t value = 0; |
580 | 552 | ||
581 | if (adev->pp_enabled) | 553 | if (adev->powerplay.pp_funcs->get_mclk_od) |
582 | value = amdgpu_dpm_get_mclk_od(adev); | 554 | value = amdgpu_dpm_get_mclk_od(adev); |
583 | else if (adev->pm.funcs->get_mclk_od) | ||
584 | value = adev->pm.funcs->get_mclk_od(adev); | ||
585 | 555 | ||
586 | return snprintf(buf, PAGE_SIZE, "%d\n", value); | 556 | return snprintf(buf, PAGE_SIZE, "%d\n", value); |
587 | } | 557 | } |
@@ -602,12 +572,12 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, | |||
602 | count = -EINVAL; | 572 | count = -EINVAL; |
603 | goto fail; | 573 | goto fail; |
604 | } | 574 | } |
575 | if (adev->powerplay.pp_funcs->set_mclk_od) | ||
576 | amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); | ||
605 | 577 | ||
606 | if (adev->pp_enabled) { | 578 | if (adev->pp_enabled) { |
607 | amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); | 579 | amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); |
608 | amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL); | 580 | } else { |
609 | } else if (adev->pm.funcs->set_mclk_od) { | ||
610 | adev->pm.funcs->set_mclk_od(adev, (uint32_t)value); | ||
611 | adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; | 581 | adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; |
612 | amdgpu_pm_compute_clocks(adev); | 582 | amdgpu_pm_compute_clocks(adev); |
613 | } | 583 | } |
@@ -621,14 +591,11 @@ static ssize_t amdgpu_get_pp_power_profile(struct device *dev, | |||
621 | { | 591 | { |
622 | struct drm_device *ddev = dev_get_drvdata(dev); | 592 | struct drm_device *ddev = dev_get_drvdata(dev); |
623 | struct amdgpu_device *adev = ddev->dev_private; | 593 | struct amdgpu_device *adev = ddev->dev_private; |
624 | int ret = 0; | 594 | int ret = 0xff; |
625 | 595 | ||
626 | if (adev->pp_enabled) | 596 | if (adev->powerplay.pp_funcs->get_power_profile_state) |
627 | ret = amdgpu_dpm_get_power_profile_state( | 597 | ret = amdgpu_dpm_get_power_profile_state( |
628 | adev, query); | 598 | adev, query); |
629 | else if (adev->pm.funcs->get_power_profile_state) | ||
630 | ret = adev->pm.funcs->get_power_profile_state( | ||
631 | adev, query); | ||
632 | 599 | ||
633 | if (ret) | 600 | if (ret) |
634 | return ret; | 601 | return ret; |
@@ -675,15 +642,12 @@ static ssize_t amdgpu_set_pp_power_profile(struct device *dev, | |||
675 | char *sub_str, buf_cpy[128], *tmp_str; | 642 | char *sub_str, buf_cpy[128], *tmp_str; |
676 | const char delimiter[3] = {' ', '\n', '\0'}; | 643 | const char delimiter[3] = {' ', '\n', '\0'}; |
677 | long int value; | 644 | long int value; |
678 | int ret = 0; | 645 | int ret = 0xff; |
679 | 646 | ||
680 | if (strncmp("reset", buf, strlen("reset")) == 0) { | 647 | if (strncmp("reset", buf, strlen("reset")) == 0) { |
681 | if (adev->pp_enabled) | 648 | if (adev->powerplay.pp_funcs->reset_power_profile_state) |
682 | ret = amdgpu_dpm_reset_power_profile_state( | 649 | ret = amdgpu_dpm_reset_power_profile_state( |
683 | adev, request); | 650 | adev, request); |
684 | else if (adev->pm.funcs->reset_power_profile_state) | ||
685 | ret = adev->pm.funcs->reset_power_profile_state( | ||
686 | adev, request); | ||
687 | if (ret) { | 651 | if (ret) { |
688 | count = -EINVAL; | 652 | count = -EINVAL; |
689 | goto fail; | 653 | goto fail; |
@@ -692,12 +656,10 @@ static ssize_t amdgpu_set_pp_power_profile(struct device *dev, | |||
692 | } | 656 | } |
693 | 657 | ||
694 | if (strncmp("set", buf, strlen("set")) == 0) { | 658 | if (strncmp("set", buf, strlen("set")) == 0) { |
695 | if (adev->pp_enabled) | 659 | if (adev->powerplay.pp_funcs->set_power_profile_state) |
696 | ret = amdgpu_dpm_set_power_profile_state( | 660 | ret = amdgpu_dpm_set_power_profile_state( |
697 | adev, request); | 661 | adev, request); |
698 | else if (adev->pm.funcs->set_power_profile_state) | 662 | |
699 | ret = adev->pm.funcs->set_power_profile_state( | ||
700 | adev, request); | ||
701 | if (ret) { | 663 | if (ret) { |
702 | count = -EINVAL; | 664 | count = -EINVAL; |
703 | goto fail; | 665 | goto fail; |
@@ -745,13 +707,8 @@ static ssize_t amdgpu_set_pp_power_profile(struct device *dev, | |||
745 | 707 | ||
746 | loop++; | 708 | loop++; |
747 | } | 709 | } |
748 | 710 | if (adev->powerplay.pp_funcs->set_power_profile_state) | |
749 | if (adev->pp_enabled) | 711 | ret = amdgpu_dpm_set_power_profile_state(adev, request); |
750 | ret = amdgpu_dpm_set_power_profile_state( | ||
751 | adev, request); | ||
752 | else if (adev->pm.funcs->set_power_profile_state) | ||
753 | ret = adev->pm.funcs->set_power_profile_state( | ||
754 | adev, request); | ||
755 | 712 | ||
756 | if (ret) | 713 | if (ret) |
757 | count = -EINVAL; | 714 | count = -EINVAL; |
@@ -831,7 +788,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, | |||
831 | (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) | 788 | (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) |
832 | return -EINVAL; | 789 | return -EINVAL; |
833 | 790 | ||
834 | if (!adev->pp_enabled && !adev->pm.funcs->get_temperature) | 791 | if (!adev->powerplay.pp_funcs->get_temperature) |
835 | temp = 0; | 792 | temp = 0; |
836 | else | 793 | else |
837 | temp = amdgpu_dpm_get_temperature(adev); | 794 | temp = amdgpu_dpm_get_temperature(adev); |
@@ -862,7 +819,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, | |||
862 | struct amdgpu_device *adev = dev_get_drvdata(dev); | 819 | struct amdgpu_device *adev = dev_get_drvdata(dev); |
863 | u32 pwm_mode = 0; | 820 | u32 pwm_mode = 0; |
864 | 821 | ||
865 | if (!adev->pp_enabled && !adev->pm.funcs->get_fan_control_mode) | 822 | if (!adev->powerplay.pp_funcs->get_fan_control_mode) |
866 | return -EINVAL; | 823 | return -EINVAL; |
867 | 824 | ||
868 | pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); | 825 | pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); |
@@ -879,7 +836,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, | |||
879 | int err; | 836 | int err; |
880 | int value; | 837 | int value; |
881 | 838 | ||
882 | if (!adev->pp_enabled && !adev->pm.funcs->set_fan_control_mode) | 839 | if (!adev->powerplay.pp_funcs->set_fan_control_mode) |
883 | return -EINVAL; | 840 | return -EINVAL; |
884 | 841 | ||
885 | err = kstrtoint(buf, 10, &value); | 842 | err = kstrtoint(buf, 10, &value); |
@@ -919,9 +876,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, | |||
919 | 876 | ||
920 | value = (value * 100) / 255; | 877 | value = (value * 100) / 255; |
921 | 878 | ||
922 | err = amdgpu_dpm_set_fan_speed_percent(adev, value); | 879 | if (adev->powerplay.pp_funcs->set_fan_speed_percent) { |
923 | if (err) | 880 | err = amdgpu_dpm_set_fan_speed_percent(adev, value); |
924 | return err; | 881 | if (err) |
882 | return err; | ||
883 | } | ||
925 | 884 | ||
926 | return count; | 885 | return count; |
927 | } | 886 | } |
@@ -932,11 +891,13 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, | |||
932 | { | 891 | { |
933 | struct amdgpu_device *adev = dev_get_drvdata(dev); | 892 | struct amdgpu_device *adev = dev_get_drvdata(dev); |
934 | int err; | 893 | int err; |
935 | u32 speed; | 894 | u32 speed = 0; |
936 | 895 | ||
937 | err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); | 896 | if (adev->powerplay.pp_funcs->get_fan_speed_percent) { |
938 | if (err) | 897 | err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); |
939 | return err; | 898 | if (err) |
899 | return err; | ||
900 | } | ||
940 | 901 | ||
941 | speed = (speed * 255) / 100; | 902 | speed = (speed * 255) / 100; |
942 | 903 | ||
@@ -949,11 +910,13 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, | |||
949 | { | 910 | { |
950 | struct amdgpu_device *adev = dev_get_drvdata(dev); | 911 | struct amdgpu_device *adev = dev_get_drvdata(dev); |
951 | int err; | 912 | int err; |
952 | u32 speed; | 913 | u32 speed = 0; |
953 | 914 | ||
954 | err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); | 915 | if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { |
955 | if (err) | 916 | err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); |
956 | return err; | 917 | if (err) |
918 | return err; | ||
919 | } | ||
957 | 920 | ||
958 | return sprintf(buf, "%i\n", speed); | 921 | return sprintf(buf, "%i\n", speed); |
959 | } | 922 | } |
@@ -1008,21 +971,21 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, | |||
1008 | return 0; | 971 | return 0; |
1009 | 972 | ||
1010 | /* mask fan attributes if we have no bindings for this asic to expose */ | 973 | /* mask fan attributes if we have no bindings for this asic to expose */ |
1011 | if ((!adev->pm.funcs->get_fan_speed_percent && | 974 | if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && |
1012 | attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ | 975 | attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ |
1013 | (!adev->pm.funcs->get_fan_control_mode && | 976 | (!adev->powerplay.pp_funcs->get_fan_control_mode && |
1014 | attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ | 977 | attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ |
1015 | effective_mode &= ~S_IRUGO; | 978 | effective_mode &= ~S_IRUGO; |
1016 | 979 | ||
1017 | if ((!adev->pm.funcs->set_fan_speed_percent && | 980 | if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && |
1018 | attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */ | 981 | attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */ |
1019 | (!adev->pm.funcs->set_fan_control_mode && | 982 | (!adev->powerplay.pp_funcs->set_fan_control_mode && |
1020 | attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ | 983 | attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ |
1021 | effective_mode &= ~S_IWUSR; | 984 | effective_mode &= ~S_IWUSR; |
1022 | 985 | ||
1023 | /* hide max/min values if we can't both query and manage the fan */ | 986 | /* hide max/min values if we can't both query and manage the fan */ |
1024 | if ((!adev->pm.funcs->set_fan_speed_percent && | 987 | if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && |
1025 | !adev->pm.funcs->get_fan_speed_percent) && | 988 | !adev->powerplay.pp_funcs->get_fan_speed_percent) && |
1026 | (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || | 989 | (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || |
1027 | attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) | 990 | attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) |
1028 | return 0; | 991 | return 0; |
@@ -1055,7 +1018,7 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) | |||
1055 | if (!adev->pm.dpm_enabled) | 1018 | if (!adev->pm.dpm_enabled) |
1056 | return; | 1019 | return; |
1057 | 1020 | ||
1058 | if (adev->pm.funcs->get_temperature) { | 1021 | if (adev->powerplay.pp_funcs->get_temperature) { |
1059 | int temp = amdgpu_dpm_get_temperature(adev); | 1022 | int temp = amdgpu_dpm_get_temperature(adev); |
1060 | 1023 | ||
1061 | if (temp < adev->pm.dpm.thermal.min_temp) | 1024 | if (temp < adev->pm.dpm.thermal.min_temp) |
@@ -1087,7 +1050,7 @@ static struct amdgpu_ps *amdgpu_dpm_pick_power_state(struct amdgpu_device *adev, | |||
1087 | true : false; | 1050 | true : false; |
1088 | 1051 | ||
1089 | /* check if the vblank period is too short to adjust the mclk */ | 1052 | /* check if the vblank period is too short to adjust the mclk */ |
1090 | if (single_display && adev->pm.funcs->vblank_too_short) { | 1053 | if (single_display && adev->powerplay.pp_funcs->vblank_too_short) { |
1091 | if (amdgpu_dpm_vblank_too_short(adev)) | 1054 | if (amdgpu_dpm_vblank_too_short(adev)) |
1092 | single_display = false; | 1055 | single_display = false; |
1093 | } | 1056 | } |
@@ -1216,7 +1179,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) | |||
1216 | struct amdgpu_ps *ps; | 1179 | struct amdgpu_ps *ps; |
1217 | enum amd_pm_state_type dpm_state; | 1180 | enum amd_pm_state_type dpm_state; |
1218 | int ret; | 1181 | int ret; |
1219 | bool equal; | 1182 | bool equal = false; |
1220 | 1183 | ||
1221 | /* if dpm init failed */ | 1184 | /* if dpm init failed */ |
1222 | if (!adev->pm.dpm_enabled) | 1185 | if (!adev->pm.dpm_enabled) |
@@ -1236,7 +1199,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) | |||
1236 | else | 1199 | else |
1237 | return; | 1200 | return; |
1238 | 1201 | ||
1239 | if (amdgpu_dpm == 1) { | 1202 | if (amdgpu_dpm == 1 && adev->powerplay.pp_funcs->print_power_state) { |
1240 | printk("switching from power state:\n"); | 1203 | printk("switching from power state:\n"); |
1241 | amdgpu_dpm_print_power_state(adev, adev->pm.dpm.current_ps); | 1204 | amdgpu_dpm_print_power_state(adev, adev->pm.dpm.current_ps); |
1242 | printk("switching to power state:\n"); | 1205 | printk("switching to power state:\n"); |
@@ -1245,15 +1208,17 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) | |||
1245 | 1208 | ||
1246 | /* update whether vce is active */ | 1209 | /* update whether vce is active */ |
1247 | ps->vce_active = adev->pm.dpm.vce_active; | 1210 | ps->vce_active = adev->pm.dpm.vce_active; |
1248 | 1211 | if (adev->powerplay.pp_funcs->display_configuration_changed) | |
1249 | amdgpu_dpm_display_configuration_changed(adev); | 1212 | amdgpu_dpm_display_configuration_changed(adev); |
1250 | 1213 | ||
1251 | ret = amdgpu_dpm_pre_set_power_state(adev); | 1214 | ret = amdgpu_dpm_pre_set_power_state(adev); |
1252 | if (ret) | 1215 | if (ret) |
1253 | return; | 1216 | return; |
1254 | 1217 | ||
1255 | if ((0 != amgdpu_dpm_check_state_equal(adev, adev->pm.dpm.current_ps, adev->pm.dpm.requested_ps, &equal))) | 1218 | if (adev->powerplay.pp_funcs->check_state_equal) { |
1256 | equal = false; | 1219 | if (0 != amdgpu_dpm_check_state_equal(adev, adev->pm.dpm.current_ps, adev->pm.dpm.requested_ps, &equal)) |
1220 | equal = false; | ||
1221 | } | ||
1257 | 1222 | ||
1258 | if (equal) | 1223 | if (equal) |
1259 | return; | 1224 | return; |
@@ -1264,7 +1229,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) | |||
1264 | adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; | 1229 | adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; |
1265 | adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; | 1230 | adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; |
1266 | 1231 | ||
1267 | if (adev->pm.funcs->force_performance_level) { | 1232 | if (adev->powerplay.pp_funcs->force_performance_level) { |
1268 | if (adev->pm.dpm.thermal_active) { | 1233 | if (adev->pm.dpm.thermal_active) { |
1269 | enum amd_dpm_forced_level level = adev->pm.dpm.forced_level; | 1234 | enum amd_dpm_forced_level level = adev->pm.dpm.forced_level; |
1270 | /* force low perf level for thermal */ | 1235 | /* force low perf level for thermal */ |
@@ -1280,7 +1245,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) | |||
1280 | 1245 | ||
1281 | void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) | 1246 | void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) |
1282 | { | 1247 | { |
1283 | if (adev->pp_enabled || adev->pm.funcs->powergate_uvd) { | 1248 | if (adev->powerplay.pp_funcs->powergate_uvd) { |
1284 | /* enable/disable UVD */ | 1249 | /* enable/disable UVD */ |
1285 | mutex_lock(&adev->pm.mutex); | 1250 | mutex_lock(&adev->pm.mutex); |
1286 | amdgpu_dpm_powergate_uvd(adev, !enable); | 1251 | amdgpu_dpm_powergate_uvd(adev, !enable); |
@@ -1302,7 +1267,7 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) | |||
1302 | 1267 | ||
1303 | void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) | 1268 | void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) |
1304 | { | 1269 | { |
1305 | if (adev->pp_enabled || adev->pm.funcs->powergate_vce) { | 1270 | if (adev->powerplay.pp_funcs->powergate_vce) { |
1306 | /* enable/disable VCE */ | 1271 | /* enable/disable VCE */ |
1307 | mutex_lock(&adev->pm.mutex); | 1272 | mutex_lock(&adev->pm.mutex); |
1308 | amdgpu_dpm_powergate_vce(adev, !enable); | 1273 | amdgpu_dpm_powergate_vce(adev, !enable); |
@@ -1337,8 +1302,7 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev) | |||
1337 | { | 1302 | { |
1338 | int i; | 1303 | int i; |
1339 | 1304 | ||
1340 | if (adev->pp_enabled) | 1305 | if (adev->powerplay.pp_funcs->print_power_state == NULL) |
1341 | /* TO DO */ | ||
1342 | return; | 1306 | return; |
1343 | 1307 | ||
1344 | for (i = 0; i < adev->pm.dpm.num_ps; i++) | 1308 | for (i = 0; i < adev->pm.dpm.num_ps; i++) |
@@ -1353,10 +1317,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) | |||
1353 | if (adev->pm.sysfs_initialized) | 1317 | if (adev->pm.sysfs_initialized) |
1354 | return 0; | 1318 | return 0; |
1355 | 1319 | ||
1356 | if (!adev->pp_enabled) { | 1320 | if (adev->powerplay.pp_funcs->get_temperature == NULL) |
1357 | if (adev->pm.funcs->get_temperature == NULL) | 1321 | return 0; |
1358 | return 0; | ||
1359 | } | ||
1360 | 1322 | ||
1361 | adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, | 1323 | adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, |
1362 | DRIVER_NAME, adev, | 1324 | DRIVER_NAME, adev, |
@@ -1496,7 +1458,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) | |||
1496 | } | 1458 | } |
1497 | 1459 | ||
1498 | if (adev->pp_enabled) { | 1460 | if (adev->pp_enabled) { |
1499 | amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL); | 1461 | amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL); |
1500 | } else { | 1462 | } else { |
1501 | mutex_lock(&adev->pm.mutex); | 1463 | mutex_lock(&adev->pm.mutex); |
1502 | adev->pm.dpm.new_active_crtcs = 0; | 1464 | adev->pm.dpm.new_active_crtcs = 0; |
@@ -1634,8 +1596,8 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) | |||
1634 | return amdgpu_debugfs_pm_info_pp(m, adev); | 1596 | return amdgpu_debugfs_pm_info_pp(m, adev); |
1635 | } else { | 1597 | } else { |
1636 | mutex_lock(&adev->pm.mutex); | 1598 | mutex_lock(&adev->pm.mutex); |
1637 | if (adev->pm.funcs->debugfs_print_current_performance_level) | 1599 | if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) |
1638 | adev->pm.funcs->debugfs_print_current_performance_level(adev, m); | 1600 | adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m); |
1639 | else | 1601 | else |
1640 | seq_printf(m, "Debugfs support not implemented for this asic\n"); | 1602 | seq_printf(m, "Debugfs support not implemented for this asic\n"); |
1641 | mutex_unlock(&adev->pm.mutex); | 1603 | mutex_unlock(&adev->pm.mutex); |
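The amdgpu_pm.c hunks above all follow one pattern: direct calls through adev->pm.funcs are replaced by calls through adev->powerplay.pp_funcs that are guarded by a NULL check, so a power backend may leave optional callbacks unimplemented. The standalone sketch below illustrates that guarded function-pointer dispatch; the struct and function names are invented for the example and are not the driver's API.

#include <stdio.h>

struct pp_funcs {
	int (*get_temperature)(void *handle);    /* optional callback */
	void (*print_power_state)(void *handle); /* optional callback */
};

static int fake_get_temperature(void *handle)
{
	(void)handle;
	return 45; /* made-up temperature in degrees C */
}

/* A backend that only implements one of the two callbacks. */
static const struct pp_funcs partial_backend = {
	.get_temperature = fake_get_temperature,
};

int main(void)
{
	const struct pp_funcs *funcs = &partial_backend;

	/* Guarded dispatch: only call what the backend actually provides. */
	if (funcs->get_temperature)
		printf("temperature: %d\n", funcs->get_temperature(NULL));

	if (funcs->print_power_state)
		funcs->print_power_state(NULL);
	else
		printf("print_power_state not implemented, skipped\n");

	return 0;
}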
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index b7e1c026c0c8..2d2f0960b025 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | |||
@@ -87,17 +87,28 @@ static int amdgpu_pp_early_init(void *handle) | |||
87 | case CHIP_OLAND: | 87 | case CHIP_OLAND: |
88 | case CHIP_HAINAN: | 88 | case CHIP_HAINAN: |
89 | amd_pp->ip_funcs = &si_dpm_ip_funcs; | 89 | amd_pp->ip_funcs = &si_dpm_ip_funcs; |
90 | amd_pp->pp_funcs = &si_dpm_funcs; | ||
90 | break; | 91 | break; |
91 | #endif | 92 | #endif |
92 | #ifdef CONFIG_DRM_AMDGPU_CIK | 93 | #ifdef CONFIG_DRM_AMDGPU_CIK |
93 | case CHIP_BONAIRE: | 94 | case CHIP_BONAIRE: |
94 | case CHIP_HAWAII: | 95 | case CHIP_HAWAII: |
95 | amd_pp->ip_funcs = &ci_dpm_ip_funcs; | 96 | if (amdgpu_dpm == -1) { |
97 | amd_pp->ip_funcs = &ci_dpm_ip_funcs; | ||
98 | amd_pp->pp_funcs = &ci_dpm_funcs; | ||
99 | } else { | ||
100 | adev->pp_enabled = true; | ||
101 | if (amdgpu_create_pp_handle(adev)) | ||
102 | return -EINVAL; | ||
103 | amd_pp->ip_funcs = &pp_ip_funcs; | ||
104 | amd_pp->pp_funcs = &pp_dpm_funcs; | ||
105 | } | ||
96 | break; | 106 | break; |
97 | case CHIP_KABINI: | 107 | case CHIP_KABINI: |
98 | case CHIP_MULLINS: | 108 | case CHIP_MULLINS: |
99 | case CHIP_KAVERI: | 109 | case CHIP_KAVERI: |
100 | amd_pp->ip_funcs = &kv_dpm_ip_funcs; | 110 | amd_pp->ip_funcs = &kv_dpm_ip_funcs; |
111 | amd_pp->pp_funcs = &kv_dpm_funcs; | ||
101 | break; | 112 | break; |
102 | #endif | 113 | #endif |
103 | default: | 114 | default: |
@@ -128,7 +139,7 @@ static int amdgpu_pp_late_init(void *handle) | |||
128 | 139 | ||
129 | if (adev->pp_enabled && adev->pm.dpm_enabled) { | 140 | if (adev->pp_enabled && adev->pm.dpm_enabled) { |
130 | amdgpu_pm_sysfs_init(adev); | 141 | amdgpu_pm_sysfs_init(adev); |
131 | amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL); | 142 | amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_COMPLETE_INIT, NULL, NULL); |
132 | } | 143 | } |
133 | 144 | ||
134 | return ret; | 145 | return ret; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 5b3f92891f89..90af8e82b16a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | |||
@@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) | |||
57 | ttm_bo_kunmap(&bo->dma_buf_vmap); | 57 | ttm_bo_kunmap(&bo->dma_buf_vmap); |
58 | } | 58 | } |
59 | 59 | ||
60 | int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) | ||
61 | { | ||
62 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); | ||
63 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); | ||
64 | unsigned asize = amdgpu_bo_size(bo); | ||
65 | int ret; | ||
66 | |||
67 | if (!vma->vm_file) | ||
68 | return -ENODEV; | ||
69 | |||
70 | if (adev == NULL) | ||
71 | return -ENODEV; | ||
72 | |||
73 | /* Check for valid size. */ | ||
74 | if (asize < vma->vm_end - vma->vm_start) | ||
75 | return -EINVAL; | ||
76 | |||
77 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || | ||
78 | (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { | ||
79 | return -EPERM; | ||
80 | } | ||
81 | vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT; | ||
82 | |||
83 | /* prime mmap does not need to check access, so allow here */ | ||
84 | ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data); | ||
85 | if (ret) | ||
86 | return ret; | ||
87 | |||
88 | ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev); | ||
89 | drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data); | ||
90 | |||
91 | return ret; | ||
92 | } | ||
93 | |||
60 | struct drm_gem_object * | 94 | struct drm_gem_object * |
61 | amdgpu_gem_prime_import_sg_table(struct drm_device *dev, | 95 | amdgpu_gem_prime_import_sg_table(struct drm_device *dev, |
62 | struct dma_buf_attachment *attach, | 96 | struct dma_buf_attachment *attach, |
@@ -136,7 +170,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, | |||
136 | { | 170 | { |
137 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); | 171 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); |
138 | 172 | ||
139 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) | 173 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || |
174 | bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) | ||
140 | return ERR_PTR(-EPERM); | 175 | return ERR_PTR(-EPERM); |
141 | 176 | ||
142 | return drm_gem_prime_export(dev, gobj, flags); | 177 | return drm_gem_prime_export(dev, gobj, flags); |
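The new amdgpu_gem_prime_mmap() above lets userspace CPU-map a PRIME-exported buffer through its dma-buf file descriptor, after rejecting userptr BOs, buffers flagged AMDGPU_GEM_CREATE_NO_CPU_ACCESS, and mappings larger than the BO. A hedged sketch of the userspace consumer side follows; how the dma-buf fd is obtained (a PRIME export ioctl) is assumed and elided here.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

/* dmabuf_fd is assumed to come from a PRIME export ioctl elsewhere;
 * len is the exported buffer size. mmap() on the fd is routed by the
 * kernel into the exporter's mmap callback, i.e. the function added
 * in the hunk above for amdgpu-exported buffers. */
int touch_dmabuf(int dmabuf_fd, size_t len)
{
	void *ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
			 dmabuf_fd, 0);

	if (ptr == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	memset(ptr, 0, len); /* plain CPU access to the shared buffer */
	munmap(ptr, len);
	return 0;
}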
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8c2204c7b384..447d446b5015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | |||
@@ -57,21 +57,23 @@ static int psp_sw_init(void *handle) | |||
57 | psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; | 57 | psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; |
58 | psp->ring_init = psp_v3_1_ring_init; | 58 | psp->ring_init = psp_v3_1_ring_init; |
59 | psp->ring_create = psp_v3_1_ring_create; | 59 | psp->ring_create = psp_v3_1_ring_create; |
60 | psp->ring_stop = psp_v3_1_ring_stop; | ||
60 | psp->ring_destroy = psp_v3_1_ring_destroy; | 61 | psp->ring_destroy = psp_v3_1_ring_destroy; |
61 | psp->cmd_submit = psp_v3_1_cmd_submit; | 62 | psp->cmd_submit = psp_v3_1_cmd_submit; |
62 | psp->compare_sram_data = psp_v3_1_compare_sram_data; | 63 | psp->compare_sram_data = psp_v3_1_compare_sram_data; |
63 | psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; | 64 | psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; |
65 | psp->mode1_reset = psp_v3_1_mode1_reset; | ||
64 | break; | 66 | break; |
65 | case CHIP_RAVEN: | 67 | case CHIP_RAVEN: |
66 | #if 0 | ||
67 | psp->init_microcode = psp_v10_0_init_microcode; | 68 | psp->init_microcode = psp_v10_0_init_microcode; |
68 | #endif | ||
69 | psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; | 69 | psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; |
70 | psp->ring_init = psp_v10_0_ring_init; | 70 | psp->ring_init = psp_v10_0_ring_init; |
71 | psp->ring_create = psp_v10_0_ring_create; | 71 | psp->ring_create = psp_v10_0_ring_create; |
72 | psp->ring_stop = psp_v10_0_ring_stop; | ||
72 | psp->ring_destroy = psp_v10_0_ring_destroy; | 73 | psp->ring_destroy = psp_v10_0_ring_destroy; |
73 | psp->cmd_submit = psp_v10_0_cmd_submit; | 74 | psp->cmd_submit = psp_v10_0_cmd_submit; |
74 | psp->compare_sram_data = psp_v10_0_compare_sram_data; | 75 | psp->compare_sram_data = psp_v10_0_compare_sram_data; |
76 | psp->mode1_reset = psp_v10_0_mode1_reset; | ||
75 | break; | 77 | break; |
76 | default: | 78 | default: |
77 | return -EINVAL; | 79 | return -EINVAL; |
@@ -90,6 +92,12 @@ static int psp_sw_init(void *handle) | |||
90 | 92 | ||
91 | static int psp_sw_fini(void *handle) | 93 | static int psp_sw_fini(void *handle) |
92 | { | 94 | { |
95 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
96 | |||
97 | release_firmware(adev->psp.sos_fw); | ||
98 | adev->psp.sos_fw = NULL; | ||
99 | release_firmware(adev->psp.asd_fw); | ||
100 | adev->psp.asd_fw = NULL; | ||
93 | return 0; | 101 | return 0; |
94 | } | 102 | } |
95 | 103 | ||
@@ -253,15 +261,18 @@ static int psp_asd_load(struct psp_context *psp) | |||
253 | 261 | ||
254 | static int psp_hw_start(struct psp_context *psp) | 262 | static int psp_hw_start(struct psp_context *psp) |
255 | { | 263 | { |
264 | struct amdgpu_device *adev = psp->adev; | ||
256 | int ret; | 265 | int ret; |
257 | 266 | ||
258 | ret = psp_bootloader_load_sysdrv(psp); | 267 | if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) { |
259 | if (ret) | 268 | ret = psp_bootloader_load_sysdrv(psp); |
260 | return ret; | 269 | if (ret) |
270 | return ret; | ||
261 | 271 | ||
262 | ret = psp_bootloader_load_sos(psp); | 272 | ret = psp_bootloader_load_sos(psp); |
263 | if (ret) | 273 | if (ret) |
264 | return ret; | 274 | return ret; |
275 | } | ||
265 | 276 | ||
266 | ret = psp_ring_create(psp, PSP_RING_TYPE__KM); | 277 | ret = psp_ring_create(psp, PSP_RING_TYPE__KM); |
267 | if (ret) | 278 | if (ret) |
@@ -453,6 +464,16 @@ static int psp_hw_fini(void *handle) | |||
453 | 464 | ||
454 | static int psp_suspend(void *handle) | 465 | static int psp_suspend(void *handle) |
455 | { | 466 | { |
467 | int ret; | ||
468 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
469 | struct psp_context *psp = &adev->psp; | ||
470 | |||
471 | ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); | ||
472 | if (ret) { | ||
473 | DRM_ERROR("PSP ring stop failed\n"); | ||
474 | return ret; | ||
475 | } | ||
476 | |||
456 | return 0; | 477 | return 0; |
457 | } | 478 | } |
458 | 479 | ||
@@ -487,6 +508,22 @@ failed: | |||
487 | return ret; | 508 | return ret; |
488 | } | 509 | } |
489 | 510 | ||
511 | static bool psp_check_reset(void* handle) | ||
512 | { | ||
513 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
514 | |||
515 | if (adev->flags & AMD_IS_APU) | ||
516 | return true; | ||
517 | |||
518 | return false; | ||
519 | } | ||
520 | |||
521 | static int psp_reset(void* handle) | ||
522 | { | ||
523 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
524 | return psp_mode1_reset(&adev->psp); | ||
525 | } | ||
526 | |||
490 | static bool psp_check_fw_loading_status(struct amdgpu_device *adev, | 527 | static bool psp_check_fw_loading_status(struct amdgpu_device *adev, |
491 | enum AMDGPU_UCODE_ID ucode_type) | 528 | enum AMDGPU_UCODE_ID ucode_type) |
492 | { | 529 | { |
@@ -530,8 +567,9 @@ const struct amd_ip_funcs psp_ip_funcs = { | |||
530 | .suspend = psp_suspend, | 567 | .suspend = psp_suspend, |
531 | .resume = psp_resume, | 568 | .resume = psp_resume, |
532 | .is_idle = NULL, | 569 | .is_idle = NULL, |
570 | .check_soft_reset = psp_check_reset, | ||
533 | .wait_for_idle = NULL, | 571 | .wait_for_idle = NULL, |
534 | .soft_reset = NULL, | 572 | .soft_reset = psp_reset, |
535 | .set_clockgating_state = psp_set_clockgating_state, | 573 | .set_clockgating_state = psp_set_clockgating_state, |
536 | .set_powergating_state = psp_set_powergating_state, | 574 | .set_powergating_state = psp_set_powergating_state, |
537 | }; | 575 | }; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 538fa9dbfb21..ce4654550416 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | |||
@@ -66,6 +66,8 @@ struct psp_context | |||
66 | struct psp_gfx_cmd_resp *cmd); | 66 | struct psp_gfx_cmd_resp *cmd); |
67 | int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); | 67 | int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); |
68 | int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); | 68 | int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); |
69 | int (*ring_stop)(struct psp_context *psp, | ||
70 | enum psp_ring_type ring_type); | ||
69 | int (*ring_destroy)(struct psp_context *psp, | 71 | int (*ring_destroy)(struct psp_context *psp, |
70 | enum psp_ring_type ring_type); | 72 | enum psp_ring_type ring_type); |
71 | int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, | 73 | int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, |
@@ -74,6 +76,7 @@ struct psp_context | |||
74 | struct amdgpu_firmware_info *ucode, | 76 | struct amdgpu_firmware_info *ucode, |
75 | enum AMDGPU_UCODE_ID ucode_type); | 77 | enum AMDGPU_UCODE_ID ucode_type); |
76 | bool (*smu_reload_quirk)(struct psp_context *psp); | 78 | bool (*smu_reload_quirk)(struct psp_context *psp); |
79 | int (*mode1_reset)(struct psp_context *psp); | ||
77 | 80 | ||
78 | /* fence buffer */ | 81 | /* fence buffer */ |
79 | struct amdgpu_bo *fw_pri_bo; | 82 | struct amdgpu_bo *fw_pri_bo; |
@@ -123,6 +126,7 @@ struct amdgpu_psp_funcs { | |||
123 | #define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) | 126 | #define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) |
124 | #define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) | 127 | #define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) |
125 | #define psp_ring_create(psp, type) (psp)->ring_create((psp), (type)) | 128 | #define psp_ring_create(psp, type) (psp)->ring_create((psp), (type)) |
129 | #define psp_ring_stop(psp, type) (psp)->ring_stop((psp), (type)) | ||
126 | #define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type))) | 130 | #define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type))) |
127 | #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ | 131 | #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ |
128 | (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) | 132 | (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) |
@@ -136,6 +140,8 @@ struct amdgpu_psp_funcs { | |||
136 | ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0) | 140 | ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0) |
137 | #define psp_smu_reload_quirk(psp) \ | 141 | #define psp_smu_reload_quirk(psp) \ |
138 | ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false) | 142 | ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false) |
143 | #define psp_mode1_reset(psp) \ | ||
144 | ((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false) | ||
139 | 145 | ||
140 | extern const struct amd_ip_funcs psp_ip_funcs; | 146 | extern const struct amd_ip_funcs psp_ip_funcs; |
141 | 147 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6c5646b48d1a..5ce65280b396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | |||
@@ -170,6 +170,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
170 | unsigned irq_type) | 170 | unsigned irq_type) |
171 | { | 171 | { |
172 | int r; | 172 | int r; |
173 | int sched_hw_submission = amdgpu_sched_hw_submission; | ||
174 | |||
175 | /* Set the hw submission limit higher for KIQ because | ||
176 | * it's used for a number of gfx/compute tasks by both | ||
177 | * KFD and KGD which may have outstanding fences and | ||
178 | * it doesn't really use the gpu scheduler anyway; | ||
179 | * KIQ tasks get submitted directly to the ring. | ||
180 | */ | ||
181 | if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) | ||
182 | sched_hw_submission = max(sched_hw_submission, 256); | ||
173 | 183 | ||
174 | if (ring->adev == NULL) { | 184 | if (ring->adev == NULL) { |
175 | if (adev->num_rings >= AMDGPU_MAX_RINGS) | 185 | if (adev->num_rings >= AMDGPU_MAX_RINGS) |
@@ -178,8 +188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
178 | ring->adev = adev; | 188 | ring->adev = adev; |
179 | ring->idx = adev->num_rings++; | 189 | ring->idx = adev->num_rings++; |
180 | adev->rings[ring->idx] = ring; | 190 | adev->rings[ring->idx] = ring; |
181 | r = amdgpu_fence_driver_init_ring(ring, | 191 | r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission); |
182 | amdgpu_sched_hw_submission); | ||
183 | if (r) | 192 | if (r) |
184 | return r; | 193 | return r; |
185 | } | 194 | } |
@@ -218,8 +227,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
218 | return r; | 227 | return r; |
219 | } | 228 | } |
220 | 229 | ||
221 | ring->ring_size = roundup_pow_of_two(max_dw * 4 * | 230 | ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission); |
222 | amdgpu_sched_hw_submission); | ||
223 | 231 | ||
224 | ring->buf_mask = (ring->ring_size / 4) - 1; | 232 | ring->buf_mask = (ring->ring_size / 4) - 1; |
225 | ring->ptr_mask = ring->funcs->support_64bit_ptrs ? | 233 | ring->ptr_mask = ring->funcs->support_64bit_ptrs ? |
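The amdgpu_ring.c change above computes the ring size from a local sched_hw_submission that is raised to at least 256 for the KIQ ring, giving roundup_pow_of_two(max_dw * 4 * sched_hw_submission) bytes. A small standalone sketch of that arithmetic follows; the max_dw and default submission values are made-up inputs, not the driver's.

#include <stdio.h>

/* Userspace stand-in for the kernel helper of the same name. */
static unsigned long roundup_pow_of_two(unsigned long x)
{
	unsigned long r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long max_dw = 1024;           /* assumed per-submission dword budget */
	unsigned long sched_hw_submission = 2; /* assumed module parameter default */
	unsigned long kiq_submission =
		sched_hw_submission > 256 ? sched_hw_submission : 256; /* max(.., 256) */

	printf("regular ring: %lu bytes\n",
	       roundup_pow_of_two(max_dw * 4 * sched_hw_submission));
	printf("KIQ ring:     %lu bytes\n",
	       roundup_pow_of_two(max_dw * 4 * kiq_submission));
	return 0;
}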
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index a6899180b265..c586f44312f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | |||
@@ -244,6 +244,12 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, | |||
244 | struct dma_fence *f = e->fence; | 244 | struct dma_fence *f = e->fence; |
245 | struct amd_sched_fence *s_fence = to_amd_sched_fence(f); | 245 | struct amd_sched_fence *s_fence = to_amd_sched_fence(f); |
246 | 246 | ||
247 | if (dma_fence_is_signaled(f)) { | ||
248 | hash_del(&e->node); | ||
249 | dma_fence_put(f); | ||
250 | kmem_cache_free(amdgpu_sync_slab, e); | ||
251 | continue; | ||
252 | } | ||
247 | if (ring && s_fence) { | 253 | if (ring && s_fence) { |
248 | /* For fences from the same ring it is sufficient | 254 | /* For fences from the same ring it is sufficient |
249 | * when they are scheduled. | 255 | * when they are scheduled. |
@@ -256,13 +262,6 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, | |||
256 | } | 262 | } |
257 | } | 263 | } |
258 | 264 | ||
259 | if (dma_fence_is_signaled(f)) { | ||
260 | hash_del(&e->node); | ||
261 | dma_fence_put(f); | ||
262 | kmem_cache_free(amdgpu_sync_slab, e); | ||
263 | continue; | ||
264 | } | ||
265 | |||
266 | return f; | 265 | return f; |
267 | } | 266 | } |
268 | 267 | ||
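The amdgpu_sync.c hunk above only moves the signaled-fence cleanup in front of the scheduled-fence shortcut, so stale entries are freed even on the path that previously skipped over them. Below is an illustrative sketch of the same prune-while-walking pattern on a plain linked list; it does not use the driver's hash table or fence types.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int id;
	bool signaled;
	struct entry *next;
};

/* Free entries whose fence already signaled before doing anything else
 * with them, mirroring the reordering in the hunk above. */
static void walk_and_prune(struct entry **pp)
{
	while (*pp) {
		struct entry *e = *pp;

		if (e->signaled) {
			*pp = e->next;
			free(e);
			continue;
		}
		printf("entry %d still pending\n", e->id);
		pp = &e->next;
	}
}

int main(void)
{
	struct entry *head = NULL;

	for (int i = 0; i < 4; i++) {
		struct entry *e = malloc(sizeof(*e));

		if (!e)
			return 1;
		e->id = i;
		e->signaled = (i % 2) == 0; /* every second entry has signaled */
		e->next = head;
		head = e;
	}

	walk_and_prune(&head);
	return 0;
}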
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 9ab58245e518..213988f336ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | |||
@@ -228,7 +228,7 @@ TRACE_EVENT(amdgpu_vm_bo_map, | |||
228 | ), | 228 | ), |
229 | 229 | ||
230 | TP_fast_assign( | 230 | TP_fast_assign( |
231 | __entry->bo = bo_va ? bo_va->bo : NULL; | 231 | __entry->bo = bo_va ? bo_va->base.bo : NULL; |
232 | __entry->start = mapping->start; | 232 | __entry->start = mapping->start; |
233 | __entry->last = mapping->last; | 233 | __entry->last = mapping->last; |
234 | __entry->offset = mapping->offset; | 234 | __entry->offset = mapping->offset; |
@@ -252,7 +252,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, | |||
252 | ), | 252 | ), |
253 | 253 | ||
254 | TP_fast_assign( | 254 | TP_fast_assign( |
255 | __entry->bo = bo_va->bo; | 255 | __entry->bo = bo_va->base.bo; |
256 | __entry->start = mapping->start; | 256 | __entry->start = mapping->start; |
257 | __entry->last = mapping->last; | 257 | __entry->last = mapping->last; |
258 | __entry->offset = mapping->offset; | 258 | __entry->offset = mapping->offset; |
@@ -417,5 +417,5 @@ TRACE_EVENT(amdgpu_ttm_bo_move, | |||
417 | 417 | ||
418 | /* This part must be outside protection */ | 418 | /* This part must be outside protection */ |
419 | #undef TRACE_INCLUDE_PATH | 419 | #undef TRACE_INCLUDE_PATH |
420 | #define TRACE_INCLUDE_PATH . | 420 | #define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/amd/amdgpu |
421 | #include <trace/define_trace.h> | 421 | #include <trace/define_trace.h> |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c index 385b7e1d72f9..9ec96b9e85d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c | |||
@@ -1,4 +1,23 @@ | |||
1 | /* Copyright Red Hat Inc 2010. | 1 | /* Copyright Red Hat Inc 2010. |
2 | * | ||
3 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
4 | * copy of this software and associated documentation files (the "Software"), | ||
5 | * to deal in the Software without restriction, including without limitation | ||
6 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
7 | * and/or sell copies of the Software, and to permit persons to whom the | ||
8 | * Software is furnished to do so, subject to the following conditions: | ||
9 | * | ||
10 | * The above copyright notice and this permission notice shall be included in | ||
11 | * all copies or substantial portions of the Software. | ||
12 | * | ||
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
16 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
17 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
18 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
19 | * OTHER DEALINGS IN THE SOFTWARE. | ||
20 | * | ||
2 | * Author : Dave Airlie <airlied@redhat.com> | 21 | * Author : Dave Airlie <airlied@redhat.com> |
3 | */ | 22 | */ |
4 | #include <drm/drmP.h> | 23 | #include <drm/drmP.h> |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c803b082324d..15a28578d458 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
@@ -42,7 +42,9 @@ | |||
42 | #include <linux/swap.h> | 42 | #include <linux/swap.h> |
43 | #include <linux/pagemap.h> | 43 | #include <linux/pagemap.h> |
44 | #include <linux/debugfs.h> | 44 | #include <linux/debugfs.h> |
45 | #include <linux/iommu.h> | ||
45 | #include "amdgpu.h" | 46 | #include "amdgpu.h" |
47 | #include "amdgpu_trace.h" | ||
46 | #include "bif/bif_4_1_d.h" | 48 | #include "bif/bif_4_1_d.h" |
47 | 49 | ||
48 | #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) | 50 | #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) |
@@ -607,6 +609,7 @@ struct amdgpu_ttm_tt { | |||
607 | spinlock_t guptasklock; | 609 | spinlock_t guptasklock; |
608 | struct list_head guptasks; | 610 | struct list_head guptasks; |
609 | atomic_t mmu_invalidations; | 611 | atomic_t mmu_invalidations; |
612 | uint32_t last_set_pages; | ||
610 | struct list_head list; | 613 | struct list_head list; |
611 | }; | 614 | }; |
612 | 615 | ||
@@ -620,6 +623,8 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) | |||
620 | if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) | 623 | if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) |
621 | flags |= FOLL_WRITE; | 624 | flags |= FOLL_WRITE; |
622 | 625 | ||
626 | down_read(&current->mm->mmap_sem); | ||
627 | |||
623 | if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { | 628 | if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { |
624 | /* check that we only use anonymous memory | 629 | /* check that we only use anonymous memory |
625 | to prevent problems with writeback */ | 630 | to prevent problems with writeback */ |
@@ -627,8 +632,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) | |||
627 | struct vm_area_struct *vma; | 632 | struct vm_area_struct *vma; |
628 | 633 | ||
629 | vma = find_vma(gtt->usermm, gtt->userptr); | 634 | vma = find_vma(gtt->usermm, gtt->userptr); |
630 | if (!vma || vma->vm_file || vma->vm_end < end) | 635 | if (!vma || vma->vm_file || vma->vm_end < end) { |
636 | up_read(&current->mm->mmap_sem); | ||
631 | return -EPERM; | 637 | return -EPERM; |
638 | } | ||
632 | } | 639 | } |
633 | 640 | ||
634 | do { | 641 | do { |
@@ -655,13 +662,47 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) | |||
655 | 662 | ||
656 | } while (pinned < ttm->num_pages); | 663 | } while (pinned < ttm->num_pages); |
657 | 664 | ||
665 | up_read(&current->mm->mmap_sem); | ||
658 | return 0; | 666 | return 0; |
659 | 667 | ||
660 | release_pages: | 668 | release_pages: |
661 | release_pages(pages, pinned, 0); | 669 | release_pages(pages, pinned, 0); |
670 | up_read(&current->mm->mmap_sem); | ||
662 | return r; | 671 | return r; |
663 | } | 672 | } |
664 | 673 | ||
674 | void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) | ||
675 | { | ||
676 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | ||
677 | unsigned i; | ||
678 | |||
679 | gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations); | ||
680 | for (i = 0; i < ttm->num_pages; ++i) { | ||
681 | if (ttm->pages[i]) | ||
682 | put_page(ttm->pages[i]); | ||
683 | |||
684 | ttm->pages[i] = pages ? pages[i] : NULL; | ||
685 | } | ||
686 | } | ||
687 | |||
688 | void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) | ||
689 | { | ||
690 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | ||
691 | unsigned i; | ||
692 | |||
693 | for (i = 0; i < ttm->num_pages; ++i) { | ||
694 | struct page *page = ttm->pages[i]; | ||
695 | |||
696 | if (!page) | ||
697 | continue; | ||
698 | |||
699 | if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) | ||
700 | set_page_dirty(page); | ||
701 | |||
702 | mark_page_accessed(page); | ||
703 | } | ||
704 | } | ||
705 | |||
665 | /* prepare the sg table with the user pages */ | 706 | /* prepare the sg table with the user pages */ |
666 | static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) | 707 | static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) |
667 | { | 708 | { |
@@ -699,7 +740,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) | |||
699 | { | 740 | { |
700 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); | 741 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); |
701 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 742 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
702 | struct sg_page_iter sg_iter; | ||
703 | 743 | ||
704 | int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); | 744 | int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); |
705 | enum dma_data_direction direction = write ? | 745 | enum dma_data_direction direction = write ? |
@@ -712,47 +752,16 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) | |||
712 | /* free the sg table and pages again */ | 752 | /* free the sg table and pages again */ |
713 | dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); | 753 | dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); |
714 | 754 | ||
715 | for_each_sg_page(ttm->sg->sgl, &sg_iter, ttm->sg->nents, 0) { | 755 | amdgpu_ttm_tt_mark_user_pages(ttm); |
716 | struct page *page = sg_page_iter_page(&sg_iter); | ||
717 | if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) | ||
718 | set_page_dirty(page); | ||
719 | |||
720 | mark_page_accessed(page); | ||
721 | put_page(page); | ||
722 | } | ||
723 | 756 | ||
724 | sg_free_table(ttm->sg); | 757 | sg_free_table(ttm->sg); |
725 | } | 758 | } |
726 | 759 | ||
727 | static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem) | ||
728 | { | ||
729 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | ||
730 | uint64_t flags; | ||
731 | int r; | ||
732 | |||
733 | spin_lock(&gtt->adev->gtt_list_lock); | ||
734 | flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem); | ||
735 | gtt->offset = (u64)mem->start << PAGE_SHIFT; | ||
736 | r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, | ||
737 | ttm->pages, gtt->ttm.dma_address, flags); | ||
738 | |||
739 | if (r) { | ||
740 | DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", | ||
741 | ttm->num_pages, gtt->offset); | ||
742 | goto error_gart_bind; | ||
743 | } | ||
744 | |||
745 | list_add_tail(&gtt->list, &gtt->adev->gtt_list); | ||
746 | error_gart_bind: | ||
747 | spin_unlock(&gtt->adev->gtt_list_lock); | ||
748 | return r; | ||
749 | |||
750 | } | ||
751 | |||
752 | static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, | 760 | static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, |
753 | struct ttm_mem_reg *bo_mem) | 761 | struct ttm_mem_reg *bo_mem) |
754 | { | 762 | { |
755 | struct amdgpu_ttm_tt *gtt = (void*)ttm; | 763 | struct amdgpu_ttm_tt *gtt = (void*)ttm; |
764 | uint64_t flags; | ||
756 | int r = 0; | 765 | int r = 0; |
757 | 766 | ||
758 | if (gtt->userptr) { | 767 | if (gtt->userptr) { |
@@ -772,9 +781,24 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, | |||
772 | bo_mem->mem_type == AMDGPU_PL_OA) | 781 | bo_mem->mem_type == AMDGPU_PL_OA) |
773 | return -EINVAL; | 782 | return -EINVAL; |
774 | 783 | ||
775 | if (amdgpu_gtt_mgr_is_allocated(bo_mem)) | 784 | if (!amdgpu_gtt_mgr_is_allocated(bo_mem)) |
776 | r = amdgpu_ttm_do_bind(ttm, bo_mem); | 785 | return 0; |
786 | |||
787 | spin_lock(&gtt->adev->gtt_list_lock); | ||
788 | flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); | ||
789 | gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; | ||
790 | r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, | ||
791 | ttm->pages, gtt->ttm.dma_address, flags); | ||
777 | 792 | ||
793 | if (r) { | ||
794 | DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", | ||
795 | ttm->num_pages, gtt->offset); | ||
796 | goto error_gart_bind; | ||
797 | } | ||
798 | |||
799 | list_add_tail(&gtt->list, &gtt->adev->gtt_list); | ||
800 | error_gart_bind: | ||
801 | spin_unlock(&gtt->adev->gtt_list_lock); | ||
778 | return r; | 802 | return r; |
779 | } | 803 | } |
780 | 804 | ||
@@ -787,20 +811,38 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) | |||
787 | 811 | ||
788 | int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) | 812 | int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) |
789 | { | 813 | { |
814 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); | ||
790 | struct ttm_tt *ttm = bo->ttm; | 815 | struct ttm_tt *ttm = bo->ttm; |
816 | struct ttm_mem_reg tmp; | ||
817 | struct ttm_placement placement; | ||
818 | struct ttm_place placements; | ||
791 | int r; | 819 | int r; |
792 | 820 | ||
793 | if (!ttm || amdgpu_ttm_is_bound(ttm)) | 821 | if (!ttm || amdgpu_ttm_is_bound(ttm)) |
794 | return 0; | 822 | return 0; |
795 | 823 | ||
796 | r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo, | 824 | tmp = bo->mem; |
797 | NULL, bo_mem); | 825 | tmp.mm_node = NULL; |
798 | if (r) { | 826 | placement.num_placement = 1; |
799 | DRM_ERROR("Failed to allocate GTT address space (%d)\n", r); | 827 | placement.placement = &placements; |
828 | placement.num_busy_placement = 1; | ||
829 | placement.busy_placement = &placements; | ||
830 | placements.fpfn = 0; | ||
831 | placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; | ||
832 | placements.flags = bo->mem.placement | TTM_PL_FLAG_TT; | ||
833 | |||
834 | r = ttm_bo_mem_space(bo, &placement, &tmp, true, false); | ||
835 | if (unlikely(r)) | ||
800 | return r; | 836 | return r; |
801 | } | ||
802 | 837 | ||
803 | return amdgpu_ttm_do_bind(ttm, bo_mem); | 838 | r = ttm_bo_move_ttm(bo, true, false, &tmp); |
839 | if (unlikely(r)) | ||
840 | ttm_bo_mem_put(bo, &tmp); | ||
841 | else | ||
842 | bo->offset = (bo->mem.start << PAGE_SHIFT) + | ||
843 | bo->bdev->man[bo->mem.mem_type].gpu_offset; | ||
844 | |||
845 | return r; | ||
804 | } | 846 | } |
805 | 847 | ||
806 | int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) | 848 | int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) |
@@ -892,10 +934,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, | |||
892 | 934 | ||
893 | static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) | 935 | static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) |
894 | { | 936 | { |
895 | struct amdgpu_device *adev; | 937 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); |
896 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 938 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
897 | unsigned i; | ||
898 | int r; | ||
899 | bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); | 939 | bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); |
900 | 940 | ||
901 | if (ttm->state != tt_unpopulated) | 941 | if (ttm->state != tt_unpopulated) |
@@ -918,44 +958,23 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) | |||
918 | return 0; | 958 | return 0; |
919 | } | 959 | } |
920 | 960 | ||
921 | adev = amdgpu_ttm_adev(ttm->bdev); | ||
922 | |||
923 | #ifdef CONFIG_SWIOTLB | 961 | #ifdef CONFIG_SWIOTLB |
924 | if (swiotlb_nr_tbl()) { | 962 | if (swiotlb_nr_tbl()) { |
925 | return ttm_dma_populate(&gtt->ttm, adev->dev); | 963 | return ttm_dma_populate(&gtt->ttm, adev->dev); |
926 | } | 964 | } |
927 | #endif | 965 | #endif |
928 | 966 | ||
929 | r = ttm_pool_populate(ttm); | 967 | return ttm_populate_and_map_pages(adev->dev, &gtt->ttm); |
930 | if (r) { | ||
931 | return r; | ||
932 | } | ||
933 | |||
934 | for (i = 0; i < ttm->num_pages; i++) { | ||
935 | gtt->ttm.dma_address[i] = pci_map_page(adev->pdev, ttm->pages[i], | ||
936 | 0, PAGE_SIZE, | ||
937 | PCI_DMA_BIDIRECTIONAL); | ||
938 | if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) { | ||
939 | while (i--) { | ||
940 | pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i], | ||
941 | PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); | ||
942 | gtt->ttm.dma_address[i] = 0; | ||
943 | } | ||
944 | ttm_pool_unpopulate(ttm); | ||
945 | return -EFAULT; | ||
946 | } | ||
947 | } | ||
948 | return 0; | ||
949 | } | 968 | } |
950 | 969 | ||
951 | static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) | 970 | static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) |
952 | { | 971 | { |
953 | struct amdgpu_device *adev; | 972 | struct amdgpu_device *adev; |
954 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 973 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
955 | unsigned i; | ||
956 | bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); | 974 | bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); |
957 | 975 | ||
958 | if (gtt && gtt->userptr) { | 976 | if (gtt && gtt->userptr) { |
977 | amdgpu_ttm_tt_set_user_pages(ttm, NULL); | ||
959 | kfree(ttm->sg); | 978 | kfree(ttm->sg); |
960 | ttm->page_flags &= ~TTM_PAGE_FLAG_SG; | 979 | ttm->page_flags &= ~TTM_PAGE_FLAG_SG; |
961 | return; | 980 | return; |
@@ -973,14 +992,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) | |||
973 | } | 992 | } |
974 | #endif | 993 | #endif |
975 | 994 | ||
976 | for (i = 0; i < ttm->num_pages; i++) { | 995 | ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm); |
977 | if (gtt->ttm.dma_address[i]) { | ||
978 | pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i], | ||
979 | PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); | ||
980 | } | ||
981 | } | ||
982 | |||
983 | ttm_pool_unpopulate(ttm); | ||
984 | } | 996 | } |
985 | 997 | ||
986 | int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, | 998 | int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, |
@@ -997,6 +1009,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, | |||
997 | spin_lock_init(&gtt->guptasklock); | 1009 | spin_lock_init(&gtt->guptasklock); |
998 | INIT_LIST_HEAD(&gtt->guptasks); | 1010 | INIT_LIST_HEAD(&gtt->guptasks); |
999 | atomic_set(&gtt->mmu_invalidations, 0); | 1011 | atomic_set(&gtt->mmu_invalidations, 0); |
1012 | gtt->last_set_pages = 0; | ||
1000 | 1013 | ||
1001 | return 0; | 1014 | return 0; |
1002 | } | 1015 | } |
@@ -1049,6 +1062,16 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, | |||
1049 | return prev_invalidated != *last_invalidated; | 1062 | return prev_invalidated != *last_invalidated; |
1050 | } | 1063 | } |
1051 | 1064 | ||
1065 | bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) | ||
1066 | { | ||
1067 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | ||
1068 | |||
1069 | if (gtt == NULL || !gtt->userptr) | ||
1070 | return false; | ||
1071 | |||
1072 | return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; | ||
1073 | } | ||
1074 | |||
1052 | bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) | 1075 | bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) |
1053 | { | 1076 | { |
1054 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 1077 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
@@ -1148,14 +1171,14 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, | |||
1148 | } | 1171 | } |
1149 | 1172 | ||
1150 | spin_lock_irqsave(&adev->mmio_idx_lock, flags); | 1173 | spin_lock_irqsave(&adev->mmio_idx_lock, flags); |
1151 | WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); | 1174 | WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); |
1152 | WREG32(mmMM_INDEX_HI, aligned_pos >> 31); | 1175 | WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); |
1153 | if (!write || mask != 0xffffffff) | 1176 | if (!write || mask != 0xffffffff) |
1154 | value = RREG32(mmMM_DATA); | 1177 | value = RREG32_NO_KIQ(mmMM_DATA); |
1155 | if (write) { | 1178 | if (write) { |
1156 | value &= ~mask; | 1179 | value &= ~mask; |
1157 | value |= (*(uint32_t *)buf << shift) & mask; | 1180 | value |= (*(uint32_t *)buf << shift) & mask; |
1158 | WREG32(mmMM_DATA, value); | 1181 | WREG32_NO_KIQ(mmMM_DATA, value); |
1159 | } | 1182 | } |
1160 | spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); | 1183 | spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); |
1161 | if (!write) { | 1184 | if (!write) { |
@@ -1503,8 +1526,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, | |||
1503 | struct dma_fence **fence) | 1526 | struct dma_fence **fence) |
1504 | { | 1527 | { |
1505 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); | 1528 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); |
1506 | /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/ | 1529 | uint32_t max_bytes = 8 * |
1507 | uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; | 1530 | adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde; |
1508 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; | 1531 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; |
1509 | 1532 | ||
1510 | struct drm_mm_node *mm_node; | 1533 | struct drm_mm_node *mm_node; |
@@ -1536,8 +1559,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, | |||
1536 | ++mm_node; | 1559 | ++mm_node; |
1537 | } | 1560 | } |
1538 | 1561 | ||
1539 | /* 10 double words for each SDMA_OP_PTEPDE cmd */ | 1562 | /* num of dwords for each SDMA_OP_PTEPDE cmd */ |
1540 | num_dw = num_loops * 10; | 1563 | num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; |
1541 | 1564 | ||
1542 | /* for IB padding */ | 1565 | /* for IB padding */ |
1543 | num_dw += 64; | 1566 | num_dw += 64; |
@@ -1597,32 +1620,16 @@ error_free: | |||
1597 | 1620 | ||
1598 | #if defined(CONFIG_DEBUG_FS) | 1621 | #if defined(CONFIG_DEBUG_FS) |
1599 | 1622 | ||
1600 | extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager | ||
1601 | *man); | ||
1602 | static int amdgpu_mm_dump_table(struct seq_file *m, void *data) | 1623 | static int amdgpu_mm_dump_table(struct seq_file *m, void *data) |
1603 | { | 1624 | { |
1604 | struct drm_info_node *node = (struct drm_info_node *)m->private; | 1625 | struct drm_info_node *node = (struct drm_info_node *)m->private; |
1605 | unsigned ttm_pl = *(int *)node->info_ent->data; | 1626 | unsigned ttm_pl = *(int *)node->info_ent->data; |
1606 | struct drm_device *dev = node->minor->dev; | 1627 | struct drm_device *dev = node->minor->dev; |
1607 | struct amdgpu_device *adev = dev->dev_private; | 1628 | struct amdgpu_device *adev = dev->dev_private; |
1608 | struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv; | 1629 | struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl]; |
1609 | struct ttm_bo_global *glob = adev->mman.bdev.glob; | ||
1610 | struct drm_printer p = drm_seq_file_printer(m); | 1630 | struct drm_printer p = drm_seq_file_printer(m); |
1611 | 1631 | ||
1612 | spin_lock(&glob->lru_lock); | 1632 | man->func->debug(man, &p); |
1613 | drm_mm_print(mm, &p); | ||
1614 | spin_unlock(&glob->lru_lock); | ||
1615 | switch (ttm_pl) { | ||
1616 | case TTM_PL_VRAM: | ||
1617 | seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", | ||
1618 | adev->mman.bdev.man[ttm_pl].size, | ||
1619 | (u64)atomic64_read(&adev->vram_usage) >> 20, | ||
1620 | (u64)atomic64_read(&adev->vram_vis_usage) >> 20); | ||
1621 | break; | ||
1622 | case TTM_PL_TT: | ||
1623 | amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]); | ||
1624 | break; | ||
1625 | } | ||
1626 | return 0; | 1633 | return 0; |
1627 | } | 1634 | } |
1628 | 1635 | ||
@@ -1659,9 +1666,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, | |||
1659 | return result; | 1666 | return result; |
1660 | 1667 | ||
1661 | spin_lock_irqsave(&adev->mmio_idx_lock, flags); | 1668 | spin_lock_irqsave(&adev->mmio_idx_lock, flags); |
1662 | WREG32(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); | 1669 | WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); |
1663 | WREG32(mmMM_INDEX_HI, *pos >> 31); | 1670 | WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31); |
1664 | value = RREG32(mmMM_DATA); | 1671 | value = RREG32_NO_KIQ(mmMM_DATA); |
1665 | spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); | 1672 | spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); |
1666 | 1673 | ||
1667 | r = put_user(value, (uint32_t *)buf); | 1674 | r = put_user(value, (uint32_t *)buf); |
@@ -1677,10 +1684,50 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, | |||
1677 | return result; | 1684 | return result; |
1678 | } | 1685 | } |
1679 | 1686 | ||
1687 | static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, | ||
1688 | size_t size, loff_t *pos) | ||
1689 | { | ||
1690 | struct amdgpu_device *adev = file_inode(f)->i_private; | ||
1691 | ssize_t result = 0; | ||
1692 | int r; | ||
1693 | |||
1694 | if (size & 0x3 || *pos & 0x3) | ||
1695 | return -EINVAL; | ||
1696 | |||
1697 | if (*pos >= adev->mc.mc_vram_size) | ||
1698 | return -ENXIO; | ||
1699 | |||
1700 | while (size) { | ||
1701 | unsigned long flags; | ||
1702 | uint32_t value; | ||
1703 | |||
1704 | if (*pos >= adev->mc.mc_vram_size) | ||
1705 | return result; | ||
1706 | |||
1707 | r = get_user(value, (uint32_t *)buf); | ||
1708 | if (r) | ||
1709 | return r; | ||
1710 | |||
1711 | spin_lock_irqsave(&adev->mmio_idx_lock, flags); | ||
1712 | WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); | ||
1713 | WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31); | ||
1714 | WREG32_NO_KIQ(mmMM_DATA, value); | ||
1715 | spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); | ||
1716 | |||
1717 | result += 4; | ||
1718 | buf += 4; | ||
1719 | *pos += 4; | ||
1720 | size -= 4; | ||
1721 | } | ||
1722 | |||
1723 | return result; | ||
1724 | } | ||
1725 | |||
1680 | static const struct file_operations amdgpu_ttm_vram_fops = { | 1726 | static const struct file_operations amdgpu_ttm_vram_fops = { |
1681 | .owner = THIS_MODULE, | 1727 | .owner = THIS_MODULE, |
1682 | .read = amdgpu_ttm_vram_read, | 1728 | .read = amdgpu_ttm_vram_read, |
1683 | .llseek = default_llseek | 1729 | .write = amdgpu_ttm_vram_write, |
1730 | .llseek = default_llseek, | ||
1684 | }; | 1731 | }; |
1685 | 1732 | ||
1686 | #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS | 1733 | #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS |
@@ -1732,6 +1779,53 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { | |||
1732 | 1779 | ||
1733 | #endif | 1780 | #endif |
1734 | 1781 | ||
1782 | static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf, | ||
1783 | size_t size, loff_t *pos) | ||
1784 | { | ||
1785 | struct amdgpu_device *adev = file_inode(f)->i_private; | ||
1786 | int r; | ||
1787 | uint64_t phys; | ||
1788 | struct iommu_domain *dom; | ||
1789 | |||
1790 | // always return 8 bytes | ||
1791 | if (size != 8) | ||
1792 | return -EINVAL; | ||
1793 | |||
1794 | // only accept page addresses | ||
1795 | if (*pos & 0xFFF) | ||
1796 | return -EINVAL; | ||
1797 | |||
1798 | dom = iommu_get_domain_for_dev(adev->dev); | ||
1799 | if (dom) | ||
1800 | phys = iommu_iova_to_phys(dom, *pos); | ||
1801 | else | ||
1802 | phys = *pos; | ||
1803 | |||
1804 | r = copy_to_user(buf, &phys, 8); | ||
1805 | if (r) | ||
1806 | return -EFAULT; | ||
1807 | |||
1808 | return 8; | ||
1809 | } | ||
1810 | |||
1811 | static const struct file_operations amdgpu_ttm_iova_fops = { | ||
1812 | .owner = THIS_MODULE, | ||
1813 | .read = amdgpu_iova_to_phys_read, | ||
1814 | .llseek = default_llseek | ||
1815 | }; | ||
1816 | |||
1817 | static const struct { | ||
1818 | char *name; | ||
1819 | const struct file_operations *fops; | ||
1820 | int domain; | ||
1821 | } ttm_debugfs_entries[] = { | ||
1822 | { "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM }, | ||
1823 | #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS | ||
1824 | { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT }, | ||
1825 | #endif | ||
1826 | { "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM }, | ||
1827 | }; | ||
1828 | |||
1735 | #endif | 1829 | #endif |
1736 | 1830 | ||
1737 | static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) | 1831 | static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) |
@@ -1742,22 +1836,21 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) | |||
1742 | struct drm_minor *minor = adev->ddev->primary; | 1836 | struct drm_minor *minor = adev->ddev->primary; |
1743 | struct dentry *ent, *root = minor->debugfs_root; | 1837 | struct dentry *ent, *root = minor->debugfs_root; |
1744 | 1838 | ||
1745 | ent = debugfs_create_file("amdgpu_vram", S_IFREG | S_IRUGO, root, | 1839 | for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) { |
1746 | adev, &amdgpu_ttm_vram_fops); | 1840 | ent = debugfs_create_file( |
1747 | if (IS_ERR(ent)) | 1841 | ttm_debugfs_entries[count].name, |
1748 | return PTR_ERR(ent); | 1842 | S_IFREG | S_IRUGO, root, |
1749 | i_size_write(ent->d_inode, adev->mc.mc_vram_size); | 1843 | adev, |
1750 | adev->mman.vram = ent; | 1844 | ttm_debugfs_entries[count].fops); |
1751 | 1845 | if (IS_ERR(ent)) | |
1752 | #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS | 1846 | return PTR_ERR(ent); |
1753 | ent = debugfs_create_file("amdgpu_gtt", S_IFREG | S_IRUGO, root, | 1847 | if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM) |
1754 | adev, &amdgpu_ttm_gtt_fops); | 1848 | i_size_write(ent->d_inode, adev->mc.mc_vram_size); |
1755 | if (IS_ERR(ent)) | 1849 | else if (ttm_debugfs_entries[count].domain == TTM_PL_TT) |
1756 | return PTR_ERR(ent); | 1850 | i_size_write(ent->d_inode, adev->mc.gart_size); |
1757 | i_size_write(ent->d_inode, adev->mc.gart_size); | 1851 | adev->mman.debugfs_entries[count] = ent; |
1758 | adev->mman.gtt = ent; | 1852 | } |
1759 | 1853 | ||
1760 | #endif | ||
1761 | count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); | 1854 | count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); |
1762 | 1855 | ||
1763 | #ifdef CONFIG_SWIOTLB | 1856 | #ifdef CONFIG_SWIOTLB |
@@ -1767,7 +1860,6 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) | |||
1767 | 1860 | ||
1768 | return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count); | 1861 | return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count); |
1769 | #else | 1862 | #else |
1770 | |||
1771 | return 0; | 1863 | return 0; |
1772 | #endif | 1864 | #endif |
1773 | } | 1865 | } |
@@ -1775,14 +1867,9 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) | |||
1775 | static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev) | 1867 | static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev) |
1776 | { | 1868 | { |
1777 | #if defined(CONFIG_DEBUG_FS) | 1869 | #if defined(CONFIG_DEBUG_FS) |
1870 | unsigned i; | ||
1778 | 1871 | ||
1779 | debugfs_remove(adev->mman.vram); | 1872 | for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++) |
1780 | adev->mman.vram = NULL; | 1873 | debugfs_remove(adev->mman.debugfs_entries[i]); |
1781 | |||
1782 | #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS | ||
1783 | debugfs_remove(adev->mman.gtt); | ||
1784 | adev->mman.gtt = NULL; | ||
1785 | #endif | ||
1786 | |||
1787 | #endif | 1874 | #endif |
1788 | } | 1875 | } |
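The new amdgpu_iova debugfs file above translates a page-aligned IOVA into a physical address through the device's IOMMU domain: reads must be exactly 8 bytes and the offset must be page aligned. A hedged userspace sketch follows; the debugfs path is an assumed typical location for the first DRM device and requires a mounted debugfs plus root privileges.

#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Assumed typical debugfs location for the first DRM device. */
	const char *path = "/sys/kernel/debug/dri/0/amdgpu_iova";
	uint64_t iova, phys;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <page-aligned iova>\n", argv[0]);
		return 1;
	}
	iova = strtoull(argv[1], NULL, 0);

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* The file only accepts 8-byte reads at page-aligned offsets. */
	if (pread(fd, &phys, sizeof(phys), iova) != sizeof(phys)) {
		perror("pread");
		close(fd);
		return 1;
	}

	printf("iova 0x%" PRIx64 " -> phys 0x%" PRIx64 "\n", iova, phys);
	close(fd);
	return 0;
}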
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 0e2399f32de7..7abae6867339 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #ifndef __AMDGPU_TTM_H__ | 24 | #ifndef __AMDGPU_TTM_H__ |
25 | #define __AMDGPU_TTM_H__ | 25 | #define __AMDGPU_TTM_H__ |
26 | 26 | ||
27 | #include "amdgpu.h" | ||
27 | #include "gpu_scheduler.h" | 28 | #include "gpu_scheduler.h" |
28 | 29 | ||
29 | #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) | 30 | #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) |
@@ -45,8 +46,7 @@ struct amdgpu_mman { | |||
45 | bool initialized; | 46 | bool initialized; |
46 | 47 | ||
47 | #if defined(CONFIG_DEBUG_FS) | 48 | #if defined(CONFIG_DEBUG_FS) |
48 | struct dentry *vram; | 49 | struct dentry *debugfs_entries[8]; |
49 | struct dentry *gtt; | ||
50 | #endif | 50 | #endif |
51 | 51 | ||
52 | /* buffer handling */ | 52 | /* buffer handling */ |
@@ -62,10 +62,10 @@ extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; | |||
62 | extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; | 62 | extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; |
63 | 63 | ||
64 | bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem); | 64 | bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem); |
65 | int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, | 65 | uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); |
66 | struct ttm_buffer_object *tbo, | 66 | |
67 | const struct ttm_place *place, | 67 | uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); |
68 | struct ttm_mem_reg *mem); | 68 | uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); |
69 | 69 | ||
70 | int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, | 70 | int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, |
71 | uint64_t dst_offset, uint32_t byte_count, | 71 | uint64_t dst_offset, uint32_t byte_count, |
@@ -82,4 +82,20 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); | |||
82 | int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); | 82 | int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); |
83 | int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); | 83 | int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); |
84 | 84 | ||
85 | int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); | ||
86 | void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); | ||
87 | void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); | ||
88 | int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, | ||
89 | uint32_t flags); | ||
90 | bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); | ||
91 | struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm); | ||
92 | bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, | ||
93 | unsigned long end); | ||
94 | bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, | ||
95 | int *last_invalidated); | ||
96 | bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); | ||
97 | bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); | ||
98 | uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, | ||
99 | struct ttm_mem_reg *mem); | ||
100 | |||
85 | #endif | 101 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 36c763310df5..65649026b836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | |||
@@ -270,12 +270,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) | |||
270 | else | 270 | else |
271 | return AMDGPU_FW_LOAD_SMU; | 271 | return AMDGPU_FW_LOAD_SMU; |
272 | case CHIP_VEGA10: | 272 | case CHIP_VEGA10: |
273 | if (!load_type) | ||
274 | return AMDGPU_FW_LOAD_DIRECT; | ||
275 | else | ||
276 | return AMDGPU_FW_LOAD_PSP; | ||
277 | case CHIP_RAVEN: | 273 | case CHIP_RAVEN: |
278 | if (load_type != 2) | 274 | if (!load_type) |
279 | return AMDGPU_FW_LOAD_DIRECT; | 275 | return AMDGPU_FW_LOAD_DIRECT; |
280 | else | 276 | else |
281 | return AMDGPU_FW_LOAD_PSP; | 277 | return AMDGPU_FW_LOAD_PSP; |
@@ -364,8 +360,6 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, | |||
364 | int amdgpu_ucode_init_bo(struct amdgpu_device *adev) | 360 | int amdgpu_ucode_init_bo(struct amdgpu_device *adev) |
365 | { | 361 | { |
366 | struct amdgpu_bo **bo = &adev->firmware.fw_buf; | 362 | struct amdgpu_bo **bo = &adev->firmware.fw_buf; |
367 | uint64_t fw_mc_addr; | ||
368 | void *fw_buf_ptr = NULL; | ||
369 | uint64_t fw_offset = 0; | 363 | uint64_t fw_offset = 0; |
370 | int i, err; | 364 | int i, err; |
371 | struct amdgpu_firmware_info *ucode = NULL; | 365 | struct amdgpu_firmware_info *ucode = NULL; |
@@ -376,37 +370,39 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) | |||
376 | return 0; | 370 | return 0; |
377 | } | 371 | } |
378 | 372 | ||
379 | err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, | 373 | if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) { |
380 | amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, | 374 | err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, |
381 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | 375 | amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, |
382 | NULL, NULL, 0, bo); | 376 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, |
383 | if (err) { | 377 | NULL, NULL, 0, bo); |
384 | dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); | 378 | if (err) { |
385 | goto failed; | 379 | dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); |
386 | } | 380 | goto failed; |
381 | } | ||
387 | 382 | ||
388 | err = amdgpu_bo_reserve(*bo, false); | 383 | err = amdgpu_bo_reserve(*bo, false); |
389 | if (err) { | 384 | if (err) { |
390 | dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err); | 385 | dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err); |
391 | goto failed_reserve; | 386 | goto failed_reserve; |
392 | } | 387 | } |
393 | 388 | ||
394 | err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, | 389 | err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, |
395 | &fw_mc_addr); | 390 | &adev->firmware.fw_buf_mc); |
396 | if (err) { | 391 | if (err) { |
397 | dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err); | 392 | dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err); |
398 | goto failed_pin; | 393 | goto failed_pin; |
399 | } | 394 | } |
400 | 395 | ||
401 | err = amdgpu_bo_kmap(*bo, &fw_buf_ptr); | 396 | err = amdgpu_bo_kmap(*bo, &adev->firmware.fw_buf_ptr); |
402 | if (err) { | 397 | if (err) { |
403 | dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err); | 398 | dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err); |
404 | goto failed_kmap; | 399 | goto failed_kmap; |
405 | } | 400 | } |
406 | 401 | ||
407 | amdgpu_bo_unreserve(*bo); | 402 | amdgpu_bo_unreserve(*bo); |
403 | } | ||
408 | 404 | ||
409 | memset(fw_buf_ptr, 0, adev->firmware.fw_size); | 405 | memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size); |
410 | 406 | ||
411 | /* | 407 | /* |
412 | * if SMU loaded firmware, it needn't add SMC, UVD, and VCE | 408 | * if SMU loaded firmware, it needn't add SMC, UVD, and VCE |
@@ -425,14 +421,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) | |||
425 | ucode = &adev->firmware.ucode[i]; | 421 | ucode = &adev->firmware.ucode[i]; |
426 | if (ucode->fw) { | 422 | if (ucode->fw) { |
427 | header = (const struct common_firmware_header *)ucode->fw->data; | 423 | header = (const struct common_firmware_header *)ucode->fw->data; |
428 | amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset, | 424 | amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset, |
429 | (void *)((uint8_t *)fw_buf_ptr + fw_offset)); | 425 | adev->firmware.fw_buf_ptr + fw_offset); |
430 | if (i == AMDGPU_UCODE_ID_CP_MEC1 && | 426 | if (i == AMDGPU_UCODE_ID_CP_MEC1 && |
431 | adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { | 427 | adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { |
432 | const struct gfx_firmware_header_v1_0 *cp_hdr; | 428 | const struct gfx_firmware_header_v1_0 *cp_hdr; |
433 | cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; | 429 | cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; |
434 | amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset, | 430 | amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset, |
435 | fw_buf_ptr + fw_offset); | 431 | adev->firmware.fw_buf_ptr + fw_offset); |
436 | fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); | 432 | fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); |
437 | } | 433 | } |
438 | fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE); | 434 | fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE); |
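With the MC address and kernel mapping promoted into adev->firmware (fw_buf_mc, fw_buf_ptr), the firmware BO created on first init survives an SR-IOV reset, and the allocate/pin/kmap block above is skipped on re-init. A rough sketch of what the reload path relies on; example_reload_ucode() is hypothetical, not taken from the patch:

static void example_reload_ucode(struct amdgpu_device *adev)
{
        /* no new allocation: the BO pinned at first init is still mapped */
        memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);

        /* each ucode is then re-copied at adev->firmware.fw_buf_mc + fw_offset,
         * exactly as amdgpu_ucode_init_bo() does after the memset */
}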
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index aefecf6c1e7b..e8bd50cf9785 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | |||
@@ -269,6 +269,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) | |||
269 | 269 | ||
270 | int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) | 270 | int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) |
271 | { | 271 | { |
272 | int i; | ||
272 | kfree(adev->uvd.saved_bo); | 273 | kfree(adev->uvd.saved_bo); |
273 | 274 | ||
274 | amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); | 275 | amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); |
@@ -279,6 +280,9 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) | |||
279 | 280 | ||
280 | amdgpu_ring_fini(&adev->uvd.ring); | 281 | amdgpu_ring_fini(&adev->uvd.ring); |
281 | 282 | ||
283 | for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) | ||
284 | amdgpu_ring_fini(&adev->uvd.ring_enc[i]); | ||
285 | |||
282 | release_firmware(adev->uvd.fw); | 286 | release_firmware(adev->uvd.fw); |
283 | 287 | ||
284 | return 0; | 288 | return 0; |
@@ -410,10 +414,10 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) | |||
410 | uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); | 414 | uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); |
411 | int r = 0; | 415 | int r = 0; |
412 | 416 | ||
413 | mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo); | 417 | r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); |
414 | if (mapping == NULL) { | 418 | if (r) { |
415 | DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); | 419 | DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); |
416 | return -EINVAL; | 420 | return r; |
417 | } | 421 | } |
418 | 422 | ||
419 | if (!ctx->parser->adev->uvd.address_64_bit) { | 423 | if (!ctx->parser->adev->uvd.address_64_bit) { |
@@ -588,6 +592,10 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, | |||
588 | } | 592 | } |
589 | break; | 593 | break; |
590 | 594 | ||
595 | case 8: /* MJPEG */ | ||
596 | min_dpb_size = 0; | ||
597 | break; | ||
598 | |||
591 | case 16: /* H265 */ | 599 | case 16: /* H265 */ |
592 | image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2; | 600 | image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2; |
593 | image_size = ALIGN(image_size, 256); | 601 | image_size = ALIGN(image_size, 256); |
@@ -733,10 +741,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) | |||
733 | uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); | 741 | uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); |
734 | int r; | 742 | int r; |
735 | 743 | ||
736 | mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo); | 744 | r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); |
737 | if (mapping == NULL) { | 745 | if (r) { |
738 | DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); | 746 | DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); |
739 | return -EINVAL; | 747 | return r; |
740 | } | 748 | } |
741 | 749 | ||
742 | start = amdgpu_bo_gpu_offset(bo); | 750 | start = amdgpu_bo_gpu_offset(bo); |
@@ -913,10 +921,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) | |||
913 | return -EINVAL; | 921 | return -EINVAL; |
914 | } | 922 | } |
915 | 923 | ||
916 | r = amdgpu_cs_sysvm_access_required(parser); | ||
917 | if (r) | ||
918 | return r; | ||
919 | |||
920 | ctx.parser = parser; | 924 | ctx.parser = parser; |
921 | ctx.buf_sizes = buf_sizes; | 925 | ctx.buf_sizes = buf_sizes; |
922 | ctx.ib_idx = ib_idx; | 926 | ctx.ib_idx = ib_idx; |
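amdgpu_cs_find_mapping() now returns an error code and hands the mapping back through an out parameter, so UVD (and VCE below) can propagate the real error instead of a blanket -EINVAL. A minimal sketch of the new calling convention; example_lookup() is illustrative only:

static int example_lookup(struct amdgpu_cs_parser *parser, uint64_t addr)
{
        struct amdgpu_bo_va_mapping *mapping;
        struct amdgpu_bo *bo;
        int r;

        r = amdgpu_cs_find_mapping(parser, addr, &bo, &mapping);
        if (r)          /* no BO covers addr: pass the error on */
                return r;

        /* ... validate bo and mapping as before ... */
        return 0;
}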
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index c855366521ab..b46280c1279f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | |||
@@ -559,6 +559,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, | |||
559 | struct amdgpu_bo_va_mapping *mapping; | 559 | struct amdgpu_bo_va_mapping *mapping; |
560 | struct amdgpu_bo *bo; | 560 | struct amdgpu_bo *bo; |
561 | uint64_t addr; | 561 | uint64_t addr; |
562 | int r; | ||
562 | 563 | ||
563 | if (index == 0xffffffff) | 564 | if (index == 0xffffffff) |
564 | index = 0; | 565 | index = 0; |
@@ -567,11 +568,11 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, | |||
567 | ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; | 568 | ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; |
568 | addr += ((uint64_t)size) * ((uint64_t)index); | 569 | addr += ((uint64_t)size) * ((uint64_t)index); |
569 | 570 | ||
570 | mapping = amdgpu_cs_find_mapping(p, addr, &bo); | 571 | r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); |
571 | if (mapping == NULL) { | 572 | if (r) { |
572 | DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", | 573 | DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", |
573 | addr, lo, hi, size, index); | 574 | addr, lo, hi, size, index); |
574 | return -EINVAL; | 575 | return r; |
575 | } | 576 | } |
576 | 577 | ||
577 | if ((addr + (uint64_t)size) > | 578 | if ((addr + (uint64_t)size) > |
@@ -652,10 +653,6 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) | |||
652 | p->job->vm = NULL; | 653 | p->job->vm = NULL; |
653 | ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); | 654 | ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); |
654 | 655 | ||
655 | r = amdgpu_cs_sysvm_access_required(p); | ||
656 | if (r) | ||
657 | return r; | ||
658 | |||
659 | while (idx < ib->length_dw) { | 656 | while (idx < ib->length_dw) { |
660 | uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); | 657 | uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); |
661 | uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); | 658 | uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8a081e162d13..ab05121b9272 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | |||
@@ -46,14 +46,14 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev) | |||
46 | * address within META_DATA init package to support SRIOV gfx preemption. | 46 | * address within META_DATA init package to support SRIOV gfx preemption. |
47 | */ | 47 | */ |
48 | 48 | ||
49 | int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) | 49 | int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
50 | struct amdgpu_bo_va **bo_va) | ||
50 | { | 51 | { |
51 | int r; | ||
52 | struct amdgpu_bo_va *bo_va; | ||
53 | struct ww_acquire_ctx ticket; | 52 | struct ww_acquire_ctx ticket; |
54 | struct list_head list; | 53 | struct list_head list; |
55 | struct amdgpu_bo_list_entry pd; | 54 | struct amdgpu_bo_list_entry pd; |
56 | struct ttm_validate_buffer csa_tv; | 55 | struct ttm_validate_buffer csa_tv; |
56 | int r; | ||
57 | 57 | ||
58 | INIT_LIST_HEAD(&list); | 58 | INIT_LIST_HEAD(&list); |
59 | INIT_LIST_HEAD(&csa_tv.head); | 59 | INIT_LIST_HEAD(&csa_tv.head); |
@@ -69,34 +69,33 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
69 | return r; | 69 | return r; |
70 | } | 70 | } |
71 | 71 | ||
72 | bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); | 72 | *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); |
73 | if (!bo_va) { | 73 | if (!*bo_va) { |
74 | ttm_eu_backoff_reservation(&ticket, &list); | 74 | ttm_eu_backoff_reservation(&ticket, &list); |
75 | DRM_ERROR("failed to create bo_va for static CSA\n"); | 75 | DRM_ERROR("failed to create bo_va for static CSA\n"); |
76 | return -ENOMEM; | 76 | return -ENOMEM; |
77 | } | 77 | } |
78 | 78 | ||
79 | r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR, | 79 | r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR, |
80 | AMDGPU_CSA_SIZE); | 80 | AMDGPU_CSA_SIZE); |
81 | if (r) { | 81 | if (r) { |
82 | DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); | 82 | DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); |
83 | amdgpu_vm_bo_rmv(adev, bo_va); | 83 | amdgpu_vm_bo_rmv(adev, *bo_va); |
84 | ttm_eu_backoff_reservation(&ticket, &list); | 84 | ttm_eu_backoff_reservation(&ticket, &list); |
85 | return r; | 85 | return r; |
86 | } | 86 | } |
87 | 87 | ||
88 | r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE, | 88 | r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE, |
89 | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | | 89 | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | |
90 | AMDGPU_PTE_EXECUTABLE); | 90 | AMDGPU_PTE_EXECUTABLE); |
91 | 91 | ||
92 | if (r) { | 92 | if (r) { |
93 | DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); | 93 | DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); |
94 | amdgpu_vm_bo_rmv(adev, bo_va); | 94 | amdgpu_vm_bo_rmv(adev, *bo_va); |
95 | ttm_eu_backoff_reservation(&ticket, &list); | 95 | ttm_eu_backoff_reservation(&ticket, &list); |
96 | return r; | 96 | return r; |
97 | } | 97 | } |
98 | 98 | ||
99 | vm->csa_bo_va = bo_va; | ||
100 | ttm_eu_backoff_reservation(&ticket, &list); | 99 | ttm_eu_backoff_reservation(&ticket, &list); |
101 | return 0; | 100 | return 0; |
102 | } | 101 | } |
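amdgpu_map_static_csa() no longer stores the result in vm->csa_bo_va; the caller receives the bo_va through the new out parameter and owns its lifetime. A caller-side sketch, where example_open_csa() and the csa_va field on the per-file private data are assumptions for illustration, not code from this patch:

static int example_open_csa(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
{
        struct amdgpu_bo_va *csa_va = NULL;
        int r;

        r = amdgpu_map_static_csa(adev, &fpriv->vm, &csa_va);
        if (r)
                return r;

        /* the caller, not the VM, now tracks the CSA bo_va */
        fpriv->csa_va = csa_va;
        return 0;
}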
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index e5b1baf387c1..afcfb8bcfb65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | |||
@@ -90,7 +90,8 @@ static inline bool is_virtual_machine(void) | |||
90 | 90 | ||
91 | struct amdgpu_vm; | 91 | struct amdgpu_vm; |
92 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev); | 92 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev); |
93 | int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm); | 93 | int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
94 | struct amdgpu_bo_va **bo_va); | ||
94 | void amdgpu_virt_init_setting(struct amdgpu_device *adev); | 95 | void amdgpu_virt_init_setting(struct amdgpu_device *adev); |
95 | uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); | 96 | uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); |
96 | void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); | 97 | void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ce36652029e..bbcc67038203 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
@@ -27,12 +27,59 @@ | |||
27 | */ | 27 | */ |
28 | #include <linux/dma-fence-array.h> | 28 | #include <linux/dma-fence-array.h> |
29 | #include <linux/interval_tree_generic.h> | 29 | #include <linux/interval_tree_generic.h> |
30 | #include <linux/idr.h> | ||
30 | #include <drm/drmP.h> | 31 | #include <drm/drmP.h> |
31 | #include <drm/amdgpu_drm.h> | 32 | #include <drm/amdgpu_drm.h> |
32 | #include "amdgpu.h" | 33 | #include "amdgpu.h" |
33 | #include "amdgpu_trace.h" | 34 | #include "amdgpu_trace.h" |
34 | 35 | ||
35 | /* | 36 | /* |
37 | * PASID manager | ||
38 | * | ||
39 | * PASIDs are global address space identifiers that can be shared | ||
40 | * between the GPU, an IOMMU and the driver. VMs on different devices | ||
41 | * may use the same PASID if they share the same address | ||
42 | * space. Therefore PASIDs are allocated using a global IDA. VMs are | ||
43 | * looked up from the PASID per amdgpu_device. | ||
44 | */ | ||
45 | static DEFINE_IDA(amdgpu_vm_pasid_ida); | ||
46 | |||
47 | /** | ||
48 | * amdgpu_vm_alloc_pasid - Allocate a PASID | ||
49 | * @bits: Maximum width of the PASID in bits, must be at least 1 | ||
50 | * | ||
51 | * Allocates a PASID of the given width while keeping smaller PASIDs | ||
52 | * available if possible. | ||
53 | * | ||
54 | * Returns a positive integer on success. Returns %-EINVAL if bits==0. | ||
55 | * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on | ||
56 | * memory allocation failure. | ||
57 | */ | ||
58 | int amdgpu_vm_alloc_pasid(unsigned int bits) | ||
59 | { | ||
60 | int pasid = -EINVAL; | ||
61 | |||
62 | for (bits = min(bits, 31U); bits > 0; bits--) { | ||
63 | pasid = ida_simple_get(&amdgpu_vm_pasid_ida, | ||
64 | 1U << (bits - 1), 1U << bits, | ||
65 | GFP_KERNEL); | ||
66 | if (pasid != -ENOSPC) | ||
67 | break; | ||
68 | } | ||
69 | |||
70 | return pasid; | ||
71 | } | ||
72 | |||
73 | /** | ||
74 | * amdgpu_vm_free_pasid - Free a PASID | ||
75 | * @pasid: PASID to free | ||
76 | */ | ||
77 | void amdgpu_vm_free_pasid(unsigned int pasid) | ||
78 | { | ||
79 | ida_simple_remove(&amdgpu_vm_pasid_ida, pasid); | ||
80 | } | ||
81 | |||
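/*
 * Usage sketch (not part of this patch): a per-VM PASID would typically be
 * allocated once at VM init and released at teardown, e.g.
 *
 *      int pasid = amdgpu_vm_alloc_pasid(16);
 *      if (pasid < 0)
 *              return pasid;
 *      ...
 *      amdgpu_vm_free_pasid(pasid);
 *
 * With bits == 16 the IDA first tries the range [1 << 15, 1 << 16) and only
 * falls back to smaller ranges on -ENOSPC, keeping low PASID values free
 * for callers that can only address a narrower PASID space.
 */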
82 | /* | ||
36 | * GPUVM | 83 | * GPUVM |
37 | * GPUVM is similar to the legacy gart on older asics, however | 84 | * GPUVM is similar to the legacy gart on older asics, however |
38 | * rather than there being a single global gart table | 85 | * rather than there being a single global gart table |
@@ -140,7 +187,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
140 | struct list_head *validated, | 187 | struct list_head *validated, |
141 | struct amdgpu_bo_list_entry *entry) | 188 | struct amdgpu_bo_list_entry *entry) |
142 | { | 189 | { |
143 | entry->robj = vm->root.bo; | 190 | entry->robj = vm->root.base.bo; |
144 | entry->priority = 0; | 191 | entry->priority = 0; |
145 | entry->tv.bo = &entry->robj->tbo; | 192 | entry->tv.bo = &entry->robj->tbo; |
146 | entry->tv.shared = true; | 193 | entry->tv.shared = true; |
@@ -149,54 +196,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
149 | } | 196 | } |
150 | 197 | ||
151 | /** | 198 | /** |
152 | * amdgpu_vm_validate_layer - validate a single page table level | ||
153 | * | ||
154 | * @parent: parent page table level | ||
155 | * @validate: callback to do the validation | ||
156 | * @param: parameter for the validation callback | ||
157 | * | ||
158 | * Validate the page table BOs on command submission if neccessary. | ||
159 | */ | ||
160 | static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, | ||
161 | int (*validate)(void *, struct amdgpu_bo *), | ||
162 | void *param, bool use_cpu_for_update) | ||
163 | { | ||
164 | unsigned i; | ||
165 | int r; | ||
166 | |||
167 | if (use_cpu_for_update) { | ||
168 | r = amdgpu_bo_kmap(parent->bo, NULL); | ||
169 | if (r) | ||
170 | return r; | ||
171 | } | ||
172 | |||
173 | if (!parent->entries) | ||
174 | return 0; | ||
175 | |||
176 | for (i = 0; i <= parent->last_entry_used; ++i) { | ||
177 | struct amdgpu_vm_pt *entry = &parent->entries[i]; | ||
178 | |||
179 | if (!entry->bo) | ||
180 | continue; | ||
181 | |||
182 | r = validate(param, entry->bo); | ||
183 | if (r) | ||
184 | return r; | ||
185 | |||
186 | /* | ||
187 | * Recurse into the sub directory. This is harmless because we | ||
188 | * have only a maximum of 5 layers. | ||
189 | */ | ||
190 | r = amdgpu_vm_validate_level(entry, validate, param, | ||
191 | use_cpu_for_update); | ||
192 | if (r) | ||
193 | return r; | ||
194 | } | ||
195 | |||
196 | return r; | ||
197 | } | ||
198 | |||
199 | /** | ||
200 | * amdgpu_vm_validate_pt_bos - validate the page table BOs | 199 | * amdgpu_vm_validate_pt_bos - validate the page table BOs |
201 | * | 200 | * |
202 | * @adev: amdgpu device pointer | 201 | * @adev: amdgpu device pointer |
@@ -210,64 +209,70 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
210 | int (*validate)(void *p, struct amdgpu_bo *bo), | 209 | int (*validate)(void *p, struct amdgpu_bo *bo), |
211 | void *param) | 210 | void *param) |
212 | { | 211 | { |
213 | uint64_t num_evictions; | 212 | struct ttm_bo_global *glob = adev->mman.bdev.glob; |
214 | 213 | int r; | |
215 | /* We only need to validate the page tables | ||
216 | * if they aren't already valid. | ||
217 | */ | ||
218 | num_evictions = atomic64_read(&adev->num_evictions); | ||
219 | if (num_evictions == vm->last_eviction_counter) | ||
220 | return 0; | ||
221 | 214 | ||
222 | return amdgpu_vm_validate_level(&vm->root, validate, param, | 215 | spin_lock(&vm->status_lock); |
223 | vm->use_cpu_for_update); | 216 | while (!list_empty(&vm->evicted)) { |
224 | } | 217 | struct amdgpu_vm_bo_base *bo_base; |
218 | struct amdgpu_bo *bo; | ||
225 | 219 | ||
226 | /** | 220 | bo_base = list_first_entry(&vm->evicted, |
227 | * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail | 221 | struct amdgpu_vm_bo_base, |
228 | * | 222 | vm_status); |
229 | * @adev: amdgpu device instance | 223 | spin_unlock(&vm->status_lock); |
230 | * @vm: vm providing the BOs | ||
231 | * | ||
232 | * Move the PT BOs to the tail of the LRU. | ||
233 | */ | ||
234 | static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent) | ||
235 | { | ||
236 | unsigned i; | ||
237 | 224 | ||
238 | if (!parent->entries) | 225 | bo = bo_base->bo; |
239 | return; | 226 | BUG_ON(!bo); |
227 | if (bo->parent) { | ||
228 | r = validate(param, bo); | ||
229 | if (r) | ||
230 | return r; | ||
240 | 231 | ||
241 | for (i = 0; i <= parent->last_entry_used; ++i) { | 232 | spin_lock(&glob->lru_lock); |
242 | struct amdgpu_vm_pt *entry = &parent->entries[i]; | 233 | ttm_bo_move_to_lru_tail(&bo->tbo); |
234 | if (bo->shadow) | ||
235 | ttm_bo_move_to_lru_tail(&bo->shadow->tbo); | ||
236 | spin_unlock(&glob->lru_lock); | ||
237 | } | ||
243 | 238 | ||
244 | if (!entry->bo) | 239 | if (bo->tbo.type == ttm_bo_type_kernel && |
245 | continue; | 240 | vm->use_cpu_for_update) { |
241 | r = amdgpu_bo_kmap(bo, NULL); | ||
242 | if (r) | ||
243 | return r; | ||
244 | } | ||
246 | 245 | ||
247 | ttm_bo_move_to_lru_tail(&entry->bo->tbo); | 246 | spin_lock(&vm->status_lock); |
248 | amdgpu_vm_move_level_in_lru(entry); | 247 | if (bo->tbo.type != ttm_bo_type_kernel) |
248 | list_move(&bo_base->vm_status, &vm->moved); | ||
249 | else | ||
250 | list_move(&bo_base->vm_status, &vm->relocated); | ||
249 | } | 251 | } |
252 | spin_unlock(&vm->status_lock); | ||
253 | |||
254 | return 0; | ||
250 | } | 255 | } |
251 | 256 | ||
252 | /** | 257 | /** |
253 | * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail | 258 | * amdgpu_vm_ready - check VM is ready for updates |
254 | * | 259 | * |
255 | * @adev: amdgpu device instance | 260 | * @vm: VM to check |
256 | * @vm: vm providing the BOs | ||
257 | * | 261 | * |
258 | * Move the PT BOs to the tail of the LRU. | 262 | * Check if all VM PDs/PTs are ready for updates |
259 | */ | 263 | */ |
260 | void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, | 264 | bool amdgpu_vm_ready(struct amdgpu_vm *vm) |
261 | struct amdgpu_vm *vm) | ||
262 | { | 265 | { |
263 | struct ttm_bo_global *glob = adev->mman.bdev.glob; | 266 | bool ready; |
264 | 267 | ||
265 | spin_lock(&glob->lru_lock); | 268 | spin_lock(&vm->status_lock); |
266 | amdgpu_vm_move_level_in_lru(&vm->root); | 269 | ready = list_empty(&vm->evicted); |
267 | spin_unlock(&glob->lru_lock); | 270 | spin_unlock(&vm->status_lock); |
271 | |||
272 | return ready; | ||
268 | } | 273 | } |
269 | 274 | ||
270 | /** | 275 | /** |
271 | * amdgpu_vm_alloc_levels - allocate the PD/PT levels | 276 | * amdgpu_vm_alloc_levels - allocate the PD/PT levels |
272 | * | 277 | * |
273 | * @adev: amdgpu_device pointer | 278 | * @adev: amdgpu_device pointer |
@@ -330,11 +335,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
330 | 335 | ||
331 | /* walk over the address space and allocate the page tables */ | 336 | /* walk over the address space and allocate the page tables */ |
332 | for (pt_idx = from; pt_idx <= to; ++pt_idx) { | 337 | for (pt_idx = from; pt_idx <= to; ++pt_idx) { |
333 | struct reservation_object *resv = vm->root.bo->tbo.resv; | 338 | struct reservation_object *resv = vm->root.base.bo->tbo.resv; |
334 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; | 339 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; |
335 | struct amdgpu_bo *pt; | 340 | struct amdgpu_bo *pt; |
336 | 341 | ||
337 | if (!entry->bo) { | 342 | if (!entry->base.bo) { |
338 | r = amdgpu_bo_create(adev, | 343 | r = amdgpu_bo_create(adev, |
339 | amdgpu_vm_bo_size(adev, level), | 344 | amdgpu_vm_bo_size(adev, level), |
340 | AMDGPU_GPU_PAGE_SIZE, true, | 345 | AMDGPU_GPU_PAGE_SIZE, true, |
@@ -355,11 +360,15 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
355 | /* Keep a reference to the root directory to avoid | 360 | /* Keep a reference to the root directory to avoid |
356 | * freeing them up in the wrong order. | 361 | * freeing them up in the wrong order. |
357 | */ | 362 | */ |
358 | pt->parent = amdgpu_bo_ref(vm->root.bo); | 363 | pt->parent = amdgpu_bo_ref(parent->base.bo); |
359 | 364 | ||
360 | entry->bo = pt; | 365 | entry->base.vm = vm; |
366 | entry->base.bo = pt; | ||
367 | list_add_tail(&entry->base.bo_list, &pt->va); | ||
368 | spin_lock(&vm->status_lock); | ||
369 | list_add(&entry->base.vm_status, &vm->relocated); | ||
370 | spin_unlock(&vm->status_lock); | ||
361 | entry->addr = 0; | 371 | entry->addr = 0; |
362 | entry->huge_page = false; | ||
363 | } | 372 | } |
364 | 373 | ||
365 | if (level < adev->vm_manager.num_level) { | 374 | if (level < adev->vm_manager.num_level) { |
@@ -899,8 +908,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, | |||
899 | { | 908 | { |
900 | struct amdgpu_bo_va *bo_va; | 909 | struct amdgpu_bo_va *bo_va; |
901 | 910 | ||
902 | list_for_each_entry(bo_va, &bo->va, bo_list) { | 911 | list_for_each_entry(bo_va, &bo->va, base.bo_list) { |
903 | if (bo_va->vm == vm) { | 912 | if (bo_va->base.vm == vm) { |
904 | return bo_va; | 913 | return bo_va; |
905 | } | 914 | } |
906 | } | 915 | } |
@@ -1025,7 +1034,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
1025 | int r; | 1034 | int r; |
1026 | 1035 | ||
1027 | amdgpu_sync_create(&sync); | 1036 | amdgpu_sync_create(&sync); |
1028 | amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); | 1037 | amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); |
1029 | r = amdgpu_sync_wait(&sync, true); | 1038 | r = amdgpu_sync_wait(&sync, true); |
1030 | amdgpu_sync_free(&sync); | 1039 | amdgpu_sync_free(&sync); |
1031 | 1040 | ||
@@ -1044,18 +1053,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
1044 | */ | 1053 | */ |
1045 | static int amdgpu_vm_update_level(struct amdgpu_device *adev, | 1054 | static int amdgpu_vm_update_level(struct amdgpu_device *adev, |
1046 | struct amdgpu_vm *vm, | 1055 | struct amdgpu_vm *vm, |
1047 | struct amdgpu_vm_pt *parent, | 1056 | struct amdgpu_vm_pt *parent) |
1048 | unsigned level) | ||
1049 | { | 1057 | { |
1050 | struct amdgpu_bo *shadow; | 1058 | struct amdgpu_bo *shadow; |
1051 | struct amdgpu_ring *ring = NULL; | 1059 | struct amdgpu_ring *ring = NULL; |
1052 | uint64_t pd_addr, shadow_addr = 0; | 1060 | uint64_t pd_addr, shadow_addr = 0; |
1053 | uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); | ||
1054 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; | 1061 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; |
1055 | unsigned count = 0, pt_idx, ndw = 0; | 1062 | unsigned count = 0, pt_idx, ndw = 0; |
1056 | struct amdgpu_job *job; | 1063 | struct amdgpu_job *job; |
1057 | struct amdgpu_pte_update_params params; | 1064 | struct amdgpu_pte_update_params params; |
1058 | struct dma_fence *fence = NULL; | 1065 | struct dma_fence *fence = NULL; |
1066 | uint32_t incr; | ||
1059 | 1067 | ||
1060 | int r; | 1068 | int r; |
1061 | 1069 | ||
@@ -1064,21 +1072,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1064 | 1072 | ||
1065 | memset(¶ms, 0, sizeof(params)); | 1073 | memset(¶ms, 0, sizeof(params)); |
1066 | params.adev = adev; | 1074 | params.adev = adev; |
1067 | shadow = parent->bo->shadow; | 1075 | shadow = parent->base.bo->shadow; |
1068 | 1076 | ||
1069 | if (vm->use_cpu_for_update) { | 1077 | if (vm->use_cpu_for_update) { |
1070 | pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); | 1078 | pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); |
1071 | r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); | 1079 | r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); |
1072 | if (unlikely(r)) | 1080 | if (unlikely(r)) |
1073 | return r; | 1081 | return r; |
1074 | 1082 | ||
1075 | params.func = amdgpu_vm_cpu_set_ptes; | 1083 | params.func = amdgpu_vm_cpu_set_ptes; |
1076 | } else { | 1084 | } else { |
1077 | if (shadow) { | ||
1078 | r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); | ||
1079 | if (r) | ||
1080 | return r; | ||
1081 | } | ||
1082 | ring = container_of(vm->entity.sched, struct amdgpu_ring, | 1085 | ring = container_of(vm->entity.sched, struct amdgpu_ring, |
1083 | sched); | 1086 | sched); |
1084 | 1087 | ||
@@ -1088,7 +1091,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1088 | /* assume the worst case */ | 1091 | /* assume the worst case */ |
1089 | ndw += parent->last_entry_used * 6; | 1092 | ndw += parent->last_entry_used * 6; |
1090 | 1093 | ||
1091 | pd_addr = amdgpu_bo_gpu_offset(parent->bo); | 1094 | pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); |
1092 | 1095 | ||
1093 | if (shadow) { | 1096 | if (shadow) { |
1094 | shadow_addr = amdgpu_bo_gpu_offset(shadow); | 1097 | shadow_addr = amdgpu_bo_gpu_offset(shadow); |
@@ -1108,30 +1111,28 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1108 | 1111 | ||
1109 | /* walk over the address space and update the directory */ | 1112 | /* walk over the address space and update the directory */ |
1110 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | 1113 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { |
1111 | struct amdgpu_bo *bo = parent->entries[pt_idx].bo; | 1114 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; |
1115 | struct amdgpu_bo *bo = entry->base.bo; | ||
1112 | uint64_t pde, pt; | 1116 | uint64_t pde, pt; |
1113 | 1117 | ||
1114 | if (bo == NULL) | 1118 | if (bo == NULL) |
1115 | continue; | 1119 | continue; |
1116 | 1120 | ||
1117 | if (bo->shadow) { | 1121 | spin_lock(&vm->status_lock); |
1118 | struct amdgpu_bo *pt_shadow = bo->shadow; | 1122 | list_del_init(&entry->base.vm_status); |
1119 | 1123 | spin_unlock(&vm->status_lock); | |
1120 | r = amdgpu_ttm_bind(&pt_shadow->tbo, | ||
1121 | &pt_shadow->tbo.mem); | ||
1122 | if (r) | ||
1123 | return r; | ||
1124 | } | ||
1125 | 1124 | ||
1126 | pt = amdgpu_bo_gpu_offset(bo); | 1125 | pt = amdgpu_bo_gpu_offset(bo); |
1127 | pt = amdgpu_gart_get_vm_pde(adev, pt); | 1126 | pt = amdgpu_gart_get_vm_pde(adev, pt); |
1128 | if (parent->entries[pt_idx].addr == pt || | 1127 | /* Don't update huge pages here */ |
1129 | parent->entries[pt_idx].huge_page) | 1128 | if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || |
1129 | parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID)) | ||
1130 | continue; | 1130 | continue; |
1131 | 1131 | ||
1132 | parent->entries[pt_idx].addr = pt; | 1132 | parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; |
1133 | 1133 | ||
1134 | pde = pd_addr + pt_idx * 8; | 1134 | pde = pd_addr + pt_idx * 8; |
1135 | incr = amdgpu_bo_size(bo); | ||
1135 | if (((last_pde + 8 * count) != pde) || | 1136 | if (((last_pde + 8 * count) != pde) || |
1136 | ((last_pt + incr * count) != pt) || | 1137 | ((last_pt + incr * count) != pt) || |
1137 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { | 1138 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { |
@@ -1159,7 +1160,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1159 | } | 1160 | } |
1160 | 1161 | ||
1161 | if (count) { | 1162 | if (count) { |
1162 | if (vm->root.bo->shadow) | 1163 | if (vm->root.base.bo->shadow) |
1163 | params.func(¶ms, last_shadow, last_pt, | 1164 | params.func(¶ms, last_shadow, last_pt, |
1164 | count, incr, AMDGPU_PTE_VALID); | 1165 | count, incr, AMDGPU_PTE_VALID); |
1165 | 1166 | ||
@@ -1172,7 +1173,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1172 | amdgpu_job_free(job); | 1173 | amdgpu_job_free(job); |
1173 | } else { | 1174 | } else { |
1174 | amdgpu_ring_pad_ib(ring, params.ib); | 1175 | amdgpu_ring_pad_ib(ring, params.ib); |
1175 | amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, | 1176 | amdgpu_sync_resv(adev, &job->sync, |
1177 | parent->base.bo->tbo.resv, | ||
1176 | AMDGPU_FENCE_OWNER_VM); | 1178 | AMDGPU_FENCE_OWNER_VM); |
1177 | if (shadow) | 1179 | if (shadow) |
1178 | amdgpu_sync_resv(adev, &job->sync, | 1180 | amdgpu_sync_resv(adev, &job->sync, |
@@ -1185,26 +1187,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1185 | if (r) | 1187 | if (r) |
1186 | goto error_free; | 1188 | goto error_free; |
1187 | 1189 | ||
1188 | amdgpu_bo_fence(parent->bo, fence, true); | 1190 | amdgpu_bo_fence(parent->base.bo, fence, true); |
1189 | dma_fence_put(vm->last_dir_update); | 1191 | dma_fence_put(vm->last_update); |
1190 | vm->last_dir_update = dma_fence_get(fence); | 1192 | vm->last_update = fence; |
1191 | dma_fence_put(fence); | ||
1192 | } | 1193 | } |
1193 | } | 1194 | } |
1194 | /* | ||
1195 | * Recurse into the subdirectories. This recursion is harmless because | ||
1196 | * we only have a maximum of 5 layers. | ||
1197 | */ | ||
1198 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | ||
1199 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; | ||
1200 | |||
1201 | if (!entry->bo) | ||
1202 | continue; | ||
1203 | |||
1204 | r = amdgpu_vm_update_level(adev, vm, entry, level + 1); | ||
1205 | if (r) | ||
1206 | return r; | ||
1207 | } | ||
1208 | 1195 | ||
1209 | return 0; | 1196 | return 0; |
1210 | 1197 | ||
@@ -1220,7 +1207,8 @@ error_free: | |||
1220 | * | 1207 | * |
1221 | * Mark all PD level as invalid after an error. | 1208 | * Mark all PD level as invalid after an error. |
1222 | */ | 1209 | */ |
1223 | static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) | 1210 | static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, |
1211 | struct amdgpu_vm_pt *parent) | ||
1224 | { | 1212 | { |
1225 | unsigned pt_idx; | 1213 | unsigned pt_idx; |
1226 | 1214 | ||
@@ -1231,11 +1219,15 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) | |||
1231 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | 1219 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { |
1232 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; | 1220 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; |
1233 | 1221 | ||
1234 | if (!entry->bo) | 1222 | if (!entry->base.bo) |
1235 | continue; | 1223 | continue; |
1236 | 1224 | ||
1237 | entry->addr = ~0ULL; | 1225 | entry->addr = ~0ULL; |
1238 | amdgpu_vm_invalidate_level(entry); | 1226 | spin_lock(&vm->status_lock); |
1227 | if (list_empty(&entry->base.vm_status)) | ||
1228 | list_add(&entry->base.vm_status, &vm->relocated); | ||
1229 | spin_unlock(&vm->status_lock); | ||
1230 | amdgpu_vm_invalidate_level(vm, entry); | ||
1239 | } | 1231 | } |
1240 | } | 1232 | } |
1241 | 1233 | ||
@@ -1253,9 +1245,38 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, | |||
1253 | { | 1245 | { |
1254 | int r; | 1246 | int r; |
1255 | 1247 | ||
1256 | r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); | 1248 | spin_lock(&vm->status_lock); |
1257 | if (r) | 1249 | while (!list_empty(&vm->relocated)) { |
1258 | amdgpu_vm_invalidate_level(&vm->root); | 1250 | struct amdgpu_vm_bo_base *bo_base; |
1251 | struct amdgpu_bo *bo; | ||
1252 | |||
1253 | bo_base = list_first_entry(&vm->relocated, | ||
1254 | struct amdgpu_vm_bo_base, | ||
1255 | vm_status); | ||
1256 | spin_unlock(&vm->status_lock); | ||
1257 | |||
1258 | bo = bo_base->bo->parent; | ||
1259 | if (bo) { | ||
1260 | struct amdgpu_vm_bo_base *parent; | ||
1261 | struct amdgpu_vm_pt *pt; | ||
1262 | |||
1263 | parent = list_first_entry(&bo->va, | ||
1264 | struct amdgpu_vm_bo_base, | ||
1265 | bo_list); | ||
1266 | pt = container_of(parent, struct amdgpu_vm_pt, base); | ||
1267 | |||
1268 | r = amdgpu_vm_update_level(adev, vm, pt); | ||
1269 | if (r) { | ||
1270 | amdgpu_vm_invalidate_level(vm, &vm->root); | ||
1271 | return r; | ||
1272 | } | ||
1273 | spin_lock(&vm->status_lock); | ||
1274 | } else { | ||
1275 | spin_lock(&vm->status_lock); | ||
1276 | list_del_init(&bo_base->vm_status); | ||
1277 | } | ||
1278 | } | ||
1279 | spin_unlock(&vm->status_lock); | ||
1259 | 1280 | ||
1260 | if (vm->use_cpu_for_update) { | 1281 | if (vm->use_cpu_for_update) { |
1261 | /* Flush HDP */ | 1282 | /* Flush HDP */ |
@@ -1286,7 +1307,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, | |||
1286 | *entry = &p->vm->root; | 1307 | *entry = &p->vm->root; |
1287 | while ((*entry)->entries) { | 1308 | while ((*entry)->entries) { |
1288 | idx = addr >> (p->adev->vm_manager.block_size * level--); | 1309 | idx = addr >> (p->adev->vm_manager.block_size * level--); |
1289 | idx %= amdgpu_bo_size((*entry)->bo) / 8; | 1310 | idx %= amdgpu_bo_size((*entry)->base.bo) / 8; |
1290 | *parent = *entry; | 1311 | *parent = *entry; |
1291 | *entry = &(*entry)->entries[idx]; | 1312 | *entry = &(*entry)->entries[idx]; |
1292 | } | 1313 | } |
@@ -1307,55 +1328,62 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, | |||
1307 | * | 1328 | * |
1308 | * Check if we can update the PD with a huge page. | 1329 | * Check if we can update the PD with a huge page. |
1309 | */ | 1330 | */ |
1310 | static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, | 1331 | static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, |
1311 | struct amdgpu_vm_pt *entry, | 1332 | struct amdgpu_vm_pt *entry, |
1312 | struct amdgpu_vm_pt *parent, | 1333 | struct amdgpu_vm_pt *parent, |
1313 | unsigned nptes, uint64_t dst, | 1334 | unsigned nptes, uint64_t dst, |
1314 | uint64_t flags) | 1335 | uint64_t flags) |
1315 | { | 1336 | { |
1316 | bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); | 1337 | bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); |
1317 | uint64_t pd_addr, pde; | 1338 | uint64_t pd_addr, pde; |
1318 | int r; | ||
1319 | 1339 | ||
1320 | /* In the case of a mixed PT the PDE must point to it*/ | 1340 | /* In the case of a mixed PT the PDE must point to it*/ |
1321 | if (p->adev->asic_type < CHIP_VEGA10 || | 1341 | if (p->adev->asic_type < CHIP_VEGA10 || |
1322 | nptes != AMDGPU_VM_PTE_COUNT(p->adev) || | 1342 | nptes != AMDGPU_VM_PTE_COUNT(p->adev) || |
1323 | p->func == amdgpu_vm_do_copy_ptes || | 1343 | p->src || |
1324 | !(flags & AMDGPU_PTE_VALID)) { | 1344 | !(flags & AMDGPU_PTE_VALID)) { |
1325 | 1345 | ||
1326 | dst = amdgpu_bo_gpu_offset(entry->bo); | 1346 | dst = amdgpu_bo_gpu_offset(entry->base.bo); |
1327 | dst = amdgpu_gart_get_vm_pde(p->adev, dst); | 1347 | dst = amdgpu_gart_get_vm_pde(p->adev, dst); |
1328 | flags = AMDGPU_PTE_VALID; | 1348 | flags = AMDGPU_PTE_VALID; |
1329 | } else { | 1349 | } else { |
1350 | /* Set the huge page flag to stop scanning at this PDE */ | ||
1330 | flags |= AMDGPU_PDE_PTE; | 1351 | flags |= AMDGPU_PDE_PTE; |
1331 | } | 1352 | } |
1332 | 1353 | ||
1333 | if (entry->addr == dst && | 1354 | if (entry->addr == (dst | flags)) |
1334 | entry->huge_page == !!(flags & AMDGPU_PDE_PTE)) | 1355 | return; |
1335 | return 0; | ||
1336 | 1356 | ||
1337 | entry->addr = dst; | 1357 | entry->addr = (dst | flags); |
1338 | entry->huge_page = !!(flags & AMDGPU_PDE_PTE); | ||
1339 | 1358 | ||
1340 | if (use_cpu_update) { | 1359 | if (use_cpu_update) { |
1341 | r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr); | 1360 | /* In case a huge page is replaced with a system |
1342 | if (r) | 1361 | * memory mapping, p->pages_addr != NULL and |
1343 | return r; | 1362 | * amdgpu_vm_cpu_set_ptes would try to translate dst |
1363 | * through amdgpu_vm_map_gart. But dst is already a | ||
1364 | * GPU address (of the page table). Disable | ||
1365 | * amdgpu_vm_map_gart temporarily. | ||
1366 | */ | ||
1367 | dma_addr_t *tmp; | ||
1368 | |||
1369 | tmp = p->pages_addr; | ||
1370 | p->pages_addr = NULL; | ||
1344 | 1371 | ||
1372 | pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); | ||
1345 | pde = pd_addr + (entry - parent->entries) * 8; | 1373 | pde = pd_addr + (entry - parent->entries) * 8; |
1346 | amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); | 1374 | amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); |
1375 | |||
1376 | p->pages_addr = tmp; | ||
1347 | } else { | 1377 | } else { |
1348 | if (parent->bo->shadow) { | 1378 | if (parent->base.bo->shadow) { |
1349 | pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); | 1379 | pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); |
1350 | pde = pd_addr + (entry - parent->entries) * 8; | 1380 | pde = pd_addr + (entry - parent->entries) * 8; |
1351 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); | 1381 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); |
1352 | } | 1382 | } |
1353 | pd_addr = amdgpu_bo_gpu_offset(parent->bo); | 1383 | pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); |
1354 | pde = pd_addr + (entry - parent->entries) * 8; | 1384 | pde = pd_addr + (entry - parent->entries) * 8; |
1355 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); | 1385 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); |
1356 | } | 1386 | } |
1357 | |||
1358 | return 0; | ||
1359 | } | 1387 | } |
1360 | 1388 | ||
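/*
 * Resulting PDE encodings, shown for illustration (not from the patch):
 *
 *      regular entry:   entry->addr = pt_gpu_addr | AMDGPU_PTE_VALID
 *      huge-page entry: entry->addr = dst | flags | AMDGPU_PDE_PTE
 *
 * Keeping the flags inside entry->addr lets both amdgpu_vm_update_level()
 * and amdgpu_vm_update_ptes() recognise a huge page with a single
 * "entry->addr & AMDGPU_PDE_PTE" test, replacing the old huge_page bool.
 */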
1361 | /** | 1389 | /** |
@@ -1382,7 +1410,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
1382 | struct amdgpu_bo *pt; | 1410 | struct amdgpu_bo *pt; |
1383 | unsigned nptes; | 1411 | unsigned nptes; |
1384 | bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); | 1412 | bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); |
1385 | int r; | ||
1386 | 1413 | ||
1387 | /* walk over the address space and update the page tables */ | 1414 | /* walk over the address space and update the page tables */ |
1388 | for (addr = start; addr < end; addr += nptes, | 1415 | for (addr = start; addr < end; addr += nptes, |
@@ -1398,15 +1425,13 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
1398 | else | 1425 | else |
1399 | nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); | 1426 | nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); |
1400 | 1427 | ||
1401 | r = amdgpu_vm_handle_huge_pages(params, entry, parent, | 1428 | amdgpu_vm_handle_huge_pages(params, entry, parent, |
1402 | nptes, dst, flags); | 1429 | nptes, dst, flags); |
1403 | if (r) | 1430 | /* We don't need to update PTEs for huge pages */ |
1404 | return r; | 1431 | if (entry->addr & AMDGPU_PDE_PTE) |
1405 | |||
1406 | if (entry->huge_page) | ||
1407 | continue; | 1432 | continue; |
1408 | 1433 | ||
1409 | pt = entry->bo; | 1434 | pt = entry->base.bo; |
1410 | if (use_cpu_update) { | 1435 | if (use_cpu_update) { |
1411 | pe_start = (unsigned long)amdgpu_bo_kptr(pt); | 1436 | pe_start = (unsigned long)amdgpu_bo_kptr(pt); |
1412 | } else { | 1437 | } else { |
@@ -1442,8 +1467,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1442 | uint64_t start, uint64_t end, | 1467 | uint64_t start, uint64_t end, |
1443 | uint64_t dst, uint64_t flags) | 1468 | uint64_t dst, uint64_t flags) |
1444 | { | 1469 | { |
1445 | int r; | ||
1446 | |||
1447 | /** | 1470 | /** |
1448 | * The MC L1 TLB supports variable sized pages, based on a fragment | 1471 | * The MC L1 TLB supports variable sized pages, based on a fragment |
1449 | * field in the PTE. When this field is set to a non-zero value, page | 1472 | * field in the PTE. When this field is set to a non-zero value, page |
@@ -1462,41 +1485,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1462 | * Userspace can support this by aligning virtual base address and | 1485 | * Userspace can support this by aligning virtual base address and |
1463 | * allocation size to the fragment size. | 1486 | * allocation size to the fragment size. |
1464 | */ | 1487 | */ |
1465 | 1488 | unsigned max_frag = params->adev->vm_manager.fragment_size; | |
1466 | /* SI and newer are optimized for 64KB */ | 1489 | int r; |
1467 | unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev); | ||
1468 | uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); | ||
1469 | uint64_t frag_align = 1 << pages_per_frag; | ||
1470 | |||
1471 | uint64_t frag_start = ALIGN(start, frag_align); | ||
1472 | uint64_t frag_end = end & ~(frag_align - 1); | ||
1473 | 1490 | ||
1474 | /* system pages are not contiguous */ | 1491 | /* system pages are not contiguous */
1475 | if (params->src || !(flags & AMDGPU_PTE_VALID) || | 1492 | if (params->src || !(flags & AMDGPU_PTE_VALID)) |
1476 | (frag_start >= frag_end)) | ||
1477 | return amdgpu_vm_update_ptes(params, start, end, dst, flags); | 1493 | return amdgpu_vm_update_ptes(params, start, end, dst, flags); |
1478 | 1494 | ||
1479 | /* handle the 4K area at the beginning */ | 1495 | while (start != end) { |
1480 | if (start != frag_start) { | 1496 | uint64_t frag_flags, frag_end; |
1481 | r = amdgpu_vm_update_ptes(params, start, frag_start, | 1497 | unsigned frag; |
1482 | dst, flags); | 1498 | |
1499 | /* This intentionally wraps around if no bit is set */ | ||
1500 | frag = min((unsigned)ffs(start) - 1, | ||
1501 | (unsigned)fls64(end - start) - 1); | ||
1502 | if (frag >= max_frag) { | ||
1503 | frag_flags = AMDGPU_PTE_FRAG(max_frag); | ||
1504 | frag_end = end & ~((1ULL << max_frag) - 1); | ||
1505 | } else { | ||
1506 | frag_flags = AMDGPU_PTE_FRAG(frag); | ||
1507 | frag_end = start + (1 << frag); | ||
1508 | } | ||
1509 | |||
1510 | r = amdgpu_vm_update_ptes(params, start, frag_end, dst, | ||
1511 | flags | frag_flags); | ||
1483 | if (r) | 1512 | if (r) |
1484 | return r; | 1513 | return r; |
1485 | dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; | ||
1486 | } | ||
1487 | |||
1488 | /* handle the area in the middle */ | ||
1489 | r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, | ||
1490 | flags | frag_flags); | ||
1491 | if (r) | ||
1492 | return r; | ||
1493 | 1514 | ||
1494 | /* handle the 4K area at the end */ | 1515 | dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; |
1495 | if (frag_end != end) { | 1516 | start = frag_end; |
1496 | dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; | ||
1497 | r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); | ||
1498 | } | 1517 | } |
1499 | return r; | 1518 | |
1519 | return 0; | ||
1500 | } | 1520 | } |
1501 | 1521 | ||
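/*
 * Worked example for the fragment loop above (illustrative numbers, assuming
 * the default fragment_size of 9, i.e. 512 pages / 2MB fragments): for
 * start = 0x400 and end = 0x1000 (GPU page indices)
 *
 *      ffs(start) - 1         = 10   (start is aligned to 1024 pages)
 *      fls64(end - start) - 1 = 11   (0xc00 pages remain)
 *      frag = min(10, 11)     = 10   >= max_frag
 *
 * so the range is written with AMDGPU_PTE_FRAG(9) and frag_end rounds down
 * to a 512-page boundary (here end itself); the whole run goes out in one
 * iteration instead of separate head/middle/tail updates.
 */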
1502 | /** | 1522 | /** |
@@ -1504,7 +1524,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1504 | * | 1524 | * |
1505 | * @adev: amdgpu_device pointer | 1525 | * @adev: amdgpu_device pointer |
1506 | * @exclusive: fence we need to sync to | 1526 | * @exclusive: fence we need to sync to |
1507 | * @src: address where to copy page table entries from | ||
1508 | * @pages_addr: DMA addresses to use for mapping | 1527 | * @pages_addr: DMA addresses to use for mapping |
1509 | * @vm: requested vm | 1528 | * @vm: requested vm |
1510 | * @start: start of mapped range | 1529 | * @start: start of mapped range |
@@ -1518,7 +1537,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1518 | */ | 1537 | */ |
1519 | static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | 1538 | static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, |
1520 | struct dma_fence *exclusive, | 1539 | struct dma_fence *exclusive, |
1521 | uint64_t src, | ||
1522 | dma_addr_t *pages_addr, | 1540 | dma_addr_t *pages_addr, |
1523 | struct amdgpu_vm *vm, | 1541 | struct amdgpu_vm *vm, |
1524 | uint64_t start, uint64_t last, | 1542 | uint64_t start, uint64_t last, |
@@ -1536,7 +1554,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1536 | memset(¶ms, 0, sizeof(params)); | 1554 | memset(¶ms, 0, sizeof(params)); |
1537 | params.adev = adev; | 1555 | params.adev = adev; |
1538 | params.vm = vm; | 1556 | params.vm = vm; |
1539 | params.src = src; | ||
1540 | 1557 | ||
1541 | /* sync to everything on unmapping */ | 1558 | /* sync to everything on unmapping */ |
1542 | if (!(flags & AMDGPU_PTE_VALID)) | 1559 | if (!(flags & AMDGPU_PTE_VALID)) |
@@ -1565,10 +1582,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1565 | nptes = last - start + 1; | 1582 | nptes = last - start + 1; |
1566 | 1583 | ||
1567 | /* | 1584 | /* |
1568 | * reserve space for one command every (1 << BLOCK_SIZE) | 1585 | * reserve space for two commands every (1 << BLOCK_SIZE) |
1569 | * entries or 2k dwords (whatever is smaller) | 1586 | * entries or 2k dwords (whatever is smaller) |
1587 | * | ||
1588 | * The second command is for the shadow pagetables. | ||
1570 | */ | 1589 | */ |
1571 | ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1; | 1590 | ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; |
1572 | 1591 | ||
1573 | /* padding, etc. */ | 1592 | /* padding, etc. */ |
1574 | ndw = 64; | 1593 | ndw = 64; |
@@ -1576,15 +1595,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1576 | /* one PDE write for each huge page */ | 1595 | /* one PDE write for each huge page */ |
1577 | ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; | 1596 | ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; |
1578 | 1597 | ||
1579 | if (src) { | 1598 | if (pages_addr) { |
1580 | /* only copy commands needed */ | ||
1581 | ndw += ncmds * 7; | ||
1582 | |||
1583 | params.func = amdgpu_vm_do_copy_ptes; | ||
1584 | |||
1585 | } else if (pages_addr) { | ||
1586 | /* copy commands needed */ | 1599 | /* copy commands needed */ |
1587 | ndw += ncmds * 7; | 1600 | ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; |
1588 | 1601 | ||
1589 | /* and also PTEs */ | 1602 | /* and also PTEs */ |
1590 | ndw += nptes * 2; | 1603 | ndw += nptes * 2; |
@@ -1593,10 +1606,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1593 | 1606 | ||
1594 | } else { | 1607 | } else { |
1595 | /* set page commands needed */ | 1608 | /* set page commands needed */ |
1596 | ndw += ncmds * 10; | 1609 | ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; |
1597 | 1610 | ||
1598 | /* two extra commands for begin/end of fragment */ | 1611 | /* extra commands for begin/end fragments */ |
1599 | ndw += 2 * 10; | 1612 | ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw |
1613 | * adev->vm_manager.fragment_size; | ||
1600 | 1614 | ||
1601 | params.func = amdgpu_vm_do_set_ptes; | 1615 | params.func = amdgpu_vm_do_set_ptes; |
1602 | } | 1616 | } |
@@ -1607,7 +1621,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1607 | 1621 | ||
1608 | params.ib = &job->ibs[0]; | 1622 | params.ib = &job->ibs[0]; |
1609 | 1623 | ||
1610 | if (!src && pages_addr) { | 1624 | if (pages_addr) { |
1611 | uint64_t *pte; | 1625 | uint64_t *pte; |
1612 | unsigned i; | 1626 | unsigned i; |
1613 | 1627 | ||
@@ -1628,12 +1642,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1628 | if (r) | 1642 | if (r) |
1629 | goto error_free; | 1643 | goto error_free; |
1630 | 1644 | ||
1631 | r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv, | 1645 | r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, |
1632 | owner); | 1646 | owner); |
1633 | if (r) | 1647 | if (r) |
1634 | goto error_free; | 1648 | goto error_free; |
1635 | 1649 | ||
1636 | r = reservation_object_reserve_shared(vm->root.bo->tbo.resv); | 1650 | r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); |
1637 | if (r) | 1651 | if (r) |
1638 | goto error_free; | 1652 | goto error_free; |
1639 | 1653 | ||
@@ -1648,14 +1662,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1648 | if (r) | 1662 | if (r) |
1649 | goto error_free; | 1663 | goto error_free; |
1650 | 1664 | ||
1651 | amdgpu_bo_fence(vm->root.bo, f, true); | 1665 | amdgpu_bo_fence(vm->root.base.bo, f, true); |
1652 | dma_fence_put(*fence); | 1666 | dma_fence_put(*fence); |
1653 | *fence = f; | 1667 | *fence = f; |
1654 | return 0; | 1668 | return 0; |
1655 | 1669 | ||
1656 | error_free: | 1670 | error_free: |
1657 | amdgpu_job_free(job); | 1671 | amdgpu_job_free(job); |
1658 | amdgpu_vm_invalidate_level(&vm->root); | 1672 | amdgpu_vm_invalidate_level(vm, &vm->root); |
1659 | return r; | 1673 | return r; |
1660 | } | 1674 | } |
1661 | 1675 | ||
@@ -1664,7 +1678,6 @@ error_free: | |||
1664 | * | 1678 | * |
1665 | * @adev: amdgpu_device pointer | 1679 | * @adev: amdgpu_device pointer |
1666 | * @exclusive: fence we need to sync to | 1680 | * @exclusive: fence we need to sync to |
1667 | * @gtt_flags: flags as they are used for GTT | ||
1668 | * @pages_addr: DMA addresses to use for mapping | 1681 | * @pages_addr: DMA addresses to use for mapping |
1669 | * @vm: requested vm | 1682 | * @vm: requested vm |
1670 | * @mapping: mapped range and flags to use for the update | 1683 | * @mapping: mapped range and flags to use for the update |
@@ -1678,7 +1691,6 @@ error_free: | |||
1678 | */ | 1691 | */ |
1679 | static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | 1692 | static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, |
1680 | struct dma_fence *exclusive, | 1693 | struct dma_fence *exclusive, |
1681 | uint64_t gtt_flags, | ||
1682 | dma_addr_t *pages_addr, | 1694 | dma_addr_t *pages_addr, |
1683 | struct amdgpu_vm *vm, | 1695 | struct amdgpu_vm *vm, |
1684 | struct amdgpu_bo_va_mapping *mapping, | 1696 | struct amdgpu_bo_va_mapping *mapping, |
@@ -1686,7 +1698,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1686 | struct drm_mm_node *nodes, | 1698 | struct drm_mm_node *nodes, |
1687 | struct dma_fence **fence) | 1699 | struct dma_fence **fence) |
1688 | { | 1700 | { |
1689 | uint64_t pfn, src = 0, start = mapping->start; | 1701 | uint64_t pfn, start = mapping->start; |
1690 | int r; | 1702 | int r; |
1691 | 1703 | ||
1692 | /* normally, bo_va->flags only contains READABLE and WRITEABLE bits go here | 1704 |
@@ -1733,11 +1745,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1733 | } | 1745 | } |
1734 | 1746 | ||
1735 | if (pages_addr) { | 1747 | if (pages_addr) { |
1736 | if (flags == gtt_flags) | 1748 | max_entries = min(max_entries, 16ull * 1024ull); |
1737 | src = adev->gart.table_addr + | ||
1738 | (addr >> AMDGPU_GPU_PAGE_SHIFT) * 8; | ||
1739 | else | ||
1740 | max_entries = min(max_entries, 16ull * 1024ull); | ||
1741 | addr = 0; | 1749 | addr = 0; |
1742 | } else if (flags & AMDGPU_PTE_VALID) { | 1750 | } else if (flags & AMDGPU_PTE_VALID) { |
1743 | addr += adev->vm_manager.vram_base_offset; | 1751 | addr += adev->vm_manager.vram_base_offset; |
@@ -1745,8 +1753,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1745 | addr += pfn << PAGE_SHIFT; | 1753 | addr += pfn << PAGE_SHIFT; |
1746 | 1754 | ||
1747 | last = min((uint64_t)mapping->last, start + max_entries - 1); | 1755 | last = min((uint64_t)mapping->last, start + max_entries - 1); |
1748 | r = amdgpu_vm_bo_update_mapping(adev, exclusive, | 1756 | r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm, |
1749 | src, pages_addr, vm, | ||
1750 | start, last, flags, addr, | 1757 | start, last, flags, addr, |
1751 | fence); | 1758 | fence); |
1752 | if (r) | 1759 | if (r) |
@@ -1778,75 +1785,75 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1778 | struct amdgpu_bo_va *bo_va, | 1785 | struct amdgpu_bo_va *bo_va, |
1779 | bool clear) | 1786 | bool clear) |
1780 | { | 1787 | { |
1781 | struct amdgpu_vm *vm = bo_va->vm; | 1788 | struct amdgpu_bo *bo = bo_va->base.bo; |
1789 | struct amdgpu_vm *vm = bo_va->base.vm; | ||
1782 | struct amdgpu_bo_va_mapping *mapping; | 1790 | struct amdgpu_bo_va_mapping *mapping; |
1783 | dma_addr_t *pages_addr = NULL; | 1791 | dma_addr_t *pages_addr = NULL; |
1784 | uint64_t gtt_flags, flags; | ||
1785 | struct ttm_mem_reg *mem; | 1792 | struct ttm_mem_reg *mem; |
1786 | struct drm_mm_node *nodes; | 1793 | struct drm_mm_node *nodes; |
1787 | struct dma_fence *exclusive; | 1794 | struct dma_fence *exclusive, **last_update; |
1795 | uint64_t flags; | ||
1788 | int r; | 1796 | int r; |
1789 | 1797 | ||
1790 | if (clear || !bo_va->bo) { | 1798 | if (clear || !bo_va->base.bo) { |
1791 | mem = NULL; | 1799 | mem = NULL; |
1792 | nodes = NULL; | 1800 | nodes = NULL; |
1793 | exclusive = NULL; | 1801 | exclusive = NULL; |
1794 | } else { | 1802 | } else { |
1795 | struct ttm_dma_tt *ttm; | 1803 | struct ttm_dma_tt *ttm; |
1796 | 1804 | ||
1797 | mem = &bo_va->bo->tbo.mem; | 1805 | mem = &bo_va->base.bo->tbo.mem; |
1798 | nodes = mem->mm_node; | 1806 | nodes = mem->mm_node; |
1799 | if (mem->mem_type == TTM_PL_TT) { | 1807 | if (mem->mem_type == TTM_PL_TT) { |
1800 | ttm = container_of(bo_va->bo->tbo.ttm, struct | 1808 | ttm = container_of(bo_va->base.bo->tbo.ttm, |
1801 | ttm_dma_tt, ttm); | 1809 | struct ttm_dma_tt, ttm); |
1802 | pages_addr = ttm->dma_address; | 1810 | pages_addr = ttm->dma_address; |
1803 | } | 1811 | } |
1804 | exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); | 1812 | exclusive = reservation_object_get_excl(bo->tbo.resv); |
1805 | } | 1813 | } |
1806 | 1814 | ||
1807 | if (bo_va->bo) { | 1815 | if (bo) |
1808 | flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); | 1816 | flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); |
1809 | gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && | 1817 | else |
1810 | adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? | ||
1811 | flags : 0; | ||
1812 | } else { | ||
1813 | flags = 0x0; | 1818 | flags = 0x0; |
1814 | gtt_flags = ~0x0; | ||
1815 | } | ||
1816 | 1819 | ||
1817 | spin_lock(&vm->status_lock); | 1820 | if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) |
1818 | if (!list_empty(&bo_va->vm_status)) | 1821 | last_update = &vm->last_update; |
1822 | else | ||
1823 | last_update = &bo_va->last_pt_update; | ||
1824 | |||
1825 | if (!clear && bo_va->base.moved) { | ||
1826 | bo_va->base.moved = false; | ||
1819 | list_splice_init(&bo_va->valids, &bo_va->invalids); | 1827 | list_splice_init(&bo_va->valids, &bo_va->invalids); |
1820 | spin_unlock(&vm->status_lock); | 1828 | |
1829 | } else if (bo_va->cleared != clear) { | ||
1830 | list_splice_init(&bo_va->valids, &bo_va->invalids); | ||
1831 | } | ||
1821 | 1832 | ||
1822 | list_for_each_entry(mapping, &bo_va->invalids, list) { | 1833 | list_for_each_entry(mapping, &bo_va->invalids, list) { |
1823 | r = amdgpu_vm_bo_split_mapping(adev, exclusive, | 1834 | r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, |
1824 | gtt_flags, pages_addr, vm, | ||
1825 | mapping, flags, nodes, | 1835 | mapping, flags, nodes, |
1826 | &bo_va->last_pt_update); | 1836 | last_update); |
1827 | if (r) | 1837 | if (r) |
1828 | return r; | 1838 | return r; |
1829 | } | 1839 | } |
1830 | 1840 | ||
1831 | if (trace_amdgpu_vm_bo_mapping_enabled()) { | 1841 | if (vm->use_cpu_for_update) { |
1832 | list_for_each_entry(mapping, &bo_va->valids, list) | 1842 | /* Flush HDP */ |
1833 | trace_amdgpu_vm_bo_mapping(mapping); | 1843 | mb(); |
1834 | 1844 | amdgpu_gart_flush_gpu_tlb(adev, 0); | |
1835 | list_for_each_entry(mapping, &bo_va->invalids, list) | ||
1836 | trace_amdgpu_vm_bo_mapping(mapping); | ||
1837 | } | 1845 | } |
1838 | 1846 | ||
1839 | spin_lock(&vm->status_lock); | 1847 | spin_lock(&vm->status_lock); |
1840 | list_splice_init(&bo_va->invalids, &bo_va->valids); | 1848 | list_del_init(&bo_va->base.vm_status); |
1841 | list_del_init(&bo_va->vm_status); | ||
1842 | if (clear) | ||
1843 | list_add(&bo_va->vm_status, &vm->cleared); | ||
1844 | spin_unlock(&vm->status_lock); | 1849 | spin_unlock(&vm->status_lock); |
1845 | 1850 | ||
1846 | if (vm->use_cpu_for_update) { | 1851 | list_splice_init(&bo_va->invalids, &bo_va->valids); |
1847 | /* Flush HDP */ | 1852 | bo_va->cleared = clear; |
1848 | mb(); | 1853 | |
1849 | amdgpu_gart_flush_gpu_tlb(adev, 0); | 1854 | if (trace_amdgpu_vm_bo_mapping_enabled()) { |
1855 | list_for_each_entry(mapping, &bo_va->valids, list) | ||
1856 | trace_amdgpu_vm_bo_mapping(mapping); | ||
1850 | } | 1857 | } |
1851 | 1858 | ||
1852 | return 0; | 1859 | return 0; |
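The reworked amdgpu_vm_bo_update above picks between two fence pointers: clearing updates and per-VM BOs (BOs that share the root page directory's reservation object) are tracked in vm->last_update, while everything else keeps its private bo_va->last_pt_update. A hedged stand-alone model of that decision; the struct names here are simplified stand-ins, not the driver's types:

#include <stdio.h>

struct fence;                              /* opaque stand-in for dma_fence */

struct vm_state {
	struct fence *last_update;         /* shared fence for per-VM BOs */
	void *root_resv;                   /* reservation object of the root PD */
};

struct bo_va_state {
	struct fence *last_pt_update;      /* private fence of this BO-VA */
	void *bo_resv;                     /* reservation object of the mapped BO */
};

/* Mirrors "clear || bo->tbo.resv == vm->root.base.bo->tbo.resv" above. */
static struct fence **pick_fence_slot(struct vm_state *vm,
				      struct bo_va_state *bo_va, int clear)
{
	if (clear || bo_va->bo_resv == vm->root_resv)
		return &vm->last_update;
	return &bo_va->last_pt_update;
}

int main(void)
{
	int resv_a, resv_b;
	struct vm_state vm = { 0, &resv_a };
	struct bo_va_state shared = { 0, &resv_a }, other = { 0, &resv_b };

	printf("shared BO -> vm fence: %d\n",
	       pick_fence_slot(&vm, &shared, 0) == &vm.last_update);
	printf("independent BO -> own fence: %d\n",
	       pick_fence_slot(&vm, &other, 0) == &other.last_pt_update);
	return 0;
}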
@@ -1954,7 +1961,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, | |||
1954 | */ | 1961 | */ |
1955 | static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | 1962 | static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) |
1956 | { | 1963 | { |
1957 | struct reservation_object *resv = vm->root.bo->tbo.resv; | 1964 | struct reservation_object *resv = vm->root.base.bo->tbo.resv; |
1958 | struct dma_fence *excl, **shared; | 1965 | struct dma_fence *excl, **shared; |
1959 | unsigned i, shared_count; | 1966 | unsigned i, shared_count; |
1960 | int r; | 1967 | int r; |
@@ -2012,7 +2019,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
2012 | if (vm->pte_support_ats) | 2019 | if (vm->pte_support_ats) |
2013 | init_pte_value = AMDGPU_PTE_SYSTEM; | 2020 | init_pte_value = AMDGPU_PTE_SYSTEM; |
2014 | 2021 | ||
2015 | r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, | 2022 | r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, |
2016 | mapping->start, mapping->last, | 2023 | mapping->start, mapping->last, |
2017 | init_pte_value, 0, &f); | 2024 | init_pte_value, 0, &f); |
2018 | amdgpu_vm_free_mapping(adev, vm, mapping, f); | 2025 | amdgpu_vm_free_mapping(adev, vm, mapping, f); |
@@ -2034,29 +2041,35 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
2034 | } | 2041 | } |
2035 | 2042 | ||
2036 | /** | 2043 | /** |
2037 | * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT | 2044 | * amdgpu_vm_handle_moved - handle moved BOs in the PT |
2038 | * | 2045 | * |
2039 | * @adev: amdgpu_device pointer | 2046 | * @adev: amdgpu_device pointer |
2040 | * @vm: requested vm | 2047 | * @vm: requested vm |
2048 | * @sync: sync object to add fences to | ||
2041 | * | 2049 | * |
2042 | * Make sure all invalidated BOs are cleared in the PT. | 2050 | * Make sure all BOs which are moved are updated in the PTs. |
2043 | * Returns 0 for success. | 2051 | * Returns 0 for success. |
2044 | * | 2052 | * |
2045 | * PTs have to be reserved and mutex must be locked! | 2053 | * PTs have to be reserved! |
2046 | */ | 2054 | */ |
2047 | int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, | 2055 | int amdgpu_vm_handle_moved(struct amdgpu_device *adev, |
2048 | struct amdgpu_vm *vm, struct amdgpu_sync *sync) | 2056 | struct amdgpu_vm *vm) |
2049 | { | 2057 | { |
2050 | struct amdgpu_bo_va *bo_va = NULL; | 2058 | bool clear; |
2051 | int r = 0; | 2059 | int r = 0; |
2052 | 2060 | ||
2053 | spin_lock(&vm->status_lock); | 2061 | spin_lock(&vm->status_lock); |
2054 | while (!list_empty(&vm->invalidated)) { | 2062 | while (!list_empty(&vm->moved)) { |
2055 | bo_va = list_first_entry(&vm->invalidated, | 2063 | struct amdgpu_bo_va *bo_va; |
2056 | struct amdgpu_bo_va, vm_status); | 2064 | |
2065 | bo_va = list_first_entry(&vm->moved, | ||
2066 | struct amdgpu_bo_va, base.vm_status); | ||
2057 | spin_unlock(&vm->status_lock); | 2067 | spin_unlock(&vm->status_lock); |
2058 | 2068 | ||
2059 | r = amdgpu_vm_bo_update(adev, bo_va, true); | 2069 | /* Per VM BOs never need to be cleared in the page tables */
2070 | clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv; | ||
2071 | |||
2072 | r = amdgpu_vm_bo_update(adev, bo_va, clear); | ||
2060 | if (r) | 2073 | if (r) |
2061 | return r; | 2074 | return r; |
2062 | 2075 | ||
@@ -2064,9 +2077,6 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, | |||
2064 | } | 2077 | } |
2065 | spin_unlock(&vm->status_lock); | 2078 | spin_unlock(&vm->status_lock); |
2066 | 2079 | ||
2067 | if (bo_va) | ||
2068 | r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update); | ||
2069 | |||
2070 | return r; | 2080 | return r; |
2071 | } | 2081 | } |
2072 | 2082 | ||
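amdgpu_vm_handle_moved above drains vm->moved under status_lock but drops the lock around each amdgpu_vm_bo_update call, since that update can sleep. The same pattern reduced to a userspace sketch with a pthread mutex and a singly linked list; this is a simplification, since the real code leaves the entry on the list and lets the update remove it, whereas here it is popped first:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct moved_entry { struct moved_entry *next; int id; };

static pthread_mutex_t status_lock = PTHREAD_MUTEX_INITIALIZER;
static struct moved_entry *moved_list;

/* Stand-in for amdgpu_vm_bo_update(): may block, so it runs unlocked. */
static int update_entry(struct moved_entry *e)
{
	printf("updating BO-VA %d\n", e->id);
	return 0;
}

static int handle_moved(void)
{
	int r = 0;

	pthread_mutex_lock(&status_lock);
	while (moved_list) {
		struct moved_entry *e = moved_list;

		moved_list = e->next;                  /* pop under the lock */
		pthread_mutex_unlock(&status_lock);

		r = update_entry(e);                   /* potentially sleeping work */
		free(e);
		if (r)
			return r;                      /* lock already dropped */

		pthread_mutex_lock(&status_lock);      /* re-check the list */
	}
	pthread_mutex_unlock(&status_lock);
	return r;
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct moved_entry *e = malloc(sizeof(*e));

		if (!e)
			break;
		e->id = i;
		e->next = moved_list;
		moved_list = e;
	}
	return handle_moved();
}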
@@ -2093,20 +2103,54 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | |||
2093 | if (bo_va == NULL) { | 2103 | if (bo_va == NULL) { |
2094 | return NULL; | 2104 | return NULL; |
2095 | } | 2105 | } |
2096 | bo_va->vm = vm; | 2106 | bo_va->base.vm = vm; |
2097 | bo_va->bo = bo; | 2107 | bo_va->base.bo = bo; |
2108 | INIT_LIST_HEAD(&bo_va->base.bo_list); | ||
2109 | INIT_LIST_HEAD(&bo_va->base.vm_status); | ||
2110 | |||
2098 | bo_va->ref_count = 1; | 2111 | bo_va->ref_count = 1; |
2099 | INIT_LIST_HEAD(&bo_va->bo_list); | ||
2100 | INIT_LIST_HEAD(&bo_va->valids); | 2112 | INIT_LIST_HEAD(&bo_va->valids); |
2101 | INIT_LIST_HEAD(&bo_va->invalids); | 2113 | INIT_LIST_HEAD(&bo_va->invalids); |
2102 | INIT_LIST_HEAD(&bo_va->vm_status); | ||
2103 | 2114 | ||
2104 | if (bo) | 2115 | if (bo) |
2105 | list_add_tail(&bo_va->bo_list, &bo->va); | 2116 | list_add_tail(&bo_va->base.bo_list, &bo->va); |
2106 | 2117 | ||
2107 | return bo_va; | 2118 | return bo_va; |
2108 | } | 2119 | } |
2109 | 2120 | ||
2121 | |||
2122 | /** | ||
2123 | * amdgpu_vm_bo_insert_map - insert a new mapping | ||
2124 | * | ||
2125 | * @adev: amdgpu_device pointer | ||
2126 | * @bo_va: bo_va to store the address | ||
2127 | * @mapping: the mapping to insert | ||
2128 | * | ||
2129 | * Insert a new mapping into all structures. | ||
2130 | */ | ||
2131 | static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, | ||
2132 | struct amdgpu_bo_va *bo_va, | ||
2133 | struct amdgpu_bo_va_mapping *mapping) | ||
2134 | { | ||
2135 | struct amdgpu_vm *vm = bo_va->base.vm; | ||
2136 | struct amdgpu_bo *bo = bo_va->base.bo; | ||
2137 | |||
2138 | mapping->bo_va = bo_va; | ||
2139 | list_add(&mapping->list, &bo_va->invalids); | ||
2140 | amdgpu_vm_it_insert(mapping, &vm->va); | ||
2141 | |||
2142 | if (mapping->flags & AMDGPU_PTE_PRT) | ||
2143 | amdgpu_vm_prt_get(adev); | ||
2144 | |||
2145 | if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { | ||
2146 | spin_lock(&vm->status_lock); | ||
2147 | if (list_empty(&bo_va->base.vm_status)) | ||
2148 | list_add(&bo_va->base.vm_status, &vm->moved); | ||
2149 | spin_unlock(&vm->status_lock); | ||
2150 | } | ||
2151 | trace_amdgpu_vm_bo_map(bo_va, mapping); | ||
2152 | } | ||
2153 | |||
2110 | /** | 2154 | /** |
2111 | * amdgpu_vm_bo_map - map bo inside a vm | 2155 | * amdgpu_vm_bo_map - map bo inside a vm |
2112 | * | 2156 | * |
@@ -2127,7 +2171,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
2127 | uint64_t size, uint64_t flags) | 2171 | uint64_t size, uint64_t flags) |
2128 | { | 2172 | { |
2129 | struct amdgpu_bo_va_mapping *mapping, *tmp; | 2173 | struct amdgpu_bo_va_mapping *mapping, *tmp; |
2130 | struct amdgpu_vm *vm = bo_va->vm; | 2174 | struct amdgpu_bo *bo = bo_va->base.bo; |
2175 | struct amdgpu_vm *vm = bo_va->base.vm; | ||
2131 | uint64_t eaddr; | 2176 | uint64_t eaddr; |
2132 | 2177 | ||
2133 | /* validate the parameters */ | 2178 | /* validate the parameters */ |
@@ -2138,7 +2183,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
2138 | /* make sure object fit at this offset */ | 2183 | /* make sure object fit at this offset */ |
2139 | eaddr = saddr + size - 1; | 2184 | eaddr = saddr + size - 1; |
2140 | if (saddr >= eaddr || | 2185 | if (saddr >= eaddr || |
2141 | (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) | 2186 | (bo && offset + size > amdgpu_bo_size(bo))) |
2142 | return -EINVAL; | 2187 | return -EINVAL; |
2143 | 2188 | ||
2144 | saddr /= AMDGPU_GPU_PAGE_SIZE; | 2189 | saddr /= AMDGPU_GPU_PAGE_SIZE; |
@@ -2148,7 +2193,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
2148 | if (tmp) { | 2193 | if (tmp) { |
2149 | /* bo and tmp overlap, invalid addr */ | 2194 | /* bo and tmp overlap, invalid addr */ |
2150 | dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " | 2195 | dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " |
2151 | "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, | 2196 | "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, |
2152 | tmp->start, tmp->last + 1); | 2197 | tmp->start, tmp->last + 1); |
2153 | return -EINVAL; | 2198 | return -EINVAL; |
2154 | } | 2199 | } |
@@ -2157,17 +2202,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
2157 | if (!mapping) | 2202 | if (!mapping) |
2158 | return -ENOMEM; | 2203 | return -ENOMEM; |
2159 | 2204 | ||
2160 | INIT_LIST_HEAD(&mapping->list); | ||
2161 | mapping->start = saddr; | 2205 | mapping->start = saddr; |
2162 | mapping->last = eaddr; | 2206 | mapping->last = eaddr; |
2163 | mapping->offset = offset; | 2207 | mapping->offset = offset; |
2164 | mapping->flags = flags; | 2208 | mapping->flags = flags; |
2165 | 2209 | ||
2166 | list_add(&mapping->list, &bo_va->invalids); | 2210 | amdgpu_vm_bo_insert_map(adev, bo_va, mapping); |
2167 | amdgpu_vm_it_insert(mapping, &vm->va); | ||
2168 | |||
2169 | if (flags & AMDGPU_PTE_PRT) | ||
2170 | amdgpu_vm_prt_get(adev); | ||
2171 | 2211 | ||
2172 | return 0; | 2212 | return 0; |
2173 | } | 2213 | } |
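amdgpu_vm_bo_map above (and amdgpu_vm_bo_replace_map below) validate the requested range before handing it to the new amdgpu_vm_bo_insert_map helper: the range must be non-empty and must fit inside the BO, and the addresses are then converted to GPU page numbers. A simplified stand-alone version of those checks; the interval-tree overlap test is omitted and the 4 KiB page size is an assumption for illustration:

#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SIZE 4096ull

/* Reject empty or out-of-bounds ranges, then convert to page numbers. */
static int validate_map(uint64_t saddr, uint64_t offset, uint64_t size,
			uint64_t bo_size, uint64_t *first_pfn, uint64_t *last_pfn)
{
	uint64_t eaddr = saddr + size - 1;

	if (saddr >= eaddr || offset + size > bo_size)
		return -1;              /* -EINVAL in the driver */

	*first_pfn = saddr / GPU_PAGE_SIZE;
	*last_pfn = eaddr / GPU_PAGE_SIZE;
	return 0;
}

int main(void)
{
	uint64_t first, last;

	if (!validate_map(0x100000, 0, 0x4000, 0x8000, &first, &last))
		printf("pages 0x%llx..0x%llx\n",
		       (unsigned long long)first, (unsigned long long)last);
	return 0;
}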
@@ -2193,7 +2233,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2193 | uint64_t size, uint64_t flags) | 2233 | uint64_t size, uint64_t flags) |
2194 | { | 2234 | { |
2195 | struct amdgpu_bo_va_mapping *mapping; | 2235 | struct amdgpu_bo_va_mapping *mapping; |
2196 | struct amdgpu_vm *vm = bo_va->vm; | 2236 | struct amdgpu_bo *bo = bo_va->base.bo; |
2197 | uint64_t eaddr; | 2237 | uint64_t eaddr; |
2198 | int r; | 2238 | int r; |
2199 | 2239 | ||
@@ -2205,7 +2245,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2205 | /* make sure object fit at this offset */ | 2245 | /* make sure object fit at this offset */ |
2206 | eaddr = saddr + size - 1; | 2246 | eaddr = saddr + size - 1; |
2207 | if (saddr >= eaddr || | 2247 | if (saddr >= eaddr || |
2208 | (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) | 2248 | (bo && offset + size > amdgpu_bo_size(bo))) |
2209 | return -EINVAL; | 2249 | return -EINVAL; |
2210 | 2250 | ||
2211 | /* Allocate all the needed memory */ | 2251 | /* Allocate all the needed memory */ |
@@ -2213,7 +2253,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2213 | if (!mapping) | 2253 | if (!mapping) |
2214 | return -ENOMEM; | 2254 | return -ENOMEM; |
2215 | 2255 | ||
2216 | r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); | 2256 | r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); |
2217 | if (r) { | 2257 | if (r) { |
2218 | kfree(mapping); | 2258 | kfree(mapping); |
2219 | return r; | 2259 | return r; |
@@ -2227,11 +2267,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2227 | mapping->offset = offset; | 2267 | mapping->offset = offset; |
2228 | mapping->flags = flags; | 2268 | mapping->flags = flags; |
2229 | 2269 | ||
2230 | list_add(&mapping->list, &bo_va->invalids); | 2270 | amdgpu_vm_bo_insert_map(adev, bo_va, mapping); |
2231 | amdgpu_vm_it_insert(mapping, &vm->va); | ||
2232 | |||
2233 | if (flags & AMDGPU_PTE_PRT) | ||
2234 | amdgpu_vm_prt_get(adev); | ||
2235 | 2271 | ||
2236 | return 0; | 2272 | return 0; |
2237 | } | 2273 | } |
@@ -2253,7 +2289,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
2253 | uint64_t saddr) | 2289 | uint64_t saddr) |
2254 | { | 2290 | { |
2255 | struct amdgpu_bo_va_mapping *mapping; | 2291 | struct amdgpu_bo_va_mapping *mapping; |
2256 | struct amdgpu_vm *vm = bo_va->vm; | 2292 | struct amdgpu_vm *vm = bo_va->base.vm; |
2257 | bool valid = true; | 2293 | bool valid = true; |
2258 | 2294 | ||
2259 | saddr /= AMDGPU_GPU_PAGE_SIZE; | 2295 | saddr /= AMDGPU_GPU_PAGE_SIZE; |
@@ -2277,6 +2313,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
2277 | 2313 | ||
2278 | list_del(&mapping->list); | 2314 | list_del(&mapping->list); |
2279 | amdgpu_vm_it_remove(mapping, &vm->va); | 2315 | amdgpu_vm_it_remove(mapping, &vm->va); |
2316 | mapping->bo_va = NULL; | ||
2280 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | 2317 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); |
2281 | 2318 | ||
2282 | if (valid) | 2319 | if (valid) |
@@ -2362,6 +2399,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, | |||
2362 | if (tmp->last > eaddr) | 2399 | if (tmp->last > eaddr) |
2363 | tmp->last = eaddr; | 2400 | tmp->last = eaddr; |
2364 | 2401 | ||
2402 | tmp->bo_va = NULL; | ||
2365 | list_add(&tmp->list, &vm->freed); | 2403 | list_add(&tmp->list, &vm->freed); |
2366 | trace_amdgpu_vm_bo_unmap(NULL, tmp); | 2404 | trace_amdgpu_vm_bo_unmap(NULL, tmp); |
2367 | } | 2405 | } |
@@ -2388,6 +2426,19 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, | |||
2388 | } | 2426 | } |
2389 | 2427 | ||
2390 | /** | 2428 | /** |
2429 | * amdgpu_vm_bo_lookup_mapping - find mapping by address | ||
2430 | * | ||
2431 | * @vm: the requested VM | ||
2432 | * | ||
2433 | * Find a mapping by its address. | ||
2434 | */ | ||
2435 | struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, | ||
2436 | uint64_t addr) | ||
2437 | { | ||
2438 | return amdgpu_vm_it_iter_first(&vm->va, addr, addr); | ||
2439 | } | ||
2440 | |||
2441 | /** | ||
2391 | * amdgpu_vm_bo_rmv - remove a bo from a specific vm | 2442 | * amdgpu_vm_bo_rmv - remove a bo from a specific vm |
2392 | * | 2443 | * |
2393 | * @adev: amdgpu_device pointer | 2444 | * @adev: amdgpu_device pointer |
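The amdgpu_vm_bo_lookup_mapping helper added in this hunk is a point query on the VM's interval tree: amdgpu_vm_it_iter_first(&vm->va, addr, addr) returns the first mapping whose [start, last] range contains the given page address. A linear-scan stand-in showing the same contract; the array and names below are illustrative only:

#include <stdint.h>
#include <stdio.h>

struct mapping { uint64_t start, last; const char *name; };

/* Return the first mapping whose [start, last] range contains addr. */
static const struct mapping *lookup_mapping(const struct mapping *maps,
					    unsigned count, uint64_t addr)
{
	for (unsigned i = 0; i < count; i++)
		if (maps[i].start <= addr && addr <= maps[i].last)
			return &maps[i];
	return NULL;
}

int main(void)
{
	const struct mapping maps[] = {
		{ 0x1000, 0x1fff, "A" },
		{ 0x8000, 0x8fff, "B" },
	};
	const struct mapping *m = lookup_mapping(maps, 2, 0x8123);

	printf("0x8123 -> %s\n", m ? m->name : "none");
	return 0;
}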
@@ -2401,17 +2452,18 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
2401 | struct amdgpu_bo_va *bo_va) | 2452 | struct amdgpu_bo_va *bo_va) |
2402 | { | 2453 | { |
2403 | struct amdgpu_bo_va_mapping *mapping, *next; | 2454 | struct amdgpu_bo_va_mapping *mapping, *next; |
2404 | struct amdgpu_vm *vm = bo_va->vm; | 2455 | struct amdgpu_vm *vm = bo_va->base.vm; |
2405 | 2456 | ||
2406 | list_del(&bo_va->bo_list); | 2457 | list_del(&bo_va->base.bo_list); |
2407 | 2458 | ||
2408 | spin_lock(&vm->status_lock); | 2459 | spin_lock(&vm->status_lock); |
2409 | list_del(&bo_va->vm_status); | 2460 | list_del(&bo_va->base.vm_status); |
2410 | spin_unlock(&vm->status_lock); | 2461 | spin_unlock(&vm->status_lock); |
2411 | 2462 | ||
2412 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { | 2463 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { |
2413 | list_del(&mapping->list); | 2464 | list_del(&mapping->list); |
2414 | amdgpu_vm_it_remove(mapping, &vm->va); | 2465 | amdgpu_vm_it_remove(mapping, &vm->va); |
2466 | mapping->bo_va = NULL; | ||
2415 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | 2467 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); |
2416 | list_add(&mapping->list, &vm->freed); | 2468 | list_add(&mapping->list, &vm->freed); |
2417 | } | 2469 | } |
@@ -2436,15 +2488,37 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
2436 | * Mark @bo as invalid. | 2488 | * Mark @bo as invalid. |
2437 | */ | 2489 | */ |
2438 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | 2490 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, |
2439 | struct amdgpu_bo *bo) | 2491 | struct amdgpu_bo *bo, bool evicted) |
2440 | { | 2492 | { |
2441 | struct amdgpu_bo_va *bo_va; | 2493 | struct amdgpu_vm_bo_base *bo_base; |
2494 | |||
2495 | list_for_each_entry(bo_base, &bo->va, bo_list) { | ||
2496 | struct amdgpu_vm *vm = bo_base->vm; | ||
2497 | |||
2498 | bo_base->moved = true; | ||
2499 | if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { | ||
2500 | spin_lock(&bo_base->vm->status_lock); | ||
2501 | if (bo->tbo.type == ttm_bo_type_kernel) | ||
2502 | list_move(&bo_base->vm_status, &vm->evicted); | ||
2503 | else | ||
2504 | list_move_tail(&bo_base->vm_status, | ||
2505 | &vm->evicted); | ||
2506 | spin_unlock(&bo_base->vm->status_lock); | ||
2507 | continue; | ||
2508 | } | ||
2509 | |||
2510 | if (bo->tbo.type == ttm_bo_type_kernel) { | ||
2511 | spin_lock(&bo_base->vm->status_lock); | ||
2512 | if (list_empty(&bo_base->vm_status)) | ||
2513 | list_add(&bo_base->vm_status, &vm->relocated); | ||
2514 | spin_unlock(&bo_base->vm->status_lock); | ||
2515 | continue; | ||
2516 | } | ||
2442 | 2517 | ||
2443 | list_for_each_entry(bo_va, &bo->va, bo_list) { | 2518 | spin_lock(&bo_base->vm->status_lock); |
2444 | spin_lock(&bo_va->vm->status_lock); | 2519 | if (list_empty(&bo_base->vm_status)) |
2445 | if (list_empty(&bo_va->vm_status)) | 2520 | list_add(&bo_base->vm_status, &vm->moved); |
2446 | list_add(&bo_va->vm_status, &bo_va->vm->invalidated); | 2521 | spin_unlock(&bo_base->vm->status_lock); |
2447 | spin_unlock(&bo_va->vm->status_lock); | ||
2448 | } | 2522 | } |
2449 | } | 2523 | } |
2450 | 2524 | ||
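The reworked amdgpu_vm_bo_invalidate above routes a BO onto one of three per-VM lists depending on why it was invalidated: evicted per-VM BOs need revalidation, kernel (page-table) BOs need their parent directory entries rewritten, and ordinary BOs need their PTEs updated. A compact model of that decision; it is simplified and ignores the head/tail ordering of kernel versus user BOs on the evicted list:

#include <stdio.h>

enum vm_list { VM_EVICTED, VM_RELOCATED, VM_MOVED };

/* Simplified routing decision from amdgpu_vm_bo_invalidate above. */
static enum vm_list route_invalidated(int evicted, int shares_root_resv,
				      int is_kernel_bo)
{
	if (evicted && shares_root_resv)
		return VM_EVICTED;     /* per-VM BO lost its backing: needs validation */
	if (is_kernel_bo)
		return VM_RELOCATED;   /* page-table BO: parent entry must be rewritten */
	return VM_MOVED;               /* regular BO: its PTEs must be updated */
}

int main(void)
{
	printf("%d %d %d\n",
	       route_invalidated(1, 1, 0),   /* -> VM_EVICTED */
	       route_invalidated(0, 0, 1),   /* -> VM_RELOCATED */
	       route_invalidated(0, 0, 0));  /* -> VM_MOVED */
	return 0;
}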
@@ -2462,12 +2536,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) | |||
2462 | } | 2536 | } |
2463 | 2537 | ||
2464 | /** | 2538 | /** |
2465 | * amdgpu_vm_adjust_size - adjust vm size and block size | 2539 | * amdgpu_vm_set_fragment_size - adjust fragment size in PTE |
2540 | * | ||
2541 | * @adev: amdgpu_device pointer | ||
2542 | * @fragment_size_default: the default fragment size if it's set auto | ||
2543 | */ | ||
2544 | void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default) | ||
2545 | { | ||
2546 | if (amdgpu_vm_fragment_size == -1) | ||
2547 | adev->vm_manager.fragment_size = fragment_size_default; | ||
2548 | else | ||
2549 | adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; | ||
2550 | } | ||
2551 | |||
2552 | /** | ||
2553 | * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size | ||
2466 | * | 2554 | * |
2467 | * @adev: amdgpu_device pointer | 2555 | * @adev: amdgpu_device pointer |
2468 | * @vm_size: the default vm size if it's set auto | 2556 | * @vm_size: the default vm size if it's set auto |
2469 | */ | 2557 | */ |
2470 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) | 2558 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default) |
2471 | { | 2559 | { |
2472 | /* adjust vm size firstly */ | 2560 | /* adjust vm size firstly */ |
2473 | if (amdgpu_vm_size == -1) | 2561 | if (amdgpu_vm_size == -1) |
@@ -2482,8 +2570,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) | |||
2482 | else | 2570 | else |
2483 | adev->vm_manager.block_size = amdgpu_vm_block_size; | 2571 | adev->vm_manager.block_size = amdgpu_vm_block_size; |
2484 | 2572 | ||
2485 | DRM_INFO("vm size is %llu GB, block size is %u-bit\n", | 2573 | amdgpu_vm_set_fragment_size(adev, fragment_size_default); |
2486 | adev->vm_manager.vm_size, adev->vm_manager.block_size); | 2574 | |
2575 | DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", | ||
2576 | adev->vm_manager.vm_size, adev->vm_manager.block_size, | ||
2577 | adev->vm_manager.fragment_size); | ||
2487 | } | 2578 | } |
2488 | 2579 | ||
2489 | /** | 2580 | /** |
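The two hunks above add amdgpu_vm_set_fragment_size, which follows the usual module-parameter pattern: amdgpu_vm_fragment_size == -1 means "use the per-ASIC default passed in by the caller". Per the AMDGPU_LOG2_PAGES_PER_FRAG macro removed from amdgpu_vm.h later in this diff, the value is the log2 of the number of contiguous pages covered by one PTE fragment. A minimal sketch of the selection; the concrete values below are examples only:

#include <stdio.h>

/* -1 selects the per-ASIC default, anything else wins. */
static unsigned int pick_fragment_size(int module_param, unsigned int asic_default)
{
	return module_param == -1 ? asic_default : (unsigned int)module_param;
}

int main(void)
{
	printf("auto: %u, forced: %u\n",
	       pick_fragment_size(-1, 9),   /* 2^9 pages: 2 MiB fragments with 4 KiB pages */
	       pick_fragment_size(4, 9));   /* 2^4 pages: 64 KiB fragments */
	return 0;
}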
@@ -2496,7 +2587,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) | |||
2496 | * Init @vm fields. | 2587 | * Init @vm fields. |
2497 | */ | 2588 | */ |
2498 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 2589 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
2499 | int vm_context) | 2590 | int vm_context, unsigned int pasid) |
2500 | { | 2591 | { |
2501 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, | 2592 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, |
2502 | AMDGPU_VM_PTE_COUNT(adev) * 8); | 2593 | AMDGPU_VM_PTE_COUNT(adev) * 8); |
@@ -2507,13 +2598,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2507 | u64 flags; | 2598 | u64 flags; |
2508 | uint64_t init_pde_value = 0; | 2599 | uint64_t init_pde_value = 0; |
2509 | 2600 | ||
2510 | vm->va = RB_ROOT; | 2601 | vm->va = RB_ROOT_CACHED; |
2511 | vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); | 2602 | vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); |
2512 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | 2603 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) |
2513 | vm->reserved_vmid[i] = NULL; | 2604 | vm->reserved_vmid[i] = NULL; |
2514 | spin_lock_init(&vm->status_lock); | 2605 | spin_lock_init(&vm->status_lock); |
2515 | INIT_LIST_HEAD(&vm->invalidated); | 2606 | INIT_LIST_HEAD(&vm->evicted); |
2516 | INIT_LIST_HEAD(&vm->cleared); | 2607 | INIT_LIST_HEAD(&vm->relocated); |
2608 | INIT_LIST_HEAD(&vm->moved); | ||
2517 | INIT_LIST_HEAD(&vm->freed); | 2609 | INIT_LIST_HEAD(&vm->freed); |
2518 | 2610 | ||
2519 | /* create scheduler entity for page table updates */ | 2611 | /* create scheduler entity for page table updates */ |
@@ -2544,7 +2636,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2544 | vm->use_cpu_for_update ? "CPU" : "SDMA"); | 2636 | vm->use_cpu_for_update ? "CPU" : "SDMA"); |
2545 | WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), | 2637 | WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), |
2546 | "CPU update of VM recommended only for large BAR system\n"); | 2638 | "CPU update of VM recommended only for large BAR system\n"); |
2547 | vm->last_dir_update = NULL; | 2639 | vm->last_update = NULL; |
2548 | 2640 | ||
2549 | flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | | 2641 | flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | |
2550 | AMDGPU_GEM_CREATE_VRAM_CLEARED; | 2642 | AMDGPU_GEM_CREATE_VRAM_CLEARED; |
@@ -2557,30 +2649,46 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2557 | r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, | 2649 | r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, |
2558 | AMDGPU_GEM_DOMAIN_VRAM, | 2650 | AMDGPU_GEM_DOMAIN_VRAM, |
2559 | flags, | 2651 | flags, |
2560 | NULL, NULL, init_pde_value, &vm->root.bo); | 2652 | NULL, NULL, init_pde_value, &vm->root.base.bo); |
2561 | if (r) | 2653 | if (r) |
2562 | goto error_free_sched_entity; | 2654 | goto error_free_sched_entity; |
2563 | 2655 | ||
2564 | r = amdgpu_bo_reserve(vm->root.bo, false); | 2656 | vm->root.base.vm = vm; |
2565 | if (r) | 2657 | list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); |
2566 | goto error_free_root; | 2658 | INIT_LIST_HEAD(&vm->root.base.vm_status); |
2567 | |||
2568 | vm->last_eviction_counter = atomic64_read(&adev->num_evictions); | ||
2569 | 2659 | ||
2570 | if (vm->use_cpu_for_update) { | 2660 | if (vm->use_cpu_for_update) { |
2571 | r = amdgpu_bo_kmap(vm->root.bo, NULL); | 2661 | r = amdgpu_bo_reserve(vm->root.base.bo, false); |
2662 | if (r) | ||
2663 | goto error_free_root; | ||
2664 | |||
2665 | r = amdgpu_bo_kmap(vm->root.base.bo, NULL); | ||
2666 | amdgpu_bo_unreserve(vm->root.base.bo); | ||
2572 | if (r) | 2667 | if (r) |
2573 | goto error_free_root; | 2668 | goto error_free_root; |
2574 | } | 2669 | } |
2575 | 2670 | ||
2576 | amdgpu_bo_unreserve(vm->root.bo); | 2671 | if (pasid) { |
2672 | unsigned long flags; | ||
2673 | |||
2674 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | ||
2675 | r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, | ||
2676 | GFP_ATOMIC); | ||
2677 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | ||
2678 | if (r < 0) | ||
2679 | goto error_free_root; | ||
2680 | |||
2681 | vm->pasid = pasid; | ||
2682 | } | ||
2683 | |||
2684 | INIT_KFIFO(vm->faults); | ||
2577 | 2685 | ||
2578 | return 0; | 2686 | return 0; |
2579 | 2687 | ||
2580 | error_free_root: | 2688 | error_free_root: |
2581 | amdgpu_bo_unref(&vm->root.bo->shadow); | 2689 | amdgpu_bo_unref(&vm->root.base.bo->shadow); |
2582 | amdgpu_bo_unref(&vm->root.bo); | 2690 | amdgpu_bo_unref(&vm->root.base.bo); |
2583 | vm->root.bo = NULL; | 2691 | vm->root.base.bo = NULL; |
2584 | 2692 | ||
2585 | error_free_sched_entity: | 2693 | error_free_sched_entity: |
2586 | amd_sched_entity_fini(&ring->sched, &vm->entity); | 2694 | amd_sched_entity_fini(&ring->sched, &vm->entity); |
@@ -2599,9 +2707,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) | |||
2599 | { | 2707 | { |
2600 | unsigned i; | 2708 | unsigned i; |
2601 | 2709 | ||
2602 | if (level->bo) { | 2710 | if (level->base.bo) { |
2603 | amdgpu_bo_unref(&level->bo->shadow); | 2711 | list_del(&level->base.bo_list); |
2604 | amdgpu_bo_unref(&level->bo); | 2712 | list_del(&level->base.vm_status); |
2713 | amdgpu_bo_unref(&level->base.bo->shadow); | ||
2714 | amdgpu_bo_unref(&level->base.bo); | ||
2605 | } | 2715 | } |
2606 | 2716 | ||
2607 | if (level->entries) | 2717 | if (level->entries) |
@@ -2624,14 +2734,28 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
2624 | { | 2734 | { |
2625 | struct amdgpu_bo_va_mapping *mapping, *tmp; | 2735 | struct amdgpu_bo_va_mapping *mapping, *tmp; |
2626 | bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; | 2736 | bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; |
2737 | u64 fault; | ||
2627 | int i; | 2738 | int i; |
2628 | 2739 | ||
2740 | /* Clear pending page faults from IH when the VM is destroyed */ | ||
2741 | while (kfifo_get(&vm->faults, &fault)) | ||
2742 | amdgpu_ih_clear_fault(adev, fault); | ||
2743 | |||
2744 | if (vm->pasid) { | ||
2745 | unsigned long flags; | ||
2746 | |||
2747 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | ||
2748 | idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); | ||
2749 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | ||
2750 | } | ||
2751 | |||
2629 | amd_sched_entity_fini(vm->entity.sched, &vm->entity); | 2752 | amd_sched_entity_fini(vm->entity.sched, &vm->entity); |
2630 | 2753 | ||
2631 | if (!RB_EMPTY_ROOT(&vm->va)) { | 2754 | if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { |
2632 | dev_err(adev->dev, "still active bo inside vm\n"); | 2755 | dev_err(adev->dev, "still active bo inside vm\n"); |
2633 | } | 2756 | } |
2634 | rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { | 2757 | rbtree_postorder_for_each_entry_safe(mapping, tmp, |
2758 | &vm->va.rb_root, rb) { | ||
2635 | list_del(&mapping->list); | 2759 | list_del(&mapping->list); |
2636 | amdgpu_vm_it_remove(mapping, &vm->va); | 2760 | amdgpu_vm_it_remove(mapping, &vm->va); |
2637 | kfree(mapping); | 2761 | kfree(mapping); |
@@ -2647,7 +2771,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
2647 | } | 2771 | } |
2648 | 2772 | ||
2649 | amdgpu_vm_free_levels(&vm->root); | 2773 | amdgpu_vm_free_levels(&vm->root); |
2650 | dma_fence_put(vm->last_dir_update); | 2774 | dma_fence_put(vm->last_update); |
2651 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | 2775 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) |
2652 | amdgpu_vm_free_reserved_vmid(adev, vm, i); | 2776 | amdgpu_vm_free_reserved_vmid(adev, vm, i); |
2653 | } | 2777 | } |
@@ -2705,6 +2829,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) | |||
2705 | adev->vm_manager.vm_update_mode = 0; | 2829 | adev->vm_manager.vm_update_mode = 0; |
2706 | #endif | 2830 | #endif |
2707 | 2831 | ||
2832 | idr_init(&adev->vm_manager.pasid_idr); | ||
2833 | spin_lock_init(&adev->vm_manager.pasid_lock); | ||
2708 | } | 2834 | } |
2709 | 2835 | ||
2710 | /** | 2836 | /** |
@@ -2718,6 +2844,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) | |||
2718 | { | 2844 | { |
2719 | unsigned i, j; | 2845 | unsigned i, j; |
2720 | 2846 | ||
2847 | WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); | ||
2848 | idr_destroy(&adev->vm_manager.pasid_idr); | ||
2849 | |||
2721 | for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { | 2850 | for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { |
2722 | struct amdgpu_vm_id_manager *id_mgr = | 2851 | struct amdgpu_vm_id_manager *id_mgr = |
2723 | &adev->vm_manager.id_mgr[i]; | 2852 | &adev->vm_manager.id_mgr[i]; |
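The amdgpu_vm.c changes above register each VM in a PASID-indexed IDR at init time (idr_alloc under pasid_lock with interrupts disabled) and remove it again in amdgpu_vm_fini, so a fault handler can later map a PASID back to its VM. A userspace toy model of that registry, with the IDR replaced by a fixed array and the irq-safe spinlock by a pthread mutex; the size limit and names are assumptions for illustration:

#include <pthread.h>
#include <stdio.h>

#define MAX_PASID 64                       /* toy limit, not the hardware one */

struct vm { int id; };

static pthread_mutex_t pasid_lock = PTHREAD_MUTEX_INITIALIZER;
static struct vm *pasid_map[MAX_PASID];

static int pasid_register(unsigned int pasid, struct vm *vm)
{
	int r = 0;

	pthread_mutex_lock(&pasid_lock);
	if (pasid >= MAX_PASID || pasid_map[pasid])
		r = -1;                    /* out of range or already taken */
	else
		pasid_map[pasid] = vm;
	pthread_mutex_unlock(&pasid_lock);
	return r;
}

static void pasid_unregister(unsigned int pasid)
{
	pthread_mutex_lock(&pasid_lock);
	if (pasid < MAX_PASID)
		pasid_map[pasid] = NULL;
	pthread_mutex_unlock(&pasid_lock);
}

static struct vm *pasid_lookup(unsigned int pasid)
{
	struct vm *vm = NULL;

	pthread_mutex_lock(&pasid_lock);
	if (pasid < MAX_PASID)
		vm = pasid_map[pasid];
	pthread_mutex_unlock(&pasid_lock);
	return vm;
}

int main(void)
{
	struct vm my_vm = { 1 };

	pasid_register(7, &my_vm);
	printf("lookup(7) finds vm: %d\n", pasid_lookup(7) == &my_vm);
	pasid_unregister(7);
	printf("lookup(7) after fini: %d\n", pasid_lookup(7) == NULL);
	return 0;
}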
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 217ecba8f4cc..0af090667dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #define __AMDGPU_VM_H__ | 25 | #define __AMDGPU_VM_H__ |
26 | 26 | ||
27 | #include <linux/rbtree.h> | 27 | #include <linux/rbtree.h> |
28 | #include <linux/idr.h> | ||
28 | 29 | ||
29 | #include "gpu_scheduler.h" | 30 | #include "gpu_scheduler.h" |
30 | #include "amdgpu_sync.h" | 31 | #include "amdgpu_sync.h" |
@@ -50,11 +51,6 @@ struct amdgpu_bo_list_entry; | |||
50 | /* PTBs (Page Table Blocks) need to be aligned to 32K */ | 51 | /* PTBs (Page Table Blocks) need to be aligned to 32K */ |
51 | #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 | 52 | #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 |
52 | 53 | ||
53 | /* LOG2 number of continuous pages for the fragment field */ | ||
54 | #define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \ | ||
55 | ((adev)->asic_type < CHIP_VEGA10 ? 4 : \ | ||
56 | (adev)->vm_manager.block_size) | ||
57 | |||
58 | #define AMDGPU_PTE_VALID (1ULL << 0) | 54 | #define AMDGPU_PTE_VALID (1ULL << 0) |
59 | #define AMDGPU_PTE_SYSTEM (1ULL << 1) | 55 | #define AMDGPU_PTE_SYSTEM (1ULL << 1) |
60 | #define AMDGPU_PTE_SNOOPED (1ULL << 2) | 56 | #define AMDGPU_PTE_SNOOPED (1ULL << 2) |
@@ -99,37 +95,57 @@ struct amdgpu_bo_list_entry; | |||
99 | #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) | 95 | #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) |
100 | #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) | 96 | #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) |
101 | 97 | ||
98 | /* base structure for tracking BO usage in a VM */ | ||
99 | struct amdgpu_vm_bo_base { | ||
100 | /* constant after initialization */ | ||
101 | struct amdgpu_vm *vm; | ||
102 | struct amdgpu_bo *bo; | ||
103 | |||
104 | /* protected by bo being reserved */ | ||
105 | struct list_head bo_list; | ||
106 | |||
107 | /* protected by spinlock */ | ||
108 | struct list_head vm_status; | ||
109 | |||
110 | /* protected by the BO being reserved */ | ||
111 | bool moved; | ||
112 | }; | ||
102 | 113 | ||
103 | struct amdgpu_vm_pt { | 114 | struct amdgpu_vm_pt { |
104 | struct amdgpu_bo *bo; | 115 | struct amdgpu_vm_bo_base base; |
105 | uint64_t addr; | 116 | uint64_t addr; |
106 | bool huge_page; | ||
107 | 117 | ||
108 | /* array of page tables, one for each directory entry */ | 118 | /* array of page tables, one for each directory entry */ |
109 | struct amdgpu_vm_pt *entries; | 119 | struct amdgpu_vm_pt *entries; |
110 | unsigned last_entry_used; | 120 | unsigned last_entry_used; |
111 | }; | 121 | }; |
112 | 122 | ||
123 | #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr)) | ||
124 | #define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48) | ||
125 | #define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL) | ||
126 | |||
113 | struct amdgpu_vm { | 127 | struct amdgpu_vm { |
114 | /* tree of virtual addresses mapped */ | 128 | /* tree of virtual addresses mapped */ |
115 | struct rb_root va; | 129 | struct rb_root_cached va; |
116 | 130 | ||
117 | /* protecting invalidated */ | 131 | /* protecting invalidated */ |
118 | spinlock_t status_lock; | 132 | spinlock_t status_lock; |
119 | 133 | ||
120 | /* BOs moved, but not yet updated in the PT */ | 134 | /* BOs which need a validation */ |
121 | struct list_head invalidated; | 135 | struct list_head evicted; |
136 | |||
137 | /* PT BOs which relocated and their parent need an update */ | ||
138 | struct list_head relocated; | ||
122 | 139 | ||
123 | /* BOs cleared in the PT because of a move */ | 140 | /* BOs moved, but not yet updated in the PT */ |
124 | struct list_head cleared; | 141 | struct list_head moved; |
125 | 142 | ||
126 | /* BO mappings freed, but not yet updated in the PT */ | 143 | /* BO mappings freed, but not yet updated in the PT */ |
127 | struct list_head freed; | 144 | struct list_head freed; |
128 | 145 | ||
129 | /* contains the page directory */ | 146 | /* contains the page directory */ |
130 | struct amdgpu_vm_pt root; | 147 | struct amdgpu_vm_pt root; |
131 | struct dma_fence *last_dir_update; | 148 | struct dma_fence *last_update; |
132 | uint64_t last_eviction_counter; | ||
133 | 149 | ||
134 | /* protecting freed */ | 150 | /* protecting freed */ |
135 | spinlock_t freed_lock; | 151 | spinlock_t freed_lock; |
@@ -137,18 +153,20 @@ struct amdgpu_vm { | |||
137 | /* Scheduler entity for page table updates */ | 153 | /* Scheduler entity for page table updates */ |
138 | struct amd_sched_entity entity; | 154 | struct amd_sched_entity entity; |
139 | 155 | ||
140 | /* client id */ | 156 | /* client id and PASID (TODO: replace client_id with PASID) */ |
141 | u64 client_id; | 157 | u64 client_id; |
158 | unsigned int pasid; | ||
142 | /* dedicated to vm */ | 159 | /* dedicated to vm */ |
143 | struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; | 160 | struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; |
144 | /* each VM will map on CSA */ | ||
145 | struct amdgpu_bo_va *csa_bo_va; | ||
146 | 161 | ||
147 | /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ | 162 | /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ |
148 | bool use_cpu_for_update; | 163 | bool use_cpu_for_update; |
149 | 164 | ||
150 | /* Flag to indicate ATS support from PTE for GFX9 */ | 165 | /* Flag to indicate ATS support from PTE for GFX9 */ |
151 | bool pte_support_ats; | 166 | bool pte_support_ats; |
167 | |||
168 | /* Up to 128 pending page faults */ | ||
169 | DECLARE_KFIFO(faults, u64, 128); | ||
152 | }; | 170 | }; |
153 | 171 | ||
154 | struct amdgpu_vm_id { | 172 | struct amdgpu_vm_id { |
@@ -191,6 +209,7 @@ struct amdgpu_vm_manager { | |||
191 | uint32_t num_level; | 209 | uint32_t num_level; |
192 | uint64_t vm_size; | 210 | uint64_t vm_size; |
193 | uint32_t block_size; | 211 | uint32_t block_size; |
212 | uint32_t fragment_size; | ||
194 | /* vram base address for page table entry */ | 213 | /* vram base address for page table entry */ |
195 | u64 vram_base_offset; | 214 | u64 vram_base_offset; |
196 | /* vm pte handling */ | 215 | /* vm pte handling */ |
@@ -210,21 +229,28 @@ struct amdgpu_vm_manager { | |||
210 | * BIT1[= 0] Compute updated by SDMA [= 1] by CPU | 229 | * BIT1[= 0] Compute updated by SDMA [= 1] by CPU |
211 | */ | 230 | */ |
212 | int vm_update_mode; | 231 | int vm_update_mode; |
232 | |||
233 | /* PASID to VM mapping, will be used in interrupt context to | ||
234 | * look up VM of a page fault | ||
235 | */ | ||
236 | struct idr pasid_idr; | ||
237 | spinlock_t pasid_lock; | ||
213 | }; | 238 | }; |
214 | 239 | ||
240 | int amdgpu_vm_alloc_pasid(unsigned int bits); | ||
241 | void amdgpu_vm_free_pasid(unsigned int pasid); | ||
215 | void amdgpu_vm_manager_init(struct amdgpu_device *adev); | 242 | void amdgpu_vm_manager_init(struct amdgpu_device *adev); |
216 | void amdgpu_vm_manager_fini(struct amdgpu_device *adev); | 243 | void amdgpu_vm_manager_fini(struct amdgpu_device *adev); |
217 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 244 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
218 | int vm_context); | 245 | int vm_context, unsigned int pasid); |
219 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); | 246 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); |
220 | void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | 247 | void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, |
221 | struct list_head *validated, | 248 | struct list_head *validated, |
222 | struct amdgpu_bo_list_entry *entry); | 249 | struct amdgpu_bo_list_entry *entry); |
250 | bool amdgpu_vm_ready(struct amdgpu_vm *vm); | ||
223 | int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 251 | int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
224 | int (*callback)(void *p, struct amdgpu_bo *bo), | 252 | int (*callback)(void *p, struct amdgpu_bo *bo), |
225 | void *param); | 253 | void *param); |
226 | void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, | ||
227 | struct amdgpu_vm *vm); | ||
228 | int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, | 254 | int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, |
229 | struct amdgpu_vm *vm, | 255 | struct amdgpu_vm *vm, |
230 | uint64_t saddr, uint64_t size); | 256 | uint64_t saddr, uint64_t size); |
@@ -240,13 +266,13 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, | |||
240 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | 266 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, |
241 | struct amdgpu_vm *vm, | 267 | struct amdgpu_vm *vm, |
242 | struct dma_fence **fence); | 268 | struct dma_fence **fence); |
243 | int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 269 | int amdgpu_vm_handle_moved(struct amdgpu_device *adev, |
244 | struct amdgpu_sync *sync); | 270 | struct amdgpu_vm *vm); |
245 | int amdgpu_vm_bo_update(struct amdgpu_device *adev, | 271 | int amdgpu_vm_bo_update(struct amdgpu_device *adev, |
246 | struct amdgpu_bo_va *bo_va, | 272 | struct amdgpu_bo_va *bo_va, |
247 | bool clear); | 273 | bool clear); |
248 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | 274 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, |
249 | struct amdgpu_bo *bo); | 275 | struct amdgpu_bo *bo, bool evicted); |
250 | struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, | 276 | struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, |
251 | struct amdgpu_bo *bo); | 277 | struct amdgpu_bo *bo); |
252 | struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | 278 | struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, |
@@ -266,9 +292,14 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
266 | int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, | 292 | int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, |
267 | struct amdgpu_vm *vm, | 293 | struct amdgpu_vm *vm, |
268 | uint64_t saddr, uint64_t size); | 294 | uint64_t saddr, uint64_t size); |
295 | struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, | ||
296 | uint64_t addr); | ||
269 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | 297 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, |
270 | struct amdgpu_bo_va *bo_va); | 298 | struct amdgpu_bo_va *bo_va); |
271 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); | 299 | void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, |
300 | uint32_t fragment_size_default); | ||
301 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, | ||
302 | uint32_t fragment_size_default); | ||
272 | int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); | 303 | int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); |
273 | bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, | 304 | bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, |
274 | struct amdgpu_job *job); | 305 | struct amdgpu_job *job); |
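The AMDGPU_VM_FAULT* macros added to amdgpu_vm.h above pack one pending fault into a single 64-bit kfifo entry: the PASID in the top 16 bits (only 16 bits fit above the 48-bit shift) and the page-aligned fault address in bits 12-47. A stand-alone check of that encoding; the PASID and address values are arbitrary examples:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same layout as AMDGPU_VM_FAULT / _PASID / _ADDR in amdgpu_vm.h; the
 * address is assumed to fit in 48 bits, as the macros also assume. */
static uint64_t fault_pack(uint64_t pasid, uint64_t addr)
{
	return (pasid << 48) | addr;
}

static uint64_t fault_pasid(uint64_t fault) { return fault >> 48; }
static uint64_t fault_addr(uint64_t fault)  { return fault & 0xfffffffff000ULL; }

int main(void)
{
	uint64_t f = fault_pack(42, 0x123456789abcULL);

	assert(fault_pasid(f) == 42);
	assert(fault_addr(f) == 0x123456789000ULL);   /* low 12 bits dropped */
	printf("fault word: 0x%016llx\n", (unsigned long long)f);
	return 0;
}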
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a2c59a08b2bd..26e900627971 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | |||
@@ -28,6 +28,8 @@ | |||
28 | struct amdgpu_vram_mgr { | 28 | struct amdgpu_vram_mgr { |
29 | struct drm_mm mm; | 29 | struct drm_mm mm; |
30 | spinlock_t lock; | 30 | spinlock_t lock; |
31 | atomic64_t usage; | ||
32 | atomic64_t vis_usage; | ||
31 | }; | 33 | }; |
32 | 34 | ||
33 | /** | 35 | /** |
@@ -79,6 +81,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) | |||
79 | } | 81 | } |
80 | 82 | ||
81 | /** | 83 | /** |
84 | * amdgpu_vram_mgr_vis_size - Calculate visible node size | ||
85 | * | ||
86 | * @adev: amdgpu device structure | ||
87 | * @node: MM node structure | ||
88 | * | ||
89 | * Calculate how many bytes of the MM node are inside visible VRAM | ||
90 | */ | ||
91 | static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, | ||
92 | struct drm_mm_node *node) | ||
93 | { | ||
94 | uint64_t start = node->start << PAGE_SHIFT; | ||
95 | uint64_t end = (node->size + node->start) << PAGE_SHIFT; | ||
96 | |||
97 | if (start >= adev->mc.visible_vram_size) | ||
98 | return 0; | ||
99 | |||
100 | return (end > adev->mc.visible_vram_size ? | ||
101 | adev->mc.visible_vram_size : end) - start; | ||
102 | } | ||
103 | |||
104 | /** | ||
82 | * amdgpu_vram_mgr_new - allocate new ranges | 105 | * amdgpu_vram_mgr_new - allocate new ranges |
83 | * | 106 | * |
84 | * @man: TTM memory type manager | 107 | * @man: TTM memory type manager |
@@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, | |||
93 | const struct ttm_place *place, | 116 | const struct ttm_place *place, |
94 | struct ttm_mem_reg *mem) | 117 | struct ttm_mem_reg *mem) |
95 | { | 118 | { |
119 | struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); | ||
96 | struct amdgpu_vram_mgr *mgr = man->priv; | 120 | struct amdgpu_vram_mgr *mgr = man->priv; |
97 | struct drm_mm *mm = &mgr->mm; | 121 | struct drm_mm *mm = &mgr->mm; |
98 | struct drm_mm_node *nodes; | 122 | struct drm_mm_node *nodes; |
99 | enum drm_mm_insert_mode mode; | 123 | enum drm_mm_insert_mode mode; |
100 | unsigned long lpfn, num_nodes, pages_per_node, pages_left; | 124 | unsigned long lpfn, num_nodes, pages_per_node, pages_left; |
125 | uint64_t usage = 0, vis_usage = 0; | ||
101 | unsigned i; | 126 | unsigned i; |
102 | int r; | 127 | int r; |
103 | 128 | ||
@@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, | |||
142 | if (unlikely(r)) | 167 | if (unlikely(r)) |
143 | goto error; | 168 | goto error; |
144 | 169 | ||
170 | usage += nodes[i].size << PAGE_SHIFT; | ||
171 | vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); | ||
172 | |||
145 | /* Calculate a virtual BO start address to easily check if | 173 | /* Calculate a virtual BO start address to easily check if |
146 | * everything is CPU accessible. | 174 | * everything is CPU accessible. |
147 | */ | 175 | */ |
@@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, | |||
155 | } | 183 | } |
156 | spin_unlock(&mgr->lock); | 184 | spin_unlock(&mgr->lock); |
157 | 185 | ||
186 | atomic64_add(usage, &mgr->usage); | ||
187 | atomic64_add(vis_usage, &mgr->vis_usage); | ||
188 | |||
158 | mem->mm_node = nodes; | 189 | mem->mm_node = nodes; |
159 | 190 | ||
160 | return 0; | 191 | return 0; |
@@ -181,8 +212,10 @@ error: | |||
181 | static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, | 212 | static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, |
182 | struct ttm_mem_reg *mem) | 213 | struct ttm_mem_reg *mem) |
183 | { | 214 | { |
215 | struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); | ||
184 | struct amdgpu_vram_mgr *mgr = man->priv; | 216 | struct amdgpu_vram_mgr *mgr = man->priv; |
185 | struct drm_mm_node *nodes = mem->mm_node; | 217 | struct drm_mm_node *nodes = mem->mm_node; |
218 | uint64_t usage = 0, vis_usage = 0; | ||
186 | unsigned pages = mem->num_pages; | 219 | unsigned pages = mem->num_pages; |
187 | 220 | ||
188 | if (!mem->mm_node) | 221 | if (!mem->mm_node) |
@@ -192,31 +225,67 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, | |||
192 | while (pages) { | 225 | while (pages) { |
193 | pages -= nodes->size; | 226 | pages -= nodes->size; |
194 | drm_mm_remove_node(nodes); | 227 | drm_mm_remove_node(nodes); |
228 | usage += nodes->size << PAGE_SHIFT; | ||
229 | vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); | ||
195 | ++nodes; | 230 | ++nodes; |
196 | } | 231 | } |
197 | spin_unlock(&mgr->lock); | 232 | spin_unlock(&mgr->lock); |
198 | 233 | ||
234 | atomic64_sub(usage, &mgr->usage); | ||
235 | atomic64_sub(vis_usage, &mgr->vis_usage); | ||
236 | |||
199 | kfree(mem->mm_node); | 237 | kfree(mem->mm_node); |
200 | mem->mm_node = NULL; | 238 | mem->mm_node = NULL; |
201 | } | 239 | } |
202 | 240 | ||
203 | /** | 241 | /** |
242 | * amdgpu_vram_mgr_usage - how many bytes are used in this domain | ||
243 | * | ||
244 | * @man: TTM memory type manager | ||
245 | * | ||
246 | * Returns how many bytes are used in this domain. | ||
247 | */ | ||
248 | uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man) | ||
249 | { | ||
250 | struct amdgpu_vram_mgr *mgr = man->priv; | ||
251 | |||
252 | return atomic64_read(&mgr->usage); | ||
253 | } | ||
254 | |||
255 | /** | ||
256 | * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part | ||
257 | * | ||
258 | * @man: TTM memory type manager | ||
259 | * | ||
260 | * Returns how many bytes are used in the visible part of VRAM | ||
261 | */ | ||
262 | uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man) | ||
263 | { | ||
264 | struct amdgpu_vram_mgr *mgr = man->priv; | ||
265 | |||
266 | return atomic64_read(&mgr->vis_usage); | ||
267 | } | ||
268 | |||
269 | /** | ||
204 | * amdgpu_vram_mgr_debug - dump VRAM table | 270 | * amdgpu_vram_mgr_debug - dump VRAM table |
205 | * | 271 | * |
206 | * @man: TTM memory type manager | 272 | * @man: TTM memory type manager |
207 | * @prefix: text prefix | 273 | * @printer: DRM printer to use |
208 | * | 274 | * |
209 | * Dump the table content using printk. | 275 | * Dump the table content using printk. |
210 | */ | 276 | */ |
211 | static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, | 277 | static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, |
212 | const char *prefix) | 278 | struct drm_printer *printer) |
213 | { | 279 | { |
214 | struct amdgpu_vram_mgr *mgr = man->priv; | 280 | struct amdgpu_vram_mgr *mgr = man->priv; |
215 | struct drm_printer p = drm_debug_printer(prefix); | ||
216 | 281 | ||
217 | spin_lock(&mgr->lock); | 282 | spin_lock(&mgr->lock); |
218 | drm_mm_print(&mgr->mm, &p); | 283 | drm_mm_print(&mgr->mm, printer); |
219 | spin_unlock(&mgr->lock); | 284 | spin_unlock(&mgr->lock); |
285 | |||
286 | drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", | ||
287 | man->size, amdgpu_vram_mgr_usage(man) >> 20, | ||
288 | amdgpu_vram_mgr_vis_usage(man) >> 20); | ||
220 | } | 289 | } |
221 | 290 | ||
222 | const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { | 291 | const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { |
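amdgpu_vram_mgr_vis_size above clips each drm_mm node against the CPU-visible VRAM limit; the per-allocation totals are then accumulated in the new usage/vis_usage atomics and reported by amdgpu_vram_mgr_debug. The clipping arithmetic on its own; the page shift and sizes in main() are example values, not driver defaults:

#include <stdint.h>
#include <stdio.h>

/* Simplified amdgpu_vram_mgr_vis_size: bytes of a node [start, start+size)
 * that fall below the CPU-visible VRAM limit. */
static uint64_t vis_bytes(uint64_t start_page, uint64_t num_pages,
			  uint64_t visible_vram_size, unsigned page_shift)
{
	uint64_t start = start_page << page_shift;
	uint64_t end = (start_page + num_pages) << page_shift;

	if (start >= visible_vram_size)
		return 0;
	if (end > visible_vram_size)
		end = visible_vram_size;
	return end - start;
}

int main(void)
{
	/* 256 MiB visible window, 4 KiB pages, node straddling the boundary */
	printf("visible bytes: %llu\n",
	       (unsigned long long)vis_bytes(65520, 32, 256ULL << 20, 12));
	return 0;
}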
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index d69aa2e179bb..69500a8b4e2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c | |||
@@ -1343,8 +1343,11 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) | |||
1343 | idx = 0x80; | 1343 | idx = 0x80; |
1344 | 1344 | ||
1345 | str = CSTR(idx); | 1345 | str = CSTR(idx); |
1346 | if (*str != '\0') | 1346 | if (*str != '\0') { |
1347 | pr_info("ATOM BIOS: %s\n", str); | 1347 | pr_info("ATOM BIOS: %s\n", str); |
1348 | strlcpy(ctx->vbios_version, str, sizeof(ctx->vbios_version)); | ||
1349 | } | ||
1350 | |||
1348 | 1351 | ||
1349 | return ctx; | 1352 | return ctx; |
1350 | } | 1353 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index ddd8045accf3..a39170991afe 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h | |||
@@ -140,6 +140,7 @@ struct atom_context { | |||
140 | int io_mode; | 140 | int io_mode; |
141 | uint32_t *scratch; | 141 | uint32_t *scratch; |
142 | int scratch_size_bytes; | 142 | int scratch_size_bytes; |
143 | char vbios_version[20]; | ||
143 | }; | 144 | }; |
144 | 145 | ||
145 | extern int amdgpu_atom_debug; | 146 | extern int amdgpu_atom_debug; |
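The atom.c/atom.h hunks above capture the ATOM BIOS banner into the new 20-byte ctx->vbios_version field with strlcpy so later consumers can report the VBIOS version. A userspace approximation using snprintf for the same truncate-and-terminate behaviour; the sample string is made up and the struct is only a stand-in for atom_context:

#include <stdio.h>

struct atom_ctx_model {
	char vbios_version[20];       /* same size as in atom.h above */
};

/* Copy the banner with guaranteed truncation and NUL termination. */
static void capture_vbios_version(struct atom_ctx_model *ctx, const char *str)
{
	if (*str != '\0') {
		printf("ATOM BIOS: %s\n", str);
		snprintf(ctx->vbios_version, sizeof(ctx->vbios_version), "%s", str);
	}
}

int main(void)
{
	struct atom_ctx_model ctx = { "" };

	capture_vbios_version(&ctx, "SAMPLE-VBIOS-1.0.42");   /* made-up string */
	printf("stored: %s\n", ctx.vbios_version);
	return 0;
}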
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index cb508a211b2f..68ce1bdaf2fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c | |||
@@ -307,7 +307,6 @@ static int ci_set_power_limit(struct amdgpu_device *adev, u32 n); | |||
307 | static int ci_set_overdrive_target_tdp(struct amdgpu_device *adev, | 307 | static int ci_set_overdrive_target_tdp(struct amdgpu_device *adev, |
308 | u32 target_tdp); | 308 | u32 target_tdp); |
309 | static int ci_update_uvd_dpm(struct amdgpu_device *adev, bool gate); | 309 | static int ci_update_uvd_dpm(struct amdgpu_device *adev, bool gate); |
310 | static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev); | ||
311 | static void ci_dpm_set_irq_funcs(struct amdgpu_device *adev); | 310 | static void ci_dpm_set_irq_funcs(struct amdgpu_device *adev); |
312 | 311 | ||
313 | static PPSMC_Result amdgpu_ci_send_msg_to_smc_with_parameter(struct amdgpu_device *adev, | 312 | static PPSMC_Result amdgpu_ci_send_msg_to_smc_with_parameter(struct amdgpu_device *adev, |
@@ -883,8 +882,9 @@ static int ci_power_control_set_level(struct amdgpu_device *adev) | |||
883 | return ret; | 882 | return ret; |
884 | } | 883 | } |
885 | 884 | ||
886 | static void ci_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate) | 885 | static void ci_dpm_powergate_uvd(void *handle, bool gate) |
887 | { | 886 | { |
887 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
888 | struct ci_power_info *pi = ci_get_pi(adev); | 888 | struct ci_power_info *pi = ci_get_pi(adev); |
889 | 889 | ||
890 | pi->uvd_power_gated = gate; | 890 | pi->uvd_power_gated = gate; |
@@ -901,8 +901,9 @@ static void ci_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate) | |||
901 | } | 901 | } |
902 | } | 902 | } |
903 | 903 | ||
904 | static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev) | 904 | static bool ci_dpm_vblank_too_short(void *handle) |
905 | { | 905 | { |
906 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
906 | u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); | 907 | u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); |
907 | u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; | 908 | u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; |
908 | 909 | ||
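The ci_dpm.c conversion in the hunks above and below follows one mechanical pattern: each powerplay-facing callback loses its typed struct amdgpu_device * parameter in favour of an opaque void *handle and recovers the device with a cast on entry. A minimal model of that indirection; the names and the threshold logic are illustrative, with the 300/450 values taken from the switch_limit line in the hunk above:

#include <stdio.h>

struct device_model {
	unsigned int vblank_time_us;
	unsigned int switch_limit_us;
};

/* Callback shaped like the converted ci_dpm_* functions: opaque handle in,
 * cast back to the concrete device type inside. */
static int model_vblank_too_short(void *handle)
{
	struct device_model *dev = handle;    /* cast back to the real type */

	return dev->vblank_time_us < dev->switch_limit_us;
}

int main(void)
{
	struct device_model dev = { 250, 300 };

	printf("vblank too short: %d\n", model_vblank_too_short(&dev));
	return 0;
}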
@@ -1210,11 +1211,12 @@ static int ci_fan_ctrl_stop_smc_fan_control(struct amdgpu_device *adev) | |||
1210 | } | 1211 | } |
1211 | } | 1212 | } |
1212 | 1213 | ||
1213 | static int ci_dpm_get_fan_speed_percent(struct amdgpu_device *adev, | 1214 | static int ci_dpm_get_fan_speed_percent(void *handle, |
1214 | u32 *speed) | 1215 | u32 *speed) |
1215 | { | 1216 | { |
1216 | u32 duty, duty100; | 1217 | u32 duty, duty100; |
1217 | u64 tmp64; | 1218 | u64 tmp64; |
1219 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1218 | 1220 | ||
1219 | if (adev->pm.no_fan) | 1221 | if (adev->pm.no_fan) |
1220 | return -ENOENT; | 1222 | return -ENOENT; |
@@ -1237,12 +1239,13 @@ static int ci_dpm_get_fan_speed_percent(struct amdgpu_device *adev, | |||
1237 | return 0; | 1239 | return 0; |
1238 | } | 1240 | } |
1239 | 1241 | ||
1240 | static int ci_dpm_set_fan_speed_percent(struct amdgpu_device *adev, | 1242 | static int ci_dpm_set_fan_speed_percent(void *handle, |
1241 | u32 speed) | 1243 | u32 speed) |
1242 | { | 1244 | { |
1243 | u32 tmp; | 1245 | u32 tmp; |
1244 | u32 duty, duty100; | 1246 | u32 duty, duty100; |
1245 | u64 tmp64; | 1247 | u64 tmp64; |
1248 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1246 | struct ci_power_info *pi = ci_get_pi(adev); | 1249 | struct ci_power_info *pi = ci_get_pi(adev); |
1247 | 1250 | ||
1248 | if (adev->pm.no_fan) | 1251 | if (adev->pm.no_fan) |
@@ -1271,8 +1274,10 @@ static int ci_dpm_set_fan_speed_percent(struct amdgpu_device *adev, | |||
1271 | return 0; | 1274 | return 0; |
1272 | } | 1275 | } |
1273 | 1276 | ||
1274 | static void ci_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode) | 1277 | static void ci_dpm_set_fan_control_mode(void *handle, u32 mode) |
1275 | { | 1278 | { |
1279 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1280 | |||
1276 | switch (mode) { | 1281 | switch (mode) { |
1277 | case AMD_FAN_CTRL_NONE: | 1282 | case AMD_FAN_CTRL_NONE: |
1278 | if (adev->pm.dpm.fan.ucode_fan_control) | 1283 | if (adev->pm.dpm.fan.ucode_fan_control) |
@@ -1292,8 +1297,9 @@ static void ci_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode) | |||
1292 | } | 1297 | } |
1293 | } | 1298 | } |
1294 | 1299 | ||
1295 | static u32 ci_dpm_get_fan_control_mode(struct amdgpu_device *adev) | 1300 | static u32 ci_dpm_get_fan_control_mode(void *handle) |
1296 | { | 1301 | { |
1302 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1297 | struct ci_power_info *pi = ci_get_pi(adev); | 1303 | struct ci_power_info *pi = ci_get_pi(adev); |
1298 | 1304 | ||
1299 | if (pi->fan_is_controlled_by_smc) | 1305 | if (pi->fan_is_controlled_by_smc) |
@@ -4378,9 +4384,10 @@ static u32 ci_get_lowest_enabled_level(struct amdgpu_device *adev, | |||
4378 | } | 4384 | } |
4379 | 4385 | ||
4380 | 4386 | ||
4381 | static int ci_dpm_force_performance_level(struct amdgpu_device *adev, | 4387 | static int ci_dpm_force_performance_level(void *handle, |
4382 | enum amd_dpm_forced_level level) | 4388 | enum amd_dpm_forced_level level) |
4383 | { | 4389 | { |
4390 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
4384 | struct ci_power_info *pi = ci_get_pi(adev); | 4391 | struct ci_power_info *pi = ci_get_pi(adev); |
4385 | u32 tmp, levels, i; | 4392 | u32 tmp, levels, i; |
4386 | int ret; | 4393 | int ret; |
@@ -5291,8 +5298,9 @@ static void ci_update_requested_ps(struct amdgpu_device *adev, | |||
5291 | adev->pm.dpm.requested_ps = &pi->requested_rps; | 5298 | adev->pm.dpm.requested_ps = &pi->requested_rps; |
5292 | } | 5299 | } |
5293 | 5300 | ||
5294 | static int ci_dpm_pre_set_power_state(struct amdgpu_device *adev) | 5301 | static int ci_dpm_pre_set_power_state(void *handle) |
5295 | { | 5302 | { |
5303 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
5296 | struct ci_power_info *pi = ci_get_pi(adev); | 5304 | struct ci_power_info *pi = ci_get_pi(adev); |
5297 | struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; | 5305 | struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; |
5298 | struct amdgpu_ps *new_ps = &requested_ps; | 5306 | struct amdgpu_ps *new_ps = &requested_ps; |
@@ -5304,8 +5312,9 @@ static int ci_dpm_pre_set_power_state(struct amdgpu_device *adev) | |||
5304 | return 0; | 5312 | return 0; |
5305 | } | 5313 | } |
5306 | 5314 | ||
5307 | static void ci_dpm_post_set_power_state(struct amdgpu_device *adev) | 5315 | static void ci_dpm_post_set_power_state(void *handle) |
5308 | { | 5316 | { |
5317 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
5309 | struct ci_power_info *pi = ci_get_pi(adev); | 5318 | struct ci_power_info *pi = ci_get_pi(adev); |
5310 | struct amdgpu_ps *new_ps = &pi->requested_rps; | 5319 | struct amdgpu_ps *new_ps = &pi->requested_rps; |
5311 | 5320 | ||
@@ -5479,8 +5488,9 @@ static void ci_dpm_disable(struct amdgpu_device *adev) | |||
5479 | ci_update_current_ps(adev, boot_ps); | 5488 | ci_update_current_ps(adev, boot_ps); |
5480 | } | 5489 | } |
5481 | 5490 | ||
5482 | static int ci_dpm_set_power_state(struct amdgpu_device *adev) | 5491 | static int ci_dpm_set_power_state(void *handle) |
5483 | { | 5492 | { |
5493 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
5484 | struct ci_power_info *pi = ci_get_pi(adev); | 5494 | struct ci_power_info *pi = ci_get_pi(adev); |
5485 | struct amdgpu_ps *new_ps = &pi->requested_rps; | 5495 | struct amdgpu_ps *new_ps = &pi->requested_rps; |
5486 | struct amdgpu_ps *old_ps = &pi->current_rps; | 5496 | struct amdgpu_ps *old_ps = &pi->current_rps; |
@@ -5551,8 +5561,10 @@ static void ci_dpm_reset_asic(struct amdgpu_device *adev) | |||
5551 | } | 5561 | } |
5552 | #endif | 5562 | #endif |
5553 | 5563 | ||
5554 | static void ci_dpm_display_configuration_changed(struct amdgpu_device *adev) | 5564 | static void ci_dpm_display_configuration_changed(void *handle) |
5555 | { | 5565 | { |
5566 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
5567 | |||
5556 | ci_program_display_gap(adev); | 5568 | ci_program_display_gap(adev); |
5557 | } | 5569 | } |
5558 | 5570 | ||
@@ -6105,9 +6117,10 @@ static int ci_dpm_init(struct amdgpu_device *adev) | |||
6105 | } | 6117 | } |
6106 | 6118 | ||
6107 | static void | 6119 | static void |
6108 | ci_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, | 6120 | ci_dpm_debugfs_print_current_performance_level(void *handle, |
6109 | struct seq_file *m) | 6121 | struct seq_file *m) |
6110 | { | 6122 | { |
6123 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6111 | struct ci_power_info *pi = ci_get_pi(adev); | 6124 | struct ci_power_info *pi = ci_get_pi(adev); |
6112 | struct amdgpu_ps *rps = &pi->current_rps; | 6125 | struct amdgpu_ps *rps = &pi->current_rps; |
6113 | u32 sclk = ci_get_average_sclk_freq(adev); | 6126 | u32 sclk = ci_get_average_sclk_freq(adev); |
@@ -6131,12 +6144,13 @@ ci_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, | |||
6131 | seq_printf(m, "GPU load: %u %%\n", activity_percent); | 6144 | seq_printf(m, "GPU load: %u %%\n", activity_percent); |
6132 | } | 6145 | } |
6133 | 6146 | ||
6134 | static void ci_dpm_print_power_state(struct amdgpu_device *adev, | 6147 | static void ci_dpm_print_power_state(void *handle, void *current_ps) |
6135 | struct amdgpu_ps *rps) | ||
6136 | { | 6148 | { |
6149 | struct amdgpu_ps *rps = (struct amdgpu_ps *)current_ps; | ||
6137 | struct ci_ps *ps = ci_get_ps(rps); | 6150 | struct ci_ps *ps = ci_get_ps(rps); |
6138 | struct ci_pl *pl; | 6151 | struct ci_pl *pl; |
6139 | int i; | 6152 | int i; |
6153 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6140 | 6154 | ||
6141 | amdgpu_dpm_print_class_info(rps->class, rps->class2); | 6155 | amdgpu_dpm_print_class_info(rps->class, rps->class2); |
6142 | amdgpu_dpm_print_cap_info(rps->caps); | 6156 | amdgpu_dpm_print_cap_info(rps->caps); |
@@ -6158,20 +6172,23 @@ static inline bool ci_are_power_levels_equal(const struct ci_pl *ci_cpl1, | |||
6158 | (ci_cpl1->pcie_lane == ci_cpl2->pcie_lane)); | 6172 | (ci_cpl1->pcie_lane == ci_cpl2->pcie_lane)); |
6159 | } | 6173 | } |
6160 | 6174 | ||
6161 | static int ci_check_state_equal(struct amdgpu_device *adev, | 6175 | static int ci_check_state_equal(void *handle, |
6162 | struct amdgpu_ps *cps, | 6176 | void *current_ps, |
6163 | struct amdgpu_ps *rps, | 6177 | void *request_ps, |
6164 | bool *equal) | 6178 | bool *equal) |
6165 | { | 6179 | { |
6166 | struct ci_ps *ci_cps; | 6180 | struct ci_ps *ci_cps; |
6167 | struct ci_ps *ci_rps; | 6181 | struct ci_ps *ci_rps; |
6168 | int i; | 6182 | int i; |
6183 | struct amdgpu_ps *cps = (struct amdgpu_ps *)current_ps; | ||
6184 | struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps; | ||
6185 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6169 | 6186 | ||
6170 | if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) | 6187 | if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) |
6171 | return -EINVAL; | 6188 | return -EINVAL; |
6172 | 6189 | ||
6173 | ci_cps = ci_get_ps(cps); | 6190 | ci_cps = ci_get_ps((struct amdgpu_ps *)cps); |
6174 | ci_rps = ci_get_ps(rps); | 6191 | ci_rps = ci_get_ps((struct amdgpu_ps *)rps); |
6175 | 6192 | ||
6176 | if (ci_cps == NULL) { | 6193 | if (ci_cps == NULL) { |
6177 | *equal = false; | 6194 | *equal = false; |
@@ -6199,8 +6216,9 @@ static int ci_check_state_equal(struct amdgpu_device *adev, | |||
6199 | return 0; | 6216 | return 0; |
6200 | } | 6217 | } |
6201 | 6218 | ||
6202 | static u32 ci_dpm_get_sclk(struct amdgpu_device *adev, bool low) | 6219 | static u32 ci_dpm_get_sclk(void *handle, bool low) |
6203 | { | 6220 | { |
6221 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6204 | struct ci_power_info *pi = ci_get_pi(adev); | 6222 | struct ci_power_info *pi = ci_get_pi(adev); |
6205 | struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); | 6223 | struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); |
6206 | 6224 | ||
@@ -6210,8 +6228,9 @@ static u32 ci_dpm_get_sclk(struct amdgpu_device *adev, bool low) | |||
6210 | return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; | 6228 | return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; |
6211 | } | 6229 | } |
6212 | 6230 | ||
6213 | static u32 ci_dpm_get_mclk(struct amdgpu_device *adev, bool low) | 6231 | static u32 ci_dpm_get_mclk(void *handle, bool low) |
6214 | { | 6232 | { |
6233 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6215 | struct ci_power_info *pi = ci_get_pi(adev); | 6234 | struct ci_power_info *pi = ci_get_pi(adev); |
6216 | struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); | 6235 | struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); |
6217 | 6236 | ||
@@ -6222,10 +6241,11 @@ static u32 ci_dpm_get_mclk(struct amdgpu_device *adev, bool low) | |||
6222 | } | 6241 | } |
6223 | 6242 | ||
6224 | /* get temperature in millidegrees */ | 6243 | /* get temperature in millidegrees */ |
6225 | static int ci_dpm_get_temp(struct amdgpu_device *adev) | 6244 | static int ci_dpm_get_temp(void *handle) |
6226 | { | 6245 | { |
6227 | u32 temp; | 6246 | u32 temp; |
6228 | int actual_temp = 0; | 6247 | int actual_temp = 0; |
6248 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6229 | 6249 | ||
6230 | temp = (RREG32_SMC(ixCG_MULT_THERMAL_STATUS) & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> | 6250 | temp = (RREG32_SMC(ixCG_MULT_THERMAL_STATUS) & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> |
6231 | CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; | 6251 | CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; |
@@ -6261,7 +6281,6 @@ static int ci_dpm_early_init(void *handle) | |||
6261 | { | 6281 | { |
6262 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 6282 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
6263 | 6283 | ||
6264 | ci_dpm_set_dpm_funcs(adev); | ||
6265 | ci_dpm_set_irq_funcs(adev); | 6284 | ci_dpm_set_irq_funcs(adev); |
6266 | 6285 | ||
6267 | return 0; | 6286 | return 0; |
@@ -6551,9 +6570,10 @@ static int ci_dpm_set_powergating_state(void *handle, | |||
6551 | return 0; | 6570 | return 0; |
6552 | } | 6571 | } |
6553 | 6572 | ||
6554 | static int ci_dpm_print_clock_levels(struct amdgpu_device *adev, | 6573 | static int ci_dpm_print_clock_levels(void *handle, |
6555 | enum pp_clock_type type, char *buf) | 6574 | enum pp_clock_type type, char *buf) |
6556 | { | 6575 | { |
6576 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6557 | struct ci_power_info *pi = ci_get_pi(adev); | 6577 | struct ci_power_info *pi = ci_get_pi(adev); |
6558 | struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table; | 6578 | struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table; |
6559 | struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table; | 6579 | struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table; |
@@ -6618,9 +6638,10 @@ static int ci_dpm_print_clock_levels(struct amdgpu_device *adev, | |||
6618 | return size; | 6638 | return size; |
6619 | } | 6639 | } |
6620 | 6640 | ||
6621 | static int ci_dpm_force_clock_level(struct amdgpu_device *adev, | 6641 | static int ci_dpm_force_clock_level(void *handle, |
6622 | enum pp_clock_type type, uint32_t mask) | 6642 | enum pp_clock_type type, uint32_t mask) |
6623 | { | 6643 | { |
6644 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6624 | struct ci_power_info *pi = ci_get_pi(adev); | 6645 | struct ci_power_info *pi = ci_get_pi(adev); |
6625 | 6646 | ||
6626 | if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO | | 6647 | if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO | |
@@ -6664,8 +6685,9 @@ static int ci_dpm_force_clock_level(struct amdgpu_device *adev, | |||
6664 | return 0; | 6685 | return 0; |
6665 | } | 6686 | } |
6666 | 6687 | ||
6667 | static int ci_dpm_get_sclk_od(struct amdgpu_device *adev) | 6688 | static int ci_dpm_get_sclk_od(void *handle) |
6668 | { | 6689 | { |
6690 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6669 | struct ci_power_info *pi = ci_get_pi(adev); | 6691 | struct ci_power_info *pi = ci_get_pi(adev); |
6670 | struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table); | 6692 | struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table); |
6671 | struct ci_single_dpm_table *golden_sclk_table = | 6693 | struct ci_single_dpm_table *golden_sclk_table = |
@@ -6680,8 +6702,9 @@ static int ci_dpm_get_sclk_od(struct amdgpu_device *adev) | |||
6680 | return value; | 6702 | return value; |
6681 | } | 6703 | } |
6682 | 6704 | ||
6683 | static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value) | 6705 | static int ci_dpm_set_sclk_od(void *handle, uint32_t value) |
6684 | { | 6706 | { |
6707 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6685 | struct ci_power_info *pi = ci_get_pi(adev); | 6708 | struct ci_power_info *pi = ci_get_pi(adev); |
6686 | struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps); | 6709 | struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps); |
6687 | struct ci_single_dpm_table *golden_sclk_table = | 6710 | struct ci_single_dpm_table *golden_sclk_table = |
@@ -6698,8 +6721,9 @@ static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value) | |||
6698 | return 0; | 6721 | return 0; |
6699 | } | 6722 | } |
6700 | 6723 | ||
6701 | static int ci_dpm_get_mclk_od(struct amdgpu_device *adev) | 6724 | static int ci_dpm_get_mclk_od(void *handle) |
6702 | { | 6725 | { |
6726 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6703 | struct ci_power_info *pi = ci_get_pi(adev); | 6727 | struct ci_power_info *pi = ci_get_pi(adev); |
6704 | struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table); | 6728 | struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table); |
6705 | struct ci_single_dpm_table *golden_mclk_table = | 6729 | struct ci_single_dpm_table *golden_mclk_table = |
@@ -6714,8 +6738,9 @@ static int ci_dpm_get_mclk_od(struct amdgpu_device *adev) | |||
6714 | return value; | 6738 | return value; |
6715 | } | 6739 | } |
6716 | 6740 | ||
6717 | static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value) | 6741 | static int ci_dpm_set_mclk_od(void *handle, uint32_t value) |
6718 | { | 6742 | { |
6743 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6719 | struct ci_power_info *pi = ci_get_pi(adev); | 6744 | struct ci_power_info *pi = ci_get_pi(adev); |
6720 | struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps); | 6745 | struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps); |
6721 | struct ci_single_dpm_table *golden_mclk_table = | 6746 | struct ci_single_dpm_table *golden_mclk_table = |
@@ -6732,9 +6757,10 @@ static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value) | |||
6732 | return 0; | 6757 | return 0; |
6733 | } | 6758 | } |
6734 | 6759 | ||
6735 | static int ci_dpm_get_power_profile_state(struct amdgpu_device *adev, | 6760 | static int ci_dpm_get_power_profile_state(void *handle, |
6736 | struct amd_pp_profile *query) | 6761 | struct amd_pp_profile *query) |
6737 | { | 6762 | { |
6763 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6738 | struct ci_power_info *pi = ci_get_pi(adev); | 6764 | struct ci_power_info *pi = ci_get_pi(adev); |
6739 | 6765 | ||
6740 | if (!pi || !query) | 6766 | if (!pi || !query) |
@@ -6851,9 +6877,10 @@ static int ci_set_power_profile_state(struct amdgpu_device *adev, | |||
6851 | return result; | 6877 | return result; |
6852 | } | 6878 | } |
6853 | 6879 | ||
6854 | static int ci_dpm_set_power_profile_state(struct amdgpu_device *adev, | 6880 | static int ci_dpm_set_power_profile_state(void *handle, |
6855 | struct amd_pp_profile *request) | 6881 | struct amd_pp_profile *request) |
6856 | { | 6882 | { |
6883 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6857 | struct ci_power_info *pi = ci_get_pi(adev); | 6884 | struct ci_power_info *pi = ci_get_pi(adev); |
6858 | int ret = -1; | 6885 | int ret = -1; |
6859 | 6886 | ||
@@ -6906,9 +6933,10 @@ static int ci_dpm_set_power_profile_state(struct amdgpu_device *adev, | |||
6906 | return 0; | 6933 | return 0; |
6907 | } | 6934 | } |
6908 | 6935 | ||
6909 | static int ci_dpm_reset_power_profile_state(struct amdgpu_device *adev, | 6936 | static int ci_dpm_reset_power_profile_state(void *handle, |
6910 | struct amd_pp_profile *request) | 6937 | struct amd_pp_profile *request) |
6911 | { | 6938 | { |
6939 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6912 | struct ci_power_info *pi = ci_get_pi(adev); | 6940 | struct ci_power_info *pi = ci_get_pi(adev); |
6913 | 6941 | ||
6914 | if (!pi || !request) | 6942 | if (!pi || !request) |
@@ -6927,9 +6955,10 @@ static int ci_dpm_reset_power_profile_state(struct amdgpu_device *adev, | |||
6927 | return -EINVAL; | 6955 | return -EINVAL; |
6928 | } | 6956 | } |
6929 | 6957 | ||
6930 | static int ci_dpm_switch_power_profile(struct amdgpu_device *adev, | 6958 | static int ci_dpm_switch_power_profile(void *handle, |
6931 | enum amd_pp_profile_type type) | 6959 | enum amd_pp_profile_type type) |
6932 | { | 6960 | { |
6961 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6933 | struct ci_power_info *pi = ci_get_pi(adev); | 6962 | struct ci_power_info *pi = ci_get_pi(adev); |
6934 | struct amd_pp_profile request = {0}; | 6963 | struct amd_pp_profile request = {0}; |
6935 | 6964 | ||
@@ -6944,11 +6973,12 @@ static int ci_dpm_switch_power_profile(struct amdgpu_device *adev, | |||
6944 | return 0; | 6973 | return 0; |
6945 | } | 6974 | } |
6946 | 6975 | ||
6947 | static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx, | 6976 | static int ci_dpm_read_sensor(void *handle, int idx, |
6948 | void *value, int *size) | 6977 | void *value, int *size) |
6949 | { | 6978 | { |
6950 | u32 activity_percent = 50; | 6979 | u32 activity_percent = 50; |
6951 | int ret; | 6980 | int ret; |
6981 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6952 | 6982 | ||
6953 | /* size must be at least 4 bytes for all sensors */ | 6983 | /* size must be at least 4 bytes for all sensors */ |
6954 | if (*size < 4) | 6984 | if (*size < 4) |
@@ -7003,7 +7033,7 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = { | |||
7003 | .set_powergating_state = ci_dpm_set_powergating_state, | 7033 | .set_powergating_state = ci_dpm_set_powergating_state, |
7004 | }; | 7034 | }; |
7005 | 7035 | ||
7006 | static const struct amdgpu_dpm_funcs ci_dpm_funcs = { | 7036 | const struct amd_pm_funcs ci_dpm_funcs = { |
7007 | .get_temperature = &ci_dpm_get_temp, | 7037 | .get_temperature = &ci_dpm_get_temp, |
7008 | .pre_set_power_state = &ci_dpm_pre_set_power_state, | 7038 | .pre_set_power_state = &ci_dpm_pre_set_power_state, |
7009 | .set_power_state = &ci_dpm_set_power_state, | 7039 | .set_power_state = &ci_dpm_set_power_state, |
@@ -7035,12 +7065,6 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = { | |||
7035 | .read_sensor = ci_dpm_read_sensor, | 7065 | .read_sensor = ci_dpm_read_sensor, |
7036 | }; | 7066 | }; |
7037 | 7067 | ||
7038 | static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev) | ||
7039 | { | ||
7040 | if (adev->pm.funcs == NULL) | ||
7041 | adev->pm.funcs = &ci_dpm_funcs; | ||
7042 | } | ||
7043 | |||
7044 | static const struct amdgpu_irq_src_funcs ci_dpm_irq_funcs = { | 7068 | static const struct amdgpu_irq_src_funcs ci_dpm_irq_funcs = { |
7045 | .set = ci_dpm_set_interrupt_state, | 7069 | .set = ci_dpm_set_interrupt_state, |
7046 | .process = ci_dpm_process_interrupt, | 7070 | .process = ci_dpm_process_interrupt, |
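The ci_dpm.c churn above is largely mechanical: every DPM callback that used to take a `struct amdgpu_device *adev` now takes an opaque `void *handle`, casts it back to the device as its first step, and the table is retyped from a file-local `struct amdgpu_dpm_funcs` to the shared `const struct amd_pm_funcs`, with `ci_dpm_set_dpm_funcs()` and the `adev->pm.funcs` assignment dropped from early_init. A minimal, self-contained sketch of that callback shape, using stand-in types rather than the real amdgpu headers:

```c
/* Simplified illustration of the void *handle callback style used above.
 * The struct names are stand-ins, not the real amdgpu definitions. */
#include <stdio.h>

struct fake_device {            /* stands in for struct amdgpu_device */
	unsigned int fan_speed_percent;
};

struct fake_pm_funcs {          /* stands in for struct amd_pm_funcs */
	int (*get_fan_speed_percent)(void *handle, unsigned int *speed);
};

static int fake_get_fan_speed_percent(void *handle, unsigned int *speed)
{
	/* first statement of every converted callback: recover the device */
	struct fake_device *dev = (struct fake_device *)handle;

	*speed = dev->fan_speed_percent;
	return 0;
}

static const struct fake_pm_funcs fake_funcs = {
	.get_fan_speed_percent = fake_get_fan_speed_percent,
};

int main(void)
{
	struct fake_device dev = { .fan_speed_percent = 42 };
	unsigned int speed;

	fake_funcs.get_fan_speed_percent(&dev, &speed);
	printf("fan: %u%%\n", speed);
	return 0;
}
```

The cast trades compile-time type checking for a single table type that both the legacy DPM paths and the powerplay code can share; exporting `ci_dpm_funcs` (see cik_dpm.h below) fits that direction.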
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h index b1c8e7b446ea..c7b4349f6319 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h | |||
@@ -26,5 +26,6 @@ | |||
26 | 26 | ||
27 | extern const struct amd_ip_funcs ci_dpm_ip_funcs; | 27 | extern const struct amd_ip_funcs ci_dpm_ip_funcs; |
28 | extern const struct amd_ip_funcs kv_dpm_ip_funcs; | 28 | extern const struct amd_ip_funcs kv_dpm_ip_funcs; |
29 | 29 | extern const struct amd_pm_funcs ci_dpm_funcs; | |
30 | extern const struct amd_pm_funcs kv_dpm_funcs; | ||
30 | #endif | 31 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index b8918432c572..07d3d895da10 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c | |||
@@ -228,6 +228,19 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev) | |||
228 | * [127:96] - reserved | 228 | * [127:96] - reserved |
229 | */ | 229 | */ |
230 | 230 | ||
231 | /** | ||
232 | * cik_ih_prescreen_iv - prescreen an interrupt vector | ||
233 | * | ||
234 | * @adev: amdgpu_device pointer | ||
235 | * | ||
236 | * Returns true if the interrupt vector should be further processed. | ||
237 | */ | ||
238 | static bool cik_ih_prescreen_iv(struct amdgpu_device *adev) | ||
239 | { | ||
240 | /* Process all interrupts */ | ||
241 | return true; | ||
242 | } | ||
243 | |||
231 | /** | 244 | /** |
232 | * cik_ih_decode_iv - decode an interrupt vector | 245 | * cik_ih_decode_iv - decode an interrupt vector |
233 | * | 246 | * |
@@ -433,6 +446,7 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { | |||
433 | 446 | ||
434 | static const struct amdgpu_ih_funcs cik_ih_funcs = { | 447 | static const struct amdgpu_ih_funcs cik_ih_funcs = { |
435 | .get_wptr = cik_ih_get_wptr, | 448 | .get_wptr = cik_ih_get_wptr, |
449 | .prescreen_iv = cik_ih_prescreen_iv, | ||
436 | .decode_iv = cik_ih_decode_iv, | 450 | .decode_iv = cik_ih_decode_iv, |
437 | .set_rptr = cik_ih_set_rptr | 451 | .set_rptr = cik_ih_set_rptr |
438 | }; | 452 | }; |
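cik_ih (and cz_ih further down) gain a `prescreen_iv` entry in `amdgpu_ih_funcs`, wired between `get_wptr` and `decode_iv`; on these older IH blocks it simply returns true, so every interrupt vector proceeds to decoding. The hook presumably exists so that newer IH implementations can drop uninteresting entries before paying for decode and dispatch. A self-contained sketch of that prescreen-then-decode loop shape, with stand-in types and an assumed drop condition (not the real amdgpu IH code):

```c
/* Stand-in sketch of a prescreen-then-decode IH processing loop.
 * The types and the "retry_fault" drop condition are illustrative only. */
#include <stdbool.h>
#include <stdio.h>

struct fake_iv {
	unsigned int src_id;
	bool retry_fault;        /* hypothetical cheap-to-check flag */
};

static bool fake_prescreen_iv(const struct fake_iv *iv)
{
	/* drop retry faults early; everything else is processed */
	return !iv->retry_fault;
}

static void fake_decode_and_dispatch(const struct fake_iv *iv)
{
	printf("dispatching src_id %u\n", iv->src_id);
}

int main(void)
{
	struct fake_iv ring[] = {
		{ .src_id = 146, .retry_fault = true  },
		{ .src_id = 42,  .retry_fault = false },
	};

	for (unsigned int i = 0; i < 2; i++) {
		if (!fake_prescreen_iv(&ring[i]))
			continue;        /* skipped before decode */
		fake_decode_and_dispatch(&ring[i]);
	}
	return 0;
}
```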
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index f508f4d01e4a..60cecd117705 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c | |||
@@ -1387,8 +1387,13 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev) | |||
1387 | } | 1387 | } |
1388 | 1388 | ||
1389 | static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { | 1389 | static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { |
1390 | .copy_pte_num_dw = 7, | ||
1390 | .copy_pte = cik_sdma_vm_copy_pte, | 1391 | .copy_pte = cik_sdma_vm_copy_pte, |
1392 | |||
1391 | .write_pte = cik_sdma_vm_write_pte, | 1393 | .write_pte = cik_sdma_vm_write_pte, |
1394 | |||
1395 | .set_max_nums_pte_pde = 0x1fffff >> 3, | ||
1396 | .set_pte_pde_num_dw = 10, | ||
1392 | .set_pte_pde = cik_sdma_vm_set_pte_pde, | 1397 | .set_pte_pde = cik_sdma_vm_set_pte_pde, |
1393 | }; | 1398 | }; |
1394 | 1399 | ||
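The new fields in `cik_sdma_vm_pte_funcs` are sizing hints rather than behaviour: `copy_pte_num_dw` and `set_pte_pde_num_dw` record how many dwords one SDMA copy-PTE or set-PTE/PDE command occupies, and `set_max_nums_pte_pde` (0x1fffff >> 3, i.e. 262143) caps how many entries a single command may cover, presumably so the common VM code can size its indirect buffers before emitting anything. A hedged, self-contained arithmetic sketch of that kind of sizing (the helper mirrors the idea, not the actual amdgpu_vm implementation):

```c
/* Illustrative IB sizing from the per-engine dword counts above.
 * This mirrors the idea, not the exact amdgpu_vm code. */
#include <stdio.h>

#define COPY_PTE_NUM_DW     7u                  /* dwords per copy-PTE command    */
#define SET_PTE_PDE_NUM_DW  10u                 /* dwords per set-PTE/PDE command */
#define MAX_NUMS_PTE_PDE    (0x1fffffu >> 3)    /* max entries per command        */

static unsigned int set_pte_pde_ib_dwords(unsigned int num_entries)
{
	/* number of set-PTE/PDE commands needed, rounding up */
	unsigned int cmds = (num_entries + MAX_NUMS_PTE_PDE - 1) / MAX_NUMS_PTE_PDE;

	return cmds * SET_PTE_PDE_NUM_DW;
}

int main(void)
{
	printf("1M PTEs need %u dwords of set-PTE/PDE commands\n",
	       set_pte_pde_ib_dwords(1u << 20));
	return 0;
}
```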
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 0c1209cdd1cb..b6cdf4afaf46 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c | |||
@@ -208,6 +208,19 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev) | |||
208 | } | 208 | } |
209 | 209 | ||
210 | /** | 210 | /** |
211 | * cz_ih_prescreen_iv - prescreen an interrupt vector | ||
212 | * | ||
213 | * @adev: amdgpu_device pointer | ||
214 | * | ||
215 | * Returns true if the interrupt vector should be further processed. | ||
216 | */ | ||
217 | static bool cz_ih_prescreen_iv(struct amdgpu_device *adev) | ||
218 | { | ||
219 | /* Process all interrupts */ | ||
220 | return true; | ||
221 | } | ||
222 | |||
223 | /** | ||
211 | * cz_ih_decode_iv - decode an interrupt vector | 224 | * cz_ih_decode_iv - decode an interrupt vector |
212 | * | 225 | * |
213 | * @adev: amdgpu_device pointer | 226 | * @adev: amdgpu_device pointer |
@@ -414,6 +427,7 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { | |||
414 | 427 | ||
415 | static const struct amdgpu_ih_funcs cz_ih_funcs = { | 428 | static const struct amdgpu_ih_funcs cz_ih_funcs = { |
416 | .get_wptr = cz_ih_get_wptr, | 429 | .get_wptr = cz_ih_get_wptr, |
430 | .prescreen_iv = cz_ih_prescreen_iv, | ||
417 | .decode_iv = cz_ih_decode_iv, | 431 | .decode_iv = cz_ih_decode_iv, |
418 | .set_rptr = cz_ih_set_rptr | 432 | .set_rptr = cz_ih_set_rptr |
419 | }; | 433 | }; |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index d228f5a99044..dbbe986f90f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | |||
@@ -636,7 +636,194 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) | |||
636 | NUM_BANKS(ADDR_SURF_2_BANK); | 636 | NUM_BANKS(ADDR_SURF_2_BANK); |
637 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) | 637 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) |
638 | WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); | 638 | WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); |
639 | } else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) { | 639 | } else if (adev->asic_type == CHIP_OLAND) { |
640 | tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | ||
641 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
642 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
643 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | ||
644 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
645 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
646 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
647 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); | ||
648 | tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | ||
649 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
650 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
651 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | ||
652 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
653 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
654 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
655 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); | ||
656 | tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | ||
657 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
658 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
659 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
660 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
661 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
662 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
663 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); | ||
664 | tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | ||
665 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
666 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
667 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | ||
668 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
669 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
670 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
671 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); | ||
672 | tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | ||
673 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | ||
674 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
675 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | ||
676 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
677 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
678 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
679 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
680 | tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | ||
681 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
682 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
683 | TILE_SPLIT(split_equal_to_row_size) | | ||
684 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
685 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
686 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
687 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
688 | tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | ||
689 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
690 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
691 | TILE_SPLIT(split_equal_to_row_size) | | ||
692 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
693 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
694 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
695 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
696 | tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | ||
697 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
698 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
699 | TILE_SPLIT(split_equal_to_row_size) | | ||
700 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
701 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
702 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
703 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); | ||
704 | tilemode[8] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
705 | ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | | ||
706 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
707 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | ||
708 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
709 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
710 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
711 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
712 | tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
713 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | ||
714 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
715 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | ||
716 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
717 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
718 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
719 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
720 | tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
721 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
722 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
723 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
724 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
725 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
726 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
727 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); | ||
728 | tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
729 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
730 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
731 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
732 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
733 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
734 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
735 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
736 | tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
737 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
738 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
739 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | ||
740 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
741 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
742 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
743 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
744 | tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
745 | ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | ||
746 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
747 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | ||
748 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
749 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
750 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
751 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
752 | tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
753 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
754 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
755 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
756 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
757 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
758 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
759 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
760 | tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
761 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
762 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
763 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
764 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
765 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
766 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
767 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
768 | tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
769 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
770 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
771 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | ||
772 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
773 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
774 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
775 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
776 | tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
777 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
778 | PIPE_CONFIG(ADDR_SURF_P4_8x16) | | ||
779 | TILE_SPLIT(split_equal_to_row_size) | | ||
780 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
781 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
782 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
783 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
784 | tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
785 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
786 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | ||
787 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
788 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
789 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | | ||
790 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
791 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
792 | tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
793 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
794 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | ||
795 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
796 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
797 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
798 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
799 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); | ||
800 | tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
801 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
802 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | ||
803 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
804 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
805 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
806 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
807 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
808 | tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
809 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
810 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | ||
811 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | ||
812 | NUM_BANKS(ADDR_SURF_16_BANK) | | ||
813 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
814 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
815 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); | ||
816 | tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | | ||
817 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
818 | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | | ||
819 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | | ||
820 | NUM_BANKS(ADDR_SURF_8_BANK) | | ||
821 | BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
822 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
823 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1); | ||
824 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) | ||
825 | WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); | ||
826 | } else if (adev->asic_type == CHIP_HAINAN) { | ||
640 | tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | | 827 | tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | |
641 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 828 | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
642 | PIPE_CONFIG(ADDR_SURF_P2) | | 829 | PIPE_CONFIG(ADDR_SURF_P2) | |
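The gfx_v6_0.c change splits OLAND out of the tiling-mode branch it previously shared with HAINAN: the new OLAND table programs a 4-pipe layout (ADDR_SURF_P4_8x16, with P8_32x32_8x16 for the later entries), while HAINAN keeps the 2-pipe ADDR_SURF_P2 table. Each of those ~190 lines is just a register word built by OR-ing shifted bitfields; a tiny self-contained sketch of that composition, with placeholder shifts rather than the real gfx6 register layout:

```c
/* Illustrative composition of one GB_TILE_MODEn word: each entry above is
 * an OR of shifted bitfields. The shift values are placeholders, not the
 * real gfx6 register layout. */
#include <stdint.h>
#include <stdio.h>

#define EX_MICRO_TILE_MODE(x)  ((uint32_t)(x) << 0)   /* placeholder shift */
#define EX_ARRAY_MODE(x)       ((uint32_t)(x) << 2)   /* placeholder shift */
#define EX_PIPE_CONFIG(x)      ((uint32_t)(x) << 6)   /* placeholder shift */
#define EX_TILE_SPLIT(x)       ((uint32_t)(x) << 11)  /* placeholder shift */

int main(void)
{
	/* one hypothetical 2D-thin1 entry with a 4-pipe config */
	uint32_t entry = EX_MICRO_TILE_MODE(1) |
			 EX_ARRAY_MODE(4) |
			 EX_PIPE_CONFIG(5) |
			 EX_TILE_SPLIT(2);

	printf("GB_TILE_MODEn = 0x%08x\n", (unsigned)entry);
	return 0;
}
```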
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 53a4af7596c1..00868764a0dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | |||
@@ -1921,6 +1921,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) | |||
1921 | ELEMENT_SIZE, 1); | 1921 | ELEMENT_SIZE, 1); |
1922 | sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, | 1922 | sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, |
1923 | INDEX_STRIDE, 3); | 1923 | INDEX_STRIDE, 3); |
1924 | WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); | ||
1924 | 1925 | ||
1925 | mutex_lock(&adev->srbm_mutex); | 1926 | mutex_lock(&adev->srbm_mutex); |
1926 | for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { | 1927 | for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { |
@@ -1934,7 +1935,6 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) | |||
1934 | WREG32(mmSH_MEM_APE1_BASE, 1); | 1935 | WREG32(mmSH_MEM_APE1_BASE, 1); |
1935 | WREG32(mmSH_MEM_APE1_LIMIT, 0); | 1936 | WREG32(mmSH_MEM_APE1_LIMIT, 0); |
1936 | WREG32(mmSH_MEM_BASES, sh_mem_base); | 1937 | WREG32(mmSH_MEM_BASES, sh_mem_base); |
1937 | WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); | ||
1938 | } | 1938 | } |
1939 | cik_srbm_select(adev, 0, 0, 0, 0); | 1939 | cik_srbm_select(adev, 0, 0, 0, 0); |
1940 | mutex_unlock(&adev->srbm_mutex); | 1940 | mutex_unlock(&adev->srbm_mutex); |
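In gfx_v7_0_gpu_init() the SH_STATIC_MEM_CONFIG write is hoisted out of the per-VMID SRBM loop and done once before taking srbm_mutex (gfx_v8_0_gpu_init() gets the same treatment below), presumably because the register is global rather than SRBM-banked, so one write suffices and the redundant per-iteration MMIO goes away. A stand-alone sketch of the same loop-invariant hoisting pattern, with stand-in helpers instead of WREG32/srbm_select:

```c
/* Loop-invariant register write hoisted out of a per-VMID loop.
 * reg_write()/select_vmid() are stand-ins for WREG32/srbm_select. */
#include <stdio.h>

#define NUM_VMIDS 16

static void reg_write(const char *reg, unsigned int val)
{
	printf("write %s = 0x%x\n", reg, val);
}

static void select_vmid(unsigned int vmid)
{
	printf("select vmid %u\n", vmid);
}

int main(void)
{
	unsigned int static_mem_cfg = 0x3;    /* computed once */

	/* global (non-banked) register: program it once, up front */
	reg_write("SH_STATIC_MEM_CONFIG", static_mem_cfg);

	for (unsigned int vmid = 0; vmid < NUM_VMIDS; vmid++) {
		select_vmid(vmid);
		/* only per-VMID (banked) registers stay inside the loop */
		reg_write("SH_MEM_CONFIG", 0);
		reg_write("SH_MEM_BASES", vmid);
	}
	select_vmid(0);
	return 0;
}
```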
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0710b0b2e4b6..dfc10b1baea0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -125,24 +125,39 @@ MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); | |||
125 | MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); | 125 | MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); |
126 | 126 | ||
127 | MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); | 127 | MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); |
128 | MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); | ||
128 | MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); | 129 | MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); |
130 | MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); | ||
129 | MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); | 131 | MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); |
132 | MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); | ||
130 | MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); | 133 | MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); |
134 | MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); | ||
131 | MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); | 135 | MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); |
136 | MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); | ||
132 | MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); | 137 | MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); |
133 | 138 | ||
134 | MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); | 139 | MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); |
140 | MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); | ||
135 | MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); | 141 | MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); |
142 | MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin"); | ||
136 | MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); | 143 | MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); |
144 | MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin"); | ||
137 | MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); | 145 | MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); |
146 | MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin"); | ||
138 | MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); | 147 | MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); |
148 | MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); | ||
139 | MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); | 149 | MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); |
140 | 150 | ||
141 | MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); | 151 | MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); |
152 | MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); | ||
142 | MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); | 153 | MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); |
154 | MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin"); | ||
143 | MODULE_FIRMWARE("amdgpu/polaris12_me.bin"); | 155 | MODULE_FIRMWARE("amdgpu/polaris12_me.bin"); |
156 | MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin"); | ||
144 | MODULE_FIRMWARE("amdgpu/polaris12_mec.bin"); | 157 | MODULE_FIRMWARE("amdgpu/polaris12_mec.bin"); |
158 | MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin"); | ||
145 | MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); | 159 | MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); |
160 | MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); | ||
146 | MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); | 161 | MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); |
147 | 162 | ||
148 | static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = | 163 | static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = |
@@ -918,8 +933,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) | |||
918 | BUG(); | 933 | BUG(); |
919 | } | 934 | } |
920 | 935 | ||
921 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); | 936 | if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { |
922 | err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); | 937 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name); |
938 | err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); | ||
939 | if (err == -ENOENT) { | ||
940 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); | ||
941 | err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); | ||
942 | } | ||
943 | } else { | ||
944 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); | ||
945 | err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); | ||
946 | } | ||
923 | if (err) | 947 | if (err) |
924 | goto out; | 948 | goto out; |
925 | err = amdgpu_ucode_validate(adev->gfx.pfp_fw); | 949 | err = amdgpu_ucode_validate(adev->gfx.pfp_fw); |
@@ -929,8 +953,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) | |||
929 | adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); | 953 | adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); |
930 | adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); | 954 | adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); |
931 | 955 | ||
932 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); | 956 | if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { |
933 | err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); | 957 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name); |
958 | err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); | ||
959 | if (err == -ENOENT) { | ||
960 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); | ||
961 | err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); | ||
962 | } | ||
963 | } else { | ||
964 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); | ||
965 | err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); | ||
966 | } | ||
934 | if (err) | 967 | if (err) |
935 | goto out; | 968 | goto out; |
936 | err = amdgpu_ucode_validate(adev->gfx.me_fw); | 969 | err = amdgpu_ucode_validate(adev->gfx.me_fw); |
@@ -941,8 +974,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) | |||
941 | 974 | ||
942 | adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); | 975 | adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); |
943 | 976 | ||
944 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); | 977 | if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { |
945 | err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); | 978 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name); |
979 | err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); | ||
980 | if (err == -ENOENT) { | ||
981 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); | ||
982 | err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); | ||
983 | } | ||
984 | } else { | ||
985 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); | ||
986 | err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); | ||
987 | } | ||
946 | if (err) | 988 | if (err) |
947 | goto out; | 989 | goto out; |
948 | err = amdgpu_ucode_validate(adev->gfx.ce_fw); | 990 | err = amdgpu_ucode_validate(adev->gfx.ce_fw); |
@@ -1012,8 +1054,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) | |||
1012 | for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) | 1054 | for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) |
1013 | adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); | 1055 | adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); |
1014 | 1056 | ||
1015 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | 1057 | if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { |
1016 | err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); | 1058 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name); |
1059 | err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); | ||
1060 | if (err == -ENOENT) { | ||
1061 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | ||
1062 | err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); | ||
1063 | } | ||
1064 | } else { | ||
1065 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | ||
1066 | err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); | ||
1067 | } | ||
1017 | if (err) | 1068 | if (err) |
1018 | goto out; | 1069 | goto out; |
1019 | err = amdgpu_ucode_validate(adev->gfx.mec_fw); | 1070 | err = amdgpu_ucode_validate(adev->gfx.mec_fw); |
@@ -1025,8 +1076,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) | |||
1025 | 1076 | ||
1026 | if ((adev->asic_type != CHIP_STONEY) && | 1077 | if ((adev->asic_type != CHIP_STONEY) && |
1027 | (adev->asic_type != CHIP_TOPAZ)) { | 1078 | (adev->asic_type != CHIP_TOPAZ)) { |
1028 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); | 1079 | if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { |
1029 | err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); | 1080 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name); |
1081 | err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); | ||
1082 | if (err == -ENOENT) { | ||
1083 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); | ||
1084 | err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); | ||
1085 | } | ||
1086 | } else { | ||
1087 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); | ||
1088 | err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); | ||
1089 | } | ||
1030 | if (!err) { | 1090 | if (!err) { |
1031 | err = amdgpu_ucode_validate(adev->gfx.mec2_fw); | 1091 | err = amdgpu_ucode_validate(adev->gfx.mec2_fw); |
1032 | if (err) | 1092 | if (err) |
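For Polaris10/11/12, each CP firmware (pfp, me, ce, mec, mec2) is now requested first under the new "<chip>_<block>_2.bin" name and, only if that file is absent (-ENOENT), falls back to the original "<chip>_<block>.bin"; other ASICs keep the single old name. The same request/fallback stanza is repeated for every block above. A hedged sketch of how it could be factored into one helper; this refactor and the `example_request_cp_fw` name are illustrative only, not part of the commit:

```c
/* Hypothetical helper factoring out the "_2.bin then fallback" request
 * pattern repeated above. Not part of the actual commit. */
static int example_request_cp_fw(struct amdgpu_device *adev,
				 const struct firmware **fw,
				 const char *chip_name, const char *block)
{
	char fw_name[40];
	int err;

	if (adev->asic_type >= CHIP_POLARIS10 &&
	    adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%s_2.bin",
			 chip_name, block);
		err = request_firmware(fw, fw_name, adev->dev);
		if (err != -ENOENT)
			return err;     /* found, or a real error */
	}

	/* fall back to the original firmware name */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%s.bin",
		 chip_name, block);
	return request_firmware(fw, fw_name, adev->dev);
}
```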
@@ -2053,6 +2113,7 @@ static int gfx_v8_0_sw_fini(void *handle) | |||
2053 | amdgpu_gfx_compute_mqd_sw_fini(adev); | 2113 | amdgpu_gfx_compute_mqd_sw_fini(adev); |
2054 | amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); | 2114 | amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); |
2055 | amdgpu_gfx_kiq_fini(adev); | 2115 | amdgpu_gfx_kiq_fini(adev); |
2116 | amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL); | ||
2056 | 2117 | ||
2057 | gfx_v8_0_mec_fini(adev); | 2118 | gfx_v8_0_mec_fini(adev); |
2058 | gfx_v8_0_rlc_fini(adev); | 2119 | gfx_v8_0_rlc_fini(adev); |
@@ -3707,6 +3768,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) | |||
3707 | ELEMENT_SIZE, 1); | 3768 | ELEMENT_SIZE, 1); |
3708 | sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, | 3769 | sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, |
3709 | INDEX_STRIDE, 3); | 3770 | INDEX_STRIDE, 3); |
3771 | WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); | ||
3772 | |||
3710 | mutex_lock(&adev->srbm_mutex); | 3773 | mutex_lock(&adev->srbm_mutex); |
3711 | for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { | 3774 | for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { |
3712 | vi_srbm_select(adev, 0, 0, 0, i); | 3775 | vi_srbm_select(adev, 0, 0, 0, i); |
@@ -3730,7 +3793,6 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) | |||
3730 | 3793 | ||
3731 | WREG32(mmSH_MEM_APE1_BASE, 1); | 3794 | WREG32(mmSH_MEM_APE1_BASE, 1); |
3732 | WREG32(mmSH_MEM_APE1_LIMIT, 0); | 3795 | WREG32(mmSH_MEM_APE1_LIMIT, 0); |
3733 | WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); | ||
3734 | } | 3796 | } |
3735 | vi_srbm_select(adev, 0, 0, 0, 0); | 3797 | vi_srbm_select(adev, 0, 0, 0, 0); |
3736 | mutex_unlock(&adev->srbm_mutex); | 3798 | mutex_unlock(&adev->srbm_mutex); |
@@ -4576,12 +4638,10 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) | |||
4576 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; | 4638 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; |
4577 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; | 4639 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; |
4578 | mqd->compute_misc_reserved = 0x00000003; | 4640 | mqd->compute_misc_reserved = 0x00000003; |
4579 | if (!(adev->flags & AMD_IS_APU)) { | 4641 | mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr |
4580 | mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr | 4642 | + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); |
4581 | + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); | 4643 | mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr |
4582 | mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr | 4644 | + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); |
4583 | + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); | ||
4584 | } | ||
4585 | eop_base_addr = ring->eop_gpu_addr >> 8; | 4645 | eop_base_addr = ring->eop_gpu_addr >> 8; |
4586 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; | 4646 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; |
4587 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); | 4647 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); |
@@ -4752,7 +4812,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) | |||
4752 | 4812 | ||
4753 | gfx_v8_0_kiq_setting(ring); | 4813 | gfx_v8_0_kiq_setting(ring); |
4754 | 4814 | ||
4755 | if (adev->gfx.in_reset) { /* for GPU_RESET case */ | 4815 | if (adev->in_sriov_reset) { /* for GPU_RESET case */ |
4756 | /* reset MQD to a clean status */ | 4816 | /* reset MQD to a clean status */ |
4757 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4817 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4758 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); | 4818 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); |
@@ -4767,8 +4827,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) | |||
4767 | mutex_unlock(&adev->srbm_mutex); | 4827 | mutex_unlock(&adev->srbm_mutex); |
4768 | } else { | 4828 | } else { |
4769 | memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); | 4829 | memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); |
4770 | ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; | 4830 | ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; |
4771 | ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; | 4831 | ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; |
4772 | mutex_lock(&adev->srbm_mutex); | 4832 | mutex_lock(&adev->srbm_mutex); |
4773 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | 4833 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
4774 | gfx_v8_0_mqd_init(ring); | 4834 | gfx_v8_0_mqd_init(ring); |
@@ -4789,10 +4849,10 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) | |||
4789 | struct vi_mqd *mqd = ring->mqd_ptr; | 4849 | struct vi_mqd *mqd = ring->mqd_ptr; |
4790 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; | 4850 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; |
4791 | 4851 | ||
4792 | if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { | 4852 | if (!adev->in_sriov_reset && !adev->gfx.in_suspend) { |
4793 | memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); | 4853 | memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); |
4794 | ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; | 4854 | ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; |
4795 | ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; | 4855 | ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; |
4796 | mutex_lock(&adev->srbm_mutex); | 4856 | mutex_lock(&adev->srbm_mutex); |
4797 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | 4857 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
4798 | gfx_v8_0_mqd_init(ring); | 4858 | gfx_v8_0_mqd_init(ring); |
@@ -4801,7 +4861,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) | |||
4801 | 4861 | ||
4802 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4862 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4803 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); | 4863 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); |
4804 | } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ | 4864 | } else if (adev->in_sriov_reset) { /* for GPU_RESET case */ |
4805 | /* reset MQD to a clean status */ | 4865 | /* reset MQD to a clean status */ |
4806 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4866 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4807 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); | 4867 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); |
@@ -4974,12 +5034,69 @@ static int gfx_v8_0_hw_init(void *handle) | |||
4974 | return r; | 5034 | return r; |
4975 | } | 5035 | } |
4976 | 5036 | ||
5037 | static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) | ||
5038 | { | ||
5039 | struct amdgpu_device *adev = kiq_ring->adev; | ||
5040 | uint32_t scratch, tmp = 0; | ||
5041 | int r, i; | ||
5042 | |||
5043 | r = amdgpu_gfx_scratch_get(adev, &scratch); | ||
5044 | if (r) { | ||
5045 | DRM_ERROR("Failed to get scratch reg (%d).\n", r); | ||
5046 | return r; | ||
5047 | } | ||
5048 | WREG32(scratch, 0xCAFEDEAD); | ||
5049 | |||
5050 | r = amdgpu_ring_alloc(kiq_ring, 10); | ||
5051 | if (r) { | ||
5052 | DRM_ERROR("Failed to lock KIQ (%d).\n", r); | ||
5053 | amdgpu_gfx_scratch_free(adev, scratch); | ||
5054 | return r; | ||
5055 | } | ||
5056 | |||
5057 | /* unmap queues */ | ||
5058 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); | ||
5059 | amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ | ||
5060 | PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ | ||
5061 | PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | | ||
5062 | PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | | ||
5063 | PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); | ||
5064 | amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); | ||
5065 | amdgpu_ring_write(kiq_ring, 0); | ||
5066 | amdgpu_ring_write(kiq_ring, 0); | ||
5067 | amdgpu_ring_write(kiq_ring, 0); | ||
5068 | /* write to scratch for completion */ | ||
5069 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | ||
5070 | amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | ||
5071 | amdgpu_ring_write(kiq_ring, 0xDEADBEEF); | ||
5072 | amdgpu_ring_commit(kiq_ring); | ||
5073 | |||
5074 | for (i = 0; i < adev->usec_timeout; i++) { | ||
5075 | tmp = RREG32(scratch); | ||
5076 | if (tmp == 0xDEADBEEF) | ||
5077 | break; | ||
5078 | DRM_UDELAY(1); | ||
5079 | } | ||
5080 | if (i >= adev->usec_timeout) { | ||
5081 | DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); | ||
5082 | r = -EINVAL; | ||
5083 | } | ||
5084 | amdgpu_gfx_scratch_free(adev, scratch); | ||
5085 | return r; | ||
5086 | } | ||
5087 | |||
4977 | static int gfx_v8_0_hw_fini(void *handle) | 5088 | static int gfx_v8_0_hw_fini(void *handle) |
4978 | { | 5089 | { |
4979 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 5090 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
5091 | int i; | ||
4980 | 5092 | ||
4981 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); | 5093 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); |
4982 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); | 5094 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); |
5095 | |||
5096 | /* disable KCQ to avoid CPC touch memory not valid anymore */ | ||
5097 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | ||
5098 | gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); | ||
5099 | |||
4983 | if (amdgpu_sriov_vf(adev)) { | 5100 | if (amdgpu_sriov_vf(adev)) { |
4984 | pr_debug("For SRIOV client, shouldn't do anything.\n"); | 5101 | pr_debug("For SRIOV client, shouldn't do anything.\n"); |
4985 | return 0; | 5102 | return 0; |
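The new gfx_v8_0_kcq_disable() borrows the KIQ ring to tear down a compute queue before hw_fini proceeds: it reserves a scratch register seeded with 0xCAFEDEAD, emits PACKET3_UNMAP_QUEUES (RESET_QUEUES action, selecting the queue by its doorbell offset), follows it with a SET_UCONFIG_REG write of 0xDEADBEEF to that scratch register, commits, and then busy-waits up to usec_timeout until the value appears, so the scratch write doubles as a completion fence for the unmap. hw_fini now calls it for every compute ring so the CPC stops touching MQD and ring memory that is about to become invalid. A minimal stand-alone sketch of that seed/submit/poll completion idiom (the "hardware" here is a plain function, not the real KIQ path):

```c
/* Stand-in sketch of the seed/submit/poll completion idiom used by
 * gfx_v8_0_kcq_disable(): seed a scratch word, queue work whose last
 * command overwrites it, then poll with a bounded timeout. */
#include <stdint.h>
#include <stdio.h>

#define SEED     0xCAFEDEADu
#define DONE     0xDEADBEEFu
#define TIMEOUT  100000

static volatile uint32_t scratch = SEED;      /* stands in for the scratch reg */

/* stands in for the KIQ executing UNMAP_QUEUES then SET_UCONFIG_REG */
static void fake_kiq_execute(void)
{
	/* ... unmap the compute queue ... */
	scratch = DONE;                       /* completion marker */
}

int main(void)
{
	int i;

	scratch = SEED;
	fake_kiq_execute();                   /* in reality: ring_commit, HW runs */

	for (i = 0; i < TIMEOUT; i++) {
		if (scratch == DONE)
			break;
		/* DRM_UDELAY(1) in the real code */
	}
	puts(i < TIMEOUT ? "unmap completed" : "unmap timed out");
	return 0;
}
```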
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b39f81dda847..deeaee1457ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | |||
@@ -66,38 +66,70 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); | |||
66 | 66 | ||
67 | static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = | 67 | static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = |
68 | { | 68 | { |
69 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), | 69 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), |
70 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)}, | 70 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), |
71 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE), | 71 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), |
72 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)}, | 72 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) }, |
73 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE), | 73 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), |
74 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)}, | 74 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE), |
75 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE), | 75 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), |
76 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)}, | 76 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1) }, |
77 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE), | 77 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), |
78 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)}, | 78 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE), |
79 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE), | 79 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), |
80 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)}, | 80 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2) }, |
81 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE), | 81 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), |
82 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)}, | 82 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE), |
83 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE), | 83 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), |
84 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)}, | 84 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3) }, |
85 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE), | 85 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), |
86 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)}, | 86 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE), |
87 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE), | 87 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), |
88 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)}, | 88 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4) }, |
89 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE), | 89 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), |
90 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)}, | 90 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE), |
91 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE), | 91 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), |
92 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)}, | 92 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5) }, |
93 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE), | 93 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), |
94 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)}, | 94 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE), |
95 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE), | 95 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), |
96 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)}, | 96 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6) }, |
97 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE), | 97 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), |
98 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)}, | 98 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE), |
99 | {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE), | 99 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), |
100 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)} | 100 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7) }, |
101 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), | ||
102 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE), | ||
103 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), | ||
104 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8) }, | ||
105 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), | ||
106 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE), | ||
107 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), | ||
108 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9) }, | ||
109 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), | ||
110 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE), | ||
111 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), | ||
112 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10) }, | ||
113 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), | ||
114 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE), | ||
115 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), | ||
116 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11) }, | ||
117 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), | ||
118 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE), | ||
119 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), | ||
120 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)}, | ||
121 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), | ||
122 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE), | ||
123 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), | ||
124 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13) }, | ||
125 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), | ||
126 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE), | ||
127 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), | ||
128 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14) }, | ||
129 | { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), | ||
130 | SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE), | ||
131 | SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), | ||
132 | SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15) } | ||
101 | }; | 133 | }; |
102 | 134 | ||
103 | static const u32 golden_settings_gc_9_0[] = | 135 | static const u32 golden_settings_gc_9_0[] = |
@@ -352,6 +384,25 @@ err1: | |||
352 | return r; | 384 | return r; |
353 | } | 385 | } |
354 | 386 | ||
387 | |||
388 | static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) | ||
389 | { | ||
390 | release_firmware(adev->gfx.pfp_fw); | ||
391 | adev->gfx.pfp_fw = NULL; | ||
392 | release_firmware(adev->gfx.me_fw); | ||
393 | adev->gfx.me_fw = NULL; | ||
394 | release_firmware(adev->gfx.ce_fw); | ||
395 | adev->gfx.ce_fw = NULL; | ||
396 | release_firmware(adev->gfx.rlc_fw); | ||
397 | adev->gfx.rlc_fw = NULL; | ||
398 | release_firmware(adev->gfx.mec_fw); | ||
399 | adev->gfx.mec_fw = NULL; | ||
400 | release_firmware(adev->gfx.mec2_fw); | ||
401 | adev->gfx.mec2_fw = NULL; | ||
402 | |||
403 | kfree(adev->gfx.rlc.register_list_format); | ||
404 | } | ||
405 | |||
355 | static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | 406 | static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) |
356 | { | 407 | { |
357 | const char *chip_name; | 408 | const char *chip_name; |
@@ -1120,30 +1171,22 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) | |||
1120 | { | 1171 | { |
1121 | struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; | 1172 | struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; |
1122 | int r; | 1173 | int r; |
1123 | u32 data; | 1174 | u32 data, base; |
1124 | u32 size; | ||
1125 | u32 base; | ||
1126 | 1175 | ||
1127 | if (!amdgpu_ngg) | 1176 | if (!amdgpu_ngg) |
1128 | return 0; | 1177 | return 0; |
1129 | 1178 | ||
1130 | /* Program buffer size */ | 1179 | /* Program buffer size */ |
1131 | data = 0; | 1180 | data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, |
1132 | size = adev->gfx.ngg.buf[NGG_PRIM].size / 256; | 1181 | adev->gfx.ngg.buf[NGG_PRIM].size >> 8); |
1133 | data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size); | 1182 | data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, |
1134 | 1183 | adev->gfx.ngg.buf[NGG_POS].size >> 8); | |
1135 | size = adev->gfx.ngg.buf[NGG_POS].size / 256; | ||
1136 | data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size); | ||
1137 | |||
1138 | WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); | 1184 | WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); |
1139 | 1185 | ||
1140 | data = 0; | 1186 | data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, |
1141 | size = adev->gfx.ngg.buf[NGG_CNTL].size / 256; | 1187 | adev->gfx.ngg.buf[NGG_CNTL].size >> 8); |
1142 | data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size); | 1188 | data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, |
1143 | 1189 | adev->gfx.ngg.buf[NGG_PARAM].size >> 10); | |
1144 | size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024; | ||
1145 | data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size); | ||
1146 | |||
1147 | WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); | 1190 | WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); |
1148 | 1191 | ||
1149 | /* Program buffer base address */ | 1192 | /* Program buffer base address */ |
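A minimal sketch of what one of the consolidated calls above computes, assuming REG_SET_FIELD() keeps its usual mask-and-shift definition (the *_MASK / *__SHIFT spellings follow the sh_mask header convention and are shown here for illustration only):

	/* data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size >> 8)
	 * works out to roughly: */
	data = (0 & ~WD_BUF_RESOURCE_1__INDEX_BUF_SIZE_MASK) |
	       (WD_BUF_RESOURCE_1__INDEX_BUF_SIZE_MASK &
		((size >> 8) << WD_BUF_RESOURCE_1__INDEX_BUF_SIZE__SHIFT));
	/* ">> 8" and ">> 10" replace "/ 256" and "/ 1024": the size fields take the
	 * NGG buffer sizes in 256-byte units (INDEX/POS/CNTL_SB) and 1 KiB units (PARAM). */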
@@ -1306,7 +1349,10 @@ static int gfx_v9_0_sw_init(void *handle) | |||
1306 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { | 1349 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
1307 | ring = &adev->gfx.gfx_ring[i]; | 1350 | ring = &adev->gfx.gfx_ring[i]; |
1308 | ring->ring_obj = NULL; | 1351 | ring->ring_obj = NULL; |
1309 | sprintf(ring->name, "gfx"); | 1352 | if (!i) |
1353 | sprintf(ring->name, "gfx"); | ||
1354 | else | ||
1355 | sprintf(ring->name, "gfx_%d", i); | ||
1310 | ring->use_doorbell = true; | 1356 | ring->use_doorbell = true; |
1311 | ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; | 1357 | ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; |
1312 | r = amdgpu_ring_init(adev, ring, 1024, | 1358 | r = amdgpu_ring_init(adev, ring, 1024, |
@@ -1346,7 +1392,7 @@ static int gfx_v9_0_sw_init(void *handle) | |||
1346 | return r; | 1392 | return r; |
1347 | 1393 | ||
1348 | /* create MQD for all compute queues as well as KIQ for SRIOV case */ | 1394 | /* create MQD for all compute queues as well as KIQ for SRIOV case */ |
1349 | r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd)); | 1395 | r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); |
1350 | if (r) | 1396 | if (r) |
1351 | return r; | 1397 | return r; |
1352 | 1398 | ||
@@ -1398,9 +1444,11 @@ static int gfx_v9_0_sw_fini(void *handle) | |||
1398 | amdgpu_gfx_compute_mqd_sw_fini(adev); | 1444 | amdgpu_gfx_compute_mqd_sw_fini(adev); |
1399 | amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); | 1445 | amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); |
1400 | amdgpu_gfx_kiq_fini(adev); | 1446 | amdgpu_gfx_kiq_fini(adev); |
1447 | amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL); | ||
1401 | 1448 | ||
1402 | gfx_v9_0_mec_fini(adev); | 1449 | gfx_v9_0_mec_fini(adev); |
1403 | gfx_v9_0_ngg_fini(adev); | 1450 | gfx_v9_0_ngg_fini(adev); |
1451 | gfx_v9_0_free_microcode(adev); | ||
1404 | 1452 | ||
1405 | return 0; | 1453 | return 0; |
1406 | } | 1454 | } |
@@ -1740,11 +1788,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | |||
1740 | 1788 | ||
1741 | static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) | 1789 | static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) |
1742 | { | 1790 | { |
1743 | u32 tmp = 0; | 1791 | WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); |
1744 | |||
1745 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); | ||
1746 | tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; | ||
1747 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); | ||
1748 | } | 1792 | } |
1749 | 1793 | ||
1750 | static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, | 1794 | static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, |
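WREG32_FIELD15() wraps the read-modify-write that the removed lines spell out; the open-coded equivalent below is taken from the deleted body, so only the claim that the macro expands this way is an assumption:

	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));

	tmp = REG_SET_FIELD(tmp, RLC_SRM_CNTL, SRM_ENABLE, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);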
@@ -1822,16 +1866,11 @@ static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev | |||
1822 | uint32_t default_data = 0; | 1866 | uint32_t default_data = 0; |
1823 | 1867 | ||
1824 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | 1868 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); |
1825 | 1869 | data = REG_SET_FIELD(data, RLC_PG_CNTL, | |
1826 | if (enable == true) { | 1870 | SMU_CLK_SLOWDOWN_ON_PU_ENABLE, |
1827 | data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; | 1871 | enable ? 1 : 0); |
1828 | if (default_data != data) | 1872 | if (default_data != data) |
1829 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | 1873 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); |
1830 | } else { | ||
1831 | data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; | ||
1832 | if(default_data != data) | ||
1833 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
1834 | } | ||
1835 | } | 1874 | } |
1836 | 1875 | ||
1837 | static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, | 1876 | static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, |
@@ -1841,16 +1880,11 @@ static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *ad | |||
1841 | uint32_t default_data = 0; | 1880 | uint32_t default_data = 0; |
1842 | 1881 | ||
1843 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | 1882 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); |
1844 | 1883 | data = REG_SET_FIELD(data, RLC_PG_CNTL, | |
1845 | if (enable == true) { | 1884 | SMU_CLK_SLOWDOWN_ON_PD_ENABLE, |
1846 | data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; | 1885 | enable ? 1 : 0); |
1847 | if(default_data != data) | 1886 | if(default_data != data) |
1848 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | 1887 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); |
1849 | } else { | ||
1850 | data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; | ||
1851 | if(default_data != data) | ||
1852 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
1853 | } | ||
1854 | } | 1888 | } |
1855 | 1889 | ||
1856 | static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, | 1890 | static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, |
@@ -1860,16 +1894,11 @@ static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, | |||
1860 | uint32_t default_data = 0; | 1894 | uint32_t default_data = 0; |
1861 | 1895 | ||
1862 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | 1896 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); |
1863 | 1897 | data = REG_SET_FIELD(data, RLC_PG_CNTL, | |
1864 | if (enable == true) { | 1898 | CP_PG_DISABLE, |
1865 | data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; | 1899 | enable ? 0 : 1); |
1866 | if(default_data != data) | 1900 | if(default_data != data) |
1867 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | 1901 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); |
1868 | } else { | ||
1869 | data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK; | ||
1870 | if(default_data != data) | ||
1871 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | ||
1872 | } | ||
1873 | } | 1902 | } |
1874 | 1903 | ||
1875 | static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, | 1904 | static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, |
@@ -1878,10 +1907,9 @@ static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, | |||
1878 | uint32_t data, default_data; | 1907 | uint32_t data, default_data; |
1879 | 1908 | ||
1880 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | 1909 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); |
1881 | if (enable == true) | 1910 | data = REG_SET_FIELD(data, RLC_PG_CNTL, |
1882 | data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; | 1911 | GFX_POWER_GATING_ENABLE, |
1883 | else | 1912 | enable ? 1 : 0); |
1884 | data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; | ||
1885 | if(default_data != data) | 1913 | if(default_data != data) |
1886 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | 1914 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); |
1887 | } | 1915 | } |
@@ -1892,10 +1920,9 @@ static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, | |||
1892 | uint32_t data, default_data; | 1920 | uint32_t data, default_data; |
1893 | 1921 | ||
1894 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | 1922 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); |
1895 | if (enable == true) | 1923 | data = REG_SET_FIELD(data, RLC_PG_CNTL, |
1896 | data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; | 1924 | GFX_PIPELINE_PG_ENABLE, |
1897 | else | 1925 | enable ? 1 : 0); |
1898 | data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; | ||
1899 | if(default_data != data) | 1926 | if(default_data != data) |
1900 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | 1927 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); |
1901 | 1928 | ||
@@ -1910,10 +1937,9 @@ static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade | |||
1910 | uint32_t data, default_data; | 1937 | uint32_t data, default_data; |
1911 | 1938 | ||
1912 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | 1939 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); |
1913 | if (enable == true) | 1940 | data = REG_SET_FIELD(data, RLC_PG_CNTL, |
1914 | data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; | 1941 | STATIC_PER_CU_PG_ENABLE, |
1915 | else | 1942 | enable ? 1 : 0); |
1916 | data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; | ||
1917 | if(default_data != data) | 1943 | if(default_data != data) |
1918 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | 1944 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); |
1919 | } | 1945 | } |
@@ -1924,10 +1950,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad | |||
1924 | uint32_t data, default_data; | 1950 | uint32_t data, default_data; |
1925 | 1951 | ||
1926 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); | 1952 | default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); |
1927 | if (enable == true) | 1953 | data = REG_SET_FIELD(data, RLC_PG_CNTL, |
1928 | data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; | 1954 | DYN_PER_CU_PG_ENABLE, |
1929 | else | 1955 | enable ? 1 : 0); |
1930 | data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; | ||
1931 | if(default_data != data) | 1956 | if(default_data != data) |
1932 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); | 1957 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); |
1933 | } | 1958 | } |
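The RLC_PG_CNTL helpers above all converge on one shape: read the register once, set a single field from the enable flag (CP_PG_DISABLE inverts the sense), and write back only if the value changed. A hypothetical further consolidation, not part of the patch, where mask and shift would come from the RLC_PG_CNTL__*_MASK / __SHIFT definitions of the field in question:

	static void rlc_pg_cntl_update_example(struct amdgpu_device *adev,
					       u32 mask, u32 shift, bool enable)
	{
		u32 def, data;

		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
		data = (data & ~mask) | ((enable ? 1u : 0u) << shift);
		if (data != def)	/* skip the write when nothing changed */
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
	}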
@@ -1967,13 +1992,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) | |||
1967 | 1992 | ||
1968 | void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) | 1993 | void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) |
1969 | { | 1994 | { |
1970 | u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); | 1995 | WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); |
1971 | |||
1972 | tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); | ||
1973 | WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp); | ||
1974 | |||
1975 | gfx_v9_0_enable_gui_idle_interrupt(adev, false); | 1996 | gfx_v9_0_enable_gui_idle_interrupt(adev, false); |
1976 | |||
1977 | gfx_v9_0_wait_for_rlc_serdes(adev); | 1997 | gfx_v9_0_wait_for_rlc_serdes(adev); |
1978 | } | 1998 | } |
1979 | 1999 | ||
@@ -2045,8 +2065,10 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) | |||
2045 | { | 2065 | { |
2046 | int r; | 2066 | int r; |
2047 | 2067 | ||
2048 | if (amdgpu_sriov_vf(adev)) | 2068 | if (amdgpu_sriov_vf(adev)) { |
2069 | gfx_v9_0_init_csb(adev); | ||
2049 | return 0; | 2070 | return 0; |
2071 | } | ||
2050 | 2072 | ||
2051 | gfx_v9_0_rlc_stop(adev); | 2073 | gfx_v9_0_rlc_stop(adev); |
2052 | 2074 | ||
@@ -2157,7 +2179,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2157 | struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; | 2179 | struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; |
2158 | const struct cs_section_def *sect = NULL; | 2180 | const struct cs_section_def *sect = NULL; |
2159 | const struct cs_extent_def *ext = NULL; | 2181 | const struct cs_extent_def *ext = NULL; |
2160 | int r, i; | 2182 | int r, i, tmp; |
2161 | 2183 | ||
2162 | /* init the CP */ | 2184 | /* init the CP */ |
2163 | WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); | 2185 | WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); |
@@ -2165,7 +2187,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2165 | 2187 | ||
2166 | gfx_v9_0_cp_gfx_enable(adev, true); | 2188 | gfx_v9_0_cp_gfx_enable(adev, true); |
2167 | 2189 | ||
2168 | r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4); | 2190 | r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); |
2169 | if (r) { | 2191 | if (r) { |
2170 | DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); | 2192 | DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); |
2171 | return r; | 2193 | return r; |
@@ -2203,6 +2225,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2203 | amdgpu_ring_write(ring, 0x8000); | 2225 | amdgpu_ring_write(ring, 0x8000); |
2204 | amdgpu_ring_write(ring, 0x8000); | 2226 | amdgpu_ring_write(ring, 0x8000); |
2205 | 2227 | ||
2228 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | ||
2229 | tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | | ||
2230 | (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); | ||
2231 | amdgpu_ring_write(ring, tmp); | ||
2232 | amdgpu_ring_write(ring, 0); | ||
2233 | |||
2206 | amdgpu_ring_commit(ring); | 2234 | amdgpu_ring_commit(ring); |
2207 | 2235 | ||
2208 | return 0; | 2236 | return 0; |
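The three extra amdgpu_ring_write() calls are why the ring allocation earlier in this function grows by "+ 3": a single-register SET_UCONFIG_REG write is a three-dword packet. A minimal sketch of that packet as used above for mmVGT_INDEX_TYPE (the helper name is illustrative only):

	static void gfx_v9_0_write_uconfig_reg_example(struct amdgpu_ring *ring,
						       uint32_t reg, uint32_t val)
	{
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		/* second dword: register offset relative to the UCONFIG range;
		 * VGT_INDEX_TYPE additionally gets PACKET3_SET_UCONFIG_REG_INDEX_TYPE
		 * OR'ed in, as the hunk above shows. */
		amdgpu_ring_write(ring, reg - PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, val);
	}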
@@ -2457,6 +2485,13 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) | |||
2457 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; | 2485 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; |
2458 | mqd->compute_misc_reserved = 0x00000003; | 2486 | mqd->compute_misc_reserved = 0x00000003; |
2459 | 2487 | ||
2488 | mqd->dynamic_cu_mask_addr_lo = | ||
2489 | lower_32_bits(ring->mqd_gpu_addr | ||
2490 | + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); | ||
2491 | mqd->dynamic_cu_mask_addr_hi = | ||
2492 | upper_32_bits(ring->mqd_gpu_addr | ||
2493 | + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); | ||
2494 | |||
2460 | eop_base_addr = ring->eop_gpu_addr >> 8; | 2495 | eop_base_addr = ring->eop_gpu_addr >> 8; |
2461 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; | 2496 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; |
2462 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); | 2497 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); |
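dynamic_cu_mask_addr_lo/hi point the CP at extra per-queue state that lives in the same allocation as the MQD; the later hunks memset/memcpy sizeof(struct v9_mqd_allocation) and default dynamic_cu_mask and dynamic_rb_mask to 0xFFFFFFFF. A rough sketch of what that wrapper has to contain; the real layout is defined in v9_structs.h and the exact member order here is an assumption:

	struct v9_mqd_allocation_sketch {
		struct v9_mqd mqd;		/* the hardware queue descriptor itself */
		/* ... possibly other per-queue words ... */
		uint32_t dynamic_cu_mask;	/* 0xFFFFFFFF by default: all CUs usable */
		uint32_t dynamic_rb_mask;	/* 0xFFFFFFFF by default: all RBs usable */
	};
	/* which is why the address programmed above is
	 * ring->mqd_gpu_addr + offsetof(struct v9_mqd_allocation, dynamic_cu_mask). */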
@@ -2480,10 +2515,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) | |||
2480 | DOORBELL_SOURCE, 0); | 2515 | DOORBELL_SOURCE, 0); |
2481 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, | 2516 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
2482 | DOORBELL_HIT, 0); | 2517 | DOORBELL_HIT, 0); |
2483 | } | 2518 | } else { |
2484 | else | ||
2485 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, | 2519 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
2486 | DOORBELL_EN, 0); | 2520 | DOORBELL_EN, 0); |
2521 | } | ||
2487 | 2522 | ||
2488 | mqd->cp_hqd_pq_doorbell_control = tmp; | 2523 | mqd->cp_hqd_pq_doorbell_control = tmp; |
2489 | 2524 | ||
@@ -2686,10 +2721,10 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) | |||
2686 | 2721 | ||
2687 | gfx_v9_0_kiq_setting(ring); | 2722 | gfx_v9_0_kiq_setting(ring); |
2688 | 2723 | ||
2689 | if (adev->gfx.in_reset) { /* for GPU_RESET case */ | 2724 | if (adev->in_sriov_reset) { /* for GPU_RESET case */ |
2690 | /* reset MQD to a clean status */ | 2725 | /* reset MQD to a clean status */ |
2691 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 2726 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
2692 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); | 2727 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); |
2693 | 2728 | ||
2694 | /* reset ring buffer */ | 2729 | /* reset ring buffer */ |
2695 | ring->wptr = 0; | 2730 | ring->wptr = 0; |
@@ -2701,7 +2736,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) | |||
2701 | soc15_grbm_select(adev, 0, 0, 0, 0); | 2736 | soc15_grbm_select(adev, 0, 0, 0, 0); |
2702 | mutex_unlock(&adev->srbm_mutex); | 2737 | mutex_unlock(&adev->srbm_mutex); |
2703 | } else { | 2738 | } else { |
2704 | memset((void *)mqd, 0, sizeof(*mqd)); | 2739 | memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); |
2740 | ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; | ||
2741 | ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; | ||
2705 | mutex_lock(&adev->srbm_mutex); | 2742 | mutex_lock(&adev->srbm_mutex); |
2706 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | 2743 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
2707 | gfx_v9_0_mqd_init(ring); | 2744 | gfx_v9_0_mqd_init(ring); |
@@ -2710,7 +2747,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) | |||
2710 | mutex_unlock(&adev->srbm_mutex); | 2747 | mutex_unlock(&adev->srbm_mutex); |
2711 | 2748 | ||
2712 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 2749 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
2713 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); | 2750 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); |
2714 | } | 2751 | } |
2715 | 2752 | ||
2716 | return 0; | 2753 | return 0; |
@@ -2722,8 +2759,10 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) | |||
2722 | struct v9_mqd *mqd = ring->mqd_ptr; | 2759 | struct v9_mqd *mqd = ring->mqd_ptr; |
2723 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; | 2760 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; |
2724 | 2761 | ||
2725 | if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { | 2762 | if (!adev->in_sriov_reset && !adev->gfx.in_suspend) { |
2726 | memset((void *)mqd, 0, sizeof(*mqd)); | 2763 | memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); |
2764 | ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; | ||
2765 | ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; | ||
2727 | mutex_lock(&adev->srbm_mutex); | 2766 | mutex_lock(&adev->srbm_mutex); |
2728 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | 2767 | soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
2729 | gfx_v9_0_mqd_init(ring); | 2768 | gfx_v9_0_mqd_init(ring); |
@@ -2731,11 +2770,11 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) | |||
2731 | mutex_unlock(&adev->srbm_mutex); | 2770 | mutex_unlock(&adev->srbm_mutex); |
2732 | 2771 | ||
2733 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 2772 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
2734 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); | 2773 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); |
2735 | } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ | 2774 | } else if (adev->in_sriov_reset) { /* for GPU_RESET case */ |
2736 | /* reset MQD to a clean status */ | 2775 | /* reset MQD to a clean status */ |
2737 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 2776 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
2738 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); | 2777 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); |
2739 | 2778 | ||
2740 | /* reset ring buffer */ | 2779 | /* reset ring buffer */ |
2741 | ring->wptr = 0; | 2780 | ring->wptr = 0; |
@@ -2876,12 +2915,70 @@ static int gfx_v9_0_hw_init(void *handle) | |||
2876 | return r; | 2915 | return r; |
2877 | } | 2916 | } |
2878 | 2917 | ||
2918 | static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring) | ||
2919 | { | ||
2920 | struct amdgpu_device *adev = kiq_ring->adev; | ||
2921 | uint32_t scratch, tmp = 0; | ||
2922 | int r, i; | ||
2923 | |||
2924 | r = amdgpu_gfx_scratch_get(adev, &scratch); | ||
2925 | if (r) { | ||
2926 | DRM_ERROR("Failed to get scratch reg (%d).\n", r); | ||
2927 | return r; | ||
2928 | } | ||
2929 | WREG32(scratch, 0xCAFEDEAD); | ||
2930 | |||
2931 | r = amdgpu_ring_alloc(kiq_ring, 10); | ||
2932 | if (r) { | ||
2933 | DRM_ERROR("Failed to lock KIQ (%d).\n", r); | ||
2934 | amdgpu_gfx_scratch_free(adev, scratch); | ||
2935 | return r; | ||
2936 | } | ||
2937 | |||
2938 | /* unmap queues */ | ||
2939 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); | ||
2940 | amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ | ||
2941 | PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ | ||
2942 | PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | | ||
2943 | PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | | ||
2944 | PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); | ||
2945 | amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); | ||
2946 | amdgpu_ring_write(kiq_ring, 0); | ||
2947 | amdgpu_ring_write(kiq_ring, 0); | ||
2948 | amdgpu_ring_write(kiq_ring, 0); | ||
2949 | /* write to scratch for completion */ | ||
2950 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | ||
2951 | amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | ||
2952 | amdgpu_ring_write(kiq_ring, 0xDEADBEEF); | ||
2953 | amdgpu_ring_commit(kiq_ring); | ||
2954 | |||
2955 | for (i = 0; i < adev->usec_timeout; i++) { | ||
2956 | tmp = RREG32(scratch); | ||
2957 | if (tmp == 0xDEADBEEF) | ||
2958 | break; | ||
2959 | DRM_UDELAY(1); | ||
2960 | } | ||
2961 | if (i >= adev->usec_timeout) { | ||
2962 | DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); | ||
2963 | r = -EINVAL; | ||
2964 | } | ||
2965 | amdgpu_gfx_scratch_free(adev, scratch); | ||
2966 | return r; | ||
2967 | } | ||
2968 | |||
2969 | |||
2879 | static int gfx_v9_0_hw_fini(void *handle) | 2970 | static int gfx_v9_0_hw_fini(void *handle) |
2880 | { | 2971 | { |
2881 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 2972 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
2973 | int i; | ||
2882 | 2974 | ||
2883 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); | 2975 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); |
2884 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); | 2976 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); |
2977 | |||
2978 | /* disable KCQ to avoid CPC touching memory that is no longer valid */ | ||
2979 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | ||
2980 | gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); | ||
2981 | |||
2885 | if (amdgpu_sriov_vf(adev)) { | 2982 | if (amdgpu_sriov_vf(adev)) { |
2886 | pr_debug("For SRIOV client, shouldn't do anything.\n"); | 2983 | pr_debug("For SRIOV client, shouldn't do anything.\n"); |
2887 | return 0; | 2984 | return 0; |
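Both the GFX8 and GFX9 kcq_disable() paths use the same completion handshake: seed a scratch register with 0xCAFEDEAD, append a SET_UCONFIG_REG write of 0xDEADBEEF behind the UNMAP_QUEUES packet, and poll until the CP has executed it. A hypothetical helper, not in the patch, isolating just the poll both copies open-code:

	static int kiq_wait_scratch_example(struct amdgpu_device *adev, uint32_t scratch)
	{
		uint32_t tmp = 0;
		int i;

		for (i = 0; i < adev->usec_timeout; i++) {
			tmp = RREG32(scratch);
			if (tmp == 0xDEADBEEF)
				return 0;	/* CP reached the marker write */
			DRM_UDELAY(1);
		}
		DRM_ERROR("KIQ handshake timed out (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		return -EINVAL;
	}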
@@ -2924,15 +3021,10 @@ static bool gfx_v9_0_is_idle(void *handle) | |||
2924 | static int gfx_v9_0_wait_for_idle(void *handle) | 3021 | static int gfx_v9_0_wait_for_idle(void *handle) |
2925 | { | 3022 | { |
2926 | unsigned i; | 3023 | unsigned i; |
2927 | u32 tmp; | ||
2928 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 3024 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
2929 | 3025 | ||
2930 | for (i = 0; i < adev->usec_timeout; i++) { | 3026 | for (i = 0; i < adev->usec_timeout; i++) { |
2931 | /* read MC_STATUS */ | 3027 | if (gfx_v9_0_is_idle(handle)) |
2932 | tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) & | ||
2933 | GRBM_STATUS__GUI_ACTIVE_MASK; | ||
2934 | |||
2935 | if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) | ||
2936 | return 0; | 3028 | return 0; |
2937 | udelay(1); | 3029 | udelay(1); |
2938 | } | 3030 | } |
@@ -3493,7 +3585,9 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) | |||
3493 | u32 ref_and_mask, reg_mem_engine; | 3585 | u32 ref_and_mask, reg_mem_engine; |
3494 | struct nbio_hdp_flush_reg *nbio_hf_reg; | 3586 | struct nbio_hdp_flush_reg *nbio_hf_reg; |
3495 | 3587 | ||
3496 | if (ring->adev->asic_type == CHIP_VEGA10) | 3588 | if (ring->adev->flags & AMD_IS_APU) |
3589 | nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; | ||
3590 | else | ||
3497 | nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; | 3591 | nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; |
3498 | 3592 | ||
3499 | if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { | 3593 | if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { |
@@ -3522,7 +3616,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) | |||
3522 | static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) | 3616 | static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) |
3523 | { | 3617 | { |
3524 | gfx_v9_0_write_data_to_reg(ring, 0, true, | 3618 | gfx_v9_0_write_data_to_reg(ring, 0, true, |
3525 | SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1); | 3619 | SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); |
3526 | } | 3620 | } |
3527 | 3621 | ||
3528 | static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | 3622 | static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, |
@@ -3751,6 +3845,12 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) | |||
3751 | amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); | 3845 | amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); |
3752 | } | 3846 | } |
3753 | 3847 | ||
3848 | static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) | ||
3849 | { | ||
3850 | amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); | ||
3851 | amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ | ||
3852 | } | ||
3853 | |||
3754 | static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) | 3854 | static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) |
3755 | { | 3855 | { |
3756 | uint32_t dw2 = 0; | 3856 | uint32_t dw2 = 0; |
@@ -3758,6 +3858,8 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) | |||
3758 | if (amdgpu_sriov_vf(ring->adev)) | 3858 | if (amdgpu_sriov_vf(ring->adev)) |
3759 | gfx_v9_0_ring_emit_ce_meta(ring); | 3859 | gfx_v9_0_ring_emit_ce_meta(ring); |
3760 | 3860 | ||
3861 | gfx_v9_0_ring_emit_tmz(ring, true); | ||
3862 | |||
3761 | dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ | 3863 | dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ |
3762 | if (flags & AMDGPU_HAVE_CTX_SWITCH) { | 3864 | if (flags & AMDGPU_HAVE_CTX_SWITCH) { |
3763 | /* set load_global_config & load_global_uconfig */ | 3865 | /* set load_global_config & load_global_uconfig */ |
@@ -3808,12 +3910,6 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne | |||
3808 | ring->ring[offset] = (ring->ring_size>>2) - offset + cur; | 3910 | ring->ring[offset] = (ring->ring_size>>2) - offset + cur; |
3809 | } | 3911 | } |
3810 | 3912 | ||
3811 | static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) | ||
3812 | { | ||
3813 | amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); | ||
3814 | amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ | ||
3815 | } | ||
3816 | |||
3817 | static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) | 3913 | static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) |
3818 | { | 3914 | { |
3819 | struct amdgpu_device *adev = ring->adev; | 3915 | struct amdgpu_device *adev = ring->adev; |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 408723ef157c..c17996e18086 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | |||
@@ -144,8 +144,8 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) | |||
144 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); | 144 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); |
145 | 145 | ||
146 | tmp = mmVM_L2_CNTL3_DEFAULT; | 146 | tmp = mmVM_L2_CNTL3_DEFAULT; |
147 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); | 147 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); |
148 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); | 148 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); |
149 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); | 149 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); |
150 | 150 | ||
151 | tmp = mmVM_L2_CNTL4_DEFAULT; | 151 | tmp = mmVM_L2_CNTL4_DEFAULT; |
@@ -319,6 +319,12 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, | |||
319 | WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 319 | WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
320 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | 320 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, |
321 | EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 321 | EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
322 | if (!value) { | ||
323 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | ||
324 | CRASH_ON_NO_RETRY_FAULT, 1); | ||
325 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | ||
326 | CRASH_ON_RETRY_FAULT, 1); | ||
327 | } | ||
322 | WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); | 328 | WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); |
323 | } | 329 | } |
324 | 330 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 93c45f26b7c8..f4603a7c8ef3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | |||
@@ -332,7 +332,24 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) | |||
332 | adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; | 332 | adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; |
333 | adev->mc.visible_vram_size = adev->mc.aper_size; | 333 | adev->mc.visible_vram_size = adev->mc.aper_size; |
334 | 334 | ||
335 | amdgpu_gart_set_defaults(adev); | 335 | /* set the gart size */ |
336 | if (amdgpu_gart_size == -1) { | ||
337 | switch (adev->asic_type) { | ||
338 | case CHIP_HAINAN: /* no MM engines */ | ||
339 | default: | ||
340 | adev->mc.gart_size = 256ULL << 20; | ||
341 | break; | ||
342 | case CHIP_VERDE: /* UVD, VCE do not support GPUVM */ | ||
343 | case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */ | ||
344 | case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */ | ||
345 | case CHIP_OLAND: /* UVD, VCE do not support GPUVM */ | ||
346 | adev->mc.gart_size = 1024ULL << 20; | ||
347 | break; | ||
348 | } | ||
349 | } else { | ||
350 | adev->mc.gart_size = (u64)amdgpu_gart_size << 20; | ||
351 | } | ||
352 | |||
336 | gmc_v6_0_vram_gtt_location(adev, &adev->mc); | 353 | gmc_v6_0_vram_gtt_location(adev, &adev->mc); |
337 | 354 | ||
338 | return 0; | 355 | return 0; |
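amdgpu_gart_size is given in MiB on the module command line, with -1 meaning "use the per-ASIC default shown above"; this explicit switch replaces the old amdgpu_gart_set_defaults() helper. The conversion to bytes is a simple shift, for example:

	/* MiB -> bytes is a left shift by 20: */
	adev->mc.gart_size = (u64)amdgpu_gart_size << 20;	/* gart_size=512 -> 512 MiB */
	/* and the defaults above: 256ULL << 20 == 256 MiB, 1024ULL << 20 == 1 GiB */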
@@ -461,6 +478,7 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) | |||
461 | static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) | 478 | static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) |
462 | { | 479 | { |
463 | int r, i; | 480 | int r, i; |
481 | u32 field; | ||
464 | 482 | ||
465 | if (adev->gart.robj == NULL) { | 483 | if (adev->gart.robj == NULL) { |
466 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); | 484 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); |
@@ -488,10 +506,12 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) | |||
488 | WREG32(mmVM_L2_CNTL2, | 506 | WREG32(mmVM_L2_CNTL2, |
489 | VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK | | 507 | VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK | |
490 | VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK); | 508 | VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK); |
509 | |||
510 | field = adev->vm_manager.fragment_size; | ||
491 | WREG32(mmVM_L2_CNTL3, | 511 | WREG32(mmVM_L2_CNTL3, |
492 | VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | | 512 | VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | |
493 | (4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) | | 513 | (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) | |
494 | (4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); | 514 | (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); |
495 | /* setup context0 */ | 515 | /* setup context0 */ |
496 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); | 516 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); |
497 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); | 517 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); |
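adev->vm_manager.fragment_size now feeds BANK_SELECT and L2_CACHE_BIGK_FRAGMENT_SIZE instead of the hard-coded 4, and the same value is what amdgpu_vm_adjust_size(adev, 64, 9) below installs by default. Assuming the value is the log2 of the fragment size in 4 KiB pages, the default works out as:

	/* fragment_size = 9  ->  2^9 pages * 4 KiB/page = 2 MiB per VM fragment */
	u64 fragment_bytes = 4096ULL << adev->vm_manager.fragment_size;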
@@ -811,7 +831,7 @@ static int gmc_v6_0_sw_init(void *handle) | |||
811 | if (r) | 831 | if (r) |
812 | return r; | 832 | return r; |
813 | 833 | ||
814 | amdgpu_vm_adjust_size(adev, 64); | 834 | amdgpu_vm_adjust_size(adev, 64, 9); |
815 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; | 835 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; |
816 | 836 | ||
817 | adev->mc.mc_mask = 0xffffffffffULL; | 837 | adev->mc.mc_mask = 0xffffffffffULL; |
@@ -881,6 +901,8 @@ static int gmc_v6_0_sw_fini(void *handle) | |||
881 | gmc_v6_0_gart_fini(adev); | 901 | gmc_v6_0_gart_fini(adev); |
882 | amdgpu_gem_force_release(adev); | 902 | amdgpu_gem_force_release(adev); |
883 | amdgpu_bo_fini(adev); | 903 | amdgpu_bo_fini(adev); |
904 | release_firmware(adev->mc.fw); | ||
905 | adev->mc.fw = NULL; | ||
884 | 906 | ||
885 | return 0; | 907 | return 0; |
886 | } | 908 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 4a9e84062874..b0528ca9207b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | |||
@@ -386,7 +386,27 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) | |||
386 | if (adev->mc.visible_vram_size > adev->mc.real_vram_size) | 386 | if (adev->mc.visible_vram_size > adev->mc.real_vram_size) |
387 | adev->mc.visible_vram_size = adev->mc.real_vram_size; | 387 | adev->mc.visible_vram_size = adev->mc.real_vram_size; |
388 | 388 | ||
389 | amdgpu_gart_set_defaults(adev); | 389 | /* set the gart size */ |
390 | if (amdgpu_gart_size == -1) { | ||
391 | switch (adev->asic_type) { | ||
392 | case CHIP_TOPAZ: /* no MM engines */ | ||
393 | default: | ||
394 | adev->mc.gart_size = 256ULL << 20; | ||
395 | break; | ||
396 | #ifdef CONFIG_DRM_AMDGPU_CIK | ||
397 | case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */ | ||
398 | case CHIP_HAWAII: /* UVD, VCE do not support GPUVM */ | ||
399 | case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */ | ||
400 | case CHIP_KABINI: /* UVD, VCE do not support GPUVM */ | ||
401 | case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */ | ||
402 | adev->mc.gart_size = 1024ULL << 20; | ||
403 | break; | ||
404 | #endif | ||
405 | } | ||
406 | } else { | ||
407 | adev->mc.gart_size = (u64)amdgpu_gart_size << 20; | ||
408 | } | ||
409 | |||
390 | gmc_v7_0_vram_gtt_location(adev, &adev->mc); | 410 | gmc_v7_0_vram_gtt_location(adev, &adev->mc); |
391 | 411 | ||
392 | return 0; | 412 | return 0; |
@@ -562,7 +582,7 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) | |||
562 | static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) | 582 | static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) |
563 | { | 583 | { |
564 | int r, i; | 584 | int r, i; |
565 | u32 tmp; | 585 | u32 tmp, field; |
566 | 586 | ||
567 | if (adev->gart.robj == NULL) { | 587 | if (adev->gart.robj == NULL) { |
568 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); | 588 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); |
@@ -592,10 +612,12 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) | |||
592 | tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); | 612 | tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); |
593 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 613 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
594 | WREG32(mmVM_L2_CNTL2, tmp); | 614 | WREG32(mmVM_L2_CNTL2, tmp); |
615 | |||
616 | field = adev->vm_manager.fragment_size; | ||
595 | tmp = RREG32(mmVM_L2_CNTL3); | 617 | tmp = RREG32(mmVM_L2_CNTL3); |
596 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); | 618 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); |
597 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); | 619 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); |
598 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); | 620 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); |
599 | WREG32(mmVM_L2_CNTL3, tmp); | 621 | WREG32(mmVM_L2_CNTL3, tmp); |
600 | /* setup context0 */ | 622 | /* setup context0 */ |
601 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); | 623 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); |
@@ -948,7 +970,7 @@ static int gmc_v7_0_sw_init(void *handle) | |||
948 | * Currently set to 4GB ((1 << 20) 4k pages). | 970 | * Currently set to 4GB ((1 << 20) 4k pages). |
949 | * Max GPUVM size for cayman and SI is 40 bits. | 971 | * Max GPUVM size for cayman and SI is 40 bits. |
950 | */ | 972 | */ |
951 | amdgpu_vm_adjust_size(adev, 64); | 973 | amdgpu_vm_adjust_size(adev, 64, 9); |
952 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; | 974 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; |
953 | 975 | ||
954 | /* Set the internal MC address mask | 976 | /* Set the internal MC address mask |
@@ -1028,6 +1050,8 @@ static int gmc_v7_0_sw_fini(void *handle) | |||
1028 | gmc_v7_0_gart_fini(adev); | 1050 | gmc_v7_0_gart_fini(adev); |
1029 | amdgpu_gem_force_release(adev); | 1051 | amdgpu_gem_force_release(adev); |
1030 | amdgpu_bo_fini(adev); | 1052 | amdgpu_bo_fini(adev); |
1053 | release_firmware(adev->mc.fw); | ||
1054 | adev->mc.fw = NULL; | ||
1031 | 1055 | ||
1032 | return 0; | 1056 | return 0; |
1033 | } | 1057 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 85c937b5e40b..f368cfe2f585 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | |||
@@ -562,7 +562,26 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) | |||
562 | if (adev->mc.visible_vram_size > adev->mc.real_vram_size) | 562 | if (adev->mc.visible_vram_size > adev->mc.real_vram_size) |
563 | adev->mc.visible_vram_size = adev->mc.real_vram_size; | 563 | adev->mc.visible_vram_size = adev->mc.real_vram_size; |
564 | 564 | ||
565 | amdgpu_gart_set_defaults(adev); | 565 | /* set the gart size */ |
566 | if (amdgpu_gart_size == -1) { | ||
567 | switch (adev->asic_type) { | ||
568 | case CHIP_POLARIS11: /* all engines support GPUVM */ | ||
569 | case CHIP_POLARIS10: /* all engines support GPUVM */ | ||
570 | case CHIP_POLARIS12: /* all engines support GPUVM */ | ||
571 | default: | ||
572 | adev->mc.gart_size = 256ULL << 20; | ||
573 | break; | ||
574 | case CHIP_TONGA: /* UVD, VCE do not support GPUVM */ | ||
575 | case CHIP_FIJI: /* UVD, VCE do not support GPUVM */ | ||
576 | case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */ | ||
577 | case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */ | ||
578 | adev->mc.gart_size = 1024ULL << 20; | ||
579 | break; | ||
580 | } | ||
581 | } else { | ||
582 | adev->mc.gart_size = (u64)amdgpu_gart_size << 20; | ||
583 | } | ||
584 | |||
566 | gmc_v8_0_vram_gtt_location(adev, &adev->mc); | 585 | gmc_v8_0_vram_gtt_location(adev, &adev->mc); |
567 | 586 | ||
568 | return 0; | 587 | return 0; |
@@ -762,7 +781,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) | |||
762 | static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) | 781 | static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) |
763 | { | 782 | { |
764 | int r, i; | 783 | int r, i; |
765 | u32 tmp; | 784 | u32 tmp, field; |
766 | 785 | ||
767 | if (adev->gart.robj == NULL) { | 786 | if (adev->gart.robj == NULL) { |
768 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); | 787 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); |
@@ -793,10 +812,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) | |||
793 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); | 812 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); |
794 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 813 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
795 | WREG32(mmVM_L2_CNTL2, tmp); | 814 | WREG32(mmVM_L2_CNTL2, tmp); |
815 | |||
816 | field = adev->vm_manager.fragment_size; | ||
796 | tmp = RREG32(mmVM_L2_CNTL3); | 817 | tmp = RREG32(mmVM_L2_CNTL3); |
797 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); | 818 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); |
798 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); | 819 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); |
799 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); | 820 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); |
800 | WREG32(mmVM_L2_CNTL3, tmp); | 821 | WREG32(mmVM_L2_CNTL3, tmp); |
801 | /* XXX: set to enable PTE/PDE in system memory */ | 822 | /* XXX: set to enable PTE/PDE in system memory */ |
802 | tmp = RREG32(mmVM_L2_CNTL4); | 823 | tmp = RREG32(mmVM_L2_CNTL4); |
@@ -1046,7 +1067,7 @@ static int gmc_v8_0_sw_init(void *handle) | |||
1046 | * Currently set to 4GB ((1 << 20) 4k pages). | 1067 | * Currently set to 4GB ((1 << 20) 4k pages). |
1047 | * Max GPUVM size for cayman and SI is 40 bits. | 1068 | * Max GPUVM size for cayman and SI is 40 bits. |
1048 | */ | 1069 | */ |
1049 | amdgpu_vm_adjust_size(adev, 64); | 1070 | amdgpu_vm_adjust_size(adev, 64, 9); |
1050 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; | 1071 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; |
1051 | 1072 | ||
1052 | /* Set the internal MC address mask | 1073 | /* Set the internal MC address mask |
@@ -1126,6 +1147,8 @@ static int gmc_v8_0_sw_fini(void *handle) | |||
1126 | gmc_v8_0_gart_fini(adev); | 1147 | gmc_v8_0_gart_fini(adev); |
1127 | amdgpu_gem_force_release(adev); | 1148 | amdgpu_gem_force_release(adev); |
1128 | amdgpu_bo_fini(adev); | 1149 | amdgpu_bo_fini(adev); |
1150 | release_firmware(adev->mc.fw); | ||
1151 | adev->mc.fw = NULL; | ||
1129 | 1152 | ||
1130 | return 0; | 1153 | return 0; |
1131 | } | 1154 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c22899a08106..621699331e09 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #include "vega10/DC/dce_12_0_offset.h" | 32 | #include "vega10/DC/dce_12_0_offset.h" |
33 | #include "vega10/DC/dce_12_0_sh_mask.h" | 33 | #include "vega10/DC/dce_12_0_sh_mask.h" |
34 | #include "vega10/vega10_enum.h" | 34 | #include "vega10/vega10_enum.h" |
35 | #include "vega10/MMHUB/mmhub_1_0_offset.h" | ||
36 | #include "vega10/ATHUB/athub_1_0_offset.h" | ||
35 | 37 | ||
36 | #include "soc15_common.h" | 38 | #include "soc15_common.h" |
37 | 39 | ||
@@ -71,13 +73,25 @@ static const u32 golden_settings_vega10_hdp[] = | |||
71 | 0xf6e, 0x0fffffff, 0x00000000, | 73 | 0xf6e, 0x0fffffff, 0x00000000, |
72 | }; | 74 | }; |
73 | 75 | ||
76 | static const u32 golden_settings_mmhub_1_0_0[] = | ||
77 | { | ||
78 | SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_WRCLI2), 0x00000007, 0xfe5fe0fa, | ||
79 | SOC15_REG_OFFSET(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0), 0x00000030, 0x55555565 | ||
80 | }; | ||
81 | |||
82 | static const u32 golden_settings_athub_1_0_0[] = | ||
83 | { | ||
84 | SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL), 0x0000ff00, 0x00000800, | ||
85 | SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL2), 0x00ff00ff, 0x00080008 | ||
86 | }; | ||
87 | |||
74 | static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, | 88 | static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, |
75 | struct amdgpu_irq_src *src, | 89 | struct amdgpu_irq_src *src, |
76 | unsigned type, | 90 | unsigned type, |
77 | enum amdgpu_interrupt_state state) | 91 | enum amdgpu_interrupt_state state) |
78 | { | 92 | { |
79 | struct amdgpu_vmhub *hub; | 93 | struct amdgpu_vmhub *hub; |
80 | u32 tmp, reg, bits, i; | 94 | u32 tmp, reg, bits, i, j; |
81 | 95 | ||
82 | bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | | 96 | bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | |
83 | VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | | 97 | VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | |
@@ -89,43 +103,26 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, | |||
89 | 103 | ||
90 | switch (state) { | 104 | switch (state) { |
91 | case AMDGPU_IRQ_STATE_DISABLE: | 105 | case AMDGPU_IRQ_STATE_DISABLE: |
92 | /* MM HUB */ | 106 | for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { |
93 | hub = &adev->vmhub[AMDGPU_MMHUB]; | 107 | hub = &adev->vmhub[j]; |
94 | for (i = 0; i< 16; i++) { | 108 | for (i = 0; i < 16; i++) { |
95 | reg = hub->vm_context0_cntl + i; | 109 | reg = hub->vm_context0_cntl + i; |
96 | tmp = RREG32(reg); | 110 | tmp = RREG32(reg); |
97 | tmp &= ~bits; | 111 | tmp &= ~bits; |
98 | WREG32(reg, tmp); | 112 | WREG32(reg, tmp); |
99 | } | 113 | } |
100 | |||
101 | /* GFX HUB */ | ||
102 | hub = &adev->vmhub[AMDGPU_GFXHUB]; | ||
103 | for (i = 0; i < 16; i++) { | ||
104 | reg = hub->vm_context0_cntl + i; | ||
105 | tmp = RREG32(reg); | ||
106 | tmp &= ~bits; | ||
107 | WREG32(reg, tmp); | ||
108 | } | 114 | } |
109 | break; | 115 | break; |
110 | case AMDGPU_IRQ_STATE_ENABLE: | 116 | case AMDGPU_IRQ_STATE_ENABLE: |
111 | /* MM HUB */ | 117 | for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { |
112 | hub = &adev->vmhub[AMDGPU_MMHUB]; | 118 | hub = &adev->vmhub[j]; |
113 | for (i = 0; i< 16; i++) { | 119 | for (i = 0; i < 16; i++) { |
114 | reg = hub->vm_context0_cntl + i; | 120 | reg = hub->vm_context0_cntl + i; |
115 | tmp = RREG32(reg); | 121 | tmp = RREG32(reg); |
116 | tmp |= bits; | 122 | tmp |= bits; |
117 | WREG32(reg, tmp); | 123 | WREG32(reg, tmp); |
124 | } | ||
118 | } | 125 | } |
119 | |||
120 | /* GFX HUB */ | ||
121 | hub = &adev->vmhub[AMDGPU_GFXHUB]; | ||
122 | for (i = 0; i < 16; i++) { | ||
123 | reg = hub->vm_context0_cntl + i; | ||
124 | tmp = RREG32(reg); | ||
125 | tmp |= bits; | ||
126 | WREG32(reg, tmp); | ||
127 | } | ||
128 | break; | ||
129 | default: | 126 | default: |
130 | break; | 127 | break; |
131 | } | 128 | } |
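The two per-hub copies collapse into one loop over adev->vmhub[]; with AMDGPU_MAX_VMHUBS covering both AMDGPU_GFXHUB and AMDGPU_MMHUB, the enable and disable cases now differ only in whether the fault-interrupt bits are set or cleared. A hypothetical further consolidation, not in the patch, that factors that last difference on a bool:

	bool enable = (state == AMDGPU_IRQ_STATE_ENABLE);

	for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) {
		hub = &adev->vmhub[j];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp = enable ? (tmp | bits) : (tmp & ~bits);
			WREG32(reg, tmp);
		}
	}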
@@ -499,7 +496,21 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) | |||
499 | if (adev->mc.visible_vram_size > adev->mc.real_vram_size) | 496 | if (adev->mc.visible_vram_size > adev->mc.real_vram_size) |
500 | adev->mc.visible_vram_size = adev->mc.real_vram_size; | 497 | adev->mc.visible_vram_size = adev->mc.real_vram_size; |
501 | 498 | ||
502 | amdgpu_gart_set_defaults(adev); | 499 | /* set the gart size */ |
500 | if (amdgpu_gart_size == -1) { | ||
501 | switch (adev->asic_type) { | ||
502 | case CHIP_VEGA10: /* all engines support GPUVM */ | ||
503 | default: | ||
504 | adev->mc.gart_size = 256ULL << 20; | ||
505 | break; | ||
506 | case CHIP_RAVEN: /* DCE SG support */ | ||
507 | adev->mc.gart_size = 1024ULL << 20; | ||
508 | break; | ||
509 | } | ||
510 | } else { | ||
511 | adev->mc.gart_size = (u64)amdgpu_gart_size << 20; | ||
512 | } | ||
513 | |||
503 | gmc_v9_0_vram_gtt_location(adev, &adev->mc); | 514 | gmc_v9_0_vram_gtt_location(adev, &adev->mc); |
504 | 515 | ||
505 | return 0; | 516 | return 0; |
@@ -541,9 +552,10 @@ static int gmc_v9_0_sw_init(void *handle) | |||
541 | adev->vm_manager.vm_size = 1U << 18; | 552 | adev->vm_manager.vm_size = 1U << 18; |
542 | adev->vm_manager.block_size = 9; | 553 | adev->vm_manager.block_size = 9; |
543 | adev->vm_manager.num_level = 3; | 554 | adev->vm_manager.num_level = 3; |
555 | amdgpu_vm_set_fragment_size(adev, 9); | ||
544 | } else { | 556 | } else { |
545 | /* vm_size is 64GB for legacy 2-level page support*/ | 557 | /* vm_size is 64GB for legacy 2-level page support */ |
546 | amdgpu_vm_adjust_size(adev, 64); | 558 | amdgpu_vm_adjust_size(adev, 64, 9); |
547 | adev->vm_manager.num_level = 1; | 559 | adev->vm_manager.num_level = 1; |
548 | } | 560 | } |
549 | break; | 561 | break; |
@@ -558,14 +570,16 @@ static int gmc_v9_0_sw_init(void *handle) | |||
558 | adev->vm_manager.vm_size = 1U << 18; | 570 | adev->vm_manager.vm_size = 1U << 18; |
559 | adev->vm_manager.block_size = 9; | 571 | adev->vm_manager.block_size = 9; |
560 | adev->vm_manager.num_level = 3; | 572 | adev->vm_manager.num_level = 3; |
573 | amdgpu_vm_set_fragment_size(adev, 9); | ||
561 | break; | 574 | break; |
562 | default: | 575 | default: |
563 | break; | 576 | break; |
564 | } | 577 | } |
565 | 578 | ||
566 | DRM_INFO("vm size is %llu GB, block size is %u-bit\n", | 579 | DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", |
567 | adev->vm_manager.vm_size, | 580 | adev->vm_manager.vm_size, |
568 | adev->vm_manager.block_size); | 581 | adev->vm_manager.block_size, |
582 | adev->vm_manager.fragment_size); | ||
569 | 583 | ||
570 | /* This interrupt is VMC page fault.*/ | 584 | /* This interrupt is VMC page fault.*/ |
571 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, | 585 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, |
@@ -665,8 +679,17 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) | |||
665 | { | 679 | { |
666 | switch (adev->asic_type) { | 680 | switch (adev->asic_type) { |
667 | case CHIP_VEGA10: | 681 | case CHIP_VEGA10: |
682 | amdgpu_program_register_sequence(adev, | ||
683 | golden_settings_mmhub_1_0_0, | ||
684 | (const u32)ARRAY_SIZE(golden_settings_mmhub_1_0_0)); | ||
685 | amdgpu_program_register_sequence(adev, | ||
686 | golden_settings_athub_1_0_0, | ||
687 | (const u32)ARRAY_SIZE(golden_settings_athub_1_0_0)); | ||
668 | break; | 688 | break; |
669 | case CHIP_RAVEN: | 689 | case CHIP_RAVEN: |
690 | amdgpu_program_register_sequence(adev, | ||
691 | golden_settings_athub_1_0_0, | ||
692 | (const u32)ARRAY_SIZE(golden_settings_athub_1_0_0)); | ||
670 | break; | 693 | break; |
671 | default: | 694 | default: |
672 | break; | 695 | break; |
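The golden-settings tables passed to amdgpu_program_register_sequence() here (golden_settings_mmhub_1_0_0, golden_settings_athub_1_0_0, and the SDMA tables later in this patch) are flat {offset, and-mask, or-value} triples. A sketch of how such a triple list is typically applied, assuming the usual masked read-modify-write convention and modelling MMIO as a plain array rather than real register access:

#include <stddef.h>
#include <stdint.h>

static void apply_golden_triples(uint32_t *regs, const uint32_t *tbl, size_t len)
{
	size_t i;

	for (i = 0; i + 2 < len; i += 3) {
		uint32_t off = tbl[i];
		uint32_t and_mask = tbl[i + 1];
		uint32_t or_val = tbl[i + 2];

		regs[off] = (regs[off] & ~and_mask) | or_val;	/* masked read-modify-write */
	}
}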
@@ -696,12 +719,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) | |||
696 | if (r) | 719 | if (r) |
697 | return r; | 720 | return r; |
698 | 721 | ||
699 | /* After HDP is initialized, flush HDP.*/ | ||
700 | if (adev->flags & AMD_IS_APU) | ||
701 | nbio_v7_0_hdp_flush(adev); | ||
702 | else | ||
703 | nbio_v6_1_hdp_flush(adev); | ||
704 | |||
705 | switch (adev->asic_type) { | 722 | switch (adev->asic_type) { |
706 | case CHIP_RAVEN: | 723 | case CHIP_RAVEN: |
707 | mmhub_v1_0_initialize_power_gating(adev); | 724 | mmhub_v1_0_initialize_power_gating(adev); |
@@ -719,13 +736,16 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) | |||
719 | if (r) | 736 | if (r) |
720 | return r; | 737 | return r; |
721 | 738 | ||
722 | tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); | 739 | WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); |
723 | tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; | ||
724 | WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp); | ||
725 | 740 | ||
726 | tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); | 741 | tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); |
727 | WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); | 742 | WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); |
728 | 743 | ||
744 | /* After HDP is initialized, flush HDP.*/ | ||
745 | if (adev->flags & AMD_IS_APU) | ||
746 | nbio_v7_0_hdp_flush(adev); | ||
747 | else | ||
748 | nbio_v6_1_hdp_flush(adev); | ||
729 | 749 | ||
730 | if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) | 750 | if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) |
731 | value = false; | 751 | value = false; |
@@ -734,7 +754,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) | |||
734 | 754 | ||
735 | gfxhub_v1_0_set_fault_enable_default(adev, value); | 755 | gfxhub_v1_0_set_fault_enable_default(adev, value); |
736 | mmhub_v1_0_set_fault_enable_default(adev, value); | 756 | mmhub_v1_0_set_fault_enable_default(adev, value); |
737 | |||
738 | gmc_v9_0_gart_flush_gpu_tlb(adev, 0); | 757 | gmc_v9_0_gart_flush_gpu_tlb(adev, 0); |
739 | 758 | ||
740 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", | 759 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", |
@@ -753,17 +772,11 @@ static int gmc_v9_0_hw_init(void *handle) | |||
753 | gmc_v9_0_init_golden_registers(adev); | 772 | gmc_v9_0_init_golden_registers(adev); |
754 | 773 | ||
755 | if (adev->mode_info.num_crtc) { | 774 | if (adev->mode_info.num_crtc) { |
756 | u32 tmp; | ||
757 | |||
758 | /* Lockout access through VGA aperture*/ | 775 | /* Lockout access through VGA aperture*/ |
759 | tmp = RREG32_SOC15(DCE, 0, mmVGA_HDP_CONTROL); | 776 | WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); |
760 | tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); | ||
761 | WREG32_SOC15(DCE, 0, mmVGA_HDP_CONTROL, tmp); | ||
762 | 777 | ||
763 | /* disable VGA render */ | 778 | /* disable VGA render */ |
764 | tmp = RREG32_SOC15(DCE, 0, mmVGA_RENDER_CONTROL); | 779 | WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); |
765 | tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); | ||
766 | WREG32_SOC15(DCE, 0, mmVGA_RENDER_CONTROL, tmp); | ||
767 | } | 780 | } |
768 | 781 | ||
769 | r = gmc_v9_0_gart_enable(adev); | 782 | r = gmc_v9_0_gart_enable(adev); |
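WREG32_FIELD15 collapses the removed RREG32_SOC15 / REG_SET_FIELD / WREG32_SOC15 sequences into one macro. The field update itself is the usual mask-and-shift read-modify-write; a sketch with the mask and shift as explicit parameters (the real macro derives them from the register headers):

#include <stdint.h>

static uint32_t set_field(uint32_t reg_val, uint32_t mask, unsigned int shift,
			  uint32_t field_val)
{
	/* clear the field, then insert the new value within its mask */
	return (reg_val & ~mask) | ((field_val << shift) & mask);
}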
@@ -805,9 +818,7 @@ static int gmc_v9_0_suspend(void *handle) | |||
805 | { | 818 | { |
806 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 819 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
807 | 820 | ||
808 | gmc_v9_0_hw_fini(adev); | 821 | return gmc_v9_0_hw_fini(adev); |
809 | |||
810 | return 0; | ||
811 | } | 822 | } |
812 | 823 | ||
813 | static int gmc_v9_0_resume(void *handle) | 824 | static int gmc_v9_0_resume(void *handle) |
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 7a0ea27ac429..65ed6d3a8f05 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c | |||
@@ -208,6 +208,19 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) | |||
208 | } | 208 | } |
209 | 209 | ||
210 | /** | 210 | /** |
211 | * iceland_ih_prescreen_iv - prescreen an interrupt vector | ||
212 | * | ||
213 | * @adev: amdgpu_device pointer | ||
214 | * | ||
215 | * Returns true if the interrupt vector should be further processed. | ||
216 | */ | ||
217 | static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev) | ||
218 | { | ||
219 | /* Process all interrupts */ | ||
220 | return true; | ||
221 | } | ||
222 | |||
223 | /** | ||
211 | * iceland_ih_decode_iv - decode an interrupt vector | 224 | * iceland_ih_decode_iv - decode an interrupt vector |
212 | * | 225 | * |
213 | * @adev: amdgpu_device pointer | 226 | * @adev: amdgpu_device pointer |
@@ -412,6 +425,7 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = { | |||
412 | 425 | ||
413 | static const struct amdgpu_ih_funcs iceland_ih_funcs = { | 426 | static const struct amdgpu_ih_funcs iceland_ih_funcs = { |
414 | .get_wptr = iceland_ih_get_wptr, | 427 | .get_wptr = iceland_ih_get_wptr, |
428 | .prescreen_iv = iceland_ih_prescreen_iv, | ||
415 | .decode_iv = iceland_ih_decode_iv, | 429 | .decode_iv = iceland_ih_decode_iv, |
416 | .set_rptr = iceland_ih_set_rptr | 430 | .set_rptr = iceland_ih_set_rptr |
417 | }; | 431 | }; |
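The new prescreen_iv callback gives each IH block a cheap filter that can drop known-noisy vectors before full decode; Iceland's version simply accepts everything. A sketch of the idea, assuming a dispatch loop that skips entries the hook rejects; the struct and function names below are illustrative, not the driver's amdgpu_ih code:

#include <stdbool.h>

struct ih_funcs_model {
	bool (*prescreen_iv)(void *dev);	/* false: drop without full decode */
	void (*decode_iv)(void *dev);
	void (*set_rptr)(void *dev);
};

static void process_one_entry(void *dev, const struct ih_funcs_model *f)
{
	if (f->prescreen_iv && !f->prescreen_iv(dev)) {
		f->set_rptr(dev);		/* consume the entry unprocessed */
		return;
	}
	f->decode_iv(dev);
	f->set_rptr(dev);
}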
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 3bbf2ccfca89..b57399a462c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #define KV_MINIMUM_ENGINE_CLOCK 800 | 42 | #define KV_MINIMUM_ENGINE_CLOCK 800 |
43 | #define SMC_RAM_END 0x40000 | 43 | #define SMC_RAM_END 0x40000 |
44 | 44 | ||
45 | static void kv_dpm_set_dpm_funcs(struct amdgpu_device *adev); | ||
46 | static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev); | 45 | static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev); |
47 | static int kv_enable_nb_dpm(struct amdgpu_device *adev, | 46 | static int kv_enable_nb_dpm(struct amdgpu_device *adev, |
48 | bool enable); | 47 | bool enable); |
@@ -64,7 +63,7 @@ static int kv_set_thermal_temperature_range(struct amdgpu_device *adev, | |||
64 | int min_temp, int max_temp); | 63 | int min_temp, int max_temp); |
65 | static int kv_init_fps_limits(struct amdgpu_device *adev); | 64 | static int kv_init_fps_limits(struct amdgpu_device *adev); |
66 | 65 | ||
67 | static void kv_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate); | 66 | static void kv_dpm_powergate_uvd(void *handle, bool gate); |
68 | static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate); | 67 | static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate); |
69 | static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate); | 68 | static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate); |
70 | static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate); | 69 | static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate); |
@@ -1245,8 +1244,9 @@ static void kv_update_requested_ps(struct amdgpu_device *adev, | |||
1245 | adev->pm.dpm.requested_ps = &pi->requested_rps; | 1244 | adev->pm.dpm.requested_ps = &pi->requested_rps; |
1246 | } | 1245 | } |
1247 | 1246 | ||
1248 | static void kv_dpm_enable_bapm(struct amdgpu_device *adev, bool enable) | 1247 | static void kv_dpm_enable_bapm(void *handle, bool enable) |
1249 | { | 1248 | { |
1249 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1250 | struct kv_power_info *pi = kv_get_pi(adev); | 1250 | struct kv_power_info *pi = kv_get_pi(adev); |
1251 | int ret; | 1251 | int ret; |
1252 | 1252 | ||
@@ -1672,8 +1672,9 @@ static int kv_update_acp_dpm(struct amdgpu_device *adev, bool gate) | |||
1672 | return kv_enable_acp_dpm(adev, !gate); | 1672 | return kv_enable_acp_dpm(adev, !gate); |
1673 | } | 1673 | } |
1674 | 1674 | ||
1675 | static void kv_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate) | 1675 | static void kv_dpm_powergate_uvd(void *handle, bool gate) |
1676 | { | 1676 | { |
1677 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1677 | struct kv_power_info *pi = kv_get_pi(adev); | 1678 | struct kv_power_info *pi = kv_get_pi(adev); |
1678 | int ret; | 1679 | int ret; |
1679 | 1680 | ||
@@ -1868,10 +1869,11 @@ static int kv_enable_nb_dpm(struct amdgpu_device *adev, | |||
1868 | return ret; | 1869 | return ret; |
1869 | } | 1870 | } |
1870 | 1871 | ||
1871 | static int kv_dpm_force_performance_level(struct amdgpu_device *adev, | 1872 | static int kv_dpm_force_performance_level(void *handle, |
1872 | enum amd_dpm_forced_level level) | 1873 | enum amd_dpm_forced_level level) |
1873 | { | 1874 | { |
1874 | int ret; | 1875 | int ret; |
1876 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1875 | 1877 | ||
1876 | if (level == AMD_DPM_FORCED_LEVEL_HIGH) { | 1878 | if (level == AMD_DPM_FORCED_LEVEL_HIGH) { |
1877 | ret = kv_force_dpm_highest(adev); | 1879 | ret = kv_force_dpm_highest(adev); |
@@ -1892,8 +1894,9 @@ static int kv_dpm_force_performance_level(struct amdgpu_device *adev, | |||
1892 | return 0; | 1894 | return 0; |
1893 | } | 1895 | } |
1894 | 1896 | ||
1895 | static int kv_dpm_pre_set_power_state(struct amdgpu_device *adev) | 1897 | static int kv_dpm_pre_set_power_state(void *handle) |
1896 | { | 1898 | { |
1899 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1897 | struct kv_power_info *pi = kv_get_pi(adev); | 1900 | struct kv_power_info *pi = kv_get_pi(adev); |
1898 | struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; | 1901 | struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; |
1899 | struct amdgpu_ps *new_ps = &requested_ps; | 1902 | struct amdgpu_ps *new_ps = &requested_ps; |
@@ -1907,8 +1910,9 @@ static int kv_dpm_pre_set_power_state(struct amdgpu_device *adev) | |||
1907 | return 0; | 1910 | return 0; |
1908 | } | 1911 | } |
1909 | 1912 | ||
1910 | static int kv_dpm_set_power_state(struct amdgpu_device *adev) | 1913 | static int kv_dpm_set_power_state(void *handle) |
1911 | { | 1914 | { |
1915 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1912 | struct kv_power_info *pi = kv_get_pi(adev); | 1916 | struct kv_power_info *pi = kv_get_pi(adev); |
1913 | struct amdgpu_ps *new_ps = &pi->requested_rps; | 1917 | struct amdgpu_ps *new_ps = &pi->requested_rps; |
1914 | struct amdgpu_ps *old_ps = &pi->current_rps; | 1918 | struct amdgpu_ps *old_ps = &pi->current_rps; |
@@ -1981,8 +1985,9 @@ static int kv_dpm_set_power_state(struct amdgpu_device *adev) | |||
1981 | return 0; | 1985 | return 0; |
1982 | } | 1986 | } |
1983 | 1987 | ||
1984 | static void kv_dpm_post_set_power_state(struct amdgpu_device *adev) | 1988 | static void kv_dpm_post_set_power_state(void *handle) |
1985 | { | 1989 | { |
1990 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
1986 | struct kv_power_info *pi = kv_get_pi(adev); | 1991 | struct kv_power_info *pi = kv_get_pi(adev); |
1987 | struct amdgpu_ps *new_ps = &pi->requested_rps; | 1992 | struct amdgpu_ps *new_ps = &pi->requested_rps; |
1988 | 1993 | ||
@@ -2848,9 +2853,10 @@ static int kv_dpm_init(struct amdgpu_device *adev) | |||
2848 | } | 2853 | } |
2849 | 2854 | ||
2850 | static void | 2855 | static void |
2851 | kv_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, | 2856 | kv_dpm_debugfs_print_current_performance_level(void *handle, |
2852 | struct seq_file *m) | 2857 | struct seq_file *m) |
2853 | { | 2858 | { |
2859 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
2854 | struct kv_power_info *pi = kv_get_pi(adev); | 2860 | struct kv_power_info *pi = kv_get_pi(adev); |
2855 | u32 current_index = | 2861 | u32 current_index = |
2856 | (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) & | 2862 | (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) & |
@@ -2875,11 +2881,12 @@ kv_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, | |||
2875 | } | 2881 | } |
2876 | 2882 | ||
2877 | static void | 2883 | static void |
2878 | kv_dpm_print_power_state(struct amdgpu_device *adev, | 2884 | kv_dpm_print_power_state(void *handle, void *request_ps) |
2879 | struct amdgpu_ps *rps) | ||
2880 | { | 2885 | { |
2881 | int i; | 2886 | int i; |
2887 | struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps; | ||
2882 | struct kv_ps *ps = kv_get_ps(rps); | 2888 | struct kv_ps *ps = kv_get_ps(rps); |
2889 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
2883 | 2890 | ||
2884 | amdgpu_dpm_print_class_info(rps->class, rps->class2); | 2891 | amdgpu_dpm_print_class_info(rps->class, rps->class2); |
2885 | amdgpu_dpm_print_cap_info(rps->caps); | 2892 | amdgpu_dpm_print_cap_info(rps->caps); |
@@ -2905,13 +2912,14 @@ static void kv_dpm_fini(struct amdgpu_device *adev) | |||
2905 | amdgpu_free_extended_power_table(adev); | 2912 | amdgpu_free_extended_power_table(adev); |
2906 | } | 2913 | } |
2907 | 2914 | ||
2908 | static void kv_dpm_display_configuration_changed(struct amdgpu_device *adev) | 2915 | static void kv_dpm_display_configuration_changed(void *handle) |
2909 | { | 2916 | { |
2910 | 2917 | ||
2911 | } | 2918 | } |
2912 | 2919 | ||
2913 | static u32 kv_dpm_get_sclk(struct amdgpu_device *adev, bool low) | 2920 | static u32 kv_dpm_get_sclk(void *handle, bool low) |
2914 | { | 2921 | { |
2922 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
2915 | struct kv_power_info *pi = kv_get_pi(adev); | 2923 | struct kv_power_info *pi = kv_get_pi(adev); |
2916 | struct kv_ps *requested_state = kv_get_ps(&pi->requested_rps); | 2924 | struct kv_ps *requested_state = kv_get_ps(&pi->requested_rps); |
2917 | 2925 | ||
@@ -2921,18 +2929,20 @@ static u32 kv_dpm_get_sclk(struct amdgpu_device *adev, bool low) | |||
2921 | return requested_state->levels[requested_state->num_levels - 1].sclk; | 2929 | return requested_state->levels[requested_state->num_levels - 1].sclk; |
2922 | } | 2930 | } |
2923 | 2931 | ||
2924 | static u32 kv_dpm_get_mclk(struct amdgpu_device *adev, bool low) | 2932 | static u32 kv_dpm_get_mclk(void *handle, bool low) |
2925 | { | 2933 | { |
2934 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
2926 | struct kv_power_info *pi = kv_get_pi(adev); | 2935 | struct kv_power_info *pi = kv_get_pi(adev); |
2927 | 2936 | ||
2928 | return pi->sys_info.bootup_uma_clk; | 2937 | return pi->sys_info.bootup_uma_clk; |
2929 | } | 2938 | } |
2930 | 2939 | ||
2931 | /* get temperature in millidegrees */ | 2940 | /* get temperature in millidegrees */ |
2932 | static int kv_dpm_get_temp(struct amdgpu_device *adev) | 2941 | static int kv_dpm_get_temp(void *handle) |
2933 | { | 2942 | { |
2934 | u32 temp; | 2943 | u32 temp; |
2935 | int actual_temp = 0; | 2944 | int actual_temp = 0; |
2945 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
2936 | 2946 | ||
2937 | temp = RREG32_SMC(0xC0300E0C); | 2947 | temp = RREG32_SMC(0xC0300E0C); |
2938 | 2948 | ||
@@ -2950,7 +2960,6 @@ static int kv_dpm_early_init(void *handle) | |||
2950 | { | 2960 | { |
2951 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 2961 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
2952 | 2962 | ||
2953 | kv_dpm_set_dpm_funcs(adev); | ||
2954 | kv_dpm_set_irq_funcs(adev); | 2963 | kv_dpm_set_irq_funcs(adev); |
2955 | 2964 | ||
2956 | return 0; | 2965 | return 0; |
@@ -3222,14 +3231,17 @@ static inline bool kv_are_power_levels_equal(const struct kv_pl *kv_cpl1, | |||
3222 | (kv_cpl1->force_nbp_state == kv_cpl2->force_nbp_state)); | 3231 | (kv_cpl1->force_nbp_state == kv_cpl2->force_nbp_state)); |
3223 | } | 3232 | } |
3224 | 3233 | ||
3225 | static int kv_check_state_equal(struct amdgpu_device *adev, | 3234 | static int kv_check_state_equal(void *handle, |
3226 | struct amdgpu_ps *cps, | 3235 | void *current_ps, |
3227 | struct amdgpu_ps *rps, | 3236 | void *request_ps, |
3228 | bool *equal) | 3237 | bool *equal) |
3229 | { | 3238 | { |
3230 | struct kv_ps *kv_cps; | 3239 | struct kv_ps *kv_cps; |
3231 | struct kv_ps *kv_rps; | 3240 | struct kv_ps *kv_rps; |
3232 | int i; | 3241 | int i; |
3242 | struct amdgpu_ps *cps = (struct amdgpu_ps *)current_ps; | ||
3243 | struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps; | ||
3244 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
3233 | 3245 | ||
3234 | if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) | 3246 | if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) |
3235 | return -EINVAL; | 3247 | return -EINVAL; |
@@ -3262,9 +3274,10 @@ static int kv_check_state_equal(struct amdgpu_device *adev, | |||
3262 | return 0; | 3274 | return 0; |
3263 | } | 3275 | } |
3264 | 3276 | ||
3265 | static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx, | 3277 | static int kv_dpm_read_sensor(void *handle, int idx, |
3266 | void *value, int *size) | 3278 | void *value, int *size) |
3267 | { | 3279 | { |
3280 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
3268 | struct kv_power_info *pi = kv_get_pi(adev); | 3281 | struct kv_power_info *pi = kv_get_pi(adev); |
3269 | uint32_t sclk; | 3282 | uint32_t sclk; |
3270 | u32 pl_index = | 3283 | u32 pl_index = |
@@ -3312,7 +3325,7 @@ const struct amd_ip_funcs kv_dpm_ip_funcs = { | |||
3312 | .set_powergating_state = kv_dpm_set_powergating_state, | 3325 | .set_powergating_state = kv_dpm_set_powergating_state, |
3313 | }; | 3326 | }; |
3314 | 3327 | ||
3315 | static const struct amdgpu_dpm_funcs kv_dpm_funcs = { | 3328 | const struct amd_pm_funcs kv_dpm_funcs = { |
3316 | .get_temperature = &kv_dpm_get_temp, | 3329 | .get_temperature = &kv_dpm_get_temp, |
3317 | .pre_set_power_state = &kv_dpm_pre_set_power_state, | 3330 | .pre_set_power_state = &kv_dpm_pre_set_power_state, |
3318 | .set_power_state = &kv_dpm_set_power_state, | 3331 | .set_power_state = &kv_dpm_set_power_state, |
@@ -3330,12 +3343,6 @@ static const struct amdgpu_dpm_funcs kv_dpm_funcs = { | |||
3330 | .read_sensor = &kv_dpm_read_sensor, | 3343 | .read_sensor = &kv_dpm_read_sensor, |
3331 | }; | 3344 | }; |
3332 | 3345 | ||
3333 | static void kv_dpm_set_dpm_funcs(struct amdgpu_device *adev) | ||
3334 | { | ||
3335 | if (adev->pm.funcs == NULL) | ||
3336 | adev->pm.funcs = &kv_dpm_funcs; | ||
3337 | } | ||
3338 | |||
3339 | static const struct amdgpu_irq_src_funcs kv_dpm_irq_funcs = { | 3346 | static const struct amdgpu_irq_src_funcs kv_dpm_irq_funcs = { |
3340 | .set = kv_dpm_set_interrupt_state, | 3347 | .set = kv_dpm_set_interrupt_state, |
3341 | .process = kv_dpm_process_interrupt, | 3348 | .process = kv_dpm_process_interrupt, |
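The rest of this file follows one mechanical pattern: every dpm callback now takes an opaque void *handle (so the table can be typed as the shared const struct amd_pm_funcs) and casts it back to struct amdgpu_device * on entry. A minimal sketch of that shape with made-up types (my_device, my_pm_funcs):

struct my_device { int temperature; };

static int example_get_temp(void *handle)
{
	struct my_device *dev = (struct my_device *)handle;	/* cast back on entry */

	return dev->temperature;
}

struct my_pm_funcs {
	int (*get_temperature)(void *handle);
};

static const struct my_pm_funcs example_funcs = {
	.get_temperature = example_get_temp,
};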
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index ad8def3cc343..cc21c4bdec27 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | |||
@@ -158,8 +158,8 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) | |||
158 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); | 158 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); |
159 | 159 | ||
160 | tmp = mmVM_L2_CNTL3_DEFAULT; | 160 | tmp = mmVM_L2_CNTL3_DEFAULT; |
161 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); | 161 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); |
162 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); | 162 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); |
163 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); | 163 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); |
164 | 164 | ||
165 | tmp = mmVM_L2_CNTL4_DEFAULT; | 165 | tmp = mmVM_L2_CNTL4_DEFAULT; |
@@ -273,7 +273,7 @@ static const struct pctl_data pctl0_data[] = { | |||
273 | {0x135, 0x12a810}, | 273 | {0x135, 0x12a810}, |
274 | {0x149, 0x7a82c} | 274 | {0x149, 0x7a82c} |
275 | }; | 275 | }; |
276 | #define PCTL0_DATA_LEN (sizeof(pctl0_data)/sizeof(pctl0_data[0])) | 276 | #define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data)) |
277 | 277 | ||
278 | #define PCTL0_RENG_EXEC_END_PTR 0x151 | 278 | #define PCTL0_RENG_EXEC_END_PTR 0x151 |
279 | #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 | 279 | #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 |
@@ -309,7 +309,7 @@ static const struct pctl_data pctl1_data[] = { | |||
309 | {0x1f0, 0x5000a7f6}, | 309 | {0x1f0, 0x5000a7f6}, |
310 | {0x1f1, 0x5000a7e4} | 310 | {0x1f1, 0x5000a7e4} |
311 | }; | 311 | }; |
312 | #define PCTL1_DATA_LEN (sizeof(pctl1_data)/sizeof(pctl1_data[0])) | 312 | #define PCTL1_DATA_LEN (ARRAY_SIZE(pctl1_data)) |
313 | 313 | ||
314 | #define PCTL1_RENG_EXEC_END_PTR 0x1f1 | 314 | #define PCTL1_RENG_EXEC_END_PTR 0x1f1 |
315 | #define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000 | 315 | #define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000 |
@@ -561,6 +561,13 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) | |||
561 | WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 561 | WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
562 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | 562 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, |
563 | EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); | 563 | EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); |
564 | if (!value) { | ||
565 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | ||
566 | CRASH_ON_NO_RETRY_FAULT, 1); | ||
567 | tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, | ||
568 | CRASH_ON_RETRY_FAULT, 1); | ||
569 | } | ||
570 | |||
564 | WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); | 571 | WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); |
565 | } | 572 | } |
566 | 573 | ||
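PCTL0_DATA_LEN/PCTL1_DATA_LEN now use ARRAY_SIZE() instead of open-coding the division. The kernel macro is the usual element-count idiom plus a compile-time check that the argument really is an array; a self-contained sketch with that check omitted and demo data copied from the pctl0 table above:

#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct pctl_entry { unsigned int index; unsigned int data; };

static const struct pctl_entry demo[] = {
	{ 0x135, 0x12a810 },
	{ 0x149, 0x7a82c },
};

int main(void)
{
	printf("%zu entries\n", ARRAY_SIZE(demo));	/* prints "2 entries" */
	return 0;
}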
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 1e91b9a1c591..67e78576a9eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | |||
@@ -24,7 +24,7 @@ | |||
24 | #ifndef __MXGPU_AI_H__ | 24 | #ifndef __MXGPU_AI_H__ |
25 | #define __MXGPU_AI_H__ | 25 | #define __MXGPU_AI_H__ |
26 | 26 | ||
27 | #define AI_MAILBOX_TIMEDOUT 5000 | 27 | #define AI_MAILBOX_TIMEDOUT 12000 |
28 | 28 | ||
29 | enum idh_request { | 29 | enum idh_request { |
30 | IDH_REQ_GPU_INIT_ACCESS = 1, | 30 | IDH_REQ_GPU_INIT_ACCESS = 1, |
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h index c791d73d2d54..f13dc6cc158f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h | |||
@@ -23,7 +23,7 @@ | |||
23 | #ifndef __MXGPU_VI_H__ | 23 | #ifndef __MXGPU_VI_H__ |
24 | #define __MXGPU_VI_H__ | 24 | #define __MXGPU_VI_H__ |
25 | 25 | ||
26 | #define VI_MAILBOX_TIMEDOUT 5000 | 26 | #define VI_MAILBOX_TIMEDOUT 12000 |
27 | #define VI_MAILBOX_RESET_TIME 12 | 27 | #define VI_MAILBOX_RESET_TIME 12 |
28 | 28 | ||
29 | /* VI mailbox messages request */ | 29 | /* VI mailbox messages request */ |
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index f7cf994b1da2..dea7c909ca5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #include "raven1/GC/gc_9_1_offset.h" | 35 | #include "raven1/GC/gc_9_1_offset.h" |
36 | #include "raven1/SDMA0/sdma0_4_1_offset.h" | 36 | #include "raven1/SDMA0/sdma0_4_1_offset.h" |
37 | 37 | ||
38 | MODULE_FIRMWARE("amdgpu/raven_asd.bin"); | ||
39 | |||
38 | static int | 40 | static int |
39 | psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) | 41 | psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) |
40 | { | 42 | { |
@@ -136,15 +138,13 @@ int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cm | |||
136 | { | 138 | { |
137 | int ret; | 139 | int ret; |
138 | uint64_t fw_mem_mc_addr = ucode->mc_addr; | 140 | uint64_t fw_mem_mc_addr = ucode->mc_addr; |
139 | struct common_firmware_header *header; | ||
140 | 141 | ||
141 | memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); | 142 | memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); |
142 | header = (struct common_firmware_header *)ucode->fw; | ||
143 | 143 | ||
144 | cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; | 144 | cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; |
145 | cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); | 145 | cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); |
146 | cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); | 146 | cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); |
147 | cmd->cmd.cmd_load_ip_fw.fw_size = le32_to_cpu(header->ucode_size_bytes); | 147 | cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; |
148 | 148 | ||
149 | ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); | 149 | ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); |
150 | if (ret) | 150 | if (ret) |
@@ -209,7 +209,7 @@ int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) | |||
209 | return ret; | 209 | return ret; |
210 | } | 210 | } |
211 | 211 | ||
212 | int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) | 212 | int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) |
213 | { | 213 | { |
214 | int ret = 0; | 214 | int ret = 0; |
215 | struct psp_ring *ring; | 215 | struct psp_ring *ring; |
@@ -229,6 +229,19 @@ int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type | |||
229 | ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), | 229 | ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), |
230 | 0x80000000, 0x80000000, false); | 230 | 0x80000000, 0x80000000, false); |
231 | 231 | ||
232 | return ret; | ||
233 | } | ||
234 | |||
235 | int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) | ||
236 | { | ||
237 | int ret = 0; | ||
238 | struct psp_ring *ring = &psp->km_ring; | ||
239 | struct amdgpu_device *adev = psp->adev; | ||
240 | |||
241 | ret = psp_v10_0_ring_stop(psp, ring_type); | ||
242 | if (ret) | ||
243 | DRM_ERROR("Failed to stop psp ring\n"); | ||
244 | |||
232 | amdgpu_bo_free_kernel(&adev->firmware.rbuf, | 245 | amdgpu_bo_free_kernel(&adev->firmware.rbuf, |
233 | &ring->ring_mem_mc_addr, | 246 | &ring->ring_mem_mc_addr, |
234 | (void **)&ring->ring_mem); | 247 | (void **)&ring->ring_mem); |
@@ -245,15 +258,20 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, | |||
245 | struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; | 258 | struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; |
246 | struct psp_ring *ring = &psp->km_ring; | 259 | struct psp_ring *ring = &psp->km_ring; |
247 | struct amdgpu_device *adev = psp->adev; | 260 | struct amdgpu_device *adev = psp->adev; |
261 | uint32_t ring_size_dw = ring->ring_size / 4; | ||
262 | uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; | ||
248 | 263 | ||
249 | /* KM (GPCOM) prepare write pointer */ | 264 | /* KM (GPCOM) prepare write pointer */ |
250 | psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); | 265 | psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); |
251 | 266 | ||
252 | /* Update KM RB frame pointer to new frame */ | 267 | /* Update KM RB frame pointer to new frame */ |
253 | if ((psp_write_ptr_reg % ring->ring_size) == 0) | 268 | if ((psp_write_ptr_reg % ring_size_dw) == 0) |
254 | write_frame = ring->ring_mem; | 269 | write_frame = ring->ring_mem; |
255 | else | 270 | else |
256 | write_frame = ring->ring_mem + (psp_write_ptr_reg / (sizeof(struct psp_gfx_rb_frame) / 4)); | 271 | write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw); |
272 | |||
273 | /* Initialize KM RB frame */ | ||
274 | memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); | ||
257 | 275 | ||
258 | /* Update KM RB frame */ | 276 | /* Update KM RB frame */ |
259 | write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); | 277 | write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); |
@@ -263,8 +281,7 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, | |||
263 | write_frame->fence_value = index; | 281 | write_frame->fence_value = index; |
264 | 282 | ||
265 | /* Update the write Pointer in DWORDs */ | 283 | /* Update the write Pointer in DWORDs */ |
266 | psp_write_ptr_reg += sizeof(struct psp_gfx_rb_frame) / 4; | 284 | psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; |
267 | psp_write_ptr_reg = (psp_write_ptr_reg >= ring->ring_size) ? 0 : psp_write_ptr_reg; | ||
268 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); | 285 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); |
269 | 286 | ||
270 | return 0; | 287 | return 0; |
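The cmd_submit change keeps all ring bookkeeping in dword units: the slot for the next frame is wptr divided by the frame size in dwords, and the pointer advances modulo the ring size in dwords rather than being compared against the byte size. A standalone model of that arithmetic (struct frame and the helper names are stand-ins, not psp code):

#include <stdint.h>

struct frame { uint32_t words[8]; };	/* stand-in for psp_gfx_rb_frame */

static struct frame *next_slot(struct frame *ring_mem, uint32_t wptr_dw,
			       uint32_t ring_size_dw)
{
	uint32_t frame_dw = sizeof(struct frame) / 4;

	if ((wptr_dw % ring_size_dw) == 0)
		return ring_mem;			/* wrapped back to the start */
	return ring_mem + wptr_dw / frame_dw;		/* index in whole frames */
}

static uint32_t advance_wptr(uint32_t wptr_dw, uint32_t ring_size_dw)
{
	uint32_t frame_dw = sizeof(struct frame) / 4;

	return (wptr_dw + frame_dw) % ring_size_dw;	/* wrap in dword units */
}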
@@ -390,3 +407,10 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp, | |||
390 | 407 | ||
391 | return true; | 408 | return true; |
392 | } | 409 | } |
410 | |||
411 | |||
412 | int psp_v10_0_mode1_reset(struct psp_context *psp) | ||
413 | { | ||
414 | DRM_INFO("psp mode 1 reset not supported yet!\n"); | ||
415 | return -EINVAL; | ||
416 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h index e76cde2f01f9..451e8308303f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h | |||
@@ -34,6 +34,8 @@ extern int psp_v10_0_ring_init(struct psp_context *psp, | |||
34 | enum psp_ring_type ring_type); | 34 | enum psp_ring_type ring_type); |
35 | extern int psp_v10_0_ring_create(struct psp_context *psp, | 35 | extern int psp_v10_0_ring_create(struct psp_context *psp, |
36 | enum psp_ring_type ring_type); | 36 | enum psp_ring_type ring_type); |
37 | extern int psp_v10_0_ring_stop(struct psp_context *psp, | ||
38 | enum psp_ring_type ring_type); | ||
37 | extern int psp_v10_0_ring_destroy(struct psp_context *psp, | 39 | extern int psp_v10_0_ring_destroy(struct psp_context *psp, |
38 | enum psp_ring_type ring_type); | 40 | enum psp_ring_type ring_type); |
39 | extern int psp_v10_0_cmd_submit(struct psp_context *psp, | 41 | extern int psp_v10_0_cmd_submit(struct psp_context *psp, |
@@ -43,4 +45,6 @@ extern int psp_v10_0_cmd_submit(struct psp_context *psp, | |||
43 | extern bool psp_v10_0_compare_sram_data(struct psp_context *psp, | 45 | extern bool psp_v10_0_compare_sram_data(struct psp_context *psp, |
44 | struct amdgpu_firmware_info *ucode, | 46 | struct amdgpu_firmware_info *ucode, |
45 | enum AMDGPU_UCODE_ID ucode_type); | 47 | enum AMDGPU_UCODE_ID ucode_type); |
48 | |||
49 | extern int psp_v10_0_mode1_reset(struct psp_context *psp); | ||
46 | #endif | 50 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 2a535a4b8d5b..cee5c396b277 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | |||
@@ -319,7 +319,7 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) | |||
319 | return ret; | 319 | return ret; |
320 | } | 320 | } |
321 | 321 | ||
322 | int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) | 322 | int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) |
323 | { | 323 | { |
324 | int ret = 0; | 324 | int ret = 0; |
325 | struct psp_ring *ring; | 325 | struct psp_ring *ring; |
@@ -339,6 +339,19 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) | |||
339 | ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), | 339 | ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), |
340 | 0x80000000, 0x80000000, false); | 340 | 0x80000000, 0x80000000, false); |
341 | 341 | ||
342 | return ret; | ||
343 | } | ||
344 | |||
345 | int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) | ||
346 | { | ||
347 | int ret = 0; | ||
348 | struct psp_ring *ring = &psp->km_ring; | ||
349 | struct amdgpu_device *adev = psp->adev; | ||
350 | |||
351 | ret = psp_v3_1_ring_stop(psp, ring_type); | ||
352 | if (ret) | ||
353 | DRM_ERROR("Failed to stop psp ring\n"); | ||
354 | |||
342 | amdgpu_bo_free_kernel(&adev->firmware.rbuf, | 355 | amdgpu_bo_free_kernel(&adev->firmware.rbuf, |
343 | &ring->ring_mem_mc_addr, | 356 | &ring->ring_mem_mc_addr, |
344 | (void **)&ring->ring_mem); | 357 | (void **)&ring->ring_mem); |
@@ -517,3 +530,37 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) | |||
517 | reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2); | 530 | reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2); |
518 | return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; | 531 | return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; |
519 | } | 532 | } |
533 | |||
534 | int psp_v3_1_mode1_reset(struct psp_context *psp) | ||
535 | { | ||
536 | int ret; | ||
537 | uint32_t offset; | ||
538 | struct amdgpu_device *adev = psp->adev; | ||
539 | |||
540 | offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); | ||
541 | |||
542 | ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); | ||
543 | |||
544 | if (ret) { | ||
545 | DRM_INFO("psp is not working correctly before mode1 reset!\n"); | ||
546 | return -EINVAL; | ||
547 | } | ||
548 | |||
549 | /*send the mode 1 reset command*/ | ||
550 | WREG32(offset, 0x70000); | ||
551 | |||
552 | mdelay(1000); | ||
553 | |||
554 | offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); | ||
555 | |||
556 | ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); | ||
557 | |||
558 | if (ret) { | ||
559 | DRM_INFO("psp mode 1 reset failed!\n"); | ||
560 | return -EINVAL; | ||
561 | } | ||
562 | |||
563 | DRM_INFO("psp mode1 reset succeeded\n"); | ||
564 | |||
565 | return 0; | ||
566 | } | ||
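mode1 reset is built on the existing psp_wait_for() poll: wait for C2PMSG_64 to report ready, write the 0x70000 reset command, then wait for C2PMSG_33 to signal completion. A generic sketch of such a poll-until-match helper, assuming a read callback and a simple retry count rather than the driver's timeout handling:

#include <stdbool.h>
#include <stdint.h>

static bool wait_for_reg(uint32_t (*read_reg)(uint32_t), uint32_t offset,
			 uint32_t expected, uint32_t mask, unsigned int tries)
{
	while (tries--) {
		if ((read_reg(offset) & mask) == expected)
			return true;
		/* a real implementation sleeps or delays between polls */
	}
	return false;
}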
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h index 9dcd0b25c4c6..b05dbada7751 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h | |||
@@ -41,6 +41,8 @@ extern int psp_v3_1_ring_init(struct psp_context *psp, | |||
41 | enum psp_ring_type ring_type); | 41 | enum psp_ring_type ring_type); |
42 | extern int psp_v3_1_ring_create(struct psp_context *psp, | 42 | extern int psp_v3_1_ring_create(struct psp_context *psp, |
43 | enum psp_ring_type ring_type); | 43 | enum psp_ring_type ring_type); |
44 | extern int psp_v3_1_ring_stop(struct psp_context *psp, | ||
45 | enum psp_ring_type ring_type); | ||
44 | extern int psp_v3_1_ring_destroy(struct psp_context *psp, | 46 | extern int psp_v3_1_ring_destroy(struct psp_context *psp, |
45 | enum psp_ring_type ring_type); | 47 | enum psp_ring_type ring_type); |
46 | extern int psp_v3_1_cmd_submit(struct psp_context *psp, | 48 | extern int psp_v3_1_cmd_submit(struct psp_context *psp, |
@@ -51,4 +53,5 @@ extern bool psp_v3_1_compare_sram_data(struct psp_context *psp, | |||
51 | struct amdgpu_firmware_info *ucode, | 53 | struct amdgpu_firmware_info *ucode, |
52 | enum AMDGPU_UCODE_ID ucode_type); | 54 | enum AMDGPU_UCODE_ID ucode_type); |
53 | extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp); | 55 | extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp); |
56 | extern int psp_v3_1_mode1_reset(struct psp_context *psp); | ||
54 | #endif | 57 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index f2d0710258cb..acdee3a4602c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | |||
@@ -1324,8 +1324,13 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) | |||
1324 | } | 1324 | } |
1325 | 1325 | ||
1326 | static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { | 1326 | static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { |
1327 | .copy_pte_num_dw = 7, | ||
1327 | .copy_pte = sdma_v2_4_vm_copy_pte, | 1328 | .copy_pte = sdma_v2_4_vm_copy_pte, |
1329 | |||
1328 | .write_pte = sdma_v2_4_vm_write_pte, | 1330 | .write_pte = sdma_v2_4_vm_write_pte, |
1331 | |||
1332 | .set_max_nums_pte_pde = 0x1fffff >> 3, | ||
1333 | .set_pte_pde_num_dw = 10, | ||
1329 | .set_pte_pde = sdma_v2_4_vm_set_pte_pde, | 1334 | .set_pte_pde = sdma_v2_4_vm_set_pte_pde, |
1330 | }; | 1335 | }; |
1331 | 1336 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index b1de44f22824..72f31cc7df00 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | |||
@@ -379,8 +379,10 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) | |||
379 | struct amdgpu_device *adev = ring->adev; | 379 | struct amdgpu_device *adev = ring->adev; |
380 | 380 | ||
381 | if (ring->use_doorbell) { | 381 | if (ring->use_doorbell) { |
382 | u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; | ||
383 | |||
382 | /* XXX check if swapping is necessary on BE */ | 384 | /* XXX check if swapping is necessary on BE */ |
383 | adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr) << 2; | 385 | WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); |
384 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); | 386 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); |
385 | } else { | 387 | } else { |
386 | int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; | 388 | int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; |
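WRITE_ONCE() is used above so the store to the shared write-back slot is a single access the compiler cannot tear or split before the doorbell write. A small model of the same publish-then-notify shape (WRITE_ONCE_U32 is a simplified stand-in for the kernel macro, not its real definition):

#include <stdint.h>

#define WRITE_ONCE_U32(p, v) (*(volatile uint32_t *)(p) = (v))

static void publish_wptr(uint32_t *shadow_slot, uint32_t wptr,
			 void (*ring_doorbell)(uint32_t))
{
	WRITE_ONCE_U32(shadow_slot, wptr << 2);	/* publish the shadow value, untorn */
	ring_doorbell(wptr << 2);		/* then notify the engine */
}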
@@ -641,10 +643,11 @@ static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable) | |||
641 | static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) | 643 | static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) |
642 | { | 644 | { |
643 | struct amdgpu_ring *ring; | 645 | struct amdgpu_ring *ring; |
644 | u32 rb_cntl, ib_cntl; | 646 | u32 rb_cntl, ib_cntl, wptr_poll_cntl; |
645 | u32 rb_bufsz; | 647 | u32 rb_bufsz; |
646 | u32 wb_offset; | 648 | u32 wb_offset; |
647 | u32 doorbell; | 649 | u32 doorbell; |
650 | u64 wptr_gpu_addr; | ||
648 | int i, j, r; | 651 | int i, j, r; |
649 | 652 | ||
650 | for (i = 0; i < adev->sdma.num_instances; i++) { | 653 | for (i = 0; i < adev->sdma.num_instances; i++) { |
@@ -707,6 +710,20 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) | |||
707 | } | 710 | } |
708 | WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell); | 711 | WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell); |
709 | 712 | ||
713 | /* setup the wptr shadow polling */ | ||
714 | wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | ||
715 | |||
716 | WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO + sdma_offsets[i], | ||
717 | lower_32_bits(wptr_gpu_addr)); | ||
718 | WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], | ||
719 | upper_32_bits(wptr_gpu_addr)); | ||
720 | wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); | ||
721 | if (amdgpu_sriov_vf(adev)) | ||
722 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); | ||
723 | else | ||
724 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); | ||
725 | WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); | ||
726 | |||
710 | /* enable DMA RB */ | 727 | /* enable DMA RB */ |
711 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); | 728 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); |
712 | WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); | 729 | WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); |
@@ -1713,11 +1730,11 @@ static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib, | |||
1713 | } | 1730 | } |
1714 | 1731 | ||
1715 | static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = { | 1732 | static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = { |
1716 | .copy_max_bytes = 0x1fffff, | 1733 | .copy_max_bytes = 0x3fffe0, /* not 0x3fffff due to HW limitation */ |
1717 | .copy_num_dw = 7, | 1734 | .copy_num_dw = 7, |
1718 | .emit_copy_buffer = sdma_v3_0_emit_copy_buffer, | 1735 | .emit_copy_buffer = sdma_v3_0_emit_copy_buffer, |
1719 | 1736 | ||
1720 | .fill_max_bytes = 0x1fffff, | 1737 | .fill_max_bytes = 0x3fffe0, /* not 0x3fffff due to HW limitation */ |
1721 | .fill_num_dw = 5, | 1738 | .fill_num_dw = 5, |
1722 | .emit_fill_buffer = sdma_v3_0_emit_fill_buffer, | 1739 | .emit_fill_buffer = sdma_v3_0_emit_fill_buffer, |
1723 | }; | 1740 | }; |
@@ -1731,8 +1748,14 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev) | |||
1731 | } | 1748 | } |
1732 | 1749 | ||
1733 | static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { | 1750 | static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { |
1751 | .copy_pte_num_dw = 7, | ||
1734 | .copy_pte = sdma_v3_0_vm_copy_pte, | 1752 | .copy_pte = sdma_v3_0_vm_copy_pte, |
1753 | |||
1735 | .write_pte = sdma_v3_0_vm_write_pte, | 1754 | .write_pte = sdma_v3_0_vm_write_pte, |
1755 | |||
1756 | /* not 0x3fffff due to HW limitation */ | ||
1757 | .set_max_nums_pte_pde = 0x3fffe0 >> 3, | ||
1758 | .set_pte_pde_num_dw = 10, | ||
1736 | .set_pte_pde = sdma_v3_0_vm_set_pte_pde, | 1759 | .set_pte_pde = sdma_v3_0_vm_set_pte_pde, |
1737 | }; | 1760 | }; |
1738 | 1761 | ||
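The new copy_pte_num_dw, set_pte_pde_num_dw and set_max_nums_pte_pde fields are presumably consumed when sizing page-table update IBs per engine: an update is split into packets of at most set_max_nums_pte_pde PTEs, each packet costing set_pte_pde_num_dw dwords. A rough sketch of that sizing with an illustrative helper; the real accounting lives in amdgpu_vm.c:

#include <stdint.h>

static uint32_t pte_update_ib_dwords(uint64_t num_ptes,
				     uint32_t max_ptes_per_packet,
				     uint32_t dw_per_packet)
{
	uint64_t packets =
		(num_ptes + max_ptes_per_packet - 1) / max_ptes_per_packet;

	return (uint32_t)(packets * dw_per_packet);	/* dwords needed in the IB */
}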
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index fd7c72aaafa6..c26d205ff3bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | |||
@@ -54,7 +54,7 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); | |||
54 | static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); | 54 | static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); |
55 | 55 | ||
56 | static const u32 golden_settings_sdma_4[] = { | 56 | static const u32 golden_settings_sdma_4[] = { |
57 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, | 57 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07, |
58 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, | 58 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, |
59 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, | 59 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, |
60 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, | 60 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, |
@@ -89,7 +89,7 @@ static const u32 golden_settings_sdma_vg10[] = { | |||
89 | 89 | ||
90 | static const u32 golden_settings_sdma_4_1[] = | 90 | static const u32 golden_settings_sdma_4_1[] = |
91 | { | 91 | { |
92 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, | 92 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07, |
93 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, | 93 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, |
94 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, | 94 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, |
95 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, | 95 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, |
@@ -398,7 +398,7 @@ static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) | |||
398 | { | 398 | { |
399 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | | 399 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | |
400 | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); | 400 | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); |
401 | amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0)); | 401 | amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE)); |
402 | amdgpu_ring_write(ring, 1); | 402 | amdgpu_ring_write(ring, 1); |
403 | } | 403 | } |
404 | 404 | ||
@@ -1264,6 +1264,11 @@ static int sdma_v4_0_sw_fini(void *handle) | |||
1264 | for (i = 0; i < adev->sdma.num_instances; i++) | 1264 | for (i = 0; i < adev->sdma.num_instances; i++) |
1265 | amdgpu_ring_fini(&adev->sdma.instance[i].ring); | 1265 | amdgpu_ring_fini(&adev->sdma.instance[i].ring); |
1266 | 1266 | ||
1267 | for (i = 0; i < adev->sdma.num_instances; i++) { | ||
1268 | release_firmware(adev->sdma.instance[i].fw); | ||
1269 | adev->sdma.instance[i].fw = NULL; | ||
1270 | } | ||
1271 | |||
1267 | return 0; | 1272 | return 0; |
1268 | } | 1273 | } |
1269 | 1274 | ||
@@ -1714,8 +1719,13 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) | |||
1714 | } | 1719 | } |
1715 | 1720 | ||
1716 | static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { | 1721 | static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { |
1722 | .copy_pte_num_dw = 7, | ||
1717 | .copy_pte = sdma_v4_0_vm_copy_pte, | 1723 | .copy_pte = sdma_v4_0_vm_copy_pte, |
1724 | |||
1718 | .write_pte = sdma_v4_0_vm_write_pte, | 1725 | .write_pte = sdma_v4_0_vm_write_pte, |
1726 | |||
1727 | .set_max_nums_pte_pde = 0x400000 >> 3, | ||
1728 | .set_pte_pde_num_dw = 10, | ||
1719 | .set_pte_pde = sdma_v4_0_vm_set_pte_pde, | 1729 | .set_pte_pde = sdma_v4_0_vm_set_pte_pde, |
1720 | }; | 1730 | }; |
1721 | 1731 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 112969f3301a..3fa2fbf8c9a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c | |||
@@ -887,8 +887,13 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) | |||
887 | } | 887 | } |
888 | 888 | ||
889 | static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { | 889 | static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { |
890 | .copy_pte_num_dw = 5, | ||
890 | .copy_pte = si_dma_vm_copy_pte, | 891 | .copy_pte = si_dma_vm_copy_pte, |
892 | |||
891 | .write_pte = si_dma_vm_write_pte, | 893 | .write_pte = si_dma_vm_write_pte, |
894 | |||
895 | .set_max_nums_pte_pde = 0xffff8 >> 3, | ||
896 | .set_pte_pde_num_dw = 9, | ||
892 | .set_pte_pde = si_dma_vm_set_pte_pde, | 897 | .set_pte_pde = si_dma_vm_set_pte_pde, |
893 | }; | 898 | }; |
894 | 899 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index d63873f3f574..9b8db6046271 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c | |||
@@ -1847,7 +1847,6 @@ static int si_calculate_sclk_params(struct amdgpu_device *adev, | |||
1847 | 1847 | ||
1848 | static void si_thermal_start_smc_fan_control(struct amdgpu_device *adev); | 1848 | static void si_thermal_start_smc_fan_control(struct amdgpu_device *adev); |
1849 | static void si_fan_ctrl_set_default_mode(struct amdgpu_device *adev); | 1849 | static void si_fan_ctrl_set_default_mode(struct amdgpu_device *adev); |
1850 | static void si_dpm_set_dpm_funcs(struct amdgpu_device *adev); | ||
1851 | static void si_dpm_set_irq_funcs(struct amdgpu_device *adev); | 1850 | static void si_dpm_set_irq_funcs(struct amdgpu_device *adev); |
1852 | 1851 | ||
1853 | static struct si_power_info *si_get_pi(struct amdgpu_device *adev) | 1852 | static struct si_power_info *si_get_pi(struct amdgpu_device *adev) |
@@ -3060,9 +3059,9 @@ static int si_get_vce_clock_voltage(struct amdgpu_device *adev, | |||
3060 | return ret; | 3059 | return ret; |
3061 | } | 3060 | } |
3062 | 3061 | ||
3063 | static bool si_dpm_vblank_too_short(struct amdgpu_device *adev) | 3062 | static bool si_dpm_vblank_too_short(void *handle) |
3064 | { | 3063 | { |
3065 | 3064 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | |
3066 | u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); | 3065 | u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); |
3067 | /* we never hit the non-gddr5 limit so disable it */ | 3066 | /* we never hit the non-gddr5 limit so disable it */ |
3068 | u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; | 3067 | u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; |
@@ -3871,9 +3870,10 @@ static int si_restrict_performance_levels_before_switch(struct amdgpu_device *ad | |||
3871 | 0 : -EINVAL; | 3870 | 0 : -EINVAL; |
3872 | } | 3871 | } |
3873 | 3872 | ||
3874 | static int si_dpm_force_performance_level(struct amdgpu_device *adev, | 3873 | static int si_dpm_force_performance_level(void *handle, |
3875 | enum amd_dpm_forced_level level) | 3874 | enum amd_dpm_forced_level level) |
3876 | { | 3875 | { |
3876 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
3877 | struct amdgpu_ps *rps = adev->pm.dpm.current_ps; | 3877 | struct amdgpu_ps *rps = adev->pm.dpm.current_ps; |
3878 | struct si_ps *ps = si_get_ps(rps); | 3878 | struct si_ps *ps = si_get_ps(rps); |
3879 | u32 levels = ps->performance_level_count; | 3879 | u32 levels = ps->performance_level_count; |
@@ -6575,11 +6575,12 @@ static int si_fan_ctrl_stop_smc_fan_control(struct amdgpu_device *adev) | |||
6575 | } | 6575 | } |
6576 | } | 6576 | } |
6577 | 6577 | ||
6578 | static int si_dpm_get_fan_speed_percent(struct amdgpu_device *adev, | 6578 | static int si_dpm_get_fan_speed_percent(void *handle, |
6579 | u32 *speed) | 6579 | u32 *speed) |
6580 | { | 6580 | { |
6581 | u32 duty, duty100; | 6581 | u32 duty, duty100; |
6582 | u64 tmp64; | 6582 | u64 tmp64; |
6583 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6583 | 6584 | ||
6584 | if (adev->pm.no_fan) | 6585 | if (adev->pm.no_fan) |
6585 | return -ENOENT; | 6586 | return -ENOENT; |
@@ -6600,9 +6601,10 @@ static int si_dpm_get_fan_speed_percent(struct amdgpu_device *adev, | |||
6600 | return 0; | 6601 | return 0; |
6601 | } | 6602 | } |
6602 | 6603 | ||
6603 | static int si_dpm_set_fan_speed_percent(struct amdgpu_device *adev, | 6604 | static int si_dpm_set_fan_speed_percent(void *handle, |
6604 | u32 speed) | 6605 | u32 speed) |
6605 | { | 6606 | { |
6607 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6606 | struct si_power_info *si_pi = si_get_pi(adev); | 6608 | struct si_power_info *si_pi = si_get_pi(adev); |
6607 | u32 tmp; | 6609 | u32 tmp; |
6608 | u32 duty, duty100; | 6610 | u32 duty, duty100; |
@@ -6633,8 +6635,10 @@ static int si_dpm_set_fan_speed_percent(struct amdgpu_device *adev, | |||
6633 | return 0; | 6635 | return 0; |
6634 | } | 6636 | } |
6635 | 6637 | ||
6636 | static void si_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode) | 6638 | static void si_dpm_set_fan_control_mode(void *handle, u32 mode) |
6637 | { | 6639 | { |
6640 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6641 | |||
6638 | if (mode) { | 6642 | if (mode) { |
6639 | /* stop auto-manage */ | 6643 | /* stop auto-manage */ |
6640 | if (adev->pm.dpm.fan.ucode_fan_control) | 6644 | if (adev->pm.dpm.fan.ucode_fan_control) |
@@ -6649,8 +6653,9 @@ static void si_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode) | |||
6649 | } | 6653 | } |
6650 | } | 6654 | } |
6651 | 6655 | ||
6652 | static u32 si_dpm_get_fan_control_mode(struct amdgpu_device *adev) | 6656 | static u32 si_dpm_get_fan_control_mode(void *handle) |
6653 | { | 6657 | { |
6658 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6654 | struct si_power_info *si_pi = si_get_pi(adev); | 6659 | struct si_power_info *si_pi = si_get_pi(adev); |
6655 | u32 tmp; | 6660 | u32 tmp; |
6656 | 6661 | ||
@@ -6946,8 +6951,9 @@ static void si_dpm_disable(struct amdgpu_device *adev) | |||
6946 | ni_update_current_ps(adev, boot_ps); | 6951 | ni_update_current_ps(adev, boot_ps); |
6947 | } | 6952 | } |
6948 | 6953 | ||
6949 | static int si_dpm_pre_set_power_state(struct amdgpu_device *adev) | 6954 | static int si_dpm_pre_set_power_state(void *handle) |
6950 | { | 6955 | { |
6956 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6951 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); | 6957 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); |
6952 | struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; | 6958 | struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; |
6953 | struct amdgpu_ps *new_ps = &requested_ps; | 6959 | struct amdgpu_ps *new_ps = &requested_ps; |
@@ -6984,8 +6990,9 @@ static int si_power_control_set_level(struct amdgpu_device *adev) | |||
6984 | return 0; | 6990 | return 0; |
6985 | } | 6991 | } |
6986 | 6992 | ||
6987 | static int si_dpm_set_power_state(struct amdgpu_device *adev) | 6993 | static int si_dpm_set_power_state(void *handle) |
6988 | { | 6994 | { |
6995 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
6989 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); | 6996 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); |
6990 | struct amdgpu_ps *new_ps = &eg_pi->requested_rps; | 6997 | struct amdgpu_ps *new_ps = &eg_pi->requested_rps; |
6991 | struct amdgpu_ps *old_ps = &eg_pi->current_rps; | 6998 | struct amdgpu_ps *old_ps = &eg_pi->current_rps; |
@@ -7086,8 +7093,9 @@ static int si_dpm_set_power_state(struct amdgpu_device *adev) | |||
7086 | return 0; | 7093 | return 0; |
7087 | } | 7094 | } |
7088 | 7095 | ||
7089 | static void si_dpm_post_set_power_state(struct amdgpu_device *adev) | 7096 | static void si_dpm_post_set_power_state(void *handle) |
7090 | { | 7097 | { |
7098 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7091 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); | 7099 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); |
7092 | struct amdgpu_ps *new_ps = &eg_pi->requested_rps; | 7100 | struct amdgpu_ps *new_ps = &eg_pi->requested_rps; |
7093 | 7101 | ||
@@ -7103,8 +7111,10 @@ void si_dpm_reset_asic(struct amdgpu_device *adev) | |||
7103 | } | 7111 | } |
7104 | #endif | 7112 | #endif |
7105 | 7113 | ||
7106 | static void si_dpm_display_configuration_changed(struct amdgpu_device *adev) | 7114 | static void si_dpm_display_configuration_changed(void *handle) |
7107 | { | 7115 | { |
7116 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7117 | |||
7108 | si_program_display_gap(adev); | 7118 | si_program_display_gap(adev); |
7109 | } | 7119 | } |
7110 | 7120 | ||
@@ -7486,9 +7496,10 @@ static void si_dpm_fini(struct amdgpu_device *adev) | |||
7486 | amdgpu_free_extended_power_table(adev); | 7496 | amdgpu_free_extended_power_table(adev); |
7487 | } | 7497 | } |
7488 | 7498 | ||
7489 | static void si_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, | 7499 | static void si_dpm_debugfs_print_current_performance_level(void *handle, |
7490 | struct seq_file *m) | 7500 | struct seq_file *m) |
7491 | { | 7501 | { |
7502 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7492 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); | 7503 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); |
7493 | struct amdgpu_ps *rps = &eg_pi->current_rps; | 7504 | struct amdgpu_ps *rps = &eg_pi->current_rps; |
7494 | struct si_ps *ps = si_get_ps(rps); | 7505 | struct si_ps *ps = si_get_ps(rps); |
@@ -7860,10 +7871,11 @@ static int si_dpm_set_powergating_state(void *handle, | |||
7860 | } | 7871 | } |
7861 | 7872 | ||
7862 | /* get temperature in millidegrees */ | 7873 | /* get temperature in millidegrees */ |
7863 | static int si_dpm_get_temp(struct amdgpu_device *adev) | 7874 | static int si_dpm_get_temp(void *handle) |
7864 | { | 7875 | { |
7865 | u32 temp; | 7876 | u32 temp; |
7866 | int actual_temp = 0; | 7877 | int actual_temp = 0; |
7878 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7867 | 7879 | ||
7868 | temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> | 7880 | temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> |
7869 | CTF_TEMP_SHIFT; | 7881 | CTF_TEMP_SHIFT; |
@@ -7878,8 +7890,9 @@ static int si_dpm_get_temp(struct amdgpu_device *adev) | |||
7878 | return actual_temp; | 7890 | return actual_temp; |
7879 | } | 7891 | } |
7880 | 7892 | ||
7881 | static u32 si_dpm_get_sclk(struct amdgpu_device *adev, bool low) | 7893 | static u32 si_dpm_get_sclk(void *handle, bool low) |
7882 | { | 7894 | { |
7895 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7883 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); | 7896 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); |
7884 | struct si_ps *requested_state = si_get_ps(&eg_pi->requested_rps); | 7897 | struct si_ps *requested_state = si_get_ps(&eg_pi->requested_rps); |
7885 | 7898 | ||
@@ -7889,8 +7902,9 @@ static u32 si_dpm_get_sclk(struct amdgpu_device *adev, bool low) | |||
7889 | return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; | 7902 | return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; |
7890 | } | 7903 | } |
7891 | 7904 | ||
7892 | static u32 si_dpm_get_mclk(struct amdgpu_device *adev, bool low) | 7905 | static u32 si_dpm_get_mclk(void *handle, bool low) |
7893 | { | 7906 | { |
7907 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7894 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); | 7908 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); |
7895 | struct si_ps *requested_state = si_get_ps(&eg_pi->requested_rps); | 7909 | struct si_ps *requested_state = si_get_ps(&eg_pi->requested_rps); |
7896 | 7910 | ||
@@ -7900,9 +7914,11 @@ static u32 si_dpm_get_mclk(struct amdgpu_device *adev, bool low) | |||
7900 | return requested_state->performance_levels[requested_state->performance_level_count - 1].mclk; | 7914 | return requested_state->performance_levels[requested_state->performance_level_count - 1].mclk; |
7901 | } | 7915 | } |
7902 | 7916 | ||
7903 | static void si_dpm_print_power_state(struct amdgpu_device *adev, | 7917 | static void si_dpm_print_power_state(void *handle, |
7904 | struct amdgpu_ps *rps) | 7918 | void *current_ps) |
7905 | { | 7919 | { |
7920 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7921 | struct amdgpu_ps *rps = (struct amdgpu_ps *)current_ps; | ||
7906 | struct si_ps *ps = si_get_ps(rps); | 7922 | struct si_ps *ps = si_get_ps(rps); |
7907 | struct rv7xx_pl *pl; | 7923 | struct rv7xx_pl *pl; |
7908 | int i; | 7924 | int i; |
@@ -7927,7 +7943,6 @@ static int si_dpm_early_init(void *handle) | |||
7927 | 7943 | ||
7928 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 7944 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
7929 | 7945 | ||
7930 | si_dpm_set_dpm_funcs(adev); | ||
7931 | si_dpm_set_irq_funcs(adev); | 7946 | si_dpm_set_irq_funcs(adev); |
7932 | return 0; | 7947 | return 0; |
7933 | } | 7948 | } |
@@ -7942,20 +7957,23 @@ static inline bool si_are_power_levels_equal(const struct rv7xx_pl *si_cpl1, | |||
7942 | (si_cpl1->vddci == si_cpl2->vddci)); | 7957 | (si_cpl1->vddci == si_cpl2->vddci)); |
7943 | } | 7958 | } |
7944 | 7959 | ||
7945 | static int si_check_state_equal(struct amdgpu_device *adev, | 7960 | static int si_check_state_equal(void *handle, |
7946 | struct amdgpu_ps *cps, | 7961 | void *current_ps, |
7947 | struct amdgpu_ps *rps, | 7962 | void *request_ps, |
7948 | bool *equal) | 7963 | bool *equal) |
7949 | { | 7964 | { |
7950 | struct si_ps *si_cps; | 7965 | struct si_ps *si_cps; |
7951 | struct si_ps *si_rps; | 7966 | struct si_ps *si_rps; |
7952 | int i; | 7967 | int i; |
7968 | struct amdgpu_ps *cps = (struct amdgpu_ps *)current_ps; | ||
7969 | struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps; | ||
7970 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7953 | 7971 | ||
7954 | if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) | 7972 | if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) |
7955 | return -EINVAL; | 7973 | return -EINVAL; |
7956 | 7974 | ||
7957 | si_cps = si_get_ps(cps); | 7975 | si_cps = si_get_ps((struct amdgpu_ps *)cps); |
7958 | si_rps = si_get_ps(rps); | 7976 | si_rps = si_get_ps((struct amdgpu_ps *)rps); |
7959 | 7977 | ||
7960 | if (si_cps == NULL) { | 7978 | if (si_cps == NULL) { |
7961 | printk("si_cps is NULL\n"); | 7979 | printk("si_cps is NULL\n"); |
@@ -7983,9 +8001,10 @@ static int si_check_state_equal(struct amdgpu_device *adev, | |||
7983 | return 0; | 8001 | return 0; |
7984 | } | 8002 | } |
7985 | 8003 | ||
7986 | static int si_dpm_read_sensor(struct amdgpu_device *adev, int idx, | 8004 | static int si_dpm_read_sensor(void *handle, int idx, |
7987 | void *value, int *size) | 8005 | void *value, int *size) |
7988 | { | 8006 | { |
8007 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
7989 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); | 8008 | struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); |
7990 | struct amdgpu_ps *rps = &eg_pi->current_rps; | 8009 | struct amdgpu_ps *rps = &eg_pi->current_rps; |
7991 | struct si_ps *ps = si_get_ps(rps); | 8010 | struct si_ps *ps = si_get_ps(rps); |
@@ -8041,7 +8060,7 @@ const struct amd_ip_funcs si_dpm_ip_funcs = { | |||
8041 | .set_powergating_state = si_dpm_set_powergating_state, | 8060 | .set_powergating_state = si_dpm_set_powergating_state, |
8042 | }; | 8061 | }; |
8043 | 8062 | ||
8044 | static const struct amdgpu_dpm_funcs si_dpm_funcs = { | 8063 | const struct amd_pm_funcs si_dpm_funcs = { |
8045 | .get_temperature = &si_dpm_get_temp, | 8064 | .get_temperature = &si_dpm_get_temp, |
8046 | .pre_set_power_state = &si_dpm_pre_set_power_state, | 8065 | .pre_set_power_state = &si_dpm_pre_set_power_state, |
8047 | .set_power_state = &si_dpm_set_power_state, | 8066 | .set_power_state = &si_dpm_set_power_state, |
@@ -8062,12 +8081,6 @@ static const struct amdgpu_dpm_funcs si_dpm_funcs = { | |||
8062 | .read_sensor = &si_dpm_read_sensor, | 8081 | .read_sensor = &si_dpm_read_sensor, |
8063 | }; | 8082 | }; |
8064 | 8083 | ||
8065 | static void si_dpm_set_dpm_funcs(struct amdgpu_device *adev) | ||
8066 | { | ||
8067 | if (adev->pm.funcs == NULL) | ||
8068 | adev->pm.funcs = &si_dpm_funcs; | ||
8069 | } | ||
8070 | |||
8071 | static const struct amdgpu_irq_src_funcs si_dpm_irq_funcs = { | 8084 | static const struct amdgpu_irq_src_funcs si_dpm_irq_funcs = { |
8072 | .set = si_dpm_set_interrupt_state, | 8085 | .set = si_dpm_set_interrupt_state, |
8073 | .process = si_dpm_process_interrupt, | 8086 | .process = si_dpm_process_interrupt, |
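The si_dpm hunks above all follow one mechanical pattern: every power-management callback that used to take a struct amdgpu_device * now takes an opaque void *handle and casts it back on entry, so one amd_pm_funcs table shape can front both this legacy dpm code and the powerplay backend. With the table exported through si_dpm.h (next hunk), the local si_dpm_set_dpm_funcs() helper and its early-init assignment become unnecessary and are dropped. Below is a minimal, self-contained sketch of the handle/cast convention; the struct and function names are stand-ins, not the real amdgpu types.

/* Stand-alone sketch of the void *handle convention used by the new
 * amd_pm_funcs callbacks; example_* names are illustrative only. */
#include <stdio.h>

struct example_device {                 /* stand-in for struct amdgpu_device */
	int temp_millidegrees;
};

struct example_pm_funcs {               /* stand-in for struct amd_pm_funcs */
	int (*get_temperature)(void *handle);
};

/* The callback receives an opaque handle and casts it back, exactly the
 * pattern si_dpm_get_temp() follows after this change. */
static int example_get_temp(void *handle)
{
	struct example_device *adev = (struct example_device *)handle;

	return adev->temp_millidegrees;
}

static const struct example_pm_funcs example_funcs = {
	.get_temperature = example_get_temp,
};

int main(void)
{
	struct example_device dev = { .temp_millidegrees = 45000 };

	/* Callers only ever see the opaque handle. */
	printf("%d\n", example_funcs.get_temperature(&dev));
	return 0;
}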
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.h b/drivers/gpu/drm/amd/amdgpu/si_dpm.h index 51ce21c5f4fb..9fe343de3477 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.h | |||
@@ -246,6 +246,7 @@ enum si_display_gap | |||
246 | }; | 246 | }; |
247 | 247 | ||
248 | extern const struct amd_ip_funcs si_dpm_ip_funcs; | 248 | extern const struct amd_ip_funcs si_dpm_ip_funcs; |
249 | extern const struct amd_pm_funcs si_dpm_funcs; | ||
249 | 250 | ||
250 | struct ni_leakage_coeffients | 251 | struct ni_leakage_coeffients |
251 | { | 252 | { |
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index ce25e03a077d..d2c6b80309c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c | |||
@@ -118,6 +118,19 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev) | |||
118 | return (wptr & adev->irq.ih.ptr_mask); | 118 | return (wptr & adev->irq.ih.ptr_mask); |
119 | } | 119 | } |
120 | 120 | ||
121 | /** | ||
122 | * si_ih_prescreen_iv - prescreen an interrupt vector | ||
123 | * | ||
124 | * @adev: amdgpu_device pointer | ||
125 | * | ||
126 | * Returns true if the interrupt vector should be further processed. | ||
127 | */ | ||
128 | static bool si_ih_prescreen_iv(struct amdgpu_device *adev) | ||
129 | { | ||
130 | /* Process all interrupts */ | ||
131 | return true; | ||
132 | } | ||
133 | |||
121 | static void si_ih_decode_iv(struct amdgpu_device *adev, | 134 | static void si_ih_decode_iv(struct amdgpu_device *adev, |
122 | struct amdgpu_iv_entry *entry) | 135 | struct amdgpu_iv_entry *entry) |
123 | { | 136 | { |
@@ -288,6 +301,7 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { | |||
288 | 301 | ||
289 | static const struct amdgpu_ih_funcs si_ih_funcs = { | 302 | static const struct amdgpu_ih_funcs si_ih_funcs = { |
290 | .get_wptr = si_ih_get_wptr, | 303 | .get_wptr = si_ih_get_wptr, |
304 | .prescreen_iv = si_ih_prescreen_iv, | ||
291 | .decode_iv = si_ih_decode_iv, | 305 | .decode_iv = si_ih_decode_iv, |
292 | .set_rptr = si_ih_set_rptr | 306 | .set_rptr = si_ih_set_rptr |
293 | }; | 307 | }; |
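si_ih (and tonga_ih further down) gain a prescreen_iv hook in amdgpu_ih_funcs that runs before decode_iv; on these parts it is a pass-through that accepts every vector, while the Vega10 version later in this diff uses it to drop redundant retry page faults. The sketch below shows roughly where such a hook would sit in an IH processing loop; it is an illustrative stand-alone example, not the actual amdgpu IRQ dispatch code.

/* Stand-alone sketch of an IH processing loop with a prescreen stage. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ih_ring {
	uint32_t rptr;
	uint32_t wptr;
};

struct ih_funcs {
	bool (*prescreen_iv)(struct ih_ring *ih);  /* may reject an entry */
	void (*decode_iv)(struct ih_ring *ih);     /* decode and advance rptr */
};

/* Pass-through prescreen, like si_ih_prescreen_iv()/tonga_ih_prescreen_iv(). */
static bool accept_all(struct ih_ring *ih)
{
	(void)ih;
	return true;
}

static void decode_one(struct ih_ring *ih)
{
	printf("processing entry at rptr %u\n", (unsigned)ih->rptr);
	ih->rptr += 1;                     /* entries are one slot wide here */
}

static void process_ih_ring(struct ih_ring *ih, const struct ih_funcs *funcs)
{
	while (ih->rptr != ih->wptr) {
		/* A prescreen hook that rejects an entry is expected to have
		 * advanced rptr past it, so the loop just moves on. */
		if (!funcs->prescreen_iv(ih))
			continue;
		funcs->decode_iv(ih);
	}
}

int main(void)
{
	struct ih_ring ih = { .rptr = 0, .wptr = 3 };
	const struct ih_funcs funcs = {
		.prescreen_iv = accept_all,
		.decode_iv = decode_one,
	};

	process_ih_ring(&ih, &funcs);
	return 0;
}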
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f2c3a49f73a0..245a18aeb389 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c | |||
@@ -407,18 +407,27 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, | |||
407 | return -EINVAL; | 407 | return -EINVAL; |
408 | } | 408 | } |
409 | 409 | ||
410 | static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) | 410 | static int soc15_asic_reset(struct amdgpu_device *adev) |
411 | { | 411 | { |
412 | u32 i; | 412 | u32 i; |
413 | 413 | ||
414 | dev_info(adev->dev, "GPU pci config reset\n"); | 414 | amdgpu_atombios_scratch_regs_engine_hung(adev, true); |
415 | |||
416 | dev_info(adev->dev, "GPU reset\n"); | ||
415 | 417 | ||
416 | /* disable BM */ | 418 | /* disable BM */ |
417 | pci_clear_master(adev->pdev); | 419 | pci_clear_master(adev->pdev); |
418 | /* reset */ | ||
419 | amdgpu_pci_config_reset(adev); | ||
420 | 420 | ||
421 | udelay(100); | 421 | pci_save_state(adev->pdev); |
422 | |||
423 | for (i = 0; i < AMDGPU_MAX_IP_NUM; i++) { | ||
424 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP){ | ||
425 | adev->ip_blocks[i].version->funcs->soft_reset((void *)adev); | ||
426 | break; | ||
427 | } | ||
428 | } | ||
429 | |||
430 | pci_restore_state(adev->pdev); | ||
422 | 431 | ||
423 | /* wait for asic to come out of reset */ | 432 | /* wait for asic to come out of reset */ |
424 | for (i = 0; i < adev->usec_timeout; i++) { | 433 | for (i = 0; i < adev->usec_timeout; i++) { |
@@ -430,14 +439,6 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) | |||
430 | udelay(1); | 439 | udelay(1); |
431 | } | 440 | } |
432 | 441 | ||
433 | } | ||
434 | |||
435 | static int soc15_asic_reset(struct amdgpu_device *adev) | ||
436 | { | ||
437 | amdgpu_atombios_scratch_regs_engine_hung(adev, true); | ||
438 | |||
439 | soc15_gpu_pci_config_reset(adev); | ||
440 | |||
441 | amdgpu_atombios_scratch_regs_engine_hung(adev, false); | 442 | amdgpu_atombios_scratch_regs_engine_hung(adev, false); |
442 | 443 | ||
443 | return 0; | 444 | return 0; |
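soc15_asic_reset() now folds the old pci-config-reset helper into a PSP-driven sequence: save PCI config space, locate the PSP IP block and call its soft_reset hook, restore PCI config space, then poll until the ASIC reports its memory size again. The reusable piece is the walk over the device's IP block list to find a block by type; here is a stand-alone sketch of that pattern with illustrative names (BLOCK_PSP and friends are not real amdgpu identifiers).

/* Stand-alone sketch of the "find an IP block by type" pattern used in the
 * new soc15_asic_reset(); all names here are illustrative only. */
#include <stddef.h>
#include <stdio.h>

enum block_type { BLOCK_GFX, BLOCK_PSP, BLOCK_IH };

struct ip_block {
	enum block_type type;
	int (*soft_reset)(void *handle);
};

static int psp_soft_reset(void *handle)
{
	(void)handle;
	puts("PSP mode-1 reset requested");
	return 0;
}

static struct ip_block blocks[] = {
	{ BLOCK_GFX, NULL },
	{ BLOCK_PSP, psp_soft_reset },
	{ BLOCK_IH,  NULL },
};

int main(void)
{
	size_t i;

	/* Walk the block list and hand the reset to the first PSP block,
	 * mirroring the loop added to soc15_asic_reset(). */
	for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		if (blocks[i].type == BLOCK_PSP) {
			blocks[i].soft_reset(NULL);
			break;
		}
	}
	return 0;
}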
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index e79befd80eed..7f408f85fdb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h | |||
@@ -250,6 +250,7 @@ | |||
250 | #define PACKET3_SET_UCONFIG_REG 0x79 | 250 | #define PACKET3_SET_UCONFIG_REG 0x79 |
251 | #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 | 251 | #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 |
252 | #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 | 252 | #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 |
253 | #define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28) | ||
253 | #define PACKET3_SCRATCH_RAM_WRITE 0x7D | 254 | #define PACKET3_SCRATCH_RAM_WRITE 0x7D |
254 | #define PACKET3_SCRATCH_RAM_READ 0x7E | 255 | #define PACKET3_SCRATCH_RAM_READ 0x7E |
255 | #define PACKET3_LOAD_CONST_RAM 0x80 | 256 | #define PACKET3_LOAD_CONST_RAM 0x80 |
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 923df2c0e535..5ed00692618e 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c | |||
@@ -219,6 +219,19 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) | |||
219 | } | 219 | } |
220 | 220 | ||
221 | /** | 221 | /** |
222 | * tonga_ih_prescreen_iv - prescreen an interrupt vector | ||
223 | * | ||
224 | * @adev: amdgpu_device pointer | ||
225 | * | ||
226 | * Returns true if the interrupt vector should be further processed. | ||
227 | */ | ||
228 | static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev) | ||
229 | { | ||
230 | /* Process all interrupts */ | ||
231 | return true; | ||
232 | } | ||
233 | |||
234 | /** | ||
222 | * tonga_ih_decode_iv - decode an interrupt vector | 235 | * tonga_ih_decode_iv - decode an interrupt vector |
223 | * | 236 | * |
224 | * @adev: amdgpu_device pointer | 237 | * @adev: amdgpu_device pointer |
@@ -478,6 +491,7 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { | |||
478 | 491 | ||
479 | static const struct amdgpu_ih_funcs tonga_ih_funcs = { | 492 | static const struct amdgpu_ih_funcs tonga_ih_funcs = { |
480 | .get_wptr = tonga_ih_get_wptr, | 493 | .get_wptr = tonga_ih_get_wptr, |
494 | .prescreen_iv = tonga_ih_prescreen_iv, | ||
481 | .decode_iv = tonga_ih_decode_iv, | 495 | .decode_iv = tonga_ih_decode_iv, |
482 | .set_rptr = tonga_ih_set_rptr | 496 | .set_rptr = tonga_ih_set_rptr |
483 | }; | 497 | }; |
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 23a85750edd6..b8ed8faf2003 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | |||
@@ -1161,7 +1161,7 @@ static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) | |||
1161 | */ | 1161 | */ |
1162 | static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) | 1162 | static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) |
1163 | { | 1163 | { |
1164 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0)); | 1164 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0)); |
1165 | amdgpu_ring_write(ring, 1); | 1165 | amdgpu_ring_write(ring, 1); |
1166 | } | 1166 | } |
1167 | 1167 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 11134d5f7443..75745544600a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | |||
@@ -1011,10 +1011,6 @@ static int vce_v4_0_process_interrupt(struct amdgpu_device *adev, | |||
1011 | { | 1011 | { |
1012 | DRM_DEBUG("IH: VCE\n"); | 1012 | DRM_DEBUG("IH: VCE\n"); |
1013 | 1013 | ||
1014 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS), | ||
1015 | VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK, | ||
1016 | ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK); | ||
1017 | |||
1018 | switch (entry->src_data[0]) { | 1014 | switch (entry->src_data[0]) { |
1019 | case 0: | 1015 | case 0: |
1020 | case 1: | 1016 | case 1: |
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 21e7b88401e1..1eb4d79d6e30 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | |||
@@ -812,7 +812,7 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 | |||
812 | */ | 812 | */ |
813 | static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) | 813 | static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) |
814 | { | 814 | { |
815 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0)); | 815 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0)); |
816 | amdgpu_ring_write(ring, 1); | 816 | amdgpu_ring_write(ring, 1); |
817 | } | 817 | } |
818 | 818 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 56150e8d1ed2..a3b30d84dbb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c | |||
@@ -219,14 +219,92 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) | |||
219 | wptr, adev->irq.ih.rptr, tmp); | 219 | wptr, adev->irq.ih.rptr, tmp); |
220 | adev->irq.ih.rptr = tmp; | 220 | adev->irq.ih.rptr = tmp; |
221 | 221 | ||
222 | tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); | 222 | tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); |
223 | tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); | 223 | tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); |
224 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); | 224 | WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); |
225 | } | 225 | } |
226 | return (wptr & adev->irq.ih.ptr_mask); | 226 | return (wptr & adev->irq.ih.ptr_mask); |
227 | } | 227 | } |
228 | 228 | ||
229 | /** | 229 | /** |
230 | * vega10_ih_prescreen_iv - prescreen an interrupt vector | ||
231 | * | ||
232 | * @adev: amdgpu_device pointer | ||
233 | * | ||
234 | * Returns true if the interrupt vector should be further processed. | ||
235 | */ | ||
236 | static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev) | ||
237 | { | ||
238 | u32 ring_index = adev->irq.ih.rptr >> 2; | ||
239 | u32 dw0, dw3, dw4, dw5; | ||
240 | u16 pasid; | ||
241 | u64 addr, key; | ||
242 | struct amdgpu_vm *vm; | ||
243 | int r; | ||
244 | |||
245 | dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); | ||
246 | dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); | ||
247 | dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]); | ||
248 | dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]); | ||
249 | |||
250 | /* Filter retry page faults, let only the first one pass. If | ||
251 | * there are too many outstanding faults, ignore them until | ||
252 | * some faults get cleared. | ||
253 | */ | ||
254 | switch (dw0 & 0xff) { | ||
255 | case AMDGPU_IH_CLIENTID_VMC: | ||
256 | case AMDGPU_IH_CLIENTID_UTCL2: | ||
257 | break; | ||
258 | default: | ||
259 | /* Not a VM fault */ | ||
260 | return true; | ||
261 | } | ||
262 | |||
263 | /* Not a retry fault */ | ||
264 | if (!(dw5 & 0x80)) | ||
265 | return true; | ||
266 | |||
267 | pasid = dw3 & 0xffff; | ||
268 | /* No PASID, can't identify faulting process */ | ||
269 | if (!pasid) | ||
270 | return true; | ||
271 | |||
272 | addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12); | ||
273 | key = AMDGPU_VM_FAULT(pasid, addr); | ||
274 | r = amdgpu_ih_add_fault(adev, key); | ||
275 | |||
276 | /* Hash table is full or the fault is already being processed, | ||
277 | * ignore further page faults | ||
278 | */ | ||
279 | if (r != 0) | ||
280 | goto ignore_iv; | ||
281 | |||
282 | /* Track retry faults in per-VM fault FIFO. */ | ||
283 | spin_lock(&adev->vm_manager.pasid_lock); | ||
284 | vm = idr_find(&adev->vm_manager.pasid_idr, pasid); | ||
285 | spin_unlock(&adev->vm_manager.pasid_lock); | ||
286 | if (WARN_ON_ONCE(!vm)) { | ||
287 | /* VM not found, process it normally */ | ||
288 | amdgpu_ih_clear_fault(adev, key); | ||
289 | return true; | ||
290 | } | ||
291 | /* No locking required with single writer and single reader */ | ||
292 | r = kfifo_put(&vm->faults, key); | ||
293 | if (!r) { | ||
294 | /* FIFO is full. Ignore it until there is space */ | ||
295 | amdgpu_ih_clear_fault(adev, key); | ||
296 | goto ignore_iv; | ||
297 | } | ||
298 | |||
299 | /* It's the first fault for this address, process it normally */ | ||
300 | return true; | ||
301 | |||
302 | ignore_iv: | ||
303 | adev->irq.ih.rptr += 32; | ||
304 | return false; | ||
305 | } | ||
306 | |||
307 | /** | ||
230 | * vega10_ih_decode_iv - decode an interrupt vector | 308 | * vega10_ih_decode_iv - decode an interrupt vector |
231 | * | 309 | * |
232 | * @adev: amdgpu_device pointer | 310 | * @adev: amdgpu_device pointer |
@@ -310,6 +388,14 @@ static int vega10_ih_sw_init(void *handle) | |||
310 | adev->irq.ih.use_doorbell = true; | 388 | adev->irq.ih.use_doorbell = true; |
311 | adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1; | 389 | adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1; |
312 | 390 | ||
391 | adev->irq.ih.faults = kmalloc(sizeof(*adev->irq.ih.faults), GFP_KERNEL); | ||
392 | if (!adev->irq.ih.faults) | ||
393 | return -ENOMEM; | ||
394 | INIT_CHASH_TABLE(adev->irq.ih.faults->hash, | ||
395 | AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); | ||
396 | spin_lock_init(&adev->irq.ih.faults->lock); | ||
397 | adev->irq.ih.faults->count = 0; | ||
398 | |||
313 | r = amdgpu_irq_init(adev); | 399 | r = amdgpu_irq_init(adev); |
314 | 400 | ||
315 | return r; | 401 | return r; |
@@ -322,6 +408,9 @@ static int vega10_ih_sw_fini(void *handle) | |||
322 | amdgpu_irq_fini(adev); | 408 | amdgpu_irq_fini(adev); |
323 | amdgpu_ih_ring_fini(adev); | 409 | amdgpu_ih_ring_fini(adev); |
324 | 410 | ||
411 | kfree(adev->irq.ih.faults); | ||
412 | adev->irq.ih.faults = NULL; | ||
413 | |||
325 | return 0; | 414 | return 0; |
326 | } | 415 | } |
327 | 416 | ||
@@ -410,6 +499,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { | |||
410 | 499 | ||
411 | static const struct amdgpu_ih_funcs vega10_ih_funcs = { | 500 | static const struct amdgpu_ih_funcs vega10_ih_funcs = { |
412 | .get_wptr = vega10_ih_get_wptr, | 501 | .get_wptr = vega10_ih_get_wptr, |
502 | .prescreen_iv = vega10_ih_prescreen_iv, | ||
413 | .decode_iv = vega10_ih_decode_iv, | 503 | .decode_iv = vega10_ih_decode_iv, |
414 | .set_rptr = vega10_ih_set_rptr | 504 | .set_rptr = vega10_ih_set_rptr |
415 | }; | 505 | }; |
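The Vega10 prescreen_iv above does the real filtering: for VMC/UTCL2 retry faults it builds a key from the PASID and faulting address, records it via amdgpu_ih_add_fault (backed by the hash table allocated in sw_init above), and pushes the key into the owning VM's fault FIFO so it can be cleared once the fault is serviced; any repeat of the same key is dropped and rptr is advanced past the entry. The stand-alone sketch below shows the dedup idea only; the key layout and the tiny linear table are illustrative stand-ins for AMDGPU_VM_FAULT() and the driver's chash table, not the actual implementation.

/* Illustrative sketch of retry-fault dedup: only the first fault for a
 * given (pasid, page) pair is processed; repeats are dropped until the
 * entry is cleared again. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_PENDING 64

static uint64_t pending[MAX_PENDING];
static unsigned int npending;

/* Illustrative key: pasid in the high bits, page number in the low bits. */
static uint64_t fault_key(uint16_t pasid, uint64_t addr)
{
	return ((uint64_t)pasid << 48) | (addr >> 12);
}

/* Returns true if this fault should be processed (first occurrence). */
static bool prescreen_fault(uint16_t pasid, uint64_t addr)
{
	uint64_t key = fault_key(pasid, addr);
	unsigned int i;

	for (i = 0; i < npending; i++)
		if (pending[i] == key)
			return false;          /* already being handled */

	if (npending == MAX_PENDING)
		return false;                  /* table full: drop for now */

	pending[npending++] = key;             /* remember it, process it */
	return true;
}

int main(void)
{
	printf("%d\n", prescreen_fault(5, 0x12345000));  /* 1: first fault */
	printf("%d\n", prescreen_fault(5, 0x12345000));  /* 0: duplicate  */
	return 0;
}

A complete version also needs the clear path, which is what the per-VM kfifo in the diff enables: as faults are retired, their keys are popped from the FIFO and removed from the hash table (amdgpu_ih_clear_fault), letting later faults on the same page through again.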
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6cac291c96da..9ff69b90df36 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c | |||
@@ -1028,8 +1028,7 @@ static int vi_common_early_init(void *handle) | |||
1028 | /* rev0 hardware requires workarounds to support PG */ | 1028 | /* rev0 hardware requires workarounds to support PG */ |
1029 | adev->pg_flags = 0; | 1029 | adev->pg_flags = 0; |
1030 | if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) { | 1030 | if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) { |
1031 | adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | | 1031 | adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG | |
1032 | AMD_PG_SUPPORT_GFX_SMG | | ||
1033 | AMD_PG_SUPPORT_GFX_PIPELINE | | 1032 | AMD_PG_SUPPORT_GFX_PIPELINE | |
1034 | AMD_PG_SUPPORT_CP | | 1033 | AMD_PG_SUPPORT_CP | |
1035 | AMD_PG_SUPPORT_UVD | | 1034 | AMD_PG_SUPPORT_UVD | |