commit 5263925c092d137a0830ca4afe692366127dca4e
tree 49ce726b058d36f5b5d21156716ab0153f443243
parent 08244c00859f25036417ea7b790cfa73e43443fc
parent 390be2824fa4211c2e973c69b72e04000559bba3
author Dave Airlie <airlied@redhat.com> 2016-02-18 20:13:01 -0500
committer Dave Airlie <airlied@redhat.com> 2016-02-18 20:13:01 -0500
Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next
First radeon and amdgpu pull request for 4.6. Highlights:
- ACP support for APUs with i2s audio
- CS ioctl optimizations
- GPU scheduler optimizations
- GPUVM optimizations
- Initial GPU reset support (not enabled yet)
- New powerplay sysfs interface for manually selecting clocks
- Powerplay fixes
- Virtualization fixes
- Removal of hw semaphore support
- Lots of other misc fixes and cleanups
* 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (118 commits)
drm/amdgpu: Don't call interval_tree_remove in amdgpu_mn_destroy
drm/amdgpu: Fix race condition in amdgpu_mn_unregister
drm/amdgpu: cleanup gem init/fini
drm/amdgpu: rework GEM info printing
drm/amdgpu: print the GPU offset as well in gem_info
drm/amdgpu: optionally print the pin count in gem_info as well
drm/amdgpu: print the BO size only once in amdgpu_gem_info
drm/amdgpu: print pid as integer
drm/amdgpu: remove page flip work queue v3
drm/amdgpu: stop blocking for page flip fences
drm/amdgpu: stop calling amdgpu_gpu_reset from the flip code
drm/amdgpu: remove fence reset detection leftovers
drm/amdgpu: Fix race condition in MMU notifier release
drm/radeon: Fix WARN_ON if DRM_DP_AUX_CHARDEV is enabled
drm/amdgpu/vi: move uvd tiling config setup into uvd code
drm/amdgpu/vi: move sdma tiling config setup into sdma code
drm/amdgpu/cik: move uvd tiling config setup into uvd code
drm/amdgpu/cik: move sdma tiling config setup into sdma code
drm/amdgpu/gfx7: rework gpu_init()
drm/amdgpu/gfx: clean up harvest configuration (v2)
...
86 files changed, 5486 insertions(+), 4365 deletions(-)
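The "new powerplay sysfs interface for manually selecting clocks" highlight corresponds to the amdgpu_dpm_get_pp_*, print_clock_levels and force_clock_level hooks added to amdgpu.h further down. Below is a minimal userspace sketch of how such an interface is typically exercised; it is not part of this commit, and the sysfs file name and path (pp_dpm_sclk under the card's device directory) are assumptions, since this diff only shows the kernel-side plumbing.

/*
 * Hypothetical userspace sketch -- not part of this commit.
 * The sysfs path below is an assumption; the diff only adds the
 * kernel-side print_clock_levels/force_clock_level hooks.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/drm/card0/device/pp_dpm_sclk";
	char line[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* each line is one selectable sclk level, the active one marked */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);

	/* writing a level index back (as root) forces that clock level */
	f = fopen(path, "w");
	if (f) {
		fputs("0", f);
		fclose(f);
	}
	return 0;
}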
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 08706f064e6e..f2a74d0b68ae 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -172,6 +172,8 @@ config DRM_AMDGPU
 source "drivers/gpu/drm/amd/amdgpu/Kconfig"
 source "drivers/gpu/drm/amd/powerplay/Kconfig"
 
+source "drivers/gpu/drm/amd/acp/Kconfig"
+
 source "drivers/gpu/drm/nouveau/Kconfig"
 
 config DRM_I810
diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
new file mode 100644
index 000000000000..2b07813bceed
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Kconfig
@@ -0,0 +1,11 @@
+menu "ACP Configuration"
+
+config DRM_AMD_ACP
+	bool "Enable ACP IP support"
+	default y
+	select MFD_CORE
+	select PM_GENERIC_DOMAINS if PM
+	help
+	  Choose this option to enable ACP IP support for AMD SOCs.
+
+endmenu
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
new file mode 100644
index 000000000000..8363cb57915b
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the ACP, which is a sub-component
+# of AMDSOC/AMDGPU drm driver.
+# It provides the HW control for ACP related functionalities.
+
+subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
+
+AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
diff --git a/drivers/gpu/drm/amd/acp/acp_hw.c b/drivers/gpu/drm/amd/acp/acp_hw.c
new file mode 100644
index 000000000000..7af83f142b4b
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/acp_hw.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+
+#include "acp_gfx_if.h"
+
+#define ACP_MODE_I2S	0
+#define ACP_MODE_AZ	1
+
+#define mmACP_AZALIA_I2S_SELECT	0x51d4
+
+int amd_acp_hw_init(void *cgs_device,
+		    unsigned acp_version_major, unsigned acp_version_minor)
+{
+	unsigned int acp_mode = ACP_MODE_I2S;
+
+	if ((acp_version_major == 2) && (acp_version_minor == 2))
+		acp_mode = cgs_read_register(cgs_device,
+					     mmACP_AZALIA_I2S_SELECT);
+
+	if (acp_mode != ACP_MODE_I2S)
+		return -ENODEV;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
new file mode 100644
index 000000000000..bccf47b63899
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _ACP_GFX_IF_H
+#define _ACP_GFX_IF_H
+
+#include <linux/types.h>
+#include "cgs_linux.h"
+#include "cgs_common.h"
+
+int amd_acp_hw_init(void *cgs_device,
+		    unsigned acp_version_major, unsigned acp_version_minor);
+
+#endif /* _ACP_GFX_IF_H */
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 20c9539abc36..c7fcdcedaadb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -8,7 +8,8 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
-	-I$(FULL_AMD_PATH)/powerplay/inc
+	-I$(FULL_AMD_PATH)/powerplay/inc \
+	-I$(FULL_AMD_PATH)/acp/include
 
 amdgpu-y := amdgpu_drv.o
 
@@ -20,7 +21,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
 	amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
-	atombios_encoders.o amdgpu_semaphore.o amdgpu_sa.o atombios_i2c.o \
+	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
 
@@ -92,7 +93,17 @@ amdgpu-y += amdgpu_cgs.o
 amdgpu-y += \
 	../scheduler/gpu_scheduler.o \
 	../scheduler/sched_fence.o \
-	amdgpu_sched.o
+	amdgpu_job.o
+
+# ACP component
+ifneq ($(CONFIG_DRM_AMD_ACP),)
+amdgpu-y += amdgpu_acp.o
+
+AMDACPPATH := ../acp
+include $(FULL_AMD_PATH)/acp/Makefile
+
+amdgpu-y += $(AMD_ACP_FILES)
+endif
 
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 82edf95b7740..f5bac97a438b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -53,6 +53,7 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_gds.h"
 #include "amd_powerplay.h"
+#include "amdgpu_acp.h"
 
 #include "gpu_scheduler.h"
 
@@ -74,7 +75,6 @@ extern int amdgpu_dpm;
 extern int amdgpu_smc_load_fw;
 extern int amdgpu_aspm;
 extern int amdgpu_runtime_pm;
-extern int amdgpu_hard_reset;
 extern unsigned amdgpu_ip_block_mask;
 extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
@@ -82,10 +82,8 @@ extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
-extern int amdgpu_enable_scheduler;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
-extern int amdgpu_enable_semaphores;
 extern int amdgpu_powerplay;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
@@ -106,9 +104,6 @@ extern int amdgpu_powerplay;
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES 2
 
-/* number of hw syncs before falling back on blocking */
-#define AMDGPU_NUM_SYNCS 4
-
 /* hardcode that limit for now */
 #define AMDGPU_VA_RESERVED_SIZE (8 << 20)
 
@@ -189,7 +184,6 @@ struct amdgpu_fence;
 struct amdgpu_ib;
 struct amdgpu_vm;
 struct amdgpu_ring;
-struct amdgpu_semaphore;
 struct amdgpu_cs_parser;
 struct amdgpu_job;
 struct amdgpu_irq_src;
@@ -287,7 +281,7 @@ struct amdgpu_vm_pte_funcs {
 			 unsigned count);
 	/* write pte one entry at a time with addr mapping */
 	void (*write_pte)(struct amdgpu_ib *ib,
-			  uint64_t pe,
+			  const dma_addr_t *pages_addr, uint64_t pe,
 			  uint64_t addr, unsigned count,
 			  uint32_t incr, uint32_t flags);
 	/* for linear pte/pde updates without addr mapping */
@@ -295,8 +289,6 @@ struct amdgpu_vm_pte_funcs {
 			    uint64_t pe,
 			    uint64_t addr, unsigned count,
 			    uint32_t incr, uint32_t flags);
-	/* pad the indirect buffer to the necessary number of dw */
-	void (*pad_ib)(struct amdgpu_ib *ib);
 };
 
 /* provided by the gmc block */
@@ -334,9 +326,6 @@ struct amdgpu_ring_funcs {
 			struct amdgpu_ib *ib);
 	void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
 			   uint64_t seq, unsigned flags);
-	bool (*emit_semaphore)(struct amdgpu_ring *ring,
-			       struct amdgpu_semaphore *semaphore,
-			       bool emit_wait);
 	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
 			      uint64_t pd_addr);
 	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
@@ -349,6 +338,8 @@ struct amdgpu_ring_funcs {
 	int (*test_ib)(struct amdgpu_ring *ring);
 	/* insert NOP packets */
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+	/* pad the indirect buffer to the necessary number of dw */
+	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 };
 
 /*
@@ -394,7 +385,7 @@ struct amdgpu_fence_driver {
 	uint64_t gpu_addr;
 	volatile uint32_t *cpu_addr;
 	/* sync_seq is protected by ring emission lock */
-	uint64_t sync_seq[AMDGPU_MAX_RINGS];
+	uint64_t sync_seq;
 	atomic64_t last_seq;
 	bool initialized;
 	struct amdgpu_irq_src *irq_src;
@@ -447,11 +438,6 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
-bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *ring);
-void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *ring);
-
 /*
  * TTM.
  */
@@ -470,6 +456,8 @@ struct amdgpu_mman {
 	/* buffer handling */
 	const struct amdgpu_buffer_funcs *buffer_funcs;
 	struct amdgpu_ring *buffer_funcs_ring;
+	/* Scheduler entity for buffer moves */
+	struct amd_sched_entity entity;
 };
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring,
@@ -484,8 +472,6 @@ struct amdgpu_bo_list_entry {
 	struct amdgpu_bo *robj;
 	struct ttm_validate_buffer tv;
 	struct amdgpu_bo_va *bo_va;
-	unsigned prefered_domains;
-	unsigned allowed_domains;
 	uint32_t priority;
 };
 
@@ -522,7 +508,8 @@ struct amdgpu_bo {
 	/* Protected by gem.mutex */
 	struct list_head list;
 	/* Protected by tbo.reserved */
-	u32 initial_domain;
+	u32 prefered_domains;
+	u32 allowed_domains;
 	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
 	struct ttm_placement placement;
 	struct ttm_buffer_object tbo;
@@ -544,7 +531,6 @@ struct amdgpu_bo {
 	struct amdgpu_bo *parent;
 
 	struct ttm_bo_kmap_obj dma_buf_vmap;
-	pid_t pid;
 	struct amdgpu_mn *mn;
 	struct list_head mn_list;
 };
@@ -621,13 +607,7 @@ struct amdgpu_sa_bo {
 /*
  * GEM objects.
  */
-struct amdgpu_gem {
-	struct mutex mutex;
-	struct list_head objects;
-};
-
-int amdgpu_gem_init(struct amdgpu_device *adev);
-void amdgpu_gem_fini(struct amdgpu_device *adev);
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
 			     u64 flags, bool kernel,
@@ -639,32 +619,10 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 int amdgpu_mode_dumb_mmap(struct drm_file *filp,
 			  struct drm_device *dev,
 			  uint32_t handle, uint64_t *offset_p);
-
-/*
- * Semaphores.
- */
-struct amdgpu_semaphore {
-	struct amdgpu_sa_bo *sa_bo;
-	signed waiters;
-	uint64_t gpu_addr;
-};
-
-int amdgpu_semaphore_create(struct amdgpu_device *adev,
-			    struct amdgpu_semaphore **semaphore);
-bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
-				  struct amdgpu_semaphore *semaphore);
-bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
-				struct amdgpu_semaphore *semaphore);
-void amdgpu_semaphore_free(struct amdgpu_device *adev,
-			   struct amdgpu_semaphore **semaphore,
-			   struct fence *fence);
-
 /*
  * Synchronization
  */
 struct amdgpu_sync {
-	struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
-	struct fence *sync_to[AMDGPU_MAX_RINGS];
 	DECLARE_HASHTABLE(fences, 4);
 	struct fence *last_vm_update;
 };
@@ -676,12 +634,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-int amdgpu_sync_rings(struct amdgpu_sync *sync,
-		      struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_wait(struct amdgpu_sync *sync);
-void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct fence *fence);
+void amdgpu_sync_free(struct amdgpu_sync *sync);
 
 /*
  * GART structures, functions & helpers
@@ -799,6 +754,7 @@ struct amdgpu_flip_work {
 	struct fence *excl;
 	unsigned shared_count;
 	struct fence **shared;
+	struct fence_cb cb;
 };
 
 
@@ -811,12 +767,11 @@ struct amdgpu_ib {
 	uint32_t length_dw;
 	uint64_t gpu_addr;
 	uint32_t *ptr;
-	struct amdgpu_ring *ring;
 	struct amdgpu_fence *fence;
 	struct amdgpu_user_fence *user;
+	bool grabbed_vmid;
 	struct amdgpu_vm *vm;
 	struct amdgpu_ctx *ctx;
-	struct amdgpu_sync sync;
 	uint32_t gds_base, gds_size;
 	uint32_t gws_base, gws_size;
 	uint32_t oa_base, oa_size;
@@ -835,13 +790,14 @@ enum amdgpu_ring_type {
 
 extern struct amd_sched_backend_ops amdgpu_sched_ops;
 
-int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
-					 struct amdgpu_ring *ring,
-					 struct amdgpu_ib *ibs,
-					 unsigned num_ibs,
-					 int (*free_job)(struct amdgpu_job *),
-					 void *owner,
-					 struct fence **fence);
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+		     struct amdgpu_job **job);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+			     struct amdgpu_job **job);
+void amdgpu_job_free(struct amdgpu_job *job);
+int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+		      struct amd_sched_entity *entity, void *owner,
+		      struct fence **f);
 
 struct amdgpu_ring {
 	struct amdgpu_device *adev;
@@ -850,7 +806,6 @@ struct amdgpu_ring {
 	struct amd_gpu_scheduler sched;
 
 	spinlock_t fence_lock;
-	struct mutex *ring_lock;
 	struct amdgpu_bo *ring_obj;
 	volatile uint32_t *ring;
 	unsigned rptr_offs;
@@ -859,7 +814,7 @@ struct amdgpu_ring {
 	unsigned wptr;
 	unsigned wptr_old;
 	unsigned ring_size;
-	unsigned ring_free_dw;
+	unsigned max_dw;
 	int count_dw;
 	uint64_t gpu_addr;
 	uint32_t align_mask;
@@ -867,8 +822,6 @@ struct amdgpu_ring {
 	bool ready;
 	u32 nop;
 	u32 idx;
-	u64 last_semaphore_signal_addr;
-	u64 last_semaphore_wait_addr;
 	u32 me;
 	u32 pipe;
 	u32 queue;
@@ -881,7 +834,6 @@ struct amdgpu_ring {
 	struct amdgpu_ctx *current_ctx;
 	enum amdgpu_ring_type type;
 	char name[16];
-	bool is_pte_ring;
 };
 
 /*
@@ -932,6 +884,8 @@ struct amdgpu_vm_id {
 };
 
 struct amdgpu_vm {
+	/* tree of virtual addresses mapped */
+	spinlock_t it_lock;
 	struct rb_root va;
 
 	/* protecting invalidated */
@@ -956,30 +910,40 @@ struct amdgpu_vm {
 
 	/* for id and flush management per ring */
 	struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS];
-	/* for interval tree */
-	spinlock_t it_lock;
+
 	/* protecting freed */
 	spinlock_t freed_lock;
+
+	/* Scheduler entity for page table updates */
+	struct amd_sched_entity entity;
+};
+
+struct amdgpu_vm_manager_id {
+	struct list_head list;
+	struct fence *active;
+	atomic_long_t owner;
 };
 
 struct amdgpu_vm_manager {
-	struct {
-		struct fence *active;
-		atomic_long_t owner;
-	} ids[AMDGPU_NUM_VM];
+	/* Handling of VMIDs */
+	struct mutex lock;
+	unsigned num_ids;
+	struct list_head ids_lru;
+	struct amdgpu_vm_manager_id ids[AMDGPU_NUM_VM];
 
 	uint32_t max_pfn;
-	/* number of VMIDs */
-	unsigned nvm;
 	/* vram base address for page table entry */
 	u64 vram_base_offset;
 	/* is vm enabled? */
 	bool enabled;
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
-	struct amdgpu_ring *vm_pte_funcs_ring;
+	struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
+	unsigned vm_pte_num_rings;
+	atomic_t vm_pte_next_ring;
 };
 
+void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -990,14 +954,11 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync);
+		      struct amdgpu_sync *sync, struct fence *fence);
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
 		     struct amdgpu_vm *vm,
 		     struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
-		     struct amdgpu_vm *vm,
-		     struct fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 				    struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@ -1023,7 +984,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 		       uint64_t addr);
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
 
 /*
  * context related structures
@@ -1051,10 +1011,6 @@ struct amdgpu_ctx_mgr {
 	struct idr ctx_handles;
 };
 
-int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
-		    struct amdgpu_ctx *ctx);
-void amdgpu_ctx_fini(struct amdgpu_ctx *ctx);
-
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
@@ -1096,6 +1052,8 @@ struct amdgpu_bo_list {
 
 struct amdgpu_bo_list *
 amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
+void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
+			     struct list_head *validated);
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
 
@@ -1169,6 +1127,7 @@ struct amdgpu_gca_config {
 	unsigned multi_gpu_tile_size;
 	unsigned mc_arb_ramcfg;
 	unsigned gb_addr_config;
+	unsigned num_rbs;
 
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
@@ -1211,23 +1170,21 @@ struct amdgpu_gfx {
 	unsigned ce_ram_size;
 };
 
-int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib);
-int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
-		       struct amdgpu_ib *ib, void *owner);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+		       struct amdgpu_ib *ib, void *owner,
+		       struct fence *last_vm_update,
+		       struct fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-/* Ring access between begin & end cannot sleep */
-void amdgpu_ring_free_size(struct amdgpu_ring *ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
-int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
-void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring);
 unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
 			    uint32_t **data);
 int amdgpu_ring_restore(struct amdgpu_ring *ring,
@@ -1246,47 +1203,57 @@ struct amdgpu_cs_chunk {
 	uint32_t chunk_id;
 	uint32_t length_dw;
 	uint32_t *kdata;
-	void __user *user_ptr;
 };
 
 struct amdgpu_cs_parser {
 	struct amdgpu_device *adev;
 	struct drm_file *filp;
 	struct amdgpu_ctx *ctx;
-	struct amdgpu_bo_list *bo_list;
+
 	/* chunks */
 	unsigned nchunks;
 	struct amdgpu_cs_chunk *chunks;
-	/* relocations */
-	struct amdgpu_bo_list_entry vm_pd;
-	struct list_head validated;
-	struct fence *fence;
 
-	struct amdgpu_ib *ibs;
-	uint32_t num_ibs;
+	/* scheduler job object */
+	struct amdgpu_job *job;
 
-	struct ww_acquire_ctx ticket;
+	/* buffer objects */
+	struct ww_acquire_ctx ticket;
+	struct amdgpu_bo_list *bo_list;
+	struct amdgpu_bo_list_entry vm_pd;
+	struct list_head validated;
+	struct fence *fence;
+	uint64_t bytes_moved_threshold;
+	uint64_t bytes_moved;
 
 	/* user fence */
-	struct amdgpu_user_fence uf;
 	struct amdgpu_bo_list_entry uf_entry;
 };
 
 struct amdgpu_job {
 	struct amd_sched_job base;
 	struct amdgpu_device *adev;
+	struct amdgpu_ring *ring;
+	struct amdgpu_sync sync;
 	struct amdgpu_ib *ibs;
 	uint32_t num_ibs;
 	void *owner;
 	struct amdgpu_user_fence uf;
-	int (*free_job)(struct amdgpu_job *job);
 };
 #define to_amdgpu_job(sched_job) \
 		container_of((sched_job), struct amdgpu_job, base)
 
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
+static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
+				      uint32_t ib_idx, int idx)
 {
-	return p->ibs[ib_idx].ptr[idx];
+	return p->job->ibs[ib_idx].ptr[idx];
+}
+
+static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
+				       uint32_t ib_idx, int idx,
+				       uint32_t value)
+{
+	p->job->ibs[ib_idx].ptr[idx] = value;
 }
 
 /*
@@ -1538,6 +1505,7 @@ enum amdgpu_dpm_forced_level {
 	AMDGPU_DPM_FORCED_LEVEL_AUTO = 0,
 	AMDGPU_DPM_FORCED_LEVEL_LOW = 1,
 	AMDGPU_DPM_FORCED_LEVEL_HIGH = 2,
+	AMDGPU_DPM_FORCED_LEVEL_MANUAL = 3,
 };
 
 struct amdgpu_vce_state {
@@ -1667,6 +1635,7 @@ struct amdgpu_uvd {
 	struct amdgpu_ring ring;
 	struct amdgpu_irq_src irq;
 	bool address_64_bit;
+	struct amd_sched_entity entity;
 };
 
 /*
@@ -1691,6 +1660,7 @@ struct amdgpu_vce {
 	struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
 	struct amdgpu_irq_src irq;
 	unsigned harvest_config;
+	struct amd_sched_entity entity;
 };
 
 /*
@@ -1925,6 +1895,18 @@ void amdgpu_cgs_destroy_device(void *cgs_device);
 
 
 /*
+ * CGS
+ */
+void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+void amdgpu_cgs_destroy_device(void *cgs_device);
+
+
+/* GPU virtualization */
+struct amdgpu_virtualization {
+	bool supports_sr_iov;
+};
+
+/*
  * Core structure, functions and helpers.
  */
 typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
@@ -1944,6 +1926,10 @@ struct amdgpu_device {
 	struct drm_device *ddev;
 	struct pci_dev *pdev;
 
+#ifdef CONFIG_DRM_AMD_ACP
+	struct amdgpu_acp acp;
+#endif
+
 	/* ASIC */
 	enum amd_asic_type asic_type;
 	uint32_t family;
@@ -2020,7 +2006,6 @@ struct amdgpu_device {
 
 	/* memory management */
 	struct amdgpu_mman mman;
-	struct amdgpu_gem gem;
 	struct amdgpu_vram_scratch vram_scratch;
 	struct amdgpu_wb wb;
 	atomic64_t vram_usage;
@@ -2038,7 +2023,6 @@ struct amdgpu_device {
 
 	/* rings */
 	unsigned fence_context;
-	struct mutex ring_lock;
 	unsigned num_rings;
 	struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
 	bool ib_pool_ready;
@@ -2050,6 +2034,7 @@ struct amdgpu_device {
 	/* powerplay */
 	struct amd_powerplay powerplay;
 	bool pp_enabled;
+	bool pp_force_state_enabled;
 
 	/* dpm */
 	struct amdgpu_pm pm;
@@ -2091,8 +2076,7 @@ struct amdgpu_device {
 	/* amdkfd interface */
 	struct kfd_dev *kfd;
 
-	/* kernel conext for IB submission */
-	struct amdgpu_ctx kernel_ctx;
+	struct amdgpu_virtualization virtualization;
 };
 
 bool amdgpu_device_is_px(struct drm_device *dev);
@@ -2197,7 +2181,6 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 	ring->ring[ring->wptr++] = v;
 	ring->wptr &= ring->ptr_mask;
 	ring->count_dw--;
-	ring->ring_free_dw--;
 }
 
 static inline struct amdgpu_sdma_instance *
@@ -2233,9 +2216,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
-#define amdgpu_vm_write_pte(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (addr), (count), (incr), (flags)))
+#define amdgpu_vm_write_pte(adev, ib, pa, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pa), (pe), (addr), (count), (incr), (flags)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_vm_pad_ib(adev, ib) ((adev)->vm_manager.vm_pte_funcs->pad_ib((ib)))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r))
@@ -2245,9 +2227,9 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib))
 #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
-#define amdgpu_ring_emit_semaphore(r, semaphore, emit_wait) (r)->funcs->emit_semaphore((r), (semaphore), (emit_wait))
 #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
@@ -2339,6 +2321,21 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_get_performance_level(adev) \
 	(adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle)
 
+#define amdgpu_dpm_get_pp_num_states(adev, data) \
+	(adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)
+
+#define amdgpu_dpm_get_pp_table(adev, table) \
+	(adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table)
+
+#define amdgpu_dpm_set_pp_table(adev, buf, size) \
+	(adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size)
+
+#define amdgpu_dpm_print_clock_levels(adev, type, buf) \
+	(adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf)
+
+#define amdgpu_dpm_force_clock_level(adev, type, level) \
+	(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
+
 #define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \
 	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
 
@@ -2349,7 +2346,6 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev);
 void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_card_posted(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
-bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2359,7 +2355,9 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 			      uint32_t flags);
-bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+				  unsigned long end);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
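The amdgpu.h hunks above retire amdgpu_sched_ib_submit_kernel_helper in favour of the amdgpu_job_* API, part of the CS ioctl and GPU scheduler optimizations called out in the merge message. The following is a minimal sketch of the resulting submission flow, based only on the prototypes and struct amdgpu_job fields shown above; the size unit passed to amdgpu_job_alloc_with_ib and the error-path ownership rules are assumptions.

/* Hedged sketch of the reworked job API -- illustrative only.
 * Assumes amdgpu_job_alloc_with_ib takes a byte size and that a
 * failed submit leaves the caller owning the job. */
static int example_job_submit(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring,
			      struct amd_sched_entity *entity)
{
	struct amdgpu_job *job;
	struct fence *f;
	int r;

	/* one job carrying a single small IB */
	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		return r;

	/* fill job->ibs[0].ptr[] and set job->ibs[0].length_dw here */

	/* hand the job to the scheduler entity on this ring */
	r = amdgpu_job_submit(job, ring, entity, NULL, &f);
	if (r) {
		amdgpu_job_free(job);
		return r;
	}

	fence_put(f);	/* drop the reference returned in *f */
	return 0;
}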
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
new file mode 100644
index 000000000000..9f8cfaab3004
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/pm_domain.h>
+#include <linux/platform_device.h>
+#include <sound/designware_i2s.h>
+#include <sound/pcm.h>
+
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_acp.h"
+
+#include "acp_gfx_if.h"
+
+#define ACP_TILE_ON_MASK		0x03
+#define ACP_TILE_OFF_MASK		0x02
+#define ACP_TILE_ON_RETAIN_REG_MASK	0x1f
+#define ACP_TILE_OFF_RETAIN_REG_MASK	0x20
+
+#define ACP_TILE_P1_MASK		0x3e
+#define ACP_TILE_P2_MASK		0x3d
+#define ACP_TILE_DSP0_MASK		0x3b
+#define ACP_TILE_DSP1_MASK		0x37
+
+#define ACP_TILE_DSP2_MASK		0x2f
+
+#define ACP_DMA_REGS_END		0x146c0
+#define ACP_I2S_PLAY_REGS_START		0x14840
+#define ACP_I2S_PLAY_REGS_END		0x148b4
+#define ACP_I2S_CAP_REGS_START		0x148b8
+#define ACP_I2S_CAP_REGS_END		0x1496c
+
+#define ACP_I2S_COMP1_CAP_REG_OFFSET	0xac
+#define ACP_I2S_COMP2_CAP_REG_OFFSET	0xa8
+#define ACP_I2S_COMP1_PLAY_REG_OFFSET	0x6c
+#define ACP_I2S_COMP2_PLAY_REG_OFFSET	0x68
+
+#define mmACP_PGFSM_RETAIN_REG		0x51c9
+#define mmACP_PGFSM_CONFIG_REG		0x51ca
+#define mmACP_PGFSM_READ_REG_0		0x51cc
+
+#define mmACP_MEM_SHUT_DOWN_REQ_LO	0x51f8
+#define mmACP_MEM_SHUT_DOWN_REQ_HI	0x51f9
+#define mmACP_MEM_SHUT_DOWN_STS_LO	0x51fa
+#define mmACP_MEM_SHUT_DOWN_STS_HI	0x51fb
+
+#define ACP_TIMEOUT_LOOP		0x000000FF
+#define ACP_DEVS			3
+#define ACP_SRC_ID			162
+
+enum {
+	ACP_TILE_P1 = 0,
+	ACP_TILE_P2,
+	ACP_TILE_DSP0,
+	ACP_TILE_DSP1,
+	ACP_TILE_DSP2,
+};
+
+static int acp_sw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->acp.parent = adev->dev;
+
+	adev->acp.cgs_device =
+		amdgpu_cgs_create_device(adev);
+	if (!adev->acp.cgs_device)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int acp_sw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (adev->acp.cgs_device)
+		amdgpu_cgs_destroy_device(adev->acp.cgs_device);
+
+	return 0;
+}
+
+/* power off a tile/block within ACP */
+static int acp_suspend_tile(void *cgs_dev, int tile)
+{
+	u32 val = 0;
+	u32 count = 0;
+
+	if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
+		pr_err("Invalid ACP tile : %d to suspend\n", tile);
+		return -1;
+	}
+
+	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
+	val &= ACP_TILE_ON_MASK;
+
+	if (val == 0x0) {
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+		val = val | (1 << tile);
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+		cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
+					0x500 + tile);
+
+		count = ACP_TIMEOUT_LOOP;
+		while (true) {
+			val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+								+ tile);
+			val = val & ACP_TILE_ON_MASK;
+			if (val == ACP_TILE_OFF_MASK)
+				break;
+			if (--count == 0) {
+				pr_err("Timeout reading ACP PGFSM status\n");
+				return -ETIMEDOUT;
+			}
+			udelay(100);
+		}
+
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+
+		val |= ACP_TILE_OFF_RETAIN_REG_MASK;
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+	}
+	return 0;
+}
+
+/* power on a tile/block within ACP */
+static int acp_resume_tile(void *cgs_dev, int tile)
+{
+	u32 val = 0;
+	u32 count = 0;
+
+	if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
+		pr_err("Invalid ACP tile to resume\n");
+		return -1;
+	}
+
+	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
+	val = val & ACP_TILE_ON_MASK;
+
+	if (val != 0x0) {
+		cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
+					0x600 + tile);
+		count = ACP_TIMEOUT_LOOP;
+		while (true) {
+			val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+							+ tile);
+			val = val & ACP_TILE_ON_MASK;
+			if (val == 0x0)
+				break;
+			if (--count == 0) {
+				pr_err("Timeout reading ACP PGFSM status\n");
+				return -ETIMEDOUT;
+			}
+			udelay(100);
+		}
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+		if (tile == ACP_TILE_P1)
+			val = val & (ACP_TILE_P1_MASK);
+		else if (tile == ACP_TILE_P2)
+			val = val & (ACP_TILE_P2_MASK);
+
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+	}
+	return 0;
+}
189 | |||
190 | struct acp_pm_domain { | ||
191 | void *cgs_dev; | ||
192 | struct generic_pm_domain gpd; | ||
193 | }; | ||
194 | |||
195 | static int acp_poweroff(struct generic_pm_domain *genpd) | ||
196 | { | ||
197 | int i, ret; | ||
198 | struct acp_pm_domain *apd; | ||
199 | |||
200 | apd = container_of(genpd, struct acp_pm_domain, gpd); | ||
201 | if (apd != NULL) { | ||
202 | /* Do not return abruptly if any of the power tiles fails to | ||
203 | * suspend. Log it and continue powering off the other tiles. | ||
204 | */ | ||
205 | for (i = 4; i >= 0; i--) { | ||
206 | ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); | ||
207 | if (ret) | ||
208 | pr_err("ACP tile %d suspend failed\n", i); | ||
209 | } | ||
210 | } | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | static int acp_poweron(struct generic_pm_domain *genpd) | ||
215 | { | ||
216 | int i, ret; | ||
217 | struct acp_pm_domain *apd; | ||
218 | |||
219 | apd = container_of(genpd, struct acp_pm_domain, gpd); | ||
220 | if (apd != NULL) { | ||
221 | for (i = 0; i < 2; i++) { | ||
222 | ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i); | ||
223 | if (ret) { | ||
224 | pr_err("ACP tile %d resume failed\n", i); | ||
225 | break; | ||
226 | } | ||
227 | } | ||
228 | |||
229 | /* Disable DSPs which are not going to be used */ | ||
230 | for (i = 0; i < 3; i++) { | ||
231 | ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i); | ||
232 | /* Continue suspending other DSP, even if one fails */ | ||
233 | if (ret) | ||
234 | pr_err("ACP DSP %d suspend failed\n", i); | ||
235 | } | ||
236 | } | ||
237 | return 0; | ||
238 | } | ||
239 | |||
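Editor's note: both genpd callbacks receive only the embedded generic_pm_domain and recover the enclosing acp_pm_domain with container_of(). A self-contained sketch of the idiom; the macro is reimplemented for userspace and the struct names are illustrative.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct pm_domain { const char *name; };

struct acp_domain {
    void *cgs_dev;
    struct pm_domain gpd;   /* embedded member handed to callbacks */
};

static void poweroff_cb(struct pm_domain *genpd)
{
    /* Recover the wrapper from the embedded member's address. */
    struct acp_domain *apd = container_of(genpd, struct acp_domain, gpd);

    printf("powering off domain %s, cgs_dev=%p\n", genpd->name, apd->cgs_dev);
}

int main(void)
{
    struct acp_domain d = { .cgs_dev = (void *)0x1234, .gpd = { "ACP_AUDIO" } };

    poweroff_cb(&d.gpd);
    return 0;
}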
240 | static struct device *get_mfd_cell_dev(const char *device_name, int r) | ||
241 | { | ||
242 | char auto_dev_name[25]; | ||
243 | struct device *dev; | ||
244 | | ||
245 | /* MFD auto-id devices are named "<cell>.<id>.auto" */ | ||
246 | snprintf(auto_dev_name, sizeof(auto_dev_name), | ||
247 | "%s.%d.auto", device_name, r); | ||
248 | dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name); | ||
249 | if (dev) | ||
250 | dev_info(dev, "device %s added to pm domain\n", auto_dev_name); | ||
251 | |||
252 | return dev; | ||
253 | } | ||
254 | |||
255 | /** | ||
256 | * acp_hw_init - start and test ACP block | ||
257 | * | ||
258 | * @handle: pointer to the amdgpu_device (passed as void *) | ||
259 | * | ||
260 | */ | ||
261 | static int acp_hw_init(void *handle) | ||
262 | { | ||
263 | int r, i; | ||
264 | uint64_t acp_base; | ||
265 | struct device *dev; | ||
266 | struct i2s_platform_data *i2s_pdata; | ||
267 | |||
268 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
269 | |||
270 | const struct amdgpu_ip_block_version *ip_version = | ||
271 | amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP); | ||
272 | |||
273 | if (!ip_version) | ||
274 | return -EINVAL; | ||
275 | |||
276 | r = amd_acp_hw_init(adev->acp.cgs_device, | ||
277 | ip_version->major, ip_version->minor); | ||
278 | /* -ENODEV means board uses AZ rather than ACP */ | ||
279 | if (r == -ENODEV) | ||
280 | return 0; | ||
281 | else if (r) | ||
282 | return r; | ||
283 | |||
284 | r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO, | ||
285 | 0x5289, 0, &acp_base); | ||
286 | if (r == -ENODEV) | ||
287 | return 0; | ||
288 | else if (r) | ||
289 | return r; | ||
290 | |||
291 | adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); | ||
292 | if (adev->acp.acp_genpd == NULL) | ||
293 | return -ENOMEM; | ||
294 | |||
295 | adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; | ||
296 | adev->acp.acp_genpd->gpd.power_off = acp_poweroff; | ||
297 | adev->acp.acp_genpd->gpd.power_on = acp_poweron; | ||
298 | |||
299 | |||
300 | adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; | ||
301 | |||
302 | pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); | ||
303 | |||
304 | adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS, | ||
305 | GFP_KERNEL); | ||
306 | |||
307 | if (adev->acp.acp_cell == NULL) | ||
308 | return -ENOMEM; | ||
309 | |||
310 | adev->acp.acp_res = kzalloc(sizeof(struct resource) * 4, GFP_KERNEL); | ||
311 | |||
312 | if (adev->acp.acp_res == NULL) { | ||
313 | kfree(adev->acp.acp_cell); | ||
314 | return -ENOMEM; | ||
315 | } | ||
316 | |||
317 | i2s_pdata = kzalloc(sizeof(struct i2s_platform_data) * 2, GFP_KERNEL); | ||
318 | if (i2s_pdata == NULL) { | ||
319 | kfree(adev->acp.acp_res); | ||
320 | kfree(adev->acp.acp_cell); | ||
321 | return -ENOMEM; | ||
322 | } | ||
323 | |||
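Editor's note: each failure path above repeats by hand the kfree() calls for everything allocated before it, and the mfd_add_hotplug_devices() failure further down returns without freeing any of them. The usual kernel shape for this is goto-based unwinding; a userspace sketch of that pattern follows, not the driver's actual flow.

#include <stdlib.h>

static int init_three(void **a, void **b, void **c)
{
    int r = -1;   /* stands in for -ENOMEM */

    *a = malloc(32);
    if (!*a)
        goto out;
    *b = malloc(32);
    if (!*b)
        goto free_a;
    *c = malloc(32);
    if (!*c)
        goto free_b;
    return 0;     /* success: caller owns a, b and c */

free_b:           /* each label frees exactly what was live at its jump */
    free(*b);
free_a:
    free(*a);
out:
    return r;
}

int main(void)
{
    void *a, *b, *c;

    return init_three(&a, &b, &c) ? 1 : (free(a), free(b), free(c), 0);
}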
324 | i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; | ||
325 | i2s_pdata[0].cap = DWC_I2S_PLAY; | ||
326 | i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; | ||
327 | i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; | ||
328 | i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; | ||
329 | |||
330 | i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | | ||
331 | DW_I2S_QUIRK_COMP_PARAM1; | ||
332 | i2s_pdata[1].cap = DWC_I2S_RECORD; | ||
333 | i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; | ||
334 | i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; | ||
335 | i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; | ||
336 | |||
337 | adev->acp.acp_res[0].name = "acp2x_dma"; | ||
338 | adev->acp.acp_res[0].flags = IORESOURCE_MEM; | ||
339 | adev->acp.acp_res[0].start = acp_base; | ||
340 | adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; | ||
341 | |||
342 | adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; | ||
343 | adev->acp.acp_res[1].flags = IORESOURCE_MEM; | ||
344 | adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; | ||
345 | adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; | ||
346 | |||
347 | adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; | ||
348 | adev->acp.acp_res[2].flags = IORESOURCE_MEM; | ||
349 | adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; | ||
350 | adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; | ||
351 | |||
352 | adev->acp.acp_res[3].name = "acp2x_dma_irq"; | ||
353 | adev->acp.acp_res[3].flags = IORESOURCE_IRQ; | ||
354 | adev->acp.acp_res[3].start = amdgpu_irq_create_mapping(adev, 162); | ||
355 | adev->acp.acp_res[3].end = adev->acp.acp_res[3].start; | ||
356 | |||
357 | adev->acp.acp_cell[0].name = "acp_audio_dma"; | ||
358 | adev->acp.acp_cell[0].num_resources = 4; | ||
359 | adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; | ||
360 | |||
361 | adev->acp.acp_cell[1].name = "designware-i2s"; | ||
362 | adev->acp.acp_cell[1].num_resources = 1; | ||
363 | adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; | ||
364 | adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; | ||
365 | adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); | ||
366 | |||
367 | adev->acp.acp_cell[2].name = "designware-i2s"; | ||
368 | adev->acp.acp_cell[2].num_resources = 1; | ||
369 | adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; | ||
370 | adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; | ||
371 | adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); | ||
372 | |||
373 | r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, | ||
374 | ACP_DEVS); | ||
375 | if (r) | ||
376 | return r; | ||
377 | |||
378 | for (i = 0; i < ACP_DEVS; i++) { | ||
379 | dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); | ||
380 | r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); | ||
381 | if (r) { | ||
382 | dev_err(dev, "Failed to add dev to genpd\n"); | ||
383 | return r; | ||
384 | } | ||
385 | } | ||
386 | |||
387 | return 0; | ||
388 | } | ||
389 | |||
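Editor's note: acp_hw_init() carves the single MMIO block at acp_base into per-cell resources — the DMA engine spans the whole range while the playback and capture I2S controllers get fixed sub-ranges. A sketch of the same carving, with made-up offsets standing in for the ACP_*_REGS constants.

#include <stdio.h>

struct resource { const char *name; unsigned long start, end; };

/* Illustrative offsets; the driver uses ACP_I2S_*_REGS_START/END. */
#define I2S_PLAY_START 0x1000
#define I2S_PLAY_END   0x10FF
#define I2S_CAP_START  0x2000
#define I2S_CAP_END    0x20FF
#define DMA_END        0x2FFF

int main(void)
{
    unsigned long base = 0x90000000UL;   /* pretend MMIO base address */
    struct resource res[3] = {
        { "acp2x_dma",         base,                  base + DMA_END      },
        { "acp2x_dw_i2s_play", base + I2S_PLAY_START, base + I2S_PLAY_END },
        { "acp2x_dw_i2s_cap",  base + I2S_CAP_START,  base + I2S_CAP_END  },
    };

    for (int i = 0; i < 3; i++)
        printf("%-18s [%#lx-%#lx]\n", res[i].name, res[i].start, res[i].end);
    return 0;
}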
390 | /** | ||
391 | * acp_hw_fini - stop the hardware block | ||
392 | * | ||
393 | * @handle: pointer to the amdgpu_device (passed as void *) | ||
394 | * | ||
395 | */ | ||
396 | static int acp_hw_fini(void *handle) | ||
397 | { | ||
398 | int i, ret; | ||
399 | struct device *dev; | ||
400 | |||
401 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
402 | |||
403 | for (i = 0; i < ACP_DEVS; i++) { | ||
404 | dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); | ||
405 | ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); | ||
406 | /* If removal fails, don't give up and try the rest */ | ||
407 | if (ret) | ||
408 | dev_err(dev, "remove dev from genpd failed\n"); | ||
409 | } | ||
410 | |||
411 | mfd_remove_devices(adev->acp.parent); | ||
412 | kfree(adev->acp.acp_res); | ||
413 | kfree(adev->acp.acp_genpd); | ||
414 | kfree(adev->acp.acp_cell); | ||
415 | |||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | static int acp_suspend(void *handle) | ||
420 | { | ||
421 | return 0; | ||
422 | } | ||
423 | |||
424 | static int acp_resume(void *handle) | ||
425 | { | ||
426 | int i, ret; | ||
427 | struct acp_pm_domain *apd; | ||
428 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
429 | |||
430 | /* The SMU block powers on ACP irrespective of its runtime status. | ||
431 | * Power it off explicitly, based on the genpd runtime status, so | ||
432 | * that the ACP hardware and the genpd state stay in sync. | ||
433 | * 'suspend_power_off' records the power status before system suspend. | ||
434 | */ | ||
435 | if (adev->acp.acp_genpd->gpd.suspend_power_off) { | ||
436 | apd = container_of(&adev->acp.acp_genpd->gpd, | ||
437 | struct acp_pm_domain, gpd); | ||
438 | |||
439 | for (i = 4; i >= 0; i--) { | ||
440 | ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); | ||
441 | if (ret) | ||
442 | pr_err("ACP tile %d suspend failed\n", i); | ||
443 | } | ||
444 | } | ||
445 | return 0; | ||
446 | } | ||
447 | |||
448 | static int acp_early_init(void *handle) | ||
449 | { | ||
450 | return 0; | ||
451 | } | ||
452 | |||
453 | static bool acp_is_idle(void *handle) | ||
454 | { | ||
455 | return true; | ||
456 | } | ||
457 | |||
458 | static int acp_wait_for_idle(void *handle) | ||
459 | { | ||
460 | return 0; | ||
461 | } | ||
462 | |||
463 | static int acp_soft_reset(void *handle) | ||
464 | { | ||
465 | return 0; | ||
466 | } | ||
467 | |||
468 | static void acp_print_status(void *handle) | ||
469 | { | ||
470 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
471 | |||
472 | dev_info(adev->dev, "ACP STATUS\n"); | ||
473 | } | ||
474 | |||
475 | static int acp_set_clockgating_state(void *handle, | ||
476 | enum amd_clockgating_state state) | ||
477 | { | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | static int acp_set_powergating_state(void *handle, | ||
482 | enum amd_powergating_state state) | ||
483 | { | ||
484 | return 0; | ||
485 | } | ||
486 | |||
487 | const struct amd_ip_funcs acp_ip_funcs = { | ||
488 | .early_init = acp_early_init, | ||
489 | .late_init = NULL, | ||
490 | .sw_init = acp_sw_init, | ||
491 | .sw_fini = acp_sw_fini, | ||
492 | .hw_init = acp_hw_init, | ||
493 | .hw_fini = acp_hw_fini, | ||
494 | .suspend = acp_suspend, | ||
495 | .resume = acp_resume, | ||
496 | .is_idle = acp_is_idle, | ||
497 | .wait_for_idle = acp_wait_for_idle, | ||
498 | .soft_reset = acp_soft_reset, | ||
499 | .print_status = acp_print_status, | ||
500 | .set_clockgating_state = acp_set_clockgating_state, | ||
501 | .set_powergating_state = acp_set_powergating_state, | ||
502 | }; | ||
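Editor's note: acp_ip_funcs fills in the amd_ip_funcs hook table; the amdgpu core drives each IP block through these callbacks, and a NULL entry such as late_init is simply skipped. A minimal userspace sketch of that dispatch pattern; the struct and hook names here are illustrative, not the driver's.

#include <stdio.h>

struct ip_funcs {
    int (*sw_init)(void *handle);
    int (*hw_init)(void *handle);
    int (*late_init)(void *handle);   /* may be NULL, like acp's */
};

static int acp_sw_init_stub(void *h) { printf("sw_init\n"); return 0; }
static int acp_hw_init_stub(void *h) { printf("hw_init\n"); return 0; }

static const struct ip_funcs acp_funcs = {
    .sw_init = acp_sw_init_stub,
    .hw_init = acp_hw_init_stub,
    .late_init = NULL,
};

/* Run the block's hooks in order, skipping any that are not provided. */
static int run_block(const struct ip_funcs *f, void *handle)
{
    int r;

    if (f->sw_init && (r = f->sw_init(handle)))
        return r;
    if (f->hw_init && (r = f->hw_init(handle)))
        return r;
    if (f->late_init && (r = f->late_init(handle)))
        return r;
    return 0;
}

int main(void) { return run_block(&acp_funcs, NULL); }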
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h new file mode 100644 index 000000000000..f6e32a639107 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h | |||
@@ -0,0 +1,42 @@ | |||
1 | /* | ||
2 | * Copyright 2015 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * Authors: AMD | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #ifndef __AMDGPU_ACP_H__ | ||
27 | #define __AMDGPU_ACP_H__ | ||
28 | |||
29 | #include <linux/mfd/core.h> | ||
30 | |||
31 | struct amdgpu_acp { | ||
32 | struct device *parent; | ||
33 | void *cgs_device; | ||
34 | struct amd_acp_private *private; | ||
35 | struct mfd_cell *acp_cell; | ||
36 | struct resource *acp_res; | ||
37 | struct acp_pm_domain *acp_genpd; | ||
38 | }; | ||
39 | |||
40 | extern const struct amd_ip_funcs acp_ip_funcs; | ||
41 | |||
42 | #endif /* __AMDGPU_ACP_H__ */ | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 9416e0f5c1db..84b0ce39ee14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | |||
@@ -1514,6 +1514,19 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev, | |||
1514 | return -EINVAL; | 1514 | return -EINVAL; |
1515 | } | 1515 | } |
1516 | 1516 | ||
1517 | bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev) | ||
1518 | { | ||
1519 | int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo); | ||
1520 | u8 frev, crev; | ||
1521 | u16 data_offset, size; | ||
1522 | |||
1523 | if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size, | ||
1524 | &frev, &crev, &data_offset)) | ||
1525 | return true; | ||
1526 | |||
1527 | return false; | ||
1528 | } | ||
1529 | |||
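Editor's note: amdgpu_atombios_has_gpu_virtualization_table() boils the header parse down to a presence check — the revision and offset out-parameters are required by the parser but thrown away. A small sketch of that wrapper shape, with the parser simulated; names here are illustrative.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for amdgpu_atom_parse_data_header(). */
static bool parse_data_header(int index, unsigned short *size,
                              unsigned char *frev, unsigned char *crev,
                              unsigned short *data_offset)
{
    *size = 64; *frev = 1; *crev = 0; *data_offset = 0x100;
    return index == 30;   /* pretend only table 30 exists */
}

static bool has_virtualization_table(int index)
{
    unsigned short size, off;
    unsigned char frev, crev;

    /* Out-parameters are required by the parser but discarded here. */
    return parse_data_header(index, &size, &frev, &crev, &off);
}

int main(void)
{
    printf("table 30: %d, table 31: %d\n",
           has_virtualization_table(30), has_virtualization_table(31));
    return 0;
}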
1517 | void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) | 1530 | void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) |
1518 | { | 1531 | { |
1519 | uint32_t bios_6_scratch; | 1532 | uint32_t bios_6_scratch; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 0ebb959ea435..9e1442053fe4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | |||
@@ -196,6 +196,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev, | |||
196 | u8 module_index, | 196 | u8 module_index, |
197 | struct atom_mc_reg_table *reg_table); | 197 | struct atom_mc_reg_table *reg_table); |
198 | 198 | ||
199 | bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev); | ||
200 | |||
199 | void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock); | 201 | void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock); |
200 | void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev); | 202 | void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev); |
201 | void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev); | 203 | void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index f82a2dd83874..90d6fc1618aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | |||
@@ -32,6 +32,9 @@ | |||
32 | #include "amdgpu.h" | 32 | #include "amdgpu.h" |
33 | #include "amdgpu_trace.h" | 33 | #include "amdgpu_trace.h" |
34 | 34 | ||
35 | #define AMDGPU_BO_LIST_MAX_PRIORITY 32u | ||
36 | #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) | ||
37 | |||
35 | static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, | 38 | static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, |
36 | struct amdgpu_bo_list **result, | 39 | struct amdgpu_bo_list **result, |
37 | int *id) | 40 | int *id) |
@@ -90,6 +93,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, | |||
90 | 93 | ||
91 | bool has_userptr = false; | 94 | bool has_userptr = false; |
92 | unsigned i; | 95 | unsigned i; |
96 | int r; | ||
93 | 97 | ||
94 | array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry)); | 98 | array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry)); |
95 | if (!array) | 99 | if (!array) |
@@ -99,31 +103,34 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, | |||
99 | for (i = 0; i < num_entries; ++i) { | 103 | for (i = 0; i < num_entries; ++i) { |
100 | struct amdgpu_bo_list_entry *entry = &array[i]; | 104 | struct amdgpu_bo_list_entry *entry = &array[i]; |
101 | struct drm_gem_object *gobj; | 105 | struct drm_gem_object *gobj; |
106 | struct mm_struct *usermm; | ||
102 | 107 | ||
103 | gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle); | 108 | gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle); |
104 | if (!gobj) | 109 | if (!gobj) { |
110 | r = -ENOENT; | ||
105 | goto error_free; | 111 | goto error_free; |
112 | } | ||
106 | 113 | ||
107 | entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); | 114 | entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); |
108 | drm_gem_object_unreference_unlocked(gobj); | 115 | drm_gem_object_unreference_unlocked(gobj); |
109 | entry->priority = info[i].bo_priority; | 116 | entry->priority = min(info[i].bo_priority, |
110 | entry->prefered_domains = entry->robj->initial_domain; | 117 | AMDGPU_BO_LIST_MAX_PRIORITY); |
111 | entry->allowed_domains = entry->prefered_domains; | 118 | usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm); |
112 | if (entry->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) | 119 | if (usermm) { |
113 | entry->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; | 120 | if (usermm != current->mm) { |
114 | if (amdgpu_ttm_tt_has_userptr(entry->robj->tbo.ttm)) { | 121 | r = -EPERM; |
122 | goto error_free; | ||
123 | } | ||
115 | has_userptr = true; | 124 | has_userptr = true; |
116 | entry->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; | ||
117 | entry->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; | ||
118 | } | 125 | } |
119 | entry->tv.bo = &entry->robj->tbo; | 126 | entry->tv.bo = &entry->robj->tbo; |
120 | entry->tv.shared = true; | 127 | entry->tv.shared = true; |
121 | 128 | ||
122 | if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) | 129 | if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) |
123 | gds_obj = entry->robj; | 130 | gds_obj = entry->robj; |
124 | if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) | 131 | if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) |
125 | gws_obj = entry->robj; | 132 | gws_obj = entry->robj; |
126 | if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA) | 133 | if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA) |
127 | oa_obj = entry->robj; | 134 | oa_obj = entry->robj; |
128 | 135 | ||
129 | trace_amdgpu_bo_list_set(list, entry->robj); | 136 | trace_amdgpu_bo_list_set(list, entry->robj); |
@@ -145,7 +152,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, | |||
145 | 152 | ||
146 | error_free: | 153 | error_free: |
147 | drm_free_large(array); | 154 | drm_free_large(array); |
148 | return -ENOENT; | 155 | return r; |
149 | } | 156 | } |
150 | 157 | ||
151 | struct amdgpu_bo_list * | 158 | struct amdgpu_bo_list * |
@@ -161,6 +168,36 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id) | |||
161 | return result; | 168 | return result; |
162 | } | 169 | } |
163 | 170 | ||
171 | void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, | ||
172 | struct list_head *validated) | ||
173 | { | ||
174 | /* This is based on the bucket sort with O(n) time complexity. | ||
175 | * An item with priority "i" is added to bucket[i]. The lists are then | ||
176 | * concatenated in descending order. | ||
177 | */ | ||
178 | struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS]; | ||
179 | unsigned i; | ||
180 | |||
181 | for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++) | ||
182 | INIT_LIST_HEAD(&bucket[i]); | ||
183 | |||
184 | /* Since buffers which appear sooner in the relocation list are | ||
185 | * likely to be used more often than buffers which appear later | ||
186 | * in the list, the sort mustn't change the ordering of buffers | ||
187 | * with the same priority, i.e. it must be stable. | ||
188 | */ | ||
189 | for (i = 0; i < list->num_entries; i++) { | ||
190 | unsigned priority = list->array[i].priority; | ||
191 | |||
192 | list_add_tail(&list->array[i].tv.head, | ||
193 | &bucket[priority]); | ||
194 | } | ||
195 | |||
196 | /* Connect the sorted buckets in the output list. */ | ||
197 | for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++) | ||
198 | list_splice(&bucket[i], validated); | ||
199 | } | ||
200 | |||
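Editor's note: amdgpu_bo_list_get_list() takes over the bucket sort that amdgpu_cs.c used to carry (removed further down in this series): clamped priorities index an array of lists, tail-appends keep equal priorities in input order, and the successive list_splice() calls emit buckets from highest priority down in O(n). A runnable sketch of the same stable sort over plain arrays.

#include <stdio.h>

#define MAX_PRIORITY 32u
#define NUM_BUCKETS  (MAX_PRIORITY + 1)

struct entry { unsigned priority; int id; };

static unsigned bucket_of(unsigned prio)
{
    return prio > MAX_PRIORITY ? MAX_PRIORITY : prio;   /* clamp, as min() does */
}

/* Stable O(n) bucket sort mirroring the list_add_tail/list_splice scheme:
 * entries join their bucket in input order, and buckets are emitted from
 * highest priority down. */
static void sort_entries(const struct entry *in, struct entry *out, int n)
{
    int count[NUM_BUCKETS] = { 0 }, pos[NUM_BUCKETS];

    for (int i = 0; i < n; i++)
        count[bucket_of(in[i].priority)]++;

    pos[MAX_PRIORITY] = 0;
    for (int b = MAX_PRIORITY - 1; b >= 0; b--)
        pos[b] = pos[b + 1] + count[b + 1];

    for (int i = 0; i < n; i++)
        out[pos[bucket_of(in[i].priority)]++] = in[i];   /* stable append */
}

int main(void)
{
    struct entry in[] = { {2, 0}, {0, 1}, {2, 2}, {1, 3}, {0, 4} };
    struct entry out[5];

    sort_entries(in, out, 5);
    for (int i = 0; i < 5; i++)
        printf("prio %u id %d\n", out[i].priority, out[i].id);
    return 0;
}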
164 | void amdgpu_bo_list_put(struct amdgpu_bo_list *list) | 201 | void amdgpu_bo_list_put(struct amdgpu_bo_list *list) |
165 | { | 202 | { |
166 | mutex_unlock(&list->lock); | 203 | mutex_unlock(&list->lock); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b882e8175615..52c3eb96b199 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | |||
@@ -30,47 +30,6 @@ | |||
30 | #include "amdgpu.h" | 30 | #include "amdgpu.h" |
31 | #include "amdgpu_trace.h" | 31 | #include "amdgpu_trace.h" |
32 | 32 | ||
33 | #define AMDGPU_CS_MAX_PRIORITY 32u | ||
34 | #define AMDGPU_CS_NUM_BUCKETS (AMDGPU_CS_MAX_PRIORITY + 1) | ||
35 | |||
36 | /* This is based on the bucket sort with O(n) time complexity. | ||
37 | * An item with priority "i" is added to bucket[i]. The lists are then | ||
38 | * concatenated in descending order. | ||
39 | */ | ||
40 | struct amdgpu_cs_buckets { | ||
41 | struct list_head bucket[AMDGPU_CS_NUM_BUCKETS]; | ||
42 | }; | ||
43 | |||
44 | static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b) | ||
45 | { | ||
46 | unsigned i; | ||
47 | |||
48 | for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) | ||
49 | INIT_LIST_HEAD(&b->bucket[i]); | ||
50 | } | ||
51 | |||
52 | static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b, | ||
53 | struct list_head *item, unsigned priority) | ||
54 | { | ||
55 | /* Since buffers which appear sooner in the relocation list are | ||
56 | * likely to be used more often than buffers which appear later | ||
57 | * in the list, the sort mustn't change the ordering of buffers | ||
58 | * with the same priority, i.e. it must be stable. | ||
59 | */ | ||
60 | list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]); | ||
61 | } | ||
62 | |||
63 | static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b, | ||
64 | struct list_head *out_list) | ||
65 | { | ||
66 | unsigned i; | ||
67 | |||
68 | /* Connect the sorted buckets in the output list. */ | ||
69 | for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) { | ||
70 | list_splice(&b->bucket[i], out_list); | ||
71 | } | ||
72 | } | ||
73 | |||
74 | int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, | 33 | int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, |
75 | u32 ip_instance, u32 ring, | 34 | u32 ip_instance, u32 ring, |
76 | struct amdgpu_ring **out_ring) | 35 | struct amdgpu_ring **out_ring) |
@@ -128,6 +87,7 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, | |||
128 | } | 87 | } |
129 | 88 | ||
130 | static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, | 89 | static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, |
90 | struct amdgpu_user_fence *uf, | ||
131 | struct drm_amdgpu_cs_chunk_fence *fence_data) | 91 | struct drm_amdgpu_cs_chunk_fence *fence_data) |
132 | { | 92 | { |
133 | struct drm_gem_object *gobj; | 93 | struct drm_gem_object *gobj; |
@@ -139,17 +99,15 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, | |||
139 | if (gobj == NULL) | 99 | if (gobj == NULL) |
140 | return -EINVAL; | 100 | return -EINVAL; |
141 | 101 | ||
142 | p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); | 102 | uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); |
143 | p->uf.offset = fence_data->offset; | 103 | uf->offset = fence_data->offset; |
144 | 104 | ||
145 | if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) { | 105 | if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) { |
146 | drm_gem_object_unreference_unlocked(gobj); | 106 | drm_gem_object_unreference_unlocked(gobj); |
147 | return -EINVAL; | 107 | return -EINVAL; |
148 | } | 108 | } |
149 | 109 | ||
150 | p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo); | 110 | p->uf_entry.robj = amdgpu_bo_ref(uf->bo); |
151 | p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT; | ||
152 | p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT; | ||
153 | p->uf_entry.priority = 0; | 111 | p->uf_entry.priority = 0; |
154 | p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; | 112 | p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; |
155 | p->uf_entry.tv.shared = true; | 113 | p->uf_entry.tv.shared = true; |
@@ -160,11 +118,12 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, | |||
160 | 118 | ||
161 | int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | 119 | int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) |
162 | { | 120 | { |
121 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | ||
163 | union drm_amdgpu_cs *cs = data; | 122 | union drm_amdgpu_cs *cs = data; |
164 | uint64_t *chunk_array_user; | 123 | uint64_t *chunk_array_user; |
165 | uint64_t *chunk_array; | 124 | uint64_t *chunk_array; |
166 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | 125 | struct amdgpu_user_fence uf = {}; |
167 | unsigned size; | 126 | unsigned size, num_ibs = 0; |
168 | int i; | 127 | int i; |
169 | int ret; | 128 | int ret; |
170 | 129 | ||
@@ -181,15 +140,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
181 | goto free_chunk; | 140 | goto free_chunk; |
182 | } | 141 | } |
183 | 142 | ||
184 | p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); | ||
185 | |||
186 | /* get chunks */ | 143 | /* get chunks */ |
187 | INIT_LIST_HEAD(&p->validated); | ||
188 | chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks); | 144 | chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks); |
189 | if (copy_from_user(chunk_array, chunk_array_user, | 145 | if (copy_from_user(chunk_array, chunk_array_user, |
190 | sizeof(uint64_t)*cs->in.num_chunks)) { | 146 | sizeof(uint64_t)*cs->in.num_chunks)) { |
191 | ret = -EFAULT; | 147 | ret = -EFAULT; |
192 | goto put_bo_list; | 148 | goto put_ctx; |
193 | } | 149 | } |
194 | 150 | ||
195 | p->nchunks = cs->in.num_chunks; | 151 | p->nchunks = cs->in.num_chunks; |
@@ -197,7 +153,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
197 | GFP_KERNEL); | 153 | GFP_KERNEL); |
198 | if (!p->chunks) { | 154 | if (!p->chunks) { |
199 | ret = -ENOMEM; | 155 | ret = -ENOMEM; |
200 | goto put_bo_list; | 156 | goto put_ctx; |
201 | } | 157 | } |
202 | 158 | ||
203 | for (i = 0; i < p->nchunks; i++) { | 159 | for (i = 0; i < p->nchunks; i++) { |
@@ -217,7 +173,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
217 | 173 | ||
218 | size = p->chunks[i].length_dw; | 174 | size = p->chunks[i].length_dw; |
219 | cdata = (void __user *)(unsigned long)user_chunk.chunk_data; | 175 | cdata = (void __user *)(unsigned long)user_chunk.chunk_data; |
220 | p->chunks[i].user_ptr = cdata; | ||
221 | 176 | ||
222 | p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); | 177 | p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); |
223 | if (p->chunks[i].kdata == NULL) { | 178 | if (p->chunks[i].kdata == NULL) { |
@@ -233,7 +188,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
233 | 188 | ||
234 | switch (p->chunks[i].chunk_id) { | 189 | switch (p->chunks[i].chunk_id) { |
235 | case AMDGPU_CHUNK_ID_IB: | 190 | case AMDGPU_CHUNK_ID_IB: |
236 | p->num_ibs++; | 191 | ++num_ibs; |
237 | break; | 192 | break; |
238 | 193 | ||
239 | case AMDGPU_CHUNK_ID_FENCE: | 194 | case AMDGPU_CHUNK_ID_FENCE: |
@@ -243,7 +198,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
243 | goto free_partial_kdata; | 198 | goto free_partial_kdata; |
244 | } | 199 | } |
245 | 200 | ||
246 | ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata); | 201 | ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata); |
247 | if (ret) | 202 | if (ret) |
248 | goto free_partial_kdata; | 203 | goto free_partial_kdata; |
249 | 204 | ||
@@ -258,12 +213,11 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
258 | } | 213 | } |
259 | } | 214 | } |
260 | 215 | ||
261 | 216 | ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job); | |
262 | p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); | 217 | if (ret) |
263 | if (!p->ibs) { | ||
264 | ret = -ENOMEM; | ||
265 | goto free_all_kdata; | 218 | goto free_all_kdata; |
266 | } | 219 | |
220 | p->job->uf = uf; | ||
267 | 221 | ||
268 | kfree(chunk_array); | 222 | kfree(chunk_array); |
269 | return 0; | 223 | return 0; |
@@ -274,9 +228,7 @@ free_partial_kdata: | |||
274 | for (; i >= 0; i--) | 228 | for (; i >= 0; i--) |
275 | drm_free_large(p->chunks[i].kdata); | 229 | drm_free_large(p->chunks[i].kdata); |
276 | kfree(p->chunks); | 230 | kfree(p->chunks); |
277 | put_bo_list: | 231 | put_ctx: |
278 | if (p->bo_list) | ||
279 | amdgpu_bo_list_put(p->bo_list); | ||
280 | amdgpu_ctx_put(p->ctx); | 232 | amdgpu_ctx_put(p->ctx); |
281 | free_chunk: | 233 | free_chunk: |
282 | kfree(chunk_array); | 234 | kfree(chunk_array); |
@@ -336,80 +288,76 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) | |||
336 | return max(bytes_moved_threshold, 1024*1024ull); | 288 | return max(bytes_moved_threshold, 1024*1024ull); |
337 | } | 289 | } |
338 | 290 | ||
339 | int amdgpu_cs_list_validate(struct amdgpu_device *adev, | 291 | int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, |
340 | struct amdgpu_vm *vm, | ||
341 | struct list_head *validated) | 292 | struct list_head *validated) |
342 | { | 293 | { |
343 | struct amdgpu_bo_list_entry *lobj; | 294 | struct amdgpu_bo_list_entry *lobj; |
344 | struct amdgpu_bo *bo; | 295 | u64 initial_bytes_moved; |
345 | u64 bytes_moved = 0, initial_bytes_moved; | ||
346 | u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev); | ||
347 | int r; | 296 | int r; |
348 | 297 | ||
349 | list_for_each_entry(lobj, validated, tv.head) { | 298 | list_for_each_entry(lobj, validated, tv.head) { |
350 | bo = lobj->robj; | 299 | struct amdgpu_bo *bo = lobj->robj; |
351 | if (!bo->pin_count) { | 300 | struct mm_struct *usermm; |
352 | u32 domain = lobj->prefered_domains; | 301 | uint32_t domain; |
353 | u32 current_domain = | ||
354 | amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); | ||
355 | |||
356 | /* Check if this buffer will be moved and don't move it | ||
357 | * if we have moved too many buffers for this IB already. | ||
358 | * | ||
359 | * Note that this allows moving at least one buffer of | ||
360 | * any size, because it doesn't take the current "bo" | ||
361 | * into account. We don't want to disallow buffer moves | ||
362 | * completely. | ||
363 | */ | ||
364 | if ((lobj->allowed_domains & current_domain) != 0 && | ||
365 | (domain & current_domain) == 0 && /* will be moved */ | ||
366 | bytes_moved > bytes_moved_threshold) { | ||
367 | /* don't move it */ | ||
368 | domain = current_domain; | ||
369 | } | ||
370 | 302 | ||
371 | retry: | 303 | usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); |
372 | amdgpu_ttm_placement_from_domain(bo, domain); | 304 | if (usermm && usermm != current->mm) |
373 | initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); | 305 | return -EPERM; |
374 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); | 306 | |
375 | bytes_moved += atomic64_read(&adev->num_bytes_moved) - | 307 | if (bo->pin_count) |
376 | initial_bytes_moved; | 308 | continue; |
377 | 309 | ||
378 | if (unlikely(r)) { | 310 | /* Avoid moving this one if we have moved too many buffers |
379 | if (r != -ERESTARTSYS && domain != lobj->allowed_domains) { | 311 | * for this IB already. |
380 | domain = lobj->allowed_domains; | 312 | * |
381 | goto retry; | 313 | * Note that this allows moving at least one buffer of |
382 | } | 314 | * any size, because it doesn't take the current "bo" |
383 | return r; | 315 | * into account. We don't want to disallow buffer moves |
316 | * completely. | ||
317 | */ | ||
318 | if (p->bytes_moved <= p->bytes_moved_threshold) | ||
319 | domain = bo->prefered_domains; | ||
320 | else | ||
321 | domain = bo->allowed_domains; | ||
322 | |||
323 | retry: | ||
324 | amdgpu_ttm_placement_from_domain(bo, domain); | ||
325 | initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved); | ||
326 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); | ||
327 | p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) - | ||
328 | initial_bytes_moved; | ||
329 | |||
330 | if (unlikely(r)) { | ||
331 | if (r != -ERESTARTSYS && domain != bo->allowed_domains) { | ||
332 | domain = bo->allowed_domains; | ||
333 | goto retry; | ||
384 | } | 334 | } |
335 | return r; | ||
385 | } | 336 | } |
386 | lobj->bo_va = amdgpu_vm_bo_find(vm, bo); | ||
387 | } | 337 | } |
388 | return 0; | 338 | return 0; |
389 | } | 339 | } |
390 | 340 | ||
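Editor's note: the reworked loop validates against the BO's preferred domains while the per-submission move budget holds, switches to the allowed domains once p->bytes_moved exceeds the threshold, and retries with the allowed domains when placement in the preferred one fails. A sketch of that decision ladder; the domains and the validate step are simulated.

#include <stdio.h>

#define DOM_VRAM 0x1
#define DOM_GTT  0x2

struct bo { unsigned preferred, allowed; unsigned long size; };

/* Pretend VRAM is full: placement succeeds only for GTT-capable requests. */
static int validate(struct bo *bo, unsigned domain, unsigned long *moved)
{
    if (!(domain & DOM_GTT))
        return -1;
    *moved += bo->size;
    return 0;
}

static int place(struct bo *bo, unsigned long *moved, unsigned long threshold)
{
    /* Under budget: aim for the preferred placement; over: take anything. */
    unsigned domain = (*moved <= threshold) ? bo->preferred : bo->allowed;
    int r;

retry:
    r = validate(bo, domain, moved);
    if (r && domain != bo->allowed) {
        domain = bo->allowed;   /* second chance with the wider set */
        goto retry;
    }
    return r;
}

int main(void)
{
    struct bo bo = { .preferred = DOM_VRAM,
                     .allowed = DOM_VRAM | DOM_GTT, .size = 4096 };
    unsigned long moved = 0;

    printf("place() = %d, moved = %lu\n", place(&bo, &moved, 1 << 20), moved);
    return 0;
}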
391 | static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) | 341 | static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, |
342 | union drm_amdgpu_cs *cs) | ||
392 | { | 343 | { |
393 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | 344 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; |
394 | struct amdgpu_cs_buckets buckets; | ||
395 | struct list_head duplicates; | 345 | struct list_head duplicates; |
396 | bool need_mmap_lock = false; | 346 | bool need_mmap_lock = false; |
397 | int i, r; | 347 | int r; |
398 | 348 | ||
349 | INIT_LIST_HEAD(&p->validated); | ||
350 | |||
351 | p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); | ||
399 | if (p->bo_list) { | 352 | if (p->bo_list) { |
400 | need_mmap_lock = p->bo_list->has_userptr; | 353 | need_mmap_lock = p->bo_list->has_userptr; |
401 | amdgpu_cs_buckets_init(&buckets); | 354 | amdgpu_bo_list_get_list(p->bo_list, &p->validated); |
402 | for (i = 0; i < p->bo_list->num_entries; i++) | ||
403 | amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head, | ||
404 | p->bo_list->array[i].priority); | ||
405 | |||
406 | amdgpu_cs_buckets_get_list(&buckets, &p->validated); | ||
407 | } | 355 | } |
408 | 356 | ||
409 | INIT_LIST_HEAD(&duplicates); | 357 | INIT_LIST_HEAD(&duplicates); |
410 | amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); | 358 | amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); |
411 | 359 | ||
412 | if (p->uf.bo) | 360 | if (p->job->uf.bo) |
413 | list_add(&p->uf_entry.tv.head, &p->validated); | 361 | list_add(&p->uf_entry.tv.head, &p->validated); |
414 | 362 | ||
415 | if (need_mmap_lock) | 363 | if (need_mmap_lock) |
@@ -421,11 +369,27 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) | |||
421 | 369 | ||
422 | amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); | 370 | amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); |
423 | 371 | ||
424 | r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates); | 372 | p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); |
373 | p->bytes_moved = 0; | ||
374 | |||
375 | r = amdgpu_cs_list_validate(p, &duplicates); | ||
376 | if (r) | ||
377 | goto error_validate; | ||
378 | |||
379 | r = amdgpu_cs_list_validate(p, &p->validated); | ||
425 | if (r) | 380 | if (r) |
426 | goto error_validate; | 381 | goto error_validate; |
427 | 382 | ||
428 | r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated); | 383 | if (p->bo_list) { |
384 | struct amdgpu_vm *vm = &fpriv->vm; | ||
385 | unsigned i; | ||
386 | |||
387 | for (i = 0; i < p->bo_list->num_entries; i++) { | ||
388 | struct amdgpu_bo *bo = p->bo_list->array[i].robj; | ||
389 | |||
390 | p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); | ||
391 | } | ||
392 | } | ||
429 | 393 | ||
430 | error_validate: | 394 | error_validate: |
431 | if (r) { | 395 | if (r) { |
@@ -447,7 +411,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) | |||
447 | 411 | ||
448 | list_for_each_entry(e, &p->validated, tv.head) { | 412 | list_for_each_entry(e, &p->validated, tv.head) { |
449 | struct reservation_object *resv = e->robj->tbo.resv; | 413 | struct reservation_object *resv = e->robj->tbo.resv; |
450 | r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp); | 414 | r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); |
451 | 415 | ||
452 | if (r) | 416 | if (r) |
453 | return r; | 417 | return r; |
@@ -510,11 +474,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo | |||
510 | for (i = 0; i < parser->nchunks; i++) | 474 | for (i = 0; i < parser->nchunks; i++) |
511 | drm_free_large(parser->chunks[i].kdata); | 475 | drm_free_large(parser->chunks[i].kdata); |
512 | kfree(parser->chunks); | 476 | kfree(parser->chunks); |
513 | if (parser->ibs) | 477 | if (parser->job) |
514 | for (i = 0; i < parser->num_ibs; i++) | 478 | amdgpu_job_free(parser->job); |
515 | amdgpu_ib_free(parser->adev, &parser->ibs[i]); | ||
516 | kfree(parser->ibs); | ||
517 | amdgpu_bo_unref(&parser->uf.bo); | ||
518 | amdgpu_bo_unref(&parser->uf_entry.robj); | 479 | amdgpu_bo_unref(&parser->uf_entry.robj); |
519 | } | 480 | } |
520 | 481 | ||
@@ -530,7 +491,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, | |||
530 | if (r) | 491 | if (r) |
531 | return r; | 492 | return r; |
532 | 493 | ||
533 | r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence); | 494 | r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence); |
534 | if (r) | 495 | if (r) |
535 | return r; | 496 | return r; |
536 | 497 | ||
@@ -556,14 +517,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, | |||
556 | return r; | 517 | return r; |
557 | 518 | ||
558 | f = bo_va->last_pt_update; | 519 | f = bo_va->last_pt_update; |
559 | r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f); | 520 | r = amdgpu_sync_fence(adev, &p->job->sync, f); |
560 | if (r) | 521 | if (r) |
561 | return r; | 522 | return r; |
562 | } | 523 | } |
563 | 524 | ||
564 | } | 525 | } |
565 | 526 | ||
566 | r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync); | 527 | r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync); |
567 | 528 | ||
568 | if (amdgpu_vm_debug && p->bo_list) { | 529 | if (amdgpu_vm_debug && p->bo_list) { |
569 | /* Invalidate all BOs to test for userspace bugs */ | 530 | /* Invalidate all BOs to test for userspace bugs */ |
@@ -581,29 +542,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, | |||
581 | } | 542 | } |
582 | 543 | ||
583 | static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, | 544 | static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, |
584 | struct amdgpu_cs_parser *parser) | 545 | struct amdgpu_cs_parser *p) |
585 | { | 546 | { |
586 | struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; | 547 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; |
587 | struct amdgpu_vm *vm = &fpriv->vm; | 548 | struct amdgpu_vm *vm = &fpriv->vm; |
588 | struct amdgpu_ring *ring; | 549 | struct amdgpu_ring *ring = p->job->ring; |
589 | int i, r; | 550 | int i, r; |
590 | 551 | ||
591 | if (parser->num_ibs == 0) | ||
592 | return 0; | ||
593 | |||
594 | /* Only for UVD/VCE VM emulation */ | 552 | /* Only for UVD/VCE VM emulation */ |
595 | for (i = 0; i < parser->num_ibs; i++) { | 553 | if (ring->funcs->parse_cs) { |
596 | ring = parser->ibs[i].ring; | 554 | for (i = 0; i < p->job->num_ibs; i++) { |
597 | if (ring->funcs->parse_cs) { | 555 | r = amdgpu_ring_parse_cs(ring, p, i); |
598 | r = amdgpu_ring_parse_cs(ring, parser, i); | ||
599 | if (r) | 556 | if (r) |
600 | return r; | 557 | return r; |
601 | } | 558 | } |
602 | } | 559 | } |
603 | 560 | ||
604 | r = amdgpu_bo_vm_update_pte(parser, vm); | 561 | r = amdgpu_bo_vm_update_pte(p, vm); |
605 | if (!r) | 562 | if (!r) |
606 | amdgpu_cs_sync_rings(parser); | 563 | amdgpu_cs_sync_rings(p); |
607 | 564 | ||
608 | return r; | 565 | return r; |
609 | } | 566 | } |
@@ -626,14 +583,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
626 | int i, j; | 583 | int i, j; |
627 | int r; | 584 | int r; |
628 | 585 | ||
629 | for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) { | 586 | for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { |
630 | struct amdgpu_cs_chunk *chunk; | 587 | struct amdgpu_cs_chunk *chunk; |
631 | struct amdgpu_ib *ib; | 588 | struct amdgpu_ib *ib; |
632 | struct drm_amdgpu_cs_chunk_ib *chunk_ib; | 589 | struct drm_amdgpu_cs_chunk_ib *chunk_ib; |
633 | struct amdgpu_ring *ring; | 590 | struct amdgpu_ring *ring; |
634 | 591 | ||
635 | chunk = &parser->chunks[i]; | 592 | chunk = &parser->chunks[i]; |
636 | ib = &parser->ibs[j]; | 593 | ib = &parser->job->ibs[j]; |
637 | chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; | 594 | chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; |
638 | 595 | ||
639 | if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) | 596 | if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) |
@@ -645,6 +602,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
645 | if (r) | 602 | if (r) |
646 | return r; | 603 | return r; |
647 | 604 | ||
605 | if (parser->job->ring && parser->job->ring != ring) | ||
606 | return -EINVAL; | ||
607 | |||
608 | parser->job->ring = ring; | ||
609 | |||
648 | if (ring->funcs->parse_cs) { | 610 | if (ring->funcs->parse_cs) { |
649 | struct amdgpu_bo_va_mapping *m; | 611 | struct amdgpu_bo_va_mapping *m; |
650 | struct amdgpu_bo *aobj = NULL; | 612 | struct amdgpu_bo *aobj = NULL; |
@@ -673,7 +635,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
673 | offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE; | 635 | offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE; |
674 | kptr += chunk_ib->va_start - offset; | 636 | kptr += chunk_ib->va_start - offset; |
675 | 637 | ||
676 | r = amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib); | 638 | r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib); |
677 | if (r) { | 639 | if (r) { |
678 | DRM_ERROR("Failed to get ib !\n"); | 640 | DRM_ERROR("Failed to get ib !\n"); |
679 | return r; | 641 | return r; |
@@ -682,7 +644,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
682 | memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); | 644 | memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); |
683 | amdgpu_bo_kunmap(aobj); | 645 | amdgpu_bo_kunmap(aobj); |
684 | } else { | 646 | } else { |
685 | r = amdgpu_ib_get(ring, vm, 0, ib); | 647 | r = amdgpu_ib_get(adev, vm, 0, ib); |
686 | if (r) { | 648 | if (r) { |
687 | DRM_ERROR("Failed to get ib !\n"); | 649 | DRM_ERROR("Failed to get ib !\n"); |
688 | return r; | 650 | return r; |
@@ -697,15 +659,12 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
697 | j++; | 659 | j++; |
698 | } | 660 | } |
699 | 661 | ||
700 | if (!parser->num_ibs) | ||
701 | return 0; | ||
702 | |||
703 | /* add GDS resources to first IB */ | 662 | /* add GDS resources to first IB */ |
704 | if (parser->bo_list) { | 663 | if (parser->bo_list) { |
705 | struct amdgpu_bo *gds = parser->bo_list->gds_obj; | 664 | struct amdgpu_bo *gds = parser->bo_list->gds_obj; |
706 | struct amdgpu_bo *gws = parser->bo_list->gws_obj; | 665 | struct amdgpu_bo *gws = parser->bo_list->gws_obj; |
707 | struct amdgpu_bo *oa = parser->bo_list->oa_obj; | 666 | struct amdgpu_bo *oa = parser->bo_list->oa_obj; |
708 | struct amdgpu_ib *ib = &parser->ibs[0]; | 667 | struct amdgpu_ib *ib = &parser->job->ibs[0]; |
709 | 668 | ||
710 | if (gds) { | 669 | if (gds) { |
711 | ib->gds_base = amdgpu_bo_gpu_offset(gds); | 670 | ib->gds_base = amdgpu_bo_gpu_offset(gds); |
@@ -721,15 +680,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
721 | } | 680 | } |
722 | } | 681 | } |
723 | /* wrap the last IB with user fence */ | 682 | /* wrap the last IB with user fence */ |
724 | if (parser->uf.bo) { | 683 | if (parser->job->uf.bo) { |
725 | struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1]; | 684 | struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1]; |
726 | 685 | ||
727 | /* UVD & VCE fw doesn't support user fences */ | 686 | /* UVD & VCE fw doesn't support user fences */ |
728 | if (ib->ring->type == AMDGPU_RING_TYPE_UVD || | 687 | if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD || |
729 | ib->ring->type == AMDGPU_RING_TYPE_VCE) | 688 | parser->job->ring->type == AMDGPU_RING_TYPE_VCE) |
730 | return -EINVAL; | 689 | return -EINVAL; |
731 | 690 | ||
732 | ib->user = &parser->uf; | 691 | ib->user = &parser->job->uf; |
733 | } | 692 | } |
734 | 693 | ||
735 | return 0; | 694 | return 0; |
@@ -739,14 +698,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, | |||
739 | struct amdgpu_cs_parser *p) | 698 | struct amdgpu_cs_parser *p) |
740 | { | 699 | { |
741 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | 700 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; |
742 | struct amdgpu_ib *ib; | ||
743 | int i, j, r; | 701 | int i, j, r; |
744 | 702 | ||
745 | if (!p->num_ibs) | ||
746 | return 0; | ||
747 | |||
748 | /* Add dependencies to first IB */ | ||
749 | ib = &p->ibs[0]; | ||
750 | for (i = 0; i < p->nchunks; ++i) { | 703 | for (i = 0; i < p->nchunks; ++i) { |
751 | struct drm_amdgpu_cs_chunk_dep *deps; | 704 | struct drm_amdgpu_cs_chunk_dep *deps; |
752 | struct amdgpu_cs_chunk *chunk; | 705 | struct amdgpu_cs_chunk *chunk; |
@@ -784,7 +737,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, | |||
784 | return r; | 737 | return r; |
785 | 738 | ||
786 | } else if (fence) { | 739 | } else if (fence) { |
787 | r = amdgpu_sync_fence(adev, &ib->sync, fence); | 740 | r = amdgpu_sync_fence(adev, &p->job->sync, |
741 | fence); | ||
788 | fence_put(fence); | 742 | fence_put(fence); |
789 | amdgpu_ctx_put(ctx); | 743 | amdgpu_ctx_put(ctx); |
790 | if (r) | 744 | if (r) |
@@ -796,15 +750,36 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, | |||
796 | return 0; | 750 | return 0; |
797 | } | 751 | } |
798 | 752 | ||
799 | static int amdgpu_cs_free_job(struct amdgpu_job *job) | 753 | static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, |
754 | union drm_amdgpu_cs *cs) | ||
800 | { | 755 | { |
801 | int i; | 756 | struct amdgpu_ring *ring = p->job->ring; |
802 | if (job->ibs) | 757 | struct amd_sched_fence *fence; |
803 | for (i = 0; i < job->num_ibs; i++) | 758 | struct amdgpu_job *job; |
804 | amdgpu_ib_free(job->adev, &job->ibs[i]); | 759 | |
805 | kfree(job->ibs); | 760 | job = p->job; |
806 | if (job->uf.bo) | 761 | p->job = NULL; |
807 | amdgpu_bo_unref(&job->uf.bo); | 762 | |
763 | job->base.sched = &ring->sched; | ||
764 | job->base.s_entity = &p->ctx->rings[ring->idx].entity; | ||
765 | job->owner = p->filp; | ||
766 | |||
767 | fence = amd_sched_fence_create(job->base.s_entity, p->filp); | ||
768 | if (!fence) { | ||
769 | amdgpu_job_free(job); | ||
770 | return -ENOMEM; | ||
771 | } | ||
772 | |||
773 | job->base.s_fence = fence; | ||
774 | p->fence = fence_get(&fence->base); | ||
775 | |||
776 | cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, | ||
777 | &fence->base); | ||
778 | job->ibs[job->num_ibs - 1].sequence = cs->out.handle; | ||
779 | |||
780 | trace_amdgpu_cs_ioctl(job); | ||
781 | amd_sched_entity_push_job(&job->base); | ||
782 | |||
808 | return 0; | 783 | return 0; |
809 | } | 784 | } |
810 | 785 | ||
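Editor's note: amdgpu_cs_submit() clears p->job before pushing the job, so the parser teardown path (amdgpu_cs_parser_fini() frees parser->job when non-NULL, as shown earlier in this series) can never free a job the scheduler now owns. A userspace sketch of that ownership hand-off; the types are illustrative.

#include <stdio.h>
#include <stdlib.h>

struct job { int id; };

struct parser { struct job *job; };

/* Consumer takes ownership; here it just frees the job immediately. */
static void scheduler_push(struct job *job)
{
    printf("scheduler got job %d\n", job->id);
    free(job);
}

static int submit(struct parser *p)
{
    struct job *job = p->job;

    p->job = NULL;          /* transfer: fini below must not free it */
    scheduler_push(job);
    return 0;
}

static void parser_fini(struct parser *p)
{
    if (p->job)             /* only frees jobs never handed off */
        free(p->job);
}

int main(void)
{
    struct parser p = { .job = malloc(sizeof(struct job)) };

    p.job->id = 42;
    submit(&p);
    parser_fini(&p);        /* safe: p.job is NULL */
    return 0;
}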
@@ -829,7 +804,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
829 | r = amdgpu_cs_handle_lockup(adev, r); | 804 | r = amdgpu_cs_handle_lockup(adev, r); |
830 | return r; | 805 | return r; |
831 | } | 806 | } |
832 | r = amdgpu_cs_parser_relocs(&parser); | 807 | r = amdgpu_cs_parser_bos(&parser, data); |
833 | if (r == -ENOMEM) | 808 | if (r == -ENOMEM) |
834 | DRM_ERROR("Not enough memory for command submission!\n"); | 809 | DRM_ERROR("Not enough memory for command submission!\n"); |
835 | else if (r && r != -ERESTARTSYS) | 810 | else if (r && r != -ERESTARTSYS) |
@@ -848,68 +823,14 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
848 | if (r) | 823 | if (r) |
849 | goto out; | 824 | goto out; |
850 | 825 | ||
851 | for (i = 0; i < parser.num_ibs; i++) | 826 | for (i = 0; i < parser.job->num_ibs; i++) |
852 | trace_amdgpu_cs(&parser, i); | 827 | trace_amdgpu_cs(&parser, i); |
853 | 828 | ||
854 | r = amdgpu_cs_ib_vm_chunk(adev, &parser); | 829 | r = amdgpu_cs_ib_vm_chunk(adev, &parser); |
855 | if (r) | 830 | if (r) |
856 | goto out; | 831 | goto out; |
857 | 832 | ||
858 | if (amdgpu_enable_scheduler && parser.num_ibs) { | 833 | r = amdgpu_cs_submit(&parser, cs); |
859 | struct amdgpu_ring * ring = parser.ibs->ring; | ||
860 | struct amd_sched_fence *fence; | ||
861 | struct amdgpu_job *job; | ||
862 | |||
863 | job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); | ||
864 | if (!job) { | ||
865 | r = -ENOMEM; | ||
866 | goto out; | ||
867 | } | ||
868 | |||
869 | job->base.sched = &ring->sched; | ||
870 | job->base.s_entity = &parser.ctx->rings[ring->idx].entity; | ||
871 | job->adev = parser.adev; | ||
872 | job->owner = parser.filp; | ||
873 | job->free_job = amdgpu_cs_free_job; | ||
874 | |||
875 | job->ibs = parser.ibs; | ||
876 | job->num_ibs = parser.num_ibs; | ||
877 | parser.ibs = NULL; | ||
878 | parser.num_ibs = 0; | ||
879 | |||
880 | if (job->ibs[job->num_ibs - 1].user) { | ||
881 | job->uf = parser.uf; | ||
882 | job->ibs[job->num_ibs - 1].user = &job->uf; | ||
883 | parser.uf.bo = NULL; | ||
884 | } | ||
885 | |||
886 | fence = amd_sched_fence_create(job->base.s_entity, | ||
887 | parser.filp); | ||
888 | if (!fence) { | ||
889 | r = -ENOMEM; | ||
890 | amdgpu_cs_free_job(job); | ||
891 | kfree(job); | ||
892 | goto out; | ||
893 | } | ||
894 | job->base.s_fence = fence; | ||
895 | parser.fence = fence_get(&fence->base); | ||
896 | |||
897 | cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring, | ||
898 | &fence->base); | ||
899 | job->ibs[job->num_ibs - 1].sequence = cs->out.handle; | ||
900 | |||
901 | trace_amdgpu_cs_ioctl(job); | ||
902 | amd_sched_entity_push_job(&job->base); | ||
903 | |||
904 | } else { | ||
905 | struct amdgpu_fence *fence; | ||
906 | |||
907 | r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs, | ||
908 | parser.filp); | ||
909 | fence = parser.ibs[parser.num_ibs - 1].fence; | ||
910 | parser.fence = fence_get(&fence->base); | ||
911 | cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; | ||
912 | } | ||
913 | 834 | ||
914 | out: | 835 | out: |
915 | amdgpu_cs_parser_fini(&parser, r, reserved_buffers); | 836 | amdgpu_cs_parser_fini(&parser, r, reserved_buffers); |
@@ -980,30 +901,36 @@ struct amdgpu_bo_va_mapping * | |||
980 | amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, | 901 | amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, |
981 | uint64_t addr, struct amdgpu_bo **bo) | 902 | uint64_t addr, struct amdgpu_bo **bo) |
982 | { | 903 | { |
983 | struct amdgpu_bo_list_entry *reloc; | ||
984 | struct amdgpu_bo_va_mapping *mapping; | 904 | struct amdgpu_bo_va_mapping *mapping; |
905 | unsigned i; | ||
906 | |||
907 | if (!parser->bo_list) | ||
908 | return NULL; | ||
985 | 909 | ||
986 | addr /= AMDGPU_GPU_PAGE_SIZE; | 910 | addr /= AMDGPU_GPU_PAGE_SIZE; |
987 | 911 | ||
988 | list_for_each_entry(reloc, &parser->validated, tv.head) { | 912 | for (i = 0; i < parser->bo_list->num_entries; i++) { |
989 | if (!reloc->bo_va) | 913 | struct amdgpu_bo_list_entry *lobj; |
914 | |||
915 | lobj = &parser->bo_list->array[i]; | ||
916 | if (!lobj->bo_va) | ||
990 | continue; | 917 | continue; |
991 | 918 | ||
992 | list_for_each_entry(mapping, &reloc->bo_va->valids, list) { | 919 | list_for_each_entry(mapping, &lobj->bo_va->valids, list) { |
993 | if (mapping->it.start > addr || | 920 | if (mapping->it.start > addr || |
994 | addr > mapping->it.last) | 921 | addr > mapping->it.last) |
995 | continue; | 922 | continue; |
996 | 923 | ||
997 | *bo = reloc->bo_va->bo; | 924 | *bo = lobj->bo_va->bo; |
998 | return mapping; | 925 | return mapping; |
999 | } | 926 | } |
1000 | 927 | ||
1001 | list_for_each_entry(mapping, &reloc->bo_va->invalids, list) { | 928 | list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { |
1002 | if (mapping->it.start > addr || | 929 | if (mapping->it.start > addr || |
1003 | addr > mapping->it.last) | 930 | addr > mapping->it.last) |
1004 | continue; | 931 | continue; |
1005 | 932 | ||
1006 | *bo = reloc->bo_va->bo; | 933 | *bo = lobj->bo_va->bo; |
1007 | return mapping; | 934 | return mapping; |
1008 | } | 935 | } |
1009 | } | 936 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 17d1fb12128a..17e13621fae9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | |||
@@ -25,8 +25,7 @@ | |||
25 | #include <drm/drmP.h> | 25 | #include <drm/drmP.h> |
26 | #include "amdgpu.h" | 26 | #include "amdgpu.h" |
27 | 27 | ||
28 | int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri, | 28 | static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) |
29 | struct amdgpu_ctx *ctx) | ||
30 | { | 29 | { |
31 | unsigned i, j; | 30 | unsigned i, j; |
32 | int r; | 31 | int r; |
@@ -35,44 +34,38 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri, | |||
35 | ctx->adev = adev; | 34 | ctx->adev = adev; |
36 | kref_init(&ctx->refcount); | 35 | kref_init(&ctx->refcount); |
37 | spin_lock_init(&ctx->ring_lock); | 36 | spin_lock_init(&ctx->ring_lock); |
38 | ctx->fences = kzalloc(sizeof(struct fence *) * amdgpu_sched_jobs * | 37 | ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, |
39 | AMDGPU_MAX_RINGS, GFP_KERNEL); | 38 | sizeof(struct fence*), GFP_KERNEL); |
40 | if (!ctx->fences) | 39 | if (!ctx->fences) |
41 | return -ENOMEM; | 40 | return -ENOMEM; |
42 | 41 | ||
43 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 42 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
44 | ctx->rings[i].sequence = 1; | 43 | ctx->rings[i].sequence = 1; |
45 | ctx->rings[i].fences = (void *)ctx->fences + sizeof(struct fence *) * | 44 | ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; |
46 | amdgpu_sched_jobs * i; | ||
47 | } | 45 | } |
48 | if (amdgpu_enable_scheduler) { | 46 | /* create context entity for each ring */ |
49 | /* create context entity for each ring */ | 47 | for (i = 0; i < adev->num_rings; i++) { |
50 | for (i = 0; i < adev->num_rings; i++) { | 48 | struct amdgpu_ring *ring = adev->rings[i]; |
51 | struct amd_sched_rq *rq; | 49 | struct amd_sched_rq *rq; |
52 | if (pri >= AMD_SCHED_MAX_PRIORITY) { | 50 | |
53 | kfree(ctx->fences); | 51 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; |
54 | return -EINVAL; | 52 | r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, |
55 | } | 53 | rq, amdgpu_sched_jobs); |
56 | rq = &adev->rings[i]->sched.sched_rq[pri]; | 54 | if (r) |
57 | r = amd_sched_entity_init(&adev->rings[i]->sched, | 55 | break; |
58 | &ctx->rings[i].entity, | 56 | } |
59 | rq, amdgpu_sched_jobs); | 57 | |
60 | if (r) | 58 | if (i < adev->num_rings) { |
61 | break; | 59 | for (j = 0; j < i; j++) |
62 | } | 60 | amd_sched_entity_fini(&adev->rings[j]->sched, |
63 | 61 | &ctx->rings[j].entity); | |
64 | if (i < adev->num_rings) { | 62 | kfree(ctx->fences); |
65 | for (j = 0; j < i; j++) | 63 | return r; |
66 | amd_sched_entity_fini(&adev->rings[j]->sched, | ||
67 | &ctx->rings[j].entity); | ||
68 | kfree(ctx->fences); | ||
69 | return r; | ||
70 | } | ||
71 | } | 64 | } |
72 | return 0; | 65 | return 0; |
73 | } | 66 | } |
74 | 67 | ||
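Editor's note: the context init rework allocates all fence slots with one kcalloc and hands each ring a typed window at &ctx->fences[amdgpu_sched_jobs * i], replacing the old void-pointer arithmetic. A userspace sketch of the same flat-array slicing; the sizes are illustrative.

#include <stdio.h>
#include <stdlib.h>

#define MAX_RINGS  4
#define SCHED_JOBS 8

struct ring_ctx { void **fences; };

int main(void)
{
    /* One allocation covering every ring's fence slots... */
    void **fences = calloc(MAX_RINGS * SCHED_JOBS, sizeof(void *));
    struct ring_ctx rings[MAX_RINGS];

    if (!fences)
        return 1;

    /* ...and a typed per-ring window, no void-pointer arithmetic. */
    for (int i = 0; i < MAX_RINGS; i++)
        rings[i].fences = &fences[SCHED_JOBS * i];

    rings[2].fences[3] = (void *)0xbeef;
    printf("slot (2,3) via flat array: %p\n", fences[SCHED_JOBS * 2 + 3]);

    free(fences);
    return 0;
}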
75 | void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) | 68 | static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) |
76 | { | 69 | { |
77 | struct amdgpu_device *adev = ctx->adev; | 70 | struct amdgpu_device *adev = ctx->adev; |
78 | unsigned i, j; | 71 | unsigned i, j; |
@@ -85,11 +78,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) | |||
85 | fence_put(ctx->rings[i].fences[j]); | 78 | fence_put(ctx->rings[i].fences[j]); |
86 | kfree(ctx->fences); | 79 | kfree(ctx->fences); |
87 | 80 | ||
88 | if (amdgpu_enable_scheduler) { | 81 | for (i = 0; i < adev->num_rings; i++) |
89 | for (i = 0; i < adev->num_rings; i++) | 82 | amd_sched_entity_fini(&adev->rings[i]->sched, |
90 | amd_sched_entity_fini(&adev->rings[i]->sched, | 83 | &ctx->rings[i].entity); |
91 | &ctx->rings[i].entity); | ||
92 | } | ||
93 | } | 84 | } |
94 | 85 | ||
95 | static int amdgpu_ctx_alloc(struct amdgpu_device *adev, | 86 | static int amdgpu_ctx_alloc(struct amdgpu_device *adev, |
@@ -112,7 +103,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, | |||
112 | return r; | 103 | return r; |
113 | } | 104 | } |
114 | *id = (uint32_t)r; | 105 | *id = (uint32_t)r; |
115 | r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_NORMAL, ctx); | 106 | r = amdgpu_ctx_init(adev, ctx); |
116 | if (r) { | 107 | if (r) { |
117 | idr_remove(&mgr->ctx_handles, *id); | 108 | idr_remove(&mgr->ctx_handles, *id); |
118 | *id = 0; | 109 | *id = 0; |
@@ -200,18 +191,18 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, | |||
200 | id = args->in.ctx_id; | 191 | id = args->in.ctx_id; |
201 | 192 | ||
202 | switch (args->in.op) { | 193 | switch (args->in.op) { |
203 | case AMDGPU_CTX_OP_ALLOC_CTX: | 194 | case AMDGPU_CTX_OP_ALLOC_CTX: |
204 | r = amdgpu_ctx_alloc(adev, fpriv, &id); | 195 | r = amdgpu_ctx_alloc(adev, fpriv, &id); |
205 | args->out.alloc.ctx_id = id; | 196 | args->out.alloc.ctx_id = id; |
206 | break; | 197 | break; |
207 | case AMDGPU_CTX_OP_FREE_CTX: | 198 | case AMDGPU_CTX_OP_FREE_CTX: |
208 | r = amdgpu_ctx_free(fpriv, id); | 199 | r = amdgpu_ctx_free(fpriv, id); |
209 | break; | 200 | break; |
210 | case AMDGPU_CTX_OP_QUERY_STATE: | 201 | case AMDGPU_CTX_OP_QUERY_STATE: |
211 | r = amdgpu_ctx_query(adev, fpriv, id, &args->out); | 202 | r = amdgpu_ctx_query(adev, fpriv, id, &args->out); |
212 | break; | 203 | break; |
213 | default: | 204 | default: |
214 | return -EINVAL; | 205 | return -EINVAL; |
215 | } | 206 | } |
216 | 207 | ||
217 | return r; | 208 | return r; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 65531463f88e..db20d2783def 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -636,31 +636,6 @@ bool amdgpu_card_posted(struct amdgpu_device *adev) | |||
636 | } | 636 | } |
637 | 637 | ||
638 | /** | 638 | /** |
639 | * amdgpu_boot_test_post_card - check and possibly initialize the hw | ||
640 | * | ||
641 | * @adev: amdgpu_device pointer | ||
642 | * | ||
643 | * Check if the asic is initialized and if not, attempt to initialize | ||
644 | * it (all asics). | ||
645 | * Returns true if initialized or false if not. | ||
646 | */ | ||
647 | bool amdgpu_boot_test_post_card(struct amdgpu_device *adev) | ||
648 | { | ||
649 | if (amdgpu_card_posted(adev)) | ||
650 | return true; | ||
651 | |||
652 | if (adev->bios) { | ||
653 | DRM_INFO("GPU not posted. posting now...\n"); | ||
654 | if (adev->is_atom_bios) | ||
655 | amdgpu_atom_asic_init(adev->mode_info.atom_context); | ||
656 | return true; | ||
657 | } else { | ||
658 | dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); | ||
659 | return false; | ||
660 | } | ||
661 | } | ||
662 | |||
663 | /** | ||
664 | * amdgpu_dummy_page_init - init dummy page used by the driver | 639 | * amdgpu_dummy_page_init - init dummy page used by the driver |
665 | * | 640 | * |
666 | * @adev: amdgpu_device pointer | 641 | * @adev: amdgpu_device pointer |
@@ -959,12 +934,6 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) | |||
959 | amdgpu_sched_jobs); | 934 | amdgpu_sched_jobs); |
960 | amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); | 935 | amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); |
961 | } | 936 | } |
962 | /* vramlimit must be a power of two */ | ||
963 | if (!amdgpu_check_pot_argument(amdgpu_vram_limit)) { | ||
964 | dev_warn(adev->dev, "vram limit (%d) must be a power of 2\n", | ||
965 | amdgpu_vram_limit); | ||
966 | amdgpu_vram_limit = 0; | ||
967 | } | ||
968 | 937 | ||
969 | if (amdgpu_gart_size != -1) { | 938 | if (amdgpu_gart_size != -1) { |
970 | /* gtt size must be power of two and greater or equal to 32M */ | 939 | /* gtt size must be power of two and greater or equal to 32M */ |
@@ -1434,7 +1403,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1434 | adev->mman.buffer_funcs = NULL; | 1403 | adev->mman.buffer_funcs = NULL; |
1435 | adev->mman.buffer_funcs_ring = NULL; | 1404 | adev->mman.buffer_funcs_ring = NULL; |
1436 | adev->vm_manager.vm_pte_funcs = NULL; | 1405 | adev->vm_manager.vm_pte_funcs = NULL; |
1437 | adev->vm_manager.vm_pte_funcs_ring = NULL; | 1406 | adev->vm_manager.vm_pte_num_rings = 0; |
1438 | adev->gart.gart_funcs = NULL; | 1407 | adev->gart.gart_funcs = NULL; |
1439 | adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS); | 1408 | adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS); |
1440 | 1409 | ||
@@ -1455,9 +1424,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1455 | 1424 | ||
1456 | /* mutex initialization are all done here so we | 1425 | /* mutex initialization are all done here so we |
1457 | * can recall function without having locking issues */ | 1426 | * can recall function without having locking issues */ |
1458 | mutex_init(&adev->ring_lock); | 1427 | mutex_init(&adev->vm_manager.lock); |
1459 | atomic_set(&adev->irq.ih.lock, 0); | 1428 | atomic_set(&adev->irq.ih.lock, 0); |
1460 | mutex_init(&adev->gem.mutex); | ||
1461 | mutex_init(&adev->pm.mutex); | 1429 | mutex_init(&adev->pm.mutex); |
1462 | mutex_init(&adev->gfx.gpu_clock_mutex); | 1430 | mutex_init(&adev->gfx.gpu_clock_mutex); |
1463 | mutex_init(&adev->srbm_mutex); | 1431 | mutex_init(&adev->srbm_mutex); |
@@ -1531,8 +1499,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1531 | return r; | 1499 | return r; |
1532 | } | 1500 | } |
1533 | 1501 | ||
1502 | /* See if the asic supports SR-IOV */ | ||
1503 | adev->virtualization.supports_sr_iov = | ||
1504 | amdgpu_atombios_has_gpu_virtualization_table(adev); | ||
1505 | |||
1534 | /* Post card if necessary */ | 1506 | /* Post card if necessary */ |
1535 | if (!amdgpu_card_posted(adev)) { | 1507 | if (!amdgpu_card_posted(adev) || |
1508 | adev->virtualization.supports_sr_iov) { | ||
1536 | if (!adev->bios) { | 1509 | if (!adev->bios) { |
1537 | dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); | 1510 | dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); |
1538 | return -EINVAL; | 1511 | return -EINVAL; |
@@ -1577,11 +1550,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1577 | return r; | 1550 | return r; |
1578 | } | 1551 | } |
1579 | 1552 | ||
1580 | r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_KERNEL, &adev->kernel_ctx); | ||
1581 | if (r) { | ||
1582 | dev_err(adev->dev, "failed to create kernel context (%d).\n", r); | ||
1583 | return r; | ||
1584 | } | ||
1585 | r = amdgpu_ib_ring_tests(adev); | 1553 | r = amdgpu_ib_ring_tests(adev); |
1586 | if (r) | 1554 | if (r) |
1587 | DRM_ERROR("ib ring test failed (%d).\n", r); | 1555 | DRM_ERROR("ib ring test failed (%d).\n", r); |
@@ -1645,7 +1613,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) | |||
1645 | adev->shutdown = true; | 1613 | adev->shutdown = true; |
1646 | /* evict vram memory */ | 1614 | /* evict vram memory */ |
1647 | amdgpu_bo_evict_vram(adev); | 1615 | amdgpu_bo_evict_vram(adev); |
1648 | amdgpu_ctx_fini(&adev->kernel_ctx); | ||
1649 | amdgpu_ib_pool_fini(adev); | 1616 | amdgpu_ib_pool_fini(adev); |
1650 | amdgpu_fence_driver_fini(adev); | 1617 | amdgpu_fence_driver_fini(adev); |
1651 | amdgpu_fbdev_fini(adev); | 1618 | amdgpu_fbdev_fini(adev); |
@@ -1889,6 +1856,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) | |||
1889 | 1856 | ||
1890 | retry: | 1857 | retry: |
1891 | r = amdgpu_asic_reset(adev); | 1858 | r = amdgpu_asic_reset(adev); |
1859 | /* post card */ | ||
1860 | amdgpu_atom_asic_init(adev->mode_info.atom_context); | ||
1861 | |||
1892 | if (!r) { | 1862 | if (!r) { |
1893 | dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); | 1863 | dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); |
1894 | r = amdgpu_resume(adev); | 1864 | r = amdgpu_resume(adev); |
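Two related amdgpu_device.c changes above deserve a note. Boards running under SR-IOV are now always posted at init time (presumably because the VF cannot rely on the AtomBIOS init tables having been run on its behalf), and a GPU reset re-posts the card for a similar reason: the asic reset wipes the state those tables set up. The resulting control flow, in a hedged sketch:

	/* init: detect SR-IOV via the AtomBIOS virtualization table and
	 * force a post even if the card already looks posted. */
	adev->virtualization.supports_sr_iov =
		amdgpu_atombios_has_gpu_virtualization_table(adev);
	if (!amdgpu_card_posted(adev) || adev->virtualization.supports_sr_iov)
		amdgpu_atom_asic_init(adev->mode_info.atom_context);

	/* reset: always re-post before trying to resume. */
	r = amdgpu_asic_reset(adev);
	amdgpu_atom_asic_init(adev->mode_info.atom_context);
	if (!r)
		r = amdgpu_resume(adev);

The now-redundant amdgpu_boot_test_post_card() helper, the per-device kernel context, and the ring_lock/gem.mutex pair all go away as part of the same cleanup.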
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index acd066d0a805..2cb53c24dec0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | |||
@@ -35,32 +35,30 @@ | |||
35 | #include <drm/drm_crtc_helper.h> | 35 | #include <drm/drm_crtc_helper.h> |
36 | #include <drm/drm_edid.h> | 36 | #include <drm/drm_edid.h> |
37 | 37 | ||
38 | static void amdgpu_flip_wait_fence(struct amdgpu_device *adev, | 38 | static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb) |
39 | struct fence **f) | ||
40 | { | 39 | { |
41 | struct amdgpu_fence *fence; | 40 | struct amdgpu_flip_work *work = |
42 | long r; | 41 | container_of(cb, struct amdgpu_flip_work, cb); |
43 | 42 | ||
44 | if (*f == NULL) | 43 | fence_put(f); |
45 | return; | 44 | schedule_work(&work->flip_work); |
45 | } | ||
46 | 46 | ||
47 | fence = to_amdgpu_fence(*f); | 47 | static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work, |
48 | if (fence) { | 48 | struct fence **f) |
49 | r = fence_wait(&fence->base, false); | 49 | { |
50 | if (r == -EDEADLK) | 50 | struct fence *fence = *f; |
51 | r = amdgpu_gpu_reset(adev); | ||
52 | } else | ||
53 | r = fence_wait(*f, false); | ||
54 | 51 | ||
55 | if (r) | 52 | if (fence == NULL) |
56 | DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r); | 53 | return false; |
57 | 54 | ||
58 | /* We continue with the page flip even if we failed to wait on | ||
59 | * the fence, otherwise the DRM core and userspace will be | ||
60 | * confused about which BO the CRTC is scanning out | ||
61 | */ | ||
62 | fence_put(*f); | ||
63 | *f = NULL; | 55 | *f = NULL; |
56 | |||
57 | if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback)) | ||
58 | return true; | ||
59 | |||
60 | fence_put(fence); | ||
61 | return false; | ||
64 | } | 62 | } |
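The rewrite above replaces a blocking fence_wait() in the flip path with fence_add_callback(): if the fence has already signalled, the helper returns false and the flip proceeds inline; otherwise the callback re-schedules the work item and the work function returns, to be re-entered once the fence fires. One subtlety: by the time the already-signalled case is detected, '*f' has been cleared, so the reference must be dropped through the local 'fence' pointer. The general shape of the technique:

	/* Sketch: non-blocking "wait" on a struct fence.  Returns true if
	 * a callback was armed and the caller must bail out; false if the
	 * fence was NULL or had already signalled. */
	if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
		return true;		/* callback armed */

	fence_put(fence);		/* -ENOENT: already signalled */
	return false;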
65 | 63 | ||
66 | static void amdgpu_flip_work_func(struct work_struct *__work) | 64 | static void amdgpu_flip_work_func(struct work_struct *__work) |
@@ -76,9 +74,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work) | |||
76 | int vpos, hpos, stat, min_udelay; | 74 | int vpos, hpos, stat, min_udelay; |
77 | struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; | 75 | struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; |
78 | 76 | ||
79 | amdgpu_flip_wait_fence(adev, &work->excl); | 77 | if (amdgpu_flip_handle_fence(work, &work->excl)) |
78 | return; | ||
79 | |||
80 | for (i = 0; i < work->shared_count; ++i) | 80 | for (i = 0; i < work->shared_count; ++i) |
81 | amdgpu_flip_wait_fence(adev, &work->shared[i]); | 81 | if (amdgpu_flip_handle_fence(work, &work->shared[i])) |
82 | return; | ||
82 | 83 | ||
83 | /* We borrow the event spin lock for protecting flip_status */ | 84 | /* We borrow the event spin lock for protecting flip_status */ |
84 | spin_lock_irqsave(&crtc->dev->event_lock, flags); | 85 | spin_lock_irqsave(&crtc->dev->event_lock, flags); |
@@ -118,12 +119,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work) | |||
118 | spin_lock_irqsave(&crtc->dev->event_lock, flags); | 119 | spin_lock_irqsave(&crtc->dev->event_lock, flags); |
119 | } | 120 | } |
120 | 121 | ||
121 | /* do the flip (mmio) */ | ||
122 | adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); | ||
123 | /* set the flip status */ | 122 | /* set the flip status */ |
124 | amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED; | 123 | amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED; |
125 | |||
126 | spin_unlock_irqrestore(&crtc->dev->event_lock, flags); | 124 | spin_unlock_irqrestore(&crtc->dev->event_lock, flags); |
125 | |||
126 | /* Do the flip (mmio) */ | ||
127 | adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); | ||
127 | } | 128 | } |
128 | 129 | ||
129 | /* | 130 | /* |
@@ -242,7 +243,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc, | |||
242 | /* update crtc fb */ | 243 | /* update crtc fb */ |
243 | crtc->primary->fb = fb; | 244 | crtc->primary->fb = fb; |
244 | spin_unlock_irqrestore(&crtc->dev->event_lock, flags); | 245 | spin_unlock_irqrestore(&crtc->dev->event_lock, flags); |
245 | queue_work(amdgpu_crtc->pflip_queue, &work->flip_work); | 246 | amdgpu_flip_work_func(&work->flip_work); |
246 | return 0; | 247 | return 0; |
247 | 248 | ||
248 | vblank_cleanup: | 249 | vblank_cleanup: |
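With the fences handled asynchronously, amdgpu_crtc_page_flip() no longer needs the dedicated pflip_queue: it calls amdgpu_flip_work_func() directly, and the work item re-queues itself through the fence callbacks only while some fence is still pending. Note also the reordering inside the work function: the flip status is updated and the event lock released before the mmio page_flip call rather than after it.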
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 9c1af8976bef..ce79a8b605a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | |||
@@ -69,7 +69,6 @@ int amdgpu_dpm = -1; | |||
69 | int amdgpu_smc_load_fw = 1; | 69 | int amdgpu_smc_load_fw = 1; |
70 | int amdgpu_aspm = -1; | 70 | int amdgpu_aspm = -1; |
71 | int amdgpu_runtime_pm = -1; | 71 | int amdgpu_runtime_pm = -1; |
72 | int amdgpu_hard_reset = 0; | ||
73 | unsigned amdgpu_ip_block_mask = 0xffffffff; | 72 | unsigned amdgpu_ip_block_mask = 0xffffffff; |
74 | int amdgpu_bapm = -1; | 73 | int amdgpu_bapm = -1; |
75 | int amdgpu_deep_color = 0; | 74 | int amdgpu_deep_color = 0; |
@@ -78,10 +77,8 @@ int amdgpu_vm_block_size = -1; | |||
78 | int amdgpu_vm_fault_stop = 0; | 77 | int amdgpu_vm_fault_stop = 0; |
79 | int amdgpu_vm_debug = 0; | 78 | int amdgpu_vm_debug = 0; |
80 | int amdgpu_exp_hw_support = 0; | 79 | int amdgpu_exp_hw_support = 0; |
81 | int amdgpu_enable_scheduler = 1; | ||
82 | int amdgpu_sched_jobs = 32; | 80 | int amdgpu_sched_jobs = 32; |
83 | int amdgpu_sched_hw_submission = 2; | 81 | int amdgpu_sched_hw_submission = 2; |
84 | int amdgpu_enable_semaphores = 0; | ||
85 | int amdgpu_powerplay = -1; | 82 | int amdgpu_powerplay = -1; |
86 | 83 | ||
87 | MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); | 84 | MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); |
@@ -126,9 +123,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); | |||
126 | MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); | 123 | MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); |
127 | module_param_named(runpm, amdgpu_runtime_pm, int, 0444); | 124 | module_param_named(runpm, amdgpu_runtime_pm, int, 0444); |
128 | 125 | ||
129 | MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))"); | ||
130 | module_param_named(hard_reset, amdgpu_hard_reset, int, 0444); | ||
131 | |||
132 | MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); | 126 | MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); |
133 | module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); | 127 | module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); |
134 | 128 | ||
@@ -153,18 +147,12 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); | |||
153 | MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); | 147 | MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); |
154 | module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); | 148 | module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); |
155 | 149 | ||
156 | MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)"); | ||
157 | module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444); | ||
158 | |||
159 | MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)"); | 150 | MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)"); |
160 | module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); | 151 | module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); |
161 | 152 | ||
162 | MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); | 153 | MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); |
163 | module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); | 154 | module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); |
164 | 155 | ||
165 | MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))"); | ||
166 | module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644); | ||
167 | |||
168 | #ifdef CONFIG_DRM_AMD_POWERPLAY | 156 | #ifdef CONFIG_DRM_AMD_POWERPLAY |
169 | MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))"); | 157 | MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))"); |
170 | module_param_named(powerplay, amdgpu_powerplay, int, 0444); | 158 | module_param_named(powerplay, amdgpu_powerplay, int, 0444); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 3671f9f220bd..97db196dc6f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | |||
@@ -107,7 +107,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, | |||
107 | if ((*fence) == NULL) { | 107 | if ((*fence) == NULL) { |
108 | return -ENOMEM; | 108 | return -ENOMEM; |
109 | } | 109 | } |
110 | (*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx]; | 110 | (*fence)->seq = ++ring->fence_drv.sync_seq; |
111 | (*fence)->ring = ring; | 111 | (*fence)->ring = ring; |
112 | (*fence)->owner = owner; | 112 | (*fence)->owner = owner; |
113 | fence_init(&(*fence)->base, &amdgpu_fence_ops, | 113 | fence_init(&(*fence)->base, &amdgpu_fence_ops, |
@@ -171,7 +171,7 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring) | |||
171 | */ | 171 | */ |
172 | last_seq = atomic64_read(&ring->fence_drv.last_seq); | 172 | last_seq = atomic64_read(&ring->fence_drv.last_seq); |
173 | do { | 173 | do { |
174 | last_emitted = ring->fence_drv.sync_seq[ring->idx]; | 174 | last_emitted = ring->fence_drv.sync_seq; |
175 | seq = amdgpu_fence_read(ring); | 175 | seq = amdgpu_fence_read(ring); |
176 | seq |= last_seq & 0xffffffff00000000LL; | 176 | seq |= last_seq & 0xffffffff00000000LL; |
177 | if (seq < last_seq) { | 177 | if (seq < last_seq) { |
@@ -260,34 +260,28 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq) | |||
260 | } | 260 | } |
261 | 261 | ||
262 | /* | 262 | /* |
263 | * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal | 263 | * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal |
264 | * @ring: ring to wait on for the seq number | 264 | * @ring: ring to wait on for the seq number |
265 | * @seq: seq number to wait for | 265 | * @seq: seq number to wait for |
266 | * | 266 | * |
267 | * return value: | 267 | * return value: |
268 | * 0: seq signaled, and the GPU did not hang | 268 | * 0: seq signaled, and the GPU did not hang |
269 | * -EDEADLK: GPU hang detected | ||
270 | * -EINVAL: some parameter is not valid | 269 | * -EINVAL: some parameter is not valid |
271 | */ | 270 | */ |
272 | static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq) | 271 | static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq) |
273 | { | 272 | { |
274 | bool signaled = false; | ||
275 | |||
276 | BUG_ON(!ring); | 273 | BUG_ON(!ring); |
277 | if (seq > ring->fence_drv.sync_seq[ring->idx]) | 274 | if (seq > ring->fence_drv.sync_seq) |
278 | return -EINVAL; | 275 | return -EINVAL; |
279 | 276 | ||
280 | if (atomic64_read(&ring->fence_drv.last_seq) >= seq) | 277 | if (atomic64_read(&ring->fence_drv.last_seq) >= seq) |
281 | return 0; | 278 | return 0; |
282 | 279 | ||
283 | amdgpu_fence_schedule_fallback(ring); | 280 | amdgpu_fence_schedule_fallback(ring); |
284 | wait_event(ring->fence_drv.fence_queue, ( | 281 | wait_event(ring->fence_drv.fence_queue, |
285 | (signaled = amdgpu_fence_seq_signaled(ring, seq)))); | 282 | amdgpu_fence_seq_signaled(ring, seq)); |
286 | 283 | ||
287 | if (signaled) | 284 | return 0; |
288 | return 0; | ||
289 | else | ||
290 | return -EDEADLK; | ||
291 | } | 285 | } |
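The simplified wait above leans on the fact that wait_event() re-evaluates its condition on every wake-up, so the old 'signaled' capture variable was dead weight; and with GPU resets no longer driven from fence waits, the -EDEADLK return goes away too. For reference, the whole wait now reduces to:

	/* Sleep on the ring's wait queue until the seqno retires;
	 * amdgpu_fence_process() wakes fence_queue as fences signal. */
	amdgpu_fence_schedule_fallback(ring);
	wait_event(ring->fence_drv.fence_queue,
		   amdgpu_fence_seq_signaled(ring, seq));
	return 0;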
292 | 286 | ||
293 | /** | 287 | /** |
@@ -304,7 +298,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring) | |||
304 | { | 298 | { |
305 | uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL; | 299 | uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL; |
306 | 300 | ||
307 | if (seq >= ring->fence_drv.sync_seq[ring->idx]) | 301 | if (seq >= ring->fence_drv.sync_seq) |
308 | return -ENOENT; | 302 | return -ENOENT; |
309 | 303 | ||
310 | return amdgpu_fence_ring_wait_seq(ring, seq); | 304 | return amdgpu_fence_ring_wait_seq(ring, seq); |
@@ -322,7 +316,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring) | |||
322 | */ | 316 | */ |
323 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) | 317 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) |
324 | { | 318 | { |
325 | uint64_t seq = ring->fence_drv.sync_seq[ring->idx]; | 319 | uint64_t seq = ring->fence_drv.sync_seq; |
326 | 320 | ||
327 | if (!seq) | 321 | if (!seq) |
328 | return 0; | 322 | return 0; |
@@ -347,7 +341,7 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) | |||
347 | * but it's ok to report slightly wrong fence count here. | 341 | * but it's ok to report slightly wrong fence count here. |
348 | */ | 342 | */ |
349 | amdgpu_fence_process(ring); | 343 | amdgpu_fence_process(ring); |
350 | emitted = ring->fence_drv.sync_seq[ring->idx] | 344 | emitted = ring->fence_drv.sync_seq |
351 | - atomic64_read(&ring->fence_drv.last_seq); | 345 | - atomic64_read(&ring->fence_drv.last_seq); |
352 | /* to avoid 32 bit wrap around */ | 346 | /* to avoid 32 bit wrap around */ |
353 | if (emitted > 0x10000000) | 347 | if (emitted > 0x10000000) |
@@ -357,68 +351,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) | |||
357 | } | 351 | } |
358 | 352 | ||
359 | /** | 353 | /** |
360 | * amdgpu_fence_need_sync - do we need a semaphore | ||
361 | * | ||
362 | * @fence: amdgpu fence object | ||
363 | * @dst_ring: which ring to check against | ||
364 | * | ||
365 | * Check if the fence needs to be synced against another ring | ||
366 | * (all asics). If so, we need to emit a semaphore. | ||
367 | * Returns true if we need to sync with another ring, false if | ||
368 | * not. | ||
369 | */ | ||
370 | bool amdgpu_fence_need_sync(struct amdgpu_fence *fence, | ||
371 | struct amdgpu_ring *dst_ring) | ||
372 | { | ||
373 | struct amdgpu_fence_driver *fdrv; | ||
374 | |||
375 | if (!fence) | ||
376 | return false; | ||
377 | |||
378 | if (fence->ring == dst_ring) | ||
379 | return false; | ||
380 | |||
381 | /* we are protected by the ring mutex */ | ||
382 | fdrv = &dst_ring->fence_drv; | ||
383 | if (fence->seq <= fdrv->sync_seq[fence->ring->idx]) | ||
384 | return false; | ||
385 | |||
386 | return true; | ||
387 | } | ||
388 | |||
389 | /** | ||
390 | * amdgpu_fence_note_sync - record the sync point | ||
391 | * | ||
392 | * @fence: amdgpu fence object | ||
393 | * @dst_ring: which ring to check against | ||
394 | * | ||
395 | * Note the sequence number at which point the fence will | ||
396 | * be synced with the requested ring (all asics). | ||
397 | */ | ||
398 | void amdgpu_fence_note_sync(struct amdgpu_fence *fence, | ||
399 | struct amdgpu_ring *dst_ring) | ||
400 | { | ||
401 | struct amdgpu_fence_driver *dst, *src; | ||
402 | unsigned i; | ||
403 | |||
404 | if (!fence) | ||
405 | return; | ||
406 | |||
407 | if (fence->ring == dst_ring) | ||
408 | return; | ||
409 | |||
410 | /* we are protected by the ring mutex */ | ||
411 | src = &fence->ring->fence_drv; | ||
412 | dst = &dst_ring->fence_drv; | ||
413 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | ||
414 | if (i == dst_ring->idx) | ||
415 | continue; | ||
416 | |||
417 | dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]); | ||
418 | } | ||
419 | } | ||
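The two functions removed above were the last consumers of the per-ring sync_seq[AMDGPU_MAX_RINGS] matrix, which tracked how far each ring had synced against every other ring so the driver could decide when to emit a hardware semaphore. With semaphore support gone (inter-ring dependencies are resolved through the scheduler and fence waits instead), each fence driver only needs to know about its own ring, roughly:

	/* Reduced per-ring bookkeeping (sketch): */
	struct amdgpu_fence_driver {
		uint64_t	sync_seq;	/* last seqno emitted   */
		atomic64_t	last_seq;	/* last seqno signalled */
		/* ... */
	};

	/* e.g. outstanding fences in amdgpu_fence_count_emitted(): */
	emitted = ring->fence_drv.sync_seq -
		  atomic64_read(&ring->fence_drv.last_seq);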
420 | |||
421 | /** | ||
422 | * amdgpu_fence_driver_start_ring - make the fence driver | 354 | * amdgpu_fence_driver_start_ring - make the fence driver |
423 | * ready for use on the requested ring. | 355 | * ready for use on the requested ring. |
424 | * | 356 | * |
@@ -471,13 +403,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, | |||
471 | */ | 403 | */ |
472 | int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) | 404 | int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) |
473 | { | 405 | { |
474 | int i, r; | 406 | long timeout; |
407 | int r; | ||
475 | 408 | ||
476 | ring->fence_drv.cpu_addr = NULL; | 409 | ring->fence_drv.cpu_addr = NULL; |
477 | ring->fence_drv.gpu_addr = 0; | 410 | ring->fence_drv.gpu_addr = 0; |
478 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | 411 | ring->fence_drv.sync_seq = 0; |
479 | ring->fence_drv.sync_seq[i] = 0; | ||
480 | |||
481 | atomic64_set(&ring->fence_drv.last_seq, 0); | 412 | atomic64_set(&ring->fence_drv.last_seq, 0); |
482 | ring->fence_drv.initialized = false; | 413 | ring->fence_drv.initialized = false; |
483 | 414 | ||
@@ -486,26 +417,24 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) | |||
486 | 417 | ||
487 | init_waitqueue_head(&ring->fence_drv.fence_queue); | 418 | init_waitqueue_head(&ring->fence_drv.fence_queue); |
488 | 419 | ||
489 | if (amdgpu_enable_scheduler) { | 420 | timeout = msecs_to_jiffies(amdgpu_lockup_timeout); |
490 | long timeout = msecs_to_jiffies(amdgpu_lockup_timeout); | 421 | if (timeout == 0) { |
491 | if (timeout == 0) { | 422 | /* |
492 | /* | 423 | * FIXME: |
493 | * FIXME: | 424 | * Delayed workqueue cannot use it directly, |
494 | * Delayed workqueue cannot use it directly, | 425 | * so the scheduler will not use delayed workqueue if |
495 | * so the scheduler will not use delayed workqueue if | 426 | * MAX_SCHEDULE_TIMEOUT is set. |
496 | * MAX_SCHEDULE_TIMEOUT is set. | 427 | * Currently keep it simple and silly. |
497 | * Currently keep it simple and silly. | 428 | */ |
498 | */ | 429 | timeout = MAX_SCHEDULE_TIMEOUT; |
499 | timeout = MAX_SCHEDULE_TIMEOUT; | 430 | } |
500 | } | 431 | r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, |
501 | r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, | 432 | amdgpu_sched_hw_submission, |
502 | amdgpu_sched_hw_submission, | 433 | timeout, ring->name); |
503 | timeout, ring->name); | 434 | if (r) { |
504 | if (r) { | 435 | DRM_ERROR("Failed to create scheduler on ring %s.\n", |
505 | DRM_ERROR("Failed to create scheduler on ring %s.\n", | 436 | ring->name); |
506 | ring->name); | 437 | return r; |
507 | return r; | ||
508 | } | ||
509 | } | 438 | } |
510 | 439 | ||
511 | return 0; | 440 | return 0; |
@@ -552,7 +481,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) | |||
552 | 481 | ||
553 | if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) | 482 | if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) |
554 | kmem_cache_destroy(amdgpu_fence_slab); | 483 | kmem_cache_destroy(amdgpu_fence_slab); |
555 | mutex_lock(&adev->ring_lock); | ||
556 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { | 484 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { |
557 | struct amdgpu_ring *ring = adev->rings[i]; | 485 | struct amdgpu_ring *ring = adev->rings[i]; |
558 | 486 | ||
@@ -570,7 +498,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) | |||
570 | del_timer_sync(&ring->fence_drv.fallback_timer); | 498 | del_timer_sync(&ring->fence_drv.fallback_timer); |
571 | ring->fence_drv.initialized = false; | 499 | ring->fence_drv.initialized = false; |
572 | } | 500 | } |
573 | mutex_unlock(&adev->ring_lock); | ||
574 | } | 501 | } |
575 | 502 | ||
576 | /** | 503 | /** |
@@ -585,7 +512,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev) | |||
585 | { | 512 | { |
586 | int i, r; | 513 | int i, r; |
587 | 514 | ||
588 | mutex_lock(&adev->ring_lock); | ||
589 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { | 515 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { |
590 | struct amdgpu_ring *ring = adev->rings[i]; | 516 | struct amdgpu_ring *ring = adev->rings[i]; |
591 | if (!ring || !ring->fence_drv.initialized) | 517 | if (!ring || !ring->fence_drv.initialized) |
@@ -602,7 +528,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev) | |||
602 | amdgpu_irq_put(adev, ring->fence_drv.irq_src, | 528 | amdgpu_irq_put(adev, ring->fence_drv.irq_src, |
603 | ring->fence_drv.irq_type); | 529 | ring->fence_drv.irq_type); |
604 | } | 530 | } |
605 | mutex_unlock(&adev->ring_lock); | ||
606 | } | 531 | } |
607 | 532 | ||
608 | /** | 533 | /** |
@@ -621,7 +546,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev) | |||
621 | { | 546 | { |
622 | int i; | 547 | int i; |
623 | 548 | ||
624 | mutex_lock(&adev->ring_lock); | ||
625 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { | 549 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { |
626 | struct amdgpu_ring *ring = adev->rings[i]; | 550 | struct amdgpu_ring *ring = adev->rings[i]; |
627 | if (!ring || !ring->fence_drv.initialized) | 551 | if (!ring || !ring->fence_drv.initialized) |
@@ -631,7 +555,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev) | |||
631 | amdgpu_irq_get(adev, ring->fence_drv.irq_src, | 555 | amdgpu_irq_get(adev, ring->fence_drv.irq_src, |
632 | ring->fence_drv.irq_type); | 556 | ring->fence_drv.irq_type); |
633 | } | 557 | } |
634 | mutex_unlock(&adev->ring_lock); | ||
635 | } | 558 | } |
636 | 559 | ||
637 | /** | 560 | /** |
@@ -651,7 +574,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) | |||
651 | if (!ring || !ring->fence_drv.initialized) | 574 | if (!ring || !ring->fence_drv.initialized) |
652 | continue; | 575 | continue; |
653 | 576 | ||
654 | amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]); | 577 | amdgpu_fence_write(ring, ring->fence_drv.sync_seq); |
655 | } | 578 | } |
656 | } | 579 | } |
657 | 580 | ||
@@ -781,7 +704,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) | |||
781 | struct drm_info_node *node = (struct drm_info_node *)m->private; | 704 | struct drm_info_node *node = (struct drm_info_node *)m->private; |
782 | struct drm_device *dev = node->minor->dev; | 705 | struct drm_device *dev = node->minor->dev; |
783 | struct amdgpu_device *adev = dev->dev_private; | 706 | struct amdgpu_device *adev = dev->dev_private; |
784 | int i, j; | 707 | int i; |
785 | 708 | ||
786 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 709 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
787 | struct amdgpu_ring *ring = adev->rings[i]; | 710 | struct amdgpu_ring *ring = adev->rings[i]; |
@@ -794,28 +717,38 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) | |||
794 | seq_printf(m, "Last signaled fence 0x%016llx\n", | 717 | seq_printf(m, "Last signaled fence 0x%016llx\n", |
795 | (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); | 718 | (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); |
796 | seq_printf(m, "Last emitted 0x%016llx\n", | 719 | seq_printf(m, "Last emitted 0x%016llx\n", |
797 | ring->fence_drv.sync_seq[i]); | 720 | ring->fence_drv.sync_seq); |
798 | |||
799 | for (j = 0; j < AMDGPU_MAX_RINGS; ++j) { | ||
800 | struct amdgpu_ring *other = adev->rings[j]; | ||
801 | if (i != j && other && other->fence_drv.initialized && | ||
802 | ring->fence_drv.sync_seq[j]) | ||
803 | seq_printf(m, "Last sync to ring %d 0x%016llx\n", | ||
804 | j, ring->fence_drv.sync_seq[j]); | ||
805 | } | ||
806 | } | 721 | } |
807 | return 0; | 722 | return 0; |
808 | } | 723 | } |
809 | 724 | ||
725 | /** | ||
726 | * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset | ||
727 | * | ||
728 | * Manually trigger a gpu reset. The reset runs as soon as this debugfs file is read. | ||
729 | */ | ||
730 | static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data) | ||
731 | { | ||
732 | struct drm_info_node *node = (struct drm_info_node *) m->private; | ||
733 | struct drm_device *dev = node->minor->dev; | ||
734 | struct amdgpu_device *adev = dev->dev_private; | ||
735 | |||
736 | seq_printf(m, "gpu reset\n"); | ||
737 | amdgpu_gpu_reset(adev); | ||
738 | |||
739 | return 0; | ||
740 | } | ||
741 | |||
810 | static struct drm_info_list amdgpu_debugfs_fence_list[] = { | 742 | static struct drm_info_list amdgpu_debugfs_fence_list[] = { |
811 | {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, | 743 | {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, |
744 | {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} | ||
812 | }; | 745 | }; |
813 | #endif | 746 | #endif |
814 | 747 | ||
815 | int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) | 748 | int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) |
816 | { | 749 | { |
817 | #if defined(CONFIG_DEBUG_FS) | 750 | #if defined(CONFIG_DEBUG_FS) |
818 | return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1); | 751 | return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); |
819 | #else | 752 | #else |
820 | return 0; | 753 | return 0; |
821 | #endif | 754 | #endif |
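The new debugfs entry gives developers a manual trigger: reading amdgpu_gpu_reset under the device's debugfs directory calls amdgpu_gpu_reset() directly. Since the element count passed to amdgpu_debugfs_add_files() had to be bumped by hand from 1 to 2 for this, deriving it from the array would keep the two from drifting apart, e.g.:

	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list,
					ARRAY_SIZE(amdgpu_debugfs_fence_list));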
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7380f782cd14..2e26a517f2d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | |||
@@ -83,24 +83,32 @@ retry: | |||
83 | return r; | 83 | return r; |
84 | } | 84 | } |
85 | *obj = &robj->gem_base; | 85 | *obj = &robj->gem_base; |
86 | robj->pid = task_pid_nr(current); | ||
87 | |||
88 | mutex_lock(&adev->gem.mutex); | ||
89 | list_add_tail(&robj->list, &adev->gem.objects); | ||
90 | mutex_unlock(&adev->gem.mutex); | ||
91 | 86 | ||
92 | return 0; | 87 | return 0; |
93 | } | 88 | } |
94 | 89 | ||
95 | int amdgpu_gem_init(struct amdgpu_device *adev) | 90 | void amdgpu_gem_force_release(struct amdgpu_device *adev) |
96 | { | 91 | { |
97 | INIT_LIST_HEAD(&adev->gem.objects); | 92 | struct drm_device *ddev = adev->ddev; |
98 | return 0; | 93 | struct drm_file *file; |
99 | } | ||
100 | 94 | ||
101 | void amdgpu_gem_fini(struct amdgpu_device *adev) | 95 | mutex_lock(&ddev->struct_mutex); |
102 | { | 96 | |
103 | amdgpu_bo_force_delete(adev); | 97 | list_for_each_entry(file, &ddev->filelist, lhead) { |
98 | struct drm_gem_object *gobj; | ||
99 | int handle; | ||
100 | |||
101 | WARN_ONCE(1, "Still active user space clients!\n"); | ||
102 | spin_lock(&file->table_lock); | ||
103 | idr_for_each_entry(&file->object_idr, gobj, handle) { | ||
104 | WARN_ONCE(1, "And also active allocations!\n"); | ||
105 | drm_gem_object_unreference(gobj); | ||
106 | } | ||
107 | idr_destroy(&file->object_idr); | ||
108 | spin_unlock(&file->table_lock); | ||
109 | } | ||
110 | |||
111 | mutex_unlock(&ddev->struct_mutex); | ||
104 | } | 112 | } |
105 | 113 | ||
106 | /* | 114 | /* |
@@ -252,6 +260,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, | |||
252 | goto handle_lockup; | 260 | goto handle_lockup; |
253 | 261 | ||
254 | bo = gem_to_amdgpu_bo(gobj); | 262 | bo = gem_to_amdgpu_bo(gobj); |
263 | bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; | ||
264 | bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; | ||
255 | r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); | 265 | r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); |
256 | if (r) | 266 | if (r) |
257 | goto release_object; | 267 | goto release_object; |
@@ -308,7 +318,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, | |||
308 | return -ENOENT; | 318 | return -ENOENT; |
309 | } | 319 | } |
310 | robj = gem_to_amdgpu_bo(gobj); | 320 | robj = gem_to_amdgpu_bo(gobj); |
311 | if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm) || | 321 | if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) || |
312 | (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { | 322 | (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { |
313 | drm_gem_object_unreference_unlocked(gobj); | 323 | drm_gem_object_unreference_unlocked(gobj); |
314 | return -EPERM; | 324 | return -EPERM; |
@@ -628,7 +638,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, | |||
628 | 638 | ||
629 | info.bo_size = robj->gem_base.size; | 639 | info.bo_size = robj->gem_base.size; |
630 | info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; | 640 | info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; |
631 | info.domains = robj->initial_domain; | 641 | info.domains = robj->prefered_domains; |
632 | info.domain_flags = robj->flags; | 642 | info.domain_flags = robj->flags; |
633 | amdgpu_bo_unreserve(robj); | 643 | amdgpu_bo_unreserve(robj); |
634 | if (copy_to_user(out, &info, sizeof(info))) | 644 | if (copy_to_user(out, &info, sizeof(info))) |
@@ -636,14 +646,18 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, | |||
636 | break; | 646 | break; |
637 | } | 647 | } |
638 | case AMDGPU_GEM_OP_SET_PLACEMENT: | 648 | case AMDGPU_GEM_OP_SET_PLACEMENT: |
639 | if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) { | 649 | if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) { |
640 | r = -EPERM; | 650 | r = -EPERM; |
641 | amdgpu_bo_unreserve(robj); | 651 | amdgpu_bo_unreserve(robj); |
642 | break; | 652 | break; |
643 | } | 653 | } |
644 | robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM | | 654 | robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | |
645 | AMDGPU_GEM_DOMAIN_GTT | | 655 | AMDGPU_GEM_DOMAIN_GTT | |
646 | AMDGPU_GEM_DOMAIN_CPU); | 656 | AMDGPU_GEM_DOMAIN_CPU); |
657 | robj->allowed_domains = robj->prefered_domains; | ||
658 | if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) | ||
659 | robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; | ||
660 | |||
647 | amdgpu_bo_unreserve(robj); | 661 | amdgpu_bo_unreserve(robj); |
648 | break; | 662 | break; |
649 | default: | 663 | default: |
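The prefered_domains/allowed_domains split above ('prefered' is the field's actual spelling) separates what userspace asked for from where the allocator may fall back to: a VRAM-only request gains GTT as an allowed domain, so the buffer can still be placed when VRAM is under pressure:

	robj->allowed_domains = robj->prefered_domains;
	if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
		robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;

The userptr path earlier in this file pins both fields to GTT, since userptr pages live in system memory by definition and can never migrate to VRAM.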
@@ -688,38 +702,73 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, | |||
688 | } | 702 | } |
689 | 703 | ||
690 | #if defined(CONFIG_DEBUG_FS) | 704 | #if defined(CONFIG_DEBUG_FS) |
705 | static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) | ||
706 | { | ||
707 | struct drm_gem_object *gobj = ptr; | ||
708 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); | ||
709 | struct seq_file *m = data; | ||
710 | |||
711 | unsigned domain; | ||
712 | const char *placement; | ||
713 | unsigned pin_count; | ||
714 | |||
715 | domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); | ||
716 | switch (domain) { | ||
717 | case AMDGPU_GEM_DOMAIN_VRAM: | ||
718 | placement = "VRAM"; | ||
719 | break; | ||
720 | case AMDGPU_GEM_DOMAIN_GTT: | ||
721 | placement = " GTT"; | ||
722 | break; | ||
723 | case AMDGPU_GEM_DOMAIN_CPU: | ||
724 | default: | ||
725 | placement = " CPU"; | ||
726 | break; | ||
727 | } | ||
728 | seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx", | ||
729 | id, amdgpu_bo_size(bo), placement, | ||
730 | amdgpu_bo_gpu_offset(bo)); | ||
731 | |||
732 | pin_count = ACCESS_ONCE(bo->pin_count); | ||
733 | if (pin_count) | ||
734 | seq_printf(m, " pin count %d", pin_count); | ||
735 | seq_printf(m, "\n"); | ||
736 | |||
737 | return 0; | ||
738 | } | ||
739 | |||
691 | static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) | 740 | static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) |
692 | { | 741 | { |
693 | struct drm_info_node *node = (struct drm_info_node *)m->private; | 742 | struct drm_info_node *node = (struct drm_info_node *)m->private; |
694 | struct drm_device *dev = node->minor->dev; | 743 | struct drm_device *dev = node->minor->dev; |
695 | struct amdgpu_device *adev = dev->dev_private; | 744 | struct drm_file *file; |
696 | struct amdgpu_bo *rbo; | 745 | int r; |
697 | unsigned i = 0; | ||
698 | 746 | ||
699 | mutex_lock(&adev->gem.mutex); | 747 | r = mutex_lock_interruptible(&dev->struct_mutex); |
700 | list_for_each_entry(rbo, &adev->gem.objects, list) { | 748 | if (r) |
701 | unsigned domain; | 749 | return r; |
702 | const char *placement; | ||
703 | 750 | ||
704 | domain = amdgpu_mem_type_to_domain(rbo->tbo.mem.mem_type); | 751 | list_for_each_entry(file, &dev->filelist, lhead) { |
705 | switch (domain) { | 752 | struct task_struct *task; |
706 | case AMDGPU_GEM_DOMAIN_VRAM: | 753 | |
707 | placement = "VRAM"; | 754 | /* |
708 | break; | 755 | * Although we have a valid reference on file->pid, that does |
709 | case AMDGPU_GEM_DOMAIN_GTT: | 756 | * not guarantee that the task_struct who called get_pid() is |
710 | placement = " GTT"; | 757 | * still alive (e.g. get_pid(current) => fork() => exit()). |
711 | break; | 758 | * Therefore, we need to protect this ->comm access using RCU. |
712 | case AMDGPU_GEM_DOMAIN_CPU: | 759 | */ |
713 | default: | 760 | rcu_read_lock(); |
714 | placement = " CPU"; | 761 | task = pid_task(file->pid, PIDTYPE_PID); |
715 | break; | 762 | seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid), |
716 | } | 763 | task ? task->comm : "<unknown>"); |
717 | seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n", | 764 | rcu_read_unlock(); |
718 | i, amdgpu_bo_size(rbo) >> 10, amdgpu_bo_size(rbo) >> 20, | 765 | |
719 | placement, (unsigned long)rbo->pid); | 766 | spin_lock(&file->table_lock); |
720 | i++; | 767 | idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m); |
768 | spin_unlock(&file->table_lock); | ||
721 | } | 769 | } |
722 | mutex_unlock(&adev->gem.mutex); | 770 | |
771 | mutex_unlock(&dev->struct_mutex); | ||
723 | return 0; | 772 | return 0; |
724 | } | 773 | } |
725 | 774 | ||
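The rewritten amdgpu_debugfs_gem_info() walks the DRM file list instead of a driver-private BO list (which is removed outright, along with gem.mutex and the per-BO pid field). The pid handling shows the standard pattern for naming the owner of a DRM file: file->pid keeps the struct pid alive, but the task behind it may already have exited, so the task lookup and the ->comm access must happen under rcu_read_lock() with a NULL check, exactly as the comment in the hunk explains. Per-object printing is then delegated to idr_for_each() over each file's object_idr, so only handles userspace can still reach are reported.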
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 9e25edafa721..b5bdd5d59b58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | |||
@@ -55,10 +55,9 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev); | |||
55 | * suballocator. | 55 | * suballocator. |
56 | * Returns 0 on success, error on failure. | 56 | * Returns 0 on success, error on failure. |
57 | */ | 57 | */ |
58 | int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, | 58 | int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
59 | unsigned size, struct amdgpu_ib *ib) | 59 | unsigned size, struct amdgpu_ib *ib) |
60 | { | 60 | { |
61 | struct amdgpu_device *adev = ring->adev; | ||
62 | int r; | 61 | int r; |
63 | 62 | ||
64 | if (size) { | 63 | if (size) { |
@@ -75,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, | |||
75 | ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); | 74 | ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); |
76 | } | 75 | } |
77 | 76 | ||
78 | amdgpu_sync_create(&ib->sync); | ||
79 | |||
80 | ib->ring = ring; | ||
81 | ib->vm = vm; | 77 | ib->vm = vm; |
82 | 78 | ||
83 | return 0; | 79 | return 0; |
@@ -93,7 +89,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, | |||
93 | */ | 89 | */ |
94 | void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) | 90 | void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) |
95 | { | 91 | { |
96 | amdgpu_sync_free(adev, &ib->sync, &ib->fence->base); | ||
97 | amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); | 92 | amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); |
98 | if (ib->fence) | 93 | if (ib->fence) |
99 | fence_put(&ib->fence->base); | 94 | fence_put(&ib->fence->base); |
@@ -106,6 +101,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) | |||
106 | * @num_ibs: number of IBs to schedule | 101 | * @num_ibs: number of IBs to schedule |
107 | * @ibs: IB objects to schedule | 102 | * @ibs: IB objects to schedule |
108 | * @owner: owner for creating the fences | 103 | * @owner: owner for creating the fences |
104 | * @f: fence created during this submission | ||
109 | * | 105 | * |
110 | * Schedule an IB on the associated ring (all asics). | 106 | * Schedule an IB on the associated ring (all asics). |
111 | * Returns 0 on success, error on failure. | 107 | * Returns 0 on success, error on failure. |
@@ -120,11 +116,13 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) | |||
120 | * a CONST_IB), it will be put on the ring prior to the DE IB. Prior | 116 | * a CONST_IB), it will be put on the ring prior to the DE IB. Prior |
121 | * to SI there was just a DE IB. | 117 | * to SI there was just a DE IB. |
122 | */ | 118 | */ |
123 | int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | 119 | int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, |
124 | struct amdgpu_ib *ibs, void *owner) | 120 | struct amdgpu_ib *ibs, void *owner, |
121 | struct fence *last_vm_update, | ||
122 | struct fence **f) | ||
125 | { | 123 | { |
124 | struct amdgpu_device *adev = ring->adev; | ||
126 | struct amdgpu_ib *ib = &ibs[0]; | 125 | struct amdgpu_ib *ib = &ibs[0]; |
127 | struct amdgpu_ring *ring; | ||
128 | struct amdgpu_ctx *ctx, *old_ctx; | 126 | struct amdgpu_ctx *ctx, *old_ctx; |
129 | struct amdgpu_vm *vm; | 127 | struct amdgpu_vm *vm; |
130 | unsigned i; | 128 | unsigned i; |
@@ -133,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
133 | if (num_ibs == 0) | 131 | if (num_ibs == 0) |
134 | return -EINVAL; | 132 | return -EINVAL; |
135 | 133 | ||
136 | ring = ibs->ring; | ||
137 | ctx = ibs->ctx; | 134 | ctx = ibs->ctx; |
138 | vm = ibs->vm; | 135 | vm = ibs->vm; |
139 | 136 | ||
@@ -141,36 +138,21 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
141 | dev_err(adev->dev, "couldn't schedule ib\n"); | 138 | dev_err(adev->dev, "couldn't schedule ib\n"); |
142 | return -EINVAL; | 139 | return -EINVAL; |
143 | } | 140 | } |
144 | r = amdgpu_sync_wait(&ibs->sync); | ||
145 | if (r) { | ||
146 | dev_err(adev->dev, "IB sync failed (%d).\n", r); | ||
147 | return r; | ||
148 | } | ||
149 | r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); | ||
150 | if (r) { | ||
151 | dev_err(adev->dev, "scheduling IB failed (%d).\n", r); | ||
152 | return r; | ||
153 | } | ||
154 | 141 | ||
155 | if (vm) { | 142 | if (vm && !ibs->grabbed_vmid) { |
156 | /* grab a vm id if necessary */ | 143 | dev_err(adev->dev, "VM IB without ID\n"); |
157 | r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync); | 144 | return -EINVAL; |
158 | if (r) { | ||
159 | amdgpu_ring_unlock_undo(ring); | ||
160 | return r; | ||
161 | } | ||
162 | } | 145 | } |
163 | 146 | ||
164 | r = amdgpu_sync_rings(&ibs->sync, ring); | 147 | r = amdgpu_ring_alloc(ring, 256 * num_ibs); |
165 | if (r) { | 148 | if (r) { |
166 | amdgpu_ring_unlock_undo(ring); | 149 | dev_err(adev->dev, "scheduling IB failed (%d).\n", r); |
167 | dev_err(adev->dev, "failed to sync rings (%d)\n", r); | ||
168 | return r; | 150 | return r; |
169 | } | 151 | } |
170 | 152 | ||
171 | if (vm) { | 153 | if (vm) { |
172 | /* do context switch */ | 154 | /* do context switch */ |
173 | amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update); | 155 | amdgpu_vm_flush(ring, vm, last_vm_update); |
174 | 156 | ||
175 | if (ring->funcs->emit_gds_switch) | 157 | if (ring->funcs->emit_gds_switch) |
176 | amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id, | 158 | amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id, |
@@ -186,9 +168,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
186 | for (i = 0; i < num_ibs; ++i) { | 168 | for (i = 0; i < num_ibs; ++i) { |
187 | ib = &ibs[i]; | 169 | ib = &ibs[i]; |
188 | 170 | ||
189 | if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) { | 171 | if (ib->ctx != ctx || ib->vm != vm) { |
190 | ring->current_ctx = old_ctx; | 172 | ring->current_ctx = old_ctx; |
191 | amdgpu_ring_unlock_undo(ring); | 173 | amdgpu_ring_undo(ring); |
192 | return -EINVAL; | 174 | return -EINVAL; |
193 | } | 175 | } |
194 | amdgpu_ring_emit_ib(ring, ib); | 176 | amdgpu_ring_emit_ib(ring, ib); |
@@ -199,14 +181,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
199 | if (r) { | 181 | if (r) { |
200 | dev_err(adev->dev, "failed to emit fence (%d)\n", r); | 182 | dev_err(adev->dev, "failed to emit fence (%d)\n", r); |
201 | ring->current_ctx = old_ctx; | 183 | ring->current_ctx = old_ctx; |
202 | amdgpu_ring_unlock_undo(ring); | 184 | amdgpu_ring_undo(ring); |
203 | return r; | 185 | return r; |
204 | } | 186 | } |
205 | 187 | ||
206 | if (!amdgpu_enable_scheduler && ib->ctx) | ||
207 | ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, | ||
208 | &ib->fence->base); | ||
209 | |||
210 | /* wrap the last IB with fence */ | 188 | /* wrap the last IB with fence */ |
211 | if (ib->user) { | 189 | if (ib->user) { |
212 | uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); | 190 | uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); |
@@ -215,10 +193,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
215 | AMDGPU_FENCE_FLAG_64BIT); | 193 | AMDGPU_FENCE_FLAG_64BIT); |
216 | } | 194 | } |
217 | 195 | ||
218 | if (ib->vm) | 196 | if (f) |
219 | amdgpu_vm_fence(adev, ib->vm, &ib->fence->base); | 197 | *f = fence_get(&ib->fence->base); |
220 | 198 | ||
221 | amdgpu_ring_unlock_commit(ring); | 199 | amdgpu_ring_commit(ring); |
222 | return 0; | 200 | return 0; |
223 | } | 201 | } |
224 | 202 | ||
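Taken together, the amdgpu_ib.c changes strip IB submission down to "emit on this ring": ring locking, inter-ring syncing, semaphore reservation, and VM ID grabbing all migrate to the scheduler job (amdgpu_job.c, next), which is also why the ring allocation shrinks from (256 + AMDGPU_NUM_SYNCS * 8) to a flat 256 dwords per IB. The post-patch call contract, roughly:

	/* Caller has already resolved dependencies and grabbed a VM ID. */
	struct fence *fence;
	int r;

	r = amdgpu_ib_schedule(ring, num_ibs, ibs, owner,
			       sync.last_vm_update, &fence);
	if (r)
		return r;
	/* 'fence' signals once the last IB has executed. */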
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c new file mode 100644 index 000000000000..f29bbb96a881 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | |||
@@ -0,0 +1,159 @@ | |||
1 | /* | ||
2 | * Copyright 2015 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * | ||
23 | */ | ||
24 | #include <linux/kthread.h> | ||
25 | #include <linux/wait.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <drm/drmP.h> | ||
28 | #include "amdgpu.h" | ||
29 | #include "amdgpu_trace.h" | ||
30 | |||
31 | int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, | ||
32 | struct amdgpu_job **job) | ||
33 | { | ||
34 | size_t size = sizeof(struct amdgpu_job); | ||
35 | |||
36 | if (num_ibs == 0) | ||
37 | return -EINVAL; | ||
38 | |||
39 | size += sizeof(struct amdgpu_ib) * num_ibs; | ||
40 | |||
41 | *job = kzalloc(size, GFP_KERNEL); | ||
42 | if (!*job) | ||
43 | return -ENOMEM; | ||
44 | |||
45 | (*job)->adev = adev; | ||
46 | (*job)->ibs = (void *)&(*job)[1]; | ||
47 | (*job)->num_ibs = num_ibs; | ||
48 | |||
49 | amdgpu_sync_create(&(*job)->sync); | ||
50 | |||
51 | return 0; | ||
52 | } | ||
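amdgpu_job_alloc() uses the common trailing-array trick: the job struct and its IB array come from a single kzalloc(), with the array starting at the first byte past the struct, so one kfree() in amdgpu_job_free() releases both. Spelled out:

	size_t size = sizeof(struct amdgpu_job) +
		      num_ibs * sizeof(struct amdgpu_ib);
	struct amdgpu_job *job = kzalloc(size, GFP_KERNEL);

	if (!job)
		return -ENOMEM;
	job->ibs = (void *)&job[1];	/* IB array right behind the struct */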
53 | |||
54 | int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, | ||
55 | struct amdgpu_job **job) | ||
56 | { | ||
57 | int r; | ||
58 | |||
59 | r = amdgpu_job_alloc(adev, 1, job); | ||
60 | if (r) | ||
61 | return r; | ||
62 | |||
63 | r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]); | ||
64 | if (r) | ||
65 | kfree(*job); | ||
66 | |||
67 | return r; | ||
68 | } | ||
69 | |||
70 | void amdgpu_job_free(struct amdgpu_job *job) | ||
71 | { | ||
72 | unsigned i; | ||
73 | |||
74 | for (i = 0; i < job->num_ibs; ++i) | ||
75 | amdgpu_ib_free(job->adev, &job->ibs[i]); | ||
76 | |||
77 | amdgpu_bo_unref(&job->uf.bo); | ||
78 | amdgpu_sync_free(&job->sync); | ||
79 | kfree(job); | ||
80 | } | ||
81 | |||
82 | int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, | ||
83 | struct amd_sched_entity *entity, void *owner, | ||
84 | struct fence **f) | ||
85 | { | ||
86 | job->ring = ring; | ||
87 | job->base.sched = &ring->sched; | ||
88 | job->base.s_entity = entity; | ||
89 | job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner); | ||
90 | if (!job->base.s_fence) | ||
91 | return -ENOMEM; | ||
92 | |||
93 | *f = fence_get(&job->base.s_fence->base); | ||
94 | |||
95 | job->owner = owner; | ||
96 | amd_sched_entity_push_job(&job->base); | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) | ||
102 | { | ||
103 | struct amdgpu_job *job = to_amdgpu_job(sched_job); | ||
104 | struct amdgpu_vm *vm = job->ibs->vm; | ||
105 | |||
106 | struct fence *fence = amdgpu_sync_get_fence(&job->sync); | ||
107 | |||
108 | if (fence == NULL && vm && !job->ibs->grabbed_vmid) { | ||
109 | struct amdgpu_ring *ring = job->ring; | ||
110 | int r; | ||
111 | |||
112 | r = amdgpu_vm_grab_id(vm, ring, &job->sync, | ||
113 | &job->base.s_fence->base); | ||
114 | if (r) | ||
115 | DRM_ERROR("Error getting VM ID (%d)\n", r); | ||
116 | else | ||
117 | job->ibs->grabbed_vmid = true; | ||
118 | |||
119 | fence = amdgpu_sync_get_fence(&job->sync); | ||
120 | } | ||
121 | |||
122 | return fence; | ||
123 | } | ||
124 | |||
125 | static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job) | ||
126 | { | ||
127 | struct fence *fence = NULL; | ||
128 | struct amdgpu_job *job; | ||
129 | int r; | ||
130 | |||
131 | if (!sched_job) { | ||
132 | DRM_ERROR("job is null\n"); | ||
133 | return NULL; | ||
134 | } | ||
135 | job = to_amdgpu_job(sched_job); | ||
136 | |||
137 | r = amdgpu_sync_wait(&job->sync); | ||
138 | if (r) { | ||
139 | DRM_ERROR("failed to sync wait (%d)\n", r); | ||
140 | return NULL; | ||
141 | } | ||
142 | |||
143 | trace_amdgpu_sched_run_job(job); | ||
144 | r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner, | ||
145 | job->sync.last_vm_update, &fence); | ||
146 | if (r) { | ||
147 | DRM_ERROR("Error scheduling IBs (%d)\n", r); | ||
148 | goto err; | ||
149 | } | ||
150 | |||
151 | err: | ||
152 | amdgpu_job_free(job); | ||
153 | return fence; | ||
154 | } | ||
155 | |||
156 | struct amd_sched_backend_ops amdgpu_sched_ops = { | ||
157 | .dependency = amdgpu_job_dependency, | ||
158 | .run_job = amdgpu_job_run, | ||
159 | }; | ||
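The two callbacks in amdgpu_sched_ops split a submission into phases. The scheduler keeps calling .dependency, waiting on each fence it returns, until it gets NULL; amdgpu uses this both to drain the job's sync object and to grab a VM ID at the last possible moment. Only then does it call .run_job, which emits the IBs and returns the hardware fence. Note that in amdgpu_job_run() the success path also falls through the err label, so the job is freed whether or not amdgpu_ib_schedule() succeeded.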
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e23843f4d877..7805a8706af7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | |||
@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
447 | dev_info.max_memory_clock = adev->pm.default_mclk * 10; | 447 | dev_info.max_memory_clock = adev->pm.default_mclk * 10; |
448 | } | 448 | } |
449 | dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; | 449 | dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; |
450 | dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * | 450 | dev_info.num_rb_pipes = adev->gfx.config.num_rbs; |
451 | adev->gfx.config.max_shader_engines; | ||
452 | dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; | 451 | dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; |
453 | dev_info._pad = 0; | 452 | dev_info._pad = 0; |
454 | dev_info.ids_flags = 0; | 453 | dev_info.ids_flags = 0; |
@@ -727,6 +726,12 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, | |||
727 | 726 | ||
728 | /* Get associated drm_crtc: */ | 727 | /* Get associated drm_crtc: */ |
729 | crtc = &adev->mode_info.crtcs[pipe]->base; | 728 | crtc = &adev->mode_info.crtcs[pipe]->base; |
729 | if (!crtc) { | ||
730 | /* This can occur on driver load if some component fails to | ||
731 | * initialize completely and the driver is unloaded */ | ||
732 | DRM_ERROR("Uninitialized crtc %d\n", pipe); | ||
733 | return -EINVAL; | ||
734 | } | ||
730 | 735 | ||
731 | /* Helper routine in DRM core does all the work: */ | 736 | /* Helper routine in DRM core does all the work: */ |
732 | return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, | 737 | return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, |
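A subtlety in the added NULL check: crtcs[pipe] can be NULL early in driver init, and &crtcs[pipe]->base only stays NULL because drm_crtc base is the first member of struct amdgpu_crtc, so the member address equals the struct address (the kernel leans on this offset-zero arithmetic; strict C would call it undefined on a NULL pointer). A small self-contained sketch with made-up struct names:

    #include <stdio.h>
    #include <stddef.h>

    struct base { int id; };
    struct wrapper {
        struct base base;   /* offset 0: &w->base aliases (void *)w */
        int extra;
    };

    int main(void)
    {
        struct wrapper *w = NULL;
        /* The sketch avoids the UB with a ternary; the kernel code
         * takes the member address directly and tests the result. */
        struct base *b = w ? &w->base : NULL;

        printf("offsetof(base) = %zu, b == NULL: %d\n",
               offsetof(struct wrapper, base), b == NULL);
        return 0;
    }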
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index b1969f2b2038..d7ec9bd6755f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | |||
@@ -48,8 +48,7 @@ struct amdgpu_mn { | |||
48 | /* protected by adev->mn_lock */ | 48 | /* protected by adev->mn_lock */ |
49 | struct hlist_node node; | 49 | struct hlist_node node; |
50 | 50 | ||
51 | /* objects protected by lock */ | 51 | /* objects protected by mm->mmap_sem */ |
52 | struct mutex lock; | ||
53 | struct rb_root objects; | 52 | struct rb_root objects; |
54 | }; | 53 | }; |
55 | 54 | ||
@@ -73,21 +72,19 @@ static void amdgpu_mn_destroy(struct work_struct *work) | |||
73 | struct amdgpu_bo *bo, *next_bo; | 72 | struct amdgpu_bo *bo, *next_bo; |
74 | 73 | ||
75 | mutex_lock(&adev->mn_lock); | 74 | mutex_lock(&adev->mn_lock); |
76 | mutex_lock(&rmn->lock); | 75 | down_write(&rmn->mm->mmap_sem); |
77 | hash_del(&rmn->node); | 76 | hash_del(&rmn->node); |
78 | rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, | 77 | rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, |
79 | it.rb) { | 78 | it.rb) { |
80 | |||
81 | interval_tree_remove(&node->it, &rmn->objects); | ||
82 | list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { | 79 | list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { |
83 | bo->mn = NULL; | 80 | bo->mn = NULL; |
84 | list_del_init(&bo->mn_list); | 81 | list_del_init(&bo->mn_list); |
85 | } | 82 | } |
86 | kfree(node); | 83 | kfree(node); |
87 | } | 84 | } |
88 | mutex_unlock(&rmn->lock); | 85 | up_write(&rmn->mm->mmap_sem); |
89 | mutex_unlock(&adev->mn_lock); | 86 | mutex_unlock(&adev->mn_lock); |
90 | mmu_notifier_unregister(&rmn->mn, rmn->mm); | 87 | mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); |
91 | kfree(rmn); | 88 | kfree(rmn); |
92 | } | 89 | } |
93 | 90 | ||
@@ -129,8 +126,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, | |||
129 | /* notification is exclusive, but interval is inclusive */ | 126 | /* notification is exclusive, but interval is inclusive */ |
130 | end -= 1; | 127 | end -= 1; |
131 | 128 | ||
132 | mutex_lock(&rmn->lock); | ||
133 | |||
134 | it = interval_tree_iter_first(&rmn->objects, start, end); | 129 | it = interval_tree_iter_first(&rmn->objects, start, end); |
135 | while (it) { | 130 | while (it) { |
136 | struct amdgpu_mn_node *node; | 131 | struct amdgpu_mn_node *node; |
@@ -142,7 +137,8 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, | |||
142 | 137 | ||
143 | list_for_each_entry(bo, &node->bos, mn_list) { | 138 | list_for_each_entry(bo, &node->bos, mn_list) { |
144 | 139 | ||
145 | if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound) | 140 | if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, |
141 | end)) | ||
146 | continue; | 142 | continue; |
147 | 143 | ||
148 | r = amdgpu_bo_reserve(bo, true); | 144 | r = amdgpu_bo_reserve(bo, true); |
@@ -164,8 +160,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, | |||
164 | amdgpu_bo_unreserve(bo); | 160 | amdgpu_bo_unreserve(bo); |
165 | } | 161 | } |
166 | } | 162 | } |
167 | |||
168 | mutex_unlock(&rmn->lock); | ||
169 | } | 163 | } |
170 | 164 | ||
171 | static const struct mmu_notifier_ops amdgpu_mn_ops = { | 165 | static const struct mmu_notifier_ops amdgpu_mn_ops = { |
@@ -186,8 +180,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | |||
186 | struct amdgpu_mn *rmn; | 180 | struct amdgpu_mn *rmn; |
187 | int r; | 181 | int r; |
188 | 182 | ||
189 | down_write(&mm->mmap_sem); | ||
190 | mutex_lock(&adev->mn_lock); | 183 | mutex_lock(&adev->mn_lock); |
184 | down_write(&mm->mmap_sem); | ||
191 | 185 | ||
192 | hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) | 186 | hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) |
193 | if (rmn->mm == mm) | 187 | if (rmn->mm == mm) |
@@ -202,7 +196,6 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | |||
202 | rmn->adev = adev; | 196 | rmn->adev = adev; |
203 | rmn->mm = mm; | 197 | rmn->mm = mm; |
204 | rmn->mn.ops = &amdgpu_mn_ops; | 198 | rmn->mn.ops = &amdgpu_mn_ops; |
205 | mutex_init(&rmn->lock); | ||
206 | rmn->objects = RB_ROOT; | 199 | rmn->objects = RB_ROOT; |
207 | 200 | ||
208 | r = __mmu_notifier_register(&rmn->mn, mm); | 201 | r = __mmu_notifier_register(&rmn->mn, mm); |
@@ -212,14 +205,14 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | |||
212 | hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); | 205 | hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); |
213 | 206 | ||
214 | release_locks: | 207 | release_locks: |
215 | mutex_unlock(&adev->mn_lock); | ||
216 | up_write(&mm->mmap_sem); | 208 | up_write(&mm->mmap_sem); |
209 | mutex_unlock(&adev->mn_lock); | ||
217 | 210 | ||
218 | return rmn; | 211 | return rmn; |
219 | 212 | ||
220 | free_rmn: | 213 | free_rmn: |
221 | mutex_unlock(&adev->mn_lock); | ||
222 | up_write(&mm->mmap_sem); | 214 | up_write(&mm->mmap_sem); |
215 | mutex_unlock(&adev->mn_lock); | ||
223 | kfree(rmn); | 216 | kfree(rmn); |
224 | 217 | ||
225 | return ERR_PTR(r); | 218 | return ERR_PTR(r); |
@@ -249,7 +242,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | |||
249 | 242 | ||
250 | INIT_LIST_HEAD(&bos); | 243 | INIT_LIST_HEAD(&bos); |
251 | 244 | ||
252 | mutex_lock(&rmn->lock); | 245 | down_write(&rmn->mm->mmap_sem); |
253 | 246 | ||
254 | while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { | 247 | while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { |
255 | kfree(node); | 248 | kfree(node); |
@@ -263,7 +256,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | |||
263 | if (!node) { | 256 | if (!node) { |
264 | node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); | 257 | node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); |
265 | if (!node) { | 258 | if (!node) { |
266 | mutex_unlock(&rmn->lock); | 259 | up_write(&rmn->mm->mmap_sem); |
267 | return -ENOMEM; | 260 | return -ENOMEM; |
268 | } | 261 | } |
269 | } | 262 | } |
@@ -278,7 +271,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | |||
278 | 271 | ||
279 | interval_tree_insert(&node->it, &rmn->objects); | 272 | interval_tree_insert(&node->it, &rmn->objects); |
280 | 273 | ||
281 | mutex_unlock(&rmn->lock); | 274 | up_write(&rmn->mm->mmap_sem); |
282 | 275 | ||
283 | return 0; | 276 | return 0; |
284 | } | 277 | } |
@@ -297,13 +290,15 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) | |||
297 | struct list_head *head; | 290 | struct list_head *head; |
298 | 291 | ||
299 | mutex_lock(&adev->mn_lock); | 292 | mutex_lock(&adev->mn_lock); |
293 | |||
300 | rmn = bo->mn; | 294 | rmn = bo->mn; |
301 | if (rmn == NULL) { | 295 | if (rmn == NULL) { |
302 | mutex_unlock(&adev->mn_lock); | 296 | mutex_unlock(&adev->mn_lock); |
303 | return; | 297 | return; |
304 | } | 298 | } |
305 | 299 | ||
306 | mutex_lock(&rmn->lock); | 300 | down_write(&rmn->mm->mmap_sem); |
301 | |||
307 | /* save the next list entry for later */ | 302 | /* save the next list entry for later */ |
308 | head = bo->mn_list.next; | 303 | head = bo->mn_list.next; |
309 | 304 | ||
@@ -317,6 +312,6 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) | |||
317 | kfree(node); | 312 | kfree(node); |
318 | } | 313 | } |
319 | 314 | ||
320 | mutex_unlock(&rmn->lock); | 315 | up_write(&rmn->mm->mmap_sem); |
321 | mutex_unlock(&adev->mn_lock); | 316 | mutex_unlock(&adev->mn_lock); |
322 | } | 317 | } |
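The hunks above retire the private rmn->lock and protect the interval tree with mm->mmap_sem instead, while reordering amdgpu_mn_get so that adev->mn_lock is always taken before the semaphore. Keeping the two locks nesting the same way in every path is what rules out the AB-BA deadlock the race-condition fixes target. A minimal pthread sketch of that rule (names are illustrative, not kernel API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t mn_lock = PTHREAD_MUTEX_INITIALIZER;    /* outer */
    static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER; /* inner */

    static void update_objects(void)
    {
        /* Always outer first, inner second; reversing the order in
         * any one caller is what creates an AB-BA deadlock. */
        pthread_mutex_lock(&mn_lock);
        pthread_rwlock_wrlock(&mmap_sem);

        puts("interval tree update under both locks");

        pthread_rwlock_unlock(&mmap_sem);
        pthread_mutex_unlock(&mn_lock);
    }

    int main(void)
    {
        update_objects();
        return 0;
    }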
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index fdc1be8550da..8d432e6901af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | |||
@@ -390,7 +390,6 @@ struct amdgpu_crtc { | |||
390 | struct drm_display_mode native_mode; | 390 | struct drm_display_mode native_mode; |
391 | u32 pll_id; | 391 | u32 pll_id; |
392 | /* page flipping */ | 392 | /* page flipping */ |
393 | struct workqueue_struct *pflip_queue; | ||
394 | struct amdgpu_flip_work *pflip_works; | 393 | struct amdgpu_flip_work *pflip_works; |
395 | enum amdgpu_flip_status pflip_status; | 394 | enum amdgpu_flip_status pflip_status; |
396 | int deferred_flip_completion; | 395 | int deferred_flip_completion; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b8fbbd7699e4..9a025a77958d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | |||
@@ -97,9 +97,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) | |||
97 | 97 | ||
98 | amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL); | 98 | amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL); |
99 | 99 | ||
100 | mutex_lock(&bo->adev->gem.mutex); | ||
101 | list_del_init(&bo->list); | ||
102 | mutex_unlock(&bo->adev->gem.mutex); | ||
103 | drm_gem_object_release(&bo->gem_base); | 100 | drm_gem_object_release(&bo->gem_base); |
104 | amdgpu_bo_unref(&bo->parent); | 101 | amdgpu_bo_unref(&bo->parent); |
105 | kfree(bo->metadata); | 102 | kfree(bo->metadata); |
@@ -254,12 +251,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, | |||
254 | bo->adev = adev; | 251 | bo->adev = adev; |
255 | INIT_LIST_HEAD(&bo->list); | 252 | INIT_LIST_HEAD(&bo->list); |
256 | INIT_LIST_HEAD(&bo->va); | 253 | INIT_LIST_HEAD(&bo->va); |
257 | bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM | | 254 | bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | |
258 | AMDGPU_GEM_DOMAIN_GTT | | 255 | AMDGPU_GEM_DOMAIN_GTT | |
259 | AMDGPU_GEM_DOMAIN_CPU | | 256 | AMDGPU_GEM_DOMAIN_CPU | |
260 | AMDGPU_GEM_DOMAIN_GDS | | 257 | AMDGPU_GEM_DOMAIN_GDS | |
261 | AMDGPU_GEM_DOMAIN_GWS | | 258 | AMDGPU_GEM_DOMAIN_GWS | |
262 | AMDGPU_GEM_DOMAIN_OA); | 259 | AMDGPU_GEM_DOMAIN_OA); |
260 | bo->allowed_domains = bo->prefered_domains; | ||
261 | if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) | ||
262 | bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; | ||
263 | 263 | ||
264 | bo->flags = flags; | 264 | bo->flags = flags; |
265 | 265 | ||
@@ -367,7 +367,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, | |||
367 | int r, i; | 367 | int r, i; |
368 | unsigned fpfn, lpfn; | 368 | unsigned fpfn, lpfn; |
369 | 369 | ||
370 | if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm)) | 370 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) |
371 | return -EPERM; | 371 | return -EPERM; |
372 | 372 | ||
373 | if (WARN_ON_ONCE(min_offset > max_offset)) | 373 | if (WARN_ON_ONCE(min_offset > max_offset)) |
@@ -470,26 +470,6 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev) | |||
470 | return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); | 470 | return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); |
471 | } | 471 | } |
472 | 472 | ||
473 | void amdgpu_bo_force_delete(struct amdgpu_device *adev) | ||
474 | { | ||
475 | struct amdgpu_bo *bo, *n; | ||
476 | |||
477 | if (list_empty(&adev->gem.objects)) { | ||
478 | return; | ||
479 | } | ||
480 | dev_err(adev->dev, "Userspace still has active objects !\n"); | ||
481 | list_for_each_entry_safe(bo, n, &adev->gem.objects, list) { | ||
482 | dev_err(adev->dev, "%p %p %lu %lu force free\n", | ||
483 | &bo->gem_base, bo, (unsigned long)bo->gem_base.size, | ||
484 | *((unsigned long *)&bo->gem_base.refcount)); | ||
485 | mutex_lock(&bo->adev->gem.mutex); | ||
486 | list_del_init(&bo->list); | ||
487 | mutex_unlock(&bo->adev->gem.mutex); | ||
488 | /* this should unref the ttm bo */ | ||
489 | drm_gem_object_unreference_unlocked(&bo->gem_base); | ||
490 | } | ||
491 | } | ||
492 | |||
493 | int amdgpu_bo_init(struct amdgpu_device *adev) | 473 | int amdgpu_bo_init(struct amdgpu_device *adev) |
494 | { | 474 | { |
495 | /* Add an MTRR for the VRAM */ | 475 | /* Add an MTRR for the VRAM */ |
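The new prefered_domains/allowed_domains split above lets placement fall back under memory pressure: a user BO that prefers VRAM alone is additionally allowed in GTT, while kernel BOs keep their exact placement. A sketch of that rule; the constants mirror the AMDGPU_GEM_DOMAIN_* flag values, but the helper itself is illustrative:

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define GEM_DOMAIN_CPU  0x1
    #define GEM_DOMAIN_GTT  0x2
    #define GEM_DOMAIN_VRAM 0x4

    static uint32_t allowed_domains(uint32_t preferred, bool kernel_bo)
    {
        uint32_t allowed = preferred;

        /* Same rule as the hunk above: only user BOs pinned to VRAM
         * alone get the GTT fallback. */
        if (!kernel_bo && allowed == GEM_DOMAIN_VRAM)
            allowed |= GEM_DOMAIN_GTT;
        return allowed;
    }

    int main(void)
    {
        printf("user VRAM-only BO -> 0x%x\n",
               allowed_domains(GEM_DOMAIN_VRAM, false));   /* 0x6 */
        printf("kernel VRAM-only BO -> 0x%x\n",
               allowed_domains(GEM_DOMAIN_VRAM, true));    /* 0x4 */
        return 0;
    }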
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 5107fb291bdb..acc08018c6cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | |||
@@ -149,7 +149,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, | |||
149 | u64 *gpu_addr); | 149 | u64 *gpu_addr); |
150 | int amdgpu_bo_unpin(struct amdgpu_bo *bo); | 150 | int amdgpu_bo_unpin(struct amdgpu_bo *bo); |
151 | int amdgpu_bo_evict_vram(struct amdgpu_device *adev); | 151 | int amdgpu_bo_evict_vram(struct amdgpu_device *adev); |
152 | void amdgpu_bo_force_delete(struct amdgpu_device *adev); | ||
153 | int amdgpu_bo_init(struct amdgpu_device *adev); | 152 | int amdgpu_bo_init(struct amdgpu_device *adev); |
154 | void amdgpu_bo_fini(struct amdgpu_device *adev); | 153 | void amdgpu_bo_fini(struct amdgpu_device *adev); |
155 | int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, | 154 | int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 7d8d84eaea4a..d77b2bdbe800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | |||
@@ -119,7 +119,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, | |||
119 | level = amdgpu_dpm_get_performance_level(adev); | 119 | level = amdgpu_dpm_get_performance_level(adev); |
120 | return snprintf(buf, PAGE_SIZE, "%s\n", | 120 | return snprintf(buf, PAGE_SIZE, "%s\n", |
121 | (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : | 121 | (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : |
122 | (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : "high"); | 122 | (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : |
123 | (level == AMD_DPM_FORCED_LEVEL_HIGH) ? "high" : | ||
124 | (level == AMD_DPM_FORCED_LEVEL_MANUAL) ? "manual" : "unknown"); | ||
123 | } else { | 125 | } else { |
124 | enum amdgpu_dpm_forced_level level; | 126 | enum amdgpu_dpm_forced_level level; |
125 | 127 | ||
@@ -146,6 +148,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, | |||
146 | level = AMDGPU_DPM_FORCED_LEVEL_HIGH; | 148 | level = AMDGPU_DPM_FORCED_LEVEL_HIGH; |
147 | } else if (strncmp("auto", buf, strlen("auto")) == 0) { | 149 | } else if (strncmp("auto", buf, strlen("auto")) == 0) { |
148 | level = AMDGPU_DPM_FORCED_LEVEL_AUTO; | 150 | level = AMDGPU_DPM_FORCED_LEVEL_AUTO; |
151 | } else if (strncmp("manual", buf, strlen("manual")) == 0) { | ||
152 | level = AMDGPU_DPM_FORCED_LEVEL_MANUAL; | ||
149 | } else { | 153 | } else { |
150 | count = -EINVAL; | 154 | count = -EINVAL; |
151 | goto fail; | 155 | goto fail; |
@@ -172,10 +176,293 @@ fail: | |||
172 | return count; | 176 | return count; |
173 | } | 177 | } |
174 | 178 | ||
179 | static ssize_t amdgpu_get_pp_num_states(struct device *dev, | ||
180 | struct device_attribute *attr, | ||
181 | char *buf) | ||
182 | { | ||
183 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
184 | struct amdgpu_device *adev = ddev->dev_private; | ||
185 | struct pp_states_info data; | ||
186 | int i, buf_len; | ||
187 | |||
188 | if (adev->pp_enabled) | ||
189 | amdgpu_dpm_get_pp_num_states(adev, &data); | ||
190 | |||
191 | buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); | ||
192 | for (i = 0; i < data.nums; i++) | ||
193 | buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i, | ||
194 | (data.states[i] == POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" : | ||
195 | (data.states[i] == POWER_STATE_TYPE_BATTERY) ? "battery" : | ||
196 | (data.states[i] == POWER_STATE_TYPE_BALANCED) ? "balanced" : | ||
197 | (data.states[i] == POWER_STATE_TYPE_PERFORMANCE) ? "performance" : "default"); | ||
198 | |||
199 | return buf_len; | ||
200 | } | ||
201 | |||
202 | static ssize_t amdgpu_get_pp_cur_state(struct device *dev, | ||
203 | struct device_attribute *attr, | ||
204 | char *buf) | ||
205 | { | ||
206 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
207 | struct amdgpu_device *adev = ddev->dev_private; | ||
208 | struct pp_states_info data; | ||
209 | enum amd_pm_state_type pm = 0; | ||
210 | int i = 0; | ||
211 | |||
212 | if (adev->pp_enabled) { | ||
213 | |||
214 | pm = amdgpu_dpm_get_current_power_state(adev); | ||
215 | amdgpu_dpm_get_pp_num_states(adev, &data); | ||
216 | |||
217 | for (i = 0; i < data.nums; i++) { | ||
218 | if (pm == data.states[i]) | ||
219 | break; | ||
220 | } | ||
221 | |||
222 | if (i == data.nums) | ||
223 | i = -EINVAL; | ||
224 | } | ||
225 | |||
226 | return snprintf(buf, PAGE_SIZE, "%d\n", i); | ||
227 | } | ||
228 | |||
229 | static ssize_t amdgpu_get_pp_force_state(struct device *dev, | ||
230 | struct device_attribute *attr, | ||
231 | char *buf) | ||
232 | { | ||
233 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
234 | struct amdgpu_device *adev = ddev->dev_private; | ||
235 | struct pp_states_info data; | ||
236 | enum amd_pm_state_type pm = 0; | ||
237 | int i; | ||
238 | |||
239 | if (adev->pp_force_state_enabled && adev->pp_enabled) { | ||
240 | pm = amdgpu_dpm_get_current_power_state(adev); | ||
241 | amdgpu_dpm_get_pp_num_states(adev, &data); | ||
242 | |||
243 | for (i = 0; i < data.nums; i++) { | ||
244 | if (pm == data.states[i]) | ||
245 | break; | ||
246 | } | ||
247 | |||
248 | if (i == data.nums) | ||
249 | i = -EINVAL; | ||
250 | |||
251 | return snprintf(buf, PAGE_SIZE, "%d\n", i); | ||
252 | |||
253 | } else | ||
254 | return snprintf(buf, PAGE_SIZE, "\n"); | ||
255 | } | ||
256 | |||
257 | static ssize_t amdgpu_set_pp_force_state(struct device *dev, | ||
258 | struct device_attribute *attr, | ||
259 | const char *buf, | ||
260 | size_t count) | ||
261 | { | ||
262 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
263 | struct amdgpu_device *adev = ddev->dev_private; | ||
264 | enum amd_pm_state_type state = 0; | ||
265 | long idx; | ||
266 | int ret; | ||
267 | |||
268 | if (strlen(buf) == 1) | ||
269 | adev->pp_force_state_enabled = false; | ||
270 | else { | ||
271 | ret = kstrtol(buf, 0, &idx); | ||
272 | |||
273 | if (ret) { | ||
274 | count = -EINVAL; | ||
275 | goto fail; | ||
276 | } | ||
277 | |||
278 | if (adev->pp_enabled) { | ||
279 | struct pp_states_info data; | ||
280 | amdgpu_dpm_get_pp_num_states(adev, &data); | ||
281 | state = data.states[idx]; | ||
282 | /* only set user selected power states */ | ||
283 | if (state != POWER_STATE_TYPE_INTERNAL_BOOT && | ||
284 | state != POWER_STATE_TYPE_DEFAULT) { | ||
285 | amdgpu_dpm_dispatch_task(adev, | ||
286 | AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); | ||
287 | adev->pp_force_state_enabled = true; | ||
288 | } | ||
289 | } | ||
290 | } | ||
291 | fail: | ||
292 | return count; | ||
293 | } | ||
294 | |||
295 | static ssize_t amdgpu_get_pp_table(struct device *dev, | ||
296 | struct device_attribute *attr, | ||
297 | char *buf) | ||
298 | { | ||
299 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
300 | struct amdgpu_device *adev = ddev->dev_private; | ||
301 | char *table = NULL; | ||
302 | int size, i; | ||
303 | |||
304 | if (adev->pp_enabled) | ||
305 | size = amdgpu_dpm_get_pp_table(adev, &table); | ||
306 | else | ||
307 | return 0; | ||
308 | |||
309 | if (size >= PAGE_SIZE) | ||
310 | size = PAGE_SIZE - 1; | ||
311 | |||
312 | for (i = 0; i < size; i++) { | ||
313 | sprintf(buf + i, "%02x", table[i]); | ||
314 | } | ||
315 | sprintf(buf + i, "\n"); | ||
316 | |||
317 | return size; | ||
318 | } | ||
319 | |||
320 | static ssize_t amdgpu_set_pp_table(struct device *dev, | ||
321 | struct device_attribute *attr, | ||
322 | const char *buf, | ||
323 | size_t count) | ||
324 | { | ||
325 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
326 | struct amdgpu_device *adev = ddev->dev_private; | ||
327 | |||
328 | if (adev->pp_enabled) | ||
329 | amdgpu_dpm_set_pp_table(adev, buf, count); | ||
330 | |||
331 | return count; | ||
332 | } | ||
333 | |||
334 | static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, | ||
335 | struct device_attribute *attr, | ||
336 | char *buf) | ||
337 | { | ||
338 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
339 | struct amdgpu_device *adev = ddev->dev_private; | ||
340 | ssize_t size = 0; | ||
341 | |||
342 | if (adev->pp_enabled) | ||
343 | size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); | ||
344 | |||
345 | return size; | ||
346 | } | ||
347 | |||
348 | static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, | ||
349 | struct device_attribute *attr, | ||
350 | const char *buf, | ||
351 | size_t count) | ||
352 | { | ||
353 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
354 | struct amdgpu_device *adev = ddev->dev_private; | ||
355 | int ret; | ||
356 | long level; | ||
357 | |||
358 | ret = kstrtol(buf, 0, &level); | ||
359 | |||
360 | if (ret) { | ||
361 | count = -EINVAL; | ||
362 | goto fail; | ||
363 | } | ||
364 | |||
365 | if (adev->pp_enabled) | ||
366 | amdgpu_dpm_force_clock_level(adev, PP_SCLK, level); | ||
367 | fail: | ||
368 | return count; | ||
369 | } | ||
370 | |||
371 | static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, | ||
372 | struct device_attribute *attr, | ||
373 | char *buf) | ||
374 | { | ||
375 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
376 | struct amdgpu_device *adev = ddev->dev_private; | ||
377 | ssize_t size = 0; | ||
378 | |||
379 | if (adev->pp_enabled) | ||
380 | size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); | ||
381 | |||
382 | return size; | ||
383 | } | ||
384 | |||
385 | static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, | ||
386 | struct device_attribute *attr, | ||
387 | const char *buf, | ||
388 | size_t count) | ||
389 | { | ||
390 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
391 | struct amdgpu_device *adev = ddev->dev_private; | ||
392 | int ret; | ||
393 | long level; | ||
394 | |||
395 | ret = kstrtol(buf, 0, &level); | ||
396 | |||
397 | if (ret) { | ||
398 | count = -EINVAL; | ||
399 | goto fail; | ||
400 | } | ||
401 | |||
402 | if (adev->pp_enabled) | ||
403 | amdgpu_dpm_force_clock_level(adev, PP_MCLK, level); | ||
404 | fail: | ||
405 | return count; | ||
406 | } | ||
407 | |||
408 | static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, | ||
409 | struct device_attribute *attr, | ||
410 | char *buf) | ||
411 | { | ||
412 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
413 | struct amdgpu_device *adev = ddev->dev_private; | ||
414 | ssize_t size = 0; | ||
415 | |||
416 | if (adev->pp_enabled) | ||
417 | size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); | ||
418 | |||
419 | return size; | ||
420 | } | ||
421 | |||
422 | static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, | ||
423 | struct device_attribute *attr, | ||
424 | const char *buf, | ||
425 | size_t count) | ||
426 | { | ||
427 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
428 | struct amdgpu_device *adev = ddev->dev_private; | ||
429 | int ret; | ||
430 | long level; | ||
431 | |||
432 | ret = kstrtol(buf, 0, &level); | ||
433 | |||
434 | if (ret) { | ||
435 | count = -EINVAL; | ||
436 | goto fail; | ||
437 | } | ||
438 | |||
439 | if (adev->pp_enabled) | ||
440 | amdgpu_dpm_force_clock_level(adev, PP_PCIE, level); | ||
441 | fail: | ||
442 | return count; | ||
443 | } | ||
444 | |||
175 | static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); | 445 | static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); |
176 | static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, | 446 | static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, |
177 | amdgpu_get_dpm_forced_performance_level, | 447 | amdgpu_get_dpm_forced_performance_level, |
178 | amdgpu_set_dpm_forced_performance_level); | 448 | amdgpu_set_dpm_forced_performance_level); |
449 | static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL); | ||
450 | static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL); | ||
451 | static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR, | ||
452 | amdgpu_get_pp_force_state, | ||
453 | amdgpu_set_pp_force_state); | ||
454 | static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR, | ||
455 | amdgpu_get_pp_table, | ||
456 | amdgpu_set_pp_table); | ||
457 | static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR, | ||
458 | amdgpu_get_pp_dpm_sclk, | ||
459 | amdgpu_set_pp_dpm_sclk); | ||
460 | static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR, | ||
461 | amdgpu_get_pp_dpm_mclk, | ||
462 | amdgpu_set_pp_dpm_mclk); | ||
463 | static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR, | ||
464 | amdgpu_get_pp_dpm_pcie, | ||
465 | amdgpu_set_pp_dpm_pcie); | ||
179 | 466 | ||
180 | static ssize_t amdgpu_hwmon_show_temp(struct device *dev, | 467 | static ssize_t amdgpu_hwmon_show_temp(struct device *dev, |
181 | struct device_attribute *attr, | 468 | struct device_attribute *attr, |
@@ -623,14 +910,12 @@ force: | |||
623 | amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps); | 910 | amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps); |
624 | } | 911 | } |
625 | 912 | ||
626 | mutex_lock(&adev->ring_lock); | ||
627 | |||
628 | /* update whether vce is active */ | 913 | /* update whether vce is active */ |
629 | ps->vce_active = adev->pm.dpm.vce_active; | 914 | ps->vce_active = adev->pm.dpm.vce_active; |
630 | 915 | ||
631 | ret = amdgpu_dpm_pre_set_power_state(adev); | 916 | ret = amdgpu_dpm_pre_set_power_state(adev); |
632 | if (ret) | 917 | if (ret) |
633 | goto done; | 918 | return; |
634 | 919 | ||
635 | /* update display watermarks based on new power state */ | 920 | /* update display watermarks based on new power state */ |
636 | amdgpu_display_bandwidth_update(adev); | 921 | amdgpu_display_bandwidth_update(adev); |
@@ -667,9 +952,6 @@ force: | |||
667 | amdgpu_dpm_force_performance_level(adev, adev->pm.dpm.forced_level); | 952 | amdgpu_dpm_force_performance_level(adev, adev->pm.dpm.forced_level); |
668 | } | 953 | } |
669 | } | 954 | } |
670 | |||
671 | done: | ||
672 | mutex_unlock(&adev->ring_lock); | ||
673 | } | 955 | } |
674 | 956 | ||
675 | void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) | 957 | void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) |
@@ -770,6 +1052,44 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) | |||
770 | DRM_ERROR("failed to create device file for dpm state\n"); | 1052 | DRM_ERROR("failed to create device file for dpm state\n"); |
771 | return ret; | 1053 | return ret; |
772 | } | 1054 | } |
1055 | |||
1056 | if (adev->pp_enabled) { | ||
1057 | ret = device_create_file(adev->dev, &dev_attr_pp_num_states); | ||
1058 | if (ret) { | ||
1059 | DRM_ERROR("failed to create device file pp_num_states\n"); | ||
1060 | return ret; | ||
1061 | } | ||
1062 | ret = device_create_file(adev->dev, &dev_attr_pp_cur_state); | ||
1063 | if (ret) { | ||
1064 | DRM_ERROR("failed to create device file pp_cur_state\n"); | ||
1065 | return ret; | ||
1066 | } | ||
1067 | ret = device_create_file(adev->dev, &dev_attr_pp_force_state); | ||
1068 | if (ret) { | ||
1069 | DRM_ERROR("failed to create device file pp_force_state\n"); | ||
1070 | return ret; | ||
1071 | } | ||
1072 | ret = device_create_file(adev->dev, &dev_attr_pp_table); | ||
1073 | if (ret) { | ||
1074 | DRM_ERROR("failed to create device file pp_table\n"); | ||
1075 | return ret; | ||
1076 | } | ||
1077 | ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); | ||
1078 | if (ret) { | ||
1079 | DRM_ERROR("failed to create device file pp_dpm_sclk\n"); | ||
1080 | return ret; | ||
1081 | } | ||
1082 | ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); | ||
1083 | if (ret) { | ||
1084 | DRM_ERROR("failed to create device file pp_dpm_mclk\n"); | ||
1085 | return ret; | ||
1086 | } | ||
1087 | ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); | ||
1088 | if (ret) { | ||
1089 | DRM_ERROR("failed to create device file pp_dpm_pcie\n"); | ||
1090 | return ret; | ||
1091 | } | ||
1092 | } | ||
773 | ret = amdgpu_debugfs_pm_init(adev); | 1093 | ret = amdgpu_debugfs_pm_init(adev); |
774 | if (ret) { | 1094 | if (ret) { |
775 | DRM_ERROR("Failed to register debugfs file for dpm!\n"); | 1095 | DRM_ERROR("Failed to register debugfs file for dpm!\n"); |
@@ -787,6 +1107,15 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) | |||
787 | hwmon_device_unregister(adev->pm.int_hwmon_dev); | 1107 | hwmon_device_unregister(adev->pm.int_hwmon_dev); |
788 | device_remove_file(adev->dev, &dev_attr_power_dpm_state); | 1108 | device_remove_file(adev->dev, &dev_attr_power_dpm_state); |
789 | device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); | 1109 | device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); |
1110 | if (adev->pp_enabled) { | ||
1111 | device_remove_file(adev->dev, &dev_attr_pp_num_states); | ||
1112 | device_remove_file(adev->dev, &dev_attr_pp_cur_state); | ||
1113 | device_remove_file(adev->dev, &dev_attr_pp_force_state); | ||
1114 | device_remove_file(adev->dev, &dev_attr_pp_table); | ||
1115 | device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); | ||
1116 | device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); | ||
1117 | device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); | ||
1118 | } | ||
790 | } | 1119 | } |
791 | 1120 | ||
792 | void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) | 1121 | void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) |
@@ -802,13 +1131,11 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) | |||
802 | int i = 0; | 1131 | int i = 0; |
803 | 1132 | ||
804 | amdgpu_display_bandwidth_update(adev); | 1133 | amdgpu_display_bandwidth_update(adev); |
805 | mutex_lock(&adev->ring_lock); | 1134 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { |
806 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { | 1135 | struct amdgpu_ring *ring = adev->rings[i]; |
807 | struct amdgpu_ring *ring = adev->rings[i]; | 1136 | if (ring && ring->ready) |
808 | if (ring && ring->ready) | 1137 | amdgpu_fence_wait_empty(ring); |
809 | amdgpu_fence_wait_empty(ring); | 1138 | } |
810 | } | ||
811 | mutex_unlock(&adev->ring_lock); | ||
812 | 1139 | ||
813 | amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL); | 1140 | amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL); |
814 | } else { | 1141 | } else { |
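The pp_* attributes registered above land next to the existing power_dpm_* files under the device's sysfs node. A small user-space sketch that lists the sclk levels and then forces level 0, matching what amdgpu_set_pp_dpm_sclk() parses with kstrtol(); the card0 path is an assumption (adjust for your GPU), writing requires root, and depending on the hwmgr you may first need to echo "manual" into power_dpm_force_performance_level:

    #include <stdio.h>

    #define PP_SCLK "/sys/class/drm/card0/device/pp_dpm_sclk"

    int main(void)
    {
        char line[128];
        FILE *f = fopen(PP_SCLK, "r");

        if (!f) { perror("open " PP_SCLK); return 1; }
        while (fgets(line, sizeof(line), f))
            fputs(line, stdout);       /* one "N: freq" entry per level */
        fclose(f);

        f = fopen(PP_SCLK, "w");
        if (!f) { perror("reopen for write"); return 1; }
        fprintf(f, "0\n");             /* force the lowest sclk level */
        fclose(f);
        return 0;
    }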
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 59f735a933a9..be6388f73ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | |||
@@ -73,10 +73,6 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, | |||
73 | if (ret) | 73 | if (ret) |
74 | return ERR_PTR(ret); | 74 | return ERR_PTR(ret); |
75 | 75 | ||
76 | mutex_lock(&adev->gem.mutex); | ||
77 | list_add_tail(&bo->list, &adev->gem.objects); | ||
78 | mutex_unlock(&adev->gem.mutex); | ||
79 | |||
80 | return &bo->gem_base; | 76 | return &bo->gem_base; |
81 | } | 77 | } |
82 | 78 | ||
@@ -121,7 +117,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, | |||
121 | { | 117 | { |
122 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); | 118 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); |
123 | 119 | ||
124 | if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm)) | 120 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) |
125 | return ERR_PTR(-EPERM); | 121 | return ERR_PTR(-EPERM); |
126 | 122 | ||
127 | return drm_gem_prime_export(dev, gobj, flags); | 123 | return drm_gem_prime_export(dev, gobj, flags); |
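Both prime.c hunks swap a boolean query for a pointer-returning one: amdgpu_ttm_tt_get_usermm() hands back the owning mm (or NULL), which doubles as the "is this a userptr BO?" test that gates dma-buf export with -EPERM. A sketch of that idiom with stand-in types:

    #include <stdio.h>
    #include <stddef.h>

    struct mm { int dummy; };
    struct ttm_tt { struct mm *usermm; };

    static struct mm *tt_get_usermm(struct ttm_tt *ttm)
    {
        return ttm ? ttm->usermm : NULL;   /* NULL for non-userptr BOs */
    }

    int main(void)
    {
        struct mm m;
        struct ttm_tt user = { &m }, normal = { NULL };

        /* Export is refused exactly when this is non-NULL. */
        printf("userptr? %d / %d\n",
               tt_get_usermm(&user) != NULL, tt_get_usermm(&normal) != NULL);
        return 0;
    }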
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d1f234dd2126..56c07e3fdb33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | |||
@@ -49,28 +49,6 @@ | |||
49 | static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); | 49 | static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); |
50 | 50 | ||
51 | /** | 51 | /** |
52 | * amdgpu_ring_free_size - update the free size | ||
53 | * | ||
54 | * @adev: amdgpu_device pointer | ||
55 | * @ring: amdgpu_ring structure holding ring information | ||
56 | * | ||
57 | * Update the free dw slots in the ring buffer (all asics). | ||
58 | */ | ||
59 | void amdgpu_ring_free_size(struct amdgpu_ring *ring) | ||
60 | { | ||
61 | uint32_t rptr = amdgpu_ring_get_rptr(ring); | ||
62 | |||
63 | /* This works because ring_size is a power of 2 */ | ||
64 | ring->ring_free_dw = rptr + (ring->ring_size / 4); | ||
65 | ring->ring_free_dw -= ring->wptr; | ||
66 | ring->ring_free_dw &= ring->ptr_mask; | ||
67 | if (!ring->ring_free_dw) { | ||
68 | /* this is an empty ring */ | ||
69 | ring->ring_free_dw = ring->ring_size / 4; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | /** | ||
74 | * amdgpu_ring_alloc - allocate space on the ring buffer | 52 | * amdgpu_ring_alloc - allocate space on the ring buffer |
75 | * | 53 | * |
76 | * @adev: amdgpu_device pointer | 54 | * @adev: amdgpu_device pointer |
@@ -82,50 +60,18 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring) | |||
82 | */ | 60 | */ |
83 | int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw) | 61 | int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw) |
84 | { | 62 | { |
85 | int r; | ||
86 | |||
87 | /* make sure we aren't trying to allocate more space than there is on the ring */ | ||
88 | if (ndw > (ring->ring_size / 4)) | ||
89 | return -ENOMEM; | ||
90 | /* Align requested size with padding so unlock_commit can | 63 | /* Align requested size with padding so unlock_commit can |
91 | * pad safely */ | 64 | * pad safely */ |
92 | amdgpu_ring_free_size(ring); | ||
93 | ndw = (ndw + ring->align_mask) & ~ring->align_mask; | 65 | ndw = (ndw + ring->align_mask) & ~ring->align_mask; |
94 | while (ndw > (ring->ring_free_dw - 1)) { | ||
95 | amdgpu_ring_free_size(ring); | ||
96 | if (ndw < ring->ring_free_dw) { | ||
97 | break; | ||
98 | } | ||
99 | r = amdgpu_fence_wait_next(ring); | ||
100 | if (r) | ||
101 | return r; | ||
102 | } | ||
103 | ring->count_dw = ndw; | ||
104 | ring->wptr_old = ring->wptr; | ||
105 | return 0; | ||
106 | } | ||
107 | 66 | ||
108 | /** | 67 | /* Make sure we aren't trying to allocate more space |
109 | * amdgpu_ring_lock - lock the ring and allocate space on it | 68 | * than the maximum for one submission |
110 | * | 69 | */ |
111 | * @adev: amdgpu_device pointer | 70 | if (WARN_ON_ONCE(ndw > ring->max_dw)) |
112 | * @ring: amdgpu_ring structure holding ring information | 71 | return -ENOMEM; |
113 | * @ndw: number of dwords to allocate in the ring buffer | ||
114 | * | ||
115 | * Lock the ring and allocate @ndw dwords in the ring buffer | ||
116 | * (all asics). | ||
117 | * Returns 0 on success, error on failure. | ||
118 | */ | ||
119 | int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw) | ||
120 | { | ||
121 | int r; | ||
122 | 72 | ||
123 | mutex_lock(ring->ring_lock); | 73 | ring->count_dw = ndw; |
124 | r = amdgpu_ring_alloc(ring, ndw); | 74 | ring->wptr_old = ring->wptr; |
125 | if (r) { | ||
126 | mutex_unlock(ring->ring_lock); | ||
127 | return r; | ||
128 | } | ||
129 | return 0; | 75 | return 0; |
130 | } | 76 | } |
131 | 77 | ||
@@ -144,6 +90,19 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | |||
144 | amdgpu_ring_write(ring, ring->nop); | 90 | amdgpu_ring_write(ring, ring->nop); |
145 | } | 91 | } |
146 | 92 | ||
93 | /** amdgpu_ring_generic_pad_ib - pad IB with NOP packets | ||
94 | * | ||
95 | * @ring: amdgpu_ring structure holding ring information | ||
96 | * @ib: IB to add NOP packets to | ||
97 | * | ||
98 | * This is the generic pad_ib function for rings except SDMA | ||
99 | */ | ||
100 | void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) | ||
101 | { | ||
102 | while (ib->length_dw & ring->align_mask) | ||
103 | ib->ptr[ib->length_dw++] = ring->nop; | ||
104 | } | ||
105 | |||
147 | /** | 106 | /** |
148 | * amdgpu_ring_commit - tell the GPU to execute the new | 107 | * amdgpu_ring_commit - tell the GPU to execute the new |
149 | * commands on the ring buffer | 108 | * commands on the ring buffer |
@@ -168,20 +127,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) | |||
168 | } | 127 | } |
169 | 128 | ||
170 | /** | 129 | /** |
171 | * amdgpu_ring_unlock_commit - tell the GPU to execute the new | ||
172 | * commands on the ring buffer and unlock it | ||
173 | * | ||
174 | * @ring: amdgpu_ring structure holding ring information | ||
175 | * | ||
176 | * Call amdgpu_ring_commit() then unlock the ring (all asics). | ||
177 | */ | ||
178 | void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring) | ||
179 | { | ||
180 | amdgpu_ring_commit(ring); | ||
181 | mutex_unlock(ring->ring_lock); | ||
182 | } | ||
183 | |||
184 | /** | ||
185 | * amdgpu_ring_undo - reset the wptr | 130 | * amdgpu_ring_undo - reset the wptr |
186 | * | 131 | * |
187 | * @ring: amdgpu_ring structure holding ring information | 132 | * @ring: amdgpu_ring structure holding ring information |
@@ -194,19 +139,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) | |||
194 | } | 139 | } |
195 | 140 | ||
196 | /** | 141 | /** |
197 | * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring | ||
198 | * | ||
199 | * @ring: amdgpu_ring structure holding ring information | ||
200 | * | ||
201 | * Call amdgpu_ring_undo() then unlock the ring (all asics). | ||
202 | */ | ||
203 | void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring) | ||
204 | { | ||
205 | amdgpu_ring_undo(ring); | ||
206 | mutex_unlock(ring->ring_lock); | ||
207 | } | ||
208 | |||
209 | /** | ||
210 | * amdgpu_ring_backup - Back up the content of a ring | 142 | * amdgpu_ring_backup - Back up the content of a ring |
211 | * | 143 | * |
212 | * @ring: the ring we want to back up | 144 | * @ring: the ring we want to back up |
@@ -218,43 +150,32 @@ unsigned amdgpu_ring_backup(struct amdgpu_ring *ring, | |||
218 | { | 150 | { |
219 | unsigned size, ptr, i; | 151 | unsigned size, ptr, i; |
220 | 152 | ||
221 | /* just in case lock the ring */ | ||
222 | mutex_lock(ring->ring_lock); | ||
223 | *data = NULL; | 153 | *data = NULL; |
224 | 154 | ||
225 | if (ring->ring_obj == NULL) { | 155 | if (ring->ring_obj == NULL) |
226 | mutex_unlock(ring->ring_lock); | ||
227 | return 0; | 156 | return 0; |
228 | } | ||
229 | 157 | ||
230 | /* it doesn't make sense to save anything if all fences are signaled */ | 158 | /* it doesn't make sense to save anything if all fences are signaled */ |
231 | if (!amdgpu_fence_count_emitted(ring)) { | 159 | if (!amdgpu_fence_count_emitted(ring)) |
232 | mutex_unlock(ring->ring_lock); | ||
233 | return 0; | 160 | return 0; |
234 | } | ||
235 | 161 | ||
236 | ptr = le32_to_cpu(*ring->next_rptr_cpu_addr); | 162 | ptr = le32_to_cpu(*ring->next_rptr_cpu_addr); |
237 | 163 | ||
238 | size = ring->wptr + (ring->ring_size / 4); | 164 | size = ring->wptr + (ring->ring_size / 4); |
239 | size -= ptr; | 165 | size -= ptr; |
240 | size &= ring->ptr_mask; | 166 | size &= ring->ptr_mask; |
241 | if (size == 0) { | 167 | if (size == 0) |
242 | mutex_unlock(ring->ring_lock); | ||
243 | return 0; | 168 | return 0; |
244 | } | ||
245 | 169 | ||
246 | /* and then save the content of the ring */ | 170 | /* and then save the content of the ring */ |
247 | *data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); | 171 | *data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); |
248 | if (!*data) { | 172 | if (!*data) |
249 | mutex_unlock(ring->ring_lock); | ||
250 | return 0; | 173 | return 0; |
251 | } | ||
252 | for (i = 0; i < size; ++i) { | 174 | for (i = 0; i < size; ++i) { |
253 | (*data)[i] = ring->ring[ptr++]; | 175 | (*data)[i] = ring->ring[ptr++]; |
254 | ptr &= ring->ptr_mask; | 176 | ptr &= ring->ptr_mask; |
255 | } | 177 | } |
256 | 178 | ||
257 | mutex_unlock(ring->ring_lock); | ||
258 | return size; | 179 | return size; |
259 | } | 180 | } |
260 | 181 | ||
@@ -276,7 +197,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring, | |||
276 | return 0; | 197 | return 0; |
277 | 198 | ||
278 | /* restore the saved ring content */ | 199 | /* restore the saved ring content */ |
279 | r = amdgpu_ring_lock(ring, size); | 200 | r = amdgpu_ring_alloc(ring, size); |
280 | if (r) | 201 | if (r) |
281 | return r; | 202 | return r; |
282 | 203 | ||
@@ -284,7 +205,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring, | |||
284 | amdgpu_ring_write(ring, data[i]); | 205 | amdgpu_ring_write(ring, data[i]); |
285 | } | 206 | } |
286 | 207 | ||
287 | amdgpu_ring_unlock_commit(ring); | 208 | amdgpu_ring_commit(ring); |
288 | kfree(data); | 209 | kfree(data); |
289 | return 0; | 210 | return 0; |
290 | } | 211 | } |
@@ -352,7 +273,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
352 | return r; | 273 | return r; |
353 | } | 274 | } |
354 | 275 | ||
355 | ring->ring_lock = &adev->ring_lock; | ||
356 | /* Align ring size */ | 276 | /* Align ring size */ |
357 | rb_bufsz = order_base_2(ring_size / 8); | 277 | rb_bufsz = order_base_2(ring_size / 8); |
358 | ring_size = (1 << (rb_bufsz + 1)) * 4; | 278 | ring_size = (1 << (rb_bufsz + 1)) * 4; |
@@ -389,7 +309,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
389 | } | 309 | } |
390 | } | 310 | } |
391 | ring->ptr_mask = (ring->ring_size / 4) - 1; | 311 | ring->ptr_mask = (ring->ring_size / 4) - 1; |
392 | ring->ring_free_dw = ring->ring_size / 4; | 312 | ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4, |
313 | amdgpu_sched_hw_submission); | ||
393 | 314 | ||
394 | if (amdgpu_debugfs_ring_init(adev, ring)) { | 315 | if (amdgpu_debugfs_ring_init(adev, ring)) { |
395 | DRM_ERROR("Failed to register debugfs file for rings !\n"); | 316 | DRM_ERROR("Failed to register debugfs file for rings !\n"); |
@@ -410,15 +331,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) | |||
410 | int r; | 331 | int r; |
411 | struct amdgpu_bo *ring_obj; | 332 | struct amdgpu_bo *ring_obj; |
412 | 333 | ||
413 | if (ring->ring_lock == NULL) | ||
414 | return; | ||
415 | |||
416 | mutex_lock(ring->ring_lock); | ||
417 | ring_obj = ring->ring_obj; | 334 | ring_obj = ring->ring_obj; |
418 | ring->ready = false; | 335 | ring->ready = false; |
419 | ring->ring = NULL; | 336 | ring->ring = NULL; |
420 | ring->ring_obj = NULL; | 337 | ring->ring_obj = NULL; |
421 | mutex_unlock(ring->ring_lock); | ||
422 | 338 | ||
423 | amdgpu_wb_free(ring->adev, ring->fence_offs); | 339 | amdgpu_wb_free(ring->adev, ring->fence_offs); |
424 | amdgpu_wb_free(ring->adev, ring->rptr_offs); | 340 | amdgpu_wb_free(ring->adev, ring->rptr_offs); |
@@ -474,29 +390,18 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data) | |||
474 | struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset); | 390 | struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset); |
475 | 391 | ||
476 | uint32_t rptr, wptr, rptr_next; | 392 | uint32_t rptr, wptr, rptr_next; |
477 | unsigned count, i, j; | 393 | unsigned i; |
478 | |||
479 | amdgpu_ring_free_size(ring); | ||
480 | count = (ring->ring_size / 4) - ring->ring_free_dw; | ||
481 | 394 | ||
482 | wptr = amdgpu_ring_get_wptr(ring); | 395 | wptr = amdgpu_ring_get_wptr(ring); |
483 | seq_printf(m, "wptr: 0x%08x [%5d]\n", | 396 | seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr); |
484 | wptr, wptr); | ||
485 | 397 | ||
486 | rptr = amdgpu_ring_get_rptr(ring); | 398 | rptr = amdgpu_ring_get_rptr(ring); |
487 | seq_printf(m, "rptr: 0x%08x [%5d]\n", | ||
488 | rptr, rptr); | ||
489 | |||
490 | rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr); | 399 | rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr); |
491 | 400 | ||
401 | seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr); | ||
402 | |||
492 | seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", | 403 | seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", |
493 | ring->wptr, ring->wptr); | 404 | ring->wptr, ring->wptr); |
494 | seq_printf(m, "last semaphore signal addr : 0x%016llx\n", | ||
495 | ring->last_semaphore_signal_addr); | ||
496 | seq_printf(m, "last semaphore wait addr : 0x%016llx\n", | ||
497 | ring->last_semaphore_wait_addr); | ||
498 | seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); | ||
499 | seq_printf(m, "%u dwords in ring\n", count); | ||
500 | 405 | ||
501 | if (!ring->ready) | 406 | if (!ring->ready) |
502 | return 0; | 407 | return 0; |
@@ -505,11 +410,20 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data) | |||
505 | * packet that is the root issue | 410 | * packet that is the root issue |
506 | */ | 411 | */ |
507 | i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; | 412 | i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; |
508 | for (j = 0; j <= (count + 32); j++) { | 413 | while (i != rptr) { |
414 | seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); | ||
415 | if (i == rptr) | ||
416 | seq_puts(m, " *"); | ||
417 | if (i == rptr_next) | ||
418 | seq_puts(m, " #"); | ||
419 | seq_puts(m, "\n"); | ||
420 | i = (i + 1) & ring->ptr_mask; | ||
421 | } | ||
422 | while (i != wptr) { | ||
509 | seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); | 423 | seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); |
510 | if (rptr == i) | 424 | if (i == rptr) |
511 | seq_puts(m, " *"); | 425 | seq_puts(m, " *"); |
512 | if (rptr_next == i) | 426 | if (i == rptr_next) |
513 | seq_puts(m, " #"); | 427 | seq_puts(m, " #"); |
514 | seq_puts(m, "\n"); | 428 | seq_puts(m, "\n"); |
515 | i = (i + 1) & ring->ptr_mask; | 429 | i = (i + 1) & ring->ptr_mask; |
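The ring rework above drops the free-size bookkeeping and instead caps a single allocation at max_dw = DIV_ROUND_UP(ring_size / 4, hw_submission), relying as before on power-of-two pointer masking for wraparound. A sketch of that arithmetic with illustrative sizes:

    #include <stdio.h>
    #include <stdint.h>

    #define RING_DW       1024u             /* ring_size / 4, power of two */
    #define PTR_MASK      (RING_DW - 1)
    #define HW_SUBMISSION 2u
    #define MAX_DW        ((RING_DW + HW_SUBMISSION - 1) / HW_SUBMISSION)

    static uint32_t wptr;

    static int ring_alloc(unsigned ndw, unsigned align_mask)
    {
        ndw = (ndw + align_mask) & ~align_mask;   /* pad so commit is safe */
        if (ndw > MAX_DW)                         /* too big for one job */
            return -1;
        return (int)ndw;
    }

    int main(void)
    {
        int ndw = ring_alloc(13, 7);              /* 8-dword alignment */

        printf("allocated %d dw, wptr %u -> %u\n",
               ndw, wptr, (wptr + ndw) & PTR_MASK);
        return 0;
    }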
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 8b88edb0434b..7d8f8f1e3f7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | |||
@@ -321,8 +321,11 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, | |||
321 | int i, r; | 321 | int i, r; |
322 | signed long t; | 322 | signed long t; |
323 | 323 | ||
324 | BUG_ON(align > sa_manager->align); | 324 | if (WARN_ON_ONCE(align > sa_manager->align)) |
325 | BUG_ON(size > sa_manager->size); | 325 | return -EINVAL; |
326 | |||
327 | if (WARN_ON_ONCE(size > sa_manager->size)) | ||
328 | return -EINVAL; | ||
326 | 329 | ||
327 | *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL); | 330 | *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL); |
328 | if ((*sa_bo) == NULL) { | 331 | if ((*sa_bo) == NULL) { |
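The sa.c hunk converts BUG_ON() (which panics the machine) into WARN_ON_ONCE() plus an -EINVAL return, so an oversized or over-aligned sub-allocation request now fails gracefully. A stub sketch of that pattern; warn_once here is a stand-in, not the kernel macro:

    #include <stdio.h>
    #include <stdbool.h>

    #define EINVAL 22

    static bool warn_once(bool cond, const char *what)
    {
        static bool warned;

        if (cond && !warned) {
            warned = true;
            fprintf(stderr, "WARNING: %s\n", what);
        }
        return cond;
    }

    static int sa_bo_new(unsigned align, unsigned max_align)
    {
        if (warn_once(align > max_align, "align > sa_manager->align"))
            return -EINVAL;   /* caller sees an error instead of a panic */
        return 0;
    }

    int main(void)
    {
        printf("ok=%d, bad=%d\n", sa_bo_new(8, 64), sa_bo_new(128, 64));
        return 0;
    }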
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c deleted file mode 100644 index 438c05254695..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ /dev/null | |||
@@ -1,108 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2015 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * | ||
23 | */ | ||
24 | #include <linux/kthread.h> | ||
25 | #include <linux/wait.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <drm/drmP.h> | ||
28 | #include "amdgpu.h" | ||
29 | #include "amdgpu_trace.h" | ||
30 | |||
31 | static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) | ||
32 | { | ||
33 | struct amdgpu_job *job = to_amdgpu_job(sched_job); | ||
34 | return amdgpu_sync_get_fence(&job->ibs->sync); | ||
35 | } | ||
36 | |||
37 | static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) | ||
38 | { | ||
39 | struct amdgpu_fence *fence = NULL; | ||
40 | struct amdgpu_job *job; | ||
41 | int r; | ||
42 | |||
43 | if (!sched_job) { | ||
44 | DRM_ERROR("job is null\n"); | ||
45 | return NULL; | ||
46 | } | ||
47 | job = to_amdgpu_job(sched_job); | ||
48 | trace_amdgpu_sched_run_job(job); | ||
49 | r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner); | ||
50 | if (r) { | ||
51 | DRM_ERROR("Error scheduling IBs (%d)\n", r); | ||
52 | goto err; | ||
53 | } | ||
54 | |||
55 | fence = job->ibs[job->num_ibs - 1].fence; | ||
56 | fence_get(&fence->base); | ||
57 | |||
58 | err: | ||
59 | if (job->free_job) | ||
60 | job->free_job(job); | ||
61 | |||
62 | kfree(job); | ||
63 | return fence ? &fence->base : NULL; | ||
64 | } | ||
65 | |||
66 | struct amd_sched_backend_ops amdgpu_sched_ops = { | ||
67 | .dependency = amdgpu_sched_dependency, | ||
68 | .run_job = amdgpu_sched_run_job, | ||
69 | }; | ||
70 | |||
71 | int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, | ||
72 | struct amdgpu_ring *ring, | ||
73 | struct amdgpu_ib *ibs, | ||
74 | unsigned num_ibs, | ||
75 | int (*free_job)(struct amdgpu_job *), | ||
76 | void *owner, | ||
77 | struct fence **f) | ||
78 | { | ||
79 | int r = 0; | ||
80 | if (amdgpu_enable_scheduler) { | ||
81 | struct amdgpu_job *job = | ||
82 | kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); | ||
83 | if (!job) | ||
84 | return -ENOMEM; | ||
85 | job->base.sched = &ring->sched; | ||
86 | job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; | ||
87 | job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner); | ||
88 | if (!job->base.s_fence) { | ||
89 | kfree(job); | ||
90 | return -ENOMEM; | ||
91 | } | ||
92 | *f = fence_get(&job->base.s_fence->base); | ||
93 | |||
94 | job->adev = adev; | ||
95 | job->ibs = ibs; | ||
96 | job->num_ibs = num_ibs; | ||
97 | job->owner = owner; | ||
98 | job->free_job = free_job; | ||
99 | amd_sched_entity_push_job(&job->base); | ||
100 | } else { | ||
101 | r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); | ||
102 | if (r) | ||
103 | return r; | ||
104 | *f = fence_get(&ibs[num_ibs - 1].fence->base); | ||
105 | } | ||
106 | |||
107 | return 0; | ||
108 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c deleted file mode 100644 index 1caaf201b708..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c +++ /dev/null | |||
@@ -1,102 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2011 Christian König. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the | ||
7 | * "Software"), to deal in the Software without restriction, including | ||
8 | * without limitation the rights to use, copy, modify, merge, publish, | ||
9 | * distribute, sub license, and/or sell copies of the Software, and to | ||
10 | * permit persons to whom the Software is furnished to do so, subject to | ||
11 | * the following conditions: | ||
12 | * | ||
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
20 | * | ||
21 | * The above copyright notice and this permission notice (including the | ||
22 | * next paragraph) shall be included in all copies or substantial portions | ||
23 | * of the Software. | ||
24 | * | ||
25 | */ | ||
26 | /* | ||
27 | * Authors: | ||
28 | * Christian König <deathsimple@vodafone.de> | ||
29 | */ | ||
30 | #include <drm/drmP.h> | ||
31 | #include "amdgpu.h" | ||
32 | #include "amdgpu_trace.h" | ||
33 | |||
34 | int amdgpu_semaphore_create(struct amdgpu_device *adev, | ||
35 | struct amdgpu_semaphore **semaphore) | ||
36 | { | ||
37 | int r; | ||
38 | |||
39 | *semaphore = kmalloc(sizeof(struct amdgpu_semaphore), GFP_KERNEL); | ||
40 | if (*semaphore == NULL) { | ||
41 | return -ENOMEM; | ||
42 | } | ||
43 | r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, | ||
44 | &(*semaphore)->sa_bo, 8, 8); | ||
45 | if (r) { | ||
46 | kfree(*semaphore); | ||
47 | *semaphore = NULL; | ||
48 | return r; | ||
49 | } | ||
50 | (*semaphore)->waiters = 0; | ||
51 | (*semaphore)->gpu_addr = amdgpu_sa_bo_gpu_addr((*semaphore)->sa_bo); | ||
52 | |||
53 | *((uint64_t *)amdgpu_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; | ||
54 | |||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring, | ||
59 | struct amdgpu_semaphore *semaphore) | ||
60 | { | ||
61 | trace_amdgpu_semaphore_signale(ring->idx, semaphore); | ||
62 | |||
63 | if (amdgpu_ring_emit_semaphore(ring, semaphore, false)) { | ||
64 | --semaphore->waiters; | ||
65 | |||
66 | /* for debugging lockup only, used by sysfs debug files */ | ||
67 | ring->last_semaphore_signal_addr = semaphore->gpu_addr; | ||
68 | return true; | ||
69 | } | ||
70 | return false; | ||
71 | } | ||
72 | |||
73 | bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, | ||
74 | struct amdgpu_semaphore *semaphore) | ||
75 | { | ||
76 | trace_amdgpu_semaphore_wait(ring->idx, semaphore); | ||
77 | |||
78 | if (amdgpu_ring_emit_semaphore(ring, semaphore, true)) { | ||
79 | ++semaphore->waiters; | ||
80 | |||
81 | /* for debugging lockup only, used by sysfs debug files */ | ||
82 | ring->last_semaphore_wait_addr = semaphore->gpu_addr; | ||
83 | return true; | ||
84 | } | ||
85 | return false; | ||
86 | } | ||
87 | |||
88 | void amdgpu_semaphore_free(struct amdgpu_device *adev, | ||
89 | struct amdgpu_semaphore **semaphore, | ||
90 | struct fence *fence) | ||
91 | { | ||
92 | if (semaphore == NULL || *semaphore == NULL) { | ||
93 | return; | ||
94 | } | ||
95 | if ((*semaphore)->waiters > 0) { | ||
96 | dev_err(adev->dev, "semaphore %p has more waiters than signalers," | ||
97 | " hardware lockup imminent!\n", *semaphore); | ||
98 | } | ||
99 | amdgpu_sa_bo_free(adev, &(*semaphore)->sa_bo, fence); | ||
100 | kfree(*semaphore); | ||
101 | *semaphore = NULL; | ||
102 | } | ||
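The deleted semaphore helpers kept a waiters count in balance: each emitted wait bumps it, each signal drops it, and a positive count at free time means some ring would have stalled forever, hence the dev_err above. A toy model of that accounting (pure illustration, no hardware involved):

    #include <stdio.h>

    struct sem { int waiters; };

    static void emit_wait(struct sem *s)   { s->waiters++; }
    static void emit_signal(struct sem *s) { s->waiters--; }

    static void sem_free(struct sem *s)
    {
        if (s->waiters > 0)
            fprintf(stderr, "unbalanced semaphore: %d waiter(s) left, "
                            "a real ring would stall\n", s->waiters);
    }

    int main(void)
    {
        struct sem s = { 0 };

        emit_wait(&s);     /* ring B waits ... */
        emit_signal(&s);   /* ... ring A signals: balanced */
        emit_wait(&s);     /* a wait with no matching signal */
        sem_free(&s);      /* triggers the diagnostic, like the dev_err above */
        return 0;
    }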
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 181ce39ef5e5..c15be00de904 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | |||
@@ -46,14 +46,6 @@ struct amdgpu_sync_entry { | |||
46 | */ | 46 | */ |
47 | void amdgpu_sync_create(struct amdgpu_sync *sync) | 47 | void amdgpu_sync_create(struct amdgpu_sync *sync) |
48 | { | 48 | { |
49 | unsigned i; | ||
50 | |||
51 | for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) | ||
52 | sync->semaphores[i] = NULL; | ||
53 | |||
54 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | ||
55 | sync->sync_to[i] = NULL; | ||
56 | |||
57 | hash_init(sync->fences); | 49 | hash_init(sync->fences); |
58 | sync->last_vm_update = NULL; | 50 | sync->last_vm_update = NULL; |
59 | } | 51 | } |
@@ -107,7 +99,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, | |||
107 | struct fence *f) | 99 | struct fence *f) |
108 | { | 100 | { |
109 | struct amdgpu_sync_entry *e; | 101 | struct amdgpu_sync_entry *e; |
110 | struct amdgpu_fence *fence; | ||
111 | 102 | ||
112 | if (!f) | 103 | if (!f) |
113 | return 0; | 104 | return 0; |
@@ -116,27 +107,20 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, | |||
116 | amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) | 107 | amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) |
117 | amdgpu_sync_keep_later(&sync->last_vm_update, f); | 108 | amdgpu_sync_keep_later(&sync->last_vm_update, f); |
118 | 109 | ||
119 | fence = to_amdgpu_fence(f); | 110 | hash_for_each_possible(sync->fences, e, node, f->context) { |
120 | if (!fence || fence->ring->adev != adev) { | 111 | if (unlikely(e->fence->context != f->context)) |
121 | hash_for_each_possible(sync->fences, e, node, f->context) { | 112 | continue; |
122 | if (unlikely(e->fence->context != f->context)) | ||
123 | continue; | ||
124 | |||
125 | amdgpu_sync_keep_later(&e->fence, f); | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); | ||
130 | if (!e) | ||
131 | return -ENOMEM; | ||
132 | 113 | ||
133 | hash_add(sync->fences, &e->node, f->context); | 114 | amdgpu_sync_keep_later(&e->fence, f); |
134 | e->fence = fence_get(f); | ||
135 | return 0; | 115 | return 0; |
136 | } | 116 | } |
137 | 117 | ||
138 | amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f); | 118 | e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); |
119 | if (!e) | ||
120 | return -ENOMEM; | ||
139 | 121 | ||
122 | hash_add(sync->fences, &e->node, f->context); | ||
123 | e->fence = fence_get(f); | ||
140 | return 0; | 124 | return 0; |
141 | } | 125 | } |
142 | 126 | ||
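
With the to_amdgpu_fence() special case gone, every fence is deduplicated purely by context: the hash is keyed on f->context and amdgpu_sync_keep_later() keeps only the newest fence per context. That helper sits outside this hunk; a plausible shape, consistent with how it is used here:

	static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
	{
		/* only replace *keep if 'fence' is the same or later on its timeline */
		if (*keep && fence_is_later(*keep, fence))
			return;

		fence_put(*keep);
		*keep = fence_get(fence);
	}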
@@ -153,13 +137,13 @@ static void *amdgpu_sync_get_owner(struct fence *f) | |||
153 | } | 137 | } |
154 | 138 | ||
155 | /** | 139 | /** |
156 | * amdgpu_sync_resv - use the semaphores to sync to a reservation object | 140 | * amdgpu_sync_resv - sync to a reservation object |
157 | * | 141 | * |
158 | * @sync: sync object to add fences from reservation object to | 142 | * @sync: sync object to add fences from reservation object to |
159 | * @resv: reservation object with embedded fence | 143 | * @resv: reservation object with embedded fence |
160 | * @shared: true if we should only sync to the exclusive fence | 144 | * @shared: true if we should only sync to the exclusive fence |
161 | * | 145 | * |
162 | * Sync to the fence using the semaphore objects | 146 | * Sync to the fence |
163 | */ | 147 | */ |
164 | int amdgpu_sync_resv(struct amdgpu_device *adev, | 148 | int amdgpu_sync_resv(struct amdgpu_device *adev, |
165 | struct amdgpu_sync *sync, | 149 | struct amdgpu_sync *sync, |
@@ -250,123 +234,17 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync) | |||
250 | kfree(e); | 234 | kfree(e); |
251 | } | 235 | } |
252 | 236 | ||
253 | if (amdgpu_enable_semaphores) | ||
254 | return 0; | ||
255 | |||
256 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | ||
257 | struct fence *fence = sync->sync_to[i]; | ||
258 | if (!fence) | ||
259 | continue; | ||
260 | |||
261 | r = fence_wait(fence, false); | ||
262 | if (r) | ||
263 | return r; | ||
264 | } | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * amdgpu_sync_rings - sync ring to all registered fences | ||
271 | * | ||
272 | * @sync: sync object to use | ||
273 | * @ring: ring that needs sync | ||
274 | * | ||
275 | * Ensure that all registered fences are signaled before letting | ||
276 | * the ring continue. The caller must hold the ring lock. | ||
277 | */ | ||
278 | int amdgpu_sync_rings(struct amdgpu_sync *sync, | ||
279 | struct amdgpu_ring *ring) | ||
280 | { | ||
281 | struct amdgpu_device *adev = ring->adev; | ||
282 | unsigned count = 0; | ||
283 | int i, r; | ||
284 | |||
285 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | ||
286 | struct amdgpu_ring *other = adev->rings[i]; | ||
287 | struct amdgpu_semaphore *semaphore; | ||
288 | struct amdgpu_fence *fence; | ||
289 | |||
290 | if (!sync->sync_to[i]) | ||
291 | continue; | ||
292 | |||
293 | fence = to_amdgpu_fence(sync->sync_to[i]); | ||
294 | |||
295 | /* check if we really need to sync */ | ||
296 | if (!amdgpu_enable_scheduler && | ||
297 | !amdgpu_fence_need_sync(fence, ring)) | ||
298 | continue; | ||
299 | |||
300 | /* prevent GPU deadlocks */ | ||
301 | if (!other->ready) { | ||
302 | dev_err(adev->dev, "Syncing to a disabled ring!"); | ||
303 | return -EINVAL; | ||
304 | } | ||
305 | |||
306 | if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) { | ||
307 | r = fence_wait(sync->sync_to[i], true); | ||
308 | if (r) | ||
309 | return r; | ||
310 | continue; | ||
311 | } | ||
312 | |||
313 | if (count >= AMDGPU_NUM_SYNCS) { | ||
314 | /* not enough room, wait manually */ | ||
315 | r = fence_wait(&fence->base, false); | ||
316 | if (r) | ||
317 | return r; | ||
318 | continue; | ||
319 | } | ||
320 | r = amdgpu_semaphore_create(adev, &semaphore); | ||
321 | if (r) | ||
322 | return r; | ||
323 | |||
324 | sync->semaphores[count++] = semaphore; | ||
325 | |||
326 | /* allocate enough space for sync command */ | ||
327 | r = amdgpu_ring_alloc(other, 16); | ||
328 | if (r) | ||
329 | return r; | ||
330 | |||
331 | /* emit the signal semaphore */ | ||
332 | if (!amdgpu_semaphore_emit_signal(other, semaphore)) { | ||
333 | /* signaling wasn't successful, wait manually */ | ||
334 | amdgpu_ring_undo(other); | ||
335 | r = fence_wait(&fence->base, false); | ||
336 | if (r) | ||
337 | return r; | ||
338 | continue; | ||
339 | } | ||
340 | |||
341 | /* we assume caller has already allocated space on waiters ring */ | ||
342 | if (!amdgpu_semaphore_emit_wait(ring, semaphore)) { | ||
343 | /* waiting wasn't successful, wait manually */ | ||
344 | amdgpu_ring_undo(other); | ||
345 | r = fence_wait(&fence->base, false); | ||
346 | if (r) | ||
347 | return r; | ||
348 | continue; | ||
349 | } | ||
350 | |||
351 | amdgpu_ring_commit(other); | ||
352 | amdgpu_fence_note_sync(fence, ring); | ||
353 | } | ||
354 | |||
355 | return 0; | 237 | return 0; |
356 | } | 238 | } |
357 | 239 | ||
358 | /** | 240 | /** |
359 | * amdgpu_sync_free - free the sync object | 241 | * amdgpu_sync_free - free the sync object |
360 | * | 242 | * |
361 | * @adev: amdgpu_device pointer | ||
362 | * @sync: sync object to use | 243 | * @sync: sync object to use |
363 | * @fence: fence to use for the free | ||
364 | * | 244 | * |
365 | * Free the sync object by freeing all semaphores in it. | 245 | * Free the sync object. |
366 | */ | 246 | */ |
367 | void amdgpu_sync_free(struct amdgpu_device *adev, | 247 | void amdgpu_sync_free(struct amdgpu_sync *sync) |
368 | struct amdgpu_sync *sync, | ||
369 | struct fence *fence) | ||
370 | { | 248 | { |
371 | struct amdgpu_sync_entry *e; | 249 | struct amdgpu_sync_entry *e; |
372 | struct hlist_node *tmp; | 250 | struct hlist_node *tmp; |
@@ -378,11 +256,5 @@ void amdgpu_sync_free(struct amdgpu_device *adev, | |||
378 | kfree(e); | 256 | kfree(e); |
379 | } | 257 | } |
380 | 258 | ||
381 | for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) | ||
382 | amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); | ||
383 | |||
384 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | ||
385 | fence_put(sync->sync_to[i]); | ||
386 | |||
387 | fence_put(sync->last_vm_update); | 259 | fence_put(sync->last_vm_update); |
388 | } | 260 | } |
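
With the semaphore array and the per-ring sync_to[] gone, freeing a sync object only has to drain the fence hash and drop last_vm_update, so the adev and fence parameters disappear. Call sites shrink accordingly, schematically:

	/* before */ amdgpu_sync_free(adev, &sync, fence);
	/* after  */ amdgpu_sync_free(&sync);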
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 4865615e9c06..05a53f4fc334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | |||
@@ -238,144 +238,10 @@ void amdgpu_test_moves(struct amdgpu_device *adev) | |||
238 | amdgpu_do_test_moves(adev); | 238 | amdgpu_do_test_moves(adev); |
239 | } | 239 | } |
240 | 240 | ||
241 | static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev, | ||
242 | struct amdgpu_ring *ring, | ||
243 | struct fence **fence) | ||
244 | { | ||
245 | uint32_t handle = ring->idx ^ 0xdeafbeef; | ||
246 | int r; | ||
247 | |||
248 | if (ring == &adev->uvd.ring) { | ||
249 | r = amdgpu_uvd_get_create_msg(ring, handle, NULL); | ||
250 | if (r) { | ||
251 | DRM_ERROR("Failed to get dummy create msg\n"); | ||
252 | return r; | ||
253 | } | ||
254 | |||
255 | r = amdgpu_uvd_get_destroy_msg(ring, handle, fence); | ||
256 | if (r) { | ||
257 | DRM_ERROR("Failed to get dummy destroy msg\n"); | ||
258 | return r; | ||
259 | } | ||
260 | |||
261 | } else if (ring == &adev->vce.ring[0] || | ||
262 | ring == &adev->vce.ring[1]) { | ||
263 | r = amdgpu_vce_get_create_msg(ring, handle, NULL); | ||
264 | if (r) { | ||
265 | DRM_ERROR("Failed to get dummy create msg\n"); | ||
266 | return r; | ||
267 | } | ||
268 | |||
269 | r = amdgpu_vce_get_destroy_msg(ring, handle, fence); | ||
270 | if (r) { | ||
271 | DRM_ERROR("Failed to get dummy destroy msg\n"); | ||
272 | return r; | ||
273 | } | ||
274 | } else { | ||
275 | struct amdgpu_fence *a_fence = NULL; | ||
276 | r = amdgpu_ring_lock(ring, 64); | ||
277 | if (r) { | ||
278 | DRM_ERROR("Failed to lock ring A %d\n", ring->idx); | ||
279 | return r; | ||
280 | } | ||
281 | amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence); | ||
282 | amdgpu_ring_unlock_commit(ring); | ||
283 | *fence = &a_fence->base; | ||
284 | } | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | void amdgpu_test_ring_sync(struct amdgpu_device *adev, | 241 | void amdgpu_test_ring_sync(struct amdgpu_device *adev, |
289 | struct amdgpu_ring *ringA, | 242 | struct amdgpu_ring *ringA, |
290 | struct amdgpu_ring *ringB) | 243 | struct amdgpu_ring *ringB) |
291 | { | 244 | { |
292 | struct fence *fence1 = NULL, *fence2 = NULL; | ||
293 | struct amdgpu_semaphore *semaphore = NULL; | ||
294 | int r; | ||
295 | |||
296 | r = amdgpu_semaphore_create(adev, &semaphore); | ||
297 | if (r) { | ||
298 | DRM_ERROR("Failed to create semaphore\n"); | ||
299 | goto out_cleanup; | ||
300 | } | ||
301 | |||
302 | r = amdgpu_ring_lock(ringA, 64); | ||
303 | if (r) { | ||
304 | DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); | ||
305 | goto out_cleanup; | ||
306 | } | ||
307 | amdgpu_semaphore_emit_wait(ringA, semaphore); | ||
308 | amdgpu_ring_unlock_commit(ringA); | ||
309 | |||
310 | r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1); | ||
311 | if (r) | ||
312 | goto out_cleanup; | ||
313 | |||
314 | r = amdgpu_ring_lock(ringA, 64); | ||
315 | if (r) { | ||
316 | DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); | ||
317 | goto out_cleanup; | ||
318 | } | ||
319 | amdgpu_semaphore_emit_wait(ringA, semaphore); | ||
320 | amdgpu_ring_unlock_commit(ringA); | ||
321 | |||
322 | r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2); | ||
323 | if (r) | ||
324 | goto out_cleanup; | ||
325 | |||
326 | mdelay(1000); | ||
327 | |||
328 | if (fence_is_signaled(fence1)) { | ||
329 | DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n"); | ||
330 | goto out_cleanup; | ||
331 | } | ||
332 | |||
333 | r = amdgpu_ring_lock(ringB, 64); | ||
334 | if (r) { | ||
335 | DRM_ERROR("Failed to lock ring B %p\n", ringB); | ||
336 | goto out_cleanup; | ||
337 | } | ||
338 | amdgpu_semaphore_emit_signal(ringB, semaphore); | ||
339 | amdgpu_ring_unlock_commit(ringB); | ||
340 | |||
341 | r = fence_wait(fence1, false); | ||
342 | if (r) { | ||
343 | DRM_ERROR("Failed to wait for sync fence 1\n"); | ||
344 | goto out_cleanup; | ||
345 | } | ||
346 | |||
347 | mdelay(1000); | ||
348 | |||
349 | if (fence_is_signaled(fence2)) { | ||
350 | DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n"); | ||
351 | goto out_cleanup; | ||
352 | } | ||
353 | |||
354 | r = amdgpu_ring_lock(ringB, 64); | ||
355 | if (r) { | ||
356 | DRM_ERROR("Failed to lock ring B %p\n", ringB); | ||
357 | goto out_cleanup; | ||
358 | } | ||
359 | amdgpu_semaphore_emit_signal(ringB, semaphore); | ||
360 | amdgpu_ring_unlock_commit(ringB); | ||
361 | |||
362 | r = fence_wait(fence2, false); | ||
363 | if (r) { | ||
364 | DRM_ERROR("Failed to wait for sync fence 2\n"); | ||
365 | goto out_cleanup; | ||
366 | } | ||
367 | |||
368 | out_cleanup: | ||
369 | amdgpu_semaphore_free(adev, &semaphore, NULL); | ||
370 | |||
371 | if (fence1) | ||
372 | fence_put(fence1); | ||
373 | |||
374 | if (fence2) | ||
375 | fence_put(fence2); | ||
376 | |||
377 | if (r) | ||
378 | printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); | ||
379 | } | 245 | } |
380 | 246 | ||
381 | static void amdgpu_test_ring_sync2(struct amdgpu_device *adev, | 247 | static void amdgpu_test_ring_sync2(struct amdgpu_device *adev, |
@@ -383,109 +249,6 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev, | |||
383 | struct amdgpu_ring *ringB, | 249 | struct amdgpu_ring *ringB, |
384 | struct amdgpu_ring *ringC) | 250 | struct amdgpu_ring *ringC) |
385 | { | 251 | { |
386 | struct fence *fenceA = NULL, *fenceB = NULL; | ||
387 | struct amdgpu_semaphore *semaphore = NULL; | ||
388 | bool sigA, sigB; | ||
389 | int i, r; | ||
390 | |||
391 | r = amdgpu_semaphore_create(adev, &semaphore); | ||
392 | if (r) { | ||
393 | DRM_ERROR("Failed to create semaphore\n"); | ||
394 | goto out_cleanup; | ||
395 | } | ||
396 | |||
397 | r = amdgpu_ring_lock(ringA, 64); | ||
398 | if (r) { | ||
399 | DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); | ||
400 | goto out_cleanup; | ||
401 | } | ||
402 | amdgpu_semaphore_emit_wait(ringA, semaphore); | ||
403 | amdgpu_ring_unlock_commit(ringA); | ||
404 | |||
405 | r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA); | ||
406 | if (r) | ||
407 | goto out_cleanup; | ||
408 | |||
409 | r = amdgpu_ring_lock(ringB, 64); | ||
410 | if (r) { | ||
411 | DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); | ||
412 | goto out_cleanup; | ||
413 | } | ||
414 | amdgpu_semaphore_emit_wait(ringB, semaphore); | ||
415 | amdgpu_ring_unlock_commit(ringB); | ||
416 | r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB); | ||
417 | if (r) | ||
418 | goto out_cleanup; | ||
419 | |||
420 | mdelay(1000); | ||
421 | |||
422 | if (fence_is_signaled(fenceA)) { | ||
423 | DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); | ||
424 | goto out_cleanup; | ||
425 | } | ||
426 | if (fence_is_signaled(fenceB)) { | ||
427 | DRM_ERROR("Fence B signaled without waiting for semaphore.\n"); | ||
428 | goto out_cleanup; | ||
429 | } | ||
430 | |||
431 | r = amdgpu_ring_lock(ringC, 64); | ||
432 | if (r) { | ||
433 | DRM_ERROR("Failed to lock ring C %p\n", ringC); | ||
434 | goto out_cleanup; | ||
435 | } | ||
436 | amdgpu_semaphore_emit_signal(ringC, semaphore); | ||
437 | amdgpu_ring_unlock_commit(ringC); | ||
438 | |||
439 | for (i = 0; i < 30; ++i) { | ||
440 | mdelay(100); | ||
441 | sigA = fence_is_signaled(fenceA); | ||
442 | sigB = fence_is_signaled(fenceB); | ||
443 | if (sigA || sigB) | ||
444 | break; | ||
445 | } | ||
446 | |||
447 | if (!sigA && !sigB) { | ||
448 | DRM_ERROR("Neither fence A nor B has been signaled\n"); | ||
449 | goto out_cleanup; | ||
450 | } else if (sigA && sigB) { | ||
451 | DRM_ERROR("Both fence A and B have been signaled\n"); | ||
452 | goto out_cleanup; | ||
453 | } | ||
454 | |||
455 | DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B'); | ||
456 | |||
457 | r = amdgpu_ring_lock(ringC, 64); | ||
458 | if (r) { | ||
459 | DRM_ERROR("Failed to lock ring C %p\n", ringC); | ||
460 | goto out_cleanup; | ||
461 | } | ||
462 | amdgpu_semaphore_emit_signal(ringC, semaphore); | ||
463 | amdgpu_ring_unlock_commit(ringC); | ||
464 | |||
465 | mdelay(1000); | ||
466 | |||
467 | r = fence_wait(fenceA, false); | ||
468 | if (r) { | ||
469 | DRM_ERROR("Failed to wait for sync fence A\n"); | ||
470 | goto out_cleanup; | ||
471 | } | ||
472 | r = fence_wait(fenceB, false); | ||
473 | if (r) { | ||
474 | DRM_ERROR("Failed to wait for sync fence B\n"); | ||
475 | goto out_cleanup; | ||
476 | } | ||
477 | |||
478 | out_cleanup: | ||
479 | amdgpu_semaphore_free(adev, &semaphore, NULL); | ||
480 | |||
481 | if (fenceA) | ||
482 | fence_put(fenceA); | ||
483 | |||
484 | if (fenceB) | ||
485 | fence_put(fenceB); | ||
486 | |||
487 | if (r) | ||
488 | printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); | ||
489 | } | 252 | } |
490 | 253 | ||
491 | static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA, | 254 | static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA, |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 8f9834ab1bd5..9ca3735c563c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | |||
@@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs, | |||
38 | 38 | ||
39 | TP_fast_assign( | 39 | TP_fast_assign( |
40 | __entry->bo_list = p->bo_list; | 40 | __entry->bo_list = p->bo_list; |
41 | __entry->ring = p->ibs[i].ring->idx; | 41 | __entry->ring = p->job->ring->idx; |
42 | __entry->dw = p->ibs[i].length_dw; | 42 | __entry->dw = p->job->ibs[i].length_dw; |
43 | __entry->fences = amdgpu_fence_count_emitted( | 43 | __entry->fences = amdgpu_fence_count_emitted( |
44 | p->ibs[i].ring); | 44 | p->job->ring); |
45 | ), | 45 | ), |
46 | TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", | 46 | TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", |
47 | __entry->bo_list, __entry->ring, __entry->dw, | 47 | __entry->bo_list, __entry->ring, __entry->dw, |
@@ -65,7 +65,7 @@ TRACE_EVENT(amdgpu_cs_ioctl, | |||
65 | __entry->sched_job = &job->base; | 65 | __entry->sched_job = &job->base; |
66 | __entry->ib = job->ibs; | 66 | __entry->ib = job->ibs; |
67 | __entry->fence = &job->base.s_fence->base; | 67 | __entry->fence = &job->base.s_fence->base; |
68 | __entry->ring_name = job->ibs[0].ring->name; | 68 | __entry->ring_name = job->ring->name; |
69 | __entry->num_ibs = job->num_ibs; | 69 | __entry->num_ibs = job->num_ibs; |
70 | ), | 70 | ), |
71 | TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", | 71 | TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", |
@@ -90,7 +90,7 @@ TRACE_EVENT(amdgpu_sched_run_job, | |||
90 | __entry->sched_job = &job->base; | 90 | __entry->sched_job = &job->base; |
91 | __entry->ib = job->ibs; | 91 | __entry->ib = job->ibs; |
92 | __entry->fence = &job->base.s_fence->base; | 92 | __entry->fence = &job->base.s_fence->base; |
93 | __entry->ring_name = job->ibs[0].ring->name; | 93 | __entry->ring_name = job->ring->name; |
94 | __entry->num_ibs = job->num_ibs; | 94 | __entry->num_ibs = job->num_ibs; |
95 | ), | 95 | ), |
96 | TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", | 96 | TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", |
@@ -100,18 +100,21 @@ TRACE_EVENT(amdgpu_sched_run_job, | |||
100 | 100 | ||
101 | 101 | ||
102 | TRACE_EVENT(amdgpu_vm_grab_id, | 102 | TRACE_EVENT(amdgpu_vm_grab_id, |
103 | TP_PROTO(unsigned vmid, int ring), | 103 | TP_PROTO(struct amdgpu_vm *vm, unsigned vmid, int ring), |
104 | TP_ARGS(vmid, ring), | 104 | TP_ARGS(vm, vmid, ring), |
105 | TP_STRUCT__entry( | 105 | TP_STRUCT__entry( |
106 | __field(struct amdgpu_vm *, vm) | ||
106 | __field(u32, vmid) | 107 | __field(u32, vmid) |
107 | __field(u32, ring) | 108 | __field(u32, ring) |
108 | ), | 109 | ), |
109 | 110 | ||
110 | TP_fast_assign( | 111 | TP_fast_assign( |
112 | __entry->vm = vm; | ||
111 | __entry->vmid = vmid; | 113 | __entry->vmid = vmid; |
112 | __entry->ring = ring; | 114 | __entry->ring = ring; |
113 | ), | 115 | ), |
114 | TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) | 116 | TP_printk("vm=%p, id=%u, ring=%u", __entry->vm, __entry->vmid, |
117 | __entry->ring) | ||
115 | ); | 118 | ); |
116 | 119 | ||
117 | TRACE_EVENT(amdgpu_vm_bo_map, | 120 | TRACE_EVENT(amdgpu_vm_bo_map, |
@@ -247,42 +250,6 @@ TRACE_EVENT(amdgpu_bo_list_set, | |||
247 | TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) | 250 | TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) |
248 | ); | 251 | ); |
249 | 252 | ||
250 | DECLARE_EVENT_CLASS(amdgpu_semaphore_request, | ||
251 | |||
252 | TP_PROTO(int ring, struct amdgpu_semaphore *sem), | ||
253 | |||
254 | TP_ARGS(ring, sem), | ||
255 | |||
256 | TP_STRUCT__entry( | ||
257 | __field(int, ring) | ||
258 | __field(signed, waiters) | ||
259 | __field(uint64_t, gpu_addr) | ||
260 | ), | ||
261 | |||
262 | TP_fast_assign( | ||
263 | __entry->ring = ring; | ||
264 | __entry->waiters = sem->waiters; | ||
265 | __entry->gpu_addr = sem->gpu_addr; | ||
266 | ), | ||
267 | |||
268 | TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring, | ||
269 | __entry->waiters, __entry->gpu_addr) | ||
270 | ); | ||
271 | |||
272 | DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_signale, | ||
273 | |||
274 | TP_PROTO(int ring, struct amdgpu_semaphore *sem), | ||
275 | |||
276 | TP_ARGS(ring, sem) | ||
277 | ); | ||
278 | |||
279 | DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_wait, | ||
280 | |||
281 | TP_PROTO(int ring, struct amdgpu_semaphore *sem), | ||
282 | |||
283 | TP_ARGS(ring, sem) | ||
284 | ); | ||
285 | |||
286 | #endif | 253 | #endif |
287 | 254 | ||
288 | /* This part must be outside protection */ | 255 | /* This part must be outside protection */ |
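
Besides dropping the semaphore event class, the reworked amdgpu_vm_grab_id event records which VM grabbed the ID, so call sites now pass the vm pointer first, as in the amdgpu_vm.c hunk below:

	trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);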
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 55cf05e1c81c..e52fc641edfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
@@ -77,6 +77,8 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) | |||
77 | static int amdgpu_ttm_global_init(struct amdgpu_device *adev) | 77 | static int amdgpu_ttm_global_init(struct amdgpu_device *adev) |
78 | { | 78 | { |
79 | struct drm_global_reference *global_ref; | 79 | struct drm_global_reference *global_ref; |
80 | struct amdgpu_ring *ring; | ||
81 | struct amd_sched_rq *rq; | ||
80 | int r; | 82 | int r; |
81 | 83 | ||
82 | adev->mman.mem_global_referenced = false; | 84 | adev->mman.mem_global_referenced = false; |
@@ -106,13 +108,27 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) | |||
106 | return r; | 108 | return r; |
107 | } | 109 | } |
108 | 110 | ||
111 | ring = adev->mman.buffer_funcs_ring; | ||
112 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; | ||
113 | r = amd_sched_entity_init(&ring->sched, &adev->mman.entity, | ||
114 | rq, amdgpu_sched_jobs); | ||
115 | if (r != 0) { | ||
116 | DRM_ERROR("Failed setting up TTM BO move run queue.\n"); | ||
117 | drm_global_item_unref(&adev->mman.mem_global_ref); | ||
118 | drm_global_item_unref(&adev->mman.bo_global_ref.ref); | ||
119 | return r; | ||
120 | } | ||
121 | |||
109 | adev->mman.mem_global_referenced = true; | 122 | adev->mman.mem_global_referenced = true; |
123 | |||
110 | return 0; | 124 | return 0; |
111 | } | 125 | } |
112 | 126 | ||
113 | static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) | 127 | static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) |
114 | { | 128 | { |
115 | if (adev->mman.mem_global_referenced) { | 129 | if (adev->mman.mem_global_referenced) { |
130 | amd_sched_entity_fini(adev->mman.entity.sched, | ||
131 | &adev->mman.entity); | ||
116 | drm_global_item_unref(&adev->mman.bo_global_ref.ref); | 132 | drm_global_item_unref(&adev->mman.bo_global_ref.ref); |
117 | drm_global_item_unref(&adev->mman.mem_global_ref); | 133 | drm_global_item_unref(&adev->mman.mem_global_ref); |
118 | adev->mman.mem_global_referenced = false; | 134 | adev->mman.mem_global_referenced = false; |
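
TTM buffer moves now go through a dedicated scheduler entity on the kernel-priority run queue; the same entity-per-engine pattern repeats below for UVD and VCE at normal priority. Schematically:

	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];	/* or ..._NORMAL */
	r = amd_sched_entity_init(&ring->sched, &entity, rq, amdgpu_sched_jobs);
	/* ... submit jobs through the entity ... */
	amd_sched_entity_fini(&ring->sched, &entity);	/* teardown mirrors setup */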
@@ -499,9 +515,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) | |||
499 | enum dma_data_direction direction = write ? | 515 | enum dma_data_direction direction = write ? |
500 | DMA_BIDIRECTIONAL : DMA_TO_DEVICE; | 516 | DMA_BIDIRECTIONAL : DMA_TO_DEVICE; |
501 | 517 | ||
502 | if (current->mm != gtt->usermm) | ||
503 | return -EPERM; | ||
504 | |||
505 | if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { | 518 | if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { |
506 | /* check that we only pin down anonymous memory | 519 | /* check that we only pin down anonymous memory |
507 | to prevent problems with writeback */ | 520 | to prevent problems with writeback */ |
@@ -773,14 +786,33 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, | |||
773 | return 0; | 786 | return 0; |
774 | } | 787 | } |
775 | 788 | ||
776 | bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm) | 789 | struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) |
790 | { | ||
791 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | ||
792 | |||
793 | if (gtt == NULL) | ||
794 | return NULL; | ||
795 | |||
796 | return gtt->usermm; | ||
797 | } | ||
798 | |||
799 | bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, | ||
800 | unsigned long end) | ||
777 | { | 801 | { |
778 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 802 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
803 | unsigned long size; | ||
779 | 804 | ||
780 | if (gtt == NULL) | 805 | if (gtt == NULL) |
781 | return false; | 806 | return false; |
782 | 807 | ||
783 | return !!gtt->userptr; | 808 | if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr) |
809 | return false; | ||
810 | |||
811 | size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; | ||
812 | if (gtt->userptr > end || gtt->userptr + size <= start) | ||
813 | return false; | ||
814 | |||
815 | return true; | ||
784 | } | 816 | } |
785 | 817 | ||
786 | bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) | 818 | bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) |
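
amdgpu_ttm_tt_affect_userptr() lets the MMU notifier ask whether an invalidated address range touches this TT's user pages, while the removed current->mm check gives way to exposing the mm via amdgpu_ttm_tt_get_usermm() so callers can validate ownership themselves. The predicate is a plain interval-overlap test, mirrored here:

	/* true unless [userptr, userptr + size) lies entirely after 'end'
	 * or entirely before 'start' ('end' is treated as inclusive) */
	static bool affects_range(unsigned long userptr, unsigned long size,
				  unsigned long start, unsigned long end)
	{
		return !(userptr > end || userptr + size <= start);
	}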
@@ -996,9 +1028,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, | |||
996 | struct fence **fence) | 1028 | struct fence **fence) |
997 | { | 1029 | { |
998 | struct amdgpu_device *adev = ring->adev; | 1030 | struct amdgpu_device *adev = ring->adev; |
1031 | struct amdgpu_job *job; | ||
1032 | |||
999 | uint32_t max_bytes; | 1033 | uint32_t max_bytes; |
1000 | unsigned num_loops, num_dw; | 1034 | unsigned num_loops, num_dw; |
1001 | struct amdgpu_ib *ib; | ||
1002 | unsigned i; | 1035 | unsigned i; |
1003 | int r; | 1036 | int r; |
1004 | 1037 | ||
@@ -1010,20 +1043,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, | |||
1010 | while (num_dw & 0x7) | 1043 | while (num_dw & 0x7) |
1011 | num_dw++; | 1044 | num_dw++; |
1012 | 1045 | ||
1013 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | 1046 | r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); |
1014 | if (!ib) | 1047 | if (r) |
1015 | return -ENOMEM; | ||
1016 | |||
1017 | r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib); | ||
1018 | if (r) { | ||
1019 | kfree(ib); | ||
1020 | return r; | 1048 | return r; |
1021 | } | ||
1022 | |||
1023 | ib->length_dw = 0; | ||
1024 | 1049 | ||
1025 | if (resv) { | 1050 | if (resv) { |
1026 | r = amdgpu_sync_resv(adev, &ib->sync, resv, | 1051 | r = amdgpu_sync_resv(adev, &job->sync, resv, |
1027 | AMDGPU_FENCE_OWNER_UNDEFINED); | 1052 | AMDGPU_FENCE_OWNER_UNDEFINED); |
1028 | if (r) { | 1053 | if (r) { |
1029 | DRM_ERROR("sync failed (%d).\n", r); | 1054 | DRM_ERROR("sync failed (%d).\n", r); |
@@ -1034,31 +1059,25 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, | |||
1034 | for (i = 0; i < num_loops; i++) { | 1059 | for (i = 0; i < num_loops; i++) { |
1035 | uint32_t cur_size_in_bytes = min(byte_count, max_bytes); | 1060 | uint32_t cur_size_in_bytes = min(byte_count, max_bytes); |
1036 | 1061 | ||
1037 | amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, | 1062 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, |
1038 | cur_size_in_bytes); | 1063 | dst_offset, cur_size_in_bytes); |
1039 | 1064 | ||
1040 | src_offset += cur_size_in_bytes; | 1065 | src_offset += cur_size_in_bytes; |
1041 | dst_offset += cur_size_in_bytes; | 1066 | dst_offset += cur_size_in_bytes; |
1042 | byte_count -= cur_size_in_bytes; | 1067 | byte_count -= cur_size_in_bytes; |
1043 | } | 1068 | } |
1044 | 1069 | ||
1045 | amdgpu_vm_pad_ib(adev, ib); | 1070 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
1046 | WARN_ON(ib->length_dw > num_dw); | 1071 | WARN_ON(job->ibs[0].length_dw > num_dw); |
1047 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, | 1072 | r = amdgpu_job_submit(job, ring, &adev->mman.entity, |
1048 | &amdgpu_vm_free_job, | 1073 | AMDGPU_FENCE_OWNER_UNDEFINED, fence); |
1049 | AMDGPU_FENCE_OWNER_UNDEFINED, | ||
1050 | fence); | ||
1051 | if (r) | 1074 | if (r) |
1052 | goto error_free; | 1075 | goto error_free; |
1053 | 1076 | ||
1054 | if (!amdgpu_enable_scheduler) { | ||
1055 | amdgpu_ib_free(adev, ib); | ||
1056 | kfree(ib); | ||
1057 | } | ||
1058 | return 0; | 1077 | return 0; |
1078 | |||
1059 | error_free: | 1079 | error_free: |
1060 | amdgpu_ib_free(adev, ib); | 1080 | amdgpu_job_free(job); |
1061 | kfree(ib); | ||
1062 | return r; | 1081 | return r; |
1063 | } | 1082 | } |
1064 | 1083 | ||
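
The copy path illustrates the job lifecycle that replaces the hand-rolled IB bookkeeping: allocate a job with an embedded IB, fill it, then either hand it to an entity (ownership passes to the scheduler) or free it on error. Condensed from the hunk above:

	struct amdgpu_job *job;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
	if (r)
		return r;

	/* ... emit copy packets into job->ibs[0] ... */
	amdgpu_ring_pad_ib(ring, &job->ibs[0]);

	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		amdgpu_job_free(job);	/* submit failed, the job is still ours */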
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 53f987aeeacf..1de82bf4fc79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | |||
@@ -91,6 +91,8 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work); | |||
91 | 91 | ||
92 | int amdgpu_uvd_sw_init(struct amdgpu_device *adev) | 92 | int amdgpu_uvd_sw_init(struct amdgpu_device *adev) |
93 | { | 93 | { |
94 | struct amdgpu_ring *ring; | ||
95 | struct amd_sched_rq *rq; | ||
94 | unsigned long bo_size; | 96 | unsigned long bo_size; |
95 | const char *fw_name; | 97 | const char *fw_name; |
96 | const struct common_firmware_header *hdr; | 98 | const struct common_firmware_header *hdr; |
@@ -191,6 +193,15 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) | |||
191 | 193 | ||
192 | amdgpu_bo_unreserve(adev->uvd.vcpu_bo); | 194 | amdgpu_bo_unreserve(adev->uvd.vcpu_bo); |
193 | 195 | ||
196 | ring = &adev->uvd.ring; | ||
197 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | ||
198 | r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity, | ||
199 | rq, amdgpu_sched_jobs); | ||
200 | if (r != 0) { | ||
201 | DRM_ERROR("Failed setting up UVD run queue.\n"); | ||
202 | return r; | ||
203 | } | ||
204 | |||
194 | for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { | 205 | for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { |
195 | atomic_set(&adev->uvd.handles[i], 0); | 206 | atomic_set(&adev->uvd.handles[i], 0); |
196 | adev->uvd.filp[i] = NULL; | 207 | adev->uvd.filp[i] = NULL; |
@@ -210,6 +221,8 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) | |||
210 | if (adev->uvd.vcpu_bo == NULL) | 221 | if (adev->uvd.vcpu_bo == NULL) |
211 | return 0; | 222 | return 0; |
212 | 223 | ||
224 | amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); | ||
225 | |||
213 | r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); | 226 | r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); |
214 | if (!r) { | 227 | if (!r) { |
215 | amdgpu_bo_kunmap(adev->uvd.vcpu_bo); | 228 | amdgpu_bo_kunmap(adev->uvd.vcpu_bo); |
@@ -241,7 +254,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) | |||
241 | 254 | ||
242 | amdgpu_uvd_note_usage(adev); | 255 | amdgpu_uvd_note_usage(adev); |
243 | 256 | ||
244 | r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence); | 257 | r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence); |
245 | if (r) { | 258 | if (r) { |
246 | DRM_ERROR("Error destroying UVD (%d)!\n", r); | 259 | DRM_ERROR("Error destroying UVD (%d)!\n", r); |
247 | continue; | 260 | continue; |
@@ -295,7 +308,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) | |||
295 | 308 | ||
296 | amdgpu_uvd_note_usage(adev); | 309 | amdgpu_uvd_note_usage(adev); |
297 | 310 | ||
298 | r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence); | 311 | r = amdgpu_uvd_get_destroy_msg(ring, handle, |
312 | false, &fence); | ||
299 | if (r) { | 313 | if (r) { |
300 | DRM_ERROR("Error destroying UVD (%d)!\n", r); | 314 | DRM_ERROR("Error destroying UVD (%d)!\n", r); |
301 | continue; | 315 | continue; |
@@ -616,7 +630,6 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) | |||
616 | { | 630 | { |
617 | struct amdgpu_bo_va_mapping *mapping; | 631 | struct amdgpu_bo_va_mapping *mapping; |
618 | struct amdgpu_bo *bo; | 632 | struct amdgpu_bo *bo; |
619 | struct amdgpu_ib *ib; | ||
620 | uint32_t cmd, lo, hi; | 633 | uint32_t cmd, lo, hi; |
621 | uint64_t start, end; | 634 | uint64_t start, end; |
622 | uint64_t addr; | 635 | uint64_t addr; |
@@ -638,9 +651,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) | |||
638 | addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; | 651 | addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; |
639 | start += addr; | 652 | start += addr; |
640 | 653 | ||
641 | ib = &ctx->parser->ibs[ctx->ib_idx]; | 654 | amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0, |
642 | ib->ptr[ctx->data0] = start & 0xFFFFFFFF; | 655 | lower_32_bits(start)); |
643 | ib->ptr[ctx->data1] = start >> 32; | 656 | amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1, |
657 | upper_32_bits(start)); | ||
644 | 658 | ||
645 | cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1; | 659 | cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1; |
646 | if (cmd < 0x4) { | 660 | if (cmd < 0x4) { |
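
Since the parser's IBs moved into parser->job->ibs, direct ib->ptr[] pokes give way to the accessor pair; presumably a thin wrapper along these lines (hypothetical sketch, matching how it is called here):

	static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
					       uint32_t ib_idx, int idx,
					       uint32_t value)
	{
		p->job->ibs[ib_idx].ptr[idx] = value;
	}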
@@ -702,7 +716,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) | |||
702 | static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, | 716 | static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, |
703 | int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) | 717 | int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) |
704 | { | 718 | { |
705 | struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx]; | 719 | struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx]; |
706 | int i, r; | 720 | int i, r; |
707 | 721 | ||
708 | ctx->idx++; | 722 | ctx->idx++; |
@@ -748,7 +762,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, | |||
748 | static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, | 762 | static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, |
749 | int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) | 763 | int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) |
750 | { | 764 | { |
751 | struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx]; | 765 | struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx]; |
752 | int r; | 766 | int r; |
753 | 767 | ||
754 | for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) { | 768 | for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) { |
@@ -790,7 +804,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) | |||
790 | [0x00000003] = 2048, | 804 | [0x00000003] = 2048, |
791 | [0x00000004] = 0xFFFFFFFF, | 805 | [0x00000004] = 0xFFFFFFFF, |
792 | }; | 806 | }; |
793 | struct amdgpu_ib *ib = &parser->ibs[ib_idx]; | 807 | struct amdgpu_ib *ib = &parser->job->ibs[ib_idx]; |
794 | int r; | 808 | int r; |
795 | 809 | ||
796 | if (ib->length_dw % 16) { | 810 | if (ib->length_dw % 16) { |
@@ -823,22 +837,14 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) | |||
823 | return 0; | 837 | return 0; |
824 | } | 838 | } |
825 | 839 | ||
826 | static int amdgpu_uvd_free_job( | 840 | static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, |
827 | struct amdgpu_job *job) | 841 | bool direct, struct fence **fence) |
828 | { | ||
829 | amdgpu_ib_free(job->adev, job->ibs); | ||
830 | kfree(job->ibs); | ||
831 | return 0; | ||
832 | } | ||
833 | |||
834 | static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, | ||
835 | struct amdgpu_bo *bo, | ||
836 | struct fence **fence) | ||
837 | { | 842 | { |
838 | struct ttm_validate_buffer tv; | 843 | struct ttm_validate_buffer tv; |
839 | struct ww_acquire_ctx ticket; | 844 | struct ww_acquire_ctx ticket; |
840 | struct list_head head; | 845 | struct list_head head; |
841 | struct amdgpu_ib *ib = NULL; | 846 | struct amdgpu_job *job; |
847 | struct amdgpu_ib *ib; | ||
842 | struct fence *f = NULL; | 848 | struct fence *f = NULL; |
843 | struct amdgpu_device *adev = ring->adev; | 849 | struct amdgpu_device *adev = ring->adev; |
844 | uint64_t addr; | 850 | uint64_t addr; |
@@ -862,15 +868,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, | |||
862 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); | 868 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); |
863 | if (r) | 869 | if (r) |
864 | goto err; | 870 | goto err; |
865 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | 871 | |
866 | if (!ib) { | 872 | r = amdgpu_job_alloc_with_ib(adev, 64, &job); |
867 | r = -ENOMEM; | ||
868 | goto err; | ||
869 | } | ||
870 | r = amdgpu_ib_get(ring, NULL, 64, ib); | ||
871 | if (r) | 873 | if (r) |
872 | goto err1; | 874 | goto err; |
873 | 875 | ||
876 | ib = &job->ibs[0]; | ||
874 | addr = amdgpu_bo_gpu_offset(bo); | 877 | addr = amdgpu_bo_gpu_offset(bo); |
875 | ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); | 878 | ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); |
876 | ib->ptr[1] = addr; | 879 | ib->ptr[1] = addr; |
@@ -882,12 +885,19 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, | |||
882 | ib->ptr[i] = PACKET2(0); | 885 | ib->ptr[i] = PACKET2(0); |
883 | ib->length_dw = 16; | 886 | ib->length_dw = 16; |
884 | 887 | ||
885 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, | 888 | if (direct) { |
886 | &amdgpu_uvd_free_job, | 889 | r = amdgpu_ib_schedule(ring, 1, ib, |
887 | AMDGPU_FENCE_OWNER_UNDEFINED, | 890 | AMDGPU_FENCE_OWNER_UNDEFINED, NULL, &f); |
888 | &f); | 891 | if (r) |
889 | if (r) | 892 | goto err_free; |
890 | goto err2; | 893 | |
894 | amdgpu_job_free(job); | ||
895 | } else { | ||
896 | r = amdgpu_job_submit(job, ring, &adev->uvd.entity, | ||
897 | AMDGPU_FENCE_OWNER_UNDEFINED, &f); | ||
898 | if (r) | ||
899 | goto err_free; | ||
900 | } | ||
891 | 901 | ||
892 | ttm_eu_fence_buffer_objects(&ticket, &head, f); | 902 | ttm_eu_fence_buffer_objects(&ticket, &head, f); |
893 | 903 | ||
@@ -895,16 +905,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, | |||
895 | *fence = fence_get(f); | 905 | *fence = fence_get(f); |
896 | amdgpu_bo_unref(&bo); | 906 | amdgpu_bo_unref(&bo); |
897 | fence_put(f); | 907 | fence_put(f); |
898 | if (amdgpu_enable_scheduler) | ||
899 | return 0; | ||
900 | 908 | ||
901 | amdgpu_ib_free(ring->adev, ib); | ||
902 | kfree(ib); | ||
903 | return 0; | 909 | return 0; |
904 | err2: | 910 | |
905 | amdgpu_ib_free(ring->adev, ib); | 911 | err_free: |
906 | err1: | 912 | amdgpu_job_free(job); |
907 | kfree(ib); | 913 | |
908 | err: | 914 | err: |
909 | ttm_eu_backoff_reservation(&ticket, &head); | 915 | ttm_eu_backoff_reservation(&ticket, &head); |
910 | return r; | 916 | return r; |
@@ -959,11 +965,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | |||
959 | amdgpu_bo_kunmap(bo); | 965 | amdgpu_bo_kunmap(bo); |
960 | amdgpu_bo_unreserve(bo); | 966 | amdgpu_bo_unreserve(bo); |
961 | 967 | ||
962 | return amdgpu_uvd_send_msg(ring, bo, fence); | 968 | return amdgpu_uvd_send_msg(ring, bo, true, fence); |
963 | } | 969 | } |
964 | 970 | ||
965 | int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | 971 | int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, |
966 | struct fence **fence) | 972 | bool direct, struct fence **fence) |
967 | { | 973 | { |
968 | struct amdgpu_device *adev = ring->adev; | 974 | struct amdgpu_device *adev = ring->adev; |
969 | struct amdgpu_bo *bo; | 975 | struct amdgpu_bo *bo; |
@@ -1001,7 +1007,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | |||
1001 | amdgpu_bo_kunmap(bo); | 1007 | amdgpu_bo_kunmap(bo); |
1002 | amdgpu_bo_unreserve(bo); | 1008 | amdgpu_bo_unreserve(bo); |
1003 | 1009 | ||
1004 | return amdgpu_uvd_send_msg(ring, bo, fence); | 1010 | return amdgpu_uvd_send_msg(ring, bo, direct, fence); |
1005 | } | 1011 | } |
1006 | 1012 | ||
1007 | static void amdgpu_uvd_idle_work_handler(struct work_struct *work) | 1013 | static void amdgpu_uvd_idle_work_handler(struct work_struct *work) |
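
amdgpu_uvd_send_msg() now distinguishes direct submission (amdgpu_ib_schedule straight onto the ring, used where blocking in the caller is acceptable) from scheduled submission through adev->uvd.entity. Callers pick the mode explicitly, as the hunks above show:

	/* IB test / bring-up: bypass the scheduler */
	r = amdgpu_uvd_get_destroy_msg(ring, handle, true, &fence);

	/* suspend or fd-close cleanup: queue through the UVD entity */
	r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence);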
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 1724c2c86151..9a3b449081a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | |||
@@ -31,7 +31,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev); | |||
31 | int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | 31 | int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, |
32 | struct fence **fence); | 32 | struct fence **fence); |
33 | int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | 33 | int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, |
34 | struct fence **fence); | 34 | bool direct, struct fence **fence); |
35 | void amdgpu_uvd_free_handles(struct amdgpu_device *adev, | 35 | void amdgpu_uvd_free_handles(struct amdgpu_device *adev, |
36 | struct drm_file *filp); | 36 | struct drm_file *filp); |
37 | int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx); | 37 | int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a745eeeb5d82..39c3aa60381a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | |||
@@ -74,6 +74,8 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work); | |||
74 | */ | 74 | */ |
75 | int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) | 75 | int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) |
76 | { | 76 | { |
77 | struct amdgpu_ring *ring; | ||
78 | struct amd_sched_rq *rq; | ||
77 | const char *fw_name; | 79 | const char *fw_name; |
78 | const struct common_firmware_header *hdr; | 80 | const struct common_firmware_header *hdr; |
79 | unsigned ucode_version, version_major, version_minor, binary_id; | 81 | unsigned ucode_version, version_major, version_minor, binary_id; |
@@ -170,6 +172,16 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) | |||
170 | return r; | 172 | return r; |
171 | } | 173 | } |
172 | 174 | ||
175 | |||
176 | ring = &adev->vce.ring[0]; | ||
177 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | ||
178 | r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, | ||
179 | rq, amdgpu_sched_jobs); | ||
180 | if (r != 0) { | ||
181 | DRM_ERROR("Failed setting up VCE run queue.\n"); | ||
182 | return r; | ||
183 | } | ||
184 | |||
173 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { | 185 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { |
174 | atomic_set(&adev->vce.handles[i], 0); | 186 | atomic_set(&adev->vce.handles[i], 0); |
175 | adev->vce.filp[i] = NULL; | 187 | adev->vce.filp[i] = NULL; |
@@ -190,6 +202,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) | |||
190 | if (adev->vce.vcpu_bo == NULL) | 202 | if (adev->vce.vcpu_bo == NULL) |
191 | return 0; | 203 | return 0; |
192 | 204 | ||
205 | amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); | ||
206 | |||
193 | amdgpu_bo_unref(&adev->vce.vcpu_bo); | 207 | amdgpu_bo_unref(&adev->vce.vcpu_bo); |
194 | 208 | ||
195 | amdgpu_ring_fini(&adev->vce.ring[0]); | 209 | amdgpu_ring_fini(&adev->vce.ring[0]); |
@@ -337,7 +351,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) | |||
337 | 351 | ||
338 | amdgpu_vce_note_usage(adev); | 352 | amdgpu_vce_note_usage(adev); |
339 | 353 | ||
340 | r = amdgpu_vce_get_destroy_msg(ring, handle, NULL); | 354 | r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL); |
341 | if (r) | 355 | if (r) |
342 | DRM_ERROR("Error destroying VCE handle (%d)!\n", r); | 356 | DRM_ERROR("Error destroying VCE handle (%d)!\n", r); |
343 | 357 | ||
@@ -346,14 +360,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) | |||
346 | } | 360 | } |
347 | } | 361 | } |
348 | 362 | ||
349 | static int amdgpu_vce_free_job( | ||
350 | struct amdgpu_job *job) | ||
351 | { | ||
352 | amdgpu_ib_free(job->adev, job->ibs); | ||
353 | kfree(job->ibs); | ||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | /** | 363 | /** |
358 | * amdgpu_vce_get_create_msg - generate a VCE create msg | 364 | * amdgpu_vce_get_create_msg - generate a VCE create msg |
359 | * | 365 | * |
@@ -368,21 +374,17 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | |||
368 | struct fence **fence) | 374 | struct fence **fence) |
369 | { | 375 | { |
370 | const unsigned ib_size_dw = 1024; | 376 | const unsigned ib_size_dw = 1024; |
371 | struct amdgpu_ib *ib = NULL; | 377 | struct amdgpu_job *job; |
378 | struct amdgpu_ib *ib; | ||
372 | struct fence *f = NULL; | 379 | struct fence *f = NULL; |
373 | struct amdgpu_device *adev = ring->adev; | ||
374 | uint64_t dummy; | 380 | uint64_t dummy; |
375 | int i, r; | 381 | int i, r; |
376 | 382 | ||
377 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | 383 | r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); |
378 | if (!ib) | 384 | if (r) |
379 | return -ENOMEM; | ||
380 | r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib); | ||
381 | if (r) { | ||
382 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | ||
383 | kfree(ib); | ||
384 | return r; | 385 | return r; |
385 | } | 386 | |
387 | ib = &job->ibs[0]; | ||
386 | 388 | ||
387 | dummy = ib->gpu_addr + 1024; | 389 | dummy = ib->gpu_addr + 1024; |
388 | 390 | ||
@@ -423,20 +425,19 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | |||
423 | for (i = ib->length_dw; i < ib_size_dw; ++i) | 425 | for (i = ib->length_dw; i < ib_size_dw; ++i) |
424 | ib->ptr[i] = 0x0; | 426 | ib->ptr[i] = 0x0; |
425 | 427 | ||
426 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, | 428 | r = amdgpu_ib_schedule(ring, 1, ib, AMDGPU_FENCE_OWNER_UNDEFINED, |
427 | &amdgpu_vce_free_job, | 429 | NULL, &f); |
428 | AMDGPU_FENCE_OWNER_UNDEFINED, | ||
429 | &f); | ||
430 | if (r) | 430 | if (r) |
431 | goto err; | 431 | goto err; |
432 | |||
433 | amdgpu_job_free(job); | ||
432 | if (fence) | 434 | if (fence) |
433 | *fence = fence_get(f); | 435 | *fence = fence_get(f); |
434 | fence_put(f); | 436 | fence_put(f); |
435 | if (amdgpu_enable_scheduler) | 437 | return 0; |
436 | return 0; | 438 | |
437 | err: | 439 | err: |
438 | amdgpu_ib_free(adev, ib); | 440 | amdgpu_job_free(job); |
439 | kfree(ib); | ||
440 | return r; | 441 | return r; |
441 | } | 442 | } |
442 | 443 | ||
@@ -451,26 +452,20 @@ err: | |||
451 | * Close up a stream for HW test or if userspace failed to do so | 452 | * Close up a stream for HW test or if userspace failed to do so |
452 | */ | 453 | */ |
453 | int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | 454 | int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, |
454 | struct fence **fence) | 455 | bool direct, struct fence **fence) |
455 | { | 456 | { |
456 | const unsigned ib_size_dw = 1024; | 457 | const unsigned ib_size_dw = 1024; |
457 | struct amdgpu_ib *ib = NULL; | 458 | struct amdgpu_job *job; |
459 | struct amdgpu_ib *ib; | ||
458 | struct fence *f = NULL; | 460 | struct fence *f = NULL; |
459 | struct amdgpu_device *adev = ring->adev; | ||
460 | uint64_t dummy; | 461 | uint64_t dummy; |
461 | int i, r; | 462 | int i, r; |
462 | 463 | ||
463 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | 464 | r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); |
464 | if (!ib) | 465 | if (r) |
465 | return -ENOMEM; | ||
466 | |||
467 | r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib); | ||
468 | if (r) { | ||
469 | kfree(ib); | ||
470 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | ||
471 | return r; | 466 | return r; |
472 | } | ||
473 | 467 | ||
468 | ib = &job->ibs[0]; | ||
474 | dummy = ib->gpu_addr + 1024; | 469 | dummy = ib->gpu_addr + 1024; |
475 | 470 | ||
476 | /* stitch together a VCE destroy msg */ | 471 | ||
@@ -490,20 +485,29 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | |||
490 | 485 | ||
491 | for (i = ib->length_dw; i < ib_size_dw; ++i) | 486 | for (i = ib->length_dw; i < ib_size_dw; ++i) |
492 | ib->ptr[i] = 0x0; | 487 | ib->ptr[i] = 0x0; |
493 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, | 488 | |
494 | &amdgpu_vce_free_job, | 489 | if (direct) { |
495 | AMDGPU_FENCE_OWNER_UNDEFINED, | 490 | r = amdgpu_ib_schedule(ring, 1, ib, |
496 | &f); | 491 | AMDGPU_FENCE_OWNER_UNDEFINED, |
497 | if (r) | 492 | NULL, &f); |
498 | goto err; | 493 | if (r) |
494 | goto err; | ||
495 | |||
496 | amdgpu_job_free(job); | ||
497 | } else { | ||
498 | r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, | ||
499 | AMDGPU_FENCE_OWNER_UNDEFINED, &f); | ||
500 | if (r) | ||
501 | goto err; | ||
502 | } | ||
503 | |||
499 | if (fence) | 504 | if (fence) |
500 | *fence = fence_get(f); | 505 | *fence = fence_get(f); |
501 | fence_put(f); | 506 | fence_put(f); |
502 | if (amdgpu_enable_scheduler) | 507 | return 0; |
503 | return 0; | 508 | |
504 | err: | 509 | err: |
505 | amdgpu_ib_free(adev, ib); | 510 | amdgpu_job_free(job); |
506 | kfree(ib); | ||
507 | return r; | 511 | return r; |
508 | } | 512 | } |
509 | 513 | ||
@@ -521,7 +525,6 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, | |||
521 | int lo, int hi, unsigned size, uint32_t index) | 525 | int lo, int hi, unsigned size, uint32_t index) |
522 | { | 526 | { |
523 | struct amdgpu_bo_va_mapping *mapping; | 527 | struct amdgpu_bo_va_mapping *mapping; |
524 | struct amdgpu_ib *ib = &p->ibs[ib_idx]; | ||
525 | struct amdgpu_bo *bo; | 528 | struct amdgpu_bo *bo; |
526 | uint64_t addr; | 529 | uint64_t addr; |
527 | 530 | ||
@@ -550,8 +553,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, | |||
550 | addr += amdgpu_bo_gpu_offset(bo); | 553 | addr += amdgpu_bo_gpu_offset(bo); |
551 | addr -= ((uint64_t)size) * ((uint64_t)index); | 554 | addr -= ((uint64_t)size) * ((uint64_t)index); |
552 | 555 | ||
553 | ib->ptr[lo] = addr & 0xFFFFFFFF; | 556 | amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr)); |
554 | ib->ptr[hi] = addr >> 32; | 557 | amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr)); |
555 | 558 | ||
556 | return 0; | 559 | return 0; |
557 | } | 560 | } |
@@ -606,7 +609,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, | |||
606 | */ | 609 | */ |
607 | int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) | 610 | int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) |
608 | { | 611 | { |
609 | struct amdgpu_ib *ib = &p->ibs[ib_idx]; | 612 | struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; |
610 | unsigned fb_idx = 0, bs_idx = 0; | 613 | unsigned fb_idx = 0, bs_idx = 0; |
611 | int session_idx = -1; | 614 | int session_idx = -1; |
612 | bool destroyed = false; | 615 | bool destroyed = false; |
@@ -743,30 +746,6 @@ out: | |||
743 | } | 746 | } |
744 | 747 | ||
745 | /** | 748 | /** |
746 | * amdgpu_vce_ring_emit_semaphore - emit a semaphore command | ||
747 | * | ||
748 | * @ring: engine to use | ||
749 | * @semaphore: address of semaphore | ||
750 | * @emit_wait: true=emit wait, false=emit signal | ||
751 | * | ||
752 | */ | ||
753 | bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
754 | struct amdgpu_semaphore *semaphore, | ||
755 | bool emit_wait) | ||
756 | { | ||
757 | uint64_t addr = semaphore->gpu_addr; | ||
758 | |||
759 | amdgpu_ring_write(ring, VCE_CMD_SEMAPHORE); | ||
760 | amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF); | ||
761 | amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF); | ||
762 | amdgpu_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0)); | ||
763 | if (!emit_wait) | ||
764 | amdgpu_ring_write(ring, VCE_CMD_END); | ||
765 | |||
766 | return true; | ||
767 | } | ||
768 | |||
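
The deleted emitter records the VCE semaphore packet layout for posterity: an 8-byte-aligned GPU address split across two 20-bit fields, plus a control word whose low bit selects wait versus signal. Decoded from the writes above:

	low  = (addr >> 3)  & 0x000FFFFF;	/* address bits  3..22 */
	high = (addr >> 23) & 0x000FFFFF;	/* address bits 23..42 */
	ctrl = 0x01003000 | (emit_wait ? 1 : 0);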
769 | /** | ||
770 | * amdgpu_vce_ring_emit_ib - execute indirect buffer | 749 | * amdgpu_vce_ring_emit_ib - execute indirect buffer |
771 | * | 750 | * |
772 | * @ring: engine to use | 751 | * @ring: engine to use |
@@ -814,14 +793,14 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) | |||
814 | unsigned i; | 793 | unsigned i; |
815 | int r; | 794 | int r; |
816 | 795 | ||
817 | r = amdgpu_ring_lock(ring, 16); | 796 | r = amdgpu_ring_alloc(ring, 16); |
818 | if (r) { | 797 | if (r) { |
819 | DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", | 798 | DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", |
820 | ring->idx, r); | 799 | ring->idx, r); |
821 | return r; | 800 | return r; |
822 | } | 801 | } |
823 | amdgpu_ring_write(ring, VCE_CMD_END); | 802 | amdgpu_ring_write(ring, VCE_CMD_END); |
824 | amdgpu_ring_unlock_commit(ring); | 803 | amdgpu_ring_commit(ring); |
825 | 804 | ||
826 | for (i = 0; i < adev->usec_timeout; i++) { | 805 | for (i = 0; i < adev->usec_timeout; i++) { |
827 | if (amdgpu_ring_get_rptr(ring) != rptr) | 806 | if (amdgpu_ring_get_rptr(ring) != rptr) |
@@ -862,7 +841,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring) | |||
862 | goto error; | 841 | goto error; |
863 | } | 842 | } |
864 | 843 | ||
865 | r = amdgpu_vce_get_destroy_msg(ring, 1, &fence); | 844 | r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); |
866 | if (r) { | 845 | if (r) { |
867 | DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); | 846 | DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); |
868 | goto error; | 847 | goto error; |
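
With explicit ring locking on the way out, amdgpu_ring_lock/amdgpu_ring_unlock_commit become amdgpu_ring_alloc/amdgpu_ring_commit; the ring test keeps the same shape:

	r = amdgpu_ring_alloc(ring, 16);	/* reserve 16 dwords of ring space */
	if (r)
		return r;
	amdgpu_ring_write(ring, VCE_CMD_END);
	amdgpu_ring_commit(ring);		/* kick the ring */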
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ba2da8ee5906..ef99d2370182 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | |||
@@ -31,12 +31,9 @@ int amdgpu_vce_resume(struct amdgpu_device *adev); | |||
31 | int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | 31 | int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, |
32 | struct fence **fence); | 32 | struct fence **fence); |
33 | int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | 33 | int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, |
34 | struct fence **fence); | 34 | bool direct, struct fence **fence); |
35 | void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); | 35 | void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); |
36 | int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); | 36 | int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); |
37 | bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
38 | struct amdgpu_semaphore *semaphore, | ||
39 | bool emit_wait); | ||
40 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); | 37 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); |
41 | void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, | 38 | void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, |
42 | unsigned flags); | 39 | unsigned flags); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9599f7559b3d..264c5968a1d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
@@ -55,7 +55,7 @@ | |||
55 | * | 55 | * |
56 | * @adev: amdgpu_device pointer | 56 | * @adev: amdgpu_device pointer |
57 | * | 57 | * |
58 | * Calculate the number of page directory entries (cayman+). | 58 | * Calculate the number of page directory entries. |
59 | */ | 59 | */ |
60 | static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev) | 60 | static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev) |
61 | { | 61 | { |
@@ -67,7 +67,7 @@ static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev) | |||
67 | * | 67 | * |
68 | * @adev: amdgpu_device pointer | 68 | * @adev: amdgpu_device pointer |
69 | * | 69 | * |
70 | * Calculate the size of the page directory in bytes (cayman+). | 70 | * Calculate the size of the page directory in bytes. |
71 | */ | 71 | */ |
72 | static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev) | 72 | static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev) |
73 | { | 73 | { |
@@ -89,8 +89,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
89 | struct amdgpu_bo_list_entry *entry) | 89 | struct amdgpu_bo_list_entry *entry) |
90 | { | 90 | { |
91 | entry->robj = vm->page_directory; | 91 | entry->robj = vm->page_directory; |
92 | entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM; | ||
93 | entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM; | ||
94 | entry->priority = 0; | 92 | entry->priority = 0; |
95 | entry->tv.bo = &vm->page_directory->tbo; | 93 | entry->tv.bo = &vm->page_directory->tbo; |
96 | entry->tv.shared = true; | 94 | entry->tv.shared = true; |
@@ -154,29 +152,34 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, | |||
154 | * @vm: vm to allocate id for | 152 | * @vm: vm to allocate id for |
155 | * @ring: ring we want to submit job to | 153 | * @ring: ring we want to submit job to |
156 | * @sync: sync object where we add dependencies | 154 | * @sync: sync object where we add dependencies |
155 | * @fence: fence protecting ID from reuse | ||
157 | * | 156 | * |
158 | * Allocate an id for the vm, adding fences to the sync obj as necessary. | 157 | * Allocate an id for the vm, adding fences to the sync obj as necessary. |
159 | * | ||
160 | * Global mutex must be locked! | ||
161 | */ | 158 | */ |
162 | int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | 159 | int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, |
163 | struct amdgpu_sync *sync) | 160 | struct amdgpu_sync *sync, struct fence *fence) |
164 | { | 161 | { |
165 | struct fence *best[AMDGPU_MAX_RINGS] = {}; | ||
166 | struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; | 162 | struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; |
167 | struct amdgpu_device *adev = ring->adev; | 163 | struct amdgpu_device *adev = ring->adev; |
164 | struct amdgpu_vm_manager_id *id; | ||
165 | int r; | ||
168 | 166 | ||
169 | unsigned choices[2] = {}; | 167 | mutex_lock(&adev->vm_manager.lock); |
170 | unsigned i; | ||
171 | 168 | ||
172 | /* check if the id is still valid */ | 169 | /* check if the id is still valid */ |
173 | if (vm_id->id) { | 170 | if (vm_id->id) { |
174 | unsigned id = vm_id->id; | ||
175 | long owner; | 171 | long owner; |
176 | 172 | ||
177 | owner = atomic_long_read(&adev->vm_manager.ids[id].owner); | 173 | id = &adev->vm_manager.ids[vm_id->id]; |
174 | owner = atomic_long_read(&id->owner); | ||
178 | if (owner == (long)vm) { | 175 | if (owner == (long)vm) { |
179 | trace_amdgpu_vm_grab_id(vm_id->id, ring->idx); | 176 | list_move_tail(&id->list, &adev->vm_manager.ids_lru); |
177 | trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); | ||
178 | |||
179 | fence_put(id->active); | ||
180 | id->active = fence_get(fence); | ||
181 | |||
182 | mutex_unlock(&adev->vm_manager.lock); | ||
180 | return 0; | 183 | return 0; |
181 | } | 184 | } |
182 | } | 185 | } |
@@ -184,41 +187,24 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | |||
184 | /* we definitely need to flush */ | 187 | /* we definitely need to flush */ |
185 | vm_id->pd_gpu_addr = ~0ll; | 188 | vm_id->pd_gpu_addr = ~0ll; |
186 | 189 | ||
187 | /* skip over VMID 0, since it is the system VM */ | 190 | id = list_first_entry(&adev->vm_manager.ids_lru, |
188 | for (i = 1; i < adev->vm_manager.nvm; ++i) { | 191 | struct amdgpu_vm_manager_id, |
189 | struct fence *fence = adev->vm_manager.ids[i].active; | 192 | list); |
190 | struct amdgpu_ring *fring; | 193 | list_move_tail(&id->list, &adev->vm_manager.ids_lru); |
191 | 194 | atomic_long_set(&id->owner, (long)vm); | |
192 | if (fence == NULL) { | ||
193 | /* found a free one */ | ||
194 | vm_id->id = i; | ||
195 | trace_amdgpu_vm_grab_id(i, ring->idx); | ||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | fring = amdgpu_ring_from_fence(fence); | ||
200 | if (best[fring->idx] == NULL || | ||
201 | fence_is_later(best[fring->idx], fence)) { | ||
202 | best[fring->idx] = fence; | ||
203 | choices[fring == ring ? 0 : 1] = i; | ||
204 | } | ||
205 | } | ||
206 | 195 | ||
207 | for (i = 0; i < 2; ++i) { | 196 | vm_id->id = id - adev->vm_manager.ids; |
208 | if (choices[i]) { | 197 | trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); |
209 | struct fence *fence; | ||
210 | 198 | ||
211 | fence = adev->vm_manager.ids[choices[i]].active; | 199 | r = amdgpu_sync_fence(ring->adev, sync, id->active); |
212 | vm_id->id = choices[i]; | ||
213 | 200 | ||
214 | trace_amdgpu_vm_grab_id(choices[i], ring->idx); | 201 | if (!r) { |
215 | return amdgpu_sync_fence(ring->adev, sync, fence); | 202 | fence_put(id->active); |
216 | } | 203 | id->active = fence_get(fence); |
217 | } | 204 | } |
218 | 205 | ||
219 | /* should never happen */ | 206 | mutex_unlock(&adev->vm_manager.lock); |
220 | BUG(); | 207 | return r; |
221 | return -EINVAL; | ||
222 | } | 208 | } |
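
The rework above replaces the per-ring fence scan with an LRU of VMIDs under vm_manager.lock: a still-valid ID just moves to the tail, otherwise the head (least recently used) is recycled. A minimal userspace sketch of that reuse policy, with a hand-rolled list and invented names rather than the kernel's list API:

```c
#include <stdio.h>

#define NUM_IDS 8

struct vmid {
	int id;
	void *owner;               /* vm that last used this id */
	struct vmid *prev, *next;
};

static struct vmid ids[NUM_IDS];
static struct vmid lru = { .prev = &lru, .next = &lru };  /* list head */

static void list_del(struct vmid *v)
{
	v->prev->next = v->next;
	v->next->prev = v->prev;
}

static void list_add_tail(struct vmid *v)
{
	v->prev = lru.prev;
	v->next = &lru;
	lru.prev->next = v;
	lru.prev = v;
}

static int grab_id(void *vm)
{
	struct vmid *v = lru.next;  /* least recently used entry */

	list_del(v);
	list_add_tail(v);           /* it is now the most recently used */
	v->owner = vm;
	return v->id;
}

int main(void)
{
	int i;

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < NUM_IDS; ++i) {
		ids[i].id = i;
		list_add_tail(&ids[i]);
	}
	printf("first grab -> id %d\n", grab_id((void *)1));
	printf("second grab -> id %d\n", grab_id((void *)2));
	return 0;
}
```
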
223 | 209 | ||
224 | /** | 210 | /** |
@@ -228,9 +214,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | |||
228 | * @vm: vm we want to flush | 214 | * @vm: vm we want to flush |
229 | * @updates: last vm update that we waited for | 215 | * @updates: last vm update that we waited for |
230 | * | 216 | * |
231 | * Flush the vm (cayman+). | 217 | * Flush the vm. |
232 | * | ||
233 | * Global and local mutex must be locked! | ||
234 | */ | 218 | */ |
235 | void amdgpu_vm_flush(struct amdgpu_ring *ring, | 219 | void amdgpu_vm_flush(struct amdgpu_ring *ring, |
236 | struct amdgpu_vm *vm, | 220 | struct amdgpu_vm *vm, |
@@ -260,36 +244,12 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, | |||
260 | } | 244 | } |
261 | 245 | ||
262 | /** | 246 | /** |
263 | * amdgpu_vm_fence - remember fence for vm | ||
264 | * | ||
265 | * @adev: amdgpu_device pointer | ||
266 | * @vm: vm we want to fence | ||
267 | * @fence: fence to remember | ||
268 | * | ||
269 | * Fence the vm (cayman+). | ||
270 | * Set the fence used to protect page table and id. | ||
271 | * | ||
272 | * Global and local mutex must be locked! | ||
273 | */ | ||
274 | void amdgpu_vm_fence(struct amdgpu_device *adev, | ||
275 | struct amdgpu_vm *vm, | ||
276 | struct fence *fence) | ||
277 | { | ||
278 | struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence); | ||
279 | unsigned vm_id = vm->ids[ring->idx].id; | ||
280 | |||
281 | fence_put(adev->vm_manager.ids[vm_id].active); | ||
282 | adev->vm_manager.ids[vm_id].active = fence_get(fence); | ||
283 | atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm); | ||
284 | } | ||
285 | |||
286 | /** | ||
287 | * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo | 247 | * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo |
288 | * | 248 | * |
289 | * @vm: requested vm | 249 | * @vm: requested vm |
290 | * @bo: requested buffer object | 250 | * @bo: requested buffer object |
291 | * | 251 | * |
292 | * Find @bo inside the requested vm (cayman+). | 252 | * Find @bo inside the requested vm. |
293 | * Search inside the @bos vm list for the requested vm | 253 | * Search inside the @bos vm list for the requested vm |
294 | * Returns the found bo_va or NULL if none is found | 254 | * Returns the found bo_va or NULL if none is found |
295 | * | 255 | * |
@@ -312,32 +272,40 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, | |||
312 | * amdgpu_vm_update_pages - helper to call the right asic function | 272 | * amdgpu_vm_update_pages - helper to call the right asic function |
313 | * | 273 | * |
314 | * @adev: amdgpu_device pointer | 274 | * @adev: amdgpu_device pointer |
275 | * @gtt: GART instance to use for mapping | ||
276 | * @gtt_flags: GTT hw access flags | ||
315 | * @ib: indirect buffer to fill with commands | 277 | * @ib: indirect buffer to fill with commands |
316 | * @pe: addr of the page entry | 278 | * @pe: addr of the page entry |
317 | * @addr: dst addr to write into pe | 279 | * @addr: dst addr to write into pe |
318 | * @count: number of page entries to update | 280 | * @count: number of page entries to update |
319 | * @incr: increase next addr by incr bytes | 281 | * @incr: increase next addr by incr bytes |
320 | * @flags: hw access flags | 282 | * @flags: hw access flags |
321 | * @gtt_flags: GTT hw access flags | ||
322 | * | 283 | * |
323 | * Traces the parameters and calls the right asic functions | 284 | * Traces the parameters and calls the right asic functions |
324 | * to setup the page table using the DMA. | 285 | * to setup the page table using the DMA. |
325 | */ | 286 | */ |
326 | static void amdgpu_vm_update_pages(struct amdgpu_device *adev, | 287 | static void amdgpu_vm_update_pages(struct amdgpu_device *adev, |
288 | struct amdgpu_gart *gtt, | ||
289 | uint32_t gtt_flags, | ||
327 | struct amdgpu_ib *ib, | 290 | struct amdgpu_ib *ib, |
328 | uint64_t pe, uint64_t addr, | 291 | uint64_t pe, uint64_t addr, |
329 | unsigned count, uint32_t incr, | 292 | unsigned count, uint32_t incr, |
330 | uint32_t flags, uint32_t gtt_flags) | 293 | uint32_t flags) |
331 | { | 294 | { |
332 | trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); | 295 | trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); |
333 | 296 | ||
334 | if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) { | 297 | if ((gtt == &adev->gart) && (flags == gtt_flags)) { |
335 | uint64_t src = adev->gart.table_addr + (addr >> 12) * 8; | 298 | uint64_t src = gtt->table_addr + (addr >> 12) * 8; |
336 | amdgpu_vm_copy_pte(adev, ib, pe, src, count); | 299 | amdgpu_vm_copy_pte(adev, ib, pe, src, count); |
337 | 300 | ||
338 | } else if ((flags & AMDGPU_PTE_SYSTEM) || (count < 3)) { | 301 | } else if (gtt) { |
339 | amdgpu_vm_write_pte(adev, ib, pe, addr, | 302 | dma_addr_t *pages_addr = gtt->pages_addr; |
340 | count, incr, flags); | 303 | amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr, |
304 | count, incr, flags); | ||
305 | |||
306 | } else if (count < 3) { | ||
307 | amdgpu_vm_write_pte(adev, ib, NULL, pe, addr, | ||
308 | count, incr, flags); | ||
341 | 309 | ||
342 | } else { | 310 | } else { |
343 | amdgpu_vm_set_pte_pde(adev, ib, pe, addr, | 311 | amdgpu_vm_set_pte_pde(adev, ib, pe, addr, |
@@ -345,15 +313,6 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, | |||
345 | } | 313 | } |
346 | } | 314 | } |
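
amdgpu_vm_update_pages now keys its dispatch off the GART instance instead of the PTE flags: copy entries straight from the GART table when the flags match, write PTEs inline for GART-backed or tiny updates, and let the engine generate a linear run otherwise. A hedged sketch of that decision; the enum and parameter names are invented for illustration, not the driver's API:

```c
enum update_method { COPY_FROM_GART, WRITE_INLINE, SET_PTE_PDE };

static enum update_method pick_method(int gart_backed, int flags_match_gtt,
				      unsigned count)
{
	if (gart_backed && flags_match_gtt)
		return COPY_FROM_GART;  /* DMA-copy entries straight from the GART table */
	if (gart_backed || count < 3)
		return WRITE_INLINE;    /* emit each PTE as immediate data in the IB */
	return SET_PTE_PDE;             /* let the engine generate a linear run */
}
```
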
347 | 315 | ||
348 | int amdgpu_vm_free_job(struct amdgpu_job *job) | ||
349 | { | ||
350 | int i; | ||
351 | for (i = 0; i < job->num_ibs; i++) | ||
352 | amdgpu_ib_free(job->adev, &job->ibs[i]); | ||
353 | kfree(job->ibs); | ||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | /** | 316 | /** |
358 | * amdgpu_vm_clear_bo - initially clear the page dir/table | 317 | * amdgpu_vm_clear_bo - initially clear the page dir/table |
359 | * | 318 | * |
@@ -363,15 +322,18 @@ int amdgpu_vm_free_job(struct amdgpu_job *job) | |||
363 | * need to reserve bo first before calling it. | 322 | * need to reserve bo first before calling it. |
364 | */ | 323 | */ |
365 | static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | 324 | static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, |
325 | struct amdgpu_vm *vm, | ||
366 | struct amdgpu_bo *bo) | 326 | struct amdgpu_bo *bo) |
367 | { | 327 | { |
368 | struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; | 328 | struct amdgpu_ring *ring; |
369 | struct fence *fence = NULL; | 329 | struct fence *fence = NULL; |
370 | struct amdgpu_ib *ib; | 330 | struct amdgpu_job *job; |
371 | unsigned entries; | 331 | unsigned entries; |
372 | uint64_t addr; | 332 | uint64_t addr; |
373 | int r; | 333 | int r; |
374 | 334 | ||
335 | ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); | ||
336 | |||
375 | r = reservation_object_reserve_shared(bo->tbo.resv); | 337 | r = reservation_object_reserve_shared(bo->tbo.resv); |
376 | if (r) | 338 | if (r) |
377 | return r; | 339 | return r; |
@@ -383,56 +345,57 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | |||
383 | addr = amdgpu_bo_gpu_offset(bo); | 345 | addr = amdgpu_bo_gpu_offset(bo); |
384 | entries = amdgpu_bo_size(bo) / 8; | 346 | entries = amdgpu_bo_size(bo) / 8; |
385 | 347 | ||
386 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | 348 | r = amdgpu_job_alloc_with_ib(adev, 64, &job); |
387 | if (!ib) | 349 | if (r) |
388 | goto error; | 350 | goto error; |
389 | 351 | ||
390 | r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib); | 352 | amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries, |
353 | 0, 0); | ||
354 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); | ||
355 | |||
356 | WARN_ON(job->ibs[0].length_dw > 64); | ||
357 | r = amdgpu_job_submit(job, ring, &vm->entity, | ||
358 | AMDGPU_FENCE_OWNER_VM, &fence); | ||
391 | if (r) | 359 | if (r) |
392 | goto error_free; | 360 | goto error_free; |
393 | 361 | ||
394 | ib->length_dw = 0; | 362 | amdgpu_bo_fence(bo, fence, true); |
395 | |||
396 | amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0); | ||
397 | amdgpu_vm_pad_ib(adev, ib); | ||
398 | WARN_ON(ib->length_dw > 64); | ||
399 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, | ||
400 | &amdgpu_vm_free_job, | ||
401 | AMDGPU_FENCE_OWNER_VM, | ||
402 | &fence); | ||
403 | if (!r) | ||
404 | amdgpu_bo_fence(bo, fence, true); | ||
405 | fence_put(fence); | 363 | fence_put(fence); |
406 | if (amdgpu_enable_scheduler) | 364 | return 0; |
407 | return 0; | ||
408 | 365 | ||
409 | error_free: | 366 | error_free: |
410 | amdgpu_ib_free(adev, ib); | 367 | amdgpu_job_free(job); |
411 | kfree(ib); | ||
412 | 368 | ||
413 | error: | 369 | error: |
414 | return r; | 370 | return r; |
415 | } | 371 | } |
416 | 372 | ||
417 | /** | 373 | /** |
418 | * amdgpu_vm_map_gart - get the physical address of a gart page | 374 | * amdgpu_vm_map_gart - Resolve gart mapping of addr |
419 | * | 375 | * |
420 | * @adev: amdgpu_device pointer | 376 | * @pages_addr: optional DMA address to use for lookup |
421 | * @addr: the unmapped addr | 377 | * @addr: the unmapped addr |
422 | * | 378 | * |
423 | * Look up the physical address of the page that the pte resolves | 379 | * Look up the physical address of the page that the pte resolves |
424 | * to (cayman+). | 380 | * to and return the pointer for the page table entry. |
425 | * Returns the physical address of the page. | ||
426 | */ | 381 | */ |
427 | uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr) | 382 | uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) |
428 | { | 383 | { |
429 | uint64_t result; | 384 | uint64_t result; |
430 | 385 | ||
431 | /* page table offset */ | 386 | if (pages_addr) { |
432 | result = adev->gart.pages_addr[addr >> PAGE_SHIFT]; | 387 | /* page table offset */ |
388 | result = pages_addr[addr >> PAGE_SHIFT]; | ||
433 | 389 | ||
434 | /* in case cpu page size != gpu page size*/ | 390 | /* in case cpu page size != gpu page size*/ |
435 | result |= addr & (~PAGE_MASK); | 391 | result |= addr & (~PAGE_MASK); |
392 | |||
393 | } else { | ||
394 | /* No mapping required */ | ||
395 | result = addr; | ||
396 | } | ||
397 | |||
398 | result &= 0xFFFFFFFFFFFFF000ULL; | ||
436 | 399 | ||
437 | return result; | 400 | return result; |
438 | } | 401 | } |
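
With pages_addr passed in explicitly, the lookup is simple enough to model standalone. A self-contained sketch, with dma_addr_t stood in by uint64_t and 4K page constants assumed:

```c
#include <stdint.h>

#define GPU_PAGE_SHIFT 12
#define GPU_PAGE_MASK  (~((uint64_t)4096 - 1))

uint64_t map_gart(const uint64_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	if (pages_addr) {
		/* page table offset */
		result = pages_addr[addr >> GPU_PAGE_SHIFT];
		/* in case cpu page size != gpu page size */
		result |= addr & ~GPU_PAGE_MASK;
	} else {
		/* no mapping required */
		result = addr;
	}
	/* strip low flag bits, keep the 4K-aligned address */
	return result & 0xFFFFFFFFFFFFF000ULL;
}
```
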
@@ -446,45 +409,37 @@ uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr) | |||
446 | * @end: end of GPU address range | 409 | * @end: end of GPU address range |
447 | * | 410 | * |
448 | * Allocates new page tables if necessary | 411 | * Allocates new page tables if necessary |
449 | * and updates the page directory (cayman+). | 412 | * and updates the page directory. |
450 | * Returns 0 for success, error for failure. | 413 | * Returns 0 for success, error for failure. |
451 | * | ||
452 | * Global and local mutex must be locked! | ||
453 | */ | 414 | */ |
454 | int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, | 415 | int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, |
455 | struct amdgpu_vm *vm) | 416 | struct amdgpu_vm *vm) |
456 | { | 417 | { |
457 | struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; | 418 | struct amdgpu_ring *ring; |
458 | struct amdgpu_bo *pd = vm->page_directory; | 419 | struct amdgpu_bo *pd = vm->page_directory; |
459 | uint64_t pd_addr = amdgpu_bo_gpu_offset(pd); | 420 | uint64_t pd_addr = amdgpu_bo_gpu_offset(pd); |
460 | uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; | 421 | uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; |
461 | uint64_t last_pde = ~0, last_pt = ~0; | 422 | uint64_t last_pde = ~0, last_pt = ~0; |
462 | unsigned count = 0, pt_idx, ndw; | 423 | unsigned count = 0, pt_idx, ndw; |
424 | struct amdgpu_job *job; | ||
463 | struct amdgpu_ib *ib; | 425 | struct amdgpu_ib *ib; |
464 | struct fence *fence = NULL; | 426 | struct fence *fence = NULL; |
465 | 427 | ||
466 | int r; | 428 | int r; |
467 | 429 | ||
430 | ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); | ||
431 | |||
468 | /* padding, etc. */ | 432 | /* padding, etc. */ |
469 | ndw = 64; | 433 | ndw = 64; |
470 | 434 | ||
471 | /* assume the worst case */ | 435 | /* assume the worst case */ |
472 | ndw += vm->max_pde_used * 6; | 436 | ndw += vm->max_pde_used * 6; |
473 | 437 | ||
474 | /* update too big for an IB */ | 438 | r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); |
475 | if (ndw > 0xfffff) | 439 | if (r) |
476 | return -ENOMEM; | ||
477 | |||
478 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | ||
479 | if (!ib) | ||
480 | return -ENOMEM; | ||
481 | |||
482 | r = amdgpu_ib_get(ring, NULL, ndw * 4, ib); | ||
483 | if (r) { | ||
484 | kfree(ib); | ||
485 | return r; | 440 | return r; |
486 | } | 441 | |
487 | ib->length_dw = 0; | 442 | ib = &job->ibs[0]; |
488 | 443 | ||
489 | /* walk over the address space and update the page directory */ | 444 | /* walk over the address space and update the page directory */ |
490 | for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { | 445 | for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { |
@@ -504,9 +459,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, | |||
504 | ((last_pt + incr * count) != pt)) { | 459 | ((last_pt + incr * count) != pt)) { |
505 | 460 | ||
506 | if (count) { | 461 | if (count) { |
507 | amdgpu_vm_update_pages(adev, ib, last_pde, | 462 | amdgpu_vm_update_pages(adev, NULL, 0, ib, |
508 | last_pt, count, incr, | 463 | last_pde, last_pt, |
509 | AMDGPU_PTE_VALID, 0); | 464 | count, incr, |
465 | AMDGPU_PTE_VALID); | ||
510 | } | 466 | } |
511 | 467 | ||
512 | count = 1; | 468 | count = 1; |
@@ -518,17 +474,16 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, | |||
518 | } | 474 | } |
519 | 475 | ||
520 | if (count) | 476 | if (count) |
521 | amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count, | 477 | amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt, |
522 | incr, AMDGPU_PTE_VALID, 0); | 478 | count, incr, AMDGPU_PTE_VALID); |
523 | 479 | ||
524 | if (ib->length_dw != 0) { | 480 | if (ib->length_dw != 0) { |
525 | amdgpu_vm_pad_ib(adev, ib); | 481 | amdgpu_ring_pad_ib(ring, ib); |
526 | amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM); | 482 | amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv, |
483 | AMDGPU_FENCE_OWNER_VM); | ||
527 | WARN_ON(ib->length_dw > ndw); | 484 | WARN_ON(ib->length_dw > ndw); |
528 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, | 485 | r = amdgpu_job_submit(job, ring, &vm->entity, |
529 | &amdgpu_vm_free_job, | 486 | AMDGPU_FENCE_OWNER_VM, &fence); |
530 | AMDGPU_FENCE_OWNER_VM, | ||
531 | &fence); | ||
532 | if (r) | 487 | if (r) |
533 | goto error_free; | 488 | goto error_free; |
534 | 489 | ||
@@ -536,18 +491,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, | |||
536 | fence_put(vm->page_directory_fence); | 491 | fence_put(vm->page_directory_fence); |
537 | vm->page_directory_fence = fence_get(fence); | 492 | vm->page_directory_fence = fence_get(fence); |
538 | fence_put(fence); | 493 | fence_put(fence); |
539 | } | ||
540 | 494 | ||
541 | if (!amdgpu_enable_scheduler || ib->length_dw == 0) { | 495 | } else { |
542 | amdgpu_ib_free(adev, ib); | 496 | amdgpu_job_free(job); |
543 | kfree(ib); | ||
544 | } | 497 | } |
545 | 498 | ||
546 | return 0; | 499 | return 0; |
547 | 500 | ||
548 | error_free: | 501 | error_free: |
549 | amdgpu_ib_free(adev, ib); | 502 | amdgpu_job_free(job); |
550 | kfree(ib); | ||
551 | return r; | 503 | return r; |
552 | } | 504 | } |
553 | 505 | ||
@@ -555,20 +507,20 @@ error_free: | |||
555 | * amdgpu_vm_frag_ptes - add fragment information to PTEs | 507 | * amdgpu_vm_frag_ptes - add fragment information to PTEs |
556 | * | 508 | * |
557 | * @adev: amdgpu_device pointer | 509 | * @adev: amdgpu_device pointer |
510 | * @gtt: GART instance to use for mapping | ||
511 | * @gtt_flags: GTT hw mapping flags | ||
558 | * @ib: IB for the update | 512 | * @ib: IB for the update |
559 | * @pe_start: first PTE to handle | 513 | * @pe_start: first PTE to handle |
560 | * @pe_end: last PTE to handle | 514 | * @pe_end: last PTE to handle |
561 | * @addr: addr those PTEs should point to | 515 | * @addr: addr those PTEs should point to |
562 | * @flags: hw mapping flags | 516 | * @flags: hw mapping flags |
563 | * @gtt_flags: GTT hw mapping flags | ||
564 | * | ||
565 | * Global and local mutex must be locked! | ||
566 | */ | 517 | */ |
567 | static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, | 518 | static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, |
519 | struct amdgpu_gart *gtt, | ||
520 | uint32_t gtt_flags, | ||
568 | struct amdgpu_ib *ib, | 521 | struct amdgpu_ib *ib, |
569 | uint64_t pe_start, uint64_t pe_end, | 522 | uint64_t pe_start, uint64_t pe_end, |
570 | uint64_t addr, uint32_t flags, | 523 | uint64_t addr, uint32_t flags) |
571 | uint32_t gtt_flags) | ||
572 | { | 524 | { |
573 | /** | 525 | /** |
574 | * The MC L1 TLB supports variable sized pages, based on a fragment | 526 | * The MC L1 TLB supports variable sized pages, based on a fragment |
@@ -598,36 +550,39 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, | |||
598 | 550 | ||
599 | unsigned count; | 551 | unsigned count; |
600 | 552 | ||
553 | /* Abort early if there isn't anything to do */ | ||
554 | if (pe_start == pe_end) | ||
555 | return; | ||
556 | |||
601 | /* system pages are not contiguous */ | 557 | /* system pages are not contiguous */ |
602 | if ((flags & AMDGPU_PTE_SYSTEM) || !(flags & AMDGPU_PTE_VALID) || | 558 | if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { |
603 | (frag_start >= frag_end)) { | ||
604 | 559 | ||
605 | count = (pe_end - pe_start) / 8; | 560 | count = (pe_end - pe_start) / 8; |
606 | amdgpu_vm_update_pages(adev, ib, pe_start, addr, count, | 561 | amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start, |
607 | AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); | 562 | addr, count, AMDGPU_GPU_PAGE_SIZE, |
563 | flags); | ||
608 | return; | 564 | return; |
609 | } | 565 | } |
610 | 566 | ||
611 | /* handle the 4K area at the beginning */ | 567 | /* handle the 4K area at the beginning */ |
612 | if (pe_start != frag_start) { | 568 | if (pe_start != frag_start) { |
613 | count = (frag_start - pe_start) / 8; | 569 | count = (frag_start - pe_start) / 8; |
614 | amdgpu_vm_update_pages(adev, ib, pe_start, addr, count, | 570 | amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr, |
615 | AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); | 571 | count, AMDGPU_GPU_PAGE_SIZE, flags); |
616 | addr += AMDGPU_GPU_PAGE_SIZE * count; | 572 | addr += AMDGPU_GPU_PAGE_SIZE * count; |
617 | } | 573 | } |
618 | 574 | ||
619 | /* handle the area in the middle */ | 575 | /* handle the area in the middle */ |
620 | count = (frag_end - frag_start) / 8; | 576 | count = (frag_end - frag_start) / 8; |
621 | amdgpu_vm_update_pages(adev, ib, frag_start, addr, count, | 577 | amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count, |
622 | AMDGPU_GPU_PAGE_SIZE, flags | frag_flags, | 578 | AMDGPU_GPU_PAGE_SIZE, flags | frag_flags); |
623 | gtt_flags); | ||
624 | 579 | ||
625 | /* handle the 4K area at the end */ | 580 | /* handle the 4K area at the end */ |
626 | if (frag_end != pe_end) { | 581 | if (frag_end != pe_end) { |
627 | addr += AMDGPU_GPU_PAGE_SIZE * count; | 582 | addr += AMDGPU_GPU_PAGE_SIZE * count; |
628 | count = (pe_end - frag_end) / 8; | 583 | count = (pe_end - frag_end) / 8; |
629 | amdgpu_vm_update_pages(adev, ib, frag_end, addr, count, | 584 | amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr, |
630 | AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); | 585 | count, AMDGPU_GPU_PAGE_SIZE, flags); |
631 | } | 586 | } |
632 | } | 587 | } |
633 | 588 | ||
@@ -635,122 +590,105 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, | |||
635 | * amdgpu_vm_update_ptes - make sure that page tables are valid | 590 | * amdgpu_vm_update_ptes - make sure that page tables are valid |
636 | * | 591 | * |
637 | * @adev: amdgpu_device pointer | 592 | * @adev: amdgpu_device pointer |
593 | * @gtt: GART instance to use for mapping | ||
594 | * @gtt_flags: GTT hw mapping flags | ||
638 | * @vm: requested vm | 595 | * @vm: requested vm |
639 | * @start: start of GPU address range | 596 | * @start: start of GPU address range |
640 | * @end: end of GPU address range | 597 | * @end: end of GPU address range |
641 | * @dst: destination address to map to | 598 | * @dst: destination address to map to |
642 | * @flags: mapping flags | 599 | * @flags: mapping flags |
643 | * | 600 | * |
644 | * Update the page tables in the range @start - @end (cayman+). | 601 | * Update the page tables in the range @start - @end. |
645 | * | ||
646 | * Global and local mutex must be locked! | ||
647 | */ | 602 | */ |
648 | static int amdgpu_vm_update_ptes(struct amdgpu_device *adev, | 603 | static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, |
649 | struct amdgpu_vm *vm, | 604 | struct amdgpu_gart *gtt, |
650 | struct amdgpu_ib *ib, | 605 | uint32_t gtt_flags, |
651 | uint64_t start, uint64_t end, | 606 | struct amdgpu_vm *vm, |
652 | uint64_t dst, uint32_t flags, | 607 | struct amdgpu_ib *ib, |
653 | uint32_t gtt_flags) | 608 | uint64_t start, uint64_t end, |
609 | uint64_t dst, uint32_t flags) | ||
654 | { | 610 | { |
655 | uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; | 611 | const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; |
656 | uint64_t last_pte = ~0, last_dst = ~0; | ||
657 | void *owner = AMDGPU_FENCE_OWNER_VM; | ||
658 | unsigned count = 0; | ||
659 | uint64_t addr; | ||
660 | 612 | ||
661 | /* sync to everything on unmapping */ | 613 | uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0; |
662 | if (!(flags & AMDGPU_PTE_VALID)) | 614 | uint64_t addr; |
663 | owner = AMDGPU_FENCE_OWNER_UNDEFINED; | ||
664 | 615 | ||
665 | /* walk over the address space and update the page tables */ | 616 | /* walk over the address space and update the page tables */ |
666 | for (addr = start; addr < end; ) { | 617 | for (addr = start; addr < end; ) { |
667 | uint64_t pt_idx = addr >> amdgpu_vm_block_size; | 618 | uint64_t pt_idx = addr >> amdgpu_vm_block_size; |
668 | struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj; | 619 | struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj; |
669 | unsigned nptes; | 620 | unsigned nptes; |
670 | uint64_t pte; | 621 | uint64_t pe_start; |
671 | int r; | ||
672 | |||
673 | amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner); | ||
674 | r = reservation_object_reserve_shared(pt->tbo.resv); | ||
675 | if (r) | ||
676 | return r; | ||
677 | 622 | ||
678 | if ((addr & ~mask) == (end & ~mask)) | 623 | if ((addr & ~mask) == (end & ~mask)) |
679 | nptes = end - addr; | 624 | nptes = end - addr; |
680 | else | 625 | else |
681 | nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); | 626 | nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); |
682 | 627 | ||
683 | pte = amdgpu_bo_gpu_offset(pt); | 628 | pe_start = amdgpu_bo_gpu_offset(pt); |
684 | pte += (addr & mask) * 8; | 629 | pe_start += (addr & mask) * 8; |
685 | 630 | ||
686 | if ((last_pte + 8 * count) != pte) { | 631 | if (last_pe_end != pe_start) { |
687 | 632 | ||
688 | if (count) { | 633 | amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib, |
689 | amdgpu_vm_frag_ptes(adev, ib, last_pte, | 634 | last_pe_start, last_pe_end, |
690 | last_pte + 8 * count, | 635 | last_dst, flags); |
691 | last_dst, flags, | ||
692 | gtt_flags); | ||
693 | } | ||
694 | 636 | ||
695 | count = nptes; | 637 | last_pe_start = pe_start; |
696 | last_pte = pte; | 638 | last_pe_end = pe_start + 8 * nptes; |
697 | last_dst = dst; | 639 | last_dst = dst; |
698 | } else { | 640 | } else { |
699 | count += nptes; | 641 | last_pe_end += 8 * nptes; |
700 | } | 642 | } |
701 | 643 | ||
702 | addr += nptes; | 644 | addr += nptes; |
703 | dst += nptes * AMDGPU_GPU_PAGE_SIZE; | 645 | dst += nptes * AMDGPU_GPU_PAGE_SIZE; |
704 | } | 646 | } |
705 | 647 | ||
706 | if (count) { | 648 | amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib, |
707 | amdgpu_vm_frag_ptes(adev, ib, last_pte, | 649 | last_pe_start, last_pe_end, |
708 | last_pte + 8 * count, | 650 | last_dst, flags); |
709 | last_dst, flags, gtt_flags); | ||
710 | } | ||
711 | |||
712 | return 0; | ||
713 | } | 651 | } |
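
The rewritten walk drops the count/last_pte bookkeeping for explicit [last_pe_start, last_pe_end) tracking: extend the run while windows are contiguous, flush on a gap, and rely on the callee's early-out to swallow the empty first flush. A runnable model of that pattern:

```c
#include <stdio.h>
#include <stdint.h>

/* Flush one contiguous run of PTE updates. */
static void flush(uint64_t start, uint64_t end)
{
	if (start == end)   /* abort early if there isn't anything to do */
		return;
	printf("update PTEs [%#llx, %#llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
}

/* Walk per-page-table windows, merging contiguous ones into runs. */
static void walk(const uint64_t starts[], const uint64_t lens[], int n)
{
	uint64_t run_start = ~0ULL, run_end = ~0ULL;
	int i;

	for (i = 0; i < n; ++i) {
		if (run_end != starts[i]) {   /* gap: flush the previous run */
			flush(run_start, run_end);
			run_start = starts[i];
		}
		run_end = starts[i] + lens[i];
	}
	flush(run_start, run_end);            /* flush whatever is left */
}
```
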
714 | 652 | ||
715 | /** | 653 | /** |
716 | * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table | 654 | * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table |
717 | * | 655 | * |
718 | * @adev: amdgpu_device pointer | 656 | * @adev: amdgpu_device pointer |
657 | * @gtt: GART instance to use for mapping | ||
658 | * @gtt_flags: flags as they are used for GTT | ||
719 | * @vm: requested vm | 659 | * @vm: requested vm |
720 | * @mapping: mapped range and flags to use for the update | 660 | * @start: start of mapped range |
661 | * @last: last mapped entry | ||
662 | * @flags: flags for the entries | ||
721 | * @addr: addr to set the area to | 663 | * @addr: addr to set the area to |
722 | * @gtt_flags: flags as they are used for GTT | ||
723 | * @fence: optional resulting fence | 664 | * @fence: optional resulting fence |
724 | * | 665 | * |
725 | * Fill in the page table entries for @mapping. | 666 | * Fill in the page table entries between @start and @last. |
726 | * Returns 0 for success, -EINVAL for failure. | 667 | * Returns 0 for success, -EINVAL for failure. |
727 | * | ||
728 | * Object have to be reserved and mutex must be locked! | ||
729 | */ | 668 | */ |
730 | static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | 669 | static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, |
670 | struct amdgpu_gart *gtt, | ||
671 | uint32_t gtt_flags, | ||
731 | struct amdgpu_vm *vm, | 672 | struct amdgpu_vm *vm, |
732 | struct amdgpu_bo_va_mapping *mapping, | 673 | uint64_t start, uint64_t last, |
733 | uint64_t addr, uint32_t gtt_flags, | 674 | uint32_t flags, uint64_t addr, |
734 | struct fence **fence) | 675 | struct fence **fence) |
735 | { | 676 | { |
736 | struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; | 677 | struct amdgpu_ring *ring; |
678 | void *owner = AMDGPU_FENCE_OWNER_VM; | ||
737 | unsigned nptes, ncmds, ndw; | 679 | unsigned nptes, ncmds, ndw; |
738 | uint32_t flags = gtt_flags; | 680 | struct amdgpu_job *job; |
739 | struct amdgpu_ib *ib; | 681 | struct amdgpu_ib *ib; |
740 | struct fence *f = NULL; | 682 | struct fence *f = NULL; |
741 | int r; | 683 | int r; |
742 | 684 | ||
743 | /* normally, bo_va->flags only contains the READABLE and WRITEABLE bits, | 685 | ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); |
744 | * but just in case, we filter the flags here first | ||
745 | */ | ||
746 | if (!(mapping->flags & AMDGPU_PTE_READABLE)) | ||
747 | flags &= ~AMDGPU_PTE_READABLE; | ||
748 | if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) | ||
749 | flags &= ~AMDGPU_PTE_WRITEABLE; | ||
750 | 686 | ||
751 | trace_amdgpu_vm_bo_update(mapping); | 687 | /* sync to everything on unmapping */ |
688 | if (!(flags & AMDGPU_PTE_VALID)) | ||
689 | owner = AMDGPU_FENCE_OWNER_UNDEFINED; | ||
752 | 690 | ||
753 | nptes = mapping->it.last - mapping->it.start + 1; | 691 | nptes = last - start + 1; |
754 | 692 | ||
755 | /* | 693 | /* |
756 | * reserve space for one command every (1 << BLOCK_SIZE) | 694 | * reserve space for one command every (1 << BLOCK_SIZE) |
@@ -761,11 +699,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
761 | /* padding, etc. */ | 699 | /* padding, etc. */ |
762 | ndw = 64; | 700 | ndw = 64; |
763 | 701 | ||
764 | if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) { | 702 | if ((gtt == &adev->gart) && (flags == gtt_flags)) { |
765 | /* only copy commands needed */ | 703 | /* only copy commands needed */ |
766 | ndw += ncmds * 7; | 704 | ndw += ncmds * 7; |
767 | 705 | ||
768 | } else if (flags & AMDGPU_PTE_SYSTEM) { | 706 | } else if (gtt) { |
769 | /* header for write data commands */ | 707 | /* header for write data commands */ |
770 | ndw += ncmds * 4; | 708 | ndw += ncmds * 4; |
771 | 709 | ||
@@ -780,38 +718,28 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
780 | ndw += 2 * 10; | 718 | ndw += 2 * 10; |
781 | } | 719 | } |
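
The branches above reserve worst-case IB space before any commands are built. A sketch of the same shape; the per-command dword counts mirror the visible branches, while the block-size shift and the inline-data term are assumptions:

```c
unsigned size_update_ib(unsigned nptes, int copy_from_gart, int system_pages)
{
	unsigned ncmds = (nptes >> 9) + 1;  /* one command per (1 << 9) PTEs; shift assumed */
	unsigned ndw = 64;                  /* padding, etc. */

	if (copy_from_gart)
		ndw += ncmds * 7;               /* only copy commands needed */
	else if (system_pages)
		ndw += ncmds * 4 + nptes * 2;   /* headers plus inline PTE data (data term assumed) */
	else
		ndw += ncmds * 10 + 2 * 10;     /* set-pte-pde commands, two extra for fragments */
	return ndw;
}
```
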
782 | 720 | ||
783 | /* update too big for an IB */ | 721 | r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); |
784 | if (ndw > 0xfffff) | 722 | if (r) |
785 | return -ENOMEM; | ||
786 | |||
787 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | ||
788 | if (!ib) | ||
789 | return -ENOMEM; | ||
790 | |||
791 | r = amdgpu_ib_get(ring, NULL, ndw * 4, ib); | ||
792 | if (r) { | ||
793 | kfree(ib); | ||
794 | return r; | 723 | return r; |
795 | } | ||
796 | 724 | ||
797 | ib->length_dw = 0; | 725 | ib = &job->ibs[0]; |
798 | 726 | ||
799 | r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start, | 727 | r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, |
800 | mapping->it.last + 1, addr + mapping->offset, | 728 | owner); |
801 | flags, gtt_flags); | 729 | if (r) |
730 | goto error_free; | ||
802 | 731 | ||
803 | if (r) { | 732 | r = reservation_object_reserve_shared(vm->page_directory->tbo.resv); |
804 | amdgpu_ib_free(adev, ib); | 733 | if (r) |
805 | kfree(ib); | 734 | goto error_free; |
806 | return r; | 735 | |
807 | } | 736 | amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1, |
737 | addr, flags); | ||
808 | 738 | ||
809 | amdgpu_vm_pad_ib(adev, ib); | 739 | amdgpu_ring_pad_ib(ring, ib); |
810 | WARN_ON(ib->length_dw > ndw); | 740 | WARN_ON(ib->length_dw > ndw); |
811 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, | 741 | r = amdgpu_job_submit(job, ring, &vm->entity, |
812 | &amdgpu_vm_free_job, | 742 | AMDGPU_FENCE_OWNER_VM, &f); |
813 | AMDGPU_FENCE_OWNER_VM, | ||
814 | &f); | ||
815 | if (r) | 743 | if (r) |
816 | goto error_free; | 744 | goto error_free; |
817 | 745 | ||
@@ -821,19 +749,76 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
821 | *fence = fence_get(f); | 749 | *fence = fence_get(f); |
822 | } | 750 | } |
823 | fence_put(f); | 751 | fence_put(f); |
824 | if (!amdgpu_enable_scheduler) { | ||
825 | amdgpu_ib_free(adev, ib); | ||
826 | kfree(ib); | ||
827 | } | ||
828 | return 0; | 752 | return 0; |
829 | 753 | ||
830 | error_free: | 754 | error_free: |
831 | amdgpu_ib_free(adev, ib); | 755 | amdgpu_job_free(job); |
832 | kfree(ib); | ||
833 | return r; | 756 | return r; |
834 | } | 757 | } |
835 | 758 | ||
836 | /** | 759 | /** |
760 | * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks | ||
761 | * | ||
762 | * @adev: amdgpu_device pointer | ||
763 | * @gtt: GART instance to use for mapping | ||
764 | * @vm: requested vm | ||
765 | * @mapping: mapped range and flags to use for the update | ||
766 | * @addr: addr to set the area to | ||
767 | * @gtt_flags: flags as they are used for GTT | ||
768 | * @fence: optional resulting fence | ||
769 | * | ||
770 | * Split the mapping into smaller chunks so that each update fits | ||
771 | * into a SDMA IB. | ||
772 | * Returns 0 for success, -EINVAL for failure. | ||
773 | */ | ||
774 | static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | ||
775 | struct amdgpu_gart *gtt, | ||
776 | uint32_t gtt_flags, | ||
777 | struct amdgpu_vm *vm, | ||
778 | struct amdgpu_bo_va_mapping *mapping, | ||
779 | uint64_t addr, struct fence **fence) | ||
780 | { | ||
781 | const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE; | ||
782 | |||
783 | uint64_t start = mapping->it.start; | ||
784 | uint32_t flags = gtt_flags; | ||
785 | int r; | ||
786 | |||
787 | /* normally, bo_va->flags only contains the READABLE and WRITEABLE bits, | ||
788 | * but just in case, we filter the flags here first | ||
789 | */ | ||
790 | if (!(mapping->flags & AMDGPU_PTE_READABLE)) | ||
791 | flags &= ~AMDGPU_PTE_READABLE; | ||
792 | if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) | ||
793 | flags &= ~AMDGPU_PTE_WRITEABLE; | ||
794 | |||
795 | trace_amdgpu_vm_bo_update(mapping); | ||
796 | |||
797 | addr += mapping->offset; | ||
798 | |||
799 | if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags))) | ||
800 | return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, | ||
801 | start, mapping->it.last, | ||
802 | flags, addr, fence); | ||
803 | |||
804 | while (start != mapping->it.last + 1) { | ||
805 | uint64_t last; | ||
806 | |||
807 | last = min((uint64_t)mapping->it.last, start + max_size); | ||
808 | r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, | ||
809 | start, last, flags, addr, | ||
810 | fence); | ||
811 | if (r) | ||
812 | return r; | ||
813 | |||
814 | start = last + 1; | ||
815 | addr += max_size; | ||
816 | } | ||
817 | |||
818 | return 0; | ||
819 | } | ||
820 | |||
821 | /** | ||
837 | * amdgpu_vm_bo_update - update all BO mappings in the vm page table | 822 | * amdgpu_vm_bo_update - update all BO mappings in the vm page table |
838 | * | 823 | * |
839 | * @adev: amdgpu_device pointer | 824 | * @adev: amdgpu_device pointer |
@@ -851,14 +836,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
851 | { | 836 | { |
852 | struct amdgpu_vm *vm = bo_va->vm; | 837 | struct amdgpu_vm *vm = bo_va->vm; |
853 | struct amdgpu_bo_va_mapping *mapping; | 838 | struct amdgpu_bo_va_mapping *mapping; |
839 | struct amdgpu_gart *gtt = NULL; | ||
854 | uint32_t flags; | 840 | uint32_t flags; |
855 | uint64_t addr; | 841 | uint64_t addr; |
856 | int r; | 842 | int r; |
857 | 843 | ||
858 | if (mem) { | 844 | if (mem) { |
859 | addr = (u64)mem->start << PAGE_SHIFT; | 845 | addr = (u64)mem->start << PAGE_SHIFT; |
860 | if (mem->mem_type != TTM_PL_TT) | 846 | switch (mem->mem_type) { |
847 | case TTM_PL_TT: | ||
848 | gtt = &bo_va->bo->adev->gart; | ||
849 | break; | ||
850 | |||
851 | case TTM_PL_VRAM: | ||
861 | addr += adev->vm_manager.vram_base_offset; | 852 | addr += adev->vm_manager.vram_base_offset; |
853 | break; | ||
854 | |||
855 | default: | ||
856 | break; | ||
857 | } | ||
862 | } else { | 858 | } else { |
863 | addr = 0; | 859 | addr = 0; |
864 | } | 860 | } |
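
The switch replaces the old two-way check: TT placements now route through the GART, while VRAM gets the base offset added. A sketch with stand-in placement constants, not the TTM enum:

```c
enum placement { PL_SYSTEM, PL_TT, PL_VRAM };

uint64_t resolve_addr(enum placement pl, uint64_t start_page,
		      uint64_t vram_base, int *use_gart)
{
	uint64_t addr = start_page << 12;   /* page number -> byte address */

	*use_gart = 0;
	switch (pl) {
	case PL_TT:
		*use_gart = 1;              /* translate through the GART */
		break;
	case PL_VRAM:
		addr += vram_base;          /* offset into the GPU's view of VRAM */
		break;
	default:
		break;
	}
	return addr;
}
```
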
@@ -871,8 +867,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
871 | spin_unlock(&vm->status_lock); | 867 | spin_unlock(&vm->status_lock); |
872 | 868 | ||
873 | list_for_each_entry(mapping, &bo_va->invalids, list) { | 869 | list_for_each_entry(mapping, &bo_va->invalids, list) { |
874 | r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr, | 870 | r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr, |
875 | flags, &bo_va->last_pt_update); | 871 | &bo_va->last_pt_update); |
876 | if (r) | 872 | if (r) |
877 | return r; | 873 | return r; |
878 | } | 874 | } |
@@ -918,7 +914,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
918 | struct amdgpu_bo_va_mapping, list); | 914 | struct amdgpu_bo_va_mapping, list); |
919 | list_del(&mapping->list); | 915 | list_del(&mapping->list); |
920 | spin_unlock(&vm->freed_lock); | 916 | spin_unlock(&vm->freed_lock); |
921 | r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL); | 917 | r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping, |
918 | 0, NULL); | ||
922 | kfree(mapping); | 919 | kfree(mapping); |
923 | if (r) | 920 | if (r) |
924 | return r; | 921 | return r; |
@@ -976,7 +973,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, | |||
976 | * @vm: requested vm | 973 | * @vm: requested vm |
977 | * @bo: amdgpu buffer object | 974 | * @bo: amdgpu buffer object |
978 | * | 975 | * |
979 | * Add @bo into the requested vm (cayman+). | 976 | * Add @bo into the requested vm. |
980 | * Add @bo to the list of bos associated with the vm | 977 | * Add @bo to the list of bos associated with the vm |
981 | * Returns newly added bo_va or NULL for failure | 978 | * Returns newly added bo_va or NULL for failure |
982 | * | 979 | * |
@@ -1117,15 +1114,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1117 | */ | 1114 | */ |
1118 | pt->parent = amdgpu_bo_ref(vm->page_directory); | 1115 | pt->parent = amdgpu_bo_ref(vm->page_directory); |
1119 | 1116 | ||
1120 | r = amdgpu_vm_clear_bo(adev, pt); | 1117 | r = amdgpu_vm_clear_bo(adev, vm, pt); |
1121 | if (r) { | 1118 | if (r) { |
1122 | amdgpu_bo_unref(&pt); | 1119 | amdgpu_bo_unref(&pt); |
1123 | goto error_free; | 1120 | goto error_free; |
1124 | } | 1121 | } |
1125 | 1122 | ||
1126 | entry->robj = pt; | 1123 | entry->robj = pt; |
1127 | entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM; | ||
1128 | entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM; | ||
1129 | entry->priority = 0; | 1124 | entry->priority = 0; |
1130 | entry->tv.bo = &entry->robj->tbo; | 1125 | entry->tv.bo = &entry->robj->tbo; |
1131 | entry->tv.shared = true; | 1126 | entry->tv.shared = true; |
@@ -1210,7 +1205,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
1210 | * @adev: amdgpu_device pointer | 1205 | * @adev: amdgpu_device pointer |
1211 | * @bo_va: requested bo_va | 1206 | * @bo_va: requested bo_va |
1212 | * | 1207 | * |
1213 | * Remove @bo_va->bo from the requested vm (cayman+). | 1208 | * Remove @bo_va->bo from the requested vm. |
1214 | * | 1209 | * |
1215 | * Object have to be reserved! | 1210 | * Object have to be reserved! |
1216 | */ | 1211 | */ |
@@ -1255,7 +1250,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
1255 | * @vm: requested vm | 1250 | * @vm: requested vm |
1256 | * @bo: amdgpu buffer object | 1251 | * @bo: amdgpu buffer object |
1257 | * | 1252 | * |
1258 | * Mark @bo as invalid (cayman+). | 1253 | * Mark @bo as invalid. |
1259 | */ | 1254 | */ |
1260 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | 1255 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, |
1261 | struct amdgpu_bo *bo) | 1256 | struct amdgpu_bo *bo) |
@@ -1276,13 +1271,16 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | |||
1276 | * @adev: amdgpu_device pointer | 1271 | * @adev: amdgpu_device pointer |
1277 | * @vm: requested vm | 1272 | * @vm: requested vm |
1278 | * | 1273 | * |
1279 | * Init @vm fields (cayman+). | 1274 | * Init @vm fields. |
1280 | */ | 1275 | */ |
1281 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | 1276 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) |
1282 | { | 1277 | { |
1283 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, | 1278 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, |
1284 | AMDGPU_VM_PTE_COUNT * 8); | 1279 | AMDGPU_VM_PTE_COUNT * 8); |
1285 | unsigned pd_size, pd_entries; | 1280 | unsigned pd_size, pd_entries; |
1281 | unsigned ring_instance; | ||
1282 | struct amdgpu_ring *ring; | ||
1283 | struct amd_sched_rq *rq; | ||
1286 | int i, r; | 1284 | int i, r; |
1287 | 1285 | ||
1288 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 1286 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
@@ -1306,6 +1304,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1306 | return -ENOMEM; | 1304 | return -ENOMEM; |
1307 | } | 1305 | } |
1308 | 1306 | ||
1307 | /* create scheduler entity for page table updates */ | ||
1308 | |||
1309 | ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring); | ||
1310 | ring_instance %= adev->vm_manager.vm_pte_num_rings; | ||
1311 | ring = adev->vm_manager.vm_pte_rings[ring_instance]; | ||
1312 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; | ||
1313 | r = amd_sched_entity_init(&ring->sched, &vm->entity, | ||
1314 | rq, amdgpu_sched_jobs); | ||
1315 | if (r) | ||
1316 | return r; | ||
1317 | |||
1309 | vm->page_directory_fence = NULL; | 1318 | vm->page_directory_fence = NULL; |
1310 | 1319 | ||
1311 | r = amdgpu_bo_create(adev, pd_size, align, true, | 1320 | r = amdgpu_bo_create(adev, pd_size, align, true, |
@@ -1313,22 +1322,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1313 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS, | 1322 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS, |
1314 | NULL, NULL, &vm->page_directory); | 1323 | NULL, NULL, &vm->page_directory); |
1315 | if (r) | 1324 | if (r) |
1316 | return r; | 1325 | goto error_free_sched_entity; |
1326 | |||
1317 | r = amdgpu_bo_reserve(vm->page_directory, false); | 1327 | r = amdgpu_bo_reserve(vm->page_directory, false); |
1318 | if (r) { | 1328 | if (r) |
1319 | amdgpu_bo_unref(&vm->page_directory); | 1329 | goto error_free_page_directory; |
1320 | vm->page_directory = NULL; | 1330 | |
1321 | return r; | 1331 | r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory); |
1322 | } | ||
1323 | r = amdgpu_vm_clear_bo(adev, vm->page_directory); | ||
1324 | amdgpu_bo_unreserve(vm->page_directory); | 1332 | amdgpu_bo_unreserve(vm->page_directory); |
1325 | if (r) { | 1333 | if (r) |
1326 | amdgpu_bo_unref(&vm->page_directory); | 1334 | goto error_free_page_directory; |
1327 | vm->page_directory = NULL; | ||
1328 | return r; | ||
1329 | } | ||
1330 | 1335 | ||
1331 | return 0; | 1336 | return 0; |
1337 | |||
1338 | error_free_page_directory: | ||
1339 | amdgpu_bo_unref(&vm->page_directory); | ||
1340 | vm->page_directory = NULL; | ||
1341 | |||
1342 | error_free_sched_entity: | ||
1343 | amd_sched_entity_fini(&ring->sched, &vm->entity); | ||
1344 | |||
1345 | return r; | ||
1332 | } | 1346 | } |
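
Each VM now owns a scheduler entity for page-table updates, and the ring it runs on is picked round-robin from the PTE-capable rings via an atomic counter. A minimal model of that selection (ring count assumed):

```c
#include <stdatomic.h>

#define NUM_PTE_RINGS 2

static atomic_uint next_ring;

unsigned pick_pte_ring(void)
{
	/* each new VM takes the next ring, wrapping around */
	return atomic_fetch_add(&next_ring, 1) % NUM_PTE_RINGS;
}
```
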
1333 | 1347 | ||
1334 | /** | 1348 | /** |
@@ -1337,7 +1351,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1337 | * @adev: amdgpu_device pointer | 1351 | * @adev: amdgpu_device pointer |
1338 | * @vm: requested vm | 1352 | * @vm: requested vm |
1339 | * | 1353 | * |
1340 | * Tear down @vm (cayman+). | 1354 | * Tear down @vm. |
1341 | * Unbind the VM and remove all bos from the vm bo list | 1355 | * Unbind the VM and remove all bos from the vm bo list |
1342 | */ | 1356 | */ |
1343 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | 1357 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) |
@@ -1345,6 +1359,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1345 | struct amdgpu_bo_va_mapping *mapping, *tmp; | 1359 | struct amdgpu_bo_va_mapping *mapping, *tmp; |
1346 | int i; | 1360 | int i; |
1347 | 1361 | ||
1362 | amd_sched_entity_fini(vm->entity.sched, &vm->entity); | ||
1363 | |||
1348 | if (!RB_EMPTY_ROOT(&vm->va)) { | 1364 | if (!RB_EMPTY_ROOT(&vm->va)) { |
1349 | dev_err(adev->dev, "still active bo inside vm\n"); | 1365 | dev_err(adev->dev, "still active bo inside vm\n"); |
1350 | } | 1366 | } |
@@ -1375,6 +1391,27 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1375 | } | 1391 | } |
1376 | 1392 | ||
1377 | /** | 1393 | /** |
1394 | * amdgpu_vm_manager_init - init the VM manager | ||
1395 | * | ||
1396 | * @adev: amdgpu_device pointer | ||
1397 | * | ||
1398 | * Initialize the VM manager structures | ||
1399 | */ | ||
1400 | void amdgpu_vm_manager_init(struct amdgpu_device *adev) | ||
1401 | { | ||
1402 | unsigned i; | ||
1403 | |||
1404 | INIT_LIST_HEAD(&adev->vm_manager.ids_lru); | ||
1405 | |||
1406 | /* skip over VMID 0, since it is the system VM */ | ||
1407 | for (i = 1; i < adev->vm_manager.num_ids; ++i) | ||
1408 | list_add_tail(&adev->vm_manager.ids[i].list, | ||
1409 | &adev->vm_manager.ids_lru); | ||
1410 | |||
1411 | atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); | ||
1412 | } | ||
1413 | |||
1414 | /** | ||
1378 | * amdgpu_vm_manager_fini - cleanup VM manager | 1415 | * amdgpu_vm_manager_fini - cleanup VM manager |
1379 | * | 1416 | * |
1380 | * @adev: amdgpu_device pointer | 1417 | * @adev: amdgpu_device pointer |
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index fd9c9588ef46..6b1f0539ce9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c | |||
@@ -1059,257 +1059,6 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, | |||
1059 | return -EINVAL; | 1059 | return -EINVAL; |
1060 | } | 1060 | } |
1061 | 1061 | ||
1062 | static void cik_print_gpu_status_regs(struct amdgpu_device *adev) | ||
1063 | { | ||
1064 | dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", | ||
1065 | RREG32(mmGRBM_STATUS)); | ||
1066 | dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", | ||
1067 | RREG32(mmGRBM_STATUS2)); | ||
1068 | dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", | ||
1069 | RREG32(mmGRBM_STATUS_SE0)); | ||
1070 | dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", | ||
1071 | RREG32(mmGRBM_STATUS_SE1)); | ||
1072 | dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", | ||
1073 | RREG32(mmGRBM_STATUS_SE2)); | ||
1074 | dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", | ||
1075 | RREG32(mmGRBM_STATUS_SE3)); | ||
1076 | dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", | ||
1077 | RREG32(mmSRBM_STATUS)); | ||
1078 | dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", | ||
1079 | RREG32(mmSRBM_STATUS2)); | ||
1080 | dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n", | ||
1081 | RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); | ||
1082 | dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n", | ||
1083 | RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); | ||
1084 | dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); | ||
1085 | dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", | ||
1086 | RREG32(mmCP_STALLED_STAT1)); | ||
1087 | dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", | ||
1088 | RREG32(mmCP_STALLED_STAT2)); | ||
1089 | dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", | ||
1090 | RREG32(mmCP_STALLED_STAT3)); | ||
1091 | dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", | ||
1092 | RREG32(mmCP_CPF_BUSY_STAT)); | ||
1093 | dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", | ||
1094 | RREG32(mmCP_CPF_STALLED_STAT1)); | ||
1095 | dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); | ||
1096 | dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); | ||
1097 | dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", | ||
1098 | RREG32(mmCP_CPC_STALLED_STAT1)); | ||
1099 | dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); | ||
1100 | } | ||
1101 | |||
1102 | /** | ||
1103 | * cik_gpu_check_soft_reset - check which blocks are busy | ||
1104 | * | ||
1105 | * @adev: amdgpu_device pointer | ||
1106 | * | ||
1107 | * Check which blocks are busy and return the relevant reset | ||
1108 | * mask to be used by cik_gpu_soft_reset(). | ||
1109 | * Returns a mask of the blocks to be reset. | ||
1110 | */ | ||
1111 | u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev) | ||
1112 | { | ||
1113 | u32 reset_mask = 0; | ||
1114 | u32 tmp; | ||
1115 | |||
1116 | /* GRBM_STATUS */ | ||
1117 | tmp = RREG32(mmGRBM_STATUS); | ||
1118 | if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | | ||
1119 | GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | | ||
1120 | GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | | ||
1121 | GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | | ||
1122 | GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | | ||
1123 | GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) | ||
1124 | reset_mask |= AMDGPU_RESET_GFX; | ||
1125 | |||
1126 | if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) | ||
1127 | reset_mask |= AMDGPU_RESET_CP; | ||
1128 | |||
1129 | /* GRBM_STATUS2 */ | ||
1130 | tmp = RREG32(mmGRBM_STATUS2); | ||
1131 | if (tmp & GRBM_STATUS2__RLC_BUSY_MASK) | ||
1132 | reset_mask |= AMDGPU_RESET_RLC; | ||
1133 | |||
1134 | /* SDMA0_STATUS_REG */ | ||
1135 | tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); | ||
1136 | if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) | ||
1137 | reset_mask |= AMDGPU_RESET_DMA; | ||
1138 | |||
1139 | /* SDMA1_STATUS_REG */ | ||
1140 | tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); | ||
1141 | if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) | ||
1142 | reset_mask |= AMDGPU_RESET_DMA1; | ||
1143 | |||
1144 | /* SRBM_STATUS2 */ | ||
1145 | tmp = RREG32(mmSRBM_STATUS2); | ||
1146 | if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) | ||
1147 | reset_mask |= AMDGPU_RESET_DMA; | ||
1148 | |||
1149 | if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) | ||
1150 | reset_mask |= AMDGPU_RESET_DMA1; | ||
1151 | |||
1152 | /* SRBM_STATUS */ | ||
1153 | tmp = RREG32(mmSRBM_STATUS); | ||
1154 | |||
1155 | if (tmp & SRBM_STATUS__IH_BUSY_MASK) | ||
1156 | reset_mask |= AMDGPU_RESET_IH; | ||
1157 | |||
1158 | if (tmp & SRBM_STATUS__SEM_BUSY_MASK) | ||
1159 | reset_mask |= AMDGPU_RESET_SEM; | ||
1160 | |||
1161 | if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK) | ||
1162 | reset_mask |= AMDGPU_RESET_GRBM; | ||
1163 | |||
1164 | if (tmp & SRBM_STATUS__VMC_BUSY_MASK) | ||
1165 | reset_mask |= AMDGPU_RESET_VMC; | ||
1166 | |||
1167 | if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | | ||
1168 | SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) | ||
1169 | reset_mask |= AMDGPU_RESET_MC; | ||
1170 | |||
1171 | if (amdgpu_display_is_display_hung(adev)) | ||
1172 | reset_mask |= AMDGPU_RESET_DISPLAY; | ||
1173 | |||
1174 | /* Skip MC reset as it's most likely not hung, just busy */ | ||
1175 | if (reset_mask & AMDGPU_RESET_MC) { | ||
1176 | DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); | ||
1177 | reset_mask &= ~AMDGPU_RESET_MC; | ||
1178 | } | ||
1179 | |||
1180 | return reset_mask; | ||
1181 | } | ||
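
The deleted helper follows a pattern worth noting: read each status register, OR a per-block reset bit into a mask for every busy indication, then drop MC because it is usually busy rather than hung. A generic sketch with placeholder registers and bits, not the CIK definitions:

```c
#include <stdint.h>

#define RESET_GFX (1u << 0)
#define RESET_DMA (1u << 1)
#define RESET_MC  (1u << 2)

uint32_t check_soft_reset(uint32_t grbm_status, uint32_t sdma_status)
{
	uint32_t mask = 0;

	if (grbm_status & 0x1)      /* a busy bit is set */
		mask |= RESET_GFX;
	if (!(sdma_status & 0x1))   /* engine not idle */
		mask |= RESET_DMA;
	/* MC is most likely just busy, not hung: clear it from the mask */
	mask &= ~RESET_MC;
	return mask;
}
```
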
1182 | |||
1183 | /** | ||
1184 | * cik_gpu_soft_reset - soft reset GPU | ||
1185 | * | ||
1186 | * @adev: amdgpu_device pointer | ||
1187 | * @reset_mask: mask of which blocks to reset | ||
1188 | * | ||
1189 | * Soft reset the blocks specified in @reset_mask. | ||
1190 | */ | ||
1191 | static void cik_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask) | ||
1192 | { | ||
1193 | struct amdgpu_mode_mc_save save; | ||
1194 | u32 grbm_soft_reset = 0, srbm_soft_reset = 0; | ||
1195 | u32 tmp; | ||
1196 | |||
1197 | if (reset_mask == 0) | ||
1198 | return; | ||
1199 | |||
1200 | dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask); | ||
1201 | |||
1202 | cik_print_gpu_status_regs(adev); | ||
1203 | dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", | ||
1204 | RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); | ||
1205 | dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", | ||
1206 | RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); | ||
1207 | |||
1208 | /* disable CG/PG */ | ||
1209 | |||
1210 | /* stop the rlc */ | ||
1211 | gfx_v7_0_rlc_stop(adev); | ||
1212 | |||
1213 | /* Disable GFX parsing/prefetching */ | ||
1214 | WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); | ||
1215 | |||
1216 | /* Disable MEC parsing/prefetching */ | ||
1217 | WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); | ||
1218 | |||
1219 | if (reset_mask & AMDGPU_RESET_DMA) { | ||
1220 | /* sdma0 */ | ||
1221 | tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); | ||
1222 | tmp |= SDMA0_F32_CNTL__HALT_MASK; | ||
1223 | WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); | ||
1224 | } | ||
1225 | if (reset_mask & AMDGPU_RESET_DMA1) { | ||
1226 | /* sdma1 */ | ||
1227 | tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); | ||
1228 | tmp |= SDMA0_F32_CNTL__HALT_MASK; | ||
1229 | WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); | ||
1230 | } | ||
1231 | |||
1232 | gmc_v7_0_mc_stop(adev, &save); | ||
1233 | if (amdgpu_asic_wait_for_mc_idle(adev)) { | ||
1234 | dev_warn(adev->dev, "Wait for MC idle timed out!\n"); | ||
1235 | } | ||
1236 | |||
1237 | if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) | ||
1238 | grbm_soft_reset = GRBM_SOFT_RESET__SOFT_RESET_CP_MASK | | ||
1239 | GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK; | ||
1240 | |||
1241 | if (reset_mask & AMDGPU_RESET_CP) { | ||
1242 | grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK; | ||
1243 | |||
1244 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK; | ||
1245 | } | ||
1246 | |||
1247 | if (reset_mask & AMDGPU_RESET_DMA) | ||
1248 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; | ||
1249 | |||
1250 | if (reset_mask & AMDGPU_RESET_DMA1) | ||
1251 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; | ||
1252 | |||
1253 | if (reset_mask & AMDGPU_RESET_DISPLAY) | ||
1254 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; | ||
1255 | |||
1256 | if (reset_mask & AMDGPU_RESET_RLC) | ||
1257 | grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK; | ||
1258 | |||
1259 | if (reset_mask & AMDGPU_RESET_SEM) | ||
1260 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SEM_MASK; | ||
1261 | |||
1262 | if (reset_mask & AMDGPU_RESET_IH) | ||
1263 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK; | ||
1264 | |||
1265 | if (reset_mask & AMDGPU_RESET_GRBM) | ||
1266 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK; | ||
1267 | |||
1268 | if (reset_mask & AMDGPU_RESET_VMC) | ||
1269 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_VMC_MASK; | ||
1270 | |||
1271 | if (!(adev->flags & AMD_IS_APU)) { | ||
1272 | if (reset_mask & AMDGPU_RESET_MC) | ||
1273 | srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_MC_MASK; | ||
1274 | } | ||
1275 | |||
1276 | if (grbm_soft_reset) { | ||
1277 | tmp = RREG32(mmGRBM_SOFT_RESET); | ||
1278 | tmp |= grbm_soft_reset; | ||
1279 | dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); | ||
1280 | WREG32(mmGRBM_SOFT_RESET, tmp); | ||
1281 | tmp = RREG32(mmGRBM_SOFT_RESET); | ||
1282 | |||
1283 | udelay(50); | ||
1284 | |||
1285 | tmp &= ~grbm_soft_reset; | ||
1286 | WREG32(mmGRBM_SOFT_RESET, tmp); | ||
1287 | tmp = RREG32(mmGRBM_SOFT_RESET); | ||
1288 | } | ||
1289 | |||
1290 | if (srbm_soft_reset) { | ||
1291 | tmp = RREG32(mmSRBM_SOFT_RESET); | ||
1292 | tmp |= srbm_soft_reset; | ||
1293 | dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); | ||
1294 | WREG32(mmSRBM_SOFT_RESET, tmp); | ||
1295 | tmp = RREG32(mmSRBM_SOFT_RESET); | ||
1296 | |||
1297 | udelay(50); | ||
1298 | |||
1299 | tmp &= ~srbm_soft_reset; | ||
1300 | WREG32(mmSRBM_SOFT_RESET, tmp); | ||
1301 | tmp = RREG32(mmSRBM_SOFT_RESET); | ||
1302 | } | ||
1303 | |||
1304 | /* Wait a little for things to settle down */ | ||
1305 | udelay(50); | ||
1306 | |||
1307 | gmc_v7_0_mc_resume(adev, &save); | ||
1308 | udelay(50); | ||
1309 | |||
1310 | cik_print_gpu_status_regs(adev); | ||
1311 | } | ||
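
For readers following the reset path above: the GRBM/SRBM sequence is a plain set/delay/clear toggle on the soft-reset register. A minimal stand-alone sketch of that pattern (RREG32/WREG32/udelay are stubs for illustration, not the driver API):

#include <stdint.h>
#include <stdio.h>

static uint32_t soft_reset_reg;                 /* stands in for mmGRBM/mmSRBM_SOFT_RESET */

static uint32_t RREG32(void) { return soft_reset_reg; }
static void WREG32(uint32_t v) { soft_reset_reg = v; printf("write 0x%08X\n", v); }
static void udelay(unsigned us) { (void)us; }   /* no-op here; the real code busy-waits */

static void toggle_soft_reset(uint32_t mask)
{
	uint32_t tmp = RREG32();

	tmp |= mask;          /* assert the per-block reset bits */
	WREG32(tmp);
	(void)RREG32();       /* read back to post the write */

	udelay(50);           /* hold the blocks in reset briefly */

	tmp &= ~mask;         /* deassert */
	WREG32(tmp);
	(void)RREG32();
}

int main(void) { toggle_soft_reset(0x1); return 0; }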
1312 | |||
1313 | struct kv_reset_save_regs { | 1062 | struct kv_reset_save_regs { |
1314 | u32 gmcon_reng_execute; | 1063 | u32 gmcon_reng_execute; |
1315 | u32 gmcon_misc; | 1064 | u32 gmcon_misc; |
@@ -1405,45 +1154,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev, | |||
1405 | 1154 | ||
1406 | static void cik_gpu_pci_config_reset(struct amdgpu_device *adev) | 1155 | static void cik_gpu_pci_config_reset(struct amdgpu_device *adev) |
1407 | { | 1156 | { |
1408 | struct amdgpu_mode_mc_save save; | ||
1409 | struct kv_reset_save_regs kv_save = { 0 }; | 1157 | struct kv_reset_save_regs kv_save = { 0 }; |
1410 | u32 tmp, i; | 1158 | u32 i; |
1411 | 1159 | ||
1412 | dev_info(adev->dev, "GPU pci config reset\n"); | 1160 | dev_info(adev->dev, "GPU pci config reset\n"); |
1413 | 1161 | ||
1414 | /* disable dpm? */ | ||
1415 | |||
1416 | /* disable cg/pg */ | ||
1417 | |||
1418 | /* Disable GFX parsing/prefetching */ | ||
1419 | WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | | ||
1420 | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); | ||
1421 | |||
1422 | /* Disable MEC parsing/prefetching */ | ||
1423 | WREG32(mmCP_MEC_CNTL, | ||
1424 | CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); | ||
1425 | |||
1426 | /* sdma0 */ | ||
1427 | tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); | ||
1428 | tmp |= SDMA0_F32_CNTL__HALT_MASK; | ||
1429 | WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); | ||
1430 | /* sdma1 */ | ||
1431 | tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); | ||
1432 | tmp |= SDMA0_F32_CNTL__HALT_MASK; | ||
1433 | WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); | ||
1434 | /* XXX other engines? */ | ||
1435 | |||
1436 | /* halt the rlc, disable cp internal ints */ | ||
1437 | gfx_v7_0_rlc_stop(adev); | ||
1438 | |||
1439 | udelay(50); | ||
1440 | |||
1441 | /* disable mem access */ | ||
1442 | gmc_v7_0_mc_stop(adev, &save); | ||
1443 | if (amdgpu_asic_wait_for_mc_idle(adev)) { | ||
1444 | dev_warn(adev->dev, "Wait for MC idle timed out!\n"); | ||
1445 | } | ||
1446 | |||
1447 | if (adev->flags & AMD_IS_APU) | 1162 | if (adev->flags & AMD_IS_APU) |
1448 | kv_save_regs_for_reset(adev, &kv_save); | 1163 | kv_save_regs_for_reset(adev, &kv_save); |
1449 | 1164 | ||
@@ -1489,26 +1204,11 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu | |||
1489 | */ | 1204 | */ |
1490 | static int cik_asic_reset(struct amdgpu_device *adev) | 1205 | static int cik_asic_reset(struct amdgpu_device *adev) |
1491 | { | 1206 | { |
1492 | u32 reset_mask; | 1207 | cik_set_bios_scratch_engine_hung(adev, true); |
1493 | |||
1494 | reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); | ||
1495 | |||
1496 | if (reset_mask) | ||
1497 | cik_set_bios_scratch_engine_hung(adev, true); | ||
1498 | |||
1499 | /* try soft reset */ | ||
1500 | cik_gpu_soft_reset(adev, reset_mask); | ||
1501 | |||
1502 | reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); | ||
1503 | |||
1504 | /* try pci config reset */ | ||
1505 | if (reset_mask && amdgpu_hard_reset) | ||
1506 | cik_gpu_pci_config_reset(adev); | ||
1507 | 1208 | ||
1508 | reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); | 1209 | cik_gpu_pci_config_reset(adev); |
1509 | 1210 | ||
1510 | if (!reset_mask) | 1211 | cik_set_bios_scratch_engine_hung(adev, false); |
1511 | cik_set_bios_scratch_engine_hung(adev, false); | ||
1512 | 1212 | ||
1513 | return 0; | 1213 | return 0; |
1514 | } | 1214 | } |
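
The net effect of this hunk: the staged try-soft-reset-then-escalate logic is gone, and cik_asic_reset() now always takes the PCI config reset, bracketed by the vbios scratch flag. A stubbed sketch of that shape (helper names are stand-ins, not the driver API):

#include <stdbool.h>

static void set_engine_hung(bool hung) { (void)hung; }  /* scratch-reg helper stand-in */
static void pci_config_reset(void) { }                  /* full-ASIC reset stand-in */

static int asic_reset_sketch(void)
{
	set_engine_hung(true);    /* tell the vbios the engines are hung */
	pci_config_reset();       /* unconditional full reset */
	set_engine_hung(false);   /* clear the flag once the reset completed */
	return 0;
}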
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 5f712ceddf08..675f34916aab 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c | |||
@@ -295,30 +295,6 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq | |||
295 | } | 295 | } |
296 | 296 | ||
297 | /** | 297 | /** |
298 | * cik_sdma_ring_emit_semaphore - emit a semaphore on the dma ring | ||
299 | * | ||
300 | * @ring: amdgpu_ring structure holding ring information | ||
301 | * @semaphore: amdgpu semaphore object | ||
302 | * @emit_wait: wait or signal semaphore | ||
303 | * | ||
304 | * Add a DMA semaphore packet to the ring to wait on or signal | ||
305 | * other rings (CIK). | ||
306 | */ | ||
307 | static bool cik_sdma_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
308 | struct amdgpu_semaphore *semaphore, | ||
309 | bool emit_wait) | ||
310 | { | ||
311 | u64 addr = semaphore->gpu_addr; | ||
312 | u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S; | ||
313 | |||
314 | amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); | ||
315 | amdgpu_ring_write(ring, addr & 0xfffffff8); | ||
316 | amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); | ||
317 | |||
318 | return true; | ||
319 | } | ||
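
For reference, the deleted packet encoding is easy to model: three dwords, with the semaphore address forced to 8-byte alignment. A self-contained sketch (the opcode value and the meaning of the extra bits are assumptions for illustration, not the verified hardware encoding):

#include <stdint.h>

#define SDMA_OPCODE_SEMAPHORE 0x7   /* assumed opcode value */
#define SDMA_PACKET(op, sub, extra) \
	((uint32_t)(op) | ((uint32_t)(sub) << 8) | ((uint32_t)(extra) << 16))

static void encode_semaphore(uint32_t out[3], uint64_t addr, int emit_wait)
{
	uint32_t extra = emit_wait ? 0 : 1;     /* signal vs. wait select (simplified) */

	out[0] = SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra);
	out[1] = (uint32_t)addr & 0xfffffff8;   /* low bits, 8-byte aligned */
	out[2] = (uint32_t)(addr >> 32);        /* upper 32 bits */
}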
320 | |||
321 | /** | ||
322 | * cik_sdma_gfx_stop - stop the gfx async dma engines | 298 | * cik_sdma_gfx_stop - stop the gfx async dma engines |
323 | * | 299 | * |
324 | * @adev: amdgpu_device pointer | 300 | * @adev: amdgpu_device pointer |
@@ -417,6 +393,9 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) | |||
417 | cik_srbm_select(adev, 0, 0, 0, 0); | 393 | cik_srbm_select(adev, 0, 0, 0, 0); |
418 | mutex_unlock(&adev->srbm_mutex); | 394 | mutex_unlock(&adev->srbm_mutex); |
419 | 395 | ||
396 | WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i], | ||
397 | adev->gfx.config.gb_addr_config & 0x70); | ||
398 | |||
420 | WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); | 399 | WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); |
421 | WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); | 400 | WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); |
422 | 401 | ||
@@ -584,7 +563,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) | |||
584 | tmp = 0xCAFEDEAD; | 563 | tmp = 0xCAFEDEAD; |
585 | adev->wb.wb[index] = cpu_to_le32(tmp); | 564 | adev->wb.wb[index] = cpu_to_le32(tmp); |
586 | 565 | ||
587 | r = amdgpu_ring_lock(ring, 5); | 566 | r = amdgpu_ring_alloc(ring, 5); |
588 | if (r) { | 567 | if (r) { |
589 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); | 568 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); |
590 | amdgpu_wb_free(adev, index); | 569 | amdgpu_wb_free(adev, index); |
@@ -595,7 +574,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) | |||
595 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); | 574 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); |
596 | amdgpu_ring_write(ring, 1); /* number of DWs to follow */ | 575 | amdgpu_ring_write(ring, 1); /* number of DWs to follow */ |
597 | amdgpu_ring_write(ring, 0xDEADBEEF); | 576 | amdgpu_ring_write(ring, 0xDEADBEEF); |
598 | amdgpu_ring_unlock_commit(ring); | 577 | amdgpu_ring_commit(ring); |
599 | 578 | ||
600 | for (i = 0; i < adev->usec_timeout; i++) { | 579 | for (i = 0; i < adev->usec_timeout; i++) { |
601 | tmp = le32_to_cpu(adev->wb.wb[index]); | 580 | tmp = le32_to_cpu(adev->wb.wb[index]); |
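
The ring test itself is unchanged apart from the lock/alloc rename: seed a writeback slot with 0xCAFEDEAD, submit a GPU write of 0xDEADBEEF to it, and poll until the value flips. A self-contained model of that handshake (the GPU-side write is simulated by a direct store):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	volatile uint32_t wb = 0xCAFEDEAD;   /* writeback slot, poisoned first */

	wb = 0xDEADBEEF;                     /* stands in for the ring executing the write */

	for (int i = 0; i < 100; i++) {      /* poll loop with a timeout, as in the driver */
		if (wb == 0xDEADBEEF) {
			printf("ring test passed after %d polls\n", i);
			return 0;
		}
	}
	printf("ring test timed out\n");
	return 1;
}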
@@ -645,7 +624,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) | |||
645 | tmp = 0xCAFEDEAD; | 624 | tmp = 0xCAFEDEAD; |
646 | adev->wb.wb[index] = cpu_to_le32(tmp); | 625 | adev->wb.wb[index] = cpu_to_le32(tmp); |
647 | memset(&ib, 0, sizeof(ib)); | 626 | memset(&ib, 0, sizeof(ib)); |
648 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 627 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
649 | if (r) { | 628 | if (r) { |
650 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 629 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
651 | goto err0; | 630 | goto err0; |
@@ -657,9 +636,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) | |||
657 | ib.ptr[3] = 1; | 636 | ib.ptr[3] = 1; |
658 | ib.ptr[4] = 0xDEADBEEF; | 637 | ib.ptr[4] = 0xDEADBEEF; |
659 | ib.length_dw = 5; | 638 | ib.length_dw = 5; |
660 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, | 639 | r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, |
661 | AMDGPU_FENCE_OWNER_UNDEFINED, | 640 | NULL, &f); |
662 | &f); | ||
663 | if (r) | 641 | if (r) |
664 | goto err1; | 642 | goto err1; |
665 | 643 | ||
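
The IB test now calls amdgpu_ib_schedule() directly instead of going through the removed scheduler helper. A stubbed sketch of that shape (struct layout and helper signatures are illustrative, not the driver's):

#include <stdint.h>

struct fence;
struct ib { uint32_t ptr[8]; unsigned length_dw; };

/* assumed-shape stand-ins for amdgpu_ib_schedule() and fence_wait() */
static int ib_schedule(struct ib *ib, struct fence **f) { (void)ib; *f = 0; return 0; }
static int fence_wait(struct fence *f) { (void)f; return 0; }

static int ib_test_sketch(void)
{
	struct ib ib = { .length_dw = 0 };
	struct fence *f;
	int r;

	ib.ptr[ib.length_dw++] = 0xDEADBEEF;  /* payload the test later polls for */
	r = ib_schedule(&ib, &f);             /* direct submission, no helper */
	if (r)
		return r;
	return fence_wait(f);                 /* completion comes back via the fence */
}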
@@ -738,7 +716,7 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib, | |||
738 | * Update PTEs by writing them manually using sDMA (CIK). | 716 | * Update PTEs by writing them manually using sDMA (CIK). |
739 | */ | 717 | */ |
740 | static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, | 718 | static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, |
741 | uint64_t pe, | 719 | const dma_addr_t *pages_addr, uint64_t pe, |
742 | uint64_t addr, unsigned count, | 720 | uint64_t addr, unsigned count, |
743 | uint32_t incr, uint32_t flags) | 721 | uint32_t incr, uint32_t flags) |
744 | { | 722 | { |
@@ -757,14 +735,7 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, | |||
757 | ib->ptr[ib->length_dw++] = upper_32_bits(pe); | 735 | ib->ptr[ib->length_dw++] = upper_32_bits(pe); |
758 | ib->ptr[ib->length_dw++] = ndw; | 736 | ib->ptr[ib->length_dw++] = ndw; |
759 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { | 737 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { |
760 | if (flags & AMDGPU_PTE_SYSTEM) { | 738 | value = amdgpu_vm_map_gart(pages_addr, addr); |
761 | value = amdgpu_vm_map_gart(ib->ring->adev, addr); | ||
762 | value &= 0xFFFFFFFFFFFFF000ULL; | ||
763 | } else if (flags & AMDGPU_PTE_VALID) { | ||
764 | value = addr; | ||
765 | } else { | ||
766 | value = 0; | ||
767 | } | ||
768 | addr += incr; | 739 | addr += incr; |
769 | value |= flags; | 740 | value |= flags; |
770 | ib->ptr[ib->length_dw++] = value; | 741 | ib->ptr[ib->length_dw++] = value; |
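
With the pages_addr table passed in, the per-PTE branching collapses to a single lookup. A minimal model of the simplified fill loop (map_gart() is a stand-in for amdgpu_vm_map_gart(), assumed here to resolve system pages through the table and pass other addresses through):

#include <stdint.h>

static uint64_t map_gart(const uint64_t *pages_addr, uint64_t addr)
{
	/* stand-in: look up system pages in the GART table, else pass through */
	return pages_addr ? pages_addr[addr >> 12] : addr;
}

static void write_ptes(uint32_t *ib, unsigned *length_dw, const uint64_t *pages_addr,
		       uint64_t addr, unsigned count, uint32_t incr, uint64_t flags)
{
	while (count--) {
		uint64_t value = map_gart(pages_addr, addr) | flags;

		addr += incr;
		ib[(*length_dw)++] = (uint32_t)value;          /* PTE low dword */
		ib[(*length_dw)++] = (uint32_t)(value >> 32);  /* PTE high dword */
	}
}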
@@ -827,9 +798,9 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, | |||
827 | * @ib: indirect buffer to fill with padding | 798 | * @ib: indirect buffer to fill with padding |
828 | * | 799 | * |
829 | */ | 800 | */ |
830 | static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib) | 801 | static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) |
831 | { | 802 | { |
832 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); | 803 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); |
833 | u32 pad_count; | 804 | u32 pad_count; |
834 | int i; | 805 | int i; |
835 | 806 | ||
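
pad_ib becomes a ring function here, so padding can use the ring's own NOP packet rather than reaching through ib->ring. A sketch of the alignment it enforces (the 8-dword multiple and the NOP encoding are assumptions for illustration):

#include <stdint.h>

#define SDMA_NOP 0u   /* assumed NOP packet encoding */

static void pad_ib(uint32_t *ptr, unsigned *length_dw)
{
	while (*length_dw & 7)              /* pad to a multiple of 8 dwords */
		ptr[(*length_dw)++] = SDMA_NOP;
}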
@@ -1097,6 +1068,8 @@ static void cik_sdma_print_status(void *handle) | |||
1097 | i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); | 1068 | i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); |
1098 | dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", | 1069 | dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", |
1099 | i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); | 1070 | i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); |
1071 | dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", | ||
1072 | i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); | ||
1100 | mutex_lock(&adev->srbm_mutex); | 1073 | mutex_lock(&adev->srbm_mutex); |
1101 | for (j = 0; j < 16; j++) { | 1074 | for (j = 0; j < 16; j++) { |
1102 | cik_srbm_select(adev, 0, 0, 0, j); | 1075 | cik_srbm_select(adev, 0, 0, 0, j); |
@@ -1297,12 +1270,12 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { | |||
1297 | .parse_cs = NULL, | 1270 | .parse_cs = NULL, |
1298 | .emit_ib = cik_sdma_ring_emit_ib, | 1271 | .emit_ib = cik_sdma_ring_emit_ib, |
1299 | .emit_fence = cik_sdma_ring_emit_fence, | 1272 | .emit_fence = cik_sdma_ring_emit_fence, |
1300 | .emit_semaphore = cik_sdma_ring_emit_semaphore, | ||
1301 | .emit_vm_flush = cik_sdma_ring_emit_vm_flush, | 1273 | .emit_vm_flush = cik_sdma_ring_emit_vm_flush, |
1302 | .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush, | 1274 | .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush, |
1303 | .test_ring = cik_sdma_ring_test_ring, | 1275 | .test_ring = cik_sdma_ring_test_ring, |
1304 | .test_ib = cik_sdma_ring_test_ib, | 1276 | .test_ib = cik_sdma_ring_test_ib, |
1305 | .insert_nop = cik_sdma_ring_insert_nop, | 1277 | .insert_nop = cik_sdma_ring_insert_nop, |
1278 | .pad_ib = cik_sdma_ring_pad_ib, | ||
1306 | }; | 1279 | }; |
1307 | 1280 | ||
1308 | static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) | 1281 | static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) |
@@ -1399,14 +1372,18 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { | |||
1399 | .copy_pte = cik_sdma_vm_copy_pte, | 1372 | .copy_pte = cik_sdma_vm_copy_pte, |
1400 | .write_pte = cik_sdma_vm_write_pte, | 1373 | .write_pte = cik_sdma_vm_write_pte, |
1401 | .set_pte_pde = cik_sdma_vm_set_pte_pde, | 1374 | .set_pte_pde = cik_sdma_vm_set_pte_pde, |
1402 | .pad_ib = cik_sdma_vm_pad_ib, | ||
1403 | }; | 1375 | }; |
1404 | 1376 | ||
1405 | static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) | 1377 | static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) |
1406 | { | 1378 | { |
1379 | unsigned i; | ||
1380 | |||
1407 | if (adev->vm_manager.vm_pte_funcs == NULL) { | 1381 | if (adev->vm_manager.vm_pte_funcs == NULL) { |
1408 | adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; | 1382 | adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; |
1409 | adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; | 1383 | for (i = 0; i < adev->sdma.num_instances; i++) |
1410 | adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; | 1384 | adev->vm_manager.vm_pte_rings[i] = |
1385 | &adev->sdma.instance[i].ring; | ||
1386 | |||
1387 | adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; | ||
1411 | } | 1388 | } |
1412 | } | 1389 | } |
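
Registering every SDMA instance (rather than hard-wiring instance 0) lets the VM code spread page-table updates across the engines. A stand-alone sketch of the new registration (types simplified; CIK has at most two SDMA instances):

struct ring;

struct vm_manager {
	struct ring *pte_rings[2];
	unsigned pte_num_rings;
};

static void set_vm_pte_rings(struct vm_manager *vm, struct ring **sdma_rings,
			     unsigned num_instances)
{
	unsigned i;

	for (i = 0; i < num_instances; i++)
		vm->pte_rings[i] = sdma_rings[i];   /* one entry per SDMA engine */
	vm->pte_num_rings = num_instances;
}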
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 34830189311e..e3ff809a0cae 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | |||
@@ -2670,7 +2670,6 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc) | |||
2670 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 2670 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
2671 | 2671 | ||
2672 | drm_crtc_cleanup(crtc); | 2672 | drm_crtc_cleanup(crtc); |
2673 | destroy_workqueue(amdgpu_crtc->pflip_queue); | ||
2674 | kfree(amdgpu_crtc); | 2673 | kfree(amdgpu_crtc); |
2675 | } | 2674 | } |
2676 | 2675 | ||
@@ -2890,7 +2889,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) | |||
2890 | 2889 | ||
2891 | drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); | 2890 | drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); |
2892 | amdgpu_crtc->crtc_id = index; | 2891 | amdgpu_crtc->crtc_id = index; |
2893 | amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue"); | ||
2894 | adev->mode_info.crtcs[index] = amdgpu_crtc; | 2892 | adev->mode_info.crtcs[index] = amdgpu_crtc; |
2895 | 2893 | ||
2896 | amdgpu_crtc->max_cursor_width = 128; | 2894 | amdgpu_crtc->max_cursor_width = 128; |
@@ -3366,7 +3364,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev, | |||
3366 | spin_unlock_irqrestore(&adev->ddev->event_lock, flags); | 3364 | spin_unlock_irqrestore(&adev->ddev->event_lock, flags); |
3367 | 3365 | ||
3368 | drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); | 3366 | drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); |
3369 | queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); | 3367 | schedule_work(&works->unpin_work); |
3370 | 3368 | ||
3371 | return 0; | 3369 | return 0; |
3372 | } | 3370 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 36deea162779..6b6c9b6879ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | |||
@@ -2661,7 +2661,6 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc) | |||
2661 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 2661 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
2662 | 2662 | ||
2663 | drm_crtc_cleanup(crtc); | 2663 | drm_crtc_cleanup(crtc); |
2664 | destroy_workqueue(amdgpu_crtc->pflip_queue); | ||
2665 | kfree(amdgpu_crtc); | 2664 | kfree(amdgpu_crtc); |
2666 | } | 2665 | } |
2667 | 2666 | ||
@@ -2881,7 +2880,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) | |||
2881 | 2880 | ||
2882 | drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); | 2881 | drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); |
2883 | amdgpu_crtc->crtc_id = index; | 2882 | amdgpu_crtc->crtc_id = index; |
2884 | amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue"); | ||
2885 | adev->mode_info.crtcs[index] = amdgpu_crtc; | 2883 | adev->mode_info.crtcs[index] = amdgpu_crtc; |
2886 | 2884 | ||
2887 | amdgpu_crtc->max_cursor_width = 128; | 2885 | amdgpu_crtc->max_cursor_width = 128; |
@@ -3361,7 +3359,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev, | |||
3361 | spin_unlock_irqrestore(&adev->ddev->event_lock, flags); | 3359 | spin_unlock_irqrestore(&adev->ddev->event_lock, flags); |
3362 | 3360 | ||
3363 | drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); | 3361 | drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); |
3364 | queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); | 3362 | schedule_work(&works->unpin_work); |
3365 | 3363 | ||
3366 | return 0; | 3364 | return 0; |
3367 | } | 3365 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 25dd8b668ea5..56bea36a6b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | |||
@@ -2582,7 +2582,6 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc) | |||
2582 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 2582 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
2583 | 2583 | ||
2584 | drm_crtc_cleanup(crtc); | 2584 | drm_crtc_cleanup(crtc); |
2585 | destroy_workqueue(amdgpu_crtc->pflip_queue); | ||
2586 | kfree(amdgpu_crtc); | 2585 | kfree(amdgpu_crtc); |
2587 | } | 2586 | } |
2588 | 2587 | ||
@@ -2809,7 +2808,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) | |||
2809 | 2808 | ||
2810 | drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); | 2809 | drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); |
2811 | amdgpu_crtc->crtc_id = index; | 2810 | amdgpu_crtc->crtc_id = index; |
2812 | amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue"); | ||
2813 | adev->mode_info.crtcs[index] = amdgpu_crtc; | 2811 | adev->mode_info.crtcs[index] = amdgpu_crtc; |
2814 | 2812 | ||
2815 | amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH; | 2813 | amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH; |
@@ -3375,7 +3373,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev, | |||
3375 | spin_unlock_irqrestore(&adev->ddev->event_lock, flags); | 3373 | spin_unlock_irqrestore(&adev->ddev->event_lock, flags); |
3376 | 3374 | ||
3377 | drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); | 3375 | drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); |
3378 | queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); | 3376 | schedule_work(&works->unpin_work); |
3379 | 3377 | ||
3380 | return 0; | 3378 | return 0; |
3381 | } | 3379 | } |
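
All three display blocks (DCE v8/v10/v11) drop their dedicated per-CRTC page-flip workqueue; the unpin work now rides the shared system workqueue via schedule_work(), so there is nothing to create at init or destroy at teardown. A stubbed model of the change (kernel workqueue API replaced by stand-ins):

struct work { void (*fn)(struct work *); };

/* system-workqueue stand-in: runs the work inline for illustration */
static void schedule_work_stub(struct work *w) { w->fn(w); }

static void unpin_fn(struct work *w) { (void)w; /* unpin the old framebuffer */ }

static void pageflip_done(struct work *unpin_work)
{
	/* no per-CRTC queue to create, flush, or destroy anymore */
	schedule_work_stub(unpin_work);
}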
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c index e35340afd3db..b336c918d6a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c | |||
@@ -272,6 +272,12 @@ static int fiji_smu_upload_firmware_image(struct amdgpu_device *adev) | |||
272 | if (!adev->pm.fw) | 272 | if (!adev->pm.fw) |
273 | return -EINVAL; | 273 | return -EINVAL; |
274 | 274 | ||
275 | /* Skip SMC ucode loading on SR-IOV capable boards. | ||
276 | * vbios does this for us in asic_init in that case. | ||
277 | */ | ||
278 | if (adev->virtualization.supports_sr_iov) | ||
279 | return 0; | ||
280 | |||
275 | hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; | 281 | hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; |
276 | amdgpu_ucode_print_smc_hdr(&hdr->header); | 282 | amdgpu_ucode_print_smc_hdr(&hdr->header); |
277 | 283 | ||
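
The guard is a simple early return: on SR-IOV capable boards the vbios has already loaded the SMC image during asic_init, so the driver-side upload is skipped. A stubbed sketch of that ordering (names and the error value are illustrative):

#include <stdbool.h>
#include <stddef.h>

#define EINVAL 22

static int upload_smc_firmware(bool supports_sr_iov, const void *fw)
{
	if (fw == NULL)
		return -EINVAL;    /* no firmware image available */
	if (supports_sr_iov)
		return 0;          /* vbios already loaded it in asic_init */
	/* ... parse the SMC header and copy the image ... */
	return 0;
}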
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 6c76139de1c9..250bcbce7fdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | |||
@@ -31,8 +31,6 @@ | |||
31 | #include "amdgpu_ucode.h" | 31 | #include "amdgpu_ucode.h" |
32 | #include "clearstate_ci.h" | 32 | #include "clearstate_ci.h" |
33 | 33 | ||
34 | #include "uvd/uvd_4_2_d.h" | ||
35 | |||
36 | #include "dce/dce_8_0_d.h" | 34 | #include "dce/dce_8_0_d.h" |
37 | #include "dce/dce_8_0_sh_mask.h" | 35 | #include "dce/dce_8_0_sh_mask.h" |
38 | 36 | ||
@@ -1006,9 +1004,15 @@ out: | |||
1006 | */ | 1004 | */ |
1007 | static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) | 1005 | static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) |
1008 | { | 1006 | { |
1009 | const u32 num_tile_mode_states = 32; | 1007 | const u32 num_tile_mode_states = |
1010 | const u32 num_secondary_tile_mode_states = 16; | 1008 | ARRAY_SIZE(adev->gfx.config.tile_mode_array); |
1011 | u32 reg_offset, gb_tile_moden, split_equal_to_row_size; | 1009 | const u32 num_secondary_tile_mode_states = |
1010 | ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); | ||
1011 | u32 reg_offset, split_equal_to_row_size; | ||
1012 | uint32_t *tile, *macrotile; | ||
1013 | |||
1014 | tile = adev->gfx.config.tile_mode_array; | ||
1015 | macrotile = adev->gfx.config.macrotile_mode_array; | ||
1012 | 1016 | ||
1013 | switch (adev->gfx.config.mem_row_size_in_kb) { | 1017 | switch (adev->gfx.config.mem_row_size_in_kb) { |
1014 | case 1: | 1018 | case 1: |
@@ -1023,832 +1027,531 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) | |||
1023 | break; | 1027 | break; |
1024 | } | 1028 | } |
1025 | 1029 | ||
1030 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) | ||
1031 | tile[reg_offset] = 0; | ||
1032 | for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) | ||
1033 | macrotile[reg_offset] = 0; | ||
1034 | |||
1026 | switch (adev->asic_type) { | 1035 | switch (adev->asic_type) { |
1027 | case CHIP_BONAIRE: | 1036 | case CHIP_BONAIRE: |
1028 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { | 1037 | tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1029 | switch (reg_offset) { | 1038 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1030 | case 0: | 1039 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | |
1031 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1040 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1032 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1041 | tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1033 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | 1042 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1034 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1043 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | |
1035 | break; | 1044 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1036 | case 1: | 1045 | tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1037 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1046 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1038 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1047 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
1039 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | 1048 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1040 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1049 | tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1041 | break; | 1050 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1042 | case 2: | 1051 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
1043 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1052 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1044 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1053 | tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1045 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 1054 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1046 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1055 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | |
1047 | break; | 1056 | TILE_SPLIT(split_equal_to_row_size)); |
1048 | case 3: | 1057 | tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1049 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1058 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1050 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1059 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1051 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 1060 | tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1052 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1061 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1053 | break; | 1062 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | |
1054 | case 4: | 1063 | TILE_SPLIT(split_equal_to_row_size)); |
1055 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1064 | tile[7] = (TILE_SPLIT(split_equal_to_row_size)); |
1056 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1065 | tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | |
1057 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | | 1066 | PIPE_CONFIG(ADDR_SURF_P4_16x16)); |
1058 | TILE_SPLIT(split_equal_to_row_size)); | 1067 | tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1059 | break; | 1068 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1060 | case 5: | 1069 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); |
1061 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1070 | tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1062 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1071 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1063 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1072 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | |
1064 | break; | 1073 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1065 | case 6: | 1074 | tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1066 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1075 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1067 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1076 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | |
1068 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | | 1077 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1069 | TILE_SPLIT(split_equal_to_row_size)); | 1078 | tile[12] = (TILE_SPLIT(split_equal_to_row_size)); |
1070 | break; | 1079 | tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1071 | case 7: | 1080 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1072 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | 1081 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); |
1073 | break; | 1082 | tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1074 | 1083 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | |
1075 | case 8: | 1084 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1076 | gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | | 1085 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1077 | PIPE_CONFIG(ADDR_SURF_P4_16x16)); | 1086 | tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | |
1078 | break; | 1087 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1079 | case 9: | 1088 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1080 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1089 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1081 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1090 | tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1082 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); | 1091 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1083 | break; | 1092 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1084 | case 10: | 1093 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1085 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1094 | tile[17] = (TILE_SPLIT(split_equal_to_row_size)); |
1086 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1095 | tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
1087 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | 1096 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1088 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1097 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1089 | break; | 1098 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1090 | case 11: | 1099 | tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
1091 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1100 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1092 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1101 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); |
1093 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | 1102 | tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
1094 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1103 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1095 | break; | 1104 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1096 | case 12: | 1105 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1097 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | 1106 | tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | |
1098 | break; | 1107 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1099 | case 13: | 1108 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1100 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1109 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1101 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1110 | tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | |
1102 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); | 1111 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1103 | break; | 1112 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1104 | case 14: | 1113 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1105 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1114 | tile[23] = (TILE_SPLIT(split_equal_to_row_size)); |
1106 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1115 | tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
1107 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1116 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1108 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1117 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1109 | break; | 1118 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1110 | case 15: | 1119 | tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | |
1111 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | | 1120 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1112 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1121 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1113 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1122 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1114 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1123 | tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | |
1115 | break; | 1124 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1116 | case 16: | 1125 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1117 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1126 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1118 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1127 | tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1119 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1128 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1120 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1129 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); |
1121 | break; | 1130 | tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1122 | case 17: | 1131 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1123 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | 1132 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | |
1124 | break; | 1133 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1125 | case 18: | 1134 | tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1126 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | | 1135 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1127 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1136 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | |
1128 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1137 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1129 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1138 | tile[30] = (TILE_SPLIT(split_equal_to_row_size)); |
1130 | break; | 1139 | |
1131 | case 19: | 1140 | macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1132 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | | 1141 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
1133 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1142 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1134 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); | 1143 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1135 | break; | 1144 | macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1136 | case 20: | 1145 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
1137 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | | 1146 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1138 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1147 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1139 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1148 | macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1140 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1149 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1141 | break; | 1150 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1142 | case 21: | 1151 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1143 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | | 1152 | macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1144 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1153 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1145 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1154 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1146 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1155 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1147 | break; | 1156 | macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1148 | case 22: | 1157 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1149 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | | 1158 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1150 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1159 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1151 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1160 | macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1152 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1161 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1153 | break; | 1162 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1154 | case 23: | 1163 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1155 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | 1164 | macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1156 | break; | 1165 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1157 | case 24: | 1166 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1158 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | | 1167 | NUM_BANKS(ADDR_SURF_4_BANK)); |
1159 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1168 | macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | |
1160 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1169 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | |
1161 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1170 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1162 | break; | 1171 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1163 | case 25: | 1172 | macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | |
1164 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | | 1173 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
1165 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1174 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1166 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1175 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1167 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1176 | macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1168 | break; | 1177 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
1169 | case 26: | 1178 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1170 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | | 1179 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1171 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1180 | macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1172 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1181 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
1173 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1182 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1174 | break; | 1183 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1175 | case 27: | 1184 | macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1176 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1185 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1177 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1186 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1178 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); | 1187 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1179 | break; | 1188 | macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1180 | case 28: | 1189 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1181 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1190 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1182 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1191 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1183 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | 1192 | macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1184 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1193 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1185 | break; | 1194 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1186 | case 29: | 1195 | NUM_BANKS(ADDR_SURF_4_BANK)); |
1187 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1196 | |
1188 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1197 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) |
1189 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | 1198 | WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]); |
1190 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1199 | for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) |
1191 | break; | 1200 | if (reg_offset != 7) |
1192 | case 30: | 1201 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]); |
1193 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | ||
1194 | break; | ||
1195 | default: | ||
1196 | gb_tile_moden = 0; | ||
1197 | break; | ||
1198 | } | ||
1199 | adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; | ||
1200 | WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); | ||
1201 | } | ||
1202 | for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { | ||
1203 | switch (reg_offset) { | ||
1204 | case 0: | ||
1205 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1206 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1207 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1208 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1209 | break; | ||
1210 | case 1: | ||
1211 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1212 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1213 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1214 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1215 | break; | ||
1216 | case 2: | ||
1217 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1218 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1219 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1220 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1221 | break; | ||
1222 | case 3: | ||
1223 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1224 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1225 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1226 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1227 | break; | ||
1228 | case 4: | ||
1229 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1230 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1231 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1232 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1233 | break; | ||
1234 | case 5: | ||
1235 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1236 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1237 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1238 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1239 | break; | ||
1240 | case 6: | ||
1241 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1242 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1243 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1244 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
1245 | break; | ||
1246 | case 8: | ||
1247 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | | ||
1248 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | | ||
1249 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1250 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1251 | break; | ||
1252 | case 9: | ||
1253 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | | ||
1254 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1255 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1256 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1257 | break; | ||
1258 | case 10: | ||
1259 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1260 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1261 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1262 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1263 | break; | ||
1264 | case 11: | ||
1265 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1266 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1267 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1268 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1269 | break; | ||
1270 | case 12: | ||
1271 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1272 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1273 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1274 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1275 | break; | ||
1276 | case 13: | ||
1277 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1278 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1279 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1280 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1281 | break; | ||
1282 | case 14: | ||
1283 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1284 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1285 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1286 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
1287 | break; | ||
1288 | default: | ||
1289 | gb_tile_moden = 0; | ||
1290 | break; | ||
1291 | } | ||
1292 | adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden; | ||
1293 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); | ||
1294 | } | ||
1295 | break; | 1202 | break; |
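
The pattern of this gpu_init() rework, visible in the Bonaire case just above: the per-offset switch that computed gb_tile_moden one register at a time is replaced by filling the tile/macrotile arrays directly and then programming the registers from them, still skipping the reserved macrotile slot 7. A self-contained model (example encodings only; the real values come from the per-ASIC tables):

#include <stdint.h>
#include <stdio.h>

#define NUM_MODES 16

int main(void)
{
	uint32_t macrotile[NUM_MODES] = {0};   /* zero-init replaces the default: case */
	int i;

	macrotile[0] = 0x10;   /* example encodings, not real register values */
	macrotile[1] = 0x20;

	for (i = 0; i < NUM_MODES; i++)
		if (i != 7)    /* offset 7 is not programmed, matching the driver */
			printf("WREG32(mmGB_MACROTILE_MODE0 + %d, 0x%08X)\n",
			       i, macrotile[i]);
	return 0;
}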
1296 | case CHIP_HAWAII: | 1203 | case CHIP_HAWAII: |
1297 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { | 1204 | tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1298 | switch (reg_offset) { | 1205 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1299 | case 0: | 1206 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | |
1300 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1207 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1301 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1208 | tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1302 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | 1209 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1303 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1210 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | |
1304 | break; | 1211 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1305 | case 1: | 1212 | tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1306 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1213 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1307 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1214 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
1308 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | 1215 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1309 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1216 | tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1310 | break; | 1217 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1311 | case 2: | 1218 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
1312 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1219 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1313 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1220 | tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1314 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 1221 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1315 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1222 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | |
1316 | break; | 1223 | TILE_SPLIT(split_equal_to_row_size)); |
1317 | case 3: | 1224 | tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1318 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1225 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1319 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1226 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | |
1320 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 1227 | TILE_SPLIT(split_equal_to_row_size)); |
1321 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1228 | tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1322 | break; | 1229 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1323 | case 4: | 1230 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | |
1324 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1231 | TILE_SPLIT(split_equal_to_row_size)); |
1325 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1232 | tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1326 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | | 1233 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1327 | TILE_SPLIT(split_equal_to_row_size)); | 1234 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | |
1328 | break; | 1235 | TILE_SPLIT(split_equal_to_row_size)); |
1329 | case 5: | 1236 | tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | |
1330 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1237 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); |
1331 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1238 | tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1332 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | | 1239 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1333 | TILE_SPLIT(split_equal_to_row_size)); | 1240 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); |
1334 | break; | 1241 | tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1335 | case 6: | 1242 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1336 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1243 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | |
1337 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1244 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1338 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | | 1245 | tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1339 | TILE_SPLIT(split_equal_to_row_size)); | 1246 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1340 | break; | 1247 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | |
1341 | case 7: | 1248 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1342 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1249 | tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | |
1343 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1250 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1344 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | | 1251 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | |
1345 | TILE_SPLIT(split_equal_to_row_size)); | 1252 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1346 | break; | 1253 | tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1347 | 1254 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | |
1348 | case 8: | 1255 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); |
1349 | gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | | 1256 | tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1350 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); | 1257 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1351 | break; | 1258 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1352 | case 9: | 1259 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1353 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1260 | tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | |
1354 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1261 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1355 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); | 1262 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1356 | break; | 1263 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1357 | case 10: | 1264 | tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1358 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1265 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1359 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1266 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1360 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | 1267 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1361 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1268 | tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1362 | break; | 1269 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1363 | case 11: | 1270 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1364 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1271 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1365 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1272 | tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
1366 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | 1273 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1367 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1274 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1368 | break; | 1275 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1369 | case 12: | 1276 | tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
1370 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | | 1277 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1371 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1278 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); |
1372 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | 1279 | tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
1373 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1280 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1374 | break; | 1281 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1375 | case 13: | 1282 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1376 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1283 | tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | |
1377 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1284 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1378 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); | 1285 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1379 | break; | 1286 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1380 | case 14: | 1287 | tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | |
1381 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1288 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1382 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1289 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1383 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1290 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1384 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1291 | tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | |
1385 | break; | 1292 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1386 | case 15: | 1293 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1387 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | | 1294 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1388 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1295 | tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
1389 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1296 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1390 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1297 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1391 | break; | 1298 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1392 | case 16: | 1299 | tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | |
1393 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1300 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1394 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1301 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1395 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1302 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1396 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1303 | tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | |
1397 | break; | 1304 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1398 | case 17: | 1305 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1399 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1306 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1400 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1307 | tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1401 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1308 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1402 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1309 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); |
1403 | break; | 1310 | tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1404 | case 18: | 1311 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1405 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | | 1312 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | |
1406 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1313 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1407 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1314 | tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1408 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1315 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
1409 | break; | 1316 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | |
1410 | case 19: | 1317 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1411 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | | 1318 | tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1412 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1319 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | |
1413 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); | 1320 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | |
1414 | break; | 1321 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1415 | case 20: | 1322 | |
1416 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | | 1323 | macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1417 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1324 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
1418 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1325 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1419 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1326 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1420 | break; | 1327 | macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1421 | case 21: | 1328 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
1422 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | | 1329 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1423 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1330 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1424 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1331 | macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1425 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1332 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1426 | break; | 1333 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1427 | case 22: | 1334 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1428 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | | 1335 | macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1429 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1336 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1430 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1337 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1431 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1338 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1432 | break; | 1339 | macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1433 | case 23: | 1340 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1434 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | | 1341 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1435 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1342 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1436 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1343 | macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1437 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1344 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1438 | break; | 1345 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1439 | case 24: | 1346 | NUM_BANKS(ADDR_SURF_4_BANK)); |
1440 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | | 1347 | macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1441 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1348 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1442 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1349 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1443 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1350 | NUM_BANKS(ADDR_SURF_4_BANK)); |
1444 | break; | 1351 | macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1445 | case 25: | 1352 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
1446 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | | 1353 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1447 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1354 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1448 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1355 | macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1449 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1356 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
1450 | break; | 1357 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1451 | case 26: | 1358 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1452 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | | 1359 | macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1453 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1360 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1454 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1361 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1455 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1362 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1456 | break; | 1363 | macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1457 | case 27: | 1364 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1458 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1365 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1459 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1366 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1460 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); | 1367 | macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1461 | break; | 1368 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1462 | case 28: | 1369 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1463 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1370 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1464 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1371 | macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1465 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | 1372 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1466 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1373 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1467 | break; | 1374 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1468 | case 29: | 1375 | macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1469 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1376 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1470 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 1377 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | |
1471 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | 1378 | NUM_BANKS(ADDR_SURF_4_BANK)); |
1472 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1379 | |
1473 | break; | 1380 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) |
1474 | case 30: | 1381 | WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]); |
1475 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1382 | for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) |
1476 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | 1383 | if (reg_offset != 7) |
1477 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | 1384 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]); |
1478 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | ||
1479 | break; | ||
1480 | default: | ||
1481 | gb_tile_moden = 0; | ||
1482 | break; | ||
1483 | } | ||
1484 | adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; | ||
1485 | WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); | ||
1486 | } | ||
1487 | for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { | ||
1488 | switch (reg_offset) { | ||
1489 | case 0: | ||
1490 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1491 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1492 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1493 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1494 | break; | ||
1495 | case 1: | ||
1496 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1497 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1498 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1499 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1500 | break; | ||
1501 | case 2: | ||
1502 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1503 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1504 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1505 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1506 | break; | ||
1507 | case 3: | ||
1508 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1509 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1510 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1511 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1512 | break; | ||
1513 | case 4: | ||
1514 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1515 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1516 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1517 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1518 | break; | ||
1519 | case 5: | ||
1520 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1521 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1522 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1523 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
1524 | break; | ||
1525 | case 6: | ||
1526 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1527 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1528 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1529 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
1530 | break; | ||
1531 | case 8: | ||
1532 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1533 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1534 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1535 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1536 | break; | ||
1537 | case 9: | ||
1538 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1539 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1540 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1541 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1542 | break; | ||
1543 | case 10: | ||
1544 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1545 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1546 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1547 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1548 | break; | ||
1549 | case 11: | ||
1550 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1551 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1552 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1553 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1554 | break; | ||
1555 | case 12: | ||
1556 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1557 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1558 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1559 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1560 | break; | ||
1561 | case 13: | ||
1562 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1563 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1564 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1565 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1566 | break; | ||
1567 | case 14: | ||
1568 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1569 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1570 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1571 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
1572 | break; | ||
1573 | default: | ||
1574 | gb_tile_moden = 0; | ||
1575 | break; | ||
1576 | } | ||
1577 | adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden; | ||
1578 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); | ||
1579 | } | ||
1580 | break; | 1385 | break; |
1581 | case CHIP_KABINI: | 1386 | case CHIP_KABINI: |
1582 | case CHIP_KAVERI: | 1387 | case CHIP_KAVERI: |
1583 | case CHIP_MULLINS: | 1388 | case CHIP_MULLINS: |
1584 | default: | 1389 | default: |
1585 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { | 1390 | tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1586 | switch (reg_offset) { | 1391 | PIPE_CONFIG(ADDR_SURF_P2) | |
1587 | case 0: | 1392 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | |
1588 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1393 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1589 | PIPE_CONFIG(ADDR_SURF_P2) | | 1394 | tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1590 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | 1395 | PIPE_CONFIG(ADDR_SURF_P2) | |
1591 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1396 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | |
1592 | break; | 1397 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1593 | case 1: | 1398 | tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1594 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1399 | PIPE_CONFIG(ADDR_SURF_P2) | |
1595 | PIPE_CONFIG(ADDR_SURF_P2) | | 1400 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | |
1596 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | 1401 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1597 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1402 | tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1598 | break; | 1403 | PIPE_CONFIG(ADDR_SURF_P2) | |
1599 | case 2: | 1404 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | |
1600 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1405 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1601 | PIPE_CONFIG(ADDR_SURF_P2) | | 1406 | tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1602 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | 1407 | PIPE_CONFIG(ADDR_SURF_P2) | |
1603 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1408 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | |
1604 | break; | 1409 | TILE_SPLIT(split_equal_to_row_size)); |
1605 | case 3: | 1410 | tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1606 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1411 | PIPE_CONFIG(ADDR_SURF_P2) | |
1607 | PIPE_CONFIG(ADDR_SURF_P2) | | 1412 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); |
1608 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | 1413 | tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1609 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1414 | PIPE_CONFIG(ADDR_SURF_P2) | |
1610 | break; | 1415 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | |
1611 | case 4: | 1416 | TILE_SPLIT(split_equal_to_row_size)); |
1612 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1417 | tile[7] = (TILE_SPLIT(split_equal_to_row_size)); |
1613 | PIPE_CONFIG(ADDR_SURF_P2) | | 1418 | tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | |
1614 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | | 1419 | PIPE_CONFIG(ADDR_SURF_P2)); |
1615 | TILE_SPLIT(split_equal_to_row_size)); | 1420 | tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1616 | break; | 1421 | PIPE_CONFIG(ADDR_SURF_P2) | |
1617 | case 5: | 1422 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); |
1618 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1423 | tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1619 | PIPE_CONFIG(ADDR_SURF_P2) | | 1424 | PIPE_CONFIG(ADDR_SURF_P2) | |
1620 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | 1425 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | |
1621 | break; | 1426 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1622 | case 6: | 1427 | tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1623 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1428 | PIPE_CONFIG(ADDR_SURF_P2) | |
1624 | PIPE_CONFIG(ADDR_SURF_P2) | | 1429 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | |
1625 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | | 1430 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1626 | TILE_SPLIT(split_equal_to_row_size)); | 1431 | tile[12] = (TILE_SPLIT(split_equal_to_row_size)); |
1627 | break; | 1432 | tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1628 | case 7: | 1433 | PIPE_CONFIG(ADDR_SURF_P2) | |
1629 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | 1434 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); |
1630 | break; | 1435 | tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1631 | 1436 | PIPE_CONFIG(ADDR_SURF_P2) | | |
1632 | case 8: | 1437 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1633 | gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | | 1438 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1634 | PIPE_CONFIG(ADDR_SURF_P2)); | 1439 | tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | |
1635 | break; | 1440 | PIPE_CONFIG(ADDR_SURF_P2) | |
1636 | case 9: | 1441 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1637 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1442 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1638 | PIPE_CONFIG(ADDR_SURF_P2) | | 1443 | tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1639 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); | 1444 | PIPE_CONFIG(ADDR_SURF_P2) | |
1640 | break; | 1445 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1641 | case 10: | 1446 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1642 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1447 | tile[17] = (TILE_SPLIT(split_equal_to_row_size)); |
1643 | PIPE_CONFIG(ADDR_SURF_P2) | | 1448 | tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
1644 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | 1449 | PIPE_CONFIG(ADDR_SURF_P2) | |
1645 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1450 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1646 | break; | 1451 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1647 | case 11: | 1452 | tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | |
1648 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1453 | PIPE_CONFIG(ADDR_SURF_P2) | |
1649 | PIPE_CONFIG(ADDR_SURF_P2) | | 1454 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); |
1650 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | 1455 | tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
1651 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1456 | PIPE_CONFIG(ADDR_SURF_P2) | |
1652 | break; | 1457 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1653 | case 12: | 1458 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1654 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | 1459 | tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | |
1655 | break; | 1460 | PIPE_CONFIG(ADDR_SURF_P2) | |
1656 | case 13: | 1461 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1657 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1462 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1658 | PIPE_CONFIG(ADDR_SURF_P2) | | 1463 | tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | |
1659 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); | 1464 | PIPE_CONFIG(ADDR_SURF_P2) | |
1660 | break; | 1465 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1661 | case 14: | 1466 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1662 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1467 | tile[23] = (TILE_SPLIT(split_equal_to_row_size)); |
1663 | PIPE_CONFIG(ADDR_SURF_P2) | | 1468 | tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | |
1664 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1469 | PIPE_CONFIG(ADDR_SURF_P2) | |
1665 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1470 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | |
1666 | break; | 1471 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1667 | case 15: | 1472 | tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | |
1668 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | | 1473 | PIPE_CONFIG(ADDR_SURF_P2) | |
1669 | PIPE_CONFIG(ADDR_SURF_P2) | | 1474 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1670 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1475 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1671 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1476 | tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | |
1672 | break; | 1477 | PIPE_CONFIG(ADDR_SURF_P2) | |
1673 | case 16: | 1478 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | |
1674 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1479 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); |
1675 | PIPE_CONFIG(ADDR_SURF_P2) | | 1480 | tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | |
1676 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1481 | PIPE_CONFIG(ADDR_SURF_P2) | |
1677 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1482 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); |
1678 | break; | 1483 | tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
1679 | case 17: | 1484 | PIPE_CONFIG(ADDR_SURF_P2) | |
1680 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | 1485 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | |
1681 | break; | 1486 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); |
1682 | case 18: | 1487 | tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | |
1683 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | | 1488 | PIPE_CONFIG(ADDR_SURF_P2) | |
1684 | PIPE_CONFIG(ADDR_SURF_P2) | | 1489 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | |
1685 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1490 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); |
1686 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1491 | tile[30] = (TILE_SPLIT(split_equal_to_row_size)); |
1687 | break; | 1492 | |
1688 | case 19: | 1493 | macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1689 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | | 1494 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
1690 | PIPE_CONFIG(ADDR_SURF_P2) | | 1495 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1691 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); | 1496 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1692 | break; | 1497 | macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1693 | case 20: | 1498 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
1694 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | | 1499 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1695 | PIPE_CONFIG(ADDR_SURF_P2) | | 1500 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1696 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1501 | macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1697 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1502 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
1698 | break; | 1503 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1699 | case 21: | 1504 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1700 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | | 1505 | macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1701 | PIPE_CONFIG(ADDR_SURF_P2) | | 1506 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1702 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1507 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1703 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1508 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1704 | break; | 1509 | macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1705 | case 22: | 1510 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1706 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | | 1511 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1707 | PIPE_CONFIG(ADDR_SURF_P2) | | 1512 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1708 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1513 | macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1709 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1514 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1710 | break; | 1515 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1711 | case 23: | 1516 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1712 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | 1517 | macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1713 | break; | 1518 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1714 | case 24: | 1519 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1715 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | | 1520 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1716 | PIPE_CONFIG(ADDR_SURF_P2) | | 1521 | macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | |
1717 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | 1522 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | |
1718 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1523 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1719 | break; | 1524 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1720 | case 25: | 1525 | macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | |
1721 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | | 1526 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
1722 | PIPE_CONFIG(ADDR_SURF_P2) | | 1527 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1723 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1528 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1724 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1529 | macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | |
1725 | break; | 1530 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | |
1726 | case 26: | 1531 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1727 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | | 1532 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1728 | PIPE_CONFIG(ADDR_SURF_P2) | | 1533 | macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | |
1729 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | 1534 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
1730 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | 1535 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1731 | break; | 1536 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1732 | case 27: | 1537 | macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1733 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | 1538 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | |
1734 | PIPE_CONFIG(ADDR_SURF_P2) | | 1539 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1735 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); | 1540 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1736 | break; | 1541 | macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1737 | case 28: | 1542 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1738 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 1543 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | |
1739 | PIPE_CONFIG(ADDR_SURF_P2) | | 1544 | NUM_BANKS(ADDR_SURF_16_BANK)); |
1740 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | 1545 | macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | |
1741 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | 1546 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | |
1742 | break; | 1547 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | |
1743 | case 29: | 1548 | NUM_BANKS(ADDR_SURF_8_BANK)); |
1744 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | 1549 | |
1745 | PIPE_CONFIG(ADDR_SURF_P2) | | 1550 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) |
1746 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | 1551 | WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]); |
1747 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | 1552 | for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) |
1748 | break; | 1553 | if (reg_offset != 7) |
1749 | case 30: | 1554 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]); |
1750 | gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); | ||
1751 | break; | ||
1752 | default: | ||
1753 | gb_tile_moden = 0; | ||
1754 | break; | ||
1755 | } | ||
1756 | adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; | ||
1757 | WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); | ||
1758 | } | ||
1759 | for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { | ||
1760 | switch (reg_offset) { | ||
1761 | case 0: | ||
1762 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1763 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1764 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1765 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1766 | break; | ||
1767 | case 1: | ||
1768 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1769 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1770 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1771 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1772 | break; | ||
1773 | case 2: | ||
1774 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1775 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1776 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1777 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1778 | break; | ||
1779 | case 3: | ||
1780 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1781 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1782 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1783 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1784 | break; | ||
1785 | case 4: | ||
1786 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1787 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1788 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1789 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1790 | break; | ||
1791 | case 5: | ||
1792 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1793 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1794 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1795 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1796 | break; | ||
1797 | case 6: | ||
1798 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1799 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1800 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1801 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1802 | break; | ||
1803 | case 8: | ||
1804 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | | ||
1805 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | | ||
1806 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1807 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1808 | break; | ||
1809 | case 9: | ||
1810 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | | ||
1811 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1812 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1813 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1814 | break; | ||
1815 | case 10: | ||
1816 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | | ||
1817 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1818 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1819 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1820 | break; | ||
1821 | case 11: | ||
1822 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | | ||
1823 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1824 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1825 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1826 | break; | ||
1827 | case 12: | ||
1828 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1829 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1830 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1831 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1832 | break; | ||
1833 | case 13: | ||
1834 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1835 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1836 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | | ||
1837 | NUM_BANKS(ADDR_SURF_16_BANK)); | ||
1838 | break; | ||
1839 | case 14: | ||
1840 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1841 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1842 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1843 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1844 | break; | ||
1845 | default: | ||
1846 | gb_tile_moden = 0; | ||
1847 | break; | ||
1848 | } | ||
1849 | adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden; | ||
1850 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); | ||
1851 | } | ||
1852 | break; | 1555 | break; |
1853 | } | 1556 | } |
1854 | } | 1557 | } |
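The tiling rework above trades the old per-index switch, which computed and wrote one register per loop iteration, for tile[]/macrotile[] tables that are filled up front and flushed in two tight loops. One behavioral detail visible in the hunk: the old default case wrote 0 to macrotile index 7, while the new write loop skips that index entirely. A minimal standalone sketch of the new pattern follows; wreg32(), the register numbering, and the table sizes are placeholders, not the driver's real symbols:

        #include <stdint.h>

        #define NUM_TILE_STATES       32   /* placeholder for num_tile_mode_states */
        #define NUM_MACROTILE_STATES  16   /* placeholder for num_secondary_tile_mode_states */

        static uint32_t mmio[NUM_TILE_STATES + NUM_MACROTILE_STATES];

        /* Placeholder for WREG32(reg, val). */
        static void wreg32(unsigned int reg, uint32_t val)
        {
                mmio[reg] = val;
        }

        int main(void)
        {
                uint32_t tile[NUM_TILE_STATES] = { 0 };
                uint32_t macrotile[NUM_MACROTILE_STATES] = { 0 };
                unsigned int reg_offset;

                /* Fill the tables first (the per-ASIC values are elided here)... */
                tile[0] = 0x12345678;
                macrotile[0] = 0x9abcdef0;

                /* ...then issue all register writes in two tight loops,
                 * skipping macrotile index 7 as the new code does. */
                for (reg_offset = 0; reg_offset < NUM_TILE_STATES; reg_offset++)
                        wreg32(reg_offset, tile[reg_offset]);
                for (reg_offset = 0; reg_offset < NUM_MACROTILE_STATES; reg_offset++)
                        if (reg_offset != 7)
                                wreg32(NUM_TILE_STATES + reg_offset, macrotile[reg_offset]);
                return 0;
        }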
@@ -1893,45 +1596,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) | |||
1893 | */ | 1596 | */ |
1894 | static u32 gfx_v7_0_create_bitmask(u32 bit_width) | 1597 | static u32 gfx_v7_0_create_bitmask(u32 bit_width) |
1895 | { | 1598 | { |
1896 | u32 i, mask = 0; | 1599 | return (u32)((1ULL << bit_width) - 1); |
1897 | |||
1898 | for (i = 0; i < bit_width; i++) { | ||
1899 | mask <<= 1; | ||
1900 | mask |= 1; | ||
1901 | } | ||
1902 | return mask; | ||
1903 | } | 1600 | } |
1904 | 1601 | ||
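The open-coded mask loop collapses to a closed form here. Using 1ULL keeps the shift well-defined when bit_width is 32, where a 32-bit 1 << 32 would be undefined. A small self-contained check of the equivalence, assuming nothing beyond standard C semantics:

        #include <assert.h>
        #include <stdint.h>

        /* Old loop-based construction of a low mask of bit_width ones. */
        static uint32_t mask_loop(uint32_t bit_width)
        {
                uint32_t i, mask = 0;

                for (i = 0; i < bit_width; i++) {
                        mask <<= 1;
                        mask |= 1;
                }
                return mask;
        }

        /* New closed form; the 64-bit intermediate avoids undefined
         * behaviour at bit_width == 32. */
        static uint32_t mask_shift(uint32_t bit_width)
        {
                return (uint32_t)((1ULL << bit_width) - 1);
        }

        int main(void)
        {
                uint32_t w;

                for (w = 0; w <= 32; w++)
                        assert(mask_loop(w) == mask_shift(w));
                return 0;
        }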
1905 | /** | 1602 | /** |
1906 | * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs | 1603 | * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs |
1907 | * | 1604 | * |
1908 | * @adev: amdgpu_device pointer | 1605 | * @adev: amdgpu_device pointer |
1909 | * @max_rb_num: max RBs (render backends) for the asic | ||
1910 | * @se_num: number of SEs (shader engines) for the asic | ||
1911 | * @sh_per_se: number of SH blocks per SE for the asic | ||
1912 | * | 1606 | * |
1913 | * Calculates the bitmask of disabled RBs (CIK). | 1607 | * Calculates the bitmask of enabled RBs (CIK). |
1914 | * Returns the disabled RB bitmask. | 1608 | * Returns the enabled RB bitmask. |
1915 | */ | 1609 | */ |
1916 | static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, | 1610 | static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev) |
1917 | u32 max_rb_num_per_se, | ||
1918 | u32 sh_per_se) | ||
1919 | { | 1611 | { |
1920 | u32 data, mask; | 1612 | u32 data, mask; |
1921 | 1613 | ||
1922 | data = RREG32(mmCC_RB_BACKEND_DISABLE); | 1614 | data = RREG32(mmCC_RB_BACKEND_DISABLE); |
1923 | if (data & 1) | ||
1924 | data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; | ||
1925 | else | ||
1926 | data = 0; | ||
1927 | |||
1928 | data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); | 1615 | data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); |
1929 | 1616 | ||
1617 | data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; | ||
1930 | data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; | 1618 | data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; |
1931 | 1619 | ||
1932 | mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se); | 1620 | mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / |
1621 | adev->gfx.config.max_sh_per_se); | ||
1933 | 1622 | ||
1934 | return data & mask; | 1623 | return (~data) & mask; |
1935 | } | 1624 | } |
1936 | 1625 | ||
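The helper now reports active rather than disabled backends: both harvest registers are ORed together, the disable field is isolated, and the complement is taken under a mask sized from the per-SH backend count; the old special-casing on bit 0 of the fused register is gone. A sketch of the arithmetic, with the field mask and shift as hypothetical stand-ins for the CC_RB_BACKEND_DISABLE field definitions:

        #include <assert.h>
        #include <stdint.h>

        #define BACKEND_DISABLE_MASK   0x00ff0000u  /* hypothetical field layout */
        #define BACKEND_DISABLE_SHIFT  16

        static uint32_t rb_active_bitmap(uint32_t cc_reg, uint32_t gc_user_reg,
                                         uint32_t rbs_per_sh)
        {
                uint32_t data = cc_reg | gc_user_reg;  /* fused | user harvesting */
                uint32_t mask = (uint32_t)((1ULL << rbs_per_sh) - 1);

                data &= BACKEND_DISABLE_MASK;
                data >>= BACKEND_DISABLE_SHIFT;
                return (~data) & mask;                 /* disabled -> active */
        }

        int main(void)
        {
                /* 4 RBs per SH: RB 1 fuse-disabled, RB 3 user-disabled,
                 * so RBs 0 and 2 remain active (0b0101). */
                assert(rb_active_bitmap(0x2u << 16, 0x8u << 16, 4) == 0x5u);
                return 0;
        }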
1937 | /** | 1626 | /** |
@@ -1940,73 +1629,36 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, | |||
1940 | * @adev: amdgpu_device pointer | 1629 | * @adev: amdgpu_device pointer |
1941 | * @se_num: number of SEs (shader engines) for the asic | 1630 | * @se_num: number of SEs (shader engines) for the asic |
1942 | * @sh_per_se: number of SH blocks per SE for the asic | 1631 | * @sh_per_se: number of SH blocks per SE for the asic |
1943 | * @max_rb_num: max RBs (render backends) for the asic | ||
1944 | * | 1632 | * |
1945 | * Configures per-SE/SH RB registers (CIK). | 1633 | * Configures per-SE/SH RB registers (CIK). |
1946 | */ | 1634 | */ |
1947 | static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, | 1635 | static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) |
1948 | u32 se_num, u32 sh_per_se, | ||
1949 | u32 max_rb_num_per_se) | ||
1950 | { | 1636 | { |
1951 | int i, j; | 1637 | int i, j; |
1952 | u32 data, mask; | 1638 | u32 data, tmp, num_rbs = 0; |
1953 | u32 disabled_rbs = 0; | 1639 | u32 active_rbs = 0; |
1954 | u32 enabled_rbs = 0; | ||
1955 | 1640 | ||
1956 | mutex_lock(&adev->grbm_idx_mutex); | 1641 | mutex_lock(&adev->grbm_idx_mutex); |
1957 | for (i = 0; i < se_num; i++) { | 1642 | for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { |
1958 | for (j = 0; j < sh_per_se; j++) { | 1643 | for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { |
1959 | gfx_v7_0_select_se_sh(adev, i, j); | 1644 | gfx_v7_0_select_se_sh(adev, i, j); |
1960 | data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se); | 1645 | data = gfx_v7_0_get_rb_active_bitmap(adev); |
1961 | if (adev->asic_type == CHIP_HAWAII) | 1646 | if (adev->asic_type == CHIP_HAWAII) |
1962 | disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH); | 1647 | active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * |
1648 | HAWAII_RB_BITMAP_WIDTH_PER_SH); | ||
1963 | else | 1649 | else |
1964 | disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); | 1650 | active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * |
1651 | CIK_RB_BITMAP_WIDTH_PER_SH); | ||
1965 | } | 1652 | } |
1966 | } | 1653 | } |
1967 | gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); | 1654 | gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); |
1968 | mutex_unlock(&adev->grbm_idx_mutex); | 1655 | mutex_unlock(&adev->grbm_idx_mutex); |
1969 | 1656 | ||
1970 | mask = 1; | 1657 | adev->gfx.config.backend_enable_mask = active_rbs; |
1971 | for (i = 0; i < max_rb_num_per_se * se_num; i++) { | 1658 | tmp = active_rbs; |
1972 | if (!(disabled_rbs & mask)) | 1659 | while (tmp >>= 1) |
1973 | enabled_rbs |= mask; | 1660 | num_rbs++; |
1974 | mask <<= 1; | 1661 | adev->gfx.config.num_rbs = num_rbs; |
1975 | } | ||
1976 | |||
1977 | adev->gfx.config.backend_enable_mask = enabled_rbs; | ||
1978 | |||
1979 | mutex_lock(&adev->grbm_idx_mutex); | ||
1980 | for (i = 0; i < se_num; i++) { | ||
1981 | gfx_v7_0_select_se_sh(adev, i, 0xffffffff); | ||
1982 | data = 0; | ||
1983 | for (j = 0; j < sh_per_se; j++) { | ||
1984 | switch (enabled_rbs & 3) { | ||
1985 | case 0: | ||
1986 | if (j == 0) | ||
1987 | data |= (RASTER_CONFIG_RB_MAP_3 << | ||
1988 | PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); | ||
1989 | else | ||
1990 | data |= (RASTER_CONFIG_RB_MAP_0 << | ||
1991 | PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); | ||
1992 | break; | ||
1993 | case 1: | ||
1994 | data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); | ||
1995 | break; | ||
1996 | case 2: | ||
1997 | data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); | ||
1998 | break; | ||
1999 | case 3: | ||
2000 | default: | ||
2001 | data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); | ||
2002 | break; | ||
2003 | } | ||
2004 | enabled_rbs >>= 2; | ||
2005 | } | ||
2006 | WREG32(mmPA_SC_RASTER_CONFIG, data); | ||
2007 | } | ||
2008 | gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); | ||
2009 | mutex_unlock(&adev->grbm_idx_mutex); | ||
2010 | } | 1662 | } |
2011 | 1663 | ||
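setup_rb also shrinks: the second pass that derived PA_SC_RASTER_CONFIG from the enabled-RB mask is dropped, and the function now just records the active bitmap and a count. Note what the counting loop actually computes: while (tmp >>= 1) num_rbs++; yields the index of the highest set bit (floor(log2)), which for a gap-free mask of n backends is n - 1, whereas a population count such as the kernel's hweight32() yields n. Whether that difference matters depends on how num_rbs is consumed, which this hunk does not show. A standalone comparison:

        #include <assert.h>
        #include <stdint.h>

        /* The counting loop from the new setup_rb: index of the highest
         * set bit, not the number of set bits. */
        static uint32_t count_by_shift(uint32_t mask)
        {
                uint32_t n = 0;

                while (mask >>= 1)
                        n++;
                return n;
        }

        /* Portable popcount, equivalent in result to the kernel's hweight32(). */
        static uint32_t popcount32(uint32_t mask)
        {
                uint32_t n = 0;

                while (mask) {
                        mask &= mask - 1;  /* clear the lowest set bit */
                        n++;
                }
                return n;
        }

        int main(void)
        {
                /* Two RBs active (mask 0b11): the shift loop reports 1,
                 * a population count reports 2. */
                assert(count_by_shift(0x3) == 1);
                assert(popcount32(0x3) == 2);
                return 0;
        }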
2012 | /** | 1664 | /** |
@@ -2059,192 +1711,23 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) | |||
2059 | */ | 1711 | */ |
2060 | static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) | 1712 | static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) |
2061 | { | 1713 | { |
2062 | u32 gb_addr_config; | 1714 | u32 tmp, sh_mem_cfg; |
2063 | u32 mc_shared_chmap, mc_arb_ramcfg; | ||
2064 | u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; | ||
2065 | u32 sh_mem_cfg; | ||
2066 | u32 tmp; | ||
2067 | int i; | 1715 | int i; |
2068 | 1716 | ||
2069 | switch (adev->asic_type) { | ||
2070 | case CHIP_BONAIRE: | ||
2071 | adev->gfx.config.max_shader_engines = 2; | ||
2072 | adev->gfx.config.max_tile_pipes = 4; | ||
2073 | adev->gfx.config.max_cu_per_sh = 7; | ||
2074 | adev->gfx.config.max_sh_per_se = 1; | ||
2075 | adev->gfx.config.max_backends_per_se = 2; | ||
2076 | adev->gfx.config.max_texture_channel_caches = 4; | ||
2077 | adev->gfx.config.max_gprs = 256; | ||
2078 | adev->gfx.config.max_gs_threads = 32; | ||
2079 | adev->gfx.config.max_hw_contexts = 8; | ||
2080 | |||
2081 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
2082 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
2083 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
2084 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; | ||
2085 | gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; | ||
2086 | break; | ||
2087 | case CHIP_HAWAII: | ||
2088 | adev->gfx.config.max_shader_engines = 4; | ||
2089 | adev->gfx.config.max_tile_pipes = 16; | ||
2090 | adev->gfx.config.max_cu_per_sh = 11; | ||
2091 | adev->gfx.config.max_sh_per_se = 1; | ||
2092 | adev->gfx.config.max_backends_per_se = 4; | ||
2093 | adev->gfx.config.max_texture_channel_caches = 16; | ||
2094 | adev->gfx.config.max_gprs = 256; | ||
2095 | adev->gfx.config.max_gs_threads = 32; | ||
2096 | adev->gfx.config.max_hw_contexts = 8; | ||
2097 | |||
2098 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
2099 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
2100 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
2101 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; | ||
2102 | gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN; | ||
2103 | break; | ||
2104 | case CHIP_KAVERI: | ||
2105 | adev->gfx.config.max_shader_engines = 1; | ||
2106 | adev->gfx.config.max_tile_pipes = 4; | ||
2107 | if ((adev->pdev->device == 0x1304) || | ||
2108 | (adev->pdev->device == 0x1305) || | ||
2109 | (adev->pdev->device == 0x130C) || | ||
2110 | (adev->pdev->device == 0x130F) || | ||
2111 | (adev->pdev->device == 0x1310) || | ||
2112 | (adev->pdev->device == 0x1311) || | ||
2113 | (adev->pdev->device == 0x131C)) { | ||
2114 | adev->gfx.config.max_cu_per_sh = 8; | ||
2115 | adev->gfx.config.max_backends_per_se = 2; | ||
2116 | } else if ((adev->pdev->device == 0x1309) || | ||
2117 | (adev->pdev->device == 0x130A) || | ||
2118 | (adev->pdev->device == 0x130D) || | ||
2119 | (adev->pdev->device == 0x1313) || | ||
2120 | (adev->pdev->device == 0x131D)) { | ||
2121 | adev->gfx.config.max_cu_per_sh = 6; | ||
2122 | adev->gfx.config.max_backends_per_se = 2; | ||
2123 | } else if ((adev->pdev->device == 0x1306) || | ||
2124 | (adev->pdev->device == 0x1307) || | ||
2125 | (adev->pdev->device == 0x130B) || | ||
2126 | (adev->pdev->device == 0x130E) || | ||
2127 | (adev->pdev->device == 0x1315) || | ||
2128 | (adev->pdev->device == 0x131B)) { | ||
2129 | adev->gfx.config.max_cu_per_sh = 4; | ||
2130 | adev->gfx.config.max_backends_per_se = 1; | ||
2131 | } else { | ||
2132 | adev->gfx.config.max_cu_per_sh = 3; | ||
2133 | adev->gfx.config.max_backends_per_se = 1; | ||
2134 | } | ||
2135 | adev->gfx.config.max_sh_per_se = 1; | ||
2136 | adev->gfx.config.max_texture_channel_caches = 4; | ||
2137 | adev->gfx.config.max_gprs = 256; | ||
2138 | adev->gfx.config.max_gs_threads = 16; | ||
2139 | adev->gfx.config.max_hw_contexts = 8; | ||
2140 | |||
2141 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
2142 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
2143 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
2144 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; | ||
2145 | gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; | ||
2146 | break; | ||
2147 | case CHIP_KABINI: | ||
2148 | case CHIP_MULLINS: | ||
2149 | default: | ||
2150 | adev->gfx.config.max_shader_engines = 1; | ||
2151 | adev->gfx.config.max_tile_pipes = 2; | ||
2152 | adev->gfx.config.max_cu_per_sh = 2; | ||
2153 | adev->gfx.config.max_sh_per_se = 1; | ||
2154 | adev->gfx.config.max_backends_per_se = 1; | ||
2155 | adev->gfx.config.max_texture_channel_caches = 2; | ||
2156 | adev->gfx.config.max_gprs = 256; | ||
2157 | adev->gfx.config.max_gs_threads = 16; | ||
2158 | adev->gfx.config.max_hw_contexts = 8; | ||
2159 | |||
2160 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
2161 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
2162 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
2163 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; | ||
2164 | gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; | ||
2165 | break; | ||
2166 | } | ||
2167 | |||
2168 | WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); | 1717 | WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); |
2169 | 1718 | ||
2170 | mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); | 1719 | WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); |
2171 | adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); | 1720 | WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); |
2172 | mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; | 1721 | WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); |
2173 | |||
2174 | adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; | ||
2175 | adev->gfx.config.mem_max_burst_length_bytes = 256; | ||
2176 | if (adev->flags & AMD_IS_APU) { | ||
2177 | /* Get memory bank mapping mode. */ | ||
2178 | tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); | ||
2179 | dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); | ||
2180 | dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); | ||
2181 | |||
2182 | tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); | ||
2183 | dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); | ||
2184 | dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); | ||
2185 | |||
2186 | /* Validate settings in case only one DIMM installed. */ | ||
2187 | if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) | ||
2188 | dimm00_addr_map = 0; | ||
2189 | if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) | ||
2190 | dimm01_addr_map = 0; | ||
2191 | if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) | ||
2192 | dimm10_addr_map = 0; | ||
2193 | if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) | ||
2194 | dimm11_addr_map = 0; | ||
2195 | |||
2196 | /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ | ||
2197 | /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */ | ||
2198 | if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) | ||
2199 | adev->gfx.config.mem_row_size_in_kb = 2; | ||
2200 | else | ||
2201 | adev->gfx.config.mem_row_size_in_kb = 1; | ||
2202 | } else { | ||
2203 | tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT; | ||
2204 | adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; | ||
2205 | if (adev->gfx.config.mem_row_size_in_kb > 4) | ||
2206 | adev->gfx.config.mem_row_size_in_kb = 4; | ||
2207 | } | ||
2208 | /* XXX use MC settings? */ | ||
2209 | adev->gfx.config.shader_engine_tile_size = 32; | ||
2210 | adev->gfx.config.num_gpus = 1; | ||
2211 | adev->gfx.config.multi_gpu_tile_size = 64; | ||
2212 | |||
2213 | /* fix up row size */ | ||
2214 | gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK; | ||
2215 | switch (adev->gfx.config.mem_row_size_in_kb) { | ||
2216 | case 1: | ||
2217 | default: | ||
2218 | gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); | ||
2219 | break; | ||
2220 | case 2: | ||
2221 | gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); | ||
2222 | break; | ||
2223 | case 4: | ||
2224 | gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); | ||
2225 | break; | ||
2226 | } | ||
2227 | adev->gfx.config.gb_addr_config = gb_addr_config; | ||
2228 | |||
2229 | WREG32(mmGB_ADDR_CONFIG, gb_addr_config); | ||
2230 | WREG32(mmHDP_ADDR_CONFIG, gb_addr_config); | ||
2231 | WREG32(mmDMIF_ADDR_CALC, gb_addr_config); | ||
2232 | WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70); | ||
2233 | WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70); | ||
2234 | WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config); | ||
2235 | WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); | ||
2236 | WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); | ||
2237 | 1722 | ||
2238 | gfx_v7_0_tiling_mode_table_init(adev); | 1723 | gfx_v7_0_tiling_mode_table_init(adev); |
2239 | 1724 | ||
2240 | gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines, | 1725 | gfx_v7_0_setup_rb(adev); |
2241 | adev->gfx.config.max_sh_per_se, | ||
2242 | adev->gfx.config.max_backends_per_se); | ||
2243 | 1726 | ||
2244 | /* set HW defaults for 3D engine */ | 1727 | /* set HW defaults for 3D engine */ |
2245 | WREG32(mmCP_MEQ_THRESHOLDS, | 1728 | WREG32(mmCP_MEQ_THRESHOLDS, |
2246 | (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | | 1729 | (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | |
2247 | (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); | 1730 | (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); |
2248 | 1731 | ||
2249 | mutex_lock(&adev->grbm_idx_mutex); | 1732 | mutex_lock(&adev->grbm_idx_mutex); |
2250 | /* | 1733 | /* |
@@ -2255,7 +1738,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) | |||
2255 | 1738 | ||
2256 | /* XXX SH_MEM regs */ | 1739 | /* XXX SH_MEM regs */ |
2257 | /* where to put LDS, scratch, GPUVM in FSA64 space */ | 1740 | /* where to put LDS, scratch, GPUVM in FSA64 space */ |
2258 | sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, | 1741 | sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, |
2259 | SH_MEM_ALIGNMENT_MODE_UNALIGNED); | 1742 | SH_MEM_ALIGNMENT_MODE_UNALIGNED); |
2260 | 1743 | ||
2261 | mutex_lock(&adev->srbm_mutex); | 1744 | mutex_lock(&adev->srbm_mutex); |
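With the per-ASIC limits, the gb_addr_config selection, and the DRAM row-size fixup moved out (they reappear below in the new gfx_v7_0_gpu_early_init()), gpu_init() only programs values already cached in adev->gfx.config; the SDMA and UVD tiling-config writes leave this function as well. A condensed model of the split, with all names hypothetical:

        #include <stdint.h>
        #include <stdio.h>

        struct gfx_config {
                uint32_t gb_addr_config;
        };

        /* Early init computes the address config once... */
        static void gpu_early_init(struct gfx_config *cfg)
        {
                /* In the driver this comes from per-ASIC golden settings plus
                 * the row-size fixup; a placeholder constant stands in here. */
                cfg->gb_addr_config = 0x12010001;
        }

        /* ...and the hardware init path only replays the cached value. */
        static void gpu_init(const struct gfx_config *cfg)
        {
                printf("GB_ADDR_CONFIG  <- 0x%08x\n", cfg->gb_addr_config);
                printf("HDP_ADDR_CONFIG <- 0x%08x\n", cfg->gb_addr_config);
                printf("DMIF_ADDR_CALC  <- 0x%08x\n", cfg->gb_addr_config);
        }

        int main(void)
        {
                struct gfx_config cfg;

                gpu_early_init(&cfg);
                gpu_init(&cfg);
                return 0;
        }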
@@ -2379,7 +1862,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
2379 | return r; | 1862 | return r; |
2380 | } | 1863 | } |
2381 | WREG32(scratch, 0xCAFEDEAD); | 1864 | WREG32(scratch, 0xCAFEDEAD); |
2382 | r = amdgpu_ring_lock(ring, 3); | 1865 | r = amdgpu_ring_alloc(ring, 3); |
2383 | if (r) { | 1866 | if (r) { |
2384 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); | 1867 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); |
2385 | amdgpu_gfx_scratch_free(adev, scratch); | 1868 | amdgpu_gfx_scratch_free(adev, scratch); |
@@ -2388,7 +1871,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
2388 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | 1871 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); |
2389 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | 1872 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); |
2390 | amdgpu_ring_write(ring, 0xDEADBEEF); | 1873 | amdgpu_ring_write(ring, 0xDEADBEEF); |
2391 | amdgpu_ring_unlock_commit(ring); | 1874 | amdgpu_ring_commit(ring); |
2392 | 1875 | ||
2393 | for (i = 0; i < adev->usec_timeout; i++) { | 1876 | for (i = 0; i < adev->usec_timeout; i++) { |
2394 | tmp = RREG32(scratch); | 1877 | tmp = RREG32(scratch); |
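The ring test itself is untouched by the amdgpu_ring_lock -> amdgpu_ring_alloc and amdgpu_ring_unlock_commit -> amdgpu_ring_commit renames: reserve space, stage a three-dword write of 0xDEADBEEF to a scratch register, publish the write pointer, then poll. A toy model of that reserve/stage/publish flow (the ring layout and packet words are placeholders):

        #include <assert.h>
        #include <stdint.h>

        #define RING_SIZE 64

        struct ring {
                uint32_t buf[RING_SIZE];
                unsigned int wptr;       /* seen by "hardware" once committed */
                unsigned int wptr_stage; /* staging position for new words */
        };

        static int ring_alloc(struct ring *r, unsigned int ndw)
        {
                return (r->wptr_stage + ndw <= RING_SIZE) ? 0 : -1;
        }

        static void ring_write(struct ring *r, uint32_t v)
        {
                r->buf[r->wptr_stage++] = v;
        }

        static void ring_commit(struct ring *r)
        {
                r->wptr = r->wptr_stage;  /* hardware consumes up to wptr */
        }

        int main(void)
        {
                struct ring r = { .wptr = 0, .wptr_stage = 0 };

                assert(ring_alloc(&r, 3) == 0);
                ring_write(&r, 0xC0000000);  /* packet header (placeholder) */
                ring_write(&r, 0x00000042);  /* scratch reg offset (placeholder) */
                ring_write(&r, 0xDEADBEEF);  /* value the CPU polls for */
                ring_commit(&r);
                assert(r.wptr == 3);
                return 0;
        }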
@@ -2516,36 +1999,6 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, | |||
2516 | amdgpu_ring_write(ring, upper_32_bits(seq)); | 1999 | amdgpu_ring_write(ring, upper_32_bits(seq)); |
2517 | } | 2000 | } |
2518 | 2001 | ||
2519 | /** | ||
2520 | * gfx_v7_0_ring_emit_semaphore - emit a semaphore on the CP ring | ||
2521 | * | ||
2522 | * @ring: amdgpu ring buffer object | ||
2523 | * @semaphore: amdgpu semaphore object | ||
2524 | * @emit_wait: Is this a semaphore wait? | ||
2525 | * | ||
2526 | * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP | ||
2527 | * from running ahead of semaphore waits. | ||
2528 | */ | ||
2529 | static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
2530 | struct amdgpu_semaphore *semaphore, | ||
2531 | bool emit_wait) | ||
2532 | { | ||
2533 | uint64_t addr = semaphore->gpu_addr; | ||
2534 | unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; | ||
2535 | |||
2536 | amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); | ||
2537 | amdgpu_ring_write(ring, addr & 0xffffffff); | ||
2538 | amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); | ||
2539 | |||
2540 | if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) { | ||
2541 | /* Prevent the PFP from running ahead of the semaphore wait */ | ||
2542 | amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); | ||
2543 | amdgpu_ring_write(ring, 0x0); | ||
2544 | } | ||
2545 | |||
2546 | return true; | ||
2547 | } | ||
2548 | |||
2549 | /* | 2002 | /* |
2550 | * IB stuff | 2003 | * IB stuff |
2551 | */ | 2004 | */ |
@@ -2661,7 +2114,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) | |||
2661 | } | 2114 | } |
2662 | WREG32(scratch, 0xCAFEDEAD); | 2115 | WREG32(scratch, 0xCAFEDEAD); |
2663 | memset(&ib, 0, sizeof(ib)); | 2116 | memset(&ib, 0, sizeof(ib)); |
2664 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 2117 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
2665 | if (r) { | 2118 | if (r) { |
2666 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 2119 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
2667 | goto err1; | 2120 | goto err1; |
@@ -2671,9 +2124,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) | |||
2671 | ib.ptr[2] = 0xDEADBEEF; | 2124 | ib.ptr[2] = 0xDEADBEEF; |
2672 | ib.length_dw = 3; | 2125 | ib.length_dw = 3; |
2673 | 2126 | ||
2674 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, | 2127 | r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, |
2675 | AMDGPU_FENCE_OWNER_UNDEFINED, | 2128 | NULL, &f); |
2676 | &f); | ||
2677 | if (r) | 2129 | if (r) |
2678 | goto err2; | 2130 | goto err2; |
2679 | 2131 | ||
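The IB test follows the same direction: amdgpu_ib_get() now takes the device instead of the ring, and the amdgpu_sched_ib_submit_kernel_helper() wrapper is replaced by a direct amdgpu_ib_schedule() call that hands back the fence. A toy version of the obtain/fill/schedule/wait sequence (all types and helpers here are stand-ins, not driver API):

        #include <assert.h>
        #include <stdint.h>
        #include <string.h>

        struct ib {
                uint32_t *ptr;
                uint32_t length_dw;
                uint32_t storage[256 / 4];
        };

        struct fence { int signaled; };

        static int ib_get(struct ib *ib)
        {
                memset(ib, 0, sizeof(*ib));
                ib->ptr = ib->storage;
                return 0;
        }

        static int ib_schedule(struct ib *ib, struct fence *f)
        {
                (void)ib;          /* pretend the ring executed the IB */
                f->signaled = 1;
                return 0;
        }

        int main(void)
        {
                struct ib ib;
                struct fence f = { 0 };

                assert(ib_get(&ib) == 0);
                ib.ptr[0] = 0xC0000000;  /* packet header (placeholder) */
                ib.ptr[1] = 0x00000042;  /* scratch reg offset (placeholder) */
                ib.ptr[2] = 0xDEADBEEF;  /* value the test polls for */
                ib.length_dw = 3;
                assert(ib_schedule(&ib, &f) == 0);
                assert(f.signaled);
                return 0;
        }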
@@ -2842,7 +2294,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2842 | 2294 | ||
2843 | gfx_v7_0_cp_gfx_enable(adev, true); | 2295 | gfx_v7_0_cp_gfx_enable(adev, true); |
2844 | 2296 | ||
2845 | r = amdgpu_ring_lock(ring, gfx_v7_0_get_csb_size(adev) + 8); | 2297 | r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8); |
2846 | if (r) { | 2298 | if (r) { |
2847 | DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); | 2299 | DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); |
2848 | return r; | 2300 | return r; |
@@ -2911,7 +2363,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2911 | amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ | 2363 | amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ |
2912 | amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ | 2364 | amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ |
2913 | 2365 | ||
2914 | amdgpu_ring_unlock_commit(ring); | 2366 | amdgpu_ring_commit(ring); |
2915 | 2367 | ||
2916 | return 0; | 2368 | return 0; |
2917 | } | 2369 | } |
@@ -2989,21 +2441,14 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) | |||
2989 | 2441 | ||
2990 | static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) | 2442 | static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) |
2991 | { | 2443 | { |
2992 | u32 rptr; | 2444 | return ring->adev->wb.wb[ring->rptr_offs]; |
2993 | |||
2994 | rptr = ring->adev->wb.wb[ring->rptr_offs]; | ||
2995 | |||
2996 | return rptr; | ||
2997 | } | 2445 | } |
2998 | 2446 | ||
2999 | static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) | 2447 | static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) |
3000 | { | 2448 | { |
3001 | struct amdgpu_device *adev = ring->adev; | 2449 | struct amdgpu_device *adev = ring->adev; |
3002 | u32 wptr; | ||
3003 | 2450 | ||
3004 | wptr = RREG32(mmCP_RB0_WPTR); | 2451 | return RREG32(mmCP_RB0_WPTR); |
3005 | |||
3006 | return wptr; | ||
3007 | } | 2452 | } |
3008 | 2453 | ||
3009 | static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) | 2454 | static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) |
@@ -3016,21 +2461,13 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) | |||
3016 | 2461 | ||
3017 | static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring) | 2462 | static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring) |
3018 | { | 2463 | { |
3019 | u32 rptr; | 2464 | return ring->adev->wb.wb[ring->rptr_offs]; |
3020 | |||
3021 | rptr = ring->adev->wb.wb[ring->rptr_offs]; | ||
3022 | |||
3023 | return rptr; | ||
3024 | } | 2465 | } |
3025 | 2466 | ||
3026 | static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) | 2467 | static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) |
3027 | { | 2468 | { |
3028 | u32 wptr; | ||
3029 | |||
3030 | /* XXX check if swapping is necessary on BE */ | 2469 | /* XXX check if swapping is necessary on BE */ |
3031 | wptr = ring->adev->wb.wb[ring->wptr_offs]; | 2470 | return ring->adev->wb.wb[ring->wptr_offs]; |
3032 | |||
3033 | return wptr; | ||
3034 | } | 2471 | } |
3035 | 2472 | ||
3036 | static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) | 2473 | static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) |
@@ -3126,21 +2563,6 @@ static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev) | |||
3126 | } | 2563 | } |
3127 | 2564 | ||
3128 | /** | 2565 | /** |
3129 | * gfx_v7_0_cp_compute_start - start the compute queues | ||
3130 | * | ||
3131 | * @adev: amdgpu_device pointer | ||
3132 | * | ||
3133 | * Enable the compute queues. | ||
3134 | * Returns 0 for success, error for failure. | ||
3135 | */ | ||
3136 | static int gfx_v7_0_cp_compute_start(struct amdgpu_device *adev) | ||
3137 | { | ||
3138 | gfx_v7_0_cp_compute_enable(adev, true); | ||
3139 | |||
3140 | return 0; | ||
3141 | } | ||
3142 | |||
3143 | /** | ||
3144 | * gfx_v7_0_cp_compute_fini - stop the compute queues | 2566 | * gfx_v7_0_cp_compute_fini - stop the compute queues |
3145 | * | 2567 | * |
3146 | * @adev: amdgpu_device pointer | 2568 | * @adev: amdgpu_device pointer |
@@ -3330,9 +2752,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) | |||
3330 | u32 *buf; | 2752 | u32 *buf; |
3331 | struct bonaire_mqd *mqd; | 2753 | struct bonaire_mqd *mqd; |
3332 | 2754 | ||
3333 | r = gfx_v7_0_cp_compute_start(adev); | 2755 | gfx_v7_0_cp_compute_enable(adev, true); |
3334 | if (r) | ||
3335 | return r; | ||
3336 | 2756 | ||
3337 | /* fix up chicken bits */ | 2757 | /* fix up chicken bits */ |
3338 | tmp = RREG32(mmCP_CPF_DEBUG); | 2758 | tmp = RREG32(mmCP_CPF_DEBUG); |
@@ -4395,28 +3815,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, | |||
4395 | } | 3815 | } |
4396 | } | 3816 | } |
4397 | 3817 | ||
4398 | static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev, | 3818 | static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) |
4399 | u32 se, u32 sh) | ||
4400 | { | 3819 | { |
4401 | u32 mask = 0, tmp, tmp1; | 3820 | u32 data, mask; |
4402 | int i; | ||
4403 | |||
4404 | gfx_v7_0_select_se_sh(adev, se, sh); | ||
4405 | tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); | ||
4406 | tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); | ||
4407 | gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); | ||
4408 | 3821 | ||
4409 | tmp &= 0xffff0000; | 3822 | data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); |
3823 | data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); | ||
4410 | 3824 | ||
4411 | tmp |= tmp1; | 3825 | data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; |
4412 | tmp >>= 16; | 3826 | data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; |
4413 | 3827 | ||
4414 | for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { | 3828 | mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / |
4415 | mask <<= 1; | 3829 | adev->gfx.config.max_sh_per_se); |
4416 | mask |= 1; | ||
4417 | } | ||
4418 | 3830 | ||
4419 | return (~tmp) & mask; | 3831 | return (~data) & mask; |
4420 | } | 3832 | } |
4421 | 3833 | ||
4422 | static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev) | 3834 | static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev) |
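The CU bitmap helper is reshaped the same way as the RB one: OR the fused and user shader-array config registers, isolate the INACTIVE_CUS field (the old code's 0xffff0000 >> 16 handling), and invert under a width mask. One visible semantic change in this hunk: the old loop sized the mask from max_cu_per_sh, while the new call derives it from max_backends_per_se / max_sh_per_se. A sketch of the arithmetic, with the field constants as hypothetical stand-ins:

        #include <assert.h>
        #include <stdint.h>

        #define INACTIVE_CUS_MASK   0xffff0000u  /* upper-16 field, as in the old code */
        #define INACTIVE_CUS_SHIFT  16

        static uint32_t cu_active_bitmap(uint32_t cc_reg, uint32_t gc_user_reg,
                                         uint32_t mask_width)
        {
                uint32_t data = cc_reg | gc_user_reg;  /* fused | user config */
                uint32_t mask = (uint32_t)((1ULL << mask_width) - 1);

                data &= INACTIVE_CUS_MASK;
                data >>= INACTIVE_CUS_SHIFT;
                return (~data) & mask;                 /* inactive -> active */
        }

        int main(void)
        {
                /* 7 CUs per SH: CU 3 fused off, CU 5 disabled by user config,
                 * leaving 0b1010111 active. */
                assert(cu_active_bitmap(0x8u << 16, 0x20u << 16, 7) == 0x57u);
                return 0;
        }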
@@ -4754,6 +4166,172 @@ static int gfx_v7_0_late_init(void *handle) | |||
4754 | return 0; | 4166 | return 0; |
4755 | } | 4167 | } |
4756 | 4168 | ||
4169 | static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) | ||
4170 | { | ||
4171 | u32 gb_addr_config; | ||
4172 | u32 mc_shared_chmap, mc_arb_ramcfg; | ||
4173 | u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; | ||
4174 | u32 tmp; | ||
4175 | |||
4176 | switch (adev->asic_type) { | ||
4177 | case CHIP_BONAIRE: | ||
4178 | adev->gfx.config.max_shader_engines = 2; | ||
4179 | adev->gfx.config.max_tile_pipes = 4; | ||
4180 | adev->gfx.config.max_cu_per_sh = 7; | ||
4181 | adev->gfx.config.max_sh_per_se = 1; | ||
4182 | adev->gfx.config.max_backends_per_se = 2; | ||
4183 | adev->gfx.config.max_texture_channel_caches = 4; | ||
4184 | adev->gfx.config.max_gprs = 256; | ||
4185 | adev->gfx.config.max_gs_threads = 32; | ||
4186 | adev->gfx.config.max_hw_contexts = 8; | ||
4187 | |||
4188 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
4189 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
4190 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
4191 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; | ||
4192 | gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; | ||
4193 | break; | ||
4194 | case CHIP_HAWAII: | ||
4195 | adev->gfx.config.max_shader_engines = 4; | ||
4196 | adev->gfx.config.max_tile_pipes = 16; | ||
4197 | adev->gfx.config.max_cu_per_sh = 11; | ||
4198 | adev->gfx.config.max_sh_per_se = 1; | ||
4199 | adev->gfx.config.max_backends_per_se = 4; | ||
4200 | adev->gfx.config.max_texture_channel_caches = 16; | ||
4201 | adev->gfx.config.max_gprs = 256; | ||
4202 | adev->gfx.config.max_gs_threads = 32; | ||
4203 | adev->gfx.config.max_hw_contexts = 8; | ||
4204 | |||
4205 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
4206 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
4207 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
4208 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; | ||
4209 | gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN; | ||
4210 | break; | ||
4211 | case CHIP_KAVERI: | ||
4212 | adev->gfx.config.max_shader_engines = 1; | ||
4213 | adev->gfx.config.max_tile_pipes = 4; | ||
4214 | if ((adev->pdev->device == 0x1304) || | ||
4215 | (adev->pdev->device == 0x1305) || | ||
4216 | (adev->pdev->device == 0x130C) || | ||
4217 | (adev->pdev->device == 0x130F) || | ||
4218 | (adev->pdev->device == 0x1310) || | ||
4219 | (adev->pdev->device == 0x1311) || | ||
4220 | (adev->pdev->device == 0x131C)) { | ||
4221 | adev->gfx.config.max_cu_per_sh = 8; | ||
4222 | adev->gfx.config.max_backends_per_se = 2; | ||
4223 | } else if ((adev->pdev->device == 0x1309) || | ||
4224 | (adev->pdev->device == 0x130A) || | ||
4225 | (adev->pdev->device == 0x130D) || | ||
4226 | (adev->pdev->device == 0x1313) || | ||
4227 | (adev->pdev->device == 0x131D)) { | ||
4228 | adev->gfx.config.max_cu_per_sh = 6; | ||
4229 | adev->gfx.config.max_backends_per_se = 2; | ||
4230 | } else if ((adev->pdev->device == 0x1306) || | ||
4231 | (adev->pdev->device == 0x1307) || | ||
4232 | (adev->pdev->device == 0x130B) || | ||
4233 | (adev->pdev->device == 0x130E) || | ||
4234 | (adev->pdev->device == 0x1315) || | ||
4235 | (adev->pdev->device == 0x131B)) { | ||
4236 | adev->gfx.config.max_cu_per_sh = 4; | ||
4237 | adev->gfx.config.max_backends_per_se = 1; | ||
4238 | } else { | ||
4239 | adev->gfx.config.max_cu_per_sh = 3; | ||
4240 | adev->gfx.config.max_backends_per_se = 1; | ||
4241 | } | ||
4242 | adev->gfx.config.max_sh_per_se = 1; | ||
4243 | adev->gfx.config.max_texture_channel_caches = 4; | ||
4244 | adev->gfx.config.max_gprs = 256; | ||
4245 | adev->gfx.config.max_gs_threads = 16; | ||
4246 | adev->gfx.config.max_hw_contexts = 8; | ||
4247 | |||
4248 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
4249 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
4250 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
4251 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; | ||
4252 | gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; | ||
4253 | break; | ||
4254 | case CHIP_KABINI: | ||
4255 | case CHIP_MULLINS: | ||
4256 | default: | ||
4257 | adev->gfx.config.max_shader_engines = 1; | ||
4258 | adev->gfx.config.max_tile_pipes = 2; | ||
4259 | adev->gfx.config.max_cu_per_sh = 2; | ||
4260 | adev->gfx.config.max_sh_per_se = 1; | ||
4261 | adev->gfx.config.max_backends_per_se = 1; | ||
4262 | adev->gfx.config.max_texture_channel_caches = 2; | ||
4263 | adev->gfx.config.max_gprs = 256; | ||
4264 | adev->gfx.config.max_gs_threads = 16; | ||
4265 | adev->gfx.config.max_hw_contexts = 8; | ||
4266 | |||
4267 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
4268 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
4269 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
4270 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; | ||
4271 | gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; | ||
4272 | break; | ||
4273 | } | ||
4274 | |||
4275 | mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); | ||
4276 | adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); | ||
4277 | mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; | ||
4278 | |||
4279 | adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; | ||
4280 | adev->gfx.config.mem_max_burst_length_bytes = 256; | ||
4281 | if (adev->flags & AMD_IS_APU) { | ||
4282 | /* Get memory bank mapping mode. */ | ||
4283 | tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); | ||
4284 | dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); | ||
4285 | dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); | ||
4286 | |||
4287 | tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); | ||
4288 | dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); | ||
4289 | dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); | ||
4290 | |||
4291 | /* Validate settings in case only one DIMM is installed. */ | ||
4292 | if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) | ||
4293 | dimm00_addr_map = 0; | ||
4294 | if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) | ||
4295 | dimm01_addr_map = 0; | ||
4296 | if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) | ||
4297 | dimm10_addr_map = 0; | ||
4298 | if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) | ||
4299 | dimm11_addr_map = 0; | ||
4300 | |||
4301 | /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ | ||
4302 | /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */ | ||
4303 | if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) | ||
4304 | adev->gfx.config.mem_row_size_in_kb = 2; | ||
4305 | else | ||
4306 | adev->gfx.config.mem_row_size_in_kb = 1; | ||
4307 | } else { | ||
4308 | tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT; | ||
4309 | adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; | ||
4310 | if (adev->gfx.config.mem_row_size_in_kb > 4) | ||
4311 | adev->gfx.config.mem_row_size_in_kb = 4; | ||
4312 | } | ||
4313 | /* XXX use MC settings? */ | ||
4314 | adev->gfx.config.shader_engine_tile_size = 32; | ||
4315 | adev->gfx.config.num_gpus = 1; | ||
4316 | adev->gfx.config.multi_gpu_tile_size = 64; | ||
4317 | |||
4318 | /* fix up row size */ | ||
4319 | gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK; | ||
4320 | switch (adev->gfx.config.mem_row_size_in_kb) { | ||
4321 | case 1: | ||
4322 | default: | ||
4323 | gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); | ||
4324 | break; | ||
4325 | case 2: | ||
4326 | gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); | ||
4327 | break; | ||
4328 | case 4: | ||
4329 | gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); | ||
4330 | break; | ||
4331 | } | ||
4332 | adev->gfx.config.gb_addr_config = gb_addr_config; | ||
4333 | } | ||
4334 | |||
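On dGPUs the row size comes from MC_ARB_RAMCFG.NOOFCOLS as 4 * 2^(8 + NOOFCOLS) bytes, clamped to 4 KB, and is then re-encoded into GB_ADDR_CONFIG.ROW_SIZE by the fixup switch at the end of the function. A self-contained check of that arithmetic, showing NOOFCOLS 0/1/2 mapping to 1/2/4 KB and field values 0/1/2:

    #include <stdint.h>
    #include <stdio.h>

    /* Row size in KB from the NOOFCOLS field: 4 * 2^(8 + noofcols) bytes. */
    static uint32_t row_size_kb(uint32_t noofcols)
    {
        uint32_t kb = (4 * (1u << (8 + noofcols))) / 1024;

        return kb > 4 ? 4 : kb;   /* the driver clamps at 4 KB */
    }

    /* GB_ADDR_CONFIG.ROW_SIZE encoding used by the fixup switch. */
    static uint32_t row_size_field(uint32_t kb)
    {
        switch (kb) {
        case 2:  return 1;
        case 4:  return 2;
        default: return 0;   /* 1 KB */
        }
    }

    int main(void)
    {
        for (uint32_t n = 0; n <= 3; n++)
            printf("NOOFCOLS=%u -> %u KB -> ROW_SIZE=%u\n",
                   n, row_size_kb(n), row_size_field(row_size_kb(n)));
        return 0;
    }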
4757 | static int gfx_v7_0_sw_init(void *handle) | 4335 | static int gfx_v7_0_sw_init(void *handle) |
4758 | { | 4336 | { |
4759 | struct amdgpu_ring *ring; | 4337 | struct amdgpu_ring *ring; |
@@ -4857,6 +4435,10 @@ static int gfx_v7_0_sw_init(void *handle) | |||
4857 | if (r) | 4435 | if (r) |
4858 | return r; | 4436 | return r; |
4859 | 4437 | ||
4438 | adev->gfx.ce_ram_size = 0x8000; | ||
4439 | |||
4440 | gfx_v7_0_gpu_early_init(adev); | ||
4441 | |||
4860 | return r; | 4442 | return r; |
4861 | } | 4443 | } |
4862 | 4444 | ||
@@ -4897,8 +4479,6 @@ static int gfx_v7_0_hw_init(void *handle) | |||
4897 | if (r) | 4479 | if (r) |
4898 | return r; | 4480 | return r; |
4899 | 4481 | ||
4900 | adev->gfx.ce_ram_size = 0x8000; | ||
4901 | |||
4902 | return r; | 4482 | return r; |
4903 | } | 4483 | } |
4904 | 4484 | ||
@@ -5015,16 +4595,6 @@ static void gfx_v7_0_print_status(void *handle) | |||
5015 | RREG32(mmHDP_ADDR_CONFIG)); | 4595 | RREG32(mmHDP_ADDR_CONFIG)); |
5016 | dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", | 4596 | dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", |
5017 | RREG32(mmDMIF_ADDR_CALC)); | 4597 | RREG32(mmDMIF_ADDR_CALC)); |
5018 | dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n", | ||
5019 | RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET)); | ||
5020 | dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n", | ||
5021 | RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET)); | ||
5022 | dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", | ||
5023 | RREG32(mmUVD_UDEC_ADDR_CONFIG)); | ||
5024 | dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", | ||
5025 | RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); | ||
5026 | dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", | ||
5027 | RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); | ||
5028 | 4598 | ||
5029 | dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", | 4599 | dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", |
5030 | RREG32(mmCP_MEQ_THRESHOLDS)); | 4600 | RREG32(mmCP_MEQ_THRESHOLDS)); |
@@ -5567,13 +5137,13 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { | |||
5567 | .parse_cs = NULL, | 5137 | .parse_cs = NULL, |
5568 | .emit_ib = gfx_v7_0_ring_emit_ib_gfx, | 5138 | .emit_ib = gfx_v7_0_ring_emit_ib_gfx, |
5569 | .emit_fence = gfx_v7_0_ring_emit_fence_gfx, | 5139 | .emit_fence = gfx_v7_0_ring_emit_fence_gfx, |
5570 | .emit_semaphore = gfx_v7_0_ring_emit_semaphore, | ||
5571 | .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, | 5140 | .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, |
5572 | .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, | 5141 | .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, |
5573 | .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, | 5142 | .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, |
5574 | .test_ring = gfx_v7_0_ring_test_ring, | 5143 | .test_ring = gfx_v7_0_ring_test_ring, |
5575 | .test_ib = gfx_v7_0_ring_test_ib, | 5144 | .test_ib = gfx_v7_0_ring_test_ib, |
5576 | .insert_nop = amdgpu_ring_insert_nop, | 5145 | .insert_nop = amdgpu_ring_insert_nop, |
5146 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
5577 | }; | 5147 | }; |
5578 | 5148 | ||
5579 | static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { | 5149 | static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { |
@@ -5583,13 +5153,13 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { | |||
5583 | .parse_cs = NULL, | 5153 | .parse_cs = NULL, |
5584 | .emit_ib = gfx_v7_0_ring_emit_ib_compute, | 5154 | .emit_ib = gfx_v7_0_ring_emit_ib_compute, |
5585 | .emit_fence = gfx_v7_0_ring_emit_fence_compute, | 5155 | .emit_fence = gfx_v7_0_ring_emit_fence_compute, |
5586 | .emit_semaphore = gfx_v7_0_ring_emit_semaphore, | ||
5587 | .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, | 5156 | .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, |
5588 | .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, | 5157 | .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, |
5589 | .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, | 5158 | .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, |
5590 | .test_ring = gfx_v7_0_ring_test_ring, | 5159 | .test_ring = gfx_v7_0_ring_test_ring, |
5591 | .test_ib = gfx_v7_0_ring_test_ib, | 5160 | .test_ib = gfx_v7_0_ring_test_ib, |
5592 | .insert_nop = amdgpu_ring_insert_nop, | 5161 | .insert_nop = amdgpu_ring_insert_nop, |
5162 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
5593 | }; | 5163 | }; |
5594 | 5164 | ||
5595 | static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) | 5165 | static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) |
@@ -5659,7 +5229,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) | |||
5659 | 5229 | ||
5660 | 5230 | ||
5661 | int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, | 5231 | int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, |
5662 | struct amdgpu_cu_info *cu_info) | 5232 | struct amdgpu_cu_info *cu_info) |
5663 | { | 5233 | { |
5664 | int i, j, k, counter, active_cu_number = 0; | 5234 | int i, j, k, counter, active_cu_number = 0; |
5665 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; | 5235 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; |
@@ -5673,10 +5243,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, | |||
5673 | mask = 1; | 5243 | mask = 1; |
5674 | ao_bitmap = 0; | 5244 | ao_bitmap = 0; |
5675 | counter = 0; | 5245 | counter = 0; |
5676 | bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j); | 5246 | gfx_v7_0_select_se_sh(adev, i, j); |
5247 | bitmap = gfx_v7_0_get_cu_active_bitmap(adev); | ||
5677 | cu_info->bitmap[i][j] = bitmap; | 5248 | cu_info->bitmap[i][j] = bitmap; |
5678 | 5249 | ||
5679 | for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { | 5250 | for (k = 0; k < 16; k ++) { |
5680 | if (bitmap & mask) { | 5251 | if (bitmap & mask) { |
5681 | if (counter < 2) | 5252 | if (counter < 2) |
5682 | ao_bitmap |= mask; | 5253 | ao_bitmap |= mask; |
@@ -5688,9 +5259,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, | |||
5688 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | 5259 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); |
5689 | } | 5260 | } |
5690 | } | 5261 | } |
5262 | gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); | ||
5263 | mutex_unlock(&adev->grbm_idx_mutex); | ||
5691 | 5264 | ||
5692 | cu_info->number = active_cu_number; | 5265 | cu_info->number = active_cu_number; |
5693 | cu_info->ao_cu_mask = ao_cu_mask; | 5266 | cu_info->ao_cu_mask = ao_cu_mask; |
5694 | mutex_unlock(&adev->grbm_idx_mutex); | 5267 | |
5695 | return 0; | 5268 | return 0; |
5696 | } | 5269 | } |
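gfx_v7_0_get_cu_info() walks each SH's active-CU bitmap, marks at most the first two active CUs as always-on, and packs the per-SH 8-bit AO bitmap at bit offset i * 16 + j * 8 (16 bits per SE, 8 per SH). A standalone walk-through of that packing with hypothetical bitmaps:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical active-CU bitmaps: bitmap[se][sh]. */
        uint32_t bitmap[2][1] = { { 0x7f }, { 0x5f } };
        uint32_t ao_cu_mask = 0, active = 0;

        for (int i = 0; i < 2; i++) {
            for (int j = 0; j < 1; j++) {
                uint32_t mask = 1, ao_bitmap = 0, counter = 0;

                for (int k = 0; k < 16; k++) {
                    if (bitmap[i][j] & mask) {
                        if (counter < 2)      /* first two CUs stay on */
                            ao_bitmap |= mask;
                        counter++;
                    }
                    mask <<= 1;
                }
                active += counter;
                /* SE stride 16 bits, SH stride 8 bits */
                ao_cu_mask |= ao_bitmap << (i * 16 + j * 8);
            }
        }
        printf("cu=%u ao=0x%08x\n", active, ao_cu_mask); /* cu=13 ao=0x00030003 */
        return 0;
    }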
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 8f8ec37ecd88..10c865087d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -43,9 +43,6 @@ | |||
43 | #include "gca/gfx_8_0_sh_mask.h" | 43 | #include "gca/gfx_8_0_sh_mask.h" |
44 | #include "gca/gfx_8_0_enum.h" | 44 | #include "gca/gfx_8_0_enum.h" |
45 | 45 | ||
46 | #include "uvd/uvd_5_0_d.h" | ||
47 | #include "uvd/uvd_5_0_sh_mask.h" | ||
48 | |||
49 | #include "dce/dce_10_0_d.h" | 46 | #include "dce/dce_10_0_d.h" |
50 | #include "dce/dce_10_0_sh_mask.h" | 47 | #include "dce/dce_10_0_sh_mask.h" |
51 | 48 | ||
@@ -652,7 +649,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) | |||
652 | return r; | 649 | return r; |
653 | } | 650 | } |
654 | WREG32(scratch, 0xCAFEDEAD); | 651 | WREG32(scratch, 0xCAFEDEAD); |
655 | r = amdgpu_ring_lock(ring, 3); | 652 | r = amdgpu_ring_alloc(ring, 3); |
656 | if (r) { | 653 | if (r) { |
657 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | 654 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", |
658 | ring->idx, r); | 655 | ring->idx, r); |
@@ -662,7 +659,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) | |||
662 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | 659 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); |
663 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | 660 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); |
664 | amdgpu_ring_write(ring, 0xDEADBEEF); | 661 | amdgpu_ring_write(ring, 0xDEADBEEF); |
665 | amdgpu_ring_unlock_commit(ring); | 662 | amdgpu_ring_commit(ring); |
666 | 663 | ||
667 | for (i = 0; i < adev->usec_timeout; i++) { | 664 | for (i = 0; i < adev->usec_timeout; i++) { |
668 | tmp = RREG32(scratch); | 665 | tmp = RREG32(scratch); |
@@ -699,7 +696,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) | |||
699 | } | 696 | } |
700 | WREG32(scratch, 0xCAFEDEAD); | 697 | WREG32(scratch, 0xCAFEDEAD); |
701 | memset(&ib, 0, sizeof(ib)); | 698 | memset(&ib, 0, sizeof(ib)); |
702 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 699 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
703 | if (r) { | 700 | if (r) { |
704 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 701 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
705 | goto err1; | 702 | goto err1; |
@@ -709,9 +706,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) | |||
709 | ib.ptr[2] = 0xDEADBEEF; | 706 | ib.ptr[2] = 0xDEADBEEF; |
710 | ib.length_dw = 3; | 707 | ib.length_dw = 3; |
711 | 708 | ||
712 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, | 709 | r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, |
713 | AMDGPU_FENCE_OWNER_UNDEFINED, | 710 | NULL, &f); |
714 | &f); | ||
715 | if (r) | 711 | if (r) |
716 | goto err2; | 712 | goto err2; |
717 | 713 | ||
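Both tests now hand their IB straight to amdgpu_ib_schedule() on the target ring and wait on the returned fence, instead of going through the scheduler kernel helper; amdgpu_ib_get() likewise takes the device rather than the ring. Condensed from the hunks above (error paths trimmed, adev and ring from the surrounding test), the call pattern is a sketch, not a full test:

    struct amdgpu_ib ib;
    struct fence *f = NULL;
    int r;

    memset(&ib, 0, sizeof(ib));
    r = amdgpu_ib_get(adev, NULL, 256, &ib);    /* device, not ring */
    if (r)
        return r;

    /* ... fill ib.ptr[] / ib.length_dw as in the test above ... */

    /* submit directly; the returned fence is what the caller waits on */
    r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
                           NULL, &f);
    if (!r)
        r = fence_wait(f, false);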
@@ -1171,7 +1167,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) | |||
1171 | 1167 | ||
1172 | /* allocate an indirect buffer to put the commands in */ | 1168 | /* allocate an indirect buffer to put the commands in */ |
1173 | memset(&ib, 0, sizeof(ib)); | 1169 | memset(&ib, 0, sizeof(ib)); |
1174 | r = amdgpu_ib_get(ring, NULL, total_size, &ib); | 1170 | r = amdgpu_ib_get(adev, NULL, total_size, &ib); |
1175 | if (r) { | 1171 | if (r) { |
1176 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 1172 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
1177 | return r; | 1173 | return r; |
@@ -1266,9 +1262,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) | |||
1266 | ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); | 1262 | ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); |
1267 | 1263 | ||
1268 | /* schedule the ib on the ring */ | 1264 | /* schedule the ib on the ring */
1269 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, | 1265 | r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, |
1270 | AMDGPU_FENCE_OWNER_UNDEFINED, | 1266 | NULL, &f); |
1271 | &f); | ||
1272 | if (r) { | 1267 | if (r) { |
1273 | DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); | 1268 | DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); |
1274 | goto fail; | 1269 | goto fail; |
@@ -2574,11 +2569,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) | |||
2574 | } | 2569 | } |
2575 | } | 2570 | } |
2576 | 2571 | ||
2577 | static u32 gfx_v8_0_create_bitmask(u32 bit_width) | ||
2578 | { | ||
2579 | return (u32)((1ULL << bit_width) - 1); | ||
2580 | } | ||
2581 | |||
2582 | void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) | 2572 | void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) |
2583 | { | 2573 | { |
2584 | u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); | 2574 | u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); |
@@ -2599,89 +2589,50 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) | |||
2599 | WREG32(mmGRBM_GFX_INDEX, data); | 2589 | WREG32(mmGRBM_GFX_INDEX, data); |
2600 | } | 2590 | } |
2601 | 2591 | ||
2602 | static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev, | 2592 | static u32 gfx_v8_0_create_bitmask(u32 bit_width) |
2603 | u32 max_rb_num_per_se, | 2593 | { |
2604 | u32 sh_per_se) | 2594 | return (u32)((1ULL << bit_width) - 1); |
2595 | } | ||
2596 | |||
2597 | static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) | ||
2605 | { | 2598 | { |
2606 | u32 data, mask; | 2599 | u32 data, mask; |
2607 | 2600 | ||
2608 | data = RREG32(mmCC_RB_BACKEND_DISABLE); | 2601 | data = RREG32(mmCC_RB_BACKEND_DISABLE); |
2609 | data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; | ||
2610 | |||
2611 | data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); | 2602 | data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); |
2612 | 2603 | ||
2604 | data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; | ||
2613 | data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; | 2605 | data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; |
2614 | 2606 | ||
2615 | mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se); | 2607 | mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / |
2608 | adev->gfx.config.max_sh_per_se); | ||
2616 | 2609 | ||
2617 | return data & mask; | 2610 | return (~data) & mask; |
2618 | } | 2611 | } |
2619 | 2612 | ||
2620 | static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, | 2613 | static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) |
2621 | u32 se_num, u32 sh_per_se, | ||
2622 | u32 max_rb_num_per_se) | ||
2623 | { | 2614 | { |
2624 | int i, j; | 2615 | int i, j; |
2625 | u32 data, mask; | 2616 | u32 data, tmp, num_rbs = 0; |
2626 | u32 disabled_rbs = 0; | 2617 | u32 active_rbs = 0; |
2627 | u32 enabled_rbs = 0; | ||
2628 | 2618 | ||
2629 | mutex_lock(&adev->grbm_idx_mutex); | 2619 | mutex_lock(&adev->grbm_idx_mutex); |
2630 | for (i = 0; i < se_num; i++) { | 2620 | for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { |
2631 | for (j = 0; j < sh_per_se; j++) { | 2621 | for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { |
2632 | gfx_v8_0_select_se_sh(adev, i, j); | 2622 | gfx_v8_0_select_se_sh(adev, i, j); |
2633 | data = gfx_v8_0_get_rb_disabled(adev, | 2623 | data = gfx_v8_0_get_rb_active_bitmap(adev); |
2634 | max_rb_num_per_se, sh_per_se); | 2624 | active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * |
2635 | disabled_rbs |= data << ((i * sh_per_se + j) * | 2625 | RB_BITMAP_WIDTH_PER_SH); |
2636 | RB_BITMAP_WIDTH_PER_SH); | ||
2637 | } | 2626 | } |
2638 | } | 2627 | } |
2639 | gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); | 2628 | gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); |
2640 | mutex_unlock(&adev->grbm_idx_mutex); | 2629 | mutex_unlock(&adev->grbm_idx_mutex); |
2641 | 2630 | ||
2642 | mask = 1; | 2631 | adev->gfx.config.backend_enable_mask = active_rbs; |
2643 | for (i = 0; i < max_rb_num_per_se * se_num; i++) { | 2632 | tmp = active_rbs; |
2644 | if (!(disabled_rbs & mask)) | 2633 | while (tmp >>= 1) |
2645 | enabled_rbs |= mask; | 2634 | num_rbs++; |
2646 | mask <<= 1; | 2635 | adev->gfx.config.num_rbs = num_rbs; |
2647 | } | ||
2648 | |||
2649 | adev->gfx.config.backend_enable_mask = enabled_rbs; | ||
2650 | |||
2651 | mutex_lock(&adev->grbm_idx_mutex); | ||
2652 | for (i = 0; i < se_num; i++) { | ||
2653 | gfx_v8_0_select_se_sh(adev, i, 0xffffffff); | ||
2654 | data = RREG32(mmPA_SC_RASTER_CONFIG); | ||
2655 | for (j = 0; j < sh_per_se; j++) { | ||
2656 | switch (enabled_rbs & 3) { | ||
2657 | case 0: | ||
2658 | if (j == 0) | ||
2659 | data |= (RASTER_CONFIG_RB_MAP_3 << | ||
2660 | PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); | ||
2661 | else | ||
2662 | data |= (RASTER_CONFIG_RB_MAP_0 << | ||
2663 | PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); | ||
2664 | break; | ||
2665 | case 1: | ||
2666 | data |= (RASTER_CONFIG_RB_MAP_0 << | ||
2667 | (i * sh_per_se + j) * 2); | ||
2668 | break; | ||
2669 | case 2: | ||
2670 | data |= (RASTER_CONFIG_RB_MAP_3 << | ||
2671 | (i * sh_per_se + j) * 2); | ||
2672 | break; | ||
2673 | case 3: | ||
2674 | default: | ||
2675 | data |= (RASTER_CONFIG_RB_MAP_2 << | ||
2676 | (i * sh_per_se + j) * 2); | ||
2677 | break; | ||
2678 | } | ||
2679 | enabled_rbs >>= 2; | ||
2680 | } | ||
2681 | WREG32(mmPA_SC_RASTER_CONFIG, data); | ||
2682 | } | ||
2683 | gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); | ||
2684 | mutex_unlock(&adev->grbm_idx_mutex); | ||
2685 | } | 2636 | } |
2686 | 2637 | ||
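gfx_v8_0_setup_rb() now accumulates a positive active-RB bitmap, shifting each SH's bits into one word at a stride of RB_BITMAP_WIDTH_PER_SH, instead of deriving a disabled mask and reprogramming PA_SC_RASTER_CONFIG. A userspace sketch of the accumulation with a hypothetical 2-SE, 1-SH, 2-RB-per-SH layout; the stride value is assumed, and the sketch counts bits with a portable popcount rather than the hunk's shift loop:

    #include <stdint.h>
    #include <stdio.h>

    #define RB_BITMAP_WIDTH_PER_SH 2   /* assumed; matches 2 RBs per SH here */

    int main(void)
    {
        /* Hypothetical per-(SE,SH) reads, as gfx_v8_0_get_rb_active_bitmap()
         * would return them; SE1 has one RB fused off. */
        uint32_t per_sh[2][1] = { { 0x3 }, { 0x1 } };
        uint32_t active_rbs = 0, num_rbs = 0, tmp;

        for (int i = 0; i < 2; i++)
            for (int j = 0; j < 1; j++)
                active_rbs |= per_sh[i][j]
                    << ((i * 1 + j) * RB_BITMAP_WIDTH_PER_SH);

        /* Count set bits (a portable hweight32). */
        for (tmp = active_rbs; tmp; tmp &= tmp - 1)
            num_rbs++;

        printf("mask=0x%x rbs=%u\n", active_rbs, num_rbs); /* mask=0x7 rbs=3 */
        return 0;
    }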
2687 | /** | 2638 | /** |
@@ -2741,19 +2692,10 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) | |||
2741 | WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | 2692 | WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); |
2742 | WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | 2693 | WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); |
2743 | WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); | 2694 | WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); |
2744 | WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, | ||
2745 | adev->gfx.config.gb_addr_config & 0x70); | ||
2746 | WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, | ||
2747 | adev->gfx.config.gb_addr_config & 0x70); | ||
2748 | WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
2749 | WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
2750 | WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
2751 | 2695 | ||
2752 | gfx_v8_0_tiling_mode_table_init(adev); | 2696 | gfx_v8_0_tiling_mode_table_init(adev); |
2753 | 2697 | ||
2754 | gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines, | 2698 | gfx_v8_0_setup_rb(adev); |
2755 | adev->gfx.config.max_sh_per_se, | ||
2756 | adev->gfx.config.max_backends_per_se); | ||
2757 | 2699 | ||
2758 | /* XXX SH_MEM regs */ | 2700 | /* XXX SH_MEM regs */ |
2759 | /* where to put LDS, scratch, GPUVM in FSA64 space */ | 2701 | /* where to put LDS, scratch, GPUVM in FSA64 space */ |
@@ -3062,7 +3004,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) | |||
3062 | 3004 | ||
3063 | gfx_v8_0_cp_gfx_enable(adev, true); | 3005 | gfx_v8_0_cp_gfx_enable(adev, true); |
3064 | 3006 | ||
3065 | r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4); | 3007 | r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); |
3066 | if (r) { | 3008 | if (r) { |
3067 | DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); | 3009 | DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); |
3068 | return r; | 3010 | return r; |
@@ -3126,7 +3068,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) | |||
3126 | amdgpu_ring_write(ring, 0x8000); | 3068 | amdgpu_ring_write(ring, 0x8000); |
3127 | amdgpu_ring_write(ring, 0x8000); | 3069 | amdgpu_ring_write(ring, 0x8000); |
3128 | 3070 | ||
3129 | amdgpu_ring_unlock_commit(ring); | 3071 | amdgpu_ring_commit(ring); |
3130 | 3072 | ||
3131 | return 0; | 3073 | return 0; |
3132 | } | 3074 | } |
@@ -3226,13 +3168,6 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) | |||
3226 | udelay(50); | 3168 | udelay(50); |
3227 | } | 3169 | } |
3228 | 3170 | ||
3229 | static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev) | ||
3230 | { | ||
3231 | gfx_v8_0_cp_compute_enable(adev, true); | ||
3232 | |||
3233 | return 0; | ||
3234 | } | ||
3235 | |||
3236 | static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) | 3171 | static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) |
3237 | { | 3172 | { |
3238 | const struct gfx_firmware_header_v1_0 *mec_hdr; | 3173 | const struct gfx_firmware_header_v1_0 *mec_hdr; |
@@ -3802,9 +3737,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) | |||
3802 | WREG32(mmCP_PQ_STATUS, tmp); | 3737 | WREG32(mmCP_PQ_STATUS, tmp); |
3803 | } | 3738 | } |
3804 | 3739 | ||
3805 | r = gfx_v8_0_cp_compute_start(adev); | 3740 | gfx_v8_0_cp_compute_enable(adev, true); |
3806 | if (r) | ||
3807 | return r; | ||
3808 | 3741 | ||
3809 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 3742 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
3810 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; | 3743 | struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; |
@@ -4016,16 +3949,6 @@ static void gfx_v8_0_print_status(void *handle) | |||
4016 | RREG32(mmHDP_ADDR_CONFIG)); | 3949 | RREG32(mmHDP_ADDR_CONFIG)); |
4017 | dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", | 3950 | dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", |
4018 | RREG32(mmDMIF_ADDR_CALC)); | 3951 | RREG32(mmDMIF_ADDR_CALC)); |
4019 | dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n", | ||
4020 | RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET)); | ||
4021 | dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n", | ||
4022 | RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET)); | ||
4023 | dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", | ||
4024 | RREG32(mmUVD_UDEC_ADDR_CONFIG)); | ||
4025 | dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", | ||
4026 | RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); | ||
4027 | dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", | ||
4028 | RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); | ||
4029 | 3952 | ||
4030 | dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", | 3953 | dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", |
4031 | RREG32(mmCP_MEQ_THRESHOLDS)); | 3954 | RREG32(mmCP_MEQ_THRESHOLDS)); |
@@ -4762,49 +4685,11 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, | |||
4762 | 4685 | ||
4763 | } | 4686 | } |
4764 | 4687 | ||
4765 | /** | ||
4766 | * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring | ||
4767 | * | ||
4768 | * @ring: amdgpu ring buffer object | ||
4769 | * @semaphore: amdgpu semaphore object | ||
4770 | * @emit_wait: Is this a semaphore wait? | ||
4771 | * | ||
4772 | * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP | ||
4773 | * from running ahead of semaphore waits. | ||
4774 | */ | ||
4775 | static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
4776 | struct amdgpu_semaphore *semaphore, | ||
4777 | bool emit_wait) | ||
4778 | { | ||
4779 | uint64_t addr = semaphore->gpu_addr; | ||
4780 | unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; | ||
4781 | |||
4782 | if (ring->adev->asic_type == CHIP_TOPAZ || | ||
4783 | ring->adev->asic_type == CHIP_TONGA || | ||
4784 | ring->adev->asic_type == CHIP_FIJI) | ||
4785 | /* there is a hw semaphore bug on VI Tonga; return false to switch back to sw fence wait */ | ||
4786 | return false; | ||
4787 | else { | ||
4788 | amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2)); | ||
4789 | amdgpu_ring_write(ring, lower_32_bits(addr)); | ||
4790 | amdgpu_ring_write(ring, upper_32_bits(addr)); | ||
4791 | amdgpu_ring_write(ring, sel); | ||
4792 | } | ||
4793 | |||
4794 | if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) { | ||
4795 | /* Prevent the PFP from running ahead of the semaphore wait */ | ||
4796 | amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); | ||
4797 | amdgpu_ring_write(ring, 0x0); | ||
4798 | } | ||
4799 | |||
4800 | return true; | ||
4801 | } | ||
4802 | |||
4803 | static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | 4688 | static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, |
4804 | unsigned vm_id, uint64_t pd_addr) | 4689 | unsigned vm_id, uint64_t pd_addr) |
4805 | { | 4690 | { |
4806 | int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); | 4691 | int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); |
4807 | uint32_t seq = ring->fence_drv.sync_seq[ring->idx]; | 4692 | uint32_t seq = ring->fence_drv.sync_seq; |
4808 | uint64_t addr = ring->fence_drv.gpu_addr; | 4693 | uint64_t addr = ring->fence_drv.gpu_addr; |
4809 | 4694 | ||
4810 | amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); | 4695 | amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); |
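With the semaphore path gone, gfx_v8_0_ring_emit_vm_flush() serializes against the ring's own last submission: sync_seq is a single counter now rather than a per-ring array, and the WAIT_REG_MEM packet makes the CP poll the fence address until that value lands before the flush proceeds. A userspace model of the comparison the packet requests; the actual compare function selection and wraparound handling belong to the CP and are not modeled:

    #include <stdint.h>
    #include <stdio.h>

    /* One check stands in for the CP's polling loop on the fence address. */
    static int wait_mem_gte(const volatile uint32_t *fence_cpu_addr, uint32_t seq)
    {
        return *fence_cpu_addr >= seq;
    }

    int main(void)
    {
        uint32_t fence = 41, seq = 42;

        printf("ready=%d\n", wait_mem_gte(&fence, seq)); /* 0: still pending */
        fence = 42;
        printf("ready=%d\n", wait_mem_gte(&fence, seq)); /* 1: flush may run */
        return 0;
    }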
@@ -5145,13 +5030,13 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { | |||
5145 | .parse_cs = NULL, | 5030 | .parse_cs = NULL, |
5146 | .emit_ib = gfx_v8_0_ring_emit_ib_gfx, | 5031 | .emit_ib = gfx_v8_0_ring_emit_ib_gfx, |
5147 | .emit_fence = gfx_v8_0_ring_emit_fence_gfx, | 5032 | .emit_fence = gfx_v8_0_ring_emit_fence_gfx, |
5148 | .emit_semaphore = gfx_v8_0_ring_emit_semaphore, | ||
5149 | .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, | 5033 | .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, |
5150 | .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, | 5034 | .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, |
5151 | .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, | 5035 | .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, |
5152 | .test_ring = gfx_v8_0_ring_test_ring, | 5036 | .test_ring = gfx_v8_0_ring_test_ring, |
5153 | .test_ib = gfx_v8_0_ring_test_ib, | 5037 | .test_ib = gfx_v8_0_ring_test_ib, |
5154 | .insert_nop = amdgpu_ring_insert_nop, | 5038 | .insert_nop = amdgpu_ring_insert_nop, |
5039 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
5155 | }; | 5040 | }; |
5156 | 5041 | ||
5157 | static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { | 5042 | static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { |
@@ -5161,13 +5046,13 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { | |||
5161 | .parse_cs = NULL, | 5046 | .parse_cs = NULL, |
5162 | .emit_ib = gfx_v8_0_ring_emit_ib_compute, | 5047 | .emit_ib = gfx_v8_0_ring_emit_ib_compute, |
5163 | .emit_fence = gfx_v8_0_ring_emit_fence_compute, | 5048 | .emit_fence = gfx_v8_0_ring_emit_fence_compute, |
5164 | .emit_semaphore = gfx_v8_0_ring_emit_semaphore, | ||
5165 | .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, | 5049 | .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, |
5166 | .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, | 5050 | .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, |
5167 | .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, | 5051 | .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, |
5168 | .test_ring = gfx_v8_0_ring_test_ring, | 5052 | .test_ring = gfx_v8_0_ring_test_ring, |
5169 | .test_ib = gfx_v8_0_ring_test_ib, | 5053 | .test_ib = gfx_v8_0_ring_test_ib, |
5170 | .insert_nop = amdgpu_ring_insert_nop, | 5054 | .insert_nop = amdgpu_ring_insert_nop, |
5055 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
5171 | }; | 5056 | }; |
5172 | 5057 | ||
5173 | static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) | 5058 | static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) |
@@ -5236,32 +5121,24 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) | |||
5236 | } | 5121 | } |
5237 | } | 5122 | } |
5238 | 5123 | ||
5239 | static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev, | 5124 | static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) |
5240 | u32 se, u32 sh) | ||
5241 | { | 5125 | { |
5242 | u32 mask = 0, tmp, tmp1; | 5126 | u32 data, mask; |
5243 | int i; | ||
5244 | |||
5245 | gfx_v8_0_select_se_sh(adev, se, sh); | ||
5246 | tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); | ||
5247 | tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); | ||
5248 | gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); | ||
5249 | 5127 | ||
5250 | tmp &= 0xffff0000; | 5128 | data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); |
5129 | data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); | ||
5251 | 5130 | ||
5252 | tmp |= tmp1; | 5131 | data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; |
5253 | tmp >>= 16; | 5132 | data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; |
5254 | 5133 | ||
5255 | for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { | 5134 | mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / |
5256 | mask <<= 1; | 5135 | adev->gfx.config.max_sh_per_se); |
5257 | mask |= 1; | ||
5258 | } | ||
5259 | 5136 | ||
5260 | return (~tmp) & mask; | 5137 | return (~data) & mask; |
5261 | } | 5138 | } |
5262 | 5139 | ||
5263 | int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, | 5140 | int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, |
5264 | struct amdgpu_cu_info *cu_info) | 5141 | struct amdgpu_cu_info *cu_info) |
5265 | { | 5142 | { |
5266 | int i, j, k, counter, active_cu_number = 0; | 5143 | int i, j, k, counter, active_cu_number = 0; |
5267 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; | 5144 | u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; |
@@ -5275,10 +5152,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, | |||
5275 | mask = 1; | 5152 | mask = 1; |
5276 | ao_bitmap = 0; | 5153 | ao_bitmap = 0; |
5277 | counter = 0; | 5154 | counter = 0; |
5278 | bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j); | 5155 | gfx_v8_0_select_se_sh(adev, i, j); |
5156 | bitmap = gfx_v8_0_get_cu_active_bitmap(adev); | ||
5279 | cu_info->bitmap[i][j] = bitmap; | 5157 | cu_info->bitmap[i][j] = bitmap; |
5280 | 5158 | ||
5281 | for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { | 5159 | for (k = 0; k < 16; k ++) { |
5282 | if (bitmap & mask) { | 5160 | if (bitmap & mask) { |
5283 | if (counter < 2) | 5161 | if (counter < 2) |
5284 | ao_bitmap |= mask; | 5162 | ao_bitmap |= mask; |
@@ -5290,9 +5168,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, | |||
5290 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); | 5168 | ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); |
5291 | } | 5169 | } |
5292 | } | 5170 | } |
5171 | gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); | ||
5172 | mutex_unlock(&adev->grbm_idx_mutex); | ||
5293 | 5173 | ||
5294 | cu_info->number = active_cu_number; | 5174 | cu_info->number = active_cu_number; |
5295 | cu_info->ao_cu_mask = ao_cu_mask; | 5175 | cu_info->ao_cu_mask = ao_cu_mask; |
5296 | mutex_unlock(&adev->grbm_idx_mutex); | 5176 | |
5297 | return 0; | 5177 | return 0; |
5298 | } | 5178 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 8aa2991ab379..68ee66b38e5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | |||
@@ -694,7 +694,8 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev) | |||
694 | * amdgpu graphics/compute will use VMIDs 1-7 | 694 | * amdgpu graphics/compute will use VMIDs 1-7 |
695 | * amdkfd will use VMIDs 8-15 | 695 | * amdkfd will use VMIDs 8-15 |
696 | */ | 696 | */ |
697 | adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS; | 697 | adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; |
698 | amdgpu_vm_manager_init(adev); | ||
698 | 699 | ||
699 | /* base offset of vram pages */ | 700 | /* base offset of vram pages */ |
700 | if (adev->flags & AMD_IS_APU) { | 701 | if (adev->flags & AMD_IS_APU) { |
@@ -926,10 +927,6 @@ static int gmc_v7_0_sw_init(void *handle) | |||
926 | int dma_bits; | 927 | int dma_bits; |
927 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 928 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
928 | 929 | ||
929 | r = amdgpu_gem_init(adev); | ||
930 | if (r) | ||
931 | return r; | ||
932 | |||
933 | r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); | 930 | r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); |
934 | if (r) | 931 | if (r) |
935 | return r; | 932 | return r; |
@@ -1010,7 +1007,7 @@ static int gmc_v7_0_sw_fini(void *handle) | |||
1010 | adev->vm_manager.enabled = false; | 1007 | adev->vm_manager.enabled = false; |
1011 | } | 1008 | } |
1012 | gmc_v7_0_gart_fini(adev); | 1009 | gmc_v7_0_gart_fini(adev); |
1013 | amdgpu_gem_fini(adev); | 1010 | amdgpu_gem_force_release(adev); |
1014 | amdgpu_bo_fini(adev); | 1011 | amdgpu_bo_fini(adev); |
1015 | 1012 | ||
1016 | return 0; | 1013 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 3efd45546241..757803ae7c4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | |||
@@ -252,6 +252,12 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev) | |||
252 | if (!adev->mc.fw) | 252 | if (!adev->mc.fw) |
253 | return -EINVAL; | 253 | return -EINVAL; |
254 | 254 | ||
255 | /* Skip MC ucode loading on SR-IOV capable boards. | ||
256 | * vbios does this for us in asic_init in that case. | ||
257 | */ | ||
258 | if (adev->virtualization.supports_sr_iov) | ||
259 | return 0; | ||
260 | |||
255 | hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; | 261 | hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; |
256 | amdgpu_ucode_print_mc_hdr(&hdr->header); | 262 | amdgpu_ucode_print_mc_hdr(&hdr->header); |
257 | 263 | ||
@@ -774,7 +780,8 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev) | |||
774 | * amdgpu graphics/compute will use VMIDs 1-7 | 780 | * amdgpu graphics/compute will use VMIDs 1-7 |
775 | * amdkfd will use VMIDs 8-15 | 781 | * amdkfd will use VMIDs 8-15 |
776 | */ | 782 | */ |
777 | adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS; | 783 | adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; |
784 | amdgpu_vm_manager_init(adev); | ||
778 | 785 | ||
779 | /* base offset of vram pages */ | 786 | /* base offset of vram pages */ |
780 | if (adev->flags & AMD_IS_APU) { | 787 | if (adev->flags & AMD_IS_APU) { |
@@ -880,10 +887,6 @@ static int gmc_v8_0_sw_init(void *handle) | |||
880 | int dma_bits; | 887 | int dma_bits; |
881 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 888 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
882 | 889 | ||
883 | r = amdgpu_gem_init(adev); | ||
884 | if (r) | ||
885 | return r; | ||
886 | |||
887 | r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); | 890 | r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); |
888 | if (r) | 891 | if (r) |
889 | return r; | 892 | return r; |
@@ -964,7 +967,7 @@ static int gmc_v8_0_sw_fini(void *handle) | |||
964 | adev->vm_manager.enabled = false; | 967 | adev->vm_manager.enabled = false; |
965 | } | 968 | } |
966 | gmc_v8_0_gart_fini(adev); | 969 | gmc_v8_0_gart_fini(adev); |
967 | amdgpu_gem_fini(adev); | 970 | amdgpu_gem_force_release(adev); |
968 | amdgpu_bo_fini(adev); | 971 | amdgpu_bo_fini(adev); |
969 | 972 | ||
970 | return 0; | 973 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c index 090486c18249..52ee08193295 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c | |||
@@ -279,6 +279,12 @@ static int iceland_smu_upload_firmware_image(struct amdgpu_device *adev) | |||
279 | if (!adev->pm.fw) | 279 | if (!adev->pm.fw) |
280 | return -EINVAL; | 280 | return -EINVAL; |
281 | 281 | ||
282 | /* Skip SMC ucode loading on SR-IOV capable boards. | ||
283 | * vbios does this for us in asic_init in that case. | ||
284 | */ | ||
285 | if (adev->virtualization.supports_sr_iov) | ||
286 | return 0; | ||
287 | |||
282 | hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; | 288 | hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; |
283 | amdgpu_ucode_print_smc_hdr(&hdr->header); | 289 | amdgpu_ucode_print_smc_hdr(&hdr->header); |
284 | 290 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2cf50180cc51..29ec986dd6fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | |||
@@ -335,31 +335,6 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se | |||
335 | } | 335 | } |
336 | 336 | ||
337 | /** | 337 | /** |
338 | * sdma_v2_4_ring_emit_semaphore - emit a semaphore on the dma ring | ||
339 | * | ||
340 | * @ring: amdgpu_ring structure holding ring information | ||
341 | * @semaphore: amdgpu semaphore object | ||
342 | * @emit_wait: wait or signal semaphore | ||
343 | * | ||
344 | * Add a DMA semaphore packet to the ring to wait on or signal | ||
345 | * other rings (VI). | ||
346 | */ | ||
347 | static bool sdma_v2_4_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
348 | struct amdgpu_semaphore *semaphore, | ||
349 | bool emit_wait) | ||
350 | { | ||
351 | u64 addr = semaphore->gpu_addr; | ||
352 | u32 sig = emit_wait ? 0 : 1; | ||
353 | |||
354 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) | | ||
355 | SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig)); | ||
356 | amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8); | ||
357 | amdgpu_ring_write(ring, upper_32_bits(addr)); | ||
358 | |||
359 | return true; | ||
360 | } | ||
361 | |||
362 | /** | ||
363 | * sdma_v2_4_gfx_stop - stop the gfx async dma engines | 338 | * sdma_v2_4_gfx_stop - stop the gfx async dma engines |
364 | * | 339 | * |
365 | * @adev: amdgpu_device pointer | 340 | * @adev: amdgpu_device pointer |
@@ -459,6 +434,9 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) | |||
459 | vi_srbm_select(adev, 0, 0, 0, 0); | 434 | vi_srbm_select(adev, 0, 0, 0, 0); |
460 | mutex_unlock(&adev->srbm_mutex); | 435 | mutex_unlock(&adev->srbm_mutex); |
461 | 436 | ||
437 | WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i], | ||
438 | adev->gfx.config.gb_addr_config & 0x70); | ||
439 | |||
462 | WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); | 440 | WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); |
463 | 441 | ||
464 | /* Set ring buffer size in dwords */ | 442 | /* Set ring buffer size in dwords */ |
@@ -636,7 +614,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) | |||
636 | tmp = 0xCAFEDEAD; | 614 | tmp = 0xCAFEDEAD; |
637 | adev->wb.wb[index] = cpu_to_le32(tmp); | 615 | adev->wb.wb[index] = cpu_to_le32(tmp); |
638 | 616 | ||
639 | r = amdgpu_ring_lock(ring, 5); | 617 | r = amdgpu_ring_alloc(ring, 5); |
640 | if (r) { | 618 | if (r) { |
641 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); | 619 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); |
642 | amdgpu_wb_free(adev, index); | 620 | amdgpu_wb_free(adev, index); |
@@ -649,7 +627,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) | |||
649 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); | 627 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); |
650 | amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); | 628 | amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); |
651 | amdgpu_ring_write(ring, 0xDEADBEEF); | 629 | amdgpu_ring_write(ring, 0xDEADBEEF); |
652 | amdgpu_ring_unlock_commit(ring); | 630 | amdgpu_ring_commit(ring); |
653 | 631 | ||
654 | for (i = 0; i < adev->usec_timeout; i++) { | 632 | for (i = 0; i < adev->usec_timeout; i++) { |
655 | tmp = le32_to_cpu(adev->wb.wb[index]); | 633 | tmp = le32_to_cpu(adev->wb.wb[index]); |
@@ -699,7 +677,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) | |||
699 | tmp = 0xCAFEDEAD; | 677 | tmp = 0xCAFEDEAD; |
700 | adev->wb.wb[index] = cpu_to_le32(tmp); | 678 | adev->wb.wb[index] = cpu_to_le32(tmp); |
701 | memset(&ib, 0, sizeof(ib)); | 679 | memset(&ib, 0, sizeof(ib)); |
702 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 680 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
703 | if (r) { | 681 | if (r) { |
704 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 682 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
705 | goto err0; | 683 | goto err0; |
@@ -716,9 +694,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) | |||
716 | ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); | 694 | ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); |
717 | ib.length_dw = 8; | 695 | ib.length_dw = 8; |
718 | 696 | ||
719 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, | 697 | r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, |
720 | AMDGPU_FENCE_OWNER_UNDEFINED, | 698 | NULL, &f); |
721 | &f); | ||
722 | if (r) | 699 | if (r) |
723 | goto err1; | 700 | goto err1; |
724 | 701 | ||
@@ -797,7 +774,7 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib, | |||
797 | * Update PTEs by writing them manually using sDMA (CIK). | 774 | * Update PTEs by writing them manually using sDMA (CIK). |
798 | */ | 775 | */ |
799 | static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, | 776 | static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, |
800 | uint64_t pe, | 777 | const dma_addr_t *pages_addr, uint64_t pe, |
801 | uint64_t addr, unsigned count, | 778 | uint64_t addr, unsigned count, |
802 | uint32_t incr, uint32_t flags) | 779 | uint32_t incr, uint32_t flags) |
803 | { | 780 | { |
@@ -816,14 +793,7 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, | |||
816 | ib->ptr[ib->length_dw++] = upper_32_bits(pe); | 793 | ib->ptr[ib->length_dw++] = upper_32_bits(pe); |
817 | ib->ptr[ib->length_dw++] = ndw; | 794 | ib->ptr[ib->length_dw++] = ndw; |
818 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { | 795 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { |
819 | if (flags & AMDGPU_PTE_SYSTEM) { | 796 | value = amdgpu_vm_map_gart(pages_addr, addr); |
820 | value = amdgpu_vm_map_gart(ib->ring->adev, addr); | ||
821 | value &= 0xFFFFFFFFFFFFF000ULL; | ||
822 | } else if (flags & AMDGPU_PTE_VALID) { | ||
823 | value = addr; | ||
824 | } else { | ||
825 | value = 0; | ||
826 | } | ||
827 | addr += incr; | 797 | addr += incr; |
828 | value |= flags; | 798 | value |= flags; |
829 | ib->ptr[ib->length_dw++] = value; | 799 | ib->ptr[ib->length_dw++] = value; |
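The PTE write loop no longer special-cases AMDGPU_PTE_SYSTEM and AMDGPU_PTE_VALID itself; amdgpu_vm_map_gart(pages_addr, addr) resolves every entry's value and the flags are OR'd in unconditionally. A userspace model of the packing; the map_gart stand-in (a NULL table passes the address through) and the flag bit are assumptions:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for amdgpu_vm_map_gart(): with no GART translation table the
     * address is used as-is, otherwise it would be looked up per page. */
    static uint64_t map_gart(const uint64_t *pages_addr, uint64_t addr)
    {
        return pages_addr ? pages_addr[addr >> 12] & ~0xfffULL : addr;
    }

    int main(void)
    {
        uint32_t ib[64], ndw = 0;
        uint64_t pe = 0x100000, addr = 0x4000;
        uint32_t count = 2, incr = 0x1000, flags = 0x1; /* hypothetical VALID bit */

        /* packet header elided; body: lower/upper PE, dword count, values */
        ib[ndw++] = (uint32_t)pe;
        ib[ndw++] = (uint32_t)(pe >> 32);
        ib[ndw++] = count * 2;
        for (uint32_t n = count * 2; n > 0; n -= 2, --count, pe += 8) {
            uint64_t value = map_gart(NULL, addr) | flags;

            addr += incr;
            ib[ndw++] = (uint32_t)value;
            ib[ndw++] = (uint32_t)(value >> 32);
        }
        for (uint32_t i = 0; i < ndw; i++)
            printf("dw%u: 0x%08x\n", i, ib[i]);
        return 0;
    }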
@@ -881,14 +851,14 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, | |||
881 | } | 851 | } |
882 | 852 | ||
883 | /** | 853 | /** |
884 | * sdma_v2_4_vm_pad_ib - pad the IB to the required number of dw | 854 | * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw |
885 | * | 855 | * |
886 | * @ib: indirect buffer to fill with padding | 856 | * @ib: indirect buffer to fill with padding |
887 | * | 857 | * |
888 | */ | 858 | */ |
889 | static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib) | 859 | static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) |
890 | { | 860 | { |
891 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); | 861 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); |
892 | u32 pad_count; | 862 | u32 pad_count; |
893 | int i; | 863 | int i; |
894 | 864 | ||
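The pad callback moves from the VM PTE funcs to the ring funcs and takes the ring explicitly, since struct amdgpu_ib no longer carries a back-pointer to its ring. SDMA IBs must end on an 8-dword boundary; a plausible reconstruction of the pad_count math, checked standalone with the SDMA NOP encoding elided:

    #include <stdint.h>
    #include <stdio.h>

    /* Pad an SDMA IB to a multiple of 8 dwords with NOP stand-ins. */
    static uint32_t pad_ib(uint32_t *ib, uint32_t length_dw, uint32_t nop)
    {
        uint32_t pad_count = (8 - (length_dw & 0x7)) % 8;

        for (uint32_t i = 0; i < pad_count; i++)
            ib[length_dw++] = nop;  /* real code may use a burst NOP first */
        return length_dw;
    }

    int main(void)
    {
        uint32_t ib[16] = { 0 };

        printf("%u\n", pad_ib(ib, 3, 0));   /* 3 -> 8  */
        printf("%u\n", pad_ib(ib, 8, 0));   /* 8 -> 8  */
        printf("%u\n", pad_ib(ib, 9, 0));   /* 9 -> 16 */
        return 0;
    }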
@@ -1111,6 +1081,8 @@ static void sdma_v2_4_print_status(void *handle) | |||
1111 | i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); | 1081 | i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); |
1112 | dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", | 1082 | dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", |
1113 | i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); | 1083 | i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); |
1084 | dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", | ||
1085 | i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); | ||
1114 | mutex_lock(&adev->srbm_mutex); | 1086 | mutex_lock(&adev->srbm_mutex); |
1115 | for (j = 0; j < 16; j++) { | 1087 | for (j = 0; j < 16; j++) { |
1116 | vi_srbm_select(adev, 0, 0, 0, j); | 1088 | vi_srbm_select(adev, 0, 0, 0, j); |
@@ -1302,12 +1274,12 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { | |||
1302 | .parse_cs = NULL, | 1274 | .parse_cs = NULL, |
1303 | .emit_ib = sdma_v2_4_ring_emit_ib, | 1275 | .emit_ib = sdma_v2_4_ring_emit_ib, |
1304 | .emit_fence = sdma_v2_4_ring_emit_fence, | 1276 | .emit_fence = sdma_v2_4_ring_emit_fence, |
1305 | .emit_semaphore = sdma_v2_4_ring_emit_semaphore, | ||
1306 | .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush, | 1277 | .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush, |
1307 | .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush, | 1278 | .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush, |
1308 | .test_ring = sdma_v2_4_ring_test_ring, | 1279 | .test_ring = sdma_v2_4_ring_test_ring, |
1309 | .test_ib = sdma_v2_4_ring_test_ib, | 1280 | .test_ib = sdma_v2_4_ring_test_ib, |
1310 | .insert_nop = sdma_v2_4_ring_insert_nop, | 1281 | .insert_nop = sdma_v2_4_ring_insert_nop, |
1282 | .pad_ib = sdma_v2_4_ring_pad_ib, | ||
1311 | }; | 1283 | }; |
1312 | 1284 | ||
1313 | static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) | 1285 | static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) |
@@ -1405,14 +1377,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { | |||
1405 | .copy_pte = sdma_v2_4_vm_copy_pte, | 1377 | .copy_pte = sdma_v2_4_vm_copy_pte, |
1406 | .write_pte = sdma_v2_4_vm_write_pte, | 1378 | .write_pte = sdma_v2_4_vm_write_pte, |
1407 | .set_pte_pde = sdma_v2_4_vm_set_pte_pde, | 1379 | .set_pte_pde = sdma_v2_4_vm_set_pte_pde, |
1408 | .pad_ib = sdma_v2_4_vm_pad_ib, | ||
1409 | }; | 1380 | }; |
1410 | 1381 | ||
1411 | static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) | 1382 | static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) |
1412 | { | 1383 | { |
1384 | unsigned i; | ||
1385 | |||
1413 | if (adev->vm_manager.vm_pte_funcs == NULL) { | 1386 | if (adev->vm_manager.vm_pte_funcs == NULL) { |
1414 | adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; | 1387 | adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; |
1415 | adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; | 1388 | for (i = 0; i < adev->sdma.num_instances; i++) |
1416 | adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; | 1389 | adev->vm_manager.vm_pte_rings[i] = |
1390 | &adev->sdma.instance[i].ring; | ||
1391 | |||
1392 | adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; | ||
1417 | } | 1393 | } |
1418 | } | 1394 | } |
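Instead of hard-wiring SDMA instance 0 as the page-table ring, the manager now records every SDMA ring plus a count, so VM updates can be spread across instances. A toy model of picking among the registered rings; the round-robin policy here is purely illustrative, not a claim about the scheduler's actual placement:

    #include <stdio.h>

    struct ring { const char *name; };

    struct vm_manager {
        struct ring *pte_rings[2];
        unsigned     num_rings;
        unsigned     next;   /* illustrative round-robin cursor */
    };

    static struct ring *pick_pte_ring(struct vm_manager *m)
    {
        return m->pte_rings[m->next++ % m->num_rings];
    }

    int main(void)
    {
        struct ring sdma0 = { "sdma0" }, sdma1 = { "sdma1" };
        struct vm_manager m = { { &sdma0, &sdma1 }, 2, 0 };

        for (int i = 0; i < 4; i++)
            printf("%s\n", pick_pte_ring(&m)->name);  /* alternates */
        return 0;
    }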
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index ad54c46751b0..6f064d7076e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | |||
@@ -444,32 +444,6 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se | |||
444 | amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); | 444 | amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); |
445 | } | 445 | } |
446 | 446 | ||
447 | |||
448 | /** | ||
449 | * sdma_v3_0_ring_emit_semaphore - emit a semaphore on the dma ring | ||
450 | * | ||
451 | * @ring: amdgpu_ring structure holding ring information | ||
452 | * @semaphore: amdgpu semaphore object | ||
453 | * @emit_wait: wait or signal semaphore | ||
454 | * | ||
455 | * Add a DMA semaphore packet to the ring to wait on or signal | ||
456 | * other rings (VI). | ||
457 | */ | ||
458 | static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
459 | struct amdgpu_semaphore *semaphore, | ||
460 | bool emit_wait) | ||
461 | { | ||
462 | u64 addr = semaphore->gpu_addr; | ||
463 | u32 sig = emit_wait ? 0 : 1; | ||
464 | |||
465 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) | | ||
466 | SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig)); | ||
467 | amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8); | ||
468 | amdgpu_ring_write(ring, upper_32_bits(addr)); | ||
469 | |||
470 | return true; | ||
471 | } | ||
472 | |||
473 | /** | 447 | /** |
474 | * sdma_v3_0_gfx_stop - stop the gfx async dma engines | 448 | * sdma_v3_0_gfx_stop - stop the gfx async dma engines |
475 | * | 449 | * |
@@ -596,6 +570,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) | |||
596 | vi_srbm_select(adev, 0, 0, 0, 0); | 570 | vi_srbm_select(adev, 0, 0, 0, 0); |
597 | mutex_unlock(&adev->srbm_mutex); | 571 | mutex_unlock(&adev->srbm_mutex); |
598 | 572 | ||
573 | WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i], | ||
574 | adev->gfx.config.gb_addr_config & 0x70); | ||
575 | |||
599 | WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); | 576 | WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); |
600 | 577 | ||
601 | /* Set ring buffer size in dwords */ | 578 | /* Set ring buffer size in dwords */ |
@@ -788,7 +765,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) | |||
788 | tmp = 0xCAFEDEAD; | 765 | tmp = 0xCAFEDEAD; |
789 | adev->wb.wb[index] = cpu_to_le32(tmp); | 766 | adev->wb.wb[index] = cpu_to_le32(tmp); |
790 | 767 | ||
791 | r = amdgpu_ring_lock(ring, 5); | 768 | r = amdgpu_ring_alloc(ring, 5); |
792 | if (r) { | 769 | if (r) { |
793 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); | 770 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); |
794 | amdgpu_wb_free(adev, index); | 771 | amdgpu_wb_free(adev, index); |
@@ -801,7 +778,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) | |||
801 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); | 778 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); |
802 | amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); | 779 | amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); |
803 | amdgpu_ring_write(ring, 0xDEADBEEF); | 780 | amdgpu_ring_write(ring, 0xDEADBEEF); |
804 | amdgpu_ring_unlock_commit(ring); | 781 | amdgpu_ring_commit(ring); |
805 | 782 | ||
806 | for (i = 0; i < adev->usec_timeout; i++) { | 783 | for (i = 0; i < adev->usec_timeout; i++) { |
807 | tmp = le32_to_cpu(adev->wb.wb[index]); | 784 | tmp = le32_to_cpu(adev->wb.wb[index]); |
@@ -851,7 +828,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) | |||
851 | tmp = 0xCAFEDEAD; | 828 | tmp = 0xCAFEDEAD; |
852 | adev->wb.wb[index] = cpu_to_le32(tmp); | 829 | adev->wb.wb[index] = cpu_to_le32(tmp); |
853 | memset(&ib, 0, sizeof(ib)); | 830 | memset(&ib, 0, sizeof(ib)); |
854 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 831 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
855 | if (r) { | 832 | if (r) { |
856 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 833 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
857 | goto err0; | 834 | goto err0; |
@@ -868,9 +845,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) | |||
868 | ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); | 845 | ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); |
869 | ib.length_dw = 8; | 846 | ib.length_dw = 8; |
870 | 847 | ||
871 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, | 848 | r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, |
872 | AMDGPU_FENCE_OWNER_UNDEFINED, | 849 | NULL, &f); |
873 | &f); | ||
874 | if (r) | 850 | if (r) |
875 | goto err1; | 851 | goto err1; |
876 | 852 | ||
@@ -948,7 +924,7 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib, | |||
948 | * Update PTEs by writing them manually using sDMA (CIK). | 924 | * Update PTEs by writing them manually using sDMA (CIK). |
949 | */ | 925 | */ |
950 | static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, | 926 | static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, |
951 | uint64_t pe, | 927 | const dma_addr_t *pages_addr, uint64_t pe, |
952 | uint64_t addr, unsigned count, | 928 | uint64_t addr, unsigned count, |
953 | uint32_t incr, uint32_t flags) | 929 | uint32_t incr, uint32_t flags) |
954 | { | 930 | { |
@@ -967,14 +943,7 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, | |||
967 | ib->ptr[ib->length_dw++] = upper_32_bits(pe); | 943 | ib->ptr[ib->length_dw++] = upper_32_bits(pe); |
968 | ib->ptr[ib->length_dw++] = ndw; | 944 | ib->ptr[ib->length_dw++] = ndw; |
969 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { | 945 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { |
970 | if (flags & AMDGPU_PTE_SYSTEM) { | 946 | value = amdgpu_vm_map_gart(pages_addr, addr); |
971 | value = amdgpu_vm_map_gart(ib->ring->adev, addr); | ||
972 | value &= 0xFFFFFFFFFFFFF000ULL; | ||
973 | } else if (flags & AMDGPU_PTE_VALID) { | ||
974 | value = addr; | ||
975 | } else { | ||
976 | value = 0; | ||
977 | } | ||
978 | addr += incr; | 947 | addr += incr; |
979 | value |= flags; | 948 | value |= flags; |
980 | ib->ptr[ib->length_dw++] = value; | 949 | ib->ptr[ib->length_dw++] = value; |
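The open-coded three-way branch on AMDGPU_PTE_SYSTEM / AMDGPU_PTE_VALID is folded away: the callback now receives the pages_addr array and delegates each lookup to amdgpu_vm_map_gart(). The helper body is not part of this diff; a sketch under the assumption that it resolves a system-page address through that array while preserving the in-page offset:

static uint64_t amdgpu_vm_map_gart_sketch(const dma_addr_t *pages_addr,
					  uint64_t addr)
{
	uint64_t result;

	/* look up the DMA address of the backing system page */
	result = pages_addr[addr >> PAGE_SHIFT];

	/* keep the offset within the page (CPU page size may differ from GPU's) */
	result |= addr & ~PAGE_MASK;

	return result;
}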
@@ -1032,14 +1001,14 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, | |||
1032 | } | 1001 | } |
1033 | 1002 | ||
1034 | /** | 1003 | /** |
1035 | * sdma_v3_0_vm_pad_ib - pad the IB to the required number of dw | 1004 | * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw |
1036 | * | 1005 | * |
1037 | * @ib: indirect buffer to fill with padding | 1006 | * @ib: indirect buffer to fill with padding |
1038 | * | 1007 | * |
1039 | */ | 1008 | */ |
1040 | static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib) | 1009 | static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) |
1041 | { | 1010 | { |
1042 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); | 1011 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); |
1043 | u32 pad_count; | 1012 | u32 pad_count; |
1044 | int i; | 1013 | int i; |
1045 | 1014 | ||
@@ -1275,6 +1244,8 @@ static void sdma_v3_0_print_status(void *handle) | |||
1275 | i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); | 1244 | i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); |
1276 | dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n", | 1245 | dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n", |
1277 | i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i])); | 1246 | i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i])); |
1247 | dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", | ||
1248 | i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); | ||
1278 | mutex_lock(&adev->srbm_mutex); | 1249 | mutex_lock(&adev->srbm_mutex); |
1279 | for (j = 0; j < 16; j++) { | 1250 | for (j = 0; j < 16; j++) { |
1280 | vi_srbm_select(adev, 0, 0, 0, j); | 1251 | vi_srbm_select(adev, 0, 0, 0, j); |
@@ -1570,12 +1541,12 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { | |||
1570 | .parse_cs = NULL, | 1541 | .parse_cs = NULL, |
1571 | .emit_ib = sdma_v3_0_ring_emit_ib, | 1542 | .emit_ib = sdma_v3_0_ring_emit_ib, |
1572 | .emit_fence = sdma_v3_0_ring_emit_fence, | 1543 | .emit_fence = sdma_v3_0_ring_emit_fence, |
1573 | .emit_semaphore = sdma_v3_0_ring_emit_semaphore, | ||
1574 | .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush, | 1544 | .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush, |
1575 | .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush, | 1545 | .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush, |
1576 | .test_ring = sdma_v3_0_ring_test_ring, | 1546 | .test_ring = sdma_v3_0_ring_test_ring, |
1577 | .test_ib = sdma_v3_0_ring_test_ib, | 1547 | .test_ib = sdma_v3_0_ring_test_ib, |
1578 | .insert_nop = sdma_v3_0_ring_insert_nop, | 1548 | .insert_nop = sdma_v3_0_ring_insert_nop, |
1549 | .pad_ib = sdma_v3_0_ring_pad_ib, | ||
1579 | }; | 1550 | }; |
1580 | 1551 | ||
1581 | static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) | 1552 | static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) |
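Two ring_funcs changes recur throughout this series: .emit_semaphore is deleted (inter-ring synchronization evidently moves to fences), and .pad_ib becomes a per-ring hook instead of a vm_pte_funcs member. For SDMA the padding fills the IB with engine NOPs up to an alignment boundary; a sketch, assuming the usual 8-dword SDMA alignment (the constant is an assumption, the NOP encoding matches the hunks above):

static void sdma_pad_ib_sketch(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	u32 pad_count = (8 - (ib->length_dw & 0x7)) & 0x7;	/* assumed boundary */
	u32 i;

	for (i = 0; i < pad_count; i++)
		ib->ptr[ib->length_dw++] =
			SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
}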
@@ -1673,14 +1644,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { | |||
1673 | .copy_pte = sdma_v3_0_vm_copy_pte, | 1644 | .copy_pte = sdma_v3_0_vm_copy_pte, |
1674 | .write_pte = sdma_v3_0_vm_write_pte, | 1645 | .write_pte = sdma_v3_0_vm_write_pte, |
1675 | .set_pte_pde = sdma_v3_0_vm_set_pte_pde, | 1646 | .set_pte_pde = sdma_v3_0_vm_set_pte_pde, |
1676 | .pad_ib = sdma_v3_0_vm_pad_ib, | ||
1677 | }; | 1647 | }; |
1678 | 1648 | ||
1679 | static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) | 1649 | static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) |
1680 | { | 1650 | { |
1651 | unsigned i; | ||
1652 | |||
1681 | if (adev->vm_manager.vm_pte_funcs == NULL) { | 1653 | if (adev->vm_manager.vm_pte_funcs == NULL) { |
1682 | adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; | 1654 | adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; |
1683 | adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; | 1655 | for (i = 0; i < adev->sdma.num_instances; i++) |
1684 | adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; | 1656 | adev->vm_manager.vm_pte_rings[i] = |
1657 | &adev->sdma.instance[i].ring; | ||
1658 | |||
1659 | adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; | ||
1685 | } | 1660 | } |
1686 | } | 1661 | } |
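Instead of pinning page-table updates to SDMA instance 0 (and flagging it is_pte_ring), every SDMA ring is now published in vm_pte_rings[] with vm_pte_num_rings as the count. How the VM manager spreads work over them is outside this diff; a purely illustrative round-robin selector, all names hypothetical:

static struct amdgpu_ring *
vm_pick_pte_ring_sketch(struct amdgpu_vm_manager *mgr, unsigned counter)
{
	/* hypothetical policy: rotate across the published PTE rings */
	return mgr->vm_pte_rings[counter % mgr->vm_pte_num_rings];
}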
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c index 361c49a82323..083893dd68c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c | |||
@@ -272,6 +272,12 @@ static int tonga_smu_upload_firmware_image(struct amdgpu_device *adev) | |||
272 | if (!adev->pm.fw) | 272 | if (!adev->pm.fw) |
273 | return -EINVAL; | 273 | return -EINVAL; |
274 | 274 | ||
275 | /* Skip SMC ucode loading on SR-IOV capable boards. | ||
276 | * vbios does this for us in asic_init in that case. | ||
277 | */ | ||
278 | if (adev->virtualization.supports_sr_iov) | ||
279 | return 0; | ||
280 | |||
275 | hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; | 281 | hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; |
276 | amdgpu_ucode_print_smc_hdr(&hdr->header); | 282 | amdgpu_ucode_print_smc_hdr(&hdr->header); |
277 | 283 | ||
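Returning 0 rather than an error when SR-IOV is detected is the important detail here: the rest of the SMU init sequence proceeds unchanged in the guest, while the actual SMC microcode upload is left to the vbios during asic_init on the host side, as the added comment states.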
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 5e9f73af83a8..70ed73fa5156 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | |||
@@ -164,7 +164,7 @@ static int uvd_v4_2_hw_init(void *handle) | |||
164 | goto done; | 164 | goto done; |
165 | } | 165 | } |
166 | 166 | ||
167 | r = amdgpu_ring_lock(ring, 10); | 167 | r = amdgpu_ring_alloc(ring, 10); |
168 | if (r) { | 168 | if (r) { |
169 | DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); | 169 | DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); |
170 | goto done; | 170 | goto done; |
@@ -189,7 +189,7 @@ static int uvd_v4_2_hw_init(void *handle) | |||
189 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); | 189 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); |
190 | amdgpu_ring_write(ring, 3); | 190 | amdgpu_ring_write(ring, 3); |
191 | 191 | ||
192 | amdgpu_ring_unlock_commit(ring); | 192 | amdgpu_ring_commit(ring); |
193 | 193 | ||
194 | done: | 194 | done: |
195 | /* lower clocks again */ | 195 | /* lower clocks again */ |
@@ -439,33 +439,6 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq | |||
439 | } | 439 | } |
440 | 440 | ||
441 | /** | 441 | /** |
442 | * uvd_v4_2_ring_emit_semaphore - emit semaphore command | ||
443 | * | ||
444 | * @ring: amdgpu_ring pointer | ||
445 | * @semaphore: semaphore to emit commands for | ||
446 | * @emit_wait: true if we should emit a wait command | ||
447 | * | ||
448 | * Emit a semaphore command (either wait or signal) to the UVD ring. | ||
449 | */ | ||
450 | static bool uvd_v4_2_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
451 | struct amdgpu_semaphore *semaphore, | ||
452 | bool emit_wait) | ||
453 | { | ||
454 | uint64_t addr = semaphore->gpu_addr; | ||
455 | |||
456 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0)); | ||
457 | amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF); | ||
458 | |||
459 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0)); | ||
460 | amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF); | ||
461 | |||
462 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0)); | ||
463 | amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); | ||
464 | |||
465 | return true; | ||
466 | } | ||
467 | |||
468 | /** | ||
469 | * uvd_v4_2_ring_test_ring - register write test | 442 | * uvd_v4_2_ring_test_ring - register write test |
470 | * | 443 | * |
471 | * @ring: amdgpu_ring pointer | 444 | * @ring: amdgpu_ring pointer |
@@ -480,7 +453,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) | |||
480 | int r; | 453 | int r; |
481 | 454 | ||
482 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); | 455 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
483 | r = amdgpu_ring_lock(ring, 3); | 456 | r = amdgpu_ring_alloc(ring, 3); |
484 | if (r) { | 457 | if (r) { |
485 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | 458 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", |
486 | ring->idx, r); | 459 | ring->idx, r); |
@@ -488,7 +461,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) | |||
488 | } | 461 | } |
489 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); | 462 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); |
490 | amdgpu_ring_write(ring, 0xDEADBEEF); | 463 | amdgpu_ring_write(ring, 0xDEADBEEF); |
491 | amdgpu_ring_unlock_commit(ring); | 464 | amdgpu_ring_commit(ring); |
492 | for (i = 0; i < adev->usec_timeout; i++) { | 465 | for (i = 0; i < adev->usec_timeout; i++) { |
493 | tmp = RREG32(mmUVD_CONTEXT_ID); | 466 | tmp = RREG32(mmUVD_CONTEXT_ID); |
494 | if (tmp == 0xDEADBEEF) | 467 | if (tmp == 0xDEADBEEF) |
@@ -549,7 +522,7 @@ static int uvd_v4_2_ring_test_ib(struct amdgpu_ring *ring) | |||
549 | goto error; | 522 | goto error; |
550 | } | 523 | } |
551 | 524 | ||
552 | r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); | 525 | r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); |
553 | if (r) { | 526 | if (r) { |
554 | DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); | 527 | DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); |
555 | goto error; | 528 | goto error; |
@@ -603,6 +576,10 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) | |||
603 | addr = (adev->uvd.gpu_addr >> 32) & 0xFF; | 576 | addr = (adev->uvd.gpu_addr >> 32) & 0xFF; |
604 | WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); | 577 | WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); |
605 | 578 | ||
579 | WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
580 | WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
581 | WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
582 | |||
606 | uvd_v4_2_init_cg(adev); | 583 | uvd_v4_2_init_cg(adev); |
607 | } | 584 | } |
608 | 585 | ||
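The same three WREG32 calls appear in uvd_v5_0_mc_resume() and uvd_v6_0_mc_resume() below: each UVD generation now programs its UDEC address-config registers from gfx.config.gb_addr_config itself rather than relying on tiling setup done elsewhere, and the matching dev_info lines added to print_status make the programmed values visible for debugging.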
@@ -804,6 +781,13 @@ static void uvd_v4_2_print_status(void *handle) | |||
804 | RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); | 781 | RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); |
805 | dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", | 782 | dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", |
806 | RREG32(mmUVD_CONTEXT_ID)); | 783 | RREG32(mmUVD_CONTEXT_ID)); |
784 | dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", | ||
785 | RREG32(mmUVD_UDEC_ADDR_CONFIG)); | ||
786 | dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", | ||
787 | RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); | ||
788 | dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", | ||
789 | RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); | ||
790 | |||
807 | } | 791 | } |
808 | 792 | ||
809 | static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev, | 793 | static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev, |
@@ -882,10 +866,10 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { | |||
882 | .parse_cs = amdgpu_uvd_ring_parse_cs, | 866 | .parse_cs = amdgpu_uvd_ring_parse_cs, |
883 | .emit_ib = uvd_v4_2_ring_emit_ib, | 867 | .emit_ib = uvd_v4_2_ring_emit_ib, |
884 | .emit_fence = uvd_v4_2_ring_emit_fence, | 868 | .emit_fence = uvd_v4_2_ring_emit_fence, |
885 | .emit_semaphore = uvd_v4_2_ring_emit_semaphore, | ||
886 | .test_ring = uvd_v4_2_ring_test_ring, | 869 | .test_ring = uvd_v4_2_ring_test_ring, |
887 | .test_ib = uvd_v4_2_ring_test_ib, | 870 | .test_ib = uvd_v4_2_ring_test_ib, |
888 | .insert_nop = amdgpu_ring_insert_nop, | 871 | .insert_nop = amdgpu_ring_insert_nop, |
872 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
889 | }; | 873 | }; |
890 | 874 | ||
891 | static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) | 875 | static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) |
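The deleted uvd_v4_2_ring_emit_semaphore() above (and its v5/v6 twins below) split an 8-byte-aligned semaphore GPU address across two 20-bit register writes plus a command dword. A compact restatement of the retired packet fields, taken directly from the deleted code:

/* field encodings of the retired UVD semaphore packet */
static inline u32 uvd_sema_addr_low(u64 addr)  { return (addr >> 3)  & 0x000FFFFF; }
static inline u32 uvd_sema_addr_high(u64 addr) { return (addr >> 23) & 0x000FFFFF; }
static inline u32 uvd_sema_cmd(bool emit_wait) { return 0x80 | (emit_wait ? 1 : 0); }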
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 38864f562981..578ffb62fdb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | |||
@@ -160,7 +160,7 @@ static int uvd_v5_0_hw_init(void *handle) | |||
160 | goto done; | 160 | goto done; |
161 | } | 161 | } |
162 | 162 | ||
163 | r = amdgpu_ring_lock(ring, 10); | 163 | r = amdgpu_ring_alloc(ring, 10); |
164 | if (r) { | 164 | if (r) { |
165 | DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); | 165 | DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); |
166 | goto done; | 166 | goto done; |
@@ -185,7 +185,7 @@ static int uvd_v5_0_hw_init(void *handle) | |||
185 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); | 185 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); |
186 | amdgpu_ring_write(ring, 3); | 186 | amdgpu_ring_write(ring, 3); |
187 | 187 | ||
188 | amdgpu_ring_unlock_commit(ring); | 188 | amdgpu_ring_commit(ring); |
189 | 189 | ||
190 | done: | 190 | done: |
191 | /* lower clocks again */ | 191 | /* lower clocks again */ |
@@ -279,6 +279,10 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) | |||
279 | size = AMDGPU_UVD_HEAP_SIZE; | 279 | size = AMDGPU_UVD_HEAP_SIZE; |
280 | WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); | 280 | WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); |
281 | WREG32(mmUVD_VCPU_CACHE_SIZE2, size); | 281 | WREG32(mmUVD_VCPU_CACHE_SIZE2, size); |
282 | |||
283 | WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
284 | WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
285 | WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
282 | } | 286 | } |
283 | 287 | ||
284 | /** | 288 | /** |
@@ -483,33 +487,6 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq | |||
483 | } | 487 | } |
484 | 488 | ||
485 | /** | 489 | /** |
486 | * uvd_v5_0_ring_emit_semaphore - emit semaphore command | ||
487 | * | ||
488 | * @ring: amdgpu_ring pointer | ||
489 | * @semaphore: semaphore to emit commands for | ||
490 | * @emit_wait: true if we should emit a wait command | ||
491 | * | ||
492 | * Emit a semaphore command (either wait or signal) to the UVD ring. | ||
493 | */ | ||
494 | static bool uvd_v5_0_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
495 | struct amdgpu_semaphore *semaphore, | ||
496 | bool emit_wait) | ||
497 | { | ||
498 | uint64_t addr = semaphore->gpu_addr; | ||
499 | |||
500 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0)); | ||
501 | amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF); | ||
502 | |||
503 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0)); | ||
504 | amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF); | ||
505 | |||
506 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0)); | ||
507 | amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); | ||
508 | |||
509 | return true; | ||
510 | } | ||
511 | |||
512 | /** | ||
513 | * uvd_v5_0_ring_test_ring - register write test | 490 | * uvd_v5_0_ring_test_ring - register write test |
514 | * | 491 | * |
515 | * @ring: amdgpu_ring pointer | 492 | * @ring: amdgpu_ring pointer |
@@ -524,7 +501,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) | |||
524 | int r; | 501 | int r; |
525 | 502 | ||
526 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); | 503 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
527 | r = amdgpu_ring_lock(ring, 3); | 504 | r = amdgpu_ring_alloc(ring, 3); |
528 | if (r) { | 505 | if (r) { |
529 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | 506 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", |
530 | ring->idx, r); | 507 | ring->idx, r); |
@@ -532,7 +509,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) | |||
532 | } | 509 | } |
533 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); | 510 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); |
534 | amdgpu_ring_write(ring, 0xDEADBEEF); | 511 | amdgpu_ring_write(ring, 0xDEADBEEF); |
535 | amdgpu_ring_unlock_commit(ring); | 512 | amdgpu_ring_commit(ring); |
536 | for (i = 0; i < adev->usec_timeout; i++) { | 513 | for (i = 0; i < adev->usec_timeout; i++) { |
537 | tmp = RREG32(mmUVD_CONTEXT_ID); | 514 | tmp = RREG32(mmUVD_CONTEXT_ID); |
538 | if (tmp == 0xDEADBEEF) | 515 | if (tmp == 0xDEADBEEF) |
@@ -595,7 +572,7 @@ static int uvd_v5_0_ring_test_ib(struct amdgpu_ring *ring) | |||
595 | goto error; | 572 | goto error; |
596 | } | 573 | } |
597 | 574 | ||
598 | r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); | 575 | r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); |
599 | if (r) { | 576 | if (r) { |
600 | DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); | 577 | DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); |
601 | goto error; | 578 | goto error; |
@@ -751,6 +728,12 @@ static void uvd_v5_0_print_status(void *handle) | |||
751 | RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); | 728 | RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); |
752 | dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", | 729 | dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", |
753 | RREG32(mmUVD_CONTEXT_ID)); | 730 | RREG32(mmUVD_CONTEXT_ID)); |
731 | dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", | ||
732 | RREG32(mmUVD_UDEC_ADDR_CONFIG)); | ||
733 | dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", | ||
734 | RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); | ||
735 | dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", | ||
736 | RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); | ||
754 | } | 737 | } |
755 | 738 | ||
756 | static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev, | 739 | static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev, |
@@ -821,10 +804,10 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { | |||
821 | .parse_cs = amdgpu_uvd_ring_parse_cs, | 804 | .parse_cs = amdgpu_uvd_ring_parse_cs, |
822 | .emit_ib = uvd_v5_0_ring_emit_ib, | 805 | .emit_ib = uvd_v5_0_ring_emit_ib, |
823 | .emit_fence = uvd_v5_0_ring_emit_fence, | 806 | .emit_fence = uvd_v5_0_ring_emit_fence, |
824 | .emit_semaphore = uvd_v5_0_ring_emit_semaphore, | ||
825 | .test_ring = uvd_v5_0_ring_test_ring, | 807 | .test_ring = uvd_v5_0_ring_test_ring, |
826 | .test_ib = uvd_v5_0_ring_test_ib, | 808 | .test_ib = uvd_v5_0_ring_test_ib, |
827 | .insert_nop = amdgpu_ring_insert_nop, | 809 | .insert_nop = amdgpu_ring_insert_nop, |
810 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
828 | }; | 811 | }; |
829 | 812 | ||
830 | static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) | 813 | static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 3d5913926436..d4da1f04378c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | |||
@@ -157,7 +157,7 @@ static int uvd_v6_0_hw_init(void *handle) | |||
157 | goto done; | 157 | goto done; |
158 | } | 158 | } |
159 | 159 | ||
160 | r = amdgpu_ring_lock(ring, 10); | 160 | r = amdgpu_ring_alloc(ring, 10); |
161 | if (r) { | 161 | if (r) { |
162 | DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); | 162 | DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); |
163 | goto done; | 163 | goto done; |
@@ -182,7 +182,7 @@ static int uvd_v6_0_hw_init(void *handle) | |||
182 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); | 182 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); |
183 | amdgpu_ring_write(ring, 3); | 183 | amdgpu_ring_write(ring, 3); |
184 | 184 | ||
185 | amdgpu_ring_unlock_commit(ring); | 185 | amdgpu_ring_commit(ring); |
186 | 186 | ||
187 | done: | 187 | done: |
188 | if (!r) | 188 | if (!r) |
@@ -277,6 +277,10 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) | |||
277 | size = AMDGPU_UVD_HEAP_SIZE; | 277 | size = AMDGPU_UVD_HEAP_SIZE; |
278 | WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); | 278 | WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); |
279 | WREG32(mmUVD_VCPU_CACHE_SIZE2, size); | 279 | WREG32(mmUVD_VCPU_CACHE_SIZE2, size); |
280 | |||
281 | WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
282 | WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
283 | WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | ||
280 | } | 284 | } |
281 | 285 | ||
282 | static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, | 286 | static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, |
@@ -722,33 +726,6 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq | |||
722 | } | 726 | } |
723 | 727 | ||
724 | /** | 728 | /** |
725 | * uvd_v6_0_ring_emit_semaphore - emit semaphore command | ||
726 | * | ||
727 | * @ring: amdgpu_ring pointer | ||
728 | * @semaphore: semaphore to emit commands for | ||
729 | * @emit_wait: true if we should emit a wait command | ||
730 | * | ||
731 | * Emit a semaphore command (either wait or signal) to the UVD ring. | ||
732 | */ | ||
733 | static bool uvd_v6_0_ring_emit_semaphore(struct amdgpu_ring *ring, | ||
734 | struct amdgpu_semaphore *semaphore, | ||
735 | bool emit_wait) | ||
736 | { | ||
737 | uint64_t addr = semaphore->gpu_addr; | ||
738 | |||
739 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0)); | ||
740 | amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF); | ||
741 | |||
742 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0)); | ||
743 | amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF); | ||
744 | |||
745 | amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0)); | ||
746 | amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); | ||
747 | |||
748 | return true; | ||
749 | } | ||
750 | |||
751 | /** | ||
752 | * uvd_v6_0_ring_test_ring - register write test | 729 | * uvd_v6_0_ring_test_ring - register write test |
753 | * | 730 | * |
754 | * @ring: amdgpu_ring pointer | 731 | * @ring: amdgpu_ring pointer |
@@ -763,7 +740,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) | |||
763 | int r; | 740 | int r; |
764 | 741 | ||
765 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); | 742 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
766 | r = amdgpu_ring_lock(ring, 3); | 743 | r = amdgpu_ring_alloc(ring, 3); |
767 | if (r) { | 744 | if (r) { |
768 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | 745 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", |
769 | ring->idx, r); | 746 | ring->idx, r); |
@@ -771,7 +748,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) | |||
771 | } | 748 | } |
772 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); | 749 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); |
773 | amdgpu_ring_write(ring, 0xDEADBEEF); | 750 | amdgpu_ring_write(ring, 0xDEADBEEF); |
774 | amdgpu_ring_unlock_commit(ring); | 751 | amdgpu_ring_commit(ring); |
775 | for (i = 0; i < adev->usec_timeout; i++) { | 752 | for (i = 0; i < adev->usec_timeout; i++) { |
776 | tmp = RREG32(mmUVD_CONTEXT_ID); | 753 | tmp = RREG32(mmUVD_CONTEXT_ID); |
777 | if (tmp == 0xDEADBEEF) | 754 | if (tmp == 0xDEADBEEF) |
@@ -827,7 +804,7 @@ static int uvd_v6_0_ring_test_ib(struct amdgpu_ring *ring) | |||
827 | goto error; | 804 | goto error; |
828 | } | 805 | } |
829 | 806 | ||
830 | r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); | 807 | r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); |
831 | if (r) { | 808 | if (r) { |
832 | DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); | 809 | DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); |
833 | goto error; | 810 | goto error; |
@@ -974,6 +951,12 @@ static void uvd_v6_0_print_status(void *handle) | |||
974 | RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); | 951 | RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); |
975 | dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", | 952 | dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", |
976 | RREG32(mmUVD_CONTEXT_ID)); | 953 | RREG32(mmUVD_CONTEXT_ID)); |
954 | dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", | ||
955 | RREG32(mmUVD_UDEC_ADDR_CONFIG)); | ||
956 | dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", | ||
957 | RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); | ||
958 | dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", | ||
959 | RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); | ||
977 | } | 960 | } |
978 | 961 | ||
979 | static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev, | 962 | static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev, |
@@ -1062,10 +1045,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = { | |||
1062 | .parse_cs = amdgpu_uvd_ring_parse_cs, | 1045 | .parse_cs = amdgpu_uvd_ring_parse_cs, |
1063 | .emit_ib = uvd_v6_0_ring_emit_ib, | 1046 | .emit_ib = uvd_v6_0_ring_emit_ib, |
1064 | .emit_fence = uvd_v6_0_ring_emit_fence, | 1047 | .emit_fence = uvd_v6_0_ring_emit_fence, |
1065 | .emit_semaphore = uvd_v6_0_ring_emit_semaphore, | ||
1066 | .test_ring = uvd_v6_0_ring_test_ring, | 1048 | .test_ring = uvd_v6_0_ring_test_ring, |
1067 | .test_ib = uvd_v6_0_ring_test_ib, | 1049 | .test_ib = uvd_v6_0_ring_test_ib, |
1068 | .insert_nop = amdgpu_ring_insert_nop, | 1050 | .insert_nop = amdgpu_ring_insert_nop, |
1051 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
1069 | }; | 1052 | }; |
1070 | 1053 | ||
1071 | static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) | 1054 | static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 52ac7a8f1e58..9c804f436974 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | |||
@@ -639,10 +639,10 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { | |||
639 | .parse_cs = amdgpu_vce_ring_parse_cs, | 639 | .parse_cs = amdgpu_vce_ring_parse_cs, |
640 | .emit_ib = amdgpu_vce_ring_emit_ib, | 640 | .emit_ib = amdgpu_vce_ring_emit_ib, |
641 | .emit_fence = amdgpu_vce_ring_emit_fence, | 641 | .emit_fence = amdgpu_vce_ring_emit_fence, |
642 | .emit_semaphore = amdgpu_vce_ring_emit_semaphore, | ||
643 | .test_ring = amdgpu_vce_ring_test_ring, | 642 | .test_ring = amdgpu_vce_ring_test_ring, |
644 | .test_ib = amdgpu_vce_ring_test_ib, | 643 | .test_ib = amdgpu_vce_ring_test_ib, |
645 | .insert_nop = amdgpu_ring_insert_nop, | 644 | .insert_nop = amdgpu_ring_insert_nop, |
645 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
646 | }; | 646 | }; |
647 | 647 | ||
648 | static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev) | 648 | static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index e99af81e4aec..8f8d479061f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | |||
@@ -759,10 +759,10 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = { | |||
759 | .parse_cs = amdgpu_vce_ring_parse_cs, | 759 | .parse_cs = amdgpu_vce_ring_parse_cs, |
760 | .emit_ib = amdgpu_vce_ring_emit_ib, | 760 | .emit_ib = amdgpu_vce_ring_emit_ib, |
761 | .emit_fence = amdgpu_vce_ring_emit_fence, | 761 | .emit_fence = amdgpu_vce_ring_emit_fence, |
762 | .emit_semaphore = amdgpu_vce_ring_emit_semaphore, | ||
763 | .test_ring = amdgpu_vce_ring_test_ring, | 762 | .test_ring = amdgpu_vce_ring_test_ring, |
764 | .test_ib = amdgpu_vce_ring_test_ib, | 763 | .test_ib = amdgpu_vce_ring_test_ib, |
765 | .insert_nop = amdgpu_ring_insert_nop, | 764 | .insert_nop = amdgpu_ring_insert_nop, |
765 | .pad_ib = amdgpu_ring_generic_pad_ib, | ||
766 | }; | 766 | }; |
767 | 767 | ||
768 | static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev) | 768 | static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 89f5a1ff6f43..125003517544 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c | |||
@@ -74,6 +74,9 @@ | |||
74 | #include "uvd_v6_0.h" | 74 | #include "uvd_v6_0.h" |
75 | #include "vce_v3_0.h" | 75 | #include "vce_v3_0.h" |
76 | #include "amdgpu_powerplay.h" | 76 | #include "amdgpu_powerplay.h" |
77 | #if defined(CONFIG_DRM_AMD_ACP) | ||
78 | #include "amdgpu_acp.h" | ||
79 | #endif | ||
77 | 80 | ||
78 | /* | 81 | /* |
79 | * Indirect registers accessor | 82 | * Indirect registers accessor |
@@ -571,374 +574,12 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num, | |||
571 | return -EINVAL; | 574 | return -EINVAL; |
572 | } | 575 | } |
573 | 576 | ||
574 | static void vi_print_gpu_status_regs(struct amdgpu_device *adev) | ||
575 | { | ||
576 | dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", | ||
577 | RREG32(mmGRBM_STATUS)); | ||
578 | dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", | ||
579 | RREG32(mmGRBM_STATUS2)); | ||
580 | dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", | ||
581 | RREG32(mmGRBM_STATUS_SE0)); | ||
582 | dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", | ||
583 | RREG32(mmGRBM_STATUS_SE1)); | ||
584 | dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", | ||
585 | RREG32(mmGRBM_STATUS_SE2)); | ||
586 | dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", | ||
587 | RREG32(mmGRBM_STATUS_SE3)); | ||
588 | dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", | ||
589 | RREG32(mmSRBM_STATUS)); | ||
590 | dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", | ||
591 | RREG32(mmSRBM_STATUS2)); | ||
592 | dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n", | ||
593 | RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); | ||
594 | if (adev->sdma.num_instances > 1) { | ||
595 | dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n", | ||
596 | RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); | ||
597 | } | ||
598 | dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); | ||
599 | dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", | ||
600 | RREG32(mmCP_STALLED_STAT1)); | ||
601 | dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", | ||
602 | RREG32(mmCP_STALLED_STAT2)); | ||
603 | dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", | ||
604 | RREG32(mmCP_STALLED_STAT3)); | ||
605 | dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", | ||
606 | RREG32(mmCP_CPF_BUSY_STAT)); | ||
607 | dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", | ||
608 | RREG32(mmCP_CPF_STALLED_STAT1)); | ||
609 | dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); | ||
610 | dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); | ||
611 | dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", | ||
612 | RREG32(mmCP_CPC_STALLED_STAT1)); | ||
613 | dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); | ||
614 | } | ||
615 | |||
616 | /** | ||
617 | * vi_gpu_check_soft_reset - check which blocks are busy | ||
618 | * | ||
619 | * @adev: amdgpu_device pointer | ||
620 | * | ||
621 | * Check which blocks are busy and return the relevant reset | ||
622 | * mask to be used by vi_gpu_soft_reset(). | ||
623 | * Returns a mask of the blocks to be reset. | ||
624 | */ | ||
625 | u32 vi_gpu_check_soft_reset(struct amdgpu_device *adev) | ||
626 | { | ||
627 | u32 reset_mask = 0; | ||
628 | u32 tmp; | ||
629 | |||
630 | /* GRBM_STATUS */ | ||
631 | tmp = RREG32(mmGRBM_STATUS); | ||
632 | if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | | ||
633 | GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | | ||
634 | GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | | ||
635 | GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | | ||
636 | GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | | ||
637 | GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) | ||
638 | reset_mask |= AMDGPU_RESET_GFX; | ||
639 | |||
640 | if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) | ||
641 | reset_mask |= AMDGPU_RESET_CP; | ||
642 | |||
643 | /* GRBM_STATUS2 */ | ||
644 | tmp = RREG32(mmGRBM_STATUS2); | ||
645 | if (tmp & GRBM_STATUS2__RLC_BUSY_MASK) | ||
646 | reset_mask |= AMDGPU_RESET_RLC; | ||
647 | |||
648 | if (tmp & (GRBM_STATUS2__CPF_BUSY_MASK | | ||
649 | GRBM_STATUS2__CPC_BUSY_MASK | | ||
650 | GRBM_STATUS2__CPG_BUSY_MASK)) | ||
651 | reset_mask |= AMDGPU_RESET_CP; | ||
652 | |||
653 | /* SRBM_STATUS2 */ | ||
654 | tmp = RREG32(mmSRBM_STATUS2); | ||
655 | if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) | ||
656 | reset_mask |= AMDGPU_RESET_DMA; | ||
657 | |||
658 | if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) | ||
659 | reset_mask |= AMDGPU_RESET_DMA1; | ||
660 | |||
661 | /* SRBM_STATUS */ | ||
662 | tmp = RREG32(mmSRBM_STATUS); | ||
663 | |||
664 | if (tmp & SRBM_STATUS__IH_BUSY_MASK) | ||
665 | reset_mask |= AMDGPU_RESET_IH; | ||
666 | |||
667 | if (tmp & SRBM_STATUS__SEM_BUSY_MASK) | ||
668 | reset_mask |= AMDGPU_RESET_SEM; | ||
669 | |||
670 | if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK) | ||
671 | reset_mask |= AMDGPU_RESET_GRBM; | ||
672 | |||
673 | if (adev->asic_type != CHIP_TOPAZ) { | ||
674 | if (tmp & (SRBM_STATUS__UVD_RQ_PENDING_MASK | | ||
675 | SRBM_STATUS__UVD_BUSY_MASK)) | ||
676 | reset_mask |= AMDGPU_RESET_UVD; | ||
677 | } | ||
678 | |||
679 | if (tmp & SRBM_STATUS__VMC_BUSY_MASK) | ||
680 | reset_mask |= AMDGPU_RESET_VMC; | ||
681 | |||
682 | if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | | ||
683 | SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) | ||
684 | reset_mask |= AMDGPU_RESET_MC; | ||
685 | |||
686 | /* SDMA0_STATUS_REG */ | ||
687 | tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); | ||
688 | if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) | ||
689 | reset_mask |= AMDGPU_RESET_DMA; | ||
690 | |||
691 | /* SDMA1_STATUS_REG */ | ||
692 | if (adev->sdma.num_instances > 1) { | ||
693 | tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); | ||
694 | if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) | ||
695 | reset_mask |= AMDGPU_RESET_DMA1; | ||
696 | } | ||
697 | #if 0 | ||
698 | /* VCE_STATUS */ | ||
699 | if (adev->asic_type != CHIP_TOPAZ) { | ||
700 | tmp = RREG32(mmVCE_STATUS); | ||
701 | if (tmp & VCE_STATUS__VCPU_REPORT_RB0_BUSY_MASK) | ||
702 | reset_mask |= AMDGPU_RESET_VCE; | ||
703 | if (tmp & VCE_STATUS__VCPU_REPORT_RB1_BUSY_MASK) | ||
704 | reset_mask |= AMDGPU_RESET_VCE1; | ||
705 | |||
706 | } | ||
707 | |||
708 | if (adev->asic_type != CHIP_TOPAZ) { | ||
709 | if (amdgpu_display_is_display_hung(adev)) | ||
710 | reset_mask |= AMDGPU_RESET_DISPLAY; | ||
711 | } | ||
712 | #endif | ||
713 | |||
714 | /* Skip MC reset as it's most likely not hung, just busy */ | ||
715 | if (reset_mask & AMDGPU_RESET_MC) { | ||
716 | DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); | ||
717 | reset_mask &= ~AMDGPU_RESET_MC; | ||
718 | } | ||
719 | |||
720 | return reset_mask; | ||
721 | } | ||
722 | |||
723 | /** | ||
724 | * vi_gpu_soft_reset - soft reset GPU | ||
725 | * | ||
726 | * @adev: amdgpu_device pointer | ||
727 | * @reset_mask: mask of which blocks to reset | ||
728 | * | ||
729 | * Soft reset the blocks specified in @reset_mask. | ||
730 | */ | ||
731 | static void vi_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask) | ||
732 | { | ||
733 | struct amdgpu_mode_mc_save save; | ||
734 | u32 grbm_soft_reset = 0, srbm_soft_reset = 0; | ||
735 | u32 tmp; | ||
736 | |||
737 | if (reset_mask == 0) | ||
738 | return; | ||
739 | |||
740 | dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask); | ||
741 | |||
742 | vi_print_gpu_status_regs(adev); | ||
743 | dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", | ||
744 | RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); | ||
745 | dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", | ||
746 | RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); | ||
747 | |||
748 | /* disable CG/PG */ | ||
749 | |||
750 | /* stop the rlc */ | ||
751 | //XXX | ||
752 | //gfx_v8_0_rlc_stop(adev); | ||
753 | |||
754 | /* Disable GFX parsing/prefetching */ | ||
755 | tmp = RREG32(mmCP_ME_CNTL); | ||
756 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); | ||
757 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); | ||
758 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); | ||
759 | WREG32(mmCP_ME_CNTL, tmp); | ||
760 | |||
761 | /* Disable MEC parsing/prefetching */ | ||
762 | tmp = RREG32(mmCP_MEC_CNTL); | ||
763 | tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1); | ||
764 | tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1); | ||
765 | WREG32(mmCP_MEC_CNTL, tmp); | ||
766 | |||
767 | if (reset_mask & AMDGPU_RESET_DMA) { | ||
768 | /* sdma0 */ | ||
769 | tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); | ||
770 | tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); | ||
771 | WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); | ||
772 | } | ||
773 | if (reset_mask & AMDGPU_RESET_DMA1) { | ||
774 | /* sdma1 */ | ||
775 | tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); | ||
776 | tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); | ||
777 | WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); | ||
778 | } | ||
779 | |||
780 | gmc_v8_0_mc_stop(adev, &save); | ||
781 | if (amdgpu_asic_wait_for_mc_idle(adev)) { | ||
782 | dev_warn(adev->dev, "Wait for MC idle timed out!\n"); | ||
783 | } | ||
784 | |||
785 | if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) { | ||
786 | grbm_soft_reset = | ||
787 | REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1); | ||
788 | grbm_soft_reset = | ||
789 | REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); | ||
790 | } | ||
791 | |||
792 | if (reset_mask & AMDGPU_RESET_CP) { | ||
793 | grbm_soft_reset = | ||
794 | REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1); | ||
795 | srbm_soft_reset = | ||
796 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); | ||
797 | } | ||
798 | |||
799 | if (reset_mask & AMDGPU_RESET_DMA) | ||
800 | srbm_soft_reset = | ||
801 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA, 1); | ||
802 | |||
803 | if (reset_mask & AMDGPU_RESET_DMA1) | ||
804 | srbm_soft_reset = | ||
805 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1, 1); | ||
806 | |||
807 | if (reset_mask & AMDGPU_RESET_DISPLAY) | ||
808 | srbm_soft_reset = | ||
809 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_DC, 1); | ||
810 | |||
811 | if (reset_mask & AMDGPU_RESET_RLC) | ||
812 | grbm_soft_reset = | ||
813 | REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); | ||
814 | |||
815 | if (reset_mask & AMDGPU_RESET_SEM) | ||
816 | srbm_soft_reset = | ||
817 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); | ||
818 | |||
819 | if (reset_mask & AMDGPU_RESET_IH) | ||
820 | srbm_soft_reset = | ||
821 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_IH, 1); | ||
822 | |||
823 | if (reset_mask & AMDGPU_RESET_GRBM) | ||
824 | srbm_soft_reset = | ||
825 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); | ||
826 | |||
827 | if (reset_mask & AMDGPU_RESET_VMC) | ||
828 | srbm_soft_reset = | ||
829 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VMC, 1); | ||
830 | |||
831 | if (reset_mask & AMDGPU_RESET_UVD) | ||
832 | srbm_soft_reset = | ||
833 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); | ||
834 | |||
835 | if (reset_mask & AMDGPU_RESET_VCE) | ||
836 | srbm_soft_reset = | ||
837 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); | ||
838 | |||
839 | if (reset_mask & AMDGPU_RESET_VCE) | ||
840 | srbm_soft_reset = | ||
841 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); | ||
842 | |||
843 | if (!(adev->flags & AMD_IS_APU)) { | ||
844 | if (reset_mask & AMDGPU_RESET_MC) | ||
845 | srbm_soft_reset = | ||
846 | REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_MC, 1); | ||
847 | } | ||
848 | |||
849 | if (grbm_soft_reset) { | ||
850 | tmp = RREG32(mmGRBM_SOFT_RESET); | ||
851 | tmp |= grbm_soft_reset; | ||
852 | dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); | ||
853 | WREG32(mmGRBM_SOFT_RESET, tmp); | ||
854 | tmp = RREG32(mmGRBM_SOFT_RESET); | ||
855 | |||
856 | udelay(50); | ||
857 | |||
858 | tmp &= ~grbm_soft_reset; | ||
859 | WREG32(mmGRBM_SOFT_RESET, tmp); | ||
860 | tmp = RREG32(mmGRBM_SOFT_RESET); | ||
861 | } | ||
862 | |||
863 | if (srbm_soft_reset) { | ||
864 | tmp = RREG32(mmSRBM_SOFT_RESET); | ||
865 | tmp |= srbm_soft_reset; | ||
866 | dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); | ||
867 | WREG32(mmSRBM_SOFT_RESET, tmp); | ||
868 | tmp = RREG32(mmSRBM_SOFT_RESET); | ||
869 | |||
870 | udelay(50); | ||
871 | |||
872 | tmp &= ~srbm_soft_reset; | ||
873 | WREG32(mmSRBM_SOFT_RESET, tmp); | ||
874 | tmp = RREG32(mmSRBM_SOFT_RESET); | ||
875 | } | ||
876 | |||
877 | /* Wait a little for things to settle down */ | ||
878 | udelay(50); | ||
879 | |||
880 | gmc_v8_0_mc_resume(adev, &save); | ||
881 | udelay(50); | ||
882 | |||
883 | vi_print_gpu_status_regs(adev); | ||
884 | } | ||
885 | |||
886 | static void vi_gpu_pci_config_reset(struct amdgpu_device *adev) | 577 | static void vi_gpu_pci_config_reset(struct amdgpu_device *adev) |
887 | { | 578 | { |
888 | struct amdgpu_mode_mc_save save; | 579 | u32 i; |
889 | u32 tmp, i; | ||
890 | 580 | ||
891 | dev_info(adev->dev, "GPU pci config reset\n"); | 581 | dev_info(adev->dev, "GPU pci config reset\n"); |
892 | 582 | ||
893 | /* disable dpm? */ | ||
894 | |||
895 | /* disable cg/pg */ | ||
896 | |||
897 | /* Disable GFX parsing/prefetching */ | ||
898 | tmp = RREG32(mmCP_ME_CNTL); | ||
899 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); | ||
900 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); | ||
901 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); | ||
902 | WREG32(mmCP_ME_CNTL, tmp); | ||
903 | |||
904 | /* Disable MEC parsing/prefetching */ | ||
905 | tmp = RREG32(mmCP_MEC_CNTL); | ||
906 | tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1); | ||
907 | tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1); | ||
908 | WREG32(mmCP_MEC_CNTL, tmp); | ||
909 | |||
910 | /* Disable GFX parsing/prefetching */ | ||
911 | WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | | ||
912 | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); | ||
913 | |||
914 | /* Disable MEC parsing/prefetching */ | ||
915 | WREG32(mmCP_MEC_CNTL, | ||
916 | CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); | ||
917 | |||
918 | /* sdma0 */ | ||
919 | tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); | ||
920 | tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); | ||
921 | WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); | ||
922 | |||
923 | /* sdma1 */ | ||
924 | tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); | ||
925 | tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); | ||
926 | WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); | ||
927 | |||
928 | /* XXX other engines? */ | ||
929 | |||
930 | /* halt the rlc, disable cp internal ints */ | ||
931 | //XXX | ||
932 | //gfx_v8_0_rlc_stop(adev); | ||
933 | |||
934 | udelay(50); | ||
935 | |||
936 | /* disable mem access */ | ||
937 | gmc_v8_0_mc_stop(adev, &save); | ||
938 | if (amdgpu_asic_wait_for_mc_idle(adev)) { | ||
939 | dev_warn(adev->dev, "Wait for MC idle timed out!\n"); | ||
940 | } | ||
941 | |||
942 | /* disable BM */ | 583 | /* disable BM */ |
943 | pci_clear_master(adev->pdev); | 584 | pci_clear_master(adev->pdev); |
944 | /* reset */ | 585 | /* reset */ |
@@ -978,26 +619,11 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun | |||
978 | */ | 619 | */ |
979 | static int vi_asic_reset(struct amdgpu_device *adev) | 620 | static int vi_asic_reset(struct amdgpu_device *adev) |
980 | { | 621 | { |
981 | u32 reset_mask; | 622 | vi_set_bios_scratch_engine_hung(adev, true); |
982 | |||
983 | reset_mask = vi_gpu_check_soft_reset(adev); | ||
984 | |||
985 | if (reset_mask) | ||
986 | vi_set_bios_scratch_engine_hung(adev, true); | ||
987 | |||
988 | /* try soft reset */ | ||
989 | vi_gpu_soft_reset(adev, reset_mask); | ||
990 | |||
991 | reset_mask = vi_gpu_check_soft_reset(adev); | ||
992 | 623 | ||
993 | /* try pci config reset */ | 624 | vi_gpu_pci_config_reset(adev); |
994 | if (reset_mask && amdgpu_hard_reset) | ||
995 | vi_gpu_pci_config_reset(adev); | ||
996 | 625 | ||
997 | reset_mask = vi_gpu_check_soft_reset(adev); | 626 | vi_set_bios_scratch_engine_hung(adev, false); |
998 | |||
999 | if (!reset_mask) | ||
1000 | vi_set_bios_scratch_engine_hung(adev, false); | ||
1001 | 627 | ||
1002 | return 0; | 628 | return 0; |
1003 | } | 629 | } |
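The net effect of this hunk: vi_asic_reset() no longer probes busy blocks or attempts a targeted soft reset. It unconditionally flags the engines as hung in the bios scratch registers, performs a full PCI config reset, then clears the flag, trading the deleted per-block granularity for a much simpler and more predictable path.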
@@ -1347,6 +973,15 @@ static const struct amdgpu_ip_block_version cz_ip_blocks[] = | |||
1347 | .rev = 0, | 973 | .rev = 0, |
1348 | .funcs = &vce_v3_0_ip_funcs, | 974 | .funcs = &vce_v3_0_ip_funcs, |
1349 | }, | 975 | }, |
976 | #if defined(CONFIG_DRM_AMD_ACP) | ||
977 | { | ||
978 | .type = AMD_IP_BLOCK_TYPE_ACP, | ||
979 | .major = 2, | ||
980 | .minor = 2, | ||
981 | .rev = 0, | ||
982 | .funcs = &acp_ip_funcs, | ||
983 | }, | ||
984 | #endif | ||
1350 | }; | 985 | }; |
1351 | 986 | ||
1352 | int vi_set_ip_blocks(struct amdgpu_device *adev) | 987 | int vi_set_ip_blocks(struct amdgpu_device *adev) |
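With this hunk the audio coprocessor becomes a first-class IP block (type AMD_IP_BLOCK_TYPE_ACP, version 2.2.0) on the cz (Carrizo) block list, compiled in only under CONFIG_DRM_AMD_ACP, the same guard used for the amdgpu_acp.h include above; a kernel built without the option simply omits the ACP block from the list.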
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1195d06f55bc..15ff8b2c26e7 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h | |||
@@ -73,6 +73,7 @@ enum amd_ip_block_type { | |||
73 | AMD_IP_BLOCK_TYPE_SDMA, | 73 | AMD_IP_BLOCK_TYPE_SDMA, |
74 | AMD_IP_BLOCK_TYPE_UVD, | 74 | AMD_IP_BLOCK_TYPE_UVD, |
75 | AMD_IP_BLOCK_TYPE_VCE, | 75 | AMD_IP_BLOCK_TYPE_VCE, |
76 | AMD_IP_BLOCK_TYPE_ACP, | ||
76 | }; | 77 | }; |
77 | 78 | ||
78 | enum amd_clockgating_state { | 79 | enum amd_clockgating_state { |
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h index dc52ea0df4b4..d3ccf5a86de0 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h | |||
@@ -1379,6 +1379,7 @@ | |||
1379 | #define mmDC_GPIO_PAD_STRENGTH_1 0x1978 | 1379 | #define mmDC_GPIO_PAD_STRENGTH_1 0x1978 |
1380 | #define mmDC_GPIO_PAD_STRENGTH_2 0x1979 | 1380 | #define mmDC_GPIO_PAD_STRENGTH_2 0x1979 |
1381 | #define mmPHY_AUX_CNTL 0x197f | 1381 | #define mmPHY_AUX_CNTL 0x197f |
1382 | #define mmDC_GPIO_I2CPAD_MASK 0x1974 | ||
1382 | #define mmDC_GPIO_I2CPAD_A 0x1975 | 1383 | #define mmDC_GPIO_I2CPAD_A 0x1975 |
1383 | #define mmDC_GPIO_I2CPAD_EN 0x1976 | 1384 | #define mmDC_GPIO_I2CPAD_EN 0x1976 |
1384 | #define mmDC_GPIO_I2CPAD_Y 0x1977 | 1385 | #define mmDC_GPIO_I2CPAD_Y 0x1977 |
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h new file mode 100644 index 000000000000..6bea30ef3df5 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h | |||
@@ -0,0 +1,1117 @@ | |||
1 | /* | ||
2 | * DCE_8_0 Register documentation | ||
3 | * | ||
4 | * Copyright (C) 2016 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included | ||
14 | * in all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN | ||
20 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
21 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
22 | */ | ||
23 | |||
24 | #ifndef DCE_8_0_ENUM_H | ||
25 | #define DCE_8_0_ENUM_H | ||
26 | |||
27 | typedef enum SurfaceEndian { | ||
28 | ENDIAN_NONE = 0x0, | ||
29 | ENDIAN_8IN16 = 0x1, | ||
30 | ENDIAN_8IN32 = 0x2, | ||
31 | ENDIAN_8IN64 = 0x3, | ||
32 | } SurfaceEndian; | ||
33 | typedef enum ArrayMode { | ||
34 | ARRAY_LINEAR_GENERAL = 0x0, | ||
35 | ARRAY_LINEAR_ALIGNED = 0x1, | ||
36 | ARRAY_1D_TILED_THIN1 = 0x2, | ||
37 | ARRAY_1D_TILED_THICK = 0x3, | ||
38 | ARRAY_2D_TILED_THIN1 = 0x4, | ||
39 | ARRAY_PRT_TILED_THIN1 = 0x5, | ||
40 | ARRAY_PRT_2D_TILED_THIN1 = 0x6, | ||
41 | ARRAY_2D_TILED_THICK = 0x7, | ||
42 | ARRAY_2D_TILED_XTHICK = 0x8, | ||
43 | ARRAY_PRT_TILED_THICK = 0x9, | ||
44 | ARRAY_PRT_2D_TILED_THICK = 0xa, | ||
45 | ARRAY_PRT_3D_TILED_THIN1 = 0xb, | ||
46 | ARRAY_3D_TILED_THIN1 = 0xc, | ||
47 | ARRAY_3D_TILED_THICK = 0xd, | ||
48 | ARRAY_3D_TILED_XTHICK = 0xe, | ||
49 | ARRAY_PRT_3D_TILED_THICK = 0xf, | ||
50 | } ArrayMode; | ||
51 | typedef enum PipeTiling { | ||
52 | CONFIG_1_PIPE = 0x0, | ||
53 | CONFIG_2_PIPE = 0x1, | ||
54 | CONFIG_4_PIPE = 0x2, | ||
55 | CONFIG_8_PIPE = 0x3, | ||
56 | } PipeTiling; | ||
57 | typedef enum BankTiling { | ||
58 | CONFIG_4_BANK = 0x0, | ||
59 | CONFIG_8_BANK = 0x1, | ||
60 | } BankTiling; | ||
61 | typedef enum GroupInterleave { | ||
62 | CONFIG_256B_GROUP = 0x0, | ||
63 | CONFIG_512B_GROUP = 0x1, | ||
64 | } GroupInterleave; | ||
65 | typedef enum RowTiling { | ||
66 | CONFIG_1KB_ROW = 0x0, | ||
67 | CONFIG_2KB_ROW = 0x1, | ||
68 | CONFIG_4KB_ROW = 0x2, | ||
69 | CONFIG_8KB_ROW = 0x3, | ||
70 | CONFIG_1KB_ROW_OPT = 0x4, | ||
71 | CONFIG_2KB_ROW_OPT = 0x5, | ||
72 | CONFIG_4KB_ROW_OPT = 0x6, | ||
73 | CONFIG_8KB_ROW_OPT = 0x7, | ||
74 | } RowTiling; | ||
75 | typedef enum BankSwapBytes { | ||
76 | CONFIG_128B_SWAPS = 0x0, | ||
77 | CONFIG_256B_SWAPS = 0x1, | ||
78 | CONFIG_512B_SWAPS = 0x2, | ||
79 | CONFIG_1KB_SWAPS = 0x3, | ||
80 | } BankSwapBytes; | ||
81 | typedef enum SampleSplitBytes { | ||
82 | CONFIG_1KB_SPLIT = 0x0, | ||
83 | CONFIG_2KB_SPLIT = 0x1, | ||
84 | CONFIG_4KB_SPLIT = 0x2, | ||
85 | CONFIG_8KB_SPLIT = 0x3, | ||
86 | } SampleSplitBytes; | ||
87 | typedef enum NumPipes { | ||
88 | ADDR_CONFIG_1_PIPE = 0x0, | ||
89 | ADDR_CONFIG_2_PIPE = 0x1, | ||
90 | ADDR_CONFIG_4_PIPE = 0x2, | ||
91 | ADDR_CONFIG_8_PIPE = 0x3, | ||
92 | } NumPipes; | ||
93 | typedef enum PipeInterleaveSize { | ||
94 | ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x0, | ||
95 | ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x1, | ||
96 | } PipeInterleaveSize; | ||
97 | typedef enum BankInterleaveSize { | ||
98 | ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x0, | ||
99 | ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x1, | ||
100 | ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x2, | ||
101 | ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x3, | ||
102 | } BankInterleaveSize; | ||
103 | typedef enum NumShaderEngines { | ||
104 | ADDR_CONFIG_1_SHADER_ENGINE = 0x0, | ||
105 | ADDR_CONFIG_2_SHADER_ENGINE = 0x1, | ||
106 | } NumShaderEngines; | ||
107 | typedef enum ShaderEngineTileSize { | ||
108 | ADDR_CONFIG_SE_TILE_16 = 0x0, | ||
109 | ADDR_CONFIG_SE_TILE_32 = 0x1, | ||
110 | } ShaderEngineTileSize; | ||
111 | typedef enum NumGPUs { | ||
112 | ADDR_CONFIG_1_GPU = 0x0, | ||
113 | ADDR_CONFIG_2_GPU = 0x1, | ||
114 | ADDR_CONFIG_4_GPU = 0x2, | ||
115 | } NumGPUs; | ||
116 | typedef enum MultiGPUTileSize { | ||
117 | ADDR_CONFIG_GPU_TILE_16 = 0x0, | ||
118 | ADDR_CONFIG_GPU_TILE_32 = 0x1, | ||
119 | ADDR_CONFIG_GPU_TILE_64 = 0x2, | ||
120 | ADDR_CONFIG_GPU_TILE_128 = 0x3, | ||
121 | } MultiGPUTileSize; | ||
122 | typedef enum RowSize { | ||
123 | ADDR_CONFIG_1KB_ROW = 0x0, | ||
124 | ADDR_CONFIG_2KB_ROW = 0x1, | ||
125 | ADDR_CONFIG_4KB_ROW = 0x2, | ||
126 | } RowSize; | ||
127 | typedef enum NumLowerPipes { | ||
128 | ADDR_CONFIG_1_LOWER_PIPES = 0x0, | ||
129 | ADDR_CONFIG_2_LOWER_PIPES = 0x1, | ||
130 | } NumLowerPipes; | ||
131 | typedef enum DebugBlockId { | ||
132 | DBG_CLIENT_BLKID_RESERVED = 0x0, | ||
133 | DBG_CLIENT_BLKID_dbg = 0x1, | ||
134 | DBG_CLIENT_BLKID_uvdu_0 = 0x2, | ||
135 | DBG_CLIENT_BLKID_uvdu_1 = 0x3, | ||
136 | DBG_CLIENT_BLKID_uvdu_2 = 0x4, | ||
137 | DBG_CLIENT_BLKID_uvdu_3 = 0x5, | ||
138 | DBG_CLIENT_BLKID_uvdu_4 = 0x6, | ||
139 | DBG_CLIENT_BLKID_uvdu_5 = 0x7, | ||
140 | DBG_CLIENT_BLKID_uvdu_6 = 0x8, | ||
141 | DBG_CLIENT_BLKID_uvdm_0 = 0x9, | ||
142 | DBG_CLIENT_BLKID_uvdm_1 = 0xa, | ||
143 | DBG_CLIENT_BLKID_uvdm_2 = 0xb, | ||
144 | DBG_CLIENT_BLKID_uvdm_3 = 0xc, | ||
145 | DBG_CLIENT_BLKID_vcea_0 = 0xd, | ||
146 | DBG_CLIENT_BLKID_vcea_1 = 0xe, | ||
147 | DBG_CLIENT_BLKID_vcea_2 = 0xf, | ||
148 | DBG_CLIENT_BLKID_vcea_3 = 0x10, | ||
149 | DBG_CLIENT_BLKID_vcea_4 = 0x11, | ||
150 | DBG_CLIENT_BLKID_vcea_5 = 0x12, | ||
151 | DBG_CLIENT_BLKID_vcea_6 = 0x13, | ||
152 | DBG_CLIENT_BLKID_vceb_0 = 0x14, | ||
153 | DBG_CLIENT_BLKID_vceb_1 = 0x15, | ||
154 | DBG_CLIENT_BLKID_vceb_2 = 0x16, | ||
155 | DBG_CLIENT_BLKID_dco = 0x17, | ||
156 | DBG_CLIENT_BLKID_xdma = 0x18, | ||
157 | DBG_CLIENT_BLKID_smu_0 = 0x19, | ||
158 | DBG_CLIENT_BLKID_smu_1 = 0x1a, | ||
159 | DBG_CLIENT_BLKID_smu_2 = 0x1b, | ||
160 | DBG_CLIENT_BLKID_gck = 0x1c, | ||
161 | DBG_CLIENT_BLKID_tmonw0 = 0x1d, | ||
162 | DBG_CLIENT_BLKID_tmonw1 = 0x1e, | ||
163 | DBG_CLIENT_BLKID_grbm = 0x1f, | ||
164 | DBG_CLIENT_BLKID_rlc = 0x20, | ||
165 | DBG_CLIENT_BLKID_ds0 = 0x21, | ||
166 | DBG_CLIENT_BLKID_cpg_0 = 0x22, | ||
167 | DBG_CLIENT_BLKID_cpg_1 = 0x23, | ||
168 | DBG_CLIENT_BLKID_cpc_0 = 0x24, | ||
169 | DBG_CLIENT_BLKID_cpc_1 = 0x25, | ||
170 | DBG_CLIENT_BLKID_cpf = 0x26, | ||
171 | DBG_CLIENT_BLKID_scf0 = 0x27, | ||
172 | DBG_CLIENT_BLKID_scf1 = 0x28, | ||
173 | DBG_CLIENT_BLKID_scf2 = 0x29, | ||
174 | DBG_CLIENT_BLKID_scf3 = 0x2a, | ||
175 | DBG_CLIENT_BLKID_pc0 = 0x2b, | ||
176 | DBG_CLIENT_BLKID_pc1 = 0x2c, | ||
177 | DBG_CLIENT_BLKID_pc2 = 0x2d, | ||
178 | DBG_CLIENT_BLKID_pc3 = 0x2e, | ||
179 | DBG_CLIENT_BLKID_vgt0 = 0x2f, | ||
180 | DBG_CLIENT_BLKID_vgt1 = 0x30, | ||
181 | DBG_CLIENT_BLKID_vgt2 = 0x31, | ||
182 | DBG_CLIENT_BLKID_vgt3 = 0x32, | ||
183 | DBG_CLIENT_BLKID_sx00 = 0x33, | ||
184 | DBG_CLIENT_BLKID_sx10 = 0x34, | ||
185 | DBG_CLIENT_BLKID_sx20 = 0x35, | ||
186 | DBG_CLIENT_BLKID_sx30 = 0x36, | ||
187 | DBG_CLIENT_BLKID_cb001 = 0x37, | ||
188 | DBG_CLIENT_BLKID_cb200 = 0x38, | ||
189 | DBG_CLIENT_BLKID_cb201 = 0x39, | ||
190 | DBG_CLIENT_BLKID_cbr0 = 0x3a, | ||
191 | DBG_CLIENT_BLKID_cb000 = 0x3b, | ||
192 | DBG_CLIENT_BLKID_cb101 = 0x3c, | ||
193 | DBG_CLIENT_BLKID_cb300 = 0x3d, | ||
194 | DBG_CLIENT_BLKID_cb301 = 0x3e, | ||
195 | DBG_CLIENT_BLKID_cbr1 = 0x3f, | ||
196 | DBG_CLIENT_BLKID_cb100 = 0x40, | ||
197 | DBG_CLIENT_BLKID_ia0 = 0x41, | ||
198 | DBG_CLIENT_BLKID_ia1 = 0x42, | ||
199 | DBG_CLIENT_BLKID_bci0 = 0x43, | ||
200 | DBG_CLIENT_BLKID_bci1 = 0x44, | ||
201 | DBG_CLIENT_BLKID_bci2 = 0x45, | ||
202 | DBG_CLIENT_BLKID_bci3 = 0x46, | ||
203 | DBG_CLIENT_BLKID_pa0 = 0x47, | ||
204 | DBG_CLIENT_BLKID_pa1 = 0x48, | ||
205 | DBG_CLIENT_BLKID_spim0 = 0x49, | ||
206 | DBG_CLIENT_BLKID_spim1 = 0x4a, | ||
207 | DBG_CLIENT_BLKID_spim2 = 0x4b, | ||
208 | DBG_CLIENT_BLKID_spim3 = 0x4c, | ||
209 | DBG_CLIENT_BLKID_sdma = 0x4d, | ||
210 | DBG_CLIENT_BLKID_ih = 0x4e, | ||
211 | DBG_CLIENT_BLKID_sem = 0x4f, | ||
212 | DBG_CLIENT_BLKID_srbm = 0x50, | ||
213 | DBG_CLIENT_BLKID_hdp = 0x51, | ||
214 | DBG_CLIENT_BLKID_acp_0 = 0x52, | ||
215 | DBG_CLIENT_BLKID_acp_1 = 0x53, | ||
216 | DBG_CLIENT_BLKID_sam = 0x54, | ||
217 | DBG_CLIENT_BLKID_mcc0 = 0x55, | ||
218 | DBG_CLIENT_BLKID_mcc1 = 0x56, | ||
219 | DBG_CLIENT_BLKID_mcc2 = 0x57, | ||
220 | DBG_CLIENT_BLKID_mcc3 = 0x58, | ||
221 | DBG_CLIENT_BLKID_mcd0 = 0x59, | ||
222 | DBG_CLIENT_BLKID_mcd1 = 0x5a, | ||
223 | DBG_CLIENT_BLKID_mcd2 = 0x5b, | ||
224 | DBG_CLIENT_BLKID_mcd3 = 0x5c, | ||
225 | DBG_CLIENT_BLKID_mcb = 0x5d, | ||
226 | DBG_CLIENT_BLKID_vmc = 0x5e, | ||
227 | DBG_CLIENT_BLKID_gmcon = 0x5f, | ||
228 | DBG_CLIENT_BLKID_gdc_0 = 0x60, | ||
229 | DBG_CLIENT_BLKID_gdc_1 = 0x61, | ||
230 | DBG_CLIENT_BLKID_gdc_2 = 0x62, | ||
231 | DBG_CLIENT_BLKID_gdc_3 = 0x63, | ||
232 | DBG_CLIENT_BLKID_gdc_4 = 0x64, | ||
233 | DBG_CLIENT_BLKID_gdc_5 = 0x65, | ||
234 | DBG_CLIENT_BLKID_gdc_6 = 0x66, | ||
235 | DBG_CLIENT_BLKID_gdc_7 = 0x67, | ||
236 | DBG_CLIENT_BLKID_gdc_8 = 0x68, | ||
237 | DBG_CLIENT_BLKID_gdc_9 = 0x69, | ||
238 | DBG_CLIENT_BLKID_gdc_10 = 0x6a, | ||
239 | DBG_CLIENT_BLKID_gdc_11 = 0x6b, | ||
240 | DBG_CLIENT_BLKID_gdc_12 = 0x6c, | ||
241 | DBG_CLIENT_BLKID_gdc_13 = 0x6d, | ||
242 | DBG_CLIENT_BLKID_gdc_14 = 0x6e, | ||
243 | DBG_CLIENT_BLKID_gdc_15 = 0x6f, | ||
244 | DBG_CLIENT_BLKID_gdc_16 = 0x70, | ||
245 | DBG_CLIENT_BLKID_gdc_17 = 0x71, | ||
246 | DBG_CLIENT_BLKID_gdc_18 = 0x72, | ||
247 | DBG_CLIENT_BLKID_gdc_19 = 0x73, | ||
248 | DBG_CLIENT_BLKID_gdc_20 = 0x74, | ||
249 | DBG_CLIENT_BLKID_gdc_21 = 0x75, | ||
250 | DBG_CLIENT_BLKID_gdc_22 = 0x76, | ||
251 | DBG_CLIENT_BLKID_wd = 0x77, | ||
252 | DBG_CLIENT_BLKID_sdma_0 = 0x78, | ||
253 | DBG_CLIENT_BLKID_sdma_1 = 0x79, | ||
254 | } DebugBlockId; | ||
255 | typedef enum DebugBlockId_OLD { | ||
256 | DBG_BLOCK_ID_RESERVED = 0x0, | ||
257 | DBG_BLOCK_ID_DBG = 0x1, | ||
258 | DBG_BLOCK_ID_VMC = 0x2, | ||
259 | DBG_BLOCK_ID_PDMA = 0x3, | ||
260 | DBG_BLOCK_ID_CG = 0x4, | ||
261 | DBG_BLOCK_ID_SRBM = 0x5, | ||
262 | DBG_BLOCK_ID_GRBM = 0x6, | ||
263 | DBG_BLOCK_ID_RLC = 0x7, | ||
264 | DBG_BLOCK_ID_CSC = 0x8, | ||
265 | DBG_BLOCK_ID_SEM = 0x9, | ||
266 | DBG_BLOCK_ID_IH = 0xa, | ||
267 | DBG_BLOCK_ID_SC = 0xb, | ||
268 | DBG_BLOCK_ID_SQ = 0xc, | ||
269 | DBG_BLOCK_ID_AVP = 0xd, | ||
270 | DBG_BLOCK_ID_GMCON = 0xe, | ||
271 | DBG_BLOCK_ID_SMU = 0xf, | ||
272 | DBG_BLOCK_ID_DMA0 = 0x10, | ||
273 | DBG_BLOCK_ID_DMA1 = 0x11, | ||
274 | DBG_BLOCK_ID_SPIM = 0x12, | ||
275 | DBG_BLOCK_ID_GDS = 0x13, | ||
276 | DBG_BLOCK_ID_SPIS = 0x14, | ||
277 | DBG_BLOCK_ID_UNUSED0 = 0x15, | ||
278 | DBG_BLOCK_ID_PA0 = 0x16, | ||
279 | DBG_BLOCK_ID_PA1 = 0x17, | ||
280 | DBG_BLOCK_ID_CP0 = 0x18, | ||
281 | DBG_BLOCK_ID_CP1 = 0x19, | ||
282 | DBG_BLOCK_ID_CP2 = 0x1a, | ||
283 | DBG_BLOCK_ID_UNUSED1 = 0x1b, | ||
284 | DBG_BLOCK_ID_UVDU = 0x1c, | ||
285 | DBG_BLOCK_ID_UVDM = 0x1d, | ||
286 | DBG_BLOCK_ID_VCE = 0x1e, | ||
287 | DBG_BLOCK_ID_UNUSED2 = 0x1f, | ||
288 | DBG_BLOCK_ID_VGT0 = 0x20, | ||
289 | DBG_BLOCK_ID_VGT1 = 0x21, | ||
290 | DBG_BLOCK_ID_IA = 0x22, | ||
291 | DBG_BLOCK_ID_UNUSED3 = 0x23, | ||
292 | DBG_BLOCK_ID_SCT0 = 0x24, | ||
293 | DBG_BLOCK_ID_SCT1 = 0x25, | ||
294 | DBG_BLOCK_ID_SPM0 = 0x26, | ||
295 | DBG_BLOCK_ID_SPM1 = 0x27, | ||
296 | DBG_BLOCK_ID_TCAA = 0x28, | ||
297 | DBG_BLOCK_ID_TCAB = 0x29, | ||
298 | DBG_BLOCK_ID_TCCA = 0x2a, | ||
299 | DBG_BLOCK_ID_TCCB = 0x2b, | ||
300 | DBG_BLOCK_ID_MCC0 = 0x2c, | ||
301 | DBG_BLOCK_ID_MCC1 = 0x2d, | ||
302 | DBG_BLOCK_ID_MCC2 = 0x2e, | ||
303 | DBG_BLOCK_ID_MCC3 = 0x2f, | ||
304 | DBG_BLOCK_ID_SX0 = 0x30, | ||
305 | DBG_BLOCK_ID_SX1 = 0x31, | ||
306 | DBG_BLOCK_ID_SX2 = 0x32, | ||
307 | DBG_BLOCK_ID_SX3 = 0x33, | ||
308 | DBG_BLOCK_ID_UNUSED4 = 0x34, | ||
309 | DBG_BLOCK_ID_UNUSED5 = 0x35, | ||
310 | DBG_BLOCK_ID_UNUSED6 = 0x36, | ||
311 | DBG_BLOCK_ID_UNUSED7 = 0x37, | ||
312 | DBG_BLOCK_ID_PC0 = 0x38, | ||
313 | DBG_BLOCK_ID_PC1 = 0x39, | ||
314 | DBG_BLOCK_ID_UNUSED8 = 0x3a, | ||
315 | DBG_BLOCK_ID_UNUSED9 = 0x3b, | ||
316 | DBG_BLOCK_ID_UNUSED10 = 0x3c, | ||
317 | DBG_BLOCK_ID_UNUSED11 = 0x3d, | ||
318 | DBG_BLOCK_ID_MCB = 0x3e, | ||
319 | DBG_BLOCK_ID_UNUSED12 = 0x3f, | ||
320 | DBG_BLOCK_ID_SCB0 = 0x40, | ||
321 | DBG_BLOCK_ID_SCB1 = 0x41, | ||
322 | DBG_BLOCK_ID_UNUSED13 = 0x42, | ||
323 | DBG_BLOCK_ID_UNUSED14 = 0x43, | ||
324 | DBG_BLOCK_ID_SCF0 = 0x44, | ||
325 | DBG_BLOCK_ID_SCF1 = 0x45, | ||
326 | DBG_BLOCK_ID_UNUSED15 = 0x46, | ||
327 | DBG_BLOCK_ID_UNUSED16 = 0x47, | ||
328 | DBG_BLOCK_ID_BCI0 = 0x48, | ||
329 | DBG_BLOCK_ID_BCI1 = 0x49, | ||
330 | DBG_BLOCK_ID_BCI2 = 0x4a, | ||
331 | DBG_BLOCK_ID_BCI3 = 0x4b, | ||
332 | DBG_BLOCK_ID_UNUSED17 = 0x4c, | ||
333 | DBG_BLOCK_ID_UNUSED18 = 0x4d, | ||
334 | DBG_BLOCK_ID_UNUSED19 = 0x4e, | ||
335 | DBG_BLOCK_ID_UNUSED20 = 0x4f, | ||
336 | DBG_BLOCK_ID_CB00 = 0x50, | ||
337 | DBG_BLOCK_ID_CB01 = 0x51, | ||
338 | DBG_BLOCK_ID_CB02 = 0x52, | ||
339 | DBG_BLOCK_ID_CB03 = 0x53, | ||
340 | DBG_BLOCK_ID_CB04 = 0x54, | ||
341 | DBG_BLOCK_ID_UNUSED21 = 0x55, | ||
342 | DBG_BLOCK_ID_UNUSED22 = 0x56, | ||
343 | DBG_BLOCK_ID_UNUSED23 = 0x57, | ||
344 | DBG_BLOCK_ID_CB10 = 0x58, | ||
345 | DBG_BLOCK_ID_CB11 = 0x59, | ||
346 | DBG_BLOCK_ID_CB12 = 0x5a, | ||
347 | DBG_BLOCK_ID_CB13 = 0x5b, | ||
348 | DBG_BLOCK_ID_CB14 = 0x5c, | ||
349 | DBG_BLOCK_ID_UNUSED24 = 0x5d, | ||
350 | DBG_BLOCK_ID_UNUSED25 = 0x5e, | ||
351 | DBG_BLOCK_ID_UNUSED26 = 0x5f, | ||
352 | DBG_BLOCK_ID_TCP0 = 0x60, | ||
353 | DBG_BLOCK_ID_TCP1 = 0x61, | ||
354 | DBG_BLOCK_ID_TCP2 = 0x62, | ||
355 | DBG_BLOCK_ID_TCP3 = 0x63, | ||
356 | DBG_BLOCK_ID_TCP4 = 0x64, | ||
357 | DBG_BLOCK_ID_TCP5 = 0x65, | ||
358 | DBG_BLOCK_ID_TCP6 = 0x66, | ||
359 | DBG_BLOCK_ID_TCP7 = 0x67, | ||
360 | DBG_BLOCK_ID_TCP8 = 0x68, | ||
361 | DBG_BLOCK_ID_TCP9 = 0x69, | ||
362 | DBG_BLOCK_ID_TCP10 = 0x6a, | ||
363 | DBG_BLOCK_ID_TCP11 = 0x6b, | ||
364 | DBG_BLOCK_ID_TCP12 = 0x6c, | ||
365 | DBG_BLOCK_ID_TCP13 = 0x6d, | ||
366 | DBG_BLOCK_ID_TCP14 = 0x6e, | ||
367 | DBG_BLOCK_ID_TCP15 = 0x6f, | ||
368 | DBG_BLOCK_ID_TCP16 = 0x70, | ||
369 | DBG_BLOCK_ID_TCP17 = 0x71, | ||
370 | DBG_BLOCK_ID_TCP18 = 0x72, | ||
371 | DBG_BLOCK_ID_TCP19 = 0x73, | ||
372 | DBG_BLOCK_ID_TCP20 = 0x74, | ||
373 | DBG_BLOCK_ID_TCP21 = 0x75, | ||
374 | DBG_BLOCK_ID_TCP22 = 0x76, | ||
375 | DBG_BLOCK_ID_TCP23 = 0x77, | ||
376 | DBG_BLOCK_ID_TCP_RESERVED0 = 0x78, | ||
377 | DBG_BLOCK_ID_TCP_RESERVED1 = 0x79, | ||
378 | DBG_BLOCK_ID_TCP_RESERVED2 = 0x7a, | ||
379 | DBG_BLOCK_ID_TCP_RESERVED3 = 0x7b, | ||
380 | DBG_BLOCK_ID_TCP_RESERVED4 = 0x7c, | ||
381 | DBG_BLOCK_ID_TCP_RESERVED5 = 0x7d, | ||
382 | DBG_BLOCK_ID_TCP_RESERVED6 = 0x7e, | ||
383 | DBG_BLOCK_ID_TCP_RESERVED7 = 0x7f, | ||
384 | DBG_BLOCK_ID_DB00 = 0x80, | ||
385 | DBG_BLOCK_ID_DB01 = 0x81, | ||
386 | DBG_BLOCK_ID_DB02 = 0x82, | ||
387 | DBG_BLOCK_ID_DB03 = 0x83, | ||
388 | DBG_BLOCK_ID_DB04 = 0x84, | ||
389 | DBG_BLOCK_ID_UNUSED27 = 0x85, | ||
390 | DBG_BLOCK_ID_UNUSED28 = 0x86, | ||
391 | DBG_BLOCK_ID_UNUSED29 = 0x87, | ||
392 | DBG_BLOCK_ID_DB10 = 0x88, | ||
393 | DBG_BLOCK_ID_DB11 = 0x89, | ||
394 | DBG_BLOCK_ID_DB12 = 0x8a, | ||
395 | DBG_BLOCK_ID_DB13 = 0x8b, | ||
396 | DBG_BLOCK_ID_DB14 = 0x8c, | ||
397 | DBG_BLOCK_ID_UNUSED30 = 0x8d, | ||
398 | DBG_BLOCK_ID_UNUSED31 = 0x8e, | ||
399 | DBG_BLOCK_ID_UNUSED32 = 0x8f, | ||
400 | DBG_BLOCK_ID_TCC0 = 0x90, | ||
401 | DBG_BLOCK_ID_TCC1 = 0x91, | ||
402 | DBG_BLOCK_ID_TCC2 = 0x92, | ||
403 | DBG_BLOCK_ID_TCC3 = 0x93, | ||
404 | DBG_BLOCK_ID_TCC4 = 0x94, | ||
405 | DBG_BLOCK_ID_TCC5 = 0x95, | ||
406 | DBG_BLOCK_ID_TCC6 = 0x96, | ||
407 | DBG_BLOCK_ID_TCC7 = 0x97, | ||
408 | DBG_BLOCK_ID_SPS00 = 0x98, | ||
409 | DBG_BLOCK_ID_SPS01 = 0x99, | ||
410 | DBG_BLOCK_ID_SPS02 = 0x9a, | ||
411 | DBG_BLOCK_ID_SPS10 = 0x9b, | ||
412 | DBG_BLOCK_ID_SPS11 = 0x9c, | ||
413 | DBG_BLOCK_ID_SPS12 = 0x9d, | ||
414 | DBG_BLOCK_ID_UNUSED33 = 0x9e, | ||
415 | DBG_BLOCK_ID_UNUSED34 = 0x9f, | ||
416 | DBG_BLOCK_ID_TA00 = 0xa0, | ||
417 | DBG_BLOCK_ID_TA01 = 0xa1, | ||
418 | DBG_BLOCK_ID_TA02 = 0xa2, | ||
419 | DBG_BLOCK_ID_TA03 = 0xa3, | ||
420 | DBG_BLOCK_ID_TA04 = 0xa4, | ||
421 | DBG_BLOCK_ID_TA05 = 0xa5, | ||
422 | DBG_BLOCK_ID_TA06 = 0xa6, | ||
423 | DBG_BLOCK_ID_TA07 = 0xa7, | ||
424 | DBG_BLOCK_ID_TA08 = 0xa8, | ||
425 | DBG_BLOCK_ID_TA09 = 0xa9, | ||
426 | DBG_BLOCK_ID_TA0A = 0xaa, | ||
427 | DBG_BLOCK_ID_TA0B = 0xab, | ||
428 | DBG_BLOCK_ID_UNUSED35 = 0xac, | ||
429 | DBG_BLOCK_ID_UNUSED36 = 0xad, | ||
430 | DBG_BLOCK_ID_UNUSED37 = 0xae, | ||
431 | DBG_BLOCK_ID_UNUSED38 = 0xaf, | ||
432 | DBG_BLOCK_ID_TA10 = 0xb0, | ||
433 | DBG_BLOCK_ID_TA11 = 0xb1, | ||
434 | DBG_BLOCK_ID_TA12 = 0xb2, | ||
435 | DBG_BLOCK_ID_TA13 = 0xb3, | ||
436 | DBG_BLOCK_ID_TA14 = 0xb4, | ||
437 | DBG_BLOCK_ID_TA15 = 0xb5, | ||
438 | DBG_BLOCK_ID_TA16 = 0xb6, | ||
439 | DBG_BLOCK_ID_TA17 = 0xb7, | ||
440 | DBG_BLOCK_ID_TA18 = 0xb8, | ||
441 | DBG_BLOCK_ID_TA19 = 0xb9, | ||
442 | DBG_BLOCK_ID_TA1A = 0xba, | ||
443 | DBG_BLOCK_ID_TA1B = 0xbb, | ||
444 | DBG_BLOCK_ID_UNUSED39 = 0xbc, | ||
445 | DBG_BLOCK_ID_UNUSED40 = 0xbd, | ||
446 | DBG_BLOCK_ID_UNUSED41 = 0xbe, | ||
447 | DBG_BLOCK_ID_UNUSED42 = 0xbf, | ||
448 | DBG_BLOCK_ID_TD00 = 0xc0, | ||
449 | DBG_BLOCK_ID_TD01 = 0xc1, | ||
450 | DBG_BLOCK_ID_TD02 = 0xc2, | ||
451 | DBG_BLOCK_ID_TD03 = 0xc3, | ||
452 | DBG_BLOCK_ID_TD04 = 0xc4, | ||
453 | DBG_BLOCK_ID_TD05 = 0xc5, | ||
454 | DBG_BLOCK_ID_TD06 = 0xc6, | ||
455 | DBG_BLOCK_ID_TD07 = 0xc7, | ||
456 | DBG_BLOCK_ID_TD08 = 0xc8, | ||
457 | DBG_BLOCK_ID_TD09 = 0xc9, | ||
458 | DBG_BLOCK_ID_TD0A = 0xca, | ||
459 | DBG_BLOCK_ID_TD0B = 0xcb, | ||
460 | DBG_BLOCK_ID_UNUSED43 = 0xcc, | ||
461 | DBG_BLOCK_ID_UNUSED44 = 0xcd, | ||
462 | DBG_BLOCK_ID_UNUSED45 = 0xce, | ||
463 | DBG_BLOCK_ID_UNUSED46 = 0xcf, | ||
464 | DBG_BLOCK_ID_TD10 = 0xd0, | ||
465 | DBG_BLOCK_ID_TD11 = 0xd1, | ||
466 | DBG_BLOCK_ID_TD12 = 0xd2, | ||
467 | DBG_BLOCK_ID_TD13 = 0xd3, | ||
468 | DBG_BLOCK_ID_TD14 = 0xd4, | ||
469 | DBG_BLOCK_ID_TD15 = 0xd5, | ||
470 | DBG_BLOCK_ID_TD16 = 0xd6, | ||
471 | DBG_BLOCK_ID_TD17 = 0xd7, | ||
472 | DBG_BLOCK_ID_TD18 = 0xd8, | ||
473 | DBG_BLOCK_ID_TD19 = 0xd9, | ||
474 | DBG_BLOCK_ID_TD1A = 0xda, | ||
475 | DBG_BLOCK_ID_TD1B = 0xdb, | ||
476 | DBG_BLOCK_ID_UNUSED47 = 0xdc, | ||
477 | DBG_BLOCK_ID_UNUSED48 = 0xdd, | ||
478 | DBG_BLOCK_ID_UNUSED49 = 0xde, | ||
479 | DBG_BLOCK_ID_UNUSED50 = 0xdf, | ||
480 | DBG_BLOCK_ID_MCD0 = 0xe0, | ||
481 | DBG_BLOCK_ID_MCD1 = 0xe1, | ||
482 | DBG_BLOCK_ID_MCD2 = 0xe2, | ||
483 | DBG_BLOCK_ID_MCD3 = 0xe3, | ||
484 | DBG_BLOCK_ID_MCD4 = 0xe4, | ||
485 | DBG_BLOCK_ID_MCD5 = 0xe5, | ||
486 | DBG_BLOCK_ID_UNUSED51 = 0xe6, | ||
487 | DBG_BLOCK_ID_UNUSED52 = 0xe7, | ||
488 | } DebugBlockId_OLD; | ||
489 | typedef enum DebugBlockId_BY2 { | ||
490 | DBG_BLOCK_ID_RESERVED_BY2 = 0x0, | ||
491 | DBG_BLOCK_ID_VMC_BY2 = 0x1, | ||
492 | DBG_BLOCK_ID_CG_BY2 = 0x2, | ||
493 | DBG_BLOCK_ID_GRBM_BY2 = 0x3, | ||
494 | DBG_BLOCK_ID_CSC_BY2 = 0x4, | ||
495 | DBG_BLOCK_ID_IH_BY2 = 0x5, | ||
496 | DBG_BLOCK_ID_SQ_BY2 = 0x6, | ||
497 | DBG_BLOCK_ID_GMCON_BY2 = 0x7, | ||
498 | DBG_BLOCK_ID_DMA0_BY2 = 0x8, | ||
499 | DBG_BLOCK_ID_SPIM_BY2 = 0x9, | ||
500 | DBG_BLOCK_ID_SPIS_BY2 = 0xa, | ||
501 | DBG_BLOCK_ID_PA0_BY2 = 0xb, | ||
502 | DBG_BLOCK_ID_CP0_BY2 = 0xc, | ||
503 | DBG_BLOCK_ID_CP2_BY2 = 0xd, | ||
504 | DBG_BLOCK_ID_UVDU_BY2 = 0xe, | ||
505 | DBG_BLOCK_ID_VCE_BY2 = 0xf, | ||
506 | DBG_BLOCK_ID_VGT0_BY2 = 0x10, | ||
507 | DBG_BLOCK_ID_IA_BY2 = 0x11, | ||
508 | DBG_BLOCK_ID_SCT0_BY2 = 0x12, | ||
509 | DBG_BLOCK_ID_SPM0_BY2 = 0x13, | ||
510 | DBG_BLOCK_ID_TCAA_BY2 = 0x14, | ||
511 | DBG_BLOCK_ID_TCCA_BY2 = 0x15, | ||
512 | DBG_BLOCK_ID_MCC0_BY2 = 0x16, | ||
513 | DBG_BLOCK_ID_MCC2_BY2 = 0x17, | ||
514 | DBG_BLOCK_ID_SX0_BY2 = 0x18, | ||
515 | DBG_BLOCK_ID_SX2_BY2 = 0x19, | ||
516 | DBG_BLOCK_ID_UNUSED4_BY2 = 0x1a, | ||
517 | DBG_BLOCK_ID_UNUSED6_BY2 = 0x1b, | ||
518 | DBG_BLOCK_ID_PC0_BY2 = 0x1c, | ||
519 | DBG_BLOCK_ID_UNUSED8_BY2 = 0x1d, | ||
520 | DBG_BLOCK_ID_UNUSED10_BY2 = 0x1e, | ||
521 | DBG_BLOCK_ID_MCB_BY2 = 0x1f, | ||
522 | DBG_BLOCK_ID_SCB0_BY2 = 0x20, | ||
523 | DBG_BLOCK_ID_UNUSED13_BY2 = 0x21, | ||
524 | DBG_BLOCK_ID_SCF0_BY2 = 0x22, | ||
525 | DBG_BLOCK_ID_UNUSED15_BY2 = 0x23, | ||
526 | DBG_BLOCK_ID_BCI0_BY2 = 0x24, | ||
527 | DBG_BLOCK_ID_BCI2_BY2 = 0x25, | ||
528 | DBG_BLOCK_ID_UNUSED17_BY2 = 0x26, | ||
529 | DBG_BLOCK_ID_UNUSED19_BY2 = 0x27, | ||
530 | DBG_BLOCK_ID_CB00_BY2 = 0x28, | ||
531 | DBG_BLOCK_ID_CB02_BY2 = 0x29, | ||
532 | DBG_BLOCK_ID_CB04_BY2 = 0x2a, | ||
533 | DBG_BLOCK_ID_UNUSED22_BY2 = 0x2b, | ||
534 | DBG_BLOCK_ID_CB10_BY2 = 0x2c, | ||
535 | DBG_BLOCK_ID_CB12_BY2 = 0x2d, | ||
536 | DBG_BLOCK_ID_CB14_BY2 = 0x2e, | ||
537 | DBG_BLOCK_ID_UNUSED25_BY2 = 0x2f, | ||
538 | DBG_BLOCK_ID_TCP0_BY2 = 0x30, | ||
539 | DBG_BLOCK_ID_TCP2_BY2 = 0x31, | ||
540 | DBG_BLOCK_ID_TCP4_BY2 = 0x32, | ||
541 | DBG_BLOCK_ID_TCP6_BY2 = 0x33, | ||
542 | DBG_BLOCK_ID_TCP8_BY2 = 0x34, | ||
543 | DBG_BLOCK_ID_TCP10_BY2 = 0x35, | ||
544 | DBG_BLOCK_ID_TCP12_BY2 = 0x36, | ||
545 | DBG_BLOCK_ID_TCP14_BY2 = 0x37, | ||
546 | DBG_BLOCK_ID_TCP16_BY2 = 0x38, | ||
547 | DBG_BLOCK_ID_TCP18_BY2 = 0x39, | ||
548 | DBG_BLOCK_ID_TCP20_BY2 = 0x3a, | ||
549 | DBG_BLOCK_ID_TCP22_BY2 = 0x3b, | ||
550 | DBG_BLOCK_ID_TCP_RESERVED0_BY2 = 0x3c, | ||
551 | DBG_BLOCK_ID_TCP_RESERVED2_BY2 = 0x3d, | ||
552 | DBG_BLOCK_ID_TCP_RESERVED4_BY2 = 0x3e, | ||
553 | DBG_BLOCK_ID_TCP_RESERVED6_BY2 = 0x3f, | ||
554 | DBG_BLOCK_ID_DB00_BY2 = 0x40, | ||
555 | DBG_BLOCK_ID_DB02_BY2 = 0x41, | ||
556 | DBG_BLOCK_ID_DB04_BY2 = 0x42, | ||
557 | DBG_BLOCK_ID_UNUSED28_BY2 = 0x43, | ||
558 | DBG_BLOCK_ID_DB10_BY2 = 0x44, | ||
559 | DBG_BLOCK_ID_DB12_BY2 = 0x45, | ||
560 | DBG_BLOCK_ID_DB14_BY2 = 0x46, | ||
561 | DBG_BLOCK_ID_UNUSED31_BY2 = 0x47, | ||
562 | DBG_BLOCK_ID_TCC0_BY2 = 0x48, | ||
563 | DBG_BLOCK_ID_TCC2_BY2 = 0x49, | ||
564 | DBG_BLOCK_ID_TCC4_BY2 = 0x4a, | ||
565 | DBG_BLOCK_ID_TCC6_BY2 = 0x4b, | ||
566 | DBG_BLOCK_ID_SPS00_BY2 = 0x4c, | ||
567 | DBG_BLOCK_ID_SPS02_BY2 = 0x4d, | ||
568 | DBG_BLOCK_ID_SPS11_BY2 = 0x4e, | ||
569 | DBG_BLOCK_ID_UNUSED33_BY2 = 0x4f, | ||
570 | DBG_BLOCK_ID_TA00_BY2 = 0x50, | ||
571 | DBG_BLOCK_ID_TA02_BY2 = 0x51, | ||
572 | DBG_BLOCK_ID_TA04_BY2 = 0x52, | ||
573 | DBG_BLOCK_ID_TA06_BY2 = 0x53, | ||
574 | DBG_BLOCK_ID_TA08_BY2 = 0x54, | ||
575 | DBG_BLOCK_ID_TA0A_BY2 = 0x55, | ||
576 | DBG_BLOCK_ID_UNUSED35_BY2 = 0x56, | ||
577 | DBG_BLOCK_ID_UNUSED37_BY2 = 0x57, | ||
578 | DBG_BLOCK_ID_TA10_BY2 = 0x58, | ||
579 | DBG_BLOCK_ID_TA12_BY2 = 0x59, | ||
580 | DBG_BLOCK_ID_TA14_BY2 = 0x5a, | ||
581 | DBG_BLOCK_ID_TA16_BY2 = 0x5b, | ||
582 | DBG_BLOCK_ID_TA18_BY2 = 0x5c, | ||
583 | DBG_BLOCK_ID_TA1A_BY2 = 0x5d, | ||
584 | DBG_BLOCK_ID_UNUSED39_BY2 = 0x5e, | ||
585 | DBG_BLOCK_ID_UNUSED41_BY2 = 0x5f, | ||
586 | DBG_BLOCK_ID_TD00_BY2 = 0x60, | ||
587 | DBG_BLOCK_ID_TD02_BY2 = 0x61, | ||
588 | DBG_BLOCK_ID_TD04_BY2 = 0x62, | ||
589 | DBG_BLOCK_ID_TD06_BY2 = 0x63, | ||
590 | DBG_BLOCK_ID_TD08_BY2 = 0x64, | ||
591 | DBG_BLOCK_ID_TD0A_BY2 = 0x65, | ||
592 | DBG_BLOCK_ID_UNUSED43_BY2 = 0x66, | ||
593 | DBG_BLOCK_ID_UNUSED45_BY2 = 0x67, | ||
594 | DBG_BLOCK_ID_TD10_BY2 = 0x68, | ||
595 | DBG_BLOCK_ID_TD12_BY2 = 0x69, | ||
596 | DBG_BLOCK_ID_TD14_BY2 = 0x6a, | ||
597 | DBG_BLOCK_ID_TD16_BY2 = 0x6b, | ||
598 | DBG_BLOCK_ID_TD18_BY2 = 0x6c, | ||
599 | DBG_BLOCK_ID_TD1A_BY2 = 0x6d, | ||
600 | DBG_BLOCK_ID_UNUSED47_BY2 = 0x6e, | ||
601 | DBG_BLOCK_ID_UNUSED49_BY2 = 0x6f, | ||
602 | DBG_BLOCK_ID_MCD0_BY2 = 0x70, | ||
603 | DBG_BLOCK_ID_MCD2_BY2 = 0x71, | ||
604 | DBG_BLOCK_ID_MCD4_BY2 = 0x72, | ||
605 | DBG_BLOCK_ID_UNUSED51_BY2 = 0x73, | ||
606 | } DebugBlockId_BY2; | ||
607 | typedef enum DebugBlockId_BY4 { | ||
608 | DBG_BLOCK_ID_RESERVED_BY4 = 0x0, | ||
609 | DBG_BLOCK_ID_CG_BY4 = 0x1, | ||
610 | DBG_BLOCK_ID_CSC_BY4 = 0x2, | ||
611 | DBG_BLOCK_ID_SQ_BY4 = 0x3, | ||
612 | DBG_BLOCK_ID_DMA0_BY4 = 0x4, | ||
613 | DBG_BLOCK_ID_SPIS_BY4 = 0x5, | ||
614 | DBG_BLOCK_ID_CP0_BY4 = 0x6, | ||
615 | DBG_BLOCK_ID_UVDU_BY4 = 0x7, | ||
616 | DBG_BLOCK_ID_VGT0_BY4 = 0x8, | ||
617 | DBG_BLOCK_ID_SCT0_BY4 = 0x9, | ||
618 | DBG_BLOCK_ID_TCAA_BY4 = 0xa, | ||
619 | DBG_BLOCK_ID_MCC0_BY4 = 0xb, | ||
620 | DBG_BLOCK_ID_SX0_BY4 = 0xc, | ||
621 | DBG_BLOCK_ID_UNUSED4_BY4 = 0xd, | ||
622 | DBG_BLOCK_ID_PC0_BY4 = 0xe, | ||
623 | DBG_BLOCK_ID_UNUSED10_BY4 = 0xf, | ||
624 | DBG_BLOCK_ID_SCB0_BY4 = 0x10, | ||
625 | DBG_BLOCK_ID_SCF0_BY4 = 0x11, | ||
626 | DBG_BLOCK_ID_BCI0_BY4 = 0x12, | ||
627 | DBG_BLOCK_ID_UNUSED17_BY4 = 0x13, | ||
628 | DBG_BLOCK_ID_CB00_BY4 = 0x14, | ||
629 | DBG_BLOCK_ID_CB04_BY4 = 0x15, | ||
630 | DBG_BLOCK_ID_CB10_BY4 = 0x16, | ||
631 | DBG_BLOCK_ID_CB14_BY4 = 0x17, | ||
632 | DBG_BLOCK_ID_TCP0_BY4 = 0x18, | ||
633 | DBG_BLOCK_ID_TCP4_BY4 = 0x19, | ||
634 | DBG_BLOCK_ID_TCP8_BY4 = 0x1a, | ||
635 | DBG_BLOCK_ID_TCP12_BY4 = 0x1b, | ||
636 | DBG_BLOCK_ID_TCP16_BY4 = 0x1c, | ||
637 | DBG_BLOCK_ID_TCP20_BY4 = 0x1d, | ||
638 | DBG_BLOCK_ID_TCP_RESERVED0_BY4 = 0x1e, | ||
639 | DBG_BLOCK_ID_TCP_RESERVED4_BY4 = 0x1f, | ||
640 | DBG_BLOCK_ID_DB_BY4 = 0x20, | ||
641 | DBG_BLOCK_ID_DB04_BY4 = 0x21, | ||
642 | DBG_BLOCK_ID_DB10_BY4 = 0x22, | ||
643 | DBG_BLOCK_ID_DB14_BY4 = 0x23, | ||
644 | DBG_BLOCK_ID_TCC0_BY4 = 0x24, | ||
645 | DBG_BLOCK_ID_TCC4_BY4 = 0x25, | ||
646 | DBG_BLOCK_ID_SPS00_BY4 = 0x26, | ||
647 | DBG_BLOCK_ID_SPS11_BY4 = 0x27, | ||
648 | DBG_BLOCK_ID_TA00_BY4 = 0x28, | ||
649 | DBG_BLOCK_ID_TA04_BY4 = 0x29, | ||
650 | DBG_BLOCK_ID_TA08_BY4 = 0x2a, | ||
651 | DBG_BLOCK_ID_UNUSED35_BY4 = 0x2b, | ||
652 | DBG_BLOCK_ID_TA10_BY4 = 0x2c, | ||
653 | DBG_BLOCK_ID_TA14_BY4 = 0x2d, | ||
654 | DBG_BLOCK_ID_TA18_BY4 = 0x2e, | ||
655 | DBG_BLOCK_ID_UNUSED39_BY4 = 0x2f, | ||
656 | DBG_BLOCK_ID_TD00_BY4 = 0x30, | ||
657 | DBG_BLOCK_ID_TD04_BY4 = 0x31, | ||
658 | DBG_BLOCK_ID_TD08_BY4 = 0x32, | ||
659 | DBG_BLOCK_ID_UNUSED43_BY4 = 0x33, | ||
660 | DBG_BLOCK_ID_TD10_BY4 = 0x34, | ||
661 | DBG_BLOCK_ID_TD14_BY4 = 0x35, | ||
662 | DBG_BLOCK_ID_TD18_BY4 = 0x36, | ||
663 | DBG_BLOCK_ID_UNUSED47_BY4 = 0x37, | ||
664 | DBG_BLOCK_ID_MCD0_BY4 = 0x38, | ||
665 | DBG_BLOCK_ID_MCD4_BY4 = 0x39, | ||
666 | } DebugBlockId_BY4; | ||
667 | typedef enum DebugBlockId_BY8 { | ||
668 | DBG_BLOCK_ID_RESERVED_BY8 = 0x0, | ||
669 | DBG_BLOCK_ID_CSC_BY8 = 0x1, | ||
670 | DBG_BLOCK_ID_DMA0_BY8 = 0x2, | ||
671 | DBG_BLOCK_ID_CP0_BY8 = 0x3, | ||
672 | DBG_BLOCK_ID_VGT0_BY8 = 0x4, | ||
673 | DBG_BLOCK_ID_TCAA_BY8 = 0x5, | ||
674 | DBG_BLOCK_ID_SX0_BY8 = 0x6, | ||
675 | DBG_BLOCK_ID_PC0_BY8 = 0x7, | ||
676 | DBG_BLOCK_ID_SCB0_BY8 = 0x8, | ||
677 | DBG_BLOCK_ID_BCI0_BY8 = 0x9, | ||
678 | DBG_BLOCK_ID_CB00_BY8 = 0xa, | ||
679 | DBG_BLOCK_ID_CB10_BY8 = 0xb, | ||
680 | DBG_BLOCK_ID_TCP0_BY8 = 0xc, | ||
681 | DBG_BLOCK_ID_TCP8_BY8 = 0xd, | ||
682 | DBG_BLOCK_ID_TCP16_BY8 = 0xe, | ||
683 | DBG_BLOCK_ID_TCP_RESERVED0_BY8 = 0xf, | ||
684 | DBG_BLOCK_ID_DB00_BY8 = 0x10, | ||
685 | DBG_BLOCK_ID_DB10_BY8 = 0x11, | ||
686 | DBG_BLOCK_ID_TCC0_BY8 = 0x12, | ||
687 | DBG_BLOCK_ID_SPS00_BY8 = 0x13, | ||
688 | DBG_BLOCK_ID_TA00_BY8 = 0x14, | ||
689 | DBG_BLOCK_ID_TA08_BY8 = 0x15, | ||
690 | DBG_BLOCK_ID_TA10_BY8 = 0x16, | ||
691 | DBG_BLOCK_ID_TA18_BY8 = 0x17, | ||
692 | DBG_BLOCK_ID_TD00_BY8 = 0x18, | ||
693 | DBG_BLOCK_ID_TD08_BY8 = 0x19, | ||
694 | DBG_BLOCK_ID_TD10_BY8 = 0x1a, | ||
695 | DBG_BLOCK_ID_TD18_BY8 = 0x1b, | ||
696 | DBG_BLOCK_ID_MCD0_BY8 = 0x1c, | ||
697 | } DebugBlockId_BY8; | ||
698 | typedef enum DebugBlockId_BY16 { | ||
699 | DBG_BLOCK_ID_RESERVED_BY16 = 0x0, | ||
700 | DBG_BLOCK_ID_DMA0_BY16 = 0x1, | ||
701 | DBG_BLOCK_ID_VGT0_BY16 = 0x2, | ||
702 | DBG_BLOCK_ID_SX0_BY16 = 0x3, | ||
703 | DBG_BLOCK_ID_SCB0_BY16 = 0x4, | ||
704 | DBG_BLOCK_ID_CB00_BY16 = 0x5, | ||
705 | DBG_BLOCK_ID_TCP0_BY16 = 0x6, | ||
706 | DBG_BLOCK_ID_TCP16_BY16 = 0x7, | ||
707 | DBG_BLOCK_ID_DB00_BY16 = 0x8, | ||
708 | DBG_BLOCK_ID_TCC0_BY16 = 0x9, | ||
709 | DBG_BLOCK_ID_TA00_BY16 = 0xa, | ||
710 | DBG_BLOCK_ID_TA10_BY16 = 0xb, | ||
711 | DBG_BLOCK_ID_TD00_BY16 = 0xc, | ||
712 | DBG_BLOCK_ID_TD10_BY16 = 0xd, | ||
713 | DBG_BLOCK_ID_MCD0_BY16 = 0xe, | ||
714 | } DebugBlockId_BY16; | ||
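A pattern worth noting across the four reduced enums above: each _BYn value appears to be the corresponding DebugBlockId_OLD value shifted right by log2(n), i.e. the block index when IDs are grouped n at a time. A minimal sketch, assuming the relationship holds for every entry (these helper macros are illustrative, not part of the header):

/* Spot-checks against the tables above: DBG_BLOCK_ID_CP0 (0x18) yields
 * 0x0c (_BY2), 0x06 (_BY4), 0x03 (_BY8); DBG_BLOCK_ID_MCD0 (0xe0)
 * yields 0x0e (_BY16). */
#define DBG_BLOCK_ID_TO_BY2(id)   ((id) >> 1)
#define DBG_BLOCK_ID_TO_BY4(id)   ((id) >> 2)
#define DBG_BLOCK_ID_TO_BY8(id)   ((id) >> 3)
#define DBG_BLOCK_ID_TO_BY16(id)  ((id) >> 4)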
715 | typedef enum CompareRef { | ||
716 | REF_NEVER = 0x0, | ||
717 | REF_LESS = 0x1, | ||
718 | REF_EQUAL = 0x2, | ||
719 | REF_LEQUAL = 0x3, | ||
720 | REF_GREATER = 0x4, | ||
721 | REF_NOTEQUAL = 0x5, | ||
722 | REF_GEQUAL = 0x6, | ||
723 | REF_ALWAYS = 0x7, | ||
724 | } CompareRef; | ||
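CompareRef enumerates the usual eight depth/stencil compare functions in the conventional order. A hedged sketch of the semantics implied by the names (compare_ref_pass() is a hypothetical helper; on the GPU this test is evaluated in fixed-function hardware):

static bool compare_ref_pass(CompareRef func, float src, float ref)
{
	switch (func) {
	case REF_NEVER:    return false;
	case REF_LESS:     return src <  ref;
	case REF_EQUAL:    return src == ref;
	case REF_LEQUAL:   return src <= ref;
	case REF_GREATER:  return src >  ref;
	case REF_NOTEQUAL: return src != ref;
	case REF_GEQUAL:   return src >= ref;
	case REF_ALWAYS:   return true;
	}
	return false;
}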
725 | typedef enum ReadSize { | ||
726 | READ_256_BITS = 0x0, | ||
727 | READ_512_BITS = 0x1, | ||
728 | } ReadSize; | ||
729 | typedef enum DepthFormat { | ||
730 | DEPTH_INVALID = 0x0, | ||
731 | DEPTH_16 = 0x1, | ||
732 | DEPTH_X8_24 = 0x2, | ||
733 | DEPTH_8_24 = 0x3, | ||
734 | DEPTH_X8_24_FLOAT = 0x4, | ||
735 | DEPTH_8_24_FLOAT = 0x5, | ||
736 | DEPTH_32_FLOAT = 0x6, | ||
737 | DEPTH_X24_8_32_FLOAT = 0x7, | ||
738 | } DepthFormat; | ||
739 | typedef enum ZFormat { | ||
740 | Z_INVALID = 0x0, | ||
741 | Z_16 = 0x1, | ||
742 | Z_24 = 0x2, | ||
743 | Z_32_FLOAT = 0x3, | ||
744 | } ZFormat; | ||
745 | typedef enum StencilFormat { | ||
746 | STENCIL_INVALID = 0x0, | ||
747 | STENCIL_8 = 0x1, | ||
748 | } StencilFormat; | ||
749 | typedef enum CmaskMode { | ||
750 | CMASK_CLEAR_NONE = 0x0, | ||
751 | CMASK_CLEAR_ONE = 0x1, | ||
752 | CMASK_CLEAR_ALL = 0x2, | ||
753 | CMASK_ANY_EXPANDED = 0x3, | ||
754 | CMASK_ALPHA0_FRAG1 = 0x4, | ||
755 | CMASK_ALPHA0_FRAG2 = 0x5, | ||
756 | CMASK_ALPHA0_FRAG4 = 0x6, | ||
757 | CMASK_ALPHA0_FRAGS = 0x7, | ||
758 | CMASK_ALPHA1_FRAG1 = 0x8, | ||
759 | CMASK_ALPHA1_FRAG2 = 0x9, | ||
760 | CMASK_ALPHA1_FRAG4 = 0xa, | ||
761 | CMASK_ALPHA1_FRAGS = 0xb, | ||
762 | CMASK_ALPHAX_FRAG1 = 0xc, | ||
763 | CMASK_ALPHAX_FRAG2 = 0xd, | ||
764 | CMASK_ALPHAX_FRAG4 = 0xe, | ||
765 | CMASK_ALPHAX_FRAGS = 0xf, | ||
766 | } CmaskMode; | ||
767 | typedef enum QuadExportFormat { | ||
768 | EXPORT_UNUSED = 0x0, | ||
769 | EXPORT_32_R = 0x1, | ||
770 | EXPORT_32_GR = 0x2, | ||
771 | EXPORT_32_AR = 0x3, | ||
772 | EXPORT_FP16_ABGR = 0x4, | ||
773 | EXPORT_UNSIGNED16_ABGR = 0x5, | ||
774 | EXPORT_SIGNED16_ABGR = 0x6, | ||
775 | EXPORT_32_ABGR = 0x7, | ||
776 | } QuadExportFormat; | ||
777 | typedef enum QuadExportFormatOld { | ||
778 | EXPORT_4P_32BPC_ABGR = 0x0, | ||
779 | EXPORT_4P_16BPC_ABGR = 0x1, | ||
780 | EXPORT_4P_32BPC_GR = 0x2, | ||
781 | EXPORT_4P_32BPC_AR = 0x3, | ||
782 | EXPORT_2P_32BPC_ABGR = 0x4, | ||
783 | EXPORT_8P_32BPC_R = 0x5, | ||
784 | } QuadExportFormatOld; | ||
785 | typedef enum ColorFormat { | ||
786 | COLOR_INVALID = 0x0, | ||
787 | COLOR_8 = 0x1, | ||
788 | COLOR_16 = 0x2, | ||
789 | COLOR_8_8 = 0x3, | ||
790 | COLOR_32 = 0x4, | ||
791 | COLOR_16_16 = 0x5, | ||
792 | COLOR_10_11_11 = 0x6, | ||
793 | COLOR_11_11_10 = 0x7, | ||
794 | COLOR_10_10_10_2 = 0x8, | ||
795 | COLOR_2_10_10_10 = 0x9, | ||
796 | COLOR_8_8_8_8 = 0xa, | ||
797 | COLOR_32_32 = 0xb, | ||
798 | COLOR_16_16_16_16 = 0xc, | ||
799 | COLOR_RESERVED_13 = 0xd, | ||
800 | COLOR_32_32_32_32 = 0xe, | ||
801 | COLOR_RESERVED_15 = 0xf, | ||
802 | COLOR_5_6_5 = 0x10, | ||
803 | COLOR_1_5_5_5 = 0x11, | ||
804 | COLOR_5_5_5_1 = 0x12, | ||
805 | COLOR_4_4_4_4 = 0x13, | ||
806 | COLOR_8_24 = 0x14, | ||
807 | COLOR_24_8 = 0x15, | ||
808 | COLOR_X24_8_32_FLOAT = 0x16, | ||
809 | COLOR_RESERVED_23 = 0x17, | ||
810 | } ColorFormat; | ||
811 | typedef enum SurfaceFormat { | ||
812 | FMT_INVALID = 0x0, | ||
813 | FMT_8 = 0x1, | ||
814 | FMT_16 = 0x2, | ||
815 | FMT_8_8 = 0x3, | ||
816 | FMT_32 = 0x4, | ||
817 | FMT_16_16 = 0x5, | ||
818 | FMT_10_11_11 = 0x6, | ||
819 | FMT_11_11_10 = 0x7, | ||
820 | FMT_10_10_10_2 = 0x8, | ||
821 | FMT_2_10_10_10 = 0x9, | ||
822 | FMT_8_8_8_8 = 0xa, | ||
823 | FMT_32_32 = 0xb, | ||
824 | FMT_16_16_16_16 = 0xc, | ||
825 | FMT_32_32_32 = 0xd, | ||
826 | FMT_32_32_32_32 = 0xe, | ||
827 | FMT_RESERVED_4 = 0xf, | ||
828 | FMT_5_6_5 = 0x10, | ||
829 | FMT_1_5_5_5 = 0x11, | ||
830 | FMT_5_5_5_1 = 0x12, | ||
831 | FMT_4_4_4_4 = 0x13, | ||
832 | FMT_8_24 = 0x14, | ||
833 | FMT_24_8 = 0x15, | ||
834 | FMT_X24_8_32_FLOAT = 0x16, | ||
835 | FMT_RESERVED_33 = 0x17, | ||
836 | FMT_11_11_10_FLOAT = 0x18, | ||
837 | FMT_16_FLOAT = 0x19, | ||
838 | FMT_32_FLOAT = 0x1a, | ||
839 | FMT_16_16_FLOAT = 0x1b, | ||
840 | FMT_8_24_FLOAT = 0x1c, | ||
841 | FMT_24_8_FLOAT = 0x1d, | ||
842 | FMT_32_32_FLOAT = 0x1e, | ||
843 | FMT_10_11_11_FLOAT = 0x1f, | ||
844 | FMT_16_16_16_16_FLOAT = 0x20, | ||
845 | FMT_3_3_2 = 0x21, | ||
846 | FMT_6_5_5 = 0x22, | ||
847 | FMT_32_32_32_32_FLOAT = 0x23, | ||
848 | FMT_RESERVED_36 = 0x24, | ||
849 | FMT_1 = 0x25, | ||
850 | FMT_1_REVERSED = 0x26, | ||
851 | FMT_GB_GR = 0x27, | ||
852 | FMT_BG_RG = 0x28, | ||
853 | FMT_32_AS_8 = 0x29, | ||
854 | FMT_32_AS_8_8 = 0x2a, | ||
855 | FMT_5_9_9_9_SHAREDEXP = 0x2b, | ||
856 | FMT_8_8_8 = 0x2c, | ||
857 | FMT_16_16_16 = 0x2d, | ||
858 | FMT_16_16_16_FLOAT = 0x2e, | ||
859 | FMT_4_4 = 0x2f, | ||
860 | FMT_32_32_32_FLOAT = 0x30, | ||
861 | FMT_BC1 = 0x31, | ||
862 | FMT_BC2 = 0x32, | ||
863 | FMT_BC3 = 0x33, | ||
864 | FMT_BC4 = 0x34, | ||
865 | FMT_BC5 = 0x35, | ||
866 | FMT_BC6 = 0x36, | ||
867 | FMT_BC7 = 0x37, | ||
868 | FMT_32_AS_32_32_32_32 = 0x38, | ||
869 | FMT_APC3 = 0x39, | ||
870 | FMT_APC4 = 0x3a, | ||
871 | FMT_APC5 = 0x3b, | ||
872 | FMT_APC6 = 0x3c, | ||
873 | FMT_APC7 = 0x3d, | ||
874 | FMT_CTX1 = 0x3e, | ||
875 | FMT_RESERVED_63 = 0x3f, | ||
876 | } SurfaceFormat; | ||
877 | typedef enum BUF_DATA_FORMAT { | ||
878 | BUF_DATA_FORMAT_INVALID = 0x0, | ||
879 | BUF_DATA_FORMAT_8 = 0x1, | ||
880 | BUF_DATA_FORMAT_16 = 0x2, | ||
881 | BUF_DATA_FORMAT_8_8 = 0x3, | ||
882 | BUF_DATA_FORMAT_32 = 0x4, | ||
883 | BUF_DATA_FORMAT_16_16 = 0x5, | ||
884 | BUF_DATA_FORMAT_10_11_11 = 0x6, | ||
885 | BUF_DATA_FORMAT_11_11_10 = 0x7, | ||
886 | BUF_DATA_FORMAT_10_10_10_2 = 0x8, | ||
887 | BUF_DATA_FORMAT_2_10_10_10 = 0x9, | ||
888 | BUF_DATA_FORMAT_8_8_8_8 = 0xa, | ||
889 | BUF_DATA_FORMAT_32_32 = 0xb, | ||
890 | BUF_DATA_FORMAT_16_16_16_16 = 0xc, | ||
891 | BUF_DATA_FORMAT_32_32_32 = 0xd, | ||
892 | BUF_DATA_FORMAT_32_32_32_32 = 0xe, | ||
893 | BUF_DATA_FORMAT_RESERVED_15 = 0xf, | ||
894 | } BUF_DATA_FORMAT; | ||
895 | typedef enum IMG_DATA_FORMAT { | ||
896 | IMG_DATA_FORMAT_INVALID = 0x0, | ||
897 | IMG_DATA_FORMAT_8 = 0x1, | ||
898 | IMG_DATA_FORMAT_16 = 0x2, | ||
899 | IMG_DATA_FORMAT_8_8 = 0x3, | ||
900 | IMG_DATA_FORMAT_32 = 0x4, | ||
901 | IMG_DATA_FORMAT_16_16 = 0x5, | ||
902 | IMG_DATA_FORMAT_10_11_11 = 0x6, | ||
903 | IMG_DATA_FORMAT_11_11_10 = 0x7, | ||
904 | IMG_DATA_FORMAT_10_10_10_2 = 0x8, | ||
905 | IMG_DATA_FORMAT_2_10_10_10 = 0x9, | ||
906 | IMG_DATA_FORMAT_8_8_8_8 = 0xa, | ||
907 | IMG_DATA_FORMAT_32_32 = 0xb, | ||
908 | IMG_DATA_FORMAT_16_16_16_16 = 0xc, | ||
909 | IMG_DATA_FORMAT_32_32_32 = 0xd, | ||
910 | IMG_DATA_FORMAT_32_32_32_32 = 0xe, | ||
911 | IMG_DATA_FORMAT_RESERVED_15 = 0xf, | ||
912 | IMG_DATA_FORMAT_5_6_5 = 0x10, | ||
913 | IMG_DATA_FORMAT_1_5_5_5 = 0x11, | ||
914 | IMG_DATA_FORMAT_5_5_5_1 = 0x12, | ||
915 | IMG_DATA_FORMAT_4_4_4_4 = 0x13, | ||
916 | IMG_DATA_FORMAT_8_24 = 0x14, | ||
917 | IMG_DATA_FORMAT_24_8 = 0x15, | ||
918 | IMG_DATA_FORMAT_X24_8_32 = 0x16, | ||
919 | IMG_DATA_FORMAT_RESERVED_23 = 0x17, | ||
920 | IMG_DATA_FORMAT_RESERVED_24 = 0x18, | ||
921 | IMG_DATA_FORMAT_RESERVED_25 = 0x19, | ||
922 | IMG_DATA_FORMAT_RESERVED_26 = 0x1a, | ||
923 | IMG_DATA_FORMAT_RESERVED_27 = 0x1b, | ||
924 | IMG_DATA_FORMAT_RESERVED_28 = 0x1c, | ||
925 | IMG_DATA_FORMAT_RESERVED_29 = 0x1d, | ||
926 | IMG_DATA_FORMAT_RESERVED_30 = 0x1e, | ||
927 | IMG_DATA_FORMAT_RESERVED_31 = 0x1f, | ||
928 | IMG_DATA_FORMAT_GB_GR = 0x20, | ||
929 | IMG_DATA_FORMAT_BG_RG = 0x21, | ||
930 | IMG_DATA_FORMAT_5_9_9_9 = 0x22, | ||
931 | IMG_DATA_FORMAT_BC1 = 0x23, | ||
932 | IMG_DATA_FORMAT_BC2 = 0x24, | ||
933 | IMG_DATA_FORMAT_BC3 = 0x25, | ||
934 | IMG_DATA_FORMAT_BC4 = 0x26, | ||
935 | IMG_DATA_FORMAT_BC5 = 0x27, | ||
936 | IMG_DATA_FORMAT_BC6 = 0x28, | ||
937 | IMG_DATA_FORMAT_BC7 = 0x29, | ||
938 | IMG_DATA_FORMAT_RESERVED_42 = 0x2a, | ||
939 | IMG_DATA_FORMAT_RESERVED_43 = 0x2b, | ||
940 | IMG_DATA_FORMAT_FMASK8_S2_F1 = 0x2c, | ||
941 | IMG_DATA_FORMAT_FMASK8_S4_F1 = 0x2d, | ||
942 | IMG_DATA_FORMAT_FMASK8_S8_F1 = 0x2e, | ||
943 | IMG_DATA_FORMAT_FMASK8_S2_F2 = 0x2f, | ||
944 | IMG_DATA_FORMAT_FMASK8_S4_F2 = 0x30, | ||
945 | IMG_DATA_FORMAT_FMASK8_S4_F4 = 0x31, | ||
946 | IMG_DATA_FORMAT_FMASK16_S16_F1 = 0x32, | ||
947 | IMG_DATA_FORMAT_FMASK16_S8_F2 = 0x33, | ||
948 | IMG_DATA_FORMAT_FMASK32_S16_F2 = 0x34, | ||
949 | IMG_DATA_FORMAT_FMASK32_S8_F4 = 0x35, | ||
950 | IMG_DATA_FORMAT_FMASK32_S8_F8 = 0x36, | ||
951 | IMG_DATA_FORMAT_FMASK64_S16_F4 = 0x37, | ||
952 | IMG_DATA_FORMAT_FMASK64_S16_F8 = 0x38, | ||
953 | IMG_DATA_FORMAT_4_4 = 0x39, | ||
954 | IMG_DATA_FORMAT_6_5_5 = 0x3a, | ||
955 | IMG_DATA_FORMAT_1 = 0x3b, | ||
956 | IMG_DATA_FORMAT_1_REVERSED = 0x3c, | ||
957 | IMG_DATA_FORMAT_32_AS_8 = 0x3d, | ||
958 | IMG_DATA_FORMAT_32_AS_8_8 = 0x3e, | ||
959 | IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x3f, | ||
960 | } IMG_DATA_FORMAT; | ||
961 | typedef enum BUF_NUM_FORMAT { | ||
962 | BUF_NUM_FORMAT_UNORM = 0x0, | ||
963 | BUF_NUM_FORMAT_SNORM = 0x1, | ||
964 | BUF_NUM_FORMAT_USCALED = 0x2, | ||
965 | BUF_NUM_FORMAT_SSCALED = 0x3, | ||
966 | BUF_NUM_FORMAT_UINT = 0x4, | ||
967 | BUF_NUM_FORMAT_SINT = 0x5, | ||
968 | BUF_NUM_FORMAT_SNORM_OGL = 0x6, | ||
969 | BUF_NUM_FORMAT_FLOAT = 0x7, | ||
970 | } BUF_NUM_FORMAT; | ||
971 | typedef enum IMG_NUM_FORMAT { | ||
972 | IMG_NUM_FORMAT_UNORM = 0x0, | ||
973 | IMG_NUM_FORMAT_SNORM = 0x1, | ||
974 | IMG_NUM_FORMAT_USCALED = 0x2, | ||
975 | IMG_NUM_FORMAT_SSCALED = 0x3, | ||
976 | IMG_NUM_FORMAT_UINT = 0x4, | ||
977 | IMG_NUM_FORMAT_SINT = 0x5, | ||
978 | IMG_NUM_FORMAT_SNORM_OGL = 0x6, | ||
979 | IMG_NUM_FORMAT_FLOAT = 0x7, | ||
980 | IMG_NUM_FORMAT_RESERVED_8 = 0x8, | ||
981 | IMG_NUM_FORMAT_SRGB = 0x9, | ||
982 | IMG_NUM_FORMAT_UBNORM = 0xa, | ||
983 | IMG_NUM_FORMAT_UBNORM_OGL = 0xb, | ||
984 | IMG_NUM_FORMAT_UBINT = 0xc, | ||
985 | IMG_NUM_FORMAT_UBSCALED = 0xd, | ||
986 | IMG_NUM_FORMAT_RESERVED_14 = 0xe, | ||
987 | IMG_NUM_FORMAT_RESERVED_15 = 0xf, | ||
988 | } IMG_NUM_FORMAT; | ||
989 | typedef enum TileType { | ||
990 | ARRAY_COLOR_TILE = 0x0, | ||
991 | ARRAY_DEPTH_TILE = 0x1, | ||
992 | } TileType; | ||
993 | typedef enum NonDispTilingOrder { | ||
994 | ADDR_SURF_MICRO_TILING_DISPLAY = 0x0, | ||
995 | ADDR_SURF_MICRO_TILING_NON_DISPLAY = 0x1, | ||
996 | } NonDispTilingOrder; | ||
997 | typedef enum MicroTileMode { | ||
998 | ADDR_SURF_DISPLAY_MICRO_TILING = 0x0, | ||
999 | ADDR_SURF_THIN_MICRO_TILING = 0x1, | ||
1000 | ADDR_SURF_DEPTH_MICRO_TILING = 0x2, | ||
1001 | ADDR_SURF_ROTATED_MICRO_TILING = 0x3, | ||
1002 | ADDR_SURF_THICK_MICRO_TILING = 0x4, | ||
1003 | } MicroTileMode; | ||
1004 | typedef enum TileSplit { | ||
1005 | ADDR_SURF_TILE_SPLIT_64B = 0x0, | ||
1006 | ADDR_SURF_TILE_SPLIT_128B = 0x1, | ||
1007 | ADDR_SURF_TILE_SPLIT_256B = 0x2, | ||
1008 | ADDR_SURF_TILE_SPLIT_512B = 0x3, | ||
1009 | ADDR_SURF_TILE_SPLIT_1KB = 0x4, | ||
1010 | ADDR_SURF_TILE_SPLIT_2KB = 0x5, | ||
1011 | ADDR_SURF_TILE_SPLIT_4KB = 0x6, | ||
1012 | } TileSplit; | ||
1013 | typedef enum SampleSplit { | ||
1014 | ADDR_SURF_SAMPLE_SPLIT_1 = 0x0, | ||
1015 | ADDR_SURF_SAMPLE_SPLIT_2 = 0x1, | ||
1016 | ADDR_SURF_SAMPLE_SPLIT_4 = 0x2, | ||
1017 | ADDR_SURF_SAMPLE_SPLIT_8 = 0x3, | ||
1018 | } SampleSplit; | ||
1019 | typedef enum PipeConfig { | ||
1020 | ADDR_SURF_P2 = 0x0, | ||
1021 | ADDR_SURF_P2_RESERVED0 = 0x1, | ||
1022 | ADDR_SURF_P2_RESERVED1 = 0x2, | ||
1023 | ADDR_SURF_P2_RESERVED2 = 0x3, | ||
1024 | ADDR_SURF_P4_8x16 = 0x4, | ||
1025 | ADDR_SURF_P4_16x16 = 0x5, | ||
1026 | ADDR_SURF_P4_16x32 = 0x6, | ||
1027 | ADDR_SURF_P4_32x32 = 0x7, | ||
1028 | ADDR_SURF_P8_16x16_8x16 = 0x8, | ||
1029 | ADDR_SURF_P8_16x32_8x16 = 0x9, | ||
1030 | ADDR_SURF_P8_32x32_8x16 = 0xa, | ||
1031 | ADDR_SURF_P8_16x32_16x16 = 0xb, | ||
1032 | ADDR_SURF_P8_32x32_16x16 = 0xc, | ||
1033 | ADDR_SURF_P8_32x32_16x32 = 0xd, | ||
1034 | ADDR_SURF_P8_32x64_32x32 = 0xe, | ||
1035 | } PipeConfig; | ||
1036 | typedef enum NumBanks { | ||
1037 | ADDR_SURF_2_BANK = 0x0, | ||
1038 | ADDR_SURF_4_BANK = 0x1, | ||
1039 | ADDR_SURF_8_BANK = 0x2, | ||
1040 | ADDR_SURF_16_BANK = 0x3, | ||
1041 | } NumBanks; | ||
1042 | typedef enum BankWidth { | ||
1043 | ADDR_SURF_BANK_WIDTH_1 = 0x0, | ||
1044 | ADDR_SURF_BANK_WIDTH_2 = 0x1, | ||
1045 | ADDR_SURF_BANK_WIDTH_4 = 0x2, | ||
1046 | ADDR_SURF_BANK_WIDTH_8 = 0x3, | ||
1047 | } BankWidth; | ||
1048 | typedef enum BankHeight { | ||
1049 | ADDR_SURF_BANK_HEIGHT_1 = 0x0, | ||
1050 | ADDR_SURF_BANK_HEIGHT_2 = 0x1, | ||
1051 | ADDR_SURF_BANK_HEIGHT_4 = 0x2, | ||
1052 | ADDR_SURF_BANK_HEIGHT_8 = 0x3, | ||
1053 | } BankHeight; | ||
1054 | typedef enum BankWidthHeight { | ||
1055 | ADDR_SURF_BANK_WH_1 = 0x0, | ||
1056 | ADDR_SURF_BANK_WH_2 = 0x1, | ||
1057 | ADDR_SURF_BANK_WH_4 = 0x2, | ||
1058 | ADDR_SURF_BANK_WH_8 = 0x3, | ||
1059 | } BankWidthHeight; | ||
1060 | typedef enum MacroTileAspect { | ||
1061 | ADDR_SURF_MACRO_ASPECT_1 = 0x0, | ||
1062 | ADDR_SURF_MACRO_ASPECT_2 = 0x1, | ||
1063 | ADDR_SURF_MACRO_ASPECT_4 = 0x2, | ||
1064 | ADDR_SURF_MACRO_ASPECT_8 = 0x3, | ||
1065 | } MacroTileAspect; | ||
1066 | typedef enum TCC_CACHE_POLICIES { | ||
1067 | TCC_CACHE_POLICY_LRU = 0x0, | ||
1068 | TCC_CACHE_POLICY_STREAM = 0x1, | ||
1069 | TCC_CACHE_POLICY_BYPASS = 0x2, | ||
1070 | } TCC_CACHE_POLICIES; | ||
1071 | typedef enum PERFMON_COUNTER_MODE { | ||
1072 | PERFMON_COUNTER_MODE_ACCUM = 0x0, | ||
1073 | PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x1, | ||
1074 | PERFMON_COUNTER_MODE_MAX = 0x2, | ||
1075 | PERFMON_COUNTER_MODE_DIRTY = 0x3, | ||
1076 | PERFMON_COUNTER_MODE_SAMPLE = 0x4, | ||
1077 | PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT = 0x5, | ||
1078 | PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT = 0x6, | ||
1079 | PERFMON_COUNTER_MODE_CYCLES_GE_HI = 0x7, | ||
1080 | PERFMON_COUNTER_MODE_CYCLES_EQ_HI = 0x8, | ||
1081 | PERFMON_COUNTER_MODE_INACTIVE_CYCLES = 0x9, | ||
1082 | PERFMON_COUNTER_MODE_RESERVED = 0xf, | ||
1083 | } PERFMON_COUNTER_MODE; | ||
1084 | typedef enum PERFMON_SPM_MODE { | ||
1085 | PERFMON_SPM_MODE_OFF = 0x0, | ||
1086 | PERFMON_SPM_MODE_16BIT_CLAMP = 0x1, | ||
1087 | PERFMON_SPM_MODE_16BIT_NO_CLAMP = 0x2, | ||
1088 | PERFMON_SPM_MODE_32BIT_CLAMP = 0x3, | ||
1089 | PERFMON_SPM_MODE_32BIT_NO_CLAMP = 0x4, | ||
1090 | PERFMON_SPM_MODE_RESERVED_5 = 0x5, | ||
1091 | PERFMON_SPM_MODE_RESERVED_6 = 0x6, | ||
1092 | PERFMON_SPM_MODE_RESERVED_7 = 0x7, | ||
1093 | PERFMON_SPM_MODE_TEST_MODE_0 = 0x8, | ||
1094 | PERFMON_SPM_MODE_TEST_MODE_1 = 0x9, | ||
1095 | PERFMON_SPM_MODE_TEST_MODE_2 = 0xa, | ||
1096 | } PERFMON_SPM_MODE; | ||
1097 | typedef enum SurfaceTiling { | ||
1098 | ARRAY_LINEAR = 0x0, | ||
1099 | ARRAY_TILED = 0x1, | ||
1100 | } SurfaceTiling; | ||
1101 | typedef enum SurfaceArray { | ||
1102 | ARRAY_1D = 0x0, | ||
1103 | ARRAY_2D = 0x1, | ||
1104 | ARRAY_3D = 0x2, | ||
1105 | ARRAY_3D_SLICE = 0x3, | ||
1106 | } SurfaceArray; | ||
1107 | typedef enum ColorArray { | ||
1108 | ARRAY_2D_ALT_COLOR = 0x0, | ||
1109 | ARRAY_2D_COLOR = 0x1, | ||
1110 | ARRAY_3D_SLICE_COLOR = 0x3, | ||
1111 | } ColorArray; | ||
1112 | typedef enum DepthArray { | ||
1113 | ARRAY_2D_ALT_DEPTH = 0x0, | ||
1114 | ARRAY_2D_DEPTH = 0x1, | ||
1115 | } DepthArray; | ||
1116 | |||
1117 | #endif /* DCE_8_0_ENUM_H */ | ||
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h index 8a2930734477..c331c9fe7b81 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h | |||
@@ -4130,6 +4130,18 @@ | |||
4130 | #define PHY_AUX_CNTL__AUX_PAD_WAKE__SHIFT 0xe | 4130 | #define PHY_AUX_CNTL__AUX_PAD_WAKE__SHIFT 0xe |
4131 | #define PHY_AUX_CNTL__AUX_PAD_RXSEL_MASK 0x10000 | 4131 | #define PHY_AUX_CNTL__AUX_PAD_RXSEL_MASK 0x10000 |
4132 | #define PHY_AUX_CNTL__AUX_PAD_RXSEL__SHIFT 0x10 | 4132 | #define PHY_AUX_CNTL__AUX_PAD_RXSEL__SHIFT 0x10 |
4133 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK_MASK 0x1 | ||
4134 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK__SHIFT 0x0 | ||
4135 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS_MASK 0x2 | ||
4136 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS__SHIFT 0x1 | ||
4137 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV_MASK 0x4 | ||
4138 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV__SHIFT 0x2 | ||
4139 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK_MASK 0x10 | ||
4140 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK__SHIFT 0x4 | ||
4141 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS_MASK 0x20 | ||
4142 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS__SHIFT 0x5 | ||
4143 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV_MASK 0x40 | ||
4144 | #define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV__SHIFT 0x6 | ||
4133 | #define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A_MASK 0x1 | 4145 | #define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A_MASK 0x1 |
4134 | #define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A__SHIFT 0x0 | 4146 | #define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A__SHIFT 0x0 |
4135 | #define DC_GPIO_I2CPAD_A__DC_GPIO_SDA_A_MASK 0x2 | 4147 | #define DC_GPIO_I2CPAD_A__DC_GPIO_SDA_A_MASK 0x2 |
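The hunk above adds mask/shift pairs for the SCL and SDA control bits of the DC_GPIO_I2CPAD_MASK register, following the file's existing FIELD_MASK / FIELD__SHIFT convention. A minimal sketch of reading one field with them (dc_gpio_scl_recv() is a hypothetical helper, not part of the patch):

static inline uint32_t dc_gpio_scl_recv(uint32_t i2cpad_mask_reg)
{
	return (i2cpad_mask_reg & DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV_MASK) >>
		DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV__SHIFT;
}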
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h new file mode 100644 index 000000000000..d21c6b14662f --- /dev/null +++ b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | * Volcanic Islands IV SRC Register documentation | ||
3 | * | ||
4 | * Copyright (C) 2015 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included | ||
14 | * in all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN | ||
20 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
21 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
22 | */ | ||
23 | |||
24 | #ifndef _IVSRCID_VISLANDS30_H_ | ||
25 | #define _IVSRCID_VISLANDS30_H_ | ||
26 | |||
27 | |||
28 | // IV Source IDs | ||
29 | |||
30 | #define VISLANDS30_IV_SRCID_D1_V_UPDATE_INT 7 // 0x07 | ||
31 | #define VISLANDS30_IV_EXTID_D1_V_UPDATE_INT 0 | ||
32 | |||
33 | #define VISLANDS30_IV_SRCID_D1_GRPH_PFLIP 8 // 0x08 | ||
34 | #define VISLANDS30_IV_EXTID_D1_GRPH_PFLIP 0 | ||
35 | |||
36 | #define VISLANDS30_IV_SRCID_D2_V_UPDATE_INT 9 // 0x09 | ||
37 | #define VISLANDS30_IV_EXTID_D2_V_UPDATE_INT 0 | ||
38 | |||
39 | #define VISLANDS30_IV_SRCID_D2_GRPH_PFLIP 10 // 0x0a | ||
40 | #define VISLANDS30_IV_EXTID_D2_GRPH_PFLIP 0 | ||
41 | |||
42 | #define VISLANDS30_IV_SRCID_D3_V_UPDATE_INT 11 // 0x0b | ||
43 | #define VISLANDS30_IV_EXTID_D3_V_UPDATE_INT 0 | ||
44 | |||
45 | #define VISLANDS30_IV_SRCID_D3_GRPH_PFLIP 12 // 0x0c | ||
46 | #define VISLANDS30_IV_EXTID_D3_GRPH_PFLIP 0 | ||
47 | |||
48 | #define VISLANDS30_IV_SRCID_D4_V_UPDATE_INT 13 // 0x0d | ||
49 | #define VISLANDS30_IV_EXTID_D4_V_UPDATE_INT 0 | ||
50 | |||
51 | #define VISLANDS30_IV_SRCID_D4_GRPH_PFLIP 14 // 0x0e | ||
52 | #define VISLANDS30_IV_EXTID_D4_GRPH_PFLIP 0 | ||
53 | |||
54 | #define VISLANDS30_IV_SRCID_D5_V_UPDATE_INT 15 // 0x0f | ||
55 | #define VISLANDS30_IV_EXTID_D5_V_UPDATE_INT 0 | ||
56 | |||
57 | #define VISLANDS30_IV_SRCID_D5_GRPH_PFLIP 16 // 0x10 | ||
58 | #define VISLANDS30_IV_EXTID_D5_GRPH_PFLIP 0 | ||
59 | |||
60 | #define VISLANDS30_IV_SRCID_D6_V_UPDATE_INT 17 // 0x11 | ||
61 | #define VISLANDS30_IV_EXTID_D6_V_UPDATE_INT 0 | ||
62 | |||
63 | #define VISLANDS30_IV_SRCID_D6_GRPH_PFLIP 18 // 0x12 | ||
64 | #define VISLANDS30_IV_EXTID_D6_GRPH_PFLIP 0 | ||
65 | |||
66 | #define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A 42 // 0x2a | ||
67 | #define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_A 0 | ||
68 | |||
69 | #define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_B 42 // 0x2a | ||
70 | #define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_B 1 | ||
71 | |||
72 | #define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_C 42 // 0x2a | ||
73 | #define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_C 2 | ||
74 | |||
75 | #define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_D 42 // 0x2a | ||
76 | #define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_D 3 | ||
77 | |||
78 | #define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_E 42 // 0x2a | ||
79 | #define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_E 4 | ||
80 | |||
81 | #define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_F 42 // 0x2a | ||
82 | #define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_F 5 | ||
83 | |||
84 | #define VISLANDS30_IV_SRCID_HPD_RX_A 42 // 0x2a | ||
85 | #define VISLANDS30_IV_EXTID_HPD_RX_A 6 | ||
86 | |||
87 | #define VISLANDS30_IV_SRCID_HPD_RX_B 42 // 0x2a | ||
88 | #define VISLANDS30_IV_EXTID_HPD_RX_B 7 | ||
89 | |||
90 | #define VISLANDS30_IV_SRCID_HPD_RX_C 42 // 0x2a | ||
91 | #define VISLANDS30_IV_EXTID_HPD_RX_C 8 | ||
92 | |||
93 | #define VISLANDS30_IV_SRCID_HPD_RX_D 42 // 0x2a | ||
94 | #define VISLANDS30_IV_EXTID_HPD_RX_D 9 | ||
95 | |||
96 | #define VISLANDS30_IV_SRCID_HPD_RX_E 42 // 0x2a | ||
97 | #define VISLANDS30_IV_EXTID_HPD_RX_E 10 | ||
98 | |||
99 | #define VISLANDS30_IV_SRCID_HPD_RX_F 42 // 0x2a | ||
100 | #define VISLANDS30_IV_EXTID_HPD_RX_F 11 | ||
101 | |||
102 | #endif // _IVSRCID_VISLANDS30_H_ | ||
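Note that all twelve hotplug-related sources in this new header share IV SRCID 42 (0x2a) and are distinguished purely by the extended ID: EXTIDs 0-5 are the hotplug-detect pads A-F, and 6-11 the corresponding HPD RX events. A hedged dispatch sketch, assuming the driver receives (src_id, ext_id) pairs from the interrupt ring (hpd_irq() is hypothetical):

static void hpd_irq(unsigned int src_id, unsigned int ext_id)
{
	if (src_id != VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A)	/* 42 */
		return;

	if (ext_id <= VISLANDS30_IV_EXTID_HOTPLUG_DETECT_F)
		pr_debug("HPD connect/disconnect, pad %u\n", ext_id);
	else if (ext_id <= VISLANDS30_IV_EXTID_HPD_RX_F)
		pr_debug("HPD RX event, pad %u\n",
			 ext_id - VISLANDS30_IV_EXTID_HPD_RX_A);
}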
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index aa67244a77ae..2ee4190f8c89 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "pp_instance.h" | 29 | #include "pp_instance.h" |
30 | #include "power_state.h" | 30 | #include "power_state.h" |
31 | #include "eventmanager.h" | 31 | #include "eventmanager.h" |
32 | #include "pp_debug.h" | ||
32 | 33 | ||
33 | #define PP_CHECK(handle) \ | 34 | #define PP_CHECK(handle) \ |
34 | do { \ | 35 | do { \ |
@@ -433,7 +434,10 @@ enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle) | |||
433 | case PP_StateUILabel_Performance: | 434 | case PP_StateUILabel_Performance: |
434 | return POWER_STATE_TYPE_PERFORMANCE; | 435 | return POWER_STATE_TYPE_PERFORMANCE; |
435 | default: | 436 | default: |
436 | return POWER_STATE_TYPE_DEFAULT; | 437 | if (state->classification.flags & PP_StateClassificationFlag_Boot) |
438 | return POWER_STATE_TYPE_INTERNAL_BOOT; | ||
439 | else | ||
440 | return POWER_STATE_TYPE_DEFAULT; | ||
437 | } | 441 | } |
438 | } | 442 | } |
439 | 443 | ||
@@ -535,6 +539,112 @@ static int pp_dpm_get_temperature(void *handle) | |||
535 | return hwmgr->hwmgr_func->get_temperature(hwmgr); | 539 | return hwmgr->hwmgr_func->get_temperature(hwmgr); |
536 | } | 540 | } |
537 | 541 | ||
542 | static int pp_dpm_get_pp_num_states(void *handle, | ||
543 | struct pp_states_info *data) | ||
544 | { | ||
545 | struct pp_hwmgr *hwmgr; | ||
546 | int i; | ||
547 | |||
548 | if (!handle) | ||
549 | return -EINVAL; | ||
550 | |||
551 | hwmgr = ((struct pp_instance *)handle)->hwmgr; | ||
552 | |||
553 | if (hwmgr == NULL || hwmgr->ps == NULL) | ||
554 | return -EINVAL; | ||
555 | |||
556 | data->nums = hwmgr->num_ps; | ||
557 | |||
558 | for (i = 0; i < hwmgr->num_ps; i++) { | ||
559 | struct pp_power_state *state = (struct pp_power_state *) | ||
560 | ((unsigned long)hwmgr->ps + i * hwmgr->ps_size); | ||
561 | switch (state->classification.ui_label) { | ||
562 | case PP_StateUILabel_Battery: | ||
563 | data->states[i] = POWER_STATE_TYPE_BATTERY; | ||
564 | break; | ||
565 | case PP_StateUILabel_Balanced: | ||
566 | data->states[i] = POWER_STATE_TYPE_BALANCED; | ||
567 | break; | ||
568 | case PP_StateUILabel_Performance: | ||
569 | data->states[i] = POWER_STATE_TYPE_PERFORMANCE; | ||
570 | break; | ||
571 | default: | ||
572 | if (state->classification.flags & PP_StateClassificationFlag_Boot) | ||
573 | data->states[i] = POWER_STATE_TYPE_INTERNAL_BOOT; | ||
574 | else | ||
575 | data->states[i] = POWER_STATE_TYPE_DEFAULT; | ||
576 | } | ||
577 | } | ||
578 | |||
579 | return 0; | ||
580 | } | ||
581 | |||
582 | static int pp_dpm_get_pp_table(void *handle, char **table) | ||
583 | { | ||
584 | struct pp_hwmgr *hwmgr; | ||
585 | |||
586 | if (!handle) | ||
587 | return -EINVAL; | ||
588 | |||
589 | hwmgr = ((struct pp_instance *)handle)->hwmgr; | ||
590 | |||
591 | if (hwmgr == NULL || hwmgr->hwmgr_func == NULL || | ||
592 | hwmgr->hwmgr_func->get_pp_table == NULL) | ||
593 | return -EINVAL; | ||
594 | |||
595 | return hwmgr->hwmgr_func->get_pp_table(hwmgr, table); | ||
596 | } | ||
597 | |||
598 | static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size) | ||
599 | { | ||
600 | struct pp_hwmgr *hwmgr; | ||
601 | |||
602 | if (!handle) | ||
603 | return -EINVAL; | ||
604 | |||
605 | hwmgr = ((struct pp_instance *)handle)->hwmgr; | ||
606 | |||
607 | if (hwmgr == NULL || hwmgr->hwmgr_func == NULL || | ||
608 | hwmgr->hwmgr_func->set_pp_table == NULL) | ||
609 | return -EINVAL; | ||
610 | |||
611 | return hwmgr->hwmgr_func->set_pp_table(hwmgr, buf, size); | ||
612 | } | ||
613 | |||
614 | static int pp_dpm_force_clock_level(void *handle, | ||
615 | enum pp_clock_type type, int level) | ||
616 | { | ||
617 | struct pp_hwmgr *hwmgr; | ||
618 | |||
619 | if (!handle) | ||
620 | return -EINVAL; | ||
621 | |||
622 | hwmgr = ((struct pp_instance *)handle)->hwmgr; | ||
623 | |||
624 | if (hwmgr == NULL || hwmgr->hwmgr_func == NULL || | ||
625 | hwmgr->hwmgr_func->force_clock_level == NULL) | ||
626 | return -EINVAL; | ||
627 | |||
628 | return hwmgr->hwmgr_func->force_clock_level(hwmgr, type, level); | ||
629 | } | ||
630 | |||
631 | static int pp_dpm_print_clock_levels(void *handle, | ||
632 | enum pp_clock_type type, char *buf) | ||
633 | { | ||
634 | struct pp_hwmgr *hwmgr; | ||
635 | |||
636 | if (!handle) | ||
637 | return -EINVAL; | ||
638 | |||
639 | hwmgr = ((struct pp_instance *)handle)->hwmgr; | ||
640 | |||
641 | if (hwmgr == NULL || hwmgr->hwmgr_func == NULL || | ||
642 | hwmgr->hwmgr_func->print_clock_levels == NULL) | ||
643 | return -EINVAL; | ||
644 | |||
645 | return hwmgr->hwmgr_func->print_clock_levels(hwmgr, type, buf); | ||
646 | } | ||
647 | |||
538 | const struct amd_powerplay_funcs pp_dpm_funcs = { | 648 | const struct amd_powerplay_funcs pp_dpm_funcs = { |
539 | .get_temperature = pp_dpm_get_temperature, | 649 | .get_temperature = pp_dpm_get_temperature, |
540 | .load_firmware = pp_dpm_load_fw, | 650 | .load_firmware = pp_dpm_load_fw, |
@@ -552,6 +662,11 @@ const struct amd_powerplay_funcs pp_dpm_funcs = { | |||
552 | .get_fan_control_mode = pp_dpm_get_fan_control_mode, | 662 | .get_fan_control_mode = pp_dpm_get_fan_control_mode, |
553 | .set_fan_speed_percent = pp_dpm_set_fan_speed_percent, | 663 | .set_fan_speed_percent = pp_dpm_set_fan_speed_percent, |
554 | .get_fan_speed_percent = pp_dpm_get_fan_speed_percent, | 664 | .get_fan_speed_percent = pp_dpm_get_fan_speed_percent, |
665 | .get_pp_num_states = pp_dpm_get_pp_num_states, | ||
666 | .get_pp_table = pp_dpm_get_pp_table, | ||
667 | .set_pp_table = pp_dpm_set_pp_table, | ||
668 | .force_clock_level = pp_dpm_force_clock_level, | ||
669 | .print_clock_levels = pp_dpm_print_clock_levels, | ||
555 | }; | 670 | }; |
556 | 671 | ||
557 | static int amd_pp_instance_init(struct amd_pp_init *pp_init, | 672 | static int amd_pp_instance_init(struct amd_pp_init *pp_init, |
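The five new callbacks wired into pp_dpm_funcs here back the new powerplay sysfs interface from the merge summary: pp_table read/write plus manual clock-level inspection and forcing. A sketch of how a driver-side sysfs handler might drive two of them, assuming pp_handle is the amd_powerplay instance handle (the handler names are illustrative):

static ssize_t show_sclk_levels(void *pp_handle, char *buf)
{
	/* fills buf with one line per SCLK DPM level, returns the length */
	return pp_dpm_funcs.print_clock_levels(pp_handle, PP_SCLK, buf);
}

static int force_sclk_level(void *pp_handle, int level)
{
	/* only honored once dpm_level is AMD_DPM_FORCED_LEVEL_MANUAL,
	 * as the cz_force_clock_level() implementation below checks */
	return pp_dpm_funcs.force_clock_level(pp_handle, PP_SCLK, level);
}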
@@ -635,10 +750,10 @@ int amd_powerplay_fini(void *handle) | |||
635 | 750 | ||
636 | /* export this function to DAL */ | 751 | /* export this function to DAL */ |
637 | 752 | ||
638 | int amd_powerplay_display_configuration_change(void *handle, const void *input) | 753 | int amd_powerplay_display_configuration_change(void *handle, |
754 | const struct amd_pp_display_configuration *display_config) | ||
639 | { | 755 | { |
640 | struct pp_hwmgr *hwmgr; | 756 | struct pp_hwmgr *hwmgr; |
641 | const struct amd_pp_display_configuration *display_config = input; | ||
642 | 757 | ||
643 | PP_CHECK((struct pp_instance *)handle); | 758 | PP_CHECK((struct pp_instance *)handle); |
644 | 759 | ||
@@ -650,7 +765,7 @@ int amd_powerplay_display_configuration_change(void *handle, const void *input) | |||
650 | } | 765 | } |
651 | 766 | ||
652 | int amd_powerplay_get_display_power_level(void *handle, | 767 | int amd_powerplay_get_display_power_level(void *handle, |
653 | struct amd_pp_dal_clock_info *output) | 768 | struct amd_pp_simple_clock_info *output) |
654 | { | 769 | { |
655 | struct pp_hwmgr *hwmgr; | 770 | struct pp_hwmgr *hwmgr; |
656 | 771 | ||
@@ -663,3 +778,86 @@ int amd_powerplay_get_display_power_level(void *handle, | |||
663 | 778 | ||
664 | return phm_get_dal_power_level(hwmgr, output); | 779 | return phm_get_dal_power_level(hwmgr, output); |
665 | } | 780 | } |
781 | |||
782 | int amd_powerplay_get_current_clocks(void *handle, | ||
783 | struct amd_pp_clock_info *clocks) | ||
784 | { | ||
785 | struct pp_hwmgr *hwmgr; | ||
786 | struct amd_pp_simple_clock_info simple_clocks; | ||
787 | struct pp_clock_info hw_clocks; | ||
788 | |||
789 | PP_CHECK((struct pp_instance *)handle); | ||
790 | |||
791 | if (clocks == NULL) | ||
792 | return -EINVAL; | ||
793 | |||
794 | hwmgr = ((struct pp_instance *)handle)->hwmgr; | ||
795 | |||
796 | phm_get_dal_power_level(hwmgr, &simple_clocks); | ||
797 | |||
798 | if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PowerContainment)) { | ||
799 | if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_PowerContainment)) | ||
800 | PP_ASSERT_WITH_CODE(0, "Error in PHM_GetPowerContainmentClockInfo", return -1); | ||
801 | } else { | ||
802 | if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_Activity)) | ||
803 | PP_ASSERT_WITH_CODE(0, "Error in PHM_GetClockInfo", return -1); | ||
804 | } | ||
805 | |||
806 | clocks->min_engine_clock = hw_clocks.min_eng_clk; | ||
807 | clocks->max_engine_clock = hw_clocks.max_eng_clk; | ||
808 | clocks->min_memory_clock = hw_clocks.min_mem_clk; | ||
809 | clocks->max_memory_clock = hw_clocks.max_mem_clk; | ||
810 | clocks->min_bus_bandwidth = hw_clocks.min_bus_bandwidth; | ||
811 | clocks->max_bus_bandwidth = hw_clocks.max_bus_bandwidth; | ||
812 | |||
813 | clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk; | ||
814 | clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk; | ||
815 | |||
816 | clocks->max_clocks_state = simple_clocks.level; | ||
817 | |||
818 | if (0 == phm_get_current_shallow_sleep_clocks(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks)) { | ||
819 | clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk; | ||
820 | clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk; | ||
821 | } | ||
822 | |||
823 | return 0; | ||
824 | |||
825 | } | ||
826 | |||
827 | int amd_powerplay_get_clock_by_type(void *handle, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks) | ||
828 | { | ||
829 | int result = -1; | ||
830 | |||
831 | struct pp_hwmgr *hwmgr; | ||
832 | |||
833 | PP_CHECK((struct pp_instance *)handle); | ||
834 | |||
835 | if (clocks == NULL) | ||
836 | return -EINVAL; | ||
837 | |||
838 | hwmgr = ((struct pp_instance *)handle)->hwmgr; | ||
839 | |||
840 | result = phm_get_clock_by_type(hwmgr, type, clocks); | ||
841 | |||
842 | return result; | ||
843 | } | ||
844 | |||
845 | int amd_powerplay_get_display_mode_validation_clocks(void *handle, | ||
846 | struct amd_pp_simple_clock_info *clocks) | ||
847 | { | ||
848 | int result = -1; | ||
849 | struct pp_hwmgr *hwmgr; | ||
850 | |||
851 | PP_CHECK((struct pp_instance *)handle); | ||
852 | |||
853 | if (clocks == NULL) | ||
854 | return -EINVAL; | ||
855 | |||
856 | hwmgr = ((struct pp_instance *)handle)->hwmgr; | ||
857 | |||
858 | if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DynamicPatchPowerState)) | ||
859 | result = phm_get_max_high_clocks(hwmgr, clocks); | ||
860 | |||
861 | return result; | ||
862 | } | ||
863 | |||
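Beyond the funcs table, this file now also exports three clock queries toward DAL: amd_powerplay_get_current_clocks(), amd_powerplay_get_clock_by_type() and amd_powerplay_get_display_mode_validation_clocks(). A minimal caller sketch, assuming pp_handle is the powerplay handle and that the amd_pp_clock_info fields print as unsigned 32-bit values:

static void log_current_clocks(void *pp_handle)
{
	struct amd_pp_clock_info info = {0};

	if (amd_powerplay_get_current_clocks(pp_handle, &info) == 0)
		pr_info("eng clk %u-%u, mem clk %u-%u\n",
			info.min_engine_clock, info.max_engine_clock,
			info.min_memory_clock, info.max_memory_clock);
}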
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c index 0874ab42ee95..ef1daf1251c7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c | |||
@@ -715,7 +715,6 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr, | |||
715 | unsigned long clock = 0; | 715 | unsigned long clock = 0; |
716 | unsigned long level; | 716 | unsigned long level; |
717 | unsigned long stable_pstate_sclk; | 717 | unsigned long stable_pstate_sclk; |
718 | struct PP_Clocks clocks; | ||
719 | unsigned long percentage; | 718 | unsigned long percentage; |
720 | 719 | ||
721 | cz_hwmgr->sclk_dpm.soft_min_clk = table->entries[0].clk; | 720 | cz_hwmgr->sclk_dpm.soft_min_clk = table->entries[0].clk; |
@@ -726,8 +725,9 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr, | |||
726 | else | 725 | else |
727 | cz_hwmgr->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk; | 726 | cz_hwmgr->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk; |
728 | 727 | ||
729 | /*PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks);*/ | 728 | clock = hwmgr->display_config.min_core_set_clock; |
730 | clock = clocks.engineClock; | 729 | if (clock == 0) |
730 | printk(KERN_ERR "[ powerplay ] min_core_set_clock not set\n"); | ||
731 | 731 | ||
732 | if (cz_hwmgr->sclk_dpm.hard_min_clk != clock) { | 732 | if (cz_hwmgr->sclk_dpm.hard_min_clk != clock) { |
733 | cz_hwmgr->sclk_dpm.hard_min_clk = clock; | 733 | cz_hwmgr->sclk_dpm.hard_min_clk = clock; |
@@ -883,9 +883,9 @@ static int cz_tf_update_low_mem_pstate(struct pp_hwmgr *hwmgr, | |||
883 | 883 | ||
884 | if (pnew_state->action == FORCE_HIGH) | 884 | if (pnew_state->action == FORCE_HIGH) |
885 | cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch); | 885 | cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch); |
886 | else if(pnew_state->action == CANCEL_FORCE_HIGH) | 886 | else if (pnew_state->action == CANCEL_FORCE_HIGH) |
887 | cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch); | 887 | cz_nbdpm_pstate_enable_disable(hwmgr, true, disable_switch); |
888 | else | 888 | else |
889 | cz_nbdpm_pstate_enable_disable(hwmgr, enable_low_mem_state, disable_switch); | 889 | cz_nbdpm_pstate_enable_disable(hwmgr, enable_low_mem_state, disable_switch); |
890 | } | 890 | } |
891 | return 0; | 891 | return 0; |
@@ -1110,9 +1110,10 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, | |||
1110 | cast_const_PhwCzPowerState(&pcurrent_ps->hardware); | 1110 | cast_const_PhwCzPowerState(&pcurrent_ps->hardware); |
1111 | 1111 | ||
1112 | struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); | 1112 | struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); |
1113 | struct PP_Clocks clocks; | 1113 | struct PP_Clocks clocks = {0, 0, 0, 0}; |
1114 | bool force_high; | 1114 | bool force_high; |
1115 | unsigned long num_of_active_displays = 4; | 1115 | uint32_t num_of_active_displays = 0; |
1116 | struct cgs_display_info info = {0}; | ||
1116 | 1117 | ||
1117 | cz_ps->evclk = hwmgr->vce_arbiter.evclk; | 1118 | cz_ps->evclk = hwmgr->vce_arbiter.evclk; |
1118 | cz_ps->ecclk = hwmgr->vce_arbiter.ecclk; | 1119 | cz_ps->ecclk = hwmgr->vce_arbiter.ecclk; |
@@ -1124,12 +1125,15 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, | |||
1124 | 1125 | ||
1125 | cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label); | 1126 | cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label); |
1126 | 1127 | ||
1127 | /* to do PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks); */ | 1128 | clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ? |
1128 | /* PECI_GetNumberOfActiveDisplays(pHwMgr->pPECI, &numOfActiveDisplays); */ | 1129 | hwmgr->display_config.min_mem_set_clock : |
1130 | cz_hwmgr->sys_info.nbp_memory_clock[1]; | ||
1131 | |||
1132 | cgs_get_active_displays_info(hwmgr->device, &info); | ||
1133 | num_of_active_displays = info.display_count; | ||
1134 | |||
1129 | if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) | 1135 | if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) |
1130 | clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk; | 1136 | clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk; |
1131 | else | ||
1132 | clocks.memoryClock = 0; | ||
1133 | 1137 | ||
1134 | if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk) | 1138 | if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk) |
1135 | clocks.memoryClock = hwmgr->gfx_arbiter.mclk; | 1139 | clocks.memoryClock = hwmgr->gfx_arbiter.mclk; |
@@ -1199,6 +1203,7 @@ static int cz_hwmgr_backend_init(struct pp_hwmgr *hwmgr) | |||
1199 | printk(KERN_ERR "[ powerplay ] Fail to construct set_power_state\n"); | 1203 | printk(KERN_ERR "[ powerplay ] Fail to construct set_power_state\n"); |
1200 | return result; | 1204 | return result; |
1201 | } | 1205 | } |
1206 | hwmgr->platform_descriptor.hardwareActivityPerformanceLevels = CZ_MAX_HARDWARE_POWERLEVELS; | ||
1202 | 1207 | ||
1203 | result = phm_construct_table(hwmgr, &cz_phm_enable_clock_power_gatings_master, &(hwmgr->enable_clock_power_gatings)); | 1208 | result = phm_construct_table(hwmgr, &cz_phm_enable_clock_power_gatings_master, &(hwmgr->enable_clock_power_gatings)); |
1204 | if (result != 0) { | 1209 | if (result != 0) { |
@@ -1630,10 +1635,10 @@ static void cz_hw_print_display_cfg( | |||
1630 | & PWRMGT_SEPARATION_TIME_MASK) | 1635 | & PWRMGT_SEPARATION_TIME_MASK) |
1631 | << PWRMGT_SEPARATION_TIME_SHIFT; | 1636 | << PWRMGT_SEPARATION_TIME_SHIFT; |
1632 | 1637 | ||
1633 | data|= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0) | 1638 | data |= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0) |
1634 | << PWRMGT_DISABLE_CPU_CSTATES_SHIFT; | 1639 | << PWRMGT_DISABLE_CPU_CSTATES_SHIFT; |
1635 | 1640 | ||
1636 | data|= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0) | 1641 | data |= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0) |
1637 | << PWRMGT_DISABLE_CPU_PSTATES_SHIFT; | 1642 | << PWRMGT_DISABLE_CPU_PSTATES_SHIFT; |
1638 | 1643 | ||
1639 | PP_DBG_LOG("SetDisplaySizePowerParams data: 0x%X\n", | 1644 | PP_DBG_LOG("SetDisplaySizePowerParams data: 0x%X\n", |
@@ -1648,9 +1653,9 @@ static void cz_hw_print_display_cfg( | |||
1648 | } | 1653 | } |
1649 | 1654 | ||
1650 | 1655 | ||
1651 | static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time, | 1656 | static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time, |
1652 | bool cc6_disable, bool pstate_disable, bool pstate_switch_disable) | 1657 | bool cc6_disable, bool pstate_disable, bool pstate_switch_disable) |
1653 | { | 1658 | { |
1654 | struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend); | 1659 | struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend); |
1655 | 1660 | ||
1656 | if (separation_time != | 1661 | if (separation_time != |
@@ -1678,20 +1683,19 @@ static void cz_hw_print_display_cfg( | |||
1678 | return 0; | 1683 | return 0; |
1679 | } | 1684 | } |
1680 | 1685 | ||
1681 | static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr, | 1686 | static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr, |
1682 | struct amd_pp_dal_clock_info*info) | 1687 | struct amd_pp_simple_clock_info *info) |
1683 | { | 1688 | { |
1684 | uint32_t i; | 1689 | uint32_t i; |
1685 | const struct phm_clock_voltage_dependency_table * table = | 1690 | const struct phm_clock_voltage_dependency_table *table = |
1686 | hwmgr->dyn_state.vddc_dep_on_dal_pwrl; | 1691 | hwmgr->dyn_state.vddc_dep_on_dal_pwrl; |
1687 | const struct phm_clock_and_voltage_limits* limits = | 1692 | const struct phm_clock_and_voltage_limits *limits = |
1688 | &hwmgr->dyn_state.max_clock_voltage_on_ac; | 1693 | &hwmgr->dyn_state.max_clock_voltage_on_ac; |
1689 | 1694 | ||
1690 | info->engine_max_clock = limits->sclk; | 1695 | info->engine_max_clock = limits->sclk; |
1691 | info->memory_max_clock = limits->mclk; | 1696 | info->memory_max_clock = limits->mclk; |
1692 | 1697 | ||
1693 | for (i = table->count - 1; i > 0; i--) { | 1698 | for (i = table->count - 1; i > 0; i--) { |
1694 | |||
1695 | if (limits->vddc >= table->entries[i].v) { | 1699 | if (limits->vddc >= table->entries[i].v) { |
1696 | info->level = table->entries[i].clk; | 1700 | info->level = table->entries[i].clk; |
1697 | return 0; | 1701 | return 0; |
@@ -1700,6 +1704,158 @@ static void cz_hw_print_display_cfg( | |||
1700 | return -EINVAL; | 1704 | return -EINVAL; |
1701 | } | 1705 | } |
1702 | 1706 | ||
1707 | static int cz_force_clock_level(struct pp_hwmgr *hwmgr, | ||
1708 | enum pp_clock_type type, int level) | ||
1709 | { | ||
1710 | if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) | ||
1711 | return -EINVAL; | ||
1712 | |||
1713 | switch (type) { | ||
1714 | case PP_SCLK: | ||
1715 | smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, | ||
1716 | PPSMC_MSG_SetSclkSoftMin, | ||
1717 | (1 << level)); | ||
1718 | smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, | ||
1719 | PPSMC_MSG_SetSclkSoftMax, | ||
1720 | (1 << level)); | ||
1721 | break; | ||
1722 | default: | ||
1723 | break; | ||
1724 | } | ||
1725 | |||
1726 | return 0; | ||
1727 | } | ||
1728 | |||
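
The new hook refuses to act unless the DPM level was first switched to the AMD_DPM_FORCED_LEVEL_MANUAL mode added to amd_powerplay.h later in this patch; on Carrizo it pins SCLK by collapsing the SMU's soft minimum and maximum to the same single-level mask. A hypothetical caller sketch (the helper name is invented; the hook and PP_SCLK are declared elsewhere in this patch):

	/* Illustrative helper: pin the Carrizo SCLK to one DPM level.
	 * Assumes hwmgr->dpm_level is already AMD_DPM_FORCED_LEVEL_MANUAL,
	 * otherwise the hook rejects the request with -EINVAL. */
	static int example_pin_sclk(struct pp_hwmgr *hwmgr, int level)
	{
		if (hwmgr->hwmgr_func->force_clock_level == NULL)
			return -EINVAL;

		return hwmgr->hwmgr_func->force_clock_level(hwmgr, PP_SCLK, level);
	}
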
1729 | static int cz_print_clock_levels(struct pp_hwmgr *hwmgr, | ||
1730 | enum pp_clock_type type, char *buf) | ||
1731 | { | ||
1732 | struct phm_clock_voltage_dependency_table *sclk_table = | ||
1733 | hwmgr->dyn_state.vddc_dependency_on_sclk; | ||
1734 | int i, now, size = 0; | ||
1735 | |||
1736 | switch (type) { | ||
1737 | case PP_SCLK: | ||
1738 | now = PHM_GET_FIELD(cgs_read_ind_register(hwmgr->device, | ||
1739 | CGS_IND_REG__SMC, | ||
1740 | ixTARGET_AND_CURRENT_PROFILE_INDEX), | ||
1741 | TARGET_AND_CURRENT_PROFILE_INDEX, | ||
1742 | CURR_SCLK_INDEX); | ||
1743 | |||
1744 | for (i = 0; i < sclk_table->count; i++) | ||
1745 | size += sprintf(buf + size, "%d: %uMhz %s\n", | ||
1746 | i, sclk_table->entries[i].clk / 100, | ||
1747 | (i == now) ? "*" : ""); | ||
1748 | break; | ||
1749 | default: | ||
1750 | break; | ||
1751 | } | ||
1752 | return size; | ||
1753 | } | ||
1754 | |||
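
cz_print_clock_levels() writes one line per SCLK level into a caller-supplied buffer; the powerplay dependency tables store clocks in 10 kHz units, hence the divide by 100 to print MHz, and the asterisk marks the index the SMU currently reports. Illustrative output only, with made-up clock values:

	/*
	 * 0: 300Mhz
	 * 1: 600Mhz *
	 * 2: 800Mhz
	 */
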
1755 | static int cz_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, | ||
1756 | PHM_PerformanceLevelDesignation designation, uint32_t index, | ||
1757 | PHM_PerformanceLevel *level) | ||
1758 | { | ||
1759 | const struct cz_power_state *ps; | ||
1760 | struct cz_hwmgr *data; | ||
1761 | uint32_t level_index; | ||
1762 | uint32_t i; | ||
1763 | |||
1764 | if (level == NULL || hwmgr == NULL || state == NULL) | ||
1765 | return -EINVAL; | ||
1766 | |||
1767 | data = (struct cz_hwmgr *)(hwmgr->backend); | ||
1768 | ps = cast_const_PhwCzPowerState(state); | ||
1769 | |||
1770 | level_index = index > ps->level - 1 ? ps->level - 1 : index; | ||
1771 | |||
1772 | level->coreClock = ps->levels[level_index].engineClock; | ||
1773 | |||
1774 | if (designation == PHM_PerformanceLevelDesignation_PowerContainment) { | ||
1775 | for (i = 1; i < ps->level; i++) { | ||
1776 | if (ps->levels[i].engineClock > data->dce_slow_sclk_threshold) { | ||
1777 | level->coreClock = ps->levels[i].engineClock; | ||
1778 | break; | ||
1779 | } | ||
1780 | } | ||
1781 | } | ||
1782 | |||
1783 | if (level_index == 0) | ||
1784 | level->memory_clock = data->sys_info.nbp_memory_clock[CZ_NUM_NBPMEMORYCLOCK - 1]; | ||
1785 | else | ||
1786 | level->memory_clock = data->sys_info.nbp_memory_clock[0]; | ||
1787 | |||
1788 | level->vddc = (cz_convert_8Bit_index_to_voltage(hwmgr, ps->levels[level_index].vddcIndex) + 2) / 4; | ||
1789 | level->nonLocalMemoryFreq = 0; | ||
1790 | level->nonLocalMemoryWidth = 0; | ||
1791 | |||
1792 | return 0; | ||
1793 | } | ||
1794 | |||
1795 | static int cz_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, | ||
1796 | const struct pp_hw_power_state *state, struct pp_clock_info *clock_info) | ||
1797 | { | ||
1798 | const struct cz_power_state *ps = cast_const_PhwCzPowerState(state); | ||
1799 | |||
1800 | clock_info->min_eng_clk = ps->levels[0].engineClock / (1 << (ps->levels[0].ssDividerIndex)); | ||
1801 | clock_info->max_eng_clk = ps->levels[ps->level - 1].engineClock / (1 << (ps->levels[ps->level - 1].ssDividerIndex)); | ||
1802 | |||
1803 | return 0; | ||
1804 | } | ||
1805 | |||
1806 | static int cz_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, | ||
1807 | struct amd_pp_clocks *clocks) | ||
1808 | { | ||
1809 | struct cz_hwmgr *data = (struct cz_hwmgr *)(hwmgr->backend); | ||
1810 | int i; | ||
1811 | struct phm_clock_voltage_dependency_table *table; | ||
1812 | |||
1813 | clocks->count = cz_get_max_sclk_level(hwmgr); | ||
1814 | switch (type) { | ||
1815 | case amd_pp_disp_clock: | ||
1816 | for (i = 0; i < clocks->count; i++) | ||
1817 | clocks->clock[i] = data->sys_info.display_clock[i]; | ||
1818 | break; | ||
1819 | case amd_pp_sys_clock: | ||
1820 | table = hwmgr->dyn_state.vddc_dependency_on_sclk; | ||
1821 | for (i = 0; i < clocks->count; i++) | ||
1822 | clocks->clock[i] = table->entries[i].clk; | ||
1823 | break; | ||
1824 | case amd_pp_mem_clock: | ||
1825 | clocks->count = CZ_NUM_NBPMEMORYCLOCK; | ||
1826 | for (i = 0; i < clocks->count; i++) | ||
1827 | clocks->clock[i] = data->sys_info.nbp_memory_clock[clocks->count - 1 - i]; | ||
1828 | break; | ||
1829 | default: | ||
1830 | return -1; | ||
1831 | } | ||
1832 | |||
1833 | return 0; | ||
1834 | } | ||
1835 | |||
1836 | static int cz_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks) | ||
1837 | { | ||
1838 | struct phm_clock_voltage_dependency_table *table = | ||
1839 | hwmgr->dyn_state.vddc_dependency_on_sclk; | ||
1840 | unsigned long level; | ||
1841 | const struct phm_clock_and_voltage_limits *limits = | ||
1842 | &hwmgr->dyn_state.max_clock_voltage_on_ac; | ||
1843 | |||
1844 | if ((NULL == table) || (table->count <= 0) || (clocks == NULL)) | ||
1845 | return -EINVAL; | ||
1846 | |||
1847 | level = cz_get_max_sclk_level(hwmgr) - 1; | ||
1848 | |||
1849 | if (level < table->count) | ||
1850 | clocks->engine_max_clock = table->entries[level].clk; | ||
1851 | else | ||
1852 | clocks->engine_max_clock = table->entries[table->count - 1].clk; | ||
1853 | |||
1854 | clocks->memory_max_clock = limits->mclk; | ||
1855 | |||
1856 | return 0; | ||
1857 | } | ||
1858 | |||
1703 | static const struct pp_hwmgr_func cz_hwmgr_funcs = { | 1859 | static const struct pp_hwmgr_func cz_hwmgr_funcs = { |
1704 | .backend_init = cz_hwmgr_backend_init, | 1860 | .backend_init = cz_hwmgr_backend_init, |
1705 | .backend_fini = cz_hwmgr_backend_fini, | 1861 | .backend_fini = cz_hwmgr_backend_fini, |
@@ -1718,7 +1874,13 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = { | |||
1718 | .print_current_perforce_level = cz_print_current_perforce_level, | 1874 | .print_current_perforce_level = cz_print_current_perforce_level, |
1719 | .set_cpu_power_state = cz_set_cpu_power_state, | 1875 | .set_cpu_power_state = cz_set_cpu_power_state, |
1720 | .store_cc6_data = cz_store_cc6_data, | 1876 | .store_cc6_data = cz_store_cc6_data, |
1721 | .get_dal_power_level= cz_get_dal_power_level, | 1877 | .force_clock_level = cz_force_clock_level, |
1878 | .print_clock_levels = cz_print_clock_levels, | ||
1879 | .get_dal_power_level = cz_get_dal_power_level, | ||
1880 | .get_performance_level = cz_get_performance_level, | ||
1881 | .get_current_shallow_sleep_clocks = cz_get_current_shallow_sleep_clocks, | ||
1882 | .get_clock_by_type = cz_get_clock_by_type, | ||
1883 | .get_max_high_clocks = cz_get_max_high_clocks, | ||
1722 | }; | 1884 | }; |
1723 | 1885 | ||
1724 | int cz_hwmgr_init(struct pp_hwmgr *hwmgr) | 1886 | int cz_hwmgr_init(struct pp_hwmgr *hwmgr) |
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 28031a7eddba..5cca2ecc6bea 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c | |||
@@ -5073,6 +5073,125 @@ static int fiji_get_fan_control_mode(struct pp_hwmgr *hwmgr) | |||
5073 | CG_FDO_CTRL2, FDO_PWM_MODE); | 5073 | CG_FDO_CTRL2, FDO_PWM_MODE); |
5074 | } | 5074 | } |
5075 | 5075 | ||
5076 | static int fiji_get_pp_table(struct pp_hwmgr *hwmgr, char **table) | ||
5077 | { | ||
5078 | struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); | ||
5079 | |||
5080 | *table = (char *)&data->smc_state_table; | ||
5081 | |||
5082 | return sizeof(struct SMU73_Discrete_DpmTable); | ||
5083 | } | ||
5084 | |||
5085 | static int fiji_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size) | ||
5086 | { | ||
5087 | struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); | ||
5088 | |||
5089 | void *table = (void *)&data->smc_state_table; | ||
5090 | |||
5091 | memcpy(table, buf, size); | ||
5092 | |||
5093 | return 0; | ||
5094 | } | ||
5095 | |||
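
fiji_get_pp_table() returns a pointer directly into the backend's smc_state_table along with its size, and fiji_set_pp_table() memcpy()s the caller's buffer over that same table without a bounds check, so callers must never pass more bytes than get_pp_table reported. A minimal sketch of the intended round trip (the helper name is invented; error handling is elided):

	static int example_pp_table_roundtrip(struct pp_hwmgr *hwmgr)
	{
		char *table;
		int size;

		size = hwmgr->hwmgr_func->get_pp_table(hwmgr, &table);
		if (size <= 0)
			return -EINVAL;

		/* ... patch individual fields of the table here ... */

		/* set_pp_table copies blindly, so reuse the reported size */
		return hwmgr->hwmgr_func->set_pp_table(hwmgr, table, size);
	}
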
5096 | static int fiji_force_clock_level(struct pp_hwmgr *hwmgr, | ||
5097 | enum pp_clock_type type, int level) | ||
5098 | { | ||
5099 | struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); | ||
5100 | |||
5101 | if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) | ||
5102 | return -EINVAL; | ||
5103 | |||
5104 | switch (type) { | ||
5105 | case PP_SCLK: | ||
5106 | if (!data->sclk_dpm_key_disabled) | ||
5107 | smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, | ||
5108 | PPSMC_MSG_SCLKDPM_SetEnabledMask, | ||
5109 | (1 << level)); | ||
5110 | break; | ||
5111 | case PP_MCLK: | ||
5112 | if (!data->mclk_dpm_key_disabled) | ||
5113 | smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, | ||
5114 | PPSMC_MSG_MCLKDPM_SetEnabledMask, | ||
5115 | (1 << level)); | ||
5116 | break; | ||
5117 | case PP_PCIE: | ||
5118 | if (!data->pcie_dpm_key_disabled) | ||
5119 | smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, | ||
5120 | PPSMC_MSG_PCIeDPM_ForceLevel, | ||
5121 | (1 << level)); | ||
5122 | break; | ||
5123 | default: | ||
5124 | break; | ||
5125 | } | ||
5126 | |||
5127 | return 0; | ||
5128 | } | ||
5129 | |||
5130 | static int fiji_print_clock_levels(struct pp_hwmgr *hwmgr, | ||
5131 | enum pp_clock_type type, char *buf) | ||
5132 | { | ||
5133 | struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); | ||
5134 | struct fiji_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table); | ||
5135 | struct fiji_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table); | ||
5136 | struct fiji_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table); | ||
5137 | int i, now, size = 0; | ||
5138 | uint32_t clock, pcie_speed; | ||
5139 | |||
5140 | switch (type) { | ||
5141 | case PP_SCLK: | ||
5142 | smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency); | ||
5143 | clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); | ||
5144 | |||
5145 | for (i = 0; i < sclk_table->count; i++) { | ||
5146 | if (clock > sclk_table->dpm_levels[i].value) | ||
5147 | continue; | ||
5148 | break; | ||
5149 | } | ||
5150 | now = i; | ||
5151 | |||
5152 | for (i = 0; i < sclk_table->count; i++) | ||
5153 | size += sprintf(buf + size, "%d: %uMhz %s\n", | ||
5154 | i, sclk_table->dpm_levels[i].value / 100, | ||
5155 | (i == now) ? "*" : ""); | ||
5156 | break; | ||
5157 | case PP_MCLK: | ||
5158 | smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency); | ||
5159 | clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); | ||
5160 | |||
5161 | for (i = 0; i < mclk_table->count; i++) { | ||
5162 | if (clock > mclk_table->dpm_levels[i].value) | ||
5163 | continue; | ||
5164 | break; | ||
5165 | } | ||
5166 | now = i; | ||
5167 | |||
5168 | for (i = 0; i < mclk_table->count; i++) | ||
5169 | size += sprintf(buf + size, "%d: %uMhz %s\n", | ||
5170 | i, mclk_table->dpm_levels[i].value / 100, | ||
5171 | (i == now) ? "*" : ""); | ||
5172 | break; | ||
5173 | case PP_PCIE: | ||
5174 | pcie_speed = fiji_get_current_pcie_speed(hwmgr); | ||
5175 | for (i = 0; i < pcie_table->count; i++) { | ||
5176 | if (pcie_speed != pcie_table->dpm_levels[i].value) | ||
5177 | continue; | ||
5178 | break; | ||
5179 | } | ||
5180 | now = i; | ||
5181 | |||
5182 | for (i = 0; i < pcie_table->count; i++) | ||
5183 | size += sprintf(buf + size, "%d: %s %s\n", i, | ||
5184 | (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" : | ||
5185 | (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" : | ||
5186 | (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "", | ||
5187 | (i == now) ? "*" : ""); | ||
5188 | break; | ||
5189 | default: | ||
5190 | break; | ||
5191 | } | ||
5192 | return size; | ||
5193 | } | ||
5194 | |||
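
Both fiji_print_clock_levels() above and its tonga twin further down lean on the same mailbox idiom to find the current level: post a query message to the SMC, then read the reply out of the SMC argument register. Distilled into a hypothetical helper (the function name is invented; the two calls are exactly the ones used above):

	static uint32_t example_query_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
	{
		/* msg is e.g. PPSMC_MSG_API_GetSclkFrequency */
		smum_send_msg_to_smc(hwmgr->smumgr, msg);
		return cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
	}
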
5076 | static const struct pp_hwmgr_func fiji_hwmgr_funcs = { | 5195 | static const struct pp_hwmgr_func fiji_hwmgr_funcs = { |
5077 | .backend_init = &fiji_hwmgr_backend_init, | 5196 | .backend_init = &fiji_hwmgr_backend_init, |
5078 | .backend_fini = &tonga_hwmgr_backend_fini, | 5197 | .backend_fini = &tonga_hwmgr_backend_fini, |
@@ -5108,6 +5227,10 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = { | |||
5108 | .register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt, | 5227 | .register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt, |
5109 | .set_fan_control_mode = fiji_set_fan_control_mode, | 5228 | .set_fan_control_mode = fiji_set_fan_control_mode, |
5110 | .get_fan_control_mode = fiji_get_fan_control_mode, | 5229 | .get_fan_control_mode = fiji_get_fan_control_mode, |
5230 | .get_pp_table = fiji_get_pp_table, | ||
5231 | .set_pp_table = fiji_set_pp_table, | ||
5232 | .force_clock_level = fiji_force_clock_level, | ||
5233 | .print_clock_levels = fiji_print_clock_levels, | ||
5111 | }; | 5234 | }; |
5112 | 5235 | ||
5113 | int fiji_hwmgr_init(struct pp_hwmgr *hwmgr) | 5236 | int fiji_hwmgr_init(struct pp_hwmgr *hwmgr) |
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 0f2d5e4bc241..be31bed2538a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | |||
@@ -26,7 +26,7 @@ | |||
26 | #include "power_state.h" | 26 | #include "power_state.h" |
27 | #include "pp_acpi.h" | 27 | #include "pp_acpi.h" |
28 | #include "amd_acpi.h" | 28 | #include "amd_acpi.h" |
29 | #include "amd_powerplay.h" | 29 | #include "pp_debug.h" |
30 | 30 | ||
31 | #define PHM_FUNC_CHECK(hw) \ | 31 | #define PHM_FUNC_CHECK(hw) \ |
32 | do { \ | 32 | do { \ |
@@ -313,13 +313,12 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, | |||
313 | } | 313 | } |
314 | 314 | ||
315 | int phm_get_dal_power_level(struct pp_hwmgr *hwmgr, | 315 | int phm_get_dal_power_level(struct pp_hwmgr *hwmgr, |
316 | struct amd_pp_dal_clock_info *info) | 316 | struct amd_pp_simple_clock_info *info) |
317 | { | 317 | { |
318 | PHM_FUNC_CHECK(hwmgr); | 318 | PHM_FUNC_CHECK(hwmgr); |
319 | 319 | ||
320 | if (info == NULL || hwmgr->hwmgr_func->get_dal_power_level == NULL) | 320 | if (info == NULL || hwmgr->hwmgr_func->get_dal_power_level == NULL) |
321 | return -EINVAL; | 321 | return -EINVAL; |
322 | |||
323 | return hwmgr->hwmgr_func->get_dal_power_level(hwmgr, info); | 322 | return hwmgr->hwmgr_func->get_dal_power_level(hwmgr, info); |
324 | } | 323 | } |
325 | 324 | ||
@@ -332,3 +331,91 @@ int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr) | |||
332 | 331 | ||
333 | return 0; | 332 | return 0; |
334 | } | 333 | } |
334 | |||
335 | |||
336 | int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, | ||
337 | PHM_PerformanceLevelDesignation designation, uint32_t index, | ||
338 | PHM_PerformanceLevel *level) | ||
339 | { | ||
340 | PHM_FUNC_CHECK(hwmgr); | ||
341 | if (hwmgr->hwmgr_func->get_performance_level == NULL) | ||
342 | return -EINVAL; | ||
343 | |||
344 | return hwmgr->hwmgr_func->get_performance_level(hwmgr, state, designation, index, level); | ||
345 | |||
346 | |||
347 | } | ||
348 | |||
349 | |||
350 | /** | ||
351 | * Gets Clock Info. | ||
352 | * | ||
353 | * @param pHwMgr the address of the powerplay hardware manager. | ||
354 | * @param pPowerState the address of the Power State structure. | ||
355 | * @param pClockInfo the address of PP_ClockInfo structure where the result will be returned. | ||
356 | * @exception PP_Result_Failed if any of the parameters is NULL, otherwise the return value from the back-end. | ||
357 | */ | ||
358 | int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *pclock_info, | ||
359 | PHM_PerformanceLevelDesignation designation) | ||
360 | { | ||
361 | int result; | ||
362 | PHM_PerformanceLevel performance_level; | ||
363 | |||
364 | PHM_FUNC_CHECK(hwmgr); | ||
365 | |||
366 | PP_ASSERT_WITH_CODE((NULL != state), "Invalid Input!", return -EINVAL); | ||
367 | PP_ASSERT_WITH_CODE((NULL != pclock_info), "Invalid Input!", return -EINVAL); | ||
368 | |||
369 | result = phm_get_performance_level(hwmgr, state, PHM_PerformanceLevelDesignation_Activity, 0, &performance_level); | ||
370 | |||
371 | PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve minimum clocks.", return result); | ||
372 | |||
373 | |||
374 | pclock_info->min_mem_clk = performance_level.memory_clock; | ||
375 | pclock_info->min_eng_clk = performance_level.coreClock; | ||
376 | pclock_info->min_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth; | ||
377 | |||
378 | |||
379 | result = phm_get_performance_level(hwmgr, state, designation, | ||
380 | (hwmgr->platform_descriptor.hardwareActivityPerformanceLevels - 1), &performance_level); | ||
381 | |||
382 | PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve maximum clocks.", return result); | ||
383 | |||
384 | pclock_info->max_mem_clk = performance_level.memory_clock; | ||
385 | pclock_info->max_eng_clk = performance_level.coreClock; | ||
386 | pclock_info->max_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth; | ||
387 | |||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info) | ||
392 | { | ||
393 | PHM_FUNC_CHECK(hwmgr); | ||
394 | |||
395 | if (hwmgr->hwmgr_func->get_current_shallow_sleep_clocks == NULL) | ||
396 | return -EINVAL; | ||
397 | |||
398 | return hwmgr->hwmgr_func->get_current_shallow_sleep_clocks(hwmgr, state, clock_info); | ||
399 | |||
400 | } | ||
401 | |||
402 | int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks) | ||
403 | { | ||
404 | PHM_FUNC_CHECK(hwmgr); | ||
405 | |||
406 | if (hwmgr->hwmgr_func->get_clock_by_type == NULL) | ||
407 | return -EINVAL; | ||
408 | |||
409 | return hwmgr->hwmgr_func->get_clock_by_type(hwmgr, type, clocks); | ||
410 | |||
411 | } | ||
412 | |||
413 | int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks) | ||
414 | { | ||
415 | PHM_FUNC_CHECK(hwmgr); | ||
416 | |||
417 | if (hwmgr->hwmgr_func->get_max_high_clocks == NULL) | ||
418 | return -EINVAL; | ||
419 | |||
420 | return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks); | ||
421 | } | ||
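
Each of the new phm_* wrappers above follows one shape: validate the manager with PHM_FUNC_CHECK, return -EINVAL when the back-end leaves the hook unimplemented, and otherwise delegate. As a generic sketch (some_hook is a placeholder, not a real pp_hwmgr_func member):

	int phm_example(struct pp_hwmgr *hwmgr /* , hook-specific args */)
	{
		PHM_FUNC_CHECK(hwmgr);	/* sanity-checks hwmgr and its func table */

		if (hwmgr->hwmgr_func->some_hook == NULL)
			return -EINVAL;	/* hooks are optional per back-end */

		return hwmgr->hwmgr_func->some_hook(hwmgr /* , args */);
	}
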
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h index b7429a527828..b10df328d58c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h | |||
@@ -293,7 +293,7 @@ fInt GetScaledFraction(int X, int factor) | |||
293 | } | 293 | } |
294 | 294 | ||
295 | if (factor == 1) | 295 | if (factor == 1) |
296 | return (ConvertToFraction(X)); | 296 | return ConvertToFraction(X); |
297 | 297 | ||
298 | fValue = fDivide(ConvertToFraction(X * uPow(-1, bNEGATED)), ConvertToFraction(factor)); | 298 | fValue = fDivide(ConvertToFraction(X * uPow(-1, bNEGATED)), ConvertToFraction(factor)); |
299 | 299 | ||
@@ -371,7 +371,7 @@ fInt fDivide (fInt X, fInt Y) | |||
371 | fZERO = ConvertToFraction(0); | 371 | fZERO = ConvertToFraction(0); |
372 | 372 | ||
373 | if (Equal(Y, fZERO)) | 373 | if (Equal(Y, fZERO)) |
374 | return fZERO; | 374 | return fZERO; |
375 | 375 | ||
376 | longlongX = (int64_t)X.full; | 376 | longlongX = (int64_t)X.full; |
377 | longlongY = (int64_t)Y.full; | 377 | longlongY = (int64_t)Y.full; |
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index 44a925006479..bc83fa35ec46 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c | |||
@@ -6018,6 +6018,125 @@ static int tonga_get_fan_control_mode(struct pp_hwmgr *hwmgr) | |||
6018 | CG_FDO_CTRL2, FDO_PWM_MODE); | 6018 | CG_FDO_CTRL2, FDO_PWM_MODE); |
6019 | } | 6019 | } |
6020 | 6020 | ||
6021 | static int tonga_get_pp_table(struct pp_hwmgr *hwmgr, char **table) | ||
6022 | { | ||
6023 | struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend); | ||
6024 | |||
6025 | *table = (char *)&data->smc_state_table; | ||
6026 | |||
6027 | return sizeof(struct SMU72_Discrete_DpmTable); | ||
6028 | } | ||
6029 | |||
6030 | static int tonga_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size) | ||
6031 | { | ||
6032 | struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend); | ||
6033 | |||
6034 | void *table = (void *)&data->smc_state_table; | ||
6035 | |||
6036 | memcpy(table, buf, size); | ||
6037 | |||
6038 | return 0; | ||
6039 | } | ||
6040 | |||
6041 | static int tonga_force_clock_level(struct pp_hwmgr *hwmgr, | ||
6042 | enum pp_clock_type type, int level) | ||
6043 | { | ||
6044 | struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend); | ||
6045 | |||
6046 | if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) | ||
6047 | return -EINVAL; | ||
6048 | |||
6049 | switch (type) { | ||
6050 | case PP_SCLK: | ||
6051 | if (!data->sclk_dpm_key_disabled) | ||
6052 | smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, | ||
6053 | PPSMC_MSG_SCLKDPM_SetEnabledMask, | ||
6054 | (1 << level)); | ||
6055 | break; | ||
6056 | case PP_MCLK: | ||
6057 | if (!data->mclk_dpm_key_disabled) | ||
6058 | smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, | ||
6059 | PPSMC_MSG_MCLKDPM_SetEnabledMask, | ||
6060 | (1 << level)); | ||
6061 | break; | ||
6062 | case PP_PCIE: | ||
6063 | if (!data->pcie_dpm_key_disabled) | ||
6064 | smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, | ||
6065 | PPSMC_MSG_PCIeDPM_ForceLevel, | ||
6066 | (1 << level)); | ||
6067 | break; | ||
6068 | default: | ||
6069 | break; | ||
6070 | } | ||
6071 | |||
6072 | return 0; | ||
6073 | } | ||
6074 | |||
6075 | static int tonga_print_clock_levels(struct pp_hwmgr *hwmgr, | ||
6076 | enum pp_clock_type type, char *buf) | ||
6077 | { | ||
6078 | struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend); | ||
6079 | struct tonga_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table); | ||
6080 | struct tonga_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table); | ||
6081 | struct tonga_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table); | ||
6082 | int i, now, size = 0; | ||
6083 | uint32_t clock, pcie_speed; | ||
6084 | |||
6085 | switch (type) { | ||
6086 | case PP_SCLK: | ||
6087 | smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency); | ||
6088 | clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); | ||
6089 | |||
6090 | for (i = 0; i < sclk_table->count; i++) { | ||
6091 | if (clock > sclk_table->dpm_levels[i].value) | ||
6092 | continue; | ||
6093 | break; | ||
6094 | } | ||
6095 | now = i; | ||
6096 | |||
6097 | for (i = 0; i < sclk_table->count; i++) | ||
6098 | size += sprintf(buf + size, "%d: %uMhz %s\n", | ||
6099 | i, sclk_table->dpm_levels[i].value / 100, | ||
6100 | (i == now) ? "*" : ""); | ||
6101 | break; | ||
6102 | case PP_MCLK: | ||
6103 | smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency); | ||
6104 | clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); | ||
6105 | |||
6106 | for (i = 0; i < mclk_table->count; i++) { | ||
6107 | if (clock > mclk_table->dpm_levels[i].value) | ||
6108 | continue; | ||
6109 | break; | ||
6110 | } | ||
6111 | now = i; | ||
6112 | |||
6113 | for (i = 0; i < mclk_table->count; i++) | ||
6114 | size += sprintf(buf + size, "%d: %uMhz %s\n", | ||
6115 | i, mclk_table->dpm_levels[i].value / 100, | ||
6116 | (i == now) ? "*" : ""); | ||
6117 | break; | ||
6118 | case PP_PCIE: | ||
6119 | pcie_speed = tonga_get_current_pcie_speed(hwmgr); | ||
6120 | for (i = 0; i < pcie_table->count; i++) { | ||
6121 | if (pcie_speed != pcie_table->dpm_levels[i].value) | ||
6122 | continue; | ||
6123 | break; | ||
6124 | } | ||
6125 | now = i; | ||
6126 | |||
6127 | for (i = 0; i < pcie_table->count; i++) | ||
6128 | size += sprintf(buf + size, "%d: %s %s\n", i, | ||
6129 | (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x8" : | ||
6130 | (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" : | ||
6131 | (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "", | ||
6132 | (i == now) ? "*" : ""); | ||
6133 | break; | ||
6134 | default: | ||
6135 | break; | ||
6136 | } | ||
6137 | return size; | ||
6138 | } | ||
6139 | |||
6021 | static const struct pp_hwmgr_func tonga_hwmgr_funcs = { | 6140 | static const struct pp_hwmgr_func tonga_hwmgr_funcs = { |
6022 | .backend_init = &tonga_hwmgr_backend_init, | 6141 | .backend_init = &tonga_hwmgr_backend_init, |
6023 | .backend_fini = &tonga_hwmgr_backend_fini, | 6142 | .backend_fini = &tonga_hwmgr_backend_fini, |
@@ -6055,6 +6174,10 @@ static const struct pp_hwmgr_func tonga_hwmgr_funcs = { | |||
6055 | .check_states_equal = tonga_check_states_equal, | 6174 | .check_states_equal = tonga_check_states_equal, |
6056 | .set_fan_control_mode = tonga_set_fan_control_mode, | 6175 | .set_fan_control_mode = tonga_set_fan_control_mode, |
6057 | .get_fan_control_mode = tonga_get_fan_control_mode, | 6176 | .get_fan_control_mode = tonga_get_fan_control_mode, |
6177 | .get_pp_table = tonga_get_pp_table, | ||
6178 | .set_pp_table = tonga_set_pp_table, | ||
6179 | .force_clock_level = tonga_force_clock_level, | ||
6180 | .print_clock_levels = tonga_print_clock_levels, | ||
6058 | }; | 6181 | }; |
6059 | 6182 | ||
6060 | int tonga_hwmgr_init(struct pp_hwmgr *hwmgr) | 6183 | int tonga_hwmgr_init(struct pp_hwmgr *hwmgr) |
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h index e61a3e67852e..7255f7ddf93a 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "amd_shared.h" | 29 | #include "amd_shared.h" |
30 | #include "cgs_common.h" | 30 | #include "cgs_common.h" |
31 | 31 | ||
32 | |||
32 | enum amd_pp_event { | 33 | enum amd_pp_event { |
33 | AMD_PP_EVENT_INITIALIZE = 0, | 34 | AMD_PP_EVENT_INITIALIZE = 0, |
34 | AMD_PP_EVENT_UNINITIALIZE, | 35 | AMD_PP_EVENT_UNINITIALIZE, |
@@ -123,6 +124,7 @@ enum amd_dpm_forced_level { | |||
123 | AMD_DPM_FORCED_LEVEL_AUTO = 0, | 124 | AMD_DPM_FORCED_LEVEL_AUTO = 0, |
124 | AMD_DPM_FORCED_LEVEL_LOW = 1, | 125 | AMD_DPM_FORCED_LEVEL_LOW = 1, |
125 | AMD_DPM_FORCED_LEVEL_HIGH = 2, | 126 | AMD_DPM_FORCED_LEVEL_HIGH = 2, |
127 | AMD_DPM_FORCED_LEVEL_MANUAL = 3, | ||
126 | }; | 128 | }; |
127 | 129 | ||
128 | struct amd_pp_init { | 130 | struct amd_pp_init { |
@@ -212,12 +214,55 @@ struct amd_pp_display_configuration { | |||
212 | uint32_t dce_tolerable_mclk_in_active_latency; | 214 | uint32_t dce_tolerable_mclk_in_active_latency; |
213 | }; | 215 | }; |
214 | 216 | ||
215 | struct amd_pp_dal_clock_info { | 217 | struct amd_pp_simple_clock_info { |
216 | uint32_t engine_max_clock; | 218 | uint32_t engine_max_clock; |
217 | uint32_t memory_max_clock; | 219 | uint32_t memory_max_clock; |
218 | uint32_t level; | 220 | uint32_t level; |
219 | }; | 221 | }; |
220 | 222 | ||
223 | enum PP_DAL_POWERLEVEL { | ||
224 | PP_DAL_POWERLEVEL_INVALID = 0, | ||
225 | PP_DAL_POWERLEVEL_ULTRALOW, | ||
226 | PP_DAL_POWERLEVEL_LOW, | ||
227 | PP_DAL_POWERLEVEL_NOMINAL, | ||
228 | PP_DAL_POWERLEVEL_PERFORMANCE, | ||
229 | |||
230 | PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW, | ||
231 | PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW, | ||
232 | PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL, | ||
233 | PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE, | ||
234 | PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1, | ||
235 | PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1, | ||
236 | PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1, | ||
237 | PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1, | ||
238 | }; | ||
239 | |||
240 | struct amd_pp_clock_info { | ||
241 | uint32_t min_engine_clock; | ||
242 | uint32_t max_engine_clock; | ||
243 | uint32_t min_memory_clock; | ||
244 | uint32_t max_memory_clock; | ||
245 | uint32_t min_bus_bandwidth; | ||
246 | uint32_t max_bus_bandwidth; | ||
247 | uint32_t max_engine_clock_in_sr; | ||
248 | uint32_t min_engine_clock_in_sr; | ||
249 | enum PP_DAL_POWERLEVEL max_clocks_state; | ||
250 | }; | ||
251 | |||
252 | enum amd_pp_clock_type { | ||
253 | amd_pp_disp_clock = 1, | ||
254 | amd_pp_sys_clock, | ||
255 | amd_pp_mem_clock | ||
256 | }; | ||
257 | |||
258 | #define MAX_NUM_CLOCKS 16 | ||
259 | |||
260 | struct amd_pp_clocks { | ||
261 | uint32_t count; | ||
262 | uint32_t clock[MAX_NUM_CLOCKS]; | ||
263 | }; | ||
264 | |||
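
struct amd_pp_clocks is the fixed-size container that the new amd_powerplay_get_clock_by_type() entry point, declared near the end of this header, fills in. A hedged usage sketch, where handle is the opaque powerplay handle set up by amd_powerplay_init() and the printk is purely illustrative:

	struct amd_pp_clocks clocks;
	uint32_t i;

	if (amd_powerplay_get_clock_by_type(handle, amd_pp_disp_clock, &clocks) == 0)
		for (i = 0; i < clocks.count; i++)
			printk(KERN_INFO "display clock %u: %u\n", i, clocks.clock[i]);
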
265 | |||
221 | enum { | 266 | enum { |
222 | PP_GROUP_UNKNOWN = 0, | 267 | PP_GROUP_UNKNOWN = 0, |
223 | PP_GROUP_GFX = 1, | 268 | PP_GROUP_GFX = 1, |
@@ -225,6 +270,17 @@ enum { | |||
225 | PP_GROUP_MAX | 270 | PP_GROUP_MAX |
226 | }; | 271 | }; |
227 | 272 | ||
273 | enum pp_clock_type { | ||
274 | PP_SCLK, | ||
275 | PP_MCLK, | ||
276 | PP_PCIE, | ||
277 | }; | ||
278 | |||
279 | struct pp_states_info { | ||
280 | uint32_t nums; | ||
281 | uint32_t states[16]; | ||
282 | }; | ||
283 | |||
228 | #define PP_GROUP_MASK 0xF0000000 | 284 | #define PP_GROUP_MASK 0xF0000000 |
229 | #define PP_GROUP_SHIFT 28 | 285 | #define PP_GROUP_SHIFT 28 |
230 | 286 | ||
@@ -278,6 +334,11 @@ struct amd_powerplay_funcs { | |||
278 | int (*get_fan_control_mode)(void *handle); | 334 | int (*get_fan_control_mode)(void *handle); |
279 | int (*set_fan_speed_percent)(void *handle, uint32_t percent); | 335 | int (*set_fan_speed_percent)(void *handle, uint32_t percent); |
280 | int (*get_fan_speed_percent)(void *handle, uint32_t *speed); | 336 | int (*get_fan_speed_percent)(void *handle, uint32_t *speed); |
337 | int (*get_pp_num_states)(void *handle, struct pp_states_info *data); | ||
338 | int (*get_pp_table)(void *handle, char **table); | ||
339 | int (*set_pp_table)(void *handle, const char *buf, size_t size); | ||
340 | int (*force_clock_level)(void *handle, enum pp_clock_type type, int level); | ||
341 | int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf); | ||
281 | }; | 342 | }; |
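
The five additions to amd_powerplay_funcs mirror the hwmgr hooks earlier in this patch and back the new manual clock-control plumbing. A sketch of driving one of them (the pp_handle/pp_funcs field names of struct amd_powerplay are assumed from the surrounding powerplay code, not shown in this hunk; the printk is illustrative):

	struct pp_states_info states;
	uint32_t i;

	if (pp->pp_funcs->get_pp_num_states(pp->pp_handle, &states) == 0)
		for (i = 0; i < states.nums; i++)
			printk(KERN_INFO "power state %u: %u\n", i, states.states[i]);
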
282 | 343 | ||
283 | struct amd_powerplay { | 344 | struct amd_powerplay { |
@@ -288,12 +349,23 @@ struct amd_powerplay { | |||
288 | 349 | ||
289 | int amd_powerplay_init(struct amd_pp_init *pp_init, | 350 | int amd_powerplay_init(struct amd_pp_init *pp_init, |
290 | struct amd_powerplay *amd_pp); | 351 | struct amd_powerplay *amd_pp); |
352 | |||
291 | int amd_powerplay_fini(void *handle); | 353 | int amd_powerplay_fini(void *handle); |
292 | 354 | ||
293 | int amd_powerplay_display_configuration_change(void *handle, const void *input); | 355 | int amd_powerplay_display_configuration_change(void *handle, |
356 | const struct amd_pp_display_configuration *input); | ||
294 | 357 | ||
295 | int amd_powerplay_get_display_power_level(void *handle, | 358 | int amd_powerplay_get_display_power_level(void *handle, |
296 | struct amd_pp_dal_clock_info *output); | 359 | struct amd_pp_simple_clock_info *output); |
360 | |||
361 | int amd_powerplay_get_current_clocks(void *handle, | ||
362 | struct amd_pp_clock_info *output); | ||
363 | |||
364 | int amd_powerplay_get_clock_by_type(void *handle, | ||
365 | enum amd_pp_clock_type type, | ||
366 | struct amd_pp_clocks *clocks); | ||
297 | 367 | ||
368 | int amd_powerplay_get_display_mode_validation_clocks(void *handle, | ||
369 | struct amd_pp_simple_clock_info *output); | ||
298 | 370 | ||
299 | #endif /* _AMD_POWERPLAY_H_ */ | 371 | #endif /* _AMD_POWERPLAY_H_ */ |
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index 91795efe1336..040d3f7cbf49 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h | |||
@@ -31,6 +31,7 @@ struct pp_power_state; | |||
31 | enum amd_dpm_forced_level; | 31 | enum amd_dpm_forced_level; |
32 | struct PP_TemperatureRange; | 32 | struct PP_TemperatureRange; |
33 | 33 | ||
34 | |||
34 | struct phm_fan_speed_info { | 35 | struct phm_fan_speed_info { |
35 | uint32_t min_percent; | 36 | uint32_t min_percent; |
36 | uint32_t max_percent; | 37 | uint32_t max_percent; |
@@ -290,6 +291,15 @@ struct PP_Clocks { | |||
290 | uint32_t engineClockInSR; | 291 | uint32_t engineClockInSR; |
291 | }; | 292 | }; |
292 | 293 | ||
294 | struct pp_clock_info { | ||
295 | uint32_t min_mem_clk; | ||
296 | uint32_t max_mem_clk; | ||
297 | uint32_t min_eng_clk; | ||
298 | uint32_t max_eng_clk; | ||
299 | uint32_t min_bus_bandwidth; | ||
300 | uint32_t max_bus_bandwidth; | ||
301 | }; | ||
302 | |||
293 | struct phm_platform_descriptor { | 303 | struct phm_platform_descriptor { |
294 | uint32_t platformCaps[PHM_MAX_NUM_CAPS_ULONG_ENTRIES]; | 304 | uint32_t platformCaps[PHM_MAX_NUM_CAPS_ULONG_ENTRIES]; |
295 | uint32_t vbiosInterruptId; | 305 | uint32_t vbiosInterruptId; |
@@ -323,24 +333,6 @@ struct phm_clocks { | |||
323 | uint32_t clock[MAX_NUM_CLOCKS]; | 333 | uint32_t clock[MAX_NUM_CLOCKS]; |
324 | }; | 334 | }; |
325 | 335 | ||
326 | enum PP_DAL_POWERLEVEL { | ||
327 | PP_DAL_POWERLEVEL_INVALID = 0, | ||
328 | PP_DAL_POWERLEVEL_ULTRALOW, | ||
329 | PP_DAL_POWERLEVEL_LOW, | ||
330 | PP_DAL_POWERLEVEL_NOMINAL, | ||
331 | PP_DAL_POWERLEVEL_PERFORMANCE, | ||
332 | |||
333 | PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW, | ||
334 | PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW, | ||
335 | PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL, | ||
336 | PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE, | ||
337 | PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1, | ||
338 | PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1, | ||
339 | PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1, | ||
340 | PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1, | ||
341 | }; | ||
342 | |||
343 | |||
344 | extern int phm_enable_clock_power_gatings(struct pp_hwmgr *hwmgr); | 336 | extern int phm_enable_clock_power_gatings(struct pp_hwmgr *hwmgr); |
345 | extern int phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool gate); | 337 | extern int phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool gate); |
346 | extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate); | 338 | extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate); |
@@ -375,11 +367,25 @@ extern int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, | |||
375 | const struct amd_pp_display_configuration *display_config); | 367 | const struct amd_pp_display_configuration *display_config); |
376 | 368 | ||
377 | extern int phm_get_dal_power_level(struct pp_hwmgr *hwmgr, | 369 | extern int phm_get_dal_power_level(struct pp_hwmgr *hwmgr, |
378 | struct amd_pp_dal_clock_info*info); | 370 | struct amd_pp_simple_clock_info *info); |
379 | 371 | ||
380 | extern int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr); | 372 | extern int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr); |
381 | 373 | ||
382 | extern int phm_power_down_asic(struct pp_hwmgr *hwmgr); | 374 | extern int phm_power_down_asic(struct pp_hwmgr *hwmgr); |
383 | 375 | ||
376 | extern int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, | ||
377 | PHM_PerformanceLevelDesignation designation, uint32_t index, | ||
378 | PHM_PerformanceLevel *level); | ||
379 | |||
380 | extern int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, | ||
381 | struct pp_clock_info *pclock_info, | ||
382 | PHM_PerformanceLevelDesignation designation); | ||
383 | |||
384 | extern int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info); | ||
385 | |||
386 | extern int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks); | ||
387 | |||
388 | extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); | ||
389 | |||
384 | #endif /* _HARDWARE_MANAGER_H_ */ | 390 | #endif /* _HARDWARE_MANAGER_H_ */ |
385 | 391 | ||
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index aeaa3dbba525..928f5a740cba 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | |||
@@ -325,8 +325,18 @@ struct pp_hwmgr_func { | |||
325 | bool cc6_disable, bool pstate_disable, | 325 | bool cc6_disable, bool pstate_disable, |
326 | bool pstate_switch_disable); | 326 | bool pstate_switch_disable); |
327 | int (*get_dal_power_level)(struct pp_hwmgr *hwmgr, | 327 | int (*get_dal_power_level)(struct pp_hwmgr *hwmgr, |
328 | struct amd_pp_dal_clock_info *info); | 328 | struct amd_pp_simple_clock_info *info); |
329 | int (*get_performance_level)(struct pp_hwmgr *, const struct pp_hw_power_state *, | ||
330 | PHM_PerformanceLevelDesignation, uint32_t, PHM_PerformanceLevel *); | ||
331 | int (*get_current_shallow_sleep_clocks)(struct pp_hwmgr *hwmgr, | ||
332 | const struct pp_hw_power_state *state, struct pp_clock_info *clock_info); | ||
333 | int (*get_clock_by_type)(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks); | ||
334 | int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); | ||
329 | int (*power_off_asic)(struct pp_hwmgr *hwmgr); | 335 | int (*power_off_asic)(struct pp_hwmgr *hwmgr); |
336 | int (*get_pp_table)(struct pp_hwmgr *hwmgr, char **table); | ||
337 | int (*set_pp_table)(struct pp_hwmgr *hwmgr, const char *buf, size_t size); | ||
338 | int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, int level); | ||
339 | int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf); | ||
330 | }; | 340 | }; |
331 | 341 | ||
332 | struct pp_table_func { | 342 | struct pp_table_func { |
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 8b2becd1aa07..a5ff9458d359 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | |||
@@ -229,6 +229,14 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb) | |||
229 | amd_sched_wakeup(entity->sched); | 229 | amd_sched_wakeup(entity->sched); |
230 | } | 230 | } |
231 | 231 | ||
232 | static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb) | ||
233 | { | ||
234 | struct amd_sched_entity *entity = | ||
235 | container_of(cb, struct amd_sched_entity, cb); | ||
236 | entity->dependency = NULL; | ||
237 | fence_put(f); | ||
238 | } | ||
239 | |||
232 | static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) | 240 | static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) |
233 | { | 241 | { |
234 | struct amd_gpu_scheduler *sched = entity->sched; | 242 | struct amd_gpu_scheduler *sched = entity->sched; |
@@ -251,7 +259,7 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) | |||
251 | } | 259 | } |
252 | 260 | ||
253 | /* Wait for fence to be scheduled */ | 261 | /* Wait for fence to be scheduled */ |
254 | entity->cb.func = amd_sched_entity_wakeup; | 262 | entity->cb.func = amd_sched_entity_clear_dep; |
255 | list_add_tail(&entity->cb.node, &s_fence->scheduled_cb); | 263 | list_add_tail(&entity->cb.node, &s_fence->scheduled_cb); |
256 | return true; | 264 | return true; |
257 | } | 265 | } |
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 4c30d8c65558..06001400ce8b 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c | |||
@@ -4219,13 +4219,20 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
4219 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); | 4219 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); |
4220 | return r; | 4220 | return r; |
4221 | } | 4221 | } |
4222 | r = radeon_fence_wait(ib.fence, false); | 4222 | r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( |
4223 | if (r) { | 4223 | RADEON_USEC_IB_TEST_TIMEOUT)); |
4224 | if (r < 0) { | ||
4224 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | 4225 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); |
4225 | radeon_scratch_free(rdev, scratch); | 4226 | radeon_scratch_free(rdev, scratch); |
4226 | radeon_ib_free(rdev, &ib); | 4227 | radeon_ib_free(rdev, &ib); |
4227 | return r; | 4228 | return r; |
4229 | } else if (r == 0) { | ||
4230 | DRM_ERROR("radeon: fence wait timed out.\n"); | ||
4231 | radeon_scratch_free(rdev, scratch); | ||
4232 | radeon_ib_free(rdev, &ib); | ||
4233 | return -ETIMEDOUT; | ||
4228 | } | 4234 | } |
4235 | r = 0; | ||
4229 | for (i = 0; i < rdev->usec_timeout; i++) { | 4236 | for (i = 0; i < rdev->usec_timeout; i++) { |
4230 | tmp = RREG32(scratch); | 4237 | tmp = RREG32(scratch); |
4231 | if (tmp == 0xDEADBEEF) | 4238 | if (tmp == 0xDEADBEEF) |
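
This is the first of seven call sites (cik_sdma, r100, r600, r600_dma, vce and uvd follow the same shape below) converted from the unbounded radeon_fence_wait() to the bounded variant. Per the radeon_fence.c change further down, the new return convention is: negative on error, zero on timeout, positive remaining jiffies on success, which forces the three-way check. The recurring skeleton, reduced to its essentials:

	long r = radeon_fence_wait_timeout(ib.fence, false,
			usecs_to_jiffies(RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0)
		return r;		/* the wait itself failed */
	else if (r == 0)
		return -ETIMEDOUT;	/* fence never signaled within 1s */
	r = 0;				/* signaled: fold leftover jiffies back to success */
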
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index d16f2eebd95e..9c351dc8a9e0 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c | |||
@@ -737,11 +737,16 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
737 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); | 737 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); |
738 | return r; | 738 | return r; |
739 | } | 739 | } |
740 | r = radeon_fence_wait(ib.fence, false); | 740 | r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( |
741 | if (r) { | 741 | RADEON_USEC_IB_TEST_TIMEOUT)); |
742 | if (r < 0) { | ||
742 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | 743 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); |
743 | return r; | 744 | return r; |
745 | } else if (r == 0) { | ||
746 | DRM_ERROR("radeon: fence wait timed out.\n"); | ||
747 | return -ETIMEDOUT; | ||
744 | } | 748 | } |
749 | r = 0; | ||
745 | for (i = 0; i < rdev->usec_timeout; i++) { | 750 | for (i = 0; i < rdev->usec_timeout; i++) { |
746 | tmp = le32_to_cpu(rdev->wb.wb[index/4]); | 751 | tmp = le32_to_cpu(rdev->wb.wb[index/4]); |
747 | if (tmp == 0xDEADBEEF) | 752 | if (tmp == 0xDEADBEEF) |
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 5eae0a88dd3e..6e478a248628 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c | |||
@@ -3732,11 +3732,17 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
3732 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); | 3732 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); |
3733 | goto free_ib; | 3733 | goto free_ib; |
3734 | } | 3734 | } |
3735 | r = radeon_fence_wait(ib.fence, false); | 3735 | r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( |
3736 | if (r) { | 3736 | RADEON_USEC_IB_TEST_TIMEOUT)); |
3737 | if (r < 0) { | ||
3737 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | 3738 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); |
3738 | goto free_ib; | 3739 | goto free_ib; |
3740 | } else if (r == 0) { | ||
3741 | DRM_ERROR("radeon: fence wait timed out.\n"); | ||
3742 | r = -ETIMEDOUT; | ||
3743 | goto free_ib; | ||
3739 | } | 3744 | } |
3745 | r = 0; | ||
3740 | for (i = 0; i < rdev->usec_timeout; i++) { | 3746 | for (i = 0; i < rdev->usec_timeout; i++) { |
3741 | tmp = RREG32(scratch); | 3747 | tmp = RREG32(scratch); |
3742 | if (tmp == 0xDEADBEEF) { | 3748 | if (tmp == 0xDEADBEEF) { |
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cc2fdf0be37a..ed121042247f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c | |||
@@ -3381,11 +3381,17 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
3381 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); | 3381 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); |
3382 | goto free_ib; | 3382 | goto free_ib; |
3383 | } | 3383 | } |
3384 | r = radeon_fence_wait(ib.fence, false); | 3384 | r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( |
3385 | if (r) { | 3385 | RADEON_USEC_IB_TEST_TIMEOUT)); |
3386 | if (r < 0) { | ||
3386 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | 3387 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); |
3387 | goto free_ib; | 3388 | goto free_ib; |
3389 | } else if (r == 0) { | ||
3390 | DRM_ERROR("radeon: fence wait timed out.\n"); | ||
3391 | r = -ETIMEDOUT; | ||
3392 | goto free_ib; | ||
3388 | } | 3393 | } |
3394 | r = 0; | ||
3389 | for (i = 0; i < rdev->usec_timeout; i++) { | 3395 | for (i = 0; i < rdev->usec_timeout; i++) { |
3390 | tmp = RREG32(scratch); | 3396 | tmp = RREG32(scratch); |
3391 | if (tmp == 0xDEADBEEF) | 3397 | if (tmp == 0xDEADBEEF) |
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index d2dd29ab24fa..fb65e6fb5c4f 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c | |||
@@ -368,11 +368,16 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
368 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); | 368 | DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); |
369 | return r; | 369 | return r; |
370 | } | 370 | } |
371 | r = radeon_fence_wait(ib.fence, false); | 371 | r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( |
372 | if (r) { | 372 | RADEON_USEC_IB_TEST_TIMEOUT)); |
373 | if (r < 0) { | ||
373 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | 374 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); |
374 | return r; | 375 | return r; |
376 | } else if (r == 0) { | ||
377 | DRM_ERROR("radeon: fence wait timed out.\n"); | ||
378 | return -ETIMEDOUT; | ||
375 | } | 379 | } |
380 | r = 0; | ||
376 | for (i = 0; i < rdev->usec_timeout; i++) { | 381 | for (i = 0; i < rdev->usec_timeout; i++) { |
377 | tmp = le32_to_cpu(rdev->wb.wb[index/4]); | 382 | tmp = le32_to_cpu(rdev->wb.wb[index/4]); |
378 | if (tmp == 0xDEADBEEF) | 383 | if (tmp == 0xDEADBEEF) |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 78a51b3eda10..007be29a0020 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -120,6 +120,7 @@ extern int radeon_mst; | |||
120 | */ | 120 | */ |
121 | #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ | 121 | #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ |
122 | #define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) | 122 | #define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) |
123 | #define RADEON_USEC_IB_TEST_TIMEOUT 1000000 /* 1s */ | ||
123 | /* RADEON_IB_POOL_SIZE must be a power of 2 */ | 124 | /* RADEON_IB_POOL_SIZE must be a power of 2 */ |
124 | #define RADEON_IB_POOL_SIZE 16 | 125 | #define RADEON_IB_POOL_SIZE 16 |
125 | #define RADEON_DEBUGFS_MAX_COMPONENTS 32 | 126 | #define RADEON_DEBUGFS_MAX_COMPONENTS 32 |
@@ -382,6 +383,7 @@ void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring); | |||
382 | int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); | 383 | int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); |
383 | void radeon_fence_process(struct radeon_device *rdev, int ring); | 384 | void radeon_fence_process(struct radeon_device *rdev, int ring); |
384 | bool radeon_fence_signaled(struct radeon_fence *fence); | 385 | bool radeon_fence_signaled(struct radeon_fence *fence); |
386 | long radeon_fence_wait_timeout(struct radeon_fence *fence, bool interruptible, long timeout); | ||
385 | int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); | 387 | int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); |
386 | int radeon_fence_wait_next(struct radeon_device *rdev, int ring); | 388 | int radeon_fence_wait_next(struct radeon_device *rdev, int ring); |
387 | int radeon_fence_wait_empty(struct radeon_device *rdev, int ring); | 389 | int radeon_fence_wait_empty(struct radeon_device *rdev, int ring); |
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 298ea1c453c3..a4674bfd979a 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c | |||
@@ -1686,6 +1686,9 @@ void radeon_modeset_fini(struct radeon_device *rdev) | |||
1686 | radeon_fbdev_fini(rdev); | 1686 | radeon_fbdev_fini(rdev); |
1687 | kfree(rdev->mode_info.bios_hardcoded_edid); | 1687 | kfree(rdev->mode_info.bios_hardcoded_edid); |
1688 | 1688 | ||
1689 | /* free i2c buses */ | ||
1690 | radeon_i2c_fini(rdev); | ||
1691 | |||
1689 | if (rdev->mode_info.mode_config_initialized) { | 1692 | if (rdev->mode_info.mode_config_initialized) { |
1690 | radeon_afmt_fini(rdev); | 1693 | radeon_afmt_fini(rdev); |
1691 | drm_kms_helper_poll_fini(rdev->ddev); | 1694 | drm_kms_helper_poll_fini(rdev->ddev); |
@@ -1693,8 +1696,6 @@ void radeon_modeset_fini(struct radeon_device *rdev) | |||
1693 | drm_mode_config_cleanup(rdev->ddev); | 1696 | drm_mode_config_cleanup(rdev->ddev); |
1694 | rdev->mode_info.mode_config_initialized = false; | 1697 | rdev->mode_info.mode_config_initialized = false; |
1695 | } | 1698 | } |
1696 | /* free i2c buses */ | ||
1697 | radeon_i2c_fini(rdev); | ||
1698 | } | 1699 | } |
1699 | 1700 | ||
1700 | static bool is_hdtv_mode(const struct drm_display_mode *mode) | 1701 | static bool is_hdtv_mode(const struct drm_display_mode *mode) |
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 05815c47b246..7ef075acde9c 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c | |||
@@ -527,7 +527,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, | |||
527 | } | 527 | } |
528 | 528 | ||
529 | /** | 529 | /** |
530 | * radeon_fence_wait - wait for a fence to signal | 530 | * radeon_fence_wait_timeout - wait for a fence to signal with timeout |
531 | * | 531 | * |
532 | * @fence: radeon fence object | 532 | * @fence: radeon fence object |
533 | * @intr: use interruptible sleep | 533 | * @intr: use interruptible sleep |
@@ -535,12 +535,15 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, | |||
535 | * Wait for the requested fence to signal (all asics). | 535 | * Wait for the requested fence to signal (all asics). |
536 | * @intr selects whether to use interruptible (true) or non-interruptible | 536 | * @intr selects whether to use interruptible (true) or non-interruptible |
537 | * (false) sleep when waiting for the fence. | 537 | * (false) sleep when waiting for the fence. |
538 | * Returns 0 if the fence has passed, error for all other cases. | 538 | * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait |
539 | * Returns remaining time if the sequence number has passed, 0 when | ||
540 | * the wait timed out, or an error for all other cases. | ||
539 | */ | 541 | */ |
540 | int radeon_fence_wait(struct radeon_fence *fence, bool intr) | 542 | long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout) |
541 | { | 543 | { |
542 | uint64_t seq[RADEON_NUM_RINGS] = {}; | 544 | uint64_t seq[RADEON_NUM_RINGS] = {}; |
543 | long r; | 545 | long r; |
546 | int r_sig; | ||
544 | 547 | ||
545 | /* | 548 | /* |
546 | * This function should not be called on !radeon fences. | 549 | * This function should not be called on !radeon fences. |
@@ -552,15 +555,36 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) | |||
552 | return fence_wait(&fence->base, intr); | 555 | return fence_wait(&fence->base, intr); |
553 | 556 | ||
554 | seq[fence->ring] = fence->seq; | 557 | seq[fence->ring] = fence->seq; |
555 | r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); | 558 | r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout); |
556 | if (r < 0) { | 559 | if (r <= 0) { |
557 | return r; | 560 | return r; |
558 | } | 561 | } |
559 | 562 | ||
560 | r = fence_signal(&fence->base); | 563 | r_sig = fence_signal(&fence->base); |
561 | if (!r) | 564 | if (!r_sig) |
562 | FENCE_TRACE(&fence->base, "signaled from fence_wait\n"); | 565 | FENCE_TRACE(&fence->base, "signaled from fence_wait\n"); |
563 | return 0; | 566 | return r; |
567 | } | ||
568 | |||
569 | /** | ||
570 | * radeon_fence_wait - wait for a fence to signal | ||
571 | * | ||
572 | * @fence: radeon fence object | ||
573 | * @intr: use interruptible sleep | ||
574 | * | ||
575 | * Wait for the requested fence to signal (all asics). | ||
576 | * @intr selects whether to use interruptable (true) or non-interruptable | ||
577 | * (false) sleep when waiting for the fence. | ||
578 | * Returns 0 if the fence has passed, error for all other cases. | ||
579 | */ | ||
580 | int radeon_fence_wait(struct radeon_fence *fence, bool intr) | ||
581 | { | ||
582 | long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT); | ||
583 | if (r > 0) { | ||
584 | return 0; | ||
585 | } else { | ||
586 | return r; | ||
587 | } | ||
564 | } | 588 | } |
565 | 589 | ||
566 | /** | 590 | /** |
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 7eb1ae758906..566a1a01f6d1 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c | |||
@@ -810,11 +810,16 @@ int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
810 | goto error; | 810 | goto error; |
811 | } | 811 | } |
812 | 812 | ||
813 | r = radeon_fence_wait(fence, false); | 813 | r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies( |
814 | if (r) { | 814 | RADEON_USEC_IB_TEST_TIMEOUT)); |
815 | if (r < 0) { | ||
815 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | 816 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); |
817 | } else if (r == 0) { | ||
818 | DRM_ERROR("radeon: fence wait timed out.\n"); | ||
819 | r = -ETIMEDOUT; | ||
816 | } else { | 820 | } else { |
817 | DRM_INFO("ib test on ring %d succeeded\n", ring->idx); | 821 | DRM_INFO("ib test on ring %d succeeded\n", ring->idx); |
822 | r = 0; | ||
818 | } | 823 | } |
819 | error: | 824 | error: |
820 | radeon_fence_unref(&fence); | 825 | radeon_fence_unref(&fence); |
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index c6b1cbca47fc..12ddcfa82e20 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c | |||
@@ -522,11 +522,17 @@ int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) | |||
522 | goto error; | 522 | goto error; |
523 | } | 523 | } |
524 | 524 | ||
525 | r = radeon_fence_wait(fence, false); | 525 | r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies( |
526 | if (r) { | 526 | RADEON_USEC_IB_TEST_TIMEOUT)); |
527 | if (r < 0) { | ||
527 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); | 528 | DRM_ERROR("radeon: fence wait failed (%d).\n", r); |
528 | goto error; | 529 | goto error; |
530 | } else if (r == 0) { | ||
531 | DRM_ERROR("radeon: fence wait timed out.\n"); | ||
532 | r = -ETIMEDOUT; | ||
533 | goto error; | ||
529 | } | 534 | } |
535 | r = 0; | ||
530 | DRM_INFO("ib test on ring %d succeeded\n", ring->idx); | 536 | DRM_INFO("ib test on ring %d succeeded\n", ring->idx); |
531 | error: | 537 | error: |
532 | radeon_fence_unref(&fence); | 538 | radeon_fence_unref(&fence); |