Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 716
 1 file changed, 595 insertions(+), 121 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index af1b879a9ee9..34af664b9f93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -59,6 +59,7 @@
 #include "amdgpu_pm.h"

 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");

 #define AMDGPU_RESUME_MS	2000

@@ -83,10 +84,21 @@ static const char *amdgpu_asic_name[] = {
 	"POLARIS11",
 	"POLARIS12",
 	"VEGA10",
+	"VEGA12",
 	"RAVEN",
 	"LAST",
 };

+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+
+/**
+ * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device is a dGPU with HG/PX power control,
+ * otherwise return false.
+ */
 bool amdgpu_device_is_px(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;

@@ -99,6 +111,15 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 /*
  * MMIO register access helper functions.
  */
+/**
+ * amdgpu_mm_rreg - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
 {

@@ -121,6 +142,58 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 	return ret;
 }

+/*
+ * MMIO register read with bytes helper functions
+ * @offset: byte offset from MMIO start
+ *
+ */
+
+/**
+ * amdgpu_mm_rreg8 - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ *
+ * Returns the 8 bit value from the offset specified.
+ */
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
+	if (offset < adev->rmmio_size)
+		return (readb(adev->rmmio + offset));
+	BUG();
+}
+
+/*
+ * MMIO register write with bytes helper functions
+ * @offset: byte offset from MMIO start
+ * @value: the value to be written to the register
+ *
+ */
+/**
+ * amdgpu_mm_wreg8 - write to a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ * @value: 8 bit value to write
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
+	if (offset < adev->rmmio_size)
+		writeb(value, adev->rmmio + offset);
+	else
+		BUG();
+}
+
+/**
+ * amdgpu_mm_wreg - write to a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ *
+ * Writes the value specified to the offset specified.
+ */
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
 {
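The byte-wide accessors above mirror the existing dword helpers but take a byte offset rather than a dword-aligned register index. A minimal usage sketch (illustrative only; the 0x42 offset and 0x80 mask are hypothetical, not real registers):

    /* Read-modify-write one byte of MMIO space via the new helpers. */
    static void example_toggle_high_bit(struct amdgpu_device *adev)
    {
    	uint8_t val = amdgpu_mm_rreg8(adev, 0x42); /* byte offset from MMIO start */

    	amdgpu_mm_wreg8(adev, 0x42, val | 0x80);   /* BUG()s if offset is out of range */
    }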
@@ -149,6 +222,14 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 	}
 }

+/**
+ * amdgpu_io_rreg - read an IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 {
 	if ((reg * 4) < adev->rio_mem_size)

@@ -159,6 +240,15 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 	}
 }

+/**
+ * amdgpu_io_wreg - write to an IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ *
+ * Writes the value specified to the offset specified.
+ */
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {

@@ -327,6 +417,14 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 	BUG();
 }

+/**
+ * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Allocates a scratch page of VRAM for use by various things in the
+ * driver.
+ */
 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 {
 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,

@@ -336,6 +434,13 @@ static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 				       (void **)&adev->vram_scratch.ptr);
 }

+/**
+ * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Frees the VRAM scratch page.
+ */
 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
 {
 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);

@@ -377,6 +482,14 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 	}
 }

+/**
+ * amdgpu_device_pci_config_reset - reset the GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Resets the GPU using the pci config reset sequence.
+ * Only applicable to asics prior to vega10.
+ */
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
 {
 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);

@@ -537,6 +650,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)

 /**
  * amdgpu_device_vram_location - try to find VRAM location
+ *
  * @adev: amdgpu device structure holding all necessary informations
  * @mc: memory controller structure holding memory informations
  * @base: base address at which to put VRAM

@@ -545,7 +659,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
  * as parameter.
  */
 void amdgpu_device_vram_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc, u64 base)
+				 struct amdgpu_gmc *mc, u64 base)
 {
 	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;

@@ -560,6 +674,7 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,

 /**
  * amdgpu_device_gart_location - try to find GTT location
+ *
  * @adev: amdgpu device structure holding all necessary informations
  * @mc: memory controller structure holding memory informations
  *

@@ -571,11 +686,11 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
  * FIXME: when reducing GTT size align new size on power of 2.
  */
 void amdgpu_device_gart_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc)
+				 struct amdgpu_gmc *mc)
 {
 	u64 size_af, size_bf;

-	size_af = adev->mc.mc_mask - mc->vram_end;
+	size_af = adev->gmc.mc_mask - mc->vram_end;
 	size_bf = mc->vram_start;
 	if (size_bf > size_af) {
 		if (mc->gart_size > size_bf) {
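To make the placement logic concrete, a worked example under assumed numbers (not taken from real hardware):

    /*
     * Assume gmc.mc_mask covers 40 bits (1 TB) and VRAM occupies [0, 8 GB).
     *   size_af = mc_mask - vram_end  ->  roughly 1016 GB free after VRAM
     *   size_bf = vram_start          ->  0 bytes free before VRAM
     * size_af wins, so the 512 MB GART is placed right after VRAM:
     *   gart_start = vram_end + 1 (aligned), gart_end = gart_start + gart_size - 1
     */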
@@ -609,7 +724,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
  */
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 {
-	u64 space_needed = roundup_pow_of_two(adev->mc.real_vram_size);
+	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
 	struct pci_bus *root;
 	struct resource *res;

@@ -746,6 +861,16 @@ static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
 	return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }

+/**
+ * amdgpu_device_check_block_size - validate the vm block size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm block size specified via module parameter.
+ * The vm block size defines the number of bits in page table versus
+ * page directory; a page is 4KB, so we have 12 bits of offset, at least
+ * 9 bits in the page table, and the remaining bits in the page directory.
+ */
 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
 {
 	/* defines number of bits in page table versus page directory,
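The bit split described in the new kernel-doc works out as follows; a worked example with an assumed address width:

    /*
     * With 4 KB pages: 12 bits of in-page offset.
     * With vm_block_size = 9: the page table resolves 9 bits (512 PTEs).
     * On a hypothetical 48-bit VA space, the page directories then cover
     * the remaining 48 - 12 - 9 = 27 bits.
     */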
@@ -761,6 +886,14 @@ static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
 	}
 }

+/**
+ * amdgpu_device_check_vm_size - validate the vm size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm size in GB specified via module parameter.
+ * The VM size is the size of the GPU virtual memory space in GB.
+ */
 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
 {
 	/* no need to check the default value */

@@ -830,6 +963,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
 		dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
 		amdgpu_lockup_timeout = 10000;
 	}
+
+	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 }

 /**

@@ -893,6 +1028,17 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
 	.can_switch = amdgpu_switcheroo_can_switch,
 };

+/**
+ * amdgpu_device_ip_set_clockgating_state - set the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: clockgating state (gate or ungate)
+ *
+ * Sets the requested clockgating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
 int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
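A usage sketch for the helper documented above (hypothetical caller, not part of this patch):

    /* Request clock gating on every GFX IP instance; r holds the
     * error code from the last instance, per the kernel-doc above. */
    int r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
    					       AMD_CG_STATE_GATE);
    if (r)
    	dev_warn(adev->dev, "GFX clockgating failed: %d\n", r);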
@@ -915,6 +1061,17 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
 	return r;
 }

+/**
+ * amdgpu_device_ip_set_powergating_state - set the PG state
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: powergating state (gate or ungate)
+ *
+ * Sets the requested powergating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
 int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)

@@ -937,6 +1094,17 @@ int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
 	return r;
 }

+/**
+ * amdgpu_device_ip_get_clockgating_state - get the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: clockgating feature flags
+ *
+ * Walks the list of IPs on the device and updates the clockgating
+ * flags for each IP.
+ * Updates @flags with the feature flags for each hardware IP where
+ * clockgating is enabled.
+ */
 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
 {

@@ -950,6 +1118,15 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
 	}
 }

+/**
+ * amdgpu_device_ip_wait_for_idle - wait for idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Waits for the requested hardware IP to be idle.
+ * Returns 0 for success or a negative error code on failure.
+ */
 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
 {

@@ -969,6 +1146,15 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,

 }

+/**
+ * amdgpu_device_ip_is_idle - is the hardware IP idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Checks if the hardware IP is idle or not.
+ * Returns true if the IP is idle, false if not.
+ */
 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
 {

@@ -984,6 +1170,15 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,

 }

+/**
+ * amdgpu_device_ip_get_ip_block - get a hw IP pointer
+ *
+ * @adev: amdgpu_device pointer
+ * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Returns a pointer to the hardware IP block structure
+ * if it exists for the asic, otherwise NULL.
+ */
 struct amdgpu_ip_block *
 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)

@@ -1037,7 +1232,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 	if (!ip_block_version)
 		return -EINVAL;

-	DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks,
+	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
 		  ip_block_version->funcs->name);

 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

@@ -1045,6 +1240,18 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 	return 0;
 }

+/**
+ * amdgpu_device_enable_virtual_display - enable virtual display feature
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enables the virtual display feature if the user has enabled it via
+ * the module parameter virtual_display. This feature provides a virtual
+ * display hardware on headless boards or in virtualized environments.
+ * This function parses and validates the configuration string specified by
+ * the user and configures the virtual display configuration (number of
+ * virtual connectors, crtcs, etc.) specified.
+ */
 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
 {
 	adev->enable_virtual_display = false;

@@ -1090,6 +1297,16 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
 	}
 }

+/**
+ * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Parses the asic configuration parameters specified in the gpu info
+ * firmware and makes them available to the driver for use in configuring
+ * the asic.
+ * Returns 0 on success, -EINVAL on failure.
+ */
 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 {
 	const char *chip_name;

@@ -1127,6 +1344,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 		chip_name = "vega10";
 		break;
+	case CHIP_VEGA12:
+		chip_name = "vega12";
+		break;
 	case CHIP_RAVEN:
 		chip_name = "raven";
 		break;

@@ -1188,6 +1408,16 @@ out:
 	return err;
 }

+/**
+ * amdgpu_device_ip_early_init - run early init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Early initialization pass for hardware IPs. The hardware IPs that make
+ * up each asic are discovered and each IP's early_init callback is run. This
+ * is the first stage in initializing the asic.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 {
 	int i, r;

@@ -1240,8 +1470,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 			return r;
 		break;
 #endif
 	case CHIP_VEGA10:
+	case CHIP_VEGA12:
 	case CHIP_RAVEN:
 		if (adev->asic_type == CHIP_RAVEN)
 			adev->family = AMDGPU_FAMILY_RV;
 		else

@@ -1297,6 +1528,17 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	return 0;
 }

+/**
+ * amdgpu_device_ip_init - run init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main initialization pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the sw_init and hw_init callbacks
+ * are run. sw_init initializes the software state associated with each IP
+ * and hw_init initializes the hardware associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 {
 	int i, r;

@@ -1311,6 +1553,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			return r;
 		}
 		adev->ip_blocks[i].status.sw = true;
+
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			r = amdgpu_device_vram_scratch_init(adev);

@@ -1344,8 +1587,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
-		/* gmc hw init is done early */
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC)
+		if (adev->ip_blocks[i].status.hw)
 			continue;
 		r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
 		if (r) {

@@ -1364,27 +1606,61 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	return 0;
 }

+/**
+ * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Writes a reset magic value to the gart pointer in VRAM. The driver calls
+ * this function before a GPU reset. If the value is retained after a
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
+ */
 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
 {
 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
 }

+/**
+ * amdgpu_device_check_vram_lost - check if vram is valid
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Checks the reset magic value written to the gart pointer in VRAM.
+ * The driver calls this after a GPU reset to see if the contents of
+ * VRAM are lost or not.
+ * Returns true if vram is lost, false if not.
+ */
 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
 {
 	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
 }

+/**
+ * amdgpu_device_ip_late_set_cg_state - late init for clockgating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Late initialization pass enabling clockgating for hardware IPs.
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * set_clockgating_state callbacks are run. This stage is run late
+ * in the init process.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 {
 	int i = 0, r;

+	if (amdgpu_emu_mode == 1)
+		return 0;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
-		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* enable clockgating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     AMD_CG_STATE_GATE);
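How the two magic helpers pair up around a reset, as a simplified sketch (the surrounding reset plumbing is elided):

    /* Before reset: stash a magic value in VRAM through the GART mapping. */
    amdgpu_device_fill_reset_magic(adev);
    /* ... soft or full GPU reset happens here ... */
    if (amdgpu_device_check_vram_lost(adev)) {
    	/* magic is gone: VRAM was destroyed, restore from GTT shadows */
    }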
@@ -1398,6 +1674,18 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 	return 0;
 }

+/**
+ * amdgpu_device_ip_late_init - run late init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Late initialization pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the late_init callbacks are run.
+ * late_init covers any special initialization that an IP requires
+ * after all of the IPs have been initialized or something that needs to happen
+ * late in the init process.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 {
 	int i = 0, r;

@@ -1424,6 +1712,17 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 	return 0;
 }

+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run. hw_fini tears down the hardware associated with each IP
+ * and sw_fini tears down any software state associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 {
 	int i, r;

@@ -1433,7 +1732,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.hw)
 			continue;
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
+		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     AMD_CG_STATE_UNGATE);

@@ -1458,7 +1758,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 			continue;

 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
-			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
+			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     AMD_CG_STATE_UNGATE);

@@ -1479,6 +1780,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.hw = false;
 	}

+
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.sw)
 			continue;

@@ -1514,6 +1816,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	return 0;
 }

+/**
+ * amdgpu_device_ip_late_init_func_handler - work handler for clockgating
+ *
+ * @work: work_struct
+ *
+ * Work handler for amdgpu_device_ip_late_set_cg_state. We put the
+ * clockgating setup into a worker thread to speed up driver init and
+ * resume from suspend.
+ */
 static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =

@@ -1521,6 +1832,17 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
 	amdgpu_device_ip_late_set_cg_state(adev);
 }

+/**
+ * amdgpu_device_ip_suspend - run suspend for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 {
 	int i, r;

@@ -1539,7 +1861,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* ungate blocks so that suspend can properly shut them down */
-		if (i != AMD_IP_BLOCK_TYPE_SMC) {
+		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
+		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     AMD_CG_STATE_UNGATE);
 			if (r) {

@@ -1585,6 +1908,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)

 			r = block->version->funcs->hw_init(adev);
 			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+			if (r)
+				return r;
 		}
 	}

@@ -1618,12 +1943,26 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)

 			r = block->version->funcs->hw_init(adev);
 			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+			if (r)
+				return r;
 		}
 	}

 	return 0;
 }

+/**
+ * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * First resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * COMMON, GMC, and IH. resume puts the hardware into a functional state
+ * after a suspend and updates the software state as necessary. This
+ * function is also used for restoring the GPU after a GPU reset.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
 {
 	int i, r;

@@ -1632,9 +1971,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-				adev->ip_blocks[i].version->type ==
-				AMD_IP_BLOCK_TYPE_IH) {
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
 			r = adev->ip_blocks[i].version->funcs->resume(adev);
 			if (r) {
 				DRM_ERROR("resume of IP block <%s> failed %d\n",

@@ -1647,6 +1985,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
 	return 0;
 }

+/**
+ * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Second resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
+ * functional state after a suspend and updates the software state as
+ * necessary. This function is also used for restoring the GPU after a GPU
+ * reset.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 {
 	int i, r;

@@ -1655,8 +2006,8 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
 			continue;
 		r = adev->ip_blocks[i].version->funcs->resume(adev);
 		if (r) {

@@ -1669,6 +2020,18 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 	return 0;
 }

+/**
+ * amdgpu_device_ip_resume - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main resume function for hardware IPs. The hardware IPs
+ * are split into two resume functions because they are
+ * also used in recovering from a GPU reset and some additional
+ * steps need to be taken between them. In this case (S3/S4) they are
+ * run sequentially.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 {
 	int r;

@@ -1681,6 +2044,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 	return r;
 }

+/**
+ * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Query the VBIOS data tables to determine if the board supports SR-IOV.
+ */
 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 {
 	if (amdgpu_sriov_vf(adev)) {

@@ -1697,6 +2067,14 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 	}
 }

+/**
+ * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
+ *
+ * @asic_type: AMD asic type
+ *
+ * Check if there is DC (new modesetting infrastructure) support for an asic.
+ * Returns true if DC has support, false if not.
+ */
 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 {
 	switch (asic_type) {

@@ -1704,6 +2082,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
 	case CHIP_KAVERI:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 	case CHIP_POLARIS11:

@@ -1714,10 +2094,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 #if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
 		return amdgpu_dc != 0;
 #endif
-	case CHIP_KABINI:
-	case CHIP_MULLINS:
-		return amdgpu_dc > 0;
 	case CHIP_VEGA10:
+	case CHIP_VEGA12:
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case CHIP_RAVEN:
 #endif
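A usage sketch (hypothetical caller): the helper lets init code pick a display path per asic:

    bool use_dc = amdgpu_device_asic_has_dc_support(adev->asic_type);

    /* e.g. log the decision; real callers select the DC or pre-DC stack */
    DRM_INFO("display core %s for this asic\n", use_dc ? "supported" : "not supported");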
@@ -1771,14 +2149,16 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1771 | adev->flags = flags; | 2149 | adev->flags = flags; |
1772 | adev->asic_type = flags & AMD_ASIC_MASK; | 2150 | adev->asic_type = flags & AMD_ASIC_MASK; |
1773 | adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; | 2151 | adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; |
1774 | adev->mc.gart_size = 512 * 1024 * 1024; | 2152 | if (amdgpu_emu_mode == 1) |
2153 | adev->usec_timeout *= 2; | ||
2154 | adev->gmc.gart_size = 512 * 1024 * 1024; | ||
1775 | adev->accel_working = false; | 2155 | adev->accel_working = false; |
1776 | adev->num_rings = 0; | 2156 | adev->num_rings = 0; |
1777 | adev->mman.buffer_funcs = NULL; | 2157 | adev->mman.buffer_funcs = NULL; |
1778 | adev->mman.buffer_funcs_ring = NULL; | 2158 | adev->mman.buffer_funcs_ring = NULL; |
1779 | adev->vm_manager.vm_pte_funcs = NULL; | 2159 | adev->vm_manager.vm_pte_funcs = NULL; |
1780 | adev->vm_manager.vm_pte_num_rings = 0; | 2160 | adev->vm_manager.vm_pte_num_rings = 0; |
1781 | adev->gart.gart_funcs = NULL; | 2161 | adev->gmc.gmc_funcs = NULL; |
1782 | adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); | 2162 | adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); |
1783 | bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); | 2163 | bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); |
1784 | 2164 | ||
@@ -1867,6 +2247,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1867 | if (adev->rio_mem == NULL) | 2247 | if (adev->rio_mem == NULL) |
1868 | DRM_INFO("PCI I/O BAR is not found.\n"); | 2248 | DRM_INFO("PCI I/O BAR is not found.\n"); |
1869 | 2249 | ||
2250 | amdgpu_device_get_pcie_info(adev); | ||
2251 | |||
1870 | /* early init functions */ | 2252 | /* early init functions */ |
1871 | r = amdgpu_device_ip_early_init(adev); | 2253 | r = amdgpu_device_ip_early_init(adev); |
1872 | if (r) | 2254 | if (r) |
@@ -1885,6 +2267,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1885 | if (runtime) | 2267 | if (runtime) |
1886 | vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); | 2268 | vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); |
1887 | 2269 | ||
2270 | if (amdgpu_emu_mode == 1) { | ||
2271 | /* post the asic on emulation mode */ | ||
2272 | emu_soc_asic_init(adev); | ||
2273 | goto fence_driver_init; | ||
2274 | } | ||
2275 | |||
1888 | /* Read BIOS */ | 2276 | /* Read BIOS */ |
1889 | if (!amdgpu_get_bios(adev)) { | 2277 | if (!amdgpu_get_bios(adev)) { |
1890 | r = -EINVAL; | 2278 | r = -EINVAL; |
@@ -1937,6 +2325,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1937 | amdgpu_atombios_i2c_init(adev); | 2325 | amdgpu_atombios_i2c_init(adev); |
1938 | } | 2326 | } |
1939 | 2327 | ||
2328 | fence_driver_init: | ||
1940 | /* Fence driver */ | 2329 | /* Fence driver */ |
1941 | r = amdgpu_fence_driver_init(adev); | 2330 | r = amdgpu_fence_driver_init(adev); |
1942 | if (r) { | 2331 | if (r) { |
@@ -1964,7 +2353,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
1964 | } | 2353 | } |
1965 | dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); | 2354 | dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); |
1966 | amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); | 2355 | amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); |
1967 | amdgpu_device_ip_fini(adev); | ||
1968 | goto failed; | 2356 | goto failed; |
1969 | } | 2357 | } |
1970 | 2358 | ||
@@ -2063,11 +2451,17 @@ void amdgpu_device_fini(struct amdgpu_device *adev) | |||
2063 | 2451 | ||
2064 | DRM_INFO("amdgpu: finishing device.\n"); | 2452 | DRM_INFO("amdgpu: finishing device.\n"); |
2065 | adev->shutdown = true; | 2453 | adev->shutdown = true; |
2066 | if (adev->mode_info.mode_config_initialized) | 2454 | /* disable all interrupts */ |
2067 | drm_crtc_force_disable_all(adev->ddev); | 2455 | amdgpu_irq_disable_all(adev); |
2068 | 2456 | if (adev->mode_info.mode_config_initialized){ | |
2457 | if (!amdgpu_device_has_dc_support(adev)) | ||
2458 | drm_crtc_force_disable_all(adev->ddev); | ||
2459 | else | ||
2460 | drm_atomic_helper_shutdown(adev->ddev); | ||
2461 | } | ||
2069 | amdgpu_ib_pool_fini(adev); | 2462 | amdgpu_ib_pool_fini(adev); |
2070 | amdgpu_fence_driver_fini(adev); | 2463 | amdgpu_fence_driver_fini(adev); |
2464 | amdgpu_pm_sysfs_fini(adev); | ||
2071 | amdgpu_fbdev_fini(adev); | 2465 | amdgpu_fbdev_fini(adev); |
2072 | r = amdgpu_device_ip_fini(adev); | 2466 | r = amdgpu_device_ip_fini(adev); |
2073 | if (adev->firmware.gpu_info_fw) { | 2467 | if (adev->firmware.gpu_info_fw) { |
@@ -2079,7 +2473,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev) | |||
2079 | /* free i2c buses */ | 2473 | /* free i2c buses */ |
2080 | if (!amdgpu_device_has_dc_support(adev)) | 2474 | if (!amdgpu_device_has_dc_support(adev)) |
2081 | amdgpu_i2c_fini(adev); | 2475 | amdgpu_i2c_fini(adev); |
2082 | amdgpu_atombios_fini(adev); | 2476 | |
2477 | if (amdgpu_emu_mode != 1) | ||
2478 | amdgpu_atombios_fini(adev); | ||
2479 | |||
2083 | kfree(adev->bios); | 2480 | kfree(adev->bios); |
2084 | adev->bios = NULL; | 2481 | adev->bios = NULL; |
2085 | if (!pci_is_thunderbolt_attached(adev->pdev)) | 2482 | if (!pci_is_thunderbolt_attached(adev->pdev)) |
@@ -2093,7 +2490,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) | |||
2093 | iounmap(adev->rmmio); | 2490 | iounmap(adev->rmmio); |
2094 | adev->rmmio = NULL; | 2491 | adev->rmmio = NULL; |
2095 | amdgpu_device_doorbell_fini(adev); | 2492 | amdgpu_device_doorbell_fini(adev); |
2096 | amdgpu_pm_sysfs_fini(adev); | ||
2097 | amdgpu_debugfs_regs_cleanup(adev); | 2493 | amdgpu_debugfs_regs_cleanup(adev); |
2098 | } | 2494 | } |
2099 | 2495 | ||
@@ -2322,6 +2718,16 @@ unlock: | |||
2322 | return r; | 2718 | return r; |
2323 | } | 2719 | } |
2324 | 2720 | ||
2721 | /** | ||
2722 | * amdgpu_device_ip_check_soft_reset - did soft reset succeed | ||
2723 | * | ||
2724 | * @adev: amdgpu_device pointer | ||
2725 | * | ||
2726 | * The list of all the hardware IPs that make up the asic is walked and | ||
2727 | * the check_soft_reset callbacks are run. check_soft_reset determines | ||
2728 | * if the asic is still hung or not. | ||
2729 | * Returns true if any of the IPs are still in a hung state, false if not. | ||
2730 | */ | ||
2325 | static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) | 2731 | static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) |
2326 | { | 2732 | { |
2327 | int i; | 2733 | int i; |
@@ -2344,6 +2750,17 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) | |||
2344 | return asic_hang; | 2750 | return asic_hang; |
2345 | } | 2751 | } |
2346 | 2752 | ||
2753 | /** | ||
2754 | * amdgpu_device_ip_pre_soft_reset - prepare for soft reset | ||
2755 | * | ||
2756 | * @adev: amdgpu_device pointer | ||
2757 | * | ||
2758 | * The list of all the hardware IPs that make up the asic is walked and the | ||
2759 | * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset | ||
2760 | * handles any IP specific hardware or software state changes that are | ||
2761 | * necessary for a soft reset to succeed. | ||
2762 | * Returns 0 on success, negative error code on failure. | ||
2763 | */ | ||
2347 | static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) | 2764 | static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) |
2348 | { | 2765 | { |
2349 | int i, r = 0; | 2766 | int i, r = 0; |
@@ -2362,6 +2779,15 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) | |||
2362 | return 0; | 2779 | return 0; |
2363 | } | 2780 | } |
2364 | 2781 | ||
2782 | /** | ||
2783 | * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed | ||
2784 | * | ||
2785 | * @adev: amdgpu_device pointer | ||
2786 | * | ||
2787 | * Some hardware IPs cannot be soft reset. If they are hung, a full gpu | ||
2788 | * reset is necessary to recover. | ||
2789 | * Returns true if a full asic reset is required, false if not. | ||
2790 | */ | ||
2365 | static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) | 2791 | static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) |
2366 | { | 2792 | { |
2367 | int i; | 2793 | int i; |
@@ -2383,6 +2809,17 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) | |||
2383 | return false; | 2809 | return false; |
2384 | } | 2810 | } |
2385 | 2811 | ||
2812 | /** | ||
2813 | * amdgpu_device_ip_soft_reset - do a soft reset | ||
2814 | * | ||
2815 | * @adev: amdgpu_device pointer | ||
2816 | * | ||
2817 | * The list of all the hardware IPs that make up the asic is walked and the | ||
2818 | * soft_reset callbacks are run if the block is hung. soft_reset handles any | ||
2819 | * IP specific hardware or software state changes that are necessary to soft | ||
2820 | * reset the IP. | ||
2821 | * Returns 0 on success, negative error code on failure. | ||
2822 | */ | ||
2386 | static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) | 2823 | static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) |
2387 | { | 2824 | { |
2388 | int i, r = 0; | 2825 | int i, r = 0; |
@@ -2401,6 +2838,17 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) | |||
2401 | return 0; | 2838 | return 0; |
2402 | } | 2839 | } |
2403 | 2840 | ||
2841 | /** | ||
2842 | * amdgpu_device_ip_post_soft_reset - clean up from soft reset | ||
2843 | * | ||
2844 | * @adev: amdgpu_device pointer | ||
2845 | * | ||
2846 | * The list of all the hardware IPs that make up the asic is walked and the | ||
2847 | * post_soft_reset callbacks are run if the asic was hung. post_soft_reset | ||
2848 | * handles any IP specific hardware or software state changes that are | ||
2849 | * necessary after the IP has been soft reset. | ||
2850 | * Returns 0 on success, negative error code on failure. | ||
2851 | */ | ||
2404 | static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) | 2852 | static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) |
2405 | { | 2853 | { |
2406 | int i, r = 0; | 2854 | int i, r = 0; |
@@ -2418,6 +2866,19 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) | |||
2418 | return 0; | 2866 | return 0; |
2419 | } | 2867 | } |
2420 | 2868 | ||
2869 | /** | ||
2870 | * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers | ||
2871 | * | ||
2872 | * @adev: amdgpu_device pointer | ||
2873 | * @ring: amdgpu_ring for the engine handling the buffer operations | ||
2874 | * @bo: amdgpu_bo buffer whose shadow is being restored | ||
2875 | * @fence: dma_fence associated with the operation | ||
2876 | * | ||
2877 | * Restores the VRAM buffer contents from the shadow in GTT. Used to | ||
2878 | * restore things like GPUVM page tables after a GPU reset where | ||
2879 | * the contents of VRAM might be lost. | ||
2880 | * Returns 0 on success, negative error code on failure. | ||
2881 | */ | ||
2421 | static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev, | 2882 | static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev, |
2422 | struct amdgpu_ring *ring, | 2883 | struct amdgpu_ring *ring, |
2423 | struct amdgpu_bo *bo, | 2884 | struct amdgpu_bo *bo, |
@@ -2453,17 +2914,81 @@ err: | |||
2453 | return r; | 2914 | return r; |
2454 | } | 2915 | } |
2455 | 2916 | ||
2456 | /* | 2917 | /** |
2918 | * amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents | ||
2919 | * | ||
2920 | * @adev: amdgpu_device pointer | ||
2921 | * | ||
2922 | * Restores the contents of VRAM buffers from the shadows in GTT. Used to | ||
2923 | * restore things like GPUVM page tables after a GPU reset where | ||
2924 | * the contents of VRAM might be lost. | ||
2925 | * Returns 0 on success, 1 on failure. | ||
2926 | */ | ||
2927 | static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev) | ||
2928 | { | ||
2929 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; | ||
2930 | struct amdgpu_bo *bo, *tmp; | ||
2931 | struct dma_fence *fence = NULL, *next = NULL; | ||
2932 | long r = 1; | ||
2933 | int i = 0; | ||
2934 | long tmo; | ||
2935 | |||
2936 | if (amdgpu_sriov_runtime(adev)) | ||
2937 | tmo = msecs_to_jiffies(amdgpu_lockup_timeout); | ||
2938 | else | ||
2939 | tmo = msecs_to_jiffies(100); | ||
2940 | |||
2941 | DRM_INFO("recover vram bo from shadow start\n"); | ||
2942 | mutex_lock(&adev->shadow_list_lock); | ||
2943 | list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { | ||
2944 | next = NULL; | ||
2945 | amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); | ||
2946 | if (fence) { | ||
2947 | r = dma_fence_wait_timeout(fence, false, tmo); | ||
2948 | if (r == 0) | ||
2949 | pr_err("wait fence %p[%d] timeout\n", fence, i); | ||
2950 | else if (r < 0) | ||
2951 | pr_err("wait fence %p[%d] interrupted\n", fence, i); | ||
2952 | if (r < 1) { | ||
2953 | dma_fence_put(fence); | ||
2954 | fence = next; | ||
2955 | break; | ||
2956 | } | ||
2957 | i++; | ||
2958 | } | ||
2959 | |||
2960 | dma_fence_put(fence); | ||
2961 | fence = next; | ||
2962 | } | ||
2963 | mutex_unlock(&adev->shadow_list_lock); | ||
2964 | |||
2965 | if (fence) { | ||
2966 | r = dma_fence_wait_timeout(fence, false, tmo); | ||
2967 | if (r == 0) | ||
2968 | pr_err("wait fence %p[%d] timeout\n", fence, i); | ||
2969 | else if (r < 0) | ||
2970 | pr_err("wait fence %p[%d] interrupted\n", fence, i); | ||
2971 | |||
2972 | } | ||
2973 | dma_fence_put(fence); | ||
2974 | |||
2975 | if (r > 0) | ||
2976 | DRM_INFO("recover vram bo from shadow done\n"); | ||
2977 | else | ||
2978 | DRM_ERROR("recover vram bo from shadow failed\n"); | ||
2979 | |||
2980 | return (r > 0) ? 0 : 1; | ||
2981 | } | ||
2982 | |||
2983 | /** | ||
2457 | * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough | 2984 | * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough |
2458 | * | 2985 | * |
2459 | * @adev: amdgpu_device pointer | 2986 | * @adev: amdgpu_device pointer |
2460 | * @reset_flags: output param tells caller the reset result | ||
2461 | * | 2987 | * |
2462 | * Attempt a soft reset, falling back to a full reset, and re-initialize the ASIC. | 2988 | * Attempt a soft reset, falling back to a full reset, and re-initialize the ASIC. |
2463 | * Returns 0 on success, negative error code on failure. | 2989 | * Returns 0 on success, negative error code on failure. |
2464 | */ | 2990 | */ |
2465 | static int amdgpu_device_reset(struct amdgpu_device *adev, | 2991 | static int amdgpu_device_reset(struct amdgpu_device *adev) |
2466 | uint64_t* reset_flags) | ||
2467 | { | 2992 | { |
2468 | bool need_full_reset, vram_lost = 0; | 2993 | bool need_full_reset, vram_lost = 0; |
2469 | int r; | 2994 | int r; |
@@ -2478,7 +3003,6 @@ static int amdgpu_device_reset(struct amdgpu_device *adev, | |||
2478 | DRM_INFO("soft reset failed, will fallback to full reset!\n"); | 3003 | DRM_INFO("soft reset failed, will fallback to full reset!\n"); |
2479 | need_full_reset = true; | 3004 | need_full_reset = true; |
2480 | } | 3005 | } |
2481 | |||
2482 | } | 3006 | } |
2483 | 3007 | ||
2484 | if (need_full_reset) { | 3008 | if (need_full_reset) { |
@@ -2527,28 +3051,21 @@ out: | |||
2527 | } | 3051 | } |
2528 | } | 3052 | } |
2529 | 3053 | ||
2530 | if (reset_flags) { | 3054 | if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost)) |
2531 | if (vram_lost) | 3055 | r = amdgpu_device_handle_vram_lost(adev); |
2532 | (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; | ||
2533 | |||
2534 | if (need_full_reset) | ||
2535 | (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; | ||
2536 | } | ||
2537 | 3056 | ||
2538 | return r; | 3057 | return r; |
2539 | } | 3058 | } |
2540 | 3059 | ||
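With the reset_flags out-parameter gone, shadow recovery now keys off a single condition: the reset succeeded, and it was either a full reset on a dGPU or a detected VRAM loss. The APU carve-out reflects that an APU's VRAM is stolen system memory and survives an ASIC reset, a rationale the patch implies rather than states. Annotated:

	if (!r &&	/* the reset itself succeeded, and ... */
	    ((need_full_reset && !(adev->flags & AMD_IS_APU)) || /* dGPU full reset */
	     vram_lost))					 /* or loss detected */
		r = amdgpu_device_handle_vram_lost(adev);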
2541 | /* | 3060 | /** |
2542 | * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf | 3061 | * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf |
2543 | * | 3062 | * |
2544 | * @adev: amdgpu_device pointer | 3063 | * @adev: amdgpu_device pointer |
2545 | * @reset_flags: output param tells caller the reset result | ||
2546 | * | 3064 | * |
2547 | * Do a VF FLR and re-initialize the ASIC. | 3065 | * Do a VF FLR and re-initialize the ASIC. |
2548 | * Returns 0 on success, negative error code on failure. | 3066 | * Returns 0 on success, negative error code on failure. |
2549 | */ | 3067 | */ |
2550 | static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, | 3068 | static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, |
2551 | uint64_t *reset_flags, | ||
2552 | bool from_hypervisor) | 3069 | bool from_hypervisor) |
2553 | { | 3070 | { |
2554 | int r; | 3071 | int r; |
@@ -2570,28 +3087,20 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, | |||
2570 | 3087 | ||
2571 | /* now we are okay to resume SMC/CP/SDMA */ | 3088 | /* now we are okay to resume SMC/CP/SDMA */ |
2572 | r = amdgpu_device_ip_reinit_late_sriov(adev); | 3089 | r = amdgpu_device_ip_reinit_late_sriov(adev); |
3090 | amdgpu_virt_release_full_gpu(adev, true); | ||
2573 | if (r) | 3091 | if (r) |
2574 | goto error; | 3092 | goto error; |
2575 | 3093 | ||
2576 | amdgpu_irq_gpu_reset_resume_helper(adev); | 3094 | amdgpu_irq_gpu_reset_resume_helper(adev); |
2577 | r = amdgpu_ib_ring_tests(adev); | 3095 | r = amdgpu_ib_ring_tests(adev); |
2578 | if (r) | ||
2579 | dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r); | ||
2580 | |||
2581 | error: | ||
2582 | /* release full control of GPU after ib test */ | ||
2583 | amdgpu_virt_release_full_gpu(adev, true); | ||
2584 | 3096 | ||
2585 | if (reset_flags) { | 3097 | if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { |
2586 | if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { | 3098 | atomic_inc(&adev->vram_lost_counter); |
2587 | (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; | 3099 | r = amdgpu_device_handle_vram_lost(adev); |
2588 | atomic_inc(&adev->vram_lost_counter); | ||
2589 | } | ||
2590 | |||
2591 | /* VF FLR or hotlink reset is always full-reset */ | ||
2592 | (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; | ||
2593 | } | 3100 | } |
2594 | 3101 | ||
3102 | error: | ||
3103 | |||
2595 | return r; | 3104 | return r; |
2596 | } | 3105 | } |
2597 | 3106 | ||
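Condensing the SR-IOV path as reshaped here (the steps before the shown hunk are elided; their names follow the surrounding driver and should be read as assumptions):

	/* 1. amdgpu_virt_request_full_gpu() / amdgpu_virt_reset_gpu():
	 *    obtain exclusive access, triggering the FLR via the host
	 * 2. amdgpu_device_ip_reinit_early_sriov(): restore base IP blocks
	 * 3. amdgpu_device_ip_reinit_late_sriov(): resume SMC/CP/SDMA
	 * 4. amdgpu_virt_release_full_gpu(): now issued before the error
	 *    check, so the host regains control even when reinit fails
	 * 5. amdgpu_irq_gpu_reset_resume_helper() plus IB ring tests
	 * 6. amdgpu_device_handle_vram_lost() when the AMDGIM FLR-VRAMLOST
	 *    feature bit reports that the FLR clobbered VRAM
	 */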
@@ -2609,7 +3118,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
2609 | struct amdgpu_job *job, bool force) | 3118 | struct amdgpu_job *job, bool force) |
2610 | { | 3119 | { |
2611 | struct drm_atomic_state *state = NULL; | 3120 | struct drm_atomic_state *state = NULL; |
2612 | uint64_t reset_flags = 0; | ||
2613 | int i, r, resched; | 3121 | int i, r, resched; |
2614 | 3122 | ||
2615 | if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { | 3123 | if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { |
@@ -2631,22 +3139,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
2631 | 3139 | ||
2632 | /* block TTM */ | 3140 | /* block TTM */ |
2633 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); | 3141 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); |
3142 | |||
2634 | /* store modesetting */ | 3143 | /* store modesetting */ |
2635 | if (amdgpu_device_has_dc_support(adev)) | 3144 | if (amdgpu_device_has_dc_support(adev)) |
2636 | state = drm_atomic_helper_suspend(adev->ddev); | 3145 | state = drm_atomic_helper_suspend(adev->ddev); |
2637 | 3146 | ||
2638 | /* block scheduler */ | 3147 | /* block all schedulers and reset given job's ring */ |
2639 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 3148 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
2640 | struct amdgpu_ring *ring = adev->rings[i]; | 3149 | struct amdgpu_ring *ring = adev->rings[i]; |
2641 | 3150 | ||
2642 | if (!ring || !ring->sched.thread) | 3151 | if (!ring || !ring->sched.thread) |
2643 | continue; | 3152 | continue; |
2644 | 3153 | ||
2645 | /* only focus on the ring hit timeout if &job not NULL */ | 3154 | kthread_park(ring->sched.thread); |
3155 | |||
2646 | if (job && job->ring->idx != i) | 3156 | if (job && job->ring->idx != i) |
2647 | continue; | 3157 | continue; |
2648 | 3158 | ||
2649 | kthread_park(ring->sched.thread); | ||
2650 | drm_sched_hw_job_reset(&ring->sched, &job->base); | 3159 | drm_sched_hw_job_reset(&ring->sched, &job->base); |
2651 | 3160 | ||
2652 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ | 3161 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ |
@@ -2654,68 +3163,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
2654 | } | 3163 | } |
2655 | 3164 | ||
2656 | if (amdgpu_sriov_vf(adev)) | 3165 | if (amdgpu_sriov_vf(adev)) |
2657 | r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true); | 3166 | r = amdgpu_device_reset_sriov(adev, job ? false : true); |
2658 | else | 3167 | else |
2659 | r = amdgpu_device_reset(adev, &reset_flags); | 3168 | r = amdgpu_device_reset(adev); |
2660 | |||
2661 | if (!r) { | ||
2662 | if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) || | ||
2663 | (reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) { | ||
2664 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; | ||
2665 | struct amdgpu_bo *bo, *tmp; | ||
2666 | struct dma_fence *fence = NULL, *next = NULL; | ||
2667 | |||
2668 | DRM_INFO("recover vram bo from shadow\n"); | ||
2669 | mutex_lock(&adev->shadow_list_lock); | ||
2670 | list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { | ||
2671 | next = NULL; | ||
2672 | amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); | ||
2673 | if (fence) { | ||
2674 | r = dma_fence_wait(fence, false); | ||
2675 | if (r) { | ||
2676 | WARN(r, "recovery from shadow isn't completed\n"); | ||
2677 | break; | ||
2678 | } | ||
2679 | } | ||
2680 | |||
2681 | dma_fence_put(fence); | ||
2682 | fence = next; | ||
2683 | } | ||
2684 | mutex_unlock(&adev->shadow_list_lock); | ||
2685 | if (fence) { | ||
2686 | r = dma_fence_wait(fence, false); | ||
2687 | if (r) | ||
2688 | WARN(r, "recovery from shadow isn't completed\n"); | ||
2689 | } | ||
2690 | dma_fence_put(fence); | ||
2691 | } | ||
2692 | |||
2693 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | ||
2694 | struct amdgpu_ring *ring = adev->rings[i]; | ||
2695 | 3169 | ||
2696 | if (!ring || !ring->sched.thread) | 3170 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
2697 | continue; | 3171 | struct amdgpu_ring *ring = adev->rings[i]; |
2698 | 3172 | ||
2699 | /* only focus on the ring hit timeout if &job not NULL */ | 3173 | if (!ring || !ring->sched.thread) |
2700 | if (job && job->ring->idx != i) | 3174 | continue; |
2701 | continue; | ||
2702 | 3175 | ||
3176 | /* only need to recover the scheduler of the given job's ring, | ||
3177 | * or of all rings (in the case @job is NULL), | ||
3178 | * once the reset above has completed | ||
3179 | */ | ||
3180 | if ((!job || job->ring->idx == i) && !r) | ||
2703 | drm_sched_job_recovery(&ring->sched); | 3181 | drm_sched_job_recovery(&ring->sched); |
2704 | kthread_unpark(ring->sched.thread); | ||
2705 | } | ||
2706 | } else { | ||
2707 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | ||
2708 | struct amdgpu_ring *ring = adev->rings[i]; | ||
2709 | 3182 | ||
2710 | if (!ring || !ring->sched.thread) | 3183 | kthread_unpark(ring->sched.thread); |
2711 | continue; | ||
2712 | |||
2713 | /* only focus on the ring hit timeout if &job not NULL */ | ||
2714 | if (job && job->ring->idx != i) | ||
2715 | continue; | ||
2716 | |||
2717 | kthread_unpark(adev->rings[i]->sched.thread); | ||
2718 | } | ||
2719 | } | 3184 | } |
2720 | 3185 | ||
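The scheduler handling around the reset is now symmetric: every ring is parked before and unparked after, while the job-specific work stays conditional. Condensed from the hunks above (the force_completion call sits behind a hunk boundary but is named by the comment at line 3161):

	/* for each ring:
	 *	kthread_park(ring->sched.thread);	unconditional
	 *	if (!job || job->ring->idx == i) {
	 *		drm_sched_hw_job_reset(&ring->sched, &job->base);
	 *		amdgpu_fence_driver_force_completion(ring);
	 *	}
	 *
	 * ... amdgpu_device_reset() or amdgpu_device_reset_sriov() ...
	 *
	 * for each ring:
	 *	if ((!job || job->ring->idx == i) && !r)
	 *		drm_sched_job_recovery(&ring->sched);
	 *	kthread_unpark(ring->sched.thread);	unconditional
	 */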
2721 | if (amdgpu_device_has_dc_support(adev)) { | 3186 | if (amdgpu_device_has_dc_support(adev)) { |
@@ -2741,7 +3206,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
2741 | return r; | 3206 | return r; |
2742 | } | 3207 | } |
2743 | 3208 | ||
2744 | void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) | 3209 | /** |
3210 | * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot | ||
3211 | * | ||
3212 | * @adev: amdgpu_device pointer | ||
3213 | * | ||
3214 | * Fetches and stores in the driver the PCIE capabilities (gen speed | ||
3215 | * and lanes) of the slot the device is in. Handles APUs and | ||
3216 | * virtualized environments where PCIE config space may not be available. | ||
3217 | */ | ||
3218 | static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) | ||
2745 | { | 3219 | { |
2746 | u32 mask; | 3220 | u32 mask; |
2747 | int ret; | 3221 | int ret; |