Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile                        |    6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h                        |    3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c                 |  267
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h                 |   65
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c          |  670
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c          |  543
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c                    |    5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c                     |    7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c                    |    7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c                           |   11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cikd.h                          |    6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vid.h                           |    5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Kconfig                         |    2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile                        |    3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_regs.h                      |   11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c                    |    7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c   |  103
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c           |   20
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c            |  249
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c            |   99
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h            |  398
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c                  |    5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h                  |    1
-rw-r--r--  drivers/gpu/drm/amd/include/kgd_kfd_interface.h            |    3
-rw-r--r--  drivers/gpu/drm/amd/include/vi_structs.h                   |  417
26 files changed, 2886 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 616dfd4a1398..908360584e4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -71,6 +71,12 @@ amdgpu-y += \
 	amdgpu_vce.o \
 	vce_v3_0.o
 
+# add amdkfd interfaces
+amdgpu-y += \
+	amdgpu_amdkfd.o \
+	amdgpu_amdkfd_gfx_v7.o \
+	amdgpu_amdkfd_gfx_v8.o
+
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 01657830b470..f3791e0d27d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2011,6 +2011,9 @@ struct amdgpu_device {
 	/* tracking pinned memory */
 	u64 vram_pin_size;
 	u64 gart_pin_size;
+
+	/* amdkfd interface */
+	struct kfd_dev		*kfd;
 };
 
 bool amdgpu_device_is_px(struct drm_device *dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
new file mode 100644
index 000000000000..bc763e0c8f4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_family.h"
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include <linux/module.h>
+
+const struct kfd2kgd_calls *kfd2kgd;
+const struct kgd2kfd_calls *kgd2kfd;
+bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+
+bool amdgpu_amdkfd_init(void)
+{
+#if defined(CONFIG_HSA_AMD_MODULE)
+	bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+
+	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
+
+	if (kgd2kfd_init_p == NULL)
+		return false;
+#endif
+	return true;
+}
+
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+{
+#if defined(CONFIG_HSA_AMD_MODULE)
+	bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+#endif
+
+	switch (rdev->asic_type) {
+	case CHIP_KAVERI:
+		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
+		break;
+	case CHIP_CARRIZO:
+		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
+		break;
+	default:
+		return false;
+	}
+
+#if defined(CONFIG_HSA_AMD_MODULE)
+	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
+
+	if (kgd2kfd_init_p == NULL) {
+		kfd2kgd = NULL;
+		return false;
+	}
+
+	if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
+		symbol_put(kgd2kfd_init);
+		kfd2kgd = NULL;
+		kgd2kfd = NULL;
+
+		return false;
+	}
+
+	return true;
+#elif defined(CONFIG_HSA_AMD)
+	if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
+		kfd2kgd = NULL;
+		kgd2kfd = NULL;
+		return false;
+	}
+
+	return true;
+#else
+	kfd2kgd = NULL;
+	return false;
+#endif
+}
+
+void amdgpu_amdkfd_fini(void)
+{
+	if (kgd2kfd) {
+		kgd2kfd->exit();
+		symbol_put(kgd2kfd_init);
+	}
+}
+
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+{
+	if (kgd2kfd)
+		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
+					rdev->pdev, kfd2kgd);
+}
+
+void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+{
+	if (rdev->kfd) {
+		struct kgd2kfd_shared_resources gpu_resources = {
+			.compute_vmid_bitmap = 0xFF00,
+
+			.first_compute_pipe = 1,
+			.compute_pipe_count = 4 - 1,
+		};
+
+		amdgpu_doorbell_get_kfd_info(rdev,
+				&gpu_resources.doorbell_physical_address,
+				&gpu_resources.doorbell_aperture_size,
+				&gpu_resources.doorbell_start_offset);
+
+		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+	}
+}
+
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+{
+	if (rdev->kfd) {
+		kgd2kfd->device_exit(rdev->kfd);
+		rdev->kfd = NULL;
+	}
+}
+
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+		const void *ih_ring_entry)
+{
+	if (rdev->kfd)
+		kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+}
+
+void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+{
+	if (rdev->kfd)
+		kgd2kfd->suspend(rdev->kfd);
+}
+
+int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+{
+	int r = 0;
+
+	if (rdev->kfd)
+		r = kgd2kfd->resume(rdev->kfd);
+
+	return r;
+}
+
+u32 pool_to_domain(enum kgd_memory_pool p)
+{
+	switch (p) {
+	case KGD_POOL_FRAMEBUFFER: return AMDGPU_GEM_DOMAIN_VRAM;
+	default: return AMDGPU_GEM_DOMAIN_GTT;
+	}
+}
+
+int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+			void **mem_obj, uint64_t *gpu_addr,
+			void **cpu_ptr)
+{
+	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
+	int r;
+
+	BUG_ON(kgd == NULL);
+	BUG_ON(gpu_addr == NULL);
+	BUG_ON(cpu_ptr == NULL);
+
+	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if ((*mem) == NULL)
+		return -ENOMEM;
+
+	r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+			AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo);
+	if (r) {
+		dev_err(rdev->dev,
+			"failed to allocate BO for amdkfd (%d)\n", r);
+		kfree(*mem);
+		return r;
+	}
+
+	/* map the buffer */
+	r = amdgpu_bo_reserve((*mem)->bo, true);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+		goto allocate_mem_reserve_bo_failed;
+	}
+
+	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
+				&(*mem)->gpu_addr);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+		goto allocate_mem_pin_bo_failed;
+	}
+	*gpu_addr = (*mem)->gpu_addr;
+
+	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
+	if (r) {
+		dev_err(rdev->dev,
+			"(%d) failed to map bo to kernel for amdkfd\n", r);
+		goto allocate_mem_kmap_bo_failed;
+	}
+	*cpu_ptr = (*mem)->cpu_ptr;
+
+	amdgpu_bo_unreserve((*mem)->bo);
+
+	return 0;
+
+allocate_mem_kmap_bo_failed:
+	amdgpu_bo_unpin((*mem)->bo);
+allocate_mem_pin_bo_failed:
+	amdgpu_bo_unreserve((*mem)->bo);
+allocate_mem_reserve_bo_failed:
+	amdgpu_bo_unref(&(*mem)->bo);
+
+	return r;
+}
+
+void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
+{
+	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
+
+	BUG_ON(mem == NULL);
+
+	amdgpu_bo_reserve(mem->bo, true);
+	amdgpu_bo_kunmap(mem->bo);
+	amdgpu_bo_unpin(mem->bo);
+	amdgpu_bo_unreserve(mem->bo);
+	amdgpu_bo_unref(&(mem->bo));
+	kfree(mem);
+}
+
+uint64_t get_vmem_size(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *rdev =
+		(struct amdgpu_device *)kgd;
+
+	BUG_ON(kgd == NULL);
+
+	return rdev->mc.real_vram_size;
+}
+
+uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+
+	if (rdev->asic_funcs->get_gpu_clock_counter)
+		return rdev->asic_funcs->get_gpu_clock_counter(rdev);
+	return 0;
+}
+
+uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+
+	/* The sclk is in quanta of 10 kHz */
+	return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
new file mode 100644
index 000000000000..a8be765542e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */
+
+#ifndef AMDGPU_AMDKFD_H_INCLUDED
+#define AMDGPU_AMDKFD_H_INCLUDED
+
+#include <linux/types.h>
+#include <kgd_kfd_interface.h>
+
+struct amdgpu_device;
+
+struct kgd_mem {
+	struct amdgpu_bo *bo;
+	uint64_t gpu_addr;
+	void *cpu_ptr;
+};
+
+bool amdgpu_amdkfd_init(void);
+void amdgpu_amdkfd_fini(void);
+
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
+
+void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
+int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+			const void *ih_ring_entry);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+
+/* Shared API */
+int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+			void **mem_obj, uint64_t *gpu_addr,
+			void **cpu_ptr);
+void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+uint64_t get_vmem_size(struct kgd_dev *kgd);
+uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
+
+uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+
+#endif /* AMDGPU_AMDKFD_H_INCLUDED */
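
When amdkfd is built as a module (CONFIG_HSA_AMD_MODULE), the loader path
declared here relies on the kernel's symbol_request()/symbol_put() pair:
resolving the exported kgd2kfd_init symbol also takes a reference that pins
amdkfd in memory until amdgpu_amdkfd_fini() drops it. A generic sketch of
that pattern, with the error paths reduced to the essentials:

	bool (*init_fn)(unsigned, const struct kgd2kfd_calls **);

	init_fn = symbol_request(kgd2kfd_init);	/* NULL if amdkfd not loaded */
	if (!init_fn)
		return false;

	if (!init_fn(KFD_INTERFACE_VERSION, &kgd2kfd)) {
		symbol_put(kgd2kfd_init);	/* version mismatch: unpin */
		return false;
	}
	/* later, on teardown: kgd2kfd->exit(); symbol_put(kgd2kfd_init); */
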
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
new file mode 100644
index 000000000000..2daad335b809
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "cikd.h"
+#include "cik_sdma.h"
+#include "amdgpu_ucode.h"
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+#include "cik_structs.h"
+
+#define CIK_PIPE_PER_MEC	(4)
+
+enum {
+	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
+	MAX_WATCH_ADDRESSES = 4
+};
+
+enum {
+	ADDRESS_WATCH_REG_ADDR_HI = 0,
+	ADDRESS_WATCH_REG_ADDR_LO,
+	ADDRESS_WATCH_REG_CNTL,
+	ADDRESS_WATCH_REG_MAX
+};
+
+/* not defined in the CI/KV reg file */
+enum {
+	ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
+	ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
+	ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
+	/* extend the mask to 26 bits to match the low address field */
+	ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
+	ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
+};
+
+static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
+	mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
+	mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
+	mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
+	mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
+};
+
+union TCP_WATCH_CNTL_BITS {
+	struct {
+		uint32_t mask:24;
+		uint32_t vmid:4;
+		uint32_t atc:1;
+		uint32_t mode:2;
+		uint32_t valid:1;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
+		uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+					unsigned int vmid);
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+				uint32_t hpd_size, uint64_t hpd_gpu_addr);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+			uint32_t queue_id, uint32_t __user *wptr);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+				uint32_t pipe_id, uint32_t queue_id);
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+				unsigned int timeout, uint32_t pipe_id,
+				uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+				unsigned int timeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					uint32_t cntl_val,
+					uint32_t addr_hi,
+					uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+					uint32_t gfx_index_val,
+					uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+							uint8_t vmid);
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
+
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+
+static const struct kfd2kgd_calls kfd2kgd = {
+	.init_gtt_mem_allocation = alloc_gtt_mem,
+	.free_gtt_mem = free_gtt_mem,
+	.get_vmem_size = get_vmem_size,
+	.get_gpu_clock_counter = get_gpu_clock_counter,
+	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.program_sh_mem_settings = kgd_program_sh_mem_settings,
+	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+	.init_pipeline = kgd_init_pipeline,
+	.init_interrupts = kgd_init_interrupts,
+	.hqd_load = kgd_hqd_load,
+	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_is_occupied = kgd_hqd_is_occupied,
+	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+	.hqd_destroy = kgd_hqd_destroy,
+	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+	.address_watch_disable = kgd_address_watch_disable,
+	.address_watch_execute = kgd_address_watch_execute,
+	.wave_control_execute = kgd_wave_control_execute,
+	.address_watch_get_offset = kgd_address_watch_get_offset,
+	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
+	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
+	.write_vmid_invalidate_request = write_vmid_invalidate_request,
+	.get_fw_version = get_fw_version
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
+{
+	return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+	return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+			uint32_t queue, uint32_t vmid)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+	mutex_lock(&adev->srbm_mutex);
+	WREG32(mmSRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	WREG32(mmSRBM_GFX_CNTL, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+	lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+	unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+					uint32_t sh_mem_config,
+					uint32_t sh_mem_ape1_base,
+					uint32_t sh_mem_ape1_limit,
+					uint32_t sh_mem_bases)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
+	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+	WREG32(mmSH_MEM_BASES, sh_mem_bases);
+
+	unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+					unsigned int vmid)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	/*
+	 * We have to assume that there is no outstanding mapping.
+	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+	 * a mapping is in progress or because a mapping finished and the
+	 * SW cleared it. So the protocol is to always wait & clear.
+	 */
+	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+					ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
+
+	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+		cpu_relax();
+	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+	/* Mapping vmid to pasid also for IH block */
+	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
+
+	return 0;
+}
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+				uint32_t hpd_size, uint64_t hpd_gpu_addr)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+	lock_srbm(kgd, mec, pipe, 0, 0);
+	WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
+	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
+	WREG32(mmCP_HPD_EOP_VMID, 0);
+	WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
+	unlock_srbm(kgd);
+
+	return 0;
+}
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t mec;
+	uint32_t pipe;
+
+	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
+	pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+	lock_srbm(kgd, mec, pipe, 0, 0);
+
+	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+	unlock_srbm(kgd);
+
+	return 0;
+}
+
+static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+{
+	uint32_t retval;
+
+	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+
+	pr_debug("kfd: sdma base address: 0x%x\n", retval);
+
+	return retval;
+}
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+	return (struct cik_mqd *)mqd;
+}
+
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+	return (struct cik_sdma_rlc_registers *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+			uint32_t queue_id, uint32_t __user *wptr)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t wptr_shadow, is_wptr_shadow_valid;
+	struct cik_mqd *m;
+
+	m = get_mqd(mqd);
+
+	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
+	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
+	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
+
+	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
+	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
+	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
+
+	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
+	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
+	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
+
+	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
+
+	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
+	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
+	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
+
+	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
+	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
+	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
+	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
+
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+			m->cp_hqd_pq_rptr_report_addr_hi);
+
+	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
+
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
+
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
+
+	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
+
+	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
+
+	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
+	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
+
+	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
+
+	if (is_wptr_shadow_valid)
+		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+
+	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	release_queue(kgd);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct cik_sdma_rlc_registers *m;
+	uint32_t sdma_base_addr;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(m);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+			m->sdma_rlc_virtual_addr);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE,
+			m->sdma_rlc_rb_base);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+			m->sdma_rlc_rb_base_hi);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+			m->sdma_rlc_rb_rptr_addr_lo);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+			m->sdma_rlc_rb_rptr_addr_hi);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
+			m->sdma_rlc_doorbell);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+			m->sdma_rlc_rb_cntl);
+
+	return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+				uint32_t pipe_id, uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t act;
+	bool retval = false;
+	uint32_t low, high;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+	act = RREG32(mmCP_HQD_ACTIVE);
+	if (act) {
+		low = lower_32_bits(queue_address >> 8);
+		high = upper_32_bits(queue_address >> 8);
+
+		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
+				high == RREG32(mmCP_HQD_PQ_BASE_HI))
+			retval = true;
+	}
+	release_queue(kgd);
+	return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct cik_sdma_rlc_registers *m;
+	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_rb_cntl;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(m);
+
+	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+		return true;
+
+	return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+				unsigned int timeout, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t temp;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+
+	while (true) {
+		temp = RREG32(mmCP_HQD_ACTIVE);
+		if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+			break;
+		if (timeout == 0) {
+			pr_err("kfd: cp queue preemption time out\n");
+			release_queue(kgd);
+			return -ETIME;
+		}
+		msleep(20);
+		timeout -= 20;
+	}
+
+	release_queue(kgd);
+	return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+				unsigned int timeout)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct cik_sdma_rlc_registers *m;
+	uint32_t sdma_base_addr;
+	uint32_t temp;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(m);
+
+	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+	while (true) {
+		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE_MASK)
+			break;
+		if (timeout == 0)
+			return -ETIME;
+		msleep(20);
+		timeout -= 20;
+	}
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+
+	return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	union TCP_WATCH_CNTL_BITS cntl;
+	unsigned int i;
+
+	cntl.u32All = 0;
+
+	cntl.bitfields.valid = 0;
+	cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+	cntl.bitfields.atc = 1;
+
+	/* Turning off this address until we set all the registers */
+	for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
+		WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
+			ADDRESS_WATCH_REG_CNTL], cntl.u32All);
+
+	return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					uint32_t cntl_val,
+					uint32_t addr_hi,
+					uint32_t addr_lo)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	union TCP_WATCH_CNTL_BITS cntl;
+
+	cntl.u32All = cntl_val;
+
+	/* Turning off this watch point until we set all the registers */
+	cntl.bitfields.valid = 0;
+	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+		ADDRESS_WATCH_REG_CNTL], cntl.u32All);
+
+	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+		ADDRESS_WATCH_REG_ADDR_HI], addr_hi);
+
+	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+		ADDRESS_WATCH_REG_ADDR_LO], addr_lo);
+
+	/* Enable the watch point */
+	cntl.bitfields.valid = 1;
+
+	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+		ADDRESS_WATCH_REG_CNTL], cntl.u32All);
+
+	return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+					uint32_t gfx_index_val,
+					uint32_t sq_cmd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t data;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
+	WREG32(mmSQ_CMD, sq_cmd);
+
+	/* Restore the GRBM_GFX_INDEX register */
+
+	data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
+		GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
+		GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
+
+	WREG32(mmGRBM_GFX_INDEX, data);
+
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					unsigned int reg_offset)
+{
+	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+							uint8_t vmid)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+							uint8_t vmid)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	const union amdgpu_firmware_header *hdr;
+
+	BUG_ON(kgd == NULL);
+
+	switch (type) {
+	case KGD_ENGINE_PFP:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.pfp_fw->data;
+		break;
+
+	case KGD_ENGINE_ME:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.me_fw->data;
+		break;
+
+	case KGD_ENGINE_CE:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.ce_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC1:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.mec_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC2:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.mec2_fw->data;
+		break;
+
+	case KGD_ENGINE_RLC:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.rlc_fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA1:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->sdma[0].fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA2:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->sdma[1].fw->data;
+		break;
+
+	default:
+		return 0;
+	}
+
+	if (hdr == NULL)
+		return 0;
+
+	/* Only 12 bit in use */
+	return hdr->common.ucode_version;
+}
+
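With CIK_PIPE_PER_MEC = 4 and first_compute_pipe = 1 in the shared resources,
acquire_queue() above maps amdkfd's flat, 0-based pipe_id onto the 1-based
MEC/pipe pair SRBM_GFX_CNTL expects; the pre-increment skips HW pipe 0 of the
first MEC, which the GFX ring owns. A worked sketch of the mapping:

	/* mirrors acquire_queue(); values shown for pipe_id = 0 and 3 */
	uint32_t pipe_id = 0;
	uint32_t mec  = (++pipe_id / CIK_PIPE_PER_MEC) + 1;	/* (1/4)+1 = 1 */
	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);		/*  1%4    = 1 */

	pipe_id = 3;
	mec  = (++pipe_id / CIK_PIPE_PER_MEC) + 1;		/* (4/4)+1 = 2 */
	pipe = (pipe_id % CIK_PIPE_PER_MEC);			/*  4%4    = 0 */
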
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
new file mode 100644
index 000000000000..dfd1d503bccf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "vi_structs.h"
+#include "vid.h"
+
+#define VI_PIPE_PER_MEC	(4)
+
+struct cik_sdma_rlc_registers;
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t sh_mem_config,
+		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+		uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+		unsigned int vmid);
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+		uint32_t hpd_size, uint64_t hpd_gpu_addr);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+		uint32_t queue_id, uint32_t __user *wptr);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+		uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+		unsigned int timeout, uint32_t pipe_id,
+		uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+		unsigned int timeout);
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					uint32_t cntl_val,
+					uint32_t addr_hi,
+					uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+					uint32_t gfx_index_val,
+					uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+		uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+		uint8_t vmid);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+
+static const struct kfd2kgd_calls kfd2kgd = {
+	.init_gtt_mem_allocation = alloc_gtt_mem,
+	.free_gtt_mem = free_gtt_mem,
+	.get_vmem_size = get_vmem_size,
+	.get_gpu_clock_counter = get_gpu_clock_counter,
+	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.program_sh_mem_settings = kgd_program_sh_mem_settings,
+	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+	.init_pipeline = kgd_init_pipeline,
+	.init_interrupts = kgd_init_interrupts,
+	.hqd_load = kgd_hqd_load,
+	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_is_occupied = kgd_hqd_is_occupied,
+	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+	.hqd_destroy = kgd_hqd_destroy,
+	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+	.address_watch_disable = kgd_address_watch_disable,
+	.address_watch_execute = kgd_address_watch_execute,
+	.wave_control_execute = kgd_wave_control_execute,
+	.address_watch_get_offset = kgd_address_watch_get_offset,
+	.get_atc_vmid_pasid_mapping_pasid =
+			get_atc_vmid_pasid_mapping_pasid,
+	.get_atc_vmid_pasid_mapping_valid =
+			get_atc_vmid_pasid_mapping_valid,
+	.write_vmid_invalidate_request = write_vmid_invalidate_request,
+	.get_fw_version = get_fw_version
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
+{
+	return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+	return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+			uint32_t queue, uint32_t vmid)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+	mutex_lock(&adev->srbm_mutex);
+	WREG32(mmSRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	WREG32(mmSRBM_GFX_CNTL, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
+	uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
+
+	lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+	unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+					uint32_t sh_mem_config,
+					uint32_t sh_mem_ape1_base,
+					uint32_t sh_mem_ape1_limit,
+					uint32_t sh_mem_bases)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
+	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+	WREG32(mmSH_MEM_BASES, sh_mem_bases);
+
+	unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+					unsigned int vmid)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	/*
+	 * We have to assume that there is no outstanding mapping.
+	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+	 * a mapping is in progress or because a mapping finished
+	 * and the SW cleared it.
+	 * So the protocol is to always wait & clear.
+	 */
+	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+					ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
+
+	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+		cpu_relax();
+	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+	/* Mapping vmid to pasid also for IH block */
+	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
+
+	return 0;
+}
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+				uint32_t hpd_size, uint64_t hpd_gpu_addr)
+{
+	return 0;
+}
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t mec;
+	uint32_t pipe;
+
+	mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
+	pipe = (pipe_id % VI_PIPE_PER_MEC);
+
+	lock_srbm(kgd, mec, pipe, 0, 0);
+
+	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
+
+	unlock_srbm(kgd);
+
+	return 0;
+}
+
+static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+{
+	return 0;
+}
+
+static inline struct vi_mqd *get_mqd(void *mqd)
+{
+	return (struct vi_mqd *)mqd;
+}
+
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+	return (struct cik_sdma_rlc_registers *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+			uint32_t queue_id, uint32_t __user *wptr)
+{
+	struct vi_mqd *m;
+	uint32_t shadow_wptr, valid_wptr;
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	m = get_mqd(mqd);
+
+	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
+	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
+	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
+
+	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
+	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
+	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
+	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
+	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
+	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
+	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+			m->cp_hqd_pq_rptr_report_addr_hi);
+
+	if (valid_wptr == 0)
+		WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
+
+	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
+
+	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
+	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
+	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
+	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
+
+	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
+	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
+	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
+	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
+	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
+	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
+	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
+
+	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
+	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
+	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
+
+	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+
+	release_queue(kgd);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+{
+	return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+				uint32_t pipe_id, uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t act;
+	bool retval = false;
+	uint32_t low, high;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+	act = RREG32(mmCP_HQD_ACTIVE);
+	if (act) {
+		low = lower_32_bits(queue_address >> 8);
+		high = upper_32_bits(queue_address >> 8);
+
+		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
+				high == RREG32(mmCP_HQD_PQ_BASE_HI))
+			retval = true;
+	}
+	release_queue(kgd);
+	return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct cik_sdma_rlc_registers *m;
+	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_rb_cntl;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(m);
+
+	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+		return true;
+
+	return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+				unsigned int timeout, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t temp;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+
+	while (true) {
+		temp = RREG32(mmCP_HQD_ACTIVE);
+		if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+			break;
+		if (timeout == 0) {
+			pr_err("kfd: cp queue preemption time out\n");
+			release_queue(kgd);
+			return -ETIME;
+		}
+		msleep(20);
+		timeout -= 20;
+	}
+
+	release_queue(kgd);
+	return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+				unsigned int timeout)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct cik_sdma_rlc_registers *m;
+	uint32_t sdma_base_addr;
+	uint32_t temp;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(m);
+
+	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+	while (true) {
+		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE_MASK)
+			break;
+		if (timeout == 0)
+			return -ETIME;
+		msleep(20);
+		timeout -= 20;
+	}
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+
+	return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+							uint8_t vmid)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+							uint8_t vmid)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+	return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					uint32_t cntl_val,
+					uint32_t addr_hi,
+					uint32_t addr_lo)
+{
+	return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+					uint32_t gfx_index_val,
+					uint32_t sq_cmd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t data = 0;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
+	WREG32(mmSQ_CMD, sq_cmd);
+
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		INSTANCE_BROADCAST_WRITES, 1);
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		SH_BROADCAST_WRITES, 1);
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		SE_BROADCAST_WRITES, 1);
+
+	WREG32(mmGRBM_GFX_INDEX, data);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					unsigned int reg_offset)
+{
+	return 0;
+}
+
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	const union amdgpu_firmware_header *hdr;
+
+	BUG_ON(kgd == NULL);
+
+	switch (type) {
+	case KGD_ENGINE_PFP:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.pfp_fw->data;
+		break;
+
+	case KGD_ENGINE_ME:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.me_fw->data;
+		break;
+
+	case KGD_ENGINE_CE:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.ce_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC1:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.mec_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC2:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.mec2_fw->data;
+		break;
+
+	case KGD_ENGINE_RLC:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->gfx.rlc_fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA1:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->sdma[0].fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA2:
+		hdr = (const union amdgpu_firmware_header *)
+						adev->sdma[1].fw->data;
+		break;
+
+	default:
+		return 0;
+	}
+
+	if (hdr == NULL)
+		return 0;
+
+	/* Only 12 bit in use */
+	return hdr->common.ucode_version;
+}
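
kgd_wave_control_execute() on VI restores GRBM_GFX_INDEX with the generated
REG_SET_FIELD() helper, where the CIK version above ORs the *_MASK defines
directly; for these single-bit fields the two spellings produce the same
broadcast selector. A sketch of the equivalence:

	/* CIK spelling: OR the masks */
	uint32_t a = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
		     GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
		     GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;

	/* VI spelling: clear the field, then shift the value in */
	uint32_t b = 0;
	b = REG_SET_FIELD(b, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	b = REG_SET_FIELD(b, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	b = REG_SET_FIELD(b, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	/* a == b */
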
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 56da962231fc..115906f5fda0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -44,6 +44,8 @@
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
 
+#include "amdgpu_amdkfd.h"
+
 /*
  * KMS wrapper.
  * - 3.0.0 - initial driver
@@ -527,12 +529,15 @@ static int __init amdgpu_init(void)
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
 
+	amdgpu_amdkfd_init();
+
 	/* let modprobe override vga console setting */
 	return drm_pci_init(driver, pdriver);
 }
 
 static void __exit amdgpu_exit(void)
 {
+	amdgpu_amdkfd_fini();
 	drm_pci_exit(driver, pdriver);
 	amdgpu_unregister_atpx_handler();
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index db5422e65ec5..fb44dd2231b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -24,6 +24,7 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
+#include "amdgpu_amdkfd.h"
 
 /**
  * amdgpu_ih_ring_alloc - allocate memory for the IH ring
@@ -199,6 +200,12 @@ restart_ih:
 	rmb();
 
 	while (adev->irq.ih.rptr != wptr) {
+		u32 ring_index = adev->irq.ih.rptr >> 2;
+
+		/* Before dispatching irq to IP blocks, send it to amdkfd */
+		amdgpu_amdkfd_interrupt(adev,
+				(const void *) &adev->irq.ih.ring[ring_index]);
+
 		amdgpu_ih_decode_iv(adev, &entry);
 		adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
 
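
The IH read pointer counts bytes, so rptr >> 2 turns it into an index into
the ring viewed as an array of 32-bit dwords, and amdkfd receives a pointer
to the raw, still-undecoded entry. A sketch of the dword view the callee
gets; the field layout in the comments is an assumption for illustration,
not taken from this patch:

	const uint32_t *entry = &adev->irq.ih.ring[adev->irq.ih.rptr >> 2];
	/* on CIK each IV entry is 4 dwords, roughly:       */
	/* entry[0]: source id,     entry[1]: source data,  */
	/* entry[2]: pasid/vmid,    entry[3]: context       */
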
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 5533434c7a8f..8c40a9671b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -34,6 +34,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include "amdgpu_amdkfd.h"
 
 #if defined(CONFIG_VGA_SWITCHEROO)
 bool amdgpu_has_atpx(void);
@@ -61,6 +62,8 @@ int amdgpu_driver_unload_kms(struct drm_device *dev)
 
 	pm_runtime_get_sync(dev->dev);
 
+	amdgpu_amdkfd_device_fini(adev);
+
 	amdgpu_acpi_fini(adev);
 
 	amdgpu_device_fini(adev);
@@ -118,6 +121,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 			"Error during ACPI methods call\n");
 	}
 
+	amdgpu_amdkfd_load_interface(adev);
+	amdgpu_amdkfd_device_probe(adev);
+	amdgpu_amdkfd_device_init(adev);
+
 	if (amdgpu_device_is_px(dev)) {
 		pm_runtime_use_autosuspend(dev->dev);
 		pm_runtime_set_autosuspend_delay(dev->dev, 5000);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 341c56681841..b3b66a0d5ff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -64,6 +64,8 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
+#include "amdgpu_amdkfd.h"
+
 /*
  * Indirect registers accessor
  */
@@ -2448,14 +2450,21 @@ static int cik_common_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_amdkfd_suspend(adev);
+
 	return cik_common_hw_fini(adev);
 }
 
 static int cik_common_resume(void *handle)
 {
+	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	return cik_common_hw_init(adev);
+	r = cik_common_hw_init(adev);
+	if (r)
+		return r;
+
+	return amdgpu_amdkfd_resume(adev);
 }
 
 static bool cik_common_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index d19085a97064..a3e3dfaa01a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -552,6 +552,12 @@
552#define VCE_CMD_IB_AUTO 0x00000005 552#define VCE_CMD_IB_AUTO 0x00000005
553#define VCE_CMD_SEMAPHORE 0x00000006 553#define VCE_CMD_SEMAPHORE 0x00000006
554 554
555/* if PTR32, these are the bases for scratch and lds */
556#define PRIVATE_BASE(x) ((x) << 0) /* scratch */
557#define SHARED_BASE(x) ((x) << 16) /* LDS */
558
559#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200
560
555/* valid for both DEFAULT_MTYPE and APE1_MTYPE */ 561/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
556enum { 562enum {
557 MTYPE_CACHED = 0, 563 MTYPE_CACHED = 0,
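The PRIVATE_BASE()/SHARED_BASE() macros added above place the scratch aperture nybble in the low half of the register value and the LDS nybble at bit 16. A minimal sketch of how a caller composes them when PTR32 is set (the nybble value is hypothetical):

#include <stdint.h>

#define PRIVATE_BASE(x) ((x) << 0)  /* scratch */
#define SHARED_BASE(x)  ((x) << 16) /* LDS */

/* hypothetical 32-bit-process aperture nybble of 0x8:
 * 0x8 | (0x8 << 16) == 0x00080008 */
static const uint32_t sh_mem_bases = PRIVATE_BASE(0x8) | SHARED_BASE(0x8);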
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 31bb89452e12..d98aa9d82fa1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -66,6 +66,11 @@
66 66
67#define AMDGPU_NUM_OF_VMIDS 8 67#define AMDGPU_NUM_OF_VMIDS 8
68 68
69#define PIPEID(x) ((x) << 0)
70#define MEID(x) ((x) << 2)
71#define VMID(x) ((x) << 4)
72#define QUEUEID(x) ((x) << 8)
73
69#define RB_BITMAP_WIDTH_PER_SH 2 74#define RB_BITMAP_WIDTH_PER_SH 2
70 75
71#define MC_SEQ_MISC0__MT__MASK 0xf0000000 76#define MC_SEQ_MISC0__MT__MASK 0xf0000000
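PIPEID()/MEID()/VMID()/QUEUEID() pack the fields of a queue selector into one dword: the shifts above put PIPEID in bits 0-1, MEID in bits 2-3, VMID in bits 4-7 and QUEUEID from bit 8. A sketch of the composition, with illustrative values:

#include <stdint.h>

#define PIPEID(x)  ((x) << 0)
#define MEID(x)    ((x) << 2)
#define VMID(x)    ((x) << 4)
#define QUEUEID(x) ((x) << 8)

/* pipe 1 on ME 1, VMID 8, queue 0:
 * (1 << 0) | (1 << 2) | (8 << 4) | (0 << 8) == 0x85 */
static const uint32_t queue_select = PIPEID(1) | MEID(1) | VMID(8) | QUEUEID(0);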
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 8dfac37ff327..e13c67c8d2c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,6 @@
4 4
5config HSA_AMD 5config HSA_AMD
6 tristate "HSA kernel driver for AMD GPU devices" 6 tristate "HSA kernel driver for AMD GPU devices"
7 depends on DRM_RADEON && AMD_IOMMU_V2 && X86_64 7 depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
8 help 8 help
9 Enable this if you want to use HSA features on AMD GPU devices. 9 Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 28551153ec6d..7fc9b0f444cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -2,7 +2,8 @@
2# Makefile for Heterogenous System Architecture support for AMD GPU devices 2# Makefile for Heterogenous System Architecture support for AMD GPU devices
3# 3#
4 4
5ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ 5ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ \
6 -Idrivers/gpu/drm/amd/include/asic_reg
6 7
7amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ 8amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
8 kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ 9 kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 183be5b8414f..48769d12dd7b 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -65,17 +65,6 @@
65 65
66#define AQL_ENABLE 1 66#define AQL_ENABLE 1
67 67
68#define SDMA_RB_VMID(x) (x << 24)
69#define SDMA_RB_ENABLE (1 << 0)
70#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */
71#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12)
72#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
73#define SDMA_OFFSET(x) (x << 0)
74#define SDMA_DB_ENABLE (1 << 28)
75#define SDMA_ATC (1 << 0)
76#define SDMA_VA_PTR32 (1 << 4)
77#define SDMA_VA_SHARED_BASE(x) (x << 8)
78
79#define GRBM_GFX_INDEX 0x30800 68#define GRBM_GFX_INDEX 0x30800
80 69
81#define ATC_VMID_PASID_MAPPING_VALID (1U << 31) 70#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 75312c82969f..3f95f7cb4019 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -80,7 +80,12 @@ static const struct kfd_deviceid supported_devices[] = {
80 { 0x1318, &kaveri_device_info }, /* Kaveri */ 80 { 0x1318, &kaveri_device_info }, /* Kaveri */
81 { 0x131B, &kaveri_device_info }, /* Kaveri */ 81 { 0x131B, &kaveri_device_info }, /* Kaveri */
82 { 0x131C, &kaveri_device_info }, /* Kaveri */ 82 { 0x131C, &kaveri_device_info }, /* Kaveri */
83 { 0x131D, &kaveri_device_info } /* Kaveri */ 83 { 0x131D, &kaveri_device_info }, /* Kaveri */
84 { 0x9870, &carrizo_device_info }, /* Carrizo */
85 { 0x9874, &carrizo_device_info }, /* Carrizo */
86 { 0x9875, &carrizo_device_info }, /* Carrizo */
87 { 0x9876, &carrizo_device_info }, /* Carrizo */
88 { 0x9877, &carrizo_device_info } /* Carrizo */
84}; 89};
85 90
86static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 91static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 9ce8a20a7aff..23ce774ff09d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -23,6 +23,7 @@
23 23
24#include "kfd_device_queue_manager.h" 24#include "kfd_device_queue_manager.h"
25#include "cik_regs.h" 25#include "cik_regs.h"
26#include "oss/oss_2_4_sh_mask.h"
26 27
27static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, 28static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
28 struct qcm_process_device *qpd, 29 struct qcm_process_device *qpd,
@@ -135,13 +136,16 @@ static int register_process_cik(struct device_queue_manager *dqm,
135static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, 136static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
136 struct qcm_process_device *qpd) 137 struct qcm_process_device *qpd)
137{ 138{
138 uint32_t value = SDMA_ATC; 139 uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
139 140
140 if (q->process->is_32bit_user_mode) 141 if (q->process->is_32bit_user_mode)
141 value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); 142 value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
143 get_sh_mem_bases_32(qpd_to_pdd(qpd));
142 else 144 else
143 value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64( 145 value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
144 qpd_to_pdd(qpd))); 146 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
147 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
148
145 q->properties.sdma_vm_addr = value; 149 q->properties.sdma_vm_addr = value;
146} 150}
147 151
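Note that the shared-base term in the rewritten init_sdma_vm() must be masked with a bitwise & (a logical && here would collapse the whole term to 0 or 1). A standalone sketch of the packing, reusing the field positions of the SDMA_* macros deleted from cik_regs.h earlier in this patch (ATC at bit 0, PTR32 at bit 4, SHARED_BASE from bit 8; the mask width is an assumption):

#include <stdint.h>

#define ATC_SHIFT         0
#define PTR32_SHIFT       4
#define SHARED_BASE_SHIFT 8
#define SHARED_BASE_MASK  (0xffu << SHARED_BASE_SHIFT) /* width assumed */

static uint32_t pack_sdma_va(int is_32bit, uint32_t bases32, uint32_t nybble64)
{
	uint32_t value = 1 << ATC_SHIFT; /* ATC translation always on */

	if (is_32bit)
		value |= (1 << PTR32_SHIFT) | bases32;
	else	/* bitwise &, not logical && */
		value |= (nybble64 << SHARED_BASE_SHIFT) & SHARED_BASE_MASK;

	return value;
}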
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 4c15212a3899..44c38e8e54d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -22,6 +22,9 @@
22 */ 22 */
23 23
24#include "kfd_device_queue_manager.h" 24#include "kfd_device_queue_manager.h"
25#include "gca/gfx_8_0_enum.h"
26#include "gca/gfx_8_0_sh_mask.h"
27#include "gca/gfx_8_0_enum.h"
28#include "oss/oss_3_0_sh_mask.h"
25 29
26static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, 30static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
27 struct qcm_process_device *qpd, 31 struct qcm_process_device *qpd,
@@ -37,14 +41,40 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
37 41
38void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) 42void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
39{ 43{
40 pr_warn("amdkfd: VI DQM is not currently supported\n");
41
42 ops->set_cache_memory_policy = set_cache_memory_policy_vi; 44 ops->set_cache_memory_policy = set_cache_memory_policy_vi;
43 ops->register_process = register_process_vi; 45 ops->register_process = register_process_vi;
44 ops->initialize = initialize_cpsch_vi; 46 ops->initialize = initialize_cpsch_vi;
45 ops->init_sdma_vm = init_sdma_vm; 47 ops->init_sdma_vm = init_sdma_vm;
46} 48}
47 49
50static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
51{
52 /* In 64-bit mode, we can only control the top 3 bits of the LDS,
53 * scratch and GPUVM apertures.
54 * The hardware fills in the remaining 59 bits according to the
55 * following pattern:
56 * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
57 * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
58 * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
59 *
60 * (where X/Y is the configurable nybble with the low-bit 0)
61 *
62 * LDS and scratch will have the same top nybble programmed in the
63 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
64 * GPUVM can have a different top nybble programmed in the
65 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
66 * We don't bother to support different top nybbles
67 * for LDS/Scratch and GPUVM.
68 */
69
70 BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
71 top_address_nybble == 0);
72
73 return top_address_nybble << 12 |
74 (top_address_nybble << 12) <<
75 SH_MEM_BASES__SHARED_BASE__SHIFT;
76}
77
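A worked instance of compute_sh_mem_bases_64bit(), assuming SH_MEM_BASES__SHARED_BASE__SHIFT is 16 so PRIVATE_BASE sits in the low half of the register and SHARED_BASE in the high half:

/* top_address_nybble = 0x8 (even, nonzero, <= 0xE, so it passes the
 * BUG_ON): 0x8 << 12 == 0x8000 for PRIVATE_BASE, and the same value
 * shifted into the upper half for SHARED_BASE */
uint32_t bases = (0x8 << 12) | ((0x8 << 12) << 16); /* 0x80008000 */

Both halves then carry nybble 0x8, so LDS, scratch and GPUVM all start under the X = Y = 0x8 apertures of the layout described in the comment above.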
48static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, 78static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
49 struct qcm_process_device *qpd, 79 struct qcm_process_device *qpd,
50 enum cache_policy default_policy, 80 enum cache_policy default_policy,
@@ -52,18 +82,83 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
52 void __user *alternate_aperture_base, 82 void __user *alternate_aperture_base,
53 uint64_t alternate_aperture_size) 83 uint64_t alternate_aperture_size)
54{ 84{
55 return false; 85 uint32_t default_mtype;
86 uint32_t ape1_mtype;
87
88 default_mtype = (default_policy == cache_policy_coherent) ?
89 MTYPE_CC :
90 MTYPE_NC;
91
92 ape1_mtype = (alternate_policy == cache_policy_coherent) ?
93 MTYPE_CC :
94 MTYPE_NC;
95
96 qpd->sh_mem_config = (qpd->sh_mem_config &
97 SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
98 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
99 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
100 default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
101 ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
102 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
103
104 return true;
56} 105}
57 106
58static int register_process_vi(struct device_queue_manager *dqm, 107static int register_process_vi(struct device_queue_manager *dqm,
59 struct qcm_process_device *qpd) 108 struct qcm_process_device *qpd)
60{ 109{
61 return -1; 110 struct kfd_process_device *pdd;
111 unsigned int temp;
112
113 BUG_ON(!dqm || !qpd);
114
115 pdd = qpd_to_pdd(qpd);
116
117 /* check if sh_mem_config register already configured */
118 if (qpd->sh_mem_config == 0) {
119 qpd->sh_mem_config =
120 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
121 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
122 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
123 MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
124 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
125
126 qpd->sh_mem_ape1_limit = 0;
127 qpd->sh_mem_ape1_base = 0;
128 }
129
130 if (qpd->pqm->process->is_32bit_user_mode) {
131 temp = get_sh_mem_bases_32(pdd);
132 qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
133 qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
134 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
135 } else {
136 temp = get_sh_mem_bases_nybble_64(pdd);
137 qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
138 qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
139 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
140 }
141
142 pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
143 qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
144
145 return 0;
62} 146}
63 147
64static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, 148static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
65 struct qcm_process_device *qpd) 149 struct qcm_process_device *qpd)
66{ 150{
151 uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
152
153 if (q->process->is_32bit_user_mode)
154 value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
155 get_sh_mem_bases_32(qpd_to_pdd(qpd));
156 else
157 value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
158 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &&
158 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
160
161 q->properties.sdma_vm_addr = value;
67} 162}
68 163
69static int initialize_cpsch_vi(struct device_queue_manager *dqm) 164static int initialize_cpsch_vi(struct device_queue_manager *dqm)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 434979428fc0..d83de985e88c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -27,6 +27,7 @@
27#include "kfd_mqd_manager.h" 27#include "kfd_mqd_manager.h"
28#include "cik_regs.h" 28#include "cik_regs.h"
29#include "cik_structs.h" 29#include "cik_structs.h"
30#include "oss/oss_2_4_sh_mask.h"
30 31
31static inline struct cik_mqd *get_mqd(void *mqd) 32static inline struct cik_mqd *get_mqd(void *mqd)
32{ 33{
@@ -214,17 +215,20 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
214 BUG_ON(!mm || !mqd || !q); 215 BUG_ON(!mm || !mqd || !q);
215 216
216 m = get_sdma_mqd(mqd); 217 m = get_sdma_mqd(mqd);
217 m->sdma_rlc_rb_cntl = 218 m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
218 SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) | 219 SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
219 SDMA_RB_VMID(q->vmid) | 220 q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
220 SDMA_RPTR_WRITEBACK_ENABLE | 221 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
221 SDMA_RPTR_WRITEBACK_TIMER(6); 222 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
222 223
223 m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8); 224 m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
224 m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); 225 m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
225 m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); 226 m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
226 m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); 227 m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
227 m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE; 228 m->sdma_rlc_doorbell = q->doorbell_off <<
229 SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
230 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
231
228 m->sdma_rlc_virtual_addr = q->sdma_vm_addr; 232 m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
229 233
230 m->sdma_engine_id = q->sdma_engine_id; 234 m->sdma_engine_id = q->sdma_engine_id;
@@ -234,7 +238,9 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
234 if (q->queue_size > 0 && 238 if (q->queue_size > 0 &&
235 q->queue_address != 0 && 239 q->queue_address != 0 &&
236 q->queue_percent > 0) { 240 q->queue_percent > 0) {
237 m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE; 241 m->sdma_rlc_rb_cntl |=
242 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
243
238 q->is_active = true; 244 q->is_active = true;
239 } 245 }
240 246
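The rewritten update_mqd_sdma() derives RB_SIZE with ffs() on the ring size in dwords. A sketch of the whole control dword, reusing the field positions of the SDMA_* macros deleted from cik_regs.h earlier in this patch (RB_SIZE at bit 1, RPTR writeback enable at bit 12, writeback timer at bit 16, VMID at bit 24):

#include <stdint.h>
#include <strings.h>	/* ffs() */

static uint32_t sdma_rb_cntl(uint32_t queue_size_bytes, uint32_t vmid)
{
	/* a hypothetical 4096-byte ring is 1024 dwords == 2^10,
	 * so ffs() returns 11 */
	return (uint32_t)ffs(queue_size_bytes / sizeof(uint32_t)) << 1 |
	       1 << 12 |	/* RPTR writeback enable */
	       6 << 16 |	/* RPTR writeback timer (log2) */
	       vmid << 24;	/* RB_VMID */
}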
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index b3a7e3ba1e38..fa32c32fa1c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -22,12 +22,255 @@
22 */ 22 */
23 23
24#include <linux/printk.h> 24#include <linux/printk.h>
25#include <linux/slab.h>
25#include "kfd_priv.h" 26#include "kfd_priv.h"
26#include "kfd_mqd_manager.h" 27#include "kfd_mqd_manager.h"
28#include "vi_structs.h"
29#include "gca/gfx_8_0_sh_mask.h"
30#include "gca/gfx_8_0_enum.h"
31
32#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
33
34static inline struct vi_mqd *get_mqd(void *mqd)
35{
36 return (struct vi_mqd *)mqd;
37}
38
39static int init_mqd(struct mqd_manager *mm, void **mqd,
40 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
41 struct queue_properties *q)
42{
43 int retval;
44 uint64_t addr;
45 struct vi_mqd *m;
46
47 retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
48 mqd_mem_obj);
49 if (retval != 0)
50 return -ENOMEM;
51
52 m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
53 addr = (*mqd_mem_obj)->gpu_addr;
54
55 memset(m, 0, sizeof(struct vi_mqd));
56
57 m->header = 0xC0310800;
58 m->compute_pipelinestat_enable = 1;
59 m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
60 m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
61 m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
62 m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
63
64 m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
65 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
66
67 m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT |
68 MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT;
69
70 m->cp_mqd_base_addr_lo = lower_32_bits(addr);
71 m->cp_mqd_base_addr_hi = upper_32_bits(addr);
72
73 m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
74 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
75 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
76
77 m->cp_hqd_pipe_priority = 1;
78 m->cp_hqd_queue_priority = 15;
79
80 m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;
81
82 if (q->format == KFD_QUEUE_FORMAT_AQL)
83 m->cp_hqd_iq_rptr = 1;
84
85 *mqd = m;
86 if (gart_addr != NULL)
87 *gart_addr = addr;
88 retval = mm->update_mqd(mm, m, q);
89
90 return retval;
91}
92
93static int load_mqd(struct mqd_manager *mm, void *mqd,
94 uint32_t pipe_id, uint32_t queue_id,
95 uint32_t __user *wptr)
96{
97 return mm->dev->kfd2kgd->hqd_load
98 (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
99}
100
101static int __update_mqd(struct mqd_manager *mm, void *mqd,
102 struct queue_properties *q, unsigned int mtype,
103 unsigned int atc_bit)
104{
105 struct vi_mqd *m;
106
107 BUG_ON(!mm || !q || !mqd);
108
109 pr_debug("kfd: In func %s\n", __func__);
110
111 m = get_mqd(mqd);
112
113 m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
114 atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
115 mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
116 m->cp_hqd_pq_control |=
117 ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
118 pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
119
120 m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
121 m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
122
123 m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
124 m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
125
126 m->cp_hqd_pq_doorbell_control =
127 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
128 q->doorbell_off <<
129 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
130 pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n",
131 m->cp_hqd_pq_doorbell_control);
132
133 m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
134 mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT;
135
136 m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT |
137 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
138 mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;
139
140 m->cp_hqd_eop_control |=
141 ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1;
142 m->cp_hqd_eop_base_addr_lo =
143 lower_32_bits(q->eop_ring_buffer_address >> 8);
144 m->cp_hqd_eop_base_addr_hi =
145 upper_32_bits(q->eop_ring_buffer_address >> 8);
146
147 m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT |
148 mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT;
149
150 m->cp_hqd_vmid = q->vmid;
151
152 if (q->format == KFD_QUEUE_FORMAT_AQL) {
153 m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
154 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
155 }
156
157 m->cp_hqd_active = 0;
158 q->is_active = false;
159 if (q->queue_size > 0 &&
160 q->queue_address != 0 &&
161 q->queue_percent > 0) {
162 m->cp_hqd_active = 1;
163 q->is_active = true;
164 }
165
166 return 0;
167}
168
169
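__update_mqd() encodes the ring size as (log2 of the size in dwords) - 1 in the low bits of CP_HQD_PQ_CONTROL: ffs() returns the 1-based bit index, the first -1 turns that into log2, and the second -1 appears to match a 2^(n+1) field encoding (an inference from the code, not from a register spec). A worked instance:

/* queue_size = 4096 bytes -> 1024 dwords == 2^10,
 * ffs(1024) == 11, so the field receives 11 - 1 - 1 == 9 */
m->cp_hqd_pq_control |= ffs(4096 / sizeof(uint32_t)) - 1 - 1;

The same ffs() - 2 pattern is applied to cp_hqd_eop_control for the EOP ring a few lines further down.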
170static int update_mqd(struct mqd_manager *mm, void *mqd,
171 struct queue_properties *q)
172{
173 return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
174}
175
176static int destroy_mqd(struct mqd_manager *mm, void *mqd,
177 enum kfd_preempt_type type,
178 unsigned int timeout, uint32_t pipe_id,
179 uint32_t queue_id)
180{
181 return mm->dev->kfd2kgd->hqd_destroy
182 (mm->dev->kgd, type, timeout,
183 pipe_id, queue_id);
184}
185
186static void uninit_mqd(struct mqd_manager *mm, void *mqd,
187 struct kfd_mem_obj *mqd_mem_obj)
188{
189 BUG_ON(!mm || !mqd);
190 kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
191}
192
193static bool is_occupied(struct mqd_manager *mm, void *mqd,
194 uint64_t queue_address, uint32_t pipe_id,
195 uint32_t queue_id)
196{
197 return mm->dev->kfd2kgd->hqd_is_occupied(
198 mm->dev->kgd, queue_address,
199 pipe_id, queue_id);
200}
201
202static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
203 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
204 struct queue_properties *q)
205{
206 struct vi_mqd *m;
207 int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
208
209 if (retval != 0)
210 return retval;
211
212 m = get_mqd(*mqd);
213
214 m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
215 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
216
217 return retval;
218}
219
220static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
221 struct queue_properties *q)
222{
223 struct vi_mqd *m;
224 int retval = __update_mqd(mm, mqd, q, MTYPE_UC, 0);
225
226 if (retval != 0)
227 return retval;
228
229 m = get_mqd(mqd);
230 m->cp_hqd_vmid = q->vmid;
231 return retval;
232}
27 233
28struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, 234struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
29 struct kfd_dev *dev) 235 struct kfd_dev *dev)
30{ 236{
31 pr_warn("amdkfd: VI MQD is not currently supported\n"); 237 struct mqd_manager *mqd;
32 return NULL; 238
239 BUG_ON(!dev);
240 BUG_ON(type >= KFD_MQD_TYPE_MAX);
241
242 pr_debug("kfd: In func %s\n", __func__);
243
244 mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
245 if (!mqd)
246 return NULL;
247
248 mqd->dev = dev;
249
250 switch (type) {
251 case KFD_MQD_TYPE_CP:
252 case KFD_MQD_TYPE_COMPUTE:
253 mqd->init_mqd = init_mqd;
254 mqd->uninit_mqd = uninit_mqd;
255 mqd->load_mqd = load_mqd;
256 mqd->update_mqd = update_mqd;
257 mqd->destroy_mqd = destroy_mqd;
258 mqd->is_occupied = is_occupied;
259 break;
260 case KFD_MQD_TYPE_HIQ:
261 mqd->init_mqd = init_mqd_hiq;
262 mqd->uninit_mqd = uninit_mqd;
263 mqd->load_mqd = load_mqd;
264 mqd->update_mqd = update_mqd_hiq;
265 mqd->destroy_mqd = destroy_mqd;
266 mqd->is_occupied = is_occupied;
267 break;
268 case KFD_MQD_TYPE_SDMA:
269 break;
270 default:
271 kfree(mqd);
272 return NULL;
273 }
274
275 return mqd;
33} 276}
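mqd_manager_init_vi() hands back a table of function pointers selected by queue type, and everything after that goes through the table. A hypothetical caller (setup of dev, q, the pipe/queue ids, preempt_type and timeout_ms is assumed, and error handling is trimmed):

struct mqd_manager *mm = mqd_manager_init_vi(KFD_MQD_TYPE_COMPUTE, dev);
void *mqd;
struct kfd_mem_obj *mqd_mem_obj;
uint64_t gart_addr;

mm->init_mqd(mm, &mqd, &mqd_mem_obj, &gart_addr, &q->properties);
mm->load_mqd(mm, mqd, pipe_id, queue_id, q->properties.write_ptr);
/* ... queue executes ... */
mm->destroy_mqd(mm, mqd, preempt_type, timeout_ms, pipe_id, queue_id);
mm->uninit_mqd(mm, mqd, mqd_mem_obj);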
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 99b6d28a11c3..90f391434fa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -27,6 +27,7 @@
27#include "kfd_kernel_queue.h" 27#include "kfd_kernel_queue.h"
28#include "kfd_priv.h" 28#include "kfd_priv.h"
29#include "kfd_pm4_headers.h" 29#include "kfd_pm4_headers.h"
30#include "kfd_pm4_headers_vi.h"
30#include "kfd_pm4_opcodes.h" 31#include "kfd_pm4_opcodes.h"
31 32
32static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, 33static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
@@ -55,6 +56,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
55 bool *over_subscription) 56 bool *over_subscription)
56{ 57{
57 unsigned int process_count, queue_count; 58 unsigned int process_count, queue_count;
59 unsigned int map_queue_size;
58 60
59 BUG_ON(!pm || !rlib_size || !over_subscription); 61 BUG_ON(!pm || !rlib_size || !over_subscription);
60 62
@@ -69,9 +71,13 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
69 pr_debug("kfd: over subscribed runlist\n"); 71 pr_debug("kfd: over subscribed runlist\n");
70 } 72 }
71 73
74 map_queue_size =
75 (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
76 sizeof(struct pm4_mes_map_queues) :
77 sizeof(struct pm4_map_queues);
72 /* calculate run list ib allocation size */ 78 /* calculate run list ib allocation size */
73 *rlib_size = process_count * sizeof(struct pm4_map_process) + 79 *rlib_size = process_count * sizeof(struct pm4_map_process) +
74 queue_count * sizeof(struct pm4_map_queues); 80 queue_count * map_queue_size;
75 81
76 /* 82 /*
77 * Increase the allocation size in case we need a chained run list 83 * Increase the allocation size in case we need a chained run list
@@ -176,6 +182,71 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
176 return 0; 182 return 0;
177} 183}
178 184
185static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
186 struct queue *q, bool is_static)
187{
188 struct pm4_mes_map_queues *packet;
189 bool use_static = is_static;
190
191 BUG_ON(!pm || !buffer || !q);
192
193 pr_debug("kfd: In func %s\n", __func__);
194
195 packet = (struct pm4_mes_map_queues *)buffer;
196 memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
197
198 packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
199 sizeof(struct pm4_mes_map_queues));
200 packet->bitfields2.alloc_format =
201 alloc_format__mes_map_queues__one_per_pipe_vi;
202 packet->bitfields2.num_queues = 1;
203 packet->bitfields2.queue_sel =
204 queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
205
206 packet->bitfields2.engine_sel =
207 engine_sel__mes_map_queues__compute_vi;
208 packet->bitfields2.queue_type =
209 queue_type__mes_map_queues__normal_compute_vi;
210
211 switch (q->properties.type) {
212 case KFD_QUEUE_TYPE_COMPUTE:
213 if (use_static)
214 packet->bitfields2.queue_type =
215 queue_type__mes_map_queues__normal_latency_static_queue_vi;
216 break;
217 case KFD_QUEUE_TYPE_DIQ:
218 packet->bitfields2.queue_type =
219 queue_type__mes_map_queues__debug_interface_queue_vi;
220 break;
221 case KFD_QUEUE_TYPE_SDMA:
222 packet->bitfields2.engine_sel =
223 engine_sel__mes_map_queues__sdma0_vi;
224 use_static = false; /* no static queues under SDMA */
225 break;
226 default:
227 pr_err("kfd: in %s queue type %d\n", __func__,
228 q->properties.type);
229 BUG();
230 break;
231 }
232 packet->bitfields3.doorbell_offset =
233 q->properties.doorbell_off;
234
235 packet->mqd_addr_lo =
236 lower_32_bits(q->gart_mqd_addr);
237
238 packet->mqd_addr_hi =
239 upper_32_bits(q->gart_mqd_addr);
240
241 packet->wptr_addr_lo =
242 lower_32_bits((uint64_t)q->properties.write_ptr);
243
244 packet->wptr_addr_hi =
245 upper_32_bits((uint64_t)q->properties.write_ptr);
246
247 return 0;
248}
249
179static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, 250static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
180 struct queue *q, bool is_static) 251 struct queue *q, bool is_static)
181{ 252{
@@ -292,8 +363,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
292 pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", 363 pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
293 kq->queue->queue, qpd->is_debug); 364 kq->queue->queue, qpd->is_debug);
294 365
295 retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], 366 if (pm->dqm->dev->device_info->asic_family ==
296 kq->queue, qpd->is_debug); 367 CHIP_CARRIZO)
368 retval = pm_create_map_queue_vi(pm,
369 &rl_buffer[rl_wptr],
370 kq->queue,
371 qpd->is_debug);
372 else
373 retval = pm_create_map_queue(pm,
374 &rl_buffer[rl_wptr],
375 kq->queue,
376 qpd->is_debug);
297 if (retval != 0) 377 if (retval != 0)
298 return retval; 378 return retval;
299 379
@@ -309,8 +389,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
309 pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", 389 pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
310 q->queue, qpd->is_debug); 390 q->queue, qpd->is_debug);
311 391
312 retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], 392 if (pm->dqm->dev->device_info->asic_family ==
313 q, qpd->is_debug); 393 CHIP_CARRIZO)
394 retval = pm_create_map_queue_vi(pm,
395 &rl_buffer[rl_wptr],
396 q,
397 qpd->is_debug);
398 else
399 retval = pm_create_map_queue(pm,
400 &rl_buffer[rl_wptr],
401 q,
402 qpd->is_debug);
314 403
315 if (retval != 0) 404 if (retval != 0)
316 return retval; 405 return retval;
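With the Carrizo branches in place, the runlist IB size computed in pm_calc_rlib_size() depends on which MAP_QUEUES layout the ASIC uses. The arithmetic in isolation (struct sizes passed in as parameters so the sketch stays self-contained; over-subscription additionally grows the buffer to leave room for a chained RUN_LIST packet):

static unsigned int rlib_bytes(unsigned int procs, unsigned int queues,
			       unsigned int map_process_sz,
			       unsigned int map_queue_sz)
{
	/* one MAP_PROCESS per process plus one MAP_QUEUES per queue */
	return procs * map_process_sz + queues * map_queue_sz;
}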
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
new file mode 100644
index 000000000000..08c721922812
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -0,0 +1,398 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef F32_MES_PM4_PACKETS_H
25#define F32_MES_PM4_PACKETS_H
26
27#ifndef PM4_MES_HEADER_DEFINED
28#define PM4_MES_HEADER_DEFINED
29union PM4_MES_TYPE_3_HEADER {
30 struct {
31 uint32_t reserved1 : 8; /* < reserved */
32 uint32_t opcode : 8; /* < IT opcode */
33 uint32_t count : 14;/* < number of DWORDs - 1 in the
34 information body. */
35 uint32_t type : 2; /* < packet identifier.
36 It should be 3 for type 3 packets */
37 };
38 uint32_t u32All;
39};
40#endif /* PM4_MES_HEADER_DEFINED */
41
42/*--------------------MES_SET_RESOURCES--------------------*/
43
44#ifndef PM4_MES_SET_RESOURCES_DEFINED
45#define PM4_MES_SET_RESOURCES_DEFINED
46enum mes_set_resources_queue_type_enum {
47 queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
48 queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
49 queue_type__mes_set_resources__hsa_debug_interface_queue = 4
50};
51
52
53struct pm4_mes_set_resources {
54 union {
55 union PM4_MES_TYPE_3_HEADER header; /* header */
56 uint32_t ordinal1;
57 };
58
59 union {
60 struct {
61 uint32_t vmid_mask:16;
62 uint32_t unmap_latency:8;
63 uint32_t reserved1:5;
64 enum mes_set_resources_queue_type_enum queue_type:3;
65 } bitfields2;
66 uint32_t ordinal2;
67 };
68
69 uint32_t queue_mask_lo;
70 uint32_t queue_mask_hi;
71 uint32_t gws_mask_lo;
72 uint32_t gws_mask_hi;
73
74 union {
75 struct {
76 uint32_t oac_mask:16;
77 uint32_t reserved2:16;
78 } bitfields7;
79 uint32_t ordinal7;
80 };
81
82 union {
83 struct {
84 uint32_t gds_heap_base:6;
85 uint32_t reserved3:5;
86 uint32_t gds_heap_size:6;
87 uint32_t reserved4:15;
88 } bitfields8;
89 uint32_t ordinal8;
90 };
91
92};
93#endif
94
95/*--------------------MES_RUN_LIST--------------------*/
96
97#ifndef PM4_MES_RUN_LIST_DEFINED
98#define PM4_MES_RUN_LIST_DEFINED
99
100struct pm4_mes_runlist {
101 union {
102 union PM4_MES_TYPE_3_HEADER header; /* header */
103 uint32_t ordinal1;
104 };
105
106 union {
107 struct {
108 uint32_t reserved1:2;
109 uint32_t ib_base_lo:30;
110 } bitfields2;
111 uint32_t ordinal2;
112 };
113
114 union {
115 struct {
116 uint32_t ib_base_hi:16;
117 uint32_t reserved2:16;
118 } bitfields3;
119 uint32_t ordinal3;
120 };
121
122 union {
123 struct {
124 uint32_t ib_size:20;
125 uint32_t chain:1;
126 uint32_t offload_polling:1;
127 uint32_t reserved3:1;
128 uint32_t valid:1;
129 uint32_t reserved4:8;
130 } bitfields4;
131 uint32_t ordinal4;
132 };
133
134};
135#endif
136
137/*--------------------MES_MAP_PROCESS--------------------*/
138
139#ifndef PM4_MES_MAP_PROCESS_DEFINED
140#define PM4_MES_MAP_PROCESS_DEFINED
141
142struct pm4_mes_map_process {
143 union {
144 union PM4_MES_TYPE_3_HEADER header; /* header */
145 uint32_t ordinal1;
146 };
147
148 union {
149 struct {
150 uint32_t pasid:16;
151 uint32_t reserved1:8;
152 uint32_t diq_enable:1;
153 uint32_t process_quantum:7;
154 } bitfields2;
155 uint32_t ordinal2;
156 };
157
158 union {
159 struct {
160 uint32_t page_table_base:28;
161 uint32_t reserved2:4;
162 } bitfields3;
163 uint32_t ordinal3;
164 };
165
166 uint32_t sh_mem_bases;
167 uint32_t sh_mem_ape1_base;
168 uint32_t sh_mem_ape1_limit;
169 uint32_t sh_mem_config;
170 uint32_t gds_addr_lo;
171 uint32_t gds_addr_hi;
172
173 union {
174 struct {
175 uint32_t num_gws:6;
176 uint32_t reserved3:2;
177 uint32_t num_oac:4;
178 uint32_t reserved4:4;
179 uint32_t gds_size:6;
180 uint32_t num_queues:10;
181 } bitfields10;
182 uint32_t ordinal10;
183 };
184
185};
186#endif
187
188/*--------------------MES_MAP_QUEUES--------------------*/
189
190#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
191#define PM4_MES_MAP_QUEUES_VI_DEFINED
192enum mes_map_queues_queue_sel_vi_enum {
193 queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
194queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
195};
196
197enum mes_map_queues_queue_type_vi_enum {
198 queue_type__mes_map_queues__normal_compute_vi = 0,
199 queue_type__mes_map_queues__debug_interface_queue_vi = 1,
200 queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
201queue_type__mes_map_queues__low_latency_static_queue_vi = 3
202};
203
204enum mes_map_queues_alloc_format_vi_enum {
205 alloc_format__mes_map_queues__one_per_pipe_vi = 0,
206alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
207};
208
209enum mes_map_queues_engine_sel_vi_enum {
210 engine_sel__mes_map_queues__compute_vi = 0,
211 engine_sel__mes_map_queues__sdma0_vi = 2,
212 engine_sel__mes_map_queues__sdma1_vi = 3
213};
214
215
216struct pm4_mes_map_queues {
217 union {
218 union PM4_MES_TYPE_3_HEADER header; /* header */
219 uint32_t ordinal1;
220 };
221
222 union {
223 struct {
224 uint32_t reserved1:4;
225 enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
226 uint32_t reserved2:15;
227 enum mes_map_queues_queue_type_vi_enum queue_type:3;
228 enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
229 enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
230 uint32_t num_queues:3;
231 } bitfields2;
232 uint32_t ordinal2;
233 };
234
235 union {
236 struct {
237 uint32_t reserved3:1;
238 uint32_t check_disable:1;
239 uint32_t doorbell_offset:21;
240 uint32_t reserved4:3;
241 uint32_t queue:6;
242 } bitfields3;
243 uint32_t ordinal3;
244 };
245
246 uint32_t mqd_addr_lo;
247 uint32_t mqd_addr_hi;
248 uint32_t wptr_addr_lo;
249 uint32_t wptr_addr_hi;
250};
251#endif
252
253/*--------------------MES_QUERY_STATUS--------------------*/
254
255#ifndef PM4_MES_QUERY_STATUS_DEFINED
256#define PM4_MES_QUERY_STATUS_DEFINED
257enum mes_query_status_interrupt_sel_enum {
258 interrupt_sel__mes_query_status__completion_status = 0,
259 interrupt_sel__mes_query_status__process_status = 1,
260 interrupt_sel__mes_query_status__queue_status = 2
261};
262
263enum mes_query_status_command_enum {
264 command__mes_query_status__interrupt_only = 0,
265 command__mes_query_status__fence_only_immediate = 1,
266 command__mes_query_status__fence_only_after_write_ack = 2,
267 command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
268};
269
270enum mes_query_status_engine_sel_enum {
271 engine_sel__mes_query_status__compute = 0,
272 engine_sel__mes_query_status__sdma0_queue = 2,
273 engine_sel__mes_query_status__sdma1_queue = 3
274};
275
276struct pm4_mes_query_status {
277 union {
278 union PM4_MES_TYPE_3_HEADER header; /* header */
279 uint32_t ordinal1;
280 };
281
282 union {
283 struct {
284 uint32_t context_id:28;
285 enum mes_query_status_interrupt_sel_enum
286 interrupt_sel:2;
287 enum mes_query_status_command_enum command:2;
288 } bitfields2;
289 uint32_t ordinal2;
290 };
291
292 union {
293 struct {
294 uint32_t pasid:16;
295 uint32_t reserved1:16;
296 } bitfields3a;
297 struct {
298 uint32_t reserved2:2;
299 uint32_t doorbell_offset:21;
300 uint32_t reserved3:2;
301 enum mes_query_status_engine_sel_enum engine_sel:3;
302 uint32_t reserved4:4;
303 } bitfields3b;
304 uint32_t ordinal3;
305 };
306
307 uint32_t addr_lo;
308 uint32_t addr_hi;
309 uint32_t data_lo;
310 uint32_t data_hi;
311};
312#endif
313
314/*--------------------MES_UNMAP_QUEUES--------------------*/
315
316#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
317#define PM4_MES_UNMAP_QUEUES_DEFINED
318enum mes_unmap_queues_action_enum {
319 action__mes_unmap_queues__preempt_queues = 0,
320 action__mes_unmap_queues__reset_queues = 1,
321 action__mes_unmap_queues__disable_process_queues = 2,
322 action__mes_unmap_queues__reserved = 3
323};
324
325enum mes_unmap_queues_queue_sel_enum {
326 queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
327 queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
328 queue_sel__mes_unmap_queues__unmap_all_queues = 2,
329 queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
330};
331
332enum mes_unmap_queues_engine_sel_enum {
333 engine_sel__mes_unmap_queues__compute = 0,
334 engine_sel__mes_unmap_queues__sdma0 = 2,
335 engine_sel__mes_unmap_queues__sdma1 = 3
336};
337
338struct pm4_mes_unmap_queues {
339 union {
340 union PM4_MES_TYPE_3_HEADER header; /* header */
341 uint32_t ordinal1;
342 };
343
344 union {
345 struct {
346 enum mes_unmap_queues_action_enum action:2;
347 uint32_t reserved1:2;
348 enum mes_unmap_queues_queue_sel_enum queue_sel:2;
349 uint32_t reserved2:20;
350 enum mes_unmap_queues_engine_sel_enum engine_sel:3;
351 uint32_t num_queues:3;
352 } bitfields2;
353 uint32_t ordinal2;
354 };
355
356 union {
357 struct {
358 uint32_t pasid:16;
359 uint32_t reserved3:16;
360 } bitfields3a;
361 struct {
362 uint32_t reserved4:2;
363 uint32_t doorbell_offset0:21;
364 uint32_t reserved5:9;
365 } bitfields3b;
366 uint32_t ordinal3;
367 };
368
369 union {
370 struct {
371 uint32_t reserved6:2;
372 uint32_t doorbell_offset1:21;
373 uint32_t reserved7:9;
374 } bitfields4;
375 uint32_t ordinal4;
376 };
377
378 union {
379 struct {
380 uint32_t reserved8:2;
381 uint32_t doorbell_offset2:21;
382 uint32_t reserved9:9;
383 } bitfields5;
384 uint32_t ordinal5;
385 };
386
387 union {
388 struct {
389 uint32_t reserved10:2;
390 uint32_t doorbell_offset3:21;
391 uint32_t reserved11:9;
392 } bitfields6;
393 uint32_t ordinal6;
394 };
395};
396#endif
397
398#endif
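A sketch of filling one pm4_mes_map_queues packet with the definitions above. build_pm4_header() is modelled on the common PM4 type-3 convention (count = total dwords - 2); both that convention and the 0xA7 opcode for IT_MAP_QUEUES are assumptions here, since kfd_pm4_opcodes.h is not part of this patch:

#include <stdint.h>
#include <string.h>

static uint32_t build_pm4_header(uint8_t opcode, size_t packet_size)
{
	union PM4_MES_TYPE_3_HEADER header;

	header.u32All = 0;
	header.opcode = opcode;
	header.count = packet_size / sizeof(uint32_t) - 2; /* assumed */
	header.type = 3; /* type-3 packet */

	return header.u32All;
}

static void fill_map_queues(struct pm4_mes_map_queues *pkt,
			    uint32_t doorbell_off, uint64_t mqd_gpu_addr,
			    uint64_t wptr_gpu_addr)
{
	memset(pkt, 0, sizeof(*pkt));

	pkt->header.u32All = build_pm4_header(0xA7 /* IT_MAP_QUEUES, assumed */,
					      sizeof(*pkt));
	pkt->bitfields2.queue_sel =
		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
	pkt->bitfields2.alloc_format =
		alloc_format__mes_map_queues__one_per_pipe_vi;
	pkt->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi;
	pkt->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_compute_vi;
	pkt->bitfields2.num_queues = 1;

	pkt->bitfields3.doorbell_offset = doorbell_off;
	pkt->mqd_addr_lo = (uint32_t)mqd_gpu_addr;
	pkt->mqd_addr_hi = (uint32_t)(mqd_gpu_addr >> 32);
	pkt->wptr_addr_lo = (uint32_t)wptr_gpu_addr;
	pkt->wptr_addr_hi = (uint32_t)(wptr_gpu_addr >> 32);
}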
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c25728bc388a..74909e72a009 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1186,6 +1186,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
1186 * TODO: Retrieve max engine clock values from KGD 1186 * TODO: Retrieve max engine clock values from KGD
1187 */ 1187 */
1188 1188
1189 if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
1190 dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
1191 pr_info("amdkfd: adding doorbell packet type capability\n");
1192 }
1193
1189 res = 0; 1194 res = 0;
1190 1195
1191err: 1196err:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 989624b3cd14..c3ddb9b95ff8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -40,6 +40,7 @@
40#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 40#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
41#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 41#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
42#define HSA_CAP_RESERVED 0xfffff000 42#define HSA_CAP_RESERVED 0xfffff000
43#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000
43 44
44struct kfd_node_properties { 45struct kfd_node_properties {
45 uint32_t cpu_cores_count; 46 uint32_t cpu_cores_count;
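A small sketch of composing the capability word with these masks (the watch-point count of 4 is hypothetical):

uint32_t capability = 0;

/* 4 watch points, encoded through the TOTALBITS field:
 * (4 << 8) & 0xf00 == 0x400 */
capability |= (4 << HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
	      HSA_CAP_WATCH_POINTS_TOTALBITS_MASK;

/* Carrizo and later advertise packet-type doorbells
 * (see the kfd_topology.c hunk above) */
capability |= HSA_CAP_DOORBELL_PACKET_TYPE;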
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 9080daa116b6..888250b33ea8 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -52,7 +52,8 @@ enum kgd_engine_type {
52 KGD_ENGINE_MEC1, 52 KGD_ENGINE_MEC1,
53 KGD_ENGINE_MEC2, 53 KGD_ENGINE_MEC2,
54 KGD_ENGINE_RLC, 54 KGD_ENGINE_RLC,
55 KGD_ENGINE_SDMA, 55 KGD_ENGINE_SDMA1,
56 KGD_ENGINE_SDMA2,
56 KGD_ENGINE_MAX 57 KGD_ENGINE_MAX
57}; 58};
58 59
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
new file mode 100644
index 000000000000..65cfacd7a66c
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -0,0 +1,417 @@
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef VI_STRUCTS_H_
25#define VI_STRUCTS_H_
26
27struct vi_sdma_mqd {
28 uint32_t sdmax_rlcx_rb_cntl;
29 uint32_t sdmax_rlcx_rb_base;
30 uint32_t sdmax_rlcx_rb_base_hi;
31 uint32_t sdmax_rlcx_rb_rptr;
32 uint32_t sdmax_rlcx_rb_wptr;
33 uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
34 uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
35 uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
36 uint32_t sdmax_rlcx_rb_rptr_addr_hi;
37 uint32_t sdmax_rlcx_rb_rptr_addr_lo;
38 uint32_t sdmax_rlcx_ib_cntl;
39 uint32_t sdmax_rlcx_ib_rptr;
40 uint32_t sdmax_rlcx_ib_offset;
41 uint32_t sdmax_rlcx_ib_base_lo;
42 uint32_t sdmax_rlcx_ib_base_hi;
43 uint32_t sdmax_rlcx_ib_size;
44 uint32_t sdmax_rlcx_skip_cntl;
45 uint32_t sdmax_rlcx_context_status;
46 uint32_t sdmax_rlcx_doorbell;
47 uint32_t sdmax_rlcx_virtual_addr;
48 uint32_t sdmax_rlcx_ape1_cntl;
49 uint32_t sdmax_rlcx_doorbell_log;
50 uint32_t reserved_22;
51 uint32_t reserved_23;
52 uint32_t reserved_24;
53 uint32_t reserved_25;
54 uint32_t reserved_26;
55 uint32_t reserved_27;
56 uint32_t reserved_28;
57 uint32_t reserved_29;
58 uint32_t reserved_30;
59 uint32_t reserved_31;
60 uint32_t reserved_32;
61 uint32_t reserved_33;
62 uint32_t reserved_34;
63 uint32_t reserved_35;
64 uint32_t reserved_36;
65 uint32_t reserved_37;
66 uint32_t reserved_38;
67 uint32_t reserved_39;
68 uint32_t reserved_40;
69 uint32_t reserved_41;
70 uint32_t reserved_42;
71 uint32_t reserved_43;
72 uint32_t reserved_44;
73 uint32_t reserved_45;
74 uint32_t reserved_46;
75 uint32_t reserved_47;
76 uint32_t reserved_48;
77 uint32_t reserved_49;
78 uint32_t reserved_50;
79 uint32_t reserved_51;
80 uint32_t reserved_52;
81 uint32_t reserved_53;
82 uint32_t reserved_54;
83 uint32_t reserved_55;
84 uint32_t reserved_56;
85 uint32_t reserved_57;
86 uint32_t reserved_58;
87 uint32_t reserved_59;
88 uint32_t reserved_60;
89 uint32_t reserved_61;
90 uint32_t reserved_62;
91 uint32_t reserved_63;
92 uint32_t reserved_64;
93 uint32_t reserved_65;
94 uint32_t reserved_66;
95 uint32_t reserved_67;
96 uint32_t reserved_68;
97 uint32_t reserved_69;
98 uint32_t reserved_70;
99 uint32_t reserved_71;
100 uint32_t reserved_72;
101 uint32_t reserved_73;
102 uint32_t reserved_74;
103 uint32_t reserved_75;
104 uint32_t reserved_76;
105 uint32_t reserved_77;
106 uint32_t reserved_78;
107 uint32_t reserved_79;
108 uint32_t reserved_80;
109 uint32_t reserved_81;
110 uint32_t reserved_82;
111 uint32_t reserved_83;
112 uint32_t reserved_84;
113 uint32_t reserved_85;
114 uint32_t reserved_86;
115 uint32_t reserved_87;
116 uint32_t reserved_88;
117 uint32_t reserved_89;
118 uint32_t reserved_90;
119 uint32_t reserved_91;
120 uint32_t reserved_92;
121 uint32_t reserved_93;
122 uint32_t reserved_94;
123 uint32_t reserved_95;
124 uint32_t reserved_96;
125 uint32_t reserved_97;
126 uint32_t reserved_98;
127 uint32_t reserved_99;
128 uint32_t reserved_100;
129 uint32_t reserved_101;
130 uint32_t reserved_102;
131 uint32_t reserved_103;
132 uint32_t reserved_104;
133 uint32_t reserved_105;
134 uint32_t reserved_106;
135 uint32_t reserved_107;
136 uint32_t reserved_108;
137 uint32_t reserved_109;
138 uint32_t reserved_110;
139 uint32_t reserved_111;
140 uint32_t reserved_112;
141 uint32_t reserved_113;
142 uint32_t reserved_114;
143 uint32_t reserved_115;
144 uint32_t reserved_116;
145 uint32_t reserved_117;
146 uint32_t reserved_118;
147 uint32_t reserved_119;
148 uint32_t reserved_120;
149 uint32_t reserved_121;
150 uint32_t reserved_122;
151 uint32_t reserved_123;
152 uint32_t reserved_124;
153 uint32_t reserved_125;
154 uint32_t reserved_126;
155 uint32_t reserved_127;
156};
157
158struct vi_mqd {
159 uint32_t header;
160 uint32_t compute_dispatch_initiator;
161 uint32_t compute_dim_x;
162 uint32_t compute_dim_y;
163 uint32_t compute_dim_z;
164 uint32_t compute_start_x;
165 uint32_t compute_start_y;
166 uint32_t compute_start_z;
167 uint32_t compute_num_thread_x;
168 uint32_t compute_num_thread_y;
169 uint32_t compute_num_thread_z;
170 uint32_t compute_pipelinestat_enable;
171 uint32_t compute_perfcount_enable;
172 uint32_t compute_pgm_lo;
173 uint32_t compute_pgm_hi;
174 uint32_t compute_tba_lo;
175 uint32_t compute_tba_hi;
176 uint32_t compute_tma_lo;
177 uint32_t compute_tma_hi;
178 uint32_t compute_pgm_rsrc1;
179 uint32_t compute_pgm_rsrc2;
180 uint32_t compute_vmid;
181 uint32_t compute_resource_limits;
182 uint32_t compute_static_thread_mgmt_se0;
183 uint32_t compute_static_thread_mgmt_se1;
184 uint32_t compute_tmpring_size;
185 uint32_t compute_static_thread_mgmt_se2;
186 uint32_t compute_static_thread_mgmt_se3;
187 uint32_t compute_restart_x;
188 uint32_t compute_restart_y;
189 uint32_t compute_restart_z;
190 uint32_t compute_thread_trace_enable;
191 uint32_t compute_misc_reserved;
192 uint32_t compute_dispatch_id;
193 uint32_t compute_threadgroup_id;
194 uint32_t compute_relaunch;
195 uint32_t compute_wave_restore_addr_lo;
196 uint32_t compute_wave_restore_addr_hi;
197 uint32_t compute_wave_restore_control;
198 uint32_t reserved_39;
199 uint32_t reserved_40;
200 uint32_t reserved_41;
201 uint32_t reserved_42;
202 uint32_t reserved_43;
203 uint32_t reserved_44;
204 uint32_t reserved_45;
205 uint32_t reserved_46;
206 uint32_t reserved_47;
207 uint32_t reserved_48;
208 uint32_t reserved_49;
209 uint32_t reserved_50;
210 uint32_t reserved_51;
211 uint32_t reserved_52;
212 uint32_t reserved_53;
213 uint32_t reserved_54;
214 uint32_t reserved_55;
215 uint32_t reserved_56;
216 uint32_t reserved_57;
217 uint32_t reserved_58;
218 uint32_t reserved_59;
219 uint32_t reserved_60;
220 uint32_t reserved_61;
221 uint32_t reserved_62;
222 uint32_t reserved_63;
223 uint32_t reserved_64;
224 uint32_t compute_user_data_0;
225 uint32_t compute_user_data_1;
226 uint32_t compute_user_data_2;
227 uint32_t compute_user_data_3;
228 uint32_t compute_user_data_4;
229 uint32_t compute_user_data_5;
230 uint32_t compute_user_data_6;
231 uint32_t compute_user_data_7;
232 uint32_t compute_user_data_8;
233 uint32_t compute_user_data_9;
234 uint32_t compute_user_data_10;
235 uint32_t compute_user_data_11;
236 uint32_t compute_user_data_12;
237 uint32_t compute_user_data_13;
238 uint32_t compute_user_data_14;
239 uint32_t compute_user_data_15;
240 uint32_t cp_compute_csinvoc_count_lo;
241 uint32_t cp_compute_csinvoc_count_hi;
242 uint32_t reserved_83;
243 uint32_t reserved_84;
244 uint32_t reserved_85;
245 uint32_t cp_mqd_query_time_lo;
246 uint32_t cp_mqd_query_time_hi;
247 uint32_t cp_mqd_connect_start_time_lo;
248 uint32_t cp_mqd_connect_start_time_hi;
249 uint32_t cp_mqd_connect_end_time_lo;
250 uint32_t cp_mqd_connect_end_time_hi;
251 uint32_t cp_mqd_connect_end_wf_count;
252 uint32_t cp_mqd_connect_end_pq_rptr;
253 uint32_t cp_mqd_connect_end_pq_wptr;
254 uint32_t cp_mqd_connect_end_ib_rptr;
255 uint32_t reserved_96;
256 uint32_t reserved_97;
257 uint32_t cp_mqd_save_start_time_lo;
258 uint32_t cp_mqd_save_start_time_hi;
259 uint32_t cp_mqd_save_end_time_lo;
260 uint32_t cp_mqd_save_end_time_hi;
261 uint32_t cp_mqd_restore_start_time_lo;
262 uint32_t cp_mqd_restore_start_time_hi;
263 uint32_t cp_mqd_restore_end_time_lo;
264 uint32_t cp_mqd_restore_end_time_hi;
265 uint32_t reserved_106;
266 uint32_t reserved_107;
267 uint32_t gds_cs_ctxsw_cnt0;
268 uint32_t gds_cs_ctxsw_cnt1;
269 uint32_t gds_cs_ctxsw_cnt2;
270 uint32_t gds_cs_ctxsw_cnt3;
271 uint32_t reserved_112;
272 uint32_t reserved_113;
273 uint32_t cp_pq_exe_status_lo;
274 uint32_t cp_pq_exe_status_hi;
275 uint32_t cp_packet_id_lo;
276 uint32_t cp_packet_id_hi;
277 uint32_t cp_packet_exe_status_lo;
278 uint32_t cp_packet_exe_status_hi;
279 uint32_t gds_save_base_addr_lo;
280 uint32_t gds_save_base_addr_hi;
281 uint32_t gds_save_mask_lo;
282 uint32_t gds_save_mask_hi;
283 uint32_t ctx_save_base_addr_lo;
284 uint32_t ctx_save_base_addr_hi;
285 uint32_t reserved_126;
286 uint32_t reserved_127;
287 uint32_t cp_mqd_base_addr_lo;
288 uint32_t cp_mqd_base_addr_hi;
289 uint32_t cp_hqd_active;
290 uint32_t cp_hqd_vmid;
291 uint32_t cp_hqd_persistent_state;
292 uint32_t cp_hqd_pipe_priority;
293 uint32_t cp_hqd_queue_priority;
294 uint32_t cp_hqd_quantum;
295 uint32_t cp_hqd_pq_base_lo;
296 uint32_t cp_hqd_pq_base_hi;
297 uint32_t cp_hqd_pq_rptr;
298 uint32_t cp_hqd_pq_rptr_report_addr_lo;
299 uint32_t cp_hqd_pq_rptr_report_addr_hi;
300 uint32_t cp_hqd_pq_wptr_poll_addr_lo;
301 uint32_t cp_hqd_pq_wptr_poll_addr_hi;
302 uint32_t cp_hqd_pq_doorbell_control;
303 uint32_t cp_hqd_pq_wptr;
304 uint32_t cp_hqd_pq_control;
305 uint32_t cp_hqd_ib_base_addr_lo;
306 uint32_t cp_hqd_ib_base_addr_hi;
307 uint32_t cp_hqd_ib_rptr;
308 uint32_t cp_hqd_ib_control;
309 uint32_t cp_hqd_iq_timer;
310 uint32_t cp_hqd_iq_rptr;
311 uint32_t cp_hqd_dequeue_request;
312 uint32_t cp_hqd_dma_offload;
313 uint32_t cp_hqd_sema_cmd;
314 uint32_t cp_hqd_msg_type;
315 uint32_t cp_hqd_atomic0_preop_lo;
316 uint32_t cp_hqd_atomic0_preop_hi;
317 uint32_t cp_hqd_atomic1_preop_lo;
318 uint32_t cp_hqd_atomic1_preop_hi;
319 uint32_t cp_hqd_hq_status0;
320 uint32_t cp_hqd_hq_control0;
321 uint32_t cp_mqd_control;
322 uint32_t cp_hqd_hq_status1;
323 uint32_t cp_hqd_hq_control1;
324 uint32_t cp_hqd_eop_base_addr_lo;
325 uint32_t cp_hqd_eop_base_addr_hi;
326 uint32_t cp_hqd_eop_control;
327 uint32_t cp_hqd_eop_rptr;
328 uint32_t cp_hqd_eop_wptr;
329 uint32_t cp_hqd_eop_done_events;
330 uint32_t cp_hqd_ctx_save_base_addr_lo;
331 uint32_t cp_hqd_ctx_save_base_addr_hi;
332 uint32_t cp_hqd_ctx_save_control;
333 uint32_t cp_hqd_cntl_stack_offset;
334 uint32_t cp_hqd_cntl_stack_size;
335 uint32_t cp_hqd_wg_state_offset;
336 uint32_t cp_hqd_ctx_save_size;
337 uint32_t cp_hqd_gds_resource_state;
338 uint32_t cp_hqd_error;
339 uint32_t cp_hqd_eop_wptr_mem;
340 uint32_t cp_hqd_eop_dones;
341 uint32_t reserved_182;
342 uint32_t reserved_183;
343 uint32_t reserved_184;
344 uint32_t reserved_185;
345 uint32_t reserved_186;
346 uint32_t reserved_187;
347 uint32_t reserved_188;
348 uint32_t reserved_189;
349 uint32_t reserved_190;
350 uint32_t reserved_191;
351 uint32_t iqtimer_pkt_header;
352 uint32_t iqtimer_pkt_dw0;
353 uint32_t iqtimer_pkt_dw1;
354 uint32_t iqtimer_pkt_dw2;
355 uint32_t iqtimer_pkt_dw3;
356 uint32_t iqtimer_pkt_dw4;
357 uint32_t iqtimer_pkt_dw5;
358 uint32_t iqtimer_pkt_dw6;
359 uint32_t iqtimer_pkt_dw7;
360 uint32_t iqtimer_pkt_dw8;
361 uint32_t iqtimer_pkt_dw9;
362 uint32_t iqtimer_pkt_dw10;
363 uint32_t iqtimer_pkt_dw11;
364 uint32_t iqtimer_pkt_dw12;
365 uint32_t iqtimer_pkt_dw13;
366 uint32_t iqtimer_pkt_dw14;
367 uint32_t iqtimer_pkt_dw15;
368 uint32_t iqtimer_pkt_dw16;
369 uint32_t iqtimer_pkt_dw17;
370 uint32_t iqtimer_pkt_dw18;
371 uint32_t iqtimer_pkt_dw19;
372 uint32_t iqtimer_pkt_dw20;
373 uint32_t iqtimer_pkt_dw21;
374 uint32_t iqtimer_pkt_dw22;
375 uint32_t iqtimer_pkt_dw23;
376 uint32_t iqtimer_pkt_dw24;
377 uint32_t iqtimer_pkt_dw25;
378 uint32_t iqtimer_pkt_dw26;
379 uint32_t iqtimer_pkt_dw27;
380 uint32_t iqtimer_pkt_dw28;
381 uint32_t iqtimer_pkt_dw29;
382 uint32_t iqtimer_pkt_dw30;
383 uint32_t iqtimer_pkt_dw31;
384 uint32_t reserved_225;
385 uint32_t reserved_226;
386 uint32_t reserved_227;
387 uint32_t set_resources_header;
388 uint32_t set_resources_dw1;
389 uint32_t set_resources_dw2;
390 uint32_t set_resources_dw3;
391 uint32_t set_resources_dw4;
392 uint32_t set_resources_dw5;
393 uint32_t set_resources_dw6;
394 uint32_t set_resources_dw7;
395 uint32_t reserved_236;
396 uint32_t reserved_237;
397 uint32_t reserved_238;
398 uint32_t reserved_239;
399 uint32_t queue_doorbell_id0;
400 uint32_t queue_doorbell_id1;
401 uint32_t queue_doorbell_id2;
402 uint32_t queue_doorbell_id3;
403 uint32_t queue_doorbell_id4;
404 uint32_t queue_doorbell_id5;
405 uint32_t queue_doorbell_id6;
406 uint32_t queue_doorbell_id7;
407 uint32_t queue_doorbell_id8;
408 uint32_t queue_doorbell_id9;
409 uint32_t queue_doorbell_id10;
410 uint32_t queue_doorbell_id11;
411 uint32_t queue_doorbell_id12;
412 uint32_t queue_doorbell_id13;
413 uint32_t queue_doorbell_id14;
414 uint32_t queue_doorbell_id15;
415};
416
417#endif /* VI_STRUCTS_H_ */
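The reserved_N names track dword indices, so if that convention holds end to end, vi_sdma_mqd spans 128 dwords and vi_mqd 256 (queue_doorbell_id0..15 occupy indices 240-255). A compile-time sketch of that invariant, under the assumption that the all-uint32_t layout introduces no padding:

#include <assert.h>
#include <stdint.h>
#include "vi_structs.h"

static_assert(sizeof(struct vi_sdma_mqd) == 128 * sizeof(uint32_t),
	      "VI SDMA MQD expected to be 128 dwords");
static_assert(sizeof(struct vi_mqd) == 256 * sizeof(uint32_t),
	      "VI compute MQD expected to be 256 dwords");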