author     Dave Airlie <airlied@redhat.com>  2015-07-24 00:26:53 -0400
committer  Dave Airlie <airlied@redhat.com>  2015-07-24 00:26:53 -0400
commit     5da612fa42465c6dda745e1b9fb514a014d23b11 (patch)
tree       3bee682210dbe7127b84021d8527670aa0a0b071 /drivers/gpu/drm/amd
parent     52721d9d3334c1cb1f76219a161084094ec634dc (diff)
parent     7639a8c420f04ca9be87974416efb2848b0962d9 (diff)
Merge tag 'drm-amdkfd-next-2015-07-20' of git://people.freedesktop.org/~gabbayo/linux into drm-next
- Add Carrizo support for amdkfd, using the new amdgpu driver as the
  relevant kgd. The support includes interfaces with amdgpu both for gfx7
  (Kaveri) and gfx8 (Carrizo). However, gfx7 interface is used for
  debugging purposes only, so amdkfd defaults to using radeon when Kaveri
  is installed. I would like to note that no new IOCTLs are being
  introduced, and there is no change in the current IOCTLs, as they are
  suited both for gfx7 and gfx8.

* tag 'drm-amdkfd-next-2015-07-20' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: Set correct doorbell packet type for Carrizo
  drm/amdkfd: Use generic defines in new amd headers
  drm/amdkfd: Implement create_map_queues() for Carrizo
  drm/amdkfd: fix runlist length calculation
  drm/amdkfd: Add support for VI in DQM
  drm/amdkfd: add support for VI in MQD manager
  drm/amdkfd: add CP HWS packet headers for VI
  drm/amdkfd: add supported CZ devices PCI IDs to amdkfd
  drm/amdkfd: Add dependency of DRM_AMDGPU to Kconfig
  drm/amdgpu: Add amdgpu <--> amdkfd gfx8 interface
  drm/amdgpu: add amdgpu <--> amdkfd gfx7 interface
  drm/amdgpu: Add H/W agnostic amdgpu <--> amdkfd interface
  drm/radeon: Modify kgd_engine_type enum to match CZ
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile                        |   6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h                        |   3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c                 | 267
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h                 |  65
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c          | 670
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c          | 543
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c                    |   5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c                     |   7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c                    |   7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c                           |  11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cikd.h                          |   6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vid.h                           |   5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Kconfig                         |   2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile                        |   3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_regs.h                      |  11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c                    |   7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |  12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c   | 103
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c           |  20
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c            | 249
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c            |  99
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h            | 398
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c                  |   5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h                  |   1
-rw-r--r--  drivers/gpu/drm/amd/include/kgd_kfd_interface.h            |   3
-rw-r--r--  drivers/gpu/drm/amd/include/vi_structs.h                   | 417
26 files changed, 2886 insertions, 39 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 616dfd4a1398..908360584e4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -71,6 +71,12 @@ amdgpu-y += \
 	amdgpu_vce.o \
 	vce_v3_0.o
 
+# add amdkfd interfaces
+amdgpu-y += \
+	amdgpu_amdkfd.o \
+	amdgpu_amdkfd_gfx_v7.o \
+	amdgpu_amdkfd_gfx_v8.o
+
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 01657830b470..f3791e0d27d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2011,6 +2011,9 @@ struct amdgpu_device {
 	/* tracking pinned memory */
 	u64 vram_pin_size;
 	u64 gart_pin_size;
+
+	/* amdkfd interface */
+	struct kfd_dev		*kfd;
 };
 
 bool amdgpu_device_is_px(struct drm_device *dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
new file mode 100644
index 000000000000..bc763e0c8f4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -0,0 +1,267 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "amdgpu_amdkfd.h"
24#include "amdgpu_family.h"
25#include <drm/drmP.h>
26#include "amdgpu.h"
27#include <linux/module.h>
28
29const struct kfd2kgd_calls *kfd2kgd;
30const struct kgd2kfd_calls *kgd2kfd;
31bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
32
33bool amdgpu_amdkfd_init(void)
34{
35#if defined(CONFIG_HSA_AMD_MODULE)
36 bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
37
38 kgd2kfd_init_p = symbol_request(kgd2kfd_init);
39
40 if (kgd2kfd_init_p == NULL)
41 return false;
42#endif
43 return true;
44}
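
A note on the CONFIG_HSA_AMD_MODULE path above: symbol_request() resolves
kgd2kfd_init at runtime and pins the amdkfd module until symbol_put()
drops the reference, which is what lets amdgpu behave the same whether
amdkfd is built in, modular, or absent. A minimal sketch of that pattern
(the consumer_* names are illustrative, not part of this patch):

#include <linux/module.h>
#include "kgd_kfd_interface.h"	/* declares kgd2kfd_init() */

static bool (*init_fn)(unsigned, const struct kgd2kfd_calls **);

static bool consumer_bind(void)
{
	/* resolves the symbol and takes a reference on amdkfd.ko */
	init_fn = symbol_request(kgd2kfd_init);
	return init_fn != NULL;		/* NULL if amdkfd is not around */
}

static void consumer_unbind(void)
{
	if (init_fn) {
		symbol_put(kgd2kfd_init);	/* drop the module reference */
		init_fn = NULL;
	}
}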
45
46bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
47{
48#if defined(CONFIG_HSA_AMD_MODULE)
49 bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
50#endif
51
52 switch (rdev->asic_type) {
53 case CHIP_KAVERI:
54 kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
55 break;
56 case CHIP_CARRIZO:
57 kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
58 break;
59 default:
60 return false;
61 }
62
63#if defined(CONFIG_HSA_AMD_MODULE)
64 kgd2kfd_init_p = symbol_request(kgd2kfd_init);
65
66 if (kgd2kfd_init_p == NULL) {
67 kfd2kgd = NULL;
68 return false;
69 }
70
71 if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
72 symbol_put(kgd2kfd_init);
73 kfd2kgd = NULL;
74 kgd2kfd = NULL;
75
76 return false;
77 }
78
79 return true;
80#elif defined(CONFIG_HSA_AMD)
81 if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
82 kfd2kgd = NULL;
83 kgd2kfd = NULL;
84 return false;
85 }
86
87 return true;
88#else
89 kfd2kgd = NULL;
90 return false;
91#endif
92}
93
94void amdgpu_amdkfd_fini(void)
95{
96 if (kgd2kfd) {
97 kgd2kfd->exit();
98 symbol_put(kgd2kfd_init);
99 }
100}
101
102void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
103{
104 if (kgd2kfd)
105 rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
106 rdev->pdev, kfd2kgd);
107}
108
109void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
110{
111 if (rdev->kfd) {
112 struct kgd2kfd_shared_resources gpu_resources = {
113 .compute_vmid_bitmap = 0xFF00,
114
115 .first_compute_pipe = 1,
116 .compute_pipe_count = 4 - 1,
117 };
118
119 amdgpu_doorbell_get_kfd_info(rdev,
120 &gpu_resources.doorbell_physical_address,
121 &gpu_resources.doorbell_aperture_size,
122 &gpu_resources.doorbell_start_offset);
123
124 kgd2kfd->device_init(rdev->kfd, &gpu_resources);
125 }
126}
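
The shared-resource split above is worth spelling out: compute_vmid_bitmap
= 0xFF00 hands VMIDs 8-15 to the compute stack while graphics keeps 0-7
(matching AMDGPU_NUM_OF_VMIDS in vid.h), and first_compute_pipe = 1 with
compute_pipe_count = 4 - 1 = 3 gives KFD pipes 1-3 of MEC1 while pipe 0
stays with the gfx ring. A quick standalone check of the bitmap:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t compute_vmid_bitmap = 0xFF00;
	int vmid;

	for (vmid = 0; vmid < 16; vmid++)
		if (compute_vmid_bitmap & (1u << vmid))
			printf("vmid %d -> kfd\n", vmid);	/* 8..15 */
	return 0;
}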
127
128void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
129{
130 if (rdev->kfd) {
131 kgd2kfd->device_exit(rdev->kfd);
132 rdev->kfd = NULL;
133 }
134}
135
136void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
137 const void *ih_ring_entry)
138{
139 if (rdev->kfd)
140 kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
141}
142
143void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
144{
145 if (rdev->kfd)
146 kgd2kfd->suspend(rdev->kfd);
147}
148
149int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
150{
151 int r = 0;
152
153 if (rdev->kfd)
154 r = kgd2kfd->resume(rdev->kfd);
155
156 return r;
157}
158
159u32 pool_to_domain(enum kgd_memory_pool p)
160{
161 switch (p) {
162 case KGD_POOL_FRAMEBUFFER: return AMDGPU_GEM_DOMAIN_VRAM;
163 default: return AMDGPU_GEM_DOMAIN_GTT;
164 }
165}
166
167int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
168 void **mem_obj, uint64_t *gpu_addr,
169 void **cpu_ptr)
170{
171 struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
172 struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
173 int r;
174
175 BUG_ON(kgd == NULL);
176 BUG_ON(gpu_addr == NULL);
177 BUG_ON(cpu_ptr == NULL);
178
179 *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
180 if ((*mem) == NULL)
181 return -ENOMEM;
182
183 r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
184 AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo);
185 if (r) {
186 dev_err(rdev->dev,
187 "failed to allocate BO for amdkfd (%d)\n", r);
188 return r;
189 }
190
191 /* map the buffer */
192 r = amdgpu_bo_reserve((*mem)->bo, true);
193 if (r) {
194 dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
195 goto allocate_mem_reserve_bo_failed;
196 }
197
198 r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
199 &(*mem)->gpu_addr);
200 if (r) {
201 dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
202 goto allocate_mem_pin_bo_failed;
203 }
204 *gpu_addr = (*mem)->gpu_addr;
205
206 r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
207 if (r) {
208 dev_err(rdev->dev,
209 "(%d) failed to map bo to kernel for amdkfd\n", r);
210 goto allocate_mem_kmap_bo_failed;
211 }
212 *cpu_ptr = (*mem)->cpu_ptr;
213
214 amdgpu_bo_unreserve((*mem)->bo);
215
216 return 0;
217
218allocate_mem_kmap_bo_failed:
219 amdgpu_bo_unpin((*mem)->bo);
220allocate_mem_pin_bo_failed:
221 amdgpu_bo_unreserve((*mem)->bo);
222allocate_mem_reserve_bo_failed:
223 amdgpu_bo_unref(&(*mem)->bo);
224
225 return r;
226}
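
A short usage sketch of the alloc_gtt_mem()/free_gtt_mem() pair above, from
the amdkfd side. The function and variable names here are hypothetical
(the real callers live in amdkfd's MQD managers); the point is that
mem_obj is an opaque handle and only the returned GPU and CPU addresses
are consumed:

/* hypothetical caller sketch */
#include <linux/string.h>
#include "amdgpu_amdkfd.h"
#include "cik_structs.h"

static int example_allocate_mqd(struct kgd_dev *kgd)
{
	void *mqd_mem_obj;
	uint64_t mqd_gpu_addr;
	void *mqd_cpu_ptr;
	int r;

	r = alloc_gtt_mem(kgd, sizeof(struct cik_mqd), &mqd_mem_obj,
			  &mqd_gpu_addr, &mqd_cpu_ptr);
	if (r)
		return r;

	memset(mqd_cpu_ptr, 0, sizeof(struct cik_mqd));	/* CPU mapping */
	/* ... program the HQD with mqd_gpu_addr ... */

	free_gtt_mem(kgd, mqd_mem_obj);		/* on teardown */
	return 0;
}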
227
228void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
229{
230 struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
231
232 BUG_ON(mem == NULL);
233
234 amdgpu_bo_reserve(mem->bo, true);
235 amdgpu_bo_kunmap(mem->bo);
236 amdgpu_bo_unpin(mem->bo);
237 amdgpu_bo_unreserve(mem->bo);
238 amdgpu_bo_unref(&(mem->bo));
239 kfree(mem);
240}
241
242uint64_t get_vmem_size(struct kgd_dev *kgd)
243{
244 struct amdgpu_device *rdev =
245 (struct amdgpu_device *)kgd;
246
247 BUG_ON(kgd == NULL);
248
249 return rdev->mc.real_vram_size;
250}
251
252uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
253{
254 struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
255
256 if (rdev->asic_funcs->get_gpu_clock_counter)
257 return rdev->asic_funcs->get_gpu_clock_counter(rdev);
258 return 0;
259}
260
261uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
262{
263 struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
264
265 /* The sclk is in quantas of 10kHz */
266 return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
267}
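
The /100 in get_max_engine_clock_in_mhz() follows from the units: sclk is
stored in 10 kHz quanta, and 1 MHz = 100 * 10 kHz. A standalone check with
an assumed sample value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t sclk = 80000;	/* assumed sample: 80000 quanta of 10 kHz */

	/* 80000 * 10 kHz = 800,000 kHz = 800 MHz */
	printf("%u MHz\n", sclk / 100);	/* prints "800 MHz" */
	return 0;
}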
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
new file mode 100644
index 000000000000..a8be765542e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -0,0 +1,65 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23/* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */
24
25#ifndef AMDGPU_AMDKFD_H_INCLUDED
26#define AMDGPU_AMDKFD_H_INCLUDED
27
28#include <linux/types.h>
29#include <kgd_kfd_interface.h>
30
31struct amdgpu_device;
32
33struct kgd_mem {
34 struct amdgpu_bo *bo;
35 uint64_t gpu_addr;
36 void *cpu_ptr;
37};
38
39bool amdgpu_amdkfd_init(void);
40void amdgpu_amdkfd_fini(void);
41
42bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
43
44void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
45int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
46void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
47 const void *ih_ring_entry);
48void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
49void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
50void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
51
52struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
53struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
54
55/* Shared API */
56int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
57 void **mem_obj, uint64_t *gpu_addr,
58 void **cpu_ptr);
59void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
60uint64_t get_vmem_size(struct kgd_dev *kgd);
61uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
62
63uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
64
65#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
new file mode 100644
index 000000000000..2daad335b809
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -0,0 +1,670 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/fdtable.h>
24#include <linux/uaccess.h>
25#include <linux/firmware.h>
26#include <drm/drmP.h>
27#include "amdgpu.h"
28#include "amdgpu_amdkfd.h"
29#include "cikd.h"
30#include "cik_sdma.h"
31#include "amdgpu_ucode.h"
32#include "gca/gfx_7_2_d.h"
33#include "gca/gfx_7_2_enum.h"
34#include "gca/gfx_7_2_sh_mask.h"
35#include "oss/oss_2_0_d.h"
36#include "oss/oss_2_0_sh_mask.h"
37#include "gmc/gmc_7_1_d.h"
38#include "gmc/gmc_7_1_sh_mask.h"
39#include "cik_structs.h"
40
41#define CIK_PIPE_PER_MEC (4)
42
43enum {
44 MAX_TRAPID = 8, /* 3 bits in the bitfield. */
45 MAX_WATCH_ADDRESSES = 4
46};
47
48enum {
49 ADDRESS_WATCH_REG_ADDR_HI = 0,
50 ADDRESS_WATCH_REG_ADDR_LO,
51 ADDRESS_WATCH_REG_CNTL,
52 ADDRESS_WATCH_REG_MAX
53};
54
55/* not defined in the CI/KV reg file */
56enum {
57 ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
58 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
59 ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
60 /* extend the mask to 26 bits to match the low address field */
61 ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
62 ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
63};
64
65static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
66 mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
67 mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
68 mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
69 mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
70};
71
72union TCP_WATCH_CNTL_BITS {
73 struct {
74 uint32_t mask:24;
75 uint32_t vmid:4;
76 uint32_t atc:1;
77 uint32_t mode:2;
78 uint32_t valid:1;
79 } bitfields, bits;
80 uint32_t u32All;
81 signed int i32All;
82 float f32All;
83};
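
The union above builds the TCP_WATCHn_CNTL word field by field and then
writes it as a single register value through u32All. A standalone sketch
of the idiom, reusing the layout above (bitfield ordering is compiler- and
ABI-dependent; on x86-64 GCC/Clang this prints 0x10ffffff, i.e.
ADDRESS_WATCH_REG_CNTL_ATC_BIT | ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK):

#include <stdio.h>
#include <stdint.h>

union tcp_watch_cntl_bits {
	struct {
		uint32_t mask:24;
		uint32_t vmid:4;
		uint32_t atc:1;
		uint32_t mode:2;
		uint32_t valid:1;
	} bitfields;
	uint32_t u32All;
};

int main(void)
{
	union tcp_watch_cntl_bits cntl;

	cntl.u32All = 0;
	cntl.bitfields.mask = 0x00FFFFFF;	/* default mask */
	cntl.bitfields.atc = 1;
	cntl.bitfields.valid = 0;		/* watch point disabled */

	printf("CNTL = 0x%08x\n", cntl.u32All);	/* 0x10ffffff */
	return 0;
}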
84
85/*
86 * Register access functions
87 */
88
89static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
90 uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
91 uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
92
93static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
94 unsigned int vmid);
95
96static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
97 uint32_t hpd_size, uint64_t hpd_gpu_addr);
98static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
99static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
100 uint32_t queue_id, uint32_t __user *wptr);
101static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
102static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
103 uint32_t pipe_id, uint32_t queue_id);
104
105static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
106 unsigned int timeout, uint32_t pipe_id,
107 uint32_t queue_id);
108static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
109static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
110 unsigned int timeout);
111static int kgd_address_watch_disable(struct kgd_dev *kgd);
112static int kgd_address_watch_execute(struct kgd_dev *kgd,
113 unsigned int watch_point_id,
114 uint32_t cntl_val,
115 uint32_t addr_hi,
116 uint32_t addr_lo);
117static int kgd_wave_control_execute(struct kgd_dev *kgd,
118 uint32_t gfx_index_val,
119 uint32_t sq_cmd);
120static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
121 unsigned int watch_point_id,
122 unsigned int reg_offset);
123
124static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
125static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
126 uint8_t vmid);
127static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
128
129static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
130
131static const struct kfd2kgd_calls kfd2kgd = {
132 .init_gtt_mem_allocation = alloc_gtt_mem,
133 .free_gtt_mem = free_gtt_mem,
134 .get_vmem_size = get_vmem_size,
135 .get_gpu_clock_counter = get_gpu_clock_counter,
136 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
137 .program_sh_mem_settings = kgd_program_sh_mem_settings,
138 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
139 .init_pipeline = kgd_init_pipeline,
140 .init_interrupts = kgd_init_interrupts,
141 .hqd_load = kgd_hqd_load,
142 .hqd_sdma_load = kgd_hqd_sdma_load,
143 .hqd_is_occupied = kgd_hqd_is_occupied,
144 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
145 .hqd_destroy = kgd_hqd_destroy,
146 .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
147 .address_watch_disable = kgd_address_watch_disable,
148 .address_watch_execute = kgd_address_watch_execute,
149 .wave_control_execute = kgd_wave_control_execute,
150 .address_watch_get_offset = kgd_address_watch_get_offset,
151 .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
152 .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
153 .write_vmid_invalidate_request = write_vmid_invalidate_request,
154 .get_fw_version = get_fw_version
155};
156
157struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions()
158{
159 return (struct kfd2kgd_calls *)&kfd2kgd;
160}
161
162static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
163{
164 return (struct amdgpu_device *)kgd;
165}
166
167static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
168 uint32_t queue, uint32_t vmid)
169{
170 struct amdgpu_device *adev = get_amdgpu_device(kgd);
171 uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
172
173 mutex_lock(&adev->srbm_mutex);
174 WREG32(mmSRBM_GFX_CNTL, value);
175}
176
177static void unlock_srbm(struct kgd_dev *kgd)
178{
179 struct amdgpu_device *adev = get_amdgpu_device(kgd);
180
181 WREG32(mmSRBM_GFX_CNTL, 0);
182 mutex_unlock(&adev->srbm_mutex);
183}
184
185static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
186 uint32_t queue_id)
187{
188 uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
189 uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
190
191 lock_srbm(kgd, mec, pipe, queue_id, 0);
192}
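
Because acquire_queue() pre-increments pipe_id, KFD's pipe numbering is
shifted by one relative to the hardware: KFD pipe 0 lands on MEC1 pipe 1
(MEC1 pipe 0 is left to the gfx ring), consistent with the
first_compute_pipe = 1 advertised in amdgpu_amdkfd_device_init() above. A
standalone check of the mapping:

#include <stdio.h>
#include <stdint.h>

#define CIK_PIPE_PER_MEC (4)

int main(void)
{
	uint32_t id;

	for (id = 0; id < 7; id++) {
		uint32_t pipe_id = id;
		uint32_t mec  = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
		uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);

		/* id 0 -> mec 1 pipe 1, id 3 -> mec 2 pipe 0, ... */
		printf("kfd pipe %u -> mec %u, hw pipe %u\n", id, mec, pipe);
	}
	return 0;
}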
193
194static void release_queue(struct kgd_dev *kgd)
195{
196 unlock_srbm(kgd);
197}
198
199static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
200 uint32_t sh_mem_config,
201 uint32_t sh_mem_ape1_base,
202 uint32_t sh_mem_ape1_limit,
203 uint32_t sh_mem_bases)
204{
205 struct amdgpu_device *adev = get_amdgpu_device(kgd);
206
207 lock_srbm(kgd, 0, 0, 0, vmid);
208
209 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
210 WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
211 WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
212 WREG32(mmSH_MEM_BASES, sh_mem_bases);
213
214 unlock_srbm(kgd);
215}
216
217static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
218 unsigned int vmid)
219{
220 struct amdgpu_device *adev = get_amdgpu_device(kgd);
221
222 /*
223 * We have to assume that there is no outstanding mapping.
224 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
225 * a mapping is in progress or because a mapping finished and the
226 * SW cleared it. So the protocol is to always wait & clear.
227 */
228 uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
229 ATC_VMID0_PASID_MAPPING__VALID_MASK;
230
231 WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
232
233 while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
234 cpu_relax();
235 WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
236
237 /* Mapping vmid to pasid also for IH block */
238 WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
239
240 return 0;
241}
242
243static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
244 uint32_t hpd_size, uint64_t hpd_gpu_addr)
245{
246 struct amdgpu_device *adev = get_amdgpu_device(kgd);
247
248 uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
249 uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
250
251 lock_srbm(kgd, mec, pipe, 0, 0);
252 WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
253 WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
254 WREG32(mmCP_HPD_EOP_VMID, 0);
255 WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
256 unlock_srbm(kgd);
257
258 return 0;
259}
260
261static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
262{
263 struct amdgpu_device *adev = get_amdgpu_device(kgd);
264 uint32_t mec;
265 uint32_t pipe;
266
267 mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
268 pipe = (pipe_id % CIK_PIPE_PER_MEC);
269
270 lock_srbm(kgd, mec, pipe, 0, 0);
271
272 WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
273 CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
274
275 unlock_srbm(kgd);
276
277 return 0;
278}
279
280static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
281{
282 uint32_t retval;
283
284 retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
285 m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
286
287 pr_debug("kfd: sdma base address: 0x%x\n", retval);
288
289 return retval;
290}
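
get_sdma_base_addr() linearizes the (engine, queue) pair into a register
offset that is then added to every mmSDMA0_RLC0_* access. A standalone
illustration; the engine stride value below is an assumption made for the
demo (the real SDMA1_REGISTER_OFFSET comes from the amdgpu CIK SDMA
headers), while the 0x200 queue stride is the KFD_CIK_SDMA_QUEUE_OFFSET
added to cikd.h by this patch:

#include <stdio.h>
#include <stdint.h>

#define SDMA1_REGISTER_OFFSET		0x200	/* assumed for the demo */
#define KFD_CIK_SDMA_QUEUE_OFFSET	0x200	/* from the cikd.h hunk */

int main(void)
{
	uint32_t engine, queue;

	for (engine = 0; engine < 2; engine++)
		for (queue = 0; queue < 2; queue++)
			printf("engine %u queue %u -> +0x%x\n",
			       engine, queue,
			       engine * SDMA1_REGISTER_OFFSET +
			       queue * KFD_CIK_SDMA_QUEUE_OFFSET);
	return 0;
}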
291
292static inline struct cik_mqd *get_mqd(void *mqd)
293{
294 return (struct cik_mqd *)mqd;
295}
296
297static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
298{
299 return (struct cik_sdma_rlc_registers *)mqd;
300}
301
302static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
303 uint32_t queue_id, uint32_t __user *wptr)
304{
305 struct amdgpu_device *adev = get_amdgpu_device(kgd);
306 uint32_t wptr_shadow, is_wptr_shadow_valid;
307 struct cik_mqd *m;
308
309 m = get_mqd(mqd);
310
311 is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
312
313 acquire_queue(kgd, pipe_id, queue_id);
314 WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
315 WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
316 WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
317
318 WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
319 WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
320 WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
321
322 WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
323 WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
324 WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
325
326 WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
327
328 WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
329 WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
330 WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
331
332 WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
333 WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
334 WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
335 WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
336
337 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
338 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
339 m->cp_hqd_pq_rptr_report_addr_hi);
340
341 WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
342
343 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
344 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
345
346 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
347
348 WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
349
350 WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
351
352 WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
353 WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
354
355 WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
356
357 if (is_wptr_shadow_valid)
358 WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
359
360 WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
361 release_queue(kgd);
362
363 return 0;
364}
365
366static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
367{
368 struct amdgpu_device *adev = get_amdgpu_device(kgd);
369 struct cik_sdma_rlc_registers *m;
370 uint32_t sdma_base_addr;
371
372 m = get_sdma_mqd(mqd);
373 sdma_base_addr = get_sdma_base_addr(m);
374
375 WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
376 m->sdma_rlc_virtual_addr);
377
378 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE,
379 m->sdma_rlc_rb_base);
380
381 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
382 m->sdma_rlc_rb_base_hi);
383
384 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
385 m->sdma_rlc_rb_rptr_addr_lo);
386
387 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
388 m->sdma_rlc_rb_rptr_addr_hi);
389
390 WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
391 m->sdma_rlc_doorbell);
392
393 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
394 m->sdma_rlc_rb_cntl);
395
396 return 0;
397}
398
399static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
400 uint32_t pipe_id, uint32_t queue_id)
401{
402 struct amdgpu_device *adev = get_amdgpu_device(kgd);
403 uint32_t act;
404 bool retval = false;
405 uint32_t low, high;
406
407 acquire_queue(kgd, pipe_id, queue_id);
408 act = RREG32(mmCP_HQD_ACTIVE);
409 if (act) {
410 low = lower_32_bits(queue_address >> 8);
411 high = upper_32_bits(queue_address >> 8);
412
413 if (low == RREG32(mmCP_HQD_PQ_BASE) &&
414 high == RREG32(mmCP_HQD_PQ_BASE_HI))
415 retval = true;
416 }
417 release_queue(kgd);
418 return retval;
419}
420
421static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
422{
423 struct amdgpu_device *adev = get_amdgpu_device(kgd);
424 struct cik_sdma_rlc_registers *m;
425 uint32_t sdma_base_addr;
426 uint32_t sdma_rlc_rb_cntl;
427
428 m = get_sdma_mqd(mqd);
429 sdma_base_addr = get_sdma_base_addr(m);
430
431 sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
432
433 if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
434 return true;
435
436 return false;
437}
438
439static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
440 unsigned int timeout, uint32_t pipe_id,
441 uint32_t queue_id)
442{
443 struct amdgpu_device *adev = get_amdgpu_device(kgd);
444 uint32_t temp;
445
446 acquire_queue(kgd, pipe_id, queue_id);
447 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
448
449 WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
450
451 while (true) {
452 temp = RREG32(mmCP_HQD_ACTIVE);
453 if (temp & CP_HQD_ACTIVE__ACTIVE__SHIFT)
454 break;
455 if (timeout == 0) {
456 pr_err("kfd: cp queue preemption time out (%dms)\n",
457 temp);
458 release_queue(kgd);
459 return -ETIME;
460 }
461 msleep(20);
462 timeout -= 20;
463 }
464
465 release_queue(kgd);
466 return 0;
467}
468
469static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
470 unsigned int timeout)
471{
472 struct amdgpu_device *adev = get_amdgpu_device(kgd);
473 struct cik_sdma_rlc_registers *m;
474 uint32_t sdma_base_addr;
475 uint32_t temp;
476
477 m = get_sdma_mqd(mqd);
478 sdma_base_addr = get_sdma_base_addr(m);
479
480 temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
481 temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
482 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
483
484 while (true) {
485 temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
486 if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
487 break;
488 if (timeout == 0)
489 return -ETIME;
490 msleep(20);
491 timeout -= 20;
492 }
493
494 WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
495 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
496 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
497 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
498
499 return 0;
500}
501
502static int kgd_address_watch_disable(struct kgd_dev *kgd)
503{
504 struct amdgpu_device *adev = get_amdgpu_device(kgd);
505 union TCP_WATCH_CNTL_BITS cntl;
506 unsigned int i;
507
508 cntl.u32All = 0;
509
510 cntl.bitfields.valid = 0;
511 cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
512 cntl.bitfields.atc = 1;
513
514 /* Turning off this address until we set all the registers */
515 for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
516 WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
517 ADDRESS_WATCH_REG_CNTL], cntl.u32All);
518
519 return 0;
520}
521
522static int kgd_address_watch_execute(struct kgd_dev *kgd,
523 unsigned int watch_point_id,
524 uint32_t cntl_val,
525 uint32_t addr_hi,
526 uint32_t addr_lo)
527{
528 struct amdgpu_device *adev = get_amdgpu_device(kgd);
529 union TCP_WATCH_CNTL_BITS cntl;
530
531 cntl.u32All = cntl_val;
532
533 /* Turning off this watch point until we set all the registers */
534 cntl.bitfields.valid = 0;
535 WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
536 ADDRESS_WATCH_REG_CNTL], cntl.u32All);
537
538 WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
539 ADDRESS_WATCH_REG_ADDR_HI], addr_hi);
540
541 WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
542 ADDRESS_WATCH_REG_ADDR_LO], addr_lo);
543
544 /* Enable the watch point */
545 cntl.bitfields.valid = 1;
546
547 WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
548 ADDRESS_WATCH_REG_CNTL], cntl.u32All);
549
550 return 0;
551}
552
553static int kgd_wave_control_execute(struct kgd_dev *kgd,
554 uint32_t gfx_index_val,
555 uint32_t sq_cmd)
556{
557 struct amdgpu_device *adev = get_amdgpu_device(kgd);
558 uint32_t data;
559
560 mutex_lock(&adev->grbm_idx_mutex);
561
562 WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
563 WREG32(mmSQ_CMD, sq_cmd);
564
565 /* Restore the GRBM_GFX_INDEX register */
566
567 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
568 GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
569 GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
570
571 WREG32(mmGRBM_GFX_INDEX, data);
572
573 mutex_unlock(&adev->grbm_idx_mutex);
574
575 return 0;
576}
577
578static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
579 unsigned int watch_point_id,
580 unsigned int reg_offset)
581{
582 return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
583}
584
585static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
586 uint8_t vmid)
587{
588 uint32_t reg;
589 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
590
591 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
592 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
593}
594
595static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
596 uint8_t vmid)
597{
598 uint32_t reg;
599 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
600
601 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
602 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
603}
604
605static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
606{
607 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
608
609 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
610}
611
612static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
613{
614 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
615 const union amdgpu_firmware_header *hdr;
616
617 BUG_ON(kgd == NULL);
618
619 switch (type) {
620 case KGD_ENGINE_PFP:
621 hdr = (const union amdgpu_firmware_header *)
622 adev->gfx.pfp_fw->data;
623 break;
624
625 case KGD_ENGINE_ME:
626 hdr = (const union amdgpu_firmware_header *)
627 adev->gfx.me_fw->data;
628 break;
629
630 case KGD_ENGINE_CE:
631 hdr = (const union amdgpu_firmware_header *)
632 adev->gfx.ce_fw->data;
633 break;
634
635 case KGD_ENGINE_MEC1:
636 hdr = (const union amdgpu_firmware_header *)
637 adev->gfx.mec_fw->data;
638 break;
639
640 case KGD_ENGINE_MEC2:
641 hdr = (const union amdgpu_firmware_header *)
642 adev->gfx.mec2_fw->data;
643 break;
644
645 case KGD_ENGINE_RLC:
646 hdr = (const union amdgpu_firmware_header *)
647 adev->gfx.rlc_fw->data;
648 break;
649
650 case KGD_ENGINE_SDMA1:
651 hdr = (const union amdgpu_firmware_header *)
652 adev->sdma[0].fw->data;
653 break;
654
655 case KGD_ENGINE_SDMA2:
656 hdr = (const union amdgpu_firmware_header *)
657 adev->sdma[1].fw->data;
658 break;
659
660 default:
661 return 0;
662 }
663
664 if (hdr == NULL)
665 return 0;
666
667	/* Only 12 bits in use */
668 return hdr->common.ucode_version;
669}
670
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
new file mode 100644
index 000000000000..dfd1d503bccf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -0,0 +1,543 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/module.h>
24#include <linux/fdtable.h>
25#include <linux/uaccess.h>
26#include <linux/firmware.h>
27#include <drm/drmP.h>
28#include "amdgpu.h"
29#include "amdgpu_amdkfd.h"
30#include "amdgpu_ucode.h"
31#include "gca/gfx_8_0_sh_mask.h"
32#include "gca/gfx_8_0_d.h"
33#include "gca/gfx_8_0_enum.h"
34#include "oss/oss_3_0_sh_mask.h"
35#include "oss/oss_3_0_d.h"
36#include "gmc/gmc_8_1_sh_mask.h"
37#include "gmc/gmc_8_1_d.h"
38#include "vi_structs.h"
39#include "vid.h"
40
41#define VI_PIPE_PER_MEC (4)
42
43struct cik_sdma_rlc_registers;
44
45/*
46 * Register access functions
47 */
48
49static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
50 uint32_t sh_mem_config,
51 uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
52 uint32_t sh_mem_bases);
53static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
54 unsigned int vmid);
55static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
56 uint32_t hpd_size, uint64_t hpd_gpu_addr);
57static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
58static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
59 uint32_t queue_id, uint32_t __user *wptr);
60static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
61static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
62 uint32_t pipe_id, uint32_t queue_id);
63static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
64static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
65 unsigned int timeout, uint32_t pipe_id,
66 uint32_t queue_id);
67static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
68 unsigned int timeout);
69static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
70static int kgd_address_watch_disable(struct kgd_dev *kgd);
71static int kgd_address_watch_execute(struct kgd_dev *kgd,
72 unsigned int watch_point_id,
73 uint32_t cntl_val,
74 uint32_t addr_hi,
75 uint32_t addr_lo);
76static int kgd_wave_control_execute(struct kgd_dev *kgd,
77 uint32_t gfx_index_val,
78 uint32_t sq_cmd);
79static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
80 unsigned int watch_point_id,
81 unsigned int reg_offset);
82
83static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
84 uint8_t vmid);
85static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
86 uint8_t vmid);
87static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
88static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
89
90static const struct kfd2kgd_calls kfd2kgd = {
91 .init_gtt_mem_allocation = alloc_gtt_mem,
92 .free_gtt_mem = free_gtt_mem,
93 .get_vmem_size = get_vmem_size,
94 .get_gpu_clock_counter = get_gpu_clock_counter,
95 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
96 .program_sh_mem_settings = kgd_program_sh_mem_settings,
97 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
98 .init_pipeline = kgd_init_pipeline,
99 .init_interrupts = kgd_init_interrupts,
100 .hqd_load = kgd_hqd_load,
101 .hqd_sdma_load = kgd_hqd_sdma_load,
102 .hqd_is_occupied = kgd_hqd_is_occupied,
103 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
104 .hqd_destroy = kgd_hqd_destroy,
105 .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
106 .address_watch_disable = kgd_address_watch_disable,
107 .address_watch_execute = kgd_address_watch_execute,
108 .wave_control_execute = kgd_wave_control_execute,
109 .address_watch_get_offset = kgd_address_watch_get_offset,
110 .get_atc_vmid_pasid_mapping_pasid =
111 get_atc_vmid_pasid_mapping_pasid,
112 .get_atc_vmid_pasid_mapping_valid =
113 get_atc_vmid_pasid_mapping_valid,
114 .write_vmid_invalidate_request = write_vmid_invalidate_request,
115 .get_fw_version = get_fw_version
116};
117
118struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions()
119{
120 return (struct kfd2kgd_calls *)&kfd2kgd;
121}
122
123static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
124{
125 return (struct amdgpu_device *)kgd;
126}
127
128static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
129 uint32_t queue, uint32_t vmid)
130{
131 struct amdgpu_device *adev = get_amdgpu_device(kgd);
132 uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
133
134 mutex_lock(&adev->srbm_mutex);
135 WREG32(mmSRBM_GFX_CNTL, value);
136}
137
138static void unlock_srbm(struct kgd_dev *kgd)
139{
140 struct amdgpu_device *adev = get_amdgpu_device(kgd);
141
142 WREG32(mmSRBM_GFX_CNTL, 0);
143 mutex_unlock(&adev->srbm_mutex);
144}
145
146static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
147 uint32_t queue_id)
148{
149 uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
150 uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
151
152 lock_srbm(kgd, mec, pipe, queue_id, 0);
153}
154
155static void release_queue(struct kgd_dev *kgd)
156{
157 unlock_srbm(kgd);
158}
159
160static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
161 uint32_t sh_mem_config,
162 uint32_t sh_mem_ape1_base,
163 uint32_t sh_mem_ape1_limit,
164 uint32_t sh_mem_bases)
165{
166 struct amdgpu_device *adev = get_amdgpu_device(kgd);
167
168 lock_srbm(kgd, 0, 0, 0, vmid);
169
170 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
171 WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
172 WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
173 WREG32(mmSH_MEM_BASES, sh_mem_bases);
174
175 unlock_srbm(kgd);
176}
177
178static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
179 unsigned int vmid)
180{
181 struct amdgpu_device *adev = get_amdgpu_device(kgd);
182
183 /*
184 * We have to assume that there is no outstanding mapping.
185 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
186 * a mapping is in progress or because a mapping finished
187 * and the SW cleared it.
188 * So the protocol is to always wait & clear.
189 */
190 uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
191 ATC_VMID0_PASID_MAPPING__VALID_MASK;
192
193 WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
194
195 while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
196 cpu_relax();
197 WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
198
199 /* Mapping vmid to pasid also for IH block */
200 WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
201
202 return 0;
203}
204
205static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
206 uint32_t hpd_size, uint64_t hpd_gpu_addr)
207{
208 return 0;
209}
210
211static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
212{
213 struct amdgpu_device *adev = get_amdgpu_device(kgd);
214 uint32_t mec;
215 uint32_t pipe;
216
217 mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
218 pipe = (pipe_id % VI_PIPE_PER_MEC);
219
220 lock_srbm(kgd, mec, pipe, 0, 0);
221
222 WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
223
224 unlock_srbm(kgd);
225
226 return 0;
227}
228
229static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
230{
231 return 0;
232}
233
234static inline struct vi_mqd *get_mqd(void *mqd)
235{
236 return (struct vi_mqd *)mqd;
237}
238
239static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
240{
241 return (struct cik_sdma_rlc_registers *)mqd;
242}
243
244static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
245 uint32_t queue_id, uint32_t __user *wptr)
246{
247 struct vi_mqd *m;
248 uint32_t shadow_wptr, valid_wptr;
249 struct amdgpu_device *adev = get_amdgpu_device(kgd);
250
251 m = get_mqd(mqd);
252
253 valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
254 acquire_queue(kgd, pipe_id, queue_id);
255
256 WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
257 WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
258 WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
259
260 WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
261 WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
262 WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
263 WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
264 WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
265 WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
266 WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
267 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
268 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
269 m->cp_hqd_pq_rptr_report_addr_hi);
270
271 if (valid_wptr > 0)
272 WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
273
274 WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
275 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
276
277 WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
278 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
279 WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
280 WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
281 WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
282 WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
283
284 WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
285 WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
286 WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
287 WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
288 WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
289 WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
290 WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
291
292 WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
293
294 WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
295 WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
296 WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
297 WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
298
299 WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
300
301 release_queue(kgd);
302
303 return 0;
304}
305
306static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
307{
308 return 0;
309}
310
311static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
312 uint32_t pipe_id, uint32_t queue_id)
313{
314 struct amdgpu_device *adev = get_amdgpu_device(kgd);
315 uint32_t act;
316 bool retval = false;
317 uint32_t low, high;
318
319 acquire_queue(kgd, pipe_id, queue_id);
320 act = RREG32(mmCP_HQD_ACTIVE);
321 if (act) {
322 low = lower_32_bits(queue_address >> 8);
323 high = upper_32_bits(queue_address >> 8);
324
325 if (low == RREG32(mmCP_HQD_PQ_BASE) &&
326 high == RREG32(mmCP_HQD_PQ_BASE_HI))
327 retval = true;
328 }
329 release_queue(kgd);
330 return retval;
331}
332
333static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
334{
335 struct amdgpu_device *adev = get_amdgpu_device(kgd);
336 struct cik_sdma_rlc_registers *m;
337 uint32_t sdma_base_addr;
338 uint32_t sdma_rlc_rb_cntl;
339
340 m = get_sdma_mqd(mqd);
341 sdma_base_addr = get_sdma_base_addr(m);
342
343 sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
344
345 if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
346 return true;
347
348 return false;
349}
350
351static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
352 unsigned int timeout, uint32_t pipe_id,
353 uint32_t queue_id)
354{
355 struct amdgpu_device *adev = get_amdgpu_device(kgd);
356 uint32_t temp;
357
358 acquire_queue(kgd, pipe_id, queue_id);
359
360 WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
361
362 while (true) {
363 temp = RREG32(mmCP_HQD_ACTIVE);
364 if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
365 break;
366 if (timeout == 0) {
367 pr_err("kfd: cp queue preemption time out (%dms)\n",
368 temp);
369 release_queue(kgd);
370 return -ETIME;
371 }
372 msleep(20);
373 timeout -= 20;
374 }
375
376 release_queue(kgd);
377 return 0;
378}
379
380static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
381 unsigned int timeout)
382{
383 struct amdgpu_device *adev = get_amdgpu_device(kgd);
384 struct cik_sdma_rlc_registers *m;
385 uint32_t sdma_base_addr;
386 uint32_t temp;
387
388 m = get_sdma_mqd(mqd);
389 sdma_base_addr = get_sdma_base_addr(m);
390
391 temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
392 temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
393 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
394
395 while (true) {
396 temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
397 if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
398 break;
399 if (timeout == 0)
400 return -ETIME;
401 msleep(20);
402 timeout -= 20;
403 }
404
405 WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
406 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
407 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
408 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
409
410 return 0;
411}
412
413static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
414 uint8_t vmid)
415{
416 uint32_t reg;
417 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
418
419 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
420 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
421}
422
423static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
424 uint8_t vmid)
425{
426 uint32_t reg;
427 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
428
429 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
430 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
431}
432
433static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
434{
435 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
436
437 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
438}
439
440static int kgd_address_watch_disable(struct kgd_dev *kgd)
441{
442 return 0;
443}
444
445static int kgd_address_watch_execute(struct kgd_dev *kgd,
446 unsigned int watch_point_id,
447 uint32_t cntl_val,
448 uint32_t addr_hi,
449 uint32_t addr_lo)
450{
451 return 0;
452}
453
454static int kgd_wave_control_execute(struct kgd_dev *kgd,
455 uint32_t gfx_index_val,
456 uint32_t sq_cmd)
457{
458 struct amdgpu_device *adev = get_amdgpu_device(kgd);
459 uint32_t data = 0;
460
461 mutex_lock(&adev->grbm_idx_mutex);
462
463 WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
464 WREG32(mmSQ_CMD, sq_cmd);
465
466 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
467 INSTANCE_BROADCAST_WRITES, 1);
468 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
469 SH_BROADCAST_WRITES, 1);
470 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
471 SE_BROADCAST_WRITES, 1);
472
473 WREG32(mmGRBM_GFX_INDEX, data);
474 mutex_unlock(&adev->grbm_idx_mutex);
475
476 return 0;
477}
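
Unlike the gfx7 version, which or's three *_MASK constants together
directly, the gfx8 code rebuilds the broadcast value with the kernel's
REG_SET_FIELD() helper. A standalone sketch showing the two styles agree;
the field positions are assumptions taken for illustration (the real ones
live in gfx_8_0_sh_mask.h):

#include <stdio.h>
#include <stdint.h>

/* assumed GRBM_GFX_INDEX field positions, for the demo only */
#define GRBM_GFX_INDEX__SH_BROADCAST_WRITES__SHIFT		29
#define GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK		0x20000000
#define GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT	30
#define GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK		0x40000000
#define GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT		31
#define GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK		0x80000000

/* same shape as the kernel's REG_SET_FIELD() helper */
#define REG_SET_FIELD(orig, reg, field, val)				\
	(((orig) & ~reg##__##field##_MASK) |				\
	 (((uint32_t)(val) << reg##__##field##__SHIFT) &		\
	  reg##__##field##_MASK))

int main(void)
{
	uint32_t data = 0;

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
			     INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);

	/* equals the three *_MASK constants or'd in the gfx7 version */
	printf("0x%08x\n", data);	/* 0xe0000000 */
	return 0;
}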
478
479static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
480 unsigned int watch_point_id,
481 unsigned int reg_offset)
482{
483 return 0;
484}
485
486static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
487{
488 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
489 const union amdgpu_firmware_header *hdr;
490
491 BUG_ON(kgd == NULL);
492
493 switch (type) {
494 case KGD_ENGINE_PFP:
495 hdr = (const union amdgpu_firmware_header *)
496 adev->gfx.pfp_fw->data;
497 break;
498
499 case KGD_ENGINE_ME:
500 hdr = (const union amdgpu_firmware_header *)
501 adev->gfx.me_fw->data;
502 break;
503
504 case KGD_ENGINE_CE:
505 hdr = (const union amdgpu_firmware_header *)
506 adev->gfx.ce_fw->data;
507 break;
508
509 case KGD_ENGINE_MEC1:
510 hdr = (const union amdgpu_firmware_header *)
511 adev->gfx.mec_fw->data;
512 break;
513
514 case KGD_ENGINE_MEC2:
515 hdr = (const union amdgpu_firmware_header *)
516 adev->gfx.mec2_fw->data;
517 break;
518
519 case KGD_ENGINE_RLC:
520 hdr = (const union amdgpu_firmware_header *)
521 adev->gfx.rlc_fw->data;
522 break;
523
524 case KGD_ENGINE_SDMA1:
525 hdr = (const union amdgpu_firmware_header *)
526 adev->sdma[0].fw->data;
527 break;
528
529 case KGD_ENGINE_SDMA2:
530 hdr = (const union amdgpu_firmware_header *)
531 adev->sdma[1].fw->data;
532 break;
533
534 default:
535 return 0;
536 }
537
538 if (hdr == NULL)
539 return 0;
540
541	/* Only 12 bits in use */
542 return hdr->common.ucode_version;
543}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 56da962231fc..115906f5fda0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -44,6 +44,8 @@
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
 
+#include "amdgpu_amdkfd.h"
+
 /*
  * KMS wrapper.
  * - 3.0.0 - initial driver
@@ -527,12 +529,15 @@ static int __init amdgpu_init(void)
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
 
+	amdgpu_amdkfd_init();
+
 	/* let modprobe override vga console setting */
 	return drm_pci_init(driver, pdriver);
 }
 
 static void __exit amdgpu_exit(void)
 {
+	amdgpu_amdkfd_fini();
 	drm_pci_exit(driver, pdriver);
 	amdgpu_unregister_atpx_handler();
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index db5422e65ec5..fb44dd2231b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -24,6 +24,7 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
+#include "amdgpu_amdkfd.h"
 
 /**
  * amdgpu_ih_ring_alloc - allocate memory for the IH ring
@@ -199,6 +200,12 @@ restart_ih:
 	rmb();
 
 	while (adev->irq.ih.rptr != wptr) {
+		u32 ring_index = adev->irq.ih.rptr >> 2;
+
+		/* Before dispatching irq to IP blocks, send it to amdkfd */
+		amdgpu_amdkfd_interrupt(adev,
+				(const void *) &adev->irq.ih.ring[ring_index]);
+
 		amdgpu_ih_decode_iv(adev, &entry);
 		adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
 
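
This hunk forwards each raw IH vector to amdkfd before amdgpu decodes it.
The rptr is kept in bytes while adev->irq.ih.ring[] is a u32 array, hence
the >> 2 converting a byte offset into a dword index. A trivial standalone
check, with an assumed byte offset:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t rptr = 0x40;			/* assumed byte offset */
	uint32_t ring_index = rptr >> 2;	/* u32 (dword) index */

	/* ring[16] is the first dword of the IV at byte 0x40 */
	printf("ring[%u] <- byte 0x%x\n", ring_index, rptr);
	return 0;
}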
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 5533434c7a8f..8c40a9671b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -34,6 +34,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include "amdgpu_amdkfd.h"
 
 #if defined(CONFIG_VGA_SWITCHEROO)
 bool amdgpu_has_atpx(void);
@@ -61,6 +62,8 @@ int amdgpu_driver_unload_kms(struct drm_device *dev)
 
 	pm_runtime_get_sync(dev->dev);
 
+	amdgpu_amdkfd_device_fini(adev);
+
 	amdgpu_acpi_fini(adev);
 
 	amdgpu_device_fini(adev);
@@ -118,6 +121,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 			"Error during ACPI methods call\n");
 	}
 
+	amdgpu_amdkfd_load_interface(adev);
+	amdgpu_amdkfd_device_probe(adev);
+	amdgpu_amdkfd_device_init(adev);
+
 	if (amdgpu_device_is_px(dev)) {
 		pm_runtime_use_autosuspend(dev->dev);
 		pm_runtime_set_autosuspend_delay(dev->dev, 5000);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 341c56681841..b3b66a0d5ff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -64,6 +64,8 @@
64#include "oss/oss_2_0_d.h" 64#include "oss/oss_2_0_d.h"
65#include "oss/oss_2_0_sh_mask.h" 65#include "oss/oss_2_0_sh_mask.h"
66 66
67#include "amdgpu_amdkfd.h"
68
67/* 69/*
68 * Indirect registers accessor 70 * Indirect registers accessor
69 */ 71 */
@@ -2448,14 +2450,21 @@ static int cik_common_suspend(void *handle)
2448{ 2450{
2449 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2451 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2450 2452
2453 amdgpu_amdkfd_suspend(adev);
2454
2451 return cik_common_hw_fini(adev); 2455 return cik_common_hw_fini(adev);
2452} 2456}
2453 2457
2454static int cik_common_resume(void *handle) 2458static int cik_common_resume(void *handle)
2455{ 2459{
2460 int r;
2456 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2461 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2457 2462
2458 return cik_common_hw_init(adev); 2463 r = cik_common_hw_init(adev);
2464 if (r)
2465 return r;
2466
2467 return amdgpu_amdkfd_resume(adev);
2459} 2468}
2460 2469
2461static bool cik_common_is_idle(void *handle) 2470static bool cik_common_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index d19085a97064..a3e3dfaa01a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -552,6 +552,12 @@
552#define VCE_CMD_IB_AUTO 0x00000005 552#define VCE_CMD_IB_AUTO 0x00000005
553#define VCE_CMD_SEMAPHORE 0x00000006 553#define VCE_CMD_SEMAPHORE 0x00000006
554 554
555/* if PTR32, these are the bases for scratch and lds */
556#define PRIVATE_BASE(x) ((x) << 0) /* scratch */
557#define SHARED_BASE(x) ((x) << 16) /* LDS */
558
559#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200
560
555/* valid for both DEFAULT_MTYPE and APE1_MTYPE */ 561/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
556enum { 562enum {
557 MTYPE_CACHED = 0, 563 MTYPE_CACHED = 0,
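
For 32-bit processes, PRIVATE_BASE() and SHARED_BASE() above position the scratch and LDS bases within one SH_MEM_BASES-style word. A hedged sketch of how the two halves combine (the helper, and programming the same base into both fields, are illustrative):

#include <stdint.h>

#define PRIVATE_BASE(x)	((x) << 0)	/* scratch */
#define SHARED_BASE(x)	((x) << 16)	/* LDS */

/*
 * Build an SH_MEM_BASES-style word that programs the same base value
 * into both the private (bits 15:0) and shared (bits 31:16) fields.
 */
static uint32_t make_sh_mem_bases(uint32_t base)
{
	return PRIVATE_BASE(base) | SHARED_BASE(base);
}
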
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 31bb89452e12..d98aa9d82fa1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -66,6 +66,11 @@
66 66
67#define AMDGPU_NUM_OF_VMIDS 8 67#define AMDGPU_NUM_OF_VMIDS 8
68 68
69#define PIPEID(x) ((x) << 0)
70#define MEID(x) ((x) << 2)
71#define VMID(x) ((x) << 4)
72#define QUEUEID(x) ((x) << 8)
73
69#define RB_BITMAP_WIDTH_PER_SH 2 74#define RB_BITMAP_WIDTH_PER_SH 2
70 75
71#define MC_SEQ_MISC0__MT__MASK 0xf0000000 76#define MC_SEQ_MISC0__MT__MASK 0xf0000000
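
PIPEID/MEID/VMID/QUEUEID pack the HQD selection fields into one register-ready word (pipe in bits 1:0, ME in bits 3:2, VMID in bits 7:4, queue from bit 8 up). A sketch of the kind of queue-select composition the new amdgpu<->amdkfd glue performs before touching per-queue registers (helper name is illustrative):

#include <stdint.h>

#define PIPEID(x)	((x) << 0)
#define MEID(x)		((x) << 2)
#define VMID(x)		((x) << 4)
#define QUEUEID(x)	((x) << 8)

/*
 * Compose the queue-select word written before programming HQD
 * registers: e.g. ME 1, pipe 2, queue 3, vmid 0 gives
 * 0x4 | 0x2 | 0x300 = 0x306.
 */
static uint32_t srbm_queue_select(uint32_t mec, uint32_t pipe,
				  uint32_t queue, uint32_t vmid)
{
	return MEID(mec) | PIPEID(pipe) | QUEUEID(queue) | VMID(vmid);
}
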
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 8dfac37ff327..e13c67c8d2c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,6 @@
4 4
5config HSA_AMD 5config HSA_AMD
6 tristate "HSA kernel driver for AMD GPU devices" 6 tristate "HSA kernel driver for AMD GPU devices"
7 depends on DRM_RADEON && AMD_IOMMU_V2 && X86_64 7 depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
8 help 8 help
9 Enable this if you want to use HSA features on AMD GPU devices. 9 Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 28551153ec6d..7fc9b0f444cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -2,7 +2,8 @@
2# Makefile for Heterogenous System Architecture support for AMD GPU devices 2# Makefile for Heterogenous System Architecture support for AMD GPU devices
3# 3#
4 4
5ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ 5ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ \
6 -Idrivers/gpu/drm/amd/include/asic_reg
6 7
7amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ 8amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
8 kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ 9 kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 183be5b8414f..48769d12dd7b 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -65,17 +65,6 @@
65 65
66#define AQL_ENABLE 1 66#define AQL_ENABLE 1
67 67
68#define SDMA_RB_VMID(x) (x << 24)
69#define SDMA_RB_ENABLE (1 << 0)
70#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */
71#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12)
72#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
73#define SDMA_OFFSET(x) (x << 0)
74#define SDMA_DB_ENABLE (1 << 28)
75#define SDMA_ATC (1 << 0)
76#define SDMA_VA_PTR32 (1 << 4)
77#define SDMA_VA_SHARED_BASE(x) (x << 8)
78
79#define GRBM_GFX_INDEX 0x30800 68#define GRBM_GFX_INDEX 0x30800
80 69
81#define ATC_VMID_PASID_MAPPING_VALID (1U << 31) 70#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 75312c82969f..3f95f7cb4019 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -80,7 +80,12 @@ static const struct kfd_deviceid supported_devices[] = {
80 { 0x1318, &kaveri_device_info }, /* Kaveri */ 80 { 0x1318, &kaveri_device_info }, /* Kaveri */
81 { 0x131B, &kaveri_device_info }, /* Kaveri */ 81 { 0x131B, &kaveri_device_info }, /* Kaveri */
82 { 0x131C, &kaveri_device_info }, /* Kaveri */ 82 { 0x131C, &kaveri_device_info }, /* Kaveri */
83 { 0x131D, &kaveri_device_info } /* Kaveri */ 83 { 0x131D, &kaveri_device_info }, /* Kaveri */
84 { 0x9870, &carrizo_device_info }, /* Carrizo */
85 { 0x9874, &carrizo_device_info }, /* Carrizo */
86 { 0x9875, &carrizo_device_info }, /* Carrizo */
87 { 0x9876, &carrizo_device_info }, /* Carrizo */
88 { 0x9877, &carrizo_device_info } /* Carrizo */
84}; 89};
85 90
86static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 91static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 9ce8a20a7aff..23ce774ff09d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -23,6 +23,7 @@
23 23
24#include "kfd_device_queue_manager.h" 24#include "kfd_device_queue_manager.h"
25#include "cik_regs.h" 25#include "cik_regs.h"
26#include "oss/oss_2_4_sh_mask.h"
26 27
27static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, 28static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
28 struct qcm_process_device *qpd, 29 struct qcm_process_device *qpd,
@@ -135,13 +136,16 @@ static int register_process_cik(struct device_queue_manager *dqm,
135static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, 136static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
136 struct qcm_process_device *qpd) 137 struct qcm_process_device *qpd)
137{ 138{
138 uint32_t value = SDMA_ATC; 139 uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
139 140
140 if (q->process->is_32bit_user_mode) 141 if (q->process->is_32bit_user_mode)
141 value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); 142 value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
143 get_sh_mem_bases_32(qpd_to_pdd(qpd));
142 else 144 else
143 value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64( 145 value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
144 qpd_to_pdd(qpd))); 146 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &&
147 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
148
145 q->properties.sdma_vm_addr = value; 149 q->properties.sdma_vm_addr = value;
146} 150}
147 151
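
The rewrite above trades the hand-rolled SDMA_* macros for the generated __SHIFT/_MASK defines; the idiom is shift-then-mask with bitwise operators (a logical && in that position would collapse the whole field to 0 or 1). A distilled sketch, with the shift and mask values assumed to match the old SDMA_VA_SHARED_BASE(x) = (x << 8):

#include <stdint.h>

/* Field layout assumed from the generated oss_2_4_sh_mask.h header. */
#define SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT	8
#define SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK	0x0000ff00

static uint32_t set_shared_base(uint32_t reg, uint32_t nybbles)
{
	/*
	 * Shift the value into position, then clamp it to the field
	 * with a bitwise AND.
	 */
	return reg | ((nybbles << SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
		      SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK);
}
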
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 4c15212a3899..44c38e8e54d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -22,6 +22,10 @@
22 */ 22 */
23 23
24#include "kfd_device_queue_manager.h" 24#include "kfd_device_queue_manager.h"
25#include "gca/gfx_8_0_enum.h"
26#include "gca/gfx_8_0_sh_mask.h"
27#include "gca/gfx_8_0_enum.h"
28#include "oss/oss_3_0_sh_mask.h"
25 29
26static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, 30static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
27 struct qcm_process_device *qpd, 31 struct qcm_process_device *qpd,
@@ -37,14 +41,40 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
37 41
38void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) 42void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
39{ 43{
40 pr_warn("amdkfd: VI DQM is not currently supported\n");
41
42 ops->set_cache_memory_policy = set_cache_memory_policy_vi; 44 ops->set_cache_memory_policy = set_cache_memory_policy_vi;
43 ops->register_process = register_process_vi; 45 ops->register_process = register_process_vi;
44 ops->initialize = initialize_cpsch_vi; 46 ops->initialize = initialize_cpsch_vi;
45 ops->init_sdma_vm = init_sdma_vm; 47 ops->init_sdma_vm = init_sdma_vm;
46} 48}
47 49
50static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
51{
52 /* In 64-bit mode, we can only control the top 3 bits of the LDS,
53 * scratch and GPUVM apertures.
54 * The hardware fills in the remaining 59 bits according to the
55 * following pattern:
56 * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
57 * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
58 * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
59 *
60 * (where X/Y is the configurable nybble with the low-bit 0)
61 *
62 * LDS and scratch will have the same top nybble programmed in the
63 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
64 * GPUVM can have a different top nybble programmed in the
65 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
66 * We don't bother to support different top nybbles
67 * for LDS/Scratch and GPUVM.
68 */
69
70 BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
71 top_address_nybble == 0);
72
73 return top_address_nybble << 12 |
74 (top_address_nybble << 12) <<
75 SH_MEM_BASES__SHARED_BASE__SHIFT;
76}
77
48static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, 78static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
49 struct qcm_process_device *qpd, 79 struct qcm_process_device *qpd,
50 enum cache_policy default_policy, 80 enum cache_policy default_policy,
@@ -52,18 +82,83 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
52 void __user *alternate_aperture_base, 82 void __user *alternate_aperture_base,
53 uint64_t alternate_aperture_size) 83 uint64_t alternate_aperture_size)
54{ 84{
55 return false; 85 uint32_t default_mtype;
86 uint32_t ape1_mtype;
87
88 default_mtype = (default_policy == cache_policy_coherent) ?
89 MTYPE_CC :
90 MTYPE_NC;
91
92 ape1_mtype = (alternate_policy == cache_policy_coherent) ?
93 MTYPE_CC :
94 MTYPE_NC;
95
96 qpd->sh_mem_config = (qpd->sh_mem_config &
97 SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
98 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
99 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
100 default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
101 ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
102 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
103
104 return true;
56} 105}
57 106
58static int register_process_vi(struct device_queue_manager *dqm, 107static int register_process_vi(struct device_queue_manager *dqm,
59 struct qcm_process_device *qpd) 108 struct qcm_process_device *qpd)
60{ 109{
61 return -1; 110 struct kfd_process_device *pdd;
111 unsigned int temp;
112
113 BUG_ON(!dqm || !qpd);
114
115 pdd = qpd_to_pdd(qpd);
116
117 /* check if sh_mem_config register already configured */
118 if (qpd->sh_mem_config == 0) {
119 qpd->sh_mem_config =
120 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
121 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
122 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
123 MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
124 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
125
126 qpd->sh_mem_ape1_limit = 0;
127 qpd->sh_mem_ape1_base = 0;
128 }
129
130 if (qpd->pqm->process->is_32bit_user_mode) {
131 temp = get_sh_mem_bases_32(pdd);
132 qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
133 qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
134 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
135 } else {
136 temp = get_sh_mem_bases_nybble_64(pdd);
137 qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
138 qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
139 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
140 }
141
142 pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
143 qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
144
145 return 0;
62} 146}
63 147
64static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, 148static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
65 struct qcm_process_device *qpd) 149 struct qcm_process_device *qpd)
66{ 150{
151 uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
152
153 if (q->process->is_32bit_user_mode)
154 value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
155 get_sh_mem_bases_32(qpd_to_pdd(qpd));
156 else
157 value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
 158 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
159 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
160
161 q->properties.sdma_vm_addr = value;
67} 162}
68 163
69static int initialize_cpsch_vi(struct device_queue_manager *dqm) 164static int initialize_cpsch_vi(struct device_queue_manager *dqm)
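
A worked example of compute_sh_mem_bases_64bit() above, assuming SH_MEM_BASES__SHARED_BASE__SHIFT is 16 (PRIVATE_BASE in bits 15:0, SHARED_BASE in bits 31:16): a top nybble of 8 yields PRIVATE_BASE = 0x8000 and SHARED_BASE = 0x8000 << 16, so sh_mem_bases = 0x80008000, placing LDS, scratch and GPUVM under addresses whose top nybble is 8.

#include <assert.h>
#include <stdint.h>

#define SH_MEM_BASES__SHARED_BASE__SHIFT 16	/* assumed field position */

static uint32_t compute_sh_mem_bases_64bit(uint32_t top_address_nybble)
{
	uint32_t private_base = top_address_nybble << 12;

	return private_base |
	       (private_base << SH_MEM_BASES__SHARED_BASE__SHIFT);
}

int main(void)
{
	/* Nybble 8: even, non-zero, <= 0xE, as the BUG_ON requires. */
	assert(compute_sh_mem_bases_64bit(8) == 0x80008000u);
	return 0;
}
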
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 434979428fc0..d83de985e88c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -27,6 +27,7 @@
27#include "kfd_mqd_manager.h" 27#include "kfd_mqd_manager.h"
28#include "cik_regs.h" 28#include "cik_regs.h"
29#include "cik_structs.h" 29#include "cik_structs.h"
30#include "oss/oss_2_4_sh_mask.h"
30 31
31static inline struct cik_mqd *get_mqd(void *mqd) 32static inline struct cik_mqd *get_mqd(void *mqd)
32{ 33{
@@ -214,17 +215,20 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
214 BUG_ON(!mm || !mqd || !q); 215 BUG_ON(!mm || !mqd || !q);
215 216
216 m = get_sdma_mqd(mqd); 217 m = get_sdma_mqd(mqd);
217 m->sdma_rlc_rb_cntl = 218 m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
218 SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) | 219 SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
219 SDMA_RB_VMID(q->vmid) | 220 q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
220 SDMA_RPTR_WRITEBACK_ENABLE | 221 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
221 SDMA_RPTR_WRITEBACK_TIMER(6); 222 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
222 223
223 m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8); 224 m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
224 m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); 225 m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
225 m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); 226 m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
226 m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); 227 m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
227 m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE; 228 m->sdma_rlc_doorbell = q->doorbell_off <<
229 SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
230 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
231
228 m->sdma_rlc_virtual_addr = q->sdma_vm_addr; 232 m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
229 233
230 m->sdma_engine_id = q->sdma_engine_id; 234 m->sdma_engine_id = q->sdma_engine_id;
@@ -234,7 +238,9 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
234 if (q->queue_size > 0 && 238 if (q->queue_size > 0 &&
235 q->queue_address != 0 && 239 q->queue_address != 0 &&
236 q->queue_percent > 0) { 240 q->queue_percent > 0) {
237 m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE; 241 m->sdma_rlc_rb_cntl |=
242 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
243
238 q->is_active = true; 244 q->is_active = true;
239 } 245 }
240 246
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index b3a7e3ba1e38..fa32c32fa1c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -22,12 +22,255 @@
22 */ 22 */
23 23
24#include <linux/printk.h> 24#include <linux/printk.h>
25#include <linux/slab.h>
25#include "kfd_priv.h" 26#include "kfd_priv.h"
26#include "kfd_mqd_manager.h" 27#include "kfd_mqd_manager.h"
28#include "vi_structs.h"
29#include "gca/gfx_8_0_sh_mask.h"
30#include "gca/gfx_8_0_enum.h"
31
32#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
33
34static inline struct vi_mqd *get_mqd(void *mqd)
35{
36 return (struct vi_mqd *)mqd;
37}
38
39static int init_mqd(struct mqd_manager *mm, void **mqd,
40 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
41 struct queue_properties *q)
42{
43 int retval;
44 uint64_t addr;
45 struct vi_mqd *m;
46
47 retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
48 mqd_mem_obj);
49 if (retval != 0)
50 return -ENOMEM;
51
52 m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
53 addr = (*mqd_mem_obj)->gpu_addr;
54
55 memset(m, 0, sizeof(struct vi_mqd));
56
57 m->header = 0xC0310800;
58 m->compute_pipelinestat_enable = 1;
59 m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
60 m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
61 m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
62 m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
63
64 m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
65 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
66
67 m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT |
68 MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT;
69
70 m->cp_mqd_base_addr_lo = lower_32_bits(addr);
71 m->cp_mqd_base_addr_hi = upper_32_bits(addr);
72
73 m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
74 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
75 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
76
77 m->cp_hqd_pipe_priority = 1;
78 m->cp_hqd_queue_priority = 15;
79
80 m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;
81
82 if (q->format == KFD_QUEUE_FORMAT_AQL)
83 m->cp_hqd_iq_rptr = 1;
84
85 *mqd = m;
86 if (gart_addr != NULL)
87 *gart_addr = addr;
88 retval = mm->update_mqd(mm, m, q);
89
90 return retval;
91}
92
93static int load_mqd(struct mqd_manager *mm, void *mqd,
94 uint32_t pipe_id, uint32_t queue_id,
95 uint32_t __user *wptr)
96{
97 return mm->dev->kfd2kgd->hqd_load
98 (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
99}
100
101static int __update_mqd(struct mqd_manager *mm, void *mqd,
102 struct queue_properties *q, unsigned int mtype,
103 unsigned int atc_bit)
104{
105 struct vi_mqd *m;
106
107 BUG_ON(!mm || !q || !mqd);
108
109 pr_debug("kfd: In func %s\n", __func__);
110
111 m = get_mqd(mqd);
112
113 m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
114 atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
115 mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
116 m->cp_hqd_pq_control |=
117 ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
118 pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
119
120 m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
121 m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
122
123 m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
124 m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
125
126 m->cp_hqd_pq_doorbell_control =
127 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
128 q->doorbell_off <<
129 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
130 pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n",
131 m->cp_hqd_pq_doorbell_control);
132
133 m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
134 mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT;
135
136 m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT |
137 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
138 mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;
139
140 m->cp_hqd_eop_control |=
141 ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1;
142 m->cp_hqd_eop_base_addr_lo =
143 lower_32_bits(q->eop_ring_buffer_address >> 8);
144 m->cp_hqd_eop_base_addr_hi =
145 upper_32_bits(q->eop_ring_buffer_address >> 8);
146
147 m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT |
148 mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT;
149
150 m->cp_hqd_vmid = q->vmid;
151
152 if (q->format == KFD_QUEUE_FORMAT_AQL) {
153 m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
154 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
155 }
156
157 m->cp_hqd_active = 0;
158 q->is_active = false;
159 if (q->queue_size > 0 &&
160 q->queue_address != 0 &&
161 q->queue_percent > 0) {
162 m->cp_hqd_active = 1;
163 q->is_active = true;
164 }
165
166 return 0;
167}
168
169
170static int update_mqd(struct mqd_manager *mm, void *mqd,
171 struct queue_properties *q)
172{
173 return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
174}
175
176static int destroy_mqd(struct mqd_manager *mm, void *mqd,
177 enum kfd_preempt_type type,
178 unsigned int timeout, uint32_t pipe_id,
179 uint32_t queue_id)
180{
181 return mm->dev->kfd2kgd->hqd_destroy
182 (mm->dev->kgd, type, timeout,
183 pipe_id, queue_id);
184}
185
186static void uninit_mqd(struct mqd_manager *mm, void *mqd,
187 struct kfd_mem_obj *mqd_mem_obj)
188{
189 BUG_ON(!mm || !mqd);
190 kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
191}
192
193static bool is_occupied(struct mqd_manager *mm, void *mqd,
194 uint64_t queue_address, uint32_t pipe_id,
195 uint32_t queue_id)
196{
197 return mm->dev->kfd2kgd->hqd_is_occupied(
198 mm->dev->kgd, queue_address,
199 pipe_id, queue_id);
200}
201
202static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
203 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
204 struct queue_properties *q)
205{
206 struct vi_mqd *m;
207 int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
208
209 if (retval != 0)
210 return retval;
211
212 m = get_mqd(*mqd);
213
214 m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
215 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
216
217 return retval;
218}
219
220static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
221 struct queue_properties *q)
222{
223 struct vi_mqd *m;
224 int retval = __update_mqd(mm, mqd, q, MTYPE_UC, 0);
225
226 if (retval != 0)
227 return retval;
228
229 m = get_mqd(mqd);
230 m->cp_hqd_vmid = q->vmid;
231 return retval;
232}
27 233
28struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, 234struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
29 struct kfd_dev *dev) 235 struct kfd_dev *dev)
30{ 236{
31 pr_warn("amdkfd: VI MQD is not currently supported\n"); 237 struct mqd_manager *mqd;
32 return NULL; 238
239 BUG_ON(!dev);
240 BUG_ON(type >= KFD_MQD_TYPE_MAX);
241
242 pr_debug("kfd: In func %s\n", __func__);
243
244 mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
245 if (!mqd)
246 return NULL;
247
248 mqd->dev = dev;
249
250 switch (type) {
251 case KFD_MQD_TYPE_CP:
252 case KFD_MQD_TYPE_COMPUTE:
253 mqd->init_mqd = init_mqd;
254 mqd->uninit_mqd = uninit_mqd;
255 mqd->load_mqd = load_mqd;
256 mqd->update_mqd = update_mqd;
257 mqd->destroy_mqd = destroy_mqd;
258 mqd->is_occupied = is_occupied;
259 break;
260 case KFD_MQD_TYPE_HIQ:
261 mqd->init_mqd = init_mqd_hiq;
262 mqd->uninit_mqd = uninit_mqd;
263 mqd->load_mqd = load_mqd;
264 mqd->update_mqd = update_mqd_hiq;
265 mqd->destroy_mqd = destroy_mqd;
266 mqd->is_occupied = is_occupied;
267 break;
268 case KFD_MQD_TYPE_SDMA:
269 break;
270 default:
271 kfree(mqd);
272 return NULL;
273 }
274
275 return mqd;
33} 276}
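
Both cp_hqd_pq_control and cp_hqd_eop_control above encode ring sizes with ffs(...) - 1 - 1. Why that works, as a standalone sketch (the extra minus-one bias presumably matches how the CP interprets the field; that interpretation is an assumption here):

#include <assert.h>
#include <strings.h>	/* ffs() */

/*
 * For a power of two n, ffs(n) == log2(n) + 1, so the expression used
 * in __update_mqd() stores log2(size in dwords) - 1: e.g. a 4096-byte
 * ring is 1024 dwords, ffs(1024) = 11, and the field gets 11 - 1 - 1 = 9.
 */
static int encode_queue_size(unsigned int queue_size_bytes)
{
	return ffs((int)(queue_size_bytes / sizeof(unsigned int))) - 1 - 1;
}

int main(void)
{
	assert(encode_queue_size(4096) == 9);
	return 0;
}
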
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 99b6d28a11c3..90f391434fa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -27,6 +27,7 @@
27#include "kfd_kernel_queue.h" 27#include "kfd_kernel_queue.h"
28#include "kfd_priv.h" 28#include "kfd_priv.h"
29#include "kfd_pm4_headers.h" 29#include "kfd_pm4_headers.h"
30#include "kfd_pm4_headers_vi.h"
30#include "kfd_pm4_opcodes.h" 31#include "kfd_pm4_opcodes.h"
31 32
32static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, 33static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
@@ -55,6 +56,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
55 bool *over_subscription) 56 bool *over_subscription)
56{ 57{
57 unsigned int process_count, queue_count; 58 unsigned int process_count, queue_count;
59 unsigned int map_queue_size;
58 60
59 BUG_ON(!pm || !rlib_size || !over_subscription); 61 BUG_ON(!pm || !rlib_size || !over_subscription);
60 62
@@ -69,9 +71,13 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
69 pr_debug("kfd: over subscribed runlist\n"); 71 pr_debug("kfd: over subscribed runlist\n");
70 } 72 }
71 73
74 map_queue_size =
75 (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
76 sizeof(struct pm4_mes_map_queues) :
77 sizeof(struct pm4_map_queues);
72 /* calculate run list ib allocation size */ 78 /* calculate run list ib allocation size */
73 *rlib_size = process_count * sizeof(struct pm4_map_process) + 79 *rlib_size = process_count * sizeof(struct pm4_map_process) +
74 queue_count * sizeof(struct pm4_map_queues); 80 queue_count * map_queue_size;
75 81
76 /* 82 /*
77 * Increase the allocation size in case we need a chained run list 83 * Increase the allocation size in case we need a chained run list
@@ -176,6 +182,71 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
176 return 0; 182 return 0;
177} 183}
178 184
185static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
186 struct queue *q, bool is_static)
187{
188 struct pm4_mes_map_queues *packet;
189 bool use_static = is_static;
190
191 BUG_ON(!pm || !buffer || !q);
192
193 pr_debug("kfd: In func %s\n", __func__);
194
195 packet = (struct pm4_mes_map_queues *)buffer;
 196 memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
197
 198 packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
 199 sizeof(struct pm4_mes_map_queues));
200 packet->bitfields2.alloc_format =
201 alloc_format__mes_map_queues__one_per_pipe_vi;
202 packet->bitfields2.num_queues = 1;
203 packet->bitfields2.queue_sel =
204 queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
205
206 packet->bitfields2.engine_sel =
207 engine_sel__mes_map_queues__compute_vi;
208 packet->bitfields2.queue_type =
209 queue_type__mes_map_queues__normal_compute_vi;
210
211 switch (q->properties.type) {
212 case KFD_QUEUE_TYPE_COMPUTE:
213 if (use_static)
214 packet->bitfields2.queue_type =
215 queue_type__mes_map_queues__normal_latency_static_queue_vi;
216 break;
217 case KFD_QUEUE_TYPE_DIQ:
218 packet->bitfields2.queue_type =
219 queue_type__mes_map_queues__debug_interface_queue_vi;
220 break;
221 case KFD_QUEUE_TYPE_SDMA:
222 packet->bitfields2.engine_sel =
223 engine_sel__mes_map_queues__sdma0_vi;
224 use_static = false; /* no static queues under SDMA */
225 break;
226 default:
 227 pr_err("kfd: in %s: invalid queue type %d\n", __func__,
 228 q->properties.type);
229 BUG();
230 break;
231 }
232 packet->bitfields3.doorbell_offset =
233 q->properties.doorbell_off;
234
235 packet->mqd_addr_lo =
236 lower_32_bits(q->gart_mqd_addr);
237
238 packet->mqd_addr_hi =
239 upper_32_bits(q->gart_mqd_addr);
240
241 packet->wptr_addr_lo =
242 lower_32_bits((uint64_t)q->properties.write_ptr);
243
244 packet->wptr_addr_hi =
245 upper_32_bits((uint64_t)q->properties.write_ptr);
246
247 return 0;
248}
249
179static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, 250static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
180 struct queue *q, bool is_static) 251 struct queue *q, bool is_static)
181{ 252{
@@ -292,8 +363,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
292 pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", 363 pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
293 kq->queue->queue, qpd->is_debug); 364 kq->queue->queue, qpd->is_debug);
294 365
295 retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], 366 if (pm->dqm->dev->device_info->asic_family ==
296 kq->queue, qpd->is_debug); 367 CHIP_CARRIZO)
368 retval = pm_create_map_queue_vi(pm,
369 &rl_buffer[rl_wptr],
370 kq->queue,
371 qpd->is_debug);
372 else
373 retval = pm_create_map_queue(pm,
374 &rl_buffer[rl_wptr],
375 kq->queue,
376 qpd->is_debug);
297 if (retval != 0) 377 if (retval != 0)
298 return retval; 378 return retval;
299 379
@@ -309,8 +389,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
309 pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", 389 pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
310 q->queue, qpd->is_debug); 390 q->queue, qpd->is_debug);
311 391
312 retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], 392 if (pm->dqm->dev->device_info->asic_family ==
313 q, qpd->is_debug); 393 CHIP_CARRIZO)
394 retval = pm_create_map_queue_vi(pm,
395 &rl_buffer[rl_wptr],
396 q,
397 qpd->is_debug);
398 else
399 retval = pm_create_map_queue(pm,
400 &rl_buffer[rl_wptr],
401 q,
402 qpd->is_debug);
314 403
315 if (retval != 0) 404 if (retval != 0)
316 return retval; 405 return retval;
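
The sizing change in pm_calc_rlib_size() is the arithmetic core of the VI support here: the per-queue packet cost now depends on the ASIC family. A sketch with placeholder struct sizes (the real code additionally grows the allocation for a chained runlist when over-subscribed, which is omitted):

#include <stdbool.h>
#include <stddef.h>

/* Placeholder sizes standing in for sizeof(struct pm4_map_process),
 * sizeof(struct pm4_map_queues) and sizeof(struct pm4_mes_map_queues). */
enum { MAP_PROCESS_SZ = 60, MAP_QUEUES_CIK_SZ = 64, MAP_QUEUES_VI_SZ = 28 };

/*
 * Mirror of the sizing logic above: one map_process packet per process
 * plus one map_queues packet per queue, where the per-queue packet size
 * is chosen by ASIC family.
 */
static size_t runlist_ib_size(unsigned int process_count,
			      unsigned int queue_count, bool is_carrizo)
{
	size_t map_queue_size = is_carrizo ? MAP_QUEUES_VI_SZ
					   : MAP_QUEUES_CIK_SZ;

	return process_count * MAP_PROCESS_SZ + queue_count * map_queue_size;
}
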
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
new file mode 100644
index 000000000000..08c721922812
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -0,0 +1,398 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef F32_MES_PM4_PACKETS_H
25#define F32_MES_PM4_PACKETS_H
26
27#ifndef PM4_MES_HEADER_DEFINED
28#define PM4_MES_HEADER_DEFINED
29union PM4_MES_TYPE_3_HEADER {
30 struct {
31 uint32_t reserved1 : 8; /* < reserved */
32 uint32_t opcode : 8; /* < IT opcode */
33 uint32_t count : 14;/* < number of DWORDs - 1 in the
34 information body. */
35 uint32_t type : 2; /* < packet identifier.
36 It should be 3 for type 3 packets */
37 };
38 uint32_t u32All;
39};
40#endif /* PM4_MES_HEADER_DEFINED */
41
42/*--------------------MES_SET_RESOURCES--------------------*/
43
44#ifndef PM4_MES_SET_RESOURCES_DEFINED
45#define PM4_MES_SET_RESOURCES_DEFINED
46enum mes_set_resources_queue_type_enum {
47 queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
48 queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
49 queue_type__mes_set_resources__hsa_debug_interface_queue = 4
50};
51
52
53struct pm4_mes_set_resources {
54 union {
55 union PM4_MES_TYPE_3_HEADER header; /* header */
56 uint32_t ordinal1;
57 };
58
59 union {
60 struct {
61 uint32_t vmid_mask:16;
62 uint32_t unmap_latency:8;
63 uint32_t reserved1:5;
64 enum mes_set_resources_queue_type_enum queue_type:3;
65 } bitfields2;
66 uint32_t ordinal2;
67 };
68
69 uint32_t queue_mask_lo;
70 uint32_t queue_mask_hi;
71 uint32_t gws_mask_lo;
72 uint32_t gws_mask_hi;
73
74 union {
75 struct {
76 uint32_t oac_mask:16;
77 uint32_t reserved2:16;
78 } bitfields7;
79 uint32_t ordinal7;
80 };
81
82 union {
83 struct {
84 uint32_t gds_heap_base:6;
85 uint32_t reserved3:5;
86 uint32_t gds_heap_size:6;
87 uint32_t reserved4:15;
88 } bitfields8;
89 uint32_t ordinal8;
90 };
91
92};
93#endif
94
95/*--------------------MES_RUN_LIST--------------------*/
96
97#ifndef PM4_MES_RUN_LIST_DEFINED
98#define PM4_MES_RUN_LIST_DEFINED
99
100struct pm4_mes_runlist {
101 union {
102 union PM4_MES_TYPE_3_HEADER header; /* header */
103 uint32_t ordinal1;
104 };
105
106 union {
107 struct {
108 uint32_t reserved1:2;
109 uint32_t ib_base_lo:30;
110 } bitfields2;
111 uint32_t ordinal2;
112 };
113
114 union {
115 struct {
116 uint32_t ib_base_hi:16;
117 uint32_t reserved2:16;
118 } bitfields3;
119 uint32_t ordinal3;
120 };
121
122 union {
123 struct {
124 uint32_t ib_size:20;
125 uint32_t chain:1;
126 uint32_t offload_polling:1;
127 uint32_t reserved3:1;
128 uint32_t valid:1;
129 uint32_t reserved4:8;
130 } bitfields4;
131 uint32_t ordinal4;
132 };
133
134};
135#endif
136
137/*--------------------MES_MAP_PROCESS--------------------*/
138
139#ifndef PM4_MES_MAP_PROCESS_DEFINED
140#define PM4_MES_MAP_PROCESS_DEFINED
141
142struct pm4_mes_map_process {
143 union {
144 union PM4_MES_TYPE_3_HEADER header; /* header */
145 uint32_t ordinal1;
146 };
147
148 union {
149 struct {
150 uint32_t pasid:16;
151 uint32_t reserved1:8;
152 uint32_t diq_enable:1;
153 uint32_t process_quantum:7;
154 } bitfields2;
155 uint32_t ordinal2;
 156 };
157
158 union {
159 struct {
160 uint32_t page_table_base:28;
161 uint32_t reserved2:4;
162 } bitfields3;
163 uint32_t ordinal3;
164 };
165
166 uint32_t sh_mem_bases;
167 uint32_t sh_mem_ape1_base;
168 uint32_t sh_mem_ape1_limit;
169 uint32_t sh_mem_config;
170 uint32_t gds_addr_lo;
171 uint32_t gds_addr_hi;
172
173 union {
174 struct {
175 uint32_t num_gws:6;
176 uint32_t reserved3:2;
177 uint32_t num_oac:4;
178 uint32_t reserved4:4;
179 uint32_t gds_size:6;
180 uint32_t num_queues:10;
181 } bitfields10;
182 uint32_t ordinal10;
183 };
184
185};
186#endif
187
188/*--------------------MES_MAP_QUEUES--------------------*/
189
190#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
191#define PM4_MES_MAP_QUEUES_VI_DEFINED
192enum mes_map_queues_queue_sel_vi_enum {
193 queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
194queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
195};
196
197enum mes_map_queues_queue_type_vi_enum {
198 queue_type__mes_map_queues__normal_compute_vi = 0,
199 queue_type__mes_map_queues__debug_interface_queue_vi = 1,
200 queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
201queue_type__mes_map_queues__low_latency_static_queue_vi = 3
202};
203
204enum mes_map_queues_alloc_format_vi_enum {
205 alloc_format__mes_map_queues__one_per_pipe_vi = 0,
206alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
207};
208
209enum mes_map_queues_engine_sel_vi_enum {
210 engine_sel__mes_map_queues__compute_vi = 0,
211 engine_sel__mes_map_queues__sdma0_vi = 2,
212 engine_sel__mes_map_queues__sdma1_vi = 3
213};
214
215
216struct pm4_mes_map_queues {
217 union {
218 union PM4_MES_TYPE_3_HEADER header; /* header */
219 uint32_t ordinal1;
220 };
221
222 union {
223 struct {
224 uint32_t reserved1:4;
225 enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
226 uint32_t reserved2:15;
227 enum mes_map_queues_queue_type_vi_enum queue_type:3;
228 enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
229 enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
230 uint32_t num_queues:3;
231 } bitfields2;
232 uint32_t ordinal2;
233 };
234
235 union {
236 struct {
237 uint32_t reserved3:1;
238 uint32_t check_disable:1;
239 uint32_t doorbell_offset:21;
240 uint32_t reserved4:3;
241 uint32_t queue:6;
242 } bitfields3;
243 uint32_t ordinal3;
244 };
245
246 uint32_t mqd_addr_lo;
247 uint32_t mqd_addr_hi;
248 uint32_t wptr_addr_lo;
249 uint32_t wptr_addr_hi;
250};
251#endif
252
253/*--------------------MES_QUERY_STATUS--------------------*/
254
255#ifndef PM4_MES_QUERY_STATUS_DEFINED
256#define PM4_MES_QUERY_STATUS_DEFINED
257enum mes_query_status_interrupt_sel_enum {
258 interrupt_sel__mes_query_status__completion_status = 0,
259 interrupt_sel__mes_query_status__process_status = 1,
260 interrupt_sel__mes_query_status__queue_status = 2
261};
262
263enum mes_query_status_command_enum {
264 command__mes_query_status__interrupt_only = 0,
265 command__mes_query_status__fence_only_immediate = 1,
266 command__mes_query_status__fence_only_after_write_ack = 2,
267 command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
268};
269
270enum mes_query_status_engine_sel_enum {
271 engine_sel__mes_query_status__compute = 0,
272 engine_sel__mes_query_status__sdma0_queue = 2,
273 engine_sel__mes_query_status__sdma1_queue = 3
274};
275
276struct pm4_mes_query_status {
277 union {
278 union PM4_MES_TYPE_3_HEADER header; /* header */
279 uint32_t ordinal1;
280 };
281
282 union {
283 struct {
284 uint32_t context_id:28;
285 enum mes_query_status_interrupt_sel_enum
286 interrupt_sel:2;
287 enum mes_query_status_command_enum command:2;
288 } bitfields2;
289 uint32_t ordinal2;
290 };
291
292 union {
293 struct {
294 uint32_t pasid:16;
295 uint32_t reserved1:16;
296 } bitfields3a;
297 struct {
298 uint32_t reserved2:2;
299 uint32_t doorbell_offset:21;
300 uint32_t reserved3:2;
301 enum mes_query_status_engine_sel_enum engine_sel:3;
302 uint32_t reserved4:4;
303 } bitfields3b;
304 uint32_t ordinal3;
305 };
306
307 uint32_t addr_lo;
308 uint32_t addr_hi;
309 uint32_t data_lo;
310 uint32_t data_hi;
311};
312#endif
313
314/*--------------------MES_UNMAP_QUEUES--------------------*/
315
316#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
317#define PM4_MES_UNMAP_QUEUES_DEFINED
318enum mes_unmap_queues_action_enum {
319 action__mes_unmap_queues__preempt_queues = 0,
320 action__mes_unmap_queues__reset_queues = 1,
321 action__mes_unmap_queues__disable_process_queues = 2,
322 action__mes_unmap_queues__reserved = 3
323};
324
325enum mes_unmap_queues_queue_sel_enum {
326 queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
327 queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
328 queue_sel__mes_unmap_queues__unmap_all_queues = 2,
329 queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
330};
331
332enum mes_unmap_queues_engine_sel_enum {
333 engine_sel__mes_unmap_queues__compute = 0,
334 engine_sel__mes_unmap_queues__sdma0 = 2,
 335 engine_sel__mes_unmap_queues__sdma1 = 3
336};
337
338struct pm4_mes_unmap_queues {
339 union {
340 union PM4_MES_TYPE_3_HEADER header; /* header */
341 uint32_t ordinal1;
342 };
343
344 union {
345 struct {
346 enum mes_unmap_queues_action_enum action:2;
347 uint32_t reserved1:2;
348 enum mes_unmap_queues_queue_sel_enum queue_sel:2;
349 uint32_t reserved2:20;
350 enum mes_unmap_queues_engine_sel_enum engine_sel:3;
351 uint32_t num_queues:3;
352 } bitfields2;
353 uint32_t ordinal2;
354 };
355
356 union {
357 struct {
358 uint32_t pasid:16;
359 uint32_t reserved3:16;
360 } bitfields3a;
361 struct {
362 uint32_t reserved4:2;
363 uint32_t doorbell_offset0:21;
364 uint32_t reserved5:9;
365 } bitfields3b;
366 uint32_t ordinal3;
367 };
368
369 union {
370 struct {
371 uint32_t reserved6:2;
372 uint32_t doorbell_offset1:21;
373 uint32_t reserved7:9;
374 } bitfields4;
375 uint32_t ordinal4;
376 };
377
378 union {
379 struct {
380 uint32_t reserved8:2;
381 uint32_t doorbell_offset2:21;
382 uint32_t reserved9:9;
383 } bitfields5;
384 uint32_t ordinal5;
385 };
386
387 union {
388 struct {
389 uint32_t reserved10:2;
390 uint32_t doorbell_offset3:21;
391 uint32_t reserved11:9;
392 } bitfields6;
393 uint32_t ordinal6;
394 };
395};
396#endif
397
398#endif
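
All the packets above share PM4_MES_TYPE_3_HEADER, whose count field holds the number of body dwords minus one. A sketch of a header builder consistent with that definition (the driver has its own build_pm4_header(); this standalone version is illustrative):

#include <stdint.h>
#include <stddef.h>

union PM4_MES_TYPE_3_HEADER {
	struct {
		uint32_t reserved1 : 8;
		uint32_t opcode    : 8;
		uint32_t count     : 14; /* dwords - 1 in the body */
		uint32_t type      : 2;  /* 3 for type-3 packets */
	};
	uint32_t u32All;
};

static uint32_t build_type3_header(unsigned int opcode, size_t packet_bytes)
{
	union PM4_MES_TYPE_3_HEADER header;

	header.u32All = 0;
	header.opcode = opcode;
	/*
	 * The body excludes the header dword, and the field stores
	 * body - 1, hence total dwords minus 2.
	 */
	header.count = packet_bytes / sizeof(uint32_t) - 2;
	header.type = 3;

	return header.u32All;
}
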
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c25728bc388a..74909e72a009 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1186,6 +1186,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
1186 * TODO: Retrieve max engine clock values from KGD 1186 * TODO: Retrieve max engine clock values from KGD
1187 */ 1187 */
1188 1188
1189 if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
1190 dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
1191 pr_info("amdkfd: adding doorbell packet type capability\n");
1192 }
1193
1189 res = 0; 1194 res = 0;
1190 1195
1191err: 1196err:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 989624b3cd14..c3ddb9b95ff8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -40,6 +40,7 @@
40#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 40#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
41#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 41#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
42#define HSA_CAP_RESERVED 0xfffff000 42#define HSA_CAP_RESERVED 0xfffff000
43#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000
43 44
44struct kfd_node_properties { 45struct kfd_node_properties {
45 uint32_t cpu_cores_count; 46 uint32_t cpu_cores_count;
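
HSA_CAP_DOORBELL_PACKET_TYPE is exported to userspace through the topology capability word, and a consumer tests the bit to pick the doorbell packet format the node expects. A minimal sketch (the consumer function is hypothetical):

#include <stdbool.h>
#include <stdint.h>

#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000

/*
 * Illustrative consumer-side check: the topology code exposes the
 * capability word via sysfs; testing this bit selects the Carrizo
 * doorbell packet format.
 */
static bool has_new_doorbell_packet_type(uint32_t capability)
{
	return (capability & HSA_CAP_DOORBELL_PACKET_TYPE) != 0;
}
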
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 9080daa116b6..888250b33ea8 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -52,7 +52,8 @@ enum kgd_engine_type {
52 KGD_ENGINE_MEC1, 52 KGD_ENGINE_MEC1,
53 KGD_ENGINE_MEC2, 53 KGD_ENGINE_MEC2,
54 KGD_ENGINE_RLC, 54 KGD_ENGINE_RLC,
55 KGD_ENGINE_SDMA, 55 KGD_ENGINE_SDMA1,
56 KGD_ENGINE_SDMA2,
56 KGD_ENGINE_MAX 57 KGD_ENGINE_MAX
57}; 58};
58 59
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
new file mode 100644
index 000000000000..65cfacd7a66c
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -0,0 +1,417 @@
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef VI_STRUCTS_H_
25#define VI_STRUCTS_H_
26
27struct vi_sdma_mqd {
28 uint32_t sdmax_rlcx_rb_cntl;
29 uint32_t sdmax_rlcx_rb_base;
30 uint32_t sdmax_rlcx_rb_base_hi;
31 uint32_t sdmax_rlcx_rb_rptr;
32 uint32_t sdmax_rlcx_rb_wptr;
33 uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
34 uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
35 uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
36 uint32_t sdmax_rlcx_rb_rptr_addr_hi;
37 uint32_t sdmax_rlcx_rb_rptr_addr_lo;
38 uint32_t sdmax_rlcx_ib_cntl;
39 uint32_t sdmax_rlcx_ib_rptr;
40 uint32_t sdmax_rlcx_ib_offset;
41 uint32_t sdmax_rlcx_ib_base_lo;
42 uint32_t sdmax_rlcx_ib_base_hi;
43 uint32_t sdmax_rlcx_ib_size;
44 uint32_t sdmax_rlcx_skip_cntl;
45 uint32_t sdmax_rlcx_context_status;
46 uint32_t sdmax_rlcx_doorbell;
47 uint32_t sdmax_rlcx_virtual_addr;
48 uint32_t sdmax_rlcx_ape1_cntl;
49 uint32_t sdmax_rlcx_doorbell_log;
50 uint32_t reserved_22;
51 uint32_t reserved_23;
52 uint32_t reserved_24;
53 uint32_t reserved_25;
54 uint32_t reserved_26;
55 uint32_t reserved_27;
56 uint32_t reserved_28;
57 uint32_t reserved_29;
58 uint32_t reserved_30;
59 uint32_t reserved_31;
60 uint32_t reserved_32;
61 uint32_t reserved_33;
62 uint32_t reserved_34;
63 uint32_t reserved_35;
64 uint32_t reserved_36;
65 uint32_t reserved_37;
66 uint32_t reserved_38;
67 uint32_t reserved_39;
68 uint32_t reserved_40;
69 uint32_t reserved_41;
70 uint32_t reserved_42;
71 uint32_t reserved_43;
72 uint32_t reserved_44;
73 uint32_t reserved_45;
74 uint32_t reserved_46;
75 uint32_t reserved_47;
76 uint32_t reserved_48;
77 uint32_t reserved_49;
78 uint32_t reserved_50;
79 uint32_t reserved_51;
80 uint32_t reserved_52;
81 uint32_t reserved_53;
82 uint32_t reserved_54;
83 uint32_t reserved_55;
84 uint32_t reserved_56;
85 uint32_t reserved_57;
86 uint32_t reserved_58;
87 uint32_t reserved_59;
88 uint32_t reserved_60;
89 uint32_t reserved_61;
90 uint32_t reserved_62;
91 uint32_t reserved_63;
92 uint32_t reserved_64;
93 uint32_t reserved_65;
94 uint32_t reserved_66;
95 uint32_t reserved_67;
96 uint32_t reserved_68;
97 uint32_t reserved_69;
98 uint32_t reserved_70;
99 uint32_t reserved_71;
100 uint32_t reserved_72;
101 uint32_t reserved_73;
102 uint32_t reserved_74;
103 uint32_t reserved_75;
104 uint32_t reserved_76;
105 uint32_t reserved_77;
106 uint32_t reserved_78;
107 uint32_t reserved_79;
108 uint32_t reserved_80;
109 uint32_t reserved_81;
110 uint32_t reserved_82;
111 uint32_t reserved_83;
112 uint32_t reserved_84;
113 uint32_t reserved_85;
114 uint32_t reserved_86;
115 uint32_t reserved_87;
116 uint32_t reserved_88;
117 uint32_t reserved_89;
118 uint32_t reserved_90;
119 uint32_t reserved_91;
120 uint32_t reserved_92;
121 uint32_t reserved_93;
122 uint32_t reserved_94;
123 uint32_t reserved_95;
124 uint32_t reserved_96;
125 uint32_t reserved_97;
126 uint32_t reserved_98;
127 uint32_t reserved_99;
128 uint32_t reserved_100;
129 uint32_t reserved_101;
130 uint32_t reserved_102;
131 uint32_t reserved_103;
132 uint32_t reserved_104;
133 uint32_t reserved_105;
134 uint32_t reserved_106;
135 uint32_t reserved_107;
136 uint32_t reserved_108;
137 uint32_t reserved_109;
138 uint32_t reserved_110;
139 uint32_t reserved_111;
140 uint32_t reserved_112;
141 uint32_t reserved_113;
142 uint32_t reserved_114;
143 uint32_t reserved_115;
144 uint32_t reserved_116;
145 uint32_t reserved_117;
146 uint32_t reserved_118;
147 uint32_t reserved_119;
148 uint32_t reserved_120;
149 uint32_t reserved_121;
150 uint32_t reserved_122;
151 uint32_t reserved_123;
152 uint32_t reserved_124;
153 uint32_t reserved_125;
154 uint32_t reserved_126;
155 uint32_t reserved_127;
156};
157
158struct vi_mqd {
159 uint32_t header;
160 uint32_t compute_dispatch_initiator;
161 uint32_t compute_dim_x;
162 uint32_t compute_dim_y;
163 uint32_t compute_dim_z;
164 uint32_t compute_start_x;
165 uint32_t compute_start_y;
166 uint32_t compute_start_z;
167 uint32_t compute_num_thread_x;
168 uint32_t compute_num_thread_y;
169 uint32_t compute_num_thread_z;
170 uint32_t compute_pipelinestat_enable;
171 uint32_t compute_perfcount_enable;
172 uint32_t compute_pgm_lo;
173 uint32_t compute_pgm_hi;
174 uint32_t compute_tba_lo;
175 uint32_t compute_tba_hi;
176 uint32_t compute_tma_lo;
177 uint32_t compute_tma_hi;
178 uint32_t compute_pgm_rsrc1;
179 uint32_t compute_pgm_rsrc2;
180 uint32_t compute_vmid;
181 uint32_t compute_resource_limits;
182 uint32_t compute_static_thread_mgmt_se0;
183 uint32_t compute_static_thread_mgmt_se1;
184 uint32_t compute_tmpring_size;
185 uint32_t compute_static_thread_mgmt_se2;
186 uint32_t compute_static_thread_mgmt_se3;
187 uint32_t compute_restart_x;
188 uint32_t compute_restart_y;
189 uint32_t compute_restart_z;
190 uint32_t compute_thread_trace_enable;
191 uint32_t compute_misc_reserved;
192 uint32_t compute_dispatch_id;
193 uint32_t compute_threadgroup_id;
194 uint32_t compute_relaunch;
195 uint32_t compute_wave_restore_addr_lo;
196 uint32_t compute_wave_restore_addr_hi;
197 uint32_t compute_wave_restore_control;
198 uint32_t reserved_39;
199 uint32_t reserved_40;
200 uint32_t reserved_41;
201 uint32_t reserved_42;
202 uint32_t reserved_43;
203 uint32_t reserved_44;
204 uint32_t reserved_45;
205 uint32_t reserved_46;
206 uint32_t reserved_47;
207 uint32_t reserved_48;
208 uint32_t reserved_49;
209 uint32_t reserved_50;
210 uint32_t reserved_51;
211 uint32_t reserved_52;
212 uint32_t reserved_53;
213 uint32_t reserved_54;
214 uint32_t reserved_55;
215 uint32_t reserved_56;
216 uint32_t reserved_57;
217 uint32_t reserved_58;
218 uint32_t reserved_59;
219 uint32_t reserved_60;
220 uint32_t reserved_61;
221 uint32_t reserved_62;
222 uint32_t reserved_63;
223 uint32_t reserved_64;
224 uint32_t compute_user_data_0;
225 uint32_t compute_user_data_1;
226 uint32_t compute_user_data_2;
227 uint32_t compute_user_data_3;
228 uint32_t compute_user_data_4;
229 uint32_t compute_user_data_5;
230 uint32_t compute_user_data_6;
231 uint32_t compute_user_data_7;
232 uint32_t compute_user_data_8;
233 uint32_t compute_user_data_9;
234 uint32_t compute_user_data_10;
235 uint32_t compute_user_data_11;
236 uint32_t compute_user_data_12;
237 uint32_t compute_user_data_13;
238 uint32_t compute_user_data_14;
239 uint32_t compute_user_data_15;
240 uint32_t cp_compute_csinvoc_count_lo;
241 uint32_t cp_compute_csinvoc_count_hi;
242 uint32_t reserved_83;
243 uint32_t reserved_84;
244 uint32_t reserved_85;
245 uint32_t cp_mqd_query_time_lo;
246 uint32_t cp_mqd_query_time_hi;
247 uint32_t cp_mqd_connect_start_time_lo;
248 uint32_t cp_mqd_connect_start_time_hi;
249 uint32_t cp_mqd_connect_end_time_lo;
250 uint32_t cp_mqd_connect_end_time_hi;
251 uint32_t cp_mqd_connect_end_wf_count;
252 uint32_t cp_mqd_connect_end_pq_rptr;
253 uint32_t cp_mqd_connect_end_pq_wptr;
254 uint32_t cp_mqd_connect_end_ib_rptr;
255 uint32_t reserved_96;
256 uint32_t reserved_97;
257 uint32_t cp_mqd_save_start_time_lo;
258 uint32_t cp_mqd_save_start_time_hi;
259 uint32_t cp_mqd_save_end_time_lo;
260 uint32_t cp_mqd_save_end_time_hi;
261 uint32_t cp_mqd_restore_start_time_lo;
262 uint32_t cp_mqd_restore_start_time_hi;
263 uint32_t cp_mqd_restore_end_time_lo;
264 uint32_t cp_mqd_restore_end_time_hi;
265 uint32_t reserved_106;
266 uint32_t reserved_107;
267 uint32_t gds_cs_ctxsw_cnt0;
268 uint32_t gds_cs_ctxsw_cnt1;
269 uint32_t gds_cs_ctxsw_cnt2;
270 uint32_t gds_cs_ctxsw_cnt3;
271 uint32_t reserved_112;
272 uint32_t reserved_113;
273 uint32_t cp_pq_exe_status_lo;
274 uint32_t cp_pq_exe_status_hi;
275 uint32_t cp_packet_id_lo;
276 uint32_t cp_packet_id_hi;
277 uint32_t cp_packet_exe_status_lo;
278 uint32_t cp_packet_exe_status_hi;
279 uint32_t gds_save_base_addr_lo;
280 uint32_t gds_save_base_addr_hi;
281 uint32_t gds_save_mask_lo;
282 uint32_t gds_save_mask_hi;
283 uint32_t ctx_save_base_addr_lo;
284 uint32_t ctx_save_base_addr_hi;
285 uint32_t reserved_126;
286 uint32_t reserved_127;
287 uint32_t cp_mqd_base_addr_lo;
288 uint32_t cp_mqd_base_addr_hi;
289 uint32_t cp_hqd_active;
290 uint32_t cp_hqd_vmid;
291 uint32_t cp_hqd_persistent_state;
292 uint32_t cp_hqd_pipe_priority;
293 uint32_t cp_hqd_queue_priority;
294 uint32_t cp_hqd_quantum;
295 uint32_t cp_hqd_pq_base_lo;
296 uint32_t cp_hqd_pq_base_hi;
297 uint32_t cp_hqd_pq_rptr;
298 uint32_t cp_hqd_pq_rptr_report_addr_lo;
299 uint32_t cp_hqd_pq_rptr_report_addr_hi;
300 uint32_t cp_hqd_pq_wptr_poll_addr_lo;
301 uint32_t cp_hqd_pq_wptr_poll_addr_hi;
302 uint32_t cp_hqd_pq_doorbell_control;
303 uint32_t cp_hqd_pq_wptr;
304 uint32_t cp_hqd_pq_control;
305 uint32_t cp_hqd_ib_base_addr_lo;
306 uint32_t cp_hqd_ib_base_addr_hi;
307 uint32_t cp_hqd_ib_rptr;
308 uint32_t cp_hqd_ib_control;
309 uint32_t cp_hqd_iq_timer;
310 uint32_t cp_hqd_iq_rptr;
311 uint32_t cp_hqd_dequeue_request;
312 uint32_t cp_hqd_dma_offload;
313 uint32_t cp_hqd_sema_cmd;
314 uint32_t cp_hqd_msg_type;
315 uint32_t cp_hqd_atomic0_preop_lo;
316 uint32_t cp_hqd_atomic0_preop_hi;
317 uint32_t cp_hqd_atomic1_preop_lo;
318 uint32_t cp_hqd_atomic1_preop_hi;
319 uint32_t cp_hqd_hq_status0;
320 uint32_t cp_hqd_hq_control0;
321 uint32_t cp_mqd_control;
322 uint32_t cp_hqd_hq_status1;
323 uint32_t cp_hqd_hq_control1;
324 uint32_t cp_hqd_eop_base_addr_lo;
325 uint32_t cp_hqd_eop_base_addr_hi;
326 uint32_t cp_hqd_eop_control;
327 uint32_t cp_hqd_eop_rptr;
328 uint32_t cp_hqd_eop_wptr;
329 uint32_t cp_hqd_eop_done_events;
330 uint32_t cp_hqd_ctx_save_base_addr_lo;
331 uint32_t cp_hqd_ctx_save_base_addr_hi;
332 uint32_t cp_hqd_ctx_save_control;
333 uint32_t cp_hqd_cntl_stack_offset;
334 uint32_t cp_hqd_cntl_stack_size;
335 uint32_t cp_hqd_wg_state_offset;
336 uint32_t cp_hqd_ctx_save_size;
337 uint32_t cp_hqd_gds_resource_state;
338 uint32_t cp_hqd_error;
339 uint32_t cp_hqd_eop_wptr_mem;
340 uint32_t cp_hqd_eop_dones;
341 uint32_t reserved_182;
342 uint32_t reserved_183;
343 uint32_t reserved_184;
344 uint32_t reserved_185;
345 uint32_t reserved_186;
346 uint32_t reserved_187;
347 uint32_t reserved_188;
348 uint32_t reserved_189;
349 uint32_t reserved_190;
350 uint32_t reserved_191;
351 uint32_t iqtimer_pkt_header;
352 uint32_t iqtimer_pkt_dw0;
353 uint32_t iqtimer_pkt_dw1;
354 uint32_t iqtimer_pkt_dw2;
355 uint32_t iqtimer_pkt_dw3;
356 uint32_t iqtimer_pkt_dw4;
357 uint32_t iqtimer_pkt_dw5;
358 uint32_t iqtimer_pkt_dw6;
359 uint32_t iqtimer_pkt_dw7;
360 uint32_t iqtimer_pkt_dw8;
361 uint32_t iqtimer_pkt_dw9;
362 uint32_t iqtimer_pkt_dw10;
363 uint32_t iqtimer_pkt_dw11;
364 uint32_t iqtimer_pkt_dw12;
365 uint32_t iqtimer_pkt_dw13;
366 uint32_t iqtimer_pkt_dw14;
367 uint32_t iqtimer_pkt_dw15;
368 uint32_t iqtimer_pkt_dw16;
369 uint32_t iqtimer_pkt_dw17;
370 uint32_t iqtimer_pkt_dw18;
371 uint32_t iqtimer_pkt_dw19;
372 uint32_t iqtimer_pkt_dw20;
373 uint32_t iqtimer_pkt_dw21;
374 uint32_t iqtimer_pkt_dw22;
375 uint32_t iqtimer_pkt_dw23;
376 uint32_t iqtimer_pkt_dw24;
377 uint32_t iqtimer_pkt_dw25;
378 uint32_t iqtimer_pkt_dw26;
379 uint32_t iqtimer_pkt_dw27;
380 uint32_t iqtimer_pkt_dw28;
381 uint32_t iqtimer_pkt_dw29;
382 uint32_t iqtimer_pkt_dw30;
383 uint32_t iqtimer_pkt_dw31;
384 uint32_t reserved_225;
385 uint32_t reserved_226;
386 uint32_t reserved_227;
387 uint32_t set_resources_header;
388 uint32_t set_resources_dw1;
389 uint32_t set_resources_dw2;
390 uint32_t set_resources_dw3;
391 uint32_t set_resources_dw4;
392 uint32_t set_resources_dw5;
393 uint32_t set_resources_dw6;
394 uint32_t set_resources_dw7;
395 uint32_t reserved_236;
396 uint32_t reserved_237;
397 uint32_t reserved_238;
398 uint32_t reserved_239;
399 uint32_t queue_doorbell_id0;
400 uint32_t queue_doorbell_id1;
401 uint32_t queue_doorbell_id2;
402 uint32_t queue_doorbell_id3;
403 uint32_t queue_doorbell_id4;
404 uint32_t queue_doorbell_id5;
405 uint32_t queue_doorbell_id6;
406 uint32_t queue_doorbell_id7;
407 uint32_t queue_doorbell_id8;
408 uint32_t queue_doorbell_id9;
409 uint32_t queue_doorbell_id10;
410 uint32_t queue_doorbell_id11;
411 uint32_t queue_doorbell_id12;
412 uint32_t queue_doorbell_id13;
413 uint32_t queue_doorbell_id14;
414 uint32_t queue_doorbell_id15;
415};
416
417#endif /* VI_STRUCTS_H_ */
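
The reserved_* members in vi_sdma_mqd and vi_mqd are not mere alignment filler: they keep every live field at a fixed dword index so the structure can be handed to the CP verbatim. A cut-down sketch of the pattern, with invented field positions, showing how such a layout can be pinned at build time:

#include <stddef.h>
#include <stdint.h>

/*
 * Stand-in for the idea behind vi_mqd: reserved dwords hold each live
 * field at its architected dword index, so offsetof() can verify the
 * layout at compile time. Positions here are invented for the demo,
 * not taken from the real MQD.
 */
struct mini_mqd {
	uint32_t header;	/* dword 0 */
	uint32_t reserved_1;
	uint32_t reserved_2;
	uint32_t cp_hqd_active;	/* dword 3 */
};

#define MQD_DWORD(type, field) (offsetof(type, field) / sizeof(uint32_t))

int main(void)
{
	_Static_assert(MQD_DWORD(struct mini_mqd, cp_hqd_active) == 3,
		       "MQD field drifted from its architected slot");
	return 0;
}
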