aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2018-07-14 19:05:59 -0400
committerOded Gabbay <oded.gabbay@gmail.com>2018-07-14 19:05:59 -0400
commit39e7f331864d2b9e30d5f3fd2121e182b2c9c8a9 (patch)
tree148394727c9e0b6eb318c0a3dff96e6992dffefb
parenta7fe68a1e8e4bce007505f729bc33e427c540386 (diff)
drm/amdkfd: Add CU-masking ioctl to KFD
CU-masking allows a KFD client to control the set of CUs used by a user mode queue for executing compute dispatches. This can be used for optimizing the partitioning of the GPU and minimize conflicts between concurrent tasks.

Signed-off-by: Flora Cui <flora.cui@amd.com>
Signed-off-by: Kent Russell <kent.russell@amd.com>
Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c58
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c41
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c30
9 files changed, 219 insertions, 1 deletion
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b5338bff8cef..297b36c26a05 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -392,6 +392,61 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
392 return retval; 392 return retval;
393} 393}
394 394
395static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
396 void *data)
397{
398 int retval;
399 const int max_num_cus = 1024;
400 struct kfd_ioctl_set_cu_mask_args *args = data;
401 struct queue_properties properties;
402 uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
403 size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
404
405 if ((args->num_cu_mask % 32) != 0) {
406 pr_debug("num_cu_mask 0x%x must be a multiple of 32",
407 args->num_cu_mask);
408 return -EINVAL;
409 }
410
411 properties.cu_mask_count = args->num_cu_mask;
412 if (properties.cu_mask_count == 0) {
413 pr_debug("CU mask cannot be 0");
414 return -EINVAL;
415 }
416
417 /* To prevent an unreasonably large CU mask size, set an arbitrary
418 * limit of max_num_cus bits. We can then just drop any CU mask bits
419 * past max_num_cus bits and just use the first max_num_cus bits.
420 */
421 if (properties.cu_mask_count > max_num_cus) {
422 pr_debug("CU mask cannot be greater than 1024 bits");
423 properties.cu_mask_count = max_num_cus;
424 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
425 }
426
427 properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
428 if (!properties.cu_mask)
429 return -ENOMEM;
430
431 retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
432 if (retval) {
433 pr_debug("Could not copy CU mask from userspace");
434 kfree(properties.cu_mask);
435 return -EFAULT;
436 }
437
438 mutex_lock(&p->mutex);
439
440 retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
441
442 mutex_unlock(&p->mutex);
443
444 if (retval)
445 kfree(properties.cu_mask);
446
447 return retval;
448}
449
395static int kfd_ioctl_set_memory_policy(struct file *filep, 450static int kfd_ioctl_set_memory_policy(struct file *filep,
396 struct kfd_process *p, void *data) 451 struct kfd_process *p, void *data)
397{ 452{
@@ -1557,6 +1612,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1557 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, 1612 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1558 kfd_ioctl_unmap_memory_from_gpu, 0), 1613 kfd_ioctl_unmap_memory_from_gpu, 0),
1559 1614
1615 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1616 kfd_ioctl_set_cu_mask, 0),
1617
1560}; 1618};
1561 1619
1562#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) 1620#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 2c8897e9073d..9f84b4d9fb88 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
123 prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; 123 prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
124 prop.eop_ring_buffer_address = kq->eop_gpu_addr; 124 prop.eop_ring_buffer_address = kq->eop_gpu_addr;
125 prop.eop_ring_buffer_size = PAGE_SIZE; 125 prop.eop_ring_buffer_size = PAGE_SIZE;
126 prop.cu_mask = NULL;
126 127
127 if (init_queue(&kq->queue, &prop) != 0) 128 if (init_queue(&kq->queue, &prop) != 0)
128 goto err_init_queue; 129 goto err_init_queue;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 4b8eb506642b..3bc25ab84f34 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -21,7 +21,7 @@
21 * 21 *
22 */ 22 */
23 23
24#include "kfd_priv.h" 24#include "kfd_mqd_manager.h"
25 25
26struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, 26struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
27 struct kfd_dev *dev) 27 struct kfd_dev *dev)
@@ -48,3 +48,42 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
48 48
49 return NULL; 49 return NULL;
50} 50}
51
52void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
53 const uint32_t *cu_mask, uint32_t cu_mask_count,
54 uint32_t *se_mask)
55{
56 struct kfd_cu_info cu_info;
57 uint32_t cu_per_sh[4] = {0};
58 int i, se, cu = 0;
59
60 mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);
61
62 if (cu_mask_count > cu_info.cu_active_number)
63 cu_mask_count = cu_info.cu_active_number;
64
65 for (se = 0; se < cu_info.num_shader_engines; se++)
66 for (i = 0; i < 4; i++)
67 cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]);
68
69 /* Symmetrically map cu_mask to all SEs:
70 * cu_mask[0] bit0 -> se_mask[0] bit0;
71 * cu_mask[0] bit1 -> se_mask[1] bit0;
72 * ... (if # SE is 4)
73 * cu_mask[0] bit4 -> se_mask[0] bit1;
74 * ...
75 */
76 se = 0;
77 for (i = 0; i < cu_mask_count; i++) {
78 if (cu_mask[i / 32] & (1 << (i % 32)))
79 se_mask[se] |= 1 << cu;
80
81 do {
82 se++;
83 if (se == cu_info.num_shader_engines) {
84 se = 0;
85 cu++;
86 }
87 } while (cu >= cu_per_sh[se] && cu < 32);
88 }
89}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 8972bcfbf701..4e84052d4e21 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -93,4 +93,8 @@ struct mqd_manager {
93 struct kfd_dev *dev; 93 struct kfd_dev *dev;
94}; 94};
95 95
96void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
97 const uint32_t *cu_mask, uint32_t cu_mask_count,
98 uint32_t *se_mask);
99
96#endif /* KFD_MQD_MANAGER_H_ */ 100#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 4872574f7a04..47243165a082 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -41,6 +41,31 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
41 return (struct cik_sdma_rlc_registers *)mqd; 41 return (struct cik_sdma_rlc_registers *)mqd;
42} 42}
43 43
44static void update_cu_mask(struct mqd_manager *mm, void *mqd,
45 struct queue_properties *q)
46{
47 struct cik_mqd *m;
48 uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
49
50 if (q->cu_mask_count == 0)
51 return;
52
53 mqd_symmetrically_map_cu_mask(mm,
54 q->cu_mask, q->cu_mask_count, se_mask);
55
56 m = get_mqd(mqd);
57 m->compute_static_thread_mgmt_se0 = se_mask[0];
58 m->compute_static_thread_mgmt_se1 = se_mask[1];
59 m->compute_static_thread_mgmt_se2 = se_mask[2];
60 m->compute_static_thread_mgmt_se3 = se_mask[3];
61
62 pr_debug("Update cu mask to %#x %#x %#x %#x\n",
63 m->compute_static_thread_mgmt_se0,
64 m->compute_static_thread_mgmt_se1,
65 m->compute_static_thread_mgmt_se2,
66 m->compute_static_thread_mgmt_se3);
67}
68
44static int init_mqd(struct mqd_manager *mm, void **mqd, 69static int init_mqd(struct mqd_manager *mm, void **mqd,
45 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, 70 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
46 struct queue_properties *q) 71 struct queue_properties *q)
@@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
196 if (q->format == KFD_QUEUE_FORMAT_AQL) 221 if (q->format == KFD_QUEUE_FORMAT_AQL)
197 m->cp_hqd_pq_control |= NO_UPDATE_RPTR; 222 m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
198 223
224 update_cu_mask(mm, mqd, q);
225
199 q->is_active = (q->queue_size > 0 && 226 q->is_active = (q->queue_size > 0 &&
200 q->queue_address != 0 && 227 q->queue_address != 0 &&
201 q->queue_percent > 0 && 228 q->queue_percent > 0 &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index ad5c9f80cccd..f5fc3675f21e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -41,6 +41,31 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
41 return (struct v9_sdma_mqd *)mqd; 41 return (struct v9_sdma_mqd *)mqd;
42} 42}
43 43
44static void update_cu_mask(struct mqd_manager *mm, void *mqd,
45 struct queue_properties *q)
46{
47 struct v9_mqd *m;
48 uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
49
50 if (q->cu_mask_count == 0)
51 return;
52
53 mqd_symmetrically_map_cu_mask(mm,
54 q->cu_mask, q->cu_mask_count, se_mask);
55
56 m = get_mqd(mqd);
57 m->compute_static_thread_mgmt_se0 = se_mask[0];
58 m->compute_static_thread_mgmt_se1 = se_mask[1];
59 m->compute_static_thread_mgmt_se2 = se_mask[2];
60 m->compute_static_thread_mgmt_se3 = se_mask[3];
61
62 pr_debug("update cu mask to %#x %#x %#x %#x\n",
63 m->compute_static_thread_mgmt_se0,
64 m->compute_static_thread_mgmt_se1,
65 m->compute_static_thread_mgmt_se2,
66 m->compute_static_thread_mgmt_se3);
67}
68
44static int init_mqd(struct mqd_manager *mm, void **mqd, 69static int init_mqd(struct mqd_manager *mm, void **mqd,
45 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, 70 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
46 struct queue_properties *q) 71 struct queue_properties *q)
@@ -198,6 +223,8 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
198 if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) 223 if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
199 m->cp_hqd_ctx_save_control = 0; 224 m->cp_hqd_ctx_save_control = 0;
200 225
226 update_cu_mask(mm, mqd, q);
227
201 q->is_active = (q->queue_size > 0 && 228 q->is_active = (q->queue_size > 0 &&
202 q->queue_address != 0 && 229 q->queue_address != 0 &&
203 q->queue_percent > 0 && 230 q->queue_percent > 0 &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 89e4242e43e7..b81fda3754da 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -43,6 +43,31 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
43 return (struct vi_sdma_mqd *)mqd; 43 return (struct vi_sdma_mqd *)mqd;
44} 44}
45 45
46static void update_cu_mask(struct mqd_manager *mm, void *mqd,
47 struct queue_properties *q)
48{
49 struct vi_mqd *m;
50 uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
51
52 if (q->cu_mask_count == 0)
53 return;
54
55 mqd_symmetrically_map_cu_mask(mm,
56 q->cu_mask, q->cu_mask_count, se_mask);
57
58 m = get_mqd(mqd);
59 m->compute_static_thread_mgmt_se0 = se_mask[0];
60 m->compute_static_thread_mgmt_se1 = se_mask[1];
61 m->compute_static_thread_mgmt_se2 = se_mask[2];
62 m->compute_static_thread_mgmt_se3 = se_mask[3];
63
64 pr_debug("Update cu mask to %#x %#x %#x %#x\n",
65 m->compute_static_thread_mgmt_se0,
66 m->compute_static_thread_mgmt_se1,
67 m->compute_static_thread_mgmt_se2,
68 m->compute_static_thread_mgmt_se3);
69}
70
46static int init_mqd(struct mqd_manager *mm, void **mqd, 71static int init_mqd(struct mqd_manager *mm, void **mqd,
47 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, 72 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
48 struct queue_properties *q) 73 struct queue_properties *q)
@@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
196 atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | 221 atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
197 mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; 222 mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
198 223
224 update_cu_mask(mm, mqd, q);
225
199 q->is_active = (q->queue_size > 0 && 226 q->is_active = (q->queue_size > 0 &&
200 q->queue_address != 0 && 227 q->queue_address != 0 &&
201 q->queue_percent > 0 && 228 q->queue_percent > 0 &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ca83254719fc..f971710f1c91 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -422,6 +422,9 @@ struct queue_properties {
422 uint32_t ctl_stack_size; 422 uint32_t ctl_stack_size;
423 uint64_t tba_addr; 423 uint64_t tba_addr;
424 uint64_t tma_addr; 424 uint64_t tma_addr;
425 /* Relevant for CU */
426 uint32_t cu_mask_count; /* Must be a multiple of 32 */
427 uint32_t *cu_mask;
425}; 428};
426 429
427/** 430/**
@@ -872,6 +875,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
872int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); 875int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
873int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, 876int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
874 struct queue_properties *p); 877 struct queue_properties *p);
878int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
879 struct queue_properties *p);
875struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, 880struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
876 unsigned int qid); 881 unsigned int qid);
877 882
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index eb4e5fb4f2f2..c8cad9c078ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -325,6 +325,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
325 if (retval != -ETIME) 325 if (retval != -ETIME)
326 goto err_destroy_queue; 326 goto err_destroy_queue;
327 } 327 }
328 kfree(pqn->q->properties.cu_mask);
329 pqn->q->properties.cu_mask = NULL;
328 uninit_queue(pqn->q); 330 uninit_queue(pqn->q);
329 } 331 }
330 332
@@ -365,6 +367,34 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
365 return 0; 367 return 0;
366} 368}
367 369
370int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
371 struct queue_properties *p)
372{
373 int retval;
374 struct process_queue_node *pqn;
375
376 pqn = get_queue_by_qid(pqm, qid);
377 if (!pqn) {
378 pr_debug("No queue %d exists for update operation\n", qid);
379 return -EFAULT;
380 }
381
382 /* Free the old CU mask memory if it is already allocated, then
383 * allocate memory for the new CU mask.
384 */
385 kfree(pqn->q->properties.cu_mask);
386
387 pqn->q->properties.cu_mask_count = p->cu_mask_count;
388 pqn->q->properties.cu_mask = p->cu_mask;
389
390 retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
391 pqn->q);
392 if (retval != 0)
393 return retval;
394
395 return 0;
396}
397
368struct kernel_queue *pqm_get_kernel_queue( 398struct kernel_queue *pqm_get_kernel_queue(
369 struct process_queue_manager *pqm, 399 struct process_queue_manager *pqm,
370 unsigned int qid) 400 unsigned int qid)