Diffstat (limited to 'drivers')

 drivers/gpu/drm/amd/amdkfd/Kconfig                        |    2
 drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c          |    8
 drivers/gpu/drm/amd/amdkfd/cik_int.h                      |    3
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c                  |    9
 drivers/gpu/drm/amd/amdkfd/kfd_device.c                   |    2
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c     |   49
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h     |   11
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c |   20
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c  |   20
 drivers/gpu/drm/amd/amdkfd/kfd_events.c                   |  615
 drivers/gpu/drm/amd/amdkfd/kfd_events.h                   |   18
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c                |   83
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c          |   34
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c           |    7
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                     |   41
 drivers/gpu/drm/amd/amdkfd/kfd_process.c                  |   72
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c    |    9
 drivers/gpu/drm/radeon/Makefile                           |    3
 drivers/gpu/drm/radeon/cik.c                              |   14
 drivers/gpu/drm/radeon/cikd.h                             |    2
 drivers/gpu/drm/radeon/radeon.h                           |    3
 drivers/gpu/drm/radeon/radeon_drv.c                       |   10
 drivers/gpu/drm/radeon/radeon_kfd.c                       |  901
 drivers/gpu/drm/radeon/radeon_kfd.h                       |   47
 drivers/gpu/drm/radeon/radeon_kms.c                       |    7
 25 files changed, 449 insertions(+), 1541 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index e13c67c8d2c0..bc5a2945bd2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,6 @@
 
 config HSA_AMD
 	tristate "HSA kernel driver for AMD GPU devices"
-	depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
+	depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
 	help
 	  Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 211fc48697fa..3d5ccb3755d4 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -36,6 +36,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 	/* Do not process in ISR, just request it to be forwarded to WQ. */
 	return (pasid != 0) &&
 		(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+		ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
 		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
 		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
 }
41} 42}
@@ -46,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
 	unsigned int pasid;
 	const struct cik_ih_ring_entry *ihre =
 			(const struct cik_ih_ring_entry *)ih_ring_entry;
+	uint32_t context_id = ihre->data & 0xfffffff;
 
 	pasid = (ihre->ring_id & 0xffff0000) >> 16;
 
@@ -53,9 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
 		return;
 
 	if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)
-		kfd_signal_event_interrupt(pasid, 0, 0);
+		kfd_signal_event_interrupt(pasid, context_id, 28);
+	else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP)
+		kfd_signal_event_interrupt(pasid, context_id, 28);
 	else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
-		kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8);
+		kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
 	else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
 		kfd_signal_hw_exception_event(pasid);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h
index 79a16d24c1b8..109298b9d507 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
@@ -32,9 +32,10 @@ struct cik_ih_ring_entry {
 	uint32_t reserved;
 };
 
-#define CIK_INTSRC_DEQUEUE_COMPLETE	0xC6
 #define CIK_INTSRC_CP_END_OF_PIPE	0xB5
 #define CIK_INTSRC_CP_BAD_OPCODE	0xB7
+#define CIK_INTSRC_DEQUEUE_COMPLETE	0xC6
+#define CIK_INTSRC_SDMA_TRAP		0xE0
 #define CIK_INTSRC_SQ_INTERRUPT_MSG	0xEF
 
 #endif
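
Aside: the third argument to kfd_signal_event_interrupt() is the number of valid bits in the partial event ID carried by the interrupt payload. CP end-of-pipe and SDMA trap payloads carry 28 valid bits, while SQ s_sendmsg carries only 8, so the event code may have to scan several candidate slots. A minimal sketch of that candidate walk, simplified from lookup_signaled_event_by_partial_id later in this diff (slot_signaled() is hypothetical shorthand for the UNSIGNALED_EVENT_SLOT check in the real code):

	/* Sketch: with N valid bits, every ID congruent to partial_id
	 * modulo 2^N is a candidate; step through them until a
	 * signaled slot is found.
	 */
	uint32_t id;

	for (id = partial_id; id < KFD_SIGNAL_EVENT_LIMIT; id += 1U << valid_id_bits)
		if (slot_signaled(id))
			break;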
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 0ef82b229754..505d39156acd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -450,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
 		return -EINVAL;
 	}
 
-	mutex_lock(kfd_get_dbgmgr_mutex());
 	mutex_lock(&p->mutex);
+	mutex_lock(kfd_get_dbgmgr_mutex());
 
 	/*
 	 * make sure that we have pdd, if this the first queue created for
@@ -479,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
 	}
 
 out:
-	mutex_unlock(&p->mutex);
 	mutex_unlock(kfd_get_dbgmgr_mutex());
+	mutex_unlock(&p->mutex);
 
 	return status;
 }
@@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
 			void *data)
 {
 	struct kfd_ioctl_wait_events_args *args = data;
-	enum kfd_event_wait_result wait_result;
 	int err;
 
 	err = kfd_wait_on_events(p, args->num_events,
 			(void __user *)args->events_ptr,
 			(args->wait_for_all != 0),
-			args->timeout, &wait_result);
-
-	args->wait_result = wait_result;
+			args->timeout, &args->wait_result);
 
 	return err;
 }
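
The two kfd_ioctl_dbg_register hunks above are a lock-ordering fix: the process mutex is now always taken before the debug-manager mutex and released in the reverse order. A minimal sketch of why consistent ordering matters:

	/* Sketch: if one task takes A then B while another takes B
	 * then A, each can end up holding one lock while blocking on
	 * the other (ABBA deadlock). Ordering every path the same
	 * way, as the hunks above do, rules that interleaving out.
	 */
	mutex_lock(&p->mutex);                 /* lock A: always first */
	mutex_lock(kfd_get_dbgmgr_mutex());    /* lock B: always second */
	/* ... critical section ... */
	mutex_unlock(kfd_get_dbgmgr_mutex());  /* release in reverse order */
	mutex_unlock(&p->mutex);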
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 46049f005b02..621a3b53a038 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -403,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 	if (kfd->interrupts_active
 	    && interrupt_is_wanted(kfd, ih_ring_entry)
 	    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
-		schedule_work(&kfd->interrupt_work);
+		queue_work(kfd->ih_wq, &kfd->interrupt_work);
 
 	spin_unlock(&kfd->interrupt_lock);
 }
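
queue_work() here targets a dedicated kfd->ih_wq instead of the shared system workqueue that schedule_work() uses. The workqueue itself is set up in kfd_interrupt.c (part of this diffstat but not shown on this page); a hedged sketch of the usual lifecycle, assuming a high-priority single-threaded queue (the exact flags in kfd_interrupt.c may differ):

	/* init: create the queue and bind the work item */
	kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
	INIT_WORK(&kfd->interrupt_work, interrupt_wq);

	/* hot path, as in kgd2kfd_interrupt() above */
	queue_work(kfd->ih_wq, &kfd->interrupt_work);

	/* teardown: drain pending work, then free the queue */
	flush_workqueue(kfd->ih_wq);
	destroy_workqueue(kfd->ih_wq);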
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index da3b74315acf..e202921c150e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 	if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
 		retval = unmap_queues_cpsch(dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-		if (retval != 0) {
+		if (retval) {
 			pr_err("unmap queue failed\n");
 			goto out_unlock;
 		}
-	} else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-			prev_active &&
+	} else if (prev_active &&
 		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
 		retval = mqd->destroy_mqd(mqd, q->mqd,
@@ -408,24 +407,25 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 
 	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
 
-	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
-		retval = map_queues_cpsch(dqm);
-	else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-		 q->properties.is_active &&
-		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
-		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
-		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
-				       &q->properties, q->process->mm);
-
 	/*
-	 * check active state vs. the previous state
-	 * and modify counter accordingly
+	 * check active state vs. the previous state and modify
+	 * counter accordingly. map_queues_cpsch uses the
+	 * dqm->queue_count to determine whether a new runlist must be
+	 * uploaded.
 	 */
 	if (q->properties.is_active && !prev_active)
 		dqm->queue_count++;
 	else if (!q->properties.is_active && prev_active)
 		dqm->queue_count--;
 
+	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
+		retval = map_queues_cpsch(dqm);
+	else if (q->properties.is_active &&
+		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
+		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
+				       &q->properties, q->process->mm);
+
 out_unlock:
 	mutex_unlock(&dqm->lock);
 	return retval;
@@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager *dqm,
 	mutex_lock(&dqm->lock);
 	list_add(&n->list, &dqm->queues);
 
-	retval = dqm->ops_asic_specific.register_process(dqm, qpd);
+	retval = dqm->asic_ops.update_qpd(dqm, qpd);
 
 	dqm->processes_count++;
 
@@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
 	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
 
-	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
+	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (retval)
@@ -696,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager *dqm)
 
 static int initialize_cpsch(struct device_queue_manager *dqm)
 {
-	int retval;
-
 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
 
 	mutex_init(&dqm->lock);
@@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	dqm->sdma_queue_count = 0;
 	dqm->active_runlist = false;
 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
-	retval = dqm->ops_asic_specific.initialize(dqm);
-	if (retval)
-		mutex_destroy(&dqm->lock);
 
-	return retval;
+	return 0;
 }
 
 static int start_cpsch(struct device_queue_manager *dqm)
@@ -835,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		retval = allocate_sdma_queue(dqm, &q->sdma_id);
-		if (retval != 0)
+		if (retval)
 			goto out;
 		q->properties.sdma_queue_id =
 			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
@@ -850,7 +845,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 		goto out;
 	}
 
-	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
+	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (retval)
@@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
 		qpd->sh_mem_ape1_limit = limit >> 16;
 	}
 
-	retval = dqm->ops_asic_specific.set_cache_memory_policy(
+	retval = dqm->asic_ops.set_cache_memory_policy(
 			dqm,
 			qpd,
 			default_policy,
@@ -1270,11 +1265,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 
 	switch (dev->device_info->asic_family) {
 	case CHIP_CARRIZO:
-		device_queue_manager_init_vi(&dqm->ops_asic_specific);
+		device_queue_manager_init_vi(&dqm->asic_ops);
 		break;
 
 	case CHIP_KAVERI:
-		device_queue_manager_init_cik(&dqm->ops_asic_specific);
+		device_queue_manager_init_cik(&dqm->asic_ops);
 		break;
 	default:
 		WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 31c2b1f9d320..5b77cb69f732 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -128,9 +128,8 @@ struct device_queue_manager_ops {
 };
 
 struct device_queue_manager_asic_ops {
-	int	(*register_process)(struct device_queue_manager *dqm,
+	int	(*update_qpd)(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
-	int	(*initialize)(struct device_queue_manager *dqm);
 	bool	(*set_cache_memory_policy)(struct device_queue_manager *dqm,
 				   struct qcm_process_device *qpd,
 				   enum cache_policy default_policy,
@@ -156,7 +155,7 @@ struct device_queue_manager_asic_ops {
 
 struct device_queue_manager {
 	struct device_queue_manager_ops ops;
-	struct device_queue_manager_asic_ops ops_asic_specific;
+	struct device_queue_manager_asic_ops asic_ops;
 
 	struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
 	struct packet_manager packets;
@@ -179,8 +178,10 @@ struct device_queue_manager {
 	bool active_runlist;
 };
 
-void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
-void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
+void device_queue_manager_init_cik(
+		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_vi(
+		struct device_queue_manager_asic_ops *asic_ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 unsigned int get_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 72c3cbabc0a7..28e48c90c596 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -32,18 +32,17 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 		   enum cache_policy alternate_policy,
 		   void __user *alternate_aperture_base,
 		   uint64_t alternate_aperture_size);
-static int register_process_cik(struct device_queue_manager *dqm,
+static int update_qpd_cik(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd);
-static int initialize_cpsch_cik(struct device_queue_manager *dqm);
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 				struct qcm_process_device *qpd);
 
-void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops)
+void device_queue_manager_init_cik(
+		struct device_queue_manager_asic_ops *asic_ops)
 {
-	ops->set_cache_memory_policy = set_cache_memory_policy_cik;
-	ops->register_process = register_process_cik;
-	ops->initialize = initialize_cpsch_cik;
-	ops->init_sdma_vm = init_sdma_vm;
+	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
+	asic_ops->update_qpd = update_qpd_cik;
+	asic_ops->init_sdma_vm = init_sdma_vm;
 }
 
 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
@@ -99,7 +98,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 	return true;
 }
 
-static int register_process_cik(struct device_queue_manager *dqm,
+static int update_qpd_cik(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd)
 {
 	struct kfd_process_device *pdd;
@@ -148,8 +147,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 
 	q->properties.sdma_vm_addr = value;
 }
-
-static int initialize_cpsch_cik(struct device_queue_manager *dqm)
-{
-	return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 40e9ddd096cd..2fbce57a2f21 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -33,18 +33,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 		   enum cache_policy alternate_policy,
 		   void __user *alternate_aperture_base,
 		   uint64_t alternate_aperture_size);
-static int register_process_vi(struct device_queue_manager *dqm,
+static int update_qpd_vi(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd);
-static int initialize_cpsch_vi(struct device_queue_manager *dqm);
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 				struct qcm_process_device *qpd);
 
-void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
+void device_queue_manager_init_vi(
+		struct device_queue_manager_asic_ops *asic_ops)
 {
-	ops->set_cache_memory_policy = set_cache_memory_policy_vi;
-	ops->register_process = register_process_vi;
-	ops->initialize = initialize_cpsch_vi;
-	ops->init_sdma_vm = init_sdma_vm;
+	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
+	asic_ops->update_qpd = update_qpd_vi;
+	asic_ops->init_sdma_vm = init_sdma_vm;
 }
 
 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
@@ -104,7 +103,7 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 	return true;
 }
 
-static int register_process_vi(struct device_queue_manager *dqm,
+static int update_qpd_vi(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd)
 {
 	struct kfd_process_device *pdd;
@@ -160,8 +159,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 
 	q->properties.sdma_vm_addr = value;
 }
-
-static int initialize_cpsch_vi(struct device_queue_manager *dqm)
-{
-	return 0;
-}
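
Taken together, the device_queue_manager changes shrink the per-ASIC hook table: register_process becomes update_qpd, and the empty initialize stubs disappear entirely. The dispatch pattern, condensed from the hunks above:

	/* Condensed from the hunks above: the table is filled once
	 * per ASIC family at init, then called through dqm->asic_ops.
	 */
	switch (dev->device_info->asic_family) {
	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;
	}

	retval = dqm->asic_ops.update_qpd(dqm, qpd);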
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 944abfad39c1..cb92d4b72400 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -24,8 +24,8 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
 #include <linux/uaccess.h>
-#include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/memory.h>
 #include "kfd_priv.h"
@@ -33,185 +33,89 @@
 #include <linux/device.h>
 
 /*
- * A task can only be on a single wait_queue at a time, but we need to support
- * waiting on multiple events (any/all).
- * Instead of each event simply having a wait_queue with sleeping tasks, it
- * has a singly-linked list of tasks.
- * A thread that wants to sleep creates an array of these, one for each event
- * and adds one to each event's waiter chain.
+ * Wrapper around wait_queue_entry_t
  */
 struct kfd_event_waiter {
-	struct list_head waiters;
-	struct task_struct *sleeping_task;
-
-	/* Transitions to true when the event this belongs to is signaled. */
-	bool activated;
-
-	/* Event */
-	struct kfd_event *event;
-	uint32_t input_index;
+	wait_queue_entry_t wait;
+	struct kfd_event *event; /* Event to wait for */
+	bool activated;		 /* Becomes true when event is signaled */
 };
 
 /*
- * Over-complicated pooled allocator for event notification slots.
- *
  * Each signal event needs a 64-bit signal slot where the signaler will write
- * a 1 before sending an interrupt.l (This is needed because some interrupts
+ * a 1 before sending an interrupt. (This is needed because some interrupts
  * do not contain enough spare data bits to identify an event.)
- * We get whole pages from vmalloc and map them to the process VA.
- * Individual signal events are then allocated a slot in a page.
+ * We get whole pages and map them to the process VA.
+ * Individual signal events use their event_id as slot index.
  */
-
-struct signal_page {
-	struct list_head event_pages;	/* kfd_process.signal_event_pages */
+struct kfd_signal_page {
 	uint64_t *kernel_address;
 	uint64_t __user *user_address;
-	uint32_t page_index;		/* Index into the mmap aperture. */
-	unsigned int free_slots;
-	unsigned long used_slot_bitmap[0];
 };
 
-#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
-#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE)
-#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1)
-#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \
-				SLOT_BITMAP_SIZE * sizeof(long))
-
-/*
- * For signal events, the event ID is used as the interrupt user data.
- * For SQ s_sendmsg interrupts, this is limited to 8 bits.
- */
-
-#define INTERRUPT_DATA_BITS 8
-#define SIGNAL_EVENT_ID_SLOT_SHIFT 0
 
-static uint64_t *page_slots(struct signal_page *page)
+static uint64_t *page_slots(struct kfd_signal_page *page)
 {
 	return page->kernel_address;
 }
 
-static bool allocate_free_slot(struct kfd_process *process,
-				struct signal_page **out_page,
-				unsigned int *out_slot_index)
-{
-	struct signal_page *page;
-
-	list_for_each_entry(page, &process->signal_event_pages, event_pages) {
-		if (page->free_slots > 0) {
-			unsigned int slot =
-				find_first_zero_bit(page->used_slot_bitmap,
-							SLOTS_PER_PAGE);
-
-			__set_bit(slot, page->used_slot_bitmap);
-			page->free_slots--;
-
-			page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
-
-			*out_page = page;
-			*out_slot_index = slot;
-
-			pr_debug("Allocated event signal slot in page %p, slot %d\n",
-					page, slot);
-
-			return true;
-		}
-	}
-
-	pr_debug("No free event signal slots were found for process %p\n",
-			process);
-
-	return false;
-}
-
-#define list_tail_entry(head, type, member) \
-	list_entry((head)->prev, type, member)
-
-static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
+static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 {
 	void *backing_store;
-	struct signal_page *page;
+	struct kfd_signal_page *page;
 
-	page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL);
+	page = kzalloc(sizeof(*page), GFP_KERNEL);
 	if (!page)
-		goto fail_alloc_signal_page;
+		return NULL;
 
-	page->free_slots = SLOTS_PER_PAGE;
-
-	backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+	backing_store = (void *) __get_free_pages(GFP_KERNEL,
 					get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
 	if (!backing_store)
 		goto fail_alloc_signal_store;
 
-	/* prevent user-mode info leaks */
+	/* Initialize all events to unsignaled */
 	memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
 		KFD_SIGNAL_EVENT_LIMIT * 8);
 
 	page->kernel_address = backing_store;
-
-	if (list_empty(&p->signal_event_pages))
-		page->page_index = 0;
-	else
-		page->page_index = list_tail_entry(&p->signal_event_pages,
-						   struct signal_page,
-						   event_pages)->page_index + 1;
-
 	pr_debug("Allocated new event signal page at %p, for process %p\n",
 			page, p);
-	pr_debug("Page index is %d\n", page->page_index);
 
-	list_add(&page->event_pages, &p->signal_event_pages);
-
-	return true;
+	return page;
 
 fail_alloc_signal_store:
 	kfree(page);
-fail_alloc_signal_page:
-	return false;
+	return NULL;
 }
 
-static bool allocate_event_notification_slot(struct file *devkfd,
-					struct kfd_process *p,
-					struct signal_page **page,
-					unsigned int *signal_slot_index)
+static int allocate_event_notification_slot(struct kfd_process *p,
+					    struct kfd_event *ev)
 {
-	bool ret;
+	int id;
 
-	ret = allocate_free_slot(p, page, signal_slot_index);
-	if (!ret) {
-		ret = allocate_signal_page(devkfd, p);
-		if (ret)
-			ret = allocate_free_slot(p, page, signal_slot_index);
+	if (!p->signal_page) {
+		p->signal_page = allocate_signal_page(p);
+		if (!p->signal_page)
+			return -ENOMEM;
+		/* Oldest user mode expects 256 event slots */
+		p->signal_mapped_size = 256*8;
 	}
 
-	return ret;
-}
-
-/* Assumes that the process's event_mutex is locked. */
-static void release_event_notification_slot(struct signal_page *page,
-						size_t slot_index)
-{
-	__clear_bit(slot_index, page->used_slot_bitmap);
-	page->free_slots++;
-
-	/* We don't free signal pages, they are retained by the process
-	 * and reused until it exits.
-	 */
-}
-
-static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
-						unsigned int page_index)
-{
-	struct signal_page *page;
-
 	/*
-	 * This is safe because we don't delete signal pages until the
-	 * process exits.
+	 * Compatibility with old user mode: Only use signal slots
+	 * user mode has mapped, may be less than
+	 * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+	 * of the event limit without breaking user mode.
 	 */
-	list_for_each_entry(page, &p->signal_event_pages, event_pages)
-		if (page->page_index == page_index)
-			return page;
+	id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
+		       GFP_KERNEL);
+	if (id < 0)
+		return id;
 
-	return NULL;
+	ev->event_id = id;
+	page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
+
+	return 0;
 }
 
 /*
@@ -220,99 +124,81 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
  */
 static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
 {
-	struct kfd_event *ev;
-
-	hash_for_each_possible(p->events, ev, events, id)
-		if (ev->event_id == id)
-			return ev;
-
-	return NULL;
+	return idr_find(&p->event_idr, id);
 }
 
-static u32 make_signal_event_id(struct signal_page *page,
-				unsigned int signal_slot_index)
-{
-	return page->page_index |
-			(signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
-}
-
-/*
- * Produce a kfd event id for a nonsignal event.
- * These are arbitrary numbers, so we do a sequential search through
- * the hash table for an unused number.
+/**
+ * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID
+ * @p:    Pointer to struct kfd_process
+ * @id:   ID to look up
+ * @bits: Number of valid bits in @id
+ *
+ * Finds the first signaled event with a matching partial ID. If no
+ * matching signaled event is found, returns NULL. In that case the
+ * caller should assume that the partial ID is invalid and do an
+ * exhaustive search of all signaled events.
+ *
+ * If multiple events with the same partial ID signal at the same
+ * time, they will be found one interrupt at a time, not necessarily
+ * in the same order the interrupts occurred. As long as the number of
+ * interrupts is correct, all signaled events will be seen by the
+ * driver.
  */
-static u32 make_nonsignal_event_id(struct kfd_process *p)
+static struct kfd_event *lookup_signaled_event_by_partial_id(
+	struct kfd_process *p, uint32_t id, uint32_t bits)
 {
-	u32 id;
+	struct kfd_event *ev;
 
-	for (id = p->next_nonsignal_event_id;
-		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-		lookup_event_by_id(p, id);
-		id++)
-		;
+	if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT)
+		return NULL;
 
-	if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
+	/* Fast path for the common case that @id is not a partial ID
+	 * and we only need a single lookup.
+	 */
+	if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) {
+		if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
+			return NULL;
 
-		/*
-		 * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
-		 * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
-		 * the first loop fails immediately and we proceed with the
-		 * wraparound loop below.
-		 */
-		p->next_nonsignal_event_id = id + 1;
-
-		return id;
+		return idr_find(&p->event_idr, id);
 	}
 
-	for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-		lookup_event_by_id(p, id);
-		id++)
-		;
-
-	if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
-		p->next_nonsignal_event_id = id + 1;
-		return id;
+	/* General case for partial IDs: Iterate over all matching IDs
+	 * and find the first one that has signaled.
+	 */
+	for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) {
+		if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
+			continue;
+
+		ev = idr_find(&p->event_idr, id);
 	}
 
-	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-	return 0;
-}
-
-static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
-						struct signal_page *page,
-						unsigned int signal_slot)
-{
-	return lookup_event_by_id(p, make_signal_event_id(page, signal_slot));
+	return ev;
 }
 
 static int create_signal_event(struct file *devkfd,
 				struct kfd_process *p,
 				struct kfd_event *ev)
 {
-	if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
+	int ret;
+
+	if (p->signal_mapped_size &&
+	    p->signal_event_count == p->signal_mapped_size / 8) {
 		if (!p->signal_event_limit_reached) {
 			pr_warn("Signal event wasn't created because limit was reached\n");
 			p->signal_event_limit_reached = true;
 		}
-		return -ENOMEM;
+		return -ENOSPC;
 	}
 
-	if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page,
-						&ev->signal_slot_index)) {
+	ret = allocate_event_notification_slot(p, ev);
+	if (ret) {
 		pr_warn("Signal event wasn't created because out of kernel memory\n");
-		return -ENOMEM;
+		return ret;
 	}
 
 	p->signal_event_count++;
 
-	ev->user_signal_address =
-			&ev->signal_page->user_address[ev->signal_slot_index];
-
-	ev->event_id = make_signal_event_id(ev->signal_page,
-						ev->signal_slot_index);
-
+	ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
 	pr_debug("Signal event number %zu created with id %d, address %p\n",
 			p->signal_event_count, ev->event_id,
 			ev->user_signal_address);
@@ -320,16 +206,20 @@ static int create_signal_event(struct file *devkfd,
 	return 0;
 }
 
-/*
- * No non-signal events are supported yet.
- * We create them as events that never signal.
- * Set event calls from user-mode are failed.
- */
 static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
 {
-	ev->event_id = make_nonsignal_event_id(p);
-	if (ev->event_id == 0)
-		return -ENOMEM;
+	/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+	 * intentional integer overflow to -1 without a compiler
+	 * warning. idr_alloc treats a negative value as "maximum
+	 * signed integer".
+	 */
+	int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+			   (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+			   GFP_KERNEL);
+
+	if (id < 0)
+		return id;
+	ev->event_id = id;
 
 	return 0;
 }
@@ -337,50 +227,47 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
 void kfd_event_init_process(struct kfd_process *p)
 {
 	mutex_init(&p->event_mutex);
-	hash_init(p->events);
-	INIT_LIST_HEAD(&p->signal_event_pages);
-	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+	idr_init(&p->event_idr);
+	p->signal_page = NULL;
 	p->signal_event_count = 0;
 }
 
 static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
 {
-	if (ev->signal_page) {
-		release_event_notification_slot(ev->signal_page,
-						ev->signal_slot_index);
-		p->signal_event_count--;
-	}
+	struct kfd_event_waiter *waiter;
 
-	/*
-	 * Abandon the list of waiters. Individual waiting threads will
-	 * clean up their own data.
-	 */
-	list_del(&ev->waiters);
+	/* Wake up pending waiters. They will return failure */
+	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
+		waiter->event = NULL;
+	wake_up_all(&ev->wq);
+
+	if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
+	    ev->type == KFD_EVENT_TYPE_DEBUG)
+		p->signal_event_count--;
 
-	hash_del(&ev->events);
+	idr_remove(&p->event_idr, ev->event_id);
 	kfree(ev);
 }
 
 static void destroy_events(struct kfd_process *p)
 {
 	struct kfd_event *ev;
-	struct hlist_node *tmp;
-	unsigned int hash_bkt;
+	uint32_t id;
 
-	hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
+	idr_for_each_entry(&p->event_idr, ev, id)
 		destroy_event(p, ev);
+	idr_destroy(&p->event_idr);
 }
 
 /*
  * We assume that the process is being destroyed and there is no need to
  * unmap the pages or keep bookkeeping data in order.
  */
-static void shutdown_signal_pages(struct kfd_process *p)
+static void shutdown_signal_page(struct kfd_process *p)
 {
-	struct signal_page *page, *tmp;
+	struct kfd_signal_page *page = p->signal_page;
 
-	list_for_each_entry_safe(page, tmp, &p->signal_event_pages,
-					event_pages) {
+	if (page) {
 		free_pages((unsigned long)page->kernel_address,
 				get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
 		kfree(page);
@@ -390,7 +277,7 @@ static void shutdown_signal_pages(struct kfd_process *p)
 void kfd_event_free_process(struct kfd_process *p)
 {
 	destroy_events(p);
-	shutdown_signal_pages(p);
+	shutdown_signal_page(p);
 }
 
 static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
@@ -419,7 +306,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 	ev->auto_reset = auto_reset;
 	ev->signaled = false;
 
-	INIT_LIST_HEAD(&ev->waiters);
+	init_waitqueue_head(&ev->wq);
 
 	*event_page_offset = 0;
 
@@ -430,10 +317,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 	case KFD_EVENT_TYPE_DEBUG:
 		ret = create_signal_event(devkfd, p, ev);
 		if (!ret) {
-			*event_page_offset = (ev->signal_page->page_index |
-					KFD_MMAP_EVENTS_MASK);
+			*event_page_offset = KFD_MMAP_EVENTS_MASK;
 			*event_page_offset <<= PAGE_SHIFT;
-			*event_slot_index = ev->signal_slot_index;
+			*event_slot_index = ev->event_id;
 		}
 		break;
 	default:
@@ -442,8 +328,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 	}
 
 	if (!ret) {
-		hash_add(p->events, &ev->events, ev->event_id);
-
 		*event_id = ev->event_id;
 		*event_trigger_data = ev->event_id;
 	} else {
@@ -477,19 +361,18 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
 static void set_event(struct kfd_event *ev)
 {
 	struct kfd_event_waiter *waiter;
-	struct kfd_event_waiter *next;
 
-	/* Auto reset if the list is non-empty and we're waking someone. */
-	ev->signaled = !ev->auto_reset || list_empty(&ev->waiters);
+	/* Auto reset if the list is non-empty and we're waking
+	 * someone. waitqueue_active is safe here because we're
+	 * protected by the p->event_mutex, which is also held when
+	 * updating the wait queues in kfd_wait_on_events.
+	 */
+	ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
 
-	list_for_each_entry_safe(waiter, next, &ev->waiters, waiters) {
+	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
 		waiter->activated = true;
 
-		/* _init because free_waiters will call list_del */
-		list_del_init(&waiter->waiters);
-
-		wake_up_process(waiter->sleeping_task);
-	}
+	wake_up_all(&ev->wq);
 }
 
 /* Assumes that p is current. */
@@ -538,13 +421,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
 
 static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
 {
-	page_slots(ev->signal_page)[ev->signal_slot_index] =
-			UNSIGNALED_EVENT_SLOT;
-}
-
-static bool is_slot_signaled(struct signal_page *page, unsigned int index)
-{
-	return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT;
+	page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
 }
 
 static void set_event_from_interrupt(struct kfd_process *p,
@@ -559,7 +436,7 @@ static void set_event_from_interrupt(struct kfd_process *p,
 void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 				uint32_t valid_id_bits)
 {
-	struct kfd_event *ev;
+	struct kfd_event *ev = NULL;
 
 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
@@ -573,26 +450,46 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 
 	mutex_lock(&p->event_mutex);
 
-	if (valid_id_bits >= INTERRUPT_DATA_BITS) {
-		/* Partial ID is a full ID. */
-		ev = lookup_event_by_id(p, partial_id);
+	if (valid_id_bits)
+		ev = lookup_signaled_event_by_partial_id(p, partial_id,
+							 valid_id_bits);
+	if (ev) {
 		set_event_from_interrupt(p, ev);
-	} else {
+	} else if (p->signal_page) {
 		/*
-		 * Partial ID is in fact partial. For now we completely
-		 * ignore it, but we could use any bits we did receive to
-		 * search faster.
+		 * Partial ID lookup failed. Assume that the event ID
+		 * in the interrupt payload was invalid and do an
+		 * exhaustive search of signaled events.
 		 */
-		struct signal_page *page;
-		unsigned int i;
+		uint64_t *slots = page_slots(p->signal_page);
+		uint32_t id;
 
-		list_for_each_entry(page, &p->signal_event_pages, event_pages)
-			for (i = 0; i < SLOTS_PER_PAGE; i++)
-				if (is_slot_signaled(page, i)) {
-					ev = lookup_event_by_page_slot(p,
-								page, i);
+		if (valid_id_bits)
+			pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
+					     partial_id, valid_id_bits);
+
+		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+			/* With relatively few events, it's faster to
+			 * iterate over the event IDR
+			 */
+			idr_for_each_entry(&p->event_idr, ev, id) {
+				if (id >= KFD_SIGNAL_EVENT_LIMIT)
+					break;
+
+				if (slots[id] != UNSIGNALED_EVENT_SLOT)
+					set_event_from_interrupt(p, ev);
+			}
+		} else {
+			/* With relatively many events, it's faster to
+			 * iterate over the signal slots and lookup
+			 * only signaled events from the IDR.
+			 */
+			for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+				if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+					ev = lookup_event_by_id(p, id);
 					set_event_from_interrupt(p, ev);
 				}
+		}
 	}
 
 	mutex_unlock(&p->event_mutex);
@@ -609,18 +506,16 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
 					GFP_KERNEL);
 
 	for (i = 0; (event_waiters) && (i < num_events) ; i++) {
-		INIT_LIST_HEAD(&event_waiters[i].waiters);
-		event_waiters[i].sleeping_task = current;
+		init_wait(&event_waiters[i].wait);
 		event_waiters[i].activated = false;
 	}
 
 	return event_waiters;
 }
 
-static int init_event_waiter(struct kfd_process *p,
+static int init_event_waiter_get_status(struct kfd_process *p,
 		struct kfd_event_waiter *waiter,
-		uint32_t event_id,
-		uint32_t input_index)
+		uint32_t event_id)
 {
 	struct kfd_event *ev = lookup_event_by_id(p, event_id);
@@ -628,38 +523,60 @@ static int init_event_waiter(struct kfd_process *p,
 		return -EINVAL;
 
 	waiter->event = ev;
-	waiter->input_index = input_index;
 	waiter->activated = ev->signaled;
 	ev->signaled = ev->signaled && !ev->auto_reset;
 
-	list_add(&waiter->waiters, &ev->waiters);
-
 	return 0;
 }
 
-static bool test_event_condition(bool all, uint32_t num_events,
+static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
+{
+	struct kfd_event *ev = waiter->event;
+
+	/* Only add to the wait list if we actually need to
+	 * wait on this event.
+	 */
+	if (!waiter->activated)
+		add_wait_queue(&ev->wq, &waiter->wait);
+}
+
+/* test_event_condition - Test condition of events being waited for
+ * @all: Return completion only if all events have signaled
+ * @num_events: Number of events to wait for
+ * @event_waiters: Array of event waiters, one per event
+ *
+ * Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have
+ * signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all)
+ * events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of
+ * the events have been destroyed.
+ */
+static uint32_t test_event_condition(bool all, uint32_t num_events,
 				struct kfd_event_waiter *event_waiters)
 {
 	uint32_t i;
 	uint32_t activated_count = 0;
 
 	for (i = 0; i < num_events; i++) {
+		if (!event_waiters[i].event)
+			return KFD_IOC_WAIT_RESULT_FAIL;
+
 		if (event_waiters[i].activated) {
 			if (!all)
-				return true;
+				return KFD_IOC_WAIT_RESULT_COMPLETE;
 
 			activated_count++;
 		}
 	}
 
-	return activated_count == num_events;
+	return activated_count == num_events ?
+		KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT;
 }
 
 /*
  * Copy event specific data, if defined.
  * Currently only memory exception events have additional data to copy to user
  */
-static bool copy_signaled_event_data(uint32_t num_events,
+static int copy_signaled_event_data(uint32_t num_events,
 		struct kfd_event_waiter *event_waiters,
 		struct kfd_event_data __user *data)
 {
@@ -673,15 +590,15 @@ static bool copy_signaled_event_data(uint32_t num_events,
 		waiter = &event_waiters[i];
 		event = waiter->event;
 		if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
-			dst = &data[waiter->input_index].memory_exception_data;
+			dst = &data[i].memory_exception_data;
 			src = &event->memory_exception_data;
 			if (copy_to_user(dst, src,
 				sizeof(struct kfd_hsa_memory_exception_data)))
-				return false;
+				return -EFAULT;
 		}
 	}
 
-	return true;
+	return 0;
 
 }
@@ -710,7 +627,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
 	uint32_t i;
 
 	for (i = 0; i < num_events; i++)
-		list_del(&waiters[i].waiters);
+		if (waiters[i].event)
+			remove_wait_queue(&waiters[i].event->wq,
+					  &waiters[i].wait);
 
 	kfree(waiters);
 }
@@ -718,38 +637,56 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
 int kfd_wait_on_events(struct kfd_process *p,
 		       uint32_t num_events, void __user *data,
 		       bool all, uint32_t user_timeout_ms,
-		       enum kfd_event_wait_result *wait_result)
+		       uint32_t *wait_result)
 {
 	struct kfd_event_data __user *events =
 			(struct kfd_event_data __user *) data;
 	uint32_t i;
 	int ret = 0;
+
 	struct kfd_event_waiter *event_waiters = NULL;
 	long timeout = user_timeout_to_jiffies(user_timeout_ms);
 
-	mutex_lock(&p->event_mutex);
-
 	event_waiters = alloc_event_waiters(num_events);
 	if (!event_waiters) {
 		ret = -ENOMEM;
-		goto fail;
+		goto out;
 	}
 
+	mutex_lock(&p->event_mutex);
+
 	for (i = 0; i < num_events; i++) {
 		struct kfd_event_data event_data;
 
 		if (copy_from_user(&event_data, &events[i],
 				sizeof(struct kfd_event_data))) {
 			ret = -EFAULT;
-			goto fail;
+			goto out_unlock;
 		}
 
-		ret = init_event_waiter(p, &event_waiters[i],
-				event_data.event_id, i);
+		ret = init_event_waiter_get_status(p, &event_waiters[i],
+				event_data.event_id);
 		if (ret)
-			goto fail;
+			goto out_unlock;
 	}
 
+	/* Check condition once. */
+	*wait_result = test_event_condition(all, num_events, event_waiters);
+	if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) {
+		ret = copy_signaled_event_data(num_events,
+					       event_waiters, events);
+		goto out_unlock;
+	} else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) {
+		/* This should not happen. Events shouldn't be
+		 * destroyed while we're holding the event_mutex
+		 */
+		goto out_unlock;
+	}
+
+	/* Add to wait lists if we need to wait. */
+	for (i = 0; i < num_events; i++)
+		init_event_waiter_add_to_waitlist(&event_waiters[i]);
+
 	mutex_unlock(&p->event_mutex);
 
 	while (true) {
@@ -771,62 +708,66 @@ int kfd_wait_on_events(struct kfd_process *p,
 			break;
 		}
 
-		if (test_event_condition(all, num_events, event_waiters)) {
-			if (copy_signaled_event_data(num_events,
-					event_waiters, events))
-				*wait_result = KFD_WAIT_COMPLETE;
-			else
-				*wait_result = KFD_WAIT_ERROR;
+		/* Set task state to interruptible sleep before
+		 * checking wake-up conditions. A concurrent wake-up
+		 * will put the task back into runnable state. In that
+		 * case schedule_timeout will not put the task to
+		 * sleep and we'll get a chance to re-check the
+		 * updated conditions almost immediately. Otherwise,
+		 * this race condition would lead to a soft hang or a
+		 * very long sleep.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		*wait_result = test_event_condition(all, num_events,
+						    event_waiters);
+		if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT)
 			break;
-		}
 
-		if (timeout <= 0) {
-			*wait_result = KFD_WAIT_TIMEOUT;
+		if (timeout <= 0)
 			break;
-		}
 
-		timeout = schedule_timeout_interruptible(timeout);
+		timeout = schedule_timeout(timeout);
 	}
 	__set_current_state(TASK_RUNNING);
 
+	/* copy_signaled_event_data may sleep. So this has to happen
+	 * after the task state is set back to RUNNING.
+	 */
+	if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
+		ret = copy_signaled_event_data(num_events,
+					       event_waiters, events);
+
 	mutex_lock(&p->event_mutex);
+out_unlock:
 	free_waiters(num_events, event_waiters);
 	mutex_unlock(&p->event_mutex);
-
-	return ret;
-
-fail:
-	if (event_waiters)
-		free_waiters(num_events, event_waiters);
-
-	mutex_unlock(&p->event_mutex);
-
-	*wait_result = KFD_WAIT_ERROR;
+out:
+	if (ret)
+		*wait_result = KFD_IOC_WAIT_RESULT_FAIL;
+	else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL)
+		ret = -EIO;
 
 	return ret;
 }
 
 int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 {
-
-	unsigned int page_index;
 	unsigned long pfn;
-	struct signal_page *page;
+	struct kfd_signal_page *page;
+	int ret;
 
-	/* check required size is logical */
-	if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
+	/* check required size doesn't exceed the allocated size */
+	if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) <
 			get_order(vma->vm_end - vma->vm_start)) {
 		pr_err("Event page mmap requested illegal size\n");
 		return -EINVAL;
 	}
 
-	page_index = vma->vm_pgoff;
-
-	page = lookup_signal_page_by_index(p, page_index);
+	page = p->signal_page;
 	if (!page) {
 		/* Probably KFD bug, but mmap is user-accessible. */
-		pr_debug("Signal page could not be found for page_index %u\n",
-				page_index);
+		pr_debug("Signal page could not be found\n");
 		return -EINVAL;
 	}
 
@@ -847,8 +788,12 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
847 page->user_address = (uint64_t __user *)vma->vm_start; 788 page->user_address = (uint64_t __user *)vma->vm_start;
848 789
849 /* mapping the page to user process */ 790 /* mapping the page to user process */
850 return remap_pfn_range(vma, vma->vm_start, pfn, 791 ret = remap_pfn_range(vma, vma->vm_start, pfn,
851 vma->vm_end - vma->vm_start, vma->vm_page_prot); 792 vma->vm_end - vma->vm_start, vma->vm_page_prot);
793 if (!ret)
794 p->signal_mapped_size = vma->vm_end - vma->vm_start;
795
796 return ret;
852} 797}
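Editor's note: the mmap path above follows the usual remap_pfn_range() recipe for exposing a kernel buffer to user space. A hedged sketch, assuming a physically contiguous buffer kaddr of MY_BUF_SIZE bytes (both names hypothetical):

static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = virt_to_phys(kaddr) >> PAGE_SHIFT;

	/* Refuse mappings larger than the backing allocation. */
	if (size > MY_BUF_SIZE)
		return -EINVAL;

	return remap_pfn_range(vma, vma->vm_start, pfn, size,
			       vma->vm_page_prot);
}

The KFD code additionally records the mapped size in signal_mapped_size on success, so later code can tell how much of the signal page user space actually mapped.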
853 798
854/* 799/*
@@ -860,12 +805,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
860{ 805{
861 struct kfd_hsa_memory_exception_data *ev_data; 806 struct kfd_hsa_memory_exception_data *ev_data;
862 struct kfd_event *ev; 807 struct kfd_event *ev;
863 int bkt; 808 uint32_t id;
864 bool send_signal = true; 809 bool send_signal = true;
865 810
866 ev_data = (struct kfd_hsa_memory_exception_data *) event_data; 811 ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
867 812
868 hash_for_each(p->events, bkt, ev, events) 813 id = KFD_FIRST_NONSIGNAL_EVENT_ID;
814 idr_for_each_entry_continue(&p->event_idr, ev, id)
869 if (ev->type == type) { 815 if (ev->type == type) {
870 send_signal = false; 816 send_signal = false;
871 dev_dbg(kfd_device, 817 dev_dbg(kfd_device,
@@ -904,14 +850,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
904 * running so the lookup function returns a locked process. 850 * running so the lookup function returns a locked process.
905 */ 851 */
906 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 852 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
853 struct mm_struct *mm;
907 854
908 if (!p) 855 if (!p)
909 return; /* Presumably process exited. */ 856 return; /* Presumably process exited. */
910 857
858 /* Take a safe reference to the mm_struct, which may otherwise
859 * disappear even while the kfd_process is still referenced.
860 */
861 mm = get_task_mm(p->lead_thread);
862 if (!mm) {
863 mutex_unlock(&p->mutex);
864 return; /* Process is exiting */
865 }
866
911 memset(&memory_exception_data, 0, sizeof(memory_exception_data)); 867 memset(&memory_exception_data, 0, sizeof(memory_exception_data));
912 868
913 down_read(&p->mm->mmap_sem); 869 down_read(&mm->mmap_sem);
914 vma = find_vma(p->mm, address); 870 vma = find_vma(mm, address);
915 871
916 memory_exception_data.gpu_id = dev->id; 872 memory_exception_data.gpu_id = dev->id;
917 memory_exception_data.va = address; 873 memory_exception_data.va = address;
@@ -937,7 +893,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
937 } 893 }
938 } 894 }
939 895
940 up_read(&p->mm->mmap_sem); 896 up_read(&mm->mmap_sem);
897 mmput(mm);
941 898
942 mutex_lock(&p->event_mutex); 899 mutex_lock(&p->event_mutex);
943 900
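Editor's note: the hunk above replaces direct p->mm dereferences with a counted reference. The underlying pattern, sketched standalone (task stands in for p->lead_thread):

	struct mm_struct *mm = get_task_mm(task);

	if (!mm)
		return;	/* address space already gone: process is exiting */

	down_read(&mm->mmap_sem);
	/* ... find_vma(mm, address) and other mm accesses ... */
	up_read(&mm->mmap_sem);
	mmput(mm);	/* drop the reference taken by get_task_mm() */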
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index 28f6838b1f4c..abca5bfebbff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -27,12 +27,17 @@
27#include <linux/hashtable.h> 27#include <linux/hashtable.h>
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/wait.h>
30#include "kfd_priv.h" 31#include "kfd_priv.h"
31#include <uapi/linux/kfd_ioctl.h> 32#include <uapi/linux/kfd_ioctl.h>
32 33
33#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U 34/*
34#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK 35 * IDR supports non-negative integer IDs. Small IDs are used for
35#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX 36 * signal events to match their signal slot. Use the upper half of the
37 * ID space for non-signal events.
38 */
39#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
40#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
36 41
37/* 42/*
38 * Written into kfd_signal_slot_t to indicate that the event is not signaled. 43 * Written into kfd_signal_slot_t to indicate that the event is not signaled.
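Editor's note: given the split ID space defined above, allocation presumably looks like the following sketch (the exact call sites live in kfd_events.c and may differ): a signal event pins its ID to its signal-slot index, while a non-signal event takes any free ID at or above KFD_FIRST_NONSIGNAL_EVENT_ID.

	/* Signal event: ID must equal its slot index. */
	id = idr_alloc(&p->event_idr, ev, slot, slot + 1, GFP_KERNEL);

	/* Non-signal event: any free ID in the upper half. idr_alloc()
	 * treats end <= 0 as "no upper bound", which here is equivalent
	 * to KFD_LAST_NONSIGNAL_EVENT_ID == INT_MAX.
	 */
	id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
		       0, GFP_KERNEL);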
@@ -46,9 +51,6 @@ struct kfd_event_waiter;
46struct signal_page; 51struct signal_page;
47 52
48struct kfd_event { 53struct kfd_event {
49 /* All events in process, rooted at kfd_process.events. */
50 struct hlist_node events;
51
52 u32 event_id; 54 u32 event_id;
53 55
54 bool signaled; 56 bool signaled;
@@ -56,11 +58,9 @@ struct kfd_event {
56 58
57 int type; 59 int type;
58 60
59 struct list_head waiters; /* List of kfd_event_waiter by waiters. */ 61 wait_queue_head_t wq; /* List of event waiters. */
60 62
61 /* Only for signal events. */ 63 /* Only for signal events. */
62 struct signal_page *signal_page;
63 unsigned int signal_slot_index;
64 uint64_t __user *user_signal_address; 64 uint64_t __user *user_signal_address;
65 65
66 /* type specific data */ 66 /* type specific data */
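Editor's note: with the hand-rolled waiter list replaced by a wait_queue_head_t, signalling an event reduces to the standard wake-up idiom. A sketch; the real signalling path in kfd_events.c also updates per-waiter bookkeeping:

static void set_event_sketch(struct kfd_event *ev)
{
	ev->signaled = true;
	wake_up_all(&ev->wq);	/* wake every task parked on this event */
}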
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 70b3a99cffc2..035c351f47c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -42,26 +42,26 @@
42 42
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/device.h> 44#include <linux/device.h>
45#include <linux/kfifo.h>
45#include "kfd_priv.h" 46#include "kfd_priv.h"
46 47
47#define KFD_INTERRUPT_RING_SIZE 1024 48#define KFD_IH_NUM_ENTRIES 8192
48 49
49static void interrupt_wq(struct work_struct *); 50static void interrupt_wq(struct work_struct *);
50 51
51int kfd_interrupt_init(struct kfd_dev *kfd) 52int kfd_interrupt_init(struct kfd_dev *kfd)
52{ 53{
53 void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, 54 int r;
54 kfd->device_info->ih_ring_entry_size, 55
55 GFP_KERNEL); 56 r = kfifo_alloc(&kfd->ih_fifo,
56 if (!interrupt_ring) 57 KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
57 return -ENOMEM; 58 GFP_KERNEL);
58 59 if (r) {
59 kfd->interrupt_ring = interrupt_ring; 60 dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
60 kfd->interrupt_ring_size = 61 return r;
61 KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; 62 }
62 atomic_set(&kfd->interrupt_ring_wptr, 0);
63 atomic_set(&kfd->interrupt_ring_rptr, 0);
64 63
64 kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
65 spin_lock_init(&kfd->interrupt_lock); 65 spin_lock_init(&kfd->interrupt_lock);
66 66
67 INIT_WORK(&kfd->interrupt_work, interrupt_wq); 67 INIT_WORK(&kfd->interrupt_work, interrupt_wq);
@@ -92,74 +92,47 @@ void kfd_interrupt_exit(struct kfd_dev *kfd)
92 spin_unlock_irqrestore(&kfd->interrupt_lock, flags); 92 spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
93 93
94 /* 94 /*
 95 * Flush_scheduled_work ensures that there are no outstanding 95 * flush_workqueue ensures that there are no outstanding
 96 * work-queue items that will access interrupt_ring. New work items 96 * work-queue items that will access ih_fifo. New work items
97 * can't be created because we stopped interrupt handling above. 97 * can't be created because we stopped interrupt handling above.
98 */ 98 */
99 flush_scheduled_work(); 99 flush_workqueue(kfd->ih_wq);
100 100
101 kfree(kfd->interrupt_ring); 101 kfifo_free(&kfd->ih_fifo);
102} 102}
103 103
104/* 104/*
105 * This assumes that it can't be called concurrently with itself 105 * Assumption: single reader/writer. This function is not re-entrant
106 * but only with dequeue_ih_ring_entry.
107 */ 106 */
108bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) 107bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
109{ 108{
110 unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); 109 int count;
111 unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
112 110
113 if ((rptr - wptr) % kfd->interrupt_ring_size == 111 count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
114 kfd->device_info->ih_ring_entry_size) { 112 kfd->device_info->ih_ring_entry_size);
115 /* This is very bad, the system is likely to hang. */ 113 if (count != kfd->device_info->ih_ring_entry_size) {
116 dev_err_ratelimited(kfd_chardev(), 114 dev_err_ratelimited(kfd_chardev(),
117 "Interrupt ring overflow, dropping interrupt.\n"); 115 "Interrupt ring overflow, dropping interrupt %d\n",
116 count);
118 return false; 117 return false;
119 } 118 }
120 119
121 memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
122 kfd->device_info->ih_ring_entry_size);
123
124 wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
125 kfd->interrupt_ring_size;
126 smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
127 atomic_set(&kfd->interrupt_ring_wptr, wptr);
128
129 return true; 120 return true;
130} 121}
131 122
132/* 123/*
133 * This assumes that it can't be called concurrently with itself 124 * Assumption: single reader/writer. This function is not re-entrant
134 * but only with enqueue_ih_ring_entry.
135 */ 125 */
136static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) 126static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
137{ 127{
138 /* 128 int count;
139 * Assume that wait queues have an implicit barrier, i.e. anything that
140 * happened in the ISR before it queued work is visible.
141 */
142
143 unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
144 unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
145 129
146 if (rptr == wptr) 130 count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
147 return false; 131 kfd->device_info->ih_ring_entry_size);
148
149 memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
150 kfd->device_info->ih_ring_entry_size);
151
152 rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
153 kfd->interrupt_ring_size;
154 132
155 /* 133 WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
156 * Ensure the rptr write update is not visible until
157 * memcpy has finished reading.
158 */
159 smp_mb();
160 atomic_set(&kfd->interrupt_ring_rptr, rptr);
161 134
162 return true; 135 return count == kfd->device_info->ih_ring_entry_size;
163} 136}
164 137
165static void interrupt_wq(struct work_struct *work) 138static void interrupt_wq(struct work_struct *work)
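Editor's note: the rewrite above trades the hand-rolled ring and its explicit memory barriers for a kfifo, which is already safe for one producer and one consumer without locking. A condensed sketch of the same pattern with illustrative names:

#include <linux/kfifo.h>

#define REC_SIZE	16	/* e.g. ih_ring_entry_size */
#define NUM_REC		8192

static struct kfifo fifo;

static int init_fifo(void)
{
	/* Size in bytes; kfifo rounds up to a power of two. */
	return kfifo_alloc(&fifo, NUM_REC * REC_SIZE, GFP_KERNEL);
}

static bool push_rec(const void *rec)	/* single producer (ISR side) */
{
	return kfifo_in(&fifo, rec, REC_SIZE) == REC_SIZE;
}

static bool pop_rec(void *rec)		/* single consumer (worker) */
{
	return kfifo_out(&fifo, rec, REC_SIZE) == REC_SIZE;
}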
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 44ffd23348fc..4859d263fa2a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
189 if (q->format == KFD_QUEUE_FORMAT_AQL) 189 if (q->format == KFD_QUEUE_FORMAT_AQL)
190 m->cp_hqd_pq_control |= NO_UPDATE_RPTR; 190 m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
191 191
192 q->is_active = false; 192 q->is_active = (q->queue_size > 0 &&
193 if (q->queue_size > 0 &&
194 q->queue_address != 0 && 193 q->queue_address != 0 &&
195 q->queue_percent > 0) { 194 q->queue_percent > 0);
196 q->is_active = true;
197 }
198 195
199 return 0; 196 return 0;
200} 197}
@@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
215 m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); 212 m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
216 m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); 213 m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
217 m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); 214 m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
218 m->sdma_rlc_doorbell = q->doorbell_off << 215 m->sdma_rlc_doorbell =
219 SDMA0_RLC0_DOORBELL__OFFSET__SHIFT | 216 q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
220 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
221 217
222 m->sdma_rlc_virtual_addr = q->sdma_vm_addr; 218 m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
223 219
224 m->sdma_engine_id = q->sdma_engine_id; 220 m->sdma_engine_id = q->sdma_engine_id;
225 m->sdma_queue_id = q->sdma_queue_id; 221 m->sdma_queue_id = q->sdma_queue_id;
226 222
227 q->is_active = false; 223 q->is_active = (q->queue_size > 0 &&
228 if (q->queue_size > 0 &&
229 q->queue_address != 0 && 224 q->queue_address != 0 &&
230 q->queue_percent > 0) { 225 q->queue_percent > 0);
231 m->sdma_rlc_rb_cntl |=
232 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
233
234 q->is_active = true;
235 }
236 226
237 return 0; 227 return 0;
238} 228}
@@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
359 m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); 349 m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
360 m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); 350 m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
361 m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); 351 m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
362 m->cp_hqd_pq_doorbell_control = DOORBELL_EN | 352 m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
363 DOORBELL_OFFSET(q->doorbell_off);
364 353
365 m->cp_hqd_vmid = q->vmid; 354 m->cp_hqd_vmid = q->vmid;
366 355
367 m->cp_hqd_active = 0; 356 q->is_active = (q->queue_size > 0 &&
368 q->is_active = false;
369 if (q->queue_size > 0 &&
370 q->queue_address != 0 && 357 q->queue_address != 0 &&
371 q->queue_percent > 0) { 358 q->queue_percent > 0);
372 m->cp_hqd_active = 1;
373 q->is_active = true;
374 }
375 359
376 return 0; 360 return 0;
377} 361}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 73cbfe186dd2..4ea854f9007b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
163 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; 163 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
164 } 164 }
165 165
166 q->is_active = false; 166 q->is_active = (q->queue_size > 0 &&
167 if (q->queue_size > 0 &&
168 q->queue_address != 0 && 167 q->queue_address != 0 &&
169 q->queue_percent > 0) { 168 q->queue_percent > 0);
170 q->is_active = true;
171 }
172 169
173 return 0; 170 return 0;
174} 171}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7d86ec9790d3..9e4134c5b481 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -31,6 +31,8 @@
31#include <linux/workqueue.h> 31#include <linux/workqueue.h>
32#include <linux/spinlock.h> 32#include <linux/spinlock.h>
33#include <linux/kfd_ioctl.h> 33#include <linux/kfd_ioctl.h>
34#include <linux/idr.h>
35#include <linux/kfifo.h>
34#include <kgd_kfd_interface.h> 36#include <kgd_kfd_interface.h>
35 37
36#include "amd_shared.h" 38#include "amd_shared.h"
@@ -181,10 +183,8 @@ struct kfd_dev {
181 unsigned int gtt_sa_num_of_chunks; 183 unsigned int gtt_sa_num_of_chunks;
182 184
183 /* Interrupts */ 185 /* Interrupts */
184 void *interrupt_ring; 186 struct kfifo ih_fifo;
185 size_t interrupt_ring_size; 187 struct workqueue_struct *ih_wq;
186 atomic_t interrupt_ring_rptr;
187 atomic_t interrupt_ring_wptr;
188 struct work_struct interrupt_work; 188 struct work_struct interrupt_work;
189 spinlock_t interrupt_lock; 189 spinlock_t interrupt_lock;
190 190
@@ -494,7 +494,12 @@ struct kfd_process {
494 */ 494 */
495 struct hlist_node kfd_processes; 495 struct hlist_node kfd_processes;
496 496
497 struct mm_struct *mm; 497 /*
498 * Opaque pointer to mm_struct. We don't hold a reference to
499 * it so it should never be dereferenced from here. This is
500 * only used for looking up processes by their mm.
501 */
502 void *mm;
498 503
499 struct mutex mutex; 504 struct mutex mutex;
500 505
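Editor's note: since p->mm is now only a lookup key, the hash-table lookup presumably compares pointer values without ever dereferencing them, along these lines (cf. the real helper in kfd_process.c; the caller holds the SRCU/RCU read lock):

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	hash_for_each_possible_rcu(kfd_processes_table, p,
				   kfd_processes, (uintptr_t)mm)
		if (p->mm == mm)
			return p;

	return NULL;
}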
@@ -502,6 +507,8 @@ struct kfd_process {
502 * In any process, the thread that started main() is the lead 507 * In any process, the thread that started main() is the lead
503 * thread and outlives the rest. 508 * thread and outlives the rest.
504 * It is here because amd_iommu_bind_pasid wants a task_struct. 509 * It is here because amd_iommu_bind_pasid wants a task_struct.
510 * It can also be used for safely getting a reference to the
511 * mm_struct of the process.
505 */ 512 */
506 struct task_struct *lead_thread; 513 struct task_struct *lead_thread;
507 514
@@ -522,22 +529,16 @@ struct kfd_process {
522 529
523 struct process_queue_manager pqm; 530 struct process_queue_manager pqm;
524 531
525 /* The process's queues. */
526 size_t queue_array_size;
527
528 /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
529 struct kfd_queue **queues;
530
531 /*Is the user space process 32 bit?*/ 532 /*Is the user space process 32 bit?*/
532 bool is_32bit_user_mode; 533 bool is_32bit_user_mode;
533 534
534 /* Event-related data */ 535 /* Event-related data */
535 struct mutex event_mutex; 536 struct mutex event_mutex;
536 /* All events in process hashed by ID, linked on kfd_event.events. */ 537 /* Event ID allocator and lookup */
537 DECLARE_HASHTABLE(events, 4); 538 struct idr event_idr;
538 /* struct slot_page_header.event_pages */ 539 /* Event page */
539 struct list_head signal_event_pages; 540 struct kfd_signal_page *signal_page;
540 u32 next_nonsignal_event_id; 541 size_t signal_mapped_size;
541 size_t signal_event_count; 542 size_t signal_event_count;
542 bool signal_event_limit_reached; 543 bool signal_event_limit_reached;
543}; 544};
@@ -721,19 +722,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
721extern const struct kfd_event_interrupt_class event_interrupt_class_cik; 722extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
722extern const struct kfd_device_global_init_class device_global_init_class_cik; 723extern const struct kfd_device_global_init_class device_global_init_class_cik;
723 724
724enum kfd_event_wait_result {
725 KFD_WAIT_COMPLETE,
726 KFD_WAIT_TIMEOUT,
727 KFD_WAIT_ERROR
728};
729
730void kfd_event_init_process(struct kfd_process *p); 725void kfd_event_init_process(struct kfd_process *p);
731void kfd_event_free_process(struct kfd_process *p); 726void kfd_event_free_process(struct kfd_process *p);
732int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); 727int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
733int kfd_wait_on_events(struct kfd_process *p, 728int kfd_wait_on_events(struct kfd_process *p,
734 uint32_t num_events, void __user *data, 729 uint32_t num_events, void __user *data,
735 bool all, uint32_t user_timeout_ms, 730 bool all, uint32_t user_timeout_ms,
736 enum kfd_event_wait_result *wait_result); 731 uint32_t *wait_result);
737void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, 732void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
738 uint32_t valid_id_bits); 733 uint32_t valid_id_bits);
739void kfd_signal_iommu_event(struct kfd_dev *dev, 734void kfd_signal_iommu_event(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3ccb3b53216e..1f5ccd28bd41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -35,13 +35,6 @@ struct mm_struct;
35#include "kfd_dbgmgr.h" 35#include "kfd_dbgmgr.h"
36 36
37/* 37/*
38 * Initial size for the array of queues.
39 * The allocated size is doubled each time
40 * it is exceeded up to MAX_PROCESS_QUEUES.
41 */
42#define INITIAL_QUEUE_ARRAY_SIZE 16
43
44/*
45 * List of struct kfd_process (field kfd_process). 38 * List of struct kfd_process (field kfd_process).
46 * Unique/indexed by mm_struct* 39 * Unique/indexed by mm_struct*
47 */ 40 */
@@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work)
187 180
188 mutex_destroy(&p->mutex); 181 mutex_destroy(&p->mutex);
189 182
190 kfree(p->queues);
191
192 kfree(p); 183 kfree(p);
193 184
194 kfree(work); 185 kfree(work);
@@ -200,7 +191,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
200 struct kfd_process *p; 191 struct kfd_process *p;
201 192
202 p = container_of(rcu, struct kfd_process, rcu); 193 p = container_of(rcu, struct kfd_process, rcu);
203 WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
204 194
205 mmdrop(p->mm); 195 mmdrop(p->mm);
206 196
@@ -234,17 +224,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
234 224
235 mutex_lock(&p->mutex); 225 mutex_lock(&p->mutex);
236 226
 227 /* Iterate over all process device data structures. If a
 228 * pdd is in debug mode, force unregistration first, so
 229 * that the queues can then be destroyed.
 230 */
231 list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
232 struct kfd_dev *dev = pdd->dev;
233
234 mutex_lock(kfd_get_dbgmgr_mutex());
235 if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
236 if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
237 kfd_dbgmgr_destroy(dev->dbgmgr);
238 dev->dbgmgr = NULL;
239 }
240 }
241 mutex_unlock(kfd_get_dbgmgr_mutex());
242 }
243
237 kfd_process_dequeue_from_all_devices(p); 244 kfd_process_dequeue_from_all_devices(p);
238 pqm_uninit(&p->pqm); 245 pqm_uninit(&p->pqm);
239 246
240 /* Iterate over all process device data structure and check
241 * if we should delete debug managers
242 */
243 list_for_each_entry(pdd, &p->per_device_data, per_device_list)
244 if ((pdd->dev->dbgmgr) &&
245 (pdd->dev->dbgmgr->pasid == p->pasid))
246 kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
247
248 mutex_unlock(&p->mutex); 247 mutex_unlock(&p->mutex);
249 248
250 /* 249 /*
@@ -271,11 +270,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
271 if (!process) 270 if (!process)
272 goto err_alloc_process; 271 goto err_alloc_process;
273 272
274 process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
275 sizeof(process->queues[0]), GFP_KERNEL);
276 if (!process->queues)
277 goto err_alloc_queues;
278
279 process->pasid = kfd_pasid_alloc(); 273 process->pasid = kfd_pasid_alloc();
280 if (process->pasid == 0) 274 if (process->pasid == 0)
281 goto err_alloc_pasid; 275 goto err_alloc_pasid;
@@ -298,8 +292,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
298 292
299 process->lead_thread = thread->group_leader; 293 process->lead_thread = thread->group_leader;
300 294
301 process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;
302
303 INIT_LIST_HEAD(&process->per_device_data); 295 INIT_LIST_HEAD(&process->per_device_data);
304 296
305 kfd_event_init_process(process); 297 kfd_event_init_process(process);
@@ -328,8 +320,6 @@ err_mmu_notifier:
328err_alloc_doorbells: 320err_alloc_doorbells:
329 kfd_pasid_free(process->pasid); 321 kfd_pasid_free(process->pasid);
330err_alloc_pasid: 322err_alloc_pasid:
331 kfree(process->queues);
332err_alloc_queues:
333 kfree(process); 323 kfree(process);
334err_alloc_process: 324err_alloc_process:
335 return ERR_PTR(err); 325 return ERR_PTR(err);
@@ -426,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
426 err = amd_iommu_bind_pasid(dev->pdev, p->pasid, 416 err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
427 p->lead_thread); 417 p->lead_thread);
428 if (err < 0) { 418 if (err < 0) {
429 pr_err("unexpected pasid %d binding failure\n", 419 pr_err("Unexpected pasid %d binding failure\n",
430 p->pasid); 420 p->pasid);
431 mutex_unlock(&p->mutex); 421 mutex_unlock(&p->mutex);
432 break; 422 break;
@@ -442,29 +432,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
442} 432}
443 433
444/* 434/*
445 * Temporarily unbind currently bound processes from the device and 435 * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
446 * mark them as PDD_BOUND_SUSPENDED. These processes will be restored 436 * processes will be restored to PDD_BOUND state in
447 * to PDD_BOUND state in kfd_bind_processes_to_device. 437 * kfd_bind_processes_to_device.
448 */ 438 */
449void kfd_unbind_processes_from_device(struct kfd_dev *dev) 439void kfd_unbind_processes_from_device(struct kfd_dev *dev)
450{ 440{
451 struct kfd_process_device *pdd; 441 struct kfd_process_device *pdd;
452 struct kfd_process *p; 442 struct kfd_process *p;
453 unsigned int temp, temp_bound, temp_pasid; 443 unsigned int temp;
454 444
455 int idx = srcu_read_lock(&kfd_processes_srcu); 445 int idx = srcu_read_lock(&kfd_processes_srcu);
456 446
457 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 447 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
458 mutex_lock(&p->mutex); 448 mutex_lock(&p->mutex);
459 pdd = kfd_get_process_device_data(dev, p); 449 pdd = kfd_get_process_device_data(dev, p);
460 temp_bound = pdd->bound; 450
461 temp_pasid = p->pasid;
462 if (pdd->bound == PDD_BOUND) 451 if (pdd->bound == PDD_BOUND)
463 pdd->bound = PDD_BOUND_SUSPENDED; 452 pdd->bound = PDD_BOUND_SUSPENDED;
464 mutex_unlock(&p->mutex); 453 mutex_unlock(&p->mutex);
465
466 if (temp_bound == PDD_BOUND)
467 amd_iommu_unbind_pasid(dev->pdev, temp_pasid);
468 } 454 }
469 455
470 srcu_read_unlock(&kfd_processes_srcu, idx); 456 srcu_read_unlock(&kfd_processes_srcu, idx);
@@ -486,8 +472,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
486 472
487 pr_debug("Unbinding process %d from IOMMU\n", pasid); 473 pr_debug("Unbinding process %d from IOMMU\n", pasid);
488 474
489 if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) 475 mutex_lock(kfd_get_dbgmgr_mutex());
490 kfd_dbgmgr_destroy(dev->dbgmgr); 476
477 if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
478 if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
479 kfd_dbgmgr_destroy(dev->dbgmgr);
480 dev->dbgmgr = NULL;
481 }
482 }
483
484 mutex_unlock(kfd_get_dbgmgr_mutex());
491 485
492 pdd = kfd_get_process_device_data(dev, p); 486 pdd = kfd_get_process_device_data(dev, p);
493 if (pdd) 487 if (pdd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5129dc139219..2bec902fc939 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
177 if (retval != 0) 177 if (retval != 0)
178 return retval; 178 return retval;
179 179
180 if (list_empty(&pqm->queues)) { 180 if (list_empty(&pdd->qpd.queues_list) &&
181 list_empty(&pdd->qpd.priv_queue_list)) {
181 pdd->qpd.pqm = pqm; 182 pdd->qpd.pqm = pqm;
182 dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); 183 dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
183 } 184 }
@@ -248,7 +249,8 @@ err_create_queue:
248err_allocate_pqn: 249err_allocate_pqn:
249 /* check if queues list is empty unregister process from device */ 250 /* check if queues list is empty unregister process from device */
250 clear_bit(*qid, pqm->queue_slot_bitmap); 251 clear_bit(*qid, pqm->queue_slot_bitmap);
251 if (list_empty(&pqm->queues)) 252 if (list_empty(&pdd->qpd.queues_list) &&
253 list_empty(&pdd->qpd.priv_queue_list))
252 dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); 254 dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
253 return retval; 255 return retval;
254} 256}
@@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
302 kfree(pqn); 304 kfree(pqn);
303 clear_bit(qid, pqm->queue_slot_bitmap); 305 clear_bit(qid, pqm->queue_slot_bitmap);
304 306
305 if (list_empty(&pqm->queues)) 307 if (list_empty(&pdd->qpd.queues_list) &&
308 list_empty(&pdd->qpd.priv_queue_list))
306 dqm->ops.unregister_process(dqm, &pdd->qpd); 309 dqm->ops.unregister_process(dqm, &pdd->qpd);
307 310
308 return retval; 311 return retval;
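Editor's note: the three hunks above repeat the same two-list emptiness test. An illustrative helper, not part of the patch, would express the register-on-first-queue/unregister-on-last-queue rule once:

static bool qpd_has_queues(struct qcm_process_device *qpd)
{
	return !list_empty(&qpd->queues_list) ||
	       !list_empty(&qpd->priv_queue_list);
}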
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index be16c6390216..cf3e5985e3e7 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -102,8 +102,7 @@ radeon-y += \
102radeon-y += \ 102radeon-y += \
103 radeon_vce.o \ 103 radeon_vce.o \
104 vce_v1_0.o \ 104 vce_v1_0.o \
105 vce_v2_0.o \ 105 vce_v2_0.o
106 radeon_kfd.o
107 106
108radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o 107radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
109radeon-$(CONFIG_ACPI) += radeon_acpi.o 108radeon-$(CONFIG_ACPI) += radeon_acpi.o
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 3cb6c55b268d..898f9a078830 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -33,7 +33,6 @@
33#include "cik_blit_shaders.h" 33#include "cik_blit_shaders.h"
34#include "radeon_ucode.h" 34#include "radeon_ucode.h"
35#include "clearstate_ci.h" 35#include "clearstate_ci.h"
36#include "radeon_kfd.h"
37 36
38#define SH_MEM_CONFIG_GFX_DEFAULT \ 37#define SH_MEM_CONFIG_GFX_DEFAULT \
39 ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) 38 ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
@@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev)
5684 /* 5683 /*
5685 * number of VMs 5684 * number of VMs
5686 * VMID 0 is reserved for System 5685 * VMID 0 is reserved for System
5687 * radeon graphics/compute will use VMIDs 1-7 5686 * radeon graphics/compute will use VMIDs 1-15
5688 * amdkfd will use VMIDs 8-15
5689 */ 5687 */
5690 rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; 5688 rdev->vm_manager.nvm = 16;
5691 /* base offset of vram pages */ 5689 /* base offset of vram pages */
5692 if (rdev->flags & RADEON_IS_IGP) { 5690 if (rdev->flags & RADEON_IS_IGP) {
5693 u64 tmp = RREG32(MC_VM_FB_OFFSET); 5691 u64 tmp = RREG32(MC_VM_FB_OFFSET);
@@ -7589,9 +7587,6 @@ restart_ih:
7589 /* wptr/rptr are in bytes! */ 7587 /* wptr/rptr are in bytes! */
7590 ring_index = rptr / 4; 7588 ring_index = rptr / 4;
7591 7589
7592 radeon_kfd_interrupt(rdev,
7593 (const void *) &rdev->ih.ring[ring_index]);
7594
7595 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 7590 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7596 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 7591 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7597 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 7592 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
@@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev)
8486 if (r) 8481 if (r)
8487 return r; 8482 return r;
8488 8483
8489 r = radeon_kfd_resume(rdev);
8490 if (r)
8491 return r;
8492
8493 return 0; 8484 return 0;
8494} 8485}
8495 8486
@@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev)
8538 */ 8529 */
8539int cik_suspend(struct radeon_device *rdev) 8530int cik_suspend(struct radeon_device *rdev)
8540{ 8531{
8541 radeon_kfd_suspend(rdev);
8542 radeon_pm_suspend(rdev); 8532 radeon_pm_suspend(rdev);
8543 radeon_audio_fini(rdev); 8533 radeon_audio_fini(rdev);
8544 radeon_vm_manager_fini(rdev); 8534 radeon_vm_manager_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index e21015475ed5..cda16fcd43bb 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -30,8 +30,6 @@
30#define CIK_RB_BITMAP_WIDTH_PER_SH 2 30#define CIK_RB_BITMAP_WIDTH_PER_SH 2
31#define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 31#define HAWAII_RB_BITMAP_WIDTH_PER_SH 4
32 32
33#define RADEON_NUM_OF_VMIDS 8
34
35/* DIDT IND registers */ 33/* DIDT IND registers */
36#define DIDT_SQ_CTRL0 0x0 34#define DIDT_SQ_CTRL0 0x0
37# define DIDT_CTRL_EN (1 << 0) 35# define DIDT_CTRL_EN (1 << 0)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8cbaeec090c9..a8e546569858 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2456,9 +2456,6 @@ struct radeon_device {
2456 u64 vram_pin_size; 2456 u64 vram_pin_size;
2457 u64 gart_pin_size; 2457 u64 gart_pin_size;
2458 2458
2459 /* amdkfd interface */
2460 struct kfd_dev *kfd;
2461
2462 struct mutex mn_lock; 2459 struct mutex mn_lock;
2463 DECLARE_HASHTABLE(mn_hash, 7); 2460 DECLARE_HASHTABLE(mn_hash, 7);
2464}; 2461};
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index f4becad0a78c..31dd04f6baa1 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -43,7 +43,6 @@
43#include <drm/drm_fb_helper.h> 43#include <drm/drm_fb_helper.h>
44 44
45#include <drm/drm_crtc_helper.h> 45#include <drm/drm_crtc_helper.h>
46#include "radeon_kfd.h"
47 46
48/* 47/*
49 * KMS wrapper. 48 * KMS wrapper.
@@ -338,14 +337,6 @@ static int radeon_pci_probe(struct pci_dev *pdev,
338{ 337{
339 int ret; 338 int ret;
340 339
341 /*
342 * Initialize amdkfd before starting radeon. If it was not loaded yet,
343 * defer radeon probing
344 */
345 ret = radeon_kfd_init();
346 if (ret == -EPROBE_DEFER)
347 return ret;
348
349 if (vga_switcheroo_client_probe_defer(pdev)) 340 if (vga_switcheroo_client_probe_defer(pdev))
350 return -EPROBE_DEFER; 341 return -EPROBE_DEFER;
351 342
@@ -645,7 +636,6 @@ static int __init radeon_init(void)
645 636
646static void __exit radeon_exit(void) 637static void __exit radeon_exit(void)
647{ 638{
648 radeon_kfd_fini();
649 pci_unregister_driver(pdriver); 639 pci_unregister_driver(pdriver);
650 radeon_unregister_atpx_handler(); 640 radeon_unregister_atpx_handler();
651} 641}
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
deleted file mode 100644
index 385b4d76956d..000000000000
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ /dev/null
@@ -1,901 +0,0 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/module.h>
24#include <linux/fdtable.h>
25#include <linux/uaccess.h>
26#include <drm/drmP.h>
27#include "radeon.h"
28#include "cikd.h"
29#include "cik_reg.h"
30#include "radeon_kfd.h"
31#include "radeon_ucode.h"
32#include <linux/firmware.h>
33#include "cik_structs.h"
34
35#define CIK_PIPE_PER_MEC (4)
36
37static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
38 TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
39 TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
40 TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
41 TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
42};
43
44struct kgd_mem {
45 struct radeon_bo *bo;
46 uint64_t gpu_addr;
47 void *cpu_ptr;
48};
49
50
51static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
52 void **mem_obj, uint64_t *gpu_addr,
53 void **cpu_ptr);
54
55static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
56
57static uint64_t get_vmem_size(struct kgd_dev *kgd);
58static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
59
60static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
61
62static int alloc_pasid(unsigned int bits);
63static void free_pasid(unsigned int pasid);
64
65static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
66
67/*
68 * Register access functions
69 */
70
71static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
72 uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
73 uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
74
75static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
76 unsigned int vmid);
77
78static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
79 uint32_t hpd_size, uint64_t hpd_gpu_addr);
80static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
81static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
82 uint32_t queue_id, uint32_t __user *wptr,
83 uint32_t wptr_shift, uint32_t wptr_mask,
84 struct mm_struct *mm);
85static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
86static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
87 uint32_t pipe_id, uint32_t queue_id);
88
89static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
90 unsigned int timeout, uint32_t pipe_id,
91 uint32_t queue_id);
92static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
93static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
94 unsigned int timeout);
95static int kgd_address_watch_disable(struct kgd_dev *kgd);
96static int kgd_address_watch_execute(struct kgd_dev *kgd,
97 unsigned int watch_point_id,
98 uint32_t cntl_val,
99 uint32_t addr_hi,
100 uint32_t addr_lo);
101static int kgd_wave_control_execute(struct kgd_dev *kgd,
102 uint32_t gfx_index_val,
103 uint32_t sq_cmd);
104static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
105 unsigned int watch_point_id,
106 unsigned int reg_offset);
107
108static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
109static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
110 uint8_t vmid);
111static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
112
113static const struct kfd2kgd_calls kfd2kgd = {
114 .init_gtt_mem_allocation = alloc_gtt_mem,
115 .free_gtt_mem = free_gtt_mem,
116 .get_vmem_size = get_vmem_size,
117 .get_gpu_clock_counter = get_gpu_clock_counter,
118 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
119 .alloc_pasid = alloc_pasid,
120 .free_pasid = free_pasid,
121 .program_sh_mem_settings = kgd_program_sh_mem_settings,
122 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
123 .init_pipeline = kgd_init_pipeline,
124 .init_interrupts = kgd_init_interrupts,
125 .hqd_load = kgd_hqd_load,
126 .hqd_sdma_load = kgd_hqd_sdma_load,
127 .hqd_is_occupied = kgd_hqd_is_occupied,
128 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
129 .hqd_destroy = kgd_hqd_destroy,
130 .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
131 .address_watch_disable = kgd_address_watch_disable,
132 .address_watch_execute = kgd_address_watch_execute,
133 .wave_control_execute = kgd_wave_control_execute,
134 .address_watch_get_offset = kgd_address_watch_get_offset,
135 .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
136 .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
137 .write_vmid_invalidate_request = write_vmid_invalidate_request,
138 .get_fw_version = get_fw_version
139};
140
141static const struct kgd2kfd_calls *kgd2kfd;
142
143int radeon_kfd_init(void)
144{
145 int ret;
146
147#if defined(CONFIG_HSA_AMD_MODULE)
148 int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
149
150 kgd2kfd_init_p = symbol_request(kgd2kfd_init);
151
152 if (kgd2kfd_init_p == NULL)
153 return -ENOENT;
154
155 ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
156 if (ret) {
157 symbol_put(kgd2kfd_init);
158 kgd2kfd = NULL;
159 }
160
161#elif defined(CONFIG_HSA_AMD)
162 ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
163 if (ret)
164 kgd2kfd = NULL;
165
166#else
167 ret = -ENOENT;
168#endif
169
170 return ret;
171}
172
173void radeon_kfd_fini(void)
174{
175 if (kgd2kfd) {
176 kgd2kfd->exit();
177 symbol_put(kgd2kfd_init);
178 }
179}
180
181void radeon_kfd_device_probe(struct radeon_device *rdev)
182{
183 if (kgd2kfd)
184 rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
185 rdev->pdev, &kfd2kgd);
186}
187
188void radeon_kfd_device_init(struct radeon_device *rdev)
189{
190 int i, queue, pipe, mec;
191
192 if (rdev->kfd) {
193 struct kgd2kfd_shared_resources gpu_resources = {
194 .compute_vmid_bitmap = 0xFF00,
195 .num_pipe_per_mec = 4,
196 .num_queue_per_pipe = 8
197 };
198
199 bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES);
200
201 for (i = 0; i < KGD_MAX_QUEUES; ++i) {
202 queue = i % gpu_resources.num_queue_per_pipe;
203 pipe = (i / gpu_resources.num_queue_per_pipe)
204 % gpu_resources.num_pipe_per_mec;
205 mec = (i / gpu_resources.num_queue_per_pipe)
206 / gpu_resources.num_pipe_per_mec;
207
208 if (mec == 0 && pipe > 0)
209 set_bit(i, gpu_resources.queue_bitmap);
210 }
211
212 radeon_doorbell_get_kfd_info(rdev,
213 &gpu_resources.doorbell_physical_address,
214 &gpu_resources.doorbell_aperture_size,
215 &gpu_resources.doorbell_start_offset);
216
217 kgd2kfd->device_init(rdev->kfd, &gpu_resources);
218 }
219}
220
221void radeon_kfd_device_fini(struct radeon_device *rdev)
222{
223 if (rdev->kfd) {
224 kgd2kfd->device_exit(rdev->kfd);
225 rdev->kfd = NULL;
226 }
227}
228
229void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
230{
231 if (rdev->kfd)
232 kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
233}
234
235void radeon_kfd_suspend(struct radeon_device *rdev)
236{
237 if (rdev->kfd)
238 kgd2kfd->suspend(rdev->kfd);
239}
240
241int radeon_kfd_resume(struct radeon_device *rdev)
242{
243 int r = 0;
244
245 if (rdev->kfd)
246 r = kgd2kfd->resume(rdev->kfd);
247
248 return r;
249}
250
251static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
252 void **mem_obj, uint64_t *gpu_addr,
253 void **cpu_ptr)
254{
255 struct radeon_device *rdev = (struct radeon_device *)kgd;
256 struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
257 int r;
258
259 BUG_ON(kgd == NULL);
260 BUG_ON(gpu_addr == NULL);
261 BUG_ON(cpu_ptr == NULL);
262
263 *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
264 if ((*mem) == NULL)
265 return -ENOMEM;
266
267 r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
268 RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
269 if (r) {
270 dev_err(rdev->dev,
271 "failed to allocate BO for amdkfd (%d)\n", r);
272 return r;
273 }
274
275 /* map the buffer */
276 r = radeon_bo_reserve((*mem)->bo, true);
277 if (r) {
278 dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
279 goto allocate_mem_reserve_bo_failed;
280 }
281
282 r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
283 &(*mem)->gpu_addr);
284 if (r) {
285 dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
286 goto allocate_mem_pin_bo_failed;
287 }
288 *gpu_addr = (*mem)->gpu_addr;
289
290 r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
291 if (r) {
292 dev_err(rdev->dev,
293 "(%d) failed to map bo to kernel for amdkfd\n", r);
294 goto allocate_mem_kmap_bo_failed;
295 }
296 *cpu_ptr = (*mem)->cpu_ptr;
297
298 radeon_bo_unreserve((*mem)->bo);
299
300 return 0;
301
302allocate_mem_kmap_bo_failed:
303 radeon_bo_unpin((*mem)->bo);
304allocate_mem_pin_bo_failed:
305 radeon_bo_unreserve((*mem)->bo);
306allocate_mem_reserve_bo_failed:
307 radeon_bo_unref(&(*mem)->bo);
308
309 return r;
310}
311
312static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
313{
314 struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
315
316 BUG_ON(mem == NULL);
317
318 radeon_bo_reserve(mem->bo, true);
319 radeon_bo_kunmap(mem->bo);
320 radeon_bo_unpin(mem->bo);
321 radeon_bo_unreserve(mem->bo);
322 radeon_bo_unref(&(mem->bo));
323 kfree(mem);
324}
325
326static uint64_t get_vmem_size(struct kgd_dev *kgd)
327{
328 struct radeon_device *rdev = (struct radeon_device *)kgd;
329
330 BUG_ON(kgd == NULL);
331
332 return rdev->mc.real_vram_size;
333}
334
335static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
336{
337 struct radeon_device *rdev = (struct radeon_device *)kgd;
338
339 return rdev->asic->get_gpu_clock_counter(rdev);
340}
341
342static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
343{
344 struct radeon_device *rdev = (struct radeon_device *)kgd;
345
346 /* The sclk is in quantas of 10kHz */
347 return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
348}
349
350/*
351 * PASID manager
352 */
353static DEFINE_IDA(pasid_ida);
354
355static int alloc_pasid(unsigned int bits)
356{
357 int pasid = -EINVAL;
358
359 for (bits = min(bits, 31U); bits > 0; bits--) {
360 pasid = ida_simple_get(&pasid_ida,
361 1U << (bits - 1), 1U << bits,
362 GFP_KERNEL);
363 if (pasid != -ENOSPC)
364 break;
365 }
366
367 return pasid;
368}
369
370static void free_pasid(unsigned int pasid)
371{
372 ida_simple_remove(&pasid_ida, pasid);
373}
374
375static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
376{
377 return (struct radeon_device *)kgd;
378}
379
380static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
381{
382 struct radeon_device *rdev = get_radeon_device(kgd);
383
384 writel(value, (void __iomem *)(rdev->rmmio + offset));
385}
386
387static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
388{
389 struct radeon_device *rdev = get_radeon_device(kgd);
390
391 return readl((void __iomem *)(rdev->rmmio + offset));
392}
393
394static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
395 uint32_t queue, uint32_t vmid)
396{
397 struct radeon_device *rdev = get_radeon_device(kgd);
398 uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
399
400 mutex_lock(&rdev->srbm_mutex);
401 write_register(kgd, SRBM_GFX_CNTL, value);
402}
403
404static void unlock_srbm(struct kgd_dev *kgd)
405{
406 struct radeon_device *rdev = get_radeon_device(kgd);
407
408 write_register(kgd, SRBM_GFX_CNTL, 0);
409 mutex_unlock(&rdev->srbm_mutex);
410}
411
412static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
413 uint32_t queue_id)
414{
415 uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
416 uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
417
418 lock_srbm(kgd, mec, pipe, queue_id, 0);
419}
420
421static void release_queue(struct kgd_dev *kgd)
422{
423 unlock_srbm(kgd);
424}
425
426static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
427 uint32_t sh_mem_config,
428 uint32_t sh_mem_ape1_base,
429 uint32_t sh_mem_ape1_limit,
430 uint32_t sh_mem_bases)
431{
432 lock_srbm(kgd, 0, 0, 0, vmid);
433
434 write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
435 write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
436 write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
437 write_register(kgd, SH_MEM_BASES, sh_mem_bases);
438
439 unlock_srbm(kgd);
440}
441
442static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
443 unsigned int vmid)
444{
445 /*
446 * We have to assume that there is no outstanding mapping.
447 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0
448 * because a mapping is in progress or because a mapping finished and
449 * the SW cleared it.
450 * So the protocol is to always wait & clear.
451 */
452 uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
453 ATC_VMID_PASID_MAPPING_VALID_MASK;
454
455 write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
456 pasid_mapping);
457
458 while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
459 (1U << vmid)))
460 cpu_relax();
461 write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
462
463 /* Mapping vmid to pasid also for IH block */
464 write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
465 pasid_mapping);
466
467 return 0;
468}
469
470static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
471 uint32_t hpd_size, uint64_t hpd_gpu_addr)
472{
473 /* nothing to do here */
474 return 0;
475}
476
477static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
478{
479 uint32_t mec;
480 uint32_t pipe;
481
482 mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
483 pipe = (pipe_id % CIK_PIPE_PER_MEC);
484
485 lock_srbm(kgd, mec, pipe, 0, 0);
486
487 write_register(kgd, CPC_INT_CNTL,
488 TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);
489
490 unlock_srbm(kgd);
491
492 return 0;
493}
494
495static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
496{
497 uint32_t retval;
498
499 retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
500 m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
501
502 pr_debug("kfd: sdma base address: 0x%x\n", retval);
503
504 return retval;
505}
506
507static inline struct cik_mqd *get_mqd(void *mqd)
508{
509 return (struct cik_mqd *)mqd;
510}
511
512static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
513{
514 return (struct cik_sdma_rlc_registers *)mqd;
515}
516
517static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
518 uint32_t queue_id, uint32_t __user *wptr,
519 uint32_t wptr_shift, uint32_t wptr_mask,
520 struct mm_struct *mm)
521{
522 uint32_t wptr_shadow, is_wptr_shadow_valid;
523 struct cik_mqd *m;
524
525 m = get_mqd(mqd);
526
527 is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
528
529 acquire_queue(kgd, pipe_id, queue_id);
530 write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
531 write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
532 write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control);
533
534 write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
535 write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
536 write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
537
538 write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
539 write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
540 write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
541
542 write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
543
544 write_register(kgd, CP_HQD_PERSISTENT_STATE,
545 m->cp_hqd_persistent_state);
546 write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
547 write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
548
549 write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
550 m->cp_hqd_atomic0_preop_lo);
551
552 write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
553 m->cp_hqd_atomic0_preop_hi);
554
555 write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
556 m->cp_hqd_atomic1_preop_lo);
557
558 write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
559 m->cp_hqd_atomic1_preop_hi);
560
561 write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
562 m->cp_hqd_pq_rptr_report_addr_lo);
563
564 write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
565 m->cp_hqd_pq_rptr_report_addr_hi);
566
567 write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
568
569 write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
570 m->cp_hqd_pq_wptr_poll_addr_lo);
571
572 write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
573 m->cp_hqd_pq_wptr_poll_addr_hi);
574
575 write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
576 m->cp_hqd_pq_doorbell_control);
577
578 write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid);
579
580 write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum);
581
582 write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
583 write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
584
585 write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
586
587 if (is_wptr_shadow_valid)
588 write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);
589
590 write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active);
591 release_queue(kgd);
592
593 return 0;
594}
595
596static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
597{
598 struct cik_sdma_rlc_registers *m;
599 uint32_t sdma_base_addr;
600
601 m = get_sdma_mqd(mqd);
602 sdma_base_addr = get_sdma_base_addr(m);
603
604 write_register(kgd,
605 sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR,
606 m->sdma_rlc_virtual_addr);
607
608 write_register(kgd,
609 sdma_base_addr + SDMA0_RLC0_RB_BASE,
610 m->sdma_rlc_rb_base);
611
612 write_register(kgd,
613 sdma_base_addr + SDMA0_RLC0_RB_BASE_HI,
614 m->sdma_rlc_rb_base_hi);
615
616 write_register(kgd,
617 sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO,
618 m->sdma_rlc_rb_rptr_addr_lo);
619
620 write_register(kgd,
621 sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI,
622 m->sdma_rlc_rb_rptr_addr_hi);
623
624 write_register(kgd,
625 sdma_base_addr + SDMA0_RLC0_DOORBELL,
626 m->sdma_rlc_doorbell);
627
628 write_register(kgd,
629 sdma_base_addr + SDMA0_RLC0_RB_CNTL,
630 m->sdma_rlc_rb_cntl);
631
632 return 0;
633}
634
635static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
636 uint32_t pipe_id, uint32_t queue_id)
637{
638 uint32_t act;
639 bool retval = false;
640 uint32_t low, high;
641
642 acquire_queue(kgd, pipe_id, queue_id);
643 act = read_register(kgd, CP_HQD_ACTIVE);
644 if (act) {
645 low = lower_32_bits(queue_address >> 8);
646 high = upper_32_bits(queue_address >> 8);
647
648 if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
649 high == read_register(kgd, CP_HQD_PQ_BASE_HI))
650 retval = true;
651 }
652 release_queue(kgd);
653 return retval;
654}
655
656static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
657{
658 struct cik_sdma_rlc_registers *m;
659 uint32_t sdma_base_addr;
660 uint32_t sdma_rlc_rb_cntl;
661
662 m = get_sdma_mqd(mqd);
663 sdma_base_addr = get_sdma_base_addr(m);
664
665 sdma_rlc_rb_cntl = read_register(kgd,
666 sdma_base_addr + SDMA0_RLC0_RB_CNTL);
667
668 if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE)
669 return true;
670
671 return false;
672}
673
674static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
675 unsigned int timeout, uint32_t pipe_id,
676 uint32_t queue_id)
677{
678 uint32_t temp;
679
680 acquire_queue(kgd, pipe_id, queue_id);
681 write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);
682
683 write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);
684
685 while (true) {
686 temp = read_register(kgd, CP_HQD_ACTIVE);
687 if (temp & 0x1)
688 break;
689 if (timeout == 0) {
690 pr_err("kfd: cp queue preemption time out (%dms)\n",
691 temp);
692 release_queue(kgd);
693 return -ETIME;
694 }
695 msleep(20);
696 timeout -= 20;
697 }
698
699 release_queue(kgd);
700 return 0;
701}
702
703static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
704 unsigned int timeout)
705{
706 struct cik_sdma_rlc_registers *m;
707 uint32_t sdma_base_addr;
708 uint32_t temp;
709
710 m = get_sdma_mqd(mqd);
711 sdma_base_addr = get_sdma_base_addr(m);
712
713 temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL);
714 temp = temp & ~SDMA_RB_ENABLE;
715 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp);
716
717 while (true) {
718 temp = read_register(kgd, sdma_base_addr +
719 SDMA0_RLC0_CONTEXT_STATUS);
720 if (temp & SDMA_RLC_IDLE)
721 break;
722 if (timeout == 0)
723 return -ETIME;
724 msleep(20);
725 timeout -= 20;
726 }
727
728 write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0);
729 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0);
730 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0);
731 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0);
732
733 return 0;
734}
735
736static int kgd_address_watch_disable(struct kgd_dev *kgd)
737{
738 union TCP_WATCH_CNTL_BITS cntl;
739 unsigned int i;
740
741 cntl.u32All = 0;
742
743 cntl.bitfields.valid = 0;
744 cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
745 cntl.bitfields.atc = 1;
746
747 /* Turning off this address until we set all the registers */
748 for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
749 write_register(kgd,
750 watchRegs[i * ADDRESS_WATCH_REG_MAX +
751 ADDRESS_WATCH_REG_CNTL],
752 cntl.u32All);
753
754 return 0;
755}
756
757static int kgd_address_watch_execute(struct kgd_dev *kgd,
758 unsigned int watch_point_id,
759 uint32_t cntl_val,
760 uint32_t addr_hi,
761 uint32_t addr_lo)
762{
763 union TCP_WATCH_CNTL_BITS cntl;
764
765 cntl.u32All = cntl_val;
766
767 /* Turning off this watch point until we set all the registers */
768 cntl.bitfields.valid = 0;
769 write_register(kgd,
770 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
771 ADDRESS_WATCH_REG_CNTL],
772 cntl.u32All);
773
774 write_register(kgd,
775 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
776 ADDRESS_WATCH_REG_ADDR_HI],
777 addr_hi);
778
779 write_register(kgd,
780 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
781 ADDRESS_WATCH_REG_ADDR_LO],
782 addr_lo);
783
784 /* Enable the watch point */
785 cntl.bitfields.valid = 1;
786
787 write_register(kgd,
788 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
789 ADDRESS_WATCH_REG_CNTL],
790 cntl.u32All);
791
792 return 0;
793}
794
795static int kgd_wave_control_execute(struct kgd_dev *kgd,
796 uint32_t gfx_index_val,
797 uint32_t sq_cmd)
798{
799 struct radeon_device *rdev = get_radeon_device(kgd);
800 uint32_t data;
801
802 mutex_lock(&rdev->grbm_idx_mutex);
803
804 write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
805 write_register(kgd, SQ_CMD, sq_cmd);
806
807 /* Restore the GRBM_GFX_INDEX register */
808
809 data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
810 SE_BROADCAST_WRITES;
811
812 write_register(kgd, GRBM_GFX_INDEX, data);
813
814 mutex_unlock(&rdev->grbm_idx_mutex);
815
816 return 0;
817}
818
819static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
820 unsigned int watch_point_id,
821 unsigned int reg_offset)
822{
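	/* watchRegs[] holds byte offsets; return the dword index callers expect. */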
823 return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]
824 / 4;
825}
826
827static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
828{
829 uint32_t reg;
830 struct radeon_device *rdev = get_radeon_device(kgd);
831
832 reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid * 4);
833 return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
834}
835
836static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
837 uint8_t vmid)
838{
839 uint32_t reg;
840 struct radeon_device *rdev = get_radeon_device(kgd);
841
842 reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid * 4);
843 return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
844}
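
The ATC keeps one 32-bit mapping register per VMID, laid out as an array starting at ATC_VMID0_PASID_MAPPING (hence the vmid * 4 byte stride); the two helpers above decode the valid flag and the PASID field. A hypothetical caller that looks up the VMID backing a PASID (the 8..15 range is the block CIK-era KFD reserves for compute; illustrative only):

	static int example_vmid_for_pasid(struct kgd_dev *kgd, uint16_t pasid)
	{
		uint8_t vmid;

		for (vmid = 8; vmid < 16; vmid++) {
			if (get_atc_vmid_pasid_mapping_valid(kgd, vmid) &&
			    get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid)
				return vmid;
		}

		return -EINVAL;
	}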
845
846static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
847{
848 struct radeon_device *rdev = get_radeon_device(kgd);
849
850 WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
851}
852
853static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
854{
855 struct radeon_device *rdev = get_radeon_device(kgd);
856 const union radeon_firmware_header *hdr;
857
858 BUG_ON(kgd == NULL || rdev->mec_fw == NULL);
859
860 switch (type) {
861 case KGD_ENGINE_PFP:
862 hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data;
863 break;
864
865 case KGD_ENGINE_ME:
866 hdr = (const union radeon_firmware_header *) rdev->me_fw->data;
867 break;
868
869 case KGD_ENGINE_CE:
870 hdr = (const union radeon_firmware_header *) rdev->ce_fw->data;
871 break;
872
873 case KGD_ENGINE_MEC1:
874 hdr = (const union radeon_firmware_header *) rdev->mec_fw->data;
875 break;
876
877 case KGD_ENGINE_MEC2:
878 hdr = (const union radeon_firmware_header *)
879 rdev->mec2_fw->data;
880 break;
881
882 case KGD_ENGINE_RLC:
883 hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data;
884 break;
885
886 case KGD_ENGINE_SDMA1:
887 case KGD_ENGINE_SDMA2:
888 hdr = (const union radeon_firmware_header *)
889 rdev->sdma_fw->data;
890 break;
891
892 default:
893 return 0;
894 }
895
896 if (hdr == NULL)
897 return 0;
898
899 /* Only 12 bits in use */
900 return hdr->common.ucode_version;
901}
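
get_fw_version() is exposed to the KFD through the kfd2kgd function-pointer table populated elsewhere in this file, so amdkfd can report microcode levels without reaching into radeon internals. A hypothetical call site on the amdkfd side (assuming a struct kfd2kgd_calls *kfd2kgd pointer):

	/* Hypothetical usage through the kfd2kgd interface. */
	uint16_t mec_ver = kfd2kgd->get_fw_version(kgd, KGD_ENGINE_MEC1);

	pr_info("kfd: MEC1 microcode version %u\n", mec_ver);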
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/radeon/radeon_kfd.h
deleted file mode 100644
index 9df1fea8e971..000000000000
--- a/drivers/gpu/drm/radeon/radeon_kfd.h
+++ /dev/null
@@ -1,47 +0,0 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23/*
24 * radeon_kfd.h defines the private interface between the
25 * AMD kernel graphics drivers and the AMD KFD.
26 */
27
28#ifndef RADEON_KFD_H_INCLUDED
29#define RADEON_KFD_H_INCLUDED
30
31#include <linux/types.h>
32#include "kgd_kfd_interface.h"
33
34struct radeon_device;
35
36int radeon_kfd_init(void);
37void radeon_kfd_fini(void);
38
39void radeon_kfd_suspend(struct radeon_device *rdev);
40int radeon_kfd_resume(struct radeon_device *rdev);
41void radeon_kfd_interrupt(struct radeon_device *rdev,
42 const void *ih_ring_entry);
43void radeon_kfd_device_probe(struct radeon_device *rdev);
44void radeon_kfd_device_init(struct radeon_device *rdev);
45void radeon_kfd_device_fini(struct radeon_device *rdev);
46
47#endif /* RADEON_KFD_H_INCLUDED */
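
With this header deleted, the equivalent hooks live in amdgpu's amdgpu_amdkfd glue instead. A sketch of the correspondence, assuming the amdgpu_amdkfd entry points of the same era (not part of this patch):

	/* radeon_kfd.h -> amdgpu_amdkfd.h (assumed equivalents) */
	int amdgpu_amdkfd_init(void);   /* was radeon_kfd_init() */
	void amdgpu_amdkfd_fini(void);  /* was radeon_kfd_fini() */
	void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
	void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
	void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
	void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
			const void *ih_ring_entry);
	void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
	int amdgpu_amdkfd_resume(struct amdgpu_device *adev);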
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index dfee8f7d94ae..cde037f213d7 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -34,8 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/pm_runtime.h> 35#include <linux/pm_runtime.h>
36 36
37#include "radeon_kfd.h"
38
39#if defined(CONFIG_VGA_SWITCHEROO) 37#if defined(CONFIG_VGA_SWITCHEROO)
40bool radeon_has_atpx(void); 38bool radeon_has_atpx(void);
41#else 39#else
@@ -68,8 +66,6 @@ void radeon_driver_unload_kms(struct drm_device *dev)
68 pm_runtime_forbid(dev->dev); 66 pm_runtime_forbid(dev->dev);
69 } 67 }
70 68
71 radeon_kfd_device_fini(rdev);
72
73 radeon_acpi_fini(rdev); 69 radeon_acpi_fini(rdev);
74 70
75 radeon_modeset_fini(rdev); 71 radeon_modeset_fini(rdev);
@@ -174,9 +170,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
174 "Error during ACPI methods call\n"); 170 "Error during ACPI methods call\n");
175 } 171 }
176 172
177 radeon_kfd_device_probe(rdev);
178 radeon_kfd_device_init(rdev);
179
180 if (radeon_is_px(dev)) { 173 if (radeon_is_px(dev)) {
181 pm_runtime_use_autosuspend(dev->dev); 174 pm_runtime_use_autosuspend(dev->dev);
182 pm_runtime_set_autosuspend_delay(dev->dev, 5000); 175 pm_runtime_set_autosuspend_delay(dev->dev, 5000);