diff options
Diffstat (limited to 'drivers')
25 files changed, 449 insertions, 1541 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index e13c67c8d2c0..bc5a2945bd2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig | |||
@@ -4,6 +4,6 @@ | |||
4 | 4 | ||
5 | config HSA_AMD | 5 | config HSA_AMD |
6 | tristate "HSA kernel driver for AMD GPU devices" | 6 | tristate "HSA kernel driver for AMD GPU devices" |
7 | depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64 | 7 | depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 |
8 | help | 8 | help |
9 | Enable this if you want to use HSA features on AMD GPU devices. | 9 | Enable this if you want to use HSA features on AMD GPU devices. |
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 211fc48697fa..3d5ccb3755d4 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | |||
@@ -36,6 +36,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, | |||
36 | /* Do not process in ISR, just request it to be forwarded to WQ. */ | 36 | /* Do not process in ISR, just request it to be forwarded to WQ. */ |
37 | return (pasid != 0) && | 37 | return (pasid != 0) && |
38 | (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || | 38 | (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || |
39 | ihre->source_id == CIK_INTSRC_SDMA_TRAP || | ||
39 | ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || | 40 | ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || |
40 | ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); | 41 | ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); |
41 | } | 42 | } |
@@ -46,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, | |||
46 | unsigned int pasid; | 47 | unsigned int pasid; |
47 | const struct cik_ih_ring_entry *ihre = | 48 | const struct cik_ih_ring_entry *ihre = |
48 | (const struct cik_ih_ring_entry *)ih_ring_entry; | 49 | (const struct cik_ih_ring_entry *)ih_ring_entry; |
50 | uint32_t context_id = ihre->data & 0xfffffff; | ||
49 | 51 | ||
50 | pasid = (ihre->ring_id & 0xffff0000) >> 16; | 52 | pasid = (ihre->ring_id & 0xffff0000) >> 16; |
51 | 53 | ||
@@ -53,9 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, | |||
53 | return; | 55 | return; |
54 | 56 | ||
55 | if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) | 57 | if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) |
56 | kfd_signal_event_interrupt(pasid, 0, 0); | 58 | kfd_signal_event_interrupt(pasid, context_id, 28); |
59 | else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) | ||
60 | kfd_signal_event_interrupt(pasid, context_id, 28); | ||
57 | else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) | 61 | else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) |
58 | kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8); | 62 | kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); |
59 | else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) | 63 | else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) |
60 | kfd_signal_hw_exception_event(pasid); | 64 | kfd_signal_hw_exception_event(pasid); |
61 | } | 65 | } |
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h index 79a16d24c1b8..109298b9d507 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_int.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h | |||
@@ -32,9 +32,10 @@ struct cik_ih_ring_entry { | |||
32 | uint32_t reserved; | 32 | uint32_t reserved; |
33 | }; | 33 | }; |
34 | 34 | ||
35 | #define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 | ||
36 | #define CIK_INTSRC_CP_END_OF_PIPE 0xB5 | 35 | #define CIK_INTSRC_CP_END_OF_PIPE 0xB5 |
37 | #define CIK_INTSRC_CP_BAD_OPCODE 0xB7 | 36 | #define CIK_INTSRC_CP_BAD_OPCODE 0xB7 |
37 | #define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 | ||
38 | #define CIK_INTSRC_SDMA_TRAP 0xE0 | ||
38 | #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF | 39 | #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF |
39 | 40 | ||
40 | #endif | 41 | #endif |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 0ef82b229754..505d39156acd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | |||
@@ -450,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep, | |||
450 | return -EINVAL; | 450 | return -EINVAL; |
451 | } | 451 | } |
452 | 452 | ||
453 | mutex_lock(kfd_get_dbgmgr_mutex()); | ||
454 | mutex_lock(&p->mutex); | 453 | mutex_lock(&p->mutex); |
454 | mutex_lock(kfd_get_dbgmgr_mutex()); | ||
455 | 455 | ||
456 | /* | 456 | /* |
457 | * make sure that we have pdd, if this the first queue created for | 457 | * make sure that we have pdd, if this the first queue created for |
@@ -479,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep, | |||
479 | } | 479 | } |
480 | 480 | ||
481 | out: | 481 | out: |
482 | mutex_unlock(&p->mutex); | ||
483 | mutex_unlock(kfd_get_dbgmgr_mutex()); | 482 | mutex_unlock(kfd_get_dbgmgr_mutex()); |
483 | mutex_unlock(&p->mutex); | ||
484 | 484 | ||
485 | return status; | 485 | return status; |
486 | } | 486 | } |
@@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, | |||
835 | void *data) | 835 | void *data) |
836 | { | 836 | { |
837 | struct kfd_ioctl_wait_events_args *args = data; | 837 | struct kfd_ioctl_wait_events_args *args = data; |
838 | enum kfd_event_wait_result wait_result; | ||
839 | int err; | 838 | int err; |
840 | 839 | ||
841 | err = kfd_wait_on_events(p, args->num_events, | 840 | err = kfd_wait_on_events(p, args->num_events, |
842 | (void __user *)args->events_ptr, | 841 | (void __user *)args->events_ptr, |
843 | (args->wait_for_all != 0), | 842 | (args->wait_for_all != 0), |
844 | args->timeout, &wait_result); | 843 | args->timeout, &args->wait_result); |
845 | |||
846 | args->wait_result = wait_result; | ||
847 | 844 | ||
848 | return err; | 845 | return err; |
849 | } | 846 | } |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 46049f005b02..621a3b53a038 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c | |||
@@ -403,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) | |||
403 | if (kfd->interrupts_active | 403 | if (kfd->interrupts_active |
404 | && interrupt_is_wanted(kfd, ih_ring_entry) | 404 | && interrupt_is_wanted(kfd, ih_ring_entry) |
405 | && enqueue_ih_ring_entry(kfd, ih_ring_entry)) | 405 | && enqueue_ih_ring_entry(kfd, ih_ring_entry)) |
406 | schedule_work(&kfd->interrupt_work); | 406 | queue_work(kfd->ih_wq, &kfd->interrupt_work); |
407 | 407 | ||
408 | spin_unlock(&kfd->interrupt_lock); | 408 | spin_unlock(&kfd->interrupt_lock); |
409 | } | 409 | } |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index da3b74315acf..e202921c150e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | |||
@@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) | |||
389 | if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { | 389 | if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { |
390 | retval = unmap_queues_cpsch(dqm, | 390 | retval = unmap_queues_cpsch(dqm, |
391 | KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); | 391 | KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); |
392 | if (retval != 0) { | 392 | if (retval) { |
393 | pr_err("unmap queue failed\n"); | 393 | pr_err("unmap queue failed\n"); |
394 | goto out_unlock; | 394 | goto out_unlock; |
395 | } | 395 | } |
396 | } else if (sched_policy == KFD_SCHED_POLICY_NO_HWS && | 396 | } else if (prev_active && |
397 | prev_active && | ||
398 | (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || | 397 | (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || |
399 | q->properties.type == KFD_QUEUE_TYPE_SDMA)) { | 398 | q->properties.type == KFD_QUEUE_TYPE_SDMA)) { |
400 | retval = mqd->destroy_mqd(mqd, q->mqd, | 399 | retval = mqd->destroy_mqd(mqd, q->mqd, |
@@ -408,24 +407,25 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) | |||
408 | 407 | ||
409 | retval = mqd->update_mqd(mqd, q->mqd, &q->properties); | 408 | retval = mqd->update_mqd(mqd, q->mqd, &q->properties); |
410 | 409 | ||
411 | if (sched_policy != KFD_SCHED_POLICY_NO_HWS) | ||
412 | retval = map_queues_cpsch(dqm); | ||
413 | else if (sched_policy == KFD_SCHED_POLICY_NO_HWS && | ||
414 | q->properties.is_active && | ||
415 | (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || | ||
416 | q->properties.type == KFD_QUEUE_TYPE_SDMA)) | ||
417 | retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, | ||
418 | &q->properties, q->process->mm); | ||
419 | |||
420 | /* | 410 | /* |
421 | * check active state vs. the previous state | 411 | * check active state vs. the previous state and modify |
422 | * and modify counter accordingly | 412 | * counter accordingly. map_queues_cpsch uses the |
413 | * dqm->queue_count to determine whether a new runlist must be | ||
414 | * uploaded. | ||
423 | */ | 415 | */ |
424 | if (q->properties.is_active && !prev_active) | 416 | if (q->properties.is_active && !prev_active) |
425 | dqm->queue_count++; | 417 | dqm->queue_count++; |
426 | else if (!q->properties.is_active && prev_active) | 418 | else if (!q->properties.is_active && prev_active) |
427 | dqm->queue_count--; | 419 | dqm->queue_count--; |
428 | 420 | ||
421 | if (sched_policy != KFD_SCHED_POLICY_NO_HWS) | ||
422 | retval = map_queues_cpsch(dqm); | ||
423 | else if (q->properties.is_active && | ||
424 | (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || | ||
425 | q->properties.type == KFD_QUEUE_TYPE_SDMA)) | ||
426 | retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, | ||
427 | &q->properties, q->process->mm); | ||
428 | |||
429 | out_unlock: | 429 | out_unlock: |
430 | mutex_unlock(&dqm->lock); | 430 | mutex_unlock(&dqm->lock); |
431 | return retval; | 431 | return retval; |
@@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager *dqm, | |||
467 | mutex_lock(&dqm->lock); | 467 | mutex_lock(&dqm->lock); |
468 | list_add(&n->list, &dqm->queues); | 468 | list_add(&n->list, &dqm->queues); |
469 | 469 | ||
470 | retval = dqm->ops_asic_specific.register_process(dqm, qpd); | 470 | retval = dqm->asic_ops.update_qpd(dqm, qpd); |
471 | 471 | ||
472 | dqm->processes_count++; | 472 | dqm->processes_count++; |
473 | 473 | ||
@@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, | |||
629 | pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); | 629 | pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); |
630 | pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); | 630 | pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); |
631 | 631 | ||
632 | dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); | 632 | dqm->asic_ops.init_sdma_vm(dqm, q, qpd); |
633 | retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, | 633 | retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, |
634 | &q->gart_mqd_addr, &q->properties); | 634 | &q->gart_mqd_addr, &q->properties); |
635 | if (retval) | 635 | if (retval) |
@@ -696,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager *dqm) | |||
696 | 696 | ||
697 | static int initialize_cpsch(struct device_queue_manager *dqm) | 697 | static int initialize_cpsch(struct device_queue_manager *dqm) |
698 | { | 698 | { |
699 | int retval; | ||
700 | |||
701 | pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); | 699 | pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); |
702 | 700 | ||
703 | mutex_init(&dqm->lock); | 701 | mutex_init(&dqm->lock); |
@@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) | |||
706 | dqm->sdma_queue_count = 0; | 704 | dqm->sdma_queue_count = 0; |
707 | dqm->active_runlist = false; | 705 | dqm->active_runlist = false; |
708 | dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; | 706 | dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; |
709 | retval = dqm->ops_asic_specific.initialize(dqm); | ||
710 | if (retval) | ||
711 | mutex_destroy(&dqm->lock); | ||
712 | 707 | ||
713 | return retval; | 708 | return 0; |
714 | } | 709 | } |
715 | 710 | ||
716 | static int start_cpsch(struct device_queue_manager *dqm) | 711 | static int start_cpsch(struct device_queue_manager *dqm) |
@@ -835,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, | |||
835 | 830 | ||
836 | if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { | 831 | if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { |
837 | retval = allocate_sdma_queue(dqm, &q->sdma_id); | 832 | retval = allocate_sdma_queue(dqm, &q->sdma_id); |
838 | if (retval != 0) | 833 | if (retval) |
839 | goto out; | 834 | goto out; |
840 | q->properties.sdma_queue_id = | 835 | q->properties.sdma_queue_id = |
841 | q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; | 836 | q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; |
@@ -850,7 +845,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, | |||
850 | goto out; | 845 | goto out; |
851 | } | 846 | } |
852 | 847 | ||
853 | dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); | 848 | dqm->asic_ops.init_sdma_vm(dqm, q, qpd); |
854 | retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, | 849 | retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, |
855 | &q->gart_mqd_addr, &q->properties); | 850 | &q->gart_mqd_addr, &q->properties); |
856 | if (retval) | 851 | if (retval) |
@@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, | |||
1095 | qpd->sh_mem_ape1_limit = limit >> 16; | 1090 | qpd->sh_mem_ape1_limit = limit >> 16; |
1096 | } | 1091 | } |
1097 | 1092 | ||
1098 | retval = dqm->ops_asic_specific.set_cache_memory_policy( | 1093 | retval = dqm->asic_ops.set_cache_memory_policy( |
1099 | dqm, | 1094 | dqm, |
1100 | qpd, | 1095 | qpd, |
1101 | default_policy, | 1096 | default_policy, |
@@ -1270,11 +1265,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) | |||
1270 | 1265 | ||
1271 | switch (dev->device_info->asic_family) { | 1266 | switch (dev->device_info->asic_family) { |
1272 | case CHIP_CARRIZO: | 1267 | case CHIP_CARRIZO: |
1273 | device_queue_manager_init_vi(&dqm->ops_asic_specific); | 1268 | device_queue_manager_init_vi(&dqm->asic_ops); |
1274 | break; | 1269 | break; |
1275 | 1270 | ||
1276 | case CHIP_KAVERI: | 1271 | case CHIP_KAVERI: |
1277 | device_queue_manager_init_cik(&dqm->ops_asic_specific); | 1272 | device_queue_manager_init_cik(&dqm->asic_ops); |
1278 | break; | 1273 | break; |
1279 | default: | 1274 | default: |
1280 | WARN(1, "Unexpected ASIC family %u", | 1275 | WARN(1, "Unexpected ASIC family %u", |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 31c2b1f9d320..5b77cb69f732 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | |||
@@ -128,9 +128,8 @@ struct device_queue_manager_ops { | |||
128 | }; | 128 | }; |
129 | 129 | ||
130 | struct device_queue_manager_asic_ops { | 130 | struct device_queue_manager_asic_ops { |
131 | int (*register_process)(struct device_queue_manager *dqm, | 131 | int (*update_qpd)(struct device_queue_manager *dqm, |
132 | struct qcm_process_device *qpd); | 132 | struct qcm_process_device *qpd); |
133 | int (*initialize)(struct device_queue_manager *dqm); | ||
134 | bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, | 133 | bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, |
135 | struct qcm_process_device *qpd, | 134 | struct qcm_process_device *qpd, |
136 | enum cache_policy default_policy, | 135 | enum cache_policy default_policy, |
@@ -156,7 +155,7 @@ struct device_queue_manager_asic_ops { | |||
156 | 155 | ||
157 | struct device_queue_manager { | 156 | struct device_queue_manager { |
158 | struct device_queue_manager_ops ops; | 157 | struct device_queue_manager_ops ops; |
159 | struct device_queue_manager_asic_ops ops_asic_specific; | 158 | struct device_queue_manager_asic_ops asic_ops; |
160 | 159 | ||
161 | struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; | 160 | struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; |
162 | struct packet_manager packets; | 161 | struct packet_manager packets; |
@@ -179,8 +178,10 @@ struct device_queue_manager { | |||
179 | bool active_runlist; | 178 | bool active_runlist; |
180 | }; | 179 | }; |
181 | 180 | ||
182 | void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops); | 181 | void device_queue_manager_init_cik( |
183 | void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops); | 182 | struct device_queue_manager_asic_ops *asic_ops); |
183 | void device_queue_manager_init_vi( | ||
184 | struct device_queue_manager_asic_ops *asic_ops); | ||
184 | void program_sh_mem_settings(struct device_queue_manager *dqm, | 185 | void program_sh_mem_settings(struct device_queue_manager *dqm, |
185 | struct qcm_process_device *qpd); | 186 | struct qcm_process_device *qpd); |
186 | unsigned int get_queues_num(struct device_queue_manager *dqm); | 187 | unsigned int get_queues_num(struct device_queue_manager *dqm); |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 72c3cbabc0a7..28e48c90c596 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c | |||
@@ -32,18 +32,17 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, | |||
32 | enum cache_policy alternate_policy, | 32 | enum cache_policy alternate_policy, |
33 | void __user *alternate_aperture_base, | 33 | void __user *alternate_aperture_base, |
34 | uint64_t alternate_aperture_size); | 34 | uint64_t alternate_aperture_size); |
35 | static int register_process_cik(struct device_queue_manager *dqm, | 35 | static int update_qpd_cik(struct device_queue_manager *dqm, |
36 | struct qcm_process_device *qpd); | 36 | struct qcm_process_device *qpd); |
37 | static int initialize_cpsch_cik(struct device_queue_manager *dqm); | ||
38 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | 37 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, |
39 | struct qcm_process_device *qpd); | 38 | struct qcm_process_device *qpd); |
40 | 39 | ||
41 | void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops) | 40 | void device_queue_manager_init_cik( |
41 | struct device_queue_manager_asic_ops *asic_ops) | ||
42 | { | 42 | { |
43 | ops->set_cache_memory_policy = set_cache_memory_policy_cik; | 43 | asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; |
44 | ops->register_process = register_process_cik; | 44 | asic_ops->update_qpd = update_qpd_cik; |
45 | ops->initialize = initialize_cpsch_cik; | 45 | asic_ops->init_sdma_vm = init_sdma_vm; |
46 | ops->init_sdma_vm = init_sdma_vm; | ||
47 | } | 46 | } |
48 | 47 | ||
49 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) | 48 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) |
@@ -99,7 +98,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, | |||
99 | return true; | 98 | return true; |
100 | } | 99 | } |
101 | 100 | ||
102 | static int register_process_cik(struct device_queue_manager *dqm, | 101 | static int update_qpd_cik(struct device_queue_manager *dqm, |
103 | struct qcm_process_device *qpd) | 102 | struct qcm_process_device *qpd) |
104 | { | 103 | { |
105 | struct kfd_process_device *pdd; | 104 | struct kfd_process_device *pdd; |
@@ -148,8 +147,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | |||
148 | 147 | ||
149 | q->properties.sdma_vm_addr = value; | 148 | q->properties.sdma_vm_addr = value; |
150 | } | 149 | } |
151 | |||
152 | static int initialize_cpsch_cik(struct device_queue_manager *dqm) | ||
153 | { | ||
154 | return 0; | ||
155 | } | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 40e9ddd096cd..2fbce57a2f21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c | |||
@@ -33,18 +33,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, | |||
33 | enum cache_policy alternate_policy, | 33 | enum cache_policy alternate_policy, |
34 | void __user *alternate_aperture_base, | 34 | void __user *alternate_aperture_base, |
35 | uint64_t alternate_aperture_size); | 35 | uint64_t alternate_aperture_size); |
36 | static int register_process_vi(struct device_queue_manager *dqm, | 36 | static int update_qpd_vi(struct device_queue_manager *dqm, |
37 | struct qcm_process_device *qpd); | 37 | struct qcm_process_device *qpd); |
38 | static int initialize_cpsch_vi(struct device_queue_manager *dqm); | ||
39 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | 38 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, |
40 | struct qcm_process_device *qpd); | 39 | struct qcm_process_device *qpd); |
41 | 40 | ||
42 | void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) | 41 | void device_queue_manager_init_vi( |
42 | struct device_queue_manager_asic_ops *asic_ops) | ||
43 | { | 43 | { |
44 | ops->set_cache_memory_policy = set_cache_memory_policy_vi; | 44 | asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; |
45 | ops->register_process = register_process_vi; | 45 | asic_ops->update_qpd = update_qpd_vi; |
46 | ops->initialize = initialize_cpsch_vi; | 46 | asic_ops->init_sdma_vm = init_sdma_vm; |
47 | ops->init_sdma_vm = init_sdma_vm; | ||
48 | } | 47 | } |
49 | 48 | ||
50 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) | 49 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) |
@@ -104,7 +103,7 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, | |||
104 | return true; | 103 | return true; |
105 | } | 104 | } |
106 | 105 | ||
107 | static int register_process_vi(struct device_queue_manager *dqm, | 106 | static int update_qpd_vi(struct device_queue_manager *dqm, |
108 | struct qcm_process_device *qpd) | 107 | struct qcm_process_device *qpd) |
109 | { | 108 | { |
110 | struct kfd_process_device *pdd; | 109 | struct kfd_process_device *pdd; |
@@ -160,8 +159,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | |||
160 | 159 | ||
161 | q->properties.sdma_vm_addr = value; | 160 | q->properties.sdma_vm_addr = value; |
162 | } | 161 | } |
163 | |||
164 | static int initialize_cpsch_vi(struct device_queue_manager *dqm) | ||
165 | { | ||
166 | return 0; | ||
167 | } | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 944abfad39c1..cb92d4b72400 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c | |||
@@ -24,8 +24,8 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/types.h> | 25 | #include <linux/types.h> |
26 | #include <linux/sched/signal.h> | 26 | #include <linux/sched/signal.h> |
27 | #include <linux/sched/mm.h> | ||
27 | #include <linux/uaccess.h> | 28 | #include <linux/uaccess.h> |
28 | #include <linux/mm.h> | ||
29 | #include <linux/mman.h> | 29 | #include <linux/mman.h> |
30 | #include <linux/memory.h> | 30 | #include <linux/memory.h> |
31 | #include "kfd_priv.h" | 31 | #include "kfd_priv.h" |
@@ -33,185 +33,89 @@ | |||
33 | #include <linux/device.h> | 33 | #include <linux/device.h> |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * A task can only be on a single wait_queue at a time, but we need to support | 36 | * Wrapper around wait_queue_entry_t |
37 | * waiting on multiple events (any/all). | ||
38 | * Instead of each event simply having a wait_queue with sleeping tasks, it | ||
39 | * has a singly-linked list of tasks. | ||
40 | * A thread that wants to sleep creates an array of these, one for each event | ||
41 | * and adds one to each event's waiter chain. | ||
42 | */ | 37 | */ |
43 | struct kfd_event_waiter { | 38 | struct kfd_event_waiter { |
44 | struct list_head waiters; | 39 | wait_queue_entry_t wait; |
45 | struct task_struct *sleeping_task; | 40 | struct kfd_event *event; /* Event to wait for */ |
46 | 41 | bool activated; /* Becomes true when event is signaled */ | |
47 | /* Transitions to true when the event this belongs to is signaled. */ | ||
48 | bool activated; | ||
49 | |||
50 | /* Event */ | ||
51 | struct kfd_event *event; | ||
52 | uint32_t input_index; | ||
53 | }; | 42 | }; |
54 | 43 | ||
55 | /* | 44 | /* |
56 | * Over-complicated pooled allocator for event notification slots. | ||
57 | * | ||
58 | * Each signal event needs a 64-bit signal slot where the signaler will write | 45 | * Each signal event needs a 64-bit signal slot where the signaler will write |
59 | * a 1 before sending an interrupt.l (This is needed because some interrupts | 46 | * a 1 before sending an interrupt. (This is needed because some interrupts |
60 | * do not contain enough spare data bits to identify an event.) | 47 | * do not contain enough spare data bits to identify an event.) |
61 | * We get whole pages from vmalloc and map them to the process VA. | 48 | * We get whole pages and map them to the process VA. |
62 | * Individual signal events are then allocated a slot in a page. | 49 | * Individual signal events use their event_id as slot index. |
63 | */ | 50 | */ |
64 | 51 | struct kfd_signal_page { | |
65 | struct signal_page { | ||
66 | struct list_head event_pages; /* kfd_process.signal_event_pages */ | ||
67 | uint64_t *kernel_address; | 52 | uint64_t *kernel_address; |
68 | uint64_t __user *user_address; | 53 | uint64_t __user *user_address; |
69 | uint32_t page_index; /* Index into the mmap aperture. */ | ||
70 | unsigned int free_slots; | ||
71 | unsigned long used_slot_bitmap[0]; | ||
72 | }; | 54 | }; |
73 | 55 | ||
74 | #define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT | ||
75 | #define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE) | ||
76 | #define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1) | ||
77 | #define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \ | ||
78 | SLOT_BITMAP_SIZE * sizeof(long)) | ||
79 | |||
80 | /* | ||
81 | * For signal events, the event ID is used as the interrupt user data. | ||
82 | * For SQ s_sendmsg interrupts, this is limited to 8 bits. | ||
83 | */ | ||
84 | |||
85 | #define INTERRUPT_DATA_BITS 8 | ||
86 | #define SIGNAL_EVENT_ID_SLOT_SHIFT 0 | ||
87 | 56 | ||
88 | static uint64_t *page_slots(struct signal_page *page) | 57 | static uint64_t *page_slots(struct kfd_signal_page *page) |
89 | { | 58 | { |
90 | return page->kernel_address; | 59 | return page->kernel_address; |
91 | } | 60 | } |
92 | 61 | ||
93 | static bool allocate_free_slot(struct kfd_process *process, | 62 | static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) |
94 | struct signal_page **out_page, | ||
95 | unsigned int *out_slot_index) | ||
96 | { | ||
97 | struct signal_page *page; | ||
98 | |||
99 | list_for_each_entry(page, &process->signal_event_pages, event_pages) { | ||
100 | if (page->free_slots > 0) { | ||
101 | unsigned int slot = | ||
102 | find_first_zero_bit(page->used_slot_bitmap, | ||
103 | SLOTS_PER_PAGE); | ||
104 | |||
105 | __set_bit(slot, page->used_slot_bitmap); | ||
106 | page->free_slots--; | ||
107 | |||
108 | page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; | ||
109 | |||
110 | *out_page = page; | ||
111 | *out_slot_index = slot; | ||
112 | |||
113 | pr_debug("Allocated event signal slot in page %p, slot %d\n", | ||
114 | page, slot); | ||
115 | |||
116 | return true; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | pr_debug("No free event signal slots were found for process %p\n", | ||
121 | process); | ||
122 | |||
123 | return false; | ||
124 | } | ||
125 | |||
126 | #define list_tail_entry(head, type, member) \ | ||
127 | list_entry((head)->prev, type, member) | ||
128 | |||
129 | static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) | ||
130 | { | 63 | { |
131 | void *backing_store; | 64 | void *backing_store; |
132 | struct signal_page *page; | 65 | struct kfd_signal_page *page; |
133 | 66 | ||
134 | page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL); | 67 | page = kzalloc(sizeof(*page), GFP_KERNEL); |
135 | if (!page) | 68 | if (!page) |
136 | goto fail_alloc_signal_page; | 69 | return NULL; |
137 | 70 | ||
138 | page->free_slots = SLOTS_PER_PAGE; | 71 | backing_store = (void *) __get_free_pages(GFP_KERNEL, |
139 | |||
140 | backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
141 | get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); | 72 | get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); |
142 | if (!backing_store) | 73 | if (!backing_store) |
143 | goto fail_alloc_signal_store; | 74 | goto fail_alloc_signal_store; |
144 | 75 | ||
145 | /* prevent user-mode info leaks */ | 76 | /* Initialize all events to unsignaled */ |
146 | memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, | 77 | memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, |
147 | KFD_SIGNAL_EVENT_LIMIT * 8); | 78 | KFD_SIGNAL_EVENT_LIMIT * 8); |
148 | 79 | ||
149 | page->kernel_address = backing_store; | 80 | page->kernel_address = backing_store; |
150 | |||
151 | if (list_empty(&p->signal_event_pages)) | ||
152 | page->page_index = 0; | ||
153 | else | ||
154 | page->page_index = list_tail_entry(&p->signal_event_pages, | ||
155 | struct signal_page, | ||
156 | event_pages)->page_index + 1; | ||
157 | |||
158 | pr_debug("Allocated new event signal page at %p, for process %p\n", | 81 | pr_debug("Allocated new event signal page at %p, for process %p\n", |
159 | page, p); | 82 | page, p); |
160 | pr_debug("Page index is %d\n", page->page_index); | ||
161 | 83 | ||
162 | list_add(&page->event_pages, &p->signal_event_pages); | 84 | return page; |
163 | |||
164 | return true; | ||
165 | 85 | ||
166 | fail_alloc_signal_store: | 86 | fail_alloc_signal_store: |
167 | kfree(page); | 87 | kfree(page); |
168 | fail_alloc_signal_page: | 88 | return NULL; |
169 | return false; | ||
170 | } | 89 | } |
171 | 90 | ||
172 | static bool allocate_event_notification_slot(struct file *devkfd, | 91 | static int allocate_event_notification_slot(struct kfd_process *p, |
173 | struct kfd_process *p, | 92 | struct kfd_event *ev) |
174 | struct signal_page **page, | ||
175 | unsigned int *signal_slot_index) | ||
176 | { | 93 | { |
177 | bool ret; | 94 | int id; |
178 | 95 | ||
179 | ret = allocate_free_slot(p, page, signal_slot_index); | 96 | if (!p->signal_page) { |
180 | if (!ret) { | 97 | p->signal_page = allocate_signal_page(p); |
181 | ret = allocate_signal_page(devkfd, p); | 98 | if (!p->signal_page) |
182 | if (ret) | 99 | return -ENOMEM; |
183 | ret = allocate_free_slot(p, page, signal_slot_index); | 100 | /* Oldest user mode expects 256 event slots */ |
101 | p->signal_mapped_size = 256*8; | ||
184 | } | 102 | } |
185 | 103 | ||
186 | return ret; | ||
187 | } | ||
188 | |||
189 | /* Assumes that the process's event_mutex is locked. */ | ||
190 | static void release_event_notification_slot(struct signal_page *page, | ||
191 | size_t slot_index) | ||
192 | { | ||
193 | __clear_bit(slot_index, page->used_slot_bitmap); | ||
194 | page->free_slots++; | ||
195 | |||
196 | /* We don't free signal pages, they are retained by the process | ||
197 | * and reused until it exits. | ||
198 | */ | ||
199 | } | ||
200 | |||
201 | static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, | ||
202 | unsigned int page_index) | ||
203 | { | ||
204 | struct signal_page *page; | ||
205 | |||
206 | /* | 104 | /* |
207 | * This is safe because we don't delete signal pages until the | 105 | * Compatibility with old user mode: Only use signal slots |
208 | * process exits. | 106 | * user mode has mapped, may be less than |
107 | * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase | ||
108 | * of the event limit without breaking user mode. | ||
209 | */ | 109 | */ |
210 | list_for_each_entry(page, &p->signal_event_pages, event_pages) | 110 | id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8, |
211 | if (page->page_index == page_index) | 111 | GFP_KERNEL); |
212 | return page; | 112 | if (id < 0) |
113 | return id; | ||
213 | 114 | ||
214 | return NULL; | 115 | ev->event_id = id; |
116 | page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT; | ||
117 | |||
118 | return 0; | ||
215 | } | 119 | } |
216 | 120 | ||
217 | /* | 121 | /* |
@@ -220,99 +124,81 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, | |||
220 | */ | 124 | */ |
221 | static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) | 125 | static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) |
222 | { | 126 | { |
223 | struct kfd_event *ev; | 127 | return idr_find(&p->event_idr, id); |
224 | |||
225 | hash_for_each_possible(p->events, ev, events, id) | ||
226 | if (ev->event_id == id) | ||
227 | return ev; | ||
228 | |||
229 | return NULL; | ||
230 | } | 128 | } |
231 | 129 | ||
232 | static u32 make_signal_event_id(struct signal_page *page, | 130 | /** |
233 | unsigned int signal_slot_index) | 131 | * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID |
234 | { | 132 | * @p: Pointer to struct kfd_process |
235 | return page->page_index | | 133 | * @id: ID to look up |
236 | (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); | 134 | * @bits: Number of valid bits in @id |
237 | } | 135 | * |
238 | 136 | * Finds the first signaled event with a matching partial ID. If no | |
239 | /* | 137 | * matching signaled event is found, returns NULL. In that case the |
240 | * Produce a kfd event id for a nonsignal event. | 138 | * caller should assume that the partial ID is invalid and do an |
241 | * These are arbitrary numbers, so we do a sequential search through | 139 | * exhaustive search of all signaled events. |
242 | * the hash table for an unused number. | 140 | * |
141 | * If multiple events with the same partial ID signal at the same | ||
142 | * time, they will be found one interrupt at a time, not necessarily | ||
143 | * in the same order the interrupts occurred. As long as the number of | ||
144 | * interrupts is correct, all signaled events will be seen by the | ||
145 | * driver. | ||
243 | */ | 146 | */ |
244 | static u32 make_nonsignal_event_id(struct kfd_process *p) | 147 | static struct kfd_event *lookup_signaled_event_by_partial_id( |
148 | struct kfd_process *p, uint32_t id, uint32_t bits) | ||
245 | { | 149 | { |
246 | u32 id; | 150 | struct kfd_event *ev; |
247 | |||
248 | for (id = p->next_nonsignal_event_id; | ||
249 | id < KFD_LAST_NONSIGNAL_EVENT_ID && | ||
250 | lookup_event_by_id(p, id); | ||
251 | id++) | ||
252 | ; | ||
253 | 151 | ||
254 | if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { | 152 | if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT) |
153 | return NULL; | ||
255 | 154 | ||
256 | /* | 155 | /* Fast path for the common case that @id is not a partial ID |
257 | * What if id == LAST_NONSIGNAL_EVENT_ID - 1? | 156 | * and we only need a single lookup. |
258 | * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so | 157 | */ |
259 | * the first loop fails immediately and we proceed with the | 158 | if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { |
260 | * wraparound loop below. | 159 | if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) |
261 | */ | 160 | return NULL; |
262 | p->next_nonsignal_event_id = id + 1; | ||
263 | 161 | ||
264 | return id; | 162 | return idr_find(&p->event_idr, id); |
265 | } | 163 | } |
266 | 164 | ||
267 | for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; | 165 | /* General case for partial IDs: Iterate over all matching IDs |
268 | id < KFD_LAST_NONSIGNAL_EVENT_ID && | 166 | * and find the first one that has signaled. |
269 | lookup_event_by_id(p, id); | 167 | */ |
270 | id++) | 168 | for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) { |
271 | ; | 169 | if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) |
272 | 170 | continue; | |
273 | 171 | ||
274 | if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { | 172 | ev = idr_find(&p->event_idr, id); |
275 | p->next_nonsignal_event_id = id + 1; | ||
276 | return id; | ||
277 | } | 173 | } |
278 | 174 | ||
279 | p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; | 175 | return ev; |
280 | return 0; | ||
281 | } | ||
282 | |||
283 | static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, | ||
284 | struct signal_page *page, | ||
285 | unsigned int signal_slot) | ||
286 | { | ||
287 | return lookup_event_by_id(p, make_signal_event_id(page, signal_slot)); | ||
288 | } | 176 | } |
289 | 177 | ||
290 | static int create_signal_event(struct file *devkfd, | 178 | static int create_signal_event(struct file *devkfd, |
291 | struct kfd_process *p, | 179 | struct kfd_process *p, |
292 | struct kfd_event *ev) | 180 | struct kfd_event *ev) |
293 | { | 181 | { |
294 | if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { | 182 | int ret; |
183 | |||
184 | if (p->signal_mapped_size && | ||
185 | p->signal_event_count == p->signal_mapped_size / 8) { | ||
295 | if (!p->signal_event_limit_reached) { | 186 | if (!p->signal_event_limit_reached) { |
296 | pr_warn("Signal event wasn't created because limit was reached\n"); | 187 | pr_warn("Signal event wasn't created because limit was reached\n"); |
297 | p->signal_event_limit_reached = true; | 188 | p->signal_event_limit_reached = true; |
298 | } | 189 | } |
299 | return -ENOMEM; | 190 | return -ENOSPC; |
300 | } | 191 | } |
301 | 192 | ||
302 | if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, | 193 | ret = allocate_event_notification_slot(p, ev); |
303 | &ev->signal_slot_index)) { | 194 | if (ret) { |
304 | pr_warn("Signal event wasn't created because out of kernel memory\n"); | 195 | pr_warn("Signal event wasn't created because out of kernel memory\n"); |
305 | return -ENOMEM; | 196 | return ret; |
306 | } | 197 | } |
307 | 198 | ||
308 | p->signal_event_count++; | 199 | p->signal_event_count++; |
309 | 200 | ||
310 | ev->user_signal_address = | 201 | ev->user_signal_address = &p->signal_page->user_address[ev->event_id]; |
311 | &ev->signal_page->user_address[ev->signal_slot_index]; | ||
312 | |||
313 | ev->event_id = make_signal_event_id(ev->signal_page, | ||
314 | ev->signal_slot_index); | ||
315 | |||
316 | pr_debug("Signal event number %zu created with id %d, address %p\n", | 202 | pr_debug("Signal event number %zu created with id %d, address %p\n", |
317 | p->signal_event_count, ev->event_id, | 203 | p->signal_event_count, ev->event_id, |
318 | ev->user_signal_address); | 204 | ev->user_signal_address); |
@@ -320,16 +206,20 @@ static int create_signal_event(struct file *devkfd, | |||
320 | return 0; | 206 | return 0; |
321 | } | 207 | } |
322 | 208 | ||
323 | /* | ||
324 | * No non-signal events are supported yet. | ||
325 | * We create them as events that never signal. | ||
326 | * Set event calls from user-mode are failed. | ||
327 | */ | ||
328 | static int create_other_event(struct kfd_process *p, struct kfd_event *ev) | 209 | static int create_other_event(struct kfd_process *p, struct kfd_event *ev) |
329 | { | 210 | { |
330 | ev->event_id = make_nonsignal_event_id(p); | 211 | /* Cast KFD_LAST_NONSIGNAL_EVENT_ID to uint32_t. This allows an |
331 | if (ev->event_id == 0) | 212 | * intentional integer overflow to -1 without a compiler |
332 | return -ENOMEM; | 213 | * warning. idr_alloc treats a negative value as "maximum |
214 | * signed integer". | ||
215 | */ | ||
216 | int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, | ||
217 | (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, | ||
218 | GFP_KERNEL); | ||
219 | |||
220 | if (id < 0) | ||
221 | return id; | ||
222 | ev->event_id = id; | ||
333 | 223 | ||
334 | return 0; | 224 | return 0; |
335 | } | 225 | } |
@@ -337,50 +227,47 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) | |||
337 | void kfd_event_init_process(struct kfd_process *p) | 227 | void kfd_event_init_process(struct kfd_process *p) |
338 | { | 228 | { |
339 | mutex_init(&p->event_mutex); | 229 | mutex_init(&p->event_mutex); |
340 | hash_init(p->events); | 230 | idr_init(&p->event_idr); |
341 | INIT_LIST_HEAD(&p->signal_event_pages); | 231 | p->signal_page = NULL; |
342 | p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; | ||
343 | p->signal_event_count = 0; | 232 | p->signal_event_count = 0; |
344 | } | 233 | } |
345 | 234 | ||
346 | static void destroy_event(struct kfd_process *p, struct kfd_event *ev) | 235 | static void destroy_event(struct kfd_process *p, struct kfd_event *ev) |
347 | { | 236 | { |
348 | if (ev->signal_page) { | 237 | struct kfd_event_waiter *waiter; |
349 | release_event_notification_slot(ev->signal_page, | ||
350 | ev->signal_slot_index); | ||
351 | p->signal_event_count--; | ||
352 | } | ||
353 | 238 | ||
354 | /* | 239 | /* Wake up pending waiters. They will return failure */ |
355 | * Abandon the list of waiters. Individual waiting threads will | 240 | list_for_each_entry(waiter, &ev->wq.head, wait.entry) |
356 | * clean up their own data. | 241 | waiter->event = NULL; |
357 | */ | 242 | wake_up_all(&ev->wq); |
358 | list_del(&ev->waiters); | 243 | |
244 | if (ev->type == KFD_EVENT_TYPE_SIGNAL || | ||
245 | ev->type == KFD_EVENT_TYPE_DEBUG) | ||
246 | p->signal_event_count--; | ||
359 | 247 | ||
360 | hash_del(&ev->events); | 248 | idr_remove(&p->event_idr, ev->event_id); |
361 | kfree(ev); | 249 | kfree(ev); |
362 | } | 250 | } |
363 | 251 | ||
364 | static void destroy_events(struct kfd_process *p) | 252 | static void destroy_events(struct kfd_process *p) |
365 | { | 253 | { |
366 | struct kfd_event *ev; | 254 | struct kfd_event *ev; |
367 | struct hlist_node *tmp; | 255 | uint32_t id; |
368 | unsigned int hash_bkt; | ||
369 | 256 | ||
370 | hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) | 257 | idr_for_each_entry(&p->event_idr, ev, id) |
371 | destroy_event(p, ev); | 258 | destroy_event(p, ev); |
259 | idr_destroy(&p->event_idr); | ||
372 | } | 260 | } |
373 | 261 | ||
374 | /* | 262 | /* |
375 | * We assume that the process is being destroyed and there is no need to | 263 | * We assume that the process is being destroyed and there is no need to |
376 | * unmap the pages or keep bookkeeping data in order. | 264 | * unmap the pages or keep bookkeeping data in order. |
377 | */ | 265 | */ |
378 | static void shutdown_signal_pages(struct kfd_process *p) | 266 | static void shutdown_signal_page(struct kfd_process *p) |
379 | { | 267 | { |
380 | struct signal_page *page, *tmp; | 268 | struct kfd_signal_page *page = p->signal_page; |
381 | 269 | ||
382 | list_for_each_entry_safe(page, tmp, &p->signal_event_pages, | 270 | if (page) { |
383 | event_pages) { | ||
384 | free_pages((unsigned long)page->kernel_address, | 271 | free_pages((unsigned long)page->kernel_address, |
385 | get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); | 272 | get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); |
386 | kfree(page); | 273 | kfree(page); |
@@ -390,7 +277,7 @@ static void shutdown_signal_pages(struct kfd_process *p) | |||
390 | void kfd_event_free_process(struct kfd_process *p) | 277 | void kfd_event_free_process(struct kfd_process *p) |
391 | { | 278 | { |
392 | destroy_events(p); | 279 | destroy_events(p); |
393 | shutdown_signal_pages(p); | 280 | shutdown_signal_page(p); |
394 | } | 281 | } |
395 | 282 | ||
396 | static bool event_can_be_gpu_signaled(const struct kfd_event *ev) | 283 | static bool event_can_be_gpu_signaled(const struct kfd_event *ev) |
@@ -419,7 +306,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, | |||
419 | ev->auto_reset = auto_reset; | 306 | ev->auto_reset = auto_reset; |
420 | ev->signaled = false; | 307 | ev->signaled = false; |
421 | 308 | ||
422 | INIT_LIST_HEAD(&ev->waiters); | 309 | init_waitqueue_head(&ev->wq); |
423 | 310 | ||
424 | *event_page_offset = 0; | 311 | *event_page_offset = 0; |
425 | 312 | ||
@@ -430,10 +317,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, | |||
430 | case KFD_EVENT_TYPE_DEBUG: | 317 | case KFD_EVENT_TYPE_DEBUG: |
431 | ret = create_signal_event(devkfd, p, ev); | 318 | ret = create_signal_event(devkfd, p, ev); |
432 | if (!ret) { | 319 | if (!ret) { |
433 | *event_page_offset = (ev->signal_page->page_index | | 320 | *event_page_offset = KFD_MMAP_EVENTS_MASK; |
434 | KFD_MMAP_EVENTS_MASK); | ||
435 | *event_page_offset <<= PAGE_SHIFT; | 321 | *event_page_offset <<= PAGE_SHIFT; |
436 | *event_slot_index = ev->signal_slot_index; | 322 | *event_slot_index = ev->event_id; |
437 | } | 323 | } |
438 | break; | 324 | break; |
439 | default: | 325 | default: |
@@ -442,8 +328,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, | |||
442 | } | 328 | } |
443 | 329 | ||
444 | if (!ret) { | 330 | if (!ret) { |
445 | hash_add(p->events, &ev->events, ev->event_id); | ||
446 | |||
447 | *event_id = ev->event_id; | 331 | *event_id = ev->event_id; |
448 | *event_trigger_data = ev->event_id; | 332 | *event_trigger_data = ev->event_id; |
449 | } else { | 333 | } else { |
@@ -477,19 +361,18 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id) | |||
477 | static void set_event(struct kfd_event *ev) | 361 | static void set_event(struct kfd_event *ev) |
478 | { | 362 | { |
479 | struct kfd_event_waiter *waiter; | 363 | struct kfd_event_waiter *waiter; |
480 | struct kfd_event_waiter *next; | ||
481 | 364 | ||
482 | /* Auto reset if the list is non-empty and we're waking someone. */ | 365 | /* Auto reset if the list is non-empty and we're waking |
483 | ev->signaled = !ev->auto_reset || list_empty(&ev->waiters); | 366 | * someone. waitqueue_active is safe here because we're |
367 | * protected by the p->event_mutex, which is also held when | ||
368 | * updating the wait queues in kfd_wait_on_events. | ||
369 | */ | ||
370 | ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); | ||
484 | 371 | ||
485 | list_for_each_entry_safe(waiter, next, &ev->waiters, waiters) { | 372 | list_for_each_entry(waiter, &ev->wq.head, wait.entry) |
486 | waiter->activated = true; | 373 | waiter->activated = true; |
487 | 374 | ||
488 | /* _init because free_waiters will call list_del */ | 375 | wake_up_all(&ev->wq); |
489 | list_del_init(&waiter->waiters); | ||
490 | |||
491 | wake_up_process(waiter->sleeping_task); | ||
492 | } | ||
493 | } | 376 | } |
494 | 377 | ||
495 | /* Assumes that p is current. */ | 378 | /* Assumes that p is current. */ |
@@ -538,13 +421,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) | |||
538 | 421 | ||
539 | static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) | 422 | static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) |
540 | { | 423 | { |
541 | page_slots(ev->signal_page)[ev->signal_slot_index] = | 424 | page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; |
542 | UNSIGNALED_EVENT_SLOT; | ||
543 | } | ||
544 | |||
545 | static bool is_slot_signaled(struct signal_page *page, unsigned int index) | ||
546 | { | ||
547 | return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT; | ||
548 | } | 425 | } |
549 | 426 | ||
550 | static void set_event_from_interrupt(struct kfd_process *p, | 427 | static void set_event_from_interrupt(struct kfd_process *p, |
@@ -559,7 +436,7 @@ static void set_event_from_interrupt(struct kfd_process *p, | |||
559 | void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, | 436 | void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, |
560 | uint32_t valid_id_bits) | 437 | uint32_t valid_id_bits) |
561 | { | 438 | { |
562 | struct kfd_event *ev; | 439 | struct kfd_event *ev = NULL; |
563 | 440 | ||
564 | /* | 441 | /* |
565 | * Because we are called from arbitrary context (workqueue) as opposed | 442 | * Because we are called from arbitrary context (workqueue) as opposed |
@@ -573,26 +450,46 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, | |||
573 | 450 | ||
574 | mutex_lock(&p->event_mutex); | 451 | mutex_lock(&p->event_mutex); |
575 | 452 | ||
576 | if (valid_id_bits >= INTERRUPT_DATA_BITS) { | 453 | if (valid_id_bits) |
577 | /* Partial ID is a full ID. */ | 454 | ev = lookup_signaled_event_by_partial_id(p, partial_id, |
578 | ev = lookup_event_by_id(p, partial_id); | 455 | valid_id_bits); |
456 | if (ev) { | ||
579 | set_event_from_interrupt(p, ev); | 457 | set_event_from_interrupt(p, ev); |
580 | } else { | 458 | } else if (p->signal_page) { |
581 | /* | 459 | /* |
582 | * Partial ID is in fact partial. For now we completely | 460 | * Partial ID lookup failed. Assume that the event ID |
583 | * ignore it, but we could use any bits we did receive to | 461 | * in the interrupt payload was invalid and do an |
584 | * search faster. | 462 | * exhaustive search of signaled events. |
585 | */ | 463 | */ |
586 | struct signal_page *page; | 464 | uint64_t *slots = page_slots(p->signal_page); |
587 | unsigned int i; | 465 | uint32_t id; |
588 | 466 | ||
589 | list_for_each_entry(page, &p->signal_event_pages, event_pages) | 467 | if (valid_id_bits) |
590 | for (i = 0; i < SLOTS_PER_PAGE; i++) | 468 | pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", |
591 | if (is_slot_signaled(page, i)) { | 469 | partial_id, valid_id_bits); |
592 | ev = lookup_event_by_page_slot(p, | 470 | |
593 | page, i); | 471 | if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { |
472 | /* With relatively few events, it's faster to | ||
473 | * iterate over the event IDR | ||
474 | */ | ||
475 | idr_for_each_entry(&p->event_idr, ev, id) { | ||
476 | if (id >= KFD_SIGNAL_EVENT_LIMIT) | ||
477 | break; | ||
478 | |||
479 | if (slots[id] != UNSIGNALED_EVENT_SLOT) | ||
480 | set_event_from_interrupt(p, ev); | ||
481 | } | ||
482 | } else { | ||
483 | /* With relatively many events, it's faster to | ||
484 | * iterate over the signal slots and lookup | ||
485 | * only signaled events from the IDR. | ||
486 | */ | ||
487 | for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) | ||
488 | if (slots[id] != UNSIGNALED_EVENT_SLOT) { | ||
489 | ev = lookup_event_by_id(p, id); | ||
594 | set_event_from_interrupt(p, ev); | 490 | set_event_from_interrupt(p, ev); |
595 | } | 491 | } |
492 | } | ||
596 | } | 493 | } |
597 | 494 | ||
598 | mutex_unlock(&p->event_mutex); | 495 | mutex_unlock(&p->event_mutex); |
@@ -609,18 +506,16 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) | |||
609 | GFP_KERNEL); | 506 | GFP_KERNEL); |
610 | 507 | ||
611 | for (i = 0; (event_waiters) && (i < num_events) ; i++) { | 508 | for (i = 0; (event_waiters) && (i < num_events) ; i++) { |
612 | INIT_LIST_HEAD(&event_waiters[i].waiters); | 509 | init_wait(&event_waiters[i].wait); |
613 | event_waiters[i].sleeping_task = current; | ||
614 | event_waiters[i].activated = false; | 510 | event_waiters[i].activated = false; |
615 | } | 511 | } |
616 | 512 | ||
617 | return event_waiters; | 513 | return event_waiters; |
618 | } | 514 | } |
619 | 515 | ||
620 | static int init_event_waiter(struct kfd_process *p, | 516 | static int init_event_waiter_get_status(struct kfd_process *p, |
621 | struct kfd_event_waiter *waiter, | 517 | struct kfd_event_waiter *waiter, |
622 | uint32_t event_id, | 518 | uint32_t event_id) |
623 | uint32_t input_index) | ||
624 | { | 519 | { |
625 | struct kfd_event *ev = lookup_event_by_id(p, event_id); | 520 | struct kfd_event *ev = lookup_event_by_id(p, event_id); |
626 | 521 | ||
@@ -628,38 +523,60 @@ static int init_event_waiter(struct kfd_process *p, | |||
628 | return -EINVAL; | 523 | return -EINVAL; |
629 | 524 | ||
630 | waiter->event = ev; | 525 | waiter->event = ev; |
631 | waiter->input_index = input_index; | ||
632 | waiter->activated = ev->signaled; | 526 | waiter->activated = ev->signaled; |
633 | ev->signaled = ev->signaled && !ev->auto_reset; | 527 | ev->signaled = ev->signaled && !ev->auto_reset; |
634 | 528 | ||
635 | list_add(&waiter->waiters, &ev->waiters); | ||
636 | |||
637 | return 0; | 529 | return 0; |
638 | } | 530 | } |
639 | 531 | ||
640 | static bool test_event_condition(bool all, uint32_t num_events, | 532 | static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter) |
533 | { | ||
534 | struct kfd_event *ev = waiter->event; | ||
535 | |||
536 | /* Only add to the wait list if we actually need to | ||
537 | * wait on this event. | ||
538 | */ | ||
539 | if (!waiter->activated) | ||
540 | add_wait_queue(&ev->wq, &waiter->wait); | ||
541 | } | ||
542 | |||
543 | /* test_event_condition - Test condition of events being waited for | ||
544 | * @all: Return completion only if all events have signaled | ||
545 | * @num_events: Number of events to wait for | ||
546 | * @event_waiters: Array of event waiters, one per event | ||
547 | * | ||
548 | * Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have | ||
549 | * signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all) | ||
550 | * events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of | ||
551 | * the events have been destroyed. | ||
552 | */ | ||
553 | static uint32_t test_event_condition(bool all, uint32_t num_events, | ||
641 | struct kfd_event_waiter *event_waiters) | 554 | struct kfd_event_waiter *event_waiters) |
642 | { | 555 | { |
643 | uint32_t i; | 556 | uint32_t i; |
644 | uint32_t activated_count = 0; | 557 | uint32_t activated_count = 0; |
645 | 558 | ||
646 | for (i = 0; i < num_events; i++) { | 559 | for (i = 0; i < num_events; i++) { |
560 | if (!event_waiters[i].event) | ||
561 | return KFD_IOC_WAIT_RESULT_FAIL; | ||
562 | |||
647 | if (event_waiters[i].activated) { | 563 | if (event_waiters[i].activated) { |
648 | if (!all) | 564 | if (!all) |
649 | return true; | 565 | return KFD_IOC_WAIT_RESULT_COMPLETE; |
650 | 566 | ||
651 | activated_count++; | 567 | activated_count++; |
652 | } | 568 | } |
653 | } | 569 | } |
654 | 570 | ||
655 | return activated_count == num_events; | 571 | return activated_count == num_events ? |
572 | KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT; | ||
656 | } | 573 | } |
657 | 574 | ||
658 | /* | 575 | /* |
659 | * Copy event specific data, if defined. | 576 | * Copy event specific data, if defined. |
660 | * Currently only memory exception events have additional data to copy to user | 577 | * Currently only memory exception events have additional data to copy to user |
661 | */ | 578 | */ |
662 | static bool copy_signaled_event_data(uint32_t num_events, | 579 | static int copy_signaled_event_data(uint32_t num_events, |
663 | struct kfd_event_waiter *event_waiters, | 580 | struct kfd_event_waiter *event_waiters, |
664 | struct kfd_event_data __user *data) | 581 | struct kfd_event_data __user *data) |
665 | { | 582 | { |
@@ -673,15 +590,15 @@ static bool copy_signaled_event_data(uint32_t num_events, | |||
673 | waiter = &event_waiters[i]; | 590 | waiter = &event_waiters[i]; |
674 | event = waiter->event; | 591 | event = waiter->event; |
675 | if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { | 592 | if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { |
676 | dst = &data[waiter->input_index].memory_exception_data; | 593 | dst = &data[i].memory_exception_data; |
677 | src = &event->memory_exception_data; | 594 | src = &event->memory_exception_data; |
678 | if (copy_to_user(dst, src, | 595 | if (copy_to_user(dst, src, |
679 | sizeof(struct kfd_hsa_memory_exception_data))) | 596 | sizeof(struct kfd_hsa_memory_exception_data))) |
680 | return false; | 597 | return -EFAULT; |
681 | } | 598 | } |
682 | } | 599 | } |
683 | 600 | ||
684 | return true; | 601 | return 0; |
685 | 602 | ||
686 | } | 603 | } |
687 | 604 | ||
@@ -710,7 +627,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) | |||
710 | uint32_t i; | 627 | uint32_t i; |
711 | 628 | ||
712 | for (i = 0; i < num_events; i++) | 629 | for (i = 0; i < num_events; i++) |
713 | list_del(&waiters[i].waiters); | 630 | if (waiters[i].event) |
631 | remove_wait_queue(&waiters[i].event->wq, | ||
632 | &waiters[i].wait); | ||
714 | 633 | ||
715 | kfree(waiters); | 634 | kfree(waiters); |
716 | } | 635 | } |
@@ -718,38 +637,56 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) | |||
718 | int kfd_wait_on_events(struct kfd_process *p, | 637 | int kfd_wait_on_events(struct kfd_process *p, |
719 | uint32_t num_events, void __user *data, | 638 | uint32_t num_events, void __user *data, |
720 | bool all, uint32_t user_timeout_ms, | 639 | bool all, uint32_t user_timeout_ms, |
721 | enum kfd_event_wait_result *wait_result) | 640 | uint32_t *wait_result) |
722 | { | 641 | { |
723 | struct kfd_event_data __user *events = | 642 | struct kfd_event_data __user *events = |
724 | (struct kfd_event_data __user *) data; | 643 | (struct kfd_event_data __user *) data; |
725 | uint32_t i; | 644 | uint32_t i; |
726 | int ret = 0; | 645 | int ret = 0; |
646 | |||
727 | struct kfd_event_waiter *event_waiters = NULL; | 647 | struct kfd_event_waiter *event_waiters = NULL; |
728 | long timeout = user_timeout_to_jiffies(user_timeout_ms); | 648 | long timeout = user_timeout_to_jiffies(user_timeout_ms); |
729 | 649 | ||
730 | mutex_lock(&p->event_mutex); | ||
731 | |||
732 | event_waiters = alloc_event_waiters(num_events); | 650 | event_waiters = alloc_event_waiters(num_events); |
733 | if (!event_waiters) { | 651 | if (!event_waiters) { |
734 | ret = -ENOMEM; | 652 | ret = -ENOMEM; |
735 | goto fail; | 653 | goto out; |
736 | } | 654 | } |
737 | 655 | ||
656 | mutex_lock(&p->event_mutex); | ||
657 | |||
738 | for (i = 0; i < num_events; i++) { | 658 | for (i = 0; i < num_events; i++) { |
739 | struct kfd_event_data event_data; | 659 | struct kfd_event_data event_data; |
740 | 660 | ||
741 | if (copy_from_user(&event_data, &events[i], | 661 | if (copy_from_user(&event_data, &events[i], |
742 | sizeof(struct kfd_event_data))) { | 662 | sizeof(struct kfd_event_data))) { |
743 | ret = -EFAULT; | 663 | ret = -EFAULT; |
744 | goto fail; | 664 | goto out_unlock; |
745 | } | 665 | } |
746 | 666 | ||
747 | ret = init_event_waiter(p, &event_waiters[i], | 667 | ret = init_event_waiter_get_status(p, &event_waiters[i], |
748 | event_data.event_id, i); | 668 | event_data.event_id); |
749 | if (ret) | 669 | if (ret) |
750 | goto fail; | 670 | goto out_unlock; |
751 | } | 671 | } |
752 | 672 | ||
673 | /* Check condition once. */ | ||
674 | *wait_result = test_event_condition(all, num_events, event_waiters); | ||
675 | if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) { | ||
676 | ret = copy_signaled_event_data(num_events, | ||
677 | event_waiters, events); | ||
678 | goto out_unlock; | ||
679 | } else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) { | ||
680 | /* This should not happen. Events shouldn't be | ||
681 | * destroyed while we're holding the event_mutex | ||
682 | */ | ||
683 | goto out_unlock; | ||
684 | } | ||
685 | |||
686 | /* Add to wait lists if we need to wait. */ | ||
687 | for (i = 0; i < num_events; i++) | ||
688 | init_event_waiter_add_to_waitlist(&event_waiters[i]); | ||
689 | |||
753 | mutex_unlock(&p->event_mutex); | 690 | mutex_unlock(&p->event_mutex); |
754 | 691 | ||
755 | while (true) { | 692 | while (true) { |
@@ -771,62 +708,66 @@ int kfd_wait_on_events(struct kfd_process *p, | |||
771 | break; | 708 | break; |
772 | } | 709 | } |
773 | 710 | ||
774 | if (test_event_condition(all, num_events, event_waiters)) { | 711 | /* Set task state to interruptible sleep before |
775 | if (copy_signaled_event_data(num_events, | 712 | * checking wake-up conditions. A concurrent wake-up |
776 | event_waiters, events)) | 713 | * will put the task back into runnable state. In that |
777 | *wait_result = KFD_WAIT_COMPLETE; | 714 | * case schedule_timeout will not put the task to |
778 | else | 715 | * sleep and we'll get a chance to re-check the |
779 | *wait_result = KFD_WAIT_ERROR; | 716 | * updated conditions almost immediately. Otherwise, |
717 | * this race condition would lead to a soft hang or a | ||
718 | * very long sleep. | ||
719 | */ | ||
720 | set_current_state(TASK_INTERRUPTIBLE); | ||
721 | |||
722 | *wait_result = test_event_condition(all, num_events, | ||
723 | event_waiters); | ||
724 | if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT) | ||
780 | break; | 725 | break; |
781 | } | ||
782 | 726 | ||
783 | if (timeout <= 0) { | 727 | if (timeout <= 0) |
784 | *wait_result = KFD_WAIT_TIMEOUT; | ||
785 | break; | 728 | break; |
786 | } | ||
787 | 729 | ||
788 | timeout = schedule_timeout_interruptible(timeout); | 730 | timeout = schedule_timeout(timeout); |
789 | } | 731 | } |
790 | __set_current_state(TASK_RUNNING); | 732 | __set_current_state(TASK_RUNNING); |
791 | 733 | ||
734 | /* copy_signaled_event_data may sleep. So this has to happen | ||
735 | * after the task state is set back to RUNNING. | ||
736 | */ | ||
737 | if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) | ||
738 | ret = copy_signaled_event_data(num_events, | ||
739 | event_waiters, events); | ||
740 | |||
792 | mutex_lock(&p->event_mutex); | 741 | mutex_lock(&p->event_mutex); |
742 | out_unlock: | ||
793 | free_waiters(num_events, event_waiters); | 743 | free_waiters(num_events, event_waiters); |
794 | mutex_unlock(&p->event_mutex); | 744 | mutex_unlock(&p->event_mutex); |
795 | 745 | out: | |
796 | return ret; | 746 | if (ret) |
797 | 747 | *wait_result = KFD_IOC_WAIT_RESULT_FAIL; | |
798 | fail: | 748 | else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL) |
799 | if (event_waiters) | 749 | ret = -EIO; |
800 | free_waiters(num_events, event_waiters); | ||
801 | |||
802 | mutex_unlock(&p->event_mutex); | ||
803 | |||
804 | *wait_result = KFD_WAIT_ERROR; | ||
805 | 750 | ||
806 | return ret; | 751 | return ret; |
807 | } | 752 | } |
808 | 753 | ||
809 | int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) | 754 | int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) |
810 | { | 755 | { |
811 | |||
812 | unsigned int page_index; | ||
813 | unsigned long pfn; | 756 | unsigned long pfn; |
814 | struct signal_page *page; | 757 | struct kfd_signal_page *page; |
758 | int ret; | ||
815 | 759 | ||
816 | /* check required size is logical */ | 760 | /* check required size doesn't exceed the allocated size */ |
817 | if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != | 761 | if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) < |
818 | get_order(vma->vm_end - vma->vm_start)) { | 762 | get_order(vma->vm_end - vma->vm_start)) { |
819 | pr_err("Event page mmap requested illegal size\n"); | 763 | pr_err("Event page mmap requested illegal size\n"); |
820 | return -EINVAL; | 764 | return -EINVAL; |
821 | } | 765 | } |
822 | 766 | ||
823 | page_index = vma->vm_pgoff; | 767 | page = p->signal_page; |
824 | |||
825 | page = lookup_signal_page_by_index(p, page_index); | ||
826 | if (!page) { | 768 | if (!page) { |
827 | /* Probably KFD bug, but mmap is user-accessible. */ | 769 | /* Probably KFD bug, but mmap is user-accessible. */ |
828 | pr_debug("Signal page could not be found for page_index %u\n", | 770 | pr_debug("Signal page could not be found\n"); |
829 | page_index); | ||
830 | return -EINVAL; | 771 | return -EINVAL; |
831 | } | 772 | } |
832 | 773 | ||
@@ -847,8 +788,12 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) | |||
847 | page->user_address = (uint64_t __user *)vma->vm_start; | 788 | page->user_address = (uint64_t __user *)vma->vm_start; |
848 | 789 | ||
849 | /* mapping the page to user process */ | 790 | /* mapping the page to user process */ |
850 | return remap_pfn_range(vma, vma->vm_start, pfn, | 791 | ret = remap_pfn_range(vma, vma->vm_start, pfn, |
851 | vma->vm_end - vma->vm_start, vma->vm_page_prot); | 792 | vma->vm_end - vma->vm_start, vma->vm_page_prot); |
793 | if (!ret) | ||
794 | p->signal_mapped_size = vma->vm_end - vma->vm_start; | ||
795 | |||
796 | return ret; | ||
852 | } | 797 | } |
853 | 798 | ||
854 | /* | 799 | /* |
@@ -860,12 +805,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, | |||
860 | { | 805 | { |
861 | struct kfd_hsa_memory_exception_data *ev_data; | 806 | struct kfd_hsa_memory_exception_data *ev_data; |
862 | struct kfd_event *ev; | 807 | struct kfd_event *ev; |
863 | int bkt; | 808 | uint32_t id; |
864 | bool send_signal = true; | 809 | bool send_signal = true; |
865 | 810 | ||
866 | ev_data = (struct kfd_hsa_memory_exception_data *) event_data; | 811 | ev_data = (struct kfd_hsa_memory_exception_data *) event_data; |
867 | 812 | ||
868 | hash_for_each(p->events, bkt, ev, events) | 813 | id = KFD_FIRST_NONSIGNAL_EVENT_ID; |
814 | idr_for_each_entry_continue(&p->event_idr, ev, id) | ||
869 | if (ev->type == type) { | 815 | if (ev->type == type) { |
870 | send_signal = false; | 816 | send_signal = false; |
871 | dev_dbg(kfd_device, | 817 | dev_dbg(kfd_device, |
@@ -904,14 +850,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, | |||
904 | * running so the lookup function returns a locked process. | 850 | * running so the lookup function returns a locked process. |
905 | */ | 851 | */ |
906 | struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); | 852 | struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); |
853 | struct mm_struct *mm; | ||
907 | 854 | ||
908 | if (!p) | 855 | if (!p) |
909 | return; /* Presumably process exited. */ | 856 | return; /* Presumably process exited. */ |
910 | 857 | ||
858 | /* Take a safe reference to the mm_struct, which may otherwise | ||
859 | * disappear even while the kfd_process is still referenced. | ||
860 | */ | ||
861 | mm = get_task_mm(p->lead_thread); | ||
862 | if (!mm) { | ||
863 | mutex_unlock(&p->mutex); | ||
864 | return; /* Process is exiting */ | ||
865 | } | ||
866 | |||
911 | memset(&memory_exception_data, 0, sizeof(memory_exception_data)); | 867 | memset(&memory_exception_data, 0, sizeof(memory_exception_data)); |
912 | 868 | ||
913 | down_read(&p->mm->mmap_sem); | 869 | down_read(&mm->mmap_sem); |
914 | vma = find_vma(p->mm, address); | 870 | vma = find_vma(mm, address); |
915 | 871 | ||
916 | memory_exception_data.gpu_id = dev->id; | 872 | memory_exception_data.gpu_id = dev->id; |
917 | memory_exception_data.va = address; | 873 | memory_exception_data.va = address; |
@@ -937,7 +893,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, | |||
937 | } | 893 | } |
938 | } | 894 | } |
939 | 895 | ||
940 | up_read(&p->mm->mmap_sem); | 896 | up_read(&mm->mmap_sem); |
897 | mmput(mm); | ||
941 | 898 | ||
942 | mutex_lock(&p->event_mutex); | 899 | mutex_lock(&p->event_mutex); |
943 | 900 | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 28f6838b1f4c..abca5bfebbff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h | |||
@@ -27,12 +27,17 @@ | |||
27 | #include <linux/hashtable.h> | 27 | #include <linux/hashtable.h> |
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/list.h> | 29 | #include <linux/list.h> |
30 | #include <linux/wait.h> | ||
30 | #include "kfd_priv.h" | 31 | #include "kfd_priv.h" |
31 | #include <uapi/linux/kfd_ioctl.h> | 32 | #include <uapi/linux/kfd_ioctl.h> |
32 | 33 | ||
33 | #define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U | 34 | /* |
34 | #define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK | 35 | * IDR supports non-negative integer IDs. Small IDs are used for |
35 | #define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX | 36 | * signal events to match their signal slot. Use the upper half of the |
37 | * ID space for non-signal events. | ||
38 | */ | ||
39 | #define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1) | ||
40 | #define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX | ||
36 | 41 | ||
37 | /* | 42 | /* |
38 | * Written into kfd_signal_slot_t to indicate that the event is not signaled. | 43 | * Written into kfd_signal_slot_t to indicate that the event is not signaled. |
@@ -46,9 +51,6 @@ struct kfd_event_waiter; | |||
46 | struct signal_page; | 51 | struct signal_page; |
47 | 52 | ||
48 | struct kfd_event { | 53 | struct kfd_event { |
49 | /* All events in process, rooted at kfd_process.events. */ | ||
50 | struct hlist_node events; | ||
51 | |||
52 | u32 event_id; | 54 | u32 event_id; |
53 | 55 | ||
54 | bool signaled; | 56 | bool signaled; |
@@ -56,11 +58,9 @@ struct kfd_event { | |||
56 | 58 | ||
57 | int type; | 59 | int type; |
58 | 60 | ||
59 | struct list_head waiters; /* List of kfd_event_waiter by waiters. */ | 61 | wait_queue_head_t wq; /* List of event waiters. */ |
60 | 62 | ||
61 | /* Only for signal events. */ | 63 | /* Only for signal events. */ |
62 | struct signal_page *signal_page; | ||
63 | unsigned int signal_slot_index; | ||
64 | uint64_t __user *user_signal_address; | 64 | uint64_t __user *user_signal_address; |
65 | 65 | ||
66 | /* type specific data */ | 66 | /* type specific data */ |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 70b3a99cffc2..035c351f47c5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | |||
@@ -42,26 +42,26 @@ | |||
42 | 42 | ||
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/device.h> | 44 | #include <linux/device.h> |
45 | #include <linux/kfifo.h> | ||
45 | #include "kfd_priv.h" | 46 | #include "kfd_priv.h" |
46 | 47 | ||
47 | #define KFD_INTERRUPT_RING_SIZE 1024 | 48 | #define KFD_IH_NUM_ENTRIES 8192 |
48 | 49 | ||
49 | static void interrupt_wq(struct work_struct *); | 50 | static void interrupt_wq(struct work_struct *); |
50 | 51 | ||
51 | int kfd_interrupt_init(struct kfd_dev *kfd) | 52 | int kfd_interrupt_init(struct kfd_dev *kfd) |
52 | { | 53 | { |
53 | void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, | 54 | int r; |
54 | kfd->device_info->ih_ring_entry_size, | 55 | |
55 | GFP_KERNEL); | 56 | r = kfifo_alloc(&kfd->ih_fifo, |
56 | if (!interrupt_ring) | 57 | KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size, |
57 | return -ENOMEM; | 58 | GFP_KERNEL); |
58 | 59 | if (r) { | |
59 | kfd->interrupt_ring = interrupt_ring; | 60 | dev_err(kfd_chardev(), "Failed to allocate IH fifo\n"); |
60 | kfd->interrupt_ring_size = | 61 | return r; |
61 | KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; | 62 | } |
62 | atomic_set(&kfd->interrupt_ring_wptr, 0); | ||
63 | atomic_set(&kfd->interrupt_ring_rptr, 0); | ||
64 | 63 | ||
64 | kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); | ||
65 | spin_lock_init(&kfd->interrupt_lock); | 65 | spin_lock_init(&kfd->interrupt_lock); |
66 | 66 | ||
67 | INIT_WORK(&kfd->interrupt_work, interrupt_wq); | 67 | INIT_WORK(&kfd->interrupt_work, interrupt_wq); |
@@ -92,74 +92,47 @@ void kfd_interrupt_exit(struct kfd_dev *kfd) | |||
92 | spin_unlock_irqrestore(&kfd->interrupt_lock, flags); | 92 | spin_unlock_irqrestore(&kfd->interrupt_lock, flags); |
93 | 93 | ||
94 | /* | 94 | /* |
95 | * Flush_scheduled_work ensures that there are no outstanding | 95 | * flush_work ensures that there are no outstanding |
96 | * work-queue items that will access interrupt_ring. New work items | 96 | * work-queue items that will access interrupt_ring. New work items |
97 | * can't be created because we stopped interrupt handling above. | 97 | * can't be created because we stopped interrupt handling above. |
98 | */ | 98 | */ |
99 | flush_scheduled_work(); | 99 | flush_workqueue(kfd->ih_wq); |
100 | 100 | ||
101 | kfree(kfd->interrupt_ring); | 101 | kfifo_free(&kfd->ih_fifo); |
102 | } | 102 | } |
103 | 103 | ||
104 | /* | 104 | /* |
105 | * This assumes that it can't be called concurrently with itself | 105 | * Assumption: single reader/writer. This function is not re-entrant |
106 | * but only with dequeue_ih_ring_entry. | ||
107 | */ | 106 | */ |
108 | bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) | 107 | bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) |
109 | { | 108 | { |
110 | unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); | 109 | int count; |
111 | unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); | ||
112 | 110 | ||
113 | if ((rptr - wptr) % kfd->interrupt_ring_size == | 111 | count = kfifo_in(&kfd->ih_fifo, ih_ring_entry, |
114 | kfd->device_info->ih_ring_entry_size) { | 112 | kfd->device_info->ih_ring_entry_size); |
115 | /* This is very bad, the system is likely to hang. */ | 113 | if (count != kfd->device_info->ih_ring_entry_size) { |
116 | dev_err_ratelimited(kfd_chardev(), | 114 | dev_err_ratelimited(kfd_chardev(), |
117 | "Interrupt ring overflow, dropping interrupt.\n"); | 115 | "Interrupt ring overflow, dropping interrupt %d\n", |
116 | count); | ||
118 | return false; | 117 | return false; |
119 | } | 118 | } |
120 | 119 | ||
121 | memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, | ||
122 | kfd->device_info->ih_ring_entry_size); | ||
123 | |||
124 | wptr = (wptr + kfd->device_info->ih_ring_entry_size) % | ||
125 | kfd->interrupt_ring_size; | ||
126 | smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ | ||
127 | atomic_set(&kfd->interrupt_ring_wptr, wptr); | ||
128 | |||
129 | return true; | 120 | return true; |
130 | } | 121 | } |
131 | 122 | ||
132 | /* | 123 | /* |
133 | * This assumes that it can't be called concurrently with itself | 124 | * Assumption: single reader/writer. This function is not re-entrant |
134 | * but only with enqueue_ih_ring_entry. | ||
135 | */ | 125 | */ |
136 | static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) | 126 | static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) |
137 | { | 127 | { |
138 | /* | 128 | int count; |
139 | * Assume that wait queues have an implicit barrier, i.e. anything that | ||
140 | * happened in the ISR before it queued work is visible. | ||
141 | */ | ||
142 | |||
143 | unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); | ||
144 | unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); | ||
145 | 129 | ||
146 | if (rptr == wptr) | 130 | count = kfifo_out(&kfd->ih_fifo, ih_ring_entry, |
147 | return false; | 131 | kfd->device_info->ih_ring_entry_size); |
148 | |||
149 | memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, | ||
150 | kfd->device_info->ih_ring_entry_size); | ||
151 | |||
152 | rptr = (rptr + kfd->device_info->ih_ring_entry_size) % | ||
153 | kfd->interrupt_ring_size; | ||
154 | 132 | ||
155 | /* | 133 | WARN_ON(count && count != kfd->device_info->ih_ring_entry_size); |
156 | * Ensure the rptr write update is not visible until | ||
157 | * memcpy has finished reading. | ||
158 | */ | ||
159 | smp_mb(); | ||
160 | atomic_set(&kfd->interrupt_ring_rptr, rptr); | ||
161 | 134 | ||
162 | return true; | 135 | return count == kfd->device_info->ih_ring_entry_size; |
163 | } | 136 | } |
164 | 137 | ||
165 | static void interrupt_wq(struct work_struct *work) | 138 | static void interrupt_wq(struct work_struct *work) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 44ffd23348fc..4859d263fa2a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | |||
@@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, | |||
189 | if (q->format == KFD_QUEUE_FORMAT_AQL) | 189 | if (q->format == KFD_QUEUE_FORMAT_AQL) |
190 | m->cp_hqd_pq_control |= NO_UPDATE_RPTR; | 190 | m->cp_hqd_pq_control |= NO_UPDATE_RPTR; |
191 | 191 | ||
192 | q->is_active = false; | 192 | q->is_active = (q->queue_size > 0 && |
193 | if (q->queue_size > 0 && | ||
194 | q->queue_address != 0 && | 193 | q->queue_address != 0 && |
195 | q->queue_percent > 0) { | 194 | q->queue_percent > 0); |
196 | q->is_active = true; | ||
197 | } | ||
198 | 195 | ||
199 | return 0; | 196 | return 0; |
200 | } | 197 | } |
@@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, | |||
215 | m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); | 212 | m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); |
216 | m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); | 213 | m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); |
217 | m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); | 214 | m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); |
218 | m->sdma_rlc_doorbell = q->doorbell_off << | 215 | m->sdma_rlc_doorbell = |
219 | SDMA0_RLC0_DOORBELL__OFFSET__SHIFT | | 216 | q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT; |
220 | 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT; | ||
221 | 217 | ||
222 | m->sdma_rlc_virtual_addr = q->sdma_vm_addr; | 218 | m->sdma_rlc_virtual_addr = q->sdma_vm_addr; |
223 | 219 | ||
224 | m->sdma_engine_id = q->sdma_engine_id; | 220 | m->sdma_engine_id = q->sdma_engine_id; |
225 | m->sdma_queue_id = q->sdma_queue_id; | 221 | m->sdma_queue_id = q->sdma_queue_id; |
226 | 222 | ||
227 | q->is_active = false; | 223 | q->is_active = (q->queue_size > 0 && |
228 | if (q->queue_size > 0 && | ||
229 | q->queue_address != 0 && | 224 | q->queue_address != 0 && |
230 | q->queue_percent > 0) { | 225 | q->queue_percent > 0); |
231 | m->sdma_rlc_rb_cntl |= | ||
232 | 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT; | ||
233 | |||
234 | q->is_active = true; | ||
235 | } | ||
236 | 226 | ||
237 | return 0; | 227 | return 0; |
238 | } | 228 | } |
@@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, | |||
359 | m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); | 349 | m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); |
360 | m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); | 350 | m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); |
361 | m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); | 351 | m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); |
362 | m->cp_hqd_pq_doorbell_control = DOORBELL_EN | | 352 | m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); |
363 | DOORBELL_OFFSET(q->doorbell_off); | ||
364 | 353 | ||
365 | m->cp_hqd_vmid = q->vmid; | 354 | m->cp_hqd_vmid = q->vmid; |
366 | 355 | ||
367 | m->cp_hqd_active = 0; | 356 | q->is_active = (q->queue_size > 0 && |
368 | q->is_active = false; | ||
369 | if (q->queue_size > 0 && | ||
370 | q->queue_address != 0 && | 357 | q->queue_address != 0 && |
371 | q->queue_percent > 0) { | 358 | q->queue_percent > 0); |
372 | m->cp_hqd_active = 1; | ||
373 | q->is_active = true; | ||
374 | } | ||
375 | 359 | ||
376 | return 0; | 360 | return 0; |
377 | } | 361 | } |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 73cbfe186dd2..4ea854f9007b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | |||
@@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, | |||
163 | 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; | 163 | 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; |
164 | } | 164 | } |
165 | 165 | ||
166 | q->is_active = false; | 166 | q->is_active = (q->queue_size > 0 && |
167 | if (q->queue_size > 0 && | ||
168 | q->queue_address != 0 && | 167 | q->queue_address != 0 && |
169 | q->queue_percent > 0) { | 168 | q->queue_percent > 0); |
170 | q->is_active = true; | ||
171 | } | ||
172 | 169 | ||
173 | return 0; | 170 | return 0; |
174 | } | 171 | } |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7d86ec9790d3..9e4134c5b481 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <linux/workqueue.h> | 31 | #include <linux/workqueue.h> |
32 | #include <linux/spinlock.h> | 32 | #include <linux/spinlock.h> |
33 | #include <linux/kfd_ioctl.h> | 33 | #include <linux/kfd_ioctl.h> |
34 | #include <linux/idr.h> | ||
35 | #include <linux/kfifo.h> | ||
34 | #include <kgd_kfd_interface.h> | 36 | #include <kgd_kfd_interface.h> |
35 | 37 | ||
36 | #include "amd_shared.h" | 38 | #include "amd_shared.h" |
@@ -181,10 +183,8 @@ struct kfd_dev { | |||
181 | unsigned int gtt_sa_num_of_chunks; | 183 | unsigned int gtt_sa_num_of_chunks; |
182 | 184 | ||
183 | /* Interrupts */ | 185 | /* Interrupts */ |
184 | void *interrupt_ring; | 186 | struct kfifo ih_fifo; |
185 | size_t interrupt_ring_size; | 187 | struct workqueue_struct *ih_wq; |
186 | atomic_t interrupt_ring_rptr; | ||
187 | atomic_t interrupt_ring_wptr; | ||
188 | struct work_struct interrupt_work; | 188 | struct work_struct interrupt_work; |
189 | spinlock_t interrupt_lock; | 189 | spinlock_t interrupt_lock; |
190 | 190 | ||
@@ -494,7 +494,12 @@ struct kfd_process { | |||
494 | */ | 494 | */ |
495 | struct hlist_node kfd_processes; | 495 | struct hlist_node kfd_processes; |
496 | 496 | ||
497 | struct mm_struct *mm; | 497 | /* |
498 | * Opaque pointer to mm_struct. We don't hold a reference to | ||
499 | * it so it should never be dereferenced from here. This is | ||
500 | * only used for looking up processes by their mm. | ||
501 | */ | ||
502 | void *mm; | ||
498 | 503 | ||
499 | struct mutex mutex; | 504 | struct mutex mutex; |
500 | 505 | ||
@@ -502,6 +507,8 @@ struct kfd_process { | |||
502 | * In any process, the thread that started main() is the lead | 507 | * In any process, the thread that started main() is the lead |
503 | * thread and outlives the rest. | 508 | * thread and outlives the rest. |
504 | * It is here because amd_iommu_bind_pasid wants a task_struct. | 509 | * It is here because amd_iommu_bind_pasid wants a task_struct. |
510 | * It can also be used for safely getting a reference to the | ||
511 | * mm_struct of the process. | ||
505 | */ | 512 | */ |
506 | struct task_struct *lead_thread; | 513 | struct task_struct *lead_thread; |
507 | 514 | ||
@@ -522,22 +529,16 @@ struct kfd_process { | |||
522 | 529 | ||
523 | struct process_queue_manager pqm; | 530 | struct process_queue_manager pqm; |
524 | 531 | ||
525 | /* The process's queues. */ | ||
526 | size_t queue_array_size; | ||
527 | |||
528 | /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */ | ||
529 | struct kfd_queue **queues; | ||
530 | |||
531 | /*Is the user space process 32 bit?*/ | 532 | /*Is the user space process 32 bit?*/ |
532 | bool is_32bit_user_mode; | 533 | bool is_32bit_user_mode; |
533 | 534 | ||
534 | /* Event-related data */ | 535 | /* Event-related data */ |
535 | struct mutex event_mutex; | 536 | struct mutex event_mutex; |
536 | /* All events in process hashed by ID, linked on kfd_event.events. */ | 537 | /* Event ID allocator and lookup */ |
537 | DECLARE_HASHTABLE(events, 4); | 538 | struct idr event_idr; |
538 | /* struct slot_page_header.event_pages */ | 539 | /* Event page */ |
539 | struct list_head signal_event_pages; | 540 | struct kfd_signal_page *signal_page; |
540 | u32 next_nonsignal_event_id; | 541 | size_t signal_mapped_size; |
541 | size_t signal_event_count; | 542 | size_t signal_event_count; |
542 | bool signal_event_limit_reached; | 543 | bool signal_event_limit_reached; |
543 | }; | 544 | }; |
@@ -721,19 +722,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd); | |||
721 | extern const struct kfd_event_interrupt_class event_interrupt_class_cik; | 722 | extern const struct kfd_event_interrupt_class event_interrupt_class_cik; |
722 | extern const struct kfd_device_global_init_class device_global_init_class_cik; | 723 | extern const struct kfd_device_global_init_class device_global_init_class_cik; |
723 | 724 | ||
724 | enum kfd_event_wait_result { | ||
725 | KFD_WAIT_COMPLETE, | ||
726 | KFD_WAIT_TIMEOUT, | ||
727 | KFD_WAIT_ERROR | ||
728 | }; | ||
729 | |||
730 | void kfd_event_init_process(struct kfd_process *p); | 725 | void kfd_event_init_process(struct kfd_process *p); |
731 | void kfd_event_free_process(struct kfd_process *p); | 726 | void kfd_event_free_process(struct kfd_process *p); |
732 | int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); | 727 | int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); |
733 | int kfd_wait_on_events(struct kfd_process *p, | 728 | int kfd_wait_on_events(struct kfd_process *p, |
734 | uint32_t num_events, void __user *data, | 729 | uint32_t num_events, void __user *data, |
735 | bool all, uint32_t user_timeout_ms, | 730 | bool all, uint32_t user_timeout_ms, |
736 | enum kfd_event_wait_result *wait_result); | 731 | uint32_t *wait_result); |
737 | void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, | 732 | void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, |
738 | uint32_t valid_id_bits); | 733 | uint32_t valid_id_bits); |
739 | void kfd_signal_iommu_event(struct kfd_dev *dev, | 734 | void kfd_signal_iommu_event(struct kfd_dev *dev, |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3ccb3b53216e..1f5ccd28bd41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c | |||
@@ -35,13 +35,6 @@ struct mm_struct; | |||
35 | #include "kfd_dbgmgr.h" | 35 | #include "kfd_dbgmgr.h" |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * Initial size for the array of queues. | ||
39 | * The allocated size is doubled each time | ||
40 | * it is exceeded up to MAX_PROCESS_QUEUES. | ||
41 | */ | ||
42 | #define INITIAL_QUEUE_ARRAY_SIZE 16 | ||
43 | |||
44 | /* | ||
45 | * List of struct kfd_process (field kfd_process). | 38 | * List of struct kfd_process (field kfd_process). |
46 | * Unique/indexed by mm_struct* | 39 | * Unique/indexed by mm_struct* |
47 | */ | 40 | */ |
@@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work) | |||
187 | 180 | ||
188 | mutex_destroy(&p->mutex); | 181 | mutex_destroy(&p->mutex); |
189 | 182 | ||
190 | kfree(p->queues); | ||
191 | |||
192 | kfree(p); | 183 | kfree(p); |
193 | 184 | ||
194 | kfree(work); | 185 | kfree(work); |
@@ -200,7 +191,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) | |||
200 | struct kfd_process *p; | 191 | struct kfd_process *p; |
201 | 192 | ||
202 | p = container_of(rcu, struct kfd_process, rcu); | 193 | p = container_of(rcu, struct kfd_process, rcu); |
203 | WARN_ON(atomic_read(&p->mm->mm_count) <= 0); | ||
204 | 194 | ||
205 | mmdrop(p->mm); | 195 | mmdrop(p->mm); |
206 | 196 | ||
@@ -234,17 +224,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, | |||
234 | 224 | ||
235 | mutex_lock(&p->mutex); | 225 | mutex_lock(&p->mutex); |
236 | 226 | ||
227 | /* Iterate over all process device data structures and if the | ||
228 | * pdd is in debug mode, we should first force unregistration, | ||
229 | * then we will be able to destroy the queues | ||
230 | */ | ||
231 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { | ||
232 | struct kfd_dev *dev = pdd->dev; | ||
233 | |||
234 | mutex_lock(kfd_get_dbgmgr_mutex()); | ||
235 | if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { | ||
236 | if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { | ||
237 | kfd_dbgmgr_destroy(dev->dbgmgr); | ||
238 | dev->dbgmgr = NULL; | ||
239 | } | ||
240 | } | ||
241 | mutex_unlock(kfd_get_dbgmgr_mutex()); | ||
242 | } | ||
243 | |||
237 | kfd_process_dequeue_from_all_devices(p); | 244 | kfd_process_dequeue_from_all_devices(p); |
238 | pqm_uninit(&p->pqm); | 245 | pqm_uninit(&p->pqm); |
239 | 246 | ||
240 | /* Iterate over all process device data structure and check | ||
241 | * if we should delete debug managers | ||
242 | */ | ||
243 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) | ||
244 | if ((pdd->dev->dbgmgr) && | ||
245 | (pdd->dev->dbgmgr->pasid == p->pasid)) | ||
246 | kfd_dbgmgr_destroy(pdd->dev->dbgmgr); | ||
247 | |||
248 | mutex_unlock(&p->mutex); | 247 | mutex_unlock(&p->mutex); |
249 | 248 | ||
250 | /* | 249 | /* |
@@ -271,11 +270,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) | |||
271 | if (!process) | 270 | if (!process) |
272 | goto err_alloc_process; | 271 | goto err_alloc_process; |
273 | 272 | ||
274 | process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE, | ||
275 | sizeof(process->queues[0]), GFP_KERNEL); | ||
276 | if (!process->queues) | ||
277 | goto err_alloc_queues; | ||
278 | |||
279 | process->pasid = kfd_pasid_alloc(); | 273 | process->pasid = kfd_pasid_alloc(); |
280 | if (process->pasid == 0) | 274 | if (process->pasid == 0) |
281 | goto err_alloc_pasid; | 275 | goto err_alloc_pasid; |
@@ -298,8 +292,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) | |||
298 | 292 | ||
299 | process->lead_thread = thread->group_leader; | 293 | process->lead_thread = thread->group_leader; |
300 | 294 | ||
301 | process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE; | ||
302 | |||
303 | INIT_LIST_HEAD(&process->per_device_data); | 295 | INIT_LIST_HEAD(&process->per_device_data); |
304 | 296 | ||
305 | kfd_event_init_process(process); | 297 | kfd_event_init_process(process); |
@@ -328,8 +320,6 @@ err_mmu_notifier: | |||
328 | err_alloc_doorbells: | 320 | err_alloc_doorbells: |
329 | kfd_pasid_free(process->pasid); | 321 | kfd_pasid_free(process->pasid); |
330 | err_alloc_pasid: | 322 | err_alloc_pasid: |
331 | kfree(process->queues); | ||
332 | err_alloc_queues: | ||
333 | kfree(process); | 323 | kfree(process); |
334 | err_alloc_process: | 324 | err_alloc_process: |
335 | return ERR_PTR(err); | 325 | return ERR_PTR(err); |
@@ -426,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev) | |||
426 | err = amd_iommu_bind_pasid(dev->pdev, p->pasid, | 416 | err = amd_iommu_bind_pasid(dev->pdev, p->pasid, |
427 | p->lead_thread); | 417 | p->lead_thread); |
428 | if (err < 0) { | 418 | if (err < 0) { |
429 | pr_err("unexpected pasid %d binding failure\n", | 419 | pr_err("Unexpected pasid %d binding failure\n", |
430 | p->pasid); | 420 | p->pasid); |
431 | mutex_unlock(&p->mutex); | 421 | mutex_unlock(&p->mutex); |
432 | break; | 422 | break; |
@@ -442,29 +432,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev) | |||
442 | } | 432 | } |
443 | 433 | ||
444 | /* | 434 | /* |
445 | * Temporarily unbind currently bound processes from the device and | 435 | * Mark currently bound processes as PDD_BOUND_SUSPENDED. These |
446 | * mark them as PDD_BOUND_SUSPENDED. These processes will be restored | 436 | * processes will be restored to PDD_BOUND state in |
447 | * to PDD_BOUND state in kfd_bind_processes_to_device. | 437 | * kfd_bind_processes_to_device. |
448 | */ | 438 | */ |
449 | void kfd_unbind_processes_from_device(struct kfd_dev *dev) | 439 | void kfd_unbind_processes_from_device(struct kfd_dev *dev) |
450 | { | 440 | { |
451 | struct kfd_process_device *pdd; | 441 | struct kfd_process_device *pdd; |
452 | struct kfd_process *p; | 442 | struct kfd_process *p; |
453 | unsigned int temp, temp_bound, temp_pasid; | 443 | unsigned int temp; |
454 | 444 | ||
455 | int idx = srcu_read_lock(&kfd_processes_srcu); | 445 | int idx = srcu_read_lock(&kfd_processes_srcu); |
456 | 446 | ||
457 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | 447 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { |
458 | mutex_lock(&p->mutex); | 448 | mutex_lock(&p->mutex); |
459 | pdd = kfd_get_process_device_data(dev, p); | 449 | pdd = kfd_get_process_device_data(dev, p); |
460 | temp_bound = pdd->bound; | 450 | |
461 | temp_pasid = p->pasid; | ||
462 | if (pdd->bound == PDD_BOUND) | 451 | if (pdd->bound == PDD_BOUND) |
463 | pdd->bound = PDD_BOUND_SUSPENDED; | 452 | pdd->bound = PDD_BOUND_SUSPENDED; |
464 | mutex_unlock(&p->mutex); | 453 | mutex_unlock(&p->mutex); |
465 | |||
466 | if (temp_bound == PDD_BOUND) | ||
467 | amd_iommu_unbind_pasid(dev->pdev, temp_pasid); | ||
468 | } | 454 | } |
469 | 455 | ||
470 | srcu_read_unlock(&kfd_processes_srcu, idx); | 456 | srcu_read_unlock(&kfd_processes_srcu, idx); |
@@ -486,8 +472,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) | |||
486 | 472 | ||
487 | pr_debug("Unbinding process %d from IOMMU\n", pasid); | 473 | pr_debug("Unbinding process %d from IOMMU\n", pasid); |
488 | 474 | ||
489 | if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) | 475 | mutex_lock(kfd_get_dbgmgr_mutex()); |
490 | kfd_dbgmgr_destroy(dev->dbgmgr); | 476 | |
477 | if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { | ||
478 | if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { | ||
479 | kfd_dbgmgr_destroy(dev->dbgmgr); | ||
480 | dev->dbgmgr = NULL; | ||
481 | } | ||
482 | } | ||
483 | |||
484 | mutex_unlock(kfd_get_dbgmgr_mutex()); | ||
491 | 485 | ||
492 | pdd = kfd_get_process_device_data(dev, p); | 486 | pdd = kfd_get_process_device_data(dev, p); |
493 | if (pdd) | 487 | if (pdd) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 5129dc139219..2bec902fc939 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | |||
@@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, | |||
177 | if (retval != 0) | 177 | if (retval != 0) |
178 | return retval; | 178 | return retval; |
179 | 179 | ||
180 | if (list_empty(&pqm->queues)) { | 180 | if (list_empty(&pdd->qpd.queues_list) && |
181 | list_empty(&pdd->qpd.priv_queue_list)) { | ||
181 | pdd->qpd.pqm = pqm; | 182 | pdd->qpd.pqm = pqm; |
182 | dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); | 183 | dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); |
183 | } | 184 | } |
@@ -248,7 +249,8 @@ err_create_queue: | |||
248 | err_allocate_pqn: | 249 | err_allocate_pqn: |
249 | /* check if queues list is empty unregister process from device */ | 250 | /* check if queues list is empty unregister process from device */ |
250 | clear_bit(*qid, pqm->queue_slot_bitmap); | 251 | clear_bit(*qid, pqm->queue_slot_bitmap); |
251 | if (list_empty(&pqm->queues)) | 252 | if (list_empty(&pdd->qpd.queues_list) && |
253 | list_empty(&pdd->qpd.priv_queue_list)) | ||
252 | dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); | 254 | dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); |
253 | return retval; | 255 | return retval; |
254 | } | 256 | } |
@@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) | |||
302 | kfree(pqn); | 304 | kfree(pqn); |
303 | clear_bit(qid, pqm->queue_slot_bitmap); | 305 | clear_bit(qid, pqm->queue_slot_bitmap); |
304 | 306 | ||
305 | if (list_empty(&pqm->queues)) | 307 | if (list_empty(&pdd->qpd.queues_list) && |
308 | list_empty(&pdd->qpd.priv_queue_list)) | ||
306 | dqm->ops.unregister_process(dqm, &pdd->qpd); | 309 | dqm->ops.unregister_process(dqm, &pdd->qpd); |
307 | 310 | ||
308 | return retval; | 311 | return retval; |
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index be16c6390216..cf3e5985e3e7 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile | |||
@@ -102,8 +102,7 @@ radeon-y += \ | |||
102 | radeon-y += \ | 102 | radeon-y += \ |
103 | radeon_vce.o \ | 103 | radeon_vce.o \ |
104 | vce_v1_0.o \ | 104 | vce_v1_0.o \ |
105 | vce_v2_0.o \ | 105 | vce_v2_0.o |
106 | radeon_kfd.o | ||
107 | 106 | ||
108 | radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o | 107 | radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o |
109 | radeon-$(CONFIG_ACPI) += radeon_acpi.o | 108 | radeon-$(CONFIG_ACPI) += radeon_acpi.o |
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 3cb6c55b268d..898f9a078830 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include "cik_blit_shaders.h" | 33 | #include "cik_blit_shaders.h" |
34 | #include "radeon_ucode.h" | 34 | #include "radeon_ucode.h" |
35 | #include "clearstate_ci.h" | 35 | #include "clearstate_ci.h" |
36 | #include "radeon_kfd.h" | ||
37 | 36 | ||
38 | #define SH_MEM_CONFIG_GFX_DEFAULT \ | 37 | #define SH_MEM_CONFIG_GFX_DEFAULT \ |
39 | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | 38 | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
@@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev) | |||
5684 | /* | 5683 | /* |
5685 | * number of VMs | 5684 | * number of VMs |
5686 | * VMID 0 is reserved for System | 5685 | * VMID 0 is reserved for System |
5687 | * radeon graphics/compute will use VMIDs 1-7 | 5686 | * radeon graphics/compute will use VMIDs 1-15 |
5688 | * amdkfd will use VMIDs 8-15 | ||
5689 | */ | 5687 | */ |
5690 | rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; | 5688 | rdev->vm_manager.nvm = 16; |
5691 | /* base offset of vram pages */ | 5689 | /* base offset of vram pages */ |
5692 | if (rdev->flags & RADEON_IS_IGP) { | 5690 | if (rdev->flags & RADEON_IS_IGP) { |
5693 | u64 tmp = RREG32(MC_VM_FB_OFFSET); | 5691 | u64 tmp = RREG32(MC_VM_FB_OFFSET); |
@@ -7589,9 +7587,6 @@ restart_ih: | |||
7589 | /* wptr/rptr are in bytes! */ | 7587 | /* wptr/rptr are in bytes! */ |
7590 | ring_index = rptr / 4; | 7588 | ring_index = rptr / 4; |
7591 | 7589 | ||
7592 | radeon_kfd_interrupt(rdev, | ||
7593 | (const void *) &rdev->ih.ring[ring_index]); | ||
7594 | |||
7595 | src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; | 7590 | src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; |
7596 | src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; | 7591 | src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; |
7597 | ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; | 7592 | ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; |
@@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev) | |||
8486 | if (r) | 8481 | if (r) |
8487 | return r; | 8482 | return r; |
8488 | 8483 | ||
8489 | r = radeon_kfd_resume(rdev); | ||
8490 | if (r) | ||
8491 | return r; | ||
8492 | |||
8493 | return 0; | 8484 | return 0; |
8494 | } | 8485 | } |
8495 | 8486 | ||
@@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev) | |||
8538 | */ | 8529 | */ |
8539 | int cik_suspend(struct radeon_device *rdev) | 8530 | int cik_suspend(struct radeon_device *rdev) |
8540 | { | 8531 | { |
8541 | radeon_kfd_suspend(rdev); | ||
8542 | radeon_pm_suspend(rdev); | 8532 | radeon_pm_suspend(rdev); |
8543 | radeon_audio_fini(rdev); | 8533 | radeon_audio_fini(rdev); |
8544 | radeon_vm_manager_fini(rdev); | 8534 | radeon_vm_manager_fini(rdev); |
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index e21015475ed5..cda16fcd43bb 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h | |||
@@ -30,8 +30,6 @@ | |||
30 | #define CIK_RB_BITMAP_WIDTH_PER_SH 2 | 30 | #define CIK_RB_BITMAP_WIDTH_PER_SH 2 |
31 | #define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 | 31 | #define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 |
32 | 32 | ||
33 | #define RADEON_NUM_OF_VMIDS 8 | ||
34 | |||
35 | /* DIDT IND registers */ | 33 | /* DIDT IND registers */ |
36 | #define DIDT_SQ_CTRL0 0x0 | 34 | #define DIDT_SQ_CTRL0 0x0 |
37 | # define DIDT_CTRL_EN (1 << 0) | 35 | # define DIDT_CTRL_EN (1 << 0) |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8cbaeec090c9..a8e546569858 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -2456,9 +2456,6 @@ struct radeon_device { | |||
2456 | u64 vram_pin_size; | 2456 | u64 vram_pin_size; |
2457 | u64 gart_pin_size; | 2457 | u64 gart_pin_size; |
2458 | 2458 | ||
2459 | /* amdkfd interface */ | ||
2460 | struct kfd_dev *kfd; | ||
2461 | |||
2462 | struct mutex mn_lock; | 2459 | struct mutex mn_lock; |
2463 | DECLARE_HASHTABLE(mn_hash, 7); | 2460 | DECLARE_HASHTABLE(mn_hash, 7); |
2464 | }; | 2461 | }; |
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index f4becad0a78c..31dd04f6baa1 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #include <drm/drm_fb_helper.h> | 43 | #include <drm/drm_fb_helper.h> |
44 | 44 | ||
45 | #include <drm/drm_crtc_helper.h> | 45 | #include <drm/drm_crtc_helper.h> |
46 | #include "radeon_kfd.h" | ||
47 | 46 | ||
48 | /* | 47 | /* |
49 | * KMS wrapper. | 48 | * KMS wrapper. |
@@ -338,14 +337,6 @@ static int radeon_pci_probe(struct pci_dev *pdev, | |||
338 | { | 337 | { |
339 | int ret; | 338 | int ret; |
340 | 339 | ||
341 | /* | ||
342 | * Initialize amdkfd before starting radeon. If it was not loaded yet, | ||
343 | * defer radeon probing | ||
344 | */ | ||
345 | ret = radeon_kfd_init(); | ||
346 | if (ret == -EPROBE_DEFER) | ||
347 | return ret; | ||
348 | |||
349 | if (vga_switcheroo_client_probe_defer(pdev)) | 340 | if (vga_switcheroo_client_probe_defer(pdev)) |
350 | return -EPROBE_DEFER; | 341 | return -EPROBE_DEFER; |
351 | 342 | ||
@@ -645,7 +636,6 @@ static int __init radeon_init(void) | |||
645 | 636 | ||
646 | static void __exit radeon_exit(void) | 637 | static void __exit radeon_exit(void) |
647 | { | 638 | { |
648 | radeon_kfd_fini(); | ||
649 | pci_unregister_driver(pdriver); | 639 | pci_unregister_driver(pdriver); |
650 | radeon_unregister_atpx_handler(); | 640 | radeon_unregister_atpx_handler(); |
651 | } | 641 | } |
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c deleted file mode 100644 index 385b4d76956d..000000000000 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ /dev/null | |||
@@ -1,901 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <linux/module.h> | ||
24 | #include <linux/fdtable.h> | ||
25 | #include <linux/uaccess.h> | ||
26 | #include <drm/drmP.h> | ||
27 | #include "radeon.h" | ||
28 | #include "cikd.h" | ||
29 | #include "cik_reg.h" | ||
30 | #include "radeon_kfd.h" | ||
31 | #include "radeon_ucode.h" | ||
32 | #include <linux/firmware.h> | ||
33 | #include "cik_structs.h" | ||
34 | |||
35 | #define CIK_PIPE_PER_MEC (4) | ||
36 | |||
37 | static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { | ||
38 | TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL, | ||
39 | TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL, | ||
40 | TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL, | ||
41 | TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL | ||
42 | }; | ||
43 | |||
44 | struct kgd_mem { | ||
45 | struct radeon_bo *bo; | ||
46 | uint64_t gpu_addr; | ||
47 | void *cpu_ptr; | ||
48 | }; | ||
49 | |||
50 | |||
51 | static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | ||
52 | void **mem_obj, uint64_t *gpu_addr, | ||
53 | void **cpu_ptr); | ||
54 | |||
55 | static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); | ||
56 | |||
57 | static uint64_t get_vmem_size(struct kgd_dev *kgd); | ||
58 | static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); | ||
59 | |||
60 | static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); | ||
61 | |||
62 | static int alloc_pasid(unsigned int bits); | ||
63 | static void free_pasid(unsigned int pasid); | ||
64 | |||
65 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); | ||
66 | |||
67 | /* | ||
68 | * Register access functions | ||
69 | */ | ||
70 | |||
71 | static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, | ||
72 | uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, | ||
73 | uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); | ||
74 | |||
75 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | ||
76 | unsigned int vmid); | ||
77 | |||
78 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, | ||
79 | uint32_t hpd_size, uint64_t hpd_gpu_addr); | ||
80 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); | ||
81 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | ||
82 | uint32_t queue_id, uint32_t __user *wptr, | ||
83 | uint32_t wptr_shift, uint32_t wptr_mask, | ||
84 | struct mm_struct *mm); | ||
85 | static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); | ||
86 | static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, | ||
87 | uint32_t pipe_id, uint32_t queue_id); | ||
88 | |||
89 | static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, | ||
90 | unsigned int timeout, uint32_t pipe_id, | ||
91 | uint32_t queue_id); | ||
92 | static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); | ||
93 | static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, | ||
94 | unsigned int timeout); | ||
95 | static int kgd_address_watch_disable(struct kgd_dev *kgd); | ||
96 | static int kgd_address_watch_execute(struct kgd_dev *kgd, | ||
97 | unsigned int watch_point_id, | ||
98 | uint32_t cntl_val, | ||
99 | uint32_t addr_hi, | ||
100 | uint32_t addr_lo); | ||
101 | static int kgd_wave_control_execute(struct kgd_dev *kgd, | ||
102 | uint32_t gfx_index_val, | ||
103 | uint32_t sq_cmd); | ||
104 | static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, | ||
105 | unsigned int watch_point_id, | ||
106 | unsigned int reg_offset); | ||
107 | |||
108 | static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); | ||
109 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | ||
110 | uint8_t vmid); | ||
111 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); | ||
112 | |||
113 | static const struct kfd2kgd_calls kfd2kgd = { | ||
114 | .init_gtt_mem_allocation = alloc_gtt_mem, | ||
115 | .free_gtt_mem = free_gtt_mem, | ||
116 | .get_vmem_size = get_vmem_size, | ||
117 | .get_gpu_clock_counter = get_gpu_clock_counter, | ||
118 | .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, | ||
119 | .alloc_pasid = alloc_pasid, | ||
120 | .free_pasid = free_pasid, | ||
121 | .program_sh_mem_settings = kgd_program_sh_mem_settings, | ||
122 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, | ||
123 | .init_pipeline = kgd_init_pipeline, | ||
124 | .init_interrupts = kgd_init_interrupts, | ||
125 | .hqd_load = kgd_hqd_load, | ||
126 | .hqd_sdma_load = kgd_hqd_sdma_load, | ||
127 | .hqd_is_occupied = kgd_hqd_is_occupied, | ||
128 | .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, | ||
129 | .hqd_destroy = kgd_hqd_destroy, | ||
130 | .hqd_sdma_destroy = kgd_hqd_sdma_destroy, | ||
131 | .address_watch_disable = kgd_address_watch_disable, | ||
132 | .address_watch_execute = kgd_address_watch_execute, | ||
133 | .wave_control_execute = kgd_wave_control_execute, | ||
134 | .address_watch_get_offset = kgd_address_watch_get_offset, | ||
135 | .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, | ||
136 | .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, | ||
137 | .write_vmid_invalidate_request = write_vmid_invalidate_request, | ||
138 | .get_fw_version = get_fw_version | ||
139 | }; | ||
140 | |||
141 | static const struct kgd2kfd_calls *kgd2kfd; | ||
142 | |||
143 | int radeon_kfd_init(void) | ||
144 | { | ||
145 | int ret; | ||
146 | |||
147 | #if defined(CONFIG_HSA_AMD_MODULE) | ||
148 | int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); | ||
149 | |||
150 | kgd2kfd_init_p = symbol_request(kgd2kfd_init); | ||
151 | |||
152 | if (kgd2kfd_init_p == NULL) | ||
153 | return -ENOENT; | ||
154 | |||
155 | ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd); | ||
156 | if (ret) { | ||
157 | symbol_put(kgd2kfd_init); | ||
158 | kgd2kfd = NULL; | ||
159 | } | ||
160 | |||
161 | #elif defined(CONFIG_HSA_AMD) | ||
162 | ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); | ||
163 | if (ret) | ||
164 | kgd2kfd = NULL; | ||
165 | |||
166 | #else | ||
167 | ret = -ENOENT; | ||
168 | #endif | ||
169 | |||
170 | return ret; | ||
171 | } | ||
172 | |||
173 | void radeon_kfd_fini(void) | ||
174 | { | ||
175 | if (kgd2kfd) { | ||
176 | kgd2kfd->exit(); | ||
177 | symbol_put(kgd2kfd_init); | ||
178 | } | ||
179 | } | ||
180 | |||
181 | void radeon_kfd_device_probe(struct radeon_device *rdev) | ||
182 | { | ||
183 | if (kgd2kfd) | ||
184 | rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, | ||
185 | rdev->pdev, &kfd2kgd); | ||
186 | } | ||
187 | |||
188 | void radeon_kfd_device_init(struct radeon_device *rdev) | ||
189 | { | ||
190 | int i, queue, pipe, mec; | ||
191 | |||
192 | if (rdev->kfd) { | ||
193 | struct kgd2kfd_shared_resources gpu_resources = { | ||
194 | .compute_vmid_bitmap = 0xFF00, | ||
195 | .num_pipe_per_mec = 4, | ||
196 | .num_queue_per_pipe = 8 | ||
197 | }; | ||
198 | |||
199 | bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES); | ||
200 | |||
201 | for (i = 0; i < KGD_MAX_QUEUES; ++i) { | ||
202 | queue = i % gpu_resources.num_queue_per_pipe; | ||
203 | pipe = (i / gpu_resources.num_queue_per_pipe) | ||
204 | % gpu_resources.num_pipe_per_mec; | ||
205 | mec = (i / gpu_resources.num_queue_per_pipe) | ||
206 | / gpu_resources.num_pipe_per_mec; | ||
207 | |||
208 | if (mec == 0 && pipe > 0) | ||
209 | set_bit(i, gpu_resources.queue_bitmap); | ||
210 | } | ||
211 | |||
212 | radeon_doorbell_get_kfd_info(rdev, | ||
213 | &gpu_resources.doorbell_physical_address, | ||
214 | &gpu_resources.doorbell_aperture_size, | ||
215 | &gpu_resources.doorbell_start_offset); | ||
216 | |||
217 | kgd2kfd->device_init(rdev->kfd, &gpu_resources); | ||
218 | } | ||
219 | } | ||
220 | |||
221 | void radeon_kfd_device_fini(struct radeon_device *rdev) | ||
222 | { | ||
223 | if (rdev->kfd) { | ||
224 | kgd2kfd->device_exit(rdev->kfd); | ||
225 | rdev->kfd = NULL; | ||
226 | } | ||
227 | } | ||
228 | |||
229 | void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry) | ||
230 | { | ||
231 | if (rdev->kfd) | ||
232 | kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); | ||
233 | } | ||
234 | |||
235 | void radeon_kfd_suspend(struct radeon_device *rdev) | ||
236 | { | ||
237 | if (rdev->kfd) | ||
238 | kgd2kfd->suspend(rdev->kfd); | ||
239 | } | ||
240 | |||
241 | int radeon_kfd_resume(struct radeon_device *rdev) | ||
242 | { | ||
243 | int r = 0; | ||
244 | |||
245 | if (rdev->kfd) | ||
246 | r = kgd2kfd->resume(rdev->kfd); | ||
247 | |||
248 | return r; | ||
249 | } | ||
250 | |||
251 | static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | ||
252 | void **mem_obj, uint64_t *gpu_addr, | ||
253 | void **cpu_ptr) | ||
254 | { | ||
255 | struct radeon_device *rdev = (struct radeon_device *)kgd; | ||
256 | struct kgd_mem **mem = (struct kgd_mem **) mem_obj; | ||
257 | int r; | ||
258 | |||
259 | BUG_ON(kgd == NULL); | ||
260 | BUG_ON(gpu_addr == NULL); | ||
261 | BUG_ON(cpu_ptr == NULL); | ||
262 | |||
263 | *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); | ||
264 | if ((*mem) == NULL) | ||
265 | return -ENOMEM; | ||
266 | |||
267 | r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, | ||
268 | RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo); | ||
269 | if (r) { | ||
270 | dev_err(rdev->dev, | ||
271 | "failed to allocate BO for amdkfd (%d)\n", r); | ||
272 | return r; | ||
273 | } | ||
274 | |||
275 | /* map the buffer */ | ||
276 | r = radeon_bo_reserve((*mem)->bo, true); | ||
277 | if (r) { | ||
278 | dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); | ||
279 | goto allocate_mem_reserve_bo_failed; | ||
280 | } | ||
281 | |||
282 | r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT, | ||
283 | &(*mem)->gpu_addr); | ||
284 | if (r) { | ||
285 | dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); | ||
286 | goto allocate_mem_pin_bo_failed; | ||
287 | } | ||
288 | *gpu_addr = (*mem)->gpu_addr; | ||
289 | |||
290 | r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); | ||
291 | if (r) { | ||
292 | dev_err(rdev->dev, | ||
293 | "(%d) failed to map bo to kernel for amdkfd\n", r); | ||
294 | goto allocate_mem_kmap_bo_failed; | ||
295 | } | ||
296 | *cpu_ptr = (*mem)->cpu_ptr; | ||
297 | |||
298 | radeon_bo_unreserve((*mem)->bo); | ||
299 | |||
300 | return 0; | ||
301 | |||
302 | allocate_mem_kmap_bo_failed: | ||
303 | radeon_bo_unpin((*mem)->bo); | ||
304 | allocate_mem_pin_bo_failed: | ||
305 | radeon_bo_unreserve((*mem)->bo); | ||
306 | allocate_mem_reserve_bo_failed: | ||
307 | radeon_bo_unref(&(*mem)->bo); | ||
308 | |||
309 | return r; | ||
310 | } | ||
311 | |||
312 | static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) | ||
313 | { | ||
314 | struct kgd_mem *mem = (struct kgd_mem *) mem_obj; | ||
315 | |||
316 | BUG_ON(mem == NULL); | ||
317 | |||
318 | radeon_bo_reserve(mem->bo, true); | ||
319 | radeon_bo_kunmap(mem->bo); | ||
320 | radeon_bo_unpin(mem->bo); | ||
321 | radeon_bo_unreserve(mem->bo); | ||
322 | radeon_bo_unref(&(mem->bo)); | ||
323 | kfree(mem); | ||
324 | } | ||
325 | |||
326 | static uint64_t get_vmem_size(struct kgd_dev *kgd) | ||
327 | { | ||
328 | struct radeon_device *rdev = (struct radeon_device *)kgd; | ||
329 | |||
330 | BUG_ON(kgd == NULL); | ||
331 | |||
332 | return rdev->mc.real_vram_size; | ||
333 | } | ||
334 | |||
335 | static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) | ||
336 | { | ||
337 | struct radeon_device *rdev = (struct radeon_device *)kgd; | ||
338 | |||
339 | return rdev->asic->get_gpu_clock_counter(rdev); | ||
340 | } | ||
341 | |||
342 | static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) | ||
343 | { | ||
344 | struct radeon_device *rdev = (struct radeon_device *)kgd; | ||
345 | |||
346 | /* The sclk is in quantas of 10kHz */ | ||
347 | return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * PASID manager | ||
352 | */ | ||
353 | static DEFINE_IDA(pasid_ida); | ||
354 | |||
355 | static int alloc_pasid(unsigned int bits) | ||
356 | { | ||
357 | int pasid = -EINVAL; | ||
358 | |||
359 | for (bits = min(bits, 31U); bits > 0; bits--) { | ||
360 | pasid = ida_simple_get(&pasid_ida, | ||
361 | 1U << (bits - 1), 1U << bits, | ||
362 | GFP_KERNEL); | ||
363 | if (pasid != -ENOSPC) | ||
364 | break; | ||
365 | } | ||
366 | |||
367 | return pasid; | ||
368 | } | ||
369 | |||
370 | static void free_pasid(unsigned int pasid) | ||
371 | { | ||
372 | ida_simple_remove(&pasid_ida, pasid); | ||
373 | } | ||
374 | |||
375 | static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd) | ||
376 | { | ||
377 | return (struct radeon_device *)kgd; | ||
378 | } | ||
379 | |||
380 | static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value) | ||
381 | { | ||
382 | struct radeon_device *rdev = get_radeon_device(kgd); | ||
383 | |||
384 | writel(value, (void __iomem *)(rdev->rmmio + offset)); | ||
385 | } | ||
386 | |||
387 | static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset) | ||
388 | { | ||
389 | struct radeon_device *rdev = get_radeon_device(kgd); | ||
390 | |||
391 | return readl((void __iomem *)(rdev->rmmio + offset)); | ||
392 | } | ||
393 | |||
394 | static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, | ||
395 | uint32_t queue, uint32_t vmid) | ||
396 | { | ||
397 | struct radeon_device *rdev = get_radeon_device(kgd); | ||
398 | uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); | ||
399 | |||
400 | mutex_lock(&rdev->srbm_mutex); | ||
401 | write_register(kgd, SRBM_GFX_CNTL, value); | ||
402 | } | ||
403 | |||
404 | static void unlock_srbm(struct kgd_dev *kgd) | ||
405 | { | ||
406 | struct radeon_device *rdev = get_radeon_device(kgd); | ||
407 | |||
408 | write_register(kgd, SRBM_GFX_CNTL, 0); | ||
409 | mutex_unlock(&rdev->srbm_mutex); | ||
410 | } | ||
411 | |||
412 | static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, | ||
413 | uint32_t queue_id) | ||
414 | { | ||
415 | uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; | ||
416 | uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); | ||
417 | |||
418 | lock_srbm(kgd, mec, pipe, queue_id, 0); | ||
419 | } | ||
420 | |||
421 | static void release_queue(struct kgd_dev *kgd) | ||
422 | { | ||
423 | unlock_srbm(kgd); | ||
424 | } | ||
425 | |||
426 | static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, | ||
427 | uint32_t sh_mem_config, | ||
428 | uint32_t sh_mem_ape1_base, | ||
429 | uint32_t sh_mem_ape1_limit, | ||
430 | uint32_t sh_mem_bases) | ||
431 | { | ||
432 | lock_srbm(kgd, 0, 0, 0, vmid); | ||
433 | |||
434 | write_register(kgd, SH_MEM_CONFIG, sh_mem_config); | ||
435 | write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base); | ||
436 | write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit); | ||
437 | write_register(kgd, SH_MEM_BASES, sh_mem_bases); | ||
438 | |||
439 | unlock_srbm(kgd); | ||
440 | } | ||
441 | |||
442 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | ||
443 | unsigned int vmid) | ||
444 | { | ||
445 | /* | ||
446 | * We have to assume that there is no outstanding mapping. | ||
447 | * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 | ||
448 | * because a mapping is in progress or because a mapping finished and | ||
449 | * the SW cleared it. | ||
450 | * So the protocol is to always wait & clear. | ||
451 | */ | ||
452 | uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | | ||
453 | ATC_VMID_PASID_MAPPING_VALID_MASK; | ||
454 | |||
455 | write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), | ||
456 | pasid_mapping); | ||
457 | |||
458 | while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & | ||
459 | (1U << vmid))) | ||
460 | cpu_relax(); | ||
461 | write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); | ||
462 | |||
463 | /* Mapping vmid to pasid also for IH block */ | ||
464 | write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), | ||
465 | pasid_mapping); | ||
466 | |||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, | ||
471 | uint32_t hpd_size, uint64_t hpd_gpu_addr) | ||
472 | { | ||
473 | /* nothing to do here */ | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) | ||
478 | { | ||
479 | uint32_t mec; | ||
480 | uint32_t pipe; | ||
481 | |||
482 | mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; | ||
483 | pipe = (pipe_id % CIK_PIPE_PER_MEC); | ||
484 | |||
485 | lock_srbm(kgd, mec, pipe, 0, 0); | ||
486 | |||
487 | write_register(kgd, CPC_INT_CNTL, | ||
488 | TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE); | ||
489 | |||
490 | unlock_srbm(kgd); | ||
491 | |||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) | ||
496 | { | ||
497 | uint32_t retval; | ||
498 | |||
499 | retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + | ||
500 | m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; | ||
501 | |||
502 | pr_debug("kfd: sdma base address: 0x%x\n", retval); | ||
503 | |||
504 | return retval; | ||
505 | } | ||
506 | |||
507 | static inline struct cik_mqd *get_mqd(void *mqd) | ||
508 | { | ||
509 | return (struct cik_mqd *)mqd; | ||
510 | } | ||
511 | |||
512 | static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) | ||
513 | { | ||
514 | return (struct cik_sdma_rlc_registers *)mqd; | ||
515 | } | ||
516 | |||
517 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | ||
518 | uint32_t queue_id, uint32_t __user *wptr, | ||
519 | uint32_t wptr_shift, uint32_t wptr_mask, | ||
520 | struct mm_struct *mm) | ||
521 | { | ||
522 | uint32_t wptr_shadow, is_wptr_shadow_valid; | ||
523 | struct cik_mqd *m; | ||
524 | |||
525 | m = get_mqd(mqd); | ||
526 | |||
527 | is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); | ||
528 | |||
529 | acquire_queue(kgd, pipe_id, queue_id); | ||
530 | write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); | ||
531 | write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); | ||
532 | write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control); | ||
533 | |||
534 | write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); | ||
535 | write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); | ||
536 | write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); | ||
537 | |||
538 | write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control); | ||
539 | write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); | ||
540 | write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); | ||
541 | |||
542 | write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); | ||
543 | |||
544 | write_register(kgd, CP_HQD_PERSISTENT_STATE, | ||
545 | m->cp_hqd_persistent_state); | ||
546 | write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); | ||
547 | write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type); | ||
548 | |||
549 | write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO, | ||
550 | m->cp_hqd_atomic0_preop_lo); | ||
551 | |||
552 | write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI, | ||
553 | m->cp_hqd_atomic0_preop_hi); | ||
554 | |||
555 | write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO, | ||
556 | m->cp_hqd_atomic1_preop_lo); | ||
557 | |||
558 | write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI, | ||
559 | m->cp_hqd_atomic1_preop_hi); | ||
560 | |||
561 | write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR, | ||
562 | m->cp_hqd_pq_rptr_report_addr_lo); | ||
563 | |||
564 | write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, | ||
565 | m->cp_hqd_pq_rptr_report_addr_hi); | ||
566 | |||
567 | write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); | ||
568 | |||
569 | write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR, | ||
570 | m->cp_hqd_pq_wptr_poll_addr_lo); | ||
571 | |||
572 | write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI, | ||
573 | m->cp_hqd_pq_wptr_poll_addr_hi); | ||
574 | |||
575 | write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, | ||
576 | m->cp_hqd_pq_doorbell_control); | ||
577 | |||
578 | write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid); | ||
579 | |||
580 | write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum); | ||
581 | |||
582 | write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); | ||
583 | write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); | ||
584 | |||
585 | write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); | ||
586 | |||
587 | if (is_wptr_shadow_valid) | ||
588 | write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow); | ||
589 | |||
590 | write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active); | ||
591 | release_queue(kgd); | ||
592 | |||
593 | return 0; | ||
594 | } | ||
595 | |||
596 | static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) | ||
597 | { | ||
598 | struct cik_sdma_rlc_registers *m; | ||
599 | uint32_t sdma_base_addr; | ||
600 | |||
601 | m = get_sdma_mqd(mqd); | ||
602 | sdma_base_addr = get_sdma_base_addr(m); | ||
603 | |||
604 | write_register(kgd, | ||
605 | sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR, | ||
606 | m->sdma_rlc_virtual_addr); | ||
607 | |||
608 | write_register(kgd, | ||
609 | sdma_base_addr + SDMA0_RLC0_RB_BASE, | ||
610 | m->sdma_rlc_rb_base); | ||
611 | |||
612 | write_register(kgd, | ||
613 | sdma_base_addr + SDMA0_RLC0_RB_BASE_HI, | ||
614 | m->sdma_rlc_rb_base_hi); | ||
615 | |||
616 | write_register(kgd, | ||
617 | sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO, | ||
618 | m->sdma_rlc_rb_rptr_addr_lo); | ||
619 | |||
620 | write_register(kgd, | ||
621 | sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI, | ||
622 | m->sdma_rlc_rb_rptr_addr_hi); | ||
623 | |||
624 | write_register(kgd, | ||
625 | sdma_base_addr + SDMA0_RLC0_DOORBELL, | ||
626 | m->sdma_rlc_doorbell); | ||
627 | |||
628 | write_register(kgd, | ||
629 | sdma_base_addr + SDMA0_RLC0_RB_CNTL, | ||
630 | m->sdma_rlc_rb_cntl); | ||
631 | |||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, | ||
636 | uint32_t pipe_id, uint32_t queue_id) | ||
637 | { | ||
638 | uint32_t act; | ||
639 | bool retval = false; | ||
640 | uint32_t low, high; | ||
641 | |||
642 | acquire_queue(kgd, pipe_id, queue_id); | ||
643 | act = read_register(kgd, CP_HQD_ACTIVE); | ||
644 | if (act) { | ||
645 | low = lower_32_bits(queue_address >> 8); | ||
646 | high = upper_32_bits(queue_address >> 8); | ||
647 | |||
648 | if (low == read_register(kgd, CP_HQD_PQ_BASE) && | ||
649 | high == read_register(kgd, CP_HQD_PQ_BASE_HI)) | ||
650 | retval = true; | ||
651 | } | ||
652 | release_queue(kgd); | ||
653 | return retval; | ||
654 | } | ||
655 | |||
656 | static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) | ||
657 | { | ||
658 | struct cik_sdma_rlc_registers *m; | ||
659 | uint32_t sdma_base_addr; | ||
660 | uint32_t sdma_rlc_rb_cntl; | ||
661 | |||
662 | m = get_sdma_mqd(mqd); | ||
663 | sdma_base_addr = get_sdma_base_addr(m); | ||
664 | |||
665 | sdma_rlc_rb_cntl = read_register(kgd, | ||
666 | sdma_base_addr + SDMA0_RLC0_RB_CNTL); | ||
667 | |||
668 | if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE) | ||
669 | return true; | ||
670 | |||
671 | return false; | ||
672 | } | ||
673 | |||
674 | static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, | ||
675 | unsigned int timeout, uint32_t pipe_id, | ||
676 | uint32_t queue_id) | ||
677 | { | ||
678 | uint32_t temp; | ||
679 | |||
680 | acquire_queue(kgd, pipe_id, queue_id); | ||
681 | write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0); | ||
682 | |||
683 | write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type); | ||
684 | |||
685 | while (true) { | ||
686 | temp = read_register(kgd, CP_HQD_ACTIVE); | ||
687 | if (temp & 0x1) | ||
688 | break; | ||
689 | if (timeout == 0) { | ||
690 | pr_err("kfd: cp queue preemption time out (%dms)\n", | ||
691 | temp); | ||
692 | release_queue(kgd); | ||
693 | return -ETIME; | ||
694 | } | ||
695 | msleep(20); | ||
696 | timeout -= 20; | ||
697 | } | ||
698 | |||
699 | release_queue(kgd); | ||
700 | return 0; | ||
701 | } | ||
702 | |||
703 | static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, | ||
704 | unsigned int timeout) | ||
705 | { | ||
706 | struct cik_sdma_rlc_registers *m; | ||
707 | uint32_t sdma_base_addr; | ||
708 | uint32_t temp; | ||
709 | |||
710 | m = get_sdma_mqd(mqd); | ||
711 | sdma_base_addr = get_sdma_base_addr(m); | ||
712 | |||
713 | temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL); | ||
714 | temp = temp & ~SDMA_RB_ENABLE; | ||
715 | write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp); | ||
716 | |||
717 | while (true) { | ||
718 | temp = read_register(kgd, sdma_base_addr + | ||
719 | SDMA0_RLC0_CONTEXT_STATUS); | ||
720 | if (temp & SDMA_RLC_IDLE) | ||
721 | break; | ||
722 | if (timeout == 0) | ||
723 | return -ETIME; | ||
724 | msleep(20); | ||
725 | timeout -= 20; | ||
726 | } | ||
727 | |||
728 | write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0); | ||
729 | write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0); | ||
730 | write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0); | ||
731 | write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0); | ||
732 | |||
733 | return 0; | ||
734 | } | ||
735 | |||
736 | static int kgd_address_watch_disable(struct kgd_dev *kgd) | ||
737 | { | ||
738 | union TCP_WATCH_CNTL_BITS cntl; | ||
739 | unsigned int i; | ||
740 | |||
741 | cntl.u32All = 0; | ||
742 | |||
743 | cntl.bitfields.valid = 0; | ||
744 | cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; | ||
745 | cntl.bitfields.atc = 1; | ||
746 | |||
747 | /* Turning off this address until we set all the registers */ | ||
748 | for (i = 0; i < MAX_WATCH_ADDRESSES; i++) | ||
749 | write_register(kgd, | ||
750 | watchRegs[i * ADDRESS_WATCH_REG_MAX + | ||
751 | ADDRESS_WATCH_REG_CNTL], | ||
752 | cntl.u32All); | ||
753 | |||
754 | return 0; | ||
755 | } | ||
756 | |||
757 | static int kgd_address_watch_execute(struct kgd_dev *kgd, | ||
758 | unsigned int watch_point_id, | ||
759 | uint32_t cntl_val, | ||
760 | uint32_t addr_hi, | ||
761 | uint32_t addr_lo) | ||
762 | { | ||
763 | union TCP_WATCH_CNTL_BITS cntl; | ||
764 | |||
765 | cntl.u32All = cntl_val; | ||
766 | |||
767 | /* Turning off this watch point until we set all the registers */ | ||
768 | cntl.bitfields.valid = 0; | ||
769 | write_register(kgd, | ||
770 | watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + | ||
771 | ADDRESS_WATCH_REG_CNTL], | ||
772 | cntl.u32All); | ||
773 | |||
774 | write_register(kgd, | ||
775 | watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + | ||
776 | ADDRESS_WATCH_REG_ADDR_HI], | ||
777 | addr_hi); | ||
778 | |||
779 | write_register(kgd, | ||
780 | watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + | ||
781 | ADDRESS_WATCH_REG_ADDR_LO], | ||
782 | addr_lo); | ||
783 | |||
784 | /* Enable the watch point */ | ||
785 | cntl.bitfields.valid = 1; | ||
786 | |||
787 | write_register(kgd, | ||
788 | watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + | ||
789 | ADDRESS_WATCH_REG_CNTL], | ||
790 | cntl.u32All); | ||
791 | |||
792 | return 0; | ||
793 | } | ||
794 | |||
795 | static int kgd_wave_control_execute(struct kgd_dev *kgd, | ||
796 | uint32_t gfx_index_val, | ||
797 | uint32_t sq_cmd) | ||
798 | { | ||
799 | struct radeon_device *rdev = get_radeon_device(kgd); | ||
800 | uint32_t data; | ||
801 | |||
802 | mutex_lock(&rdev->grbm_idx_mutex); | ||
803 | |||
804 | write_register(kgd, GRBM_GFX_INDEX, gfx_index_val); | ||
805 | write_register(kgd, SQ_CMD, sq_cmd); | ||
806 | |||
807 | /* Restore the GRBM_GFX_INDEX register */ | ||
808 | |||
809 | data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | | ||
810 | SE_BROADCAST_WRITES; | ||
811 | |||
812 | write_register(kgd, GRBM_GFX_INDEX, data); | ||
813 | |||
814 | mutex_unlock(&rdev->grbm_idx_mutex); | ||
815 | |||
816 | return 0; | ||
817 | } | ||
818 | |||
819 | static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, | ||
820 | unsigned int watch_point_id, | ||
821 | unsigned int reg_offset) | ||
822 | { | ||
823 | return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset] | ||
824 | / 4; | ||
825 | } | ||
826 | |||
827 | static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) | ||
828 | { | ||
829 | uint32_t reg; | ||
830 | struct radeon_device *rdev = (struct radeon_device *) kgd; | ||
831 | |||
832 | reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); | ||
833 | return reg & ATC_VMID_PASID_MAPPING_VALID_MASK; | ||
834 | } | ||
835 | |||
836 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | ||
837 | uint8_t vmid) | ||
838 | { | ||
839 | uint32_t reg; | ||
840 | struct radeon_device *rdev = (struct radeon_device *) kgd; | ||
841 | |||
842 | reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); | ||
843 | return reg & ATC_VMID_PASID_MAPPING_PASID_MASK; | ||
844 | } | ||
845 | |||
846 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) | ||
847 | { | ||
848 | struct radeon_device *rdev = (struct radeon_device *) kgd; | ||
849 | |||
850 | return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid); | ||
851 | } | ||
852 | |||
853 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) | ||
854 | { | ||
855 | struct radeon_device *rdev = (struct radeon_device *) kgd; | ||
856 | const union radeon_firmware_header *hdr; | ||
857 | |||
858 | BUG_ON(kgd == NULL || rdev->mec_fw == NULL); | ||
859 | |||
860 | switch (type) { | ||
861 | case KGD_ENGINE_PFP: | ||
862 | hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data; | ||
863 | break; | ||
864 | |||
865 | case KGD_ENGINE_ME: | ||
866 | hdr = (const union radeon_firmware_header *) rdev->me_fw->data; | ||
867 | break; | ||
868 | |||
869 | case KGD_ENGINE_CE: | ||
870 | hdr = (const union radeon_firmware_header *) rdev->ce_fw->data; | ||
871 | break; | ||
872 | |||
873 | case KGD_ENGINE_MEC1: | ||
874 | hdr = (const union radeon_firmware_header *) rdev->mec_fw->data; | ||
875 | break; | ||
876 | |||
877 | case KGD_ENGINE_MEC2: | ||
878 | hdr = (const union radeon_firmware_header *) | ||
879 | rdev->mec2_fw->data; | ||
880 | break; | ||
881 | |||
882 | case KGD_ENGINE_RLC: | ||
883 | hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data; | ||
884 | break; | ||
885 | |||
886 | case KGD_ENGINE_SDMA1: | ||
887 | case KGD_ENGINE_SDMA2: | ||
888 | hdr = (const union radeon_firmware_header *) | ||
889 | rdev->sdma_fw->data; | ||
890 | break; | ||
891 | |||
892 | default: | ||
893 | return 0; | ||
894 | } | ||
895 | |||
896 | if (hdr == NULL) | ||
897 | return 0; | ||
898 | |||
899 | /* Only 12 bit in use*/ | ||
900 | return hdr->common.ucode_version; | ||
901 | } | ||
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/radeon/radeon_kfd.h deleted file mode 100644 index 9df1fea8e971..000000000000 --- a/drivers/gpu/drm/radeon/radeon_kfd.h +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * radeon_kfd.h defines the private interface between the | ||
25 | * AMD kernel graphics drivers and the AMD KFD. | ||
26 | */ | ||
27 | |||
28 | #ifndef RADEON_KFD_H_INCLUDED | ||
29 | #define RADEON_KFD_H_INCLUDED | ||
30 | |||
31 | #include <linux/types.h> | ||
32 | #include "kgd_kfd_interface.h" | ||
33 | |||
34 | struct radeon_device; | ||
35 | |||
36 | int radeon_kfd_init(void); | ||
37 | void radeon_kfd_fini(void); | ||
38 | |||
39 | void radeon_kfd_suspend(struct radeon_device *rdev); | ||
40 | int radeon_kfd_resume(struct radeon_device *rdev); | ||
41 | void radeon_kfd_interrupt(struct radeon_device *rdev, | ||
42 | const void *ih_ring_entry); | ||
43 | void radeon_kfd_device_probe(struct radeon_device *rdev); | ||
44 | void radeon_kfd_device_init(struct radeon_device *rdev); | ||
45 | void radeon_kfd_device_fini(struct radeon_device *rdev); | ||
46 | |||
47 | #endif /* RADEON_KFD_H_INCLUDED */ | ||
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index dfee8f7d94ae..cde037f213d7 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c | |||
@@ -34,8 +34,6 @@ | |||
34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
35 | #include <linux/pm_runtime.h> | 35 | #include <linux/pm_runtime.h> |
36 | 36 | ||
37 | #include "radeon_kfd.h" | ||
38 | |||
39 | #if defined(CONFIG_VGA_SWITCHEROO) | 37 | #if defined(CONFIG_VGA_SWITCHEROO) |
40 | bool radeon_has_atpx(void); | 38 | bool radeon_has_atpx(void); |
41 | #else | 39 | #else |
@@ -68,8 +66,6 @@ void radeon_driver_unload_kms(struct drm_device *dev) | |||
68 | pm_runtime_forbid(dev->dev); | 66 | pm_runtime_forbid(dev->dev); |
69 | } | 67 | } |
70 | 68 | ||
71 | radeon_kfd_device_fini(rdev); | ||
72 | |||
73 | radeon_acpi_fini(rdev); | 69 | radeon_acpi_fini(rdev); |
74 | 70 | ||
75 | radeon_modeset_fini(rdev); | 71 | radeon_modeset_fini(rdev); |
@@ -174,9 +170,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) | |||
174 | "Error during ACPI methods call\n"); | 170 | "Error during ACPI methods call\n"); |
175 | } | 171 | } |
176 | 172 | ||
177 | radeon_kfd_device_probe(rdev); | ||
178 | radeon_kfd_device_init(rdev); | ||
179 | |||
180 | if (radeon_is_px(dev)) { | 173 | if (radeon_is_px(dev)) { |
181 | pm_runtime_use_autosuspend(dev->dev); | 174 | pm_runtime_use_autosuspend(dev->dev); |
182 | pm_runtime_set_autosuspend_delay(dev->dev, 5000); | 175 | pm_runtime_set_autosuspend_delay(dev->dev, 5000); |