diff options
author | Andres Rodriguez <andresx7@gmail.com> | 2017-02-03 16:28:48 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-05-31 16:48:54 -0400 |
commit | d0b63bb3385c5683c7531044425f4507ca5251b2 (patch) | |
tree | b2c6c0dc8ac159a6e08f3a940be5b9b66408a96a /drivers | |
parent | 763a47b8e1abc7cee0a0f550330124ef1199d58d (diff) |
drm/amdkfd: allow split HQD on per-queue granularity v5
Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.
This allows for more interesting pipe/queue splits.
v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipe_enabled()
v5: clamp res.queue_mask to the first MEC only
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 104 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 17 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_kfd.c | 21 |
9 files changed, 130 insertions, 55 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3200ff9bab8c..8fc5aa33347e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | |||
@@ -95,14 +95,30 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) | |||
95 | 95 | ||
96 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | 96 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) |
97 | { | 97 | { |
98 | int i; | ||
99 | int last_valid_bit; | ||
98 | if (adev->kfd) { | 100 | if (adev->kfd) { |
99 | struct kgd2kfd_shared_resources gpu_resources = { | 101 | struct kgd2kfd_shared_resources gpu_resources = { |
100 | .compute_vmid_bitmap = 0xFF00, | 102 | .compute_vmid_bitmap = 0xFF00, |
101 | 103 | .num_mec = adev->gfx.mec.num_mec, | |
102 | .first_compute_pipe = 1, | 104 | .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, |
103 | .compute_pipe_count = 4 - 1, | 105 | .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe |
104 | }; | 106 | }; |
105 | 107 | ||
108 | /* this is going to have a few of the MSBs set that we need to | ||
109 | * clear */ | ||
110 | bitmap_complement(gpu_resources.queue_bitmap, | ||
111 | adev->gfx.mec.queue_bitmap, | ||
112 | KGD_MAX_QUEUES); | ||
113 | |||
114 | /* According to linux/bitmap.h we shouldn't use bitmap_clear if | ||
115 | * nbits is not compile time constant */ | ||
116 | last_valid_bit = adev->gfx.mec.num_mec | ||
117 | * adev->gfx.mec.num_pipe_per_mec | ||
118 | * adev->gfx.mec.num_queue_per_pipe; | ||
119 | for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) | ||
120 | clear_bit(i, gpu_resources.queue_bitmap); | ||
121 | |||
106 | amdgpu_doorbell_get_kfd_info(adev, | 122 | amdgpu_doorbell_get_kfd_info(adev, |
107 | &gpu_resources.doorbell_physical_address, | 123 | &gpu_resources.doorbell_physical_address, |
108 | &gpu_resources.doorbell_aperture_size, | 124 | &gpu_resources.doorbell_aperture_size, |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3f95f7cb4019..88187bfc5ea3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c | |||
@@ -226,6 +226,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, | |||
226 | 226 | ||
227 | kfd->shared_resources = *gpu_resources; | 227 | kfd->shared_resources = *gpu_resources; |
228 | 228 | ||
229 | /* We only use the first MEC */ | ||
230 | if (kfd->shared_resources.num_mec > 1) | ||
231 | kfd->shared_resources.num_mec = 1; | ||
232 | |||
229 | /* calculate max size of mqds needed for queues */ | 233 | /* calculate max size of mqds needed for queues */ |
230 | size = max_num_of_queues_per_device * | 234 | size = max_num_of_queues_per_device * |
231 | kfd->device_info->mqd_size_aligned; | 235 | kfd->device_info->mqd_size_aligned; |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c064dea3f285..955aa304ff48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | |||
@@ -63,21 +63,44 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) | |||
63 | return KFD_MQD_TYPE_CP; | 63 | return KFD_MQD_TYPE_CP; |
64 | } | 64 | } |
65 | 65 | ||
66 | unsigned int get_first_pipe(struct device_queue_manager *dqm) | 66 | static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) |
67 | { | ||
68 | int i; | ||
69 | int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec | ||
70 | + pipe * dqm->dev->shared_resources.num_queue_per_pipe; | ||
71 | |||
72 | /* queue is available for KFD usage if bit is 1 */ | ||
73 | for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) | ||
74 | if (test_bit(pipe_offset + i, | ||
75 | dqm->dev->shared_resources.queue_bitmap)) | ||
76 | return true; | ||
77 | return false; | ||
78 | } | ||
79 | |||
80 | unsigned int get_mec_num(struct device_queue_manager *dqm) | ||
67 | { | 81 | { |
68 | BUG_ON(!dqm || !dqm->dev); | 82 | BUG_ON(!dqm || !dqm->dev); |
69 | return dqm->dev->shared_resources.first_compute_pipe; | 83 | |
84 | return dqm->dev->shared_resources.num_mec; | ||
70 | } | 85 | } |
71 | 86 | ||
72 | unsigned int get_pipes_num(struct device_queue_manager *dqm) | 87 | unsigned int get_queues_num(struct device_queue_manager *dqm) |
73 | { | 88 | { |
74 | BUG_ON(!dqm || !dqm->dev); | 89 | BUG_ON(!dqm || !dqm->dev); |
75 | return dqm->dev->shared_resources.compute_pipe_count; | 90 | return bitmap_weight(dqm->dev->shared_resources.queue_bitmap, |
91 | KGD_MAX_QUEUES); | ||
76 | } | 92 | } |
77 | 93 | ||
78 | static inline unsigned int get_pipes_num_cpsch(void) | 94 | unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) |
79 | { | 95 | { |
80 | return PIPE_PER_ME_CP_SCHEDULING; | 96 | BUG_ON(!dqm || !dqm->dev); |
97 | return dqm->dev->shared_resources.num_queue_per_pipe; | ||
98 | } | ||
99 | |||
100 | unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) | ||
101 | { | ||
102 | BUG_ON(!dqm || !dqm->dev); | ||
103 | return dqm->dev->shared_resources.num_pipe_per_mec; | ||
81 | } | 104 | } |
82 | 105 | ||
83 | void program_sh_mem_settings(struct device_queue_manager *dqm, | 106 | void program_sh_mem_settings(struct device_queue_manager *dqm, |
@@ -200,12 +223,16 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) | |||
200 | 223 | ||
201 | set = false; | 224 | set = false; |
202 | 225 | ||
203 | for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm); | 226 | for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm); |
204 | pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) { | 227 | pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { |
228 | |||
229 | if (!is_pipe_enabled(dqm, 0, pipe)) | ||
230 | continue; | ||
231 | |||
205 | if (dqm->allocated_queues[pipe] != 0) { | 232 | if (dqm->allocated_queues[pipe] != 0) { |
206 | bit = find_first_bit( | 233 | bit = find_first_bit( |
207 | (unsigned long *)&dqm->allocated_queues[pipe], | 234 | (unsigned long *)&dqm->allocated_queues[pipe], |
208 | QUEUES_PER_PIPE); | 235 | get_queues_per_pipe(dqm)); |
209 | 236 | ||
210 | clear_bit(bit, | 237 | clear_bit(bit, |
211 | (unsigned long *)&dqm->allocated_queues[pipe]); | 238 | (unsigned long *)&dqm->allocated_queues[pipe]); |
@@ -222,7 +249,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) | |||
222 | pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n", | 249 | pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n", |
223 | __func__, q->pipe, q->queue); | 250 | __func__, q->pipe, q->queue); |
224 | /* horizontal hqd allocation */ | 251 | /* horizontal hqd allocation */ |
225 | dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm); | 252 | dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); |
226 | 253 | ||
227 | return 0; | 254 | return 0; |
228 | } | 255 | } |
@@ -469,36 +496,25 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, | |||
469 | vmid); | 496 | vmid); |
470 | } | 497 | } |
471 | 498 | ||
472 | int init_pipelines(struct device_queue_manager *dqm, | ||
473 | unsigned int pipes_num, unsigned int first_pipe) | ||
474 | { | ||
475 | BUG_ON(!dqm || !dqm->dev); | ||
476 | |||
477 | pr_debug("kfd: In func %s\n", __func__); | ||
478 | |||
479 | return 0; | ||
480 | } | ||
481 | |||
482 | static void init_interrupts(struct device_queue_manager *dqm) | 499 | static void init_interrupts(struct device_queue_manager *dqm) |
483 | { | 500 | { |
484 | unsigned int i; | 501 | unsigned int i; |
485 | 502 | ||
486 | BUG_ON(dqm == NULL); | 503 | BUG_ON(dqm == NULL); |
487 | 504 | ||
488 | for (i = 0 ; i < get_pipes_num(dqm) ; i++) | 505 | for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) |
489 | dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, | 506 | if (is_pipe_enabled(dqm, 0, i)) |
490 | i + get_first_pipe(dqm)); | 507 | dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i); |
491 | } | 508 | } |
492 | 509 | ||
493 | static int init_scheduler(struct device_queue_manager *dqm) | 510 | static int init_scheduler(struct device_queue_manager *dqm) |
494 | { | 511 | { |
495 | int retval; | 512 | int retval = 0; |
496 | 513 | ||
497 | BUG_ON(!dqm); | 514 | BUG_ON(!dqm); |
498 | 515 | ||
499 | pr_debug("kfd: In %s\n", __func__); | 516 | pr_debug("kfd: In %s\n", __func__); |
500 | 517 | ||
501 | retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm)); | ||
502 | return retval; | 518 | return retval; |
503 | } | 519 | } |
504 | 520 | ||
@@ -509,21 +525,21 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) | |||
509 | BUG_ON(!dqm); | 525 | BUG_ON(!dqm); |
510 | 526 | ||
511 | pr_debug("kfd: In func %s num of pipes: %d\n", | 527 | pr_debug("kfd: In func %s num of pipes: %d\n", |
512 | __func__, get_pipes_num(dqm)); | 528 | __func__, get_pipes_per_mec(dqm)); |
513 | 529 | ||
514 | mutex_init(&dqm->lock); | 530 | mutex_init(&dqm->lock); |
515 | INIT_LIST_HEAD(&dqm->queues); | 531 | INIT_LIST_HEAD(&dqm->queues); |
516 | dqm->queue_count = dqm->next_pipe_to_allocate = 0; | 532 | dqm->queue_count = dqm->next_pipe_to_allocate = 0; |
517 | dqm->sdma_queue_count = 0; | 533 | dqm->sdma_queue_count = 0; |
518 | dqm->allocated_queues = kcalloc(get_pipes_num(dqm), | 534 | dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), |
519 | sizeof(unsigned int), GFP_KERNEL); | 535 | sizeof(unsigned int), GFP_KERNEL); |
520 | if (!dqm->allocated_queues) { | 536 | if (!dqm->allocated_queues) { |
521 | mutex_destroy(&dqm->lock); | 537 | mutex_destroy(&dqm->lock); |
522 | return -ENOMEM; | 538 | return -ENOMEM; |
523 | } | 539 | } |
524 | 540 | ||
525 | for (i = 0; i < get_pipes_num(dqm); i++) | 541 | for (i = 0; i < get_pipes_per_mec(dqm); i++) |
526 | dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1; | 542 | dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1; |
527 | 543 | ||
528 | dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; | 544 | dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; |
529 | dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; | 545 | dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; |
@@ -630,18 +646,38 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, | |||
630 | 646 | ||
631 | static int set_sched_resources(struct device_queue_manager *dqm) | 647 | static int set_sched_resources(struct device_queue_manager *dqm) |
632 | { | 648 | { |
649 | int i, mec; | ||
633 | struct scheduling_resources res; | 650 | struct scheduling_resources res; |
634 | unsigned int queue_num, queue_mask; | ||
635 | 651 | ||
636 | BUG_ON(!dqm); | 652 | BUG_ON(!dqm); |
637 | 653 | ||
638 | pr_debug("kfd: In func %s\n", __func__); | 654 | pr_debug("kfd: In func %s\n", __func__); |
639 | 655 | ||
640 | queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE; | ||
641 | queue_mask = (1 << queue_num) - 1; | ||
642 | res.vmid_mask = (1 << VMID_PER_DEVICE) - 1; | 656 | res.vmid_mask = (1 << VMID_PER_DEVICE) - 1; |
643 | res.vmid_mask <<= KFD_VMID_START_OFFSET; | 657 | res.vmid_mask <<= KFD_VMID_START_OFFSET; |
644 | res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE); | 658 | |
659 | res.queue_mask = 0; | ||
660 | for (i = 0; i < KGD_MAX_QUEUES; ++i) { | ||
661 | mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) | ||
662 | / dqm->dev->shared_resources.num_pipe_per_mec; | ||
663 | |||
664 | if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap)) | ||
665 | continue; | ||
666 | |||
667 | /* only acquire queues from the first MEC */ | ||
668 | if (mec > 0) | ||
669 | continue; | ||
670 | |||
671 | /* This situation may be hit in the future if a new HW | ||
672 | * generation exposes more than 64 queues. If so, the | ||
673 | * definition of res.queue_mask needs updating */ | ||
674 | if (WARN_ON(i > (sizeof(res.queue_mask)*8))) { | ||
675 | pr_err("Invalid queue enabled by amdgpu: %d\n", i); | ||
676 | break; | ||
677 | } | ||
678 | |||
679 | res.queue_mask |= (1ull << i); | ||
680 | } | ||
645 | res.gws_mask = res.oac_mask = res.gds_heap_base = | 681 | res.gws_mask = res.oac_mask = res.gds_heap_base = |
646 | res.gds_heap_size = 0; | 682 | res.gds_heap_size = 0; |
647 | 683 | ||
@@ -660,7 +696,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) | |||
660 | BUG_ON(!dqm); | 696 | BUG_ON(!dqm); |
661 | 697 | ||
662 | pr_debug("kfd: In func %s num of pipes: %d\n", | 698 | pr_debug("kfd: In func %s num of pipes: %d\n", |
663 | __func__, get_pipes_num_cpsch()); | 699 | __func__, get_pipes_per_mec(dqm)); |
664 | 700 | ||
665 | mutex_init(&dqm->lock); | 701 | mutex_init(&dqm->lock); |
666 | INIT_LIST_HEAD(&dqm->queues); | 702 | INIT_LIST_HEAD(&dqm->queues); |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index a625b9137da2..66b9615bc3c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | |||
@@ -30,8 +30,6 @@ | |||
30 | #include "kfd_mqd_manager.h" | 30 | #include "kfd_mqd_manager.h" |
31 | 31 | ||
32 | #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (500) | 32 | #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (500) |
33 | #define QUEUES_PER_PIPE (8) | ||
34 | #define PIPE_PER_ME_CP_SCHEDULING (3) | ||
35 | #define CIK_VMID_NUM (8) | 33 | #define CIK_VMID_NUM (8) |
36 | #define KFD_VMID_START_OFFSET (8) | 34 | #define KFD_VMID_START_OFFSET (8) |
37 | #define VMID_PER_DEVICE CIK_VMID_NUM | 35 | #define VMID_PER_DEVICE CIK_VMID_NUM |
@@ -182,10 +180,10 @@ void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops); | |||
182 | void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops); | 180 | void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops); |
183 | void program_sh_mem_settings(struct device_queue_manager *dqm, | 181 | void program_sh_mem_settings(struct device_queue_manager *dqm, |
184 | struct qcm_process_device *qpd); | 182 | struct qcm_process_device *qpd); |
185 | int init_pipelines(struct device_queue_manager *dqm, | 183 | unsigned int get_mec_num(struct device_queue_manager *dqm); |
186 | unsigned int pipes_num, unsigned int first_pipe); | 184 | unsigned int get_queues_num(struct device_queue_manager *dqm); |
187 | unsigned int get_first_pipe(struct device_queue_manager *dqm); | 185 | unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); |
188 | unsigned int get_pipes_num(struct device_queue_manager *dqm); | 186 | unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); |
189 | 187 | ||
190 | static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) | 188 | static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) |
191 | { | 189 | { |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index c6f435aa803f..48dc0561b402 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c | |||
@@ -151,5 +151,5 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | |||
151 | 151 | ||
152 | static int initialize_cpsch_cik(struct device_queue_manager *dqm) | 152 | static int initialize_cpsch_cik(struct device_queue_manager *dqm) |
153 | { | 153 | { |
154 | return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm)); | 154 | return 0; |
155 | } | 155 | } |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index ca8c09326b31..7131998848d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | |||
@@ -65,8 +65,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, | |||
65 | 65 | ||
66 | /* check if there is over subscription*/ | 66 | /* check if there is over subscription*/ |
67 | *over_subscription = false; | 67 | *over_subscription = false; |
68 | if ((process_count > 1) || | 68 | if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) { |
69 | queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) { | ||
70 | *over_subscription = true; | 69 | *over_subscription = true; |
71 | pr_debug("kfd: over subscribed runlist\n"); | 70 | pr_debug("kfd: over subscribed runlist\n"); |
72 | } | 71 | } |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index e1fb40b84c72..32cdf2b483db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | |||
@@ -209,7 +209,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, | |||
209 | /* check if there is over subscription */ | 209 | /* check if there is over subscription */ |
210 | if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && | 210 | if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && |
211 | ((dev->dqm->processes_count >= VMID_PER_DEVICE) || | 211 | ((dev->dqm->processes_count >= VMID_PER_DEVICE) || |
212 | (dev->dqm->queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE))) { | 212 | (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { |
213 | pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); | 213 | pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); |
214 | retval = -EPERM; | 214 | retval = -EPERM; |
215 | goto err_create_queue; | 215 | goto err_create_queue; |
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 67f6d1921f4c..91ef1484b3bb 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h | |||
@@ -29,10 +29,11 @@ | |||
29 | #define KGD_KFD_INTERFACE_H_INCLUDED | 29 | #define KGD_KFD_INTERFACE_H_INCLUDED |
30 | 30 | ||
31 | #include <linux/types.h> | 31 | #include <linux/types.h> |
32 | #include <linux/bitmap.h> | ||
32 | 33 | ||
33 | struct pci_dev; | 34 | struct pci_dev; |
34 | 35 | ||
35 | #define KFD_INTERFACE_VERSION 1 | 36 | #define KFD_INTERFACE_VERSION 2 |
36 | #define KGD_MAX_QUEUES 128 | 37 | #define KGD_MAX_QUEUES 128 |
37 | 38 | ||
38 | struct kfd_dev; | 39 | struct kfd_dev; |
@@ -62,11 +63,17 @@ struct kgd2kfd_shared_resources { | |||
62 | /* Bit n == 1 means VMID n is available for KFD. */ | 63 | /* Bit n == 1 means VMID n is available for KFD. */ |
63 | unsigned int compute_vmid_bitmap; | 64 | unsigned int compute_vmid_bitmap; |
64 | 65 | ||
65 | /* Compute pipes are counted starting from MEC0/pipe0 as 0. */ | 66 | /* number of mec available from the hardware */ |
66 | unsigned int first_compute_pipe; | 67 | uint32_t num_mec; |
67 | 68 | ||
68 | /* Number of MEC pipes available for KFD. */ | 69 | /* number of pipes per mec */ |
69 | unsigned int compute_pipe_count; | 70 | uint32_t num_pipe_per_mec; |
71 | |||
72 | /* number of queues per pipe */ | ||
73 | uint32_t num_queue_per_pipe; | ||
74 | |||
75 | /* Bit n == 1 means Queue n is available for KFD */ | ||
76 | DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); | ||
70 | 77 | ||
71 | /* Base address of doorbell aperture. */ | 78 | /* Base address of doorbell aperture. */ |
72 | phys_addr_t doorbell_physical_address; | 79 | phys_addr_t doorbell_physical_address; |
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index a06e3b130b9e..699fe7f9b8bf 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c | |||
@@ -179,14 +179,29 @@ void radeon_kfd_device_probe(struct radeon_device *rdev) | |||
179 | 179 | ||
180 | void radeon_kfd_device_init(struct radeon_device *rdev) | 180 | void radeon_kfd_device_init(struct radeon_device *rdev) |
181 | { | 181 | { |
182 | int i, queue, pipe, mec; | ||
183 | |||
182 | if (rdev->kfd) { | 184 | if (rdev->kfd) { |
183 | struct kgd2kfd_shared_resources gpu_resources = { | 185 | struct kgd2kfd_shared_resources gpu_resources = { |
184 | .compute_vmid_bitmap = 0xFF00, | 186 | .compute_vmid_bitmap = 0xFF00, |
185 | 187 | .num_mec = 1, | |
186 | .first_compute_pipe = 1, | 188 | .num_pipe_per_mec = 4, |
187 | .compute_pipe_count = 4 - 1, | 189 | .num_queue_per_pipe = 8 |
188 | }; | 190 | }; |
189 | 191 | ||
192 | bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES); | ||
193 | |||
194 | for (i = 0; i < KGD_MAX_QUEUES; ++i) { | ||
195 | queue = i % gpu_resources.num_queue_per_pipe; | ||
196 | pipe = (i / gpu_resources.num_queue_per_pipe) | ||
197 | % gpu_resources.num_pipe_per_mec; | ||
198 | mec = (i / gpu_resources.num_queue_per_pipe) | ||
199 | / gpu_resources.num_pipe_per_mec; | ||
200 | |||
201 | if (mec == 0 && pipe > 0) | ||
202 | set_bit(i, gpu_resources.queue_bitmap); | ||
203 | } | ||
204 | |||
190 | radeon_doorbell_get_kfd_info(rdev, | 205 | radeon_doorbell_get_kfd_info(rdev, |
191 | &gpu_resources.doorbell_physical_address, | 206 | &gpu_resources.doorbell_physical_address, |
192 | &gpu_resources.doorbell_aperture_size, | 207 | &gpu_resources.doorbell_aperture_size, |