about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author    Andres Rodriguez <andresx7@gmail.com>    2017-03-06 16:27:55 -0500
committer Alex Deucher <alexander.deucher@amd.com> 2017-05-31 16:49:02 -0400
commit    795f2813e628bcf57a69f2dfe413360d14a1d7f4 (patch)
tree      f60bfe602590fde4bd170c263a569cd8147ffdd0
parent    effd924d2f3b9c52d5bd8137c3803e83f719a290 (diff)
drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4
Use an LRU policy to map usermode rings to HW compute queues. Most
compute clients use one queue, and usually the first queue available.
This results in poor pipe/queue work distribution when multiple compute
apps are running. In most cases pipe 0 queue 0 is the only queue that
gets used.

In order to better distribute work across multiple HW queues, we adopt
a policy to map the usermode ring ids to the LRU HW queue. This fixes a
large majority of multi-app compute workloads sharing the same HW
queue, even though 7 other queues are available.

v2: use ring->funcs->type instead of ring->hw_ip
v3: remove amdgpu_queue_mapper_funcs
v4: change ring_lru_list_lock to spinlock, grab only once in lru_get()

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h            3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c     3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c 38
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      63
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h       4
5 files changed, 110 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index aad1d7bf695a..96cbe028d537 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1656,6 +1656,9 @@ struct amdgpu_device {
1656 /* link all gtt */ 1656 /* link all gtt */
1657 spinlock_t gtt_list_lock; 1657 spinlock_t gtt_list_lock;
1658 struct list_head gtt_list; 1658 struct list_head gtt_list;
1659 /* keep an lru list of rings by HW IP */
1660 struct list_head ring_lru_list;
1661 spinlock_t ring_lru_list_lock;
1659 1662
1660 /* record hw reset is performed */ 1663 /* record hw reset is performed */
1661 bool has_hw_reset; 1664 bool has_hw_reset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e731c4876a09..cce94d836221 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2073,6 +2073,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2073 INIT_LIST_HEAD(&adev->gtt_list); 2073 INIT_LIST_HEAD(&adev->gtt_list);
2074 spin_lock_init(&adev->gtt_list_lock); 2074 spin_lock_init(&adev->gtt_list_lock);
2075 2075
2076 INIT_LIST_HEAD(&adev->ring_lru_list);
2077 spin_lock_init(&adev->ring_lru_list_lock);
2078
2076 INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler); 2079 INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler);
2077 2080
2078 if (adev->asic_type >= CHIP_BONAIRE) { 2081 if (adev->asic_type >= CHIP_BONAIRE) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index c13a55352db6..4073f072f6c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -100,6 +100,40 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
100 return amdgpu_update_cached_map(mapper, ring, *out_ring); 100 return amdgpu_update_cached_map(mapper, ring, *out_ring);
101} 101}
102 102
103static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
104{
105 switch (hw_ip) {
106 case AMDGPU_HW_IP_GFX:
107 return AMDGPU_RING_TYPE_GFX;
108 case AMDGPU_HW_IP_COMPUTE:
109 return AMDGPU_RING_TYPE_COMPUTE;
110 case AMDGPU_HW_IP_DMA:
111 return AMDGPU_RING_TYPE_SDMA;
112 case AMDGPU_HW_IP_UVD:
113 return AMDGPU_RING_TYPE_UVD;
114 case AMDGPU_HW_IP_VCE:
115 return AMDGPU_RING_TYPE_VCE;
116 default:
117 DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
118 return -1;
119 }
120}
121
122static int amdgpu_lru_map(struct amdgpu_device *adev,
123 struct amdgpu_queue_mapper *mapper,
124 int user_ring,
125 struct amdgpu_ring **out_ring)
126{
127 int r;
128 int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
129
130 r = amdgpu_ring_lru_get(adev, ring_type, out_ring);
131 if (r)
132 return r;
133
134 return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
135}
136
103/** 137/**
104 * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct 138 * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
105 * 139 *
@@ -230,7 +264,6 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
230 264
231 switch (mapper->hw_ip) { 265 switch (mapper->hw_ip) {
232 case AMDGPU_HW_IP_GFX: 266 case AMDGPU_HW_IP_GFX:
233 case AMDGPU_HW_IP_COMPUTE:
234 case AMDGPU_HW_IP_DMA: 267 case AMDGPU_HW_IP_DMA:
235 case AMDGPU_HW_IP_UVD: 268 case AMDGPU_HW_IP_UVD:
236 case AMDGPU_HW_IP_VCE: 269 case AMDGPU_HW_IP_VCE:
@@ -239,6 +272,9 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
239 case AMDGPU_HW_IP_VCN_ENC: 272 case AMDGPU_HW_IP_VCN_ENC:
240 r = amdgpu_identity_map(adev, mapper, ring, out_ring); 273 r = amdgpu_identity_map(adev, mapper, ring, out_ring);
241 break; 274 break;
275 case AMDGPU_HW_IP_COMPUTE:
276 r = amdgpu_lru_map(adev, mapper, ring, out_ring);
277 break;
242 default: 278 default:
243 *out_ring = NULL; 279 *out_ring = NULL;
244 r = -EINVAL; 280 r = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 7d95435fad16..f1076e3edf53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -135,6 +135,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
135 135
136 if (ring->funcs->end_use) 136 if (ring->funcs->end_use)
137 ring->funcs->end_use(ring); 137 ring->funcs->end_use(ring);
138
139 amdgpu_ring_lru_touch(ring->adev, ring);
138} 140}
139 141
140/** 142/**
@@ -283,6 +285,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
283 } 285 }
284 286
285 ring->max_dw = max_dw; 287 ring->max_dw = max_dw;
288 INIT_LIST_HEAD(&ring->lru_list);
289 amdgpu_ring_lru_touch(adev, ring);
286 290
287 if (amdgpu_debugfs_ring_init(adev, ring)) { 291 if (amdgpu_debugfs_ring_init(adev, ring)) {
288 DRM_ERROR("Failed to register debugfs file for rings !\n"); 292 DRM_ERROR("Failed to register debugfs file for rings !\n");
@@ -327,6 +331,65 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
327 ring->adev->rings[ring->idx] = NULL; 331 ring->adev->rings[ring->idx] = NULL;
328} 332}
329 333
334static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
335 struct amdgpu_ring *ring)
336{
337 /* list_move_tail handles the case where ring isn't part of the list */
338 list_move_tail(&ring->lru_list, &adev->ring_lru_list);
339}
340
341/**
342 * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
343 *
344 * @adev: amdgpu_device pointer
345 * @type: amdgpu_ring_type enum
346 * @ring: output ring
347 *
348 * Retrieve the amdgpu_ring structure for the least recently used ring of
349 * a specific IP block (all asics).
350 * Returns 0 on success, error on failure.
351 */
352int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
353 struct amdgpu_ring **ring)
354{
355 struct amdgpu_ring *entry;
356
357 /* List is sorted in LRU order, find first entry corresponding
358 * to the desired HW IP */
359 *ring = NULL;
360 spin_lock(&adev->ring_lru_list_lock);
361 list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
362 if (entry->funcs->type == type) {
363 *ring = entry;
364 amdgpu_ring_lru_touch_locked(adev, *ring);
365 break;
366 }
367 }
368 spin_unlock(&adev->ring_lru_list_lock);
369
370 if (!*ring) {
371 DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
372 return -EINVAL;
373 }
374
375 return 0;
376}
377
378/**
379 * amdgpu_ring_lru_touch - mark a ring as recently being used
380 *
381 * @adev: amdgpu_device pointer
382 * @ring: ring to touch
383 *
384 * Move @ring to the tail of the lru list
385 */
386void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
387{
388 spin_lock(&adev->ring_lru_list_lock);
389 amdgpu_ring_lru_touch_locked(adev, ring);
390 spin_unlock(&adev->ring_lru_list_lock);
391}
392
330/* 393/*
331 * Debugfs info 394 * Debugfs info
332 */ 395 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 334307efac8b..577528a9af0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -154,6 +154,7 @@ struct amdgpu_ring {
154 const struct amdgpu_ring_funcs *funcs; 154 const struct amdgpu_ring_funcs *funcs;
155 struct amdgpu_fence_driver fence_drv; 155 struct amdgpu_fence_driver fence_drv;
156 struct amd_gpu_scheduler sched; 156 struct amd_gpu_scheduler sched;
157 struct list_head lru_list;
157 158
158 struct amdgpu_bo *ring_obj; 159 struct amdgpu_bo *ring_obj;
159 volatile uint32_t *ring; 160 volatile uint32_t *ring;
@@ -200,6 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
200 unsigned ring_size, struct amdgpu_irq_src *irq_src, 201 unsigned ring_size, struct amdgpu_irq_src *irq_src,
201 unsigned irq_type); 202 unsigned irq_type);
202void amdgpu_ring_fini(struct amdgpu_ring *ring); 203void amdgpu_ring_fini(struct amdgpu_ring *ring);
204int amdgpu_ring_lru_get(struct amdgpu_device *adev, int hw_ip,
205 struct amdgpu_ring **ring);
206void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
203static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) 207static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
204{ 208{
205 int i = 0; 209 int i = 0;