author	Christian König <deathsimple@vodafone.de>	2012-05-09 09:34:56 -0400
committer	Dave Airlie <airlied@redhat.com>	2012-05-09 12:22:39 -0400
commit	c3b7fe8b8a0b717f90a4a0c49cffae27e46e3fb7 (patch)
tree	ad2756cf15f8986f7e41f3470c0bd60b30397cde
parent	0085c95061e836f3ed489d042b502733c094e7e4 (diff)
drm/radeon: multiple ring allocator v3
A fresh start with a new idea for a multiple ring allocator. It should
perform as well as a normal ring allocator as long as only one ring does
something, but falls back to a more complex algorithm when more complex
things start to happen.

We store the last allocated bo in "last" and always try to allocate
after it. The principle is that in a linear GPU ring progression,
whatever comes after "last" is the oldest bo we allocated and thus the
first one that should no longer be in use by the GPU.

If that is not the case, we skip over the bo after "last" to the closest
finished bo, if one exists. If none exists and we are not asked to
block, we report an allocation failure.

If we are asked to block, we wait on the oldest fence of each ring and
return as soon as any of those fences completes.

v2: We need to be able to let "hole" point to the list_head, otherwise
    try_free will never free the first allocation of the list. Also stop
    calling radeon_fence_signaled more often than necessary.

v3: Don't free allocations without considering them as a hole, otherwise
    we might lose holes. Also return -ENOMEM instead of -ENOENT when
    running out of fences to wait for. Limit the number of holes we try
    for each ring to 3.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r--	drivers/gpu/drm/radeon/radeon.h	7
-rw-r--r--	drivers/gpu/drm/radeon/radeon_ring.c	19
-rw-r--r--	drivers/gpu/drm/radeon/radeon_sa.c	312
3 files changed, 231 insertions(+), 107 deletions(-)
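The allocation strategy described in the commit message boils down to a few list operations. Below is a minimal standalone sketch of that idea, not part of the patch: the toy_* names are invented for illustration, and it models only the simple linear case (no per-ring fence lists, no wrap-around, no blocking). Allocations live on a circular list in offset order, "hole" remembers the last allocation, and a new request is carved out of the gap immediately after it.

/* toy_sa.c: standalone illustration of the "hole" idea (not radeon code) */
#include <stdio.h>
#include <stdlib.h>

struct toy_sa_bo {
	struct toy_sa_bo *prev, *next;	/* allocations kept in offset order */
	unsigned soffset, eoffset;
};

struct toy_sa_manager {
	struct toy_sa_bo olist;		/* list head, doubles as "no hole yet" */
	struct toy_sa_bo *hole;		/* last allocation, or &olist */
	unsigned size;
};

static void toy_init(struct toy_sa_manager *m, unsigned size)
{
	m->olist.prev = m->olist.next = &m->olist;
	m->olist.soffset = m->olist.eoffset = 0;
	m->hole = &m->olist;
	m->size = size;
}

/* start of the gap after the hole: end of the bo the hole points at */
static unsigned toy_hole_soffset(struct toy_sa_manager *m)
{
	return m->hole == &m->olist ? 0 : m->hole->eoffset;
}

/* end of the gap: start of the following bo, or the end of the buffer */
static unsigned toy_hole_eoffset(struct toy_sa_manager *m)
{
	return m->hole->next == &m->olist ? m->size : m->hole->next->soffset;
}

/* carve a new allocation out of the gap right after the last one */
static struct toy_sa_bo *toy_try_alloc(struct toy_sa_manager *m,
				       unsigned size, unsigned align)
{
	unsigned soffset = toy_hole_soffset(m);
	unsigned eoffset = toy_hole_eoffset(m);
	unsigned wasted = (align - (soffset % align)) % align;
	struct toy_sa_bo *bo;

	if (eoffset - soffset < size + wasted)
		return NULL;			/* no room after the hole */

	bo = calloc(1, sizeof(*bo));
	if (bo == NULL)
		return NULL;
	bo->soffset = soffset + wasted;
	bo->eoffset = bo->soffset + size;

	/* link in right after the hole, then the new bo becomes the hole */
	bo->prev = m->hole;
	bo->next = m->hole->next;
	m->hole->next->prev = bo;
	m->hole->next = bo;
	m->hole = bo;
	return bo;
}

int main(void)
{
	struct toy_sa_manager m;
	int i;

	toy_init(&m, 1024);
	for (i = 0; i < 3; ++i) {
		struct toy_sa_bo *bo = toy_try_alloc(&m, 100, 64);
		if (bo)
			printf("allocated [%u, %u)\n", bo->soffset, bo->eoffset);
	}
	return 0;
}

In the actual patch below, radeon_sa_bo_try_alloc() plays the role of toy_try_alloc(), while radeon_sa_bo_next_hole() adds the part this sketch leaves out: moving the hole past already-signaled allocations on the per-ring fence lists, including the wrap-around case.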
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 37a74597e9df..cc7f16ab257f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -385,7 +385,9 @@ struct radeon_bo_list {
 struct radeon_sa_manager {
 	spinlock_t lock;
 	struct radeon_bo *bo;
-	struct list_head sa_bo;
+	struct list_head *hole;
+	struct list_head flist[RADEON_NUM_RINGS];
+	struct list_head olist;
 	unsigned size;
 	uint64_t gpu_addr;
 	void *cpu_ptr;
@@ -396,7 +398,8 @@ struct radeon_sa_bo;
 
 /* sub-allocation buffer */
 struct radeon_sa_bo {
-	struct list_head list;
+	struct list_head olist;
+	struct list_head flist;
 	struct radeon_sa_manager *manager;
 	unsigned soffset;
 	unsigned eoffset;
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 1748d939657c..e074ff5c2ac2 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -204,25 +204,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 
 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
-	struct radeon_sa_manager tmp;
 	int i, r;
 
-	r = radeon_sa_bo_manager_init(rdev, &tmp,
-				      RADEON_IB_POOL_SIZE*64*1024,
-				      RADEON_GEM_DOMAIN_GTT);
-	if (r) {
-		return r;
-	}
-
 	radeon_mutex_lock(&rdev->ib_pool.mutex);
 	if (rdev->ib_pool.ready) {
 		radeon_mutex_unlock(&rdev->ib_pool.mutex);
-		radeon_sa_bo_manager_fini(rdev, &tmp);
 		return 0;
 	}
 
-	rdev->ib_pool.sa_manager = tmp;
-	INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
+	r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
+				      RADEON_IB_POOL_SIZE*64*1024,
+				      RADEON_GEM_DOMAIN_GTT);
+	if (r) {
+		radeon_mutex_unlock(&rdev->ib_pool.mutex);
+		return r;
+	}
+
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
 		rdev->ib_pool.ibs[i].fence = NULL;
 		rdev->ib_pool.ibs[i].idx = i;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index 90ee8add2443..c3ac7f4c7b70 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -27,21 +27,42 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+/* Algorithm:
+ *
+ * We store the last allocated bo in "hole", we always try to allocate
+ * after the last allocated bo. Principle is that in a linear GPU ring
+ * progression was is after last is the oldest bo we allocated and thus
+ * the first one that should no longer be in use by the GPU.
+ *
+ * If it's not the case we skip over the bo after last to the closest
+ * done bo if such one exist. If none exist and we are not asked to
+ * block we report failure to allocate.
+ *
+ * If we are asked to block we wait on all the oldest fence of all
+ * rings. We just wait for any of those fence to complete.
+ */
 #include "drmP.h"
 #include "drm.h"
 #include "radeon.h"
 
+static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);
+
 int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 			      struct radeon_sa_manager *sa_manager,
 			      unsigned size, u32 domain)
 {
-	int r;
+	int i, r;
 
 	spin_lock_init(&sa_manager->lock);
 	sa_manager->bo = NULL;
 	sa_manager->size = size;
 	sa_manager->domain = domain;
-	INIT_LIST_HEAD(&sa_manager->sa_bo);
+	sa_manager->hole = &sa_manager->olist;
+	INIT_LIST_HEAD(&sa_manager->olist);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		INIT_LIST_HEAD(&sa_manager->flist[i]);
+	}
 
 	r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
 			     RADEON_GEM_DOMAIN_CPU, &sa_manager->bo);
@@ -58,11 +79,15 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
 {
 	struct radeon_sa_bo *sa_bo, *tmp;
 
-	if (!list_empty(&sa_manager->sa_bo)) {
-		dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+	if (!list_empty(&sa_manager->olist)) {
+		sa_manager->hole = &sa_manager->olist,
+		radeon_sa_bo_try_free(sa_manager);
+		if (!list_empty(&sa_manager->olist)) {
+			dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+		}
 	}
-	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
-		list_del_init(&sa_bo->list);
+	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
+		radeon_sa_bo_remove_locked(sa_bo);
 	}
 	radeon_bo_unref(&sa_manager->bo);
 	sa_manager->size = 0;
@@ -114,111 +139,203 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
 	return r;
 }
 
-/*
- * Principe is simple, we keep a list of sub allocation in offset
- * order (first entry has offset == 0, last entry has the highest
- * offset).
- *
- * When allocating new object we first check if there is room at
- * the end total_size - (last_object_offset + last_object_size) >=
- * alloc_size. If so we allocate new object there.
- *
- * When there is not enough room at the end, we start waiting for
- * each sub object until we reach object_offset+object_size >=
- * alloc_size, this object then become the sub object we return.
- *
- * Alignment can't be bigger than page size
- */
-
 static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
 {
-	list_del(&sa_bo->list);
+	struct radeon_sa_manager *sa_manager = sa_bo->manager;
+	if (sa_manager->hole == &sa_bo->olist) {
+		sa_manager->hole = sa_bo->olist.prev;
+	}
+	list_del_init(&sa_bo->olist);
+	list_del_init(&sa_bo->flist);
 	radeon_fence_unref(&sa_bo->fence);
 	kfree(sa_bo);
 }
 
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
+{
+	struct radeon_sa_bo *sa_bo, *tmp;
+
+	if (sa_manager->hole->next == &sa_manager->olist)
+		return;
+
+	sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist);
+	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
+		if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
+			return;
+		}
+		radeon_sa_bo_remove_locked(sa_bo);
+	}
+}
+
+static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager)
+{
+	struct list_head *hole = sa_manager->hole;
+
+	if (hole != &sa_manager->olist) {
+		return list_entry(hole, struct radeon_sa_bo, olist)->eoffset;
+	}
+	return 0;
+}
+
+static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager)
+{
+	struct list_head *hole = sa_manager->hole;
+
+	if (hole->next != &sa_manager->olist) {
+		return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset;
+	}
+	return sa_manager->size;
+}
+
+static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
+				   struct radeon_sa_bo *sa_bo,
+				   unsigned size, unsigned align)
+{
+	unsigned soffset, eoffset, wasted;
+
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
+	wasted = (align - (soffset % align)) % align;
+
+	if ((eoffset - soffset) >= (size + wasted)) {
+		soffset += wasted;
+
+		sa_bo->manager = sa_manager;
+		sa_bo->soffset = soffset;
+		sa_bo->eoffset = soffset + size;
+		list_add(&sa_bo->olist, sa_manager->hole);
+		INIT_LIST_HEAD(&sa_bo->flist);
+		sa_manager->hole = &sa_bo->olist;
+		return true;
+	}
+	return false;
+}
+
+static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
+				   struct radeon_fence **fences,
+				   unsigned *tries)
+{
+	struct radeon_sa_bo *best_bo = NULL;
+	unsigned i, soffset, best, tmp;
+
+	/* if hole points to the end of the buffer */
+	if (sa_manager->hole->next == &sa_manager->olist) {
+		/* try again with its beginning */
+		sa_manager->hole = &sa_manager->olist;
+		return true;
+	}
+
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	/* to handle wrap around we add sa_manager->size */
+	best = sa_manager->size * 2;
+	/* go over all fence list and try to find the closest sa_bo
+	 * of the current last
+	 */
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		struct radeon_sa_bo *sa_bo;
+
+		if (list_empty(&sa_manager->flist[i])) {
+			continue;
+		}
+
+		sa_bo = list_first_entry(&sa_manager->flist[i],
+					 struct radeon_sa_bo, flist);
+
+		if (!radeon_fence_signaled(sa_bo->fence)) {
+			fences[i] = sa_bo->fence;
+			continue;
+		}
+
+		/* limit the number of tries each ring gets */
+		if (tries[i] > 2) {
+			continue;
+		}
+
+		tmp = sa_bo->soffset;
+		if (tmp < soffset) {
+			/* wrap around, pretend it's after */
+			tmp += sa_manager->size;
+		}
+		tmp -= soffset;
+		if (tmp < best) {
+			/* this sa bo is the closest one */
+			best = tmp;
+			best_bo = sa_bo;
+		}
+	}
+
+	if (best_bo) {
+		++tries[best_bo->fence->ring];
+		sa_manager->hole = best_bo->olist.prev;
+
+		/* we knew that this one is signaled,
+		   so it's save to remote it */
+		radeon_sa_bo_remove_locked(best_bo);
+		return true;
+	}
+	return false;
+}
+
 int radeon_sa_bo_new(struct radeon_device *rdev,
 		     struct radeon_sa_manager *sa_manager,
 		     struct radeon_sa_bo **sa_bo,
 		     unsigned size, unsigned align, bool block)
 {
-	struct radeon_fence *fence = NULL;
-	struct radeon_sa_bo *tmp, *next;
-	struct list_head *head;
-	unsigned offset = 0, wasted = 0;
-	int r;
+	struct radeon_fence *fences[RADEON_NUM_RINGS];
+	unsigned tries[RADEON_NUM_RINGS];
+	int i, r = -ENOMEM;
 
 	BUG_ON(align > RADEON_GPU_PAGE_SIZE);
 	BUG_ON(size > sa_manager->size);
 
 	*sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
-
-retry:
+	if ((*sa_bo) == NULL) {
+		return -ENOMEM;
+	}
+	(*sa_bo)->manager = sa_manager;
+	(*sa_bo)->fence = NULL;
+	INIT_LIST_HEAD(&(*sa_bo)->olist);
+	INIT_LIST_HEAD(&(*sa_bo)->flist);
 
 	spin_lock(&sa_manager->lock);
+	do {
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			fences[i] = NULL;
+			tries[i] = 0;
+		}
 
-	/* no one ? */
-	head = sa_manager->sa_bo.prev;
-	if (list_empty(&sa_manager->sa_bo)) {
-		goto out;
-	}
+		do {
+			radeon_sa_bo_try_free(sa_manager);
 
-	/* look for a hole big enough */
-	offset = 0;
-	list_for_each_entry_safe(tmp, next, &sa_manager->sa_bo, list) {
-		/* try to free this object */
-		if (tmp->fence) {
-			if (radeon_fence_signaled(tmp->fence)) {
-				radeon_sa_bo_remove_locked(tmp);
-				continue;
-			} else {
-				fence = tmp->fence;
+			if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
+						   size, align)) {
+				spin_unlock(&sa_manager->lock);
+				return 0;
 			}
-		}
 
-		/* room before this object ? */
-		if (offset < tmp->soffset && (tmp->soffset - offset) >= size) {
-			head = tmp->list.prev;
-			goto out;
-		}
-		offset = tmp->eoffset;
-		wasted = offset % align;
-		if (wasted) {
-			wasted = align - wasted;
-		}
-		offset += wasted;
-	}
-	/* room at the end ? */
-	head = sa_manager->sa_bo.prev;
-	tmp = list_entry(head, struct radeon_sa_bo, list);
-	offset = tmp->eoffset;
-	wasted = offset % align;
-	if (wasted) {
-		wasted = align - wasted;
-	}
-	offset += wasted;
-	if ((sa_manager->size - offset) < size) {
-		/* failed to find somethings big enough */
-		spin_unlock(&sa_manager->lock);
-		if (block && fence) {
-			r = radeon_fence_wait(fence, false);
-			if (r)
-				return r;
-
-			goto retry;
+			/* see if we can skip over some allocations */
+		} while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
+
+		if (block) {
+			spin_unlock(&sa_manager->lock);
+			r = radeon_fence_wait_any(rdev, fences, false);
+			spin_lock(&sa_manager->lock);
+			if (r) {
+				/* if we have nothing to wait for we
+				   are practically out of memory */
+				if (r == -ENOENT) {
+					r = -ENOMEM;
+				}
+				goto out_err;
+			}
 		}
-		kfree(*sa_bo);
-		*sa_bo = NULL;
-		return -ENOMEM;
-	}
+	} while (block);
 
-out:
-	(*sa_bo)->manager = sa_manager;
-	(*sa_bo)->soffset = offset;
-	(*sa_bo)->eoffset = offset + size;
-	list_add(&(*sa_bo)->list, head);
+out_err:
 	spin_unlock(&sa_manager->lock);
-	return 0;
+	kfree(*sa_bo);
+	*sa_bo = NULL;
+	return r;
 }
 
 void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
@@ -226,13 +343,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
 {
 	struct radeon_sa_manager *sa_manager;
 
-	if (!sa_bo || !*sa_bo)
+	if (sa_bo == NULL || *sa_bo == NULL) {
 		return;
+	}
 
 	sa_manager = (*sa_bo)->manager;
 	spin_lock(&sa_manager->lock);
 	if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		(*sa_bo)->fence = radeon_fence_ref(fence);
+		list_add_tail(&(*sa_bo)->flist,
+			      &sa_manager->flist[fence->ring]);
 	} else {
 		radeon_sa_bo_remove_locked(*sa_bo);
 	}
@@ -247,15 +367,19 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
 	struct radeon_sa_bo *i;
 
 	spin_lock(&sa_manager->lock);
-	list_for_each_entry(i, &sa_manager->sa_bo, list) {
-		seq_printf(m, "[%08x %08x] size %4d (%p)",
-			   i->soffset, i->eoffset, i->eoffset - i->soffset, i);
-		if (i->fence) {
-			seq_printf(m, " protected by %Ld (%p) on ring %d\n",
-				   i->fence->seq, i->fence, i->fence->ring);
+	list_for_each_entry(i, &sa_manager->olist, olist) {
+		if (&i->olist == sa_manager->hole) {
+			seq_printf(m, ">");
 		} else {
-			seq_printf(m, "\n");
+			seq_printf(m, " ");
+		}
+		seq_printf(m, "[0x%08x 0x%08x] size %8d",
+			   i->soffset, i->eoffset, i->eoffset - i->soffset);
+		if (i->fence) {
+			seq_printf(m, " protected by 0x%016llx on ring %d",
+				   i->fence->seq, i->fence->ring);
 		}
+		seq_printf(m, "\n");
 	}
 	spin_unlock(&sa_manager->lock);
 }