author	Christian König <deathsimple@vodafone.de>	2012-05-09 09:34:56 -0400
committer	Dave Airlie <airlied@redhat.com>	2012-05-09 12:22:39 -0400
commit	c3b7fe8b8a0b717f90a4a0c49cffae27e46e3fb7 (patch)
tree	ad2756cf15f8986f7e41f3470c0bd60b30397cde
parent	0085c95061e836f3ed489d042b502733c094e7e4 (diff)
drm/radeon: multiple ring allocator v3
A fresh start with a new idea for a multiple ring allocator. It should
perform as well as a normal ring allocator as long as only one ring does
something, but falls back to a more complex algorithm when more complex
things start to happen.

We store the last allocated bo in "last" and always try to allocate
after it. The principle is that in a linear GPU ring progression,
whatever comes after "last" is the oldest bo we allocated and thus the
first one that should no longer be in use by the GPU.

If that is not the case, we skip over the bo after "last" to the closest
finished bo, if one exists. If none exists and we are not asked to
block, we report an allocation failure.

If we are asked to block, we wait on the oldest fence of each ring and
return as soon as any of those fences completes.

v2: We need to be able to let "hole" point to the list_head, otherwise
    try_free will never free the first allocation of the list. Also stop
    calling radeon_fence_signaled more often than necessary.

v3: Don't free allocations without considering them as a hole, otherwise
    we might lose holes. Also return -ENOMEM instead of -ENOENT when
    running out of fences to wait for. Limit the number of holes we try
    for each ring to 3.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r--	drivers/gpu/drm/radeon/radeon.h	7
-rw-r--r--	drivers/gpu/drm/radeon/radeon_ring.c	19
-rw-r--r--	drivers/gpu/drm/radeon/radeon_sa.c	312
3 files changed, 231 insertions(+), 107 deletions(-)
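The allocation strategy described in the commit message boils down to a few list operations. Below is a minimal standalone sketch of that idea, not part of the patch: the toy_* names are invented for illustration, and it models only the simple linear case (no per-ring fence lists, no wrap-around, no blocking). Allocations live on a circular list in offset order, "hole" remembers the last allocation, and a new request is carved out of the gap immediately after it.

/* toy_sa.c: standalone illustration of the "hole" idea (not radeon code) */
#include <stdio.h>
#include <stdlib.h>

struct toy_sa_bo {
	struct toy_sa_bo *prev, *next;	/* allocations kept in offset order */
	unsigned soffset, eoffset;
};

struct toy_sa_manager {
	struct toy_sa_bo olist;		/* list head, doubles as "no hole yet" */
	struct toy_sa_bo *hole;		/* last allocation, or &olist */
	unsigned size;
};

static void toy_init(struct toy_sa_manager *m, unsigned size)
{
	m->olist.prev = m->olist.next = &m->olist;
	m->olist.soffset = m->olist.eoffset = 0;
	m->hole = &m->olist;
	m->size = size;
}

/* start of the gap after the hole: end of the bo the hole points at */
static unsigned toy_hole_soffset(struct toy_sa_manager *m)
{
	return m->hole == &m->olist ? 0 : m->hole->eoffset;
}

/* end of the gap: start of the following bo, or the end of the buffer */
static unsigned toy_hole_eoffset(struct toy_sa_manager *m)
{
	return m->hole->next == &m->olist ? m->size : m->hole->next->soffset;
}

/* carve a new allocation out of the gap right after the last one */
static struct toy_sa_bo *toy_try_alloc(struct toy_sa_manager *m,
				       unsigned size, unsigned align)
{
	unsigned soffset = toy_hole_soffset(m);
	unsigned eoffset = toy_hole_eoffset(m);
	unsigned wasted = (align - (soffset % align)) % align;
	struct toy_sa_bo *bo;

	if (eoffset - soffset < size + wasted)
		return NULL;			/* no room after the hole */

	bo = calloc(1, sizeof(*bo));
	if (bo == NULL)
		return NULL;
	bo->soffset = soffset + wasted;
	bo->eoffset = bo->soffset + size;

	/* link in right after the hole, then the new bo becomes the hole */
	bo->prev = m->hole;
	bo->next = m->hole->next;
	m->hole->next->prev = bo;
	m->hole->next = bo;
	m->hole = bo;
	return bo;
}

int main(void)
{
	struct toy_sa_manager m;
	int i;

	toy_init(&m, 1024);
	for (i = 0; i < 3; ++i) {
		struct toy_sa_bo *bo = toy_try_alloc(&m, 100, 64);
		if (bo)
			printf("allocated [%u, %u)\n", bo->soffset, bo->eoffset);
	}
	return 0;
}

In the actual patch below, radeon_sa_bo_try_alloc() plays the role of toy_try_alloc(), while radeon_sa_bo_next_hole() adds the part this sketch leaves out: moving the hole past already-signaled allocations on the per-ring fence lists, including the wrap-around case.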
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 37a74597e9df..cc7f16ab257f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -385,7 +385,9 @@ struct radeon_bo_list {
 struct radeon_sa_manager {
 	spinlock_t lock;
 	struct radeon_bo *bo;
-	struct list_head sa_bo;
+	struct list_head *hole;
+	struct list_head flist[RADEON_NUM_RINGS];
+	struct list_head olist;
 	unsigned size;
 	uint64_t gpu_addr;
 	void *cpu_ptr;
@@ -396,7 +398,8 @@ struct radeon_sa_bo;
 
 /* sub-allocation buffer */
 struct radeon_sa_bo {
-	struct list_head list;
+	struct list_head olist;
+	struct list_head flist;
 	struct radeon_sa_manager *manager;
 	unsigned soffset;
 	unsigned eoffset;
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 1748d939657c..e074ff5c2ac2 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -204,25 +204,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 
 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
-	struct radeon_sa_manager tmp;
 	int i, r;
 
-	r = radeon_sa_bo_manager_init(rdev, &tmp,
-				      RADEON_IB_POOL_SIZE*64*1024,
-				      RADEON_GEM_DOMAIN_GTT);
-	if (r) {
-		return r;
-	}
-
 	radeon_mutex_lock(&rdev->ib_pool.mutex);
 	if (rdev->ib_pool.ready) {
 		radeon_mutex_unlock(&rdev->ib_pool.mutex);
-		radeon_sa_bo_manager_fini(rdev, &tmp);
 		return 0;
 	}
 
-	rdev->ib_pool.sa_manager = tmp;
-	INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
+	r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
+				      RADEON_IB_POOL_SIZE*64*1024,
+				      RADEON_GEM_DOMAIN_GTT);
+	if (r) {
+		radeon_mutex_unlock(&rdev->ib_pool.mutex);
+		return r;
+	}
+
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
 		rdev->ib_pool.ibs[i].fence = NULL;
 		rdev->ib_pool.ibs[i].idx = i;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index 90ee8add2443..c3ac7f4c7b70 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -27,21 +27,42 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+/* Algorithm:
+ *
+ * We store the last allocated bo in "hole", we always try to allocate
+ * after the last allocated bo. Principle is that in a linear GPU ring
+ * progression was is after last is the oldest bo we allocated and thus
+ * the first one that should no longer be in use by the GPU.
+ *
+ * If it's not the case we skip over the bo after last to the closest
+ * done bo if such one exist. If none exist and we are not asked to
+ * block we report failure to allocate.
+ *
+ * If we are asked to block we wait on all the oldest fence of all
+ * rings. We just wait for any of those fence to complete.
+ */
 #include "drmP.h"
 #include "drm.h"
 #include "radeon.h"
 
+static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);
+
 int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 			      struct radeon_sa_manager *sa_manager,
 			      unsigned size, u32 domain)
 {
-	int r;
+	int i, r;
 
 	spin_lock_init(&sa_manager->lock);
 	sa_manager->bo = NULL;
 	sa_manager->size = size;
 	sa_manager->domain = domain;
-	INIT_LIST_HEAD(&sa_manager->sa_bo);
+	sa_manager->hole = &sa_manager->olist;
+	INIT_LIST_HEAD(&sa_manager->olist);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		INIT_LIST_HEAD(&sa_manager->flist[i]);
+	}
 
 	r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
 			     RADEON_GEM_DOMAIN_CPU, &sa_manager->bo);
@@ -58,11 +79,15 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
 {
 	struct radeon_sa_bo *sa_bo, *tmp;
 
-	if (!list_empty(&sa_manager->sa_bo)) {
-		dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+	if (!list_empty(&sa_manager->olist)) {
+		sa_manager->hole = &sa_manager->olist,
+		radeon_sa_bo_try_free(sa_manager);
+		if (!list_empty(&sa_manager->olist)) {
+			dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+		}
 	}
-	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
-		list_del_init(&sa_bo->list);
+	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
+		radeon_sa_bo_remove_locked(sa_bo);
 	}
 	radeon_bo_unref(&sa_manager->bo);
 	sa_manager->size = 0;
@@ -114,111 +139,203 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
 	return r;
 }
 
-/*
- * Principe is simple, we keep a list of sub allocation in offset
- * order (first entry has offset == 0, last entry has the highest
- * offset).
- *
- * When allocating new object we first check if there is room at
- * the end total_size - (last_object_offset + last_object_size) >=
- * alloc_size. If so we allocate new object there.
- *
- * When there is not enough room at the end, we start waiting for
- * each sub object until we reach object_offset+object_size >=
- * alloc_size, this object then become the sub object we return.
- *
- * Alignment can't be bigger than page size
- */
-
 static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
 {
-	list_del(&sa_bo->list);
+	struct radeon_sa_manager *sa_manager = sa_bo->manager;
+	if (sa_manager->hole == &sa_bo->olist) {
+		sa_manager->hole = sa_bo->olist.prev;
+	}
+	list_del_init(&sa_bo->olist);
+	list_del_init(&sa_bo->flist);
 	radeon_fence_unref(&sa_bo->fence);
 	kfree(sa_bo);
 }
 
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
+{
+	struct radeon_sa_bo *sa_bo, *tmp;
+
+	if (sa_manager->hole->next == &sa_manager->olist)
+		return;
+
+	sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist);
+	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
+		if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
+			return;
+		}
+		radeon_sa_bo_remove_locked(sa_bo);
+	}
+}
+
+static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager)
+{
+	struct list_head *hole = sa_manager->hole;
+
+	if (hole != &sa_manager->olist) {
+		return list_entry(hole, struct radeon_sa_bo, olist)->eoffset;
+	}
+	return 0;
+}
+
+static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager)
+{
+	struct list_head *hole = sa_manager->hole;
+
+	if (hole->next != &sa_manager->olist) {
+		return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset;
+	}
+	return sa_manager->size;
+}
+
+static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
+				   struct radeon_sa_bo *sa_bo,
+				   unsigned size, unsigned align)
+{
+	unsigned soffset, eoffset, wasted;
+
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
+	wasted = (align - (soffset % align)) % align;
+
+	if ((eoffset - soffset) >= (size + wasted)) {
+		soffset += wasted;
+
+		sa_bo->manager = sa_manager;
+		sa_bo->soffset = soffset;
+		sa_bo->eoffset = soffset + size;
+		list_add(&sa_bo->olist, sa_manager->hole);
+		INIT_LIST_HEAD(&sa_bo->flist);
+		sa_manager->hole = &sa_bo->olist;
+		return true;
+	}
+	return false;
+}
+
+static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
+				   struct radeon_fence **fences,
+				   unsigned *tries)
+{
+	struct radeon_sa_bo *best_bo = NULL;
+	unsigned i, soffset, best, tmp;
+
+	/* if hole points to the end of the buffer */
+	if (sa_manager->hole->next == &sa_manager->olist) {
+		/* try again with its beginning */
+		sa_manager->hole = &sa_manager->olist;
+		return true;
+	}
+
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	/* to handle wrap around we add sa_manager->size */
+	best = sa_manager->size * 2;
+	/* go over all fence list and try to find the closest sa_bo
+	 * of the current last
+	 */
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		struct radeon_sa_bo *sa_bo;
+
+		if (list_empty(&sa_manager->flist[i])) {
+			continue;
+		}
+
+		sa_bo = list_first_entry(&sa_manager->flist[i],
+					 struct radeon_sa_bo, flist);
+
+		if (!radeon_fence_signaled(sa_bo->fence)) {
+			fences[i] = sa_bo->fence;
+			continue;
+		}
+
+		/* limit the number of tries each ring gets */
+		if (tries[i] > 2) {
+			continue;
+		}
+
+		tmp = sa_bo->soffset;
+		if (tmp < soffset) {
+			/* wrap around, pretend it's after */
+			tmp += sa_manager->size;
+		}
+		tmp -= soffset;
+		if (tmp < best) {
+			/* this sa bo is the closest one */
+			best = tmp;
+			best_bo = sa_bo;
+		}
+	}
+
+	if (best_bo) {
+		++tries[best_bo->fence->ring];
+		sa_manager->hole = best_bo->olist.prev;
+
+		/* we knew that this one is signaled,
+		   so it's save to remote it */
+		radeon_sa_bo_remove_locked(best_bo);
+		return true;
+	}
+	return false;
+}
+
 int radeon_sa_bo_new(struct radeon_device *rdev,
 		     struct radeon_sa_manager *sa_manager,
 		     struct radeon_sa_bo **sa_bo,
 		     unsigned size, unsigned align, bool block)
 {
-	struct radeon_fence *fence = NULL;
-	struct radeon_sa_bo *tmp, *next;
-	struct list_head *head;
-	unsigned offset = 0, wasted = 0;
-	int r;
+	struct radeon_fence *fences[RADEON_NUM_RINGS];
+	unsigned tries[RADEON_NUM_RINGS];
+	int i, r = -ENOMEM;
 
 	BUG_ON(align > RADEON_GPU_PAGE_SIZE);
 	BUG_ON(size > sa_manager->size);
 
 	*sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
-
-retry:
+	if ((*sa_bo) == NULL) {
+		return -ENOMEM;
+	}
+	(*sa_bo)->manager = sa_manager;
+	(*sa_bo)->fence = NULL;
+	INIT_LIST_HEAD(&(*sa_bo)->olist);
+	INIT_LIST_HEAD(&(*sa_bo)->flist);
 
 	spin_lock(&sa_manager->lock);
+	do {
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			fences[i] = NULL;
+			tries[i] = 0;
+		}
 
-	/* no one ? */
-	head = sa_manager->sa_bo.prev;
-	if (list_empty(&sa_manager->sa_bo)) {
-		goto out;
-	}
+		do {
+			radeon_sa_bo_try_free(sa_manager);
 
-	/* look for a hole big enough */
-	offset = 0;
-	list_for_each_entry_safe(tmp, next, &sa_manager->sa_bo, list) {
-		/* try to free this object */
-		if (tmp->fence) {
-			if (radeon_fence_signaled(tmp->fence)) {
-				radeon_sa_bo_remove_locked(tmp);
-				continue;
-			} else {
-				fence = tmp->fence;
+			if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
+						   size, align)) {
+				spin_unlock(&sa_manager->lock);
+				return 0;
 			}
-		}
 
-		/* room before this object ? */
-		if (offset < tmp->soffset && (tmp->soffset - offset) >= size) {
-			head = tmp->list.prev;
-			goto out;
-		}
-		offset = tmp->eoffset;
-		wasted = offset % align;
-		if (wasted) {
-			wasted = align - wasted;
-		}
-		offset += wasted;
-	}
-	/* room at the end ? */
-	head = sa_manager->sa_bo.prev;
-	tmp = list_entry(head, struct radeon_sa_bo, list);
-	offset = tmp->eoffset;
-	wasted = offset % align;
-	if (wasted) {
-		wasted = align - wasted;
-	}
-	offset += wasted;
-	if ((sa_manager->size - offset) < size) {
-		/* failed to find somethings big enough */
-		spin_unlock(&sa_manager->lock);
-		if (block && fence) {
-			r = radeon_fence_wait(fence, false);
-			if (r)
-				return r;
-
-			goto retry;
+			/* see if we can skip over some allocations */
+		} while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
+
+		if (block) {
+			spin_unlock(&sa_manager->lock);
+			r = radeon_fence_wait_any(rdev, fences, false);
+			spin_lock(&sa_manager->lock);
+			if (r) {
+				/* if we have nothing to wait for we
+				   are practically out of memory */
+				if (r == -ENOENT) {
+					r = -ENOMEM;
+				}
+				goto out_err;
+			}
 		}
-		kfree(*sa_bo);
-		*sa_bo = NULL;
-		return -ENOMEM;
-	}
+	} while (block);
 
-out:
-	(*sa_bo)->manager = sa_manager;
-	(*sa_bo)->soffset = offset;
-	(*sa_bo)->eoffset = offset + size;
-	list_add(&(*sa_bo)->list, head);
+out_err:
 	spin_unlock(&sa_manager->lock);
-	return 0;
+	kfree(*sa_bo);
+	*sa_bo = NULL;
+	return r;
 }
 
 void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
@@ -226,13 +343,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
 {
 	struct radeon_sa_manager *sa_manager;
 
-	if (!sa_bo || !*sa_bo)
+	if (sa_bo == NULL || *sa_bo == NULL) {
 		return;
+	}
 
 	sa_manager = (*sa_bo)->manager;
 	spin_lock(&sa_manager->lock);
 	if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		(*sa_bo)->fence = radeon_fence_ref(fence);
+		list_add_tail(&(*sa_bo)->flist,
+			      &sa_manager->flist[fence->ring]);
 	} else {
 		radeon_sa_bo_remove_locked(*sa_bo);
 	}
@@ -247,15 +367,19 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
 	struct radeon_sa_bo *i;
 
 	spin_lock(&sa_manager->lock);
-	list_for_each_entry(i, &sa_manager->sa_bo, list) {
-		seq_printf(m, "[%08x %08x] size %4d (%p)",
-			   i->soffset, i->eoffset, i->eoffset - i->soffset, i);
-		if (i->fence) {
-			seq_printf(m, " protected by %Ld (%p) on ring %d\n",
-				   i->fence->seq, i->fence, i->fence->ring);
+	list_for_each_entry(i, &sa_manager->olist, olist) {
+		if (&i->olist == sa_manager->hole) {
+			seq_printf(m, ">");
 		} else {
-			seq_printf(m, "\n");
+			seq_printf(m, " ");
+		}
+		seq_printf(m, "[0x%08x 0x%08x] size %8d",
+			   i->soffset, i->eoffset, i->eoffset - i->soffset);
+		if (i->fence) {
+			seq_printf(m, " protected by 0x%016llx on ring %d",
+				   i->fence->seq, i->fence->ring);
 		}
+		seq_printf(m, "\n");
 	}
 	spin_unlock(&sa_manager->lock);
 }