author    Alex Waterman <alexw@nvidia.com>  2016-04-27 15:27:36 -0400
committer Terje Bergstrom <tbergstrom@nvidia.com>  2016-06-28 18:49:11 -0400
commit    dfd5ec53fcce4ebae27f78242e6b788350337095 (patch)
tree      073ea380b9ee4734391d381745f57600c3525be5 /drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
parent    b30990ea6db564e885d5aee7a1a5ea87a1e5e8ee (diff)
gpu: nvgpu: Revamp semaphore support
Revamp the support the nvgpu driver has for semaphores.

The original problem with nvgpu's semaphore support was that it required a
SW-based wait for every semaphore release. This was because a new semaphore
was created for every fence that gk20a_channel_semaphore_wait_fd() waited on.
That semaphore would then be released by SW when the fence signaled. This
meant that every release necessarily involved a sync_fence_wait_async() call,
which could block. The latency of this SW wait was enough to cause massive
degradation in performance.

To fix this a fast path was implemented. When a fence passed to
gk20a_channel_semaphore_wait_fd() is backed by a GPU semaphore, a semaphore
acquire is used directly to block the GPU. No sync_fence_wait_async() is
performed and no extra semaphore is created.

To implement this fast path the semaphore memory had to be shared between
channels. Previously, since a new semaphore was created on every pass through
gk20a_channel_semaphore_wait_fd(), the address space a semaphore was mapped
into was irrelevant. However, when using the fast path a semaphore may be
released in one address space but acquired in another.

Sharing the semaphore memory was done by making a fixed GPU mapping in all
channels. This mapping points to the semaphore memory (the so-called
semaphore sea). The global fixed mapping is read-only to make sure no
semaphores can be incremented (i.e. released) by a malicious channel. Each
channel then gets a RW mapping of its own semaphore. This way a channel may
only acquire other channels' semaphores but may both acquire and release its
own semaphore.

The gk20a fence code was updated to allow introspection of GPU-backed fences.
This allows detection of when the fast path can be taken. If the fast path
cannot be used (for example when a fence is sync-pt backed) the original slow
path is still present. This gets used when the GPU needs to wait on an event
from something which only understands how to use sync-pts.

Bug 1732449
JIRA DNVGPU-12

Change-Id: Ic0fea74994da5819a771deac726bb0d47a33c2de
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1133792
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
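To make the fast/slow split described above concrete, here is a minimal sketch
of the wait-path selection. It is illustrative only: the types and helpers
(wait_fence(), fence_is_gpu_sema_backed(), emit_gpu_sema_acquire(),
queue_sw_wait()) are hypothetical stand-ins, not functions from the driver.

```c
#include <stdbool.h>

/*
 * Illustrative sketch only -- every name below is a hypothetical
 * stand-in for the real driver code paths described in the commit
 * message above.
 */
struct fence;
struct channel;

bool fence_is_gpu_sema_backed(struct fence *f);  /* introspection added by this patch */
int  emit_gpu_sema_acquire(struct channel *c, struct fence *f); /* GPU blocks itself  */
int  queue_sw_wait(struct channel *c, struct fence *f);         /* SW wait, may block */

static int wait_fence(struct channel *c, struct fence *f)
{
	/*
	 * Fast path: the fence is backed by a GPU semaphore, so push a
	 * semaphore ACQUIRE that reads the releasing channel's semaphore
	 * through the shared read-only mapping. No SW wait and no extra
	 * semaphore allocation.
	 */
	if (fence_is_gpu_sema_backed(f))
		return emit_gpu_sema_acquire(c, f);

	/*
	 * Slow path: e.g. a sync-pt backed fence. Fall back to a SW wait
	 * that releases a locally created semaphore when the fence fires.
	 */
	return queue_sw_wait(c, f);
}
```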
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 435
1 file changed, 324 insertions(+), 111 deletions(-)
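The address layout introduced by the patch (one read-only "sea" of semaphore
pages shared by all channels, one page per pool, 16-byte semaphore slots) can
be summarized by the arithmetic below. This is a standalone illustration of
the calculations found in __gk20a_semaphore_pool_gpu_va() and
__gk20a_init_hw_sema() in the diff; the constant values are placeholders (the
real ones live in semaphore_gk20a.h), only the arithmetic mirrors the driver.

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE            4096u  /* typical; platform dependent         */
#define SEMAPHORE_SIZE         16u  /* pre-patch value; placeholder here   */
#define SEMAPHORE_POOL_COUNT   64u  /* placeholder; defined in the header  */

/* RO address of a pool: sea RO base + one page per pool index. */
static uint64_t pool_ro_va(uint64_t sea_ro_base, unsigned page_idx)
{
	return sea_ro_base + (uint64_t)PAGE_SIZE * page_idx;
}

/* Address of a HW semaphore slot within a pool mapping. */
static uint64_t sema_va(uint64_t pool_base, unsigned hw_sema_idx)
{
	return pool_base + (uint64_t)SEMAPHORE_SIZE * hw_sema_idx;
}

int main(void)
{
	uint64_t sea_ro_base = 0x100000000ull;  /* example fixed mapping */

	printf("sea size        : %u bytes\n", PAGE_SIZE * SEMAPHORE_POOL_COUNT);
	printf("pool 3 RO va    : 0x%llx\n",
	       (unsigned long long)pool_ro_va(sea_ro_base, 3));
	printf("sema 5 offset   : 0x%llx\n",
	       (unsigned long long)sema_va(0, 5));
	return 0;
}
```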
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 3b17bfcb..aa375b24 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -15,63 +15,284 @@
15 * more details. 15 * more details.
16 */ 16 */
17 17
18#include "semaphore_gk20a.h" 18#define pr_fmt(fmt) "gpu_sema: " fmt
19
19#include <linux/dma-mapping.h> 20#include <linux/dma-mapping.h>
21#include <linux/highmem.h>
20#include <linux/slab.h> 22#include <linux/slab.h>
23
24#include <asm/pgtable.h>
25
21#include "gk20a.h" 26#include "gk20a.h"
22#include "mm_gk20a.h" 27#include "mm_gk20a.h"
28#include "semaphore_gk20a.h"
29
30#define __lock_sema_sea(s) \
31 do { \
32 mutex_lock(&s->sea_lock); \
33 } while (0)
23 34
24static const int SEMAPHORE_SIZE = 16; 35#define __unlock_sema_sea(s) \
36 do { \
37 mutex_unlock(&s->sea_lock); \
38 } while (0)
25 39
26struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct gk20a *g, 40/*
27 const char *unique_name, size_t capacity) 41 * Return the sema_sea pointer.
42 */
43struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g)
44{
45 return g->sema_sea;
46}
47
48static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea)
49{
50 int ret = 0;
51 struct gk20a *gk20a = sea->gk20a;
52
53 __lock_sema_sea(sea);
54
55 ret = gk20a_gmmu_alloc_attr(gk20a, DMA_ATTR_NO_KERNEL_MAPPING,
56 PAGE_SIZE * SEMAPHORE_POOL_COUNT,
57 &sea->sea_mem);
58 if (ret)
59 goto out;
60
61 sea->ro_sg_table = sea->sea_mem.sgt;
62 sea->size = SEMAPHORE_POOL_COUNT;
63 sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
64
65out:
66 __unlock_sema_sea(sea);
67 return ret;
68}
69
70/*
71 * Create the semaphore sea. Only create it once - subsequent calls to this will
72 * return the originally created sea pointer.
73 */
74struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
75{
76 if (g->sema_sea)
77 return g->sema_sea;
78
79 g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL);
80 if (!g->sema_sea)
81 return NULL;
82
83 g->sema_sea->size = 0;
84 g->sema_sea->page_count = 0;
85 g->sema_sea->gk20a = g;
86 INIT_LIST_HEAD(&g->sema_sea->pool_list);
87 mutex_init(&g->sema_sea->sea_lock);
88
89 if (__gk20a_semaphore_sea_grow(g->sema_sea))
90 goto cleanup;
91
92 return g->sema_sea;
93
94cleanup:
95 kfree(g->sema_sea);
96 g->sema_sea = NULL;
97 return NULL;
98}
99
100static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len)
101{
102 unsigned long idx = find_first_zero_bit(bitmap, len);
103
104 if (idx == len)
105 return -ENOSPC;
106
107 set_bit(idx, bitmap);
108
109 return (int)idx;
110}
111
112/*
113 * Allocate a pool from the sea.
114 */
115struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
116 struct gk20a_semaphore_sea *sea)
28{ 117{
29 struct gk20a_semaphore_pool *p; 118 struct gk20a_semaphore_pool *p;
119 unsigned long page_idx;
120 int err = 0;
121
30 p = kzalloc(sizeof(*p), GFP_KERNEL); 122 p = kzalloc(sizeof(*p), GFP_KERNEL);
31 if (!p) 123 if (!p)
32 return NULL; 124 return ERR_PTR(-ENOMEM);
125
126 __lock_sema_sea(sea);
127
128 page_idx = __semaphore_bitmap_alloc(sea->pools_alloced,
129 SEMAPHORE_POOL_COUNT);
130 if (page_idx < 0) {
131 err = page_idx;
132 goto fail;
133 }
33 134
135 p->page = sea->sea_mem.pages[page_idx];
136 p->ro_sg_table = sea->ro_sg_table;
137 p->page_idx = page_idx;
138 p->sema_sea = sea;
139 INIT_LIST_HEAD(&p->hw_semas);
34 kref_init(&p->ref); 140 kref_init(&p->ref);
35 INIT_LIST_HEAD(&p->maps); 141 mutex_init(&p->pool_lock);
36 mutex_init(&p->maps_mutex); 142
37 p->g = g; 143 sea->page_count++;
38 144 list_add(&p->pool_list_entry, &sea->pool_list);
39 /* Alloc one 4k page of semaphore per channel. */ 145 __unlock_sema_sea(sea);
40 if (gk20a_gmmu_alloc(g, roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE), 146
41 &p->mem))
42 goto clean_up;
43
44 /* Sacrifice one semaphore in the name of returning error codes. */
45 if (gk20a_allocator_init(&p->alloc, unique_name,
46 SEMAPHORE_SIZE, p->mem.size - SEMAPHORE_SIZE,
47 SEMAPHORE_SIZE))
48 goto clean_up;
49
50 gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->mem.cpu_va,
51 (u64)sg_dma_address(p->mem.sgt->sgl),
52 (u64)sg_phys(p->mem.sgt->sgl));
53 return p; 147 return p;
54 148
55clean_up: 149fail:
56 if (p->mem.size) 150 __unlock_sema_sea(sea);
57 gk20a_gmmu_free(p->g, &p->mem);
58 kfree(p); 151 kfree(p);
59 return NULL; 152 return ERR_PTR(err);
153}
154
155/*
156 * Map a pool into the passed vm's address space. This handles both the fixed
157 * global RO mapping and the non-fixed private RW mapping.
158 */
159int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
160 struct vm_gk20a *vm)
161{
162 int ents, err = 0;
163 u64 addr;
164
165 p->cpu_va = vmap(&p->page, 1, 0,
166 pgprot_writecombine(PAGE_KERNEL));
167
168 /* First do the RW mapping. */
169 p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
170 if (!p->rw_sg_table)
171 return -ENOMEM;
172
173 err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
174 PAGE_SIZE, GFP_KERNEL);
175 if (err) {
176 err = -ENOMEM;
177 goto fail;
178 }
179
180 /* Add IOMMU mapping... */
181 ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
182 DMA_BIDIRECTIONAL);
183 if (ents != 1) {
184 err = -ENOMEM;
185 goto fail_free_sgt;
186 }
187
188 /* Map into the GPU... Doesn't need to be fixed. */
189 p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
190 0, gk20a_mem_flag_none, false);
191 if (!p->gpu_va) {
192 err = -ENOMEM;
193 goto fail_unmap_sgt;
194 }
195
196 /*
197 * And now the global mapping. Take the sea lock so that we don't race
198 * with a concurrent remap.
199 */
200 __lock_sema_sea(p->sema_sea);
201
202 BUG_ON(p->mapped);
203 addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
204 p->sema_sea->gpu_va, p->sema_sea->map_size,
205 0,
206 gk20a_mem_flag_read_only,
207 false);
208 if (!addr) {
209 err = -ENOMEM;
210 BUG();
211 goto fail_unlock;
212 }
213 p->gpu_va_ro = addr;
214 p->mapped = 1;
215
216 __unlock_sema_sea(p->sema_sea);
217
218 return 0;
219
220fail_unlock:
221 __unlock_sema_sea(p->sema_sea);
222fail_unmap_sgt:
223 dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
224 DMA_BIDIRECTIONAL);
225fail_free_sgt:
226 sg_free_table(p->rw_sg_table);
227fail:
228 kfree(p->rw_sg_table);
229 p->rw_sg_table = NULL;
230 return err;
60} 231}
61 232
233/*
234 * Unmap a semaphore_pool.
235 */
236void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
237 struct vm_gk20a *vm)
238{
239 struct gk20a_semaphore_int *hw_sema;
240
241 kunmap(p->cpu_va);
242
243 /* First the global RO mapping... */
244 __lock_sema_sea(p->sema_sea);
245 gk20a_gmmu_unmap(vm, p->gpu_va_ro,
246 p->sema_sea->map_size, gk20a_mem_flag_none);
247 p->ro_sg_table = NULL;
248 __unlock_sema_sea(p->sema_sea);
249
250 /* And now the private RW mapping. */
251 gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
252 p->gpu_va = 0;
253
254 dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
255 DMA_BIDIRECTIONAL);
256
257 sg_free_table(p->rw_sg_table);
258 kfree(p->rw_sg_table);
259 p->rw_sg_table = NULL;
260
261 gk20a_dbg_info("Unmapped sema-pool: idx = %d", p->page_idx);
262 list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
263 /*
264 * Make sure the mem addresses are all NULL so if this gets
265 * reused we will fault.
266 */
267 hw_sema->value = NULL;
268}
269
270/*
271 * Completely free a sempahore_pool. You should make sure this pool is not
272 * mapped otherwise there's going to be a memory leak.
273 */
62static void gk20a_semaphore_pool_free(struct kref *ref) 274static void gk20a_semaphore_pool_free(struct kref *ref)
63{ 275{
64 struct gk20a_semaphore_pool *p = 276 struct gk20a_semaphore_pool *p =
65 container_of(ref, struct gk20a_semaphore_pool, ref); 277 container_of(ref, struct gk20a_semaphore_pool, ref);
66 mutex_lock(&p->maps_mutex); 278 struct gk20a_semaphore_sea *s = p->sema_sea;
67 WARN_ON(!list_empty(&p->maps)); 279 struct gk20a_semaphore_int *hw_sema, *tmp;
68 mutex_unlock(&p->maps_mutex); 280
69 gk20a_gmmu_free(p->g, &p->mem); 281 WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
70 gk20a_allocator_destroy(&p->alloc); 282
283 __lock_sema_sea(s);
284 list_del(&p->pool_list_entry);
285 clear_bit(p->page_idx, s->pools_alloced);
286 s->page_count--;
287 __unlock_sema_sea(s);
288
289 list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
290 kfree(hw_sema);
291
71 kfree(p); 292 kfree(p);
72} 293}
73 294
74static void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p) 295void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
75{ 296{
76 kref_get(&p->ref); 297 kref_get(&p->ref);
77} 298}
@@ -81,104 +302,96 @@ void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
81 kref_put(&p->ref, gk20a_semaphore_pool_free); 302 kref_put(&p->ref, gk20a_semaphore_pool_free);
82} 303}
83 304
84static struct gk20a_semaphore_pool_map * 305/*
85gk20a_semaphore_pool_find_map_locked(struct gk20a_semaphore_pool *p, 306 * Get the address for a semaphore_pool - if global is true then return the
86 struct vm_gk20a *vm) 307 * global RO address instead of the RW address owned by the semaphore's VM.
308 */
309u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global)
87{ 310{
88 struct gk20a_semaphore_pool_map *map, *found = NULL; 311 if (!global)
89 list_for_each_entry(map, &p->maps, list) { 312 return p->gpu_va;
90 if (map->vm == vm) { 313
91 found = map; 314 return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
92 break;
93 }
94 }
95 return found;
96} 315}
97 316
98int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p, 317static int __gk20a_init_hw_sema(struct channel_gk20a *ch)
99 struct vm_gk20a *vm,
100 enum gk20a_mem_rw_flag rw_flag)
101{ 318{
102 struct gk20a_semaphore_pool_map *map; 319 int hw_sema_idx;
320 int ret = 0;
321 struct gk20a_semaphore_int *hw_sema;
322 struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
103 323
104 map = kzalloc(sizeof(*map), GFP_KERNEL); 324 BUG_ON(!p);
105 if (!map)
106 return -ENOMEM;
107 map->vm = vm;
108 map->rw_flag = rw_flag;
109 map->gpu_va = gk20a_gmmu_map(vm, &p->mem.sgt, p->mem.size,
110 0/*uncached*/, rw_flag,
111 false);
112 if (!map->gpu_va) {
113 kfree(map);
114 return -ENOMEM;
115 }
116 gk20a_vm_get(vm);
117 325
118 mutex_lock(&p->maps_mutex); 326 mutex_lock(&p->pool_lock);
119 WARN_ON(gk20a_semaphore_pool_find_map_locked(p, vm));
120 list_add(&map->list, &p->maps);
121 mutex_unlock(&p->maps_mutex);
122 return 0;
123}
124 327
125void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p, 328 /* Find an available HW semaphore. */
126 struct vm_gk20a *vm) 329 hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
127{ 330 PAGE_SIZE / SEMAPHORE_SIZE);
128 struct gk20a_semaphore_pool_map *map; 331 if (hw_sema_idx < 0) {
129 WARN_ON(!vm); 332 ret = hw_sema_idx;
130 333 goto fail;
131 mutex_lock(&p->maps_mutex);
132 map = gk20a_semaphore_pool_find_map_locked(p, vm);
133 if (map) {
134 gk20a_gmmu_unmap(vm, map->gpu_va, p->mem.size, map->rw_flag);
135 gk20a_vm_put(vm);
136 list_del(&map->list);
137 kfree(map);
138 } 334 }
139 mutex_unlock(&p->maps_mutex);
140}
141 335
142u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, 336 hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL);
143 struct vm_gk20a *vm) 337 if (!hw_sema) {
144{ 338 ret = -ENOMEM;
145 struct gk20a_semaphore_pool_map *map; 339 goto fail_free_idx;
146 u64 gpu_va = 0; 340 }
147 341
148 mutex_lock(&p->maps_mutex); 342 ch->hw_sema = hw_sema;
149 map = gk20a_semaphore_pool_find_map_locked(p, vm); 343 hw_sema->ch = ch;
150 if (map) 344 hw_sema->p = p;
151 gpu_va = map->gpu_va; 345 hw_sema->idx = hw_sema_idx;
152 mutex_unlock(&p->maps_mutex); 346 hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
347 atomic_set(&hw_sema->next_value, 0);
348 hw_sema->value = p->cpu_va + hw_sema->offset;
349 writel(0, hw_sema->value);
153 350
154 return gpu_va; 351 list_add(&hw_sema->hw_sema_list, &p->hw_semas);
352
353 mutex_unlock(&p->pool_lock);
354
355 return 0;
356
357fail_free_idx:
358 clear_bit(hw_sema_idx, p->semas_alloced);
359fail:
360 mutex_unlock(&p->pool_lock);
361 return ret;
155} 362}
156 363
157struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool) 364/*
365 * Allocate a semaphore from the passed pool.
366 *
367 * Since semaphores are ref-counted there's no explicit free for external code
368 * to use. When the ref-count hits 0 the internal free will happen.
369 */
370struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
158{ 371{
159 struct gk20a_semaphore *s; 372 struct gk20a_semaphore *s;
373 int ret;
374
375 if (!ch->hw_sema) {
376 ret = __gk20a_init_hw_sema(ch);
377 if (ret)
378 return ERR_PTR(ret);
379 }
160 380
161 s = kzalloc(sizeof(*s), GFP_KERNEL); 381 s = kzalloc(sizeof(*s), GFP_KERNEL);
162 if (!s) 382 if (!s)
163 return NULL; 383 return NULL;
164 384
165 s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE); 385 kref_init(&s->ref);
166 if (!s->offset) { 386 s->hw_sema = ch->hw_sema;
167 gk20a_err(dev_from_gk20a(pool->g), 387 atomic_set(&s->value, 0);
168 "failed to allocate semaphore");
169 kfree(s);
170 return NULL;
171 }
172 388
173 gk20a_semaphore_pool_get(pool); 389 /*
174 s->pool = pool; 390 * Take a ref on the pool so that we can keep this pool alive for
391 * as long as this semaphore is alive.
392 */
393 gk20a_semaphore_pool_get(s->hw_sema->p);
175 394
176 kref_init(&s->ref);
177 /* Initially acquired. */
178 gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 0);
179 gk20a_dbg_info("created semaphore offset=%d, value=%d",
180 s->offset,
181 gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset));
182 return s; 395 return s;
183} 396}
184 397
@@ -187,8 +400,8 @@ static void gk20a_semaphore_free(struct kref *ref)
187 struct gk20a_semaphore *s = 400 struct gk20a_semaphore *s =
188 container_of(ref, struct gk20a_semaphore, ref); 401 container_of(ref, struct gk20a_semaphore, ref);
189 402
190 gk20a_bfree(&s->pool->alloc, s->offset); 403 gk20a_semaphore_pool_put(s->hw_sema->p);
191 gk20a_semaphore_pool_put(s->pool); 404
192 kfree(s); 405 kfree(s);
193} 406}
194 407