author	Alex Waterman <alexw@nvidia.com>	2017-01-12 21:50:34 -0500
committer	Varun Colbert <vcolbert@nvidia.com>	2017-02-13 21:14:45 -0500
commit	aa36d3786aeed6755b9744fed37aad000b582322 (patch)
tree	d68d71632a01062e00fc2b057c5a0c37dfda4fb8	/drivers/gpu/nvgpu/common/semaphore.c
parent	b9194a1c3300e505d22fba97136dd305300397f0 (diff)
gpu: nvgpu: Organize semaphore_gk20a.[ch]
Move semaphore_gk20a.c to drivers/gpu/nvgpu/common/ since the semaphore
code is common to all chips. Move the semaphore_gk20a.h header file to
drivers/gpu/nvgpu/include/nvgpu and rename it to semaphore.h. Also update
all places where the header is included to use the new path.

This revealed an odd location for the enum gk20a_mem_rw_flag. This should
be in the mm headers. As a result, many places that did not need anything
semaphore related had to include the semaphore header file. Fixing this
oddity allowed the semaphore include to be removed from many C files that
did not need it.

Bug 1799159

Change-Id: Ie017219acf34c4c481747323b9f3ac33e76e064c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1284627
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
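As a sketch of what the include update looks like in callers (the old path is an
assumption based on the header's previous location next to the gk20a code, not
something shown in this diff; the new path is the one used below):

/* Before (assumed old location of the header): */
#include "gk20a/semaphore_gk20a.h"

/* After this change (header moved to drivers/gpu/nvgpu/include/nvgpu/): */
#include <nvgpu/semaphore.h>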
Diffstat (limited to 'drivers/gpu/nvgpu/common/semaphore.c')
-rw-r--r--	drivers/gpu/nvgpu/common/semaphore.c	460
1 files changed, 460 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
new file mode 100644
index 00000000..ea4910f1
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -0,0 +1,460 @@
/*
 * Nvgpu Semaphores
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#define pr_fmt(fmt) "gpu_sema: " fmt

#include <linux/dma-mapping.h>
#include <linux/highmem.h>
#include <linux/slab.h>

#include <nvgpu/semaphore.h>

#define __lock_sema_sea(s) \
	do { \
		gpu_sema_verbose_dbg("Acquiring sema lock..."); \
		mutex_lock(&s->sea_lock); \
		gpu_sema_verbose_dbg("Sema lock acquired!"); \
	} while (0)

#define __unlock_sema_sea(s) \
	do { \
		mutex_unlock(&s->sea_lock); \
		gpu_sema_verbose_dbg("Released sema lock"); \
	} while (0)

/*
 * Return the sema_sea pointer.
 */
struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g)
{
	return g->sema_sea;
}

static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea)
{
	int ret = 0;
	struct gk20a *gk20a = sea->gk20a;

	__lock_sema_sea(sea);

	ret = gk20a_gmmu_alloc_attr_sys(gk20a, DMA_ATTR_NO_KERNEL_MAPPING,
					PAGE_SIZE * SEMAPHORE_POOL_COUNT,
					&sea->sea_mem);
	if (ret)
		goto out;

	sea->ro_sg_table = sea->sea_mem.sgt;
	sea->size = SEMAPHORE_POOL_COUNT;
	sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;

out:
	__unlock_sema_sea(sea);
	return ret;
}

/*
 * Create the semaphore sea. Only create it once - subsequent calls to this will
 * return the originally created sea pointer.
 */
struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
{
	if (g->sema_sea)
		return g->sema_sea;

	g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL);
	if (!g->sema_sea)
		return NULL;

	g->sema_sea->size = 0;
	g->sema_sea->page_count = 0;
	g->sema_sea->gk20a = g;
	INIT_LIST_HEAD(&g->sema_sea->pool_list);
	mutex_init(&g->sema_sea->sea_lock);

	if (__gk20a_semaphore_sea_grow(g->sema_sea))
		goto cleanup;

	gpu_sema_dbg("Created semaphore sea!");
	return g->sema_sea;

cleanup:
	kfree(g->sema_sea);
	g->sema_sea = NULL;
	gpu_sema_dbg("Failed to create semaphore sea!");
	return NULL;
}

static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len)
{
	unsigned long idx = find_first_zero_bit(bitmap, len);

	if (idx == len)
		return -ENOSPC;

	set_bit(idx, bitmap);

	return (int)idx;
}

/*
 * Allocate a pool from the sea.
 */
struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
				struct gk20a_semaphore_sea *sea)
{
	struct gk20a_semaphore_pool *p;
	unsigned long page_idx;
	int ret, err = 0;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);

	__lock_sema_sea(sea);

	ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	page_idx = (unsigned long)ret;

	p->page = sea->sea_mem.pages[page_idx];
	p->ro_sg_table = sea->ro_sg_table;
	p->page_idx = page_idx;
	p->sema_sea = sea;
	INIT_LIST_HEAD(&p->hw_semas);
	kref_init(&p->ref);
	mutex_init(&p->pool_lock);

	sea->page_count++;
	list_add(&p->pool_list_entry, &sea->pool_list);
	__unlock_sema_sea(sea);

	gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);

	return p;

fail:
	__unlock_sema_sea(sea);
	kfree(p);
	gpu_sema_dbg("Failed to allocate semaphore pool!");
	return ERR_PTR(err);
}

/*
 * Map a pool into the passed vm's address space. This handles both the fixed
 * global RO mapping and the non-fixed private RW mapping.
 */
int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
			     struct vm_gk20a *vm)
{
	int ents, err = 0;
	u64 addr;

	gpu_sema_dbg("Mapping semaphore pool! (idx=%d)", p->page_idx);

	p->cpu_va = vmap(&p->page, 1, 0,
			 pgprot_writecombine(PAGE_KERNEL));

	gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);

	/* First do the RW mapping. */
	p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
	if (!p->rw_sg_table)
		return -ENOMEM;

	err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
					PAGE_SIZE, GFP_KERNEL);
	if (err) {
		err = -ENOMEM;
		goto fail;
	}

	/* Add IOMMU mapping... */
	ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
			  DMA_BIDIRECTIONAL);
	if (ents != 1) {
		err = -ENOMEM;
		goto fail_free_sgt;
	}

	gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx,
		     &sg_dma_address(p->rw_sg_table->sgl));

	/* Map into the GPU... Doesn't need to be fixed. */
	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
				   0, gk20a_mem_flag_none, false,
				   APERTURE_SYSMEM);
	if (!p->gpu_va) {
		err = -ENOMEM;
		goto fail_unmap_sgt;
	}

	gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx,
		     p->gpu_va);

	/*
	 * And now the global mapping. Take the sea lock so that we don't race
	 * with a concurrent remap.
	 */
	__lock_sema_sea(p->sema_sea);

	BUG_ON(p->mapped);
	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
				    p->sema_sea->gpu_va, p->sema_sea->map_size,
				    0,
				    gk20a_mem_flag_read_only,
				    false,
				    APERTURE_SYSMEM);
	if (!addr) {
		err = -ENOMEM;
		BUG();
		goto fail_unlock;
	}
	p->gpu_va_ro = addr;
	p->mapped = 1;

	gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx,
		     p->gpu_va_ro);

	__unlock_sema_sea(p->sema_sea);

	return 0;

fail_unlock:
	__unlock_sema_sea(p->sema_sea);
fail_unmap_sgt:
	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
		     DMA_BIDIRECTIONAL);
fail_free_sgt:
	sg_free_table(p->rw_sg_table);
fail:
	kfree(p->rw_sg_table);
	p->rw_sg_table = NULL;
	gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx);
	return err;
}

/*
 * Unmap a semaphore_pool.
 */
void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
				struct vm_gk20a *vm)
{
	struct gk20a_semaphore_int *hw_sema;

	kunmap(p->cpu_va);

	/* First the global RO mapping... */
	__lock_sema_sea(p->sema_sea);
	gk20a_gmmu_unmap(vm, p->gpu_va_ro,
			 p->sema_sea->map_size, gk20a_mem_flag_none);
	p->ro_sg_table = NULL;
	__unlock_sema_sea(p->sema_sea);

	/* And now the private RW mapping. */
	gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
	p->gpu_va = 0;

	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
		     DMA_BIDIRECTIONAL);

	sg_free_table(p->rw_sg_table);
	kfree(p->rw_sg_table);
	p->rw_sg_table = NULL;

	list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
		/*
		 * Make sure the mem addresses are all NULL so if this gets
		 * reused we will fault.
		 */
		hw_sema->value = NULL;

	gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
}

/*
 * Completely free a semaphore_pool. You should make sure this pool is not
 * mapped otherwise there's going to be a memory leak.
 */
static void gk20a_semaphore_pool_free(struct kref *ref)
{
	struct gk20a_semaphore_pool *p =
		container_of(ref, struct gk20a_semaphore_pool, ref);
	struct gk20a_semaphore_sea *s = p->sema_sea;
	struct gk20a_semaphore_int *hw_sema, *tmp;

	WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);

	__lock_sema_sea(s);
	list_del(&p->pool_list_entry);
	clear_bit(p->page_idx, s->pools_alloced);
	s->page_count--;
	__unlock_sema_sea(s);

	list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
		kfree(hw_sema);

	gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
	kfree(p);
}

void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
{
	kref_get(&p->ref);
}

void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
{
	kref_put(&p->ref, gk20a_semaphore_pool_free);
}

/*
 * Get the address for a semaphore_pool - if global is true then return the
 * global RO address instead of the RW address owned by the semaphore's VM.
 */
u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global)
{
	if (!global)
		return p->gpu_va;

	return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
}

static int __gk20a_init_hw_sema(struct channel_gk20a *ch)
{
	int hw_sema_idx;
	int ret = 0;
	struct gk20a_semaphore_int *hw_sema;
	struct gk20a_semaphore_pool *p = ch->vm->sema_pool;

	BUG_ON(!p);

	mutex_lock(&p->pool_lock);

	/* Find an available HW semaphore. */
	hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
					       PAGE_SIZE / SEMAPHORE_SIZE);
	if (hw_sema_idx < 0) {
		ret = hw_sema_idx;
		goto fail;
	}

	hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL);
	if (!hw_sema) {
		ret = -ENOMEM;
		goto fail_free_idx;
	}

	ch->hw_sema = hw_sema;
	hw_sema->ch = ch;
	hw_sema->p = p;
	hw_sema->idx = hw_sema_idx;
	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
	atomic_set(&hw_sema->next_value, 0);
	hw_sema->value = p->cpu_va + hw_sema->offset;
	writel(0, hw_sema->value);

	list_add(&hw_sema->hw_sema_list, &p->hw_semas);

	mutex_unlock(&p->pool_lock);

	return 0;

fail_free_idx:
	clear_bit(hw_sema_idx, p->semas_alloced);
fail:
	mutex_unlock(&p->pool_lock);
	return ret;
}

/*
 * Free the channel used semaphore index
 */
void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch)
{
	struct gk20a_semaphore_pool *p = ch->vm->sema_pool;

	BUG_ON(!p);

	mutex_lock(&p->pool_lock);

	clear_bit(ch->hw_sema->idx, p->semas_alloced);

	/* Make sure that when the ch is re-opened it will get a new HW sema. */
	list_del(&ch->hw_sema->hw_sema_list);
	kfree(ch->hw_sema);
	ch->hw_sema = NULL;

	mutex_unlock(&p->pool_lock);
}

/*
 * Allocate a semaphore from the passed pool.
 *
 * Since semaphores are ref-counted there's no explicit free for external code
 * to use. When the ref-count hits 0 the internal free will happen.
 */
struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
{
	struct gk20a_semaphore *s;
	int ret;

	if (!ch->hw_sema) {
		ret = __gk20a_init_hw_sema(ch);
		if (ret)
			return NULL;
	}

	s = kzalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return NULL;

	kref_init(&s->ref);
	s->hw_sema = ch->hw_sema;
	atomic_set(&s->value, 0);

	/*
	 * Take a ref on the pool so that we can keep this pool alive for
	 * as long as this semaphore is alive.
	 */
	gk20a_semaphore_pool_get(s->hw_sema->p);

	gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);

	return s;
}

static void gk20a_semaphore_free(struct kref *ref)
{
	struct gk20a_semaphore *s =
		container_of(ref, struct gk20a_semaphore, ref);

	gk20a_semaphore_pool_put(s->hw_sema->p);

	kfree(s);
}

void gk20a_semaphore_put(struct gk20a_semaphore *s)
{
	kref_put(&s->ref, gk20a_semaphore_free);
}

void gk20a_semaphore_get(struct gk20a_semaphore *s)
{
	kref_get(&s->ref);
}
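
For context, a minimal caller-side sketch of the ref-counted API above. The wrapper
function and its error handling are hypothetical illustrations; only the
gk20a_semaphore_*() calls come from this file:

/* Hypothetical caller: acquire a semaphore on a channel, then drop our ref. */
static int example_use_semaphore(struct channel_gk20a *ch)
{
	struct gk20a_semaphore *s;

	s = gk20a_semaphore_alloc(ch);	/* lazily sets up ch->hw_sema on first use */
	if (!s)
		return -ENOMEM;

	gk20a_semaphore_get(s);	/* extra ref for another user (illustrative) */
	gk20a_semaphore_put(s);	/* that user is done */

	gk20a_semaphore_put(s);	/* drop our own ref; freed when the count hits 0 */
	return 0;
}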