14 files changed, 27 insertions, 808 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index b4a1f6f4..2a9ad40d 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -34,7 +34,6 @@
 #include "fence_gk20a.h"
 #include "gr_gk20a.h"
 #include "debug_gk20a.h"
-#include "semaphore_gk20a.h"
 #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 7afed41f..62b0a05e 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -1,7 +1,7 @@
 /*
 * GK20A Graphics Copy Engine  (gr host)
 *
- * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -28,7 +28,6 @@
 #include "gk20a.h"
 #include "debug_gk20a.h"
-#include "semaphore_gk20a.h"
 #include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index c8b1c105..3fa6bb25 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -28,13 +28,13 @@
 #include <linux/vmalloc.h>
 #include <linux/circ_buf.h>
-#include "debug_gk20a.h"
+#include <nvgpu/semaphore.h>
-#include "ctxsw_trace_gk20a.h"
 #include "gk20a.h"
+#include "debug_gk20a.h"
+#include "ctxsw_trace_gk20a.h"
 #include "dbg_gpu_gk20a.h"
 #include "fence_gk20a.h"
-#include "semaphore_gk20a.h"
 #include <nvgpu/timers.h>
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index c3c6fbb8..0eba1c30 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -1,9 +1,7 @@
 /*
- * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c
- *
 * GK20A Channel Synchronization Abstraction
 *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -20,10 +18,11 @@
 #include <linux/list.h>
 #include <linux/version.h>
+#include <nvgpu/semaphore.h>
 #include "channel_sync_gk20a.h"
 #include "gk20a.h"
 #include "fence_gk20a.h"
-#include "semaphore_gk20a.h"
 #include "sync_gk20a.h"
 #include "mm_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 37ba720a..83fdc05d 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -1,6 +1,4 @@
 /*
- * drivers/video/tegra/host/t20/debug_gk20a.c
- *
 * Copyright (C) 2011-2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
@@ -20,12 +18,12 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/io.h>
+#include <nvgpu/semaphore.h>
 #include "gk20a.h"
 #include "debug_gk20a.h"
-#include "semaphore_gk20a.h"
 #include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index b8a1dcbc..6bd59067 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -17,8 +17,9 @@
 #include <linux/file.h>
 #include <linux/version.h>
+#include <nvgpu/semaphore.h>
 #include "gk20a.h"
-#include "semaphore_gk20a.h"
 #include "channel_gk20a.h"
 #include "sync_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 469148c2..c6b444f9 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -25,11 +25,11 @@
 #include <linux/nvhost.h>
 #include <nvgpu/timers.h>
+#include <nvgpu/semaphore.h>
 #include "gk20a.h"
 #include "debug_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
-#include "semaphore_gk20a.h"
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cddb3316..0e1c88a4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -41,7 +41,6 @@
 #include "regops_gk20a.h"
 #include "dbg_gpu_gk20a.h"
 #include "debug_gk20a.h"
-#include "semaphore_gk20a.h"
 #include "platform_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index ea5ea73f..cafb1233 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -33,13 +33,13 @@
 #include <nvgpu/timers.h>
 #include <nvgpu/allocator.h>
+#include <nvgpu/semaphore.h>
 #include <nvgpu/page_allocator.h>
 #include "gk20a.h"
 #include "mm_gk20a.h"
 #include "fence_gk20a.h"
 #include "kind_gk20a.h"
-#include "semaphore_gk20a.h"
 #include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index f3dffa46..d39ca2d0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -52,6 +52,12 @@ enum gk20a_aperture {
        APERTURE_VIDMEM
 };
+enum gk20a_mem_rw_flag {
+        gk20a_mem_flag_none = 0,
+        gk20a_mem_flag_read_only = 1,
+        gk20a_mem_flag_write_only = 2,
+};
 static inline const char *gk20a_aperture_str(enum gk20a_aperture aperture)
 {
        switch (aperture) {
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 9924e48f..d53cf09b 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -29,7 +29,6 @@
 #include "gk20a.h"
 #include "gr_gk20a.h"
-#include "semaphore_gk20a.h"
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pwr_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
deleted file mode 100644
index 2038e300..00000000
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * drivers/video/tegra/host/gk20a/semaphore_gk20a.c
- *
- * GK20A Semaphores
- *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-#define pr_fmt(fmt) "gpu_sema: " fmt
-#include <linux/dma-mapping.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <asm/pgtable.h>
-#include "gk20a.h"
-#include "mm_gk20a.h"
-#include "semaphore_gk20a.h"
-#define __lock_sema_sea(s)                                              \
-        do {                                                            \
-                gpu_sema_verbose_dbg("Acquiring sema lock...");         \
-                mutex_lock(&s->sea_lock);                               \
-                gpu_sema_verbose_dbg("Sema lock aquried!");             \
-        } while (0)
-#define __unlock_sema_sea(s)                                            \
-        do {                                                            \
-                mutex_unlock(&s->sea_lock);                             \
-                gpu_sema_verbose_dbg("Released sema lock");             \
-        } while (0)
-/*
- * Return the sema_sea pointer.
- */
-struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g)
-{
-        return g->sema_sea;
-}
-static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea)
-{
-        int ret = 0;
-        struct gk20a *gk20a = sea->gk20a;
-        __lock_sema_sea(sea);
-        ret = gk20a_gmmu_alloc_attr_sys(gk20a, DMA_ATTR_NO_KERNEL_MAPPING,
-                                    PAGE_SIZE * SEMAPHORE_POOL_COUNT,
-                                    &sea->sea_mem);
-        if (ret)
-                goto out;
-        sea->ro_sg_table = sea->sea_mem.sgt;
-        sea->size = SEMAPHORE_POOL_COUNT;
-        sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
-out:
-        __unlock_sema_sea(sea);
-        return ret;
-}
-/*
- * Create the semaphore sea. Only create it once - subsequent calls to this will
- * return the originally created sea pointer.
- */
-struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
-{
-        if (g->sema_sea)
-                return g->sema_sea;
-        g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL);
-        if (!g->sema_sea)
-                return NULL;
-        g->sema_sea->size = 0;
-        g->sema_sea->page_count = 0;
-        g->sema_sea->gk20a = g;
-        INIT_LIST_HEAD(&g->sema_sea->pool_list);
-        mutex_init(&g->sema_sea->sea_lock);
-        if (__gk20a_semaphore_sea_grow(g->sema_sea))
-                goto cleanup;
-        gpu_sema_dbg("Created semaphore sea!");
-        return g->sema_sea;
-cleanup:
-        kfree(g->sema_sea);
-        g->sema_sea = NULL;
-        gpu_sema_dbg("Failed to creat semaphore sea!");
-        return NULL;
-}
-static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len)
-{
-        unsigned long idx = find_first_zero_bit(bitmap, len);
-        if (idx == len)
-                return -ENOSPC;
-        set_bit(idx, bitmap);
-        return (int)idx;
-}
-/*
- * Allocate a pool from the sea.
- */
-struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
-                                struct gk20a_semaphore_sea *sea)
-{
-        struct gk20a_semaphore_pool *p;
-        unsigned long page_idx;
-        int ret, err = 0;
-        p = kzalloc(sizeof(*p), GFP_KERNEL);
-        if (!p)
-                return ERR_PTR(-ENOMEM);
-        __lock_sema_sea(sea);
-        ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
-        if (ret < 0) {
-                err = ret;
-                goto fail;
-        }
-        page_idx = (unsigned long)ret;
-        p->page = sea->sea_mem.pages[page_idx];
-        p->ro_sg_table = sea->ro_sg_table;
-        p->page_idx = page_idx;
-        p->sema_sea = sea;
-        INIT_LIST_HEAD(&p->hw_semas);
-        kref_init(&p->ref);
-        mutex_init(&p->pool_lock);
-        sea->page_count++;
-        list_add(&p->pool_list_entry, &sea->pool_list);
-        __unlock_sema_sea(sea);
-        gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
-        return p;
-fail:
-        __unlock_sema_sea(sea);
-        kfree(p);
-        gpu_sema_dbg("Failed to allocate semaphore pool!");
-        return ERR_PTR(err);
-}
-/*
- * Map a pool into the passed vm's address space. This handles both the fixed
- * global RO mapping and the non-fixed private RW mapping.
- */
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
-                             struct vm_gk20a *vm)
-{
-        int ents, err = 0;
-        u64 addr;
-        gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx);
-        p->cpu_va = vmap(&p->page, 1, 0,
-                         pgprot_writecombine(PAGE_KERNEL));
-        gpu_sema_dbg("  %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
-        /* First do the RW mapping. */
-        p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
-        if (!p->rw_sg_table)
-                return -ENOMEM;
-        err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
-                                        PAGE_SIZE, GFP_KERNEL);
-        if (err) {
-                err = -ENOMEM;
-                goto fail;
-        }
-        /* Add IOMMU mapping... */
-        ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-                          DMA_BIDIRECTIONAL);
-        if (ents != 1) {
-                err = -ENOMEM;
-                goto fail_free_sgt;
-        }
-        gpu_sema_dbg("  %d: DMA addr = 0x%pad", p->page_idx,
-                     &sg_dma_address(p->rw_sg_table->sgl));
-        /* Map into the GPU... Doesn't need to be fixed. */
-        p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
-                                   0, gk20a_mem_flag_none, false,
-                                   APERTURE_SYSMEM);
-        if (!p->gpu_va) {
-                err = -ENOMEM;
-                goto fail_unmap_sgt;
-        }
-        gpu_sema_dbg("  %d: GPU read-write VA = 0x%llx", p->page_idx,
-                     p->gpu_va);
-        /*
-         * And now the global mapping. Take the sea lock so that we don't race
-         * with a concurrent remap.
-         */
-        __lock_sema_sea(p->sema_sea);
-        BUG_ON(p->mapped);
-        addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
-                                    p->sema_sea->gpu_va, p->sema_sea->map_size,
-                                    0,
-                                    gk20a_mem_flag_read_only,
-                                    false,
-                                    APERTURE_SYSMEM);
-        if (!addr) {
-                err = -ENOMEM;
-                BUG();
-                goto fail_unlock;
-        }
-        p->gpu_va_ro = addr;
-        p->mapped = 1;
-        gpu_sema_dbg("  %d: GPU read-only  VA = 0x%llx", p->page_idx,
-                     p->gpu_va_ro);
-        __unlock_sema_sea(p->sema_sea);
-        return 0;
-fail_unlock:
-        __unlock_sema_sea(p->sema_sea);
-fail_unmap_sgt:
-        dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-                     DMA_BIDIRECTIONAL);
-fail_free_sgt:
-        sg_free_table(p->rw_sg_table);
-fail:
-        kfree(p->rw_sg_table);
-        p->rw_sg_table = NULL;
-        gpu_sema_dbg("  %d: Failed to map semaphore pool!", p->page_idx);
-        return err;
-}
-/*
- * Unmap a semaphore_pool.
- */
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
-                                struct vm_gk20a *vm)
-{
-        struct gk20a_semaphore_int *hw_sema;
-        kunmap(p->cpu_va);
-        /* First the global RO mapping... */
-        __lock_sema_sea(p->sema_sea);
-        gk20a_gmmu_unmap(vm, p->gpu_va_ro,
-                         p->sema_sea->map_size, gk20a_mem_flag_none);
-        p->ro_sg_table = NULL;
-        __unlock_sema_sea(p->sema_sea);
-        /* And now the private RW mapping. */
-        gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
-        p->gpu_va = 0;
-        dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-                     DMA_BIDIRECTIONAL);
-        sg_free_table(p->rw_sg_table);
-        kfree(p->rw_sg_table);
-        p->rw_sg_table = NULL;
-        list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
-                /*
-                 * Make sure the mem addresses are all NULL so if this gets
-                 * reused we will fault.
-                 */
-                hw_sema->value = NULL;
-        gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
-}
-/*
- * Completely free a sempahore_pool. You should make sure this pool is not
- * mapped otherwise there's going to be a memory leak.
- */
-static void gk20a_semaphore_pool_free(struct kref *ref)
-{
-        struct gk20a_semaphore_pool *p =
-                container_of(ref, struct gk20a_semaphore_pool, ref);
-        struct gk20a_semaphore_sea *s = p->sema_sea;
-        struct gk20a_semaphore_int *hw_sema, *tmp;
-        WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
-        __lock_sema_sea(s);
-        list_del(&p->pool_list_entry);
-        clear_bit(p->page_idx, s->pools_alloced);
-        s->page_count--;
-        __unlock_sema_sea(s);
-        list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
-                kfree(hw_sema);
-        gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
-        kfree(p);
-}
-void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
-{
-        kref_get(&p->ref);
-}
-void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
-{
-        kref_put(&p->ref, gk20a_semaphore_pool_free);
-}
-/*
- * Get the address for a semaphore_pool - if global is true then return the
- * global RO address instead of the RW address owned by the semaphore's VM.
- */
-u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global)
-{
-        if (!global)
-                return p->gpu_va;
-        return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
-}
-static int __gk20a_init_hw_sema(struct channel_gk20a *ch)
-{
-        int hw_sema_idx;
-        int ret = 0;
-        struct gk20a_semaphore_int *hw_sema;
-        struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
-        BUG_ON(!p);
-        mutex_lock(&p->pool_lock);
-        /* Find an available HW semaphore. */
-        hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
-                                               PAGE_SIZE / SEMAPHORE_SIZE);
-        if (hw_sema_idx < 0) {
-                ret = hw_sema_idx;
-                goto fail;
-        }
-        hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL);
-        if (!hw_sema) {
-                ret = -ENOMEM;
-                goto fail_free_idx;
-        }
-        ch->hw_sema = hw_sema;
-        hw_sema->ch = ch;
-        hw_sema->p = p;
-        hw_sema->idx = hw_sema_idx;
-        hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
-        atomic_set(&hw_sema->next_value, 0);
-        hw_sema->value = p->cpu_va + hw_sema->offset;
-        writel(0, hw_sema->value);
-        list_add(&hw_sema->hw_sema_list, &p->hw_semas);
-        mutex_unlock(&p->pool_lock);
-        return 0;
-fail_free_idx:
-        clear_bit(hw_sema_idx, p->semas_alloced);
-fail:
-        mutex_unlock(&p->pool_lock);
-        return ret;
-}
-/*
- * Free the channel used semaphore index
- */
-void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch)
-{
-        struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
-        BUG_ON(!p);
-        mutex_lock(&p->pool_lock);
-        clear_bit(ch->hw_sema->idx, p->semas_alloced);
-        /* Make sure that when the ch is re-opened it will get a new HW sema. */
-        list_del(&ch->hw_sema->hw_sema_list);
-        kfree(ch->hw_sema);
-        ch->hw_sema = NULL;
-        mutex_unlock(&p->pool_lock);
-}
-/*
- * Allocate a semaphore from the passed pool.
- *
- * Since semaphores are ref-counted there's no explicit free for external code
- * to use. When the ref-count hits 0 the internal free will happen.
- */
-struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
-{
-        struct gk20a_semaphore *s;
-        int ret;
-        if (!ch->hw_sema) {
-                ret = __gk20a_init_hw_sema(ch);
-                if (ret)
-                        return NULL;
-        }
-        s = kzalloc(sizeof(*s), GFP_KERNEL);
-        if (!s)
-                return NULL;
-        kref_init(&s->ref);
-        s->hw_sema = ch->hw_sema;
-        atomic_set(&s->value, 0);
-        /*
-         * Take a ref on the pool so that we can keep this pool alive for
-         * as long as this semaphore is alive.
-         */
-        gk20a_semaphore_pool_get(s->hw_sema->p);
-        gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
-        return s;
-}
-static void gk20a_semaphore_free(struct kref *ref)
-{
-        struct gk20a_semaphore *s =
-                container_of(ref, struct gk20a_semaphore, ref);
-        gk20a_semaphore_pool_put(s->hw_sema->p);
-        kfree(s);
-}
-void gk20a_semaphore_put(struct gk20a_semaphore *s)
-{
-        kref_put(&s->ref, gk20a_semaphore_free);
-}
-void gk20a_semaphore_get(struct gk20a_semaphore *s)
-{
-        kref_get(&s->ref);
-}
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
deleted file mode 100644
index 8e09fcfc..00000000
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-#ifndef SEMAPHORE_GK20A_H
-#define SEMAPHORE_GK20A_H
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/delay.h>
-#include <nvgpu/allocator.h>
-#include "gk20a.h"
-#include "mm_gk20a.h"
-#include "channel_gk20a.h"
-#define gpu_sema_dbg(fmt, args...)              \
-        gk20a_dbg(gpu_dbg_sema, fmt, ##args)
-#define gpu_sema_verbose_dbg(fmt, args...)      \
-        gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)
-/*
- * Max number of channels that can be used is 512. This of course needs to be
- * fixed to be dynamic but still fast.
- */
-#define SEMAPHORE_POOL_COUNT            512
-#define SEMAPHORE_SIZE                  16
-#define SEMAPHORE_SEA_GROWTH_RATE       32
-struct gk20a_semaphore_sea;
-/*
- * Underlying semaphore data structure. This semaphore can be shared amongst
- * other semaphore instances.
- */
-struct gk20a_semaphore_int {
-        int idx;                        /* Semaphore index. */
-        u32 offset;                     /* Offset into the pool. */
-        atomic_t next_value;            /* Next available value. */
-        u32 *value;                     /* Current value (access w/ readl()). */
-        u32 nr_incrs;                   /* Number of increments programmed. */
-        struct gk20a_semaphore_pool *p; /* Pool that owns this sema. */
-        struct channel_gk20a *ch;       /* Channel that owns this sema. */
-        struct list_head hw_sema_list;  /* List of HW semaphores. */
-};
-/*
- * A semaphore which the rest of the driver actually uses. This consists of a
- * pointer to a real semaphore and a value to wait for. This allows one physical
- * semaphore to be shared among an essentially infinite number of submits.
- */
-struct gk20a_semaphore {
-        struct gk20a_semaphore_int *hw_sema;
-        atomic_t value;
-        int incremented;
-        struct kref ref;
-};
-/*
- * A semaphore pool. Each address space will own exactly one of these.
- */
-struct gk20a_semaphore_pool {
-        struct page *page;                      /* This pool's page of memory */
-        struct list_head pool_list_entry;       /* Node for list of pools. */
-        void *cpu_va;                           /* CPU access to the pool. */
-        u64 gpu_va;                             /* GPU access to the pool. */
-        u64 gpu_va_ro;                          /* GPU access to the pool. */
-        int page_idx;                           /* Index into sea bitmap. */
-        struct list_head hw_semas;              /* List of HW semas. */
-        DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
-        struct gk20a_semaphore_sea *sema_sea;   /* Sea that owns this pool. */
-        struct mutex pool_lock;
-        /*
-         * This is the address spaces's personal RW table. Other channels will
-         * ultimately map this page as RO.
-         */
-        struct sg_table *rw_sg_table;
-        /*
-         * This is to keep track of whether the pool has had its sg_table
-         * updated during sea resizing.
-         */
-        struct sg_table *ro_sg_table;
-        int mapped;
-        /*
-         * Sometimes a channel can be released before other channels are
-         * done waiting on it. This ref count ensures that the pool doesn't
-         * go away until all semaphores using this pool are cleaned up first.
-         */
-        struct kref ref;
-};
-/*
- * A sea of semaphores pools. Each pool is owned by a single VM. Since multiple
- * channels can share a VM each channel gets it's own HW semaphore from the
- * pool. Channels then allocate regular semaphores - basically just a value that
- * signifies when a particular job is done.
- */
-struct gk20a_semaphore_sea {
-        struct list_head pool_list;     /* List of pools in this sea. */
-        struct gk20a *gk20a;
-        size_t size;                    /* Number of pages available. */
-        u64 gpu_va;                     /* GPU virtual address of sema sea. */
-        u64 map_size;                   /* Size of the mapping. */
-        /*
-         * TODO:
-         * List of pages that we use to back the pools. The number of pages
-         * can grow dynamically since allocating 512 pages for all channels at
-         * once would be a tremendous waste.
-         */
-        int page_count;                 /* Pages allocated to pools. */
-        struct sg_table *ro_sg_table;
-        /*
-        struct page *pages[SEMAPHORE_POOL_COUNT];
-        */
-        struct mem_desc sea_mem;
-        /*
-         * Can't use a regular allocator here since the full range of pools are
-         * not always allocated. Instead just use a bitmap.
-         */
-        DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
-        struct mutex sea_lock;          /* Lock alloc/free calls. */
-};
-enum gk20a_mem_rw_flag {
-        gk20a_mem_flag_none = 0,
-        gk20a_mem_flag_read_only = 1,
-        gk20a_mem_flag_write_only = 2,
-};
-/*
- * Semaphore sea functions.
- */
-struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
-int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
-                            struct vm_gk20a *vm);
-void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
-                               struct vm_gk20a *vm);
-struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);
-/*
- * Semaphore pool functions.
- */
-struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
-        struct gk20a_semaphore_sea *sea);
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
-                             struct vm_gk20a *vm);
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
-                                struct vm_gk20a *vm);
-u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
-void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
-void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);
-/*
- * Semaphore functions.
- */
-struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
-void gk20a_semaphore_put(struct gk20a_semaphore *s);
-void gk20a_semaphore_get(struct gk20a_semaphore *s);
-void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch);
-/*
- * Return the address of a specific semaphore.
- *
- * Don't call this on a semaphore you don't own - the VA returned will make no
- * sense in your specific channel's VM.
- */
-static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
-{
-        return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
-                s->hw_sema->offset;
-}
-/*
- * Get the global RO address for the semaphore. Can be called on any semaphore
- * regardless of whether you own it.
- */
-static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
-{
-        return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
-                s->hw_sema->offset;
-}
-static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
-{
-        return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
-                hw_sema->offset;
-}
-/*
- * TODO: handle wrap around... Hmm, how to do this?
- */
-static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
-{
-        u32 sema_val = readl(s->hw_sema->value);
-        /*
-         * If the underlying semaphore value is greater than or equal to
-         * the value of the semaphore then the semaphore has been signaled
-         * (a.k.a. released).
-         */
-        return (int)sema_val >= atomic_read(&s->value);
-}
-static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
-{
-        return !gk20a_semaphore_is_released(s);
-}
-/*
- * Read the underlying value from a semaphore.
- */
-static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
-{
-        return readl(s->hw_sema->value);
-}
-static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
-{
-        return (u32)atomic_read(&s->value);
-}
-static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
-{
-        return (u32)atomic_read(&s->hw_sema->next_value);
-}
-/*
- * If @force is set then this will not wait for the underlying semaphore to
- * catch up to the passed semaphore.
- */
-static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s,
-                                             bool force)
-{
-        u32 current_val;
-        u32 val = gk20a_semaphore_get_value(s);
-        int attempts = 0;
-        /*
-         * Wait until the sema value is 1 less than the write value. That
-         * way this function is essentially an increment.
-         *
-         * TODO: tune the wait a little better.
-         */
-        while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
-                if (force)
-                        break;
-                msleep(100);
-                attempts += 1;
-                if (attempts > 100) {
-                        WARN(1, "Stall on sema release!");
-                        return;
-                }
-        }
-        /*
-         * If the semaphore has already passed the value we would write then
-         * this is really just a NO-OP.
-         */
-        if (current_val >= val)
-                return;
-        writel(val, s->hw_sema->value);
-        gpu_sema_verbose_dbg("(c=%d) WRITE %u",
-                             s->hw_sema->ch->hw_chid, val);
-}
-static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
-{
-        __gk20a_semaphore_release(s, false);
-}
-/*
- * Configure a software based increment on this semaphore. This is useful for
- * when we want the GPU to wait on a SW event before processing a channel.
- * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
- * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
- * then allows the GPU to continue.
- *
- * Also used to prep a semaphore for an INCR by the GPU.
- */
-static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
-{
-        BUG_ON(s->incremented);
-        atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
-        s->incremented = 1;
-        gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
-                             s->hw_sema->ch->hw_chid,
-                             gk20a_semaphore_next_value(s));
-}
-#endif
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index b642981c..e7bacac8 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -1,7 +1,7 @@
 /*
 * GK20A Sync Framework Integration
 *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -13,8 +13,6 @@
 * more details.
 */
-#include "sync_gk20a.h"
 #include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/file.h>
@@ -23,9 +21,14 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <uapi/linux/nvgpu.h>
+#include <nvgpu/semaphore.h>
 #include "../drivers/staging/android/sync.h"
-#include "semaphore_gk20a.h"
+#include "sync_gk20a.h"
 static const struct sync_timeline_ops gk20a_sync_timeline_ops;