From aa36d3786aeed6755b9744fed37aad000b582322 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Thu, 12 Jan 2017 18:50:34 -0800
Subject: gpu: nvgpu: Organize semaphore_gk20a.[ch]

Move semaphore_gk20a.c to drivers/gpu/nvgpu/common/semaphore.c since the
semaphore code is common to all chips.

Move the semaphore_gk20a.h header file to drivers/gpu/nvgpu/include/nvgpu
and rename it to semaphore.h. Also update all places where the header
is included to use the new path.

This revealed an odd location for the enum gk20a_mem_rw_flag. This should
be in the mm headers. As a result many places that did not need anything
semaphore related had to include the semaphore header file. Fixing this
oddity allowed the semaphore include to be removed from many C files that
did not need it.

Bug 1799159

Change-Id: Ie017219acf34c4c481747323b9f3ac33e76e064c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1284627
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu             |   2 +-
 drivers/gpu/nvgpu/common/semaphore.c         | 460 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c          |   1 -
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c          |   3 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      |   6 +-
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c |   7 +-
 drivers/gpu/nvgpu/gk20a/debug_gk20a.c        |   6 +-
 drivers/gpu/nvgpu/gk20a/fence_gk20a.c        |   5 +-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c         |   2 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c           |   1 -
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c           |   2 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h           |   6 +
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c          |   1 -
 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c    | 466 ---------------------------
 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h    | 318 ------------------
 drivers/gpu/nvgpu/gk20a/sync_gk20a.c         |  11 +-
 drivers/gpu/nvgpu/gm206/acr_gm206.c          |   1 -
 drivers/gpu/nvgpu/gm206/ce_gm206.c           |   3 +-
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c          |   1 -
 drivers/gpu/nvgpu/gp106/acr_gp106.c          |   1 -
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c           |   1 -
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c           |   3 +-
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h  | 312 ++++++++++++++++++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c |   3 +-
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c             |   3 +-
 25 files changed, 804 insertions(+), 821 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/semaphore.c
 delete mode 100644 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
 delete mode 100644 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/semaphore.h

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 0f8f5bc1..5c2bbb79 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -32,6 +32,7 @@ nvgpu-y := \
 	common/mm/page_allocator.o \
 	common/mm/lockless_allocator.o \
 	common/nvgpu_common.o \
+	common/semaphore.o \
 	gk20a/gk20a.o \
 	gk20a/sched_gk20a.o \
 	gk20a/as_gk20a.o \
@@ -48,7 +49,6 @@ nvgpu-y := \
 	gk20a/mm_gk20a.o \
 	gk20a/pmu_gk20a.o \
 	gk20a/priv_ring_gk20a.o \
-	gk20a/semaphore_gk20a.o \
 	gk20a/fence_gk20a.o \
 	gk20a/therm_gk20a.o \
 	gk20a/gr_ctx_gk20a_sim.o \
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
new file mode 100644
index 00000000..ea4910f1
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -0,0 +1,460 @@
+/*
+ * Nvgpu Semaphores
+ *
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#define pr_fmt(fmt) "gpu_sema: " fmt
+
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+
+#include <nvgpu/semaphore.h>
+
+#define __lock_sema_sea(s)						\
+	do {								\
+		gpu_sema_verbose_dbg("Acquiring sema lock...");		\
+		mutex_lock(&s->sea_lock);				\
+		gpu_sema_verbose_dbg("Sema lock aquried!");		\
+	} while (0)
+
+#define __unlock_sema_sea(s)						\
+	do {								\
+		mutex_unlock(&s->sea_lock);				\
+		gpu_sema_verbose_dbg("Released sema lock");		\
+	} while (0)
+
+/*
+ * Return the sema_sea pointer.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g)
+{
+	return g->sema_sea;
+}
+
+static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea)
+{
+	int ret = 0;
+	struct gk20a *gk20a = sea->gk20a;
+
+	__lock_sema_sea(sea);
+
+	ret = gk20a_gmmu_alloc_attr_sys(gk20a, DMA_ATTR_NO_KERNEL_MAPPING,
+				    PAGE_SIZE * SEMAPHORE_POOL_COUNT,
+				    &sea->sea_mem);
+	if (ret)
+		goto out;
+
+	sea->ro_sg_table = sea->sea_mem.sgt;
+	sea->size = SEMAPHORE_POOL_COUNT;
+	sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
+
+out:
+	__unlock_sema_sea(sea);
+	return ret;
+}
+
+/*
+ * Create the semaphore sea. Only create it once - subsequent calls to this will
+ * return the originally created sea pointer.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
+{
+	if (g->sema_sea)
+		return g->sema_sea;
+
+	g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL);
+	if (!g->sema_sea)
+		return NULL;
+
+	g->sema_sea->size = 0;
+	g->sema_sea->page_count = 0;
+	g->sema_sea->gk20a = g;
+	INIT_LIST_HEAD(&g->sema_sea->pool_list);
+	mutex_init(&g->sema_sea->sea_lock);
+
+	if (__gk20a_semaphore_sea_grow(g->sema_sea))
+		goto cleanup;
+
+	gpu_sema_dbg("Created semaphore sea!");
+	return g->sema_sea;
+
+cleanup:
+	kfree(g->sema_sea);
+	g->sema_sea = NULL;
+	gpu_sema_dbg("Failed to creat semaphore sea!");
+	return NULL;
+}
+
+static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len)
+{
+	unsigned long idx = find_first_zero_bit(bitmap, len);
+
+	if (idx == len)
+		return -ENOSPC;
+
+	set_bit(idx, bitmap);
+
+	return (int)idx;
+}
+
+/*
+ * Allocate a pool from the sea.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+				struct gk20a_semaphore_sea *sea)
+{
+	struct gk20a_semaphore_pool *p;
+	unsigned long page_idx;
+	int ret, err = 0;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+
+	__lock_sema_sea(sea);
+
+	ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
+	if (ret < 0) {
+		err = ret;
+		goto fail;
+	}
+
+	page_idx = (unsigned long)ret;
+
+	p->page = sea->sea_mem.pages[page_idx];
+	p->ro_sg_table = sea->ro_sg_table;
+	p->page_idx = page_idx;
+	p->sema_sea = sea;
+	INIT_LIST_HEAD(&p->hw_semas);
+	kref_init(&p->ref);
+	mutex_init(&p->pool_lock);
+
+	sea->page_count++;
+	list_add(&p->pool_list_entry, &sea->pool_list);
+	__unlock_sema_sea(sea);
+
+	gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
+
+	return p;
+
+fail:
+	__unlock_sema_sea(sea);
+	kfree(p);
+	gpu_sema_dbg("Failed to allocate semaphore pool!");
+	return ERR_PTR(err);
+}
+
+/*
+ * Map a pool into the passed vm's address space. This handles both the fixed
+ * global RO mapping and the non-fixed private RW mapping.
+ */
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
+			     struct vm_gk20a *vm)
+{
+	int ents, err = 0;
+	u64 addr;
+
+	gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx);
+
+	p->cpu_va = vmap(&p->page, 1, 0,
+			 pgprot_writecombine(PAGE_KERNEL));
+
+	gpu_sema_dbg("  %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
+
+	/* First do the RW mapping. */
+	p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
+	if (!p->rw_sg_table)
+		return -ENOMEM;
+
+	err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
+					PAGE_SIZE, GFP_KERNEL);
+	if (err) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	/* Add IOMMU mapping... */
+	ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+			  DMA_BIDIRECTIONAL);
+	if (ents != 1) {
+		err = -ENOMEM;
+		goto fail_free_sgt;
+	}
+
+	gpu_sema_dbg("  %d: DMA addr = 0x%pad", p->page_idx,
+		     &sg_dma_address(p->rw_sg_table->sgl));
+
+	/* Map into the GPU... Doesn't need to be fixed. */
+	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
+				   0, gk20a_mem_flag_none, false,
+				   APERTURE_SYSMEM);
+	if (!p->gpu_va) {
+		err = -ENOMEM;
+		goto fail_unmap_sgt;
+	}
+
+	gpu_sema_dbg("  %d: GPU read-write VA = 0x%llx", p->page_idx,
+		     p->gpu_va);
+
+	/*
+	 * And now the global mapping. Take the sea lock so that we don't race
+	 * with a concurrent remap.
+	 */
+	__lock_sema_sea(p->sema_sea);
+
+	BUG_ON(p->mapped);
+	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
+				    p->sema_sea->gpu_va, p->sema_sea->map_size,
+				    0,
+				    gk20a_mem_flag_read_only,
+				    false,
+				    APERTURE_SYSMEM);
+	if (!addr) {
+		err = -ENOMEM;
+		BUG();
+		goto fail_unlock;
+	}
+	p->gpu_va_ro = addr;
+	p->mapped = 1;
+
+	gpu_sema_dbg("  %d: GPU read-only  VA = 0x%llx", p->page_idx,
+		     p->gpu_va_ro);
+
+	__unlock_sema_sea(p->sema_sea);
+
+	return 0;
+
+fail_unlock:
+	__unlock_sema_sea(p->sema_sea);
+fail_unmap_sgt:
+	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+		     DMA_BIDIRECTIONAL);
+fail_free_sgt:
+	sg_free_table(p->rw_sg_table);
+fail:
+	kfree(p->rw_sg_table);
+	p->rw_sg_table = NULL;
+	gpu_sema_dbg("  %d: Failed to map semaphore pool!", p->page_idx);
+	return err;
+}
+
+/*
+ * Unmap a semaphore_pool: the CPU vmap(), the RO and the RW GPU mappings.
+ */
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
+				struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_int *hw_sema;
+
+	/* cpu_va came from vmap(), so it must be released with vunmap(). */
+	vunmap(p->cpu_va);
+
+	/* First the global RO mapping... */
+	__lock_sema_sea(p->sema_sea);
+	gk20a_gmmu_unmap(vm, p->gpu_va_ro,
+			 p->sema_sea->map_size, gk20a_mem_flag_none);
+	p->ro_sg_table = NULL;
+	__unlock_sema_sea(p->sema_sea);
+
+	/* And now the private RW mapping. */
+	gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
+	p->gpu_va = 0;
+
+	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+		     DMA_BIDIRECTIONAL);
+	sg_free_table(p->rw_sg_table);
+	kfree(p->rw_sg_table);
+	p->rw_sg_table = NULL;
+
+	list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
+		/*
+		 * Make sure the mem addresses are all NULL so if this gets
+		 * reused we will fault.
+		 */
+		hw_sema->value = NULL;
+
+	gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
+}
+
+/*
+ * Completely free a semaphore_pool. You should make sure this pool is not
+ * mapped otherwise there's going to be a memory leak.
+ */
+static void gk20a_semaphore_pool_free(struct kref *ref)
+{
+	struct gk20a_semaphore_pool *p =
+		container_of(ref, struct gk20a_semaphore_pool, ref);
+	struct gk20a_semaphore_sea *s = p->sema_sea;
+	struct gk20a_semaphore_int *hw_sema, *tmp;
+
+	WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
+
+	__lock_sema_sea(s);
+	list_del(&p->pool_list_entry);
+	clear_bit(p->page_idx, s->pools_alloced);
+	s->page_count--;
+	__unlock_sema_sea(s);
+
+	list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
+		kfree(hw_sema);
+
+	gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
+	kfree(p);
+}
+
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
+{
+	kref_get(&p->ref);
+}
+
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
+{
+	kref_put(&p->ref, gk20a_semaphore_pool_free);
+}
+
+/*
+ * Get the address for a semaphore_pool - if global is true then return the
+ * global RO address instead of the RW address owned by the semaphore's VM.
+ */
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global)
+{
+	if (!global)
+		return p->gpu_va;
+
+	return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
+}
+
+static int __gk20a_init_hw_sema(struct channel_gk20a *ch)
+{
+	int hw_sema_idx;
+	int ret = 0;
+	struct gk20a_semaphore_int *hw_sema;
+	struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
+
+	BUG_ON(!p);
+
+	mutex_lock(&p->pool_lock);
+
+	/* Find an available HW semaphore. */
+	hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
+					       PAGE_SIZE / SEMAPHORE_SIZE);
+	if (hw_sema_idx < 0) {
+		ret = hw_sema_idx;
+		goto fail;
+	}
+
+	hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL);
+	if (!hw_sema) {
+		ret = -ENOMEM;
+		goto fail_free_idx;
+	}
+
+	ch->hw_sema = hw_sema;
+	hw_sema->ch = ch;
+	hw_sema->p = p;
+	hw_sema->idx = hw_sema_idx;
+	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
+	atomic_set(&hw_sema->next_value, 0);
+	hw_sema->value = p->cpu_va + hw_sema->offset;
+	writel(0, hw_sema->value);
+
+	list_add(&hw_sema->hw_sema_list, &p->hw_semas);
+
+	mutex_unlock(&p->pool_lock);
+
+	return 0;
+
+fail_free_idx:
+	clear_bit(hw_sema_idx, p->semas_alloced);
+fail:
+	mutex_unlock(&p->pool_lock);
+	return ret;
+}
+
+/*
+ * Free the channel used semaphore index
+ */
+void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch)
+{
+	struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
+
+	BUG_ON(!p);
+
+	mutex_lock(&p->pool_lock);
+
+	clear_bit(ch->hw_sema->idx, p->semas_alloced);
+
+	/* Make sure that when the ch is re-opened it will get a new HW sema. */
+	list_del(&ch->hw_sema->hw_sema_list);
+	kfree(ch->hw_sema);
+	ch->hw_sema = NULL;
+
+	mutex_unlock(&p->pool_lock);
+}
+
+/*
+ * Allocate a semaphore for the passed channel.
+ *
+ * Since semaphores are ref-counted there's no explicit free for external code
+ * to use. When the ref-count hits 0 the internal free will happen.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
+{
+	struct gk20a_semaphore *s;
+	int ret;
+
+	if (!ch->hw_sema) {
+		ret = __gk20a_init_hw_sema(ch);
+		if (ret)
+			return NULL;
+	}
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return NULL;
+
+	kref_init(&s->ref);
+	s->hw_sema = ch->hw_sema;
+	atomic_set(&s->value, 0);
+
+	/*
+	 * Take a ref on the pool so that we can keep this pool alive for
+	 * as long as this semaphore is alive.
+	 */
+	gk20a_semaphore_pool_get(s->hw_sema->p);
+
+	gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
+
+	return s;
+}
+
+static void gk20a_semaphore_free(struct kref *ref)
+{
+	struct gk20a_semaphore *s =
+		container_of(ref, struct gk20a_semaphore, ref);
+
+	gk20a_semaphore_pool_put(s->hw_sema->p);
+
+	kfree(s);
+}
+
+void gk20a_semaphore_put(struct gk20a_semaphore *s)
+{
+	kref_put(&s->ref, gk20a_semaphore_free);
+}
+
+void gk20a_semaphore_get(struct gk20a_semaphore *s)
+{
+	kref_get(&s->ref);
+}
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index b4a1f6f4..2a9ad40d 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -34,7 +34,6 @@
 #include "fence_gk20a.h"
 #include "gr_gk20a.h"
 #include "debug_gk20a.h"
-#include "semaphore_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 7afed41f..62b0a05e 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Graphics Copy Engine  (gr host)
  *
- * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -28,7 +28,6 @@
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
-#include "semaphore_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index c8b1c105..3fa6bb25 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -28,13 +28,13 @@
 #include <linux/vmalloc.h>
 #include <linux/circ_buf.h>
 
-#include "debug_gk20a.h"
-#include "ctxsw_trace_gk20a.h"
+#include <nvgpu/semaphore.h>
 
 #include "gk20a.h"
+#include "debug_gk20a.h"
+#include "ctxsw_trace_gk20a.h"
 #include "dbg_gpu_gk20a.h"
 #include "fence_gk20a.h"
-#include "semaphore_gk20a.h"
 
 #include <nvgpu/timers.h>
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index c3c6fbb8..0eba1c30 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -1,9 +1,7 @@
 /*
- * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c
- *
  * GK20A Channel Synchronization Abstraction
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -20,10 +18,11 @@
 #include <linux/list.h>
 #include <linux/version.h>
 
+#include <nvgpu/semaphore.h>
+
 #include "channel_sync_gk20a.h"
 #include "gk20a.h"
 #include "fence_gk20a.h"
-#include "semaphore_gk20a.h"
 #include "sync_gk20a.h"
 #include "mm_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 37ba720a..83fdc05d 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -1,6 +1,4 @@
 /*
- * drivers/video/tegra/host/t20/debug_gk20a.c
- *
  * Copyright (C) 2011-2017 NVIDIA Corporation.  All rights reserved.
  *
  * This software is licensed under the terms of the GNU General Public
@@ -20,12 +18,12 @@
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-
 #include <linux/io.h>
 
+#include <nvgpu/semaphore.h>
+
 #include "gk20a.h"
 #include "debug_gk20a.h"
-#include "semaphore_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index b8a1dcbc..6bd59067 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -17,8 +17,9 @@
 #include <linux/file.h>
 #include <linux/version.h>
 
+#include <nvgpu/semaphore.h>
+
 #include "gk20a.h"
-#include "semaphore_gk20a.h"
 #include "channel_gk20a.h"
 #include "sync_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 469148c2..c6b444f9 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -25,11 +25,11 @@
 #include <linux/nvhost.h>
 
 #include <nvgpu/timers.h>
+#include <nvgpu/semaphore.h>
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
-#include "semaphore_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cddb3316..0e1c88a4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -41,7 +41,6 @@
 #include "regops_gk20a.h"
 #include "dbg_gpu_gk20a.h"
 #include "debug_gk20a.h"
-#include "semaphore_gk20a.h"
 #include "platform_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index ea5ea73f..cafb1233 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -33,13 +33,13 @@
 
 #include <nvgpu/timers.h>
 #include <nvgpu/allocator.h>
+#include <nvgpu/semaphore.h>
 #include <nvgpu/page_allocator.h>
 
 #include "gk20a.h"
 #include "mm_gk20a.h"
 #include "fence_gk20a.h"
 #include "kind_gk20a.h"
-#include "semaphore_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index f3dffa46..d39ca2d0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -52,6 +52,12 @@ enum gk20a_aperture {
 	APERTURE_VIDMEM
 };
 
+enum gk20a_mem_rw_flag {
+	gk20a_mem_flag_none = 0,
+	gk20a_mem_flag_read_only = 1,
+	gk20a_mem_flag_write_only = 2,
+};
+
 static inline const char *gk20a_aperture_str(enum gk20a_aperture aperture)
 {
 	switch (aperture) {
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 9924e48f..d53cf09b 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -29,7 +29,6 @@
 
 #include "gk20a.h"
 #include "gr_gk20a.h"
-#include "semaphore_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pwr_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
deleted file mode 100644
index 2038e300..00000000
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * drivers/video/tegra/host/gk20a/semaphore_gk20a.c
- *
- * GK20A Semaphores
- *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#define pr_fmt(fmt) "gpu_sema: " fmt
-
-#include <linux/dma-mapping.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-
-#include <asm/pgtable.h>
-
-#include "gk20a.h"
-#include "mm_gk20a.h"
-#include "semaphore_gk20a.h"
-
-#define __lock_sema_sea(s)						\
-	do {								\
-		gpu_sema_verbose_dbg("Acquiring sema lock...");		\
-		mutex_lock(&s->sea_lock);				\
-		gpu_sema_verbose_dbg("Sema lock aquried!");		\
-	} while (0)
-
-#define __unlock_sema_sea(s)						\
-	do {								\
-		mutex_unlock(&s->sea_lock);				\
-		gpu_sema_verbose_dbg("Released sema lock");		\
-	} while (0)
-
-/*
- * Return the sema_sea pointer.
- */
-struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g)
-{
-	return g->sema_sea;
-}
-
-static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea)
-{
-	int ret = 0;
-	struct gk20a *gk20a = sea->gk20a;
-
-	__lock_sema_sea(sea);
-
-	ret = gk20a_gmmu_alloc_attr_sys(gk20a, DMA_ATTR_NO_KERNEL_MAPPING,
-				    PAGE_SIZE * SEMAPHORE_POOL_COUNT,
-				    &sea->sea_mem);
-	if (ret)
-		goto out;
-
-	sea->ro_sg_table = sea->sea_mem.sgt;
-	sea->size = SEMAPHORE_POOL_COUNT;
-	sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
-
-out:
-	__unlock_sema_sea(sea);
-	return ret;
-}
-
-/*
- * Create the semaphore sea. Only create it once - subsequent calls to this will
- * return the originally created sea pointer.
- */
-struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
-{
-	if (g->sema_sea)
-		return g->sema_sea;
-
-	g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL);
-	if (!g->sema_sea)
-		return NULL;
-
-	g->sema_sea->size = 0;
-	g->sema_sea->page_count = 0;
-	g->sema_sea->gk20a = g;
-	INIT_LIST_HEAD(&g->sema_sea->pool_list);
-	mutex_init(&g->sema_sea->sea_lock);
-
-	if (__gk20a_semaphore_sea_grow(g->sema_sea))
-		goto cleanup;
-
-	gpu_sema_dbg("Created semaphore sea!");
-	return g->sema_sea;
-
-cleanup:
-	kfree(g->sema_sea);
-	g->sema_sea = NULL;
-	gpu_sema_dbg("Failed to creat semaphore sea!");
-	return NULL;
-}
-
-static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len)
-{
-	unsigned long idx = find_first_zero_bit(bitmap, len);
-
-	if (idx == len)
-		return -ENOSPC;
-
-	set_bit(idx, bitmap);
-
-	return (int)idx;
-}
-
-/*
- * Allocate a pool from the sea.
- */
-struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
-				struct gk20a_semaphore_sea *sea)
-{
-	struct gk20a_semaphore_pool *p;
-	unsigned long page_idx;
-	int ret, err = 0;
-
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
-	if (!p)
-		return ERR_PTR(-ENOMEM);
-
-	__lock_sema_sea(sea);
-
-	ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
-	if (ret < 0) {
-		err = ret;
-		goto fail;
-	}
-
-	page_idx = (unsigned long)ret;
-
-	p->page = sea->sea_mem.pages[page_idx];
-	p->ro_sg_table = sea->ro_sg_table;
-	p->page_idx = page_idx;
-	p->sema_sea = sea;
-	INIT_LIST_HEAD(&p->hw_semas);
-	kref_init(&p->ref);
-	mutex_init(&p->pool_lock);
-
-	sea->page_count++;
-	list_add(&p->pool_list_entry, &sea->pool_list);
-	__unlock_sema_sea(sea);
-
-	gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
-
-	return p;
-
-fail:
-	__unlock_sema_sea(sea);
-	kfree(p);
-	gpu_sema_dbg("Failed to allocate semaphore pool!");
-	return ERR_PTR(err);
-}
-
-/*
- * Map a pool into the passed vm's address space. This handles both the fixed
- * global RO mapping and the non-fixed private RW mapping.
- */
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
-			     struct vm_gk20a *vm)
-{
-	int ents, err = 0;
-	u64 addr;
-
-	gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx);
-
-	p->cpu_va = vmap(&p->page, 1, 0,
-			 pgprot_writecombine(PAGE_KERNEL));
-
-	gpu_sema_dbg("  %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
-
-	/* First do the RW mapping. */
-	p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
-	if (!p->rw_sg_table)
-		return -ENOMEM;
-
-	err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
-					PAGE_SIZE, GFP_KERNEL);
-	if (err) {
-		err = -ENOMEM;
-		goto fail;
-	}
-
-	/* Add IOMMU mapping... */
-	ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-			  DMA_BIDIRECTIONAL);
-	if (ents != 1) {
-		err = -ENOMEM;
-		goto fail_free_sgt;
-	}
-
-	gpu_sema_dbg("  %d: DMA addr = 0x%pad", p->page_idx,
-		     &sg_dma_address(p->rw_sg_table->sgl));
-
-	/* Map into the GPU... Doesn't need to be fixed. */
-	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
-				   0, gk20a_mem_flag_none, false,
-				   APERTURE_SYSMEM);
-	if (!p->gpu_va) {
-		err = -ENOMEM;
-		goto fail_unmap_sgt;
-	}
-
-	gpu_sema_dbg("  %d: GPU read-write VA = 0x%llx", p->page_idx,
-		     p->gpu_va);
-
-	/*
-	 * And now the global mapping. Take the sea lock so that we don't race
-	 * with a concurrent remap.
-	 */
-	__lock_sema_sea(p->sema_sea);
-
-	BUG_ON(p->mapped);
-	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
-				    p->sema_sea->gpu_va, p->sema_sea->map_size,
-				    0,
-				    gk20a_mem_flag_read_only,
-				    false,
-				    APERTURE_SYSMEM);
-	if (!addr) {
-		err = -ENOMEM;
-		BUG();
-		goto fail_unlock;
-	}
-	p->gpu_va_ro = addr;
-	p->mapped = 1;
-
-	gpu_sema_dbg("  %d: GPU read-only  VA = 0x%llx", p->page_idx,
-		     p->gpu_va_ro);
-
-	__unlock_sema_sea(p->sema_sea);
-
-	return 0;
-
-fail_unlock:
-	__unlock_sema_sea(p->sema_sea);
-fail_unmap_sgt:
-	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-		     DMA_BIDIRECTIONAL);
-fail_free_sgt:
-	sg_free_table(p->rw_sg_table);
-fail:
-	kfree(p->rw_sg_table);
-	p->rw_sg_table = NULL;
-	gpu_sema_dbg("  %d: Failed to map semaphore pool!", p->page_idx);
-	return err;
-}
-
-/*
- * Unmap a semaphore_pool.
- */
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
-				struct vm_gk20a *vm)
-{
-	struct gk20a_semaphore_int *hw_sema;
-
-	kunmap(p->cpu_va);
-
-	/* First the global RO mapping... */
-	__lock_sema_sea(p->sema_sea);
-	gk20a_gmmu_unmap(vm, p->gpu_va_ro,
-			 p->sema_sea->map_size, gk20a_mem_flag_none);
-	p->ro_sg_table = NULL;
-	__unlock_sema_sea(p->sema_sea);
-
-	/* And now the private RW mapping. */
-	gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
-	p->gpu_va = 0;
-
-	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-		     DMA_BIDIRECTIONAL);
-
-	sg_free_table(p->rw_sg_table);
-	kfree(p->rw_sg_table);
-	p->rw_sg_table = NULL;
-
-	list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
-		/*
-		 * Make sure the mem addresses are all NULL so if this gets
-		 * reused we will fault.
-		 */
-		hw_sema->value = NULL;
-
-	gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
-}
-
-/*
- * Completely free a sempahore_pool. You should make sure this pool is not
- * mapped otherwise there's going to be a memory leak.
- */
-static void gk20a_semaphore_pool_free(struct kref *ref)
-{
-	struct gk20a_semaphore_pool *p =
-		container_of(ref, struct gk20a_semaphore_pool, ref);
-	struct gk20a_semaphore_sea *s = p->sema_sea;
-	struct gk20a_semaphore_int *hw_sema, *tmp;
-
-	WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
-
-	__lock_sema_sea(s);
-	list_del(&p->pool_list_entry);
-	clear_bit(p->page_idx, s->pools_alloced);
-	s->page_count--;
-	__unlock_sema_sea(s);
-
-	list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
-		kfree(hw_sema);
-
-	gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
-	kfree(p);
-}
-
-void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
-{
-	kref_get(&p->ref);
-}
-
-void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
-{
-	kref_put(&p->ref, gk20a_semaphore_pool_free);
-}
-
-/*
- * Get the address for a semaphore_pool - if global is true then return the
- * global RO address instead of the RW address owned by the semaphore's VM.
- */
-u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global)
-{
-	if (!global)
-		return p->gpu_va;
-
-	return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
-}
-
-static int __gk20a_init_hw_sema(struct channel_gk20a *ch)
-{
-	int hw_sema_idx;
-	int ret = 0;
-	struct gk20a_semaphore_int *hw_sema;
-	struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
-
-	BUG_ON(!p);
-
-	mutex_lock(&p->pool_lock);
-
-	/* Find an available HW semaphore. */
-	hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
-					       PAGE_SIZE / SEMAPHORE_SIZE);
-	if (hw_sema_idx < 0) {
-		ret = hw_sema_idx;
-		goto fail;
-	}
-
-	hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL);
-	if (!hw_sema) {
-		ret = -ENOMEM;
-		goto fail_free_idx;
-	}
-
-	ch->hw_sema = hw_sema;
-	hw_sema->ch = ch;
-	hw_sema->p = p;
-	hw_sema->idx = hw_sema_idx;
-	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
-	atomic_set(&hw_sema->next_value, 0);
-	hw_sema->value = p->cpu_va + hw_sema->offset;
-	writel(0, hw_sema->value);
-
-	list_add(&hw_sema->hw_sema_list, &p->hw_semas);
-
-	mutex_unlock(&p->pool_lock);
-
-	return 0;
-
-fail_free_idx:
-	clear_bit(hw_sema_idx, p->semas_alloced);
-fail:
-	mutex_unlock(&p->pool_lock);
-	return ret;
-}
-
-/*
- * Free the channel used semaphore index
- */
-void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch)
-{
-	struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
-
-	BUG_ON(!p);
-
-	mutex_lock(&p->pool_lock);
-
-	clear_bit(ch->hw_sema->idx, p->semas_alloced);
-
-	/* Make sure that when the ch is re-opened it will get a new HW sema. */
-	list_del(&ch->hw_sema->hw_sema_list);
-	kfree(ch->hw_sema);
-	ch->hw_sema = NULL;
-
-	mutex_unlock(&p->pool_lock);
-}
-
-/*
- * Allocate a semaphore from the passed pool.
- *
- * Since semaphores are ref-counted there's no explicit free for external code
- * to use. When the ref-count hits 0 the internal free will happen.
- */
-struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
-{
-	struct gk20a_semaphore *s;
-	int ret;
-
-	if (!ch->hw_sema) {
-		ret = __gk20a_init_hw_sema(ch);
-		if (ret)
-			return NULL;
-	}
-
-	s = kzalloc(sizeof(*s), GFP_KERNEL);
-	if (!s)
-		return NULL;
-
-	kref_init(&s->ref);
-	s->hw_sema = ch->hw_sema;
-	atomic_set(&s->value, 0);
-
-	/*
-	 * Take a ref on the pool so that we can keep this pool alive for
-	 * as long as this semaphore is alive.
-	 */
-	gk20a_semaphore_pool_get(s->hw_sema->p);
-
-	gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
-
-	return s;
-}
-
-static void gk20a_semaphore_free(struct kref *ref)
-{
-	struct gk20a_semaphore *s =
-		container_of(ref, struct gk20a_semaphore, ref);
-
-	gk20a_semaphore_pool_put(s->hw_sema->p);
-
-	kfree(s);
-}
-
-void gk20a_semaphore_put(struct gk20a_semaphore *s)
-{
-	kref_put(&s->ref, gk20a_semaphore_free);
-}
-
-void gk20a_semaphore_get(struct gk20a_semaphore *s)
-{
-	kref_get(&s->ref);
-}
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
deleted file mode 100644
index 8e09fcfc..00000000
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef SEMAPHORE_GK20A_H
-#define SEMAPHORE_GK20A_H
-
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/delay.h>
-
-#include <nvgpu/allocator.h>
-
-#include "gk20a.h"
-#include "mm_gk20a.h"
-#include "channel_gk20a.h"
-
-#define gpu_sema_dbg(fmt, args...)		\
-	gk20a_dbg(gpu_dbg_sema, fmt, ##args)
-#define gpu_sema_verbose_dbg(fmt, args...)	\
-	gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)
-
-/*
- * Max number of channels that can be used is 512. This of course needs to be
- * fixed to be dynamic but still fast.
- */
-#define SEMAPHORE_POOL_COUNT		512
-#define SEMAPHORE_SIZE			16
-#define SEMAPHORE_SEA_GROWTH_RATE	32
-
-struct gk20a_semaphore_sea;
-
-/*
- * Underlying semaphore data structure. This semaphore can be shared amongst
- * other semaphore instances.
- */
-struct gk20a_semaphore_int {
-	int idx;			/* Semaphore index. */
-	u32 offset;			/* Offset into the pool. */
-	atomic_t next_value;		/* Next available value. */
-	u32 *value;			/* Current value (access w/ readl()). */
-	u32 nr_incrs;			/* Number of increments programmed. */
-	struct gk20a_semaphore_pool *p;	/* Pool that owns this sema. */
-	struct channel_gk20a *ch;	/* Channel that owns this sema. */
-	struct list_head hw_sema_list;	/* List of HW semaphores. */
-};
-
-/*
- * A semaphore which the rest of the driver actually uses. This consists of a
- * pointer to a real semaphore and a value to wait for. This allows one physical
- * semaphore to be shared among an essentially infinite number of submits.
- */
-struct gk20a_semaphore {
-	struct gk20a_semaphore_int *hw_sema;
-
-	atomic_t value;
-	int incremented;
-
-	struct kref ref;
-};
-
-/*
- * A semaphore pool. Each address space will own exactly one of these.
- */
-struct gk20a_semaphore_pool {
-	struct page *page;			/* This pool's page of memory */
-	struct list_head pool_list_entry;	/* Node for list of pools. */
-	void *cpu_va;				/* CPU access to the pool. */
-	u64 gpu_va;				/* GPU access to the pool. */
-	u64 gpu_va_ro;				/* GPU access to the pool. */
-	int page_idx;				/* Index into sea bitmap. */
-
-	struct list_head hw_semas;		/* List of HW semas. */
-	DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
-
-	struct gk20a_semaphore_sea *sema_sea;	/* Sea that owns this pool. */
-
-	struct mutex pool_lock;
-
-	/*
-	 * This is the address spaces's personal RW table. Other channels will
-	 * ultimately map this page as RO.
-	 */
-	struct sg_table *rw_sg_table;
-
-	/*
-	 * This is to keep track of whether the pool has had its sg_table
-	 * updated during sea resizing.
-	 */
-	struct sg_table *ro_sg_table;
-
-	int mapped;
-
-	/*
-	 * Sometimes a channel can be released before other channels are
-	 * done waiting on it. This ref count ensures that the pool doesn't
-	 * go away until all semaphores using this pool are cleaned up first.
-	 */
-	struct kref ref;
-};
-
-/*
- * A sea of semaphores pools. Each pool is owned by a single VM. Since multiple
- * channels can share a VM each channel gets it's own HW semaphore from the
- * pool. Channels then allocate regular semaphores - basically just a value that
- * signifies when a particular job is done.
- */
-struct gk20a_semaphore_sea {
-	struct list_head pool_list;	/* List of pools in this sea. */
-	struct gk20a *gk20a;
-
-	size_t size;			/* Number of pages available. */
-	u64 gpu_va;			/* GPU virtual address of sema sea. */
-	u64 map_size;			/* Size of the mapping. */
-
-	/*
-	 * TODO:
-	 * List of pages that we use to back the pools. The number of pages
-	 * can grow dynamically since allocating 512 pages for all channels at
-	 * once would be a tremendous waste.
-	 */
-	int page_count;			/* Pages allocated to pools. */
-
-	struct sg_table *ro_sg_table;
-	/*
-	struct page *pages[SEMAPHORE_POOL_COUNT];
-	*/
-
-	struct mem_desc sea_mem;
-
-	/*
-	 * Can't use a regular allocator here since the full range of pools are
-	 * not always allocated. Instead just use a bitmap.
-	 */
-	DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
-
-	struct mutex sea_lock;		/* Lock alloc/free calls. */
-};
-
-enum gk20a_mem_rw_flag {
-	gk20a_mem_flag_none = 0,
-	gk20a_mem_flag_read_only = 1,
-	gk20a_mem_flag_write_only = 2,
-};
-
-/*
- * Semaphore sea functions.
- */
-struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
-int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
-			    struct vm_gk20a *vm);
-void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
-			       struct vm_gk20a *vm);
-struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);
-
-/*
- * Semaphore pool functions.
- */
-struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
-	struct gk20a_semaphore_sea *sea);
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
-			     struct vm_gk20a *vm);
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
-				struct vm_gk20a *vm);
-u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
-void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
-void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);
-
-/*
- * Semaphore functions.
- */
-struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
-void gk20a_semaphore_put(struct gk20a_semaphore *s);
-void gk20a_semaphore_get(struct gk20a_semaphore *s);
-void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch);
-
-/*
- * Return the address of a specific semaphore.
- *
- * Don't call this on a semaphore you don't own - the VA returned will make no
- * sense in your specific channel's VM.
- */
-static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
-{
-	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
-		s->hw_sema->offset;
-}
-
-/*
- * Get the global RO address for the semaphore. Can be called on any semaphore
- * regardless of whether you own it.
- */
-static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
-{
-	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
-		s->hw_sema->offset;
-}
-
-static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
-{
-	return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
-		hw_sema->offset;
-}
-
-/*
- * TODO: handle wrap around... Hmm, how to do this?
- */
-static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
-{
-	u32 sema_val = readl(s->hw_sema->value);
-
-	/*
-	 * If the underlying semaphore value is greater than or equal to
-	 * the value of the semaphore then the semaphore has been signaled
-	 * (a.k.a. released).
-	 */
-	return (int)sema_val >= atomic_read(&s->value);
-}
-
-static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
-{
-	return !gk20a_semaphore_is_released(s);
-}
-
-/*
- * Read the underlying value from a semaphore.
- */
-static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
-{
-	return readl(s->hw_sema->value);
-}
-
-static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
-{
-	return (u32)atomic_read(&s->value);
-}
-
-static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
-{
-	return (u32)atomic_read(&s->hw_sema->next_value);
-}
-
-/*
- * If @force is set then this will not wait for the underlying semaphore to
- * catch up to the passed semaphore.
- */
-static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s,
-					     bool force)
-{
-	u32 current_val;
-	u32 val = gk20a_semaphore_get_value(s);
-	int attempts = 0;
-
-	/*
-	 * Wait until the sema value is 1 less than the write value. That
-	 * way this function is essentially an increment.
-	 *
-	 * TODO: tune the wait a little better.
-	 */
-	while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
-		if (force)
-			break;
-		msleep(100);
-		attempts += 1;
-		if (attempts > 100) {
-			WARN(1, "Stall on sema release!");
-			return;
-		}
-	}
-
-	/*
-	 * If the semaphore has already passed the value we would write then
-	 * this is really just a NO-OP.
-	 */
-	if (current_val >= val)
-		return;
-
-	writel(val, s->hw_sema->value);
-
-	gpu_sema_verbose_dbg("(c=%d) WRITE %u",
-			     s->hw_sema->ch->hw_chid, val);
-}
-
-static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
-{
-	__gk20a_semaphore_release(s, false);
-}
-
-/*
- * Configure a software based increment on this semaphore. This is useful for
- * when we want the GPU to wait on a SW event before processing a channel.
- * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
- * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
- * then allows the GPU to continue.
- *
- * Also used to prep a semaphore for an INCR by the GPU.
- */
-static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
-{
-	BUG_ON(s->incremented);
-
-	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
-	s->incremented = 1;
-
-	gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
-			     s->hw_sema->ch->hw_chid,
-			     gk20a_semaphore_next_value(s));
-}
-#endif
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index b642981c..e7bacac8 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Sync Framework Integration
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -13,8 +13,6 @@
  * more details.
  */
 
-#include "sync_gk20a.h"
-
 #include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/file.h>
@@ -23,9 +21,14 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+
 #include <uapi/linux/nvgpu.h>
+
+#include <nvgpu/semaphore.h>
+
 #include "../drivers/staging/android/sync.h"
-#include "semaphore_gk20a.h"
+
+#include "sync_gk20a.h"
 
 static const struct sync_timeline_ops gk20a_sync_timeline_ops;
 
diff --git a/drivers/gpu/nvgpu/gm206/acr_gm206.c b/drivers/gpu/nvgpu/gm206/acr_gm206.c
index 238114e3..3af59374 100644
--- a/drivers/gpu/nvgpu/gm206/acr_gm206.c
+++ b/drivers/gpu/nvgpu/gm206/acr_gm206.c
@@ -21,7 +21,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/pmu_gk20a.h"
-#include "gk20a/semaphore_gk20a.h"
 
 #include "acr.h"
 #include "acr_gm206.h"
diff --git a/drivers/gpu/nvgpu/gm206/ce_gm206.c b/drivers/gpu/nvgpu/gm206/ce_gm206.c
index 5d5fd432..dd3eac95 100644
--- a/drivers/gpu/nvgpu/gm206/ce_gm206.c
+++ b/drivers/gpu/nvgpu/gm206/ce_gm206.c
@@ -1,7 +1,7 @@
 /*
  * GM206 Copy Engine.
  *
- * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -29,7 +29,6 @@
 #include <linux/nvhost.h>
 
 #include "gk20a/debug_gk20a.h"
-#include "gk20a/semaphore_gk20a.h"
 
 #include <nvgpu/hw/gm206/hw_ce2_gm206.h>
 #include <nvgpu/hw/gm206/hw_pbdma_gm206.h>
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 060dc778..40a28136 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -26,7 +26,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/pmu_gk20a.h"
-#include "gk20a/semaphore_gk20a.h"
 
 #include <nvgpu/hw/gm20b/hw_pwr_gm20b.h>
 
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 9e844994..f8d7ba70 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -21,7 +21,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/pmu_gk20a.h"
-#include "gk20a/semaphore_gk20a.h"
 
 #include "gm206/acr_gm206.h"
 #include "gm20b/acr_gm20b.h"
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index e04aec7d..a7aa4003 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -24,7 +24,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
-#include "gk20a/semaphore_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 
 #include "gm20b/gr_gm20b.h"
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 835d33f3..776bbe85 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B MMU
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -17,7 +17,6 @@
 #include <linux/dma-mapping.h>
 
 #include "gk20a/gk20a.h"
-#include "gk20a/semaphore_gk20a.h"
 
 #include "mm_gp10b.h"
 #include "rpfb_gp10b.h"
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
new file mode 100644
index 00000000..07a27584
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef SEMAPHORE_GK20A_H
+#define SEMAPHORE_GK20A_H
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+
+#include <nvgpu/allocator.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+#include "gk20a/channel_gk20a.h"
+
+#define gpu_sema_dbg(fmt, args...)		\
+	gk20a_dbg(gpu_dbg_sema, fmt, ##args)
+#define gpu_sema_verbose_dbg(fmt, args...)	\
+	gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)
+
+/*
+ * Max number of channels that can be used is 512. This of course needs to be
+ * fixed to be dynamic but still fast.
+ */
+#define SEMAPHORE_POOL_COUNT		512
+#define SEMAPHORE_SIZE			16
+#define SEMAPHORE_SEA_GROWTH_RATE	32
+
+struct gk20a_semaphore_sea;
+
+/*
+ * Underlying semaphore data structure. This semaphore can be shared amongst
+ * other semaphore instances.
+ */
+struct gk20a_semaphore_int {
+	int idx;			/* Semaphore index. */
+	u32 offset;			/* Offset into the pool. */
+	atomic_t next_value;		/* Next available value. */
+	u32 *value;			/* Current value (access w/ readl()). */
+	u32 nr_incrs;			/* Number of increments programmed. */
+	struct gk20a_semaphore_pool *p;	/* Pool that owns this sema. */
+	struct channel_gk20a *ch;	/* Channel that owns this sema. */
+	struct list_head hw_sema_list;	/* List of HW semaphores. */
+};
+
+/*
+ * A semaphore which the rest of the driver actually uses. This consists of a
+ * pointer to a real semaphore and a value to wait for. This allows one physical
+ * semaphore to be shared among an essentially infinite number of submits.
+ */
+struct gk20a_semaphore {
+	struct gk20a_semaphore_int *hw_sema;
+
+	atomic_t value;
+	int incremented;
+
+	struct kref ref;
+};
+
+/*
+ * A semaphore pool. Each address space will own exactly one of these.
+ */
+struct gk20a_semaphore_pool {
+	struct page *page;			/* This pool's page of memory */
+	struct list_head pool_list_entry;	/* Node for list of pools. */
+	void *cpu_va;				/* CPU access to the pool. */
+	u64 gpu_va;				/* GPU access to the pool. */
+	u64 gpu_va_ro;				/* RO GPU access to the pool. */
+	int page_idx;				/* Index into sea bitmap. */
+
+	struct list_head hw_semas;		/* List of HW semas. */
+	DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
+
+	struct gk20a_semaphore_sea *sema_sea;	/* Sea that owns this pool. */
+
+	struct mutex pool_lock;
+
+	/*
+	 * This is the address space's personal RW table. Other channels will
+	 * ultimately map this page as RO.
+	 */
+	struct sg_table *rw_sg_table;
+
+	/*
+	 * This is to keep track of whether the pool has had its sg_table
+	 * updated during sea resizing.
+	 */
+	struct sg_table *ro_sg_table;
+
+	int mapped;
+
+	/*
+	 * Sometimes a channel can be released before other channels are
+	 * done waiting on it. This ref count ensures that the pool doesn't
+	 * go away until all semaphores using this pool are cleaned up first.
+	 */
+	struct kref ref;
+};
+
+/*
+ * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
+ * channels can share a VM each channel gets its own HW semaphore from the
+ * pool. Channels then allocate regular semaphores - basically just a value that
+ * signifies when a particular job is done.
+ */
+struct gk20a_semaphore_sea {
+	struct list_head pool_list;	/* List of pools in this sea. */
+	struct gk20a *gk20a;
+
+	size_t size;			/* Number of pages available. */
+	u64 gpu_va;			/* GPU virtual address of sema sea. */
+	u64 map_size;			/* Size of the mapping. */
+
+	/*
+	 * TODO:
+	 * List of pages that we use to back the pools. The number of pages
+	 * can grow dynamically since allocating 512 pages for all channels at
+	 * once would be a tremendous waste.
+	 */
+	int page_count;			/* Pages allocated to pools. */
+
+	struct sg_table *ro_sg_table;
+	/*
+	struct page *pages[SEMAPHORE_POOL_COUNT];
+	*/
+
+	struct mem_desc sea_mem;
+
+	/*
+	 * Can't use a regular allocator here since the full range of pools are
+	 * not always allocated. Instead just use a bitmap.
+	 */
+	DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
+
+	struct mutex sea_lock;		/* Lock alloc/free calls. */
+};
+
+/*
+ * Semaphore sea functions.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
+int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
+			    struct vm_gk20a *vm);
+void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
+			       struct vm_gk20a *vm);
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);
+
+/*
+ * Semaphore pool functions.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+	struct gk20a_semaphore_sea *sea);
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
+			     struct vm_gk20a *vm);
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
+				struct vm_gk20a *vm);
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);
+
+/*
+ * Semaphore functions.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
+void gk20a_semaphore_put(struct gk20a_semaphore *s);
+void gk20a_semaphore_get(struct gk20a_semaphore *s);
+void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch);
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
+{
+	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
+		s->hw_sema->offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
+{
+	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
+		s->hw_sema->offset;
+}
+
+static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
+{
+	return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
+		hw_sema->offset;
+}
+
+/*
+ * TODO: handle wrap around... Hmm, how to do this?
+ */
+static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
+{
+	u32 sema_val = readl(s->hw_sema->value);
+
+	/*
+	 * If the underlying semaphore value is greater than or equal to
+	 * the value of the semaphore then the semaphore has been signaled
+	 * (a.k.a. released).
+	 */
+	return (int)sema_val >= atomic_read(&s->value);
+}
+
+static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
+{
+	return !gk20a_semaphore_is_released(s);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
+{
+	return readl(s->hw_sema->value);
+}
+
+static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
+{
+	return (u32)atomic_read(&s->value);
+}
+
+static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
+{
+	return (u32)atomic_read(&s->hw_sema->next_value);
+}
+
+/*
+ * If @force is set then this will not wait for the underlying semaphore to
+ * catch up to the passed semaphore.
+ */
+static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s,
+					     bool force)
+{
+	u32 current_val;
+	u32 val = gk20a_semaphore_get_value(s);
+	int attempts = 0;
+
+	/*
+	 * Wait until the sema value is 1 less than the write value. That
+	 * way this function is essentially an increment.
+	 *
+	 * TODO: tune the wait a little better.
+	 */
+	while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
+		if (force)
+			break;
+		msleep(100);
+		attempts += 1;
+		if (attempts > 100) {
+			WARN(1, "Stall on sema release!");
+			return;
+		}
+	}
+
+	/*
+	 * If the semaphore has already passed the value we would write then
+	 * this is really just a NO-OP.
+	 */
+	if (current_val >= val)
+		return;
+
+	writel(val, s->hw_sema->value);
+
+	gpu_sema_verbose_dbg("(c=%d) WRITE %u",
+			     s->hw_sema->ch->hw_chid, val);
+}
+
+static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
+{
+	__gk20a_semaphore_release(s, false);
+}
+
+/*
+ * Configure a software based increment on this semaphore. This is useful for
+ * when we want the GPU to wait on a SW event before processing a channel.
+ * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
+ * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
+ * then allows the GPU to continue.
+ *
+ * Also used to prep a semaphore for an INCR by the GPU.
+ */
+static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
+{
+	BUG_ON(s->incremented);
+
+	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
+	s->incremented = 1;
+
+	gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
+			     s->hw_sema->ch->hw_chid,
+			     gk20a_semaphore_next_value(s));
+}
+#endif
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 66fda2d9..3d908b0d 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Memory Management
  *
- * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -16,7 +16,6 @@
 #include <linux/dma-mapping.h>
 #include "vgpu/vgpu.h"
 #include "vgpu_mm_gp10b.h"
-#include "gk20a/semaphore_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
 static int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index f97acd47..eb5f7749 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Memory Management
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -15,7 +15,6 @@
 
 #include <linux/dma-mapping.h>
 #include "vgpu/vgpu.h"
-#include "gk20a/semaphore_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
 static int vgpu_init_mm_setup_sw(struct gk20a *g)
-- 
cgit v1.2.2