From bcf60a22c3e8671468517d34aa37548272455c1f Mon Sep 17 00:00:00 2001 From: Lauri Peltonen Date: Fri, 18 Jul 2014 02:21:34 +0300 Subject: gpu: nvgpu: Add gk20a_fence type When moving compression state tracking and compbit management ops to kernel, we need to attach a fence to dma-buf metadata, along with the compbit state. To make in-kernel fence management easier, introduce a new gk20a_fence abstraction. A gk20a_fence may be backed by a semaphore or a syncpoint (id, value) pair. If the kernel is configured with CONFIG_SYNC, it will also contain a sync_fence. The gk20a_fence can easily be converted back to a syncpoint (id, value) pair or sync FD when we need to return it to user space. Change gk20a_submit_channel_gpfifo to return a gk20a_fence instead of nvhost_fence. This is to facilitate work submission initiated from kernel. Bug 1509620 Change-Id: I6154764a279dba83f5e91ba9e0cb5e227ca08e1b Signed-off-by: Lauri Peltonen Reviewed-on: http://git-master/r/439846 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/Makefile | 1 + drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 131 +++++++++------ drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 13 +- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 233 +++++++-------------------- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 74 ++++----- drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 229 ++++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/fence_gk20a.h | 68 ++++++++ drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 17 +- drivers/gpu/nvgpu/gk20a/sync_gk20a.h | 2 +- 9 files changed, 476 insertions(+), 292 deletions(-) create mode 100644 drivers/gpu/nvgpu/gk20a/fence_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/fence_gk20a.h diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile index d234db50..246f9447 100644 --- a/drivers/gpu/nvgpu/gk20a/Makefile +++ b/drivers/gpu/nvgpu/gk20a/Makefile @@ -21,6 +21,7 @@ nvgpu-y := \ pmu_gk20a.o \ 
priv_ring_gk20a.o \ semaphore_gk20a.o \ + fence_gk20a.o \ clk_gk20a.o \ therm_gk20a.o \ gr_ctx_gk20a_sim.o \ diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index de82be36..9f8876c3 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -33,6 +33,7 @@ #include "gk20a.h" #include "dbg_gpu_gk20a.h" +#include "fence_gk20a.h" #include "semaphore_gk20a.h" #include "hw_ram_gk20a.h" @@ -418,8 +419,8 @@ void gk20a_channel_abort(struct channel_gk20a *ch) semaphore synchronization) */ mutex_lock(&ch->jobs_lock); list_for_each_entry_safe(job, n, &ch->jobs, list) { - if (job->post_fence.semaphore) { - gk20a_semaphore_release(job->post_fence.semaphore); + if (job->post_fence->semaphore) { + gk20a_semaphore_release(job->post_fence->semaphore); released_job_semaphore = true; } } @@ -685,8 +686,12 @@ unbind: ch->vpr = false; ch->vm = NULL; - gk20a_channel_fence_close(&ch->last_submit.pre_fence); - gk20a_channel_fence_close(&ch->last_submit.post_fence); + mutex_lock(&ch->submit_lock); + gk20a_fence_put(ch->last_submit.pre_fence); + gk20a_fence_put(ch->last_submit.post_fence); + ch->last_submit.pre_fence = NULL; + ch->last_submit.post_fence = NULL; + mutex_unlock(&ch->submit_lock); WARN_ON(ch->sync); /* unlink all debug sessions */ @@ -1119,8 +1124,12 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, ch_vm = c->vm; c->cmds_pending = false; - gk20a_channel_fence_close(&c->last_submit.pre_fence); - gk20a_channel_fence_close(&c->last_submit.post_fence); + mutex_lock(&c->submit_lock); + gk20a_fence_put(c->last_submit.pre_fence); + gk20a_fence_put(c->last_submit.post_fence); + c->last_submit.pre_fence = NULL; + c->last_submit.post_fence = NULL; + mutex_unlock(&c->submit_lock); c->ramfc.offset = 0; c->ramfc.size = ram_in_ramfc_s() / 8; @@ -1303,8 +1312,10 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c) } } - gk20a_channel_fence_close(&c->last_submit.pre_fence); - 
gk20a_channel_fence_close(&c->last_submit.post_fence); + gk20a_fence_put(c->last_submit.pre_fence); + gk20a_fence_put(c->last_submit.post_fence); + c->last_submit.pre_fence = NULL; + c->last_submit.post_fence = NULL; err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence); if (unlikely(err)) { @@ -1312,7 +1323,7 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c) return err; } - WARN_ON(!c->last_submit.post_fence.wfi); + WARN_ON(!c->last_submit.post_fence->wfi); c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva); c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) | @@ -1378,8 +1389,8 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g) } static int gk20a_channel_add_job(struct channel_gk20a *c, - struct gk20a_channel_fence *pre_fence, - struct gk20a_channel_fence *post_fence) + struct gk20a_fence *pre_fence, + struct gk20a_fence *post_fence) { struct vm_gk20a *vm = c->vm; struct channel_gk20a_job *job = NULL; @@ -1404,8 +1415,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, job->num_mapped_buffers = num_mapped_buffers; job->mapped_buffers = mapped_buffers; - gk20a_channel_fence_dup(pre_fence, &job->pre_fence); - gk20a_channel_fence_dup(post_fence, &job->post_fence); + job->pre_fence = gk20a_fence_get(pre_fence); + job->post_fence = gk20a_fence_get(post_fence); mutex_lock(&c->jobs_lock); list_add_tail(&job->list, &c->jobs); @@ -1424,18 +1435,19 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) mutex_lock(&c->submit_lock); mutex_lock(&c->jobs_lock); list_for_each_entry_safe(job, n, &c->jobs, list) { - bool completed = WARN_ON(!c->sync) || - c->sync->is_expired(c->sync, &job->post_fence); + bool completed = gk20a_fence_is_expired(job->post_fence); if (!completed) break; + c->sync->signal_timeline(c->sync); + gk20a_vm_put_buffers(vm, job->mapped_buffers, job->num_mapped_buffers); /* Close the fences (this will unref the semaphores and release * them to the pool). 
*/ - gk20a_channel_fence_close(&job->pre_fence); - gk20a_channel_fence_close(&job->post_fence); + gk20a_fence_put(job->pre_fence); + gk20a_fence_put(job->post_fence); /* job is done. release its reference to vm */ gk20a_vm_put(vm); @@ -1453,7 +1465,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) */ if (list_empty(&c->jobs)) { if (c->sync && c->sync->aggressive_destroy && - c->sync->is_expired(c->sync, &c->last_submit.post_fence)) { + gk20a_fence_is_expired(c->last_submit.post_fence)) { c->sync->destroy(c->sync); c->sync = NULL; } @@ -1477,8 +1489,9 @@ void add_wait_cmd(u32 *ptr, u32 id, u32 thresh) int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, struct nvhost_gpfifo *gpfifo, u32 num_entries, + u32 flags, struct nvhost_fence *fence, - u32 flags) + struct gk20a_fence **fence_out) { struct gk20a *g = c->g; struct device *d = dev_from_gk20a(g); @@ -1487,8 +1500,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, int wait_fence_fd = -1; struct priv_cmd_entry *wait_cmd = NULL; struct priv_cmd_entry *incr_cmd = NULL; - struct gk20a_channel_fence pre_fence = { 0 }; - struct gk20a_channel_fence post_fence = { 0 }; + struct gk20a_fence *pre_fence = NULL; + struct gk20a_fence *post_fence = NULL; /* we might need two extra gpfifo entries - one for pre fence * and one for post fence. 
*/ const int extra_entries = 2; @@ -1591,18 +1604,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, /* always insert syncpt increment at end of gpfifo submission to keep track of method completion for idle railgating */ - if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET && - flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) - err = c->sync->incr_user_fd(c->sync, wait_fence_fd, &incr_cmd, - &post_fence, - need_wfi, - &fence->syncpt_id); - else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) - err = c->sync->incr_user_syncpt(c->sync, &incr_cmd, - &post_fence, - need_wfi, - &fence->syncpt_id, - &fence->value); + if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) + err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd, + &post_fence, need_wfi); else err = c->sync->incr(c->sync, &incr_cmd, &post_fence); @@ -1653,13 +1657,15 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, incr_cmd->gp_put = c->gpfifo.put; } - gk20a_channel_fence_close(&c->last_submit.pre_fence); - gk20a_channel_fence_close(&c->last_submit.post_fence); + gk20a_fence_put(c->last_submit.pre_fence); + gk20a_fence_put(c->last_submit.post_fence); c->last_submit.pre_fence = pre_fence; c->last_submit.post_fence = post_fence; + if (fence_out) + *fence_out = gk20a_fence_get(post_fence); /* TODO! Check for errors... */ - gk20a_channel_add_job(c, &pre_fence, &post_fence); + gk20a_channel_add_job(c, pre_fence, post_fence); c->cmds_pending = true; gk20a_bar1_writel(g, @@ -1672,8 +1678,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, c->hw_chid, num_entries, flags, - fence ? fence->syncpt_id : 0, - fence ? 
fence->value : 0); + post_fence->syncpt_id, + post_fence->syncpt_value); gk20a_dbg_info("post-submit put %d, get %d, size %d", c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); @@ -1685,8 +1691,8 @@ clean_up: gk20a_err(d, "fail"); free_priv_cmdbuf(c, wait_cmd); free_priv_cmdbuf(c, incr_cmd); - gk20a_channel_fence_close(&pre_fence); - gk20a_channel_fence_close(&post_fence); + gk20a_fence_put(pre_fence); + gk20a_fence_put(post_fence); gk20a_idle(g->dev); return err; } @@ -1719,7 +1725,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) { int err = 0; - struct gk20a_channel_fence *fence = &ch->last_submit.post_fence; + struct gk20a_fence *fence = ch->last_submit.post_fence; if (!ch->cmds_pending) return 0; @@ -1728,26 +1734,25 @@ int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) if (ch->has_timedout) return -ETIMEDOUT; - if (!(fence->valid && fence->wfi) && ch->obj_class != KEPLER_C) { + if (!(fence && fence->wfi) && ch->obj_class != KEPLER_C) { gk20a_dbg_fn("issuing wfi, incr to finish the channel"); err = gk20a_channel_submit_wfi(ch); + fence = ch->last_submit.post_fence; } if (err) return err; - BUG_ON(!(fence->valid && fence->wfi) && ch->obj_class != KEPLER_C); + BUG_ON(!(fence && fence->wfi) && ch->obj_class != KEPLER_C); gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p", - fence->thresh, fence->semaphore); + fence->syncpt_value, fence->semaphore); - if (ch->sync) { - err = ch->sync->wait_cpu(ch->sync, fence, timeout); - if (WARN_ON(err)) - dev_warn(dev_from_gk20a(ch->g), - "timed out waiting for gk20a channel to finish"); - else - ch->cmds_pending = false; - } + err = gk20a_fence_wait(fence, timeout); + if (WARN_ON(err)) + dev_warn(dev_from_gk20a(ch->g), + "timed out waiting for gk20a channel to finish"); + else + ch->cmds_pending = false; return err; } @@ -2014,6 +2019,7 @@ static int gk20a_ioctl_channel_submit_gpfifo( struct channel_gk20a 
*ch, struct nvhost_submit_gpfifo_args *args) { + struct gk20a_fence *fence_out; void *gpfifo; u32 size; int ret = 0; @@ -2036,7 +2042,26 @@ static int gk20a_ioctl_channel_submit_gpfifo( } ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries, - &args->fence, args->flags); + args->flags, &args->fence, + &fence_out); + + if (ret) + goto clean_up; + + /* Convert fence_out to something we can pass back to user space. */ + if (args->flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { + if (args->flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { + int fd = gk20a_fence_install_fd(fence_out); + if (fd < 0) + ret = fd; + else + args->fence.syncpt_id = fd; + } else { + args->fence.syncpt_id = fence_out->syncpt_id; + args->fence.value = fence_out->syncpt_value; + } + } + gk20a_fence_put(fence_out); clean_up: kfree(gpfifo); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 161d2b83..2ea3eccb 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -29,6 +29,7 @@ struct gk20a; struct gr_gk20a; struct dbg_session_gk20a; +struct gk20a_fence; #include "channel_sync_gk20a.h" @@ -68,8 +69,8 @@ struct channel_ctx_gk20a { struct channel_gk20a_job { struct mapped_buffer_node **mapped_buffers; int num_mapped_buffers; - struct gk20a_channel_fence pre_fence; - struct gk20a_channel_fence post_fence; + struct gk20a_fence *pre_fence; + struct gk20a_fence *post_fence; struct list_head list; }; @@ -117,8 +118,9 @@ struct channel_gk20a { bool cmds_pending; struct { - struct gk20a_channel_fence pre_fence; - struct gk20a_channel_fence post_fence; + /* These fences should be accessed with submit_lock held. 
*/ + struct gk20a_fence *pre_fence; + struct gk20a_fence *post_fence; } last_submit; void (*remove_support)(struct channel_gk20a *); @@ -184,8 +186,9 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, struct nvhost_gpfifo *gpfifo, u32 num_entries, + u32 flags, struct nvhost_fence *fence, - u32 flags); + struct gk20a_fence **fence_out); int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, struct nvhost_alloc_gpfifo_args *args); diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 0dcc5abb..4d366fa9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -19,6 +19,7 @@ #include "channel_sync_gk20a.h" #include "gk20a.h" +#include "fence_gk20a.h" #include "semaphore_gk20a.h" #include "sync_gk20a.h" #include "mm_gk20a.h" @@ -52,33 +53,9 @@ static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh) ptr[3] = (id << 8) | 0x10; } -int gk20a_channel_syncpt_wait_cpu(struct gk20a_channel_sync *s, - struct gk20a_channel_fence *fence, - int timeout) -{ - struct gk20a_channel_syncpt *sp = - container_of(s, struct gk20a_channel_syncpt, ops); - if (!fence->valid) - return 0; - return nvhost_syncpt_wait_timeout_ext( - sp->host1x_pdev, sp->id, fence->thresh, - timeout, NULL, NULL); -} - -bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s, - struct gk20a_channel_fence *fence) -{ - struct gk20a_channel_syncpt *sp = - container_of(s, struct gk20a_channel_syncpt, ops); - if (!fence->valid) - return true; - return nvhost_syncpt_is_expired_ext(sp->host1x_pdev, sp->id, - fence->thresh); -} - int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id, u32 thresh, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); @@ -103,13 +80,13 @@ int 
gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id, add_wait_cmd(&wait_cmd->ptr[0], id, thresh); *entry = wait_cmd; - fence->valid = false; + *fence = NULL; return 0; } int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { #ifdef CONFIG_SYNC int i; @@ -164,7 +141,7 @@ int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, sync_fence_put(sync_fence); *entry = wait_cmd; - fence->valid = false; + *fence = NULL; return 0; #else return -ENODEV; @@ -181,7 +158,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, bool gfx_class, bool wfi_cmd, bool register_irq, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { u32 thresh; int incr_cmd_size; @@ -253,16 +230,15 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, WARN(err, "failed to set submit complete interrupt"); } - fence->thresh = thresh; - fence->valid = true; - fence->wfi = wfi_cmd; + *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, + wfi_cmd); *entry = incr_cmd; return 0; } int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { return __gk20a_channel_syncpt_incr(s, false /* use host class */, @@ -273,7 +249,7 @@ int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); @@ -286,58 +262,36 @@ int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, entry, fence); } -int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s, - struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence, - 
bool wfi, - u32 *id, u32 *thresh) +int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s, + int wait_fence_fd, + struct priv_cmd_entry **entry, + struct gk20a_fence **fence, + bool wfi) { struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); /* Need to do 'host incr + wfi' or 'gfx incr' since we return the fence * to user space. */ - int err = __gk20a_channel_syncpt_incr(s, + return __gk20a_channel_syncpt_incr(s, wfi && sp->c->obj_class == KEPLER_C /* use gfx class? */, wfi && sp->c->obj_class != KEPLER_C /* wfi if host class */, true /* register irq */, entry, fence); - if (err) - return err; - *id = sp->id; - *thresh = fence->thresh; - return 0; } -int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s, - int wait_fence_fd, - struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence, - bool wfi, - int *fd) +void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s) { -#ifdef CONFIG_SYNC - int err; - struct nvhost_ctrl_sync_fence_info pt; struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); - err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence, wfi, - &pt.id, &pt.thresh); - if (err) - return err; - return nvhost_sync_create_fence_fd(sp->host1x_pdev, &pt, 1, - "fence", fd); -#else - return -ENODEV; -#endif + nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id); } -void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s) +static void gk20a_channel_syncpt_signal_timeline( + struct gk20a_channel_sync *s) { - struct gk20a_channel_syncpt *sp = - container_of(s, struct gk20a_channel_syncpt, ops); - nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id); + /* Nothing to do. 
*/ } static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) @@ -366,15 +320,13 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c) return NULL; } - sp->ops.wait_cpu = gk20a_channel_syncpt_wait_cpu; - sp->ops.is_expired = gk20a_channel_syncpt_is_expired; sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt; sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd; sp->ops.incr = gk20a_channel_syncpt_incr; sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi; - sp->ops.incr_user_syncpt = gk20a_channel_syncpt_incr_user_syncpt; - sp->ops.incr_user_fd = gk20a_channel_syncpt_incr_user_fd; + sp->ops.incr_user = gk20a_channel_syncpt_incr_user; sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max; + sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; sp->ops.destroy = gk20a_channel_syncpt_destroy; sp->ops.aggressive_destroy = true; @@ -460,48 +412,10 @@ static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload, return i; } -static int gk20a_channel_semaphore_wait_cpu( - struct gk20a_channel_sync *s, - struct gk20a_channel_fence *fence, - int timeout) -{ - int remain; - struct gk20a_channel_semaphore *sp = - container_of(s, struct gk20a_channel_semaphore, ops); - if (!fence->valid || WARN_ON(!fence->semaphore)) - return 0; - - remain = wait_event_interruptible_timeout( - sp->c->semaphore_wq, - !gk20a_semaphore_is_acquired(fence->semaphore), - timeout); - if (remain == 0 && gk20a_semaphore_is_acquired(fence->semaphore)) - return -ETIMEDOUT; - else if (remain < 0) - return remain; - return 0; -} - -static bool gk20a_channel_semaphore_is_expired( - struct gk20a_channel_sync *s, - struct gk20a_channel_fence *fence) -{ - bool expired; - struct gk20a_channel_semaphore *sp = - container_of(s, struct gk20a_channel_semaphore, ops); - if (!fence->valid || WARN_ON(!fence->semaphore)) - return true; - - expired = !gk20a_semaphore_is_acquired(fence->semaphore); - if (expired) - gk20a_sync_timeline_signal(sp->timeline); - return expired; -} - static int 
gk20a_channel_semaphore_wait_syncpt( struct gk20a_channel_sync *s, u32 id, u32 thresh, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { struct gk20a_channel_semaphore *sema = container_of(s, struct gk20a_channel_semaphore, ops); @@ -513,7 +427,7 @@ static int gk20a_channel_semaphore_wait_syncpt( static int gk20a_channel_semaphore_wait_fd( struct gk20a_channel_sync *s, int fd, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { struct gk20a_channel_semaphore *sema = container_of(s, struct gk20a_channel_semaphore, ops); @@ -558,6 +472,11 @@ static int gk20a_channel_semaphore_wait_fd( WARN_ON(written != wait_cmd->size); sync_fence_wait_async(sync_fence, &w->waiter); + /* XXX - this fixes an actual bug, we need to hold a ref to this + semaphore while the job is in flight. */ + *fence = gk20a_fence_from_semaphore(sema->timeline, w->sema, + &c->semaphore_wq, + NULL, false); *entry = wait_cmd; return 0; fail: @@ -575,8 +494,9 @@ fail: static int __gk20a_channel_semaphore_incr( struct gk20a_channel_sync *s, bool wfi_cmd, + struct sync_fence *dependency, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { u64 va; int incr_cmd_size; @@ -608,9 +528,9 @@ static int __gk20a_channel_semaphore_incr( written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd); WARN_ON(written != incr_cmd_size); - fence->valid = true; - fence->wfi = wfi_cmd; - fence->semaphore = semaphore; + *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, + &c->semaphore_wq, + dependency, wfi_cmd); *entry = incr_cmd; return 0; } @@ -618,72 +538,54 @@ static int __gk20a_channel_semaphore_incr( static int gk20a_channel_semaphore_incr_wfi( struct gk20a_channel_sync *s, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { return __gk20a_channel_semaphore_incr(s, true /* wfi */, + NULL, entry, fence); } static int 
gk20a_channel_semaphore_incr( struct gk20a_channel_sync *s, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence) + struct gk20a_fence **fence) { /* Don't put wfi cmd to this one since we're not returning * a fence to user space. */ return __gk20a_channel_semaphore_incr(s, false /* no wfi */, - entry, fence); -} - -static int gk20a_channel_semaphore_incr_user_syncpt( - struct gk20a_channel_sync *s, - struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence, - bool wfi, - u32 *id, u32 *thresh) -{ - struct gk20a_channel_semaphore *sema = - container_of(s, struct gk20a_channel_semaphore, ops); - struct device *dev = dev_from_gk20a(sema->c->g); - gk20a_err(dev, "trying to use syncpoint synchronization"); - return -ENODEV; + NULL, entry, fence); } -static int gk20a_channel_semaphore_incr_user_fd( +static int gk20a_channel_semaphore_incr_user( struct gk20a_channel_sync *s, int wait_fence_fd, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence, - bool wfi, - int *fd) + struct gk20a_fence **fence, + bool wfi) { - struct gk20a_channel_semaphore *sema = - container_of(s, struct gk20a_channel_semaphore, ops); #ifdef CONFIG_SYNC struct sync_fence *dependency = NULL; int err; - err = __gk20a_channel_semaphore_incr(s, wfi, - entry, fence); - if (err) - return err; - if (wait_fence_fd >= 0) { dependency = gk20a_sync_fence_fdget(wait_fence_fd); if (!dependency) return -EINVAL; } - *fd = gk20a_sync_fence_create(sema->timeline, fence->semaphore, - dependency, "fence"); - if (*fd < 0) { + err = __gk20a_channel_semaphore_incr(s, wfi, dependency, + entry, fence); + if (err) { if (dependency) sync_fence_put(dependency); - return *fd; + return err; } + return 0; #else + struct gk20a_channel_semaphore *sema = + container_of(s, struct gk20a_channel_semaphore, ops); gk20a_err(dev_from_gk20a(sema->c->g), "trying to use sync fds with CONFIG_SYNC disabled"); return -ENODEV; @@ -695,6 +597,14 @@ static void gk20a_channel_semaphore_set_min_eq_max(struct 
gk20a_channel_sync *s) /* Nothing to do. */ } +static void gk20a_channel_semaphore_signal_timeline( + struct gk20a_channel_sync *s) +{ + struct gk20a_channel_semaphore *sp = + container_of(s, struct gk20a_channel_semaphore, ops); + gk20a_sync_timeline_signal(sp->timeline); +} + static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) { struct gk20a_channel_semaphore *sema = @@ -746,15 +656,13 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) if (!sema->timeline) goto clean_up; #endif - sema->ops.wait_cpu = gk20a_channel_semaphore_wait_cpu; - sema->ops.is_expired = gk20a_channel_semaphore_is_expired; sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt; sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd; sema->ops.incr = gk20a_channel_semaphore_incr; sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi; - sema->ops.incr_user_syncpt = gk20a_channel_semaphore_incr_user_syncpt; - sema->ops.incr_user_fd = gk20a_channel_semaphore_incr_user_fd; + sema->ops.incr_user = gk20a_channel_semaphore_incr_user; sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max; + sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; sema->ops.destroy = gk20a_channel_semaphore_destroy; /* Aggressively destroying the semaphore sync would cause overhead @@ -775,26 +683,3 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c) #endif return gk20a_channel_semaphore_create(c); } - -static inline bool gk20a_channel_fence_is_closed(struct gk20a_channel_fence *f) -{ - if (f->valid || f->semaphore) - return false; - return true; -} - -void gk20a_channel_fence_close(struct gk20a_channel_fence *f) -{ - if (f->semaphore) - gk20a_semaphore_put(f->semaphore); - memset(f, 0, sizeof(*f)); -} - -void gk20a_channel_fence_dup(struct gk20a_channel_fence *from, - struct gk20a_channel_fence *to) -{ - WARN_ON(!gk20a_channel_fence_is_closed(to)); - *to = *from; - if (to->semaphore) - gk20a_semaphore_get(to->semaphore); -} diff --git 
a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index baa4a151..a3cd8208 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -24,34 +24,28 @@ struct gk20a_channel_sync; struct priv_cmd_entry; struct channel_gk20a; struct gk20a_semaphore; - -struct gk20a_channel_fence { - bool valid; - bool wfi; /* was issued with preceding wfi */ - u32 thresh; /* syncpoint fences only */ - struct gk20a_semaphore *semaphore; /* semaphore fences only */ -}; +struct gk20a_fence; struct gk20a_channel_sync { - /* CPU wait for a fence returned by incr_syncpt() or incr_fd(). */ - int (*wait_cpu)(struct gk20a_channel_sync *s, - struct gk20a_channel_fence *fence, - int timeout); - - /* Test whether a fence returned by incr_syncpt() or incr_fd() is - * expired. */ - bool (*is_expired)(struct gk20a_channel_sync *s, - struct gk20a_channel_fence *fence); - - /* Generate a gpu wait cmdbuf from syncpoint. */ + /* Generate a gpu wait cmdbuf from syncpoint. + * Returns + * - a gpu cmdbuf that performs the wait when executed, + * - possibly a helper fence that the caller must hold until the + * cmdbuf is executed. + */ int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence); + struct gk20a_fence **fence); - /* Generate a gpu wait cmdbuf from sync fd. */ + /* Generate a gpu wait cmdbuf from sync fd. + * Returns + * - a gpu cmdbuf that performs the wait when executed, + * - possibly a helper fence that the caller must hold until the + * cmdbuf is executed. + */ int (*wait_fd)(struct gk20a_channel_sync *s, int fd, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence); + struct gk20a_fence **fence); /* Increment syncpoint/semaphore. 
* Returns @@ -60,7 +54,7 @@ struct gk20a_channel_sync { */ int (*incr)(struct gk20a_channel_sync *s, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence); + struct gk20a_fence **fence); /* Increment syncpoint/semaphore, preceded by a wfi. * Returns @@ -69,38 +63,29 @@ struct gk20a_channel_sync { */ int (*incr_wfi)(struct gk20a_channel_sync *s, struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence); - - /* Increment syncpoint, so that the returned fence represents - * work completion (may need wfi) and can be returned to user space. - * Returns - * - a gpu cmdbuf that performs the increment when executed, - * - a fence that can be passed to wait_cpu() and is_expired(), - * - a syncpoint id/value pair that can be returned to user space. - */ - int (*incr_user_syncpt)(struct gk20a_channel_sync *s, - struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence, - bool wfi, - u32 *id, u32 *thresh); + struct gk20a_fence **fence); /* Increment syncpoint/semaphore, so that the returned fence represents * work completion (may need wfi) and can be returned to user space. * Returns * - a gpu cmdbuf that performs the increment when executed, * - a fence that can be passed to wait_cpu() and is_expired(), - * - a sync fd that can be returned to user space. + * - a gk20a_fence that signals when the incr has happened. */ - int (*incr_user_fd)(struct gk20a_channel_sync *s, - int wait_fence_fd, - struct priv_cmd_entry **entry, - struct gk20a_channel_fence *fence, - bool wfi, - int *fd); + int (*incr_user)(struct gk20a_channel_sync *s, + int wait_fence_fd, + struct priv_cmd_entry **entry, + struct gk20a_fence **fence, + bool wfi); /* Reset the channel syncpoint/semaphore. */ void (*set_min_eq_max)(struct gk20a_channel_sync *s); + /* Signals the sync timeline (if owned by the gk20a_channel_sync layer). + * This should be called when we notice that a gk20a_fence is + * expired. 
*/ + void (*signal_timeline)(struct gk20a_channel_sync *s); + /* flag to set sync destroy aggressiveness */ bool aggressive_destroy; @@ -110,7 +95,4 @@ struct gk20a_channel_sync { struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); -void gk20a_channel_fence_close(struct gk20a_channel_fence *f); -void gk20a_channel_fence_dup(struct gk20a_channel_fence *from, - struct gk20a_channel_fence *to); #endif diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c new file mode 100644 index 00000000..1a28e660 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c @@ -0,0 +1,229 @@ +/* + * drivers/video/tegra/host/gk20a/fence_gk20a.c + * + * GK20A Fences + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include "fence_gk20a.h" + +#include +#include + +#include "gk20a.h" +#include "semaphore_gk20a.h" +#include "channel_gk20a.h" +#include "sync_gk20a.h" + +#ifdef CONFIG_SYNC +#include "../../../staging/android/sync.h" +#endif + +#ifdef CONFIG_TEGRA_GK20A +#include +#endif + +struct gk20a_fence_ops { + int (*wait)(struct gk20a_fence *, int timeout); + bool (*is_expired)(struct gk20a_fence *); + void *(*free)(struct kref *); +}; + +static void gk20a_fence_free(struct kref *ref) +{ + struct gk20a_fence *f = + container_of(ref, struct gk20a_fence, ref); +#ifdef CONFIG_SYNC + if (f->sync_fence) + sync_fence_put(f->sync_fence); +#endif + if (f->semaphore) + gk20a_semaphore_put(f->semaphore); + kfree(f); +} + +void gk20a_fence_put(struct gk20a_fence *f) +{ + if (f) + kref_put(&f->ref, gk20a_fence_free); +} + +struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f) +{ + if (f) + kref_get(&f->ref); + return f; +} + +int gk20a_fence_wait(struct gk20a_fence *f, int timeout) +{ + return f->ops->wait(f, timeout); +} + +bool gk20a_fence_is_expired(struct gk20a_fence *f) +{ + return f->ops->is_expired(f); +} + +int gk20a_fence_install_fd(struct gk20a_fence *f) +{ +#ifdef CONFIG_SYNC + int fd; + + if (!f->sync_fence) + return -EINVAL; + + fd = get_unused_fd(); + if (fd < 0) + return fd; + + sync_fence_get(f->sync_fence); + sync_fence_install(f->sync_fence, fd); + return fd; +#else + return -ENODEV; +#endif +} + +static struct gk20a_fence *alloc_fence(const struct gk20a_fence_ops *ops, + struct sync_fence *sync_fence, bool wfi) +{ + struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + kref_init(&f->ref); + f->ops = ops; + f->sync_fence = sync_fence; + f->wfi = wfi; + f->syncpt_id = -1; + return f; +} + +/* Fences that are backed by GPU semaphores: */ + +static int gk20a_semaphore_fence_wait(struct gk20a_fence *f, int timeout) +{ + int remain; + + if (!gk20a_semaphore_is_acquired(f->semaphore)) + return 0; + + remain = 
wait_event_interruptible_timeout( + *f->semaphore_wq, + !gk20a_semaphore_is_acquired(f->semaphore), + timeout); + if (remain == 0 && gk20a_semaphore_is_acquired(f->semaphore)) + return -ETIMEDOUT; + else if (remain < 0) + return remain; + return 0; +} + +static bool gk20a_semaphore_fence_is_expired(struct gk20a_fence *f) +{ + return !gk20a_semaphore_is_acquired(f->semaphore); +} + +static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = { + .wait = &gk20a_semaphore_fence_wait, + .is_expired = &gk20a_semaphore_fence_is_expired, +}; + +struct gk20a_fence *gk20a_fence_from_semaphore( + struct sync_timeline *timeline, + struct gk20a_semaphore *semaphore, + wait_queue_head_t *semaphore_wq, + struct sync_fence *dependency, + bool wfi) +{ + struct gk20a_fence *f; + struct sync_fence *sync_fence = NULL; + +#ifdef CONFIG_SYNC + sync_fence = gk20a_sync_fence_create(timeline, semaphore, + dependency, "fence"); + if (!sync_fence) + return NULL; +#endif + + f = alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi); + if (!f) { +#ifdef CONFIG_SYNC + sync_fence_put(sync_fence); +#endif + return NULL; + } + gk20a_semaphore_get(semaphore); + f->semaphore = semaphore; + f->semaphore_wq = semaphore_wq; + return f; +} + +#ifdef CONFIG_TEGRA_GK20A +/* Fences that are backed by host1x syncpoints: */ + +static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, int timeout) +{ + return nvhost_syncpt_wait_timeout_ext( + f->host1x_pdev, f->syncpt_id, f->syncpt_value, + timeout, NULL, NULL); +} + +static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f) +{ + return nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id, + f->syncpt_value); +} + +static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = { + .wait = &gk20a_syncpt_fence_wait, + .is_expired = &gk20a_syncpt_fence_is_expired, +}; + +struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, + u32 id, u32 value, bool wfi) +{ + struct gk20a_fence *f; + struct sync_fence *sync_fence = 
NULL; + +#ifdef CONFIG_SYNC + struct nvhost_ctrl_sync_fence_info pt = { + .id = id, + .thresh = value + }; + + sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1, + "fence"); + if (!sync_fence) + return NULL; +#endif + + f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi); + if (!f) { +#ifdef CONFIG_SYNC + sync_fence_put(sync_fence); +#endif + return NULL; + } + f->host1x_pdev = host1x_pdev; + f->syncpt_id = id; + f->syncpt_value = value; + return f; +} +#else +struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, + u32 id, u32 value, bool wfi) +{ + return NULL; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h new file mode 100644 index 00000000..629dc694 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h @@ -0,0 +1,68 @@ +/* + * drivers/video/tegra/host/gk20a/fence_gk20a.h + * + * GK20A Fences + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ +#ifndef _GK20A_FENCE_H_ +#define _GK20A_FENCE_H_ + +#include <linux/types.h> +#include <linux/kref.h> +#include <linux/wait.h> + +struct platform_device; +struct sync_timeline; +struct sync_fence; +struct gk20a_semaphore; +struct channel_gk20a; + +struct gk20a_fence_ops; + +struct gk20a_fence { + /* Valid for all fence types: */ + struct kref ref; + bool wfi; + struct sync_fence *sync_fence; + const struct gk20a_fence_ops *ops; + + /* Valid for fences created from semaphores: */ + struct gk20a_semaphore *semaphore; + wait_queue_head_t *semaphore_wq; + + /* Valid for fences created from syncpoints: */ + struct platform_device *host1x_pdev; + u32 syncpt_id; + u32 syncpt_value; +}; + +/* Fences can be created from semaphores or syncpoint (id, value) pairs */ +struct gk20a_fence *gk20a_fence_from_semaphore( + struct sync_timeline *timeline, + struct gk20a_semaphore *semaphore, + wait_queue_head_t *semaphore_wq, + struct sync_fence *dependency, + bool wfi); + +struct gk20a_fence *gk20a_fence_from_syncpt( + struct platform_device *host1x_pdev, + u32 id, u32 value, bool wfi); + +/* Fence operations */ +void gk20a_fence_put(struct gk20a_fence *f); +struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f); +int gk20a_fence_wait(struct gk20a_fence *f, int timeout); +bool gk20a_fence_is_expired(struct gk20a_fence *f); +int gk20a_fence_install_fd(struct gk20a_fence *f); + +#endif diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index 1f2eae1a..da9a0f5e 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c @@ -371,12 +371,11 @@ struct sync_timeline *gk20a_sync_timeline_create( return &obj->obj; } -int gk20a_sync_fence_create(struct sync_timeline *obj, +struct sync_fence *gk20a_sync_fence_create(struct sync_timeline *obj, struct gk20a_semaphore *sema, struct sync_fence *dependency, const char *fmt, ...) 
{ - int fd; char name[30]; va_list args; struct sync_pt *pt; @@ -385,7 +384,7 @@ int gk20a_sync_fence_create(struct sync_timeline *obj, pt = gk20a_sync_pt_create_inst(timeline, sema, dependency); if (pt == NULL) - return -ENOMEM; + return NULL; va_start(args, fmt); vsnprintf(name, sizeof(name), fmt, args); @@ -394,15 +393,7 @@ int gk20a_sync_fence_create(struct sync_timeline *obj, fence = sync_fence_create(name, pt); if (fence == NULL) { sync_pt_free(pt); - return -ENOMEM; - } - - fd = get_unused_fd(); - if (fd < 0) { - sync_fence_put(fence); - return fd; + return NULL; } - - sync_fence_install(fence, fd); - return fd; + return fence; } diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h index 49597f06..e9c26221 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h @@ -29,7 +29,7 @@ struct gk20a_semaphore; struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); void gk20a_sync_timeline_destroy(struct sync_timeline *); void gk20a_sync_timeline_signal(struct sync_timeline *); -int gk20a_sync_fence_create(struct sync_timeline *, +struct sync_fence *gk20a_sync_fence_create(struct sync_timeline *, struct gk20a_semaphore *, struct sync_fence *dependency, const char *fmt, ...); -- cgit v1.2.2