From ba2e59dc41f593bb011e0ec58c969337a35f4cf1 Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Wed, 22 Nov 2017 15:57:11 +0200
Subject: gpu: nvgpu: use submit callback only in linux code

Move the implementation of the channel job update callbacks, which is
based on Linux-specific work_struct usage, to Linux-specific code.

This requires a bit of extra work to allocate OS-specific priv data for
channels, which is also done in this patch. The priv data will be used
more as more OS-specific features are moved.

Jira NVGPU-259

Change-Id: I24bc0148a827f375b56a1c96044685affc2d1e8c
Signed-off-by: Konsta Holtta
Reviewed-on: https://git-master.nvidia.com/r/1589321
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/channel.c  | 157 ++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/common/linux/channel.h  |  32 ++++++
 drivers/gpu/nvgpu/common/linux/module.c   |  26 +++++
 drivers/gpu/nvgpu/common/linux/os_linux.h |   2 +
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c   |  57 ++---------
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h   |  21 +---
 drivers/gpu/nvgpu/gk20a/gk20a.h           |   7 ++
 7 files changed, 236 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index 40b11b86..8366ed88 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -27,6 +27,9 @@
 #include "gk20a/gk20a.h"
 
+#include "channel.h"
+#include "os_linux.h"
+
 #include
 #include
@@ -34,6 +37,160 @@
 #include
 #include
 
+static void gk20a_channel_update_runcb_fn(struct work_struct *work)
+{
+        struct nvgpu_channel_completion_cb *completion_cb =
+                container_of(work, struct nvgpu_channel_completion_cb, work);
+        struct nvgpu_channel_linux *priv =
+                container_of(completion_cb,
+                                struct nvgpu_channel_linux, completion_cb);
+        struct channel_gk20a *ch = priv->ch;
+        void (*fn)(struct channel_gk20a *, void *);
+        void *user_data;
+
+        nvgpu_spinlock_acquire(&completion_cb->lock);
+        fn = completion_cb->fn;
+        user_data = completion_cb->user_data;
+        nvgpu_spinlock_release(&completion_cb->lock);
+
+        if (fn)
+                fn(ch, user_data);
+}
+
+static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch)
+{
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        priv->completion_cb.fn = NULL;
+        priv->completion_cb.user_data = NULL;
+        nvgpu_spinlock_init(&priv->completion_cb.lock);
+        INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn);
+}
+
+static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch)
+{
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        nvgpu_spinlock_acquire(&priv->completion_cb.lock);
+        priv->completion_cb.fn = NULL;
+        priv->completion_cb.user_data = NULL;
+        nvgpu_spinlock_release(&priv->completion_cb.lock);
+        cancel_work_sync(&priv->completion_cb.work);
+}
+
+static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch)
+{
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        if (priv->completion_cb.fn)
+                schedule_work(&priv->completion_cb.work);
+}
+
+static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch)
+{
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        if (priv->completion_cb.fn)
+                cancel_work_sync(&priv->completion_cb.work);
+}
+
+struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
+                void (*update_fn)(struct channel_gk20a *, void *),
+                void *update_fn_data,
+                int runlist_id,
+                bool is_privileged_channel)
+{
+        struct channel_gk20a *ch;
+        struct nvgpu_channel_linux *priv;
+
+        ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel);
+
+        if (ch) {
+                priv = ch->os_priv;
+                nvgpu_spinlock_acquire(&priv->completion_cb.lock);
+                priv->completion_cb.fn = update_fn;
+                priv->completion_cb.user_data = update_fn_data;
+                nvgpu_spinlock_release(&priv->completion_cb.lock);
+        }
+
+        return ch;
+}
+
+static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
+{
+}
+
+static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
+{
+        nvgpu_channel_work_completion_clear(ch);
+}
+
+static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
+{
+        struct nvgpu_channel_linux *priv;
+
+        priv = nvgpu_kzalloc(g, sizeof(*priv));
+        if (!priv)
+                return -ENOMEM;
+
+        ch->os_priv = priv;
+        priv->ch = ch;
+
+        nvgpu_channel_work_completion_init(ch);
+
+        return 0;
+}
+
+static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
+{
+        nvgpu_kfree(g, ch->os_priv);
+}
+
+int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
+{
+        struct gk20a *g = &l->g;
+        struct fifo_gk20a *f = &g->fifo;
+        int chid;
+        int err;
+
+        for (chid = 0; chid < (int)f->num_channels; chid++) {
+                struct channel_gk20a *ch = &f->channel[chid];
+
+                err = nvgpu_channel_alloc_linux(g, ch);
+                if (err)
+                        goto err_clean;
+        }
+
+        g->os_channel.open = nvgpu_channel_open_linux;
+        g->os_channel.close = nvgpu_channel_close_linux;
+        g->os_channel.work_completion_signal =
+                nvgpu_channel_work_completion_signal;
+        g->os_channel.work_completion_cancel_sync =
+                nvgpu_channel_work_completion_cancel_sync;
+        return 0;
+
+err_clean:
+        for (; chid >= 0; chid--) {
+                struct channel_gk20a *ch = &f->channel[chid];
+
+                nvgpu_channel_free_linux(g, ch);
+        }
+        return err;
+}
+
+void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l)
+{
+        struct gk20a *g = &l->g;
+        struct fifo_gk20a *f = &g->fifo;
+        unsigned int chid;
+
+        for (chid = 0; chid < f->num_channels; chid++) {
+                struct channel_gk20a *ch = &f->channel[chid];
+
+                nvgpu_channel_free_linux(g, ch);
+        }
+}
+
 u32 nvgpu_get_gpfifo_entry_size(void)
 {
         return sizeof(struct nvgpu_gpfifo);
diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h
index 785c03d6..ba1935f3 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.h
+++ b/drivers/gpu/nvgpu/common/linux/channel.h
@@ -16,6 +16,8 @@
 #ifndef __NVGPU_CHANNEL_H__
 #define __NVGPU_CHANNEL_H__
 
+#include
+
 #include
 
 struct channel_gk20a;
@@ -24,6 +26,36 @@ struct nvgpu_submit_gpfifo_args;
 struct nvgpu_fence;
 struct gk20a_fence;
 struct fifo_profile_gk20a;
+struct nvgpu_os_linux;
+
+struct nvgpu_channel_completion_cb {
+        /*
+         * Signal channel owner via a callback, if set, in job cleanup with
+         * schedule_work. Means that something finished on the channel (perhaps
+         * more than one job).
+         */
+        void (*fn)(struct channel_gk20a *, void *);
+        void *user_data;
+        /* Make access to the two above atomic */
+        struct nvgpu_spinlock lock;
+        /* Per-channel async work task, cannot reschedule itself */
+        struct work_struct work;
+};
+
+struct nvgpu_channel_linux {
+        struct channel_gk20a *ch;
+
+        struct nvgpu_channel_completion_cb completion_cb;
+};
+
+int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
+void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l);
+
+struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
+                void (*update_fn)(struct channel_gk20a *, void *),
+                void *update_fn_data,
+                int runlist_id,
+                bool is_privileged_channel);
 
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                 struct nvgpu_gpfifo *gpfifo,
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 796507a9..5012d8b5 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -59,6 +59,7 @@
 #include "cde_gp10b.h"
 #include "ctxsw_trace.h"
 #include "driver_common.h"
+#include "channel.h"
 
 #define CLASS_NAME "nvidia-gpu"
 /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
@@ -185,6 +186,25 @@ static int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l)
         return 0;
 }
 
+static int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l)
+{
+        struct gk20a *g = &l->g;
+        int err;
+
+        if (l->init_done)
+                return 0;
+
+        err = nvgpu_init_channel_support_linux(l);
+        if (err) {
+                nvgpu_err(g, "failed to init linux channel support");
+                return err;
+        }
+
+        l->init_done = true;
+
+        return 0;
+}
+
 int gk20a_pm_finalize_poweron(struct device *dev)
 {
         struct gk20a *g = get_gk20a(dev);
@@ -227,6 +247,10 @@ int gk20a_pm_finalize_poweron(struct device *dev)
         if (err)
                 goto done;
 
+        err = nvgpu_finalize_poweron_linux(l);
+        if (err)
+                goto done;
+
         trace_gk20a_finalize_poweron_done(dev_name(dev));
 
         err = nvgpu_init_os_linux_ops(l);
@@ -596,6 +620,8 @@ void gk20a_remove_support(struct gk20a *g)
 
         nvgpu_kfree(g, g->dbg_regops_tmp_buf);
 
+        nvgpu_remove_channel_support_linux(l);
+
         if (g->pmu.remove_support)
                 g->pmu.remove_support(&g->pmu);
diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h
index 07be7edc..9b95ed84 100644
--- a/drivers/gpu/nvgpu/common/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/common/linux/os_linux.h
@@ -149,6 +149,8 @@ struct nvgpu_os_linux {
         struct rw_semaphore busy_lock;
 
         struct gk20a_sched_ctrl sched_ctrl;
+
+        bool init_done;
 };
 
 static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 4be232f1..e01d6cdb 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -444,6 +444,9 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
         trace_gk20a_free_channel(ch->chid);
 
+        if (g->os_channel.close)
+                g->os_channel.close(ch);
+
         /*
          * Disable channel/TSG and unbind here. This should not be executed if
          * HW access is not available during shutdown/removal path as it will
@@ -561,12 +564,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
          */
         nvgpu_vm_put(ch_vm);
 
-        nvgpu_spinlock_acquire(&ch->update_fn_lock);
-        ch->update_fn = NULL;
-        ch->update_fn_data = NULL;
-        nvgpu_spinlock_release(&ch->update_fn_lock);
-        cancel_work_sync(&ch->update_fn_work);
-
         /* make sure we don't have deferred interrupts pending that
          * could still touch the channel */
         nvgpu_wait_for_deferred_interrupts(g);
@@ -756,40 +753,6 @@ void __gk20a_channel_kill(struct channel_gk20a *ch)
         gk20a_free_channel(ch, true);
 }
 
-static void gk20a_channel_update_runcb_fn(struct work_struct *work)
-{
-        struct channel_gk20a *ch =
-                container_of(work, struct channel_gk20a, update_fn_work);
-        void (*update_fn)(struct channel_gk20a *, void *);
-        void *update_fn_data;
-
-        nvgpu_spinlock_acquire(&ch->update_fn_lock);
-        update_fn = ch->update_fn;
-        update_fn_data = ch->update_fn_data;
-        nvgpu_spinlock_release(&ch->update_fn_lock);
-
-        if (update_fn)
-                update_fn(ch, update_fn_data);
-}
-
-struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
-                void (*update_fn)(struct channel_gk20a *, void *),
-                void *update_fn_data,
-                int runlist_id,
-                bool is_privileged_channel)
-{
-        struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel);
-
-        if (ch) {
-                nvgpu_spinlock_acquire(&ch->update_fn_lock);
-                ch->update_fn = update_fn;
-                ch->update_fn_data = update_fn_data;
-                nvgpu_spinlock_release(&ch->update_fn_lock);
-        }
-
-        return ch;
-}
-
 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
                 s32 runlist_id,
                 bool is_privileged_channel)
@@ -872,10 +835,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
         nvgpu_cond_init(&ch->notifier_wq);
         nvgpu_cond_init(&ch->semaphore_wq);
 
-        ch->update_fn = NULL;
-        ch->update_fn_data = NULL;
-        nvgpu_spinlock_init(&ch->update_fn_lock);
-        INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
+        if (g->os_channel.open)
+                g->os_channel.open(ch);
 
         /* Mark the channel alive, get-able, with 1 initial use
          * references. The initial reference will be decreased in
@@ -2120,8 +2081,8 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
         nvgpu_mutex_release(&c->joblist.cleanup_lock);
 
-        if (job_finished && c->update_fn)
-                schedule_work(&c->update_fn_work);
+        if (job_finished && g->os_channel.work_completion_signal)
+                g->os_channel.work_completion_signal(c);
 
         gk20a_channel_put(c);
 }
@@ -2322,8 +2283,8 @@ int gk20a_channel_suspend(struct gk20a *g)
                         /* preempt the channel */
                         gk20a_fifo_preempt(g, ch);
                         /* wait for channel update notifiers */
-                        if (ch->update_fn)
-                                cancel_work_sync(&ch->update_fn_work);
+                        if (g->os_channel.work_completion_cancel_sync)
+                                g->os_channel.work_completion_cancel_sync(ch);
 
                         channels_in_use = true;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index d865849b..8c9095b2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -24,9 +24,6 @@
 #ifndef CHANNEL_GK20A_H
 #define CHANNEL_GK20A_H
 
-/* TODO: To be removed when work_struct update_fn_work is moved out of common code */
-#include
-
 #include
 #include
 #include
@@ -288,16 +285,6 @@ struct channel_gk20a {
         u64 virt_ctx;
 #endif
 
-        /*
-         * Signal channel owner via a callback, if set, in job cleanup with
-         * schedule_work. Means that something finished on the channel (perhaps
-         * more than one job).
-         */
-        void (*update_fn)(struct channel_gk20a *, void *);
-        void *update_fn_data;
-        struct nvgpu_spinlock update_fn_lock; /* make access to the two above atomic */
-        struct work_struct update_fn_work;
-
         u32 interleave_level;
 
         u32 runlist_id;
@@ -306,6 +293,9 @@ struct channel_gk20a {
 #ifdef CONFIG_TEGRA_19x_GPU
         struct channel_t19x t19x;
 #endif
+
+        /* Any operating system specific data. */
+        void *os_priv;
 };
 
 static inline struct channel_gk20a *
@@ -382,11 +372,6 @@ int gk20a_wait_channel_idle(struct channel_gk20a *ch);
 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
                 s32 runlist_id,
                 bool is_privileged_channel);
-struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
-                void (*update_fn)(struct channel_gk20a *, void *),
-                void *update_fn_data,
-                int runlist_id,
-                bool is_privileged_channel);
 
 int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
                 unsigned int num_entries,
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8d6db4c7..11a99bff 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1255,6 +1255,13 @@ struct gk20a {
                 struct nvgpu_mutex start_lock;
         } channel_worker;
 
+        struct {
+                void (*open)(struct channel_gk20a *ch);
+                void (*close)(struct channel_gk20a *ch);
+                void (*work_completion_signal)(struct channel_gk20a *ch);
+                void (*work_completion_cancel_sync)(struct channel_gk20a *ch);
+        } os_channel;
+
         struct gk20a_scale_profile *scale_profile;
 
         unsigned long last_freq;
--
cgit v1.2.2
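For readers following the refactor, the sketch below (not part of the patch) shows how a
driver-internal user is expected to hook into the relocated callback machinery: the callback is
registered through gk20a_open_new_channel_with_cb(), stored in the Linux-only
nvgpu_channel_linux priv data behind ch->os_priv, and later signalled by common code through
g->os_channel.work_completion_signal() during job cleanup. The callback body, the my_ctx
structure and the include paths are illustrative assumptions only, and the -1 runlist id is
assumed to mean "use the default runlist" as in existing callers.

/* Illustrative sketch only -- not part of this patch; include paths assumed. */
#include "gk20a/gk20a.h"
#include "channel.h"           /* common/linux/channel.h */

struct my_ctx {                /* hypothetical per-user bookkeeping */
        struct gk20a *g;
};

/* Runs from the Linux workqueue when one or more jobs on ch have completed. */
static void my_channel_update(struct channel_gk20a *ch, void *data)
{
        struct my_ctx *ctx = data;

        /* e.g. reclaim buffers that finished on this channel */
        (void)ctx;
}

static struct channel_gk20a *my_open_channel(struct gk20a *g, struct my_ctx *ctx)
{
        /*
         * Allocates the channel via the common gk20a_open_new_channel() and
         * stashes the callback in nvgpu_channel_linux (ch->os_priv). Common
         * code later calls g->os_channel.work_completion_signal() from
         * gk20a_channel_clean_up_jobs(), which schedules the work item that
         * invokes my_channel_update().
         */
        return gk20a_open_new_channel_with_cb(g, my_channel_update, ctx,
                        -1 /* assumed: default runlist */,
                        false /* not privileged */);
}

Keeping the work_struct inside nvgpu_channel_linux means common code never touches Linux
workqueue APIs; another OS can leave the g->os_channel callbacks NULL, and the checks in
gk20a_channel_clean_up_jobs() and gk20a_channel_suspend() simply skip them.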