From d868b654419cfa096f563c9281a2a5cc067c23db Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 19 Apr 2016 16:57:49 +0530
Subject: gpu: nvgpu: separate IOCTL to set preemption mode

Add separate IOCTL NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE
to allow setting preemption modes from UMD

Define the preemption modes in nvgpu.h and use them everywhere.
Remove the old mode definitions from gr_gk20a.h.

Also, we currently support setting only one preemption mode per
channel, but it is possible to have two preemption modes (one for
graphics and one for compute) set simultaneously.

Hence, update struct gr_ctx_desc to include two separate
preemption modes (graphics_preempt_mode and compute_preempt_mode)

The NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE API likewise accepts
two separate preemption modes, i.e. one for graphics and one
for compute.

Make necessary changes in code to support two preemption
modes

Bug 1646259

Change-Id: Ia1dea19e609ba8cc0de2f39ab6c0c4cd6b0a752c
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1131805
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 17 ++++++++++++
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    |  8 +++---
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  9 +++++--
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h      | 21 +++++++++------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |  3 ++-
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c      |  4 +--
 include/trace/events/gk20a.h            | 47 ++++++++++++++++++++++-----------
 include/uapi/linux/nvgpu.h              | 14 +++++++++-
 8 files changed, 89 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 189ec330..990972e4 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -3208,6 +3208,23 @@ long gk20a_channel_ioctl(struct file *filp,
 
 		trace_gk20a_channel_set_timeslice(GK20A_TP_ARGS_SCHED(ch));
 		break;
+	case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE:
+		if (ch->g->ops.gr.set_preemption_mode) {
+			err = gk20a_busy(dev);
+			if (err) {
+				dev_err(dev,
+					"%s: failed to host gk20a for ioctl cmd: 0x%x",
+					__func__, cmd);
+				break;
+			}
+			err = ch->g->ops.gr.set_preemption_mode(ch,
+			     ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode,
+			     ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode);
+			gk20a_idle(dev);
+		} else {
+			err = -EINVAL;
+		}
+		break;
 	default:
 		dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index eccea4d4..dc3debf2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -2599,7 +2599,7 @@ static int gk20a_fifo_sched_debugfs_seq_show(
 	runlist = &f->runlist_info[runlist_id];
 
 	if (ch == f->channel) {
-		seq_puts(s, "chid     tsgid    pid      timeslice  timeout  interleave preempt\n");
+		seq_puts(s, "chid     tsgid    pid      timeslice  timeout  interleave graphics_preempt compute_preempt\n");
 		seq_puts(s, "                            (usecs)   (msecs)\n");
 		ret = 0;
 	}
@@ -2611,15 +2611,15 @@ static int gk20a_fifo_sched_debugfs_seq_show(
 		if (gk20a_is_channel_marked_as_tsg(ch))
 			tsg = &f->tsg[ch->tsgid];
 
-		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d\n",
+		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
 				ch->hw_chid,
 				ch->tsgid,
 				ch->pid,
 				tsg ? tsg->timeslice_us : ch->timeslice_us,
 				ch->timeout_ms_max,
 				ch->interleave_level,
-				ch->ch_ctx.gr_ctx ?
-					ch->ch_ctx.gr_ctx->preempt_mode : -1);
+				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : -1,
+				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : -1);
 		gk20a_channel_put(ch);
 	}
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ebc18d5d..64e410db 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -254,6 +254,9 @@ struct gpu_ops {
 		int (*suspend_contexts)(struct gk20a *g,
 				struct dbg_session_gk20a *dbg_s,
 				int *ctx_resident_ch_fd);
+		int (*set_preemption_mode)(struct channel_gk20a *ch,
+				u32 graphics_preempt_mode,
+				u32 compute_preempt_mode);
 	} gr;
 	const char *name;
 	struct {
@@ -1098,7 +1101,9 @@ static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch)
 		tsg_gk20a_from_ch(ch)->timeslice_us : ch->timeslice_us, \
 	ch->timeout_ms_max,                                             \
 	gk20a_fifo_interleave_level_name(ch->interleave_level),         \
-	gr_gk20a_preempt_mode_name(ch->ch_ctx.gr_ctx ? 			\
-		ch->ch_ctx.gr_ctx->preempt_mode : 0)
+	gr_gk20a_graphics_preempt_mode_name(ch->ch_ctx.gr_ctx ?		\
+		ch->ch_ctx.gr_ctx->graphics_preempt_mode : 0),		\
+	gr_gk20a_compute_preempt_mode_name(ch->ch_ctx.gr_ctx ? 		\
+		ch->ch_ctx.gr_ctx->compute_preempt_mode : 0)
 
 #endif /* GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 24123eea..10997c17 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -628,18 +628,23 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
 			      struct dbg_session_gk20a *dbg_s,
 			      int *ctx_resident_ch_fd);
 
-#define NVGPU_GR_PREEMPTION_MODE_WFI            0
-#define NVGPU_GR_PREEMPTION_MODE_CTA            2
-
-static inline const char *gr_gk20a_preempt_mode_name(u32 preempt_mode)
+static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
 {
-	switch (preempt_mode) {
-	case NVGPU_GR_PREEMPTION_MODE_WFI:
+	switch (graphics_preempt_mode) {
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
 		return "WFI";
+	default:
+		return "?";
+	}
+}
 
-	case NVGPU_GR_PREEMPTION_MODE_CTA:
+static inline const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode)
+{
+	switch (compute_preempt_mode) {
+	case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
+		return "WFI";
+	case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
 		return "CTA";
-
 	default:
 		return "?";
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 29f32d8f..7fa0b7fb 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -94,7 +94,8 @@ struct gr_ctx_buffer_desc {
 struct gr_ctx_desc {
 	struct mem_desc mem;
 
-	int preempt_mode;
+	int graphics_preempt_mode;
+	int compute_preempt_mode;
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 	struct gr_ctx_desc_t18x t18x;
 #endif
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 35bbe70c..dbe30f00 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -824,7 +824,7 @@ static int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
 		return err;
 
 	if (class == MAXWELL_COMPUTE_B)
-		(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA;
+		(*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
 
 	gk20a_dbg_fn("done");
 
@@ -841,7 +841,7 @@ static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
-	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CTA) {
+	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
 		gk20a_dbg_info("CTA: %x", cta_preempt_option);
 		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_preemption_options_o(), 0,
 				cta_preempt_option);
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index fc5cb872..ba71dda2 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -192,9 +192,11 @@ DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put_nofree,
 
 DECLARE_EVENT_CLASS(gk20a_channel_sched_params,
 	TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
-		u32 timeout, const char *interleave, const char *preempt_mode),
+		u32 timeout, const char *interleave,
+		const char *graphics_preempt_mode,
+		const char *compute_preempt_mode),
 	TP_ARGS(chid, tsgid, pid, timeslice, timeout,
-		interleave, preempt_mode),
+		interleave, graphics_preempt_mode, compute_preempt_mode),
 	TP_STRUCT__entry(
 		__field(int, chid)
 		__field(int, tsgid)
@@ -202,7 +204,8 @@ DECLARE_EVENT_CLASS(gk20a_channel_sched_params,
 		__field(u32, timeslice)
 		__field(u32, timeout)
 		__field(const char *, interleave)	/* no need to copy */
-		__field(const char *, preempt_mode)	/* no need to copy */
+		__field(const char *, graphics_preempt_mode)	/* no need to copy */
+		__field(const char *, compute_preempt_mode)	/* no need to copy */
 	),
 	TP_fast_assign(
 		__entry->chid = chid;
@@ -211,47 +214,59 @@ DECLARE_EVENT_CLASS(gk20a_channel_sched_params,
 		__entry->timeslice = timeslice;
 		__entry->timeout = timeout;
 		__entry->interleave = interleave;
-		__entry->preempt_mode = preempt_mode;
+		__entry->graphics_preempt_mode = graphics_preempt_mode;
+		__entry->compute_preempt_mode = compute_preempt_mode;
 	),
-	TP_printk("chid=%d tsgid=%d pid=%d timeslice=%u timeout=%u interleave=%s preempt=%s",
+	TP_printk("chid=%d tsgid=%d pid=%d timeslice=%u timeout=%u interleave=%s graphics_preempt=%s compute_preempt=%s",
 		__entry->chid, __entry->tsgid, __entry->pid,
 		__entry->timeslice, __entry->timeout,
-		__entry->interleave, __entry->preempt_mode)
+		__entry->interleave, __entry->graphics_preempt_mode,
+		__entry->compute_preempt_mode)
 );
 
 DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_sched_defaults,
 	TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
-		u32 timeout, const char *interleave, const char *preempt_mode),
+		u32 timeout, const char *interleave,
+		const char *graphics_preempt_mode,
+		const char *compute_preempt_mode),
 	TP_ARGS(chid, tsgid, pid, timeslice, timeout,
-		interleave, preempt_mode)
+		interleave, graphics_preempt_mode, compute_preempt_mode)
 );
 
 DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_priority,
 	TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
-		u32 timeout, const char *interleave, const char *preempt_mode),
+		u32 timeout, const char *interleave,
+		const char *graphics_preempt_mode,
+		const char *compute_preempt_mode),
 	TP_ARGS(chid, tsgid, pid, timeslice, timeout,
-		interleave, preempt_mode)
+		interleave, graphics_preempt_mode, compute_preempt_mode)
 );
 
 DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_runlist_interleave,
 	TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
-		u32 timeout, const char *interleave, const char *preempt_mode),
+		u32 timeout, const char *interleave,
+		const char *graphics_preempt_mode,
+		const char *compute_preempt_mode),
 	TP_ARGS(chid, tsgid, pid, timeslice, timeout,
-		interleave, preempt_mode)
+		interleave, graphics_preempt_mode, compute_preempt_mode)
 );
 
 DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_timeslice,
 	TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
-		u32 timeout, const char *interleave, const char *preempt_mode),
+		u32 timeout, const char *interleave,
+		const char *graphics_preempt_mode,
+		const char *compute_preempt_mode),
 	TP_ARGS(chid, tsgid, pid, timeslice, timeout,
-		interleave, preempt_mode)
+		interleave, graphics_preempt_mode, compute_preempt_mode)
 );
 
 DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_timeout,
 	TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
-		u32 timeout, const char *interleave, const char *preempt_mode),
+		u32 timeout, const char *interleave,
+		const char *graphics_preempt_mode,
+		const char *compute_preempt_mode),
 	TP_ARGS(chid, tsgid, pid, timeslice, timeout,
-		interleave, preempt_mode)
+		interleave, graphics_preempt_mode, compute_preempt_mode)
 );
 
 TRACE_EVENT(gk20a_push_cmdbuf,
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index b8bd21f2..8ebe8d06 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -999,6 +999,16 @@ struct nvgpu_event_id_ctrl_args {
 
 #define NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE		1
 
+struct nvgpu_preemption_mode_args {
+/* only one should be enabled at a time */
+#define NVGPU_GRAPHICS_PREEMPTION_MODE_WFI		BIT(0)
+	__u32 graphics_preempt_mode; /* in */
+
+/* only one should be enabled at a time */
+#define NVGPU_COMPUTE_PREEMPTION_MODE_WFI		BIT(0)
+#define NVGPU_COMPUTE_PREEMPTION_MODE_CTA		BIT(1)
+	__u32 compute_preempt_mode; /* in */
+};
 
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD	\
 	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
@@ -1046,9 +1056,11 @@ struct nvgpu_event_id_ctrl_args {
 	_IOW(NVGPU_IOCTL_MAGIC, 120, struct nvgpu_runlist_interleave_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMESLICE \
 	_IOW(NVGPU_IOCTL_MAGIC, 121, struct nvgpu_timeslice_args)
+#define NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE \
+	_IOW(NVGPU_IOCTL_MAGIC, 122, struct nvgpu_preemption_mode_args)
 
 #define NVGPU_IOCTL_CHANNEL_LAST	\
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_SET_TIMESLICE)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE)
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_submit_gpfifo_args)
 
 /*
-- 
cgit v1.2.2