From 15839d4763e0651e789a6511476851cccef0febb Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Wed, 3 Dec 2014 16:11:50 +0200
Subject: gpu: nvgpu: Implement gp10b context creation

Implement context creation for gp10b. GfxP contexts need per channel
buffers.

Bug 1517461

Change-Id: Ifecb59002f89f0407457730a35bfb3fe988b907a
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/660236
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c            | 221 +++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h            |  19 ++-
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h |  16 ++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h         |  64 ++++++++
 drivers/gpu/nvgpu/gr_t18x.h                   |  20 +++
 5 files changed, 333 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gr_t18x.h

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 0dce115a..0a60612d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -21,6 +21,7 @@
 #include "gp10b/gr_gp10b.h"
 #include "hw_gr_gp10b.h"
 #include "hw_proj_gp10b.h"
+#include "hw_ctxsw_prog_gp10b.h"
 
 bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
@@ -297,12 +298,9 @@ clean_up:
 	return ret;
 }
 
-static void gr_gp10b_buffer_size_defaults(struct gk20a *g)
+static u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
 {
-	g->gr.pagepool_default_size =
-		gr_scc_pagepool_total_pages_hwmax_value_v();
-	g->gr.pagepool_max_size =
-		gr_scc_pagepool_total_pages_hwmax_value_v();
+	return gr_scc_pagepool_total_pages_hwmax_value_v();
 }
 
 static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
@@ -473,6 +471,212 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 	}
 }
 
+static int gr_gp10b_init_ctx_state(struct gk20a *g)
+{
+	struct fecs_method_op_gk20a op = {
+		.mailbox = { .id = 0, .data = 0,
+			.clr = ~0, .ok = 0, .fail = 0},
+		.method.data = 0,
+		.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
+		.cond.fail = GR_IS_UCODE_OP_SKIP,
+		};
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gr_gk20a_init_ctx_state(g);
+	if (err)
+		return err;
+
+	if (!g->gr.t18x.ctx_vars.preempt_image_size) {
+		op.method.addr =
+			gr_fecs_method_push_adr_discover_preemption_image_size_v();
+		op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
+		err = gr_gk20a_submit_fecs_method_op(g, op);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+				"query preempt image size failed");
+			return err;
+		}
+	}
+
+	gk20a_dbg_info("preempt image size: %u",
+		g->gr.t18x.ctx_vars.preempt_image_size);
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+}
+
+int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+		struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
+		u32 flags)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, flags);
+	if (err)
+		return err;
+
+	if (flags == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+		u32 spill_size =
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v();
+		u32 betacb_size = ALIGN(
+			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() *
+			 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+			 g->gr.max_tpc_count) +
+			(g->gr.alpha_cb_size *
+			 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+			 g->gr.max_tpc_count),
+			128);
+		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
+			gr_scc_pagepool_total_pages_byte_granularity_v();
+
+		err = gk20a_gmmu_alloc_map(vm, g->gr.t18x.ctx_vars.preempt_image_size,
+				&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(vm->mm->g),
+				"cannot allocate preempt buffer");
+			goto fail_free_gk20a_ctx;
+		}
+
+		err = gk20a_gmmu_alloc_map(vm, spill_size,
+				&(*gr_ctx)->t18x.spill_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(vm->mm->g),
+				"cannot allocate spill buffer");
+			goto fail_free_preempt;
+		}
+
+		err = gk20a_gmmu_alloc_map(vm, betacb_size,
+				&(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(vm->mm->g),
+				"cannot allocate beta buffer");
+			goto fail_free_spill;
+		}
+
+		err = gk20a_gmmu_alloc_map(vm, pagepool_size,
+				&(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(vm->mm->g),
+				"cannot allocate page pool");
+			goto fail_free_betacb;
+		}
+
+		(*gr_ctx)->t18x.preempt_mode = flags;
+	}
+
+	gk20a_dbg_fn("done");
+
+	return err;
+
+fail_free_betacb:
+	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+fail_free_spill:
+	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer);
+fail_free_preempt:
+	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+fail_free_gk20a_ctx:
+	gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
+	*gr_ctx = NULL;
+
+	return err;
+}
+
+static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
+		struct gr_ctx_desc *gr_ctx)
+{
+	gk20a_dbg_fn("");
+
+	if (!gr_ctx)
+		return;
+
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
+	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
+
+	gk20a_dbg_fn("done");
+}
+
+static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
+		struct channel_ctx_gk20a *ch_ctx,
+		void *ctx_ptr)
+{
+	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	u32 gfxp_preempt_option =
+		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (gr_ctx->t18x.preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
+		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
+				gfxp_preempt_option);
+	}
+
+	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
+		u32 addr;
+		u32 size;
+		u32 cbes_reserve;
+
+		gk20a_mem_wr32(ctx_ptr, ctxsw_prog_main_image_full_preemption_ptr_o(),
+				gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
+
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+
+		addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
+			gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
+
+		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
+		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+
+		addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
+			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
+		size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
+		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+
+		addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
+			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
+
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gpc0_swdx_rm_spill_buffer_addr_r(),
+			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
+			true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gpc0_swdx_rm_spill_buffer_size_r(),
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_f(addr),
+			true);
+
+		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gpcs_swdx_beta_cb_ctrl_r(),
+			gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
+				cbes_reserve),
+			true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
+			gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
+				cbes_reserve),
+			true);
+
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+	}
+
+	gk20a_dbg_fn("done");
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -481,7 +685,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
 	gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
 	gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
-	gops->gr.buffer_size_defaults = gr_gp10b_buffer_size_defaults;
+	gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
 	gops->gr.calc_global_ctx_buffer_size =
 		gr_gp10b_calc_global_ctx_buffer_size;
 	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
@@ -490,4 +694,9 @@
 		gr_gp10b_set_alpha_circular_buffer_size;
 	gops->gr.set_circular_buffer_size =
 		gr_gp10b_set_circular_buffer_size;
+	gops->gr.init_ctx_state = gr_gp10b_init_ctx_state;
+	gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx;
+	gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
+	gops->gr.update_ctxsw_preemption_mode =
+		gr_gp10b_update_ctxsw_preemption_mode;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 536a7d27..6bbda564 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -16,7 +16,7 @@
 #ifndef _NVGPU_GR_GP10B_H_
 #define _NVGPU_GR_GP10B_H_
 
-struct gk20a;
+struct gpu_ops;
 
 enum {
 	PASCAL_CHANNEL_GPFIFO_A = 0xC06F,
@@ -32,4 +32,21 @@ enum {
 
 void gp10b_init_gr(struct gpu_ops *ops);
 
+struct gr_t18x {
+	struct {
+		u32 preempt_image_size;
+	} ctx_vars;
+};
+
+struct gr_ctx_desc_t18x {
+	int preempt_mode;
+	struct mem_desc preempt_ctxsw_buffer;
+	struct mem_desc spill_ctxsw_buffer;
+	struct mem_desc betacb_ctxsw_buffer;
+	struct mem_desc pagepool_ctxsw_buffer;
+};
+
+#define NVGPU_GR_PREEMPTION_MODE_WFI	0
+#define NVGPU_GR_PREEMPTION_MODE_GFXP	1
+
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index 79890f3c..0892f12e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -238,4 +238,20 @@ static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(v
 {
 	return 0x0;
 }
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_o(void)
+{
+	return 0x00000080;
+}
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(void)
+{
+	return 0x1;
+}
+static inline u32 ctxsw_prog_main_image_full_preemption_ptr_o(void)
+{
+	return 0x00000068;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index f2237a50..0bd707db 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -718,6 +718,10 @@ static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
 {
 	return 0x21;
 }
+static inline u32 gr_fecs_method_push_adr_discover_preemption_image_size_v(void)
+{
+	return 0x0000001a;
+}
 static inline u32 gr_fecs_host_int_status_r(void)
 {
 	return 0x00409c18;
@@ -1910,6 +1914,10 @@ static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(void)
 {
 	return 0x00030000;
 }
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(void)
+{
+	return 0x00030a00;
+}
 static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(void)
 {
 	return 0x00000020;
@@ -2186,6 +2194,50 @@ static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_true_f(void)
 {
 	return 0x80000000;
 }
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_r(void)
+{
+	return 0x00500ee4;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(void)
+{
+	return 0x00000250;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_r(void)
+{
+	return 0x00500ee0;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_r(void)
+{
+	return 0x00418eec;
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(void)
+{
+	return 0x00000100;
+}
+static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(void)
+{
+	return 0x0041befc;
+}
+static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
 static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)
 {
 	return 0x00418ea0 + i*4;
@@ -3342,4 +3394,16 @@ static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f(void)
 {
 	return 0x40000000;
 }
+static inline u32 gr_fe_gfxp_wfi_timeout_r(void)
+{
+	return 0x004041c0;
+}
+static inline u32 gr_fe_gfxp_wfi_timeout_count_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fe_gfxp_wfi_timeout_count_disabled_f(void)
+{
+	return 0x0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gr_t18x.h b/drivers/gpu/nvgpu/gr_t18x.h
new file mode 100644
index 00000000..95601116
--- /dev/null
+++ b/drivers/gpu/nvgpu/gr_t18x.h
@@ -0,0 +1,20 @@
+/*
+ * NVIDIA T18x GR
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _NVGPU_GR_T18X_H_
+#define _NVGPU_GR_T18X_H_
+
+#include "gp10b/gr_gp10b.h"
+
+#endif
--
cgit v1.2.2