gpu: nvgpu: Implement gp10b context creation

Implement context creation for gp10b. GfxP contexts need per channel buffers. Bug 1517461 Change-Id: Ifecb59002f89f0407457730a35bfb3fe988b907a Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/660236
author: Terje Bergstrom <tbergstrom@nvidia.com> 2014-12-03 09:11:50 -0500
committer: Deepak Nibade <dnibade@nvidia.com> 2016-12-27 04:52:03 -0500
commit: 15839d4763e0651e789a6511476851cccef0febb (patch)
tree: 0e0a6d52c478077049c0c51d1abf6bfdcccffacf /drivers/gpu/nvgpu
parent: 945e5e6832bd2461b9eafa61e8dd06b793a6f6b9 (diff)
5 files changed, 333 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 0dce115a..0a60612d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -21,6 +21,7 @@
 #include "gp10b/gr_gp10b.h"
 #include "hw_gr_gp10b.h"
 #include "hw_proj_gp10b.h"
+#include "hw_ctxsw_prog_gp10b.h"
 bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
@@ -297,12 +298,9 @@ clean_up:
        return ret;
 }
-static void gr_gp10b_buffer_size_defaults(struct gk20a *g)
+/*
+ * Return the default page pool size: the value reported by
+ * gr_scc_pagepool_total_pages_hwmax_value_v(). Callers multiply the result
+ * by gr_scc_pagepool_total_pages_byte_granularity_v() to obtain bytes.
+ */
+static u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
 {
-        g->gr.pagepool_default_size =
+        return gr_scc_pagepool_total_pages_hwmax_value_v();
-                gr_scc_pagepool_total_pages_hwmax_value_v();
-        g->gr.pagepool_max_size =
-                gr_scc_pagepool_total_pages_hwmax_value_v();
 }
 static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
@@ -473,6 +471,212 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
        }
 }
+/*
+ * Run the common gk20a context-state initialisation, then query the
+ * preemption image size through a FECS method unless a non-zero size is
+ * already cached in g->gr.t18x.ctx_vars.preempt_image_size.
+ *
+ * Returns 0 on success, otherwise the error reported by
+ * gr_gk20a_init_ctx_state() or gr_gk20a_submit_fecs_method_op().
+ */
+static int gr_gp10b_init_ctx_state(struct gk20a *g)
+{
+        u32 *image_size = &g->gr.t18x.ctx_vars.preempt_image_size;
+        struct fecs_method_op_gk20a query = {
+                .mailbox = { .id = 0, .data = 0,
+                             .clr = ~0, .ok = 0, .fail = 0},
+                .method.data = 0,
+                .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
+                .cond.fail = GR_IS_UCODE_OP_SKIP,
+                };
+        int ret;
+        gk20a_dbg_fn("");
+        ret = gr_gk20a_init_ctx_state(g);
+        if (ret)
+                return ret;
+        /* Only ask the ucode when no size has been recorded yet. */
+        if (*image_size == 0) {
+                query.method.addr =
+                        gr_fecs_method_push_adr_discover_preemption_image_size_v();
+                /* The reply is written straight into the cached size. */
+                query.mailbox.ret = image_size;
+                ret = gr_gk20a_submit_fecs_method_op(g, query);
+                if (ret) {
+                        gk20a_err(dev_from_gk20a(g),
+                                        "query preempt image size failed");
+                        return ret;
+                }
+        }
+        gk20a_dbg_info("preempt image size: %u", *image_size);
+        gk20a_dbg_fn("done");
+        return 0;
+}
+/*
+ * Allocate a graphics context and, when GfxP preemption is requested, the
+ * additional per-context buffers it needs.
+ *
+ * @g:      GPU instance.
+ * @gr_ctx: out parameter; receives the context allocated by
+ *          gr_gk20a_alloc_gr_ctx(). Reset to NULL if a GfxP buffer
+ *          allocation fails after the base context was created.
+ * @vm:     address space the buffers are allocated and mapped in.
+ * @flags:  preemption mode; only NVGPU_GR_PREEMPTION_MODE_GFXP triggers the
+ *          extra allocations.
+ *
+ * Returns 0 on success or the error from the failing allocation. On failure
+ * every buffer allocated so far and the base context are released.
+ */
+int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+                          struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
+                          u32 flags)
+{
+        int err;
+        gk20a_dbg_fn("");
+        err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, flags);
+        if (err)
+                return err;
+        if (flags == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+                /*
+                 * The register default is a count of 256-byte units (the
+                 * "_256b" field), whereas gk20a_gmmu_alloc_map() is handed
+                 * byte sizes for the other buffers below, so convert to
+                 * bytes before allocating.
+                 * NOTE(review): the 256-byte unit is taken from the field
+                 * name; confirm against the hardware manual.
+                 */
+                u32 spill_size =
+                        gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
+                        256;
+                /* Beta plus alpha CB space for every TPC, 128-byte aligned. */
+                u32 betacb_size = ALIGN(
+                        (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() *
+                         gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+                         g->gr.max_tpc_count) +
+                        (g->gr.alpha_cb_size *
+                         gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+                         g->gr.max_tpc_count),
+                        128);
+                u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
+                        gr_scc_pagepool_total_pages_byte_granularity_v();
+                err = gk20a_gmmu_alloc_map(vm, g->gr.t18x.ctx_vars.preempt_image_size,
+                                &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+                if (err) {
+                        gk20a_err(dev_from_gk20a(vm->mm->g),
+                                  "cannot allocate preempt buffer");
+                        goto fail_free_gk20a_ctx;
+                }
+                err = gk20a_gmmu_alloc_map(vm, spill_size,
+                                &(*gr_ctx)->t18x.spill_ctxsw_buffer);
+                if (err) {
+                        gk20a_err(dev_from_gk20a(vm->mm->g),
+                                  "cannot allocate spill buffer");
+                        goto fail_free_preempt;
+                }
+                err = gk20a_gmmu_alloc_map(vm, betacb_size,
+                                           &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+                if (err) {
+                        gk20a_err(dev_from_gk20a(vm->mm->g),
+                                  "cannot allocate beta buffer");
+                        goto fail_free_spill;
+                }
+                err = gk20a_gmmu_alloc_map(vm, pagepool_size,
+                                           &(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
+                if (err) {
+                        gk20a_err(dev_from_gk20a(vm->mm->g),
+                                  "cannot allocate page pool");
+                        goto fail_free_betacb;
+                }
+                /* Record the mode only once every buffer exists. */
+                (*gr_ctx)->t18x.preempt_mode = flags;
+        }
+        gk20a_dbg_fn("done");
+        return err;
+        /* Unwind in reverse allocation order; each label falls through. */
+fail_free_betacb:
+        gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+fail_free_spill:
+        gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer);
+fail_free_preempt:
+        gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+fail_free_gk20a_ctx:
+        gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
+        *gr_ctx = NULL;
+        return err;
+}
+/*
+ * Release a graphics context created by gr_gp10b_alloc_gr_ctx(): unmap and
+ * free the four t18x preemption buffers, then hand the context to
+ * gr_gk20a_free_gr_ctx(). A NULL gr_ctx is ignored.
+ */
+static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
+                          struct gr_ctx_desc *gr_ctx)
+{
+        struct mem_desc *preempt_bufs[4];
+        unsigned int i;
+        gk20a_dbg_fn("");
+        if (gr_ctx == NULL)
+                return;
+        /* Listed in reverse of the order gr_gp10b_alloc_gr_ctx() allocates. */
+        preempt_bufs[0] = &gr_ctx->t18x.pagepool_ctxsw_buffer;
+        preempt_bufs[1] = &gr_ctx->t18x.betacb_ctxsw_buffer;
+        preempt_bufs[2] = &gr_ctx->t18x.spill_ctxsw_buffer;
+        preempt_bufs[3] = &gr_ctx->t18x.preempt_ctxsw_buffer;
+        for (i = 0; i < sizeof(preempt_bufs) / sizeof(preempt_bufs[0]); i++)
+                gk20a_gmmu_unmap_free(vm, preempt_bufs[i]);
+        gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
+        gk20a_dbg_fn("done");
+}
+/*
+ * Program the preemption settings of a channel's graphics context.
+ *
+ * @g:       GPU instance.
+ * @ch_ctx:  channel context whose gr_ctx carries the t18x preemption state.
+ * @ctx_ptr: CPU pointer to the context image being updated.
+ *
+ * When the context is in GfxP mode, the GfxP option is written into the
+ * context image. When a preemption buffer has been allocated, its address is
+ * written into the image and the beta CB, page pool and spill buffer
+ * addresses/sizes plus the CBES reserve values are added to the patch
+ * context. If the patch context write cannot be started, the failure is
+ * logged and the patch writes are skipped (the function returns void).
+ */
+static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
+                struct channel_ctx_gk20a *ch_ctx,
+                void *ctx_ptr)
+{
+        struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+        u32 gfxp_preempt_option =
+                ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
+        int err;
+        gk20a_dbg_fn("");
+        if (gr_ctx->t18x.preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+                gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
+                gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
+                                gfxp_preempt_option);
+        }
+        if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
+                u32 addr;
+                u32 size;
+                u32 cbes_reserve;
+                /*
+                 * Apply the byte offset to the pointer, exactly like the
+                 * options write above.
+                 * NOTE(review): gk20a_mem_wr32()'s second argument is
+                 * believed to be a 32-bit word index, so passing the byte
+                 * offset there would hit the wrong location; confirm
+                 * against its definition.
+                 */
+                gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0,
+                                gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
+                err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+                if (err) {
+                        /* Do not issue patch writes without a started patch. */
+                        gk20a_err(dev_from_gk20a(g),
+                                  "failed to begin patch context write");
+                        goto out;
+                }
+                /* Beta CB: fold the 64-bit VA into the aligned 32-bit field. */
+                addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
+                        gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
+                        (u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
+                         (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
+                gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
+                g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+                /* Page pool address and size. */
+                addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
+                        gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
+                        (u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
+                         (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
+                size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
+                g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+                /*
+                 * Spill buffer: both halves of the address must come from
+                 * the spill buffer itself.
+                 */
+                addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
+                        gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
+                        (u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
+                         (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
+                /*
+                 * The size register takes the buffer size, not its address.
+                 * NOTE(review): the field is named "_256b", so the byte size
+                 * is converted to 256-byte units here; confirm the unit
+                 * against the hardware manual.
+                 */
+                size = gr_ctx->t18x.spill_ctxsw_buffer.size / 256;
+                gr_gk20a_ctx_patch_write(g, ch_ctx,
+                                gr_gpc0_swdx_rm_spill_buffer_addr_r(),
+                                gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
+                                true);
+                gr_gk20a_ctx_patch_write(g, ch_ctx,
+                                gr_gpc0_swdx_rm_spill_buffer_size_r(),
+                                gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
+                                true);
+                /* The same GfxP CBES reserve value goes to both registers. */
+                cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
+                gr_gk20a_ctx_patch_write(g, ch_ctx,
+                                gr_gpcs_swdx_beta_cb_ctrl_r(),
+                                gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
+                                        cbes_reserve),
+                                true);
+                gr_gk20a_ctx_patch_write(g, ch_ctx,
+                                gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
+                                gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
+                                        cbes_reserve),
+                                true);
+                gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+        }
+out:
+        gk20a_dbg_fn("done");
+}
 void gp10b_init_gr(struct gpu_ops *gops)
 {
        gm20b_init_gr(gops);
@@ -481,7 +685,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
        gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
        gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
        gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
-        gops->gr.buffer_size_defaults = gr_gp10b_buffer_size_defaults;
+        gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
        gops->gr.calc_global_ctx_buffer_size =
                gr_gp10b_calc_global_ctx_buffer_size;
        gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
@@ -490,4 +694,9 @@ void gp10b_init_gr(struct gpu_ops *gops)
                gr_gp10b_set_alpha_circular_buffer_size;
        gops->gr.set_circular_buffer_size =
                gr_gp10b_set_circular_buffer_size;
+        gops->gr.init_ctx_state = gr_gp10b_init_ctx_state;
+        gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx;
+        gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
+        gops->gr.update_ctxsw_preemption_mode =
+                gr_gp10b_update_ctxsw_preemption_mode;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 536a7d27..6bbda564 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -16,7 +16,7 @@
 #ifndef _NVGPU_GR_GP10B_H_
 #define _NVGPU_GR_GP10B_H_
-struct gk20a;
+struct gpu_ops;
 enum {
        PASCAL_CHANNEL_GPFIFO_A  = 0xC06F,
@@ -32,4 +32,21 @@ enum {
 void gp10b_init_gr(struct gpu_ops *ops);
+/* t18x-specific GR state; the driver reaches it as g->gr.t18x. */
+struct gr_t18x {
+        struct {
+                /*
+                 * Size used for the per-context preemption buffer. Filled in
+                 * by gr_gp10b_init_ctx_state() when it is still zero.
+                 */
+                u32 preempt_image_size;
+        } ctx_vars;
+};
+/* t18x-specific part of a graphics context descriptor (gr_ctx->t18x). */
+struct gr_ctx_desc_t18x {
+        /* One of the NVGPU_GR_PREEMPTION_MODE_* values. */
+        int preempt_mode;
+        /* Buffers allocated per context when GfxP preemption is requested. */
+        struct mem_desc preempt_ctxsw_buffer;
+        struct mem_desc spill_ctxsw_buffer;
+        struct mem_desc betacb_ctxsw_buffer;
+        struct mem_desc pagepool_ctxsw_buffer;
+};
+/*
+ * Preemption modes passed as "flags" to the alloc_gr_ctx op and stored in
+ * gr_ctx_desc_t18x.preempt_mode. Only GFXP triggers the extra buffers.
+ */
+#define NVGPU_GR_PREEMPTION_MODE_WFI            0
+#define NVGPU_GR_PREEMPTION_MODE_GFXP           1
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index 79890f3c..0892f12e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -238,4 +238,20 @@ static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(v
 {
        return 0x0;
 }
+/* Offset (0x80) of the graphics preemption options word in the context image. */
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_o(void)
+{
+        return 0x00000080;
+}
+/* Encode v into the 2-bit control field at bit 0 of the options word. */
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_f(u32 v)
+{
+        return (v & 0x3) << 0;
+}
+/* Control field value (0x1) written to select GfxP preemption. */
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(void)
+{
+        return 0x1;
+}
+/* Offset (0x68) of the full preemption pointer in the context image. */
+static inline u32 ctxsw_prog_main_image_full_preemption_ptr_o(void)
+{
+        return 0x00000068;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index f2237a50..0bd707db 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -718,6 +718,10 @@ static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
 {
        return 0x21;
 }
+/* FECS method address (0x1a) used to query the preemption image size. */
+static inline u32 gr_fecs_method_push_adr_discover_preemption_image_size_v(void)
+{
+        return 0x0000001a;
+}
 static inline u32 gr_fecs_host_int_status_r(void)
 {
        return 0x00409c18;
@@ -1910,6 +1914,10 @@ static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(void)
 {
        return 0x00030000;
 }
+/* Beta CB size value (0x30a00) used when sizing the GfxP beta CB buffer. */
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(void)
+{
+        return 0x00030a00;
+}
 static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(void)
 {
        return 0x00000020;
@@ -2186,6 +2194,50 @@ static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_true_f(void)
 {
        return 0x80000000;
 }
+/* Address of the spill buffer size register, as passed to patch writes. */
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_r(void)
+{
+        return 0x00500ee4;
+}
+/* Encode v into the 16-bit size field at bit 0. */
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(u32 v)
+{
+        return (v & 0xffff) << 0;
+}
+/*
+ * Default spill buffer size value (0x250).
+ * NOTE(review): presumably counted in 256-byte units, per the name; confirm.
+ */
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(void)
+{
+        return 0x00000250;
+}
+/* Address of the spill buffer address register, as passed to patch writes. */
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_r(void)
+{
+        return 0x00500ee0;
+}
+/* Encode v into the 32-bit address field at bit 0 (value passes unchanged). */
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(u32 v)
+{
+        return (v & 0xffffffff) << 0;
+}
+/* Number of low address bits (8) shifted out before programming the field. */
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v(void)
+{
+        return 0x00000008;
+}
+/* Address of the SWDX beta CB control register, as passed to patch writes. */
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_r(void)
+{
+        return 0x00418eec;
+}
+/* Encode v into the 12-bit CBES reserve field at bit 0. */
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(u32 v)
+{
+        return (v & 0xfff) << 0;
+}
+/* CBES reserve value (0x100) programmed for GfxP. */
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(void)
+{
+        return 0x00000100;
+}
+/* Address of the PPC CBM beta CB control register, as passed to patch writes. */
+static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(void)
+{
+        return 0x0041befc;
+}
+/* Encode v into the 12-bit CBES reserve field at bit 0. */
+static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(u32 v)
+{
+        return (v & 0xfff) << 0;
+}
 static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)
 {
        return 0x00418ea0 + i*4;
@@ -3342,4 +3394,16 @@ static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f(void)
 {
        return 0x40000000;
 }
+/*
+ * Returns 0x004041c0.
+ * NOTE(review): presumably the GfxP WFI timeout register address; it is not
+ * referenced by the code visible in this change.
+ */
+static inline u32 gr_fe_gfxp_wfi_timeout_r(void)
+{
+        return 0x004041c0;
+}
+/* Encode v into the 32-bit count field at bit 0 (value passes unchanged). */
+static inline u32 gr_fe_gfxp_wfi_timeout_count_f(u32 v)
+{
+        return (v & 0xffffffff) << 0;
+}
+/* Count field value (0x0) named "disabled". */
+static inline u32 gr_fe_gfxp_wfi_timeout_count_disabled_f(void)
+{
+        return 0x0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gr_t18x.h b/drivers/gpu/nvgpu/gr_t18x.h
new file mode 100644
index 00000000..95601116
--- /dev/null
+++ b/drivers/gpu/nvgpu/gr_t18x.h
@@ -0,0 +1,20 @@
+/*
+ * NVIDIA T18x GR
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _NVGPU_GR_T18X_H_
+#define _NVGPU_GR_T18X_H_
+#include "gp10b/gr_gp10b.h"
+#endif
author	Terje Bergstrom <tbergstrom@nvidia.com>	2014-12-03 09:11:50 -0500
committer	Deepak Nibade <dnibade@nvidia.com>	2016-12-27 04:52:03 -0500
commit	15839d4763e0651e789a6511476851cccef0febb (patch)
tree	0e0a6d52c478077049c0c51d1abf6bfdcccffacf /drivers/gpu/nvgpu
parent	945e5e6832bd2461b9eafa61e8dd06b793a6f6b9 (diff)

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 0dce115a..0a60612d 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -21,6 +21,7 @@
21	#include "gp10b/gr_gp10b.h"	21	#include "gp10b/gr_gp10b.h"
22	#include "hw_gr_gp10b.h"	22	#include "hw_gr_gp10b.h"
23	#include "hw_proj_gp10b.h"	23	#include "hw_proj_gp10b.h"
		24	#include "hw_ctxsw_prog_gp10b.h"
24		25
25	bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)	26	bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
26	{	27	{
@@ -297,12 +298,9 @@ clean_up:
297	return ret;	298	return ret;
298	}	299	}
299		300
300	static void gr_gp10b_buffer_size_defaults(struct gk20a *g)	301	static u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
301	{	302	{
302	g->gr.pagepool_default_size =	303	return gr_scc_pagepool_total_pages_hwmax_value_v();
303	gr_scc_pagepool_total_pages_hwmax_value_v();
304	g->gr.pagepool_max_size =
305	gr_scc_pagepool_total_pages_hwmax_value_v();
306	}	304	}
307		305
308	static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)	306	static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
@@ -473,6 +471,212 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
473	}	471	}
474	}	472	}
475		473
		474	static int gr_gp10b_init_ctx_state(struct gk20a *g)
		475	{
		476	struct fecs_method_op_gk20a op = {
		477	.mailbox = { .id = 0, .data = 0,
		478	.clr = ~0, .ok = 0, .fail = 0},
		479	.method.data = 0,
		480	.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
		481	.cond.fail = GR_IS_UCODE_OP_SKIP,
		482	};
		483	int err;
		484
		485	gk20a_dbg_fn("");
		486
		487	err = gr_gk20a_init_ctx_state(g);
		488	if (err)
		489	return err;
		490
		491	if (!g->gr.t18x.ctx_vars.preempt_image_size) {
		492	op.method.addr =
		493	gr_fecs_method_push_adr_discover_preemption_image_size_v();
		494	op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
		495	err = gr_gk20a_submit_fecs_method_op(g, op);
		496	if (err) {
		497	gk20a_err(dev_from_gk20a(g),
		498	"query preempt image size failed");
		499	return err;
		500	}
		501	}
		502
		503	gk20a_dbg_info("preempt image size: %u",
		504	g->gr.t18x.ctx_vars.preempt_image_size);
		505
		506	gk20a_dbg_fn("done");
		507
		508	return 0;
		509	}
		510
		511	int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
		512	struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
		513	u32 flags)
		514	{
		515	int err;
		516
		517	gk20a_dbg_fn("");
		518
		519	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, flags);
		520	if (err)
		521	return err;
		522
		523	if (flags == NVGPU_GR_PREEMPTION_MODE_GFXP) {
		524	u32 spill_size =
		525	gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v();
		526	u32 betacb_size = ALIGN(
		527	(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() *
		528	gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
		529	g->gr.max_tpc_count) +
		530	(g->gr.alpha_cb_size *
		531	gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
		532	g->gr.max_tpc_count),
		533	128);
		534	u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
		535	gr_scc_pagepool_total_pages_byte_granularity_v();
		536
		537	err = gk20a_gmmu_alloc_map(vm, g->gr.t18x.ctx_vars.preempt_image_size,
		538	&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
		539	if (err) {
		540	gk20a_err(dev_from_gk20a(vm->mm->g),
		541	"cannot allocate preempt buffer");
		542	goto fail_free_gk20a_ctx;
		543	}
		544
		545	err = gk20a_gmmu_alloc_map(vm, spill_size,
		546	&(*gr_ctx)->t18x.spill_ctxsw_buffer);
		547	if (err) {
		548	gk20a_err(dev_from_gk20a(vm->mm->g),
		549	"cannot allocate spill buffer");
		550	goto fail_free_preempt;
		551	}
		552
		553	err = gk20a_gmmu_alloc_map(vm, betacb_size,
		554	&(*gr_ctx)->t18x.betacb_ctxsw_buffer);
		555	if (err) {
		556	gk20a_err(dev_from_gk20a(vm->mm->g),
		557	"cannot allocate beta buffer");
		558	goto fail_free_spill;
		559	}
		560
		561	err = gk20a_gmmu_alloc_map(vm, pagepool_size,
		562	&(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
		563	if (err) {
		564	gk20a_err(dev_from_gk20a(vm->mm->g),
		565	"cannot allocate page pool");
		566	goto fail_free_betacb;
		567	}
		568
		569	(*gr_ctx)->t18x.preempt_mode = flags;
		570	}
		571
		572	gk20a_dbg_fn("done");
		573
		574	return err;
		575
		576	fail_free_betacb:
		577	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
		578	fail_free_spill:
		579	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer);
		580	fail_free_preempt:
		581	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
		582	fail_free_gk20a_ctx:
		583	gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
		584	*gr_ctx = NULL;
		585
		586	return err;
		587	}
		588
		589	static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
		590	struct gr_ctx_desc *gr_ctx)
		591	{
		592	gk20a_dbg_fn("");
		593
		594	if (!gr_ctx)
		595	return;
		596
		597	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
		598	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
		599	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
		600	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
		601	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
		602
		603	gk20a_dbg_fn("done");
		604	}
		605
		606	static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
		607	struct channel_ctx_gk20a *ch_ctx,
		608	void *ctx_ptr)
		609	{
		610	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
		611	u32 gfxp_preempt_option =
		612	ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
		613	int err;
		614
		615	gk20a_dbg_fn("");
		616
		617	if (gr_ctx->t18x.preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
		618	gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
		619	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
		620	gfxp_preempt_option);
		621	}
		622
		623	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
		624	u32 addr;
		625	u32 size;
		626	u32 cbes_reserve;
		627
		628	gk20a_mem_wr32(ctx_ptr, ctxsw_prog_main_image_full_preemption_ptr_o(),
		629	gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
		630
		631	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
		632
		633	addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
		634	gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
		635	(u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
		636	(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
		637
		638	gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
		639	g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
		640
		641	addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
		642	gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
		643	(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
		644	(32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
		645	size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
		646	g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
		647
		648	addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
		649	gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
		650	(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
		651	(32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
		652
		653	gr_gk20a_ctx_patch_write(g, ch_ctx,
		654	gr_gpc0_swdx_rm_spill_buffer_addr_r(),
		655	gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
		656	true);
		657	gr_gk20a_ctx_patch_write(g, ch_ctx,
		658	gr_gpc0_swdx_rm_spill_buffer_size_r(),
		659	gr_gpc0_swdx_rm_spill_buffer_size_256b_f(addr),
		660	true);
		661
		662	cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
		663	gr_gk20a_ctx_patch_write(g, ch_ctx,
		664	gr_gpcs_swdx_beta_cb_ctrl_r(),
		665	gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
		666	cbes_reserve),
		667	true);
		668	gr_gk20a_ctx_patch_write(g, ch_ctx,
		669	gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
		670	gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
		671	cbes_reserve),
		672	true);
		673
		674	gr_gk20a_ctx_patch_write_end(g, ch_ctx);
		675	}
		676
		677	gk20a_dbg_fn("done");
		678	}
		679
476	void gp10b_init_gr(struct gpu_ops *gops)	680	void gp10b_init_gr(struct gpu_ops *gops)
477	{	681	{
478	gm20b_init_gr(gops);	682	gm20b_init_gr(gops);
@@ -481,7 +685,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
481	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;	685	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
482	gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;	686	gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
483	gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;	687	gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
484	gops->gr.buffer_size_defaults = gr_gp10b_buffer_size_defaults;	688	gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
485	gops->gr.calc_global_ctx_buffer_size =	689	gops->gr.calc_global_ctx_buffer_size =
486	gr_gp10b_calc_global_ctx_buffer_size;	690	gr_gp10b_calc_global_ctx_buffer_size;
487	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;	691	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
@@ -490,4 +694,9 @@ void gp10b_init_gr(struct gpu_ops *gops)
490	gr_gp10b_set_alpha_circular_buffer_size;	694	gr_gp10b_set_alpha_circular_buffer_size;
491	gops->gr.set_circular_buffer_size =	695	gops->gr.set_circular_buffer_size =
492	gr_gp10b_set_circular_buffer_size;	696	gr_gp10b_set_circular_buffer_size;
		697	gops->gr.init_ctx_state = gr_gp10b_init_ctx_state;
		698	gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx;
		699	gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
		700	gops->gr.update_ctxsw_preemption_mode =
		701	gr_gp10b_update_ctxsw_preemption_mode;
493	}	702	}


diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index 536a7d27..6bbda564 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -16,7 +16,7 @@
16	#ifndef _NVGPU_GR_GP10B_H_	16	#ifndef _NVGPU_GR_GP10B_H_
17	#define _NVGPU_GR_GP10B_H_	17	#define _NVGPU_GR_GP10B_H_
18		18
19	struct gk20a;	19	struct gpu_ops;
20		20
21	enum {	21	enum {
22	PASCAL_CHANNEL_GPFIFO_A = 0xC06F,	22	PASCAL_CHANNEL_GPFIFO_A = 0xC06F,
@@ -32,4 +32,21 @@ enum {
32		32
33	void gp10b_init_gr(struct gpu_ops *ops);	33	void gp10b_init_gr(struct gpu_ops *ops);
34		34
		35	struct gr_t18x {
		36	struct {
		37	u32 preempt_image_size;
		38	} ctx_vars;
		39	};
		40
		41	struct gr_ctx_desc_t18x {
		42	int preempt_mode;
		43	struct mem_desc preempt_ctxsw_buffer;
		44	struct mem_desc spill_ctxsw_buffer;
		45	struct mem_desc betacb_ctxsw_buffer;
		46	struct mem_desc pagepool_ctxsw_buffer;
		47	};
		48
		49	#define NVGPU_GR_PREEMPTION_MODE_WFI 0
		50	#define NVGPU_GR_PREEMPTION_MODE_GFXP 1
		51
35	#endif	52	#endif


diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h index 79890f3c..0892f12e 100644 --- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -238,4 +238,20 @@ static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(v
238	{	238	{
239	return 0x0;	239	return 0x0;
240	}	240	}
		241	static inline u32 ctxsw_prog_main_image_graphics_preemption_options_o(void)
		242	{
		243	return 0x00000080;
		244	}
		245	static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_f(u32 v)
		246	{
		247	return (v & 0x3) << 0;
		248	}
		249	static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(void)
		250	{
		251	return 0x1;
		252	}
		253	static inline u32 ctxsw_prog_main_image_full_preemption_ptr_o(void)
		254	{
		255	return 0x00000068;
		256	}
241	#endif	257	#endif


diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h index f2237a50..0bd707db 100644 --- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -718,6 +718,10 @@ static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
718	{	718	{
719	return 0x21;	719	return 0x21;
720	}	720	}
		721	static inline u32 gr_fecs_method_push_adr_discover_preemption_image_size_v(void)
		722	{
		723	return 0x0000001a;
		724	}
721	static inline u32 gr_fecs_host_int_status_r(void)	725	static inline u32 gr_fecs_host_int_status_r(void)
722	{	726	{
723	return 0x00409c18;	727	return 0x00409c18;
@@ -1910,6 +1914,10 @@ static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(void)
1910	{	1914	{
1911	return 0x00030000;	1915	return 0x00030000;
1912	}	1916	}
		1917	static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(void)
		1918	{
		1919	return 0x00030a00;
		1920	}
1913	static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(void)	1921	static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(void)
1914	{	1922	{
1915	return 0x00000020;	1923	return 0x00000020;
@@ -2186,6 +2194,50 @@ static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_true_f(void)
2186	{	2194	{
2187	return 0x80000000;	2195	return 0x80000000;
2188	}	2196	}
		2197	static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_r(void)
		2198	{
		2199	return 0x00500ee4;
		2200	}
		2201	static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(u32 v)
		2202	{
		2203	return (v & 0xffff) << 0;
		2204	}
		2205	static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(void)
		2206	{
		2207	return 0x00000250;
		2208	}
		2209	static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_r(void)
		2210	{
		2211	return 0x00500ee0;
		2212	}
		2213	static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(u32 v)
		2214	{
		2215	return (v & 0xffffffff) << 0;
		2216	}
		2217	static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v(void)
		2218	{
		2219	return 0x00000008;
		2220	}
		2221	static inline u32 gr_gpcs_swdx_beta_cb_ctrl_r(void)
		2222	{
		2223	return 0x00418eec;
		2224	}
		2225	static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(u32 v)
		2226	{
		2227	return (v & 0xfff) << 0;
		2228	}
		2229	static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(void)
		2230	{
		2231	return 0x00000100;
		2232	}
		2233	static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(void)
		2234	{
		2235	return 0x0041befc;
		2236	}
		2237	static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(u32 v)
		2238	{
		2239	return (v & 0xfff) << 0;
		2240	}
2189	static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)	2241	static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)
2190	{	2242	{
2191	return 0x00418ea0 + i*4;	2243	return 0x00418ea0 + i*4;
@@ -3342,4 +3394,16 @@ static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f(void)
3342	{	3394	{
3343	return 0x40000000;	3395	return 0x40000000;
3344	}	3396	}
		3397	static inline u32 gr_fe_gfxp_wfi_timeout_r(void)
		3398	{
		3399	return 0x004041c0;
		3400	}
		3401	static inline u32 gr_fe_gfxp_wfi_timeout_count_f(u32 v)
		3402	{
		3403	return (v & 0xffffffff) << 0;
		3404	}
		3405	static inline u32 gr_fe_gfxp_wfi_timeout_count_disabled_f(void)
		3406	{
		3407	return 0x0;
		3408	}
3345	#endif	3409	#endif


diff --git a/drivers/gpu/nvgpu/gr_t18x.h b/drivers/gpu/nvgpu/gr_t18x.h new file mode 100644 index 00000000..95601116 --- /dev/null +++ b/drivers/gpu/nvgpu/gr_t18x.h
@@ -0,0 +1,20 @@
		1	/*
		2	* NVIDIA T18x GR
		3	*
		4	* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
		5	*
		6	* This program is free software; you can redistribute it and/or modify it
		7	* under the terms and conditions of the GNU General Public License,
		8	* version 2, as published by the Free Software Foundation.
		9	*
		10	* This program is distributed in the hope it will be useful, but WITHOUT
		11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		13	* more details.
		14	*/
		15	#ifndef _NVGPU_GR_T18X_H_
		16	#define _NVGPU_GR_T18X_H_
		17
		18	#include "gp10b/gr_gp10b.h"
		19
		20	#endif