From 3b08d73568ddaf0dec2c2abe8e813672da2463ae Mon Sep 17 00:00:00 2001
From: Kirill Artamonov
Date: Sun, 25 Jan 2015 18:42:18 +0200
Subject: gpu: nvgpu: gp10b: add debug features for gfxp and cilp

Add debugfs switch to force cilp and gfx preemption.

Add debugfs switch to dump context switch stats on channel destruction.

bug 1525327
bug 1581799

Signed-off-by: Kirill Artamonov
Change-Id: I7d0558cc325ce655411388ea66ad982101f2fe66
Reviewed-on: http://git-master/r/794976
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
Reviewed-on: http://git-master/r/677231
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c             | 47 ++++++++++++++++++++++++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h             |  8 ++++-
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 24 +++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index c9b870c7..e727ee99 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -25,6 +25,7 @@
 #include "hw_proj_gp10b.h"
 #include "hw_ctxsw_prog_gp10b.h"
 #include "hw_mc_gp10b.h"
+#include <linux/vmalloc.h>
 
 static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
@@ -482,6 +483,13 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
+	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
+		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
+
+	if (class == PASCAL_COMPUTE_A &&
+			g->gr.t18x.ctx_vars.force_preemption_cilp)
+		flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP;
+
 	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
 		u32 spill_size =
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v();
@@ -531,7 +539,7 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			goto fail_free_betacb;
 		}
 
-		(*gr_ctx)->preempt_mode = flags;
+		(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP;
 	}
 
 	if (class == PASCAL_COMPUTE_A) {
@@ -558,6 +566,38 @@ fail_free_gk20a_ctx:
 	return err;
 }
 
+static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
+		struct gr_ctx_desc *gr_ctx) {
+	void *ctx_ptr = vmap(gr_ctx->mem.pages,
+			PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT,
+			0, pgprot_writecombine(PAGE_KERNEL));
+	if (!ctx_ptr) {
+		WARN_ON("Cannot map context");
+		return;
+	}
+	gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_wfi_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_cta_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_cilp_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g),
+		"image gfx preemption option (GFXP is 1) %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_graphics_preemption_options_o(),
+			0));
+	vunmap(ctx_ptr);
+}
+
 static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 			struct gr_ctx_desc *gr_ctx)
 {
@@ -566,15 +606,18 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 	if (!gr_ctx)
 		return;
 
+	if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close)
+		dump_ctx_switch_stats(g, vm, gr_ctx);
+
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
 	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
-
 	gk20a_dbg_fn("done");
 }
 
+
 static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
 		void *ctx_ptr)
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 7c3ddf27..370e0ea3 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -1,7 +1,7 @@
 /*
  * GM20B GPU GR
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
 *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -36,6 +36,12 @@ void gp10b_init_gr(struct gpu_ops *ops);
 struct gr_t18x {
 	struct {
 		u32 preempt_image_size;
+		u32 force_preemption_gfxp;
+		u32 force_preemption_cilp;
+		u32 dump_ctxsw_stats_on_channel_close;
+		struct dentry *debugfs_force_preemption_cilp;
+		struct dentry *debugfs_force_preemption_gfxp;
+		struct dentry *debugfs_dump_ctxsw_stats;
 	} ctx_vars;
 };
 
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 44b27fe7..2f81378d 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -53,6 +53,30 @@ static int gp10b_tegra_probe(struct platform_device *pdev)
 	platform->bypass_smmu = !device_is_iommuable(&pdev->dev);
 	platform->disable_bigpage = platform->bypass_smmu;
 
+	platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close
+		= false;
+	platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close
+		= false;
+
+	platform->g->gr.t18x.ctx_vars.force_preemption_gfxp = false;
+	platform->g->gr.t18x.ctx_vars.force_preemption_cilp = false;
+
+	platform->g->gr.t18x.ctx_vars.debugfs_force_preemption_gfxp =
+		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
+			platform->debugfs,
+			&platform->g->gr.t18x.ctx_vars.force_preemption_gfxp);
+
+	platform->g->gr.t18x.ctx_vars.debugfs_force_preemption_cilp =
+		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
+			platform->debugfs,
+			&platform->g->gr.t18x.ctx_vars.force_preemption_cilp);
+
+	platform->g->gr.t18x.ctx_vars.debugfs_dump_ctxsw_stats =
+		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
+			S_IRUGO|S_IWUSR,
+			platform->debugfs,
+			&platform->g->gr.t18x.
+				ctx_vars.dump_ctxsw_stats_on_channel_close);
 	return 0;
 }
 
--
cgit v1.2.2
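
The three switches added above are plain debugfs bools, so they can be flipped at runtime once debugfs is mounted. Below is a minimal userspace sketch for toggling them. The /sys/kernel/debug/gpu.0 location and the set_switch() helper are assumptions for illustration only; the real directory is wherever platform->debugfs points on the target, and this program is not part of the patch.

/*
 * Hypothetical helper for the debugfs switches added by this patch.
 * NVGPU_DEBUGFS_DIR is an assumed path; the actual location depends on
 * where platform->debugfs is registered on the target device.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define NVGPU_DEBUGFS_DIR "/sys/kernel/debug/gpu.0"	/* assumed path */

/* Write "1" or "0" to one of the bool switches created in gp10b_tegra_probe(). */
static int set_switch(const char *name, int enable)
{
	char path[256];
	int fd;

	snprintf(path, sizeof(path), "%s/%s", NVGPU_DEBUGFS_DIR, name);
	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror(path);
		return -1;
	}
	/* debugfs bool attributes accept "1"/"0" (and "y"/"n") */
	if (write(fd, enable ? "1" : "0", 1) != 1) {
		perror("write");
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* Force GfxP for graphics and CILP for compute on newly allocated contexts. */
	set_switch("force_preemption_gfxp", 1);
	set_switch("force_preemption_cilp", 1);
	/* Dump context switch statistics when a channel's gr context is freed. */
	set_switch("dump_ctxsw_stats_on_channel_close", 1);
	return 0;
}

Note that force_preemption_gfxp and force_preemption_cilp are checked in gr_gp10b_alloc_gr_ctx(), so they only affect contexts allocated after the write, while dump_ctxsw_stats_on_channel_close takes effect whenever a gr context is freed. Since the files are created with S_IRUGO|S_IWUSR, reads are world-readable but writes require root.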