From 33f192b2f781007fb7f9598613ce3811f3f39237 Mon Sep 17 00:00:00 2001
From: Sandeep Shinde <sashinde@nvidia.com>
Date: Thu, 24 Aug 2017 12:12:42 +0530
Subject: gpu: nvgpu: Add pd_max_batches sysfs node for gp10b

Add a new sysfs node, pd_max_batches, for setting the max batches value
in the NV_PGRAPH_PRI_PD_AB_DIST_CONFIG_1_MAX_BATCHES register, which
controls the maximum number of batches per alpha-beta transition stored
in PD.

Bug 1927124

Change-Id: I2817f2d70dab348d8b0b8ba19bf1e9b9d23ca907
Signed-off-by: Sandeep Shinde <sashinde@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1544104
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
(cherry picked from commit aa4daddda23aa44a84464200f497eac802a8e6ce)
Reviewed-on: https://git-master.nvidia.com/r/1543355
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/common/linux/sysfs.c             | 29 ++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h                 |  1 +
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c                 | 24 +++++++++++++-----
 .../gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h |  4 +++
 4 files changed, 52 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/common/linux/sysfs.c b/drivers/gpu/nvgpu/common/linux/sysfs.c
index 1b59c480..7b614023 100644
--- a/drivers/gpu/nvgpu/common/linux/sysfs.c
+++ b/drivers/gpu/nvgpu/common/linux/sysfs.c
@@ -927,6 +927,33 @@ static ssize_t czf_bypass_read(struct device *dev,
 
 static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store);
 
+/* sysfs store handler: accept a decimal value in 0..64 and cache it. */
+static ssize_t pd_max_batches_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gk20a *g = get_gk20a(dev);
+	unsigned long batches;
+
+	/* Unparsable input and values above 64 are both rejected. */
+	if (kstrtoul(buf, 10, &batches) < 0 || batches > 64)
+		return -EINVAL;
+
+	/* Only the cached field is updated here; no register is written. */
+	g->gr.pd_max_batches = batches;
+
+	return count;
+}
+
+/* sysfs show handler: print the cached pd_max_batches value in decimal. */
+static ssize_t pd_max_batches_read(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	/* The field is a u32, so it is formatted with %u rather than %d. */
+	return sprintf(buf, "%u\n", get_gk20a(dev)->gr.pd_max_batches);
+}
+/* Device attribute "pd_max_batches" (ROOTRW mode) using the handlers above. */
+static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store);
+
 
 void nvgpu_remove_sysfs(struct device *dev)
 {
@@ -961,6 +988,7 @@ void nvgpu_remove_sysfs(struct device *dev)
 #endif
 
 	device_remove_file(dev, &dev_attr_czf_bypass);
+	device_remove_file(dev, &dev_attr_pd_max_batches);
 
 	if (strcmp(dev_name(dev), "gpu.0")) {
 		struct kobject *kobj = &dev->kobj;
@@ -1006,6 +1034,7 @@ int nvgpu_create_sysfs(struct device *dev)
 #endif
 
 	error |= device_create_file(dev, &dev_attr_czf_bypass);
+	error |= device_create_file(dev, &dev_attr_pd_max_batches);
 
 	if (strcmp(dev_name(dev), "gpu.0")) {
 		struct kobject *kobj = &dev->kobj;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 400b7feb..42296084 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -322,6 +322,7 @@ struct gr_gk20a {
 	u32 alpha_cb_size;
 	u32 timeslice_mode;
 	u32 czf_bypass;
+	u32 pd_max_batches;
 
 	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
 
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 74af9817..ee7118e7 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -420,9 +420,15 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
 		gr_pd_ab_dist_cfg1_max_output_granularity_v();
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
-		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
-		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
+	if (g->gr.pd_max_batches) {
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
+			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+			gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch);
+	} else {
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
+			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+			gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
+	}
 
 	attrib_offset_in_chunk = alpha_offset_in_chunk +
 		gr->tpc_count * gr->alpha_cb_size;
@@ -751,9 +757,15 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
 		gr_pd_ab_dist_cfg1_max_output_granularity_v();
 
-	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
-		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
-		gr_pd_ab_dist_cfg1_max_batches_init_f());
+	if (g->gr.pd_max_batches) {
+		gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
+			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+			gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches));
+	} else {
+		gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
+			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+			gr_pd_ab_dist_cfg1_max_batches_init_f());
+	}
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
 		stride = gpc_stride * gpc_index;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
index fe902cbb..e56923c6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
@@ -1662,6 +1662,10 @@ static inline u32 gr_pd_ab_dist_cfg1_r(void)
 {
 	return 0x004064c4;
 }
+static inline u32 gr_pd_ab_dist_cfg1_max_batches_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* keep the low 16 bits of v, at bit offset 0 */
+}
 static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void)
 {
 	return 0xffff;
-- 
cgit v1.2.2