From 67887cc61ae18c0278eec8ff8ae758297765f3b8 Mon Sep 17 00:00:00 2001
From: Kevin Huang
Date: Wed, 9 Apr 2014 18:47:29 -0700
Subject: gpu: nvgpu: gm20b: support TPC floorsweeping

Add a gr.get_gpc_tpc_mask HAL op for gm20b that reads
NV_FUSE_STATUS_OPT_TPC_GPC for a GPC and returns the inverted status bits,
limited to max_tpc_per_gpc_count bits.

Program gr_fe_tpc_fs and the CWD GPC/TPC and SM ID registers from
gr->gpc_tpc_mask instead of the previous hard-coded silicon/non-silicon
values. The CWD ID accumulators start from zero so that only the fields of
the TPCs present in the mask are set.

Add a root-only tpc_fs_mask sysfs node. Reading it reports the per-GPC
masks returned by get_gpc_tpc_mask packed into one word; writing a
non-zero value (decimal, or hex with a 0x prefix as printed by the read
side) overrides the mask used for GPC 0.

Bug 1450798

Change-Id: I371537d086ce1088c6d007676c1fe1e2770dd4e3
Signed-off-by: Kevin Huang
Reviewed-on: http://git-master/r/403877
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | 39 +++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c    | 46 ++++++++++++++++++++++++-----------
 2 files changed, 71 insertions(+), 14 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
index 7e0183ca..bec18328 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -520,6 +520,43 @@ static ssize_t force_idle_read(struct device *device,
 static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
 #endif
 
+static ssize_t tpc_fs_mask_store(struct device *device,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct platform_device *ndev = to_platform_device(device);
+	struct gk20a *g = get_gk20a(ndev);
+	unsigned long val = 0;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val)
+		g->gr.gpc_tpc_mask[0] = val;
+
+	return count;
+}
+
+static ssize_t tpc_fs_mask_read(struct device *device,
+	struct device_attribute *attr, char *buf)
+{
+	struct platform_device *ndev = to_platform_device(device);
+	struct gk20a *g = get_gk20a(ndev);
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc_index;
+	u32 tpc_fs_mask = 0;
+
+	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+		if (g->ops.gr.get_gpc_tpc_mask)
+			tpc_fs_mask |=
+				g->ops.gr.get_gpc_tpc_mask(g, gpc_index) <<
+				(gr->max_tpc_per_gpc_count * gpc_index);
+	}
+
+	return sprintf(buf, "0x%x\n", tpc_fs_mask);
+}
+
+static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store);
+
 void gk20a_remove_sysfs(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(to_platform_device(dev));
@@ -540,6 +577,7 @@ void gk20a_remove_sysfs(struct device *dev)
 	device_remove_file(dev, &dev_attr_aelpg_param);
 	device_remove_file(dev, &dev_attr_aelpg_enable);
 	device_remove_file(dev, &dev_attr_allow_all);
+	device_remove_file(dev, &dev_attr_tpc_fs_mask);
 
 	if (g->host1x_dev && (dev->parent != &g->host1x_dev->dev))
 		sysfs_remove_link(&dev->kobj, dev_name(dev));
@@ -566,6 +604,7 @@ void gk20a_create_sysfs(struct platform_device *dev)
 	error |= device_create_file(&dev->dev, &dev_attr_aelpg_param);
 	error |= device_create_file(&dev->dev, &dev_attr_aelpg_enable);
 	error |= device_create_file(&dev->dev, &dev_attr_allow_all);
+	error |= device_create_file(&dev->dev, &dev_attr_tpc_fs_mask);
 
 	if (g->host1x_dev && (dev->dev.parent != &g->host1x_dev->dev))
 		error |= sysfs_create_link(&g->host1x_dev->dev.kobj,
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 25d03736..72500b0e 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -23,6 +23,7 @@
 #include "hw_fb_gm20b.h"
 #include "hw_proj_gm20b.h"
 #include "hw_ctxsw_prog_gm20b.h"
+#include "hw_fuse_gm20b.h"
 
 static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
 {
@@ -478,6 +479,17 @@ static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 	*ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
 }
 
+static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
+{
+	u32 val;
+	struct gr_gk20a *gr = &g->gr;
+
+	/* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */
+	val = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(gpc_index));
+
+	return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1);
+}
+
 static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
@@ -486,6 +498,7 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 	u32 sm_id = 0, gpc_id = 0;
 	u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
 	u32 tpc_per_gpc;
+	u32 tpc_fs_mask = 0, tpc_sm_id = 0, gpc_tpc_id = 0;
 
 	gk20a_dbg_fn("");
 
@@ -556,22 +569,26 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 		gk20a_readl(g, gr_be0_crop_debug3_r()) |
 		gr_bes_crop_debug3_comp_vdc_4to2_disable_m());
 
-	if (tegra_platform_is_silicon()) {
-		gk20a_writel(g, gr_fe_tpc_fs_r(), gr->pes_tpc_mask[0][0]);
-
-		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0) |
-						gr_cwd_gpc_tpc_id_tpc1_f(1));
-
-		gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0) |
-						gr_cwd_sm_id_tpc1_f(1));
-	} else {
-		gk20a_writel(g, gr_fe_tpc_fs_r(), 1);
-
-		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0));
-
-		gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0));
+	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+		tpc_fs_mask |= gr->gpc_tpc_mask[gpc_index] <<
+			(gr->max_tpc_per_gpc_count * gpc_index);
+	}
+	gk20a_writel(g, gr_fe_tpc_fs_r(), tpc_fs_mask);
+	if (tpc_fs_mask & (0x1 << 0)) {
+		tpc_sm_id |= gr_cwd_sm_id_tpc0_f(0);
+		gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(0);
+	}
+	if (tpc_fs_mask & (0x1 << 1)) {
+		gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(1);
+		tpc_sm_id |= gr_cwd_sm_id_tpc1_f(1);
 	}
 
+	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.
+	 * Since we know the TPC count is less than 5, we select
+	 * index 0 directly. */
+	gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gpc_tpc_id);
+
+	gk20a_writel(g, gr_cwd_sm_id_r(0), tpc_sm_id);
 	return 0;
 }
 
@@ -733,4 +750,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 #else
 	gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
 #endif
+	gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask;
 }
-- 
cgit v1.2.2