From 34732a14b22f09d8f9d52f756612178f0313f120 Mon Sep 17 00:00:00 2001 From: Deepak Goyal Date: Fri, 14 Sep 2018 11:45:19 +0530 Subject: nvgpu: gpu: Support multiple tpc-pg masks. - TPC powergating should be done before calling gk20a_enable_gr_hw. gk20a_enable_gr_hw() issues a GR engine reset. Without this fix, enabling 1 TPC from each PES causes ctxsw timeout error while running GFX Benchmark. - Adds valid tpc-pg mask for 1/2/3/4 active TPC configs. TPC Config - TPC-MASK 4 TPC configuration - 0x0 3 TPC configuration - 0x1/0x2/0x4/0x8 2 TPC configuration - 0x5/0x9/0x6/0xa - We should not write to gr_fe_tpc_pesmask_r() as part of TPC-PG sequence. This register is for debug purpose only. Bug 200442360 Change-Id: I6fbe1ad8fbc836ace8cbaf00ec3d21a12c73e0bd Signed-off-by: Deepak Goyal Reviewed-on: https://git-master.nvidia.com/r/1809772 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gk20a.c | 30 +++++++++++++++++------ drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 21 ---------------- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 4 ++- drivers/gpu/nvgpu/os/linux/driver_common.c | 6 ++++- drivers/gpu/nvgpu/os/linux/platform_gk20a.h | 2 +- drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | 4 ++- drivers/gpu/nvgpu/os/linux/sysfs.c | 15 ++++++++++-- 7 files changed, 47 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 9958d24f..1cad8dcb 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -259,9 +259,28 @@ int gk20a_finalize_poweron(struct gk20a *g) g->ops.mc.intr_enable(g); + /* + * Overwrite can_tpc_powergate to false if the chip is ES fused and + * already optimized with some TPCs already floorswept + * via fuse. We will not support TPC-PG in those cases. + */ + + if (g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0) != 0x0) { + g->can_tpc_powergate = false; + g->tpc_pg_mask = 0x0; + } + + nvgpu_mutex_acquire(&g->tpc_pg_lock); + + if (g->can_tpc_powergate) { + if (g->ops.gr.powergate_tpc != NULL) + g->ops.gr.powergate_tpc(g); + } + err = gk20a_enable_gr_hw(g); if (err) { nvgpu_err(g, "failed to enable gr"); + nvgpu_mutex_release(&g->tpc_pg_lock); goto done; } @@ -271,6 +290,7 @@ int gk20a_finalize_poweron(struct gk20a *g) } if (err) { nvgpu_err(g, "failed to init pmu ucode"); + nvgpu_mutex_release(&g->tpc_pg_lock); goto done; } } @@ -279,6 +299,7 @@ int gk20a_finalize_poweron(struct gk20a *g) err = gk20a_init_pstate_support(g); if (err) { nvgpu_err(g, "failed to init pstates"); + nvgpu_mutex_release(&g->tpc_pg_lock); goto done; } } @@ -296,18 +317,11 @@ int gk20a_finalize_poweron(struct gk20a *g) err = nvgpu_init_pmu_support(g); if (err) { nvgpu_err(g, "failed to init gk20a pmu"); + nvgpu_mutex_release(&g->tpc_pg_lock); goto done; } } - nvgpu_mutex_acquire(&g->tpc_pg_lock); - - if (g->can_tpc_powergate) { - if (g->ops.gr.powergate_tpc != NULL) { - g->ops.gr.powergate_tpc(g); - } - } - err = gk20a_init_gr_support(g); if (err) { nvgpu_err(g, "failed to init gk20a gr"); diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 3dedc6b5..288bd583 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -73,16 +73,6 @@ */ #define GR_TPCS_INFO_FOR_MAPREGISTER 6 -/* - * There are 4 TPCs in GV11b ranging from TPC0 to TPC3 - * There are two PES in GV11b each controlling two TPCs - * PES0 is linked to TPC0 & TPC2 - * PES1 is linked to TPC1 & TPC3 - */ -#define TPC_MASK_FOR_PESID_0 (u32) 0x5 -#define TPC_MASK_FOR_PESID_1 (u32) 0xa - - bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -143,17 +133,6 @@ void gr_gv11b_powergate_tpc(struct gk20a *g) tpc_pg_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0); } while (tpc_pg_status != g->tpc_pg_mask); - gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() | - gr_fe_tpc_pesmask_action_write_f() | - gr_fe_tpc_pesmask_pesid_f(0) | - gr_fe_tpc_pesmask_gpcid_f(0) | - ((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_0)); - gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() | - gr_fe_tpc_pesmask_action_write_f() | - gr_fe_tpc_pesmask_pesid_f(1) | - gr_fe_tpc_pesmask_gpcid_f(0) | - ((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_1)); - return; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 2ebe0011..8d7ccfa8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -138,6 +138,8 @@ enum gk20a_cbc_op { #define nvgpu_get_litter_value(g, v) (g)->ops.get_litter_value((g), v) +#define MAX_TPC_PG_CONFIGS 3 + enum nvgpu_unit; enum nvgpu_flush_op; @@ -1592,7 +1594,7 @@ struct gk20a { u32 tpc_pg_mask; bool can_tpc_powergate; - u32 valid_tpc_mask; + u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS]; struct nvgpu_bios bios; bool bios_is_init; diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c index 9ff32d68..539f0559 100644 --- a/drivers/gpu/nvgpu/os/linux/driver_common.c +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c @@ -135,6 +135,7 @@ static void nvgpu_init_timeslice(struct gk20a *g) static void nvgpu_init_pm_vars(struct gk20a *g) { struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + u32 i = 0; /* * Set up initial power settings. For non-slicon platforms, disable @@ -172,7 +173,10 @@ static void nvgpu_init_pm_vars(struct gk20a *g) g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); g->can_tpc_powergate = platform->can_tpc_powergate; - g->valid_tpc_mask = platform->valid_tpc_mask; + + for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) + g->valid_tpc_mask[i] = platform->valid_tpc_mask[i]; + g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; /* if default delay is not set, set default delay to 500msec */ if (platform->railgate_delay_init) diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h index 3a0227a1..a19d0a7c 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h @@ -74,7 +74,7 @@ struct gk20a_platform { struct reset_control *reset_control; #endif /* valid TPC-MASK */ - u32 valid_tpc_mask; + u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS]; /* Delay before rail gated */ int railgate_delay_init; diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c index 41cba0f0..dafa05e5 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c @@ -233,7 +233,9 @@ struct gk20a_platform gv11b_tegra_platform = { .can_railgate_init = true, .can_tpc_powergate = true, - .valid_tpc_mask = 0xc, + .valid_tpc_mask[0] = 0x0, + .valid_tpc_mask[1] = 0x1, + .valid_tpc_mask[2] = 0x5, .can_slcg = true, .can_blcg = true, diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index 9e48e45d..1ffb6539 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ -865,6 +865,18 @@ static ssize_t tpc_pg_mask_read(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask); } +static bool is_tpc_mask_valid(struct gk20a *g, u32 tpc_mask) +{ + u32 i; + bool valid = false; + + for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) { + if (tpc_mask == g->valid_tpc_mask[i]) + valid = true; + } + return valid; +} + static ssize_t tpc_pg_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -896,10 +908,9 @@ static ssize_t tpc_pg_mask_store(struct device *dev, return -ENODEV; } - if (val == TPC_MASK_FOR_ALL_ACTIVE_TPCs || val == g->valid_tpc_mask) { + if (is_tpc_mask_valid(g, (u32)val)) { g->tpc_pg_mask = val; } else { - nvgpu_err(g, "TPC-PG mask is invalid"); nvgpu_mutex_release(&g->tpc_pg_lock); return -EINVAL; -- cgit v1.2.2