From d3b8415948de8c9ffe2f2fa66340dd7e71a894e6 Mon Sep 17 00:00:00 2001
From: Deepak Goyal
Date: Mon, 16 Jul 2018 11:10:23 +0530
Subject: gpu: nvgpu: tpc powergating through sysfs

- Adds static TPC powergating through sysfs.
- The active TPC count persists until the GPU/system is rebooted.
- tpc_pg_mask can be written only after GPU probe finishes and before
  the golden context image size has been initialized during GPU boot.
- The wait for the fuse status to reflect a new mask is bounded; on
  timeout an error is logged and the PES masks are not programmed.

Note: To use this feature, the boot/init scripts of the OS (used with
the nvgpu driver) must write to the sysfs node before the discover
image size query is posted to FECS.

Bug 200406784

Change-Id: Id749c7a617422c625f77d0c1a9aada2eb960c4d0
Signed-off-by: Deepak Goyal
Reviewed-on: https://git-master.nvidia.com/r/1742422
Reviewed-by: svc-misra-checker
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/gk20a.c                    | 11 +++
 drivers/gpu/nvgpu/gk20a/gk20a.h                    |  8 ++
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c                 | 68 ++++++++++++++++++++
 drivers/gpu/nvgpu/gv11b/gr_gv11b.h                 |  1 +
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c                |  1 +
 .../gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h | 36 +++++++++++
 drivers/gpu/nvgpu/os/linux/driver_common.c         |  3 +
 drivers/gpu/nvgpu/os/linux/platform_gk20a.h        |  5 ++
 drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c  |  3 +
 drivers/gpu/nvgpu/os/linux/sysfs.c                 | 73 ++++++++++++++++++++++
 10 files changed, 209 insertions(+)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 08e8b79f..ed48253f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -274,12 +274,23 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		}
 	}
 
+	nvgpu_mutex_acquire(&g->tpc_pg_lock);
+
+	if (g->can_tpc_powergate) {
+		if (g->ops.gr.powergate_tpc != NULL) {
+			g->ops.gr.powergate_tpc(g);
+		}
+	}
+
 	err = gk20a_init_gr_support(g);
 	if (err) {
 		nvgpu_err(g, "failed to init gk20a gr");
+		nvgpu_mutex_release(&g->tpc_pg_lock);
 		goto done;
 	}
 
+	nvgpu_mutex_release(&g->tpc_pg_lock);
+
 	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
 		err = gk20a_init_pstate_pmu_support(g);
 		if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 948d8e60..4934958c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -307,6 +307,7 @@ struct gpu_ops {
 				u32 class, u32 padding);
 		void (*free_gr_ctx)(struct gk20a *g,
 				struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
+		void (*powergate_tpc)(struct gk20a *g);
 		void (*update_ctxsw_preemption_mode)(struct gk20a *g,
 				struct channel_gk20a *c,
 				struct nvgpu_mem *mem);
@@ -1361,6 +1362,8 @@ struct gk20a {
 	u64 log_mask;
 	u32 log_trace;
 
+	struct nvgpu_mutex tpc_pg_lock;
+
 	struct nvgpu_gpu_params params;
 
 	/*
@@ -1532,6 +1535,11 @@ struct gk20a {
 
 	u32 tpc_fs_mask_user;
 
+	u32 tpc_pg_mask;
+	bool can_tpc_powergate;
+
+	u32 valid_tpc_mask;
+
 	struct nvgpu_bios bios;
 	bool bios_is_init;
 
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index c2f47a20..51588f1f 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -58,6 +58,7 @@
 #include
 #include
 #include
+#include
 
 #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
 
@@ -71,6 +72,23 @@
  */
 #define GR_TPCS_INFO_FOR_MAPREGISTER 6
 
+/*
+ * There are 4 TPCs in GV11b ranging from TPC0 to TPC3
+ * There are two PES in GV11b each controlling two TPCs
+ * PES0 is linked to TPC0 & TPC2
+ * PES1 is linked to TPC1 & TPC3
+ */
+#define TPC_MASK_FOR_PESID_0	((u32) 0x5)
+#define TPC_MASK_FOR_PESID_1	((u32) 0xa)
+
+/*
+ * Upper bound on the number of fuse status reads performed while waiting
+ * for a new TPC-PG mask to take effect, so that power-on cannot spin
+ * forever. NOTE(review): the value is a conservative guess - tune it
+ * against the real hardware latency.
+ */
+#define TPC_PG_STATUS_POLL_MAX_READS	((u32) 1000000)
+
 bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
@@ -117,6 +135,56 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
 	return valid;
 }
 
+/*
+ * Statically power-gate the TPCs selected by g->tpc_pg_mask on GPC0.
+ *
+ * Nothing is written when the fuse status register already equals the
+ * requested mask. Otherwise the mask is written to the fuse control
+ * register, the status register is polled until it reflects the mask, and
+ * the FE PES masks are programmed with the complement of the mask.
+ *
+ * The poll is bounded by TPC_PG_STATUS_POLL_MAX_READS: if the status never
+ * matches, an error is logged and the PES masks are left untouched.
+ */
+void gr_gv11b_powergate_tpc(struct gk20a *g)
+{
+	u32 active_tpc_mask;
+	u32 polls_left = TPC_PG_STATUS_POLL_MAX_READS;
+	u32 tpc_pg_status = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0));
+
+	if (tpc_pg_status == g->tpc_pg_mask) {
+		nvgpu_info(g, "TPC-PG mask and TPC-PG status is same");
+		return;
+	}
+
+	gk20a_writel(g, fuse_ctrl_opt_tpc_gpc_r(0), g->tpc_pg_mask);
+
+	do {
+		tpc_pg_status = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0));
+		polls_left--;
+	} while ((tpc_pg_status != g->tpc_pg_mask) && (polls_left != 0U));
+
+	if (tpc_pg_status != g->tpc_pg_mask) {
+		nvgpu_err(g, "TPC-PG status 0x%x never matched mask 0x%x",
+			tpc_pg_status, g->tpc_pg_mask);
+		return;
+	}
+
+	/* Complement of the PG mask, limited to the 4 TPCs of GV11b. */
+	active_tpc_mask = ~g->tpc_pg_mask & (u32) 0xf;
+
+	gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
+			gr_fe_tpc_pesmask_action_write_f() |
+			gr_fe_tpc_pesmask_pesid_f(0) |
+			gr_fe_tpc_pesmask_gpcid_f(0) |
+			(active_tpc_mask & TPC_MASK_FOR_PESID_0));
+	gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
+			gr_fe_tpc_pesmask_action_write_f() |
+			gr_fe_tpc_pesmask_pesid_f(1) |
+			gr_fe_tpc_pesmask_gpcid_f(0) |
+			(active_tpc_mask & TPC_MASK_FOR_PESID_1));
+}
+
 bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index a8dbd3a8..f799ccfe 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -250,4 +250,5 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
 					u32 *priv_addr_table,
 					u32 *num_registers);
 u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc);
+void gr_gv11b_powergate_tpc(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index d479fef8..08c3097e 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -294,6 +294,7 @@ static const struct gpu_ops gv11b_ops = {
 		.init_ctx_state = gr_gp10b_init_ctx_state,
 		.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
 		.free_gr_ctx = gr_gk20a_free_gr_ctx,
+		.powergate_tpc = gr_gv11b_powergate_tpc,
 		.update_ctxsw_preemption_mode =
 			gr_gv11b_update_ctxsw_preemption_mode,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 90994a53..473eaff4 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -1572,6 +1572,42 @@ static inline u32 gr_fe_tpc_fs_r(u32 i)
 {
 	return 0x0040a200U + i*4U;
 }
+static inline u32 gr_fe_tpc_pesmask_r(void)
+{
+	return 0x0040a260U;
+}
+static inline u32 gr_fe_tpc_pesmask_pesid_f(u32 v)
+{
+	return (v & 0x3fU) << 24U;
+}
+static inline u32 gr_fe_tpc_pesmask_gpcid_f(u32 v)
+{
+	return (v & 0xffU) << 16U;
+}
+static inline u32 gr_fe_tpc_pesmask_action_m(void)
+{
+	return 0x1U << 30U;
+}
+static inline u32 gr_fe_tpc_pesmask_action_write_f(void)
+{
+	return 0x40000000U;
+}
+static inline u32 gr_fe_tpc_pesmask_action_read_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gr_fe_tpc_pesmask_req_m(void)
+{
+	return 0x1U << 31U;
+}
+static inline u32 gr_fe_tpc_pesmask_req_send_f(void)
+{
+	return 0x80000000U;
+}
+static inline u32 gr_fe_tpc_pesmask_mask_m(void)
+{
+	return 0xffffU << 0U;
+}
 static inline u32 gr_pri_mme_shadow_raw_index_r(void)
 {
 	return 0x00404488U;
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index 8a7cf552..c651e394 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -64,6 +64,7 @@ static void nvgpu_init_vars(struct gk20a *g)
 	nvgpu_mutex_init(&g->poweron_lock);
 	nvgpu_mutex_init(&g->poweroff_lock);
 	nvgpu_mutex_init(&g->ctxsw_disable_lock);
+	nvgpu_mutex_init(&g->tpc_pg_lock);
 
 	l->regs_saved = l->regs;
 	l->bar1_saved = l->bar1;
@@ -168,6 +169,8 @@ static void nvgpu_init_pm_vars(struct gk20a *g)
 	g->ptimer_src_freq = platform->ptimer_src_freq;
 	g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
 	__nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
+	g->can_tpc_powergate = platform->can_tpc_powergate;
+	g->valid_tpc_mask = platform->valid_tpc_mask;
 	g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
 	/* if default delay is not set, set default delay to 500msec */
 	if (platform->railgate_delay_init)
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
index d9725e4c..a4c3eca3 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
@@ -43,6 +43,9 @@ struct gk20a_platform {
 	/* Should be populated at probe. */
 	bool can_railgate_init;
 
+	/* Should be populated at probe. */
+	bool can_tpc_powergate;
+
 	/* Should be populated at probe. */
 	bool can_elpg_init;
 
@@ -71,6 +74,8 @@ struct gk20a_platform {
 	/* Reset control for device */
 	struct reset_control *reset_control;
 #endif
+	/* valid TPC-MASK */
+	u32 valid_tpc_mask;
 
 	/* Delay before rail gated */
 	int railgate_delay_init;
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
index c9c13197..edb3263c 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
@@ -209,6 +209,9 @@ struct gk20a_platform gv11b_tegra_platform = {
 	.railgate_delay_init = 500,
 	.can_railgate_init = true,
 
+	.can_tpc_powergate = true,
+	.valid_tpc_mask = 0xc,
+
 	.can_slcg = true,
 	.can_blcg = true,
 	.can_elcg = true,
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c
index 1b84e2e2..b607768a 100644
--- a/drivers/gpu/nvgpu/os/linux/sysfs.c
+++ b/drivers/gpu/nvgpu/os/linux/sysfs.c
@@ -31,6 +31,8 @@
 
 #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
 
+#define TPC_MASK_FOR_ALL_ACTIVE_TPCs	((u32) 0x0)
+
 static ssize_t elcg_enable_store(struct device *dev,
 	struct device_attribute *attr, const char *buf, size_t count)
 {
@@ -843,6 +845,75 @@ static ssize_t force_idle_read(struct device *dev,
 static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
 #endif
 
+/* sysfs show handler: print the currently requested TPC-PG mask. */
+static ssize_t tpc_pg_mask_read(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	/* tpc_pg_mask is a u32, so print it unsigned. */
+	return snprintf(buf, PAGE_SIZE, "%u\n", g->tpc_pg_mask);
+}
+
+/*
+ * sysfs store handler: set the TPC-PG mask that gk20a_finalize_poweron()
+ * hands to the powergate_tpc HAL.
+ *
+ * The value is parsed as a decimal number and must be either
+ * TPC_MASK_FOR_ALL_ACTIVE_TPCs or the platform's valid_tpc_mask. Writes
+ * are rejected with -ENODEV once the golden context image size has been
+ * initialized, and with -EINVAL for unparsable or unsupported masks.
+ * On platforms without TPC-PG support, or when the mask is unchanged,
+ * the write is accepted and ignored. Returns count on success.
+ */
+static ssize_t tpc_pg_mask_store(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct gr_gk20a *gr = &g->gr;
+	unsigned long val = 0;
+	ssize_t ret = count;
+
+	nvgpu_mutex_acquire(&g->tpc_pg_lock);
+
+	if (!g->can_tpc_powergate) {
+		nvgpu_info(g, "TPC-PG not enabled for the platform");
+		goto exit;
+	}
+
+	if (kstrtoul(buf, 10, &val) < 0) {
+		nvgpu_err(g, "invalid value");
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	if (val == g->tpc_pg_mask) {
+		nvgpu_info(g, "no value change, same mask already set");
+		goto exit;
+	}
+
+	if (gr->ctx_vars.golden_image_size) {
+		nvgpu_err(g, "golden image size already initialized");
+		ret = -ENODEV;
+		goto exit;
+	}
+
+	if (val != TPC_MASK_FOR_ALL_ACTIVE_TPCs && val != g->valid_tpc_mask) {
+		nvgpu_err(g, "TPC-PG mask is invalid");
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	g->tpc_pg_mask = (u32) val;
+
+exit:
+	nvgpu_mutex_release(&g->tpc_pg_lock);
+
+	return ret;
+}
+
+static DEVICE_ATTR(tpc_pg_mask, ROOTRW, tpc_pg_mask_read, tpc_pg_mask_store);
+
 static ssize_t tpc_fs_mask_store(struct device *dev,
 	struct device_attribute *attr, const char *buf, size_t count)
 {
@@ -1130,6 +1201,7 @@ void nvgpu_remove_sysfs(struct device *dev)
 	device_remove_file(dev, &dev_attr_aelpg_enable);
 	device_remove_file(dev, &dev_attr_allow_all);
 	device_remove_file(dev, &dev_attr_tpc_fs_mask);
+	device_remove_file(dev, &dev_attr_tpc_pg_mask);
 	device_remove_file(dev, &dev_attr_min_timeslice_us);
 	device_remove_file(dev, &dev_attr_max_timeslice_us);
 
@@ -1181,6 +1253,7 @@ int nvgpu_create_sysfs(struct device *dev)
 	error |= device_create_file(dev, &dev_attr_aelpg_enable);
 	error |= device_create_file(dev, &dev_attr_allow_all);
 	error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
+	error |= device_create_file(dev, &dev_attr_tpc_pg_mask);
 	error |= device_create_file(dev, &dev_attr_min_timeslice_us);
 	error |= device_create_file(dev, &dev_attr_max_timeslice_us);
 
-- 
cgit v1.2.2