From e5824d8014c321fbe2c1e04e12307125dd50a472 Mon Sep 17 00:00:00 2001 From: Mahantesh Kumbar Date: Thu, 3 Nov 2016 21:16:21 +0530 Subject: gpu: nvgpu: MSCG support - update gp106 pg engine init/list/features HALs to support MS engine - Added defines & interface for lpwr tables read from vbios. - lpwr module which reads idx/gr/ms table from vbios to map rppg/mscg support with respective p-state - lpwr module public functions to control lpwr features enable/disable mscg/rppg & mclk-change request whenever change in mclk-change parameters - lpwr public functions to know rppg/mscg support for requested pstate, - added mutex to prevent PG transition while arbiter executes pstate transition - nvgpu_clk_arb_get_current_pstate() of clk arbiter to get current pstate JIRA DNVGPU-71 Change-Id: Ifcd640cc19ef630be1e2a9ba07ec84023d8202a0 Signed-off-by: Mahantesh Kumbar Reviewed-on: http://git-master/r/1247553 (cherry picked from commit 8a441dea2410e1b5196ef24e56a7768b6980e46b) Reviewed-on: http://git-master/r/1270989 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile.nvgpu-t18x | 3 +- drivers/gpu/nvgpu/clk/clk_arb.c | 102 ++++---- drivers/gpu/nvgpu/clk/clk_arb.h | 5 + drivers/gpu/nvgpu/gp106/pmu_gp106.c | 27 ++- drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 3 + drivers/gpu/nvgpu/include/bios.h | 64 +++++ drivers/gpu/nvgpu/lpwr/lpwr.c | 423 ++++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/lpwr/lpwr.h | 92 ++++++++ drivers/gpu/nvgpu/perf/perf.h | 2 + drivers/gpu/nvgpu/pstate/pstate.c | 9 +- drivers/gpu/nvgpu/pstate/pstate.h | 4 + 11 files changed, 691 insertions(+), 43 deletions(-) create mode 100644 drivers/gpu/nvgpu/lpwr/lpwr.c create mode 100644 drivers/gpu/nvgpu/lpwr/lpwr.h (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x index 2b650ad8..30119345 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x +++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x @@ -63,7 +63,8 @@ nvgpu-y += \ 
$(nvgpu-t18x)/therm/thrmdev.o \ $(nvgpu-t18x)/therm/thrmchannel.o \ $(nvgpu-t18x)/therm/thrmpmu.o \ - $(nvgpu-t18x)/lpwr/rppg.o + $(nvgpu-t18x)/lpwr/rppg.o \ + $(nvgpu-t18x)/lpwr/lpwr.o nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 3f35fac7..c440dc3b 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -44,14 +44,6 @@ static void nvgpu_clk_arb_free_session(struct kref *refcount); static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, u32 voltuv_sram); -static int nvgpu_clk_arb_change_vf_point_prefix(struct gk20a *g, - u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target, - u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin, - u32 nuvmin_sram); -static int nvgpu_clk_arb_change_vf_point_postfix(struct gk20a *g, - u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target, - u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin, - u32 nuvmin_sram); static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram); @@ -105,6 +97,7 @@ struct nvgpu_clk_arb { spinlock_t sessions_lock; spinlock_t users_lock; + struct mutex pstate_lock; struct list_head users; struct list_head sessions; struct llist_head requests; @@ -235,6 +228,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) g->clk_arb = arb; arb->g = g; + mutex_init(&arb->pstate_lock); spin_lock_init(&arb->sessions_lock); spin_lock_init(&arb->users_lock); @@ -943,12 +937,23 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) /* Program clocks */ /* A change in both mclk of gpc2clk may require a change in voltage */ - status = nvgpu_clk_arb_change_vf_point_prefix(g, gpc2clk_target, - sys2clk_target, xbar2clk_target, mclk_target, voltuv, 
- voltuv_sram, nuvmin, nuvmin_sram); + mutex_lock(&arb->pstate_lock); + status = nvgpu_lpwr_disable_pg(g, false); + status = clk_pmu_freq_controller_load(g, false); + if (status < 0) { + arb->status = status; + mutex_unlock(&arb->pstate_lock); + + /* make status visible */ + smp_mb(); + goto exit_arb; + } + status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); if (status < 0) { arb->status = status; + mutex_unlock(&arb->pstate_lock); + /* make status visible */ smp_mb(); goto exit_arb; @@ -957,20 +962,30 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target, voltuv, voltuv_sram); - if (status < 0) { arb->status = status; + mutex_unlock(&arb->pstate_lock); + /* make status visible */ smp_mb(); goto exit_arb; } - status = nvgpu_clk_arb_change_vf_point_postfix(g, gpc2clk_target, - sys2clk_target, xbar2clk_target, mclk_target, voltuv, - voltuv_sram, nuvmin, nuvmin_sram); + status = clk_pmu_freq_controller_load(g, true); + if (status < 0) { + arb->status = status; + mutex_unlock(&arb->pstate_lock); + + /* make status visible */ + smp_mb(); + goto exit_arb; + } + status = nvgpu_lwpr_mclk_change(g, pstate); if (status < 0) { arb->status = status; + mutex_unlock(&arb->pstate_lock); + /* make status visible */ smp_mb(); goto exit_arb; @@ -991,15 +1006,24 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) smp_wmb(); xchg(&arb->actual, actual); + status = nvgpu_lpwr_enable_pg(g, false); + if (status < 0) { + arb->status = status; + mutex_unlock(&arb->pstate_lock); + + /* make status visible */ + smp_mb(); + goto exit_arb; + } + /* status must be visible before atomic inc */ smp_wmb(); atomic_inc(&arb->req_nr); - wake_up_interruptible(&arb->request_wq); + /* Unlock pstate change for PG */ + mutex_unlock(&arb->pstate_lock); - if (status < 0) - gk20a_err(dev_from_gk20a(g), - "Error in arbiter update"); + 
wake_up_interruptible(&arb->request_wq); #ifdef CONFIG_DEBUG_FS g->ops.read_ptimer(g, &t1); @@ -1036,6 +1060,9 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) #endif exit_arb: + if (status < 0) + gk20a_err(dev_from_gk20a(g), + "Error in arbiter update"); /* notify completion for all requests */ head = llist_del_all(&arb->requests); @@ -1300,6 +1327,7 @@ recalculate_vf_point: } if (index == table->mclk_num_points) { mclk_vf = &table->mclk_points[index-1]; + index = table->mclk_num_points - 1; } index_mclk = index; @@ -1378,28 +1406,11 @@ find_exit: return pstate; } -static int nvgpu_clk_arb_change_vf_point_prefix(struct gk20a *g, - u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target, - u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin, - u32 nuvmin_sram) -{ - - int status; - - status = clk_pmu_freq_controller_load(g, false); - if (status < 0) - return status; - - status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); - return status; -} - -static int nvgpu_clk_arb_change_vf_point_postfix(struct gk20a *g, - u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target, - u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin, - u32 nuvmin_sram) +/* This function is inherently unsafe to call while arbiter is running + * arbiter must be blocked before calling this function */ +int nvgpu_clk_arb_get_current_pstate(struct gk20a *g) { - return clk_pmu_freq_controller_load(g, true); + return ACCESS_ONCE(g->clk_arb->actual->pstate); } static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, @@ -1456,6 +1467,17 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, return 0; } +void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + if (lock) + mutex_lock(&arb->pstate_lock); + else + mutex_unlock(&arb->pstate_lock); + +} + #ifdef CONFIG_DEBUG_FS static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) { diff --git 
a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h index 8355dac5..700804b3 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.h +++ b/drivers/gpu/nvgpu/clk/clk_arb.h @@ -62,5 +62,10 @@ int nvgpu_clk_arb_install_request_fd(struct gk20a *g, struct nvgpu_clk_session *session, int *event_fd); void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g); + +int nvgpu_clk_arb_get_current_pstate(struct gk20a *g); + +void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock); + #endif /* _CLK_ARB_H_ */ diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c index 6f5e71eb..eecd7351 100644 --- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c +++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c @@ -24,6 +24,7 @@ #include "clk/clk_mclk.h" #include "hw_mc_gp106.h" #include "hw_pwr_gp106.h" +#include "lpwr/lpwr.h" #include "lpwr/rppg.h" #define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000 @@ -180,12 +181,16 @@ static u32 gp106_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id) if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) return PMU_PG_FEATURE_GR_RPPG_ENABLED; + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) + return NVGPU_PMU_MS_FEATURE_MASK_ALL; + return 0; } static u32 gp106_pmu_pg_engines_list(struct gk20a *g) { - return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS); + return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS) | + BIT(PMU_PG_ELPG_ENGINE_ID_MS); } static void pmu_handle_param_msg(struct gk20a *g, struct pmu_msg *msg, @@ -231,6 +236,23 @@ static int gp106_pg_param_init(struct gk20a *g, u32 pg_engine_id) gp106_dbg_pmu("cmd post GR PMU_PG_CMD_ID_PG_PARAM"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_param_msg, pmu, &seq, ~0); + } else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_ms_init_param); + cmd.cmd.pg.ms_init_param.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.ms_init_param.cmd_id = + PMU_PG_PARAM_CMD_MS_INIT_PARAM; + 
cmd.cmd.pg.ms_init_param.support_mask = + NVGPU_PMU_MS_FEATURE_MASK_CLOCK_GATING | + NVGPU_PMU_MS_FEATURE_MASK_SW_ASR | + NVGPU_PMU_MS_FEATURE_MASK_RPPG | + NVGPU_PMU_MS_FEATURE_MASK_FB_TRAINING; + + gp106_dbg_pmu("cmd post MS PMU_PG_CMD_ID_PG_PARAM"); + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_param_msg, pmu, &seq, ~0); } return 0; @@ -261,6 +283,9 @@ void gp106_init_pmu_ops(struct gpu_ops *gops) gops->pmu.pmu_pg_init_param = gp106_pg_param_init; gops->pmu.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list; gops->pmu.pmu_pg_engines_feature_list = gp106_pmu_pg_feature_list; + gops->pmu.pmu_lpwr_enable_pg = nvgpu_lpwr_enable_pg; + gops->pmu.pmu_lpwr_disable_pg = nvgpu_lpwr_disable_pg; + gops->pmu.pmu_pg_param_post_init = nvgpu_lpwr_post_init; gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL; gops->pmu.dump_secure_fuses = NULL; gops->pmu.reset = gp106_falcon_reset; diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c index cd9cd0b0..9274990a 100644 --- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c @@ -482,6 +482,9 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops) gops->pmu.pmu_pg_init_param = gp10b_pg_gr_init; gops->pmu.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list; gops->pmu.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list; + gops->pmu.pmu_lpwr_enable_pg = NULL; + gops->pmu.pmu_lpwr_disable_pg = NULL; + gops->pmu.pmu_pg_param_post_init = NULL; gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = send_ecc_overide_en_dis_cmd; gops->pmu.reset = gk20a_pmu_reset; diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h index f3939d14..097e90ec 100644 --- a/drivers/gpu/nvgpu/include/bios.h +++ b/drivers/gpu/nvgpu/include/bios.h @@ -925,4 +925,68 @@ struct vbios_fct_1x_entry { #define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_MASK GENMASK(31, 16) #define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_SHIFT 16 +/* LPWR 
Index Table */ +struct nvgpu_bios_lpwr_idx_table_1x_header { + u8 version; + u8 header_size; + u8 entry_size; + u8 entry_count; + u16 base_sampling_period; +} __packed; + +struct nvgpu_bios_lpwr_idx_table_1x_entry { + u8 pcie_idx; + u8 gr_idx; + u8 ms_idx; + u8 di_idx; + u8 gc6_idx; +} __packed; + +/* LPWR MS Table*/ +struct nvgpu_bios_lpwr_ms_table_1x_header { + u8 version; + u8 header_size; + u8 entry_size; + u8 entry_count; + u8 default_entry_idx; + u16 idle_threshold_us; +} __packed; + +struct nvgpu_bios_lpwr_ms_table_1x_entry { + u32 feautre_mask; + u16 dynamic_current_logic; + u16 dynamic_current_sram; +} __packed; + +#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_MASK GENMASK(0, 0) +#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SHIFT 0 +#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR_MASK GENMASK(2, 2) +#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR_SHIFT 2 +#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING_MASK \ + GENMASK(3, 3) +#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING_SHIFT 3 +#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG_MASK GENMASK(5, 5) +#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG_SHIFT 5 + +/* LPWR GR Table */ +struct nvgpu_bios_lpwr_gr_table_1x_header { + u8 version; + u8 header_size; + u8 entry_size; + u8 entry_count; + u8 default_entry_idx; + u16 idle_threshold_us; + u8 adaptive_gr_multiplier; +} __packed; + +struct nvgpu_bios_lpwr_gr_table_1x_entry { + u32 feautre_mask; +} __packed; + +#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_MASK GENMASK(0, 0) +#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_SHIFT 0 + +#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG_MASK GENMASK(4, 4) +#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG_SHIFT 4 + #endif diff --git a/drivers/gpu/nvgpu/lpwr/lpwr.c b/drivers/gpu/nvgpu/lpwr/lpwr.c new file mode 100644 index 00000000..4f8d2eec --- /dev/null +++ b/drivers/gpu/nvgpu/lpwr/lpwr.c @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include "gk20a/gk20a.h" +#include "gk20a/pmu_gk20a.h" +#include "gp106/pmu_gp106.h" +#include "gk20a/pmu_api.h" +#include "gm206/bios_gm206.h" +#include "pstate/pstate.h" +#include "include/bios.h" +#include "perf/perf.h" +#include "lpwr.h" + +static int get_lpwr_idx_table(struct gk20a *g) +{ + u32 *lpwr_idx_table_ptr; + u8 *entry_addr; + u32 idx; + struct nvgpu_lpwr_bios_idx_data *pidx_data = + &g->perf_pmu.lpwr.lwpr_bios_data.idx; + struct nvgpu_bios_lpwr_idx_table_1x_header header = { 0 }; + struct nvgpu_bios_lpwr_idx_table_1x_entry entry = { 0 }; + + if (g->ops.bios.get_perf_table_ptrs) { + lpwr_idx_table_ptr = (u32 *)g->ops.bios.get_perf_table_ptrs(g, + g->bios.perf_token, LOWPOWER_TABLE); + if (lpwr_idx_table_ptr == NULL) + return -EINVAL; + } else + return -EINVAL; + + memcpy(&header, lpwr_idx_table_ptr, + sizeof(struct nvgpu_bios_lpwr_idx_table_1x_header)); + + if (header.entry_count >= LPWR_VBIOS_IDX_ENTRY_COUNT_MAX) + return -EINVAL; + + pidx_data->base_sampling_period = (u16)header.base_sampling_period; + + /* Parse the LPWR Index Table entries.*/ + for (idx = 0; idx < header.entry_count; idx++) { + entry_addr = (u8 *)lpwr_idx_table_ptr + header.header_size + + (idx * header.entry_size); + + memcpy(&entry, entry_addr, + sizeof(struct nvgpu_bios_lpwr_idx_table_1x_entry)); + + pidx_data->entry[idx].pcie_idx = entry.pcie_idx; + pidx_data->entry[idx].gr_idx = entry.gr_idx; + pidx_data->entry[idx].ms_idx = entry.ms_idx; + pidx_data->entry[idx].di_idx = entry.di_idx; + pidx_data->entry[idx].gc6_idx = 
entry.gc6_idx; + + } + + return 0; +} + +static int get_lpwr_gr_table(struct gk20a *g) +{ + u32 *lpwr_gr_table_ptr; + u8 *entry_addr; + u32 idx; + struct nvgpu_lpwr_bios_gr_data *pgr_data = + &g->perf_pmu.lpwr.lwpr_bios_data.gr; + struct nvgpu_bios_lpwr_gr_table_1x_header header = { 0 }; + struct nvgpu_bios_lpwr_gr_table_1x_entry entry = { 0 }; + + if (g->ops.bios.get_perf_table_ptrs) { + lpwr_gr_table_ptr = (u32 *)g->ops.bios.get_perf_table_ptrs(g, + g->bios.perf_token, LOWPOWER_GR_TABLE); + if (lpwr_gr_table_ptr == NULL) + return -EINVAL; + } else + return -EINVAL; + + memcpy(&header, lpwr_gr_table_ptr, + sizeof(struct nvgpu_bios_lpwr_gr_table_1x_header)); + + /* Parse the LPWR Index Table entries.*/ + for (idx = 0; idx < header.entry_count; idx++) { + entry_addr = (u8 *)lpwr_gr_table_ptr + header.header_size + + (idx * header.entry_size); + + memcpy(&entry, entry_addr, + sizeof(struct nvgpu_bios_lpwr_gr_table_1x_entry)); + + if (BIOS_GET_FIELD(entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS)) { + pgr_data->entry[idx].gr_enabled = true; + + pgr_data->entry[idx].feature_mask = + NVGPU_PMU_GR_FEATURE_MASK_ALL; + + if (!BIOS_GET_FIELD(entry.feautre_mask, + NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG)) + pgr_data->entry[idx].feature_mask &= + ~NVGPU_PMU_GR_FEATURE_MASK_RPPG; + } + + } + + return 0; +} + +static int get_lpwr_ms_table(struct gk20a *g) +{ + u32 *lpwr_ms_table_ptr; + u8 *entry_addr; + u32 idx; + struct nvgpu_lpwr_bios_ms_data *pms_data = + &g->perf_pmu.lpwr.lwpr_bios_data.ms; + struct nvgpu_bios_lpwr_ms_table_1x_header header = { 0 }; + struct nvgpu_bios_lpwr_ms_table_1x_entry entry = { 0 }; + + if (g->ops.bios.get_perf_table_ptrs) { + lpwr_ms_table_ptr = (u32 *)g->ops.bios.get_perf_table_ptrs(g, + g->bios.perf_token, LOWPOWER_MS_TABLE); + if (lpwr_ms_table_ptr == NULL) + return -EINVAL; + } else + return -EINVAL; + + memcpy(&header, lpwr_ms_table_ptr, + sizeof(struct nvgpu_bios_lpwr_ms_table_1x_header)); + + if (header.entry_count >= 
LPWR_VBIOS_MS_ENTRY_COUNT_MAX) + return -EINVAL; + + pms_data->default_entry_idx = (u8)header.default_entry_idx; + + pms_data->idle_threshold_us = (u32)(header.idle_threshold_us * 10); + + /* Parse the LPWR MS Table entries.*/ + for (idx = 0; idx < header.entry_count; idx++) { + entry_addr = (u8 *)lpwr_ms_table_ptr + header.header_size + + (idx * header.entry_size); + + memcpy(&entry, entry_addr, + sizeof(struct nvgpu_bios_lpwr_ms_table_1x_entry)); + + if (BIOS_GET_FIELD(entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS)) { + pms_data->entry[idx].ms_enabled = true; + + pms_data->entry[idx].feature_mask = + NVGPU_PMU_MS_FEATURE_MASK_ALL; + + if (!BIOS_GET_FIELD(entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING)) + pms_data->entry[idx].feature_mask &= + ~NVGPU_PMU_MS_FEATURE_MASK_CLOCK_GATING; + + if (!BIOS_GET_FIELD(entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR)) + pms_data->entry[idx].feature_mask &= + ~NVGPU_PMU_MS_FEATURE_MASK_SW_ASR; + + if (!BIOS_GET_FIELD(entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG)) + pms_data->entry[idx].feature_mask &= + ~NVGPU_PMU_MS_FEATURE_MASK_RPPG; + } + + pms_data->entry[idx].dynamic_current_logic = + entry.dynamic_current_logic; + + pms_data->entry[idx].dynamic_current_sram = + entry.dynamic_current_sram; + } + + return 0; +} + +u32 nvgpu_lpwr_pg_setup(struct gk20a *g) +{ + u32 err = 0; + + gk20a_dbg_fn(""); + + err = get_lpwr_gr_table(g); + if (err) + return err; + + err = get_lpwr_ms_table(g); + if (err) + return err; + + err = get_lpwr_idx_table(g); + + return err; +} + +static void nvgpu_pmu_handle_param_lpwr_msg(struct gk20a *g, + struct pmu_msg *msg, void *param, + u32 handle, u32 status) +{ + u32 *ack_status = param; + + gk20a_dbg_fn(""); + + if (status != 0) { + gk20a_err(dev_from_gk20a(g), "LWPR PARAM cmd aborted"); + return; + } + + *ack_status = 1; + + gp106_dbg_pmu("lpwr-param is acknowledged from PMU %x", + msg->msg.pg.msg_type); +} + +int 
nvgpu_lwpr_mclk_change(struct gk20a *g, u32 pstate) +{ + struct pmu_cmd cmd; + u32 seq, status = 0; + u32 payload = NV_PMU_PG_PARAM_MCLK_CHANGE_MS_SWASR_ENABLED; + struct clk_set_info *pstate_info; + u32 ack_status = 0; + + gk20a_dbg_fn(""); + + pstate_info = pstate_get_clk_set_info(g, pstate, + clkwhich_mclk); + if (!pstate_info) + return -EINVAL; + + if (pstate_info->max_mhz > + MAX_SWASR_MCLK_FREQ_WITHOUT_WR_TRAINING_MAXWELL_MHZ) + payload |= + NV_PMU_PG_PARAM_MCLK_CHANGE_GDDR5_WR_TRAINING_ENABLED; + + if (payload != g->perf_pmu.lpwr.mclk_change_cache) { + g->perf_pmu.lpwr.mclk_change_cache = payload; + + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_mclk_change); + cmd.cmd.pg.mclk_change.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.mclk_change.cmd_id = + PMU_PG_PARAM_CMD_MCLK_CHANGE; + cmd.cmd.pg.mclk_change.data = payload; + + gp106_dbg_pmu("cmd post MS PMU_PG_PARAM_CMD_MCLK_CHANGE"); + status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, + PMU_COMMAND_QUEUE_HPQ, + nvgpu_pmu_handle_param_lpwr_msg, &ack_status, &seq, ~0); + + pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g), + &ack_status, 1); + if (ack_status == 0) { + status = -EINVAL; + gk20a_err(dev_from_gk20a(g), "MCLK-CHANGE ACK failed"); + } + } + + return status; +} + +u32 nvgpu_lpwr_post_init(struct gk20a *g) +{ + struct pmu_cmd cmd; + u32 seq; + u32 status = 0; + u32 ack_status = 0; + + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_post_init_param); + + cmd.cmd.pg.post_init.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.post_init.cmd_id = + PMU_PG_PARAM_CMD_POST_INIT; + + gp106_dbg_pmu("cmd post post-init PMU_PG_PARAM_CMD_POST_INIT"); + status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, + PMU_COMMAND_QUEUE_LPQ, + nvgpu_pmu_handle_param_lpwr_msg, &ack_status, &seq, ~0); + + pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g), + &ack_status, 1); 
+ if (ack_status == 0) { + status = -EINVAL; + gk20a_err(dev_from_gk20a(g), "post-init ack failed"); + } + + return status; +} + +u32 nvgpu_lpwr_is_mscg_supported(struct gk20a *g, u32 pstate_num) +{ + struct nvgpu_lpwr_bios_ms_data *pms_data = + &g->perf_pmu.lpwr.lwpr_bios_data.ms; + struct nvgpu_lpwr_bios_idx_data *pidx_data = + &g->perf_pmu.lpwr.lwpr_bios_data.idx; + struct pstate *pstate = pstate_find(g, pstate_num); + u32 ms_idx; + + gk20a_dbg_fn(""); + + if (!pstate) + return 0; + + ms_idx = pidx_data->entry[pstate->lpwr_entry_idx].ms_idx; + if (pms_data->entry[ms_idx].ms_enabled) + return 1; + else + return 0; +} + +u32 nvgpu_lpwr_is_rppg_supported(struct gk20a *g, u32 pstate_num) +{ + struct nvgpu_lpwr_bios_gr_data *pgr_data = + &g->perf_pmu.lpwr.lwpr_bios_data.gr; + struct nvgpu_lpwr_bios_idx_data *pidx_data = + &g->perf_pmu.lpwr.lwpr_bios_data.idx; + struct pstate *pstate = pstate_find(g, pstate_num); + u32 idx; + + gk20a_dbg_fn(""); + + if (!pstate) + return 0; + + idx = pidx_data->entry[pstate->lpwr_entry_idx].gr_idx; + if (pgr_data->entry[idx].gr_enabled) + return 1; + else + return 0; +} + + +int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock) +{ + struct pmu_gk20a *pmu = &g->pmu; + u32 status = 0; + u32 is_mscg_supported = 0; + u32 is_rppg_supported = 0; + u32 present_pstate = 0; + + gk20a_dbg_fn(""); + + if (pstate_lock) + nvgpu_clk_arb_pstate_change_lock(g, true); + mutex_lock(&pmu->pg_mutex); + + present_pstate = nvgpu_clk_arb_get_current_pstate(g); + + is_mscg_supported = nvgpu_lpwr_is_mscg_supported(g, + present_pstate); + if (is_mscg_supported && g->mscg_enabled) { + if (!pmu->mscg_stat) + pmu->mscg_stat = PMU_MSCG_ENABLED; + } + + is_rppg_supported = nvgpu_lpwr_is_rppg_supported(g, + present_pstate); + if (is_rppg_supported) { + if (support_gk20a_pmu(g->dev) && g->elpg_enabled) + status = gk20a_pmu_enable_elpg(g); + } + + mutex_unlock(&pmu->pg_mutex); + if (pstate_lock) + nvgpu_clk_arb_pstate_change_lock(g, false); + + return status; +} 
+ +int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock) +{ + struct pmu_gk20a *pmu = &g->pmu; + int status = 0; + u32 is_mscg_supported = 0; + u32 is_rppg_supported = 0; + u32 present_pstate = 0; + + gk20a_dbg_fn(""); + + if (pstate_lock) + nvgpu_clk_arb_pstate_change_lock(g, true); + mutex_lock(&pmu->pg_mutex); + + present_pstate = nvgpu_clk_arb_get_current_pstate(g); + + is_rppg_supported = nvgpu_lpwr_is_rppg_supported(g, + present_pstate); + if (is_rppg_supported) { + if (support_gk20a_pmu(g->dev) && g->elpg_enabled) { + status = gk20a_pmu_disable_elpg(g); + if (status) + goto exit_unlock; + } + } + + is_mscg_supported = nvgpu_lpwr_is_mscg_supported(g, + present_pstate); + if (is_mscg_supported && g->mscg_enabled) { + if (pmu->mscg_stat) + pmu->mscg_stat = PMU_MSCG_DISABLED; + } + +exit_unlock: + mutex_unlock(&pmu->pg_mutex); + if (pstate_lock) + nvgpu_clk_arb_pstate_change_lock(g, false); + + gk20a_dbg_fn("done"); + return status; +} diff --git a/drivers/gpu/nvgpu/lpwr/lpwr.h b/drivers/gpu/nvgpu/lpwr/lpwr.h new file mode 100644 index 00000000..6b3259df --- /dev/null +++ b/drivers/gpu/nvgpu/lpwr/lpwr.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ +#ifndef _MSCG_H_ +#define _MSCG_H_ + +#define MAX_SWASR_MCLK_FREQ_WITHOUT_WR_TRAINING_MAXWELL_MHZ 540 + +#define NV_PMU_PG_PARAM_MCLK_CHANGE_MS_SWASR_ENABLED BIT(0x1) +#define NV_PMU_PG_PARAM_MCLK_CHANGE_GDDR5_WR_TRAINING_ENABLED BIT(0x3) + +#define LPWR_ENTRY_COUNT_MAX 0x06 + +#define LPWR_VBIOS_IDX_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX) + +#define LPWR_VBIOS_IDX_ENTRY_RSVD \ + (LPWR_VBIOS_IDX_ENTRY_COUNT_MAX - 1) + +#define LPWR_VBIOS_BASE_SAMPLING_PERIOD_DEFAULT (500) + +struct nvgpu_lpwr_bios_idx_entry { + u8 pcie_idx; + u8 gr_idx; + u8 ms_idx; + u8 di_idx; + u8 gc6_idx; +}; + +struct nvgpu_lpwr_bios_idx_data { + u16 base_sampling_period; + struct nvgpu_lpwr_bios_idx_entry entry[LPWR_VBIOS_IDX_ENTRY_COUNT_MAX]; +}; + +#define LPWR_VBIOS_MS_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX) + +struct nvgpu_lpwr_bios_ms_entry { + bool ms_enabled; + u32 feature_mask; + u32 asr_efficiency_thresholdl; + u16 dynamic_current_logic; + u16 dynamic_current_sram; +}; + +struct nvgpu_lpwr_bios_ms_data { + u8 default_entry_idx; + u32 idle_threshold_us; + struct nvgpu_lpwr_bios_ms_entry entry[LPWR_VBIOS_MS_ENTRY_COUNT_MAX]; +}; + +#define LPWR_VBIOS_GR_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX) + +struct nvgpu_lpwr_bios_gr_entry { + bool gr_enabled; + u32 feature_mask; +}; + +struct nvgpu_lpwr_bios_gr_data { + u8 default_entry_idx; + u32 idle_threshold_us; + u8 adaptive_gr_multiplier; + struct nvgpu_lpwr_bios_gr_entry entry[LPWR_VBIOS_GR_ENTRY_COUNT_MAX]; +}; + +struct nvgpu_lpwr_bios_data { + struct nvgpu_lpwr_bios_idx_data idx; + struct nvgpu_lpwr_bios_ms_data ms; + struct nvgpu_lpwr_bios_gr_data gr; +}; + +struct obj_lwpr { + struct nvgpu_lpwr_bios_data lwpr_bios_data; + u32 mclk_change_cache; +}; + +u32 nvgpu_lpwr_pg_setup(struct gk20a *g); +int nvgpu_lwpr_mclk_change(struct gk20a *g, u32 pstate); +int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock); +int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock); +u32 nvgpu_lpwr_is_mscg_supported(struct gk20a *g, u32 
pstate_num); +u32 nvgpu_lpwr_is_rppg_supported(struct gk20a *g, u32 pstate_num); +u32 nvgpu_lpwr_post_init(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/perf/perf.h b/drivers/gpu/nvgpu/perf/perf.h index c03bf2ae..a3213f7a 100644 --- a/drivers/gpu/nvgpu/perf/perf.h +++ b/drivers/gpu/nvgpu/perf/perf.h @@ -18,6 +18,7 @@ #include "pstate/pstate.h" #include "gk20a/gk20a.h" #include "volt/volt.h" +#include "lpwr/lpwr.h" #define CTRL_PERF_VFE_VAR_TYPE_INVALID 0x00 #define CTRL_PERF_VFE_VAR_TYPE_DERIVED 0x01 @@ -57,6 +58,7 @@ struct perf_pmupstate { struct vfe_equs vfe_equobjs; struct pstates pstatesobjs; struct obj_volt volt; + struct obj_lwpr lpwr; }; u32 perf_pmu_vfe_load(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c index 2e08ef01..82e809bb 100644 --- a/drivers/gpu/nvgpu/pstate/pstate.c +++ b/drivers/gpu/nvgpu/pstate/pstate.c @@ -83,6 +83,10 @@ int gk20a_init_pstate_support(struct gk20a *g) return err; err = clk_freq_controller_sw_setup(g); + if (err) + return err; + + err = nvgpu_lpwr_pg_setup(g); return err; } @@ -327,6 +331,9 @@ static int pstate_sw_setup(struct gk20a *g) gk20a_dbg_fn(""); + init_waitqueue_head(&g->perf_pmu.pstatesobjs.pstate_notifier_wq); + mutex_init(&g->perf_pmu.pstatesobjs.pstate_mutex); + err = boardobjgrpconstruct_e32(&g->perf_pmu.pstatesobjs.super); if (err) { gk20a_err(dev_from_gk20a(g), @@ -361,7 +368,7 @@ done: return err; } -static struct pstate *pstate_find(struct gk20a *g, u32 num) +struct pstate *pstate_find(struct gk20a *g, u32 num) { struct pstates *pstates = &(g->perf_pmu.pstatesobjs); struct pstate *pstate; diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h index b6519c20..af0956e8 100644 --- a/drivers/gpu/nvgpu/pstate/pstate.h +++ b/drivers/gpu/nvgpu/pstate/pstate.h @@ -48,6 +48,9 @@ struct pstate { struct pstates { struct boardobjgrp_e32 super; u32 num_levels; + wait_queue_head_t pstate_notifier_wq; + u32 is_pstate_switch_on; + 
struct mutex pstate_mutex; /* protect is_pstate_switch_on */ }; int gk20a_init_pstate_support(struct gk20a *g); @@ -55,5 +58,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g); struct clk_set_info *pstate_get_clk_set_info(struct gk20a *g, u32 pstate_num, enum nv_pmu_clk_clkwhich clkwhich); +struct pstate *pstate_find(struct gk20a *g, u32 num); #endif /* __PSTATE_H__ */ -- cgit v1.2.2