summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
authorMahantesh Kumbar <mkumbar@nvidia.com>2016-11-03 11:46:21 -0400
committerDeepak Nibade <dnibade@nvidia.com>2016-12-27 04:56:53 -0500
commite5824d8014c321fbe2c1e04e12307125dd50a472 (patch)
tree82657cd43c0dcd313b3251f3776e5e80b488fabc /drivers/gpu/nvgpu
parent62d13e613807e9bce3a9d1ef0c61725ef3a885ce (diff)
gpu: nvgpu: MSCG support
- update gp106 pg engine init/list/features HALs to support MS engine - Added defines & interface for lpwr tables read from vbios. - lpwr module which reads idx/gr/ms table from vbios to map rppg/mscg support with respective p-state - lpwr module public functions to control lpwr features enable/disable mscg/rppg & mclk-change request whenever change in mclk-change parameters - lpwr public functions to know rppg/mscg support for requested pstate, - added mutex to prevent PG transition while arbiter executes pstate transition - nvgpu_clk_arb_get_current_pstate() of clk arbiter to get current pstate JIRA DNVGPU-71 Change-Id: Ifcd640cc19ef630be1e2a9ba07ec84023d8202a0 Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com> Reviewed-on: http://git-master/r/1247553 (cherry picked from commit 8a441dea2410e1b5196ef24e56a7768b6980e46b) Reviewed-on: http://git-master/r/1270989 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--drivers/gpu/nvgpu/Makefile.nvgpu-t18x3
-rw-r--r--drivers/gpu/nvgpu/clk/clk_arb.c102
-rw-r--r--drivers/gpu/nvgpu/clk/clk_arb.h5
-rw-r--r--drivers/gpu/nvgpu/gp106/pmu_gp106.c27
-rw-r--r--drivers/gpu/nvgpu/gp10b/pmu_gp10b.c3
-rw-r--r--drivers/gpu/nvgpu/include/bios.h64
-rw-r--r--drivers/gpu/nvgpu/lpwr/lpwr.c423
-rw-r--r--drivers/gpu/nvgpu/lpwr/lpwr.h92
-rw-r--r--drivers/gpu/nvgpu/perf/perf.h2
-rw-r--r--drivers/gpu/nvgpu/pstate/pstate.c9
-rw-r--r--drivers/gpu/nvgpu/pstate/pstate.h4
11 files changed, 691 insertions, 43 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 2b650ad8..30119345 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -63,7 +63,8 @@ nvgpu-y += \
63 $(nvgpu-t18x)/therm/thrmdev.o \ 63 $(nvgpu-t18x)/therm/thrmdev.o \
64 $(nvgpu-t18x)/therm/thrmchannel.o \ 64 $(nvgpu-t18x)/therm/thrmchannel.o \
65 $(nvgpu-t18x)/therm/thrmpmu.o \ 65 $(nvgpu-t18x)/therm/thrmpmu.o \
66 $(nvgpu-t18x)/lpwr/rppg.o 66 $(nvgpu-t18x)/lpwr/rppg.o \
67 $(nvgpu-t18x)/lpwr/lpwr.o
67 68
68nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o 69nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
69 70
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 3f35fac7..c440dc3b 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -44,14 +44,6 @@ static void nvgpu_clk_arb_free_session(struct kref *refcount);
44static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, 44static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
45 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, 45 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
46 u32 voltuv_sram); 46 u32 voltuv_sram);
47static int nvgpu_clk_arb_change_vf_point_prefix(struct gk20a *g,
48 u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
49 u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
50 u32 nuvmin_sram);
51static int nvgpu_clk_arb_change_vf_point_postfix(struct gk20a *g,
52 u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
53 u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
54 u32 nuvmin_sram);
55static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, 47static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
56 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, 48 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
57 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram); 49 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram);
@@ -105,6 +97,7 @@ struct nvgpu_clk_arb {
105 spinlock_t sessions_lock; 97 spinlock_t sessions_lock;
106 spinlock_t users_lock; 98 spinlock_t users_lock;
107 99
100 struct mutex pstate_lock;
108 struct list_head users; 101 struct list_head users;
109 struct list_head sessions; 102 struct list_head sessions;
110 struct llist_head requests; 103 struct llist_head requests;
@@ -235,6 +228,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
235 g->clk_arb = arb; 228 g->clk_arb = arb;
236 arb->g = g; 229 arb->g = g;
237 230
231 mutex_init(&arb->pstate_lock);
238 spin_lock_init(&arb->sessions_lock); 232 spin_lock_init(&arb->sessions_lock);
239 spin_lock_init(&arb->users_lock); 233 spin_lock_init(&arb->users_lock);
240 234
@@ -943,12 +937,23 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
943 /* Program clocks */ 937 /* Program clocks */
944 /* A change in both mclk of gpc2clk may require a change in voltage */ 938 /* A change in both mclk of gpc2clk may require a change in voltage */
945 939
946 status = nvgpu_clk_arb_change_vf_point_prefix(g, gpc2clk_target, 940 mutex_lock(&arb->pstate_lock);
947 sys2clk_target, xbar2clk_target, mclk_target, voltuv, 941 status = nvgpu_lpwr_disable_pg(g, false);
948 voltuv_sram, nuvmin, nuvmin_sram);
949 942
943 status = clk_pmu_freq_controller_load(g, false);
944 if (status < 0) {
945 arb->status = status;
946 mutex_unlock(&arb->pstate_lock);
947
948 /* make status visible */
949 smp_mb();
950 goto exit_arb;
951 }
952 status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
950 if (status < 0) { 953 if (status < 0) {
951 arb->status = status; 954 arb->status = status;
955 mutex_unlock(&arb->pstate_lock);
956
952 /* make status visible */ 957 /* make status visible */
953 smp_mb(); 958 smp_mb();
954 goto exit_arb; 959 goto exit_arb;
@@ -957,20 +962,30 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
957 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, 962 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
958 sys2clk_target, xbar2clk_target, mclk_target, voltuv, 963 sys2clk_target, xbar2clk_target, mclk_target, voltuv,
959 voltuv_sram); 964 voltuv_sram);
960
961 if (status < 0) { 965 if (status < 0) {
962 arb->status = status; 966 arb->status = status;
967 mutex_unlock(&arb->pstate_lock);
968
963 /* make status visible */ 969 /* make status visible */
964 smp_mb(); 970 smp_mb();
965 goto exit_arb; 971 goto exit_arb;
966 } 972 }
967 973
968 status = nvgpu_clk_arb_change_vf_point_postfix(g, gpc2clk_target, 974 status = clk_pmu_freq_controller_load(g, true);
969 sys2clk_target, xbar2clk_target, mclk_target, voltuv, 975 if (status < 0) {
970 voltuv_sram, nuvmin, nuvmin_sram); 976 arb->status = status;
977 mutex_unlock(&arb->pstate_lock);
978
979 /* make status visible */
980 smp_mb();
981 goto exit_arb;
982 }
971 983
984 status = nvgpu_lwpr_mclk_change(g, pstate);
972 if (status < 0) { 985 if (status < 0) {
973 arb->status = status; 986 arb->status = status;
987 mutex_unlock(&arb->pstate_lock);
988
974 /* make status visible */ 989 /* make status visible */
975 smp_mb(); 990 smp_mb();
976 goto exit_arb; 991 goto exit_arb;
@@ -991,15 +1006,24 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
991 smp_wmb(); 1006 smp_wmb();
992 xchg(&arb->actual, actual); 1007 xchg(&arb->actual, actual);
993 1008
1009 status = nvgpu_lpwr_enable_pg(g, false);
1010 if (status < 0) {
1011 arb->status = status;
1012 mutex_unlock(&arb->pstate_lock);
1013
1014 /* make status visible */
1015 smp_mb();
1016 goto exit_arb;
1017 }
1018
994 /* status must be visible before atomic inc */ 1019 /* status must be visible before atomic inc */
995 smp_wmb(); 1020 smp_wmb();
996 atomic_inc(&arb->req_nr); 1021 atomic_inc(&arb->req_nr);
997 1022
998 wake_up_interruptible(&arb->request_wq); 1023 /* Unlock pstate change for PG */
1024 mutex_unlock(&arb->pstate_lock);
999 1025
1000 if (status < 0) 1026 wake_up_interruptible(&arb->request_wq);
1001 gk20a_err(dev_from_gk20a(g),
1002 "Error in arbiter update");
1003 1027
1004#ifdef CONFIG_DEBUG_FS 1028#ifdef CONFIG_DEBUG_FS
1005 g->ops.read_ptimer(g, &t1); 1029 g->ops.read_ptimer(g, &t1);
@@ -1036,6 +1060,9 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1036#endif 1060#endif
1037 1061
1038exit_arb: 1062exit_arb:
1063 if (status < 0)
1064 gk20a_err(dev_from_gk20a(g),
1065 "Error in arbiter update");
1039 1066
1040 /* notify completion for all requests */ 1067 /* notify completion for all requests */
1041 head = llist_del_all(&arb->requests); 1068 head = llist_del_all(&arb->requests);
@@ -1300,6 +1327,7 @@ recalculate_vf_point:
1300 } 1327 }
1301 if (index == table->mclk_num_points) { 1328 if (index == table->mclk_num_points) {
1302 mclk_vf = &table->mclk_points[index-1]; 1329 mclk_vf = &table->mclk_points[index-1];
1330 index = table->mclk_num_points - 1;
1303 } 1331 }
1304 index_mclk = index; 1332 index_mclk = index;
1305 1333
@@ -1378,28 +1406,11 @@ find_exit:
1378 return pstate; 1406 return pstate;
1379} 1407}
1380 1408
1381static int nvgpu_clk_arb_change_vf_point_prefix(struct gk20a *g, 1409/* This function is inherently unsafe to call while arbiter is running
1382 u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target, 1410 * arbiter must be blocked before calling this function */
1383 u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin, 1411int nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
1384 u32 nuvmin_sram)
1385{
1386
1387 int status;
1388
1389 status = clk_pmu_freq_controller_load(g, false);
1390 if (status < 0)
1391 return status;
1392
1393 status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
1394 return status;
1395}
1396
1397static int nvgpu_clk_arb_change_vf_point_postfix(struct gk20a *g,
1398 u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
1399 u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
1400 u32 nuvmin_sram)
1401{ 1412{
1402 return clk_pmu_freq_controller_load(g, true); 1413 return ACCESS_ONCE(g->clk_arb->actual->pstate);
1403} 1414}
1404 1415
1405static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, 1416static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
@@ -1456,6 +1467,17 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1456 return 0; 1467 return 0;
1457} 1468}
1458 1469
/*
 * Acquire or release the arbiter's pstate mutex for external callers,
 * so that a pstate (VF point) change cannot run concurrently with the
 * caller's critical section.
 *
 * @param g     GPU device structure holding the clk arbiter.
 * @param lock  true to take arb->pstate_lock, false to drop it.
 *
 * NOTE(review): caller is responsible for balancing every lock=true
 * call with a lock=false call — this wrapper does no checking.
 */
void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;

	if (lock)
		mutex_lock(&arb->pstate_lock);
	else
		mutex_unlock(&arb->pstate_lock);

}
1480
1459#ifdef CONFIG_DEBUG_FS 1481#ifdef CONFIG_DEBUG_FS
1460static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) 1482static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
1461{ 1483{
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h
index 8355dac5..700804b3 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.h
+++ b/drivers/gpu/nvgpu/clk/clk_arb.h
@@ -62,5 +62,10 @@ int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
62 struct nvgpu_clk_session *session, int *event_fd); 62 struct nvgpu_clk_session *session, int *event_fd);
63 63
64void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g); 64void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g);
65
66int nvgpu_clk_arb_get_current_pstate(struct gk20a *g);
67
68void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock);
69
65#endif /* _CLK_ARB_H_ */ 70#endif /* _CLK_ARB_H_ */
66 71
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index 6f5e71eb..eecd7351 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -24,6 +24,7 @@
24#include "clk/clk_mclk.h" 24#include "clk/clk_mclk.h"
25#include "hw_mc_gp106.h" 25#include "hw_mc_gp106.h"
26#include "hw_pwr_gp106.h" 26#include "hw_pwr_gp106.h"
27#include "lpwr/lpwr.h"
27#include "lpwr/rppg.h" 28#include "lpwr/rppg.h"
28 29
29#define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000 30#define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000
@@ -180,12 +181,16 @@ static u32 gp106_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id)
180 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) 181 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS)
181 return PMU_PG_FEATURE_GR_RPPG_ENABLED; 182 return PMU_PG_FEATURE_GR_RPPG_ENABLED;
182 183
184 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS)
185 return NVGPU_PMU_MS_FEATURE_MASK_ALL;
186
183 return 0; 187 return 0;
184} 188}
185 189
186static u32 gp106_pmu_pg_engines_list(struct gk20a *g) 190static u32 gp106_pmu_pg_engines_list(struct gk20a *g)
187{ 191{
188 return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS); 192 return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS) |
193 BIT(PMU_PG_ELPG_ENGINE_ID_MS);
189} 194}
190 195
191static void pmu_handle_param_msg(struct gk20a *g, struct pmu_msg *msg, 196static void pmu_handle_param_msg(struct gk20a *g, struct pmu_msg *msg,
@@ -231,6 +236,23 @@ static int gp106_pg_param_init(struct gk20a *g, u32 pg_engine_id)
231 gp106_dbg_pmu("cmd post GR PMU_PG_CMD_ID_PG_PARAM"); 236 gp106_dbg_pmu("cmd post GR PMU_PG_CMD_ID_PG_PARAM");
232 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, 237 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
233 pmu_handle_param_msg, pmu, &seq, ~0); 238 pmu_handle_param_msg, pmu, &seq, ~0);
239 } else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) {
240 cmd.hdr.unit_id = PMU_UNIT_PG;
241 cmd.hdr.size = PMU_CMD_HDR_SIZE +
242 sizeof(struct pmu_pg_cmd_ms_init_param);
243 cmd.cmd.pg.ms_init_param.cmd_type =
244 PMU_PG_CMD_ID_PG_PARAM;
245 cmd.cmd.pg.ms_init_param.cmd_id =
246 PMU_PG_PARAM_CMD_MS_INIT_PARAM;
247 cmd.cmd.pg.ms_init_param.support_mask =
248 NVGPU_PMU_MS_FEATURE_MASK_CLOCK_GATING |
249 NVGPU_PMU_MS_FEATURE_MASK_SW_ASR |
250 NVGPU_PMU_MS_FEATURE_MASK_RPPG |
251 NVGPU_PMU_MS_FEATURE_MASK_FB_TRAINING;
252
253 gp106_dbg_pmu("cmd post MS PMU_PG_CMD_ID_PG_PARAM");
254 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
255 pmu_handle_param_msg, pmu, &seq, ~0);
234 } 256 }
235 257
236 return 0; 258 return 0;
@@ -261,6 +283,9 @@ void gp106_init_pmu_ops(struct gpu_ops *gops)
261 gops->pmu.pmu_pg_init_param = gp106_pg_param_init; 283 gops->pmu.pmu_pg_init_param = gp106_pg_param_init;
262 gops->pmu.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list; 284 gops->pmu.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list;
263 gops->pmu.pmu_pg_engines_feature_list = gp106_pmu_pg_feature_list; 285 gops->pmu.pmu_pg_engines_feature_list = gp106_pmu_pg_feature_list;
286 gops->pmu.pmu_lpwr_enable_pg = nvgpu_lpwr_enable_pg;
287 gops->pmu.pmu_lpwr_disable_pg = nvgpu_lpwr_disable_pg;
288 gops->pmu.pmu_pg_param_post_init = nvgpu_lpwr_post_init;
264 gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL; 289 gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL;
265 gops->pmu.dump_secure_fuses = NULL; 290 gops->pmu.dump_secure_fuses = NULL;
266 gops->pmu.reset = gp106_falcon_reset; 291 gops->pmu.reset = gp106_falcon_reset;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index cd9cd0b0..9274990a 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -482,6 +482,9 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
482 gops->pmu.pmu_pg_init_param = gp10b_pg_gr_init; 482 gops->pmu.pmu_pg_init_param = gp10b_pg_gr_init;
483 gops->pmu.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list; 483 gops->pmu.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list;
484 gops->pmu.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list; 484 gops->pmu.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list;
485 gops->pmu.pmu_lpwr_enable_pg = NULL;
486 gops->pmu.pmu_lpwr_disable_pg = NULL;
487 gops->pmu.pmu_pg_param_post_init = NULL;
485 gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = 488 gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd =
486 send_ecc_overide_en_dis_cmd; 489 send_ecc_overide_en_dis_cmd;
487 gops->pmu.reset = gk20a_pmu_reset; 490 gops->pmu.reset = gk20a_pmu_reset;
diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h
index f3939d14..097e90ec 100644
--- a/drivers/gpu/nvgpu/include/bios.h
+++ b/drivers/gpu/nvgpu/include/bios.h
@@ -925,4 +925,68 @@ struct vbios_fct_1x_entry {
925#define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_MASK GENMASK(31, 16) 925#define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_MASK GENMASK(31, 16)
926#define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_SHIFT 16 926#define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_SHIFT 16
927 927
/* LPWR Index Table */

/* Header of the VBIOS LPWR Index table (version 1.x). All tables below
 * are raw VBIOS images, hence __packed and fixed-width fields. */
struct nvgpu_bios_lpwr_idx_table_1x_header {
	u8 version;		/* table format version */
	u8 header_size;		/* bytes from table start to first entry */
	u8 entry_size;		/* bytes per entry */
	u8 entry_count;		/* number of entries that follow */
	u16 base_sampling_period;
} __packed;

/* One LPWR Index entry: per-pstate indices into the feature tables. */
struct nvgpu_bios_lpwr_idx_table_1x_entry {
	u8 pcie_idx;
	u8 gr_idx;	/* index into the LPWR GR table */
	u8 ms_idx;	/* index into the LPWR MS table */
	u8 di_idx;
	u8 gc6_idx;
} __packed;

/* LPWR MS Table*/
struct nvgpu_bios_lpwr_ms_table_1x_header {
	u8 version;
	u8 header_size;
	u8 entry_size;
	u8 entry_count;
	u8 default_entry_idx;
	u16 idle_threshold_us;
} __packed;

struct nvgpu_bios_lpwr_ms_table_1x_entry {
	/* NOTE(review): "feautre" is a typo for "feature"; kept as-is
	 * because the parsers reference this exact field name. */
	u32 feautre_mask;
	u16 dynamic_current_logic;
	u16 dynamic_current_sram;
} __packed;

/* Bit fields of nvgpu_bios_lpwr_ms_table_1x_entry::feautre_mask. */
#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_MASK GENMASK(0, 0)
#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SHIFT 0
#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR_MASK GENMASK(2, 2)
#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR_SHIFT 2
#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING_MASK \
		GENMASK(3, 3)
#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING_SHIFT 3
#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG_MASK GENMASK(5, 5)
#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG_SHIFT 5

/* LPWR GR Table */
struct nvgpu_bios_lpwr_gr_table_1x_header {
	u8 version;
	u8 header_size;
	u8 entry_size;
	u8 entry_count;
	u8 default_entry_idx;
	u16 idle_threshold_us;
	u8 adaptive_gr_multiplier;
} __packed;

struct nvgpu_bios_lpwr_gr_table_1x_entry {
	/* Same "feautre" typo as the MS entry above — kept for consistency. */
	u32 feautre_mask;
} __packed;

/* Bit fields of nvgpu_bios_lpwr_gr_table_1x_entry::feautre_mask. */
#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_MASK GENMASK(0, 0)
#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_SHIFT 0

#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG_MASK GENMASK(4, 4)
#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG_SHIFT 4
991
928#endif 992#endif
diff --git a/drivers/gpu/nvgpu/lpwr/lpwr.c b/drivers/gpu/nvgpu/lpwr/lpwr.c
new file mode 100644
index 00000000..4f8d2eec
--- /dev/null
+++ b/drivers/gpu/nvgpu/lpwr/lpwr.c
@@ -0,0 +1,423 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include "gk20a/gk20a.h"
15#include "gk20a/pmu_gk20a.h"
16#include "gp106/pmu_gp106.h"
17#include "gk20a/pmu_api.h"
18#include "gm206/bios_gm206.h"
19#include "pstate/pstate.h"
20#include "include/bios.h"
21#include "perf/perf.h"
22#include "lpwr.h"
23
/*
 * Parse the VBIOS LPWR Index table into g->perf_pmu.lpwr.lwpr_bios_data.idx.
 * Each entry maps a pstate to indices into the GR/MS/DI/GC6 feature tables.
 * Returns 0 on success, -EINVAL if the table is absent or oversized.
 */
static int get_lpwr_idx_table(struct gk20a *g)
{
	u32 *lpwr_idx_table_ptr;
	u8 *entry_addr;
	u32 idx;
	struct nvgpu_lpwr_bios_idx_data *pidx_data =
		&g->perf_pmu.lpwr.lwpr_bios_data.idx;
	struct nvgpu_bios_lpwr_idx_table_1x_header header = { 0 };
	struct nvgpu_bios_lpwr_idx_table_1x_entry entry = { 0 };

	/* Locate the table via the BIOS HAL; no HAL means no table. */
	if (g->ops.bios.get_perf_table_ptrs) {
		lpwr_idx_table_ptr = (u32 *)g->ops.bios.get_perf_table_ptrs(g,
			g->bios.perf_token, LOWPOWER_TABLE);
		if (lpwr_idx_table_ptr == NULL)
			return -EINVAL;
	} else
		return -EINVAL;

	memcpy(&header, lpwr_idx_table_ptr,
		sizeof(struct nvgpu_bios_lpwr_idx_table_1x_header));

	/* Reject tables that would overflow the fixed-size entry array.
	 * NOTE(review): ">=" also rejects entry_count == MAX even though
	 * the array holds MAX entries — possibly deliberate, since the
	 * last slot is LPWR_VBIOS_IDX_ENTRY_RSVD; confirm before relaxing. */
	if (header.entry_count >= LPWR_VBIOS_IDX_ENTRY_COUNT_MAX)
		return -EINVAL;

	pidx_data->base_sampling_period = (u16)header.base_sampling_period;

	/* Parse the LPWR Index Table entries.*/
	for (idx = 0; idx < header.entry_count; idx++) {
		/* Entries start header_size bytes in, entry_size apart
		 * (sizes come from the header, not from our structs). */
		entry_addr = (u8 *)lpwr_idx_table_ptr + header.header_size +
			(idx * header.entry_size);

		memcpy(&entry, entry_addr,
			sizeof(struct nvgpu_bios_lpwr_idx_table_1x_entry));

		pidx_data->entry[idx].pcie_idx = entry.pcie_idx;
		pidx_data->entry[idx].gr_idx = entry.gr_idx;
		pidx_data->entry[idx].ms_idx = entry.ms_idx;
		pidx_data->entry[idx].di_idx = entry.di_idx;
		pidx_data->entry[idx].gc6_idx = entry.gc6_idx;

	}

	return 0;
}
68
69static int get_lpwr_gr_table(struct gk20a *g)
70{
71 u32 *lpwr_gr_table_ptr;
72 u8 *entry_addr;
73 u32 idx;
74 struct nvgpu_lpwr_bios_gr_data *pgr_data =
75 &g->perf_pmu.lpwr.lwpr_bios_data.gr;
76 struct nvgpu_bios_lpwr_gr_table_1x_header header = { 0 };
77 struct nvgpu_bios_lpwr_gr_table_1x_entry entry = { 0 };
78
79 if (g->ops.bios.get_perf_table_ptrs) {
80 lpwr_gr_table_ptr = (u32 *)g->ops.bios.get_perf_table_ptrs(g,
81 g->bios.perf_token, LOWPOWER_GR_TABLE);
82 if (lpwr_gr_table_ptr == NULL)
83 return -EINVAL;
84 } else
85 return -EINVAL;
86
87 memcpy(&header, lpwr_gr_table_ptr,
88 sizeof(struct nvgpu_bios_lpwr_gr_table_1x_header));
89
90 /* Parse the LPWR Index Table entries.*/
91 for (idx = 0; idx < header.entry_count; idx++) {
92 entry_addr = (u8 *)lpwr_gr_table_ptr + header.header_size +
93 (idx * header.entry_size);
94
95 memcpy(&entry, entry_addr,
96 sizeof(struct nvgpu_bios_lpwr_gr_table_1x_entry));
97
98 if (BIOS_GET_FIELD(entry.feautre_mask,
99 NV_VBIOS_LPWR_MS_FEATURE_MASK_MS)) {
100 pgr_data->entry[idx].gr_enabled = true;
101
102 pgr_data->entry[idx].feature_mask =
103 NVGPU_PMU_GR_FEATURE_MASK_ALL;
104
105 if (!BIOS_GET_FIELD(entry.feautre_mask,
106 NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG))
107 pgr_data->entry[idx].feature_mask &=
108 ~NVGPU_PMU_GR_FEATURE_MASK_RPPG;
109 }
110
111 }
112
113 return 0;
114}
115
116static int get_lpwr_ms_table(struct gk20a *g)
117{
118 u32 *lpwr_ms_table_ptr;
119 u8 *entry_addr;
120 u32 idx;
121 struct nvgpu_lpwr_bios_ms_data *pms_data =
122 &g->perf_pmu.lpwr.lwpr_bios_data.ms;
123 struct nvgpu_bios_lpwr_ms_table_1x_header header = { 0 };
124 struct nvgpu_bios_lpwr_ms_table_1x_entry entry = { 0 };
125
126 if (g->ops.bios.get_perf_table_ptrs) {
127 lpwr_ms_table_ptr = (u32 *)g->ops.bios.get_perf_table_ptrs(g,
128 g->bios.perf_token, LOWPOWER_MS_TABLE);
129 if (lpwr_ms_table_ptr == NULL)
130 return -EINVAL;
131 } else
132 return -EINVAL;
133
134 memcpy(&header, lpwr_ms_table_ptr,
135 sizeof(struct nvgpu_bios_lpwr_ms_table_1x_header));
136
137 if (header.entry_count >= LPWR_VBIOS_MS_ENTRY_COUNT_MAX)
138 return -EINVAL;
139
140 pms_data->default_entry_idx = (u8)header.default_entry_idx;
141
142 pms_data->idle_threshold_us = (u32)(header.idle_threshold_us * 10);
143
144 /* Parse the LPWR MS Table entries.*/
145 for (idx = 0; idx < header.entry_count; idx++) {
146 entry_addr = (u8 *)lpwr_ms_table_ptr + header.header_size +
147 (idx * header.entry_size);
148
149 memcpy(&entry, entry_addr,
150 sizeof(struct nvgpu_bios_lpwr_ms_table_1x_entry));
151
152 if (BIOS_GET_FIELD(entry.feautre_mask,
153 NV_VBIOS_LPWR_MS_FEATURE_MASK_MS)) {
154 pms_data->entry[idx].ms_enabled = true;
155
156 pms_data->entry[idx].feature_mask =
157 NVGPU_PMU_MS_FEATURE_MASK_ALL;
158
159 if (!BIOS_GET_FIELD(entry.feautre_mask,
160 NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING))
161 pms_data->entry[idx].feature_mask &=
162 ~NVGPU_PMU_MS_FEATURE_MASK_CLOCK_GATING;
163
164 if (!BIOS_GET_FIELD(entry.feautre_mask,
165 NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR))
166 pms_data->entry[idx].feature_mask &=
167 ~NVGPU_PMU_MS_FEATURE_MASK_SW_ASR;
168
169 if (!BIOS_GET_FIELD(entry.feautre_mask,
170 NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG))
171 pms_data->entry[idx].feature_mask &=
172 ~NVGPU_PMU_MS_FEATURE_MASK_RPPG;
173 }
174
175 pms_data->entry[idx].dynamic_current_logic =
176 entry.dynamic_current_logic;
177
178 pms_data->entry[idx].dynamic_current_sram =
179 entry.dynamic_current_sram;
180 }
181
182 return 0;
183}
184
185u32 nvgpu_lpwr_pg_setup(struct gk20a *g)
186{
187 u32 err = 0;
188
189 gk20a_dbg_fn("");
190
191 err = get_lpwr_gr_table(g);
192 if (err)
193 return err;
194
195 err = get_lpwr_ms_table(g);
196 if (err)
197 return err;
198
199 err = get_lpwr_idx_table(g);
200
201 return err;
202}
203
204static void nvgpu_pmu_handle_param_lpwr_msg(struct gk20a *g,
205 struct pmu_msg *msg, void *param,
206 u32 handle, u32 status)
207{
208 u32 *ack_status = param;
209
210 gk20a_dbg_fn("");
211
212 if (status != 0) {
213 gk20a_err(dev_from_gk20a(g), "LWPR PARAM cmd aborted");
214 return;
215 }
216
217 *ack_status = 1;
218
219 gp106_dbg_pmu("lpwr-param is acknowledged from PMU %x",
220 msg->msg.pg.msg_type);
221}
222
223int nvgpu_lwpr_mclk_change(struct gk20a *g, u32 pstate)
224{
225 struct pmu_cmd cmd;
226 u32 seq, status = 0;
227 u32 payload = NV_PMU_PG_PARAM_MCLK_CHANGE_MS_SWASR_ENABLED;
228 struct clk_set_info *pstate_info;
229 u32 ack_status = 0;
230
231 gk20a_dbg_fn("");
232
233 pstate_info = pstate_get_clk_set_info(g, pstate,
234 clkwhich_mclk);
235 if (!pstate_info)
236 return -EINVAL;
237
238 if (pstate_info->max_mhz >
239 MAX_SWASR_MCLK_FREQ_WITHOUT_WR_TRAINING_MAXWELL_MHZ)
240 payload |=
241 NV_PMU_PG_PARAM_MCLK_CHANGE_GDDR5_WR_TRAINING_ENABLED;
242
243 if (payload != g->perf_pmu.lpwr.mclk_change_cache) {
244 g->perf_pmu.lpwr.mclk_change_cache = payload;
245
246 cmd.hdr.unit_id = PMU_UNIT_PG;
247 cmd.hdr.size = PMU_CMD_HDR_SIZE +
248 sizeof(struct pmu_pg_cmd_mclk_change);
249 cmd.cmd.pg.mclk_change.cmd_type =
250 PMU_PG_CMD_ID_PG_PARAM;
251 cmd.cmd.pg.mclk_change.cmd_id =
252 PMU_PG_PARAM_CMD_MCLK_CHANGE;
253 cmd.cmd.pg.mclk_change.data = payload;
254
255 gp106_dbg_pmu("cmd post MS PMU_PG_PARAM_CMD_MCLK_CHANGE");
256 status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL,
257 PMU_COMMAND_QUEUE_HPQ,
258 nvgpu_pmu_handle_param_lpwr_msg, &ack_status, &seq, ~0);
259
260 pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
261 &ack_status, 1);
262 if (ack_status == 0) {
263 status = -EINVAL;
264 gk20a_err(dev_from_gk20a(g), "MCLK-CHANGE ACK failed");
265 }
266 }
267
268 return status;
269}
270
271u32 nvgpu_lpwr_post_init(struct gk20a *g)
272{
273 struct pmu_cmd cmd;
274 u32 seq;
275 u32 status = 0;
276 u32 ack_status = 0;
277
278 memset(&cmd, 0, sizeof(struct pmu_cmd));
279 cmd.hdr.unit_id = PMU_UNIT_PG;
280 cmd.hdr.size = PMU_CMD_HDR_SIZE +
281 sizeof(struct pmu_pg_cmd_post_init_param);
282
283 cmd.cmd.pg.post_init.cmd_type =
284 PMU_PG_CMD_ID_PG_PARAM;
285 cmd.cmd.pg.post_init.cmd_id =
286 PMU_PG_PARAM_CMD_POST_INIT;
287
288 gp106_dbg_pmu("cmd post post-init PMU_PG_PARAM_CMD_POST_INIT");
289 status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL,
290 PMU_COMMAND_QUEUE_LPQ,
291 nvgpu_pmu_handle_param_lpwr_msg, &ack_status, &seq, ~0);
292
293 pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
294 &ack_status, 1);
295 if (ack_status == 0) {
296 status = -EINVAL;
297 gk20a_err(dev_from_gk20a(g), "post-init ack failed");
298 }
299
300 return status;
301}
302
303u32 nvgpu_lpwr_is_mscg_supported(struct gk20a *g, u32 pstate_num)
304{
305 struct nvgpu_lpwr_bios_ms_data *pms_data =
306 &g->perf_pmu.lpwr.lwpr_bios_data.ms;
307 struct nvgpu_lpwr_bios_idx_data *pidx_data =
308 &g->perf_pmu.lpwr.lwpr_bios_data.idx;
309 struct pstate *pstate = pstate_find(g, pstate_num);
310 u32 ms_idx;
311
312 gk20a_dbg_fn("");
313
314 if (!pstate)
315 return 0;
316
317 ms_idx = pidx_data->entry[pstate->lpwr_entry_idx].ms_idx;
318 if (pms_data->entry[ms_idx].ms_enabled)
319 return 1;
320 else
321 return 0;
322}
323
324u32 nvgpu_lpwr_is_rppg_supported(struct gk20a *g, u32 pstate_num)
325{
326 struct nvgpu_lpwr_bios_gr_data *pgr_data =
327 &g->perf_pmu.lpwr.lwpr_bios_data.gr;
328 struct nvgpu_lpwr_bios_idx_data *pidx_data =
329 &g->perf_pmu.lpwr.lwpr_bios_data.idx;
330 struct pstate *pstate = pstate_find(g, pstate_num);
331 u32 idx;
332
333 gk20a_dbg_fn("");
334
335 if (!pstate)
336 return 0;
337
338 idx = pidx_data->entry[pstate->lpwr_entry_idx].gr_idx;
339 if (pgr_data->entry[idx].gr_enabled)
340 return 1;
341 else
342 return 0;
343}
344
345
/*
 * Enable the low-power PG features (MSCG / ELPG-RPPG) that the current
 * pstate supports.
 *
 * @param g            GPU device structure.
 * @param pstate_lock  true to take the arbiter's pstate lock around the
 *                     operation; pass false when the caller (the arbiter
 *                     itself) already holds it, to avoid self-deadlock.
 *
 * Lock ordering: pstate lock first, then pmu->pg_mutex — the reverse of
 * nothing else here, but it must match nvgpu_lpwr_disable_pg().
 * Returns 0 or the error from gk20a_pmu_enable_elpg().
 */
int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock)
{
	struct pmu_gk20a *pmu = &g->pmu;
	u32 status = 0;
	u32 is_mscg_supported = 0;
	u32 is_rppg_supported = 0;
	u32 present_pstate = 0;

	gk20a_dbg_fn("");

	if (pstate_lock)
		nvgpu_clk_arb_pstate_change_lock(g, true);
	mutex_lock(&pmu->pg_mutex);

	/* Reading the pstate is only safe while the arbiter is blocked. */
	present_pstate = nvgpu_clk_arb_get_current_pstate(g);

	is_mscg_supported = nvgpu_lpwr_is_mscg_supported(g,
			present_pstate);
	if (is_mscg_supported && g->mscg_enabled) {
		/* Only flip the stat when it is currently cleared. */
		if (!pmu->mscg_stat)
			pmu->mscg_stat = PMU_MSCG_ENABLED;
	}

	is_rppg_supported = nvgpu_lpwr_is_rppg_supported(g,
			present_pstate);
	if (is_rppg_supported) {
		if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
			status = gk20a_pmu_enable_elpg(g);
	}

	/* Release in reverse acquisition order. */
	mutex_unlock(&pmu->pg_mutex);
	if (pstate_lock)
		nvgpu_clk_arb_pstate_change_lock(g, false);

	return status;
}
382
/*
 * Disable the low-power PG features (ELPG-RPPG first, then MSCG) that
 * the current pstate supports — the mirror of nvgpu_lpwr_enable_pg().
 *
 * @param g            GPU device structure.
 * @param pstate_lock  true to take the arbiter's pstate lock around the
 *                     operation; false when the caller already holds it.
 *
 * If disabling ELPG fails, MSCG state is left untouched and the error
 * is returned after dropping the locks (goto cleanup path).
 * Returns 0 or the error from gk20a_pmu_disable_elpg().
 */
int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock)
{
	struct pmu_gk20a *pmu = &g->pmu;
	int status = 0;
	u32 is_mscg_supported = 0;
	u32 is_rppg_supported = 0;
	u32 present_pstate = 0;

	gk20a_dbg_fn("");

	if (pstate_lock)
		nvgpu_clk_arb_pstate_change_lock(g, true);
	mutex_lock(&pmu->pg_mutex);

	/* Reading the pstate is only safe while the arbiter is blocked. */
	present_pstate = nvgpu_clk_arb_get_current_pstate(g);

	is_rppg_supported = nvgpu_lpwr_is_rppg_supported(g,
			present_pstate);
	if (is_rppg_supported) {
		if (support_gk20a_pmu(g->dev) && g->elpg_enabled) {
			status = gk20a_pmu_disable_elpg(g);
			if (status)
				goto exit_unlock;
		}
	}

	is_mscg_supported = nvgpu_lpwr_is_mscg_supported(g,
			present_pstate);
	if (is_mscg_supported && g->mscg_enabled) {
		/* Only flip the stat when it is currently set. */
		if (pmu->mscg_stat)
			pmu->mscg_stat = PMU_MSCG_DISABLED;
	}

exit_unlock:
	/* Release in reverse acquisition order. */
	mutex_unlock(&pmu->pg_mutex);
	if (pstate_lock)
		nvgpu_clk_arb_pstate_change_lock(g, false);

	gk20a_dbg_fn("done");
	return status;
}
diff --git a/drivers/gpu/nvgpu/lpwr/lpwr.h b/drivers/gpu/nvgpu/lpwr/lpwr.h
new file mode 100644
index 00000000..6b3259df
--- /dev/null
+++ b/drivers/gpu/nvgpu/lpwr/lpwr.h
@@ -0,0 +1,92 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef _MSCG_H_
14#define _MSCG_H_
15
16#define MAX_SWASR_MCLK_FREQ_WITHOUT_WR_TRAINING_MAXWELL_MHZ 540
17
18#define NV_PMU_PG_PARAM_MCLK_CHANGE_MS_SWASR_ENABLED BIT(0x1)
19#define NV_PMU_PG_PARAM_MCLK_CHANGE_GDDR5_WR_TRAINING_ENABLED BIT(0x3)
20
21#define LPWR_ENTRY_COUNT_MAX 0x06
22
23#define LPWR_VBIOS_IDX_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX)
24
25#define LPWR_VBIOS_IDX_ENTRY_RSVD \
26 (LPWR_VBIOS_IDX_ENTRY_COUNT_MAX - 1)
27
28#define LPWR_VBIOS_BASE_SAMPLING_PERIOD_DEFAULT (500)
29
30struct nvgpu_lpwr_bios_idx_entry {
31 u8 pcie_idx;
32 u8 gr_idx;
33 u8 ms_idx;
34 u8 di_idx;
35 u8 gc6_idx;
36};
37
38struct nvgpu_lpwr_bios_idx_data {
39 u16 base_sampling_period;
40 struct nvgpu_lpwr_bios_idx_entry entry[LPWR_VBIOS_IDX_ENTRY_COUNT_MAX];
41};
42
43#define LPWR_VBIOS_MS_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX)
44
45struct nvgpu_lpwr_bios_ms_entry {
46 bool ms_enabled;
47 u32 feature_mask;
48 u32 asr_efficiency_thresholdl;
49 u16 dynamic_current_logic;
50 u16 dynamic_current_sram;
51};
52
53struct nvgpu_lpwr_bios_ms_data {
54 u8 default_entry_idx;
55 u32 idle_threshold_us;
56 struct nvgpu_lpwr_bios_ms_entry entry[LPWR_VBIOS_MS_ENTRY_COUNT_MAX];
57};
58
59#define LPWR_VBIOS_GR_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX)
60
61struct nvgpu_lpwr_bios_gr_entry {
62 bool gr_enabled;
63 u32 feature_mask;
64};
65
66struct nvgpu_lpwr_bios_gr_data {
67 u8 default_entry_idx;
68 u32 idle_threshold_us;
69 u8 adaptive_gr_multiplier;
70 struct nvgpu_lpwr_bios_gr_entry entry[LPWR_VBIOS_GR_ENTRY_COUNT_MAX];
71};
72
73struct nvgpu_lpwr_bios_data {
74 struct nvgpu_lpwr_bios_idx_data idx;
75 struct nvgpu_lpwr_bios_ms_data ms;
76 struct nvgpu_lpwr_bios_gr_data gr;
77};
78
79struct obj_lwpr {
80 struct nvgpu_lpwr_bios_data lwpr_bios_data;
81 u32 mclk_change_cache;
82};
83
84u32 nvgpu_lpwr_pg_setup(struct gk20a *g);
85int nvgpu_lwpr_mclk_change(struct gk20a *g, u32 pstate);
86int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock);
87int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock);
88u32 nvgpu_lpwr_is_mscg_supported(struct gk20a *g, u32 pstate_num);
89u32 nvgpu_lpwr_is_rppg_supported(struct gk20a *g, u32 pstate_num);
90u32 nvgpu_lpwr_post_init(struct gk20a *g);
91
92#endif
diff --git a/drivers/gpu/nvgpu/perf/perf.h b/drivers/gpu/nvgpu/perf/perf.h
index c03bf2ae..a3213f7a 100644
--- a/drivers/gpu/nvgpu/perf/perf.h
+++ b/drivers/gpu/nvgpu/perf/perf.h
@@ -18,6 +18,7 @@
18#include "pstate/pstate.h" 18#include "pstate/pstate.h"
19#include "gk20a/gk20a.h" 19#include "gk20a/gk20a.h"
20#include "volt/volt.h" 20#include "volt/volt.h"
21#include "lpwr/lpwr.h"
21 22
22#define CTRL_PERF_VFE_VAR_TYPE_INVALID 0x00 23#define CTRL_PERF_VFE_VAR_TYPE_INVALID 0x00
23#define CTRL_PERF_VFE_VAR_TYPE_DERIVED 0x01 24#define CTRL_PERF_VFE_VAR_TYPE_DERIVED 0x01
@@ -57,6 +58,7 @@ struct perf_pmupstate {
57 struct vfe_equs vfe_equobjs; 58 struct vfe_equs vfe_equobjs;
58 struct pstates pstatesobjs; 59 struct pstates pstatesobjs;
59 struct obj_volt volt; 60 struct obj_volt volt;
61 struct obj_lwpr lpwr;
60}; 62};
61 63
62u32 perf_pmu_vfe_load(struct gk20a *g); 64u32 perf_pmu_vfe_load(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index 2e08ef01..82e809bb 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -83,6 +83,10 @@ int gk20a_init_pstate_support(struct gk20a *g)
83 return err; 83 return err;
84 84
85 err = clk_freq_controller_sw_setup(g); 85 err = clk_freq_controller_sw_setup(g);
86 if (err)
87 return err;
88
89 err = nvgpu_lpwr_pg_setup(g);
86 90
87 return err; 91 return err;
88} 92}
@@ -327,6 +331,9 @@ static int pstate_sw_setup(struct gk20a *g)
327 331
328 gk20a_dbg_fn(""); 332 gk20a_dbg_fn("");
329 333
334 init_waitqueue_head(&g->perf_pmu.pstatesobjs.pstate_notifier_wq);
335 mutex_init(&g->perf_pmu.pstatesobjs.pstate_mutex);
336
330 err = boardobjgrpconstruct_e32(&g->perf_pmu.pstatesobjs.super); 337 err = boardobjgrpconstruct_e32(&g->perf_pmu.pstatesobjs.super);
331 if (err) { 338 if (err) {
332 gk20a_err(dev_from_gk20a(g), 339 gk20a_err(dev_from_gk20a(g),
@@ -361,7 +368,7 @@ done:
361 return err; 368 return err;
362} 369}
363 370
364static struct pstate *pstate_find(struct gk20a *g, u32 num) 371struct pstate *pstate_find(struct gk20a *g, u32 num)
365{ 372{
366 struct pstates *pstates = &(g->perf_pmu.pstatesobjs); 373 struct pstates *pstates = &(g->perf_pmu.pstatesobjs);
367 struct pstate *pstate; 374 struct pstate *pstate;
diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h
index b6519c20..af0956e8 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.h
+++ b/drivers/gpu/nvgpu/pstate/pstate.h
@@ -48,6 +48,9 @@ struct pstate {
48struct pstates { 48struct pstates {
49 struct boardobjgrp_e32 super; 49 struct boardobjgrp_e32 super;
50 u32 num_levels; 50 u32 num_levels;
51 wait_queue_head_t pstate_notifier_wq;
52 u32 is_pstate_switch_on;
53 struct mutex pstate_mutex; /* protect is_pstate_switch_on */
51}; 54};
52 55
53int gk20a_init_pstate_support(struct gk20a *g); 56int gk20a_init_pstate_support(struct gk20a *g);
@@ -55,5 +58,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g);
55 58
56struct clk_set_info *pstate_get_clk_set_info(struct gk20a *g, u32 pstate_num, 59struct clk_set_info *pstate_get_clk_set_info(struct gk20a *g, u32 pstate_num,
57 enum nv_pmu_clk_clkwhich clkwhich); 60 enum nv_pmu_clk_clkwhich clkwhich);
61struct pstate *pstate_find(struct gk20a *g, u32 num);
58 62
59#endif /* __PSTATE_H__ */ 63#endif /* __PSTATE_H__ */