From 77e2cbab237637f71367df25384164b8c936a31a Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar
Date: Tue, 13 Jun 2017 15:21:56 +0530
Subject: gpu: nvgpu: reorganize PMU perfmon

- Moved the perfmon code from pmu_gk20a.c to
  drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
- Moved the related methods: perfmon init, start/stop sampling,
  load counter read/write/reset, and the perfmon event handler
- Prepended nvgpu_ to the global perfmon methods, replacing the
  gk20a_ prefix

JURA NVGPU-56
JURA NVGPU-98

Change-Id: Idbcdf63ebd76da170e609cc401b320a42110cd7b
Signed-off-by: Mahantesh Kumbar
Reviewed-on: http://git-master/r/1501418
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/Makefile.nvgpu                   |   1 +
 drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c         | 271 +++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gk20a_scale.c              |   6 +-
 drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c              |   8 +-
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c                | 322 +++------------------
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.h                |  13 +-
 drivers/gpu/nvgpu/include/nvgpu/pmu.h              |  10 +-
 .../nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h   |   1 +
 .../gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c   |   2 +-
 .../gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c   |   2 +-
 10 files changed, 341 insertions(+), 295 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 72c2f8d5..ac58d512 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -62,6 +62,7 @@ nvgpu-y := \
 	common/pmu/pmu_ipc.o \
 	common/pmu/pmu_fw.o \
 	common/pmu/pmu_pg.o \
+	common/pmu/pmu_perfmon.o \
 	gk20a/gk20a.o \
 	gk20a/bus_gk20a.o \
 	gk20a/pramin_gk20a.o \
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
new file mode 100644
index 00000000..e28e53a0
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <nvgpu/pmu.h>
+#include <nvgpu/log.h>
+#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
+
+#include "gk20a/gk20a.h"
+
+#ifdef CONFIG_TEGRA_19x_GPU
+#include "nvgpu_gpuid_t19x.h"
+#endif
+
+static u8 get_perfmon_id(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
+	u8 unit_id;
+
+	switch (ver) {
+	case GK20A_GPUID_GK20A:
+	case GK20A_GPUID_GM20B:
+		unit_id = PMU_UNIT_PERFMON;
+		break;
+	case NVGPU_GPUID_GP10B:
+	case NVGPU_GPUID_GP104:
+	case NVGPU_GPUID_GP106:
+		unit_id = PMU_UNIT_PERFMON_T18X;
+		break;
+#if defined(CONFIG_TEGRA_19x_GPU)
+	case TEGRA_19x_GPUID:
+		unit_id = PMU_UNIT_PERFMON_T18X;
+		break;
+#endif
+	default:
+		unit_id = PMU_UNIT_INVALID;
+		nvgpu_err(g, "no support for %x", ver);
+		WARN_ON(1);
+	}
+
+	return unit_id;
+}
+
+int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct pmu_v *pv = &g->ops.pmu_ver;
+	struct pmu_cmd cmd;
+	struct pmu_payload payload;
+	u32 seq;
+
+	nvgpu_log_fn(g, " ");
+
+	pmu->perfmon_ready = 0;
+
+	gk20a_pmu_init_perfmon_counter(g);
+
+	if (!pmu->sample_buffer)
+		pmu->sample_buffer = nvgpu_alloc(&pmu->dmem,
+						2 * sizeof(u16));
+	if (!pmu->sample_buffer) {
+		nvgpu_err(g, "failed to allocate perfmon sample buffer");
+		return -ENOMEM;
+	}
+
+	/* init PERFMON */
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+
+	cmd.hdr.unit_id = get_perfmon_id(pmu);
+	if (cmd.hdr.unit_id == PMU_UNIT_INVALID) {
+		nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped");
+		return -EINVAL;
+	}
+
+	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
+	cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
+	/* buffer to save counter values for pmu perfmon */
+	pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
+					(u16)pmu->sample_buffer);
+	/* number of sample periods below lower threshold
+	 * before pmu triggers perfmon decrease event
+	 * TBD: = 15
+	 */
+	pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
+	/* index of base counter, aka.
always ticking counter */ + pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6); + /* microseconds interval between pmu polls perf counters */ + pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700); + /* number of perfmon counters + * counter #3 (GR and CE2) for gk20a + */ + pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1); + /* moving average window for sample periods + * TBD: = 3000000 / sample_period_us = 17 + */ + pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17); + + memset(&payload, 0, sizeof(struct pmu_payload)); + payload.in.buf = pv->get_perfmon_cntr_ptr(pmu); + payload.in.size = pv->get_perfmon_cntr_sz(pmu); + payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC); + + nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_INIT"); + gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL, &seq, ~0); + + return 0; +} + +int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + struct pmu_v *pv = &g->ops.pmu_ver; + struct pmu_cmd cmd; + struct pmu_payload payload; + u32 seq; + + /* PERFMON Start */ + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = get_perfmon_id(pmu); + if (cmd.hdr.unit_id == PMU_UNIT_INVALID) { + nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped"); + return -EINVAL; + } + cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size(); + pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon, + PMU_PERFMON_CMD_ID_START); + pv->perfmon_start_set_group_id(&cmd.cmd.perfmon, + PMU_DOMAIN_GROUP_PSTATE); + pv->perfmon_start_set_state_id(&cmd.cmd.perfmon, + pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]); + + pv->perfmon_start_set_flags(&cmd.cmd.perfmon, + PMU_PERFMON_FLAG_ENABLE_INCREASE | + PMU_PERFMON_FLAG_ENABLE_DECREASE | + PMU_PERFMON_FLAG_CLEAR_PREV); + + memset(&payload, 0, sizeof(struct pmu_payload)); + + /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */ + pv->set_perfmon_cntr_ut(pmu, 3000); /* 30% */ + /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */ + pv->set_perfmon_cntr_lt(pmu, 1000); /* 10% */ + pv->set_perfmon_cntr_valid(pmu, true); + + payload.in.buf = pv->get_perfmon_cntr_ptr(pmu); + payload.in.size = pv->get_perfmon_cntr_sz(pmu); + payload.in.offset = + pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC); + + nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_START"); + gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL, &seq, ~0); + + return 0; +} + +int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + struct pmu_cmd cmd; + u32 seq; + + /* PERFMON Stop */ + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = get_perfmon_id(pmu); + if (cmd.hdr.unit_id == PMU_UNIT_INVALID) { + nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped"); + return -EINVAL; + } + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop); + cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP; + + nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_STOP"); + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL, &seq, ~0); + return 0; +} + +int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load) +{ + *load = g->pmu.load_shadow; + return 0; +} + +int nvgpu_pmu_load_update(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = &g->pmu; + u16 load = 0; + + if (!pmu->perfmon_ready) { + pmu->load_shadow = 0; + return 0; + } + + pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&load, 2, 0); + pmu->load_shadow = load / 10; + pmu->load_avg = (((9*pmu->load_avg) + 
pmu->load_shadow) / 10); + + return 0; +} + +void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, + u32 *total_cycles) +{ + if (!g->power_on || gk20a_busy(g)) { + *busy_cycles = 0; + *total_cycles = 0; + return; + } + + *busy_cycles = gk20a_pmu_read_idle_counter(g, 1); + *total_cycles = gk20a_pmu_read_idle_counter(g, 2); + + gk20a_idle(g); +} + +void nvgpu_pmu_reset_load_counters(struct gk20a *g) +{ + if (!g->power_on || gk20a_busy(g)) + return; + + gk20a_pmu_reset_idle_counter(g, 2); + gk20a_pmu_reset_idle_counter(g, 1); + + gk20a_idle(g); +} + +int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, + struct pmu_perfmon_msg *msg) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + + nvgpu_log_fn(g, " "); + + switch (msg->msg_type) { + case PMU_PERFMON_MSG_ID_INCREASE_EVENT: + nvgpu_pmu_dbg(g, "perfmon increase event: "); + nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d", + msg->gen.state_id, msg->gen.group_id, msg->gen.data); + (pmu->perfmon_events_cnt)++; + break; + case PMU_PERFMON_MSG_ID_DECREASE_EVENT: + nvgpu_pmu_dbg(g, "perfmon decrease event: "); + nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d", + msg->gen.state_id, msg->gen.group_id, msg->gen.data); + (pmu->perfmon_events_cnt)++; + break; + case PMU_PERFMON_MSG_ID_INIT_EVENT: + pmu->perfmon_ready = 1; + nvgpu_pmu_dbg(g, "perfmon init event"); + break; + default: + break; + } + + /* restart sampling */ + if (pmu->perfmon_sampling_enabled) + return nvgpu_pmu_perfmon_start_sampling(pmu); + return 0; +} diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c index c23cdcba..160776bc 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c @@ -93,7 +93,7 @@ int gk20a_scale_qos_notify(struct notifier_block *nb, /* Update gpu load because we may scale the emc target * if the gpu load changed. 
*/ - gk20a_pmu_load_update(g); + nvgpu_pmu_load_update(g); platform->postscale(profile->dev, freq); return NOTIFY_OK; @@ -223,7 +223,7 @@ static void update_load_estimate_gpmu(struct device *dev) profile->dev_stat.total_time = dt; profile->last_event_time = t; - gk20a_pmu_load_norm(g, &busy_time); + nvgpu_pmu_load_norm(g, &busy_time); profile->dev_stat.busy_time = (busy_time * dt) / 1000; } @@ -310,7 +310,7 @@ static int gk20a_scale_get_dev_status(struct device *dev, struct gk20a_platform *platform = dev_get_drvdata(dev); /* update the software shadow */ - gk20a_pmu_load_update(g); + nvgpu_pmu_load_update(g); /* inform edp about new constraint */ if (platform->prescale) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c index 8c1dbd37..1933eed5 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c @@ -386,7 +386,7 @@ static ssize_t counters_show(struct device *dev, u32 busy_cycles, total_cycles; ssize_t res; - gk20a_pmu_get_load_counters(g, &busy_cycles, &total_cycles); + nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles); res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); @@ -400,7 +400,7 @@ static ssize_t counters_show_reset(struct device *dev, ssize_t res = counters_show(dev, attr, buf); struct gk20a *g = get_gk20a(dev); - gk20a_pmu_reset_load_counters(g); + nvgpu_pmu_reset_load_counters(g); return res; } @@ -422,8 +422,8 @@ static ssize_t gk20a_load_show(struct device *dev, if (err) return err; - gk20a_pmu_load_update(g); - gk20a_pmu_load_norm(g, &busy_time); + nvgpu_pmu_load_update(g); + nvgpu_pmu_load_norm(g, &busy_time); gk20a_idle(g); } diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 32303c6e..2f8e456f 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -802,135 +802,6 @@ void gk20a_init_pmu_ops(struct gpu_ops *gops) gops->pmu.reset = gk20a_pmu_reset; } -static u8 get_perfmon_id(struct nvgpu_pmu *pmu) -{ - struct gk20a *g = gk20a_from_pmu(pmu); - u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; - u8 unit_id; - - switch (ver) { - case GK20A_GPUID_GK20A: - case GK20A_GPUID_GM20B: - unit_id = PMU_UNIT_PERFMON; - break; - case NVGPU_GPUID_GP10B: - case NVGPU_GPUID_GP104: - case NVGPU_GPUID_GP106: - unit_id = PMU_UNIT_PERFMON_T18X; - break; -#if defined(CONFIG_TEGRA_19x_GPU) - case TEGRA_19x_GPUID: - unit_id = PMU_UNIT_PERFMON_T18X; - break; -#endif - default: - nvgpu_err(g, "no support for %x", ver); - BUG(); - } - - return unit_id; -} - -int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu) -{ - struct gk20a *g = gk20a_from_pmu(pmu); - struct pmu_v *pv = &g->ops.pmu_ver; - struct pmu_cmd cmd; - struct pmu_payload payload; - u32 seq; - u32 data; - - gk20a_dbg_fn(""); - - pmu->perfmon_ready = 0; - - /* use counter #3 for GR && CE2 busy cycles */ - gk20a_writel(g, pwr_pmu_idle_mask_r(3), - pwr_pmu_idle_mask_gr_enabled_f() | - pwr_pmu_idle_mask_ce_2_enabled_f()); - - /* disable idle filtering for counters 3 and 6 */ - data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3)); - data = set_field(data, pwr_pmu_idle_ctrl_value_m() | - pwr_pmu_idle_ctrl_filter_m(), - pwr_pmu_idle_ctrl_value_busy_f() | - pwr_pmu_idle_ctrl_filter_disabled_f()); - gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data); - - /* use counter #6 for total cycles */ - data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6)); - data = set_field(data, pwr_pmu_idle_ctrl_value_m() | - pwr_pmu_idle_ctrl_filter_m(), - 
pwr_pmu_idle_ctrl_value_always_f() | - pwr_pmu_idle_ctrl_filter_disabled_f()); - gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data); - - /* - * We don't want to disturb counters #3 and #6, which are used by - * perfmon, so we add wiring also to counters #1 and #2 for - * exposing raw counter readings. - */ - gk20a_writel(g, pwr_pmu_idle_mask_r(1), - pwr_pmu_idle_mask_gr_enabled_f() | - pwr_pmu_idle_mask_ce_2_enabled_f()); - - data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1)); - data = set_field(data, pwr_pmu_idle_ctrl_value_m() | - pwr_pmu_idle_ctrl_filter_m(), - pwr_pmu_idle_ctrl_value_busy_f() | - pwr_pmu_idle_ctrl_filter_disabled_f()); - gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data); - - data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2)); - data = set_field(data, pwr_pmu_idle_ctrl_value_m() | - pwr_pmu_idle_ctrl_filter_m(), - pwr_pmu_idle_ctrl_value_always_f() | - pwr_pmu_idle_ctrl_filter_disabled_f()); - gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); - - if (!pmu->sample_buffer) - pmu->sample_buffer = nvgpu_alloc(&pmu->dmem, - 2 * sizeof(u16)); - if (!pmu->sample_buffer) { - nvgpu_err(g, "failed to allocate perfmon sample buffer"); - return -ENOMEM; - } - - /* init PERFMON */ - memset(&cmd, 0, sizeof(struct pmu_cmd)); - cmd.hdr.unit_id = get_perfmon_id(pmu); - cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size(); - cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT; - /* buffer to save counter values for pmu perfmon */ - pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon, - (u16)pmu->sample_buffer); - /* number of sample periods below lower threshold - before pmu triggers perfmon decrease event - TBD: = 15 */ - pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15); - /* index of base counter, aka. always ticking counter */ - pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6); - /* microseconds interval between pmu polls perf counters */ - pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700); - /* number of perfmon counters - counter #3 (GR and CE2) for gk20a */ - pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1); - /* moving average window for sample periods - TBD: = 3000000 / sample_period_us = 17 */ - pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17); - - memset(&payload, 0, sizeof(struct pmu_payload)); - payload.in.buf = pv->get_perfmon_cntr_ptr(pmu); - payload.in.size = pv->get_perfmon_cntr_sz(pmu); - payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC); - - gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT"); - gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, - NULL, NULL, &seq, ~0); - - return 0; -} - static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { @@ -965,100 +836,6 @@ void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) nvgpu_err(g, "ZBC save timeout"); } -int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu) -{ - struct gk20a *g = gk20a_from_pmu(pmu); - struct pmu_v *pv = &g->ops.pmu_ver; - struct pmu_cmd cmd; - struct pmu_payload payload; - u32 seq; - - /* PERFMON Start */ - memset(&cmd, 0, sizeof(struct pmu_cmd)); - cmd.hdr.unit_id = get_perfmon_id(pmu); - cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size(); - pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon, - PMU_PERFMON_CMD_ID_START); - pv->perfmon_start_set_group_id(&cmd.cmd.perfmon, - PMU_DOMAIN_GROUP_PSTATE); - pv->perfmon_start_set_state_id(&cmd.cmd.perfmon, - pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]); - - pv->perfmon_start_set_flags(&cmd.cmd.perfmon, - 
PMU_PERFMON_FLAG_ENABLE_INCREASE | - PMU_PERFMON_FLAG_ENABLE_DECREASE | - PMU_PERFMON_FLAG_CLEAR_PREV); - - memset(&payload, 0, sizeof(struct pmu_payload)); - - /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */ - pv->set_perfmon_cntr_ut(pmu, 3000); /* 30% */ - /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */ - pv->set_perfmon_cntr_lt(pmu, 1000); /* 10% */ - pv->set_perfmon_cntr_valid(pmu, true); - - payload.in.buf = pv->get_perfmon_cntr_ptr(pmu); - payload.in.size = pv->get_perfmon_cntr_sz(pmu); - payload.in.offset = - pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC); - - gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START"); - gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, - NULL, NULL, &seq, ~0); - - return 0; -} - -int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu) -{ - struct gk20a *g = gk20a_from_pmu(pmu); - struct pmu_cmd cmd; - u32 seq; - - /* PERFMON Stop */ - memset(&cmd, 0, sizeof(struct pmu_cmd)); - cmd.hdr.unit_id = get_perfmon_id(pmu); - cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop); - cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP; - - gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP"); - gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, - NULL, NULL, &seq, ~0); - return 0; -} - -int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, - struct pmu_perfmon_msg *msg) -{ - gk20a_dbg_fn(""); - - switch (msg->msg_type) { - case PMU_PERFMON_MSG_ID_INCREASE_EVENT: - gk20a_dbg_pmu("perfmon increase event: " - "state_id %d, ground_id %d, pct %d", - msg->gen.state_id, msg->gen.group_id, msg->gen.data); - (pmu->perfmon_events_cnt)++; - break; - case PMU_PERFMON_MSG_ID_DECREASE_EVENT: - gk20a_dbg_pmu("perfmon decrease event: " - "state_id %d, ground_id %d, pct %d", - msg->gen.state_id, msg->gen.group_id, msg->gen.data); - (pmu->perfmon_events_cnt)++; - break; - case PMU_PERFMON_MSG_ID_INIT_EVENT: - pmu->perfmon_ready = 1; - gk20a_dbg_pmu("perfmon init event"); - break; - default: - break; - } - - /* restart sampling */ - if (pmu->perfmon_sampling_enabled) - return nvgpu_pmu_perfmon_start_sampling(pmu); - return 0; -} - int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu, struct nv_pmu_therm_msg *msg) { @@ -1359,72 +1136,65 @@ void gk20a_pmu_isr(struct gk20a *g) nvgpu_mutex_release(&pmu->isr_mutex); } -int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable) +void gk20a_pmu_init_perfmon_counter(struct gk20a *g) { - struct nvgpu_pmu *pmu = &g->pmu; - int err; - - gk20a_dbg_fn(""); - - if (enable) - err = nvgpu_pmu_perfmon_start_sampling(pmu); - else - err = nvgpu_pmu_perfmon_stop_sampling(pmu); + u32 data; - return err; -} + /* use counter #3 for GR && CE2 busy cycles */ + gk20a_writel(g, pwr_pmu_idle_mask_r(3), + pwr_pmu_idle_mask_gr_enabled_f() | + pwr_pmu_idle_mask_ce_2_enabled_f()); -int gk20a_pmu_load_norm(struct gk20a *g, u32 *load) -{ - *load = g->pmu.load_shadow; - return 0; -} + /* disable idle filtering for counters 3 and 6 */ + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_busy_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data); -int gk20a_pmu_load_update(struct gk20a *g) -{ - struct nvgpu_pmu *pmu = &g->pmu; - u16 _load = 0; + /* use counter #6 for total cycles */ + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_always_f() | + 
pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data); - if (!pmu->perfmon_ready) { - pmu->load_shadow = 0; - return 0; - } + /* + * We don't want to disturb counters #3 and #6, which are used by + * perfmon, so we add wiring also to counters #1 and #2 for + * exposing raw counter readings. + */ + gk20a_writel(g, pwr_pmu_idle_mask_r(1), + pwr_pmu_idle_mask_gr_enabled_f() | + pwr_pmu_idle_mask_ce_2_enabled_f()); - pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); - pmu->load_shadow = _load / 10; - pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_busy_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data); - return 0; + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_always_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); } -void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, - u32 *total_cycles) +u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id) { - if (!g->power_on || gk20a_busy(g)) { - *busy_cycles = 0; - *total_cycles = 0; - return; - } - - *busy_cycles = pwr_pmu_idle_count_value_v( - gk20a_readl(g, pwr_pmu_idle_count_r(1))); - rmb(); - *total_cycles = pwr_pmu_idle_count_value_v( - gk20a_readl(g, pwr_pmu_idle_count_r(2))); - gk20a_idle(g); + return pwr_pmu_idle_count_value_v( + gk20a_readl(g, pwr_pmu_idle_count_r(counter_id))); } -void gk20a_pmu_reset_load_counters(struct gk20a *g) +void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id) { - u32 reg_val = pwr_pmu_idle_count_reset_f(1); - - if (!g->power_on || gk20a_busy(g)) - return; - - gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val); - wmb(); - gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val); - gk20a_idle(g); + gk20a_writel(g, pwr_pmu_idle_count_r(counter_id), + pwr_pmu_idle_count_reset_f(1)); } void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 4a1609d6..a88bc404 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -38,7 +38,7 @@ u32 gk20a_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id); void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries); -int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable); +void gk20a_pmu_init_perfmon_counter(struct gk20a *g); void gk20a_pmu_pg_idle_counter_config(struct gk20a *g, u32 pg_engine_id); @@ -51,11 +51,9 @@ int gk20a_pmu_queue_tail(struct nvgpu_pmu *pmu, struct pmu_queue *queue, u32 *tail, bool set); void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set); -int gk20a_pmu_load_norm(struct gk20a *g, u32 *load); -int gk20a_pmu_load_update(struct gk20a *g); -void gk20a_pmu_reset_load_counters(struct gk20a *g); -void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, - u32 *total_cycles); +u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id); +void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id); + void gk20a_init_pmu_ops(struct gpu_ops *gops); void pmu_copy_to_dmem(struct nvgpu_pmu *pmu, @@ -81,7 +79,4 @@ int pmu_enable_hw(struct nvgpu_pmu *pmu, bool enable); bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 
*hex_pos); -int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu); -int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu); - #endif /*__PMU_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index c4972f67..ede238a0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -392,8 +392,16 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu); /* perfmon */ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu); int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, - struct pmu_perfmon_msg *msg); + struct pmu_perfmon_msg *msg); +int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); +int nvgpu_pmu_load_update(struct gk20a *g); +void nvgpu_pmu_reset_load_counters(struct gk20a *g); +void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, + u32 *total_cycles); + int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu, struct nv_pmu_therm_msg *msg); diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h index 01cd2743..310cb508 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h @@ -78,6 +78,7 @@ struct pmu_msg { #define PMU_UNIT_VOLT (0x0E) #define PMU_UNIT_END (0x23) +#define PMU_UNIT_INVALID (0xFF) #define PMU_UNIT_TEST_START (0xFE) #define PMU_UNIT_END_SIM (0xFF) diff --git a/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c index 3d5ea698..c4216ffc 100644 --- a/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c @@ -299,7 +299,7 @@ static void gk20a_tegra_prescale(struct device *dev) struct gk20a *g = get_gk20a(dev); u32 avg = 0; - gk20a_pmu_load_norm(g, &avg); + nvgpu_pmu_load_norm(g, &avg); tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk)); } diff --git a/drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c index 235473d3..5980c592 100644 --- a/drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c @@ -280,7 +280,7 @@ static void gp10b_tegra_prescale(struct device *dev) gk20a_dbg_fn(""); - gk20a_pmu_load_norm(g, &avg); + nvgpu_pmu_load_norm(g, &avg); gk20a_dbg_fn("done"); } -- cgit v1.2.2
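
For code that used the old gk20a_-prefixed perfmon helpers, the conversion is
purely mechanical: the entry points keep their signatures and only the prefix
changes, as the call-site hunks above show. Below is a minimal caller-side
sketch of the renamed load-query API, modeled on the gk20a_load_show() path
touched in gk20a_sysfs.c; read_gpu_load() is a hypothetical helper used only
for illustration.

/*
 * Hypothetical caller: sample the PMU-reported GPU load using the
 * nvgpu_-prefixed helpers introduced by this patch.
 */
static int read_gpu_load(struct gk20a *g, u32 *busy_time)
{
	int err;

	*busy_time = 0;
	if (!g->power_on)
		return 0;

	err = gk20a_busy(g);	/* keep the GPU powered while sampling */
	if (err)
		return err;

	nvgpu_pmu_load_update(g);		/* refresh the software shadow from PMU DMEM */
	nvgpu_pmu_load_norm(g, busy_time);	/* read back the shadowed load value */

	gk20a_idle(g);

	return 0;
}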