author	Mahantesh Kumbar <mkumbar@nvidia.com>	2017-06-13 05:51:56 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-06-15 14:18:51 -0400
commit	77e2cbab237637f71367df25384164b8c936a31a (patch)
tree	64ccc10c4c6aa3eddbcac1d697d50b860247bf57 /drivers
parent	7d16f7e52c0f8ce8604e992a617a3f98545fcf07 (diff)
gpu: nvgpu: reorganize PMU perfmon
- Moved the perfmon code from pmu_gk20a.c to the new
  "drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c" file.
- Moved the related methods: perfmon init, start/stop sampling,
  load counter read/write/reset, and the perfmon event handler.
- Renamed the global perfmon methods from the gk20a_ prefix to nvgpu_.

JURA NVGPU-56
JURA NVGPU-98

Change-Id: Idbcdf63ebd76da170e609cc401b320a42110cd7b
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1501418
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
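For callers, the visible change is only the nvgpu_ prefix on the perfmon entry points; the signatures are unchanged. A minimal sketch of a caller (the wrapper function below is hypothetical and only illustrates the rename, it is not part of this patch):

    /* hypothetical caller, shown only to illustrate the gk20a_ -> nvgpu_ rename */
    static void sample_gpu_load(struct gk20a *g)
    {
        u32 busy_time = 0;

        /* was: gk20a_pmu_load_update(g); gk20a_pmu_load_norm(g, &busy_time); */
        nvgpu_pmu_load_update(g);            /* refresh the load shadow from PMU DMEM */
        nvgpu_pmu_load_norm(g, &busy_time);  /* read back the smoothed load value */
    }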
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/gpu/nvgpu/Makefile.nvgpu	1
-rw-r--r--	drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c	271
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a_scale.c	6
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c	8
-rw-r--r--	drivers/gpu/nvgpu/gk20a/pmu_gk20a.c	322
-rw-r--r--	drivers/gpu/nvgpu/gk20a/pmu_gk20a.h	13
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/pmu.h	10
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h	1
-rw-r--r--	drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c	2
-rw-r--r--	drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c	2
10 files changed, 341 insertions(+), 295 deletions(-)
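As context for the diff below, the relocated code follows a simple init -> start -> event -> restart flow. A much-simplified sketch of that flow, using the functions now in pmu_perfmon.c (the wrapper function itself is hypothetical and only for illustration):

    /* hypothetical wrapper; the real calls are made from the PMU state machine */
    static int perfmon_bringup(struct nvgpu_pmu *pmu)
    {
        int err;

        /* posts PMU_PERFMON_CMD_ID_INIT with the sample buffer, sampling
         * period and moving-average window */
        err = nvgpu_pmu_init_perfmon(pmu);
        if (err)
            return err;

        /* posts PMU_PERFMON_CMD_ID_START with the busy/idle thresholds */
        return nvgpu_pmu_perfmon_start_sampling(pmu);
    }

Increase/decrease events coming back from the PMU are then handled by nvgpu_pmu_handle_perfmon_event(), which counts the event and, when perfmon_sampling_enabled is set, restarts sampling.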
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 72c2f8d5..ac58d512 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -62,6 +62,7 @@ nvgpu-y := \
 	common/pmu/pmu_ipc.o \
 	common/pmu/pmu_fw.o \
 	common/pmu/pmu_pg.o \
+	common/pmu/pmu_perfmon.o \
 	gk20a/gk20a.o \
 	gk20a/bus_gk20a.o \
 	gk20a/pramin_gk20a.o \
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
new file mode 100644
index 00000000..e28e53a0
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <nvgpu/pmu.h>
+#include <nvgpu/log.h>
+#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
+
+#include "gk20a/gk20a.h"
+
+#ifdef CONFIG_TEGRA_19x_GPU
+#include "nvgpu_gpuid_t19x.h"
+#endif
+
+static u8 get_perfmon_id(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
+	u8 unit_id;
+
+	switch (ver) {
+	case GK20A_GPUID_GK20A:
+	case GK20A_GPUID_GM20B:
+		unit_id = PMU_UNIT_PERFMON;
+		break;
+	case NVGPU_GPUID_GP10B:
+	case NVGPU_GPUID_GP104:
+	case NVGPU_GPUID_GP106:
+		unit_id = PMU_UNIT_PERFMON_T18X;
+		break;
+#if defined(CONFIG_TEGRA_19x_GPU)
+	case TEGRA_19x_GPUID:
+		unit_id = PMU_UNIT_PERFMON_T18X;
+		break;
+#endif
+	default:
+		unit_id = PMU_UNIT_INVALID;
+		nvgpu_err(g, "no support for %x", ver);
+		WARN_ON(1);
+	}
+
+	return unit_id;
+}
+
+int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct pmu_v *pv = &g->ops.pmu_ver;
+	struct pmu_cmd cmd;
+	struct pmu_payload payload;
+	u32 seq;
+
+	nvgpu_log_fn(g, " ");
+
+	pmu->perfmon_ready = 0;
+
+	gk20a_pmu_init_perfmon_counter(g);
+
+	if (!pmu->sample_buffer)
+		pmu->sample_buffer = nvgpu_alloc(&pmu->dmem,
+						 2 * sizeof(u16));
+	if (!pmu->sample_buffer) {
+		nvgpu_err(g, "failed to allocate perfmon sample buffer");
+		return -ENOMEM;
+	}
+
+	/* init PERFMON */
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+
+	cmd.hdr.unit_id = get_perfmon_id(pmu);
+	if (cmd.hdr.unit_id == PMU_UNIT_INVALID) {
+		nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped");
+		return -EINVAL;
+	}
+
+	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
+	cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
+	/* buffer to save counter values for pmu perfmon */
+	pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
+		(u16)pmu->sample_buffer);
+	/* number of sample periods below lower threshold
+	 * before pmu triggers perfmon decrease event
+	 * TBD: = 15
+	 */
+	pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
+	/* index of base counter, aka. always ticking counter */
+	pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
+	/* microseconds interval between pmu polls perf counters */
+	pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
+	/* number of perfmon counters
+	 * counter #3 (GR and CE2) for gk20a
+	 */
+	pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
+	/* moving average window for sample periods
+	 * TBD: = 3000000 / sample_period_us = 17
+	 */
+	pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
+
+	memset(&payload, 0, sizeof(struct pmu_payload));
+	payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
+	payload.in.size = pv->get_perfmon_cntr_sz(pmu);
+	payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
+
+	nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_INIT");
+	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
+			NULL, NULL, &seq, ~0);
+
+	return 0;
+}
+
+int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct pmu_v *pv = &g->ops.pmu_ver;
+	struct pmu_cmd cmd;
+	struct pmu_payload payload;
+	u32 seq;
+
+	/* PERFMON Start */
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+	cmd.hdr.unit_id = get_perfmon_id(pmu);
+	if (cmd.hdr.unit_id == PMU_UNIT_INVALID) {
+		nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped");
+		return -EINVAL;
+	}
+	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
+	pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
+		PMU_PERFMON_CMD_ID_START);
+	pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
+		PMU_DOMAIN_GROUP_PSTATE);
+	pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
+		pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
+
+	pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
+		PMU_PERFMON_FLAG_ENABLE_INCREASE |
+		PMU_PERFMON_FLAG_ENABLE_DECREASE |
+		PMU_PERFMON_FLAG_CLEAR_PREV);
+
+	memset(&payload, 0, sizeof(struct pmu_payload));
+
+	/* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
+	pv->set_perfmon_cntr_ut(pmu, 3000); /* 30% */
+	/* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
+	pv->set_perfmon_cntr_lt(pmu, 1000); /* 10% */
+	pv->set_perfmon_cntr_valid(pmu, true);
+
+	payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
+	payload.in.size = pv->get_perfmon_cntr_sz(pmu);
+	payload.in.offset =
+		pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
+
+	nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_START");
+	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
+			NULL, NULL, &seq, ~0);
+
+	return 0;
+}
+
+int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct pmu_cmd cmd;
+	u32 seq;
+
+	/* PERFMON Stop */
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+	cmd.hdr.unit_id = get_perfmon_id(pmu);
+	if (cmd.hdr.unit_id == PMU_UNIT_INVALID) {
+		nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped");
+		return -EINVAL;
+	}
+	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
+	cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
+
+	nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_STOP");
+	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
+			NULL, NULL, &seq, ~0);
+	return 0;
+}
+
+int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load)
+{
+	*load = g->pmu.load_shadow;
+	return 0;
+}
+
+int nvgpu_pmu_load_update(struct gk20a *g)
+{
+	struct nvgpu_pmu *pmu = &g->pmu;
+	u16 load = 0;
+
+	if (!pmu->perfmon_ready) {
+		pmu->load_shadow = 0;
+		return 0;
+	}
+
+	pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&load, 2, 0);
+	pmu->load_shadow = load / 10;
+	pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
+
+	return 0;
+}
+
+void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
+		u32 *total_cycles)
+{
+	if (!g->power_on || gk20a_busy(g)) {
+		*busy_cycles = 0;
+		*total_cycles = 0;
+		return;
+	}
+
+	*busy_cycles = gk20a_pmu_read_idle_counter(g, 1);
+	*total_cycles = gk20a_pmu_read_idle_counter(g, 2);
+
+	gk20a_idle(g);
+}
+
+void nvgpu_pmu_reset_load_counters(struct gk20a *g)
+{
+	if (!g->power_on || gk20a_busy(g))
+		return;
+
+	gk20a_pmu_reset_idle_counter(g, 2);
+	gk20a_pmu_reset_idle_counter(g, 1);
+
+	gk20a_idle(g);
+}
+
+int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
+		struct pmu_perfmon_msg *msg)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+
+	nvgpu_log_fn(g, " ");
+
+	switch (msg->msg_type) {
+	case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
+		nvgpu_pmu_dbg(g, "perfmon increase event: ");
+		nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d",
+			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
+		(pmu->perfmon_events_cnt)++;
+		break;
+	case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
+		nvgpu_pmu_dbg(g, "perfmon decrease event: ");
+		nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d",
+			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
+		(pmu->perfmon_events_cnt)++;
+		break;
+	case PMU_PERFMON_MSG_ID_INIT_EVENT:
+		pmu->perfmon_ready = 1;
+		nvgpu_pmu_dbg(g, "perfmon init event");
+		break;
+	default:
+		break;
+	}
+
+	/* restart sampling */
+	if (pmu->perfmon_sampling_enabled)
+		return nvgpu_pmu_perfmon_start_sampling(pmu);
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
index c23cdcba..160776bc 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
@@ -93,7 +93,7 @@ int gk20a_scale_qos_notify(struct notifier_block *nb,
 
 	/* Update gpu load because we may scale the emc target
 	 * if the gpu load changed. */
-	gk20a_pmu_load_update(g);
+	nvgpu_pmu_load_update(g);
 	platform->postscale(profile->dev, freq);
 
 	return NOTIFY_OK;
@@ -223,7 +223,7 @@ static void update_load_estimate_gpmu(struct device *dev)
 
 	profile->dev_stat.total_time = dt;
 	profile->last_event_time = t;
-	gk20a_pmu_load_norm(g, &busy_time);
+	nvgpu_pmu_load_norm(g, &busy_time);
 	profile->dev_stat.busy_time = (busy_time * dt) / 1000;
 }
 
@@ -310,7 +310,7 @@ static int gk20a_scale_get_dev_status(struct device *dev,
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
 
 	/* update the software shadow */
-	gk20a_pmu_load_update(g);
+	nvgpu_pmu_load_update(g);
 
 	/* inform edp about new constraint */
 	if (platform->prescale)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
index 8c1dbd37..1933eed5 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -386,7 +386,7 @@ static ssize_t counters_show(struct device *dev,
 	u32 busy_cycles, total_cycles;
 	ssize_t res;
 
-	gk20a_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
+	nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
 
 	res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles);
 
@@ -400,7 +400,7 @@ static ssize_t counters_show_reset(struct device *dev,
 	ssize_t res = counters_show(dev, attr, buf);
 	struct gk20a *g = get_gk20a(dev);
 
-	gk20a_pmu_reset_load_counters(g);
+	nvgpu_pmu_reset_load_counters(g);
 
 	return res;
 }
@@ -422,8 +422,8 @@ static ssize_t gk20a_load_show(struct device *dev,
 		if (err)
 			return err;
 
-		gk20a_pmu_load_update(g);
-		gk20a_pmu_load_norm(g, &busy_time);
+		nvgpu_pmu_load_update(g);
+		nvgpu_pmu_load_norm(g, &busy_time);
 		gk20a_idle(g);
 	}
 
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 32303c6e..2f8e456f 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -802,135 +802,6 @@ void gk20a_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.reset = gk20a_pmu_reset;
 }
 
-static u8 get_perfmon_id(struct nvgpu_pmu *pmu)
-{
-	struct gk20a *g = gk20a_from_pmu(pmu);
-	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
-	u8 unit_id;
-
-	switch (ver) {
-	case GK20A_GPUID_GK20A:
-	case GK20A_GPUID_GM20B:
-		unit_id = PMU_UNIT_PERFMON;
-		break;
-	case NVGPU_GPUID_GP10B:
-	case NVGPU_GPUID_GP104:
-	case NVGPU_GPUID_GP106:
-		unit_id = PMU_UNIT_PERFMON_T18X;
-		break;
-#if defined(CONFIG_TEGRA_19x_GPU)
-	case TEGRA_19x_GPUID:
-		unit_id = PMU_UNIT_PERFMON_T18X;
-		break;
-#endif
-	default:
-		nvgpu_err(g, "no support for %x", ver);
-		BUG();
-	}
-
-	return unit_id;
-}
-
-int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
-{
-	struct gk20a *g = gk20a_from_pmu(pmu);
-	struct pmu_v *pv = &g->ops.pmu_ver;
-	struct pmu_cmd cmd;
-	struct pmu_payload payload;
-	u32 seq;
-	u32 data;
-
-	gk20a_dbg_fn("");
-
-	pmu->perfmon_ready = 0;
-
-	/* use counter #3 for GR && CE2 busy cycles */
-	gk20a_writel(g, pwr_pmu_idle_mask_r(3),
-		pwr_pmu_idle_mask_gr_enabled_f() |
-		pwr_pmu_idle_mask_ce_2_enabled_f());
-
-	/* disable idle filtering for counters 3 and 6 */
-	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
-	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-			pwr_pmu_idle_ctrl_filter_m(),
-			pwr_pmu_idle_ctrl_value_busy_f() |
-			pwr_pmu_idle_ctrl_filter_disabled_f());
-	gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
-
-	/* use counter #6 for total cycles */
-	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
-	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-			pwr_pmu_idle_ctrl_filter_m(),
-			pwr_pmu_idle_ctrl_value_always_f() |
-			pwr_pmu_idle_ctrl_filter_disabled_f());
-	gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
-
-	/*
-	 * We don't want to disturb counters #3 and #6, which are used by
-	 * perfmon, so we add wiring also to counters #1 and #2 for
-	 * exposing raw counter readings.
-	 */
-	gk20a_writel(g, pwr_pmu_idle_mask_r(1),
-		pwr_pmu_idle_mask_gr_enabled_f() |
-		pwr_pmu_idle_mask_ce_2_enabled_f());
-
-	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
-	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-			pwr_pmu_idle_ctrl_filter_m(),
-			pwr_pmu_idle_ctrl_value_busy_f() |
-			pwr_pmu_idle_ctrl_filter_disabled_f());
-	gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
-
-	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
-	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-			pwr_pmu_idle_ctrl_filter_m(),
-			pwr_pmu_idle_ctrl_value_always_f() |
-			pwr_pmu_idle_ctrl_filter_disabled_f());
-	gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
-
-	if (!pmu->sample_buffer)
-		pmu->sample_buffer = nvgpu_alloc(&pmu->dmem,
-						 2 * sizeof(u16));
-	if (!pmu->sample_buffer) {
-		nvgpu_err(g, "failed to allocate perfmon sample buffer");
-		return -ENOMEM;
-	}
-
-	/* init PERFMON */
-	memset(&cmd, 0, sizeof(struct pmu_cmd));
-	cmd.hdr.unit_id = get_perfmon_id(pmu);
-	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
-	cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
-	/* buffer to save counter values for pmu perfmon */
-	pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
-		(u16)pmu->sample_buffer);
-	/* number of sample periods below lower threshold
-	   before pmu triggers perfmon decrease event
-	   TBD: = 15 */
-	pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
-	/* index of base counter, aka. always ticking counter */
-	pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
-	/* microseconds interval between pmu polls perf counters */
-	pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
-	/* number of perfmon counters
-	   counter #3 (GR and CE2) for gk20a */
-	pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
-	/* moving average window for sample periods
-	   TBD: = 3000000 / sample_period_us = 17 */
-	pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
-
-	memset(&payload, 0, sizeof(struct pmu_payload));
-	payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
-	payload.in.size = pv->get_perfmon_cntr_sz(pmu);
-	payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
-
-	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
-	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
-			NULL, NULL, &seq, ~0);
-
-	return 0;
-}
-
 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
 		void *param, u32 handle, u32 status)
 {
@@ -965,100 +836,6 @@ void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
 		nvgpu_err(g, "ZBC save timeout");
 }
 
-int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
-{
-	struct gk20a *g = gk20a_from_pmu(pmu);
-	struct pmu_v *pv = &g->ops.pmu_ver;
-	struct pmu_cmd cmd;
-	struct pmu_payload payload;
-	u32 seq;
-
-	/* PERFMON Start */
-	memset(&cmd, 0, sizeof(struct pmu_cmd));
-	cmd.hdr.unit_id = get_perfmon_id(pmu);
-	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
-	pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
-		PMU_PERFMON_CMD_ID_START);
-	pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
-		PMU_DOMAIN_GROUP_PSTATE);
-	pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
-		pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
-
-	pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
-		PMU_PERFMON_FLAG_ENABLE_INCREASE |
-		PMU_PERFMON_FLAG_ENABLE_DECREASE |
-		PMU_PERFMON_FLAG_CLEAR_PREV);
-
-	memset(&payload, 0, sizeof(struct pmu_payload));
-
-	/* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
-	pv->set_perfmon_cntr_ut(pmu, 3000); /* 30% */
-	/* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
-	pv->set_perfmon_cntr_lt(pmu, 1000); /* 10% */
-	pv->set_perfmon_cntr_valid(pmu, true);
-
-	payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
-	payload.in.size = pv->get_perfmon_cntr_sz(pmu);
-	payload.in.offset =
-		pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
-
-	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
-	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
-			NULL, NULL, &seq, ~0);
-
-	return 0;
-}
-
-int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
-{
-	struct gk20a *g = gk20a_from_pmu(pmu);
-	struct pmu_cmd cmd;
-	u32 seq;
-
-	/* PERFMON Stop */
-	memset(&cmd, 0, sizeof(struct pmu_cmd));
-	cmd.hdr.unit_id = get_perfmon_id(pmu);
-	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
-	cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
-
-	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
-	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
-			NULL, NULL, &seq, ~0);
-	return 0;
-}
-
-int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
-			struct pmu_perfmon_msg *msg)
-{
-	gk20a_dbg_fn("");
-
-	switch (msg->msg_type) {
-	case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
-		gk20a_dbg_pmu("perfmon increase event: "
-			"state_id %d, ground_id %d, pct %d",
-			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
-		(pmu->perfmon_events_cnt)++;
-		break;
-	case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
-		gk20a_dbg_pmu("perfmon decrease event: "
-			"state_id %d, ground_id %d, pct %d",
-			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
-		(pmu->perfmon_events_cnt)++;
-		break;
-	case PMU_PERFMON_MSG_ID_INIT_EVENT:
-		pmu->perfmon_ready = 1;
-		gk20a_dbg_pmu("perfmon init event");
-		break;
-	default:
-		break;
-	}
-
-	/* restart sampling */
-	if (pmu->perfmon_sampling_enabled)
-		return nvgpu_pmu_perfmon_start_sampling(pmu);
-	return 0;
-}
-
 int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
 		struct nv_pmu_therm_msg *msg)
 {
@@ -1359,72 +1136,65 @@ void gk20a_pmu_isr(struct gk20a *g)
 	nvgpu_mutex_release(&pmu->isr_mutex);
 }
 
-int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
+void gk20a_pmu_init_perfmon_counter(struct gk20a *g)
 {
-	struct nvgpu_pmu *pmu = &g->pmu;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (enable)
-		err = nvgpu_pmu_perfmon_start_sampling(pmu);
-	else
-		err = nvgpu_pmu_perfmon_stop_sampling(pmu);
+	u32 data;
 
-	return err;
-}
+	/* use counter #3 for GR && CE2 busy cycles */
+	gk20a_writel(g, pwr_pmu_idle_mask_r(3),
+		pwr_pmu_idle_mask_gr_enabled_f() |
+		pwr_pmu_idle_mask_ce_2_enabled_f());
 
-int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
-{
-	*load = g->pmu.load_shadow;
-	return 0;
-}
+	/* disable idle filtering for counters 3 and 6 */
+	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
+	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+			pwr_pmu_idle_ctrl_filter_m(),
+			pwr_pmu_idle_ctrl_value_busy_f() |
+			pwr_pmu_idle_ctrl_filter_disabled_f());
+	gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
 
-int gk20a_pmu_load_update(struct gk20a *g)
-{
-	struct nvgpu_pmu *pmu = &g->pmu;
-	u16 _load = 0;
+	/* use counter #6 for total cycles */
+	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
+	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+			pwr_pmu_idle_ctrl_filter_m(),
+			pwr_pmu_idle_ctrl_value_always_f() |
+			pwr_pmu_idle_ctrl_filter_disabled_f());
+	gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
 
-	if (!pmu->perfmon_ready) {
-		pmu->load_shadow = 0;
-		return 0;
-	}
+	/*
+	 * We don't want to disturb counters #3 and #6, which are used by
+	 * perfmon, so we add wiring also to counters #1 and #2 for
+	 * exposing raw counter readings.
+	 */
+	gk20a_writel(g, pwr_pmu_idle_mask_r(1),
+		pwr_pmu_idle_mask_gr_enabled_f() |
+		pwr_pmu_idle_mask_ce_2_enabled_f());
 
-	pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
-	pmu->load_shadow = _load / 10;
-	pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
+	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
+	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+			pwr_pmu_idle_ctrl_filter_m(),
+			pwr_pmu_idle_ctrl_value_busy_f() |
+			pwr_pmu_idle_ctrl_filter_disabled_f());
+	gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
 
-	return 0;
+	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
+	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+			pwr_pmu_idle_ctrl_filter_m(),
+			pwr_pmu_idle_ctrl_value_always_f() |
+			pwr_pmu_idle_ctrl_filter_disabled_f());
+	gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
 }
 
-void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
-	u32 *total_cycles)
+u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
 {
-	if (!g->power_on || gk20a_busy(g)) {
-		*busy_cycles = 0;
-		*total_cycles = 0;
-		return;
-	}
-
-	*busy_cycles = pwr_pmu_idle_count_value_v(
-		gk20a_readl(g, pwr_pmu_idle_count_r(1)));
-	rmb();
-	*total_cycles = pwr_pmu_idle_count_value_v(
-		gk20a_readl(g, pwr_pmu_idle_count_r(2)));
-	gk20a_idle(g);
+	return pwr_pmu_idle_count_value_v(
+		gk20a_readl(g, pwr_pmu_idle_count_r(counter_id)));
 }
 
-void gk20a_pmu_reset_load_counters(struct gk20a *g)
+void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
 {
-	u32 reg_val = pwr_pmu_idle_count_reset_f(1);
-
-	if (!g->power_on || gk20a_busy(g))
-		return;
-
-	gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
-	wmb();
-	gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
-	gk20a_idle(g);
+	gk20a_writel(g, pwr_pmu_idle_count_r(counter_id),
+		pwr_pmu_idle_count_reset_f(1));
 }
 
 void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 4a1609d6..a88bc404 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -38,7 +38,7 @@ u32 gk20a_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id);
 
 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
 
-int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable);
+void gk20a_pmu_init_perfmon_counter(struct gk20a *g);
 
 void gk20a_pmu_pg_idle_counter_config(struct gk20a *g, u32 pg_engine_id);
 
@@ -51,11 +51,9 @@ int gk20a_pmu_queue_tail(struct nvgpu_pmu *pmu, struct pmu_queue *queue,
 		u32 *tail, bool set);
 void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set);
 
-int gk20a_pmu_load_norm(struct gk20a *g, u32 *load);
-int gk20a_pmu_load_update(struct gk20a *g);
-void gk20a_pmu_reset_load_counters(struct gk20a *g);
-void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
-		u32 *total_cycles);
+u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
+void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
+
 void gk20a_init_pmu_ops(struct gpu_ops *gops);
 
 void pmu_copy_to_dmem(struct nvgpu_pmu *pmu,
@@ -81,7 +79,4 @@ int pmu_enable_hw(struct nvgpu_pmu *pmu, bool enable);
 
 bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos);
 
-int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
-
 #endif /*__PMU_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
index c4972f67..ede238a0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
@@ -392,8 +392,16 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu);
 
 /* perfmon */
 int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 	struct pmu_perfmon_msg *msg);
+int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
+int nvgpu_pmu_load_update(struct gk20a *g);
+void nvgpu_pmu_reset_load_counters(struct gk20a *g);
+void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
+	u32 *total_cycles);
+
 int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
 	struct nv_pmu_therm_msg *msg);
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h
index 01cd2743..310cb508 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/nvgpu_gpmu_cmdif.h
@@ -78,6 +78,7 @@ struct pmu_msg {
 #define PMU_UNIT_VOLT			(0x0E)
 
 #define PMU_UNIT_END			(0x23)
+#define PMU_UNIT_INVALID		(0xFF)
 
 #define PMU_UNIT_TEST_START		(0xFE)
 #define PMU_UNIT_END_SIM		(0xFF)
diff --git a/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c
index 3d5ea698..c4216ffc 100644
--- a/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c
@@ -299,7 +299,7 @@ static void gk20a_tegra_prescale(struct device *dev)
 	struct gk20a *g = get_gk20a(dev);
 	u32 avg = 0;
 
-	gk20a_pmu_load_norm(g, &avg);
+	nvgpu_pmu_load_norm(g, &avg);
 	tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk));
 }
 
diff --git a/drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c
index 235473d3..5980c592 100644
--- a/drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/tegra/linux/platform_gp10b_tegra.c
@@ -280,7 +280,7 @@ static void gp10b_tegra_prescale(struct device *dev)
 
 	gk20a_dbg_fn("");
 
-	gk20a_pmu_load_norm(g, &avg);
+	nvgpu_pmu_load_norm(g, &avg);
 
 	gk20a_dbg_fn("done");
 }