author	Mahantesh Kumbar <mkumbar@nvidia.com>	2017-06-13 05:51:56 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-06-15 14:18:51 -0400
commit	77e2cbab237637f71367df25384164b8c936a31a
tree	64ccc10c4c6aa3eddbcac1d697d50b860247bf57 /drivers/gpu/nvgpu/gk20a
parent	7d16f7e52c0f8ce8604e992a617a3f98545fcf07
gpu: nvgpu: reorganize PMU perfmon
- Moved perfmon code from pmu_gk20a.c to
  drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
- Moved the following related methods:
  perfmon init,
  start/stop sampling,
  load counter read/write/reset,
  perfmon event handler
- Renamed the global perfmon methods, replacing the
  gk20a_ prefix with nvgpu_
JURA NVGPU-56
JURA NVGPU-98
Change-Id: Idbcdf63ebd76da170e609cc401b320a42110cd7b
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1501418
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
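
For orientation, below is a sketch of the perfmon surface that moves into drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c. The prototypes are inferred from the definitions removed in this patch and from the updated call sites; the exact declarations in the new common header may differ.

/* Inferred (not verbatim) prototypes of the relocated perfmon methods,
 * renamed from gk20a_* to nvgpu_*: */
int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
		struct pmu_perfmon_msg *msg);
int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
int nvgpu_pmu_load_update(struct gk20a *g);
void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
		u32 *total_cycles);
void nvgpu_pmu_reset_load_counters(struct gk20a *g);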
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a_scale.c	|   6
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c	|   8
-rw-r--r--	drivers/gpu/nvgpu/gk20a/pmu_gk20a.c	| 322
-rw-r--r--	drivers/gpu/nvgpu/gk20a/pmu_gk20a.h	|  13
4 files changed, 57 insertions(+), 292 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
index c23cdcba..160776bc 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
@@ -93,7 +93,7 @@ int gk20a_scale_qos_notify(struct notifier_block *nb,
 
 	/* Update gpu load because we may scale the emc target
 	 * if the gpu load changed. */
-	gk20a_pmu_load_update(g);
+	nvgpu_pmu_load_update(g);
 	platform->postscale(profile->dev, freq);
 
 	return NOTIFY_OK;
@@ -223,7 +223,7 @@ static void update_load_estimate_gpmu(struct device *dev)
 
 	profile->dev_stat.total_time = dt;
 	profile->last_event_time = t;
-	gk20a_pmu_load_norm(g, &busy_time);
+	nvgpu_pmu_load_norm(g, &busy_time);
 	profile->dev_stat.busy_time = (busy_time * dt) / 1000;
 }
 
@@ -310,7 +310,7 @@ static int gk20a_scale_get_dev_status(struct device *dev,
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
 
 	/* update the software shadow */
-	gk20a_pmu_load_update(g);
+	nvgpu_pmu_load_update(g);
 
 	/* inform edp about new constraint */
 	if (platform->prescale)
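
A note on the arithmetic in update_load_estimate_gpmu() above: the caller divides by 1000, so the normalized load is treated as parts-per-thousand (1000 means fully busy). A minimal worked sketch, assuming nvgpu_pmu_load_norm() keeps the scale of the removed gk20a_pmu_load_norm():

	u32 busy_time;	/* normalized load, assumed 0..1000 scale */
	u32 dt = 10000;	/* example: 10000 us since the last estimate */

	nvgpu_pmu_load_norm(g, &busy_time);
	/* e.g. busy_time == 300 (30%) -> dev_stat.busy_time == 3000 us,
	 * i.e. 30% of the 10000 us window counted as busy */
	profile->dev_stat.busy_time = (busy_time * dt) / 1000;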
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
index 8c1dbd37..1933eed5 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -386,7 +386,7 @@ static ssize_t counters_show(struct device *dev,
 	u32 busy_cycles, total_cycles;
 	ssize_t res;
 
-	gk20a_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
+	nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
 
 	res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles);
 
@@ -400,7 +400,7 @@ static ssize_t counters_show_reset(struct device *dev,
 	ssize_t res = counters_show(dev, attr, buf);
 	struct gk20a *g = get_gk20a(dev);
 
-	gk20a_pmu_reset_load_counters(g);
+	nvgpu_pmu_reset_load_counters(g);
 
 	return res;
 }
@@ -422,8 +422,8 @@ static ssize_t gk20a_load_show(struct device *dev,
 		if (err)
			return err;
 
-		gk20a_pmu_load_update(g);
-		gk20a_pmu_load_norm(g, &busy_time);
+		nvgpu_pmu_load_update(g);
+		nvgpu_pmu_load_norm(g, &busy_time);
 		gk20a_idle(g);
 	}
 
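
The counters attribute prints the two raw cycle counts as "busy total" on one line; counters_reset returns the same snapshot and then clears the hardware counters. A hypothetical userspace reader (the sysfs path is an assumption, it is not shown in this diff):

#include <stdio.h>

int main(void)
{
	/* path is illustrative; the real node depends on the platform */
	FILE *f = fopen("/sys/devices/platform/gpu.0/counters", "r");
	unsigned int busy, total;

	if (f && fscanf(f, "%u %u", &busy, &total) == 2)
		printf("busy=%u total=%u cycles\n", busy, total);
	if (f)
		fclose(f);
	return 0;
}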
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 32303c6e..2f8e456f 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -802,135 +802,6 @@ void gk20a_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.reset = gk20a_pmu_reset;
 }
 
-static u8 get_perfmon_id(struct nvgpu_pmu *pmu)
-{
-	struct gk20a *g = gk20a_from_pmu(pmu);
-	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
-	u8 unit_id;
-
-	switch (ver) {
-	case GK20A_GPUID_GK20A:
-	case GK20A_GPUID_GM20B:
-		unit_id = PMU_UNIT_PERFMON;
-		break;
-	case NVGPU_GPUID_GP10B:
-	case NVGPU_GPUID_GP104:
-	case NVGPU_GPUID_GP106:
-		unit_id = PMU_UNIT_PERFMON_T18X;
-		break;
-#if defined(CONFIG_TEGRA_19x_GPU)
-	case TEGRA_19x_GPUID:
-		unit_id = PMU_UNIT_PERFMON_T18X;
-		break;
-#endif
-	default:
-		nvgpu_err(g, "no support for %x", ver);
-		BUG();
-	}
-
-	return unit_id;
-}
-
-int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
-{
-	struct gk20a *g = gk20a_from_pmu(pmu);
-	struct pmu_v *pv = &g->ops.pmu_ver;
-	struct pmu_cmd cmd;
-	struct pmu_payload payload;
-	u32 seq;
-	u32 data;
-
-	gk20a_dbg_fn("");
-
-	pmu->perfmon_ready = 0;
-
-	/* use counter #3 for GR && CE2 busy cycles */
-	gk20a_writel(g, pwr_pmu_idle_mask_r(3),
-		pwr_pmu_idle_mask_gr_enabled_f() |
-		pwr_pmu_idle_mask_ce_2_enabled_f());
-
-	/* disable idle filtering for counters 3 and 6 */
-	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
-	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-		pwr_pmu_idle_ctrl_filter_m(),
-		pwr_pmu_idle_ctrl_value_busy_f() |
-		pwr_pmu_idle_ctrl_filter_disabled_f());
-	gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
-
-	/* use counter #6 for total cycles */
-	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
-	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-		pwr_pmu_idle_ctrl_filter_m(),
-		pwr_pmu_idle_ctrl_value_always_f() |
-		pwr_pmu_idle_ctrl_filter_disabled_f());
-	gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
-
-	/*
-	 * We don't want to disturb counters #3 and #6, which are used by
-	 * perfmon, so we add wiring also to counters #1 and #2 for
-	 * exposing raw counter readings.
-	 */
-	gk20a_writel(g, pwr_pmu_idle_mask_r(1),
-		pwr_pmu_idle_mask_gr_enabled_f() |
-		pwr_pmu_idle_mask_ce_2_enabled_f());
-
-	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
-	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-		pwr_pmu_idle_ctrl_filter_m(),
-		pwr_pmu_idle_ctrl_value_busy_f() |
-		pwr_pmu_idle_ctrl_filter_disabled_f());
-	gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
-
-	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
-	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-		pwr_pmu_idle_ctrl_filter_m(),
-		pwr_pmu_idle_ctrl_value_always_f() |
-		pwr_pmu_idle_ctrl_filter_disabled_f());
-	gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
-
-	if (!pmu->sample_buffer)
-		pmu->sample_buffer = nvgpu_alloc(&pmu->dmem,
-						2 * sizeof(u16));
-	if (!pmu->sample_buffer) {
-		nvgpu_err(g, "failed to allocate perfmon sample buffer");
-		return -ENOMEM;
-	}
-
-	/* init PERFMON */
-	memset(&cmd, 0, sizeof(struct pmu_cmd));
-	cmd.hdr.unit_id = get_perfmon_id(pmu);
-	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
-	cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
-	/* buffer to save counter values for pmu perfmon */
-	pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
-		(u16)pmu->sample_buffer);
-	/* number of sample periods below lower threshold
-	   before pmu triggers perfmon decrease event
-	   TBD: = 15 */
-	pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
-	/* index of base counter, aka. always ticking counter */
-	pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
-	/* microseconds interval between pmu polls perf counters */
-	pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
-	/* number of perfmon counters
-	   counter #3 (GR and CE2) for gk20a */
-	pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
-	/* moving average window for sample periods
-	   TBD: = 3000000 / sample_period_us = 17 */
-	pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
-
-	memset(&payload, 0, sizeof(struct pmu_payload));
-	payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
-	payload.in.size = pv->get_perfmon_cntr_sz(pmu);
-	payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
-
-	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
-	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
-			NULL, NULL, &seq, ~0);
-
-	return 0;
-}
-
 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
 		void *param, u32 handle, u32 status)
 {
@@ -965,100 +836,6 @@ void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
 		nvgpu_err(g, "ZBC save timeout");
 }
 
-int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
-{
-	struct gk20a *g = gk20a_from_pmu(pmu);
-	struct pmu_v *pv = &g->ops.pmu_ver;
-	struct pmu_cmd cmd;
-	struct pmu_payload payload;
-	u32 seq;
-
-	/* PERFMON Start */
-	memset(&cmd, 0, sizeof(struct pmu_cmd));
-	cmd.hdr.unit_id = get_perfmon_id(pmu);
-	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
-	pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
-		PMU_PERFMON_CMD_ID_START);
-	pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
-		PMU_DOMAIN_GROUP_PSTATE);
-	pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
-		pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
-
-	pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
-		PMU_PERFMON_FLAG_ENABLE_INCREASE |
-		PMU_PERFMON_FLAG_ENABLE_DECREASE |
-		PMU_PERFMON_FLAG_CLEAR_PREV);
-
-	memset(&payload, 0, sizeof(struct pmu_payload));
-
-	/* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
-	pv->set_perfmon_cntr_ut(pmu, 3000); /* 30% */
-	/* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
-	pv->set_perfmon_cntr_lt(pmu, 1000); /* 10% */
-	pv->set_perfmon_cntr_valid(pmu, true);
-
-	payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
-	payload.in.size = pv->get_perfmon_cntr_sz(pmu);
-	payload.in.offset =
-		pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
-
-	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
-	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
-			NULL, NULL, &seq, ~0);
-
-	return 0;
-}
-
-int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
-{
-	struct gk20a *g = gk20a_from_pmu(pmu);
-	struct pmu_cmd cmd;
-	u32 seq;
-
-	/* PERFMON Stop */
-	memset(&cmd, 0, sizeof(struct pmu_cmd));
-	cmd.hdr.unit_id = get_perfmon_id(pmu);
-	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
-	cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
-
-	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
-	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
-			NULL, NULL, &seq, ~0);
-	return 0;
-}
-
-int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
-			struct pmu_perfmon_msg *msg)
-{
-	gk20a_dbg_fn("");
-
-	switch (msg->msg_type) {
-	case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
-		gk20a_dbg_pmu("perfmon increase event: "
-			"state_id %d, ground_id %d, pct %d",
-			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
-		(pmu->perfmon_events_cnt)++;
-		break;
-	case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
-		gk20a_dbg_pmu("perfmon decrease event: "
-			"state_id %d, ground_id %d, pct %d",
-			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
-		(pmu->perfmon_events_cnt)++;
-		break;
-	case PMU_PERFMON_MSG_ID_INIT_EVENT:
-		pmu->perfmon_ready = 1;
-		gk20a_dbg_pmu("perfmon init event");
-		break;
-	default:
-		break;
-	}
-
-	/* restart sampling */
-	if (pmu->perfmon_sampling_enabled)
-		return nvgpu_pmu_perfmon_start_sampling(pmu);
-	return 0;
-}
-
 int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
 		struct nv_pmu_therm_msg *msg)
 {
@@ -1359,72 +1136,65 @@ void gk20a_pmu_isr(struct gk20a *g)
 	nvgpu_mutex_release(&pmu->isr_mutex);
 }
 
-int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
-{
-	struct nvgpu_pmu *pmu = &g->pmu;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (enable)
-		err = nvgpu_pmu_perfmon_start_sampling(pmu);
-	else
-		err = nvgpu_pmu_perfmon_stop_sampling(pmu);
-
-	return err;
-}
-
-int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
-{
-	*load = g->pmu.load_shadow;
-	return 0;
-}
-
-int gk20a_pmu_load_update(struct gk20a *g)
-{
-	struct nvgpu_pmu *pmu = &g->pmu;
-	u16 _load = 0;
-
-	if (!pmu->perfmon_ready) {
-		pmu->load_shadow = 0;
-		return 0;
-	}
-
-	pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
-	pmu->load_shadow = _load / 10;
-	pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
-
-	return 0;
+void gk20a_pmu_init_perfmon_counter(struct gk20a *g)
+{
+	u32 data;
+
+	/* use counter #3 for GR && CE2 busy cycles */
+	gk20a_writel(g, pwr_pmu_idle_mask_r(3),
+		pwr_pmu_idle_mask_gr_enabled_f() |
+		pwr_pmu_idle_mask_ce_2_enabled_f());
+
+	/* disable idle filtering for counters 3 and 6 */
+	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
+	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+		pwr_pmu_idle_ctrl_filter_m(),
+		pwr_pmu_idle_ctrl_value_busy_f() |
+		pwr_pmu_idle_ctrl_filter_disabled_f());
+	gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
+
+	/* use counter #6 for total cycles */
+	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
+	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+		pwr_pmu_idle_ctrl_filter_m(),
+		pwr_pmu_idle_ctrl_value_always_f() |
+		pwr_pmu_idle_ctrl_filter_disabled_f());
+	gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
+
+	/*
+	 * We don't want to disturb counters #3 and #6, which are used by
+	 * perfmon, so we add wiring also to counters #1 and #2 for
+	 * exposing raw counter readings.
+	 */
+	gk20a_writel(g, pwr_pmu_idle_mask_r(1),
+		pwr_pmu_idle_mask_gr_enabled_f() |
+		pwr_pmu_idle_mask_ce_2_enabled_f());
+
+	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
+	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+		pwr_pmu_idle_ctrl_filter_m(),
+		pwr_pmu_idle_ctrl_value_busy_f() |
+		pwr_pmu_idle_ctrl_filter_disabled_f());
+	gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
+
+	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
+	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+		pwr_pmu_idle_ctrl_filter_m(),
+		pwr_pmu_idle_ctrl_value_always_f() |
+		pwr_pmu_idle_ctrl_filter_disabled_f());
+	gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
 }
 
-void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
-	u32 *total_cycles)
+u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
 {
-	if (!g->power_on || gk20a_busy(g)) {
-		*busy_cycles = 0;
-		*total_cycles = 0;
-		return;
-	}
-
-	*busy_cycles = pwr_pmu_idle_count_value_v(
-		gk20a_readl(g, pwr_pmu_idle_count_r(1)));
-	rmb();
-	*total_cycles = pwr_pmu_idle_count_value_v(
-		gk20a_readl(g, pwr_pmu_idle_count_r(2)));
-	gk20a_idle(g);
+	return pwr_pmu_idle_count_value_v(
+		gk20a_readl(g, pwr_pmu_idle_count_r(counter_id)));
 }
 
-void gk20a_pmu_reset_load_counters(struct gk20a *g)
+void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
 {
-	u32 reg_val = pwr_pmu_idle_count_reset_f(1);
-
-	if (!g->power_on || gk20a_busy(g))
-		return;
-
-	gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
-	wmb();
-	gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
-	gk20a_idle(g);
+	gk20a_writel(g, pwr_pmu_idle_count_r(counter_id),
+		pwr_pmu_idle_count_reset_f(1));
 }
 
 void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
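
The chip-specific load-counter helpers above are reduced to per-counter primitives (read/reset by counter_id), while the policy moves to common code. A sketch of how the relocated nvgpu_pmu_get_load_counters()/nvgpu_pmu_reset_load_counters() could rebuild the removed behaviour on top of the new primitives, assuming the power-state checks and memory barriers are kept as in the deleted gk20a_ versions:

void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
		u32 *total_cycles)
{
	if (!g->power_on || gk20a_busy(g)) {
		*busy_cycles = 0;
		*total_cycles = 0;
		return;
	}

	/* counter #1 = GR/CE2 busy cycles, counter #2 = total cycles */
	*busy_cycles = gk20a_pmu_read_idle_counter(g, 1);
	rmb();
	*total_cycles = gk20a_pmu_read_idle_counter(g, 2);
	gk20a_idle(g);
}

void nvgpu_pmu_reset_load_counters(struct gk20a *g)
{
	if (!g->power_on || gk20a_busy(g))
		return;

	gk20a_pmu_reset_idle_counter(g, 2);
	wmb();
	gk20a_pmu_reset_idle_counter(g, 1);
	gk20a_idle(g);
}

In the real driver the common code would likely reach these helpers through g->ops.pmu HAL pointers rather than direct gk20a_ calls; the direct calls here are only for illustration.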
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 4a1609d6..a88bc404 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -38,7 +38,7 @@ u32 gk20a_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id);
 
 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
 
-int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable);
+void gk20a_pmu_init_perfmon_counter(struct gk20a *g);
 
 void gk20a_pmu_pg_idle_counter_config(struct gk20a *g, u32 pg_engine_id);
 
@@ -51,11 +51,9 @@ int gk20a_pmu_queue_tail(struct nvgpu_pmu *pmu, struct pmu_queue *queue,
 		u32 *tail, bool set);
 void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set);
 
-int gk20a_pmu_load_norm(struct gk20a *g, u32 *load);
-int gk20a_pmu_load_update(struct gk20a *g);
-void gk20a_pmu_reset_load_counters(struct gk20a *g);
-void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
-	u32 *total_cycles);
+u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
+void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
+
 void gk20a_init_pmu_ops(struct gpu_ops *gops);
 
 void pmu_copy_to_dmem(struct nvgpu_pmu *pmu,
@@ -81,7 +79,4 @@ int pmu_enable_hw(struct nvgpu_pmu *pmu, bool enable);
 
 bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos);
 
-int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
-
 #endif /*__PMU_GK20A_H__*/