Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/pmu_gk20a.c')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/pmu_gk20a.c   322
1 file changed, 46 insertions(+), 276 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 32303c6e..2f8e456f 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -802,135 +802,6 @@ void gk20a_init_pmu_ops(struct gpu_ops *gops)
         gops->pmu.reset = gk20a_pmu_reset;
 }
 
-static u8 get_perfmon_id(struct nvgpu_pmu *pmu)
-{
-        struct gk20a *g = gk20a_from_pmu(pmu);
-        u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
-        u8 unit_id;
-
-        switch (ver) {
-        case GK20A_GPUID_GK20A:
-        case GK20A_GPUID_GM20B:
-                unit_id = PMU_UNIT_PERFMON;
-                break;
-        case NVGPU_GPUID_GP10B:
-        case NVGPU_GPUID_GP104:
-        case NVGPU_GPUID_GP106:
-                unit_id = PMU_UNIT_PERFMON_T18X;
-                break;
-#if defined(CONFIG_TEGRA_19x_GPU)
-        case TEGRA_19x_GPUID:
-                unit_id = PMU_UNIT_PERFMON_T18X;
-                break;
-#endif
-        default:
-                nvgpu_err(g, "no support for %x", ver);
-                BUG();
-        }
-
-        return unit_id;
-}
-
-int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
-{
-        struct gk20a *g = gk20a_from_pmu(pmu);
-        struct pmu_v *pv = &g->ops.pmu_ver;
-        struct pmu_cmd cmd;
-        struct pmu_payload payload;
-        u32 seq;
-        u32 data;
-
-        gk20a_dbg_fn("");
-
-        pmu->perfmon_ready = 0;
-
-        /* use counter #3 for GR && CE2 busy cycles */
-        gk20a_writel(g, pwr_pmu_idle_mask_r(3),
-                pwr_pmu_idle_mask_gr_enabled_f() |
-                pwr_pmu_idle_mask_ce_2_enabled_f());
-
-        /* disable idle filtering for counters 3 and 6 */
-        data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
-        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-                pwr_pmu_idle_ctrl_filter_m(),
-                pwr_pmu_idle_ctrl_value_busy_f() |
-                pwr_pmu_idle_ctrl_filter_disabled_f());
-        gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
-
-        /* use counter #6 for total cycles */
-        data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
-        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-                pwr_pmu_idle_ctrl_filter_m(),
-                pwr_pmu_idle_ctrl_value_always_f() |
-                pwr_pmu_idle_ctrl_filter_disabled_f());
-        gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
-
-        /*
-         * We don't want to disturb counters #3 and #6, which are used by
-         * perfmon, so we add wiring also to counters #1 and #2 for
-         * exposing raw counter readings.
-         */
-        gk20a_writel(g, pwr_pmu_idle_mask_r(1),
-                pwr_pmu_idle_mask_gr_enabled_f() |
-                pwr_pmu_idle_mask_ce_2_enabled_f());
-
-        data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
-        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-                pwr_pmu_idle_ctrl_filter_m(),
-                pwr_pmu_idle_ctrl_value_busy_f() |
-                pwr_pmu_idle_ctrl_filter_disabled_f());
-        gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
-
-        data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
-        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
-                pwr_pmu_idle_ctrl_filter_m(),
-                pwr_pmu_idle_ctrl_value_always_f() |
-                pwr_pmu_idle_ctrl_filter_disabled_f());
-        gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
-
-        if (!pmu->sample_buffer)
-                pmu->sample_buffer = nvgpu_alloc(&pmu->dmem,
-                                                 2 * sizeof(u16));
-        if (!pmu->sample_buffer) {
-                nvgpu_err(g, "failed to allocate perfmon sample buffer");
-                return -ENOMEM;
-        }
-
-        /* init PERFMON */
-        memset(&cmd, 0, sizeof(struct pmu_cmd));
-        cmd.hdr.unit_id = get_perfmon_id(pmu);
-        cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
-        cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
-        /* buffer to save counter values for pmu perfmon */
-        pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
-                (u16)pmu->sample_buffer);
-        /* number of sample periods below lower threshold
-           before pmu triggers perfmon decrease event
-           TBD: = 15 */
-        pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
-        /* index of base counter, aka. always ticking counter */
-        pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
-        /* microseconds interval between pmu polls perf counters */
-        pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
-        /* number of perfmon counters
-           counter #3 (GR and CE2) for gk20a */
-        pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
-        /* moving average window for sample periods
-           TBD: = 3000000 / sample_period_us = 17 */
-        pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
-
-        memset(&payload, 0, sizeof(struct pmu_payload));
-        payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
-        payload.in.size = pv->get_perfmon_cntr_sz(pmu);
-        payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
-
-        gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
-        gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
-                        NULL, NULL, &seq, ~0);
-
-        return 0;
-}
-
 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
         void *param, u32 handle, u32 status)
 {
@@ -965,100 +836,6 @@ void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
                 nvgpu_err(g, "ZBC save timeout");
 }
 
-int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
-{
-        struct gk20a *g = gk20a_from_pmu(pmu);
-        struct pmu_v *pv = &g->ops.pmu_ver;
-        struct pmu_cmd cmd;
-        struct pmu_payload payload;
-        u32 seq;
-
-        /* PERFMON Start */
-        memset(&cmd, 0, sizeof(struct pmu_cmd));
-        cmd.hdr.unit_id = get_perfmon_id(pmu);
-        cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
-        pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
-                PMU_PERFMON_CMD_ID_START);
-        pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
-                PMU_DOMAIN_GROUP_PSTATE);
-        pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
-                pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
-
-        pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
-                PMU_PERFMON_FLAG_ENABLE_INCREASE |
-                PMU_PERFMON_FLAG_ENABLE_DECREASE |
-                PMU_PERFMON_FLAG_CLEAR_PREV);
-
-        memset(&payload, 0, sizeof(struct pmu_payload));
-
-        /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
-        pv->set_perfmon_cntr_ut(pmu, 3000); /* 30% */
-        /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
-        pv->set_perfmon_cntr_lt(pmu, 1000); /* 10% */
-        pv->set_perfmon_cntr_valid(pmu, true);
-
-        payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
-        payload.in.size = pv->get_perfmon_cntr_sz(pmu);
-        payload.in.offset =
-                pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
-
-        gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
-        gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
-                        NULL, NULL, &seq, ~0);
-
-        return 0;
-}
-
-int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
-{
-        struct gk20a *g = gk20a_from_pmu(pmu);
-        struct pmu_cmd cmd;
-        u32 seq;
-
-        /* PERFMON Stop */
-        memset(&cmd, 0, sizeof(struct pmu_cmd));
-        cmd.hdr.unit_id = get_perfmon_id(pmu);
-        cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
-        cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
-
-        gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
-        gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
-                        NULL, NULL, &seq, ~0);
-        return 0;
-}
-
-int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
-        struct pmu_perfmon_msg *msg)
-{
-        gk20a_dbg_fn("");
-
-        switch (msg->msg_type) {
-        case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
-                gk20a_dbg_pmu("perfmon increase event: "
-                        "state_id %d, ground_id %d, pct %d",
-                        msg->gen.state_id, msg->gen.group_id, msg->gen.data);
-                (pmu->perfmon_events_cnt)++;
-                break;
-        case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
-                gk20a_dbg_pmu("perfmon decrease event: "
-                        "state_id %d, ground_id %d, pct %d",
-                        msg->gen.state_id, msg->gen.group_id, msg->gen.data);
-                (pmu->perfmon_events_cnt)++;
-                break;
-        case PMU_PERFMON_MSG_ID_INIT_EVENT:
-                pmu->perfmon_ready = 1;
-                gk20a_dbg_pmu("perfmon init event");
-                break;
-        default:
-                break;
-        }
-
-        /* restart sampling */
-        if (pmu->perfmon_sampling_enabled)
-                return nvgpu_pmu_perfmon_start_sampling(pmu);
-        return 0;
-}
-
 int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
         struct nv_pmu_therm_msg *msg)
 {
@@ -1359,72 +1136,65 @@ void gk20a_pmu_isr(struct gk20a *g)
         nvgpu_mutex_release(&pmu->isr_mutex);
 }
 
-int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
+void gk20a_pmu_init_perfmon_counter(struct gk20a *g)
 {
-        struct nvgpu_pmu *pmu = &g->pmu;
-        int err;
-
-        gk20a_dbg_fn("");
-
-        if (enable)
-                err = nvgpu_pmu_perfmon_start_sampling(pmu);
-        else
-                err = nvgpu_pmu_perfmon_stop_sampling(pmu);
+        u32 data;
 
-        return err;
-}
+        /* use counter #3 for GR && CE2 busy cycles */
+        gk20a_writel(g, pwr_pmu_idle_mask_r(3),
+                pwr_pmu_idle_mask_gr_enabled_f() |
+                pwr_pmu_idle_mask_ce_2_enabled_f());
 
-int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
-{
-        *load = g->pmu.load_shadow;
-        return 0;
-}
+        /* disable idle filtering for counters 3 and 6 */
+        data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
+        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+                pwr_pmu_idle_ctrl_filter_m(),
+                pwr_pmu_idle_ctrl_value_busy_f() |
+                pwr_pmu_idle_ctrl_filter_disabled_f());
+        gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
 
-int gk20a_pmu_load_update(struct gk20a *g)
-{
-        struct nvgpu_pmu *pmu = &g->pmu;
-        u16 _load = 0;
+        /* use counter #6 for total cycles */
+        data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
+        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+                pwr_pmu_idle_ctrl_filter_m(),
+                pwr_pmu_idle_ctrl_value_always_f() |
+                pwr_pmu_idle_ctrl_filter_disabled_f());
+        gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
 
-        if (!pmu->perfmon_ready) {
-                pmu->load_shadow = 0;
-                return 0;
-        }
+        /*
+         * We don't want to disturb counters #3 and #6, which are used by
+         * perfmon, so we add wiring also to counters #1 and #2 for
+         * exposing raw counter readings.
+         */
+        gk20a_writel(g, pwr_pmu_idle_mask_r(1),
+                pwr_pmu_idle_mask_gr_enabled_f() |
+                pwr_pmu_idle_mask_ce_2_enabled_f());
 
-        pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
-        pmu->load_shadow = _load / 10;
-        pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
+        data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
+        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+                pwr_pmu_idle_ctrl_filter_m(),
+                pwr_pmu_idle_ctrl_value_busy_f() |
+                pwr_pmu_idle_ctrl_filter_disabled_f());
+        gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
 
-        return 0;
+        data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
+        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+                pwr_pmu_idle_ctrl_filter_m(),
+                pwr_pmu_idle_ctrl_value_always_f() |
+                pwr_pmu_idle_ctrl_filter_disabled_f());
+        gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
 }
 
-void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
-        u32 *total_cycles)
+u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
 {
-        if (!g->power_on || gk20a_busy(g)) {
-                *busy_cycles = 0;
-                *total_cycles = 0;
-                return;
-        }
-
-        *busy_cycles = pwr_pmu_idle_count_value_v(
-                gk20a_readl(g, pwr_pmu_idle_count_r(1)));
-        rmb();
-        *total_cycles = pwr_pmu_idle_count_value_v(
-                gk20a_readl(g, pwr_pmu_idle_count_r(2)));
-        gk20a_idle(g);
+        return pwr_pmu_idle_count_value_v(
+                gk20a_readl(g, pwr_pmu_idle_count_r(counter_id)));
 }
 
-void gk20a_pmu_reset_load_counters(struct gk20a *g)
+void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
 {
-        u32 reg_val = pwr_pmu_idle_count_reset_f(1);
-
-        if (!g->power_on || gk20a_busy(g))
-                return;
-
-        gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
-        wmb();
-        gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
-        gk20a_idle(g);
+        gk20a_writel(g, pwr_pmu_idle_count_r(counter_id),
+                pwr_pmu_idle_count_reset_f(1));
 }
 
 void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
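
Note: the last hunk replaces the combined load-counter helpers (gk20a_pmu_get_load_counters() and gk20a_pmu_reset_load_counters()) with per-counter primitives. The following is a minimal illustrative sketch, not part of this diff, of how a caller could rebuild the old busy/total read-and-reset sequence on top of the new helpers. The function name is hypothetical; the counter ids (#1 = GR/CE2 busy cycles, #2 = total cycles), the gk20a_busy()/gk20a_idle() power handling and the rmb()/wmb() barriers are taken from the removed code above.

/*
 * Illustrative sketch only: sample and clear the raw PMU idle counters
 * using the new per-counter helpers added in this change.
 */
static void example_sample_load_counters(struct gk20a *g,
                u32 *busy_cycles, u32 *total_cycles)
{
        /* mirror the old behaviour: report zero if the GPU is not powered */
        if (!g->power_on || gk20a_busy(g)) {
                *busy_cycles = 0;
                *total_cycles = 0;
                return;
        }

        /* counter #1 counts GR/CE2 busy cycles, counter #2 counts all cycles */
        *busy_cycles = gk20a_pmu_read_idle_counter(g, 1);
        rmb();
        *total_cycles = gk20a_pmu_read_idle_counter(g, 2);

        /* clear both counters so the next sample starts from zero */
        gk20a_pmu_reset_idle_counter(g, 2);
        wmb();
        gk20a_pmu_reset_idle_counter(g, 1);

        gk20a_idle(g);
}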