/* * GK20A PMU (aka. gPMU outside gk20a context) * * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include "gk20a.h" #include "gr_gk20a.h" #include #include #include #ifdef CONFIG_TEGRA_19x_GPU #include "nvgpu_gpuid_t19x.h" #endif #define gk20a_dbg_pmu(fmt, arg...) \ gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos) { u32 i = 0, j = strlen(strings); for (; i < j; i++) { if (strings[i] == '%') if (strings[i + 1] == 'x' || strings[i + 1] == 'X') { *hex_pos = i; return true; } } *hex_pos = -1; return false; } static void printtrace(struct nvgpu_pmu *pmu) { u32 i = 0, j = 0, k, l, m, count; char part_str[40], buf[0x40]; struct gk20a *g = gk20a_from_pmu(pmu); void *tracebuffer; char *trace; u32 *trace1; /* allocate system memory to copy pmu trace buffer */ tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE); if (tracebuffer == NULL) return; /* read pmu traces into system memory buffer */ nvgpu_mem_rd_n(g, &pmu->trace_buf, 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE); trace = (char *)tracebuffer; trace1 = (u32 *)tracebuffer; nvgpu_err(g, "Dump pmutrace"); for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { for (j = 0; j < 0x40; j++) if (trace1[(i / 4) + j]) break; if (j == 0x40) break; count = scnprintf(buf, 0x40, "Index %x: ", trace1[(i / 4)]); l = 0; m = 0; while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { if (k >= 40) break; strncpy(part_str, (trace+i+20+m), k); part_str[k] = 0; count += scnprintf((buf + count), 0x40, "%s0x%x", part_str, trace1[(i / 4) + 1 + l]); l++; m += k + 2; } scnprintf((buf + count), 0x40, "%s", (trace+i+20+m)); nvgpu_err(g, "%s", buf); } nvgpu_kfree(g, tracebuffer); } void pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable) { struct gk20a *g = gk20a_from_pmu(pmu); u32 intr_mask; u32 intr_dest; gk20a_dbg_fn(""); g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_DISABLE, true, mc_intr_mask_0_pmu_enabled_f()); g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_DISABLE, false, mc_intr_mask_1_pmu_enabled_f()); nvgpu_flcn_set_irq(pmu->flcn, false, 0x0, 0x0); if (enable) { /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */ intr_dest = pwr_falcon_irqdest_host_gptmr_f(0) | pwr_falcon_irqdest_host_wdtmr_f(1) | pwr_falcon_irqdest_host_mthd_f(0) | pwr_falcon_irqdest_host_ctxsw_f(0) | pwr_falcon_irqdest_host_halt_f(1) | pwr_falcon_irqdest_host_exterr_f(0) | pwr_falcon_irqdest_host_swgen0_f(1) | pwr_falcon_irqdest_host_swgen1_f(0) | pwr_falcon_irqdest_host_ext_f(0xff) | pwr_falcon_irqdest_target_gptmr_f(1) | pwr_falcon_irqdest_target_wdtmr_f(0) | pwr_falcon_irqdest_target_mthd_f(0) | pwr_falcon_irqdest_target_ctxsw_f(0) | pwr_falcon_irqdest_target_halt_f(0) | pwr_falcon_irqdest_target_exterr_f(0) | pwr_falcon_irqdest_target_swgen0_f(0) | pwr_falcon_irqdest_target_swgen1_f(0) | pwr_falcon_irqdest_target_ext_f(0xff); /* 0=disable, 1=enable */ intr_mask = pwr_falcon_irqmset_gptmr_f(1) | pwr_falcon_irqmset_wdtmr_f(1) | pwr_falcon_irqmset_mthd_f(0) | pwr_falcon_irqmset_ctxsw_f(0) | pwr_falcon_irqmset_halt_f(1) | pwr_falcon_irqmset_exterr_f(1) | pwr_falcon_irqmset_swgen0_f(1) | pwr_falcon_irqmset_swgen1_f(1); nvgpu_flcn_set_irq(pmu->flcn, true, intr_mask, intr_dest); g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_ENABLE, true, mc_intr_mask_0_pmu_enabled_f()); } gk20a_dbg_fn("done"); } int pmu_bootstrap(struct nvgpu_pmu *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); struct mm_gk20a *mm = &g->mm; struct pmu_ucode_desc *desc = pmu->desc; u64 addr_code, addr_data, addr_load; u32 i, blocks, addr_args; gk20a_dbg_fn(""); gk20a_writel(g, pwr_falcon_itfen_r(), gk20a_readl(g, pwr_falcon_itfen_r()) | pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); /* TBD: load all other surfaces */ g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( pmu, GK20A_PMU_TRACE_BUFSIZE); g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu); g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx( pmu, GK20A_PMU_DMAIDX_VIRT); g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu, g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK)); addr_args = (pwr_falcon_hwcfg_dmem_size_v( gk20a_readl(g, pwr_falcon_hwcfg_r())) << GK20A_PMU_DMEM_BLKSIZE2) - g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu); nvgpu_flcn_copy_to_dmem(pmu->flcn, addr_args, (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); gk20a_writel(g, pwr_falcon_dmemc_r(0), pwr_falcon_dmemc_offs_f(0) | pwr_falcon_dmemc_blk_f(0) | pwr_falcon_dmemc_aincw_f(1)); addr_code = u64_lo32((pmu->ucode.gpu_va + desc->app_start_offset + desc->app_resident_code_offset) >> 8) ; addr_data = u64_lo32((pmu->ucode.gpu_va + desc->app_start_offset + desc->app_resident_data_offset) >> 8); addr_load = u64_lo32((pmu->ucode.gpu_va + desc->bootloader_start_offset) >> 8); gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code); gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args); g->ops.pmu.write_dmatrfbase(g, addr_load - (desc->bootloader_imem_offset >> 8)); blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; for (i = 0; i < blocks; i++) { gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(), desc->bootloader_imem_offset + (i << 8)); gk20a_writel(g, pwr_falcon_dmatrffboffs_r(), desc->bootloader_imem_offset + (i << 8)); gk20a_writel(g, pwr_falcon_dmatrfcmd_r(), pwr_falcon_dmatrfcmd_imem_f(1) | pwr_falcon_dmatrfcmd_write_f(0) | pwr_falcon_dmatrfcmd_size_f(6) | pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE)); } nvgpu_flcn_bootstrap(g->pmu.flcn, desc->bootloader_entry_point); gk20a_writel(g, pwr_falcon_os_r(), desc->app_version); return 0; } void gk20a_pmu_pg_idle_counter_config(struct gk20a *g, u32 pg_engine_id) { gk20a_writel(g, pwr_pmu_pg_idlefilth_r(pg_engine_id), PMU_PG_IDLE_THRESHOLD); gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(pg_engine_id), PMU_PG_POST_POWERUP_IDLE_THRESHOLD); } int gk20a_pmu_mutex_acquire(struct nvgpu_pmu *pmu, u32 id, u32 *token) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_mutex *mutex; u32 data, owner, max_retry; if (!pmu->initialized) return -EINVAL; BUG_ON(!token); BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); BUG_ON(id > pmu->mutex_cnt); mutex = &pmu->mutex[id]; owner = pwr_pmu_mutex_value_v( gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) { BUG_ON(mutex->ref_cnt == 0); gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token); mutex->ref_cnt++; return 0; } max_retry = 40; do { data = pwr_pmu_mutex_id_value_v( gk20a_readl(g, pwr_pmu_mutex_id_r())); if (data == pwr_pmu_mutex_id_value_init_v() || data == pwr_pmu_mutex_id_value_not_avail_v()) { nvgpu_warn(g, "fail to generate mutex token: val 0x%08x", owner); nvgpu_usleep_range(20, 40); continue; } owner = data; gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), pwr_pmu_mutex_value_f(owner)); data = pwr_pmu_mutex_value_v( gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); if (owner == data) { mutex->ref_cnt = 1; gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x", mutex->index, *token); *token = owner; return 0; } else { gk20a_dbg_info("fail to acquire mutex idx=0x%08x", mutex->index); data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); data = set_field(data, pwr_pmu_mutex_id_release_value_m(), pwr_pmu_mutex_id_release_value_f(owner)); gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); nvgpu_usleep_range(20, 40); continue; } } while (max_retry-- > 0); return -EBUSY; } int gk20a_pmu_mutex_release(struct nvgpu_pmu *pmu, u32 id, u32 *token) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_mutex *mutex; u32 owner, data; if (!pmu->initialized) return -EINVAL; BUG_ON(!token); BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); BUG_ON(id > pmu->mutex_cnt); mutex = &pmu->mutex[id]; owner = pwr_pmu_mutex_value_v( gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); if (*token != owner) { nvgpu_err(g, "requester 0x%08x NOT match owner 0x%08x", *token, owner); return -EINVAL; } if (--mutex->ref_cnt > 0) return -EBUSY; gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), pwr_pmu_mutex_value_initial_lock_f()); data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); data = set_field(data, pwr_pmu_mutex_id_release_value_m(), pwr_pmu_mutex_id_release_value_f(owner)); gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); gk20a_dbg_pmu("mutex released: id=%d, token=0x%x", mutex->index, *token); return 0; } int gk20a_pmu_queue_head(struct nvgpu_pmu *pmu, struct pmu_queue *queue, u32 *head, bool set) { struct gk20a *g = gk20a_from_pmu(pmu); u32 queue_head_size = 0; if (g->ops.pmu.pmu_get_queue_head_size) queue_head_size = g->ops.pmu.pmu_get_queue_head_size(); BUG_ON(!head || !queue_head_size); if (PMU_IS_COMMAND_QUEUE(queue->id)) { if (queue->index >= queue_head_size) return -EINVAL; if (!set) *head = pwr_pmu_queue_head_address_v( gk20a_readl(g, g->ops.pmu.pmu_get_queue_head(queue->index))); else gk20a_writel(g, g->ops.pmu.pmu_get_queue_head(queue->index), pwr_pmu_queue_head_address_f(*head)); } else { if (!set) *head = pwr_pmu_msgq_head_val_v( gk20a_readl(g, pwr_pmu_msgq_head_r())); else gk20a_writel(g, pwr_pmu_msgq_head_r(), pwr_pmu_msgq_head_val_f(*head)); } return 0; } int gk20a_pmu_queue_tail(struct nvgpu_pmu *pmu, struct pmu_queue *queue, u32 *tail, bool set) { struct gk20a *g = gk20a_from_pmu(pmu); u32 queue_tail_size = 0; if (g->ops.pmu.pmu_get_queue_tail_size) queue_tail_size = g->ops.pmu.pmu_get_queue_tail_size(); BUG_ON(!tail || !queue_tail_size); if (PMU_IS_COMMAND_QUEUE(queue->id)) { if (queue->index >= queue_tail_size) return -EINVAL; if (!set) *tail = pwr_pmu_queue_tail_address_v( gk20a_readl(g, g->ops.pmu.pmu_get_queue_tail(queue->index))); else gk20a_writel(g, g->ops.pmu.pmu_get_queue_tail(queue->index), pwr_pmu_queue_tail_address_f(*tail)); } else { if (!set) *tail = pwr_pmu_msgq_tail_val_v( gk20a_readl(g, pwr_pmu_msgq_tail_r())); else gk20a_writel(g, pwr_pmu_msgq_tail_r(), pwr_pmu_msgq_tail_val_f(*tail)); } return 0; } void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set) { struct gk20a *g = gk20a_from_pmu(pmu); u32 queue_tail_size = 0; if (g->ops.pmu.pmu_get_queue_tail_size) queue_tail_size = g->ops.pmu.pmu_get_queue_tail_size(); BUG_ON(!tail || !queue_tail_size); if (!set) *tail = pwr_pmu_msgq_tail_val_v( gk20a_readl(g, pwr_pmu_msgq_tail_r())); else gk20a_writel(g, pwr_pmu_msgq_tail_r(), pwr_pmu_msgq_tail_val_f(*tail)); } static int gk20a_init_pmu_setup_hw1(struct gk20a *g) { struct nvgpu_pmu *pmu = &g->pmu; int err = 0; gk20a_dbg_fn(""); nvgpu_mutex_acquire(&pmu->isr_mutex); nvgpu_flcn_reset(pmu->flcn); pmu->isr_enabled = true; nvgpu_mutex_release(&pmu->isr_mutex); /* setup apertures - virtual */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), pwr_fbif_transcfg_mem_type_virtual_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT), pwr_fbif_transcfg_mem_type_virtual_f()); /* setup apertures - physical */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_local_fb_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_coherent_sysmem_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_noncoherent_sysmem_f()); err = g->ops.pmu.pmu_nsbootstrap(pmu); return err; } static void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr) { gk20a_writel(g, pwr_falcon_dmatrfbase_r(), addr); } bool gk20a_pmu_is_engine_in_reset(struct gk20a *g) { u32 pmc_enable; bool status = false; pmc_enable = gk20a_readl(g, mc_enable_r()); if (mc_enable_pwr_v(pmc_enable) == mc_enable_pwr_disabled_v()) status = true; return status; } int gk20a_pmu_engine_reset(struct gk20a *g, bool do_reset) { if (do_reset) g->ops.mc.enable(g, mc_enable_pwr_enabled_f()); else g->ops.mc.disable(g, mc_enable_pwr_enabled_f()); return 0; } static bool gk20a_is_pmu_supported(struct gk20a *g) { return true; } u32 gk20a_pmu_pg_engines_list(struct gk20a *g) { return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS); } u32 gk20a_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id) { if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) return PMU_PG_FEATURE_GR_POWER_GATING_ENABLED; return 0; } void gk20a_init_pmu_ops(struct gpu_ops *gops) { gops->pmu.is_pmu_supported = gk20a_is_pmu_supported; gops->pmu.prepare_ucode = nvgpu_pmu_prepare_ns_ucode_blob; gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1; gops->pmu.pmu_nsbootstrap = pmu_bootstrap; gops->pmu.pmu_get_queue_head = pwr_pmu_queue_head_r; gops->pmu.pmu_get_queue_head_size = pwr_pmu_queue_head__size_1_v; gops->pmu.pmu_get_queue_tail = pwr_pmu_queue_tail_r; gops->pmu.pmu_get_queue_tail_size = pwr_pmu_queue_tail__size_1_v; gops->pmu.pmu_queue_head = gk20a_pmu_queue_head; gops->pmu.pmu_queue_tail = gk20a_pmu_queue_tail; gops->pmu.pmu_msgq_tail = gk20a_pmu_msgq_tail; gops->pmu.pmu_mutex_size = pwr_pmu_mutex__size_1_v; gops->pmu.pmu_mutex_acquire = gk20a_pmu_mutex_acquire; gops->pmu.pmu_mutex_release = gk20a_pmu_mutex_release; gops->pmu.pmu_setup_elpg = NULL; gops->pmu.init_wpr_region = NULL; gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.write_dmatrfbase = gk20a_write_dmatrfbase; gops->pmu.pmu_elpg_statistics = gk20a_pmu_elpg_statistics; gops->pmu.pmu_pg_init_param = NULL; gops->pmu.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list; gops->pmu.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list; gops->pmu.pmu_is_lpwr_feature_supported = NULL; gops->pmu.pmu_lpwr_enable_pg = NULL; gops->pmu.pmu_lpwr_disable_pg = NULL; gops->pmu.pmu_pg_param_post_init = NULL; gops->pmu.dump_secure_fuses = NULL; gops->pmu.is_lazy_bootstrap = NULL; gops->pmu.is_priv_load = NULL; gops->pmu.get_wpr = NULL; gops->pmu.alloc_blob_space = NULL; gops->pmu.pmu_populate_loader_cfg = NULL; gops->pmu.flcn_populate_bl_dmem_desc = NULL; gops->pmu.reset_engine = gk20a_pmu_engine_reset; gops->pmu.is_engine_in_reset = gk20a_pmu_is_engine_in_reset; } static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct nvgpu_pmu *pmu = param; gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE"); pmu->zbc_save_done = 1; } void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) { struct nvgpu_pmu *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq; if (!pmu->pmu_ready || !entries || !pmu->zbc_ready) return; memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd); cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update; cmd.cmd.zbc.entry_mask = ZBC_MASK(entries); pmu->zbc_save_done = 0; gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE"); nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_zbc_msg, pmu, &seq, ~0); pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), &pmu->zbc_save_done, 1); if (!pmu->zbc_save_done) nvgpu_err(g, "ZBC save timeout"); } int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu, struct nv_pmu_therm_msg *msg) { gk20a_dbg_fn(""); switch (msg->msg_type) { case NV_PMU_THERM_MSG_ID_EVENT_HW_SLOWDOWN_NOTIFICATION: #ifdef CONFIG_ARCH_TEGRA_18x_SOC if (msg->hw_slct_msg.mask == BIT(NV_PMU_THERM_EVENT_THERMAL_1)) nvgpu_clk_arb_schedule_alarm(gk20a_from_pmu(pmu), (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD)); else #endif gk20a_dbg_pmu("Unwanted/Unregistered thermal event received %d", msg->hw_slct_msg.mask); break; default: gk20a_dbg_pmu("unkown therm event received %d", msg->msg_type); break; } return 0; } void pmu_dump_elpg_stats(struct nvgpu_pmu *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_pg_stats stats; nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_GRAPHICS], (u8 *)&stats, sizeof(struct pmu_pg_stats), 0); gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx", stats.pg_entry_start_timestamp); gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx", stats.pg_exit_start_timestamp); gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx", stats.pg_ingating_start_timestamp); gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx", stats.pg_ungating_start_timestamp); gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x", stats.pg_avg_entry_time_us); gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x", stats.pg_avg_exit_time_us); gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x", stats.pg_ingating_cnt); gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x", stats.pg_ingating_time_us); gk20a_dbg_pmu("pg_ungating_count : 0x%08x", stats.pg_ungating_count); gk20a_dbg_pmu("pg_ungating_time_us 0x%08x: ", stats.pg_ungating_time_us); gk20a_dbg_pmu("pg_gating_cnt : 0x%08x", stats.pg_gating_cnt); gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x", stats.pg_gating_deny_cnt); /* Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset in .nm file, e.g. 0x1000066c. use 0x66c. u32 i, val[20]; nvgpu_flcn_copy_from_dmem(pmu->flcn, 0x66c, (u8 *)val, sizeof(val), 0); gk20a_dbg_pmu("elpg log begin"); for (i = 0; i < 20; i++) gk20a_dbg_pmu("0x%08x", val[i]); gk20a_dbg_pmu("elpg log end"); */ gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x", gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0))); gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x", gk20a_readl(g, pwr_pmu_pg_intren_r(0))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(4))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(7))); /* TBD: script can't generate those registers correctly gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x", gk20a_readl(g, pwr_pmu_idle_status_r())); gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x", gk20a_readl(g, pwr_pmu_pg_ctrl_r())); */ } void pmu_dump_falcon_stats(struct nvgpu_pmu *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); unsigned int i; nvgpu_err(g, "pwr_falcon_os_r : %d", gk20a_readl(g, pwr_falcon_os_r())); nvgpu_err(g, "pwr_falcon_cpuctl_r : 0x%x", gk20a_readl(g, pwr_falcon_cpuctl_r())); nvgpu_err(g, "pwr_falcon_idlestate_r : 0x%x", gk20a_readl(g, pwr_falcon_idlestate_r())); nvgpu_err(g, "pwr_falcon_mailbox0_r : 0x%x", gk20a_readl(g, pwr_falcon_mailbox0_r())); nvgpu_err(g, "pwr_falcon_mailbox1_r : 0x%x", gk20a_readl(g, pwr_falcon_mailbox1_r())); nvgpu_err(g, "pwr_falcon_irqstat_r : 0x%x", gk20a_readl(g, pwr_falcon_irqstat_r())); nvgpu_err(g, "pwr_falcon_irqmode_r : 0x%x", gk20a_readl(g, pwr_falcon_irqmode_r())); nvgpu_err(g, "pwr_falcon_irqmask_r : 0x%x", gk20a_readl(g, pwr_falcon_irqmask_r())); nvgpu_err(g, "pwr_falcon_irqdest_r : 0x%x", gk20a_readl(g, pwr_falcon_irqdest_r())); for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++) nvgpu_err(g, "pwr_pmu_mailbox_r(%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_mailbox_r(i))); for (i = 0; i < pwr_pmu_debug__size_1_v(); i++) nvgpu_err(g, "pwr_pmu_debug_r(%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_debug_r(i))); for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) { gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rstat_f() | pwr_pmu_falcon_icd_cmd_idx_f(i)); nvgpu_err(g, "pmu_rstat (%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); } i = gk20a_readl(g, pwr_pmu_bar0_error_status_r()); nvgpu_err(g, "pwr_pmu_bar0_error_status_r : 0x%x", i); if (i != 0) { nvgpu_err(g, "pwr_pmu_bar0_addr_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_addr_r())); nvgpu_err(g, "pwr_pmu_bar0_data_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_data_r())); nvgpu_err(g, "pwr_pmu_bar0_timeout_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_timeout_r())); nvgpu_err(g, "pwr_pmu_bar0_ctl_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_ctl_r())); } i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r()); nvgpu_err(g, "pwr_pmu_bar0_fecs_error_r : 0x%x", i); i = gk20a_readl(g, pwr_falcon_exterrstat_r()); nvgpu_err(g, "pwr_falcon_exterrstat_r : 0x%x", i); if (pwr_falcon_exterrstat_valid_v(i) == pwr_falcon_exterrstat_valid_true_v()) { nvgpu_err(g, "pwr_falcon_exterraddr_r : 0x%x", gk20a_readl(g, pwr_falcon_exterraddr_r())); nvgpu_err(g, "pmc_enable : 0x%x", gk20a_readl(g, mc_enable_r())); } nvgpu_err(g, "pwr_falcon_engctl_r : 0x%x", gk20a_readl(g, pwr_falcon_engctl_r())); nvgpu_err(g, "pwr_falcon_curctx_r : 0x%x", gk20a_readl(g, pwr_falcon_curctx_r())); nvgpu_err(g, "pwr_falcon_nxtctx_r : 0x%x", gk20a_readl(g, pwr_falcon_nxtctx_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB)); nvgpu_err(g, "PMU_FALCON_REG_IMB : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB)); nvgpu_err(g, "PMU_FALCON_REG_DMB : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW)); nvgpu_err(g, "PMU_FALCON_REG_CSW : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX)); nvgpu_err(g, "PMU_FALCON_REG_CTX : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI)); nvgpu_err(g, "PMU_FALCON_REG_EXCI : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); for (i = 0; i < 4; i++) { gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC)); nvgpu_err(g, "PMU_FALCON_REG_PC : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP)); nvgpu_err(g, "PMU_FALCON_REG_SP : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); } nvgpu_err(g, "elpg stat: %d", pmu->elpg_stat); /* PMU may crash due to FECS crash. Dump FECS status */ gk20a_fecs_dump_falcon_stats(g); printtrace(pmu); } bool gk20a_pmu_is_interrupted(struct nvgpu_pmu *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); u32 servicedpmuint; servicedpmuint = pwr_falcon_irqstat_halt_true_f() | pwr_falcon_irqstat_exterr_true_f() | pwr_falcon_irqstat_swgen0_true_f(); if (gk20a_readl(g, pwr_falcon_irqstat_r()) & servicedpmuint) return true; return false; } void gk20a_pmu_isr(struct gk20a *g) { struct nvgpu_pmu *pmu = &g->pmu; struct pmu_queue *queue; u32 intr, mask; bool recheck = false; gk20a_dbg_fn(""); nvgpu_mutex_acquire(&pmu->isr_mutex); if (!pmu->isr_enabled) { nvgpu_mutex_release(&pmu->isr_mutex); return; } mask = gk20a_readl(g, pwr_falcon_irqmask_r()) & gk20a_readl(g, pwr_falcon_irqdest_r()); intr = gk20a_readl(g, pwr_falcon_irqstat_r()); gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr); intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask; if (!intr || pmu->pmu_state == PMU_STATE_OFF) { gk20a_writel(g, pwr_falcon_irqsclr_r(), intr); nvgpu_mutex_release(&pmu->isr_mutex); return; } if (intr & pwr_falcon_irqstat_halt_true_f()) { nvgpu_err(g, "pmu halt intr not implemented"); pmu_dump_falcon_stats(pmu); if (gk20a_readl(g, pwr_pmu_mailbox_r (PMU_MODE_MISMATCH_STATUS_MAILBOX_R)) == PMU_MODE_MISMATCH_STATUS_VAL) if (g->ops.pmu.dump_secure_fuses) g->ops.pmu.dump_secure_fuses(g); } if (intr & pwr_falcon_irqstat_exterr_true_f()) { nvgpu_err(g, "pmu exterr intr not implemented. Clearing interrupt."); pmu_dump_falcon_stats(pmu); gk20a_writel(g, pwr_falcon_exterrstat_r(), gk20a_readl(g, pwr_falcon_exterrstat_r()) & ~pwr_falcon_exterrstat_valid_m()); } if (intr & pwr_falcon_irqstat_swgen0_true_f()) { nvgpu_pmu_process_message(pmu); recheck = true; } gk20a_writel(g, pwr_falcon_irqsclr_r(), intr); if (recheck) { queue = &pmu->queue[PMU_MESSAGE_QUEUE]; if (!nvgpu_pmu_queue_is_empty(pmu, queue)) gk20a_writel(g, pwr_falcon_irqsset_r(), pwr_falcon_irqsset_swgen0_set_f()); } nvgpu_mutex_release(&pmu->isr_mutex); } void gk20a_pmu_init_perfmon_counter(struct gk20a *g) { u32 data; /* use counter #3 for GR && CE2 busy cycles */ gk20a_writel(g, pwr_pmu_idle_mask_r(3), pwr_pmu_idle_mask_gr_enabled_f() | pwr_pmu_idle_mask_ce_2_enabled_f()); /* disable idle filtering for counters 3 and 6 */ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_busy_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data); /* use counter #6 for total cycles */ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_always_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data); /* * We don't want to disturb counters #3 and #6, which are used by * perfmon, so we add wiring also to counters #1 and #2 for * exposing raw counter readings. */ gk20a_writel(g, pwr_pmu_idle_mask_r(1), pwr_pmu_idle_mask_gr_enabled_f() | pwr_pmu_idle_mask_ce_2_enabled_f()); data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_busy_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data); data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_always_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); } u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id) { return pwr_pmu_idle_count_value_v( gk20a_readl(g, pwr_pmu_idle_count_r(counter_id))); } void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id) { gk20a_writel(g, pwr_pmu_idle_count_r(counter_id), pwr_pmu_idle_count_reset_f(1)); } void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, struct pmu_pg_stats_data *pg_stat_data) { struct nvgpu_pmu *pmu = &g->pmu; struct pmu_pg_stats stats; nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->stat_dmem_offset[pg_engine_id], (u8 *)&stats, sizeof(struct pmu_pg_stats), 0); pg_stat_data->ingating_time = stats.pg_ingating_time_us; pg_stat_data->ungating_time = stats.pg_ungating_time_us; pg_stat_data->gating_cnt = stats.pg_gating_cnt; pg_stat_data->avg_entry_latency_us = stats.pg_avg_entry_time_us; pg_stat_data->avg_exit_latency_us = stats.pg_avg_exit_time_us; }