/*
 * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_PMU_H
#define NVGPU_PMU_H

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define nvgpu_pmu_dbg(g, fmt, args...) \
	nvgpu_log(g, gpu_dbg_pmu, fmt, ##args)

/* defined by pmu hw spec */
#define GK20A_PMU_VA_SIZE		(512 * 1024 * 1024)
#define GK20A_PMU_UCODE_SIZE_MAX	(256 * 1024)
#define GK20A_PMU_SEQ_BUF_SIZE		4096

#define GK20A_PMU_TRACE_BUFSIZE		0x4000	/* 16K */
#define GK20A_PMU_DMEM_BLKSIZE2		8

#define PMU_MODE_MISMATCH_STATUS_MAILBOX_R	6
#define PMU_MODE_MISMATCH_STATUS_VAL		0xDEADDEAD

/* Falcon register index */
#define PMU_FALCON_REG_R0		(0)
#define PMU_FALCON_REG_R1		(1)
#define PMU_FALCON_REG_R2		(2)
#define PMU_FALCON_REG_R3		(3)
#define PMU_FALCON_REG_R4		(4)
#define PMU_FALCON_REG_R5		(5)
#define PMU_FALCON_REG_R6		(6)
#define PMU_FALCON_REG_R7		(7)
#define PMU_FALCON_REG_R8		(8)
#define PMU_FALCON_REG_R9		(9)
#define PMU_FALCON_REG_R10		(10)
#define PMU_FALCON_REG_R11		(11)
#define PMU_FALCON_REG_R12		(12)
#define PMU_FALCON_REG_R13		(13)
#define PMU_FALCON_REG_R14		(14)
#define PMU_FALCON_REG_R15		(15)
#define PMU_FALCON_REG_IV0		(16)
#define PMU_FALCON_REG_IV1		(17)
#define PMU_FALCON_REG_UNDEFINED	(18)
#define PMU_FALCON_REG_EV		(19)
#define PMU_FALCON_REG_SP		(20)
#define PMU_FALCON_REG_PC		(21)
#define PMU_FALCON_REG_IMB		(22)
#define PMU_FALCON_REG_DMB		(23)
#define PMU_FALCON_REG_CSW		(24)
#define PMU_FALCON_REG_CCR		(25)
#define PMU_FALCON_REG_SEC		(26)
#define PMU_FALCON_REG_CTX		(27)
#define PMU_FALCON_REG_EXCI		(28)
#define PMU_FALCON_REG_RSVD0		(29)
#define PMU_FALCON_REG_RSVD1		(30)
#define PMU_FALCON_REG_RSVD2		(31)
#define PMU_FALCON_REG_SIZE		(32)

/* Choices for pmu_state */
#define PMU_STATE_OFF			0U /* PMU is off */
#define PMU_STATE_STARTING		1U /* PMU is on, but not booted */
#define PMU_STATE_INIT_RECEIVED		2U /* PMU init message received */
#define PMU_STATE_ELPG_BOOTING		3U /* PMU is booting */
#define PMU_STATE_ELPG_BOOTED		4U /* ELPG is initialized */
#define PMU_STATE_LOADING_PG_BUF	5U /* Loading PG buf */
#define PMU_STATE_LOADING_ZBC		6U /* Loading ZBC buf */
#define PMU_STATE_STARTED		7U /* Fully initialized */
#define PMU_STATE_EXIT			8U /* Exit PMU state machine */

/* ELPG state transitions:
 * OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
 * ON => OFF is always synchronized
 */
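/*
 * Usage note: pmu_state is typically polled with pmu_wait_message_cond()
 * (declared at the bottom of this header). A minimal sketch, assuming
 * "g" is a valid gk20a instance whose PMU was just booted and a 2000 ms
 * timeout is acceptable:
 *
 *	struct nvgpu_pmu *pmu = &g->pmu;
 *
 *	if (pmu_wait_message_cond(pmu, 2000U, &pmu->pmu_state,
 *			PMU_STATE_INIT_RECEIVED) != 0)
 *		nvgpu_err(g, "PMU init message timed out");
 */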
/* elpg is off */
#define PMU_ELPG_STAT_OFF		0U
/* elpg is on */
#define PMU_ELPG_STAT_ON		1U
/* elpg is off, ALLOW cmd has been sent, wait for ack */
#define PMU_ELPG_STAT_ON_PENDING	2U
/* elpg is on, DISALLOW cmd has been sent, wait for ack */
#define PMU_ELPG_STAT_OFF_PENDING	3U
/* elpg is off, caller has requested on, but ALLOW
 * cmd hasn't been sent due to ENABLE_ALLOW delay
 */
#define PMU_ELPG_STAT_OFF_ON_PENDING	4U

#define GK20A_PMU_UCODE_NB_MAX_OVERLAY		32U
#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH	64U

#define PMU_MAX_NUM_SEQUENCES		(256U)
#define PMU_SEQ_BIT_SHIFT		(5U)
#define PMU_SEQ_TBL_SIZE \
	(PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT)

#define PMU_INVALID_SEQ_DESC		(~0)

enum {
	GK20A_PMU_DMAIDX_UCODE		= 0,
	GK20A_PMU_DMAIDX_VIRT		= 1,
	GK20A_PMU_DMAIDX_PHYS_VID	= 2,
	GK20A_PMU_DMAIDX_PHYS_SYS_COH	= 3,
	GK20A_PMU_DMAIDX_PHYS_SYS_NCOH	= 4,
	GK20A_PMU_DMAIDX_RSVD		= 5,
	GK20A_PMU_DMAIDX_PELPG		= 6,
	GK20A_PMU_DMAIDX_END		= 7
};

enum {
	PMU_SEQ_STATE_FREE = 0,
	PMU_SEQ_STATE_PENDING,
	PMU_SEQ_STATE_USED,
	PMU_SEQ_STATE_CANCELLED
};

/* PG defines used by nvgpu-pmu */
#define PMU_PG_IDLE_THRESHOLD_SIM		1000
#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM	4000000
/* TBD: QT or else ? */
#define PMU_PG_IDLE_THRESHOLD			15000
#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD	1000000

#define PMU_PG_LPWR_FEATURE_RPPG	0x0
#define PMU_PG_LPWR_FEATURE_MSCG	0x1

#define PMU_MSCG_DISABLED		0U
#define PMU_MSCG_ENABLED		1U

/* Default sampling period of AELPG */
#define APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US	(1000000)

/* Default values of APCTRL parameters */
#define APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US	(100)
#define APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US	(10000)
#define APCTRL_POWER_BREAKEVEN_DEFAULT_US	(2000)
#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT	(200)

/* pmu load const defines */
#define PMU_BUSY_CYCLES_NORM_MAX	(1000U)

/* RPC: no copy-back, no completion callback */
#define PMU_RPC_EXECUTE(_stat, _pmu, _unit, _func, _prpc, _size)	\
	do {								\
		memset(&((_prpc)->hdr), 0, sizeof((_prpc)->hdr));	\
									\
		(_prpc)->hdr.unit_id = PMU_UNIT_##_unit;		\
		(_prpc)->hdr.function = NV_PMU_RPC_ID_##_unit##_##_func;\
		(_prpc)->hdr.flags = 0x0;				\
									\
		_stat = nvgpu_pmu_rpc_execute(_pmu, &((_prpc)->hdr),	\
			(sizeof(*(_prpc)) - sizeof((_prpc)->scratch)),	\
			(_size), NULL, NULL, false);			\
	} while (0)

/* RPC blocking call to copy back data from PMU to _prpc */
#define PMU_RPC_EXECUTE_CPB(_stat, _pmu, _unit, _func, _prpc, _size)	\
	do {								\
		memset(&((_prpc)->hdr), 0, sizeof((_prpc)->hdr));	\
									\
		(_prpc)->hdr.unit_id = PMU_UNIT_##_unit;		\
		(_prpc)->hdr.function = NV_PMU_RPC_ID_##_unit##_##_func;\
		(_prpc)->hdr.flags = 0x0;				\
									\
		_stat = nvgpu_pmu_rpc_execute(_pmu, &((_prpc)->hdr),	\
			(sizeof(*(_prpc)) - sizeof((_prpc)->scratch)),	\
			(_size), NULL, NULL, true);			\
	} while (0)

/* RPC non-blocking with callback handler option */
#define PMU_RPC_EXECUTE_CB(_stat, _pmu, _unit, _func, _prpc, _size, _cb, _cbp)\
	do {								\
		memset(&((_prpc)->hdr), 0, sizeof((_prpc)->hdr));	\
									\
		(_prpc)->hdr.unit_id = PMU_UNIT_##_unit;		\
		(_prpc)->hdr.function = NV_PMU_RPC_ID_##_unit##_##_func;\
		(_prpc)->hdr.flags = 0x0;				\
									\
		_stat = nvgpu_pmu_rpc_execute(_pmu, &((_prpc)->hdr),	\
			(sizeof(*(_prpc)) - sizeof((_prpc)->scratch)),	\
			(_size), _cb, _cbp, false);			\
	} while (0)

typedef void (*pmu_callback)(struct gk20a *, struct pmu_msg *, void *,
	u32, u32);

struct rpc_handler_payload {
	void *rpc_buff;
	bool is_mem_free_set;
	bool complete;
};

struct pmu_rpc_desc {
	void *prpc;
	u16 size_rpc;
	u16 size_scratch;
};

struct pmu_payload {
	struct {
		void *buf;
		u32 offset;
		u32 size;
		u32 fb_size;
	} in, out;
	struct pmu_rpc_desc rpc;
};
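/*
 * Example: issuing a blocking RPC with copy-back through the
 * PMU_RPC_EXECUTE_CPB() macro above. A minimal sketch under these
 * assumptions: a PERFMON unit with a QUERY function (i.e. the macro
 * expands to PMU_UNIT_PERFMON and NV_PMU_RPC_ID_PERFMON_QUERY) and an
 * RPC struct type that carries the required "hdr" and "scratch"
 * members:
 *
 *	struct nv_pmu_rpc_struct_perfmon_query rpc;
 *	int status = 0;
 *
 *	memset(&rpc, 0, sizeof(rpc));
 *	PMU_RPC_EXECUTE_CPB(status, pmu, PERFMON, QUERY, &rpc, 0);
 *	if (status != 0)
 *		nvgpu_err(g, "perfmon query RPC failed: 0x%x", status);
 */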
struct pmu_ucode_desc {
	u32 descriptor_size;
	u32 image_size;
	u32 tools_version;
	u32 app_version;
	char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
	u32 bootloader_start_offset;
	u32 bootloader_size;
	u32 bootloader_imem_offset;
	u32 bootloader_entry_point;
	u32 app_start_offset;
	u32 app_size;
	u32 app_imem_offset;
	u32 app_imem_entry;
	u32 app_dmem_offset;
	/* Offset from appStartOffset */
	u32 app_resident_code_offset;
	/* Exact size of the resident code
	 * (potentially contains CRC inside at the end)
	 */
	u32 app_resident_code_size;
	/* Offset from appStartOffset */
	u32 app_resident_data_offset;
	/* Exact size of the resident data
	 * (potentially contains CRC inside at the end)
	 */
	u32 app_resident_data_size;
	u32 nb_overlays;
	struct { u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY];
	u32 compressed;
};

struct pmu_ucode_desc_v1 {
	u32 descriptor_size;
	u32 image_size;
	u32 tools_version;
	u32 app_version;
	char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
	u32 bootloader_start_offset;
	u32 bootloader_size;
	u32 bootloader_imem_offset;
	u32 bootloader_entry_point;
	u32 app_start_offset;
	u32 app_size;
	u32 app_imem_offset;
	u32 app_imem_entry;
	u32 app_dmem_offset;
	u32 app_resident_code_offset;
	u32 app_resident_code_size;
	u32 app_resident_data_offset;
	u32 app_resident_data_size;
	u32 nb_imem_overlays;
	u32 nb_dmem_overlays;
	struct { u32 start; u32 size; } load_ovl[64];
	u32 compressed;
};

struct pmu_mutex {
	u32 id;
	u32 index;
	u32 ref_cnt;
};

struct pmu_sequence {
	u8 id;
	u32 state;
	u32 desc;
	struct pmu_msg *msg;
	union {
		struct pmu_allocation_v1 in_v1;
		struct pmu_allocation_v2 in_v2;
		struct pmu_allocation_v3 in_v3;
	};
	struct nvgpu_mem *in_mem;
	union {
		struct pmu_allocation_v1 out_v1;
		struct pmu_allocation_v2 out_v2;
		struct pmu_allocation_v3 out_v3;
	};
	struct nvgpu_mem *out_mem;
	u8 *out_payload;
	pmu_callback callback;
	void *cb_params;
};

struct nvgpu_pg_init {
	bool state_change;
	bool state_destroy;
	struct nvgpu_cond wq;
	struct nvgpu_thread state_task;
};
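/*
 * Example: a completion handler matching the pmu_callback signature
 * defined above, as used in struct pmu_sequence and passed to
 * nvgpu_pmu_cmd_post()/nvgpu_pmu_rpc_execute(). A minimal sketch; the
 * handler name is hypothetical and "param" is assumed to be the
 * rpc_handler_payload supplied at call time:
 *
 *	static void example_rpc_done(struct gk20a *g, struct pmu_msg *msg,
 *		void *param, u32 handle, u32 status)
 *	{
 *		struct rpc_handler_payload *payload = param;
 *
 *		if (status != 0U)
 *			nvgpu_err(g, "RPC completion error: 0x%x", status);
 *		payload->complete = true;
 *	}
 */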
struct nvgpu_pmu {
	struct gk20a *g;
	struct nvgpu_falcon *flcn;

	union {
		struct pmu_ucode_desc *desc;
		struct pmu_ucode_desc_v1 *desc_v1;
	};
	struct nvgpu_mem ucode;

	struct nvgpu_mem pg_buf;

	/* TBD: remove this if ZBC seq is fixed */
	struct nvgpu_mem seq_buf;
	struct nvgpu_mem trace_buf;
	struct nvgpu_mem super_surface_buf;
	bool buf_loaded;

	struct pmu_sha1_gid gid_info;

	struct nvgpu_falcon_queue queue[PMU_QUEUE_COUNT];

	struct pmu_sequence *seq;
	unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
	u32 next_seq_desc;

	struct pmu_mutex *mutex;
	u32 mutex_cnt;

	struct nvgpu_mutex pmu_copy_lock;
	struct nvgpu_mutex pmu_seq_lock;

	struct nvgpu_allocator dmem;

	u32 *ucode_image;
	bool pmu_ready;

	u32 perfmon_query;
	u32 zbc_save_done;

	u32 stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE];

	u32 elpg_stat;
	u32 disallow_state;
	u32 mscg_stat;
	u32 mscg_transition_state;

	u32 pmu_state;

#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC	1 /* msec */
	struct nvgpu_pg_init pg_init;
	struct nvgpu_mutex pg_mutex; /* protect pg-RPPG/MSCG enable/disable */
	struct nvgpu_mutex elpg_mutex; /* protect elpg enable/disable */
	/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
	int elpg_refcnt;

	union {
		struct pmu_perfmon_counter_v2 perfmon_counter_v2;
	};
	u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];

	bool initialized;

	void (*remove_support)(struct nvgpu_pmu *pmu);
	bool sw_ready;
	bool perfmon_ready;

	u32 sample_buffer;
	u32 load_shadow;
	u32 load_avg;
	u32 load;

	struct nvgpu_mutex isr_mutex;
	bool isr_enabled;

	bool zbc_ready;
	union {
		struct pmu_cmdline_args_v3 args_v3;
		struct pmu_cmdline_args_v4 args_v4;
		struct pmu_cmdline_args_v5 args_v5;
		struct pmu_cmdline_args_v6 args_v6;
	};
	unsigned long perfmon_events_cnt;
	bool perfmon_sampling_enabled;
	u8 pmu_mode; /* Added for GM20b and ACR */
	u32 falcon_id;
	u32 aelpg_param[5];
	u32 override_done;

	struct nvgpu_firmware *fw;
};

struct pmu_surface {
	struct nvgpu_mem vidmem_desc;
	struct nvgpu_mem sysmem_desc;
	struct flcn_mem_desc_v0 params;
};

/* PG defines used by nvgpu-pmu */
struct pmu_pg_stats_data {
	u32 gating_cnt;
	u32 ingating_time;
	u32 ungating_time;
	u32 avg_entry_latency_us;
	u32 avg_exit_latency_us;
};

/*!
 * Structure describing a single register write to be performed during the
 * PG init sequence to set PROD values.
 */
struct pg_init_sequence_list {
	u32 regaddr;
	u32 writeval;
};

/* PMU IPC Methods */
void nvgpu_pmu_seq_init(struct nvgpu_pmu *pmu);

int nvgpu_pmu_mutex_acquire(struct nvgpu_pmu *pmu, u32 id, u32 *token);
int nvgpu_pmu_mutex_release(struct nvgpu_pmu *pmu, u32 id, u32 *token);

int nvgpu_pmu_queue_init(struct nvgpu_pmu *pmu, u32 id,
	union pmu_init_msg_pmu *init);

/* send a cmd to pmu */
int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
	struct pmu_msg *msg, struct pmu_payload *payload,
	u32 queue_id, pmu_callback callback, void *cb_param,
	u32 *seq_desc, unsigned long timeout);

int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu);

/* perfmon */
int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
	struct pmu_perfmon_msg *msg);
int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
int nvgpu_pmu_load_update(struct gk20a *g);
int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm);
void nvgpu_pmu_reset_load_counters(struct gk20a *g);
void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
	u32 *total_cycles);

int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
	struct nv_pmu_therm_msg *msg);

/* PMU init */
int nvgpu_init_pmu_support(struct gk20a *g);
int nvgpu_pmu_destroy(struct gk20a *g);
int nvgpu_pmu_process_init_msg(struct nvgpu_pmu *pmu,
	struct pmu_msg *msg);
int nvgpu_pmu_super_surface_alloc(struct gk20a *g,
	struct nvgpu_mem *mem_surface, u32 size);

void nvgpu_pmu_state_change(struct gk20a *g, u32 pmu_state,
	bool post_change_event);
void nvgpu_kill_task_pg_init(struct gk20a *g);

/* NVGPU-PMU MEM alloc */
void nvgpu_pmu_surface_free(struct gk20a *g, struct nvgpu_mem *mem);
void nvgpu_pmu_surface_describe(struct gk20a *g, struct nvgpu_mem *mem,
	struct flcn_mem_desc_v0 *fb);
int nvgpu_pmu_vidmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
	u32 size);
int nvgpu_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
	u32 size);

/* PMU F/W support */
int nvgpu_init_pmu_fw_support(struct nvgpu_pmu *pmu);
int nvgpu_pmu_prepare_ns_ucode_blob(struct gk20a *g);

/* PG init */
int nvgpu_pmu_init_powergating(struct gk20a *g);
int nvgpu_pmu_init_bind_fecs(struct gk20a *g);
void nvgpu_pmu_setup_hw_load_zbc(struct gk20a *g);

/* PMU reset */
int nvgpu_pmu_reset(struct gk20a *g);

/* PG enable/disable */
int nvgpu_pmu_reenable_elpg(struct gk20a *g);
int nvgpu_pmu_enable_elpg(struct gk20a *g);
int nvgpu_pmu_disable_elpg(struct gk20a *g);
int nvgpu_pmu_pg_global_enable(struct gk20a *g, u32 enable_pg);

int nvgpu_pmu_get_pg_stats(struct gk20a *g, u32 pg_engine_id,
	struct pmu_pg_stats_data *pg_stat_data);
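/*
 * Usage note: ELPG enable/disable is reference counted through
 * elpg_refcnt (disable -1, enable +1), so calls must be balanced.
 * A minimal sketch of bracketing work that must not race with
 * power-gating; the critical-section body is a placeholder:
 *
 *	int err = nvgpu_pmu_disable_elpg(g);
 *	if (err != 0)
 *		return err;
 *
 *	... access engine registers safely here ...
 *
 *	nvgpu_pmu_enable_elpg(g);
 */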
/* AELPG */
int nvgpu_aelpg_init(struct gk20a *g);
int nvgpu_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
int nvgpu_pmu_ap_send_command(struct gk20a *g,
	union pmu_ap_cmd *p_ap_cmd, bool b_block);

/* PMU debug */
void nvgpu_pmu_dump_falcon_stats(struct nvgpu_pmu *pmu);
void nvgpu_pmu_dump_elpg_stats(struct nvgpu_pmu *pmu);
bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos);

/* PMU RPC */
int nvgpu_pmu_rpc_execute(struct nvgpu_pmu *pmu, struct nv_pmu_rpc_header *rpc,
	u16 size_rpc, u16 size_scratch, pmu_callback callback, void *cb_param,
	bool is_copy_back);

/* PMU wait */
int pmu_wait_message_cond(struct nvgpu_pmu *pmu, u32 timeout_ms,
	void *var, u8 val);

struct gk20a *gk20a_from_pmu(struct nvgpu_pmu *pmu);

#endif /* NVGPU_PMU_H */