/*
 * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * GK20A Graphics
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef GK20A_H
#define GK20A_H

struct gk20a;
struct fifo_gk20a;
struct channel_gk20a;
struct gr_gk20a;
struct sim_gk20a;
struct gk20a_ctxsw_ucode_segments;
struct gk20a_fecs_trace;
struct gk20a_ctxsw_trace;
struct acr_desc;
struct nvgpu_mem_alloc_tracker;
struct dbg_profiler_object_data;
struct ecc_gk20a;
struct gk20a_debug_output;
struct nvgpu_clk_pll_debug_data;
struct nvgpu_nvhost_dev;
struct nvgpu_cpu_time_correlation_sample;

#include
#include
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#endif
#include
#include
#include
#include
#include
#include
#include
#include

#include "clk_gk20a.h"
#include "ce2_gk20a.h"
#include "fifo_gk20a.h"
#include "tsg_gk20a.h"
#include "gr_gk20a.h"
#include "sim_gk20a.h"
#include "pmu_gk20a.h"
#include "priv_ring_gk20a.h"
#include "therm_gk20a.h"
#include "cde_gk20a.h"
#include "sched_gk20a.h"
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
#include "clk/clk.h"
#include "clk/clk_arb.h"
#include "perf/perf.h"
#include "pmgr/pmgr.h"
#include "therm/thrm.h"
#endif
#include "ecc_gk20a.h"

struct page_alloc_chunk;

/*
 * PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
 * 32 ns is the resolution of ptimer.
 */
#define PTIMER_REF_FREQ_HZ	31250000

#ifdef CONFIG_DEBUG_FS
struct railgate_stats {
	unsigned long last_rail_gate_start;
	unsigned long last_rail_gate_complete;
	unsigned long last_rail_ungate_start;
	unsigned long last_rail_ungate_complete;
	unsigned long total_rail_gate_time_ms;
	unsigned long total_rail_ungate_time_ms;
	unsigned long railgating_cycle_count;
};
#endif

enum gk20a_cbc_op {
	gk20a_cbc_op_clear,
	gk20a_cbc_op_clean,
	gk20a_cbc_op_invalidate,
};

#define MC_INTR_UNIT_DISABLE	false
#define MC_INTR_UNIT_ENABLE	true

#define GPU_LIT_NUM_GPCS	0
#define GPU_LIT_NUM_PES_PER_GPC	1
#define GPU_LIT_NUM_ZCULL_BANKS	2
#define GPU_LIT_NUM_TPC_PER_GPC	3
#define GPU_LIT_NUM_SM_PER_TPC	4
#define GPU_LIT_NUM_FBPS	5
#define GPU_LIT_GPC_BASE	6
#define GPU_LIT_GPC_STRIDE	7
#define GPU_LIT_GPC_SHARED_BASE	8
#define GPU_LIT_TPC_IN_GPC_BASE	9
#define GPU_LIT_TPC_IN_GPC_STRIDE	10
#define GPU_LIT_TPC_IN_GPC_SHARED_BASE	11
#define GPU_LIT_PPC_IN_GPC_BASE	12
#define GPU_LIT_PPC_IN_GPC_STRIDE	13
#define GPU_LIT_PPC_IN_GPC_SHARED_BASE	14
#define GPU_LIT_ROP_BASE	15
#define GPU_LIT_ROP_STRIDE	16
#define GPU_LIT_ROP_SHARED_BASE	17
#define GPU_LIT_HOST_NUM_ENGINES	18
#define GPU_LIT_HOST_NUM_PBDMA	19
#define GPU_LIT_LTC_STRIDE	20
#define GPU_LIT_LTS_STRIDE	21
#define GPU_LIT_NUM_FBPAS	22
#define GPU_LIT_FBPA_STRIDE	23
#define GPU_LIT_FBPA_BASE	24
#define GPU_LIT_FBPA_SHARED_BASE	25
#define GPU_LIT_SM_PRI_STRIDE	26

#define nvgpu_get_litter_value(g, v) (g)->ops.get_litter_value((g), (v))

enum nvgpu_unit;
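/*
 * Usage sketch (illustrative only): litter values are small per-chip
 * constants resolved through the HAL, so common code can compute
 * per-unit register offsets without chip-specific #defines, e.g.:
 *
 *	u32 gpc_base   = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
 *	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 *	u32 gpc_offset = gpc_base + gpc_index * gpc_stride;
 *
 * (gpc_index is a hypothetical loop variable.)
 */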
struct gpu_ops {
	struct {
		int (*determine_L2_size_bytes)(struct gk20a *gk20a);
		int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr);
		int (*cbc_ctrl)(struct gk20a *g, enum gk20a_cbc_op op,
				u32 min, u32 max);
		void (*set_zbc_color_entry)(struct gk20a *g,
					    struct zbc_entry *color_val,
					    u32 index);
		void (*set_zbc_depth_entry)(struct gk20a *g,
					    struct zbc_entry *depth_val,
					    u32 index);
		void (*set_zbc_s_entry)(struct gk20a *g,
					struct zbc_entry *s_val,
					u32 index);
		void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr);
#ifdef CONFIG_DEBUG_FS
		void (*sync_debugfs)(struct gk20a *g);
#endif
		void (*init_fs_state)(struct gk20a *g);
		void (*isr)(struct gk20a *g);
		u32 (*cbc_fix_config)(struct gk20a *g, int base);
		void (*flush)(struct gk20a *g);
	} ltc;
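	/*
	 * Illustrative call pattern (a sketch, not a fixed API contract):
	 * compbit-cache maintenance is driven through the HAL, e.g.
	 *
	 *	g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clean, 0, max_line);
	 *
	 * where max_line is a hypothetical last compbit line to clean.
	 */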
	struct {
		void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base);
		int (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base);
		u32 (*get_num_pce)(struct gk20a *g);
	} ce2;
	struct {
		int (*init_fs_state)(struct gk20a *g);
		int (*init_preemption_state)(struct gk20a *g);
		void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset);
		void (*bundle_cb_defaults)(struct gk20a *g);
		void (*cb_size_default)(struct gk20a *g);
		int (*calc_global_ctx_buffer_size)(struct gk20a *g);
		void (*commit_global_attrib_cb)(struct gk20a *g,
				struct channel_ctx_gk20a *ch_ctx,
				u64 addr, bool patch);
		void (*commit_global_bundle_cb)(struct gk20a *g,
				struct channel_ctx_gk20a *ch_ctx,
				u64 addr, u64 size, bool patch);
		int (*commit_global_cb_manager)(struct gk20a *g,
				struct channel_gk20a *ch, bool patch);
		void (*commit_global_pagepool)(struct gk20a *g,
				struct channel_ctx_gk20a *ch_ctx,
				u64 addr, u32 size, bool patch);
		void (*init_gpc_mmu)(struct gk20a *g);
		int (*handle_sw_method)(struct gk20a *g, u32 addr,
				u32 class_num, u32 offset, u32 data);
		void (*set_alpha_circular_buffer_size)(struct gk20a *g, u32 data);
		void (*set_circular_buffer_size)(struct gk20a *g, u32 data);
		void (*enable_hww_exceptions)(struct gk20a *g);
		bool (*is_valid_class)(struct gk20a *g, u32 class_num);
		bool (*is_valid_gfx_class)(struct gk20a *g, u32 class_num);
		bool (*is_valid_compute_class)(struct gk20a *g, u32 class_num);
		void (*get_sm_dsm_perf_regs)(struct gk20a *g,
				u32 *num_sm_dsm_perf_regs,
				u32 **sm_dsm_perf_regs,
				u32 *perf_register_stride);
		void (*get_sm_dsm_perf_ctrl_regs)(struct gk20a *g,
				u32 *num_sm_dsm_perf_regs,
				u32 **sm_dsm_perf_regs,
				u32 *perf_register_stride);
		void (*set_hww_esr_report_mask)(struct gk20a *g);
		int (*setup_alpha_beta_tables)(struct gk20a *g,
				struct gr_gk20a *gr);
		int (*falcon_load_ucode)(struct gk20a *g, u64 addr_base,
				struct gk20a_ctxsw_ucode_segments *segments,
				u32 reg_offset);
		int (*load_ctxsw_ucode)(struct gk20a *g);
		u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
		void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
		void (*free_channel_ctx)(struct channel_gk20a *c);
		int (*alloc_obj_ctx)(struct channel_gk20a *c,
				struct nvgpu_alloc_obj_ctx_args *args);
		int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr,
				struct channel_gk20a *c, u64 zcull_va,
				u32 mode);
		int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
				struct gr_zcull_info *zcull_params);
		bool (*is_tpc_addr)(struct gk20a *g, u32 addr);
		u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
		bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr);
		bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr);
		bool (*get_lts_in_ltc_shared_base)(void);
		void (*split_lts_broadcast_addr)(struct gk20a *g, u32 addr,
				u32 *priv_addr_table,
				u32 *priv_addr_table_index);
		void (*split_ltc_broadcast_addr)(struct gk20a *g, u32 addr,
				u32 *priv_addr_table,
				u32 *priv_addr_table_index);
		void (*detect_sm_arch)(struct gk20a *g);
		int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr,
				struct zbc_entry *color_val, u32 index);
		int (*add_zbc_depth)(struct gk20a *g, struct gr_gk20a *gr,
				struct zbc_entry *depth_val, u32 index);
		int (*add_zbc_s)(struct gk20a *g, struct gr_gk20a *gr,
				struct zbc_entry *s_val, u32 index);
		int (*zbc_set_table)(struct gk20a *g, struct gr_gk20a *gr,
				struct zbc_entry *zbc_val);
		int (*zbc_query_table)(struct gk20a *g, struct gr_gk20a *gr,
				struct zbc_query_params *query_params);
		int (*zbc_s_query_table)(struct gk20a *g, struct gr_gk20a *gr,
				struct zbc_query_params *query_params);
		int (*load_zbc_s_default_tbl)(struct gk20a *g,
				struct gr_gk20a *gr);
		int (*load_zbc_s_tbl)(struct gk20a *g, struct gr_gk20a *gr);
		void (*pmu_save_zbc)(struct gk20a *g, u32 entries);
		int (*add_zbc)(struct gk20a *g, struct gr_gk20a *gr,
				struct zbc_entry *zbc_val);
		bool (*add_zbc_type_s)(struct gk20a *g, struct gr_gk20a *gr,
				struct zbc_entry *zbc_val, int *ret_val);
		u32 (*pagepool_default_size)(struct gk20a *g);
		int (*init_ctx_state)(struct gk20a *g);
		int (*alloc_gr_ctx)(struct gk20a *g,
				struct gr_ctx_desc **__gr_ctx,
				struct vm_gk20a *vm, u32 class, u32 padding);
		void (*free_gr_ctx)(struct gk20a *g, struct vm_gk20a *vm,
				struct gr_ctx_desc *gr_ctx);
		void (*update_ctxsw_preemption_mode)(struct gk20a *g,
				struct channel_ctx_gk20a *ch_ctx,
				struct nvgpu_mem *mem);
		int (*update_smpc_ctxsw_mode)(struct gk20a *g,
				struct channel_gk20a *c, bool enable);
		int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
				struct channel_gk20a *c, bool enable);
		int (*dump_gr_regs)(struct gk20a *g,
				struct gk20a_debug_output *o);
		int (*update_pc_sampling)(struct channel_gk20a *ch,
				bool enable);
		u32 (*get_max_fbps_count)(struct gk20a *g);
		u32 (*get_fbp_en_mask)(struct gk20a *g);
		u32 (*get_max_ltc_per_fbp)(struct gk20a *g);
		u32 (*get_max_lts_per_ltc)(struct gk20a *g);
		u32 *(*get_rop_l2_en_mask)(struct gk20a *g);
		void (*init_sm_dsm_reg_info)(void);
		int (*wait_empty)(struct gk20a *g, unsigned long duration_ms,
				u32 expect_delay);
		void (*init_cyclestats)(struct gk20a *g);
		void (*enable_cde_in_fecs)(struct gk20a *g,
				struct nvgpu_mem *mem);
		int (*set_sm_debug_mode)(struct gk20a *g,
				struct channel_gk20a *ch, u64 sms, bool enable);
		void (*bpt_reg_info)(struct gk20a *g,
				struct warpstate *w_state);
		void (*get_access_map)(struct gk20a *g,
				u32 **whitelist, int *num_entries);
		int (*handle_fecs_error)(struct gk20a *g,
				struct channel_gk20a *ch,
				struct gr_gk20a_isr_data *isr_data);
		int (*pre_process_sm_exception)(struct gk20a *g,
				u32 gpc, u32 tpc, u32 sm,
				u32 global_esr, u32 warp_esr,
				bool sm_debugger_attached,
				struct channel_gk20a *fault_ch,
				bool *early_exit, bool *ignore_debugger);
		u32 (*get_sm_hww_warp_esr)(struct gk20a *g,
				u32 gpc, u32 tpc, u32 sm);
		u32 (*get_sm_hww_global_esr)(struct gk20a *g,
				u32 gpc, u32 tpc, u32 sm);
		u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g);
		int (*lock_down_sm)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
				u32 global_esr_mask, bool check_errors);
		int (*wait_for_sm_lock_down)(struct gk20a *g,
				u32 gpc, u32 tpc, u32 sm,
				u32 global_esr_mask, bool check_errors);
		void (*clear_sm_hww)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
				u32 global_esr);
		void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc,
				u32 *esr_sm_sel);
		int (*handle_tpc_sm_ecc_exception)(struct gk20a *g,
				u32 gpc, u32 tpc, bool *post_event,
				struct channel_gk20a *fault_ch,
				u32 *hww_global_esr);
		int (*handle_sm_exception)(struct gk20a *g,
				u32 gpc, u32 tpc, u32 sm, bool *post_event,
				struct channel_gk20a *fault_ch,
				u32 *hww_global_esr);
		int (*handle_gcc_exception)(struct gk20a *g, u32 gpc, u32 tpc,
				bool *post_event,
				struct channel_gk20a *fault_ch,
				u32 *hww_global_esr);
		int (*handle_tex_exception)(struct gk20a *g, u32 gpc, u32 tpc,
				bool *post_event);
		int (*handle_gpc_gpccs_exception)(struct gk20a *g, u32 gpc,
				u32 gpc_exception);
		int (*handle_gpc_gpcmmu_exception)(struct gk20a *g, u32 gpc,
				u32 gpc_exception);
		void (*enable_gpc_exceptions)(struct gk20a *g);
		void (*enable_exceptions)(struct gk20a *g);
		void (*create_gr_sysfs)(struct device *dev);
		u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
		int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc);
		int (*update_sm_error_state)(struct gk20a *g,
				struct channel_gk20a *ch, u32 sm_id,
				struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state);
		int (*clear_sm_error_state)(struct gk20a *g,
				struct channel_gk20a *ch, u32 sm_id);
		int (*suspend_contexts)(struct gk20a *g,
				struct dbg_session_gk20a *dbg_s,
				int *ctx_resident_ch_fd);
		int (*resume_contexts)(struct gk20a *g,
				struct dbg_session_gk20a *dbg_s,
				int *ctx_resident_ch_fd);
		int (*set_preemption_mode)(struct channel_gk20a *ch,
				u32 graphics_preempt_mode,
				u32 compute_preempt_mode);
		int (*get_preemption_mode_flags)(struct gk20a *g,
				struct nvgpu_preemption_modes_rec *preemption_modes_rec);
		int (*set_ctxsw_preemption_mode)(struct gk20a *g,
				struct gr_ctx_desc *gr_ctx,
				struct vm_gk20a *vm, u32 class,
				u32 graphics_preempt_mode,
				u32 compute_preempt_mode);
		int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
		void (*update_boosted_ctx)(struct gk20a *g,
				struct nvgpu_mem *mem,
				struct gr_ctx_desc *gr_ctx);
		int (*fuse_override)(struct gk20a *g);
		void (*init_sm_id_table)(struct gk20a *g);
		int (*load_smid_config)(struct gk20a *g);
		void (*program_sm_id_numbering)(struct gk20a *g,
				u32 gpc, u32 tpc, u32 smid);
		void (*program_active_tpc_counts)(struct gk20a *g, u32 gpc);
		int (*setup_rop_mapping)(struct gk20a *g, struct gr_gk20a *gr);
		int (*init_sw_veid_bundle)(struct gk20a *g);
		void (*program_zcull_mapping)(struct gk20a *g,
				u32 zcull_alloc_num, u32 *zcull_map_tiles);
		int (*commit_global_timeslice)(struct gk20a *g,
				struct channel_gk20a *c, bool patch);
		int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
		void (*restore_context_header)(struct gk20a *g,
				struct nvgpu_mem *ctxheader);
		void (*write_zcull_ptr)(struct gk20a *g,
				struct nvgpu_mem *mem, u64 gpu_va);
		void (*write_pm_ptr)(struct gk20a *g,
				struct nvgpu_mem *mem, u64 gpu_va);
		void (*set_preemption_buffer_va)(struct gk20a *g,
				struct nvgpu_mem *mem, u64 gpu_va);
		void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine);
		void (*load_tpc_mask)(struct gk20a *g);
		int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch);
		int (*trigger_suspend)(struct gk20a *g);
		int (*wait_for_pause)(struct gk20a *g,
				struct warpstate *w_state);
		int (*resume_from_pause)(struct gk20a *g);
		int (*clear_sm_errors)(struct gk20a *g);
		u32 (*tpc_enabled_exceptions)(struct gk20a *g);
		int (*set_czf_bypass)(struct gk20a *g,
				struct channel_gk20a *ch);
		bool (*sm_debugger_attached)(struct gk20a *g);
		void (*suspend_single_sm)(struct gk20a *g,
				u32 gpc, u32 tpc, u32 sm,
				u32 global_esr_mask, bool check_errors);
		void (*suspend_all_sms)(struct gk20a *g, u32 global_esr_mask,
				bool check_errors);
		void (*resume_single_sm)(struct gk20a *g,
				u32 gpc, u32 tpc, u32 sm);
		void (*resume_all_sms)(struct gk20a *g);
	} gr;
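	/*
	 * Illustrative flow (a sketch under the usual debugger locking
	 * assumptions, not a verbatim driver sequence): a debug session
	 * wanting a stable SM state typically brackets its accesses with
	 *
	 *	g->ops.gr.suspend_all_sms(g, global_esr_mask, check_errors);
	 *	... inspect or modify SM state ...
	 *	g->ops.gr.resume_all_sms(g);
	 *
	 * with dbg_sessions_lock (see struct gk20a below) held.
	 */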
	struct {
		void (*init_hw)(struct gk20a *g);
		void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr);
		void (*init_fs_state)(struct gk20a *g);
		void (*reset)(struct gk20a *g);
		void (*init_uncompressed_kind_map)(struct gk20a *g);
		void (*init_kind_attr)(struct gk20a *g);
		void (*set_mmu_page_size)(struct gk20a *g);
		bool (*set_use_full_comp_tag_line)(struct gk20a *g);
		unsigned int (*compression_page_size)(struct gk20a *g);
		unsigned int (*compressible_page_size)(struct gk20a *g);
		void (*dump_vpr_wpr_info)(struct gk20a *g);
		int (*vpr_info_fetch)(struct gk20a *g);
		bool (*is_debug_mode_enabled)(struct gk20a *g);
		void (*set_debug_mode)(struct gk20a *g, bool enable);
		void (*tlb_invalidate)(struct gk20a *g, struct nvgpu_mem *pdb);
		void (*hub_isr)(struct gk20a *g);
	} fb;
	struct {
		void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_ce2_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_chiplet_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_ctxsw_firmware_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_fb_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_fifo_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_gr_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_ltc_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_perf_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_priring_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_pmu_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_therm_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_xbar_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_bus_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_ce_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_ctxsw_firmware_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_fb_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_fifo_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_gr_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_ltc_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_pwr_csb_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_pmu_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_xbar_load_gating_prod)(struct gk20a *g, bool prod);
		void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod);
	} clock_gating;
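	/*
	 * Illustrative call pattern (a sketch): the "prod" setters apply
	 * production clock-gating register values when their argument is
	 * true, e.g. at boot:
	 *
	 *	g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled);
	 */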
	struct {
		int (*init_fifo_setup_hw)(struct gk20a *g);
		void (*bind_channel)(struct channel_gk20a *ch_gk20a);
		void (*unbind_channel)(struct channel_gk20a *ch_gk20a);
		void (*disable_channel)(struct channel_gk20a *ch);
		void (*enable_channel)(struct channel_gk20a *ch);
		int (*alloc_inst)(struct gk20a *g, struct channel_gk20a *ch);
		void (*free_inst)(struct gk20a *g, struct channel_gk20a *ch);
		int (*setup_ramfc)(struct channel_gk20a *c, u64 gpfifo_base,
				u32 gpfifo_entries,
				unsigned long acquire_timeout, u32 flags);
		int (*resetup_ramfc)(struct channel_gk20a *c);
		int (*preempt_channel)(struct gk20a *g, u32 chid);
		int (*preempt_tsg)(struct gk20a *g, u32 tsgid);
		int (*update_runlist)(struct gk20a *g, u32 runlist_id,
				u32 chid, bool add, bool wait_for_finish);
		void (*trigger_mmu_fault)(struct gk20a *g,
				unsigned long engine_ids);
		void (*get_mmu_fault_info)(struct gk20a *g, u32 mmu_fault_id,
				struct mmu_fault_info *mmfault);
		void (*apply_pb_timeout)(struct gk20a *g);
		int (*wait_engine_idle)(struct gk20a *g);
		u32 (*get_num_fifos)(struct gk20a *g);
		u32 (*get_pbdma_signature)(struct gk20a *g);
		int (*channel_set_priority)(struct channel_gk20a *ch,
				u32 priority);
		int (*set_runlist_interleave)(struct gk20a *g, u32 id,
				bool is_tsg, u32 runlist_id, u32 new_level);
		int (*channel_set_timeslice)(struct channel_gk20a *ch,
				u32 timeslice);
		int (*tsg_set_timeslice)(struct tsg_gk20a *tsg, u32 timeslice);
		u32 (*default_timeslice_us)(struct gk20a *);
		int (*force_reset_ch)(struct channel_gk20a *ch,
				u32 err_code, bool verbose);
		int (*engine_enum_from_type)(struct gk20a *g, u32 engine_type,
				u32 *inst_id);
		void (*device_info_data_parse)(struct gk20a *g,
				u32 table_entry, u32 *inst_id,
				u32 *pri_base, u32 *fault_id);
		u32 (*device_info_fault_id)(u32 table_entry);
		int (*tsg_bind_channel)(struct tsg_gk20a *tsg,
				struct channel_gk20a *ch);
		int (*tsg_unbind_channel)(struct channel_gk20a *ch);
		int (*tsg_open)(struct tsg_gk20a *tsg);
		u32 (*eng_runlist_base_size)(void);
		int (*init_engine_info)(struct fifo_gk20a *f);
		u32 (*runlist_entry_size)(void);
		void (*get_tsg_runlist_entry)(struct tsg_gk20a *tsg,
				u32 *runlist);
		void (*get_ch_runlist_entry)(struct channel_gk20a *ch,
				u32 *runlist);
		u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch);
		void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch);
		u64 (*userd_pb_get)(struct gk20a *g, struct channel_gk20a *ch);
		void (*free_channel_ctx_header)(struct channel_gk20a *ch);
		bool (*is_fault_engine_subid_gpc)(struct gk20a *g,
				u32 engine_subid);
		void (*dump_pbdma_status)(struct gk20a *g,
				struct gk20a_debug_output *o);
		void (*dump_eng_status)(struct gk20a *g,
				struct gk20a_debug_output *o);
		void (*dump_channel_status_ramfc)(struct gk20a *g,
				struct gk20a_debug_output *o, u32 chid,
				struct ch_state *ch_state);
		u32 (*intr_0_error_mask)(struct gk20a *g);
		int (*is_preempt_pending)(struct gk20a *g, u32 id,
				unsigned int id_type,
				unsigned int timeout_rc_type);
		int (*preempt_ch_tsg)(struct gk20a *g, u32 id,
				unsigned int id_type,
				unsigned int timeout_rc_type);
		void (*init_pbdma_intr_descs)(struct fifo_gk20a *f);
		int (*reset_enable_hw)(struct gk20a *g);
		int (*setup_userd)(struct channel_gk20a *c);
		u32 (*pbdma_acquire_val)(u64 timeout);
		void (*teardown_ch_tsg)(struct gk20a *g, u32 act_eng_bitmask,
				u32 id, unsigned int id_type,
				unsigned int rc_type,
				struct mmu_fault_info *mmfault);
		bool (*handle_sched_error)(struct gk20a *g);
		bool (*handle_ctxsw_timeout)(struct gk20a *g, u32 fifo_intr);
		unsigned int (*handle_pbdma_intr_0)(struct gk20a *g,
				u32 pbdma_id, u32 pbdma_intr_0,
				u32 *handled, u32 *error_notifier);
		unsigned int (*handle_pbdma_intr_1)(struct gk20a *g,
				u32 pbdma_id, u32 pbdma_intr_1,
				u32 *handled, u32 *error_notifier);
		void (*init_eng_method_buffers)(struct gk20a *g,
				struct tsg_gk20a *tsg);
		void (*deinit_eng_method_buffers)(struct gk20a *g,
				struct tsg_gk20a *tsg);
#ifdef CONFIG_TEGRA_GK20A_NVHOST
		int (*alloc_syncpt_buf)(struct channel_gk20a *c,
				u32 syncpt_id, struct nvgpu_mem *syncpt_buf);
		void (*free_syncpt_buf)(struct channel_gk20a *c,
				struct nvgpu_mem *syncpt_buf);
		void (*add_syncpt_wait_cmd)(struct gk20a *g,
				struct priv_cmd_entry *cmd, u32 off,
				u32 id, u32 thresh, u64 gpu_va);
		u32 (*get_syncpt_wait_cmd_size)(void);
		void (*add_syncpt_incr_cmd)(struct gk20a *g, bool wfi_cmd,
				struct priv_cmd_entry *cmd, u32 id, u64 gpu_va);
		u32 (*get_syncpt_incr_cmd_size)(bool wfi_cmd);
#endif
	} fifo;
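	/*
	 * Illustrative call pattern (a sketch): adding a channel to its
	 * runlist and waiting for the update to take effect:
	 *
	 *	err = g->ops.fifo.update_runlist(g, runlist_id, chid,
	 *					 true, true);
	 *
	 * (runlist_id and chid are hypothetical locals identifying the
	 * engine runlist and the channel.)
	 */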
	struct pmu_v {
		u32 (*get_pmu_cmdline_args_size)(struct nvgpu_pmu *pmu);
		void (*set_pmu_cmdline_args_cpu_freq)(struct nvgpu_pmu *pmu,
				u32 freq);
		void (*set_pmu_cmdline_args_trace_size)(struct nvgpu_pmu *pmu,
				u32 size);
		void (*set_pmu_cmdline_args_trace_dma_base)(
				struct nvgpu_pmu *pmu);
		void (*set_pmu_cmdline_args_trace_dma_idx)(
				struct nvgpu_pmu *pmu, u32 idx);
		void *(*get_pmu_cmdline_args_ptr)(struct nvgpu_pmu *pmu);
		u32 (*get_pmu_allocation_struct_size)(struct nvgpu_pmu *pmu);
		void (*set_pmu_allocation_ptr)(struct nvgpu_pmu *pmu,
				void **pmu_alloc_ptr, void *assign_ptr);
		void (*pmu_allocation_set_dmem_size)(struct nvgpu_pmu *pmu,
				void *pmu_alloc_ptr, u16 size);
		u16 (*pmu_allocation_get_dmem_size)(struct nvgpu_pmu *pmu,
				void *pmu_alloc_ptr);
		u32 (*pmu_allocation_get_dmem_offset)(struct nvgpu_pmu *pmu,
				void *pmu_alloc_ptr);
		u32 *(*pmu_allocation_get_dmem_offset_addr)(
				struct nvgpu_pmu *pmu, void *pmu_alloc_ptr);
		void (*pmu_allocation_set_dmem_offset)(struct nvgpu_pmu *pmu,
				void *pmu_alloc_ptr, u32 offset);
		void *(*pmu_allocation_get_fb_addr)(
				struct nvgpu_pmu *pmu, void *pmu_alloc_ptr);
		u32 (*pmu_allocation_get_fb_size)(
				struct nvgpu_pmu *pmu, void *pmu_alloc_ptr);
		void (*get_pmu_init_msg_pmu_queue_params)(
				struct pmu_queue *queue, u32 id,
				void *pmu_init_msg);
		void *(*get_pmu_msg_pmu_init_msg_ptr)(
				struct pmu_init_msg *init);
		u16 (*get_pmu_init_msg_pmu_sw_mg_off)(
				union pmu_init_msg_pmu *init_msg);
		u16 (*get_pmu_init_msg_pmu_sw_mg_size)(
				union pmu_init_msg_pmu *init_msg);
		u32 (*get_pmu_perfmon_cmd_start_size)(void);
		int (*get_perfmon_cmd_start_offsetofvar)(
				enum pmu_perfmon_cmd_start_fields field);
		void (*perfmon_start_set_cmd_type)(struct pmu_perfmon_cmd *pc,
				u8 value);
		void (*perfmon_start_set_group_id)(struct pmu_perfmon_cmd *pc,
				u8 value);
		void (*perfmon_start_set_state_id)(struct pmu_perfmon_cmd *pc,
				u8 value);
		void (*perfmon_start_set_flags)(struct pmu_perfmon_cmd *pc,
				u8 value);
		u8 (*perfmon_start_get_flags)(struct pmu_perfmon_cmd *pc);
		u32 (*get_pmu_perfmon_cmd_init_size)(void);
		int (*get_perfmon_cmd_init_offsetofvar)(
				enum pmu_perfmon_cmd_start_fields field);
		void (*perfmon_cmd_init_set_sample_buffer)(
				struct pmu_perfmon_cmd *pc, u16 value);
		void (*perfmon_cmd_init_set_dec_cnt)(
				struct pmu_perfmon_cmd *pc, u8 value);
		void (*perfmon_cmd_init_set_base_cnt_id)(
				struct pmu_perfmon_cmd *pc, u8 value);
		void (*perfmon_cmd_init_set_samp_period_us)(
				struct pmu_perfmon_cmd *pc, u32 value);
		void (*perfmon_cmd_init_set_num_cnt)(struct pmu_perfmon_cmd *pc,
				u8 value);
		void (*perfmon_cmd_init_set_mov_avg)(struct pmu_perfmon_cmd *pc,
				u8 value);
		void *(*get_pmu_seq_in_a_ptr)(struct pmu_sequence *seq);
		void *(*get_pmu_seq_out_a_ptr)(struct pmu_sequence *seq);
		void (*set_pmu_cmdline_args_secure_mode)(struct nvgpu_pmu *pmu,
				u32 val);
		u32 (*get_perfmon_cntr_sz)(struct nvgpu_pmu *pmu);
		void *(*get_perfmon_cntr_ptr)(struct nvgpu_pmu *pmu);
		void (*set_perfmon_cntr_ut)(struct nvgpu_pmu *pmu, u16 ut);
		void (*set_perfmon_cntr_lt)(struct nvgpu_pmu *pmu, u16 lt);
		void (*set_perfmon_cntr_valid)(struct nvgpu_pmu *pmu, u8 val);
		void (*set_perfmon_cntr_index)(struct nvgpu_pmu *pmu, u8 val);
		void (*set_perfmon_cntr_group_id)(struct nvgpu_pmu *pmu,
				u8 gid);
		u8 (*pg_cmd_eng_buf_load_size)(struct pmu_pg_cmd *pg);
		void (*pg_cmd_eng_buf_load_set_cmd_type)(struct pmu_pg_cmd *pg,
				u8 value);
		void (*pg_cmd_eng_buf_load_set_engine_id)(struct pmu_pg_cmd *pg,
				u8 value);
		void (*pg_cmd_eng_buf_load_set_buf_idx)(struct pmu_pg_cmd *pg,
				u8 value);
		void (*pg_cmd_eng_buf_load_set_pad)(struct pmu_pg_cmd *pg,
				u8 value);
		void (*pg_cmd_eng_buf_load_set_buf_size)(struct pmu_pg_cmd *pg,
				u16 value);
		void (*pg_cmd_eng_buf_load_set_dma_base)(struct pmu_pg_cmd *pg,
				u32 value);
		void (*pg_cmd_eng_buf_load_set_dma_offset)(struct pmu_pg_cmd *pg,
				u8 value);
		void (*pg_cmd_eng_buf_load_set_dma_idx)(struct pmu_pg_cmd *pg,
				u8 value);
		/*
		 * Used when the ZBC table-update command ID changed from
		 * interface version 0 to version 1.
		 */
		u32 cmd_id_zbc_table_update;
		bool is_pmu_zbc_save_supported;
	} pmu_ver;
	struct {
		int (*get_netlist_name)(struct gk20a *g, int index, char *name);
		bool (*is_fw_defined)(void);
		bool use_dma_for_fw_bootstrap;
	} gr_ctx;
	struct {
		int (*init)(struct gk20a *g);
		int (*max_entries)(struct gk20a *,
				struct nvgpu_ctxsw_trace_filter *);
		int (*flush)(struct gk20a *g);
		int (*poll)(struct gk20a *g);
		int (*enable)(struct gk20a *g);
		int (*disable)(struct gk20a *g);
		bool (*is_enabled)(struct gk20a *g);
		int (*reset)(struct gk20a *g);
		int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
		int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
		int (*deinit)(struct gk20a *g);
		int (*alloc_user_buffer)(struct gk20a *g,
				void **buf, size_t *size);
		int (*free_user_buffer)(struct gk20a *g);
		int (*mmap_user_buffer)(struct gk20a *g,
				struct vm_area_struct *vma);
		int (*set_filter)(struct gk20a *g,
				struct nvgpu_ctxsw_trace_filter *filter);
	} fecs_trace;
	struct {
		bool (*support_sparse)(struct gk20a *g);
		u64 (*gmmu_map)(struct vm_gk20a *vm, u64 map_offset,
				struct sg_table *sgt, u64 buffer_offset,
				u64 size, int pgsz_idx, u8 kind_v,
				u32 ctag_offset, u32 flags, int rw_flag,
				bool clear_ctags, bool sparse, bool priv,
				struct vm_gk20a_mapping_batch *batch,
				enum nvgpu_aperture aperture);
		void (*gmmu_unmap)(struct vm_gk20a *vm, u64 vaddr, u64 size,
				int pgsz_idx, bool va_allocated, int rw_flag,
				bool sparse,
				struct vm_gk20a_mapping_batch *batch);
		int (*vm_bind_channel)(struct gk20a_as_share *as_share,
				struct channel_gk20a *ch);
		int (*fb_flush)(struct gk20a *g);
		void (*l2_invalidate)(struct gk20a *g);
		void (*l2_flush)(struct gk20a *g, bool invalidate);
		void (*cbc_clean)(struct gk20a *g);
		void (*set_big_page_size)(struct gk20a *g,
				struct nvgpu_mem *mem, int size);
		u32 (*get_big_page_sizes)(void);
		u32 (*get_default_big_page_size)(void);
		u32 (*get_physical_addr_bits)(struct gk20a *g);
		int (*init_mm_setup_hw)(struct gk20a *g);
		bool (*is_bar1_supported)(struct gk20a *g);
		int (*init_bar2_vm)(struct gk20a *g);
		int (*init_bar2_mm_hw_setup)(struct gk20a *g);
		void (*remove_bar2_vm)(struct gk20a *g);
		const struct gk20a_mmu_level *(*get_mmu_levels)(struct gk20a *g,
				u32 big_page_size);
		void (*init_pdb)(struct gk20a *g, struct nvgpu_mem *inst_block,
				struct vm_gk20a *vm);
		u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl,
				u32 flags);
		size_t (*get_vidmem_size)(struct gk20a *g);
		void (*init_inst_block)(struct nvgpu_mem *inst_block,
				struct vm_gk20a *vm, u32 big_page_size);
		bool (*mmu_fault_pending)(struct gk20a *g);
		/*
		 * This function is called to allocate secure memory (memory
		 * that the CPU cannot see). The function should fill the
		 * context buffer descriptor (especially the destroy, sgt and
		 * size fields).
		 */
		int (*secure_alloc)(struct gk20a *g,
				struct gr_ctx_buffer_desc *desc,
				size_t size);
	} mm;
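	/*
	 * Illustrative secure_alloc() skeleton (a sketch; real
	 * implementations are platform specific):
	 *
	 *	static int plat_secure_alloc(struct gk20a *g,
	 *				     struct gr_ctx_buffer_desc *desc,
	 *				     size_t size)
	 *	{
	 *		// allocate 'size' bytes of CPU-invisible memory,
	 *		// then publish it through the descriptor:
	 *		desc->size = size;
	 *		desc->sgt = ...;	// scatter table of the buffer
	 *		desc->destroy = ...;	// matching release callback
	 *		return 0;
	 *	}
	 */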
	struct {
		u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem,
				struct page_alloc_chunk *chunk, u32 w);
		void (*exit)(struct gk20a *g, struct nvgpu_mem *mem,
				struct page_alloc_chunk *chunk);
		u32 (*data032_r)(u32 i);
	} pramin;
	struct {
		int (*init_therm_setup_hw)(struct gk20a *g);
		int (*elcg_init_idle_filters)(struct gk20a *g);
#ifdef CONFIG_DEBUG_FS
		void (*therm_debugfs_init)(struct gk20a *g);
#endif
		int (*get_internal_sensor_curr_temp)(struct gk20a *g,
				u32 *temp_f24_8);
		void (*get_internal_sensor_limits)(s32 *max_24_8,
				s32 *min_24_8);
		u32 (*configure_therm_alert)(struct gk20a *g,
				s32 curr_warn_temp);
	} therm;
	struct {
		bool (*is_pmu_supported)(struct gk20a *g);
		int (*prepare_ucode)(struct gk20a *g);
		int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
		int (*pmu_nsbootstrap)(struct nvgpu_pmu *pmu);
		int (*pmu_setup_elpg)(struct gk20a *g);
		u32 (*pmu_get_queue_head)(u32 i);
		u32 (*pmu_get_queue_head_size)(void);
		u32 (*pmu_get_queue_tail_size)(void);
		u32 (*pmu_get_queue_tail)(u32 i);
		int (*pmu_queue_head)(struct nvgpu_pmu *pmu,
				struct pmu_queue *queue, u32 *head, bool set);
		int (*pmu_queue_tail)(struct nvgpu_pmu *pmu,
				struct pmu_queue *queue, u32 *tail, bool set);
		void (*pmu_msgq_tail)(struct nvgpu_pmu *pmu, u32 *tail,
				bool set);
		u32 (*pmu_mutex_size)(void);
		int (*pmu_mutex_acquire)(struct nvgpu_pmu *pmu, u32 id,
				u32 *token);
		int (*pmu_mutex_release)(struct nvgpu_pmu *pmu, u32 id,
				u32 *token);
		int (*init_wpr_region)(struct gk20a *g);
		int (*load_lsfalcon_ucode)(struct gk20a *g, u32 falconidmask);
		void (*write_dmatrfbase)(struct gk20a *g, u32 addr);
		void (*pmu_elpg_statistics)(struct gk20a *g, u32 pg_engine_id,
				struct pmu_pg_stats_data *pg_stat_data);
		int (*pmu_pg_init_param)(struct gk20a *g, u32 pg_engine_id);
		int (*pmu_pg_set_sub_feature_mask)(struct gk20a *g,
				u32 pg_engine_id);
		u32 (*pmu_pg_supported_engines_list)(struct gk20a *g);
		u32 (*pmu_pg_engines_feature_list)(struct gk20a *g,
				u32 pg_engine_id);
		bool (*pmu_is_lpwr_feature_supported)(struct gk20a *g,
				u32 feature_id);
		int (*pmu_lpwr_enable_pg)(struct gk20a *g, bool pstate_lock);
		int (*pmu_lpwr_disable_pg)(struct gk20a *g, bool pstate_lock);
		u32 (*pmu_pg_param_post_init)(struct gk20a *g);
		void (*dump_secure_fuses)(struct gk20a *g);
		int (*reset_engine)(struct gk20a *g, bool do_reset);
		bool (*is_engine_in_reset)(struct gk20a *g);
		int (*falcon_wait_for_halt)(struct gk20a *g,
				unsigned int timeout);
		int (*falcon_clear_halt_interrupt_status)(struct gk20a *g,
				unsigned int timeout);
		int (*init_falcon_setup_hw)(struct gk20a *g, void *desc,
				u32 bl_sz);
		bool (*is_lazy_bootstrap)(u32 falcon_id);
		bool (*is_priv_load)(u32 falcon_id);
		void (*get_wpr)(struct gk20a *g,
				struct wpr_carveout_info *inf);
		int (*alloc_blob_space)(struct gk20a *g, size_t size,
				struct nvgpu_mem *mem);
		int (*pmu_populate_loader_cfg)(struct gk20a *g, void *lsfm,
				u32 *p_bl_gen_desc_size);
		int (*flcn_populate_bl_dmem_desc)(struct gk20a *g, void *lsfm,
				u32 *p_bl_gen_desc_size, u32 falconid);
		void (*handle_ext_irq)(struct gk20a *g, u32 intr);
		void (*set_irqmask)(struct gk20a *g);
		u32 lspmuwprinitdone;
		u32 lsfloadedfalconid;
		bool fecsbootstrapdone;
	} pmu;
	struct {
		int (*init_debugfs)(struct gk20a *g);
		void (*disable_slowboot)(struct gk20a *g);
		int (*init_clk_support)(struct gk20a *g);
		int (*suspend_clk_support)(struct gk20a *g);
		u32 (*get_crystal_clk_hz)(struct gk20a *g);
		unsigned long (*measure_freq)(struct gk20a *g, u32 api_domain);
		unsigned long (*get_rate)(struct gk20a *g, u32 api_domain);
		int (*set_rate)(struct gk20a *g, u32 api_domain,
				unsigned long rate);
		unsigned long (*get_fmax_at_vmin_safe)(struct clk_gk20a *clk);
		u32 (*get_ref_clock_rate)(struct gk20a *g);
		int (*predict_mv_at_hz_cur_tfloor)(struct clk_gk20a *clk,
				unsigned long rate);
		unsigned long (*get_maxrate)(struct clk_gk20a *clk);
		int (*prepare_enable)(struct clk_gk20a *clk);
		void (*disable_unprepare)(struct clk_gk20a *clk);
		int (*get_voltage)(struct clk_gk20a *clk, u64 *val);
		int (*get_gpcclk_clock_counter)(struct clk_gk20a *clk,
				u64 *val);
		int (*pll_reg_write)(struct gk20a *g, u32 reg, u32 val);
		int (*get_pll_debug_data)(struct gk20a *g,
				struct nvgpu_clk_pll_debug_data *d);
		int (*mclk_init)(struct gk20a *g);
		void (*mclk_deinit)(struct gk20a *g);
		int (*mclk_change)(struct gk20a *g, u16 val);
	} clk;
	struct {
		u32 (*get_arbiter_clk_domains)(struct gk20a *g);
		int (*get_arbiter_clk_range)(struct gk20a *g, u32 api_domain,
				u16 *min_mhz, u16 *max_mhz);
		int (*get_arbiter_clk_default)(struct gk20a *g, u32 api_domain,
				u16 *default_mhz);
		/*
		 * This function is inherently unsafe to call while the
		 * arbiter is running; the arbiter must be blocked before
		 * calling it.
		 */
		int (*get_current_pstate)(struct gk20a *g);
	} clk_arb;
	struct {
		int (*handle_pmu_perf_event)(struct gk20a *g, void *pmu_msg);
	} perf;
	struct {
		const struct regop_offset_range *(
				*get_global_whitelist_ranges)(void);
		int (*get_global_whitelist_ranges_count)(void);
		const struct regop_offset_range *(
				*get_context_whitelist_ranges)(void);
		int (*get_context_whitelist_ranges_count)(void);
		const u32 *(*get_runcontrol_whitelist)(void);
		int (*get_runcontrol_whitelist_count)(void);
		const struct regop_offset_range *(
				*get_runcontrol_whitelist_ranges)(void);
		int (*get_runcontrol_whitelist_ranges_count)(void);
		const u32 *(*get_qctl_whitelist)(void);
		int (*get_qctl_whitelist_count)(void);
		const struct regop_offset_range *(
				*get_qctl_whitelist_ranges)(void);
		int (*get_qctl_whitelist_ranges_count)(void);
		int (*apply_smpc_war)(struct dbg_session_gk20a *dbg_s);
	} regops;
	struct {
		void (*intr_enable)(struct gk20a *g);
		void (*intr_unit_config)(struct gk20a *g, bool enable,
				bool is_stalling, u32 unit);
		void (*isr_stall)(struct gk20a *g);
		bool (*is_intr_hub_pending)(struct gk20a *g, u32 mc_intr);
		u32 (*intr_stall)(struct gk20a *g);
		void (*intr_stall_pause)(struct gk20a *g);
		void (*intr_stall_resume)(struct gk20a *g);
		u32 (*intr_nonstall)(struct gk20a *g);
		void (*intr_nonstall_pause)(struct gk20a *g);
		void (*intr_nonstall_resume)(struct gk20a *g);
		void (*enable)(struct gk20a *g, u32 units);
		void (*disable)(struct gk20a *g, u32 units);
		void (*reset)(struct gk20a *g, u32 units);
		u32 (*boot_0)(struct gk20a *g, u32 *arch, u32 *impl, u32 *rev);
		bool (*is_intr1_pending)(struct gk20a *g,
				enum nvgpu_unit unit, u32 mc_intr_1);
		u32 intr_mask_restore[4];
	} mc;
	struct {
		void (*show_dump)(struct gk20a *g,
				struct gk20a_debug_output *o);
	} debug;
	struct {
		int (*exec_reg_ops)(struct dbg_session_gk20a *dbg_s,
				struct nvgpu_dbg_gpu_reg_op *ops,
				u64 num_ops);
		int (*dbg_set_powergate)(struct dbg_session_gk20a *dbg_s,
				u32 mode);
		bool (*check_and_set_global_reservation)(
				struct dbg_session_gk20a *dbg_s,
				struct dbg_profiler_object_data *prof_obj);
		bool (*check_and_set_context_reservation)(
				struct dbg_session_gk20a *dbg_s,
				struct dbg_profiler_object_data *prof_obj);
		void (*release_profiler_reservation)(
				struct dbg_session_gk20a *dbg_s,
				struct dbg_profiler_object_data *prof_obj);
		int (*perfbuffer_enable)(struct gk20a *g, u64 offset,
				u32 size);
		int (*perfbuffer_disable)(struct gk20a *g);
	} dbg_session_ops;
	struct {
		void (*get_program_numbers)(struct gk20a *g,
				u32 block_height_log2, int *hprog, int *vprog);
		bool (*need_scatter_buffer)(struct gk20a *g);
		int (*populate_scatter_buffer)(struct gk20a *g,
				struct sg_table *sgt, size_t surface_size,
				void *scatter_buffer_ptr,
				size_t scatter_buffer_size);
	} cde;
	int (*get_litter_value)(struct gk20a *g, int value);
	int (*chip_init_gpu_characteristics)(struct gk20a *g);
	struct {
		void (*init_hw)(struct gk20a *g);
		void (*isr)(struct gk20a *g);
		int (*read_ptimer)(struct gk20a *g, u64 *value);
		int (*get_timestamps_zipper)(struct gk20a *g,
				u32 source_id, u32 count,
				struct nvgpu_cpu_time_correlation_sample *);
		int (*bar1_bind)(struct gk20a *g,
				struct nvgpu_mem *bar1_inst);
	} bus;
	int (*bios_init)(struct gk20a *g);
#if defined(CONFIG_GK20A_CYCLE_STATS)
	struct {
		int (*enable_snapshot)(struct channel_gk20a *ch,
				struct gk20a_cs_snapshot_client *client);
		void (*disable_snapshot)(struct gr_gk20a *gr);
		int (*check_data_available)(struct channel_gk20a *ch,
				u32 *pending, bool *hw_overflow);
		void (*set_handled_snapshots)(struct gk20a *g, u32 num);
		u32 (*allocate_perfmon_ids)(struct gk20a_cs_snapshot *data,
				u32 count);
		u32 (*release_perfmon_ids)(struct gk20a_cs_snapshot *data,
				u32 start, u32 count);
		int (*detach_snapshot)(struct channel_gk20a *ch,
				struct gk20a_cs_snapshot_client *client);
	} css;
#endif
	struct {
		int (*sw_init)(struct device *dev);
		int (*get_speed)(struct gk20a *g, u32 *xve_link_speed);
		int (*set_speed)(struct gk20a *g, u32 xve_link_speed);
		void (*available_speeds)(struct gk20a *g, u32 *speed_mask);
		u32 (*xve_readl)(struct gk20a *g, u32 reg);
		void (*xve_writel)(struct gk20a *g, u32 reg, u32 val);
		void (*disable_aspm)(struct gk20a *g);
		void (*reset_gpu)(struct gk20a *g);
#if defined(CONFIG_PCI_MSI)
		void (*rearm_msi)(struct gk20a *g);
#endif
		void (*enable_shadow_rom)(struct gk20a *g);
		void (*disable_shadow_rom)(struct gk20a *g);
	} xve;
	struct {
		void (*falcon_hal_sw_init)(struct nvgpu_falcon *flcn);
	} falcon;
	struct {
		void (*isr)(struct gk20a *g);
	} priv_ring;
	bool privsecurity;
	bool securegpccs;
	bool pmupstate;
};
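/*
 * Illustrative note (a sketch, not a verbatim quote of the driver): each
 * chip populates this vtable during HAL initialization, so common code
 * only ever calls through g->ops. A hypothetical init path looks like:
 *
 *	int chip_init_hal(struct gk20a *g)
 *	{
 *		g->ops.ltc.init_fs_state = chip_ltc_init_fs_state;
 *		g->ops.gr.init_fs_state = chip_gr_init_fs_state;
 *		...
 *		return 0;
 *	}
 */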
struct nvgpu_bios_ucode {
	u8 *bootloader;
	u32 bootloader_phys_base;
	u32 bootloader_size;
	u8 *ucode;
	u32 phys_base;
	u32 size;
	u8 *dmem;
	u32 dmem_phys_base;
	u32 dmem_size;
	u32 code_entry_point;
};

struct nvgpu_bios {
	u8 *data;
	size_t size;

	struct nvgpu_bios_ucode devinit;
	struct nvgpu_bios_ucode preos;

	u8 *devinit_tables;
	u32 devinit_tables_size;
	u8 *bootscripts;
	u32 bootscripts_size;

	u8 mem_strap_data_count;
	u16 mem_strap_xlat_tbl_ptr;

	u32 condition_table_ptr;

	u32 devinit_tables_phys_base;
	u32 devinit_script_phys_base;

	struct bit_token *perf_token;
	struct bit_token *clock_token;
	struct bit_token *virt_token;
	u32 expansion_rom_offset;
};

struct gk20a {
	struct device *dev;
	struct nvgpu_nvhost_dev *nvhost_dev;

	/*
	 * Used by <nvgpu/enabled.h>. Do not access directly!
	 */
	unsigned long *enabled_flags;

	atomic_t usage_count;

	atomic_t nonstall_ops;
	struct work_struct nonstall_fn_work;
	struct workqueue_struct *nonstall_work_queue;

	struct kref refcount;

	struct resource *reg_mem;
	void __iomem *regs;
	void __iomem *regs_saved;

	struct resource *bar1_mem;
	void __iomem *bar1;
	void __iomem *bar1_saved;

	const char *name;

	bool gpu_reset_done;
	bool power_on;
	bool suspended;

	u32 log_mask;
	u32 log_trace;

	struct rw_semaphore busy_lock;

	/*
	 * Guards access to hardware when the usual gk20a_{busy,idle} are
	 * skipped for submits; held for the channel lifetime but dropped
	 * for an ongoing gk20a_do_idle().
	 */
	struct rw_semaphore deterministic_busy;
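	/*
	 * Usage sketch (illustrative; the exact lock sites live in the
	 * submit and do_idle paths): deterministic channels hold
	 * deterministic_busy as readers for their lifetime, while
	 * gk20a_do_idle() takes it as a writer to fence them out:
	 *
	 *	down_read(&g->deterministic_busy);	// channel side
	 *	...
	 *	down_write(&g->deterministic_busy);	// do_idle side
	 */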
	struct nvgpu_falcon pmu_flcn;
	struct nvgpu_falcon sec2_flcn;
	struct nvgpu_falcon fecs_flcn;
	struct nvgpu_falcon gpccs_flcn;
	struct clk_gk20a clk;
	struct fifo_gk20a fifo;
	struct gr_gk20a gr;
	struct sim_gk20a sim;
	struct mm_gk20a mm;
	struct nvgpu_pmu pmu;
	struct acr_desc acr;
	struct ecc_gk20a ecc;
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	struct clk_pmupstate clk_pmu;
	struct perf_pmupstate perf_pmu;
	struct pmgr_pmupstate pmgr_pmu;
	struct therm_pmupstate therm_pmu;
#endif

#ifdef CONFIG_DEBUG_FS
	struct railgate_stats pstats;
#endif
	u32 gr_idle_timeout_default;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
	u32 timeouts_enabled;
#else
	bool timeouts_enabled;
#endif
	unsigned int ch_wdt_timeout_ms;

	struct nvgpu_mutex poweron_lock;
	struct nvgpu_mutex poweroff_lock;

	/* Channel priorities */
	u32 timeslice_low_priority_us;
	u32 timeslice_medium_priority_us;
	u32 timeslice_high_priority_us;
	u32 min_timeslice_us;
	u32 max_timeslice_us;

#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
	u32 runlist_interleave;
#else
	bool runlist_interleave;
#endif

	bool slcg_enabled;
	bool blcg_enabled;
	bool elcg_enabled;
	bool elpg_enabled;
	bool aelpg_enabled;
	bool can_elpg;
	bool mscg_enabled;
	bool forced_idle;
	bool forced_reset;
	bool allow_all;

	u32 default_pri_timeout;
	u32 ptimer_src_freq;

	bool can_railgate;
	bool user_railgate_disabled;
	int railgate_delay;

	unsigned int aggressive_sync_destroy_thresh;
	bool aggressive_sync_destroy;

	bool has_syncpoints;
	/* Debugfs knob for forcing syncpt support off at runtime. */
	u32 disable_syncpoints;

	bool support_pmu;
	bool is_virtual;

	u32 emc3d_ratio;

#ifdef CONFIG_DEBUG_FS
	struct nvgpu_spinlock debugfs_lock;
	struct dentry *debugfs_ltc_enabled;
	struct dentry *debugfs_timeouts_enabled;
	struct dentry *debugfs_gr_idle_timeout_default;
	struct dentry *debugfs_bypass_smmu;
	struct dentry *debugfs_disable_bigpage;
	struct dentry *debugfs_gr_default_attrib_cb_size;
	struct dentry *debugfs_timeslice_low_priority_us;
	struct dentry *debugfs_timeslice_medium_priority_us;
	struct dentry *debugfs_timeslice_high_priority_us;
	struct dentry *debugfs_runlist_interleave;
	struct dentry *debugfs_allocators;
	struct dentry *debugfs_xve;
	struct dentry *debugfs_kmem;
#endif
	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;

	/*
	 * A group of semaphore pools. One for each channel.
	 */
	struct nvgpu_semaphore_sea *sema_sea;

	/* List of pending SW semaphore waits. */
	struct nvgpu_list_node pending_sema_waits;
	struct nvgpu_raw_spinlock pending_sema_waits_lock;

	/* held while manipulating # of debug/profiler sessions present */
	/* also prevents debug sessions from attaching until released */
	struct nvgpu_mutex dbg_sessions_lock;
	int dbg_powergating_disabled_refcount; /* refcount for pg disable */
	int dbg_timeout_disabled_refcount; /* refcount for timeout disable */

	/* must hold dbg_sessions_lock before use */
	struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
	u32 dbg_regops_tmp_buf_ops;

	/* For perfbuf mapping */
	struct {
		struct dbg_session_gk20a *owner;
		u64 offset;
	} perfbuf;

	/* For profiler reservations */
	struct nvgpu_list_node profiler_objects;
	bool global_profiler_reservation_held;
	int profiler_reservation_count;

	void (*remove_support)(struct gk20a *);

	u64 pg_ingating_time_us;
	u64 pg_ungating_time_us;
	u32 pg_gating_cnt;

	struct nvgpu_spinlock mc_enable_lock;

	struct nvgpu_gpu_characteristics gpu_characteristics;

	struct gk20a_as as;

	struct nvgpu_mutex client_lock;
	int client_refcount; /* open channels and ctrl nodes */

	struct gpu_ops ops;

	int irqs_enabled;
	int irq_stall; /* can be same as irq_nonstall in case of PCI */
	int irq_nonstall;
	u32 max_ltc_count;
	u32 ltc_count;

	atomic_t hw_irq_stall_count;
	atomic_t hw_irq_nonstall_count;
	atomic_t sw_irq_stall_last_handled;
	wait_queue_head_t sw_irq_stall_last_handled_wq;
	atomic_t sw_irq_nonstall_last_handled;
	wait_queue_head_t sw_irq_nonstall_last_handled_wq;

	struct gk20a_channel_worker {
		struct nvgpu_thread poll_task;
		atomic_t put;
		struct nvgpu_cond wq;
		struct nvgpu_list_node items;
		struct nvgpu_spinlock items_lock;
	} channel_worker;

	struct gk20a_scale_profile *scale_profile;
	unsigned long last_freq;

	struct gk20a_ctxsw_trace *ctxsw_trace;
	struct gk20a_fecs_trace *fecs_trace;

	struct gk20a_sched_ctrl sched_ctrl;

	struct gk20a_cde_app cde_app;
	bool mmu_debug_ctrl;

	u32 tpc_fs_mask_user;

	struct nvgpu_bios bios;
#ifdef CONFIG_DEBUG_FS
	struct debugfs_blob_wrapper bios_blob;
#endif

	struct nvgpu_clk_arb *clk_arb;

	struct gk20a_ce_app ce_app;

	/* PCI device identifier */
	u16 pci_vendor_id, pci_device_id;
	u16 pci_subsystem_vendor_id, pci_subsystem_device_id;
	u16 pci_class;
	u8 pci_revision;

	/* PCIe power states. */
	bool xve_l0s;
	bool xve_l1;

	/* Current warning temp in sfxp24.8 */
	s32 curr_warn_temp;

#if defined(CONFIG_PCI_MSI)
	/* Check if msi is enabled */
	bool msi_enabled;
#endif
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	struct nvgpu_mem_alloc_tracker *vmallocs;
	struct nvgpu_mem_alloc_tracker *kmallocs;
#endif

	/*
	 * Some boards might be missing the power sensor, preventing
	 * monitoring of power, current and voltage.
	 */
	bool power_sensor_missing;

	/* memory training sequence and mclk switch scripts */
	u32 mem_config_idx;

#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
	phys_addr_t syncpt_unit_base;
	size_t syncpt_unit_size;
	u32 syncpt_size;
#endif
};

static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
{
	return g->timeouts_enabled ?
		g->gr_idle_timeout_default : MAX_SCHEDULE_TIMEOUT;
}

enum BAR0_DEBUG_OPERATION {
	BARO_ZERO_NOP = 0,
	OP_END = 'DONE',
	BAR0_READ32 = '0R32',
	BAR0_WRITE32 = '0W32',
};

struct share_buffer_head {
	enum BAR0_DEBUG_OPERATION operation;
	/* size of the operation item */
	u32 size;
	u32 completed;
	u32 failed;
	u64 context;
	u64 completion_callback;
};

struct gk20a_cyclestate_buffer_elem {
	struct share_buffer_head head;
	/* in */
	u64 p_data;
	u64 p_done;
	u32 offset_bar0;
	u16 first_bit;
	u16 last_bit;
	/* out */
	/* keep 64 bits to be consistent */
	u64 data;
};

/* operations that will need to be executed on the non-stall workqueue */
enum gk20a_nonstall_ops {
	gk20a_nonstall_ops_wakeup_semaphore = BIT(0), /* wake up semaphore */
	gk20a_nonstall_ops_post_events = BIT(1),
};

/* register accessors */
int gk20a_lockout_registers(struct gk20a *g);
int gk20a_restore_registers(struct gk20a *g);

void __nvgpu_check_gpu_state(struct gk20a *g);
void __gk20a_warn_on_no_regs(void);

static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v)
{
	if (unlikely(!g->regs)) {
		__gk20a_warn_on_no_regs();
		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		writel_relaxed(v, g->regs + r);
		wmb();
		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}
}

static inline u32 gk20a_readl(struct gk20a *g, u32 r)
{
	u32 v = 0xffffffff;

	if (unlikely(!g->regs)) {
		__gk20a_warn_on_no_regs();
		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		v = readl(g->regs + r);
		if (v == 0xffffffff)
			__nvgpu_check_gpu_state(g);
		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}

	return v;
}

static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v)
{
	if (unlikely(!g->regs)) {
		__gk20a_warn_on_no_regs();
		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		wmb();
		do {
			writel_relaxed(v, g->regs + r);
		} while (readl(g->regs + r) != v);
		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}
}

static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v)
{
	if (unlikely(!g->bar1)) {
		__gk20a_warn_on_no_regs();
		gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
	} else {
		wmb();
		writel_relaxed(v, g->bar1 + b);
		gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
	}
}

static inline u32 gk20a_bar1_readl(struct gk20a *g, u32 b)
{
	u32 v = 0xffffffff;

	if (unlikely(!g->bar1)) {
		__gk20a_warn_on_no_regs();
		gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
	} else {
		v = readl(g->bar1 + b);
		gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
	}

	return v;
}
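/*
 * Usage sketch (illustrative): a read-modify-write of a register field
 * through the accessors above and set_field() below. A value of
 * 0xffffffff from gk20a_readl() may indicate a dead bus, which is why
 * the read accessor cross-checks the GPU state.
 *
 *	u32 val = gk20a_readl(g, reg);		// reg: hypothetical offset
 *	val = set_field(val, mask, field);	// update one field
 *	gk20a_writel(g, reg, val);
 */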
/* convenience */
static inline struct gk20a *gk20a_from_as(struct gk20a_as *as)
{
	return container_of(as, struct gk20a, as);
}

static inline struct gk20a *gk20a_from_pmu(struct nvgpu_pmu *pmu)
{
	return container_of(pmu, struct gk20a, pmu);
}

static inline u32 u64_hi32(u64 n)
{
	return (u32)((n >> 32) & ~(u32)0);
}

static inline u32 u64_lo32(u64 n)
{
	return (u32)(n & ~(u32)0);
}

static inline u64 hi32_lo32_to_u64(u32 hi, u32 lo)
{
	return (((u64)hi) << 32) | (u64)lo;
}

static inline u32 set_field(u32 val, u32 mask, u32 field)
{
	return ((val & ~mask) | field);
}

static inline u32 get_field(u32 reg, u32 mask)
{
	return (reg & mask);
}

/* invalidate channel lookup tlb */
static inline void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr)
{
	nvgpu_spinlock_acquire(&gr->ch_tlb_lock);
	memset(gr->chid_tlb, 0,
	       sizeof(struct gr_channel_map_tlb_entry) *
	       GR_CHANNEL_MAP_TLB_SIZE);
	nvgpu_spinlock_release(&gr->ch_tlb_lock);
}

/*
 * Classes that the device supports.
 * TBD: get these from an open-sourced SDK?
 */
enum {
	KEPLER_C                  = 0xA297,
	FERMI_TWOD_A              = 0x902D,
	KEPLER_COMPUTE_A          = 0xA0C0,
	KEPLER_INLINE_TO_MEMORY_A = 0xA040,
	KEPLER_DMA_COPY_A         = 0xA0B5,
	KEPLER_CHANNEL_GPFIFO_C   = 0xA26F,
};

#define GK20A_BAR0_IORESOURCE_MEM 0
#define GK20A_BAR1_IORESOURCE_MEM 1
#define GK20A_SIM_IORESOURCE_MEM  2

void gk20a_busy_noresume(struct gk20a *g);
void gk20a_idle_nosuspend(struct gk20a *g);
int __must_check gk20a_busy(struct gk20a *g);
void gk20a_idle(struct gk20a *g);
int __gk20a_do_idle(struct gk20a *g, bool force_reset);
int __gk20a_do_unidle(struct gk20a *g);
int gk20a_can_busy(struct gk20a *g);
void gk20a_driver_start_unload(struct gk20a *g);
int gk20a_wait_for_idle(struct device *dev);

#define NVGPU_GPU_ARCHITECTURE_SHIFT 4

/*
 * Constructs a unique and compact GPUID from the
 * nvgpu_gpu_characteristics arch/impl fields.
 */
#define GK20A_GPUID(arch, impl) ((u32) ((arch) | (impl)))

#define GK20A_GPUID_GK20A \
	GK20A_GPUID(NVGPU_GPU_ARCH_GK100, NVGPU_GPU_IMPL_GK20A)
#define GK20A_GPUID_GM20B \
	GK20A_GPUID(NVGPU_GPU_ARCH_GM200, NVGPU_GPU_IMPL_GM20B)
#define NVGPU_GPUID_GP10B \
	GK20A_GPUID(NVGPU_GPU_ARCH_GP100, NVGPU_GPU_IMPL_GP10B)
#define GK20A_GPUID_GM20B_B \
	GK20A_GPUID(NVGPU_GPU_ARCH_GM200, NVGPU_GPU_IMPL_GM20B_B)
#define NVGPU_GPUID_GP104 \
	GK20A_GPUID(NVGPU_GPU_ARCH_GP100, NVGPU_GPU_IMPL_GP104)
#define NVGPU_GPUID_GP106 \
	GK20A_GPUID(NVGPU_GPU_ARCH_GP100, NVGPU_GPU_IMPL_GP106)

int gk20a_init_gpu_characteristics(struct gk20a *g);

int gk20a_user_init(struct device *dev, const char *interface_name,
		    struct class *class);
void gk20a_user_deinit(struct device *dev, struct class *class);

static inline u32 ptimer_scalingfactor10x(u32 ptimer_src_freq)
{
	return (u32)(((u64)(PTIMER_REF_FREQ_HZ * 10)) / ptimer_src_freq);
}

static inline u32 scale_ptimer(u32 timeout, u32 scale10x)
{
	if (((timeout * 10) % scale10x) >= (scale10x / 2))
		return ((timeout * 10) / scale10x) + 1;
	else
		return (timeout * 10) / scale10x;
}
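/*
 * Worked example (illustrative): scale_ptimer() rescales a timeout by
 * the ratio of PTIMER_REF_FREQ_HZ to the actual ptimer source frequency,
 * rounding to nearest. With ptimer_src_freq == PTIMER_REF_FREQ_HZ
 * (31.25 MHz), ptimer_scalingfactor10x() returns 10 and the timeout is
 * unchanged; at half the reference frequency it returns 20 and the
 * scaled value is halved:
 *
 *	scale_ptimer(100, 10) == 100
 *	scale_ptimer(100, 20) == 50
 */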
int gk20a_prepare_poweroff(struct gk20a *g);
int gk20a_finalize_poweron(struct gk20a *g);

void nvgpu_wait_for_deferred_interrupts(struct gk20a *g);

struct gk20a * __must_check gk20a_get(struct gk20a *g);
void gk20a_put(struct gk20a *g);

static inline bool gk20a_platform_has_syncpoints(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	return g->has_syncpoints && !g->disable_syncpoints;
#else
	return false;
#endif
}

#endif /* GK20A_H */