/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/pmu.h>
#include <nvgpu/dma.h>
#include <nvgpu/log.h>
#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
#include <nvgpu/firmware.h>
#include <nvgpu/enabled.h>
#include <nvgpu/utils.h>
#include <nvgpu/gk20a.h>

#include "boardobj/boardobj.h"
#include "boardobj/boardobjgrp.h"

/* PMU NS UCODE IMG */
#define NVGPU_PMU_NS_UCODE_IMAGE	"gpmu_ucode.bin"

/* PMU F/W version */
#define APP_VERSION_GPU_NEXT	24408680U
#define APP_VERSION_GV11B	25005711U
#define APP_VERSION_GV10X	23647491U
#define APP_VERSION_GP10X	24076634U
#define APP_VERSION_GP10B	23782727U
#define APP_VERSION_GM20B	20490253U

/* PMU version specific functions */
static u32 pmu_perfmon_cntr_sz_v2(struct nvgpu_pmu *pmu)
{
	return sizeof(struct pmu_perfmon_counter_v2);
}

static void *get_perfmon_cntr_ptr_v2(struct nvgpu_pmu *pmu)
{
	return (void *)(&pmu->perfmon_counter_v2);
}

static void set_perfmon_cntr_ut_v2(struct nvgpu_pmu *pmu, u16 ut)
{
	pmu->perfmon_counter_v2.upper_threshold = ut;
}

static void set_perfmon_cntr_lt_v2(struct nvgpu_pmu *pmu, u16 lt)
{
	pmu->perfmon_counter_v2.lower_threshold = lt;
}

static void set_perfmon_cntr_valid_v2(struct nvgpu_pmu *pmu, u8 valid)
{
	pmu->perfmon_counter_v2.valid = valid;
}

static void set_perfmon_cntr_index_v2(struct nvgpu_pmu *pmu, u8 index)
{
	pmu->perfmon_counter_v2.index = index;
}

static void set_perfmon_cntr_group_id_v2(struct nvgpu_pmu *pmu, u8 gid)
{
	pmu->perfmon_counter_v2.group_id = gid;
}

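/* PMU command line argument accessors; the args struct layout differs per ucode version. */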
static void set_pmu_cmdline_args_falctracedmabase_v4(struct nvgpu_pmu *pmu)
{
	pmu->args_v4.dma_addr.dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100U;
	pmu->args_v4.dma_addr.dma_base1 = 0;
	pmu->args_v4.dma_addr.dma_offset = 0;
}

static u32 pmu_cmdline_size_v4(struct nvgpu_pmu *pmu)
{
	return sizeof(struct pmu_cmdline_args_v4);
}

static void set_pmu_cmdline_args_cpufreq_v4(struct nvgpu_pmu *pmu, u32 freq)
{
	pmu->args_v4.cpu_freq_hz = freq;
}
static void set_pmu_cmdline_args_secure_mode_v4(struct nvgpu_pmu *pmu, u32 val)
{
	pmu->args_v4.secure_mode = val;
}

static void set_pmu_cmdline_args_falctracesize_v4(
			struct nvgpu_pmu *pmu, u32 size)
{
	pmu->args_v4.falc_trace_size = size;
}
static void set_pmu_cmdline_args_falctracedmaidx_v4(
			struct nvgpu_pmu *pmu, u32 idx)
{
	pmu->args_v4.falc_trace_dma_idx = idx;
}

static u32 pmu_cmdline_size_v5(struct nvgpu_pmu *pmu)
{
	return sizeof(struct pmu_cmdline_args_v5);
}

static u32 pmu_cmdline_size_v6(struct nvgpu_pmu *pmu)
{
	return sizeof(struct pmu_cmdline_args_v6);
}

static void set_pmu_cmdline_args_cpufreq_v5(struct nvgpu_pmu *pmu, u32 freq)
{
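	/* cpu_freq_hz is hard-coded to 204 MHz for the v5 args; the freq argument is not used. */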
	pmu->args_v5.cpu_freq_hz = 204000000;
}
static void set_pmu_cmdline_args_secure_mode_v5(struct nvgpu_pmu *pmu, u32 val)
{
	pmu->args_v5.secure_mode = val;
}

static void set_pmu_cmdline_args_falctracesize_v5(
			struct nvgpu_pmu *pmu, u32 size)
{
	/* set by surface describe */
}

static void set_pmu_cmdline_args_falctracedmabase_v5(struct nvgpu_pmu *pmu)
{
	struct gk20a *g = gk20a_from_pmu(pmu);

	nvgpu_pmu_surface_describe(g, &pmu->trace_buf, &pmu->args_v5.trace_buf);
}

static void config_pmu_cmdline_args_super_surface_v6(struct nvgpu_pmu *pmu)
{
	struct gk20a *g = gk20a_from_pmu(pmu);

	if (g->ops.pmu.alloc_super_surface) {
		nvgpu_pmu_surface_describe(g, &pmu->super_surface_buf,
			&pmu->args_v6.super_surface);
	}
}

static void set_pmu_cmdline_args_falctracedmaidx_v5(
			struct nvgpu_pmu *pmu, u32 idx)
{
	/* set by surface describe */
}

static u32 pmu_cmdline_size_v3(struct nvgpu_pmu *pmu)
{
	return sizeof(struct pmu_cmdline_args_v3);
}

static void set_pmu_cmdline_args_cpufreq_v3(struct nvgpu_pmu *pmu, u32 freq)
{
	pmu->args_v3.cpu_freq_hz = freq;
}
static void set_pmu_cmdline_args_secure_mode_v3(struct nvgpu_pmu *pmu, u32 val)
{
	pmu->args_v3.secure_mode = val;
}

static void set_pmu_cmdline_args_falctracesize_v3(
			struct nvgpu_pmu *pmu, u32 size)
{
	pmu->args_v3.falc_trace_size = size;
}

static void set_pmu_cmdline_args_falctracedmabase_v3(struct nvgpu_pmu *pmu)
{
	pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100U;
}

static void set_pmu_cmdline_args_falctracedmaidx_v3(
			struct nvgpu_pmu *pmu, u32 idx)
{
	pmu->args_v3.falc_trace_dma_idx = idx;
}

static void *get_pmu_cmdline_args_ptr_v4(struct nvgpu_pmu *pmu)
{
	return (void *)(&pmu->args_v4);
}

static void *get_pmu_cmdline_args_ptr_v3(struct nvgpu_pmu *pmu)
{
	return (void *)(&pmu->args_v3);
}

static void *get_pmu_cmdline_args_ptr_v5(struct nvgpu_pmu *pmu)
{
	return (void *)(&pmu->args_v5);
}

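/* Accessors for the version-specific pmu_allocation descriptors (DMEM/FB allocations). */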
static u32 get_pmu_allocation_size_v3(struct nvgpu_pmu *pmu)
{
	return sizeof(struct pmu_allocation_v3);
}

static u32 get_pmu_allocation_size_v2(struct nvgpu_pmu *pmu)
{
	return sizeof(struct pmu_allocation_v2);
}

static u32 get_pmu_allocation_size_v1(struct nvgpu_pmu *pmu)
{
	return sizeof(struct pmu_allocation_v1);
}

static void set_pmu_allocation_ptr_v3(struct nvgpu_pmu *pmu,
	void **pmu_alloc_ptr, void *assign_ptr)
{
	struct pmu_allocation_v3 **pmu_a_ptr =
		(struct pmu_allocation_v3 **)pmu_alloc_ptr;

	*pmu_a_ptr = (struct pmu_allocation_v3 *)assign_ptr;
}

static void set_pmu_allocation_ptr_v2(struct nvgpu_pmu *pmu,
	void **pmu_alloc_ptr, void *assign_ptr)
{
	struct pmu_allocation_v2 **pmu_a_ptr =
		(struct pmu_allocation_v2 **)pmu_alloc_ptr;

	*pmu_a_ptr = (struct pmu_allocation_v2 *)assign_ptr;
}

static void set_pmu_allocation_ptr_v1(struct nvgpu_pmu *pmu,
	void **pmu_alloc_ptr, void *assign_ptr)
{
	struct pmu_allocation_v1 **pmu_a_ptr =
		(struct pmu_allocation_v1 **)pmu_alloc_ptr;

	*pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
}

static void pmu_allocation_set_dmem_size_v3(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr, u16 size)
{
	struct pmu_allocation_v3 *pmu_a_ptr =
		(struct pmu_allocation_v3 *)pmu_alloc_ptr;

	pmu_a_ptr->alloc.dmem.size = size;
}

static void pmu_allocation_set_dmem_size_v2(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr, u16 size)
{
	struct pmu_allocation_v2 *pmu_a_ptr =
		(struct pmu_allocation_v2 *)pmu_alloc_ptr;

	pmu_a_ptr->alloc.dmem.size = size;
}

static void pmu_allocation_set_dmem_size_v1(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr, u16 size)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;

	pmu_a_ptr->alloc.dmem.size = size;
}

static u16 pmu_allocation_get_dmem_size_v3(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v3 *pmu_a_ptr =
		(struct pmu_allocation_v3 *)pmu_alloc_ptr;

	return pmu_a_ptr->alloc.dmem.size;
}

static u16 pmu_allocation_get_dmem_size_v2(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v2 *pmu_a_ptr =
		(struct pmu_allocation_v2 *)pmu_alloc_ptr;

	return pmu_a_ptr->alloc.dmem.size;
}

static u16 pmu_allocation_get_dmem_size_v1(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;

	return pmu_a_ptr->alloc.dmem.size;
}

static u32 pmu_allocation_get_dmem_offset_v3(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v3 *pmu_a_ptr =
		(struct pmu_allocation_v3 *)pmu_alloc_ptr;

	return pmu_a_ptr->alloc.dmem.offset;
}

static u32 pmu_allocation_get_dmem_offset_v2(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v2 *pmu_a_ptr =
		(struct pmu_allocation_v2 *)pmu_alloc_ptr;

	return pmu_a_ptr->alloc.dmem.offset;
}

static u32 pmu_allocation_get_dmem_offset_v1(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;

	return pmu_a_ptr->alloc.dmem.offset;
}

static u32 *pmu_allocation_get_dmem_offset_addr_v3(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v3 *pmu_a_ptr =
		(struct pmu_allocation_v3 *)pmu_alloc_ptr;

	return &pmu_a_ptr->alloc.dmem.offset;
}

static void *pmu_allocation_get_fb_addr_v3(
				struct nvgpu_pmu *pmu, void *pmu_alloc_ptr)
{
	struct pmu_allocation_v3 *pmu_a_ptr =
			(struct pmu_allocation_v3 *)pmu_alloc_ptr;

	return (void *)&pmu_a_ptr->alloc.fb;
}

static u32 pmu_allocation_get_fb_size_v3(
				struct nvgpu_pmu *pmu, void *pmu_alloc_ptr)
{
	struct pmu_allocation_v3 *pmu_a_ptr =
			(struct pmu_allocation_v3 *)pmu_alloc_ptr;

	return sizeof(pmu_a_ptr->alloc.fb);
}

static u32 *pmu_allocation_get_dmem_offset_addr_v2(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v2 *pmu_a_ptr =
		(struct pmu_allocation_v2 *)pmu_alloc_ptr;

	return &pmu_a_ptr->alloc.dmem.offset;
}

static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;

	return &pmu_a_ptr->alloc.dmem.offset;
}

static void pmu_allocation_set_dmem_offset_v3(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr, u32 offset)
{
	struct pmu_allocation_v3 *pmu_a_ptr =
		(struct pmu_allocation_v3 *)pmu_alloc_ptr;

	pmu_a_ptr->alloc.dmem.offset = offset;
}

static void pmu_allocation_set_dmem_offset_v2(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr, u32 offset)
{
	struct pmu_allocation_v2 *pmu_a_ptr =
		(struct pmu_allocation_v2 *)pmu_alloc_ptr;

	pmu_a_ptr->alloc.dmem.offset = offset;
}

static void pmu_allocation_set_dmem_offset_v1(struct nvgpu_pmu *pmu,
	void *pmu_alloc_ptr, u32 offset)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;

	pmu_a_ptr->alloc.dmem.offset = offset;
}

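/* PMU INIT message accessors. */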
static void *get_pmu_msg_pmu_init_msg_ptr_v5(struct pmu_init_msg *init)
{
	return (void *)(&(init->pmu_init_v5));
}

static void *get_pmu_msg_pmu_init_msg_ptr_v4(struct pmu_init_msg *init)
{
	return (void *)(&(init->pmu_init_v4));
}

static void *get_pmu_msg_pmu_init_msg_ptr_v3(struct pmu_init_msg *init)
{
	return (void *)(&(init->pmu_init_v3));
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v5(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v5 *init =
		(struct pmu_init_msg_pmu_v5 *)(&init_msg->v5);

	return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v4(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v4 *init =
		(struct pmu_init_msg_pmu_v4 *)(&init_msg->v4);

	return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v3(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v3 *init =
		(struct pmu_init_msg_pmu_v3 *)(&init_msg->v3);

	return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v5(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v5 *init =
		(struct pmu_init_msg_pmu_v5 *)(&init_msg->v5);

	return init->sw_managed_area_size;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v4(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v4 *init =
		(struct pmu_init_msg_pmu_v4 *)(&init_msg->v4);

	return init->sw_managed_area_size;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v3(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v3 *init =
		(struct pmu_init_msg_pmu_v3 *)(&init_msg->v3);

	return init->sw_managed_area_size;
}

static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
{
	return (void *)(&(init->pmu_init_v1));
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);

	return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);

	return init->sw_managed_area_size;
}

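/* PERFMON command size and field accessors. */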
static u32 get_pmu_perfmon_cmd_start_size_v3(void)
{
	return sizeof(struct pmu_perfmon_cmd_start_v3);
}

static u32 get_pmu_perfmon_cmd_start_size_v2(void)
{
	return sizeof(struct pmu_perfmon_cmd_start_v2);
}

static u32 get_pmu_perfmon_cmd_start_size_v1(void)
{
	return sizeof(struct pmu_perfmon_cmd_start_v1);
}

static int get_perfmon_cmd_start_offsetofvar_v3(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_start_v3,
		counter_alloc);
	default:
		return -EINVAL;
	}

	return 0;
}

static int get_perfmon_cmd_start_offsetofvar_v2(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_start_v2,
		counter_alloc);
	default:
		return -EINVAL;
	}

	return 0;
}

static int get_perfmon_cmd_start_offsetofvar_v1(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_start_v1,
		counter_alloc);
	default:
		return -EINVAL;
	}

	return 0;
}

static u32 get_pmu_perfmon_cmd_init_size_v3(void)
{
	return sizeof(struct pmu_perfmon_cmd_init_v3);
}

static u32 get_pmu_perfmon_cmd_init_size_v2(void)
{
	return sizeof(struct pmu_perfmon_cmd_init_v2);
}

static u32 get_pmu_perfmon_cmd_init_size_v1(void)
{
	return sizeof(struct pmu_perfmon_cmd_init_v1);
}

static int get_perfmon_cmd_init_offsetofvar_v3(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_init_v3,
		counter_alloc);
	default:
		return -EINVAL;
	}

	return 0;
}

static int get_perfmon_cmd_init_offsetofvar_v2(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_init_v2,
		counter_alloc);
	default:
		return -EINVAL;
	}

	return 0;
}

static int get_perfmon_cmd_init_offsetofvar_v1(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_init_v1,
		counter_alloc);
	default:
		return -EINVAL;
	}

	return 0;
}

static void perfmon_start_set_cmd_type_v3(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;

	start->cmd_type = value;
}

static void perfmon_start_set_cmd_type_v2(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;

	start->cmd_type = value;
}

static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;

	start->cmd_type = value;
}

static void perfmon_start_set_group_id_v3(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;

	start->group_id = value;
}

static void perfmon_start_set_group_id_v2(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;

	start->group_id = value;
}

static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;

	start->group_id = value;
}

static void perfmon_start_set_state_id_v3(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;

	start->state_id = value;
}

static void perfmon_start_set_state_id_v2(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;

	start->state_id = value;
}

static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;

	start->state_id = value;
}

static void perfmon_start_set_flags_v3(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;

	start->flags = value;
}

static void perfmon_start_set_flags_v2(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;

	start->flags = value;
}

static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;

	start->flags = value;
}

static u8 perfmon_start_get_flags_v3(struct pmu_perfmon_cmd *pc)
{
	struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;

	return start->flags;
}

static u8 perfmon_start_get_flags_v2(struct pmu_perfmon_cmd *pc)
{
	struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;

	return start->flags;
}

static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;

	return start->flags;
}

static void perfmon_cmd_init_set_sample_buffer_v3(struct pmu_perfmon_cmd *pc,
	u16 value)
{
	struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;

	init->sample_buffer = value;
}

static void perfmon_cmd_init_set_sample_buffer_v2(struct pmu_perfmon_cmd *pc,
	u16 value)
{
	struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;

	init->sample_buffer = value;
}


static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
	u16 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;

	init->sample_buffer = value;
}

static void perfmon_cmd_init_set_dec_cnt_v3(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;

	init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_dec_cnt_v2(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;

	init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;

	init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v3(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;

	init->base_counter_id = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v2(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;

	init->base_counter_id = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;

	init->base_counter_id = value;
}

static void perfmon_cmd_init_set_samp_period_us_v3(struct pmu_perfmon_cmd *pc,
	u32 value)
{
	struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;

	init->sample_period_us = value;
}

static void perfmon_cmd_init_set_samp_period_us_v2(struct pmu_perfmon_cmd *pc,
	u32 value)
{
	struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;

	init->sample_period_us = value;
}

static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
	u32 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;

	init->sample_period_us = value;
}

static void perfmon_cmd_init_set_num_cnt_v3(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;

	init->num_counters = value;
}

static void perfmon_cmd_init_set_num_cnt_v2(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;

	init->num_counters = value;
}

static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;

	init->num_counters = value;
}

static void perfmon_cmd_init_set_mov_avg_v3(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;

	init->samples_in_moving_avg = value;
}

static void perfmon_cmd_init_set_mov_avg_v2(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;

	init->samples_in_moving_avg = value;
}

static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;

	init->samples_in_moving_avg = value;
}

static void get_pmu_init_msg_pmu_queue_params_v1(
	struct nvgpu_falcon_queue *queue,
	u32 id, void *pmu_init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)pmu_init_msg;

	queue->index    = init->queue_info[id].index;
	queue->offset   = init->queue_info[id].offset;
	queue->size = init->queue_info[id].size;
}

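/*
 * In the v3/v4/v5 INIT message layouts the per-queue regions are packed back
 * to back, so a queue's offset is queue_offset plus the sizes of all
 * preceding queues; the logical queue ID is first remapped to the
 * firmware's queue index.
 */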
static void get_pmu_init_msg_pmu_queue_params_v4(
	struct nvgpu_falcon_queue *queue,
	u32 id, void *pmu_init_msg)
{
	struct pmu_init_msg_pmu_v4 *init = pmu_init_msg;
	u32 current_ptr = 0;
	u8 i;
	u8 tmp_id = id;

	if (tmp_id == PMU_COMMAND_QUEUE_HPQ) {
		tmp_id = PMU_QUEUE_HPQ_IDX_FOR_V3;
	} else if (tmp_id == PMU_COMMAND_QUEUE_LPQ) {
		tmp_id = PMU_QUEUE_LPQ_IDX_FOR_V3;
	} else if (tmp_id == PMU_MESSAGE_QUEUE) {
		tmp_id = PMU_QUEUE_MSG_IDX_FOR_V3;
	} else {
		return;
	}

	queue->index    = init->queue_index[tmp_id];
	queue->size = init->queue_size[tmp_id];
	if (tmp_id != 0U) {
		for (i = 0 ; i < tmp_id; i++) {
			current_ptr += init->queue_size[i];
		}
	}
	queue->offset   = init->queue_offset + current_ptr;
}

static void get_pmu_init_msg_pmu_queue_params_v5(
	struct nvgpu_falcon_queue *queue,
	u32 id, void *pmu_init_msg)
{
	struct pmu_init_msg_pmu_v5 *init = pmu_init_msg;
	u32 current_ptr = 0;
	u8 i;
	u8 tmp_id = id;

	if (tmp_id == PMU_COMMAND_QUEUE_HPQ) {
		tmp_id = PMU_QUEUE_HPQ_IDX_FOR_V3;
	} else if (tmp_id == PMU_COMMAND_QUEUE_LPQ) {
		tmp_id = PMU_QUEUE_LPQ_IDX_FOR_V3;
	} else if (tmp_id == PMU_MESSAGE_QUEUE) {
		tmp_id = PMU_QUEUE_MSG_IDX_FOR_V5;
	} else {
		return;
	}

	queue->index    = init->queue_index[tmp_id];
	queue->size = init->queue_size[tmp_id];
	if (tmp_id != 0U) {
		for (i = 0 ; i < tmp_id; i++) {
			current_ptr += init->queue_size[i];
		}
	}
	queue->offset   = init->queue_offset + current_ptr;
}

static void get_pmu_init_msg_pmu_queue_params_v3(
	struct nvgpu_falcon_queue *queue,
	u32 id, void *pmu_init_msg)
{
	struct pmu_init_msg_pmu_v3 *init =
		(struct pmu_init_msg_pmu_v3 *)pmu_init_msg;
	u32 current_ptr = 0;
	u8 i;
	u8 tmp_id = id;

	if (tmp_id == PMU_COMMAND_QUEUE_HPQ) {
		tmp_id = PMU_QUEUE_HPQ_IDX_FOR_V3;
	} else if (tmp_id == PMU_COMMAND_QUEUE_LPQ) {
		tmp_id = PMU_QUEUE_LPQ_IDX_FOR_V3;
	} else if (tmp_id == PMU_MESSAGE_QUEUE) {
		tmp_id = PMU_QUEUE_MSG_IDX_FOR_V3;
	} else {
		return;
	}
	queue->index    = init->queue_index[tmp_id];
	queue->size = init->queue_size[tmp_id];
	if (tmp_id != 0U) {
		for (i = 0 ; i < tmp_id; i++) {
			current_ptr += init->queue_size[i];
		}
	}
	queue->offset   = init->queue_offset + current_ptr;
}

static void *get_pmu_sequence_in_alloc_ptr_v3(struct pmu_sequence *seq)
{
	return (void *)(&seq->in_v3);
}

static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
{
	return (void *)(&seq->in_v1);
}

static void *get_pmu_sequence_out_alloc_ptr_v3(struct pmu_sequence *seq)
{
	return (void *)(&seq->out_v3);
}

static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
{
	return (void *)(&seq->out_v1);
}

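/* PG engine buffer load command accessors per ucode version. */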
static u8 pg_cmd_eng_buf_load_size_v0(struct pmu_pg_cmd *pg)
{
	return sizeof(pg->eng_buf_load_v0);
}

static u8 pg_cmd_eng_buf_load_size_v1(struct pmu_pg_cmd *pg)
{
	return sizeof(pg->eng_buf_load_v1);
}

static u8 pg_cmd_eng_buf_load_size_v2(struct pmu_pg_cmd *pg)
{
	return sizeof(pg->eng_buf_load_v2);
}

static void pg_cmd_eng_buf_load_set_cmd_type_v0(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v0.cmd_type = value;
}

static void pg_cmd_eng_buf_load_set_cmd_type_v1(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v1.cmd_type = value;
}

static void pg_cmd_eng_buf_load_set_cmd_type_v2(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v2.cmd_type = value;
}

static void pg_cmd_eng_buf_load_set_engine_id_v0(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v0.engine_id = value;
}
static void pg_cmd_eng_buf_load_set_engine_id_v1(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v1.engine_id = value;
}
static void pg_cmd_eng_buf_load_set_engine_id_v2(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v2.engine_id = value;
}
static void pg_cmd_eng_buf_load_set_buf_idx_v0(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v0.buf_idx = value;
}
static void pg_cmd_eng_buf_load_set_buf_idx_v1(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v1.buf_idx = value;
}
static void pg_cmd_eng_buf_load_set_buf_idx_v2(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v2.buf_idx = value;
}

static void pg_cmd_eng_buf_load_set_pad_v0(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v0.pad = value;
}
static void pg_cmd_eng_buf_load_set_pad_v1(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v1.pad = value;
}
static void pg_cmd_eng_buf_load_set_pad_v2(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v2.pad = value;
}

static void pg_cmd_eng_buf_load_set_buf_size_v0(struct pmu_pg_cmd *pg,
	u16 value)
{
	pg->eng_buf_load_v0.buf_size = value;
}
static void pg_cmd_eng_buf_load_set_buf_size_v1(struct pmu_pg_cmd *pg,
	u16 value)
{
	pg->eng_buf_load_v1.dma_desc.dma_size = value;
}
static void pg_cmd_eng_buf_load_set_buf_size_v2(struct pmu_pg_cmd *pg,
	u16 value)
{
	pg->eng_buf_load_v2.dma_desc.params = value;
}

static void pg_cmd_eng_buf_load_set_dma_base_v0(struct pmu_pg_cmd *pg,
	u32 value)
{
	pg->eng_buf_load_v0.dma_base = (value >> 8);
}
static void pg_cmd_eng_buf_load_set_dma_base_v1(struct pmu_pg_cmd *pg,
	u32 value)
{
	pg->eng_buf_load_v1.dma_desc.dma_addr.lo |= u64_lo32(value);
	pg->eng_buf_load_v1.dma_desc.dma_addr.hi |= u64_hi32(value);
}
static void pg_cmd_eng_buf_load_set_dma_base_v2(struct pmu_pg_cmd *pg,
	u32 value)
{
	pg->eng_buf_load_v2.dma_desc.address.lo = u64_lo32(value);
	pg->eng_buf_load_v2.dma_desc.address.hi = u64_lo32(value);
}

static void pg_cmd_eng_buf_load_set_dma_offset_v0(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v0.dma_offset = value;
}
static void pg_cmd_eng_buf_load_set_dma_offset_v1(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v1.dma_desc.dma_addr.lo |= value;
}
static void pg_cmd_eng_buf_load_set_dma_offset_v2(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v2.dma_desc.address.lo |= u64_lo32(value);
	pg->eng_buf_load_v2.dma_desc.address.hi |= u64_lo32(value);
}

static void pg_cmd_eng_buf_load_set_dma_idx_v0(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v0.dma_idx = value;
}

static void pg_cmd_eng_buf_load_set_dma_idx_v1(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v1.dma_desc.dma_idx = value;
}

static void pg_cmd_eng_buf_load_set_dma_idx_v2(struct pmu_pg_cmd *pg,
	u8 value)
{
	pg->eng_buf_load_v2.dma_desc.params |= (value << 24);
}

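/*
 * Select the version-specific accessors above based on the app_version
 * reported in the PMU ucode descriptor and hook them into g->ops.pmu_ver.
 */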
static int nvgpu_init_pmu_fw_ver_ops(struct nvgpu_pmu *pmu)
{
	struct gk20a *g = gk20a_from_pmu(pmu);
	struct pmu_v *pv = &g->ops.pmu_ver;
	int err = 0;

	nvgpu_log_fn(g, " ");

	switch (pmu->desc->app_version) {
	case APP_VERSION_GP10B:
		g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
				pg_cmd_eng_buf_load_size_v1;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
				pg_cmd_eng_buf_load_set_cmd_type_v1;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
				pg_cmd_eng_buf_load_set_engine_id_v1;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
				pg_cmd_eng_buf_load_set_buf_idx_v1;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
				pg_cmd_eng_buf_load_set_pad_v1;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
				pg_cmd_eng_buf_load_set_buf_size_v1;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
				pg_cmd_eng_buf_load_set_dma_base_v1;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
				pg_cmd_eng_buf_load_set_dma_offset_v1;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
				pg_cmd_eng_buf_load_set_dma_idx_v1;
		g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
		g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
		g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
		g->ops.pmu_ver.set_perfmon_cntr_valid =
			set_perfmon_cntr_valid_v2;
		g->ops.pmu_ver.set_perfmon_cntr_index =
			set_perfmon_cntr_index_v2;
		g->ops.pmu_ver.set_perfmon_cntr_group_id =
			set_perfmon_cntr_group_id_v2;
		g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
		g->pmu_ver_cmd_id_zbc_table_update = 16;
		__nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
		g->ops.pmu_ver.get_pmu_cmdline_args_size =
			pmu_cmdline_size_v4;
		g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
			set_pmu_cmdline_args_cpufreq_v4;
		g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
			set_pmu_cmdline_args_secure_mode_v4;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
			set_pmu_cmdline_args_falctracesize_v4;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
			set_pmu_cmdline_args_falctracedmabase_v4;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
			set_pmu_cmdline_args_falctracedmaidx_v4;
		g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
			get_pmu_cmdline_args_ptr_v4;
		g->ops.pmu_ver.get_pmu_allocation_struct_size =
			get_pmu_allocation_size_v2;
		g->ops.pmu_ver.set_pmu_allocation_ptr =
			set_pmu_allocation_ptr_v2;
		g->ops.pmu_ver.pmu_allocation_set_dmem_size =
			pmu_allocation_set_dmem_size_v2;
		g->ops.pmu_ver.pmu_allocation_get_dmem_size =
			pmu_allocation_get_dmem_size_v2;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
			pmu_allocation_get_dmem_offset_v2;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
			pmu_allocation_get_dmem_offset_addr_v2;
		g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
			pmu_allocation_set_dmem_offset_v2;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
			get_pmu_init_msg_pmu_queue_params_v1;
		g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
			get_pmu_msg_pmu_init_msg_ptr_v1;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
			get_pmu_init_msg_pmu_sw_mg_off_v1;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
			get_pmu_init_msg_pmu_sw_mg_size_v1;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
			get_pmu_perfmon_cmd_start_size_v2;
		g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
			get_perfmon_cmd_start_offsetofvar_v2;
		g->ops.pmu_ver.perfmon_start_set_cmd_type =
			perfmon_start_set_cmd_type_v2;
		g->ops.pmu_ver.perfmon_start_set_group_id =
			perfmon_start_set_group_id_v2;
		g->ops.pmu_ver.perfmon_start_set_state_id =
			perfmon_start_set_state_id_v2;
		g->ops.pmu_ver.perfmon_start_set_flags =
			perfmon_start_set_flags_v2;
		g->ops.pmu_ver.perfmon_start_get_flags =
			perfmon_start_get_flags_v2;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
			get_pmu_perfmon_cmd_init_size_v2;
		g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
			get_perfmon_cmd_init_offsetofvar_v2;
		g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
			perfmon_cmd_init_set_sample_buffer_v2;
		g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
			perfmon_cmd_init_set_dec_cnt_v2;
		g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
			perfmon_cmd_init_set_base_cnt_id_v2;
		g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
			perfmon_cmd_init_set_samp_period_us_v2;
		g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
			perfmon_cmd_init_set_num_cnt_v2;
		g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
			perfmon_cmd_init_set_mov_avg_v2;
		g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
			get_pmu_sequence_in_alloc_ptr_v1;
		g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
			get_pmu_sequence_out_alloc_ptr_v1;
		break;
	case APP_VERSION_GV11B:
	case APP_VERSION_GV10X:
	case APP_VERSION_GPU_NEXT:
		g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
				pg_cmd_eng_buf_load_size_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
				pg_cmd_eng_buf_load_set_cmd_type_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
				pg_cmd_eng_buf_load_set_engine_id_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
				pg_cmd_eng_buf_load_set_buf_idx_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
				pg_cmd_eng_buf_load_set_pad_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
				pg_cmd_eng_buf_load_set_buf_size_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
				pg_cmd_eng_buf_load_set_dma_base_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
				pg_cmd_eng_buf_load_set_dma_offset_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
				pg_cmd_eng_buf_load_set_dma_idx_v2;
		g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
		g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
		g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
		g->ops.pmu_ver.set_perfmon_cntr_valid =
			set_perfmon_cntr_valid_v2;
		g->ops.pmu_ver.set_perfmon_cntr_index =
			set_perfmon_cntr_index_v2;
		g->ops.pmu_ver.set_perfmon_cntr_group_id =
			set_perfmon_cntr_group_id_v2;
		g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
		g->pmu_ver_cmd_id_zbc_table_update = 16;
		__nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, false);
		g->ops.pmu_ver.get_pmu_cmdline_args_size =
			pmu_cmdline_size_v6;
		g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
			set_pmu_cmdline_args_cpufreq_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
			set_pmu_cmdline_args_secure_mode_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
			set_pmu_cmdline_args_falctracesize_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
			set_pmu_cmdline_args_falctracedmabase_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
			set_pmu_cmdline_args_falctracedmaidx_v5;
		g->ops.pmu_ver.config_pmu_cmdline_args_super_surface =
			config_pmu_cmdline_args_super_surface_v6;
		g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
			get_pmu_cmdline_args_ptr_v5;
		g->ops.pmu_ver.get_pmu_allocation_struct_size =
			get_pmu_allocation_size_v3;
		g->ops.pmu_ver.set_pmu_allocation_ptr =
			set_pmu_allocation_ptr_v3;
		g->ops.pmu_ver.pmu_allocation_set_dmem_size =
			pmu_allocation_set_dmem_size_v3;
		g->ops.pmu_ver.pmu_allocation_get_dmem_size =
			pmu_allocation_get_dmem_size_v3;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
			pmu_allocation_get_dmem_offset_v3;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
			pmu_allocation_get_dmem_offset_addr_v3;
		g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
			pmu_allocation_set_dmem_offset_v3;
		g->ops.pmu_ver.pmu_allocation_get_fb_addr =
				pmu_allocation_get_fb_addr_v3;
		g->ops.pmu_ver.pmu_allocation_get_fb_size =
				pmu_allocation_get_fb_size_v3;
		if (pmu->desc->app_version == APP_VERSION_GV10X ||
			pmu->desc->app_version == APP_VERSION_GPU_NEXT) {
			g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
				get_pmu_init_msg_pmu_queue_params_v5;
			g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
				get_pmu_msg_pmu_init_msg_ptr_v5;
			g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
				get_pmu_init_msg_pmu_sw_mg_off_v5;
			g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
				get_pmu_init_msg_pmu_sw_mg_size_v5;
			g->ops.pmu_ver.boardobj.boardobjgrp_pmucmd_construct_impl =
				boardobjgrp_pmucmd_construct_impl_v1;
			g->ops.pmu_ver.boardobj.boardobjgrp_pmuset_impl =
				boardobjgrp_pmuset_impl_v1;
			g->ops.pmu_ver.boardobj.boardobjgrp_pmugetstatus_impl =
				boardobjgrp_pmugetstatus_impl_v1;
			g->ops.pmu_ver.boardobj.is_boardobjgrp_pmucmd_id_valid =
				is_boardobjgrp_pmucmd_id_valid_v1;
			g->ops.pmu_ver.volt.volt_set_voltage =
				nvgpu_volt_set_voltage_gv10x;
			g->ops.pmu_ver.volt.volt_get_voltage =
				nvgpu_volt_rail_get_voltage_gv10x;
			g->ops.pmu_ver.volt.volt_send_load_cmd_to_pmu =
				nvgpu_volt_send_load_cmd_to_pmu_gv10x;
			g->ops.pmu_ver.clk.get_vbios_clk_domain =
				nvgpu_clk_get_vbios_clk_domain_gv10x;
			g->ops.pmu_ver.clk.clk_avfs_get_vin_cal_data =
					clk_avfs_get_vin_cal_fuse_v20;
			g->ops.pmu_ver.clk.clk_vf_change_inject_data_fill =
					nvgpu_clk_vf_change_inject_data_fill_gv10x;
			g->ops.pmu_ver.clk.clk_set_boot_clk =
					nvgpu_clk_set_boot_fll_clk_gv10x;
		} else {
			g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
				get_pmu_init_msg_pmu_queue_params_v4;
			g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
				get_pmu_msg_pmu_init_msg_ptr_v4;
			g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
				get_pmu_init_msg_pmu_sw_mg_off_v4;
			g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
				get_pmu_init_msg_pmu_sw_mg_size_v4;
		}

		g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
			get_pmu_perfmon_cmd_start_size_v3;
		g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
			get_perfmon_cmd_start_offsetofvar_v3;
		g->ops.pmu_ver.perfmon_start_set_cmd_type =
			perfmon_start_set_cmd_type_v3;
		g->ops.pmu_ver.perfmon_start_set_group_id =
			perfmon_start_set_group_id_v3;
		g->ops.pmu_ver.perfmon_start_set_state_id =
			perfmon_start_set_state_id_v3;
		g->ops.pmu_ver.perfmon_start_set_flags =
			perfmon_start_set_flags_v3;
		g->ops.pmu_ver.perfmon_start_get_flags =
			perfmon_start_get_flags_v3;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
			get_pmu_perfmon_cmd_init_size_v3;
		g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
			get_perfmon_cmd_init_offsetofvar_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
			perfmon_cmd_init_set_sample_buffer_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
			perfmon_cmd_init_set_dec_cnt_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
			perfmon_cmd_init_set_base_cnt_id_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
			perfmon_cmd_init_set_samp_period_us_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
			perfmon_cmd_init_set_num_cnt_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
			perfmon_cmd_init_set_mov_avg_v3;
		g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
			get_pmu_sequence_in_alloc_ptr_v3;
		g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
			get_pmu_sequence_out_alloc_ptr_v3;
		break;
	case APP_VERSION_GP10X:
		g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
				pg_cmd_eng_buf_load_size_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
				pg_cmd_eng_buf_load_set_cmd_type_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
				pg_cmd_eng_buf_load_set_engine_id_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
				pg_cmd_eng_buf_load_set_buf_idx_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
				pg_cmd_eng_buf_load_set_pad_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
				pg_cmd_eng_buf_load_set_buf_size_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
				pg_cmd_eng_buf_load_set_dma_base_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
				pg_cmd_eng_buf_load_set_dma_offset_v2;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
				pg_cmd_eng_buf_load_set_dma_idx_v2;
		g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
		g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
		g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
		g->ops.pmu_ver.set_perfmon_cntr_valid =
			set_perfmon_cntr_valid_v2;
		g->ops.pmu_ver.set_perfmon_cntr_index =
			set_perfmon_cntr_index_v2;
		g->ops.pmu_ver.set_perfmon_cntr_group_id =
			set_perfmon_cntr_group_id_v2;
		g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
		g->pmu_ver_cmd_id_zbc_table_update = 16;
		__nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
		g->ops.pmu_ver.get_pmu_cmdline_args_size =
			pmu_cmdline_size_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
			set_pmu_cmdline_args_cpufreq_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
			set_pmu_cmdline_args_secure_mode_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
			set_pmu_cmdline_args_falctracesize_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
			set_pmu_cmdline_args_falctracedmabase_v5;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
			set_pmu_cmdline_args_falctracedmaidx_v5;
		g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
			get_pmu_cmdline_args_ptr_v5;
		g->ops.pmu_ver.get_pmu_allocation_struct_size =
			get_pmu_allocation_size_v3;
		g->ops.pmu_ver.set_pmu_allocation_ptr =
			set_pmu_allocation_ptr_v3;
		g->ops.pmu_ver.pmu_allocation_set_dmem_size =
			pmu_allocation_set_dmem_size_v3;
		g->ops.pmu_ver.pmu_allocation_get_dmem_size =
			pmu_allocation_get_dmem_size_v3;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
			pmu_allocation_get_dmem_offset_v3;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
			pmu_allocation_get_dmem_offset_addr_v3;
		g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
			pmu_allocation_set_dmem_offset_v3;
		g->ops.pmu_ver.pmu_allocation_get_fb_addr =
				pmu_allocation_get_fb_addr_v3;
		g->ops.pmu_ver.pmu_allocation_get_fb_size =
				pmu_allocation_get_fb_size_v3;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
				get_pmu_init_msg_pmu_queue_params_v3;
		g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
				get_pmu_msg_pmu_init_msg_ptr_v3;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
				get_pmu_init_msg_pmu_sw_mg_off_v3;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
				get_pmu_init_msg_pmu_sw_mg_size_v3;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
			get_pmu_perfmon_cmd_start_size_v3;
		g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
			get_perfmon_cmd_start_offsetofvar_v3;
		g->ops.pmu_ver.perfmon_start_set_cmd_type =
			perfmon_start_set_cmd_type_v3;
		g->ops.pmu_ver.perfmon_start_set_group_id =
			perfmon_start_set_group_id_v3;
		g->ops.pmu_ver.perfmon_start_set_state_id =
			perfmon_start_set_state_id_v3;
		g->ops.pmu_ver.perfmon_start_set_flags =
			perfmon_start_set_flags_v3;
		g->ops.pmu_ver.perfmon_start_get_flags =
			perfmon_start_get_flags_v3;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
			get_pmu_perfmon_cmd_init_size_v3;
		g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
			get_perfmon_cmd_init_offsetofvar_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
			perfmon_cmd_init_set_sample_buffer_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
			perfmon_cmd_init_set_dec_cnt_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
			perfmon_cmd_init_set_base_cnt_id_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
			perfmon_cmd_init_set_samp_period_us_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
			perfmon_cmd_init_set_num_cnt_v3;
		g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
			perfmon_cmd_init_set_mov_avg_v3;
		g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
			get_pmu_sequence_in_alloc_ptr_v3;
		g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
			get_pmu_sequence_out_alloc_ptr_v3;
		g->ops.pmu_ver.boardobj.boardobjgrp_pmucmd_construct_impl =
			boardobjgrp_pmucmd_construct_impl;
		g->ops.pmu_ver.boardobj.boardobjgrp_pmuset_impl =
			boardobjgrp_pmuset_impl;
		g->ops.pmu_ver.boardobj.boardobjgrp_pmugetstatus_impl =
			boardobjgrp_pmugetstatus_impl;
		g->ops.pmu_ver.boardobj.is_boardobjgrp_pmucmd_id_valid =
			is_boardobjgrp_pmucmd_id_valid_v0;
		g->ops.pmu_ver.volt.volt_set_voltage =
			nvgpu_volt_set_voltage_gp10x;
		g->ops.pmu_ver.volt.volt_get_voltage =
			nvgpu_volt_rail_get_voltage_gp10x;
		g->ops.pmu_ver.volt.volt_send_load_cmd_to_pmu =
			nvgpu_volt_send_load_cmd_to_pmu_gp10x;
		g->ops.pmu_ver.clk.get_vbios_clk_domain =
			nvgpu_clk_get_vbios_clk_domain_gp10x;
		g->ops.pmu_ver.clk.clk_avfs_get_vin_cal_data =
				clk_avfs_get_vin_cal_fuse_v10;
		g->ops.pmu_ver.clk.clk_vf_change_inject_data_fill =
				nvgpu_clk_vf_change_inject_data_fill_gp10x;
		break;
	case APP_VERSION_GM20B:
		g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
				pg_cmd_eng_buf_load_size_v0;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
				pg_cmd_eng_buf_load_set_cmd_type_v0;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
				pg_cmd_eng_buf_load_set_engine_id_v0;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
				pg_cmd_eng_buf_load_set_buf_idx_v0;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
				pg_cmd_eng_buf_load_set_pad_v0;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
				pg_cmd_eng_buf_load_set_buf_size_v0;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
				pg_cmd_eng_buf_load_set_dma_base_v0;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
				pg_cmd_eng_buf_load_set_dma_offset_v0;
		g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
				pg_cmd_eng_buf_load_set_dma_idx_v0;
		g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
		g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
		g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
		g->ops.pmu_ver.set_perfmon_cntr_valid =
			set_perfmon_cntr_valid_v2;
		g->ops.pmu_ver.set_perfmon_cntr_index =
			set_perfmon_cntr_index_v2;
		g->ops.pmu_ver.set_perfmon_cntr_group_id =
			set_perfmon_cntr_group_id_v2;
		g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
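		/* This firmware updates the ZBC table through PMU command ID 16. */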
		g->pmu_ver_cmd_id_zbc_table_update = 16;
		__nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
		g->ops.pmu_ver.get_pmu_cmdline_args_size =
			pmu_cmdline_size_v3;
		g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
			set_pmu_cmdline_args_cpufreq_v3;
		g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
			set_pmu_cmdline_args_secure_mode_v3;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
			set_pmu_cmdline_args_falctracesize_v3;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
			set_pmu_cmdline_args_falctracedmabase_v3;
		g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
			set_pmu_cmdline_args_falctracedmaidx_v3;
		g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
			get_pmu_cmdline_args_ptr_v3;
		g->ops.pmu_ver.get_pmu_allocation_struct_size =
			get_pmu_allocation_size_v1;
		g->ops.pmu_ver.set_pmu_allocation_ptr =
			set_pmu_allocation_ptr_v1;
		g->ops.pmu_ver.pmu_allocation_set_dmem_size =
			pmu_allocation_set_dmem_size_v1;
		g->ops.pmu_ver.pmu_allocation_get_dmem_size =
			pmu_allocation_get_dmem_size_v1;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
			pmu_allocation_get_dmem_offset_v1;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
			pmu_allocation_get_dmem_offset_addr_v1;
		g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
			pmu_allocation_set_dmem_offset_v1;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
			get_pmu_init_msg_pmu_queue_params_v1;
		g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
			get_pmu_msg_pmu_init_msg_ptr_v1;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
			get_pmu_init_msg_pmu_sw_mg_off_v1;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
			get_pmu_init_msg_pmu_sw_mg_size_v1;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
			get_pmu_perfmon_cmd_start_size_v1;
		g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
			get_perfmon_cmd_start_offsetofvar_v1;
		g->ops.pmu_ver.perfmon_start_set_cmd_type =
			perfmon_start_set_cmd_type_v1;
		g->ops.pmu_ver.perfmon_start_set_group_id =
			perfmon_start_set_group_id_v1;
		g->ops.pmu_ver.perfmon_start_set_state_id =
			perfmon_start_set_state_id_v1;
		g->ops.pmu_ver.perfmon_start_set_flags =
			perfmon_start_set_flags_v1;
		g->ops.pmu_ver.perfmon_start_get_flags =
			perfmon_start_get_flags_v1;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
			get_pmu_perfmon_cmd_init_size_v1;
		g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
			get_perfmon_cmd_init_offsetofvar_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
			perfmon_cmd_init_set_sample_buffer_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
			perfmon_cmd_init_set_dec_cnt_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
			perfmon_cmd_init_set_base_cnt_id_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
			perfmon_cmd_init_set_samp_period_us_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
			perfmon_cmd_init_set_num_cnt_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
			perfmon_cmd_init_set_mov_avg_v1;
		g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
			get_pmu_sequence_in_alloc_ptr_v1;
		g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
			get_pmu_sequence_out_alloc_ptr_v1;
		break;
	default:
		nvgpu_err(g, "PMU firmware version not supported: %d",
			pmu->desc->app_version);
		err = -EINVAL;
		break;
	}

	if (err != 0) {
		return err;
	}

	pv->set_perfmon_cntr_index(pmu, 3); /* GR & CE2 */
	pv->set_perfmon_cntr_group_id(pmu, PMU_DOMAIN_GROUP_PSTATE);

	return err;
}

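/*
 * Tear down PMU SW support: release the DMEM allocator, destroy all board
 * object groups and board objects, release the firmware blobs, free the
 * DMA buffers and destroy the PMU mutexes.
 */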
static void nvgpu_remove_pmu_support(struct nvgpu_pmu *pmu)
{
	struct gk20a *g = gk20a_from_pmu(pmu);
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = mm->pmu.vm;
	struct boardobj *pboardobj, *pboardobj_tmp;
	struct boardobjgrp *pboardobjgrp, *pboardobjgrp_tmp;

	nvgpu_log_fn(g, " ");

	if (nvgpu_alloc_initialized(&pmu->dmem)) {
		nvgpu_alloc_destroy(&pmu->dmem);
	}

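	/* Destroy board object groups first, then any remaining board objects. */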
	nvgpu_list_for_each_entry_safe(pboardobjgrp, pboardobjgrp_tmp,
		&g->boardobjgrp_head, boardobjgrp, node) {
		pboardobjgrp->destruct(pboardobjgrp);
	}

	nvgpu_list_for_each_entry_safe(pboardobj, pboardobj_tmp,
			&g->boardobj_head, boardobj, node) {
		pboardobj->destruct(pboardobj);
	}

	if (pmu->fw) {
		nvgpu_release_firmware(g, pmu->fw);
	}

	if (g->acr.pmu_fw) {
		nvgpu_release_firmware(g, g->acr.pmu_fw);
	}

	if (g->acr.pmu_desc) {
		nvgpu_release_firmware(g, g->acr.pmu_desc);
	}

	nvgpu_dma_unmap_free(vm, &pmu->seq_buf);

	nvgpu_dma_unmap_free(vm, &pmu->super_surface_buf);

	nvgpu_mutex_destroy(&pmu->elpg_mutex);
	nvgpu_mutex_destroy(&pmu->pg_mutex);
	nvgpu_mutex_destroy(&pmu->isr_mutex);
	nvgpu_mutex_destroy(&pmu->pmu_copy_lock);
	nvgpu_mutex_destroy(&pmu->pmu_seq_lock);
}

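/*
 * Set up PMU FW support: initialize the PMU mutexes, register the
 * remove_support teardown hook and install the version-specific ops.
 * On failure, the mutexes initialized so far are destroyed again.
 */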
int nvgpu_init_pmu_fw_support(struct nvgpu_pmu *pmu)
{
	struct gk20a *g = gk20a_from_pmu(pmu);
	int err = 0;

	nvgpu_log_fn(g, " ");

	err = nvgpu_mutex_init(&pmu->elpg_mutex);
	if (err) {
		return err;
	}

	err = nvgpu_mutex_init(&pmu->pg_mutex);
	if (err) {
		goto fail_elpg;
	}

	err = nvgpu_mutex_init(&pmu->isr_mutex);
	if (err) {
		goto fail_pg;
	}

	err = nvgpu_mutex_init(&pmu->pmu_copy_lock);
	if (err) {
		goto fail_isr;
	}

	err = nvgpu_mutex_init(&pmu->pmu_seq_lock);
	if (err) {
		goto fail_pmu_copy;
	}

	pmu->remove_support = nvgpu_remove_pmu_support;

	err = nvgpu_init_pmu_fw_ver_ops(pmu);
	if (err) {
		goto fail_pmu_seq;
	}

	goto exit;

fail_pmu_seq:
	nvgpu_mutex_destroy(&pmu->pmu_seq_lock);
fail_pmu_copy:
	nvgpu_mutex_destroy(&pmu->pmu_copy_lock);
fail_isr:
	nvgpu_mutex_destroy(&pmu->isr_mutex);
fail_pg:
	nvgpu_mutex_destroy(&pmu->pg_mutex);
fail_elpg:
	nvgpu_mutex_destroy(&pmu->elpg_mutex);
exit:
	return err;
}

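/*
 * Load the non-secure PMU ucode image, copy it into a DMA buffer mapped
 * into the PMU VM and finish FW support initialization.
 */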
int nvgpu_pmu_prepare_ns_ucode_blob(struct gk20a *g)
{
	struct nvgpu_pmu *pmu = &g->pmu;
	int err = 0;
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = mm->pmu.vm;

	nvgpu_log_fn(g, " ");

	if (pmu->fw != NULL) {
		return nvgpu_init_pmu_fw_support(pmu);
	}

	pmu->fw = nvgpu_request_firmware(g, NVGPU_PMU_NS_UCODE_IMAGE, 0);
	if (pmu->fw == NULL) {
		nvgpu_err(g, "failed to load PMU ucode");
		return -ENOENT;
	}

	nvgpu_log_fn(g, "firmware loaded");

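	/* The ucode descriptor sits at the start of the image; code/data follow it. */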
	pmu->desc = (struct pmu_ucode_desc *)pmu->fw->data;
	pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
			pmu->desc->descriptor_size);

	err = nvgpu_dma_alloc_map_sys(vm, GK20A_PMU_UCODE_SIZE_MAX,
			&pmu->ucode);
	if (err) {
		goto err_release_fw;
	}

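	/* Copy the image up to the end of the application section. */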
	nvgpu_mem_wr_n(g, &pmu->ucode, 0, pmu->ucode_image,
			pmu->desc->app_start_offset + pmu->desc->app_size);

	return nvgpu_init_pmu_fw_support(pmu);

err_release_fw:
	nvgpu_release_firmware(g, pmu->fw);
	pmu->fw = NULL;

	return err;
}

pt">*token, owner); return -EINVAL; } if (--mutex->ref_cnt > 0) return -EBUSY; gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), pwr_pmu_mutex_value_initial_lock_f()); data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); data = set_field(data, pwr_pmu_mutex_id_release_value_m(), pwr_pmu_mutex_id_release_value_f(owner)); gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); gk20a_dbg_pmu("mutex released: id=%d, token=0x%x", mutex->index, *token); return 0; } static int pmu_queue_lock(struct pmu_gk20a *pmu, struct pmu_queue *queue) { int err; if (PMU_IS_MESSAGE_QUEUE(queue->id)) return 0; if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { mutex_lock(&queue->mutex); return 0; } err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock); return err; } static int pmu_queue_unlock(struct pmu_gk20a *pmu, struct pmu_queue *queue) { int err; if (PMU_IS_MESSAGE_QUEUE(queue->id)) return 0; if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { mutex_unlock(&queue->mutex); return 0; } err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock); return err; } /* called by pmu_read_message, no lock */ static bool pmu_queue_is_empty(struct pmu_gk20a *pmu, struct pmu_queue *queue) { u32 head, tail; pmu_queue_head(pmu, queue, &head, QUEUE_GET); if (queue->opened && queue->oflag == OFLAG_READ) tail = queue->position; else pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); return head == tail; } static bool pmu_queue_has_room(struct pmu_gk20a *pmu, struct pmu_queue *queue, u32 size, bool *need_rewind) { u32 head, tail, free; bool rewind = false; size = ALIGN(size, QUEUE_ALIGNMENT); pmu_queue_head(pmu, queue, &head, QUEUE_GET); pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); if (head >= tail) { free = queue->offset + queue->size - head; free -= PMU_CMD_HDR_SIZE; if (size > free) { rewind = true; head = queue->offset; } } if (head < tail) free = tail - head - 1; if (need_rewind) *need_rewind = rewind; return size <= free; } static int pmu_queue_push(struct pmu_gk20a *pmu, struct pmu_queue *queue, void *data, u32 size) { gk20a_dbg_fn(""); if (!queue->opened && queue->oflag == OFLAG_WRITE){ gk20a_err(dev_from_gk20a(pmu->g), "queue not opened for write"); return -EINVAL; } pmu_queue_write(pmu, queue->position, data, size); queue->position += ALIGN(size, QUEUE_ALIGNMENT); return 0; } static int pmu_queue_pop(struct pmu_gk20a *pmu, struct pmu_queue *queue, void *data, u32 size, u32 *bytes_read) { u32 head, tail, used; *bytes_read = 0; if (!queue->opened && queue->oflag == OFLAG_READ){ gk20a_err(dev_from_gk20a(pmu->g), "queue not opened for read"); return -EINVAL; } pmu_queue_head(pmu, queue, &head, QUEUE_GET); tail = queue->position; if (head == tail) return 0; if (head > tail) used = head - tail; else used = queue->offset + queue->size - tail; if (size > used) { gk20a_warn(dev_from_gk20a(pmu->g), "queue size smaller than request read"); size = used; } pmu_queue_read(pmu, tail, data, size); queue->position += ALIGN(size, QUEUE_ALIGNMENT); *bytes_read = size; return 0; } static void pmu_queue_rewind(struct pmu_gk20a *pmu, struct pmu_queue *queue) { struct pmu_cmd cmd; gk20a_dbg_fn(""); if (!queue->opened) { gk20a_err(dev_from_gk20a(pmu->g), "queue not opened"); return; } if (queue->oflag == OFLAG_WRITE) { cmd.hdr.unit_id = PMU_UNIT_REWIND; cmd.hdr.size = PMU_CMD_HDR_SIZE; pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size); gk20a_dbg_pmu("queue %d rewinded", queue->id); } queue->position = queue->offset; return; } /* open for read and lock the queue */ static int pmu_queue_open_read(struct pmu_gk20a *pmu, struct pmu_queue *queue) { int err; 
err = pmu_queue_lock(pmu, queue); if (err) return err; if (queue->opened) BUG(); pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET); queue->oflag = OFLAG_READ; queue->opened = true; return 0; } /* open for write and lock the queue make sure there's enough free space for the write */ static int pmu_queue_open_write(struct pmu_gk20a *pmu, struct pmu_queue *queue, u32 size) { bool rewind = false; int err; err = pmu_queue_lock(pmu, queue); if (err) return err; if (queue->opened) BUG(); if (!pmu_queue_has_room(pmu, queue, size, &rewind)) { gk20a_err(dev_from_gk20a(pmu->g), "queue full"); return -EAGAIN; } pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET); queue->oflag = OFLAG_WRITE; queue->opened = true; if (rewind) pmu_queue_rewind(pmu, queue); return 0; } /* close and unlock the queue */ static int pmu_queue_close(struct pmu_gk20a *pmu, struct pmu_queue *queue, bool commit) { if (!queue->opened) return 0; if (commit) { if (queue->oflag == OFLAG_READ) { pmu_queue_tail(pmu, queue, &queue->position, QUEUE_SET); } else { pmu_queue_head(pmu, queue, &queue->position, QUEUE_SET); } } queue->opened = false; pmu_queue_unlock(pmu, queue); return 0; } void gk20a_remove_pmu_support(struct pmu_gk20a *pmu) { gk20a_dbg_fn(""); gk20a_allocator_destroy(&pmu->dmem); } int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; gk20a_dbg_fn(""); pmu_enable_hw(pmu, true); return 0; } int gk20a_init_pmu_setup_sw(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; struct device *d = dev_from_gk20a(g); int i, err = 0; u8 *ptr; void *ucode_ptr; struct sg_table *sgt_pmu_ucode; struct sg_table *sgt_seq_buf; DEFINE_DMA_ATTRS(attrs); dma_addr_t iova; gk20a_dbg_fn(""); if (pmu->sw_ready) { for (i = 0; i < pmu->mutex_cnt; i++) { pmu->mutex[i].id = i; pmu->mutex[i].index = i; } pmu_seq_init(pmu); gk20a_dbg_fn("skip init"); goto skip_init; } /* no infoRom script from vbios? 
*/ /* TBD: sysmon subtask */ if (IS_ENABLED(CONFIG_TEGRA_GK20A_PERFMON)) pmu->perfmon_sampling_enabled = true; pmu->mutex_cnt = pwr_pmu_mutex__size_1_v(); pmu->mutex = kzalloc(pmu->mutex_cnt * sizeof(struct pmu_mutex), GFP_KERNEL); if (!pmu->mutex) { err = -ENOMEM; goto err; } for (i = 0; i < pmu->mutex_cnt; i++) { pmu->mutex[i].id = i; pmu->mutex[i].index = i; } pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES * sizeof(struct pmu_sequence), GFP_KERNEL); if (!pmu->seq) { err = -ENOMEM; goto err_free_mutex; } pmu_seq_init(pmu); if (!g->pmu_fw) { g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE); if (!g->pmu_fw) { gk20a_err(d, "failed to load pmu ucode!!"); err = -ENOENT; goto err_free_seq; } } gk20a_dbg_fn("firmware loaded"); pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data; pmu->ucode_image = (u32 *)((u8 *)pmu->desc + pmu->desc->descriptor_size); INIT_WORK(&pmu->pg_init, pmu_setup_hw); dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, &iova, GFP_KERNEL, &attrs); if (!pmu->ucode.cpuva) { gk20a_err(d, "failed to allocate memory\n"); err = -ENOMEM; goto err_release_fw; } pmu->ucode.iova = iova; pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, &iova, GFP_KERNEL); if (!pmu->seq_buf.cpuva) { gk20a_err(d, "failed to allocate memory\n"); err = -ENOMEM; goto err_free_pmu_ucode; } pmu->seq_buf.iova = iova; err = gk20a_get_sgtable(d, &sgt_pmu_ucode, pmu->ucode.cpuva, pmu->ucode.iova, GK20A_PMU_UCODE_SIZE_MAX); if (err) { gk20a_err(d, "failed to allocate sg table\n"); goto err_free_seq_buf; } pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, GK20A_PMU_UCODE_SIZE_MAX, 0, /* flags */ gk20a_mem_flag_read_only); if (!pmu->ucode.pmu_va) { gk20a_err(d, "failed to map pmu ucode memory!!"); goto err_free_ucode_sgt; } err = gk20a_get_sgtable(d, &sgt_seq_buf, pmu->seq_buf.cpuva, pmu->seq_buf.iova, GK20A_PMU_SEQ_BUF_SIZE); if (err) { gk20a_err(d, "failed to allocate sg table\n"); goto err_unmap_ucode; } pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf, GK20A_PMU_SEQ_BUF_SIZE, 0, /* flags */ gk20a_mem_flag_none); if (!pmu->seq_buf.pmu_va) { gk20a_err(d, "failed to map pmu ucode memory!!"); goto err_free_seq_buf_sgt; } ptr = (u8 *)pmu->seq_buf.cpuva; if (!ptr) { gk20a_err(d, "failed to map cpu ptr for zbc buffer"); goto err_unmap_seq_buf; } /* TBD: remove this if ZBC save/restore is handled by PMU * end an empty ZBC sequence for now */ ptr[0] = 0x16; /* opcode EXIT */ ptr[1] = 0; ptr[2] = 1; ptr[3] = 0; ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0; pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; ucode_ptr = pmu->ucode.cpuva; for (i = 0; i < (pmu->desc->app_start_offset + pmu->desc->app_size) >> 2; i++) gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); gk20a_free_sgtable(&sgt_pmu_ucode); gk20a_free_sgtable(&sgt_seq_buf); pmu->sw_ready = true; skip_init: mutex_init(&pmu->elpg_mutex); mutex_init(&pmu->isr_mutex); mutex_init(&pmu->pmu_copy_lock); mutex_init(&pmu->pmu_seq_lock); pmu->perfmon_counter.index = 3; /* GR & CE2 */ pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE; pmu->remove_support = gk20a_remove_pmu_support; err = gk20a_init_pmu(pmu); if (err) { gk20a_err(d, "failed to set function pointers\n"); return err; } gk20a_dbg_fn("done"); return 0; err_unmap_seq_buf: gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va, GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none); err_free_seq_buf_sgt: gk20a_free_sgtable(&sgt_seq_buf); err_unmap_ucode: gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va, GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none); 
err_free_ucode_sgt: gk20a_free_sgtable(&sgt_pmu_ucode); err_free_seq_buf: dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, pmu->seq_buf.cpuva, pmu->seq_buf.iova); pmu->seq_buf.cpuva = NULL; pmu->seq_buf.iova = 0; err_free_pmu_ucode: dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, pmu->ucode.cpuva, pmu->ucode.iova, &attrs); pmu->ucode.cpuva = NULL; pmu->ucode.iova = 0; err_release_fw: release_firmware(g->pmu_fw); err_free_seq: kfree(pmu->seq); err_free_mutex: kfree(pmu->mutex); err: gk20a_dbg_fn("fail"); return err; } static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status); static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct pmu_gk20a *pmu = param; struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat; gk20a_dbg_fn(""); gk20a_dbg_pmu("reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS"); if (status != 0) { gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted"); /* TBD: disable ELPG */ return; } if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) { gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer"); } pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED); schedule_work(&pmu->pg_init); } int gk20a_init_pmu_setup_hw1(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; int err; gk20a_dbg_fn(""); pmu_reset(pmu); /* setup apertures - virtual */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), pwr_fbif_transcfg_mem_type_virtual_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT), pwr_fbif_transcfg_mem_type_virtual_f()); /* setup apertures - physical */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_local_fb_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_coherent_sysmem_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_noncoherent_sysmem_f()); /* TBD: load pmu ucode */ err = pmu_bootstrap(pmu); if (err) return err; return 0; } static int gk20a_aelpg_init(struct gk20a *g); static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id); static void pmu_setup_hw_load_zbc(struct gk20a *g); static void pmu_setup_hw_enable_elpg(struct gk20a *g); static void pmu_setup_hw(struct work_struct *work) { struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init); struct gk20a *g = pmu->g; switch (pmu->pmu_state) { case PMU_STATE_ELPG_BOOTED: gk20a_dbg_pmu("elpg booted"); gk20a_init_pmu_bind_fecs(g); break; case PMU_STATE_LOADING_PG_BUF: gk20a_dbg_pmu("loaded pg buf"); pmu_setup_hw_load_zbc(g); break; case PMU_STATE_LOADING_ZBC: gk20a_dbg_pmu("loaded zbc"); pmu_setup_hw_enable_elpg(g); break; case PMU_STATE_STARTED: gk20a_dbg_pmu("PMU booted"); break; default: gk20a_dbg_pmu("invalid state"); break; } } int gk20a_init_pmu_bind_fecs(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; struct device *d = dev_from_gk20a(g); struct pmu_cmd cmd; u32 desc; int err; u32 size; struct sg_table *sgt_pg_buf; dma_addr_t iova; gk20a_dbg_fn(""); size = 0; gk20a_gr_wait_initialized(g); err = gr_gk20a_fecs_get_reglist_img_size(g, &size); if (err) { gk20a_err(dev_from_gk20a(g), "fail to query fecs pg buffer size"); return err; } if (!pmu->pg_buf.cpuva) { pmu->pg_buf.cpuva = 
dma_alloc_coherent(d, size, &iova, GFP_KERNEL); if (!pmu->pg_buf.cpuva) { gk20a_err(d, "failed to allocate memory\n"); return -ENOMEM; } pmu->pg_buf.iova = iova; pmu->pg_buf.size = size; err = gk20a_get_sgtable(d, &sgt_pg_buf, pmu->pg_buf.cpuva, pmu->pg_buf.iova, size); if (err) { gk20a_err(d, "failed to create sg table\n"); goto err_free_pg_buf; } pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_pg_buf, size, 0, /* flags */ gk20a_mem_flag_none); if (!pmu->pg_buf.pmu_va) { gk20a_err(d, "failed to map fecs pg buffer"); err = -ENOMEM; goto err_free_sgtable; } gk20a_free_sgtable(&sgt_pg_buf); } err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa); if (err) { gk20a_err(dev_from_gk20a(g), "fail to bind pmu inst to gr"); return err; } err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va); if (err) { gk20a_err(dev_from_gk20a(g), "fail to set pg buffer pmu va"); return err; } memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load); cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD; cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS; cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size; cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8); cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF); cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; pmu->buf_loaded = false; gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, pmu_handle_pg_buf_config_msg, pmu, &desc, ~0); pmu->pmu_state = PMU_STATE_LOADING_PG_BUF; return err; err_free_sgtable: gk20a_free_sgtable(&sgt_pg_buf); err_free_pg_buf: dma_free_coherent(d, size, pmu->pg_buf.cpuva, pmu->pg_buf.iova); pmu->pg_buf.cpuva = NULL; pmu->pg_buf.iova = 0; return err; } static void pmu_setup_hw_load_zbc(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 desc; memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load); cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD; cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC; cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size; cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8); cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF); cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; pmu->buf_loaded = false; gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, pmu_handle_pg_buf_config_msg, pmu, &desc, ~0); pmu->pmu_state = PMU_STATE_LOADING_ZBC; } static void pmu_setup_hw_enable_elpg(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; /* * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to * 7. This prevents PMU stalling on Host register accesses. Once the * cause for this hang is discovered and fixed, this WAR should be * removed. */ gk20a_writel(g, 0x10a164, 0x109ff); pmu->initialized = true; pmu->pmu_state = PMU_STATE_STARTED; pmu->zbc_ready = true; /* Save zbc table after PMU is initialized. 
*/ gr_gk20a_pmu_save_zbc(g, 0xf); if (g->elpg_enabled) gk20a_pmu_enable_elpg(g); udelay(50); /* Enable AELPG */ if (g->aelpg_enabled) { gk20a_aelpg_init(g); gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); } } int gk20a_init_pmu_support(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; u32 err; gk20a_dbg_fn(""); if (pmu->initialized) return 0; pmu->g = g; err = gk20a_init_pmu_reset_enable_hw(g); if (err) return err; if (support_gk20a_pmu()) { err = gk20a_init_pmu_setup_sw(g); if (err) return err; err = gk20a_init_pmu_setup_hw1(g); if (err) return err; } return err; } static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct pmu_gk20a *pmu = param; struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg; gk20a_dbg_fn(""); if (status != 0) { gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted"); /* TBD: disable ELPG */ return; } switch (elpg_msg->msg) { case PMU_PG_ELPG_MSG_INIT_ACK: gk20a_dbg_pmu("INIT_PG is acknowledged from PMU"); break; case PMU_PG_ELPG_MSG_ALLOW_ACK: gk20a_dbg_pmu("ALLOW is acknowledged from PMU"); pmu->elpg_stat = PMU_ELPG_STAT_ON; break; case PMU_PG_ELPG_MSG_DISALLOW_ACK: gk20a_dbg_pmu("DISALLOW is acknowledged from PMU"); pmu->elpg_stat = PMU_ELPG_STAT_OFF; if (pmu->pmu_state == PMU_STATE_STARTING) pmu->pmu_state = PMU_STATE_ELPG_BOOTED; schedule_work(&pmu->pg_init); break; default: gk20a_err(dev_from_gk20a(g), "unsupported ELPG message : 0x%04x", elpg_msg->msg); } return; } static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct pmu_gk20a *pmu = param; gk20a_dbg_fn(""); if (status != 0) { gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted"); /* TBD: disable ELPG */ return; } switch (msg->msg.pg.stat.sub_msg_id) { case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET: gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU"); pmu->stat_dmem_offset = msg->msg.pg.stat.data; break; default: break; } } static int pmu_init_powergating(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; struct pmu_cmd cmd; u32 seq; gk20a_dbg_fn(""); if (tegra_cpu_is_asim()) { /* TBD: calculate threshold for silicon */ gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A), PMU_PG_IDLE_THRESHOLD_SIM); gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A), PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM); } else { /* TBD: calculate threshold for silicon */ gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A), PMU_PG_IDLE_THRESHOLD); gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A), PMU_PG_POST_POWERUP_IDLE_THRESHOLD); } /* init ELPG */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, pmu, &seq, ~0); /* alloc dmem for powergating state log */ pmu->stat_dmem_offset = 0; memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat); cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT; cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM; cmd.cmd.pg.stat.data = 0; gk20a_dbg_pmu("cmd post PMU_PG_STAT_CMD_ALLOC_DMEM"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, 
pmu_handle_pg_stat_msg, pmu, &seq, ~0); /* disallow ELPG initially PMU ucode requires a disallow cmd before allow cmd */ pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, pmu, &seq, ~0); /* start with elpg disabled until first enable call */ pmu->elpg_refcnt = 0; pmu->pmu_state = PMU_STATE_STARTING; return 0; } static int pmu_init_perfmon(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; struct pmu_v *pv = &g->ops.pmu_ver; struct pmu_cmd cmd; struct pmu_payload payload; u32 seq; u32 data; int err = 0; gk20a_dbg_fn(""); pmu->perfmon_ready = 0; /* use counter #3 for GR && CE2 busy cycles */ gk20a_writel(g, pwr_pmu_idle_mask_r(3), pwr_pmu_idle_mask_gr_enabled_f() | pwr_pmu_idle_mask_ce_2_enabled_f()); /* disable idle filtering for counters 3 and 6 */ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_busy_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data); /* use counter #6 for total cycles */ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_always_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data); /* * We don't want to disturb counters #3 and #6, which are used by * perfmon, so we add wiring also to counters #1 and #2 for * exposing raw counter readings. */ gk20a_writel(g, pwr_pmu_idle_mask_r(1), pwr_pmu_idle_mask_gr_enabled_f() | pwr_pmu_idle_mask_ce_2_enabled_f()); data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_busy_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data); data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_always_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); if (!pmu->sample_buffer) err = pmu->dmem.alloc(&pmu->dmem, &pmu->sample_buffer, 2 * sizeof(u16)); if (err) { gk20a_err(dev_from_gk20a(g), "failed to allocate perfmon sample buffer"); return -ENOMEM; } /* init PERFMON */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PERFMON; cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size(); cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT; /* buffer to save counter values for pmu perfmon */ pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon, (u16)pmu->sample_buffer); /* number of sample periods below lower threshold before pmu triggers perfmon decrease event TBD: = 15 */ pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15); /* index of base counter, aka. 
always ticking counter */ pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6); /* microseconds interval between pmu polls perf counters */ pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700); /* number of perfmon counters counter #3 (GR and CE2) for gk20a */ pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1); /* moving average window for sample periods TBD: = 3000000 / sample_period_us = 17 */ pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17); memset(&payload, 0, sizeof(struct pmu_payload)); payload.in.buf = &pmu->perfmon_counter; payload.in.size = sizeof(struct pmu_perfmon_counter); payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC); gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT"); gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, NULL, NULL, &seq, ~0); return 0; } static int pmu_process_init_msg(struct pmu_gk20a *pmu, struct pmu_msg *msg) { struct gk20a *g = pmu->g; struct pmu_v *pv = &g->ops.pmu_ver; union pmu_init_msg_pmu *init; struct pmu_sha1_gid_data gid_data; u32 i, tail = 0; tail = pwr_pmu_msgq_tail_val_v( gk20a_readl(g, pwr_pmu_msgq_tail_r())); pmu_copy_from_dmem(pmu, tail, (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); if (msg->hdr.unit_id != PMU_UNIT_INIT) { gk20a_err(dev_from_gk20a(g), "expecting init msg"); return -EINVAL; } pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE, (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { gk20a_err(dev_from_gk20a(g), "expecting init msg"); return -EINVAL; } tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); gk20a_writel(g, pwr_pmu_msgq_tail_r(), pwr_pmu_msgq_tail_val_f(tail)); init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init)); if (!pmu->gid_info.valid) { pmu_copy_from_dmem(pmu, pv->get_pmu_init_msg_pmu_sw_mg_off(init), (u8 *)&gid_data, sizeof(struct pmu_sha1_gid_data), 0); pmu->gid_info.valid = (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE); if (pmu->gid_info.valid) { BUG_ON(sizeof(pmu->gid_info.gid) != sizeof(gid_data.gid)); memcpy(pmu->gid_info.gid, gid_data.gid, sizeof(pmu->gid_info.gid)); } } for (i = 0; i < PMU_QUEUE_COUNT; i++) pmu_queue_init(pmu, i, init); if (!pmu->dmem.alloc) gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", pv->get_pmu_init_msg_pmu_sw_mg_off(init), pv->get_pmu_init_msg_pmu_sw_mg_size(init), PMU_DMEM_ALLOC_ALIGNMENT); pmu->pmu_ready = true; return 0; } static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue, struct pmu_msg *msg, int *status) { struct gk20a *g = pmu->g; u32 read_size, bytes_read; int err; *status = 0; if (pmu_queue_is_empty(pmu, queue)) return false; err = pmu_queue_open_read(pmu, queue); if (err) { gk20a_err(dev_from_gk20a(g), "fail to open queue %d for read", queue->id); *status = err; return false; } err = pmu_queue_pop(pmu, queue, &msg->hdr, PMU_MSG_HDR_SIZE, &bytes_read); if (err || bytes_read != PMU_MSG_HDR_SIZE) { gk20a_err(dev_from_gk20a(g), "fail to read msg from queue %d", queue->id); *status = err | -EINVAL; goto clean_up; } if (msg->hdr.unit_id == PMU_UNIT_REWIND) { pmu_queue_rewind(pmu, queue); /* read again after rewind */ err = pmu_queue_pop(pmu, queue, &msg->hdr, PMU_MSG_HDR_SIZE, &bytes_read); if (err || bytes_read != PMU_MSG_HDR_SIZE) { gk20a_err(dev_from_gk20a(g), "fail to read msg from queue %d", queue->id); *status = err | -EINVAL; goto clean_up; } } if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) { gk20a_err(dev_from_gk20a(g), "read invalid unit_id %d from queue %d", msg->hdr.unit_id, queue->id); *status = -EINVAL; goto 
clean_up; } if (msg->hdr.size > PMU_MSG_HDR_SIZE) { read_size = msg->hdr.size - PMU_MSG_HDR_SIZE; err = pmu_queue_pop(pmu, queue, &msg->msg, read_size, &bytes_read); if (err || bytes_read != read_size) { gk20a_err(dev_from_gk20a(g), "fail to read msg from queue %d", queue->id); *status = err; goto clean_up; } } err = pmu_queue_close(pmu, queue, true); if (err) { gk20a_err(dev_from_gk20a(g), "fail to close queue %d", queue->id); *status = err; return false; } return true; clean_up: err = pmu_queue_close(pmu, queue, false); if (err) gk20a_err(dev_from_gk20a(g), "fail to close queue %d", queue->id); return false; } static int pmu_response_handle(struct pmu_gk20a *pmu, struct pmu_msg *msg) { struct gk20a *g = pmu->g; struct pmu_sequence *seq; struct pmu_v *pv = &g->ops.pmu_ver; int ret = 0; gk20a_dbg_fn(""); seq = &pmu->seq[msg->hdr.seq_id]; if (seq->state != PMU_SEQ_STATE_USED && seq->state != PMU_SEQ_STATE_CANCELLED) { gk20a_err(dev_from_gk20a(g), "msg for an unknown sequence %d", seq->id); return -EINVAL; } if (msg->hdr.unit_id == PMU_UNIT_RC && msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { gk20a_err(dev_from_gk20a(g), "unhandled cmd: seq %d", seq->id); } else if (seq->state != PMU_SEQ_STATE_CANCELLED) { if (seq->msg) { if (seq->msg->hdr.size >= msg->hdr.size) { memcpy(seq->msg, msg, msg->hdr.size); if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) { pmu_copy_from_dmem(pmu, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq)), seq->out_payload, pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)), 0); } } else { gk20a_err(dev_from_gk20a(g), "sequence %d msg buffer too small", seq->id); } } } else seq->callback = NULL; if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq)) != 0) pmu->dmem.free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_in_a_ptr(seq)), pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq))); if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) pmu->dmem.free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq)), pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq))); if (seq->callback) seq->callback(g, msg, seq->cb_params, seq->desc, ret); pmu_seq_release(pmu, seq); /* TBD: notify client waiting for available dmem */ gk20a_dbg_fn("done"); return 0; } static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout, u32 *var, u32 val); static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct pmu_gk20a *pmu = param; gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE"); pmu->zbc_save_done = 1; } void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq; if (!pmu->pmu_ready || !entries || !pmu->zbc_ready) return; memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd); cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update; cmd.cmd.zbc.entry_mask = ZBC_MASK(entries); pmu->zbc_save_done = 0; gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_zbc_msg, pmu, &seq, ~0); pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), &pmu->zbc_save_done, 1); if (!pmu->zbc_save_done) gk20a_err(dev_from_gk20a(g), "ZBC save timeout"); } static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; struct pmu_v 
*pv = &g->ops.pmu_ver; struct pmu_cmd cmd; struct pmu_payload payload; u32 seq; /* PERFMON Start */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PERFMON; cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size(); pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon, PMU_PERFMON_CMD_ID_START); pv->perfmon_start_set_group_id(&cmd.cmd.perfmon, PMU_DOMAIN_GROUP_PSTATE); pv->perfmon_start_set_state_id(&cmd.cmd.perfmon, pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]); pv->perfmon_start_set_flags(&cmd.cmd.perfmon, PMU_PERFMON_FLAG_ENABLE_INCREASE | PMU_PERFMON_FLAG_ENABLE_DECREASE | PMU_PERFMON_FLAG_CLEAR_PREV); memset(&payload, 0, sizeof(struct pmu_payload)); /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */ pmu->perfmon_counter.upper_threshold = 3000; /* 30% */ /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */ pmu->perfmon_counter.lower_threshold = 1000; /* 10% */ pmu->perfmon_counter.valid = true; payload.in.buf = &pmu->perfmon_counter; payload.in.size = sizeof(pmu->perfmon_counter); payload.in.offset = pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC); gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START"); gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, NULL, NULL, &seq, ~0); return 0; } static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; struct pmu_cmd cmd; u32 seq; /* PERFMON Stop */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PERFMON; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop); cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP; gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, NULL, NULL, &seq, ~0); return 0; } static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu, struct pmu_perfmon_msg *msg) { gk20a_dbg_fn(""); switch (msg->msg_type) { case PMU_PERFMON_MSG_ID_INCREASE_EVENT: gk20a_dbg_pmu("perfmon increase event: " "state_id %d, ground_id %d, pct %d", msg->gen.state_id, msg->gen.group_id, msg->gen.data); (pmu->perfmon_events_cnt)++; break; case PMU_PERFMON_MSG_ID_DECREASE_EVENT: gk20a_dbg_pmu("perfmon decrease event: " "state_id %d, ground_id %d, pct %d", msg->gen.state_id, msg->gen.group_id, msg->gen.data); (pmu->perfmon_events_cnt)++; break; case PMU_PERFMON_MSG_ID_INIT_EVENT: pmu->perfmon_ready = 1; gk20a_dbg_pmu("perfmon init event"); break; default: break; } /* restart sampling */ if (pmu->perfmon_sampling_enabled) return pmu_perfmon_start_sampling(pmu); return 0; } static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg) { int err = 0; gk20a_dbg_fn(""); switch (msg->hdr.unit_id) { case PMU_UNIT_PERFMON: err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon); break; default: break; } return err; } static int pmu_process_message(struct pmu_gk20a *pmu) { struct pmu_msg msg; int status; if (unlikely(!pmu->pmu_ready)) { pmu_process_init_msg(pmu, &msg); pmu_init_powergating(pmu); pmu_init_perfmon(pmu); return 0; } while (pmu_read_message(pmu, &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) { gk20a_dbg_pmu("read msg hdr: " "unit_id = 0x%08x, size = 0x%08x, " "ctrl_flags = 0x%08x, seq_id = 0x%08x", msg.hdr.unit_id, msg.hdr.size, msg.hdr.ctrl_flags, msg.hdr.seq_id); msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK; if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) { pmu_handle_event(pmu, &msg); } else { pmu_response_handle(pmu, &msg); } } return 0; } static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout, u32 *var, u32 val) { struct gk20a *g = pmu->g; unsigned long end_jiffies = 
jiffies + msecs_to_jiffies(timeout); unsigned long delay = GR_IDLE_CHECK_DEFAULT; do { if (*var == val) return 0; if (gk20a_readl(g, pwr_falcon_irqstat_r())) gk20a_pmu_isr(g); usleep_range(delay, delay * 2); delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); } while (time_before(jiffies, end_jiffies) || !tegra_platform_is_silicon()); return -ETIMEDOUT; } static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; struct pmu_pg_stats stats; pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset, (u8 *)&stats, sizeof(struct pmu_pg_stats), 0); gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx", stats.pg_entry_start_timestamp); gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx", stats.pg_exit_start_timestamp); gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx", stats.pg_ingating_start_timestamp); gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx", stats.pg_ungating_start_timestamp); gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x", stats.pg_avg_entry_time_us); gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x", stats.pg_avg_exit_time_us); gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x", stats.pg_ingating_cnt); gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x", stats.pg_ingating_time_us); gk20a_dbg_pmu("pg_ungating_count : 0x%08x", stats.pg_ungating_count); gk20a_dbg_pmu("pg_ungating_time_us 0x%08x: ", stats.pg_ungating_time_us); gk20a_dbg_pmu("pg_gating_cnt : 0x%08x", stats.pg_gating_cnt); gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x", stats.pg_gating_deny_cnt); /* Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset in .nm file, e.g. 0x1000066c. use 0x66c. u32 i, val[20]; pmu_copy_from_dmem(pmu, 0x66c, (u8 *)val, sizeof(val), 0); gk20a_dbg_pmu("elpg log begin"); for (i = 0; i < 20; i++) gk20a_dbg_pmu("0x%08x", val[i]); gk20a_dbg_pmu("elpg log end"); */ gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x", gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0))); gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x", gk20a_readl(g, pwr_pmu_pg_intren_r(0))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(4))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(7))); /* TBD: script can't generate those registers correctly gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x", gk20a_readl(g, pwr_pmu_idle_status_r())); gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x", gk20a_readl(g, pwr_pmu_pg_ctrl_r())); */ } static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; int i; gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d", gk20a_readl(g, pwr_falcon_os_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x", gk20a_readl(g, pwr_falcon_cpuctl_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x", gk20a_readl(g, pwr_falcon_idlestate_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x", gk20a_readl(g, pwr_falcon_mailbox0_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x", gk20a_readl(g, pwr_falcon_mailbox1_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x", gk20a_readl(g, pwr_falcon_irqstat_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x", gk20a_readl(g, 
pwr_falcon_irqmode_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x", gk20a_readl(g, pwr_falcon_irqmask_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x", gk20a_readl(g, pwr_falcon_irqdest_r())); for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++) gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_mailbox_r(i))); for (i = 0; i < pwr_pmu_debug__size_1_v(); i++) gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_debug_r(i))); for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) { gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rstat_f() | pwr_pmu_falcon_icd_cmd_idx_f(i)); gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); } i = gk20a_readl(g, pwr_pmu_bar0_error_status_r()); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i); if (i != 0) { gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_addr_r())); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_data_r())); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_timeout_r())); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_ctl_r())); } i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r()); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i); i = gk20a_readl(g, pwr_falcon_exterrstat_r()); gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i); if (pwr_falcon_exterrstat_valid_v(i) == pwr_falcon_exterrstat_valid_true_v()) { gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x", gk20a_readl(g, pwr_falcon_exterraddr_r())); gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x", gk20a_readl(g, mc_enable_r())); } gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x", gk20a_readl(g, pwr_falcon_engctl_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x", gk20a_readl(g, pwr_falcon_curctx_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x", gk20a_readl(g, pwr_falcon_nxtctx_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); for (i = 0; i < 4; i++) { gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 
0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); } gk20a_err(dev_from_gk20a(g), "elpg stat: %d\n", pmu->elpg_stat); /* PMU may crash due to FECS crash. Dump FECS status */ gk20a_fecs_dump_falcon_stats(g); } void gk20a_pmu_isr(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_queue *queue; u32 intr, mask; bool recheck = false; gk20a_dbg_fn(""); mutex_lock(&pmu->isr_mutex); mask = gk20a_readl(g, pwr_falcon_irqmask_r()) & gk20a_readl(g, pwr_falcon_irqdest_r()); intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask; gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr); if (!intr) { mutex_unlock(&pmu->isr_mutex); return; } if (intr & pwr_falcon_irqstat_halt_true_f()) { gk20a_err(dev_from_gk20a(g), "pmu halt intr not implemented"); pmu_dump_falcon_stats(pmu); } if (intr & pwr_falcon_irqstat_exterr_true_f()) { gk20a_err(dev_from_gk20a(g), "pmu exterr intr not implemented. Clearing interrupt."); pmu_dump_falcon_stats(pmu); gk20a_writel(g, pwr_falcon_exterrstat_r(), gk20a_readl(g, pwr_falcon_exterrstat_r()) & ~pwr_falcon_exterrstat_valid_m()); } if (intr & pwr_falcon_irqstat_swgen0_true_f()) { pmu_process_message(pmu); recheck = true; } gk20a_writel(g, pwr_falcon_irqsclr_r(), intr); if (recheck) { queue = &pmu->queue[PMU_MESSAGE_QUEUE]; if (!pmu_queue_is_empty(pmu, queue)) gk20a_writel(g, pwr_falcon_irqsset_r(), pwr_falcon_irqsset_swgen0_set_f()); } mutex_unlock(&pmu->isr_mutex); } static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, struct pmu_msg *msg, struct pmu_payload *payload, u32 queue_id) { struct gk20a *g = pmu->g; struct pmu_queue *queue; u32 in_size, out_size; if (!PMU_IS_SW_COMMAND_QUEUE(queue_id)) goto invalid_cmd; queue = &pmu->queue[queue_id]; if (cmd->hdr.size < PMU_CMD_HDR_SIZE) goto invalid_cmd; if (cmd->hdr.size > (queue->size >> 1)) goto invalid_cmd; if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE) goto invalid_cmd; if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id)) goto invalid_cmd; if (payload == NULL) return true; if (payload->in.buf == NULL && payload->out.buf == NULL) goto invalid_cmd; if ((payload->in.buf != NULL && payload->in.size == 0) || (payload->out.buf != NULL && payload->out.size == 0)) goto invalid_cmd; in_size = PMU_CMD_HDR_SIZE; if (payload->in.buf) { in_size += payload->in.offset; in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu); } out_size = PMU_CMD_HDR_SIZE; if (payload->out.buf) { out_size += payload->out.offset; out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu); } if (in_size > cmd->hdr.size || out_size > cmd->hdr.size) goto invalid_cmd; if ((payload->in.offset != 0 && payload->in.buf == NULL) || (payload->out.offset != 0 && payload->out.buf == NULL)) goto invalid_cmd; return true; invalid_cmd: gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n" "queue_id=%d,\n" "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n" "payload in=%p, in_size=%d, in_offset=%d,\n" "payload out=%p, out_size=%d, out_offset=%d", queue_id, cmd->hdr.size, cmd->hdr.unit_id, msg, msg?msg->hdr.unit_id:~0, &payload->in, payload->in.size, payload->in.offset, &payload->out, payload->out.size, payload->out.offset); return false; } static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, u32 queue_id, unsigned long timeout) { struct gk20a *g = pmu->g; struct pmu_queue *queue; unsigned 
long end_jiffies = jiffies + msecs_to_jiffies(timeout); int err; gk20a_dbg_fn(""); queue = &pmu->queue[queue_id]; do { err = pmu_queue_open_write(pmu, queue, cmd->hdr.size); if (err == -EAGAIN && time_before(jiffies, end_jiffies)) usleep_range(1000, 2000); else break; } while (1); if (err) goto clean_up; pmu_queue_push(pmu, queue, cmd, cmd->hdr.size); err = pmu_queue_close(pmu, queue, true); clean_up: if (err) gk20a_err(dev_from_gk20a(g), "fail to write cmd to queue %d", queue_id); else gk20a_dbg_fn("done"); return err; } int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_msg *msg, struct pmu_payload *payload, u32 queue_id, pmu_callback callback, void* cb_param, u32 *seq_desc, unsigned long timeout) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_v *pv = &g->ops.pmu_ver; struct pmu_sequence *seq; void *in = NULL, *out = NULL; int err; gk20a_dbg_fn(""); BUG_ON(!cmd); BUG_ON(!seq_desc); BUG_ON(!pmu->pmu_ready); if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id)) return -EINVAL; err = pmu_seq_acquire(pmu, &seq); if (err) return err; cmd->hdr.seq_id = seq->id; cmd->hdr.ctrl_flags = 0; cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS; cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR; seq->callback = callback; seq->cb_params = cb_param; seq->msg = msg; seq->out_payload = NULL; seq->desc = pmu->next_seq_desc++; if (payload) seq->out_payload = payload->out.buf; *seq_desc = seq->desc; if (payload && payload->in.offset != 0) { pv->set_pmu_allocation_ptr(pmu, &in, ((u8 *)&cmd->cmd + payload->in.offset)); if (payload->in.buf != payload->out.buf) pv->pmu_allocation_set_dmem_size(pmu, in, (u16)payload->in.size); else pv->pmu_allocation_set_dmem_size(pmu, in, (u16)max(payload->in.size, payload->out.size)); err = pmu->dmem.alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_offset_addr(pmu, in), pv->pmu_allocation_get_dmem_size(pmu, in)); if (err) goto clean_up; pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu, in)), payload->in.buf, payload->in.size, 0); pv->pmu_allocation_set_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq), pv->pmu_allocation_get_dmem_size(pmu, in)); pv->pmu_allocation_set_dmem_offset(pmu, pv->get_pmu_seq_in_a_ptr(seq), pv->pmu_allocation_get_dmem_offset(pmu, in)); } if (payload && payload->out.offset != 0) { pv->set_pmu_allocation_ptr(pmu, &out, ((u8 *)&cmd->cmd + payload->out.offset)); pv->pmu_allocation_set_dmem_size(pmu, out, (u16)payload->out.size); if (payload->out.buf != payload->in.buf) { err = pmu->dmem.alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_offset_addr(pmu, out), pv->pmu_allocation_get_dmem_size(pmu, out)); if (err) goto clean_up; } else { BUG_ON(in == NULL); pv->pmu_allocation_set_dmem_offset(pmu, out, pv->pmu_allocation_get_dmem_offset(pmu, in)); } pv->pmu_allocation_set_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq), pv->pmu_allocation_get_dmem_size(pmu, out)); pv->pmu_allocation_set_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq), pv->pmu_allocation_get_dmem_offset(pmu, out)); } seq->state = PMU_SEQ_STATE_USED; err = pmu_write_cmd(pmu, cmd, queue_id, timeout); if (err) seq->state = PMU_SEQ_STATE_PENDING; gk20a_dbg_fn("done"); return 0; clean_up: gk20a_dbg_fn("fail"); if (in) pmu->dmem.free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, in), pv->pmu_allocation_get_dmem_size(pmu, in)); if (out) pmu->dmem.free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, out), pv->pmu_allocation_get_dmem_size(pmu, out)); pmu_seq_release(pmu, seq); return err; } static int gk20a_pmu_enable_elpg_locked(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct 
pmu_cmd cmd; u32 seq, status; gk20a_dbg_fn(""); memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW; /* no need to wait ack for ELPG enable but set pending to sync with follow up ELPG disable */ pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_ALLOW"); status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, pmu, &seq, ~0); BUG_ON(status != 0); gk20a_dbg_fn("done"); return 0; } int gk20a_pmu_enable_elpg(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct gr_gk20a *gr = &g->gr; int ret = 0; gk20a_dbg_fn(""); mutex_lock(&pmu->elpg_mutex); pmu->elpg_refcnt++; if (pmu->elpg_refcnt <= 0) goto exit_unlock; /* something is not right if we end up in following code path */ if (unlikely(pmu->elpg_refcnt > 1)) { gk20a_warn(dev_from_gk20a(g), "%s(): possible elpg refcnt mismatch. elpg refcnt=%d", __func__, pmu->elpg_refcnt); WARN_ON(1); } /* do NOT enable elpg until golden ctx is created, which is related with the ctx that ELPG save and restore. */ if (unlikely(!gr->ctx_vars.golden_image_initialized)) goto exit_unlock; /* return if ELPG is already on or on_pending or off_on_pending */ if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) goto exit_unlock; ret = gk20a_pmu_enable_elpg_locked(g); exit_unlock: mutex_unlock(&pmu->elpg_mutex); gk20a_dbg_fn("done"); return ret; } int gk20a_pmu_disable_elpg(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq; int ret = 0; gk20a_dbg_fn(""); mutex_lock(&pmu->elpg_mutex); pmu->elpg_refcnt--; if (pmu->elpg_refcnt > 0) { gk20a_warn(dev_from_gk20a(g), "%s(): possible elpg refcnt mismatch. 
elpg refcnt=%d", __func__, pmu->elpg_refcnt); WARN_ON(1); ret = 0; goto exit_unlock; } /* cancel off_on_pending and return */ if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) { pmu->elpg_stat = PMU_ELPG_STAT_OFF; ret = 0; goto exit_reschedule; } /* wait if on_pending */ else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) { pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), &pmu->elpg_stat, PMU_ELPG_STAT_ON); if (pmu->elpg_stat != PMU_ELPG_STAT_ON) { gk20a_err(dev_from_gk20a(g), "ELPG_ALLOW_ACK failed, elpg_stat=%d", pmu->elpg_stat); pmu_dump_elpg_stats(pmu); pmu_dump_falcon_stats(pmu); ret = -EBUSY; goto exit_unlock; } } /* return if ELPG is already off */ else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) { ret = 0; goto exit_reschedule; } memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, pmu, &seq, ~0); pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), &pmu->elpg_stat, PMU_ELPG_STAT_OFF); if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) { gk20a_err(dev_from_gk20a(g), "ELPG_DISALLOW_ACK failed"); pmu_dump_elpg_stats(pmu); pmu_dump_falcon_stats(pmu); ret = -EBUSY; goto exit_unlock; } exit_reschedule: exit_unlock: mutex_unlock(&pmu->elpg_mutex); gk20a_dbg_fn("done"); return ret; } int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable) { struct pmu_gk20a *pmu = &g->pmu; int err; gk20a_dbg_fn(""); if (enable) err = pmu_perfmon_start_sampling(pmu); else err = pmu_perfmon_stop_sampling(pmu); return err; } int gk20a_pmu_destroy(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; u32 elpg_ingating_time, elpg_ungating_time, gating_cnt; gk20a_dbg_fn(""); if (!support_gk20a_pmu()) return 0; /* make sure the pending operations are finished before we continue */ cancel_work_sync(&pmu->pg_init); gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time, &elpg_ungating_time, &gating_cnt); gk20a_pmu_disable_elpg(g); pmu->initialized = false; /* update the s/w ELPG residency counters */ g->pg_ingating_time_us += (u64)elpg_ingating_time; g->pg_ungating_time_us += (u64)elpg_ungating_time; g->pg_gating_cnt += gating_cnt; pmu_enable(pmu, false); pmu->pmu_state = PMU_STATE_OFF; pmu->pmu_ready = false; pmu->perfmon_ready = false; pmu->zbc_ready = false; gk20a_dbg_fn("done"); return 0; } int gk20a_pmu_load_norm(struct gk20a *g, u32 *load) { struct pmu_gk20a *pmu = &g->pmu; u16 _load = 0; if (!pmu->perfmon_ready) { *load = 0; return 0; } pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); *load = _load / 10; return 0; } void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, u32 *total_cycles) { if (!g->power_on) { *busy_cycles = 0; *total_cycles = 0; return; } gk20a_busy(g->dev); *busy_cycles = pwr_pmu_idle_count_value_v( gk20a_readl(g, pwr_pmu_idle_count_r(1))); rmb(); *total_cycles = pwr_pmu_idle_count_value_v( gk20a_readl(g, pwr_pmu_idle_count_r(2))); gk20a_idle(g->dev); } void gk20a_pmu_reset_load_counters(struct gk20a *g) { u32 reg_val = pwr_pmu_idle_count_reset_f(1); if (!g->power_on) return; gk20a_busy(g->dev); gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val); wmb(); gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val); gk20a_idle(g->dev); } static int 
static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
			u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_pg_stats stats;

	if (!pmu->initialized) {
		*ingating_time = 0;
		*ungating_time = 0;
		*gating_cnt = 0;
		return 0;
	}

	pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
		(u8 *)&stats, sizeof(struct pmu_pg_stats), 0);

	*ingating_time = stats.pg_ingating_time_us;
	*ungating_time = stats.pg_ungating_time_us;
	*gating_cnt = stats.pg_gating_cnt;

	return 0;
}

/* Send an Adaptive Power (AP) related command to PMU */
static int gk20a_pmu_ap_send_command(struct gk20a *g,
			union pmu_ap_cmd *p_ap_cmd, bool b_block)
{
	struct pmu_gk20a *pmu = &g->pmu;
	/* FIXME: where is the PG structure defined?? */
	u32 status = 0;
	struct pmu_cmd cmd;
	u32 seq;
	pmu_callback p_callback = NULL;

	memset(&cmd, 0, sizeof(struct pmu_cmd));

	/* Copy common members */
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);

	cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
	cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;

	/* Copy other members of command */
	switch (p_ap_cmd->cmn.cmd_id) {
	case PMU_AP_CMD_ID_INIT:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT");
		cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
			p_ap_cmd->init.pg_sampling_period_us;
		p_callback = ap_callback_init_and_enable_ctrl;
		break;

	case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL");
		cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
			p_ap_cmd->init_and_enable_ctrl.ctrl_id;
		memcpy(
		(void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
			(void *)&(p_ap_cmd->init_and_enable_ctrl.params),
			sizeof(struct pmu_ap_ctrl_init_params));

		p_callback = ap_callback_init_and_enable_ctrl;
		break;

	case PMU_AP_CMD_ID_ENABLE_CTRL:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_ENABLE_CTRL");
		cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
			p_ap_cmd->enable_ctrl.ctrl_id;
		break;

	case PMU_AP_CMD_ID_DISABLE_CTRL:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_DISABLE_CTRL");
		cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
			p_ap_cmd->disable_ctrl.ctrl_id;
		break;

	case PMU_AP_CMD_ID_KICK_CTRL:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_KICK_CTRL");
		cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
			p_ap_cmd->kick_ctrl.ctrl_id;
		cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
			p_ap_cmd->kick_ctrl.skip_count;
		break;

	default:
		gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
			__func__, p_ap_cmd->cmn.cmd_id);
		return 0x2f;
	}

	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			p_callback, pmu, &seq, ~0);

	if (!status) {
		gk20a_dbg_pmu(
			"%s: Unable to submit Adaptive Power Command %d\n",
			__func__, p_ap_cmd->cmn.cmd_id);
		goto err_return;
	}

	/* TODO: Implement blocking calls (b_block) */

err_return:
	return status;
}

static void ap_callback_init_and_enable_ctrl(
		struct gk20a *g, struct pmu_msg *msg,
		void *param, u32 seq_desc, u32 status)
{
	/* Define p_ap (i.e pointer to pmu_ap structure) */
	WARN_ON(!msg);

	if (!status) {
		switch (msg->msg.pg.ap_msg.cmn.msg_id) {
		case PMU_AP_MSG_ID_INIT_ACK:
			gk20a_dbg_pmu("reply PMU_AP_CMD_ID_INIT");
			break;

		default:
			gk20a_dbg_pmu(
			"%s: Invalid Adaptive Power Message: %x\n",
			__func__, msg->msg.pg.ap_msg.cmn.msg_id);
			break;
		}
	}
}
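
/*
 * Usage sketch (illustrative only): every Adaptive Power request goes
 * through gk20a_pmu_ap_send_command() above, which wraps it in a
 * PMU_UNIT_PG command on the high-priority queue.  Only INIT and
 * INIT_AND_ENABLE_CTRL install a completion callback, and the b_block
 * argument is not honoured yet (see the TODO).  Kicking the graphics
 * controller might look like the following, assuming the kick_ctrl member
 * carries a cmd_id field just like the init members used below do:
 *
 *	union pmu_ap_cmd ap_cmd;
 *
 *	ap_cmd.kick_ctrl.cmd_id = PMU_AP_CMD_ID_KICK_CTRL;
 *	ap_cmd.kick_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
 *	ap_cmd.kick_ctrl.skip_count = 0;
 *	(void)gk20a_pmu_ap_send_command(g, &ap_cmd, false);
 */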
static int gk20a_aelpg_init(struct gk20a *g)
{
	int status = 0;

	/* Remove reliance on app_ctrl field. */
	union pmu_ap_cmd ap_cmd;

	/* TODO: Check for elpg being ready? */
	ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
	ap_cmd.init.pg_sampling_period_us =
		APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;

	status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
	return status;
}

static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
{
	int status = 0;
	union pmu_ap_cmd ap_cmd;

	/* TODO: Probably check if ELPG is ready? */
	ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
	ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
	ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
		APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
	ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
		APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
	ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
		APCTRL_POWER_BREAKEVEN_DEFAULT_US;
	ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
		APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;

	switch (ctrl_id) {
	case PMU_AP_CTRL_ID_GRAPHICS:
		break;
	default:
		break;
	}

	status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
	return status;
}

#if CONFIG_DEBUG_FS
static int elpg_residency_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 ingating_time = 0;
	u32 ungating_time = 0;
	u32 gating_cnt;
	u64 total_ingating, total_ungating, residency, divisor, dividend;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		gk20a_busy(g->dev);
		gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
			&ungating_time, &gating_cnt);
		gk20a_idle(g->dev);
	}
	total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
	total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s, "Time in ELPG: %llu us\n"
			"Time out of ELPG: %llu us\n"
			"ELPG residency ratio: %llu\n",
			total_ingating, total_ungating, residency);
	return 0;
}

static int elpg_residency_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_residency_show, inode->i_private);
}

static const struct file_operations elpg_residency_fops = {
	.open		= elpg_residency_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int elpg_transitions_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 ingating_time, ungating_time, total_gating_cnt;
	u32 gating_cnt = 0;

	if (g->power_on) {
		gk20a_busy(g->dev);
		gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
			&ungating_time, &gating_cnt);
		gk20a_idle(g->dev);
	}
	total_gating_cnt = g->pg_gating_cnt + gating_cnt;

	seq_printf(s, "%u\n", total_gating_cnt);
	return 0;
}

static int elpg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_transitions_show, inode->i_private);
}

static const struct file_operations elpg_transitions_fops = {
	.open		= elpg_transitions_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
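
/*
 * The seq_file handlers above back the read-only debugfs nodes
 * "elpg_residency" and "elpg_transitions" registered in
 * gk20a_pmu_debugfs_init() below.  Assuming debugfs is mounted at
 * /sys/kernel/debug and platform->debugfs points at the gk20a directory
 * (the exact path is platform dependent), they can be read with e.g.:
 *
 *	cat /sys/kernel/debug/<gk20a dir>/elpg_residency
 *	cat /sys/kernel/debug/<gk20a dir>/elpg_transitions
 *
 * The residency ratio is reported on a 0..1000 scale, since
 * elpg_residency_show() scales the in-gating time by 1000 before dividing
 * by the total time.
 */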
static int perfmon_events_enable_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
	return 0;
}

static int perfmon_events_enable_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_enable_show, inode->i_private);
}

static ssize_t perfmon_events_enable_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct seq_file *s = file->private_data;
	struct gk20a *g = s->private;
	unsigned long val = 0;
	char buf[40];
	int buf_size;

	memset(buf, 0, sizeof(buf));
	buf_size = min(count, (sizeof(buf)-1));

	if (copy_from_user(buf, userbuf, buf_size))
		return -EFAULT;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* Don't turn on gk20a unnecessarily */
	if (g->power_on) {
		gk20a_busy(g->dev);
		if (val && !g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = true;
			pmu_perfmon_start_sampling(&(g->pmu));
		} else if (!val && g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = false;
			pmu_perfmon_stop_sampling(&(g->pmu));
		}
		gk20a_idle(g->dev);
	} else {
		g->pmu.perfmon_sampling_enabled = val ? true : false;
	}

	return count;
}

static const struct file_operations perfmon_events_enable_fops = {
	.open		= perfmon_events_enable_open,
	.read		= seq_read,
	.write		= perfmon_events_enable_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int perfmon_events_count_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
	return 0;
}

static int perfmon_events_count_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_count_show, inode->i_private);
}

static const struct file_operations perfmon_events_count_fops = {
	.open		= perfmon_events_count_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

int gk20a_pmu_debugfs_init(struct platform_device *dev)
{
	struct dentry *d;
	struct gk20a_platform *platform = platform_get_drvdata(dev);
	struct gk20a *g = get_gk20a(dev);

	d = debugfs_create_file(
		"elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
						&elpg_residency_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"elpg_transitions", S_IRUGO, platform->debugfs, g,
						&elpg_transitions_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"perfmon_events_enable", S_IRUGO, platform->debugfs, g,
						&perfmon_events_enable_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"perfmon_events_count", S_IRUGO, platform->debugfs, g,
						&perfmon_events_count_fops);
	if (!d)
		goto err_out;

	return 0;

err_out:
	pr_err("%s: Failed to make debugfs node\n", __func__);
	debugfs_remove_recursive(platform->debugfs);
	return -ENOMEM;
}
#endif
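
/*
 * Note on the debugfs modes set up in gk20a_pmu_debugfs_init():
 * "perfmon_events_enable" installs a write handler but is registered with
 * S_IRUGO only, while "elpg_residency" is registered S_IRUGO|S_IWUSR
 * without any write handler, so the writable bit appears to sit on the
 * wrong node.  Assuming the standard debugfs mount point and a privileged
 * caller, toggling perfmon sampling would look like:
 *
 *	echo 1 > /sys/kernel/debug/<gk20a dir>/perfmon_events_enable
 *	cat /sys/kernel/debug/<gk20a dir>/perfmon_events_count
 *
 * When the GPU is powered off, the write only latches
 * g->pmu.perfmon_sampling_enabled; sampling is started or stopped
 * immediately only while g->power_on is set.
 */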