/*
 * GK20A Graphics FIFO (gr host)
 *
 * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <trace/events/gk20a.h>
#include <linux/dma-mapping.h>
#include <linux/nvhost.h>

#include "gk20a.h"
#include "debug_gk20a.h"
#include "ctxsw_trace_gk20a.h"
#include "semaphore_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "hw_ccsr_gk20a.h"
#include "hw_ram_gk20a.h"
#include "hw_top_gk20a.h"
#include "hw_mc_gk20a.h"
#include "hw_gr_gk20a.h"

#define FECS_METHOD_WFI_RESTORE 0x80000

static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
					    u32 hw_chid, bool add,
					    bool wait_for_finish);
static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);

u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
		u32 engine_id[], u32 engine_id_sz,
		u32 engine_enum)
{
	struct fifo_gk20a *f = NULL;
	u32 instance_cnt = 0;
	u32 engine_id_idx;
	u32 active_engine_id = 0;
	struct fifo_engine_info_gk20a *info = NULL;

	if (g && engine_id_sz && (engine_enum < ENGINE_INVAL_GK20A)) {
		f = &g->fifo;
		for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
			active_engine_id = f->active_engines_list[engine_id_idx];
			info = &f->engine_info[active_engine_id];

			if (info->engine_enum == engine_enum) {
				if (instance_cnt < engine_id_sz) {
					engine_id[instance_cnt] = active_engine_id;
					++instance_cnt;
				} else {
					gk20a_dbg_info("warning engine_id table sz is small %d",
							engine_id_sz);
				}
			}
		}
	}
	return instance_cnt;
}
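
/*
 * Illustrative usage sketch (not part of the original driver): a caller
 * that wants every async copy-engine id could use the helper above
 * roughly like this, with the array size chosen arbitrarily here:
 *
 *	u32 ce_ids[8];
 *	u32 n = gk20a_fifo_get_engine_ids(g, ce_ids, ARRAY_SIZE(ce_ids),
 *					  ENGINE_ASYNC_CE_GK20A);
 *
 * The return value is the number of ids actually written; engines beyond
 * engine_id_sz are only reported in the debug log.
 */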

struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id)
{
	struct fifo_gk20a *f = NULL;
	u32 engine_id_idx;
	struct fifo_engine_info_gk20a *info = NULL;

	if (!g)
		return info;

	f = &g->fifo;

	if (engine_id < f->max_engines) {
		for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
			if (engine_id == f->active_engines_list[engine_id_idx]) {
				info = &f->engine_info[engine_id];
				break;
			}
		}
	}

	if (!info)
		gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id);

	return info;
}

bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id)
{
	struct fifo_gk20a *f = NULL;
	u32 engine_id_idx;
	bool valid = false;

	if (!g)
		return valid;

	f = &g->fifo;

	if (engine_id < f->max_engines) {
		for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
			if (engine_id == f->active_engines_list[engine_id_idx]) {
				valid = true;
				break;
			}
		}
	}

	if (!valid)
		gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id);

	return valid;
}

u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g)
{
	u32 gr_engine_cnt = 0;
	u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;

	/* Consider 1st available GR engine */
	gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
			1, ENGINE_GR_GK20A);

	if (!gr_engine_cnt) {
		gk20a_err(dev_from_gk20a(g), "No GR engine available on this device!\n");
	}

	return gr_engine_id;
}

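/*
 * Build the combined reset mask for every copy engine (GRCE and async CE)
 * found in the active engines list.
 */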
u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g)
{
	u32 reset_mask = 0;
	u32 engine_enum = ENGINE_INVAL_GK20A;
	struct fifo_gk20a *f = NULL;
	u32 engine_id_idx;
	struct fifo_engine_info_gk20a *engine_info;
	u32 active_engine_id = 0;

	if (!g)
		return reset_mask;

	f = &g->fifo;

	for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
		active_engine_id = f->active_engines_list[engine_id_idx];
		engine_info = &f->engine_info[active_engine_id];
		engine_enum = engine_info->engine_enum;

		if ((engine_enum == ENGINE_GRCE_GK20A) ||
			(engine_enum == ENGINE_ASYNC_CE_GK20A))
				reset_mask |= engine_info->reset_mask;
	}

	return reset_mask;
}

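/*
 * Return the runlist id of the last async copy engine in the active
 * engines list, falling back to the GR runlist id when no async CE is
 * present.
 */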
u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g)
{
	u32 ce_runlist_id = gk20a_fifo_get_gr_runlist_id(g);
	u32 engine_enum = ENGINE_INVAL_GK20A;
	struct fifo_gk20a *f = NULL;
	u32 engine_id_idx;
	struct fifo_engine_info_gk20a *engine_info;
	u32 active_engine_id = 0;

	if (!g)
		return ce_runlist_id;

	f = &g->fifo;

	for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
		active_engine_id = f->active_engines_list[engine_id_idx];
		engine_info = &f->engine_info[active_engine_id];
		engine_enum = engine_info->engine_enum;

		/* select the last available ASYNC_CE, if any */
		if (engine_enum == ENGINE_ASYNC_CE_GK20A)
			ce_runlist_id = engine_info->runlist_id;
	}

	return ce_runlist_id;
}

u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g)
{
	u32 gr_engine_cnt = 0;
	u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
	struct fifo_engine_info_gk20a *engine_info;
	u32 gr_runlist_id = ~0;

	/* Consider 1st available GR engine */
	gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
			1, ENGINE_GR_GK20A);

	if (!gr_engine_cnt) {
		gk20a_err(dev_from_gk20a(g),
			"No GR engine available on this device!");
		goto end;
	}

	engine_info = gk20a_fifo_get_engine_info(g, gr_engine_id);

	if (engine_info) {
		gr_runlist_id = engine_info->runlist_id;
	} else {
		gk20a_err(g->dev,
			"gr_engine_id is not in active list/invalid %d", gr_engine_id);
	}

end:
	return gr_runlist_id;
}

bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
{
	struct fifo_gk20a *f = NULL;
	u32 engine_id_idx;
	u32 active_engine_id;
	struct fifo_engine_info_gk20a *engine_info;

	if (!g)
		return false;

	f = &g->fifo;

	for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
		active_engine_id = f->active_engines_list[engine_id_idx];
		engine_info = gk20a_fifo_get_engine_info(g, active_engine_id);
		if (engine_info && (engine_info->runlist_id == runlist_id)) {
			return true;
		}
	}

	return false;
}

/*
 * Link engine IDs to MMU IDs and vice versa.
 */

static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
{
	u32 fault_id = FIFO_INVAL_ENGINE_ID;
	struct fifo_engine_info_gk20a *engine_info;

	engine_info = gk20a_fifo_get_engine_info(g, engine_id);

	if (engine_info) {
		fault_id = engine_info->fault_id;
	} else {
		gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id);
	}
	return fault_id;
}

static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id)
{
	u32 engine_id;
	u32 active_engine_id;
	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_gk20a *f = &g->fifo;

	for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
		active_engine_id = f->active_engines_list[engine_id];
		engine_info = &g->fifo.engine_info[active_engine_id];

		if (engine_info->fault_id == fault_id)
			break;
		active_engine_id = FIFO_INVAL_ENGINE_ID;
	}
	return active_engine_id;
}

int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
					u32 *inst_id)
{
	int ret = ENGINE_INVAL_GK20A;

	gk20a_dbg_info("engine type %d", engine_type);
	if (engine_type == top_device_info_type_enum_graphics_v())
		ret = ENGINE_GR_GK20A;
	else if ((engine_type >= top_device_info_type_enum_copy0_v()) &&
		(engine_type <= top_device_info_type_enum_copy2_v())) {
		/* Assume all CE engines have separate runlists at this point.
		 * ENGINE_GRCE_GK20A type CEs are identified later, in
		 * init_engine_info(), by comparing their runlist_id with the
		 * GR runlist_id. */
		ret = ENGINE_ASYNC_CE_GK20A;
		/* inst_id is 0..2, corresponding to CE0..CE2 */
		if (inst_id)
			*inst_id = (engine_type - top_device_info_type_enum_copy0_v());
	}

	return ret;
}

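/*
 * Walk the top_device_info table to discover the available engines and
 * record their runlist, pbdma, interrupt and reset information in
 * f->engine_info[] and f->active_engines_list[].
 */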
int gk20a_fifo_init_engine_info(struct fifo_gk20a *f)
{
	struct gk20a *g = f->g;
	struct device *d = dev_from_gk20a(g);
	u32 i;
	u32 max_info_entries = top_device_info__size_1_v();
	u32 engine_enum = ENGINE_INVAL_GK20A;
	u32 engine_id = FIFO_INVAL_ENGINE_ID;
	u32 runlist_id = ~0;
	u32 pbdma_id = ~0;
	u32 intr_id = ~0;
	u32 reset_id = ~0;
	u32 inst_id  = 0;
	u32 pri_base = 0;
	u32 fault_id = 0;
	u32 gr_runlist_id = ~0;

	gk20a_dbg_fn("");

	f->num_engines = 0;

	for (i = 0; i < max_info_entries; i++) {
		u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
		u32 entry = top_device_info_entry_v(table_entry);
		u32 runlist_bit;

		if (entry == top_device_info_entry_enum_v()) {
			if (top_device_info_engine_v(table_entry)) {
				engine_id =
					top_device_info_engine_enum_v(table_entry);
				gk20a_dbg_info("info: engine_id %d",
					top_device_info_engine_enum_v(table_entry));
			}

			if (top_device_info_runlist_v(table_entry)) {
				runlist_id =
					top_device_info_runlist_enum_v(table_entry);
				gk20a_dbg_info("gr info: runlist_id %d", runlist_id);

				runlist_bit = BIT(runlist_id);

				for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
					gk20a_dbg_info("gr info: pbdma_map[%d]=%d",
						pbdma_id, f->pbdma_map[pbdma_id]);
					if (f->pbdma_map[pbdma_id] & runlist_bit)
						break;
				}

				if (pbdma_id == f->num_pbdma) {
					gk20a_err(d, "busted pbdma map");
					return -EINVAL;
				}
			}

			if (top_device_info_intr_v(table_entry)) {
				intr_id =
					top_device_info_intr_enum_v(table_entry);
				gk20a_dbg_info("gr info: intr_id %d", intr_id);
			}

			if (top_device_info_reset_v(table_entry)) {
				reset_id =
					top_device_info_reset_enum_v(table_entry);
				gk20a_dbg_info("gr info: reset_id %d",
						reset_id);
			}
		} else if (entry == top_device_info_entry_engine_type_v()) {
			u32 engine_type =
				top_device_info_type_enum_v(table_entry);
			engine_enum =
				g->ops.fifo.engine_enum_from_type(g,
						engine_type, &inst_id);
		} else if (entry == top_device_info_entry_data_v()) {
			/* gk20a doesn't support device_info_data packet parsing */
			if (g->ops.fifo.device_info_data_parse)
				g->ops.fifo.device_info_data_parse(g,
					table_entry, &inst_id, &pri_base,
					&fault_id);
		}

		if (!top_device_info_chain_v(table_entry)) {
			if (engine_enum < ENGINE_INVAL_GK20A) {
				struct fifo_engine_info_gk20a *info =
					&g->fifo.engine_info[engine_id];

				info->intr_mask |= BIT(intr_id);
				info->reset_mask |= BIT(reset_id);
				info->runlist_id = runlist_id;
				info->pbdma_id = pbdma_id;
				info->inst_id  = inst_id;
				info->pri_base = pri_base;

				if (engine_enum == ENGINE_GR_GK20A)
					gr_runlist_id = runlist_id;

				/* GR and GR_COPY share the same runlist_id */
				if ((engine_enum == ENGINE_ASYNC_CE_GK20A) &&
					(gr_runlist_id == runlist_id))
						engine_enum = ENGINE_GRCE_GK20A;

				info->engine_enum = engine_enum;

				if (!fault_id && (engine_enum == ENGINE_GRCE_GK20A))
					fault_id = 0x1b;
				info->fault_id = fault_id;

				/* engine_id ranges from 0 to NV_HOST_NUM_ENGINES */
				f->active_engines_list[f->num_engines] = engine_id;

				++f->num_engines;

				engine_enum = ENGINE_INVAL_GK20A;
			}
		}
	}

	return 0;
}

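/*
 * OR together the interrupt masks of all active engines.  Copy engines
 * are skipped unless both CE stall and nonstall ISRs are hooked up.
 */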
u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g)
{
	u32 eng_intr_mask = 0;
	unsigned int i;
	u32 active_engine_id = 0;
	u32 engine_enum = ENGINE_INVAL_GK20A;

	for (i = 0; i < g->fifo.num_engines; i++) {
		u32 intr_mask;
		active_engine_id = g->fifo.active_engines_list[i];
		intr_mask = g->fifo.engine_info[active_engine_id].intr_mask;
		engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
		if (((engine_enum == ENGINE_GRCE_GK20A) ||
			(engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
			(!g->ops.ce2.isr_stall || !g->ops.ce2.isr_nonstall))
				continue;

		eng_intr_mask |= intr_mask;
	}

	return eng_intr_mask;
}

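/* Free the per-runlist buffers and active channel/TSG bitmaps and release the
 * runlist_info array. Safe to call on a partially initialized fifo. */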
void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
{
	u32 i;
	u32 runlist_id;
	struct fifo_runlist_info_gk20a *runlist;
	struct gk20a *g = NULL;

	if (!f || !f->runlist_info)
		return;

	g = f->g;

	for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
		runlist = &f->runlist_info[runlist_id];
		for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
			gk20a_gmmu_free(g, &runlist->mem[i]);
		}

		kfree(runlist->active_channels);
		runlist->active_channels = NULL;

		kfree(runlist->active_tsgs);
		runlist->active_tsgs = NULL;

		mutex_destroy(&runlist->mutex);

	}
	memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
		f->max_runlists));

	kfree(f->runlist_info);
	f->runlist_info = NULL;
	f->max_runlists = 0;
}

static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
{
	struct gk20a *g = f->g;

	gk20a_dbg_fn("");

	vfree(f->channel);
	vfree(f->tsg);
	if (g->ops.mm.is_bar1_supported(g))
		gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
	else
		gk20a_gmmu_free(g, &f->userd);

	gk20a_fifo_delete_runlist(f);

	kfree(f->pbdma_map);
	f->pbdma_map = NULL;
	kfree(f->engine_info);
	f->engine_info = NULL;
	kfree(f->active_engines_list);
	f->active_engines_list = NULL;
}

/* reads info from hardware and fills in pbdma exception info record */
static inline void get_exception_pbdma_info(
	struct gk20a *g,
	struct fifo_engine_info_gk20a *eng_info)
{
	struct fifo_pbdma_exception_info_gk20a *e =
		&eng_info->pbdma_exception_info;

	u32 pbdma_status_r = e->status_r = gk20a_readl(g,
		   fifo_pbdma_status_r(eng_info->pbdma_id));
	e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */
	e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) ==
		fifo_pbdma_status_id_type_chid_v();
	e->chan_status_v  = fifo_pbdma_status_chan_status_v(pbdma_status_r);
	e->next_id_is_chid =
		fifo_pbdma_status_next_id_type_v(pbdma_status_r) ==
		fifo_pbdma_status_next_id_type_chid_v();
	e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r);
	e->chsw_in_progress =
		fifo_pbdma_status_chsw_v(pbdma_status_r) ==
		fifo_pbdma_status_chsw_in_progress_v();
}

static void fifo_pbdma_exception_status(struct gk20a *g,
	struct fifo_engine_info_gk20a *eng_info)
{
	struct fifo_pbdma_exception_info_gk20a *e;
	get_exception_pbdma_info(g, eng_info);
	e = &eng_info->pbdma_exception_info;

	gk20a_dbg_fn("pbdma_id %d, "
		      "id_type %s, id %d, chan_status %d, "
		      "next_id_type %s, next_id %d, "
		      "chsw_in_progress %d",
		      eng_info->pbdma_id,
		      e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v,
		      e->next_id_is_chid ? "chid" : "tsgid", e->next_id,
		      e->chsw_in_progress);
}

/* reads info from hardware and fills in engine exception info record */
static inline void get_exception_engine_info(
	struct gk20a *g,
	struct fifo_engine_info_gk20a *eng_info)
{
	struct fifo_engine_exception_info_gk20a *e =
		&eng_info->engine_exception_info;
	u32 engine_status_r = e->status_r =
		gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
	e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? */
	e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) ==
		fifo_engine_status_id_type_chid_v();
	e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r);
	e->faulted =
		fifo_engine_status_faulted_v(engine_status_r) ==
		fifo_engine_status_faulted_true_v();
	e->idle =
		fifo_engine_status_engine_v(engine_status_r) ==
		fifo_engine_status_engine_idle_v();
	e->ctxsw_in_progress =
		fifo_engine_status_ctxsw_v(engine_status_r) ==
		fifo_engine_status_ctxsw_in_progress_v();
}

static void fifo_engine_exception_status(struct gk20a *g,
			       struct fifo_engine_info_gk20a *eng_info)
{
	struct fifo_engine_exception_info_gk20a *e;
	get_exception_engine_info(g, eng_info);
	e = &eng_info->engine_exception_info;

	gk20a_dbg_fn("engine_id %d, id_type %s, id %d, ctx_status %d, "
		      "faulted %d, idle %d, ctxsw_in_progress %d, ",
		      eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid",
		      e->id, e->ctx_status_v,
		      e->faulted, e->idle,  e->ctxsw_in_progress);
}

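/* Allocate runlist bookkeeping: the runlist_info array and, per runlist, the
 * active channel/TSG bitmaps plus MAX_RUNLIST_BUFFERS runlist buffers in
 * system memory. */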
static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
{
	struct fifo_runlist_info_gk20a *runlist;
	struct device *d = dev_from_gk20a(g);
	unsigned int runlist_id;
	u32 i;
	size_t runlist_size;

	gk20a_dbg_fn("");

	f->max_runlists = g->ops.fifo.eng_runlist_base_size();
	f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
				  f->max_runlists, GFP_KERNEL);
	if (!f->runlist_info)
		goto clean_up_runlist;

	memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
		f->max_runlists));

	for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
		runlist = &f->runlist_info[runlist_id];

		runlist->active_channels =
			kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
				GFP_KERNEL);
		if (!runlist->active_channels)
			goto clean_up_runlist;

		runlist->active_tsgs =
			kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
				GFP_KERNEL);
		if (!runlist->active_tsgs)
			goto clean_up_runlist;

		runlist_size  = f->runlist_entry_size * f->num_runlist_entries;
		gk20a_dbg_info("runlist_entries %d runlist size %zu\n",
					f->num_runlist_entries, runlist_size);

		for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
			int err = gk20a_gmmu_alloc_sys(g, runlist_size,
					&runlist->mem[i]);
			if (err) {
				dev_err(d, "memory allocation failed\n");
				goto clean_up_runlist;
			}
		}
		mutex_init(&runlist->mutex);

		/* None of the buffers is pinned if this value doesn't change.
		 * Otherwise, one of them (cur_buffer) must have been pinned. */
		runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
	}

	gk20a_dbg_fn("done");
	return 0;

clean_up_runlist:
	gk20a_fifo_delete_runlist(f);
	gk20a_dbg_fn("fail");
	return -ENOMEM;
}

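/* Period, in microseconds, used both for the engine timeout detection period
 * programmed in fifo_eng_timeout_r() and for accumulating per-channel ctxsw
 * timeouts. */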
#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000

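/* Reset PFIFO, enable the PBDMA units and their interrupts, and program the
 * FB, PBDMA and engine timeout values. */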
int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
{
	u32 intr_stall;
	u32 mask;
	u32 timeout;
	unsigned int i;
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
	u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);

	gk20a_dbg_fn("");
	/* enable pmc pfifo */
	gk20a_reset(g, mc_enable_pfifo_enabled_f());

	if (g->ops.clock_gating.slcg_ce2_load_gating_prod)
		g->ops.clock_gating.slcg_ce2_load_gating_prod(g,
				g->slcg_enabled);
	if (g->ops.clock_gating.slcg_fifo_load_gating_prod)
		g->ops.clock_gating.slcg_fifo_load_gating_prod(g,
				g->slcg_enabled);
	if (g->ops.clock_gating.blcg_fifo_load_gating_prod)
		g->ops.clock_gating.blcg_fifo_load_gating_prod(g,
				g->blcg_enabled);

	/* enable pbdma */
	mask = 0;
	for (i = 0; i < host_num_pbdma; ++i)
		mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i);
	gk20a_writel(g, mc_enable_pb_r(), mask);

	/* enable pfifo interrupt */
	gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF);
	gk20a_writel(g, fifo_intr_en_0_r(), 0x7FFFFFFF);
	gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000);

	/* enable pbdma interrupt */
	mask = 0;
	for (i = 0; i < host_num_pbdma; i++) {
		intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
		intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
		gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
		gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF);
		gk20a_writel(g, pbdma_intr_en_0_r(i),
			~pbdma_intr_en_0_lbreq_enabled_f());
		gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF);
		gk20a_writel(g, pbdma_intr_en_1_r(i),
			~pbdma_intr_en_0_lbreq_enabled_f());
	}

	/* TBD: apply overrides */

	/* TBD: BLCG prod */

	/* reset runlist interrupts */
	gk20a_writel(g, fifo_intr_runlist_r(), ~0);

	/* TBD: do we need those? */
	timeout = gk20a_readl(g, fifo_fb_timeout_r());
	timeout = set_field(timeout, fifo_fb_timeout_period_m(),
			fifo_fb_timeout_period_max_f());
	gk20a_writel(g, fifo_fb_timeout_r(), timeout);

	/* write pbdma timeout value */
	for (i = 0; i < host_num_pbdma; i++) {
		timeout = gk20a_readl(g, pbdma_timeout_r(i));
		timeout = set_field(timeout, pbdma_timeout_period_m(),
				    pbdma_timeout_period_max_f());
		gk20a_writel(g, pbdma_timeout_r(i), timeout);
	}

	if (g->ops.fifo.apply_pb_timeout)
		g->ops.fifo.apply_pb_timeout(g);

	timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
	timeout = scale_ptimer(timeout,
		ptimer_scalingfactor10x(platform->ptimer_src_freq));
	timeout |= fifo_eng_timeout_detection_enabled_f();
	gk20a_writel(g, fifo_eng_timeout_r(), timeout);

	gk20a_dbg_fn("done");

	return 0;
}

static void gk20a_init_fifo_pbdma_intr_descs(struct fifo_gk20a *f)
{
	/* These are all errors which indicate something really wrong
	 * going on in the device. */
	f->intr.pbdma.device_fatal_0 =
		pbdma_intr_0_memreq_pending_f() |
		pbdma_intr_0_memack_timeout_pending_f() |
		pbdma_intr_0_memack_extra_pending_f() |
		pbdma_intr_0_memdat_timeout_pending_f() |
		pbdma_intr_0_memdat_extra_pending_f() |
		pbdma_intr_0_memflush_pending_f() |
		pbdma_intr_0_memop_pending_f() |
		pbdma_intr_0_lbconnect_pending_f() |
		pbdma_intr_0_lback_timeout_pending_f() |
		pbdma_intr_0_lback_extra_pending_f() |
		pbdma_intr_0_lbdat_timeout_pending_f() |
		pbdma_intr_0_lbdat_extra_pending_f() |
		pbdma_intr_0_xbarconnect_pending_f() |
		pbdma_intr_0_pri_pending_f();

	/* These are data parsing, framing errors or others which can be
	 * recovered from with intervention... or just resetting the
	 * channel. */
	f->intr.pbdma.channel_fatal_0 =
		pbdma_intr_0_gpfifo_pending_f() |
		pbdma_intr_0_gpptr_pending_f() |
		pbdma_intr_0_gpentry_pending_f() |
		pbdma_intr_0_gpcrc_pending_f() |
		pbdma_intr_0_pbptr_pending_f() |
		pbdma_intr_0_pbentry_pending_f() |
		pbdma_intr_0_pbcrc_pending_f() |
		pbdma_intr_0_method_pending_f() |
		pbdma_intr_0_methodcrc_pending_f() |
		pbdma_intr_0_pbseg_pending_f() |
		pbdma_intr_0_signature_pending_f();

	/* Can be used for sw-methods, or represents
	 * a recoverable timeout. */
	f->intr.pbdma.restartable_0 =
		pbdma_intr_0_device_pending_f();
}

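/* One-time SW setup: allocate the channel/TSG/pbdma_map/engine_info arrays,
 * build the engine info and runlists, and allocate and map the USERD area. */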
static int gk20a_init_fifo_setup_sw(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct device *d = dev_from_gk20a(g);
	unsigned int chid, i;
	int err = 0;

	gk20a_dbg_fn("");

	if (f->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	f->g = g;

	mutex_init(&f->intr.isr.mutex);
	mutex_init(&f->gr_reset_mutex);
	gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */

	f->num_channels = g->ops.fifo.get_num_fifos(g);
	f->runlist_entry_size =  g->ops.fifo.runlist_entry_size();
	f->num_runlist_entries = fifo_eng_runlist_length_max_v();
	f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
	f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);

	f->userd_entry_size = 1 << ram_userd_base_shift_v();

	f->channel = vzalloc(f->num_channels * sizeof(*f->channel));
	f->tsg = vzalloc(f->num_channels * sizeof(*f->tsg));
	f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map),
				GFP_KERNEL);
	f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
				GFP_KERNEL);
	f->active_engines_list = kzalloc(f->max_engines * sizeof(u32),
				GFP_KERNEL);

	if (!(f->channel && f->pbdma_map && f->engine_info &&
		f->active_engines_list)) {
		err = -ENOMEM;
		goto clean_up;
	}
	memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32)));

	/* pbdma map needs to be in place before calling engine info init */
	for (i = 0; i < f->num_pbdma; ++i)
		f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i));

	g->ops.fifo.init_engine_info(f);

	init_runlist(g, f);

	INIT_LIST_HEAD(&f->free_chs);
	mutex_init(&f->free_chs_mutex);

	if (g->ops.mm.is_bar1_supported(g))
		err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
				   f->userd_entry_size * f->num_channels,
				   &f->userd);

	else
		err = gk20a_gmmu_alloc_sys(g, f->userd_entry_size *
				f->num_channels, &f->userd);
	if (err) {
		dev_err(d, "userd memory allocation failed\n");
		goto clean_up;
	}
	gk20a_dbg(gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va);

	for (chid = 0; chid < f->num_channels; chid++) {
		f->channel[chid].userd_iova =
			g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0)
			+ chid * f->userd_entry_size;
		f->channel[chid].userd_gpu_va =
			f->userd.gpu_va + chid * f->userd_entry_size;
		gk20a_init_channel_support(g, chid);
		gk20a_init_tsg_support(g, chid);
	}
	mutex_init(&f->tsg_inuse_mutex);

	f->remove_support = gk20a_remove_fifo_support;

	f->deferred_reset_pending = false;
	mutex_init(&f->deferred_reset_mutex);

	f->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	gk20a_dbg_fn("fail");
	if (g->ops.mm.is_bar1_supported(g))
		gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
	else
		gk20a_gmmu_free(g, &f->userd);

	vfree(f->channel);
	f->channel = NULL;
	vfree(f->tsg);
	f->tsg = NULL;
	kfree(f->pbdma_map);
	f->pbdma_map = NULL;
	kfree(f->engine_info);
	f->engine_info = NULL;
	kfree(f->active_engines_list);
	f->active_engines_list = NULL;

	return err;
}

static void gk20a_fifo_handle_runlist_event(struct gk20a *g)
{
	u32 runlist_event = gk20a_readl(g, fifo_intr_runlist_r());

	gk20a_dbg(gpu_dbg_intr, "runlist event %08x\n",
		  runlist_event);

	gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
}

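/* HW setup: sanity check USERD accessibility through BAR1, then program the
 * BAR1 base used by host for USERD. */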
int gk20a_init_fifo_setup_hw(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;

	gk20a_dbg_fn("");

	/* test write, read through bar1 @ userd region before
	 * turning on the snooping */
	{
		struct fifo_gk20a *f = &g->fifo;
		u32 v, v1 = 0x33, v2 = 0x55;

		u32 bar1_vaddr = f->userd.gpu_va;
		volatile u32 *cpu_vaddr = f->userd.cpu_va;

		gk20a_dbg_info("test bar1 @ vaddr 0x%x",
			   bar1_vaddr);

		v = gk20a_bar1_readl(g, bar1_vaddr);

		*cpu_vaddr = v1;
		smp_mb();

		if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
			gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a: CPU wrote 0x%x, \
				GPU read 0x%x", *cpu_vaddr, gk20a_bar1_readl(g, bar1_vaddr));
			return -EINVAL;
		}

		gk20a_bar1_writel(g, bar1_vaddr, v2);

		if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
			gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a: GPU wrote 0x%x, \
				CPU read 0x%x", gk20a_bar1_readl(g, bar1_vaddr), *cpu_vaddr);
			return -EINVAL;
		}

		/* is it visible to the cpu? */
		if (*cpu_vaddr != v2) {
			gk20a_err(dev_from_gk20a(g),
				"cpu didn't see bar1 write @ %p!",
				cpu_vaddr);
		}

		/* put it back */
		gk20a_bar1_writel(g, bar1_vaddr, v);
	}

	/*XXX all manner of flushes and caching worries, etc */

	/* set the base for the userd region now */
	gk20a_writel(g, fifo_bar1_base_r(),
			fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) |
			fifo_bar1_base_valid_true_f());

	gk20a_dbg_fn("done");

	return 0;
}

int gk20a_init_fifo_support(struct gk20a *g)
{
	int err;

	err = gk20a_init_fifo_setup_sw(g);
	if (err)
		return err;

	if (g->ops.fifo.init_fifo_setup_hw)
		err = g->ops.fifo.init_fifo_setup_hw(g);
	if (err)
		return err;

	return err;
}

/* return with a reference to the channel, caller must put it back */
static struct channel_gk20a *
channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
{
	struct gk20a *g = f->g;
	unsigned int ci;
	if (unlikely(!f->channel))
		return NULL;
	for (ci = 0; ci < f->num_channels; ci++) {
		struct channel_gk20a *ch;
		u64 ch_inst_ptr;

		ch = gk20a_channel_get(&f->channel[ci]);
		/* only alive channels are searched */
		if (!ch)
			continue;

		ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block);
		if (inst_ptr == ch_inst_ptr)
			return ch;

		gk20a_channel_put(ch);
	}
	return NULL;
}

/* fault info/descriptions.
 * tbd: move to setup
 */
static const char * const fault_type_descs[] = {
	 "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */
	 "pde size",
	 "pte",
	 "va limit viol",
	 "unbound inst",
	 "priv viol",
	 "ro viol",
	 "wo viol",
	 "pitch mask",
	 "work creation",
	 "bad aperture",
	 "compression failure",
	 "bad kind",
	 "region viol",
	 "dual ptes",
	 "poisoned",
};
/* engine descriptions */
static const char * const engine_subid_descs[] = {
	"gpc",
	"hub",
};

static const char * const hub_client_descs[] = {
	"vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
	"host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld",
	"niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
	"scc nb", "sec", "ssync", "gr copy", "ce2", "xv", "mmu nb",
	"msenc", "d falcon", "sked", "a falcon", "n/a",
};

static const char * const gpc_client_descs[] = {
	"l1 0", "t1 0", "pe 0",
	"l1 1", "t1 1", "pe 1",
	"l1 2", "t1 2", "pe 2",
	"l1 3", "t1 3", "pe 3",
	"rast", "gcc", "gpccs",
	"prop 0", "prop 1", "prop 2", "prop 3",
	"l1 4", "t1 4", "pe 4",
	"l1 5", "t1 5", "pe 5",
	"l1 6", "t1 6", "pe 6",
	"l1 7", "t1 7", "pe 7",
	"gpm",
	"ltp utlb 0", "ltp utlb 1", "ltp utlb 2", "ltp utlb 3",
	"rgg utlb",
};

/* reads info from hardware and fills in mmu fault info record */
static inline void get_exception_mmu_fault_info(
	struct gk20a *g, u32 engine_id,
	struct fifo_mmu_fault_info_gk20a *f)
{
	u32 fault_info_v;

	gk20a_dbg_fn("engine_id %d", engine_id);

	memset(f, 0, sizeof(*f));

	f->fault_info_v = fault_info_v = gk20a_readl(g,
	     fifo_intr_mmu_fault_info_r(engine_id));
	f->fault_type_v =
		fifo_intr_mmu_fault_info_type_v(fault_info_v);
	f->engine_subid_v =
		fifo_intr_mmu_fault_info_engine_subid_v(fault_info_v);
	f->client_v = fifo_intr_mmu_fault_info_client_v(fault_info_v);

	BUG_ON(f->fault_type_v >= ARRAY_SIZE(fault_type_descs));
	f->fault_type_desc =  fault_type_descs[f->fault_type_v];

	BUG_ON(f->engine_subid_v >= ARRAY_SIZE(engine_subid_descs));
	f->engine_subid_desc = engine_subid_descs[f->engine_subid_v];

	if (f->engine_subid_v ==
	    fifo_intr_mmu_fault_info_engine_subid_hub_v()) {

		BUG_ON(f->client_v >= ARRAY_SIZE(hub_client_descs));
		f->client_desc = hub_client_descs[f->client_v];
	} else if (f->engine_subid_v ==
		   fifo_intr_mmu_fault_info_engine_subid_gpc_v()) {
		BUG_ON(f->client_v >= ARRAY_SIZE(gpc_client_descs));
		f->client_desc = gpc_client_descs[f->client_v];
	} else {
		BUG_ON(1);
	}

	f->fault_hi_v = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(engine_id));
	f->fault_lo_v = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(engine_id));
	/* note: ignoring aperture on gk20a... */
	f->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v(
		 gk20a_readl(g, fifo_intr_mmu_fault_inst_r(engine_id)));
	/* note: inst_ptr is a 40b phys addr.  */
	f->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v();
}

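/* Reset a single engine. GR gets a full re-init (with ELPG disabled and the
 * FECS trace buffer flushed around it); CE engines are reset through their
 * mc reset mask. */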
void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
{
	struct fifo_gk20a *f = NULL;
	u32 engine_enum = ENGINE_INVAL_GK20A;
	u32 inst_id = 0;
	struct fifo_engine_info_gk20a *engine_info;

	gk20a_dbg_fn("");

	if (!g)
		return;

	f = &g->fifo;

	engine_info = gk20a_fifo_get_engine_info(g, engine_id);

	if (engine_info) {
		engine_enum = engine_info->engine_enum;
		inst_id = engine_info->inst_id;
	}

	if (engine_enum == ENGINE_INVAL_GK20A)
		gk20a_err(dev_from_gk20a(g), "unsupported engine_id %d", engine_id);

	if (engine_enum == ENGINE_GR_GK20A) {
		if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
			gk20a_pmu_disable_elpg(g);
		/* resetting engine will alter read/write index.
		 * need to flush circular buffer before re-enabling FECS.
		 */
		if (g->ops.fecs_trace.reset)
			g->ops.fecs_trace.reset(g);
		/*HALT_PIPELINE method, halt GR engine*/
		if (gr_gk20a_halt_pipe(g))
			gk20a_err(dev_from_gk20a(g), "failed to HALT gr pipe");
		/* resetting engine using mc_enable_r() is not
		enough, we do full init sequence */
		gk20a_gr_reset(g);
		if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
			gk20a_pmu_enable_elpg(g);
	}
	if ((engine_enum == ENGINE_GRCE_GK20A) ||
		(engine_enum == ENGINE_ASYNC_CE_GK20A)) {
			gk20a_reset(g, engine_info->reset_mask);
	}
}

static void gk20a_fifo_handle_chsw_fault(struct gk20a *g)
{
	u32 intr;

	intr = gk20a_readl(g, fifo_intr_chsw_error_r());
	gk20a_err(dev_from_gk20a(g), "chsw: %08x\n", intr);
	gk20a_fecs_dump_falcon_stats(g);
	gk20a_writel(g, fifo_intr_chsw_error_r(), intr);
}

static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);
	u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
	gk20a_err(dev, "dropped mmu fault (0x%08x)", fault_id);
}

static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
		struct fifo_mmu_fault_info_gk20a *f, bool fake_fault)
{
	u32 engine_enum = ENGINE_INVAL_GK20A;
	struct fifo_gk20a *fifo = NULL;
	struct fifo_engine_info_gk20a *engine_info;

	if (!g || !f)
		return false;

	fifo = &g->fifo;

	engine_info = gk20a_fifo_get_engine_info(g, engine_id);

	if (engine_info) {
		engine_enum = engine_info->engine_enum;
	}

	if (engine_enum == ENGINE_INVAL_GK20A)
		return false;

	/* channel recovery is only deferred if an sm debugger
	   is attached and MMU debug mode is enabled */
	if (!gk20a_gr_sm_debugger_attached(g) ||
	    !g->ops.mm.is_debug_mode_enabled(g))
		return false;

	/* if this fault is fake (due to RC recovery), don't defer recovery */
	if (fake_fault)
		return false;

	if (engine_enum != ENGINE_GR_GK20A ||
	    f->engine_subid_v != fifo_intr_mmu_fault_info_engine_subid_gpc_v())
		return false;

	return true;
}

/* caller must hold a channel reference */
static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
		struct channel_gk20a *ch)
{
	bool verbose = true;
	if (!ch)
		return verbose;

	if (ch->error_notifier) {
		u32 err = ch->error_notifier->info32;
		if (ch->error_notifier->status == 0xffff) {
			/* If error code is already set, this mmu fault
			 * was triggered as part of recovery from other
			 * error condition.
			 * Don't overwrite error flag. */
			/* Fifo timeout debug spew is controlled by user */
			if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
				verbose = ch->timeout_debug_dump;
		} else {
			gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
		}
	}
	/* mark channel as faulted */
	ch->has_timedout = true;
	wmb();
	/* unblock pending waits */
	wake_up(&ch->semaphore_wq);
	wake_up(&ch->notifier_wq);
	return verbose;
}

bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
		struct channel_gk20a *ch)
{
	gk20a_err(dev_from_gk20a(g),
		"channel %d generated a mmu fault", ch->hw_chid);

	return gk20a_fifo_set_ctx_mmu_error(g, ch);
}

bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
		struct tsg_gk20a *tsg)
{
	bool ret = true;
	struct channel_gk20a *ch = NULL;

	gk20a_err(dev_from_gk20a(g),
		"TSG %d generated a mmu fault", tsg->tsgid);

	mutex_lock(&tsg->ch_list_lock);
	list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
		if (gk20a_channel_get(ch)) {
			if (!gk20a_fifo_set_ctx_mmu_error(g, ch))
				ret = false;
			gk20a_channel_put(ch);
		}
	}
	mutex_unlock(&tsg->ch_list_lock);

	return ret;
}

void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt)
{
	struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
	struct channel_gk20a *ch;

	gk20a_dbg_fn("");

	gk20a_disable_tsg(tsg);

	if (preempt)
		g->ops.fifo.preempt_tsg(g, tsgid);

	mutex_lock(&tsg->ch_list_lock);
	list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
		if (gk20a_channel_get(ch)) {
			ch->has_timedout = true;
			gk20a_channel_abort_clean_up(ch);
			gk20a_channel_put(ch);
		}
	}
	mutex_unlock(&tsg->ch_list_lock);
}

int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
{
	u32 engine_id, engines;

	mutex_lock(&g->dbg_sessions_lock);
	gr_gk20a_disable_ctxsw(g);

	if (!g->fifo.deferred_reset_pending)
		goto clean_up;

	if (gk20a_is_channel_marked_as_tsg(ch))
		engines = gk20a_fifo_engines_on_id(g, ch->tsgid, true);
	else
		engines = gk20a_fifo_engines_on_id(g, ch->hw_chid, false);
	if (!engines)
		goto clean_up;

	/*
	 * If deferred reset is set for an engine, and channel is running
	 * on that engine, reset it
	 */
	for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32) {
		if (BIT(engine_id) & engines)
			gk20a_fifo_reset_engine(g, engine_id);
	}

	g->fifo.deferred_fault_engines = 0;
	g->fifo.deferred_reset_pending = false;

clean_up:
	gr_gk20a_enable_ctxsw(g);
	mutex_unlock(&g->dbg_sessions_lock);

	return 0;
}

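/* Handle real or triggered MMU faults: with ELPG and fifo access disabled,
 * walk the faulted engines, identify the owning channel/TSG, reset the engine
 * (or defer the reset when an SM debugger is attached) and abort the
 * channel/TSG. Returns true when verbose debug output is desired. */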
static bool gk20a_fifo_handle_mmu_fault(
	struct gk20a *g,
	u32 mmu_fault_engines, /* queried from HW if 0 */
	u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
	bool id_is_tsg)
{
	bool fake_fault;
	unsigned long fault_id;
	unsigned long engine_mmu_fault_id;
	bool verbose = true;
	u32 grfifo_ctl;

	gk20a_dbg_fn("");

	g->fifo.deferred_reset_pending = false;

	/* Disable power management */
	if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
		gk20a_pmu_disable_elpg(g);
	if (g->ops.clock_gating.slcg_gr_load_gating_prod)
		g->ops.clock_gating.slcg_gr_load_gating_prod(g,
				false);
	if (g->ops.clock_gating.slcg_perf_load_gating_prod)
		g->ops.clock_gating.slcg_perf_load_gating_prod(g,
				false);
	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
				false);

	gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN);

	/* Disable fifo access */
	grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
	grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
	grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);

	gk20a_writel(g, gr_gpfifo_ctl_r(),
		grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
		gr_gpfifo_ctl_semaphore_access_f(0));

	if (mmu_fault_engines) {
		fault_id = mmu_fault_engines;
		fake_fault = true;
	} else {
		fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
		fake_fault = false;
		gk20a_debug_dump(g->dev);
	}


	/* go through all faulted engines */
	for_each_set_bit(engine_mmu_fault_id, &fault_id, 32) {
		/* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
		 * engines. Convert engine_mmu_id to engine_id */
		u32 engine_id = gk20a_mmu_id_to_engine_id(g,
					engine_mmu_fault_id);
		struct fifo_mmu_fault_info_gk20a f;
		struct channel_gk20a *ch = NULL;
		struct tsg_gk20a *tsg = NULL;
		struct channel_gk20a *referenced_channel = NULL;
		bool was_reset;
		/* read and parse engine status */
		u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
		u32 ctx_status = fifo_engine_status_ctx_status_v(status);
		bool ctxsw = (ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_switch_v()
				|| ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_save_v()
				|| ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_load_v());

		get_exception_mmu_fault_info(g, engine_mmu_fault_id, &f);
		trace_gk20a_mmu_fault(f.fault_hi_v,
				      f.fault_lo_v,
				      f.fault_info_v,
				      f.inst_ptr,
				      engine_id,
				      f.engine_subid_desc,
				      f.client_desc,
				      f.fault_type_desc);
		gk20a_err(dev_from_gk20a(g), "%s mmu fault on engine %d, "
			   "engine subid %d (%s), client %d (%s), "
			   "addr 0x%08x:0x%08x, type %d (%s), info 0x%08x,"
			   "inst_ptr 0x%llx\n",
			   fake_fault ? "fake" : "",
			   engine_id,
			   f.engine_subid_v, f.engine_subid_desc,
			   f.client_v, f.client_desc,
			   f.fault_hi_v, f.fault_lo_v,
			   f.fault_type_v, f.fault_type_desc,
			   f.fault_info_v, f.inst_ptr);

		if (ctxsw) {
			gk20a_fecs_dump_falcon_stats(g);
			gk20a_err(dev_from_gk20a(g), "gr_status_r : 0x%x",
					gk20a_readl(g, gr_status_r()));
		}

		/* get the channel/TSG */
		if (fake_fault) {
			/* use next_id if context load is failing */
			u32 id, type;

			if (hw_id == ~(u32)0) {
				id = (ctx_status ==
				      fifo_engine_status_ctx_status_ctxsw_load_v()) ?
					fifo_engine_status_next_id_v(status) :
					fifo_engine_status_id_v(status);
				type = (ctx_status ==
					fifo_engine_status_ctx_status_ctxsw_load_v()) ?
					fifo_engine_status_next_id_type_v(status) :
					fifo_engine_status_id_type_v(status);
			} else {
				id = hw_id;
				type = id_is_tsg ?
					fifo_engine_status_id_type_tsgid_v() :
					fifo_engine_status_id_type_chid_v();
			}

			if (type == fifo_engine_status_id_type_tsgid_v())
				tsg = &g->fifo.tsg[id];
			else if (type == fifo_engine_status_id_type_chid_v()) {
				ch = &g->fifo.channel[id];
				referenced_channel = gk20a_channel_get(ch);
			}
		} else {
			/* read channel based on instruction pointer */
			ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
			referenced_channel = ch;
		}

		if (ch && gk20a_is_channel_marked_as_tsg(ch))
			tsg = &g->fifo.tsg[ch->tsgid];

		/* check if engine reset should be deferred */
		if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g,
				engine_id, &f, fake_fault)) {
			g->fifo.deferred_fault_engines |= BIT(engine_id);

			/* handled during channel free */
			g->fifo.deferred_reset_pending = true;
			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
				   "sm debugger attached,"
				   " deferring channel recovery to channel free");
		} else if (engine_id != FIFO_INVAL_ENGINE_ID) {
			was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
			mutex_lock(&g->fifo.gr_reset_mutex);
			/* if lock is already taken, a reset is taking place
			so no need to repeat */
			if (!was_reset)
				gk20a_fifo_reset_engine(g, engine_id);
			mutex_unlock(&g->fifo.gr_reset_mutex);
		}

		if (ch)
			gk20a_ctxsw_trace_channel_reset(g, ch);
		else if (tsg)
			gk20a_ctxsw_trace_tsg_reset(g, tsg);

		/* disable the channel/TSG from hw and increment
		 * syncpoints */

		if (tsg) {
			if (!g->fifo.deferred_reset_pending)
				verbose =
				       gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);

			gk20a_fifo_abort_tsg(g, tsg->tsgid, false);

			/* put back the ref taken early above */
			if (referenced_channel)
				gk20a_channel_put(ch);
		} else if (ch) {
			if (referenced_channel) {
				if (!g->fifo.deferred_reset_pending)
					verbose = gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
				gk20a_channel_abort(ch, false);
				gk20a_channel_put(ch);
			} else {
				gk20a_err(dev_from_gk20a(g),
						"mmu error in freed channel %d",
						ch->hw_chid);
			}
		} else if (f.inst_ptr ==
				gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) {
			gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
		} else if (f.inst_ptr ==
				gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) {
			gk20a_err(dev_from_gk20a(g), "mmu fault from pmu");
		} else
			gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");
	}

	/* clear interrupt */
	gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);

	/* resume scheduler */
	gk20a_writel(g, fifo_error_sched_disable_r(),
		     gk20a_readl(g, fifo_error_sched_disable_r()));

	/* Re-enable fifo access */
	gk20a_writel(g, gr_gpfifo_ctl_r(),
		     gr_gpfifo_ctl_access_enabled_f() |
		     gr_gpfifo_ctl_semaphore_access_enabled_f());

	/* It is safe to enable ELPG again. */
	if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
		gk20a_pmu_enable_elpg(g);
	return verbose;
}

static void gk20a_fifo_get_faulty_id_type(struct gk20a *g, int engine_id,
					  u32 *id, u32 *type)
{
	u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
	u32 ctx_status = fifo_engine_status_ctx_status_v(status);

	/* use next_id if context load is failing */
	*id = (ctx_status ==
		fifo_engine_status_ctx_status_ctxsw_load_v()) ?
		fifo_engine_status_next_id_v(status) :
		fifo_engine_status_id_v(status);

	*type = (ctx_status ==
		fifo_engine_status_ctx_status_ctxsw_load_v()) ?
		fifo_engine_status_next_id_type_v(status) :
		fifo_engine_status_id_type_v(status);
}

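/* Deliberately trigger MMU faults on the given engines and wait for the
 * fault interrupt to assert; used by recovery to tear engines down cleanly. */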
static void gk20a_fifo_trigger_mmu_fault(struct gk20a *g,
		unsigned long engine_ids)
{
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	unsigned long delay = GR_IDLE_CHECK_DEFAULT;
	unsigned long engine_id;
	int ret;

	/* trigger faults for all bad engines */
	for_each_set_bit(engine_id, &engine_ids, 32) {
		u32 mmu_id;

		if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
			WARN_ON(true);
			break;
		}

		mmu_id = gk20a_engine_id_to_mmu_id(g, engine_id);
		if (mmu_id != FIFO_INVAL_ENGINE_ID)
			gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id),
				     fifo_trigger_mmu_fault_id_f(mmu_id) |
				     fifo_trigger_mmu_fault_enable_f(1));
	}

	/* Wait for MMU fault to trigger */
	ret = -EBUSY;
	do {
		if (gk20a_readl(g, fifo_intr_0_r()) &
				fifo_intr_0_mmu_fault_pending_f()) {
			ret = 0;
			break;
		}

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (time_before(jiffies, end_jiffies) ||
			!tegra_platform_is_silicon());

	if (ret)
		gk20a_err(dev_from_gk20a(g), "mmu fault timeout");

	/* release mmu fault trigger */
	for_each_set_bit(engine_id, &engine_ids, 32)
		gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0);
}

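/* Return a bitmask of active engines that are busy on the given channel or
 * TSG id (including the id about to be loaded during a ctxsw load). */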
static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
{
	unsigned int i;
	u32 engines = 0;

	for (i = 0; i < g->fifo.num_engines; i++) {
		u32 active_engine_id = g->fifo.active_engines_list[i];
		u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
		u32 ctx_status =
			fifo_engine_status_ctx_status_v(status);
		u32 ctx_id = (ctx_status ==
			fifo_engine_status_ctx_status_ctxsw_load_v()) ?
			fifo_engine_status_next_id_v(status) :
			fifo_engine_status_id_v(status);
		u32 type = (ctx_status ==
			fifo_engine_status_ctx_status_ctxsw_load_v()) ?
			fifo_engine_status_next_id_type_v(status) :
			fifo_engine_status_id_type_v(status);
		bool busy = fifo_engine_status_engine_v(status) ==
			fifo_engine_status_engine_busy_v();
		if (busy && ctx_id == id) {
			if ((is_tsg && type ==
					fifo_engine_status_id_type_tsgid_v()) ||
				    (!is_tsg && type ==
					fifo_engine_status_id_type_chid_v()))
				engines |= BIT(active_engine_id);
		}
	}

	return engines;
}

void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
{
	u32 engines;

	/* stop context switching to prevent engine assignments from
	   changing until channel is recovered */
	mutex_lock(&g->dbg_sessions_lock);
	gr_gk20a_disable_ctxsw(g);

	engines = gk20a_fifo_engines_on_id(g, hw_chid, false);

	if (engines)
		gk20a_fifo_recover(g, engines, hw_chid, false, true, verbose);
	else {
		struct channel_gk20a *ch = &g->fifo.channel[hw_chid];

		if (gk20a_channel_get(ch)) {
			gk20a_channel_abort(ch, false);

			if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch))
				gk20a_debug_dump(g->dev);

			gk20a_channel_put(ch);
		}
	}

	gr_gk20a_enable_ctxsw(g);
	mutex_unlock(&g->dbg_sessions_lock);
}

void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
{
	u32 engines;

	/* stop context switching to prevent engine assignments from
	   changing until TSG is recovered */
	mutex_lock(&g->dbg_sessions_lock);
	gr_gk20a_disable_ctxsw(g);

	engines = gk20a_fifo_engines_on_id(g, tsgid, true);

	if (engines)
		gk20a_fifo_recover(g, engines, tsgid, true, true, verbose);
	else {
		struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];

		if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg))
			gk20a_debug_dump(g->dev);

		gk20a_fifo_abort_tsg(g, tsgid, false);
	}

	gr_gk20a_enable_ctxsw(g);
	mutex_unlock(&g->dbg_sessions_lock);
}

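/* Full recovery path: collect all engines running the faulty id, mask the
 * sched error and MMU fault interrupts, trigger and handle a fake MMU fault
 * on those engines, then restore the interrupt enables. */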
void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
			u32 hw_id, bool id_is_tsg,
			bool id_is_known, bool verbose)
{
	unsigned long engine_id, i;
	unsigned long _engine_ids = __engine_ids;
	unsigned long engine_ids = 0;
	u32 val;
	u32 mmu_fault_engines = 0;
	u32 ref_type;
	u32 ref_id;
	u32 ref_id_is_tsg = false;

	if (verbose)
		gk20a_debug_dump(g->dev);

	if (g->ops.ltc.flush)
		g->ops.ltc.flush(g);

	if (id_is_known) {
		engine_ids = gk20a_fifo_engines_on_id(g, hw_id, id_is_tsg);
		ref_id = hw_id;
		ref_type = id_is_tsg ?
			fifo_engine_status_id_type_tsgid_v() :
			fifo_engine_status_id_type_chid_v();
		ref_id_is_tsg = id_is_tsg;
		/* at least one engine will get passed during sched err */
		engine_ids |= __engine_ids;
		for_each_set_bit(engine_id, &engine_ids, 32) {
			u32 mmu_id = gk20a_engine_id_to_mmu_id(g, engine_id);

			if (mmu_id != FIFO_INVAL_ENGINE_ID)
				mmu_fault_engines |= BIT(mmu_id);
		}
	} else {
		/* store faulted engines in advance */
		for_each_set_bit(engine_id, &_engine_ids, 32) {
			gk20a_fifo_get_faulty_id_type(g, engine_id, &ref_id,
						      &ref_type);
			if (ref_type == fifo_engine_status_id_type_tsgid_v())
				ref_id_is_tsg = true;
			else
				ref_id_is_tsg = false;
			/* Reset *all* engines that use the
			 * same channel as the faulty engine */
			for (i = 0; i < g->fifo.num_engines; i++) {
				u32 active_engine_id = g->fifo.active_engines_list[i];
				u32 type;
				u32 id;

				gk20a_fifo_get_faulty_id_type(g, active_engine_id, &id, &type);
				if (ref_type == type && ref_id == id) {
					u32 mmu_id = gk20a_engine_id_to_mmu_id(g, active_engine_id);

					engine_ids |= BIT(active_engine_id);
					if (mmu_id != FIFO_INVAL_ENGINE_ID)
						mmu_fault_engines |= BIT(mmu_id);
				}
			}
		}
	}

	if (mmu_fault_engines) {
		/*
		 * sched error prevents recovery, and ctxsw error will retrigger
		 * every 100ms. Disable the sched error to allow recovery.
		 */
		val = gk20a_readl(g, fifo_intr_en_0_r());
		val &= ~(fifo_intr_en_0_sched_error_m() |
			fifo_intr_en_0_mmu_fault_m());
		gk20a_writel(g, fifo_intr_en_0_r(), val);
		gk20a_writel(g, fifo_intr_0_r(),
				fifo_intr_0_sched_error_reset_f());

		g->ops.fifo.trigger_mmu_fault(g, engine_ids);
		gk20a_fifo_handle_mmu_fault(g, mmu_fault_engines, ref_id,
				ref_id_is_tsg);

		val = gk20a_readl(g, fifo_intr_en_0_r());
		val |= fifo_intr_en_0_mmu_fault_f(1)
			| fifo_intr_en_0_sched_error_f(1);
		gk20a_writel(g, fifo_intr_en_0_r(), val);
	}
}

/* force reset channel and tsg (if it's part of one) */
int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
				u32 err_code, bool verbose)
{
	struct tsg_gk20a *tsg = NULL;
	struct channel_gk20a *ch_tsg = NULL;
	struct gk20a *g = ch->g;

	if (gk20a_is_channel_marked_as_tsg(ch)) {
		tsg = &g->fifo.tsg[ch->tsgid];

		mutex_lock(&tsg->ch_list_lock);

		list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
			if (gk20a_channel_get(ch_tsg)) {
				gk20a_set_error_notifier(ch_tsg, err_code);
				gk20a_channel_put(ch_tsg);
			}
		}

		mutex_unlock(&tsg->ch_list_lock);
		gk20a_fifo_recover_tsg(g, ch->tsgid, verbose);
	} else {
		gk20a_set_error_notifier(ch, err_code);
		gk20a_fifo_recover_ch(g, ch->hw_chid, verbose);
	}

	return 0;
}

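/* Find a busy engine stuck in a ctxsw state and return its engine id along
 * with the channel/TSG id it is switching to or from. */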
u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
			int *__id, bool *__is_tsg)
{
	u32 engine_id;
	int id = -1;
	bool is_tsg = false;
	u32 mailbox2;
	u32 active_engine_id = FIFO_INVAL_ENGINE_ID;

	for (engine_id = 0; engine_id < g->fifo.num_engines; engine_id++) {
		u32 status;
		u32 ctx_status;
		bool failing_engine;

		active_engine_id = g->fifo.active_engines_list[engine_id];
		status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
		ctx_status = fifo_engine_status_ctx_status_v(status);

		/* we are interested in busy engines */
		failing_engine = fifo_engine_status_engine_v(status) ==
			fifo_engine_status_engine_busy_v();

		/* ..that are doing context switch */
		failing_engine = failing_engine &&
			(ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_switch_v()
			|| ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_save_v()
			|| ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_load_v());

		if (!failing_engine) {
			active_engine_id = FIFO_INVAL_ENGINE_ID;
			continue;
		}

		if (ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_load_v()) {
			id = fifo_engine_status_next_id_v(status);
			is_tsg = fifo_engine_status_next_id_type_v(status) !=
				fifo_engine_status_next_id_type_chid_v();
		} else if (ctx_status ==
			       fifo_engine_status_ctx_status_ctxsw_switch_v()) {
			mailbox2 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(2));
			if (mailbox2 & FECS_METHOD_WFI_RESTORE) {
				id = fifo_engine_status_next_id_v(status);
				is_tsg = fifo_engine_status_next_id_type_v(status) !=
					fifo_engine_status_next_id_type_chid_v();
			} else {
				id = fifo_engine_status_id_v(status);
				is_tsg = fifo_engine_status_id_type_v(status) !=
					fifo_engine_status_id_type_chid_v();
			}
		} else {
			id = fifo_engine_status_id_v(status);
			is_tsg = fifo_engine_status_id_type_v(status) !=
				fifo_engine_status_id_type_chid_v();
		}
		break;
	}

	*__id = id;
	*__is_tsg = is_tsg;

	return active_engine_id;
}

static bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
		bool *verbose, u32 *ms)
{
	bool recover = false;
	bool progress = false;

	if (gk20a_channel_get(ch)) {
		recover = gk20a_channel_update_and_check_timeout(ch,
				GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000,
				&progress);
		*verbose = ch->timeout_debug_dump;
		*ms = ch->timeout_accumulated_ms;
		if (recover)
			gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);

		gk20a_channel_put(ch);
	}
	return recover;
}

static bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
		bool *verbose, u32 *ms)
{
	struct channel_gk20a *ch;
	bool recover = false;
	bool progress = false;

	*verbose = false;
	*ms = GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000;

	mutex_lock(&tsg->ch_list_lock);

	/* check if there was some progress on any of the TSG channels.
	 * fifo recovery is needed if at least one channel reached the
	 * maximum timeout without progress (update in gpfifo pointers).
	 */
	list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
		if (gk20a_channel_get(ch)) {
			recover = gk20a_channel_update_and_check_timeout(ch,
					*ms, &progress);
			if (progress || recover)
				break;
			gk20a_channel_put(ch);
		}
	}

	/* if at least one channel in the TSG made some progress, reset
	 * accumulated timeout for all channels in the TSG. In particular,
	 * this resets timeout for channels that already completed their work
	 */
	if (progress) {
		gk20a_dbg_info("progress on tsg=%d ch=%d",
				tsg->tsgid, ch->hw_chid);
		gk20a_channel_put(ch);
		*ms = GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000;
		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
			if (gk20a_channel_get(ch)) {
				ch->timeout_accumulated_ms = *ms;
				gk20a_channel_put(ch);
			}
		}
	}

	/* if one channel is presumed dead (no progress for too long), then
	 * fifo recovery is needed. we can't really figure out which channel
	 * caused the problem, so set timeout error notifier for all channels.
	 */
	if (recover) {
		gk20a_dbg_info("timeout on tsg=%d ch=%d",
				tsg->tsgid, ch->hw_chid);
		*ms = ch->timeout_accumulated_ms;
		gk20a_channel_put(ch);
		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
			if (gk20a_channel_get(ch)) {
				gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
				*verbose |= ch->timeout_debug_dump;
				gk20a_channel_put(ch);
			}
		}
	}

	/* if we could not detect progress on any of the channels, but none
	 * of them has reached the timeout, there is nothing more to do:
	 * timeout_accumulated_ms has been updated for all of them.
	 */
	mutex_unlock(&tsg->ch_list_lock);
	return recover;
}

static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
{
	u32 sched_error;
	u32 engine_id;
	int id = -1;
	bool is_tsg = false;
	bool ret = false;

	/* read the scheduler error register */
	sched_error = gk20a_readl(g, fifo_intr_sched_error_r());

	engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);

	/* could not find the engine - should never happen */
	if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
		gk20a_err(dev_from_gk20a(g), "fifo sched error : 0x%08x, failed to find engine\n",
			sched_error);
		ret = false;
		goto err;
	}

	if (fifo_intr_sched_error_code_f(sched_error) ==
			fifo_intr_sched_error_code_ctxsw_timeout_v()) {
		struct fifo_gk20a *f = &g->fifo;
		u32 ms = 0;
		bool verbose = false;

		if (is_tsg) {
			ret = gk20a_fifo_check_tsg_ctxsw_timeout(
					&f->tsg[id], &verbose, &ms);
		} else {
			ret = gk20a_fifo_check_ch_ctxsw_timeout(
					&f->channel[id], &verbose, &ms);
		}

		if (ret) {
			gk20a_err(dev_from_gk20a(g),
				"fifo sched ctxsw timeout error: "
				"engine=%u, %s=%d, ms=%u",
				engine_id, is_tsg ? "tsg" : "ch", id, ms);
			/*
			 * Cancel all channels' timeout since SCHED error might
			 * trigger multiple watchdogs at a time
			 */
			gk20a_channel_timeout_restart_all_channels(g);
			gk20a_fifo_recover(g, BIT(engine_id), id,
					is_tsg, true, verbose);
		} else {
			gk20a_dbg_info(
				"fifo is waiting for ctx switch for %d ms, "
				"%s=%d", ms, is_tsg ? "tsg" : "ch", id);
		}
	} else {
		gk20a_err(dev_from_gk20a(g),
			"fifo sched error : 0x%08x, engine=%u, %s=%d",
			sched_error, engine_id, is_tsg ? "tsg" : "ch", id);
	}

err:
	return ret;
}

static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
{
	bool print_channel_reset_log = false;
	struct device *dev = dev_from_gk20a(g);
	u32 handled = 0;

	gk20a_dbg_fn("fifo_intr=0x%08x", fifo_intr);

	if (fifo_intr & fifo_intr_0_pio_error_pending_f()) {
		/* pio mode is unused.  this shouldn't happen, ever. */
		/* should we clear it or just leave it pending? */
		gk20a_err(dev, "fifo pio error!\n");
		BUG_ON(1);
	}

	if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
		u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
		gk20a_err(dev, "fifo bind error: 0x%08x", bind_error);
		print_channel_reset_log = true;
		handled |= fifo_intr_0_bind_error_pending_f();
	}

	if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
		print_channel_reset_log = gk20a_fifo_handle_sched_error(g);
		handled |= fifo_intr_0_sched_error_pending_f();
	}

	if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) {
		gk20a_fifo_handle_chsw_fault(g);
		handled |= fifo_intr_0_chsw_error_pending_f();
	}

	if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
		print_channel_reset_log =
			gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false);
		handled |= fifo_intr_0_mmu_fault_pending_f();
	}

	if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) {
		gk20a_fifo_handle_dropped_mmu_fault(g);
		handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
	}

	print_channel_reset_log = !g->fifo.deferred_reset_pending
			&& print_channel_reset_log;

	if (print_channel_reset_log) {
		unsigned int engine_id;
		gk20a_err(dev_from_gk20a(g),
			   "channel reset initiated from %s; intr=0x%08x",
			   __func__, fifo_intr);
		for (engine_id = 0;
		     engine_id < g->fifo.num_engines;
		     engine_id++) {
				u32 active_engine_id = g->fifo.active_engines_list[engine_id];
				u32 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
				gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_enum,
					active_engine_id);
				fifo_pbdma_exception_status(g,
						&g->fifo.engine_info[active_engine_id]);
				fifo_engine_exception_status(g,
						&g->fifo.engine_info[active_engine_id]);
		}
	}

	return handled;
}

static inline void gk20a_fifo_reset_pbdma_header(struct gk20a *g, int pbdma_id)
{
	gk20a_writel(g, pbdma_pb_header_r(pbdma_id),
			pbdma_pb_header_first_true_f() |
			pbdma_pb_header_type_non_inc_f());
}

static inline void gk20a_fifo_reset_pbdma_method(struct gk20a *g, int pbdma_id,
						int pbdma_method_index)
{
	u32 pbdma_method_stride;
	u32 pbdma_method_reg;

	pbdma_method_stride = pbdma_method1_r(pbdma_id) -
				pbdma_method0_r(pbdma_id);

	pbdma_method_reg = pbdma_method0_r(pbdma_id) +
		(pbdma_method_index * pbdma_method_stride);

	gk20a_writel(g, pbdma_method_reg,
			pbdma_method0_valid_true_f() |
			pbdma_method0_first_true_f() |
			pbdma_method0_addr_f(
			     pbdma_udma_nop_r() >> 2));
}

static bool gk20a_fifo_is_sw_method_subch(struct gk20a *g, int pbdma_id,
						int pbdma_method_index)
{
	u32 pbdma_method_stride;
	u32 pbdma_method_reg, pbdma_method_subch;

	pbdma_method_stride = pbdma_method1_r(pbdma_id) -
				pbdma_method0_r(pbdma_id);

	pbdma_method_reg = pbdma_method0_r(pbdma_id) +
			(pbdma_method_index * pbdma_method_stride);

	pbdma_method_subch = pbdma_method0_subch_v(
			gk20a_readl(g, pbdma_method_reg));

	if (pbdma_method_subch == 5 || pbdma_method_subch == 6 ||
				       pbdma_method_subch == 7)
		return true;

	return false;
}

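/* Handle the pending interrupts of one PBDMA unit; fatal conditions set an
 * error notifier on the owning channel/TSG and trigger recovery. */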
static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
					struct gk20a *g,
					struct fifo_gk20a *f,
					u32 pbdma_id)
{
	u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
	u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
	u32 handled = 0;
	u32 error_notifier = NVGPU_CHANNEL_PBDMA_ERROR;
	bool reset = false;
	int i;

	gk20a_dbg_fn("");

	gk20a_dbg(gpu_dbg_intr, "pbdma id intr pending %d %08x %08x", pbdma_id,
			pbdma_intr_0, pbdma_intr_1);
	if (pbdma_intr_0) {
		if ((f->intr.pbdma.device_fatal_0 |
		     f->intr.pbdma.channel_fatal_0 |
		     f->intr.pbdma.restartable_0) & pbdma_intr_0) {
			gk20a_err(dev_from_gk20a(g),
				"pbdma_intr_0(%d):0x%08x PBH: %08x SHADOW: %08x M0: %08x %08x %08x %08x",
				pbdma_id, pbdma_intr_0,
				gk20a_readl(g, pbdma_pb_header_r(pbdma_id)),
				gk20a_readl(g, pbdma_hdr_shadow_r(pbdma_id)),
				gk20a_readl(g, pbdma_method0_r(pbdma_id)),
				gk20a_readl(g, pbdma_method1_r(pbdma_id)),
				gk20a_readl(g, pbdma_method2_r(pbdma_id)),
				gk20a_readl(g, pbdma_method3_r(pbdma_id))
				);
			reset = true;
			handled |= ((f->intr.pbdma.device_fatal_0 |
				     f->intr.pbdma.channel_fatal_0 |
				     f->intr.pbdma.restartable_0) &
				    pbdma_intr_0);
		}

		if (pbdma_intr_0 & pbdma_intr_0_acquire_pending_f()) {
			u32 val = gk20a_readl(g, pbdma_acquire_r(pbdma_id));
			val &= ~pbdma_acquire_timeout_en_enable_f();
			gk20a_writel(g, pbdma_acquire_r(pbdma_id), val);
			if (g->timeouts_enabled) {
				reset = true;
				gk20a_err(dev_from_gk20a(g),
					"semaphore acquire timeout!");
			}
			handled |= pbdma_intr_0_acquire_pending_f();
		}

		if (pbdma_intr_0 & pbdma_intr_0_pbentry_pending_f()) {
			gk20a_fifo_reset_pbdma_header(g, pbdma_id);
			gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
			reset = true;
		}

		if (pbdma_intr_0 & pbdma_intr_0_method_pending_f()) {
			gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
			reset = true;
		}

		if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
			error_notifier =
				NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
			reset = true;
		}

		if (pbdma_intr_0 & pbdma_intr_0_device_pending_f()) {
			gk20a_fifo_reset_pbdma_header(g, pbdma_id);

			for (i = 0; i < 4; i++) {
				if (gk20a_fifo_is_sw_method_subch(g,
						pbdma_id, i))
					gk20a_fifo_reset_pbdma_method(g,
							pbdma_id, i);
			}
			reset = true;
		}

		gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
	}

	/* all intrs in _intr_1 are "host copy engine" related,
	 * which gk20a doesn't have. for now just make them channel fatal. */
	if (pbdma_intr_1) {
		dev_err(dev, "channel hce error: pbdma_intr_1(%d): 0x%08x",
			pbdma_id, pbdma_intr_1);
		reset = true;
		gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
	}

	if (reset) {
		/* Remove the channel from runlist */
		u32 status = gk20a_readl(g, fifo_pbdma_status_r(pbdma_id));
		u32 id = fifo_pbdma_status_id_v(status);
		if (fifo_pbdma_status_id_type_v(status)
				== fifo_pbdma_status_id_type_chid_v()) {
			struct channel_gk20a *ch = &f->channel[id];

			if (gk20a_channel_get(ch)) {
				gk20a_set_error_notifier(ch, error_notifier);
				gk20a_fifo_recover_ch(g, id, true);
				gk20a_channel_put(ch);
			}
		} else if (fifo_pbdma_status_id_type_v(status)
				== fifo_pbdma_status_id_type_tsgid_v()) {
			struct tsg_gk20a *tsg = &f->tsg[id];
			struct channel_gk20a *ch = NULL;

			mutex_lock(&tsg->ch_list_lock);
			list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
				if (gk20a_channel_get(ch)) {
					gk20a_set_error_notifier(ch,
						error_notifier);
					gk20a_channel_put(ch);
				}
			}
			mutex_unlock(&tsg->ch_list_lock);
			gk20a_fifo_recover_tsg(g, id, true);
		}
	}

	return handled;
}

static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr)
{
	struct device *dev = dev_from_gk20a(g);
	struct fifo_gk20a *f = &g->fifo;
	u32 clear_intr = 0, i;
	u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
	u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r());

	for (i = 0; i < host_num_pbdma; i++) {
		if (fifo_intr_pbdma_id_status_v(pbdma_pending, i)) {
			gk20a_dbg(gpu_dbg_intr, "pbdma id %d intr pending", i);
			clear_intr |=
				gk20a_fifo_handle_pbdma_intr(dev, g, f, i);
		}
	}
	return fifo_intr_0_pbdma_intr_pending_f();
}

void gk20a_fifo_isr(struct gk20a *g)
{
	u32 error_intr_mask =
		fifo_intr_0_bind_error_pending_f() |
		fifo_intr_0_sched_error_pending_f() |
		fifo_intr_0_chsw_error_pending_f() |
		fifo_intr_0_fb_flush_timeout_pending_f() |
		fifo_intr_0_dropped_mmu_fault_pending_f() |
		fifo_intr_0_mmu_fault_pending_f() |
		fifo_intr_0_lb_error_pending_f() |
		fifo_intr_0_pio_error_pending_f();

	u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
	u32 clear_intr = 0;

	if (g->fifo.sw_ready) {
		/* note we're not actually in an "isr", but rather
		 * in a threaded interrupt context... */
		mutex_lock(&g->fifo.intr.isr.mutex);

		gk20a_dbg(gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);

		/* handle runlist update */
		if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) {
			gk20a_fifo_handle_runlist_event(g);
			clear_intr |= fifo_intr_0_runlist_event_pending_f();
		}
		if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f())
			clear_intr |= fifo_pbdma_isr(g, fifo_intr);

		if (unlikely(fifo_intr & error_intr_mask))
			clear_intr = fifo_error_isr(g, fifo_intr);

		mutex_unlock(&g->fifo.intr.isr.mutex);
	}
	gk20a_writel(g, fifo_intr_0_r(), clear_intr);

	return;
}

void gk20a_fifo_nonstall_isr(struct gk20a *g)
{
	u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
	u32 clear_intr = 0;

	gk20a_dbg(gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr);

	if (fifo_intr & fifo_intr_0_channel_intr_pending_f())
		clear_intr = fifo_intr_0_channel_intr_pending_f();

	gk20a_writel(g, fifo_intr_0_r(), clear_intr);

	gk20a_channel_semaphore_wakeup(g, false);

	return;
}

void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
{
	if (is_tsg)
		gk20a_writel(g, fifo_preempt_r(),
			fifo_preempt_id_f(id) |
			fifo_preempt_type_tsg_f());
	else
		gk20a_writel(g, fifo_preempt_r(),
			fifo_preempt_chid_f(id) |
			fifo_preempt_type_channel_f());
}

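/* Issue a channel/TSG preempt and poll for completion. On timeout, set the
 * idle-timeout error notifier and recover the channel/TSG. Callers hold the
 * runlist mutexes and the PMU fifo mutex. */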
static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
{
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	unsigned long end_jiffies = jiffies
		+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	int ret = 0;

	gk20a_dbg_fn("%d", id);

	/* issue preempt */
	gk20a_fifo_issue_preempt(g, id, is_tsg);

	gk20a_dbg_fn("%d", id);
	/* wait for preempt */
	ret = -EBUSY;
	do {
		if (!(gk20a_readl(g, fifo_preempt_r()) &
			fifo_preempt_pending_true_f())) {
			ret = 0;
			break;
		}

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (time_before(jiffies, end_jiffies) ||
			!tegra_platform_is_silicon());

	gk20a_dbg_fn("%d", id);
	if (ret) {
		if (is_tsg) {
			struct tsg_gk20a *tsg = &g->fifo.tsg[id];
			struct channel_gk20a *ch = NULL;

			gk20a_err(dev_from_gk20a(g),
				"preempt TSG %d timeout\n", id);

			mutex_lock(&tsg->ch_list_lock);
			list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
				if (!gk20a_channel_get(ch))
					continue;
				gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
				gk20a_channel_put(ch);
			}
			mutex_unlock(&tsg->ch_list_lock);
			gk20a_fifo_recover_tsg(g, id, true);
		} else {
			struct channel_gk20a *ch = &g->fifo.channel[id];

			gk20a_err(dev_from_gk20a(g),
				"preempt channel %d timeout\n", id);

			if (gk20a_channel_get(ch)) {
				gk20a_set_error_notifier(ch,
						NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
				gk20a_fifo_recover_ch(g, id, true);
				gk20a_channel_put(ch);
			}
		}
	}

	return ret;
}

int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
{
	struct fifo_gk20a *f = &g->fifo;
	int ret = 0;
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
	u32 mutex_ret = 0;
	u32 i;

	gk20a_dbg_fn("%d", hw_chid);

	/* we have no idea which runlist we are using. lock all */
	for (i = 0; i < g->fifo.max_runlists; i++)
		mutex_lock(&f->runlist_info[i].mutex);

	mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	ret = __locked_fifo_preempt(g, hw_chid, false);

	if (!mutex_ret)
		pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	for (i = 0; i < g->fifo.max_runlists; i++)
		mutex_unlock(&f->runlist_info[i].mutex);

	return ret;
}

int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
{
	struct fifo_gk20a *f = &g->fifo;
	int ret = 0;
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
	u32 mutex_ret = 0;
	u32 i;

	gk20a_dbg_fn("%d", tsgid);

	/* we have no idea which runlist we are using. lock all */
	for (i = 0; i < g->fifo.max_runlists; i++)
		mutex_lock(&f->runlist_info[i].mutex);

	mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	ret = __locked_fifo_preempt(g, tsgid, true);

	if (!mutex_ret)
		pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	for (i = 0; i < g->fifo.max_runlists; i++)
		mutex_unlock(&f->runlist_info[i].mutex);

	return ret;
}

int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch)
{
	int err;

	if (gk20a_is_channel_marked_as_tsg(ch))
		err = g->ops.fifo.preempt_tsg(ch->g, ch->tsgid);
	else
		err = g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);

	return err;
}

int gk20a_fifo_enable_engine_activity(struct gk20a *g,
				struct fifo_engine_info_gk20a *eng_info)
{
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
	u32 mutex_ret;
	u32 enable;

	gk20a_dbg_fn("");

	mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	enable = gk20a_readl(g, fifo_sched_disable_r());
	enable &= ~(fifo_sched_disable_true_v() >> eng_info->runlist_id);
	gk20a_writel(g, fifo_sched_disable_r(), enable);

	if (!mutex_ret)
		pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	gk20a_dbg_fn("done");
	return 0;
}

int gk20a_fifo_enable_all_engine_activity(struct gk20a *g)
{
	unsigned int i;
	int err = 0, ret = 0;

	for (i = 0; i < g->fifo.num_engines; i++) {
		u32 active_engine_id = g->fifo.active_engines_list[i];
		err = gk20a_fifo_enable_engine_activity(g,
				&g->fifo.engine_info[active_engine_id]);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"failed to enable engine %d activity\n", active_engine_id);
			ret = err;
		}
	}

	return ret;
}

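/*
 * Stop scheduling work to the runlist that feeds this engine. If the engine
 * is busy and wait_for_idle is not set, return -EBUSY immediately. Otherwise
 * the sequence is: set the runlist's bit in fifo_sched_disable_r(), preempt
 * whatever channel the PBDMA is processing, then preempt the channel the
 * engine itself is running (if different). On failure the engine activity is
 * re-enabled before returning.
 */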
int gk20a_fifo_disable_engine_activity(struct gk20a *g,
				struct fifo_engine_info_gk20a *eng_info,
				bool wait_for_idle)
{
	u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat;
	u32 pbdma_chid = FIFO_INVAL_CHANNEL_ID;
	u32 engine_chid = FIFO_INVAL_CHANNEL_ID, disable;
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
	u32 mutex_ret;
	int err = 0;

	gk20a_dbg_fn("");

	gr_stat =
		gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
	if (fifo_engine_status_engine_v(gr_stat) ==
	    fifo_engine_status_engine_busy_v() && !wait_for_idle)
		return -EBUSY;

	mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	disable = gk20a_readl(g, fifo_sched_disable_r());
	disable = set_field(disable,
			fifo_sched_disable_runlist_m(eng_info->runlist_id),
			fifo_sched_disable_runlist_f(fifo_sched_disable_true_v(),
				eng_info->runlist_id));
	gk20a_writel(g, fifo_sched_disable_r(), disable);

	/* chid from pbdma status */
	pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id));
	chan_stat  = fifo_pbdma_status_chan_status_v(pbdma_stat);
	if (chan_stat == fifo_pbdma_status_chan_status_valid_v() ||
	    chan_stat == fifo_pbdma_status_chan_status_chsw_save_v())
		pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat);
	else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() ||
		 chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v())
		pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);

	if (pbdma_chid != FIFO_INVAL_CHANNEL_ID) {
		err = g->ops.fifo.preempt_channel(g, pbdma_chid);
		if (err)
			goto clean_up;
	}

	/* chid from engine status */
	eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
	ctx_stat  = fifo_engine_status_ctx_status_v(eng_stat);
	if (ctx_stat == fifo_engine_status_ctx_status_valid_v() ||
	    ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v())
		engine_chid = fifo_engine_status_id_v(eng_stat);
	else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() ||
		 ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v())
		engine_chid = fifo_engine_status_next_id_v(eng_stat);

	if (engine_chid != FIFO_INVAL_CHANNEL_ID && engine_chid != pbdma_chid) {
		err = g->ops.fifo.preempt_channel(g, engine_chid);
		if (err)
			goto clean_up;
	}

clean_up:
	if (!mutex_ret)
		pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	if (err) {
		gk20a_dbg_fn("failed");
		if (gk20a_fifo_enable_engine_activity(g, eng_info))
			gk20a_err(dev_from_gk20a(g),
				"failed to enable gr engine activity\n");
	} else {
		gk20a_dbg_fn("done");
	}
	return err;
}

int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
				bool wait_for_idle)
{
	unsigned int i;
	int err = 0, ret = 0;
	u32 active_engine_id;

	for (i = 0; i < g->fifo.num_engines; i++) {
		active_engine_id = g->fifo.active_engines_list[i];
		err = gk20a_fifo_disable_engine_activity(g,
				&g->fifo.engine_info[active_engine_id],
				wait_for_idle);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"failed to disable engine %d activity\n", active_engine_id);
			ret = err;
			break;
		}
	}

	if (err) {
		while (i-- != 0) {
			active_engine_id = g->fifo.active_engines_list[i];
			err = gk20a_fifo_enable_engine_activity(g,
						&g->fifo.engine_info[active_engine_id]);
			if (err)
				gk20a_err(dev_from_gk20a(g),
				 "failed to re-enable engine %d activity\n", active_engine_id);
		}
	}

	return ret;
}

static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 engines = 0;
	unsigned int i;

	for (i = 0; i < f->num_engines; i++) {
		u32 active_engine_id = g->fifo.active_engines_list[i];
		u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
		bool engine_busy = fifo_engine_status_engine_v(status) ==
			fifo_engine_status_engine_busy_v();

		if (engine_busy &&
		    (f->engine_info[active_engine_id].runlist_id == runlist_id))
			engines |= BIT(active_engine_id);
	}

	if (engines)
		gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true);
}

static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
{
	struct fifo_runlist_info_gk20a *runlist;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	unsigned long delay = GR_IDLE_CHECK_DEFAULT;
	int ret = -ETIMEDOUT;

	runlist = &g->fifo.runlist_info[runlist_id];
	do {
		if ((gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
				fifo_eng_runlist_pending_true_f()) == 0) {
			ret = 0;
			break;
		}

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (time_before(jiffies, end_jiffies) ||
		 !tegra_platform_is_silicon());

	return ret;
}

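/*
 * Runlist entries are two 32-bit words. For a TSG header entry the first
 * word carries the TSG id, the TSG entry type, the number of channel entries
 * that follow, and the timeslice; the second word is left zero. Bare channel
 * entries (see gk20a_get_ch_runlist_entry() below) carry only the channel id.
 */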
void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist)
{
	u32 runlist_entry_0 = ram_rl_entry_id_f(tsg->tsgid) |
			ram_rl_entry_type_tsg_f() |
			ram_rl_entry_tsg_length_f(tsg->num_active_channels);

	if (tsg->timeslice_timeout)
		runlist_entry_0 |=
			ram_rl_entry_timeslice_scale_f(tsg->timeslice_scale) |
			ram_rl_entry_timeslice_timeout_f(tsg->timeslice_timeout);
	else
		runlist_entry_0 |=
			ram_rl_entry_timeslice_scale_3_f() |
			ram_rl_entry_timeslice_timeout_128_f();

	runlist[0] = runlist_entry_0;
	runlist[1] = 0;
}

void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist)
{
	runlist[0] = ram_rl_entry_chid_f(ch->hw_chid);
	runlist[1] = 0;
}

/* recursively construct a runlist with interleaved bare channels and TSGs */
static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
				struct fifo_runlist_info_gk20a *runlist,
				u32 cur_level,
				u32 *runlist_entry,
				bool interleave_enabled,
				bool prev_empty,
				u32 *entries_left)
{
	bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
	struct channel_gk20a *ch;
	bool skip_next = false;
	u32 chid, tsgid, count = 0;
	u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);

	gk20a_dbg_fn("");

	/* for each bare channel, CH, on this level, insert all higher-level
	   channels and TSGs before inserting CH. */
	for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
		ch = &f->channel[chid];

		if (ch->interleave_level != cur_level)
			continue;

		if (gk20a_is_channel_marked_as_tsg(ch))
			continue;

		if (!last_level && !skip_next) {
			runlist_entry = gk20a_runlist_construct_locked(f,
							runlist,
							cur_level + 1,
							runlist_entry,
							interleave_enabled,
							false,
							entries_left);
			/* if interleaving is disabled, higher-level channels
			   and TSGs only need to be inserted once */
			if (!interleave_enabled)
				skip_next = true;
		}

		if (!(*entries_left))
			return NULL;

		gk20a_dbg_info("add channel %d to runlist", chid);
		f->g->ops.fifo.get_ch_runlist_entry(ch, runlist_entry);
		gk20a_dbg_info("run list count %d runlist [0] %x [1] %x\n",
				count, runlist_entry[0], runlist_entry[1]);
		runlist_entry += runlist_entry_words;
		count++;
		(*entries_left)--;
	}

	/* for each TSG, T, on this level, insert all higher-level channels
	   and TSGs before inserting T. */
	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
		struct tsg_gk20a *tsg = &f->tsg[tsgid];

		if (tsg->interleave_level != cur_level)
			continue;

		if (!last_level && !skip_next) {
			runlist_entry = gk20a_runlist_construct_locked(f,
							runlist,
							cur_level + 1,
							runlist_entry,
							interleave_enabled,
							false,
							entries_left);
			if (!interleave_enabled)
				skip_next = true;
		}

		if (!(*entries_left))
			return NULL;

		/* add TSG entry */
		gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
		f->g->ops.fifo.get_tsg_runlist_entry(tsg, runlist_entry);
		gk20a_dbg_info("tsg runlist count %d runlist [0] %x [1] %x\n",
				count, runlist_entry[0], runlist_entry[1]);
		runlist_entry += runlist_entry_words;
		count++;
		(*entries_left)--;

		mutex_lock(&tsg->ch_list_lock);
		/* add runnable channels bound to this TSG */
		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
			if (!test_bit(ch->hw_chid,
				      runlist->active_channels))
				continue;

			if (!(*entries_left)) {
				mutex_unlock(&tsg->ch_list_lock);
				return NULL;
			}

			gk20a_dbg_info("add channel %d to runlist",
				ch->hw_chid);
			f->g->ops.fifo.get_ch_runlist_entry(ch, runlist_entry);
			gk20a_dbg_info(
				"run list count %d runlist [0] %x [1] %x\n",
				count, runlist_entry[0], runlist_entry[1]);
			count++;
			runlist_entry += runlist_entry_words;
			(*entries_left)--;
		}
		mutex_unlock(&tsg->ch_list_lock);
	}

	/* append entries from higher level if this level is empty */
	if (!count && !last_level)
		runlist_entry = gk20a_runlist_construct_locked(f,
							runlist,
							cur_level + 1,
							runlist_entry,
							interleave_enabled,
							true,
							entries_left);

	/*
	 * if both the previous and this level have entries, append
	 * entries from the higher level once more.
	 *
	 * e.g. when dropping from MEDIUM to LOW, HIGH needs to be inserted again
	 */
	if (interleave_enabled && count && !prev_empty && !last_level)
		runlist_entry = gk20a_runlist_construct_locked(f,
							runlist,
							cur_level + 1,
							runlist_entry,
							interleave_enabled,
							false,
							entries_left);
	return runlist_entry;
}

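/*
 * With interleaving enabled the recursion above repeats higher-priority
 * entries around every lower-priority one. For example, with one bare
 * channel per level (H on HIGH, M on MEDIUM, L on LOW) the resulting order
 * works out to roughly H M H L, so HIGH is scheduled most often. With
 * interleaving disabled each higher level is emitted only once.
 */
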
int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
				u32 id,
				bool is_tsg,
				u32 runlist_id,
				u32 new_level)
{
	gk20a_dbg_fn("");

	if (is_tsg)
		g->fifo.tsg[id].interleave_level = new_level;
	else
		g->fifo.channel[id].interleave_level = new_level;

	return 0;
}

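/*
 * Rebuild the runlist in the inactive half of the double buffer and point
 * the hardware at it: the new buffer is filled by
 * gk20a_runlist_construct_locked(), fifo_runlist_base_r()/fifo_runlist_r()
 * are programmed with its address and entry count, and cur_buffer is only
 * flipped after the submit. A count of 0 (suspend) skips the base register
 * write and submits an empty runlist.
 */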
static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
					    u32 hw_chid, bool add,
					    bool wait_for_finish)
{
	int ret = 0;
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_runlist_info_gk20a *runlist = NULL;
	u32 *runlist_entry_base = NULL;
	u64 runlist_iova;
	u32 old_buf, new_buf;
	struct channel_gk20a *ch = NULL;
	struct tsg_gk20a *tsg = NULL;
	u32 count = 0;
	u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);

	runlist = &f->runlist_info[runlist_id];

	/* if hw_chid refers to a valid channel, add/remove it from the active
	   list. Otherwise, keep the active list untouched for suspend/resume. */
	if (hw_chid != FIFO_INVAL_CHANNEL_ID) {
		ch = &f->channel[hw_chid];
		if (gk20a_is_channel_marked_as_tsg(ch))
			tsg = &f->tsg[ch->tsgid];

		if (add) {
			if (test_and_set_bit(hw_chid,
				runlist->active_channels) == 1)
				return 0;
			if (tsg && ++tsg->num_active_channels)
				set_bit(f->channel[hw_chid].tsgid,
					runlist->active_tsgs);
		} else {
			if (test_and_clear_bit(hw_chid,
				runlist->active_channels) == 0)
				return 0;
			if (tsg && --tsg->num_active_channels == 0)
				clear_bit(f->channel[hw_chid].tsgid,
					runlist->active_tsgs);
		}
	}

	old_buf = runlist->cur_buffer;
	new_buf = !runlist->cur_buffer;

	runlist_iova = g->ops.mm.get_iova_addr(
			g, runlist->mem[new_buf].sgt->sgl, 0);

	gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx",
		runlist_id, (u64)runlist_iova);

	if (!runlist_iova) {
		ret = -EINVAL;
		goto clean_up;
	}

	runlist_entry_base = runlist->mem[new_buf].cpu_va;
	if (!runlist_entry_base) {
		ret = -ENOMEM;
		goto clean_up;
	}

	if (hw_chid != FIFO_INVAL_CHANNEL_ID || /* add/remove a valid channel */
	    add /* resume to add all channels back */) {
		u32 max_entries = f->num_runlist_entries;
		u32 *runlist_end;

		runlist_end = gk20a_runlist_construct_locked(f,
						runlist,
						0,
						runlist_entry_base,
						g->runlist_interleave,
						true,
						&max_entries);
		if (!runlist_end) {
			ret = -E2BIG;
			goto clean_up;
		}
		count = (runlist_end - runlist_entry_base) / runlist_entry_words;
		WARN_ON(count > f->num_runlist_entries);
	} else	/* suspend to remove all channels */
		count = 0;

	if (count != 0) {
		gk20a_writel(g, fifo_runlist_base_r(),
			fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
			gk20a_aperture_mask(g, &runlist->mem[new_buf],
			  fifo_runlist_base_target_sys_mem_ncoh_f(),
			  fifo_runlist_base_target_vid_mem_f()));
	}

	gk20a_writel(g, fifo_runlist_r(),
		fifo_runlist_engine_f(runlist_id) |
		fifo_eng_runlist_length_f(count));

	if (wait_for_finish) {
		ret = gk20a_fifo_runlist_wait_pending(g, runlist_id);

		if (ret == -ETIMEDOUT) {
			gk20a_err(dev_from_gk20a(g),
				   "runlist update timeout");

			gk20a_fifo_runlist_reset_engines(g, runlist_id);

			/* engine reset needs the lock. drop it */
			/* wait until the runlist is active again */
			ret = gk20a_fifo_runlist_wait_pending(g, runlist_id);
			/* get the lock back. at this point everything should
			 * be fine */

			if (ret)
				gk20a_err(dev_from_gk20a(g),
					   "runlist update failed: %d", ret);
		} else if (ret == -EINTR)
			gk20a_err(dev_from_gk20a(g),
				   "runlist update interrupted");
	}

	runlist->cur_buffer = new_buf;

clean_up:
	return ret;
}

int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 hw_chid,
				bool add, bool wait_for_finish)
{
	int ret = -EINVAL;
	u32 runlist_id = 0;
	int errcode;
	unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;

	if (!g)
		goto end;

	ret = 0;
	for_each_set_bit(runlist_id, &ulong_runlist_ids, 32) {
		/* Capture the last failure error code */
		errcode = g->ops.fifo.update_runlist(g, runlist_id, hw_chid, add, wait_for_finish);
		if (errcode) {
			gk20a_err(dev_from_gk20a(g),
				"failed to update_runlist %d %d", runlist_id, errcode);
			ret = errcode;
		}
	}
end:
	return ret;
}

/* add/remove a channel from runlist
   special cases below: runlist->active_channels will NOT be changed.
   (hw_chid == ~0 && !add) means remove all active channels from runlist.
   (hw_chid == ~0 &&  add) means restore all active channels on runlist. */
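/* typical calls, roughly:
   gk20a_fifo_update_runlist(g, runlist_id, ch->hw_chid, true, true)   - add ch
   gk20a_fifo_update_runlist(g, runlist_id, ch->hw_chid, false, true)  - remove ch
   gk20a_fifo_update_runlist(g, runlist_id, ~0, false, false)          - suspend
   gk20a_fifo_update_runlist(g, runlist_id, ~0, true, false)           - resume */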
int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid,
			      bool add, bool wait_for_finish)
{
	struct fifo_runlist_info_gk20a *runlist = NULL;
	struct fifo_gk20a *f = &g->fifo;
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
	u32 mutex_ret;
	int ret = 0;

	gk20a_dbg_fn("");

	runlist = &f->runlist_info[runlist_id];

	mutex_lock(&runlist->mutex);

	mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	ret = gk20a_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
					       wait_for_finish);

	if (!mutex_ret)
		pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	mutex_unlock(&runlist->mutex);
	return ret;
}

int gk20a_fifo_suspend(struct gk20a *g)
{
	gk20a_dbg_fn("");

	/* stop bar1 snooping */
	if (g->ops.mm.is_bar1_supported(g))
		gk20a_writel(g, fifo_bar1_base_r(),
			fifo_bar1_base_valid_false_f());

	/* disable fifo intr */
	gk20a_writel(g, fifo_intr_en_0_r(), 0);
	gk20a_writel(g, fifo_intr_en_1_r(), 0);

	gk20a_dbg_fn("done");
	return 0;
}

bool gk20a_fifo_mmu_fault_pending(struct gk20a *g)
{
	if (gk20a_readl(g, fifo_intr_0_r()) &
			fifo_intr_0_mmu_fault_pending_f())
		return true;
	else
		return false;
}

bool gk20a_fifo_is_engine_busy(struct gk20a *g)
{
	unsigned int i;

	for (i = 0; i < fifo_engine_status__size_1_v(); i++) {
		u32 status = gk20a_readl(g, fifo_engine_status_r(i));
		if (fifo_engine_status_engine_v(status) ==
			fifo_engine_status_engine_busy_v())
			return true;
	}
	return false;
}

int gk20a_fifo_wait_engine_idle(struct gk20a *g)
{
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	unsigned long delay = GR_IDLE_CHECK_DEFAULT;
	int ret = -ETIMEDOUT;
	u32 i;

	gk20a_dbg_fn("");

	for (i = 0; i < fifo_engine_status__size_1_v(); i++) {
		do {
			u32 status = gk20a_readl(g, fifo_engine_status_r(i));
			if (!fifo_engine_status_engine_v(status)) {
				ret = 0;
				break;
			}

			usleep_range(delay, delay * 2);
			delay = min_t(unsigned long,
					delay << 1, GR_IDLE_CHECK_MAX);
		} while (time_before(jiffies, end_jiffies) ||
				!tegra_platform_is_silicon());
		if (ret) {
			gk20a_dbg_info("cannot idle engine %u", i);
			break;
		}
	}

	gk20a_dbg_fn("done");

	return ret;
}

static void gk20a_fifo_apply_pb_timeout(struct gk20a *g)
{
	u32 timeout;

	if (tegra_platform_is_silicon()) {
		timeout = gk20a_readl(g, fifo_pb_timeout_r());
		timeout &= ~fifo_pb_timeout_detection_enabled_f();
		gk20a_writel(g, fifo_pb_timeout_r(), timeout);
	}
}

static u32 gk20a_fifo_get_num_fifos(struct gk20a *g)
{
	return ccsr_channel__size_1_v();
}

u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
{
	return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
}

struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
		u32 hw_chid)
{
	return g->fifo.channel + hw_chid;
}

#ifdef CONFIG_DEBUG_FS
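/*
 * debugfs "sched" file: a seq_file iterator over f->channel[]. start/next
 * walk the channel array by index; show prints one row per channel that is
 * currently on the GR engine's runlist, with a header row emitted for the
 * first channel.
 */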
static void *gk20a_fifo_sched_debugfs_seq_start(
		struct seq_file *s, loff_t *pos)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;

	if (*pos >= f->num_channels)
		return NULL;

	return &f->channel[*pos];
}

static void *gk20a_fifo_sched_debugfs_seq_next(
		struct seq_file *s, void *v, loff_t *pos)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;

	++(*pos);
	if (*pos >= f->num_channels)
		return NULL;

	return &f->channel[*pos];
}

static void gk20a_fifo_sched_debugfs_seq_stop(
		struct seq_file *s, void *v)
{
}

static int gk20a_fifo_sched_debugfs_seq_show(
		struct seq_file *s, void *v)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = v;
	struct tsg_gk20a *tsg = NULL;

	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	int ret = SEQ_SKIP;
	u32 engine_id;

	engine_id = gk20a_fifo_get_gr_engine_id(g);
	engine_info = (f->engine_info + engine_id);
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	if (ch == f->channel) {
		seq_puts(s, "chid     tsgid    pid      timeslice  timeout  interleave graphics_preempt compute_preempt\n");
		seq_puts(s, "                            (usecs)   (msecs)\n");
		ret = 0;
	}

	if (!test_bit(ch->hw_chid, runlist->active_channels))
		return ret;

	if (gk20a_channel_get(ch)) {
		if (gk20a_is_channel_marked_as_tsg(ch))
			tsg = &f->tsg[ch->tsgid];

		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
				ch->hw_chid,
				ch->tsgid,
				ch->tgid,
				tsg ? tsg->timeslice_us : ch->timeslice_us,
				ch->timeout_ms_max,
				tsg ? tsg->interleave_level : ch->interleave_level,
				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : -1,
				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : -1);
		gk20a_channel_put(ch);
	}
	return 0;
}

static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
	.start = gk20a_fifo_sched_debugfs_seq_start,
	.next = gk20a_fifo_sched_debugfs_seq_next,
	.stop = gk20a_fifo_sched_debugfs_seq_stop,
	.show = gk20a_fifo_sched_debugfs_seq_show
};

static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
	struct file *file)
{
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
	if (err)
		return err;

	gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);

	((struct seq_file *)file->private_data)->private = inode->i_private;
	return 0;
}

/*
 * The file operations structure contains our open function along with
 * a set of the canned seq_ ops.
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_fifo_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};

void gk20a_fifo_debugfs_init(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a *g = get_gk20a(dev);

	struct dentry *gpu_root = platform->debugfs;
	struct dentry *fifo_root;

	fifo_root = debugfs_create_dir("fifo", gpu_root);
	if (IS_ERR_OR_NULL(fifo_root))
		return;

	gk20a_dbg(gpu_dbg_info, "g=%p", g);

	debugfs_create_file("sched", 0600, fifo_root, g,
		&gk20a_fifo_sched_debugfs_fops);
}
#endif /* CONFIG_DEBUG_FS */

void gk20a_init_fifo(struct gpu_ops *gops)
{
	gk20a_init_channel(gops);
	gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
	gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
	gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;
	gops->fifo.update_runlist = gk20a_fifo_update_runlist;
	gops->fifo.trigger_mmu_fault = gk20a_fifo_trigger_mmu_fault;
	gops->fifo.apply_pb_timeout = gk20a_fifo_apply_pb_timeout;
	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
	gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
	gops->fifo.force_reset_ch = gk20a_fifo_force_reset_ch;
	gops->fifo.engine_enum_from_type = gk20a_fifo_engine_enum_from_type;
	/* gk20a doesn't support device_info_data packet parsing */
	gops->fifo.device_info_data_parse = NULL;
	gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v;
	gops->fifo.init_engine_info = gk20a_fifo_init_engine_info;
	gops->fifo.runlist_entry_size = ram_rl_entry_size_v;
	gops->fifo.get_tsg_runlist_entry = gk20a_get_tsg_runlist_entry;
	gops->fifo.get_ch_runlist_entry = gk20a_get_ch_runlist_entry;
}
pmu); g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx( pmu, GK20A_PMU_DMAIDX_VIRT); g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu, clk_get_rate(platform->clk[1])); addr_args = (pwr_falcon_hwcfg_dmem_size_v( gk20a_readl(g, pwr_falcon_hwcfg_r())) << GK20A_PMU_DMEM_BLKSIZE2) - g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu); pmu_copy_to_dmem(pmu, addr_args, (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); gk20a_writel(g, pwr_falcon_dmemc_r(0), pwr_falcon_dmemc_offs_f(0) | pwr_falcon_dmemc_blk_f(0) | pwr_falcon_dmemc_aincw_f(1)); addr_code = u64_lo32((pmu->ucode.gpu_va + desc->app_start_offset + desc->app_resident_code_offset) >> 8) ; addr_data = u64_lo32((pmu->ucode.gpu_va + desc->app_start_offset + desc->app_resident_data_offset) >> 8); addr_load = u64_lo32((pmu->ucode.gpu_va + desc->bootloader_start_offset) >> 8); gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code); gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args); g->ops.pmu.write_dmatrfbase(g, addr_load - (desc->bootloader_imem_offset >> 8)); blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; for (i = 0; i < blocks; i++) { gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(), desc->bootloader_imem_offset + (i << 8)); gk20a_writel(g, pwr_falcon_dmatrffboffs_r(), desc->bootloader_imem_offset + (i << 8)); gk20a_writel(g, pwr_falcon_dmatrfcmd_r(), pwr_falcon_dmatrfcmd_imem_f(1) | pwr_falcon_dmatrfcmd_write_f(0) | pwr_falcon_dmatrfcmd_size_f(6) | pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE)); } gk20a_writel(g, pwr_falcon_bootvec_r(), pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point)); gk20a_writel(g, pwr_falcon_cpuctl_r(), pwr_falcon_cpuctl_startcpu_f(1)); gk20a_writel(g, pwr_falcon_os_r(), desc->app_version); return 0; } void pmu_seq_init(struct pmu_gk20a *pmu) { u32 i; memset(pmu->seq, 0, sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES); memset(pmu->pmu_seq_tbl, 0, sizeof(pmu->pmu_seq_tbl)); for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++) pmu->seq[i].id = i; } static int pmu_seq_acquire(struct pmu_gk20a *pmu, struct pmu_sequence **pseq) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_sequence *seq; u32 index; mutex_lock(&pmu->pmu_seq_lock); index = find_first_zero_bit(pmu->pmu_seq_tbl, sizeof(pmu->pmu_seq_tbl)); if (index >= sizeof(pmu->pmu_seq_tbl)) { gk20a_err(dev_from_gk20a(g), "no free sequence available"); mutex_unlock(&pmu->pmu_seq_lock); return -EAGAIN; } set_bit(index, pmu->pmu_seq_tbl); mutex_unlock(&pmu->pmu_seq_lock); seq = &pmu->seq[index]; seq->state = PMU_SEQ_STATE_PENDING; *pseq = seq; return 0; } static void pmu_seq_release(struct pmu_gk20a *pmu, struct pmu_sequence *seq) { struct gk20a *g = gk20a_from_pmu(pmu); seq->state = PMU_SEQ_STATE_FREE; seq->desc = PMU_INVALID_SEQ_DESC; seq->callback = NULL; seq->cb_params = NULL; seq->msg = NULL; seq->out_payload = NULL; g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu, g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0); g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu, g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0); clear_bit(seq->id, pmu->pmu_seq_tbl); } static 
int pmu_queue_init(struct pmu_gk20a *pmu, u32 id, union pmu_init_msg_pmu *init) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_queue *queue = &pmu->queue[id]; queue->id = id; g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init); queue->mutex_id = id; mutex_init(&queue->mutex); gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x", id, queue->index, queue->offset, queue->size); return 0; } static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue, u32 *head, bool set) { struct gk20a *g = gk20a_from_pmu(pmu); BUG_ON(!head); if (PMU_IS_COMMAND_QUEUE(queue->id)) { if (queue->index >= pwr_pmu_queue_head__size_1_v()) return -EINVAL; if (!set) *head = pwr_pmu_queue_head_address_v( gk20a_readl(g, pwr_pmu_queue_head_r(queue->index))); else gk20a_writel(g, pwr_pmu_queue_head_r(queue->index), pwr_pmu_queue_head_address_f(*head)); } else { if (!set) *head = pwr_pmu_msgq_head_val_v( gk20a_readl(g, pwr_pmu_msgq_head_r())); else gk20a_writel(g, pwr_pmu_msgq_head_r(), pwr_pmu_msgq_head_val_f(*head)); } return 0; } static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue, u32 *tail, bool set) { struct gk20a *g = gk20a_from_pmu(pmu); BUG_ON(!tail); if (PMU_IS_COMMAND_QUEUE(queue->id)) { if (queue->index >= pwr_pmu_queue_tail__size_1_v()) return -EINVAL; if (!set) *tail = pwr_pmu_queue_tail_address_v( gk20a_readl(g, pwr_pmu_queue_tail_r(queue->index))); else gk20a_writel(g, pwr_pmu_queue_tail_r(queue->index), pwr_pmu_queue_tail_address_f(*tail)); } else { if (!set) *tail = pwr_pmu_msgq_tail_val_v( gk20a_readl(g, pwr_pmu_msgq_tail_r())); else gk20a_writel(g, pwr_pmu_msgq_tail_r(), pwr_pmu_msgq_tail_val_f(*tail)); } return 0; } static inline void pmu_queue_read(struct pmu_gk20a *pmu, u32 offset, u8 *dst, u32 size) { pmu_copy_from_dmem(pmu, offset, dst, size, 0); } static inline void pmu_queue_write(struct pmu_gk20a *pmu, u32 offset, u8 *src, u32 size) { pmu_copy_to_dmem(pmu, offset, src, size, 0); } int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_mutex *mutex; u32 data, owner, max_retry; if (!pmu->initialized) return -EINVAL; BUG_ON(!token); BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); BUG_ON(id > pmu->mutex_cnt); mutex = &pmu->mutex[id]; owner = pwr_pmu_mutex_value_v( gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) { BUG_ON(mutex->ref_cnt == 0); gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token); mutex->ref_cnt++; return 0; } max_retry = 40; do { data = pwr_pmu_mutex_id_value_v( gk20a_readl(g, pwr_pmu_mutex_id_r())); if (data == pwr_pmu_mutex_id_value_init_v() || data == pwr_pmu_mutex_id_value_not_avail_v()) { gk20a_warn(dev_from_gk20a(g), "fail to generate mutex token: val 0x%08x", owner); usleep_range(20, 40); continue; } owner = data; gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), pwr_pmu_mutex_value_f(owner)); data = pwr_pmu_mutex_value_v( gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); if (owner == data) { mutex->ref_cnt = 1; gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x", mutex->index, *token); *token = owner; return 0; } else { gk20a_dbg_info("fail to acquire mutex idx=0x%08x", mutex->index); data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); data = set_field(data, pwr_pmu_mutex_id_release_value_m(), pwr_pmu_mutex_id_release_value_f(owner)); gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); usleep_range(20, 40); continue; } } while (max_retry-- > 0); return -EBUSY; } int pmu_mutex_release(struct pmu_gk20a 
*pmu, u32 id, u32 *token) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_mutex *mutex; u32 owner, data; if (!pmu->initialized) return -EINVAL; BUG_ON(!token); BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); BUG_ON(id > pmu->mutex_cnt); mutex = &pmu->mutex[id]; owner = pwr_pmu_mutex_value_v( gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); if (*token != owner) { gk20a_err(dev_from_gk20a(g), "requester 0x%08x NOT match owner 0x%08x", *token, owner); return -EINVAL; } if (--mutex->ref_cnt > 0) return -EBUSY; gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), pwr_pmu_mutex_value_initial_lock_f()); data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); data = set_field(data, pwr_pmu_mutex_id_release_value_m(), pwr_pmu_mutex_id_release_value_f(owner)); gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); gk20a_dbg_pmu("mutex released: id=%d, token=0x%x", mutex->index, *token); return 0; } static int pmu_queue_lock(struct pmu_gk20a *pmu, struct pmu_queue *queue) { int err; if (PMU_IS_MESSAGE_QUEUE(queue->id)) return 0; if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { mutex_lock(&queue->mutex); return 0; } err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock); return err; } static int pmu_queue_unlock(struct pmu_gk20a *pmu, struct pmu_queue *queue) { int err; if (PMU_IS_MESSAGE_QUEUE(queue->id)) return 0; if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { mutex_unlock(&queue->mutex); return 0; } err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock); return err; } /* called by pmu_read_message, no lock */ static bool pmu_queue_is_empty(struct pmu_gk20a *pmu, struct pmu_queue *queue) { u32 head, tail; pmu_queue_head(pmu, queue, &head, QUEUE_GET); if (queue->opened && queue->oflag == OFLAG_READ) tail = queue->position; else pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); return head == tail; } static bool pmu_queue_has_room(struct pmu_gk20a *pmu, struct pmu_queue *queue, u32 size, bool *need_rewind) { u32 head, tail; bool rewind = false; unsigned int free; size = ALIGN(size, QUEUE_ALIGNMENT); pmu_queue_head(pmu, queue, &head, QUEUE_GET); pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); if (head >= tail) { free = queue->offset + queue->size - head; free -= PMU_CMD_HDR_SIZE; if (size > free) { rewind = true; head = queue->offset; } } if (head < tail) free = tail - head - 1; if (need_rewind) *need_rewind = rewind; return size <= free; } static int pmu_queue_push(struct pmu_gk20a *pmu, struct pmu_queue *queue, void *data, u32 size) { gk20a_dbg_fn(""); if (!queue->opened && queue->oflag == OFLAG_WRITE){ gk20a_err(dev_from_gk20a(gk20a_from_pmu(pmu)), "queue not opened for write"); return -EINVAL; } pmu_queue_write(pmu, queue->position, data, size); queue->position += ALIGN(size, QUEUE_ALIGNMENT); return 0; } static int pmu_queue_pop(struct pmu_gk20a *pmu, struct pmu_queue *queue, void *data, u32 size, u32 *bytes_read) { u32 head, tail, used; *bytes_read = 0; if (!queue->opened && queue->oflag == OFLAG_READ){ gk20a_err(dev_from_gk20a(gk20a_from_pmu(pmu)), "queue not opened for read"); return -EINVAL; } pmu_queue_head(pmu, queue, &head, QUEUE_GET); tail = queue->position; if (head == tail) return 0; if (head > tail) used = head - tail; else used = queue->offset + queue->size - tail; if (size > used) { gk20a_warn(dev_from_gk20a(gk20a_from_pmu(pmu)), "queue size smaller than request read"); size = used; } pmu_queue_read(pmu, tail, data, size); queue->position += ALIGN(size, QUEUE_ALIGNMENT); *bytes_read = size; return 0; } static void pmu_queue_rewind(struct pmu_gk20a *pmu, struct pmu_queue *queue) { struct pmu_cmd cmd; 
gk20a_dbg_fn(""); if (!queue->opened) { gk20a_err(dev_from_gk20a(gk20a_from_pmu(pmu)), "queue not opened"); return; } if (queue->oflag == OFLAG_WRITE) { cmd.hdr.unit_id = PMU_UNIT_REWIND; cmd.hdr.size = PMU_CMD_HDR_SIZE; pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size); gk20a_dbg_pmu("queue %d rewinded", queue->id); } queue->position = queue->offset; return; } /* open for read and lock the queue */ static int pmu_queue_open_read(struct pmu_gk20a *pmu, struct pmu_queue *queue) { int err; err = pmu_queue_lock(pmu, queue); if (err) return err; if (queue->opened) BUG(); pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET); queue->oflag = OFLAG_READ; queue->opened = true; return 0; } /* open for write and lock the queue make sure there's enough free space for the write */ static int pmu_queue_open_write(struct pmu_gk20a *pmu, struct pmu_queue *queue, u32 size) { bool rewind = false; int err; err = pmu_queue_lock(pmu, queue); if (err) return err; if (queue->opened) BUG(); if (!pmu_queue_has_room(pmu, queue, size, &rewind)) { gk20a_err(dev_from_gk20a(gk20a_from_pmu(pmu)), "queue full"); pmu_queue_unlock(pmu, queue); return -EAGAIN; } pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET); queue->oflag = OFLAG_WRITE; queue->opened = true; if (rewind) pmu_queue_rewind(pmu, queue); return 0; } /* close and unlock the queue */ static int pmu_queue_close(struct pmu_gk20a *pmu, struct pmu_queue *queue, bool commit) { if (!queue->opened) return 0; if (commit) { if (queue->oflag == OFLAG_READ) { pmu_queue_tail(pmu, queue, &queue->position, QUEUE_SET); } else { pmu_queue_head(pmu, queue, &queue->position, QUEUE_SET); } } queue->opened = false; pmu_queue_unlock(pmu, queue); return 0; } void gk20a_remove_pmu_support(struct pmu_gk20a *pmu) { gk20a_dbg_fn(""); if (gk20a_alloc_initialized(&pmu->dmem)) gk20a_alloc_destroy(&pmu->dmem); release_firmware(pmu->fw); } static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; gk20a_dbg_fn(""); pmu_enable_hw(pmu, true); return 0; } static int gk20a_prepare_ucode(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; int err = 0; struct device *d = dev_from_gk20a(g); struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; if (pmu->fw) return gk20a_init_pmu(pmu); pmu->fw = nvgpu_request_firmware(g, GK20A_PMU_UCODE_IMAGE, 0); if (!pmu->fw) { gk20a_err(d, "failed to load pmu ucode!!"); return err; } gk20a_dbg_fn("firmware loaded"); pmu->desc = (struct pmu_ucode_desc *)pmu->fw->data; pmu->ucode_image = (u32 *)((u8 *)pmu->desc + pmu->desc->descriptor_size); err = gk20a_gmmu_alloc_map_sys(vm, GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode); if (err) goto err_release_fw; gk20a_mem_wr_n(g, &pmu->ucode, 0, pmu->ucode_image, pmu->desc->app_start_offset + pmu->desc->app_size); return gk20a_init_pmu(pmu); err_release_fw: release_firmware(pmu->fw); pmu->fw = NULL; return err; } static int gk20a_init_pmu_setup_sw(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; struct device *d = dev_from_gk20a(g); unsigned int i; int err = 0; u8 *ptr; gk20a_dbg_fn(""); /* start with elpg disabled until first enable call */ pmu->elpg_refcnt = 0; if (pmu->sw_ready) { for (i = 0; i < pmu->mutex_cnt; i++) { pmu->mutex[i].id = i; pmu->mutex[i].index = i; } pmu_seq_init(pmu); gk20a_dbg_fn("skip init"); goto skip_init; } /* no infoRom script from vbios? 
*/ /* TBD: sysmon subtask */ if (IS_ENABLED(CONFIG_TEGRA_GK20A_PERFMON)) pmu->perfmon_sampling_enabled = true; pmu->mutex_cnt = pwr_pmu_mutex__size_1_v(); pmu->mutex = kzalloc(pmu->mutex_cnt * sizeof(struct pmu_mutex), GFP_KERNEL); if (!pmu->mutex) { err = -ENOMEM; goto err; } for (i = 0; i < pmu->mutex_cnt; i++) { pmu->mutex[i].id = i; pmu->mutex[i].index = i; } pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES * sizeof(struct pmu_sequence), GFP_KERNEL); if (!pmu->seq) { err = -ENOMEM; goto err_free_mutex; } pmu_seq_init(pmu); INIT_WORK(&pmu->pg_init, pmu_setup_hw); err = gk20a_gmmu_alloc_map_sys(vm, GK20A_PMU_SEQ_BUF_SIZE, &pmu->seq_buf); if (err) { gk20a_err(d, "failed to allocate memory\n"); goto err_free_seq; } err = gk20a_gmmu_alloc_map_sys(vm, GK20A_PMU_TRACE_BUFSIZE, &pmu->trace_buf); if (err) { gk20a_err(d, "failed to allocate trace memory\n"); goto err_free_seq_buf; } ptr = (u8 *)pmu->seq_buf.cpu_va; /* TBD: remove this if ZBC save/restore is handled by PMU * end an empty ZBC sequence for now */ ptr[0] = 0x16; /* opcode EXIT */ ptr[1] = 0; ptr[2] = 1; ptr[3] = 0; ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0; pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; pmu->sw_ready = true; skip_init: gk20a_dbg_fn("done"); return 0; err_free_seq_buf: gk20a_gmmu_unmap_free(vm, &pmu->seq_buf); err_free_seq: kfree(pmu->seq); err_free_mutex: kfree(pmu->mutex); err: gk20a_dbg_fn("fail"); return err; } static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status); static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct pmu_gk20a *pmu = param; struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat; gk20a_dbg_fn(""); gk20a_dbg_pmu("reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS"); if (status != 0) { gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted"); /* TBD: disable ELPG */ return; } pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED); if ((!pmu->buf_loaded) && (pmu->pmu_state == PMU_STATE_LOADING_PG_BUF)) gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer"); else { schedule_work(&pmu->pg_init); } } static int gk20a_init_pmu_setup_hw1(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; int err = 0; gk20a_dbg_fn(""); mutex_lock(&pmu->isr_mutex); pmu_reset(pmu); pmu->isr_enabled = true; mutex_unlock(&pmu->isr_mutex); /* setup apertures - virtual */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), pwr_fbif_transcfg_mem_type_virtual_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT), pwr_fbif_transcfg_mem_type_virtual_f()); /* setup apertures - physical */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_local_fb_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_coherent_sysmem_f()); gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_noncoherent_sysmem_f()); err = g->ops.pmu.pmu_nsbootstrap(pmu); return err; } static void pmu_setup_hw_load_zbc(struct gk20a *g); static void pmu_setup_hw_enable_elpg(struct gk20a *g); void pmu_setup_hw(struct work_struct *work) { struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init); struct gk20a *g = gk20a_from_pmu(pmu); struct gk20a_platform *platform = dev_get_drvdata(g->dev); switch (pmu->pmu_state) { case 
PMU_STATE_INIT_RECEIVED: gk20a_dbg_pmu("pmu starting"); if (platform->can_elpg) pmu_init_powergating(g); break; case PMU_STATE_ELPG_BOOTED: gk20a_dbg_pmu("elpg booted"); gk20a_init_pmu_bind_fecs(g); break; case PMU_STATE_LOADING_PG_BUF: gk20a_dbg_pmu("loaded pg buf"); pmu_setup_hw_load_zbc(g); break; case PMU_STATE_LOADING_ZBC: gk20a_dbg_pmu("loaded zbc"); pmu_setup_hw_enable_elpg(g); break; case PMU_STATE_STARTED: gk20a_dbg_pmu("PMU booted"); break; default: gk20a_dbg_pmu("invalid state"); break; } } int gk20a_init_pmu_bind_fecs(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 desc; int err = 0; u32 gr_engine_id; gk20a_dbg_fn(""); gr_engine_id = gk20a_fifo_get_gr_engine_id(g); memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + g->ops.pmu_ver.pg_cmd_eng_buf_load_size(&cmd.cmd.pg); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg, PMU_PG_CMD_ID_ENG_BUF_LOAD); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg, gr_engine_id); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg, PMU_PGENG_GR_BUFFER_IDX_FECS); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg, pmu->pg_buf.size); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base(&cmd.cmd.pg, u64_lo32(pmu->pg_buf.gpu_va)); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset(&cmd.cmd.pg, (u8)(pmu->pg_buf.gpu_va & 0xFF)); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx(&cmd.cmd.pg, PMU_DMAIDX_VIRT); pmu->buf_loaded = false; gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, pmu_handle_pg_buf_config_msg, pmu, &desc, ~0); pmu->pmu_state = PMU_STATE_LOADING_PG_BUF; return err; } static void pmu_setup_hw_load_zbc(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 desc; u32 gr_engine_id; gr_engine_id = gk20a_fifo_get_gr_engine_id(g); memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + g->ops.pmu_ver.pg_cmd_eng_buf_load_size(&cmd.cmd.pg); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg, PMU_PG_CMD_ID_ENG_BUF_LOAD); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg, gr_engine_id); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg, PMU_PGENG_GR_BUFFER_IDX_ZBC); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg, pmu->seq_buf.size); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base(&cmd.cmd.pg, u64_lo32(pmu->seq_buf.gpu_va)); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset(&cmd.cmd.pg, (u8)(pmu->seq_buf.gpu_va & 0xFF)); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx(&cmd.cmd.pg, PMU_DMAIDX_VIRT); pmu->buf_loaded = false; gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, pmu_handle_pg_buf_config_msg, pmu, &desc, ~0); pmu->pmu_state = PMU_STATE_LOADING_ZBC; } static void pmu_setup_hw_enable_elpg(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; /* * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to * 7. This prevents PMU stalling on Host register accesses. Once the * cause for this hang is discovered and fixed, this WAR should be * removed. */ gk20a_writel(g, 0x10a164, 0x109ff); pmu->initialized = true; pmu->pmu_state = PMU_STATE_STARTED; pmu->zbc_ready = true; /* Save zbc table after PMU is initialized. 
*/ gk20a_pmu_save_zbc(g, 0xf); if (g->elpg_enabled) { /* Init reg with prod values*/ if (g->ops.pmu.pmu_setup_elpg) g->ops.pmu.pmu_setup_elpg(g); gk20a_pmu_enable_elpg(g); } udelay(50); /* Enable AELPG */ if (g->aelpg_enabled) { gk20a_aelpg_init(g); gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); } } static void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr) { gk20a_writel(g, pwr_falcon_dmatrfbase_r(), addr); } int gk20a_pmu_reset(struct gk20a *g) { int err; struct pmu_gk20a *pmu = &g->pmu; err = pmu_reset(pmu); return err; } static bool gk20a_is_pmu_supported(struct gk20a *g) { return true; } void gk20a_init_pmu_ops(struct gpu_ops *gops) { gops->pmu.is_pmu_supported = gk20a_is_pmu_supported; gops->pmu.prepare_ucode = gk20a_prepare_ucode; gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1; gops->pmu.pmu_nsbootstrap = pmu_bootstrap; gops->pmu.pmu_setup_elpg = NULL; gops->pmu.init_wpr_region = NULL; gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.write_dmatrfbase = gk20a_write_dmatrfbase; gops->pmu.pmu_elpg_statistics = gk20a_pmu_elpg_statistics; gops->pmu.pmu_pg_grinit_param = NULL; gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL; gops->pmu.dump_secure_fuses = NULL; gops->pmu.is_lazy_bootstrap = NULL; gops->pmu.is_priv_load = NULL; gops->pmu.get_wpr = NULL; gops->pmu.alloc_blob_space = NULL; gops->pmu.pmu_populate_loader_cfg = NULL; gops->pmu.flcn_populate_bl_dmem_desc = NULL; gops->pmu.reset = gk20a_pmu_reset; } int gk20a_init_pmu_support(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; u32 err; gk20a_dbg_fn(""); if (pmu->initialized) return 0; err = gk20a_init_pmu_reset_enable_hw(g); if (err) return err; if (support_gk20a_pmu(g->dev)) { err = gk20a_init_pmu_setup_sw(g); if (err) return err; err = g->ops.pmu.pmu_setup_hw_and_bootstrap(g); if (err) return err; pmu->pmu_state = PMU_STATE_STARTING; } return err; } static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct pmu_gk20a *pmu = param; struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg; gk20a_dbg_fn(""); if (status != 0) { gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted"); /* TBD: disable ELPG */ return; } switch (elpg_msg->msg) { case PMU_PG_ELPG_MSG_INIT_ACK: gk20a_dbg_pmu("INIT_PG is acknowledged from PMU"); break; case PMU_PG_ELPG_MSG_ALLOW_ACK: gk20a_dbg_pmu("ALLOW is acknowledged from PMU"); pmu->elpg_stat = PMU_ELPG_STAT_ON; break; case PMU_PG_ELPG_MSG_DISALLOW_ACK: gk20a_dbg_pmu("DISALLOW is acknowledged from PMU"); pmu->elpg_stat = PMU_ELPG_STAT_OFF; if (pmu->pmu_state == PMU_STATE_ELPG_BOOTING) { pmu->pmu_state = PMU_STATE_ELPG_BOOTED; schedule_work(&pmu->pg_init); } break; default: gk20a_err(dev_from_gk20a(g), "unsupported ELPG message : 0x%04x", elpg_msg->msg); } return; } static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct pmu_gk20a *pmu = param; gk20a_dbg_fn(""); if (status != 0) { gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted"); /* TBD: disable ELPG */ return; } switch (msg->msg.pg.stat.sub_msg_id) { case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET: gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU"); pmu->stat_dmem_offset = msg->msg.pg.stat.data; break; default: break; } } static int pmu_init_powergating(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq; u32 gr_engine_id; gk20a_dbg_fn(""); gr_engine_id = gk20a_fifo_get_gr_engine_id(g); if (tegra_cpu_is_asim()) { /* TBD: calculate threshold for silicon */ 
gk20a_writel(g, pwr_pmu_pg_idlefilth_r(gr_engine_id), PMU_PG_IDLE_THRESHOLD_SIM); gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(gr_engine_id), PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM); } else { /* TBD: calculate threshold for silicon */ gk20a_writel(g, pwr_pmu_pg_idlefilth_r(gr_engine_id), PMU_PG_IDLE_THRESHOLD); gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(gr_engine_id), PMU_PG_POST_POWERUP_IDLE_THRESHOLD); } gk20a_gr_wait_initialized(g); if (g->ops.pmu.pmu_pg_grinit_param) g->ops.pmu.pmu_pg_grinit_param(g, PMU_PG_FEATURE_GR_POWER_GATING_ENABLED); /* init ELPG */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, pmu, &seq, ~0); /* alloc dmem for powergating state log */ pmu->stat_dmem_offset = 0; memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat); cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT; cmd.cmd.pg.stat.engine_id = gr_engine_id; cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM; cmd.cmd.pg.stat.data = 0; gk20a_dbg_pmu("cmd post PMU_PG_STAT_CMD_ALLOC_DMEM"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, pmu_handle_pg_stat_msg, pmu, &seq, ~0); /* disallow ELPG initially PMU ucode requires a disallow cmd before allow cmd */ pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, pmu, &seq, ~0); if (pmu->pmu_state == PMU_STATE_INIT_RECEIVED) pmu->pmu_state = PMU_STATE_ELPG_BOOTING; return 0; } static u8 get_perfmon_id(struct pmu_gk20a *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; u8 unit_id; switch (ver) { case GK20A_GPUID_GK20A: case GK20A_GPUID_GM20B: unit_id = PMU_UNIT_PERFMON; break; #if defined(CONFIG_ARCH_TEGRA_18x_SOC) case TEGRA_18x_GPUID: case TEGRA_18x_GPUID2: case TEGRA_18x_GPUID3: unit_id = PMU_UNIT_PERFMON_T18X; break; #endif case GK20A_GPUID_GM206: case GK20A_GPUID_GM204: unit_id = PMU_UNIT_PERFMON_T18X; break; default: gk20a_err(g->dev, "no support for %x", ver); BUG(); } return unit_id; } static int pmu_init_perfmon(struct pmu_gk20a *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_v *pv = &g->ops.pmu_ver; struct pmu_cmd cmd; struct pmu_payload payload; u32 seq; u32 data; gk20a_dbg_fn(""); pmu->perfmon_ready = 0; /* use counter #3 for GR && CE2 busy cycles */ gk20a_writel(g, pwr_pmu_idle_mask_r(3), pwr_pmu_idle_mask_gr_enabled_f() | pwr_pmu_idle_mask_ce_2_enabled_f()); /* disable idle filtering for counters 3 and 6 */ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_busy_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data); /* use 
counter #6 for total cycles */ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_always_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data); /* * We don't want to disturb counters #3 and #6, which are used by * perfmon, so we add wiring also to counters #1 and #2 for * exposing raw counter readings. */ gk20a_writel(g, pwr_pmu_idle_mask_r(1), pwr_pmu_idle_mask_gr_enabled_f() | pwr_pmu_idle_mask_ce_2_enabled_f()); data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_busy_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data); data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2)); data = set_field(data, pwr_pmu_idle_ctrl_value_m() | pwr_pmu_idle_ctrl_filter_m(), pwr_pmu_idle_ctrl_value_always_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); if (!pmu->sample_buffer) pmu->sample_buffer = gk20a_alloc(&pmu->dmem, 2 * sizeof(u16)); if (!pmu->sample_buffer) { gk20a_err(dev_from_gk20a(g), "failed to allocate perfmon sample buffer"); return -ENOMEM; } /* init PERFMON */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = get_perfmon_id(pmu); cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size(); cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT; /* buffer to save counter values for pmu perfmon */ pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon, (u16)pmu->sample_buffer); /* number of sample periods below lower threshold before pmu triggers perfmon decrease event TBD: = 15 */ pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15); /* index of base counter, aka. 
always ticking counter */ pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6); /* microseconds interval between pmu polls perf counters */ pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700); /* number of perfmon counters counter #3 (GR and CE2) for gk20a */ pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1); /* moving average window for sample periods TBD: = 3000000 / sample_period_us = 17 */ pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17); memset(&payload, 0, sizeof(struct pmu_payload)); payload.in.buf = pv->get_perfmon_cntr_ptr(pmu); payload.in.size = pv->get_perfmon_cntr_sz(pmu); payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC); gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT"); gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, NULL, NULL, &seq, ~0); return 0; } static int pmu_process_init_msg(struct pmu_gk20a *pmu, struct pmu_msg *msg) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_v *pv = &g->ops.pmu_ver; union pmu_init_msg_pmu *init; struct pmu_sha1_gid_data gid_data; u32 i, tail = 0; gk20a_dbg_pmu("init received\n"); tail = pwr_pmu_msgq_tail_val_v( gk20a_readl(g, pwr_pmu_msgq_tail_r())); pmu_copy_from_dmem(pmu, tail, (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); if (msg->hdr.unit_id != PMU_UNIT_INIT) { gk20a_err(dev_from_gk20a(g), "expecting init msg"); return -EINVAL; } pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE, (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { gk20a_err(dev_from_gk20a(g), "expecting init msg"); return -EINVAL; } tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); gk20a_writel(g, pwr_pmu_msgq_tail_r(), pwr_pmu_msgq_tail_val_f(tail)); init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init)); if (!pmu->gid_info.valid) { pmu_copy_from_dmem(pmu, pv->get_pmu_init_msg_pmu_sw_mg_off(init), (u8 *)&gid_data, sizeof(struct pmu_sha1_gid_data), 0); pmu->gid_info.valid = (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE); if (pmu->gid_info.valid) { BUG_ON(sizeof(pmu->gid_info.gid) != sizeof(gid_data.gid)); memcpy(pmu->gid_info.gid, gid_data.gid, sizeof(pmu->gid_info.gid)); } } for (i = 0; i < PMU_QUEUE_COUNT; i++) pmu_queue_init(pmu, i, init); if (!gk20a_alloc_initialized(&pmu->dmem)) { /* Align start and end addresses */ u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), PMU_DMEM_ALLOC_ALIGNMENT); u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) + pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); u32 size = end - start; gk20a_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem", start, size, PMU_DMEM_ALLOC_ALIGNMENT, 0); } pmu->pmu_ready = true; pmu->pmu_state = PMU_STATE_INIT_RECEIVED; schedule_work(&pmu->pg_init); gk20a_dbg_pmu("init received end\n"); return 0; } static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue, struct pmu_msg *msg, int *status) { struct gk20a *g = gk20a_from_pmu(pmu); u32 read_size, bytes_read; int err; *status = 0; if (pmu_queue_is_empty(pmu, queue)) return false; err = pmu_queue_open_read(pmu, queue); if (err) { gk20a_err(dev_from_gk20a(g), "fail to open queue %d for read", queue->id); *status = err; return false; } err = pmu_queue_pop(pmu, queue, &msg->hdr, PMU_MSG_HDR_SIZE, &bytes_read); if (err || bytes_read != PMU_MSG_HDR_SIZE) { gk20a_err(dev_from_gk20a(g), "fail to read msg from queue %d", queue->id); *status = err | -EINVAL; goto clean_up; } if (msg->hdr.unit_id == PMU_UNIT_REWIND) { pmu_queue_rewind(pmu, queue); /* read again after rewind */ 
err = pmu_queue_pop(pmu, queue, &msg->hdr, PMU_MSG_HDR_SIZE, &bytes_read); if (err || bytes_read != PMU_MSG_HDR_SIZE) { gk20a_err(dev_from_gk20a(g), "fail to read msg from queue %d", queue->id); *status = err | -EINVAL; goto clean_up; } } if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) { gk20a_err(dev_from_gk20a(g), "read invalid unit_id %d from queue %d", msg->hdr.unit_id, queue->id); *status = -EINVAL; goto clean_up; } if (msg->hdr.size > PMU_MSG_HDR_SIZE) { read_size = msg->hdr.size - PMU_MSG_HDR_SIZE; err = pmu_queue_pop(pmu, queue, &msg->msg, read_size, &bytes_read); if (err || bytes_read != read_size) { gk20a_err(dev_from_gk20a(g), "fail to read msg from queue %d", queue->id); *status = err; goto clean_up; } } err = pmu_queue_close(pmu, queue, true); if (err) { gk20a_err(dev_from_gk20a(g), "fail to close queue %d", queue->id); *status = err; return false; } return true; clean_up: err = pmu_queue_close(pmu, queue, false); if (err) gk20a_err(dev_from_gk20a(g), "fail to close queue %d", queue->id); return false; } static int pmu_response_handle(struct pmu_gk20a *pmu, struct pmu_msg *msg) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_sequence *seq; struct pmu_v *pv = &g->ops.pmu_ver; int ret = 0; gk20a_dbg_fn(""); seq = &pmu->seq[msg->hdr.seq_id]; if (seq->state != PMU_SEQ_STATE_USED && seq->state != PMU_SEQ_STATE_CANCELLED) { gk20a_err(dev_from_gk20a(g), "msg for an unknown sequence %d", seq->id); return -EINVAL; } if (msg->hdr.unit_id == PMU_UNIT_RC && msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { gk20a_err(dev_from_gk20a(g), "unhandled cmd: seq %d", seq->id); } else if (seq->state != PMU_SEQ_STATE_CANCELLED) { if (seq->msg) { if (seq->msg->hdr.size >= msg->hdr.size) { memcpy(seq->msg, msg, msg->hdr.size); } else { gk20a_err(dev_from_gk20a(g), "sequence %d msg buffer too small", seq->id); } } if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) { pmu_copy_from_dmem(pmu, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq)), seq->out_payload, pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)), 0); } } else seq->callback = NULL; if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq)) != 0) gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_in_a_ptr(seq))); if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq))); if (seq->out_mem != NULL) { memset(pv->pmu_allocation_get_fb_addr(pmu, pv->get_pmu_seq_out_a_ptr(seq)), 0x0, pv->pmu_allocation_get_fb_size(pmu, pv->get_pmu_seq_out_a_ptr(seq))); gk20a_pmu_surface_free(g, seq->out_mem); if (seq->out_mem != seq->in_mem) kfree(seq->out_mem); else seq->out_mem = NULL; } if (seq->in_mem != NULL) { memset(pv->pmu_allocation_get_fb_addr(pmu, pv->get_pmu_seq_in_a_ptr(seq)), 0x0, pv->pmu_allocation_get_fb_size(pmu, pv->get_pmu_seq_in_a_ptr(seq))); gk20a_pmu_surface_free(g, seq->in_mem); kfree(seq->in_mem); seq->in_mem = NULL; } if (seq->callback) seq->callback(g, msg, seq->cb_params, seq->desc, ret); pmu_seq_release(pmu, seq); /* TBD: notify client waiting for available dmem */ gk20a_dbg_fn("done"); return 0; } int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout, u32 *var, u32 val); static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct pmu_gk20a *pmu = param; gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE"); pmu->zbc_save_done = 1; } void 
gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
{
        struct pmu_gk20a *pmu = &g->pmu;
        struct pmu_cmd cmd;
        u32 seq;

        if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
                return;

        memset(&cmd, 0, sizeof(struct pmu_cmd));
        cmd.hdr.unit_id = PMU_UNIT_PG;
        cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
        cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
        cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);

        pmu->zbc_save_done = 0;

        gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE");
        gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
                        pmu_handle_zbc_msg, pmu, &seq, ~0);
        pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
                        &pmu->zbc_save_done, 1);
        if (!pmu->zbc_save_done)
                gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
}

static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
{
        struct gk20a *g = gk20a_from_pmu(pmu);
        struct pmu_v *pv = &g->ops.pmu_ver;
        struct pmu_cmd cmd;
        struct pmu_payload payload;
        u32 seq;

        /* PERFMON Start */
        memset(&cmd, 0, sizeof(struct pmu_cmd));
        cmd.hdr.unit_id = get_perfmon_id(pmu);
        cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
        pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
                PMU_PERFMON_CMD_ID_START);
        pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
                PMU_DOMAIN_GROUP_PSTATE);
        pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
                pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);

        pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
                PMU_PERFMON_FLAG_ENABLE_INCREASE |
                PMU_PERFMON_FLAG_ENABLE_DECREASE |
                PMU_PERFMON_FLAG_CLEAR_PREV);

        memset(&payload, 0, sizeof(struct pmu_payload));

        /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
        pv->set_perfmon_cntr_ut(pmu, 3000); /* 30% */
        /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
        pv->set_perfmon_cntr_lt(pmu, 1000); /* 10% */
        pv->set_perfmon_cntr_valid(pmu, true);

        payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
        payload.in.size = pv->get_perfmon_cntr_sz(pmu);
        payload.in.offset = pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);

        gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
        gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
                        NULL, NULL, &seq, ~0);

        return 0;
}

static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
{
        struct gk20a *g = gk20a_from_pmu(pmu);
        struct pmu_cmd cmd;
        u32 seq;

        /* PERFMON Stop */
        memset(&cmd, 0, sizeof(struct pmu_cmd));
        cmd.hdr.unit_id = get_perfmon_id(pmu);
        cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
        cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;

        gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
        gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
                        NULL, NULL, &seq, ~0);
        return 0;
}

static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
                        struct pmu_perfmon_msg *msg)
{
        gk20a_dbg_fn("");

        switch (msg->msg_type) {
        case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
                gk20a_dbg_pmu("perfmon increase event: "
                        "state_id %d, group_id %d, pct %d",
                        msg->gen.state_id, msg->gen.group_id, msg->gen.data);
                (pmu->perfmon_events_cnt)++;
                break;
        case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
                gk20a_dbg_pmu("perfmon decrease event: "
                        "state_id %d, group_id %d, pct %d",
                        msg->gen.state_id, msg->gen.group_id, msg->gen.data);
                (pmu->perfmon_events_cnt)++;
                break;
        case PMU_PERFMON_MSG_ID_INIT_EVENT:
                pmu->perfmon_ready = 1;
                gk20a_dbg_pmu("perfmon init event");
                break;
        default:
                break;
        }

        /* restart sampling */
        if (pmu->perfmon_sampling_enabled)
                return pmu_perfmon_start_sampling(pmu);
        return 0;
}

static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
{
        int err = 0;

        gk20a_dbg_fn("");

        switch (msg->hdr.unit_id) {
        case PMU_UNIT_PERFMON:
        case
PMU_UNIT_PERFMON_T18X: err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon); break; default: break; } return err; } static int pmu_process_message(struct pmu_gk20a *pmu) { struct pmu_msg msg; int status; struct gk20a *g = gk20a_from_pmu(pmu); if (unlikely(!pmu->pmu_ready)) { pmu_process_init_msg(pmu, &msg); if (g->ops.pmu.init_wpr_region != NULL) g->ops.pmu.init_wpr_region(g); pmu_init_perfmon(pmu); return 0; } while (pmu_read_message(pmu, &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) { gk20a_dbg_pmu("read msg hdr: " "unit_id = 0x%08x, size = 0x%08x, " "ctrl_flags = 0x%08x, seq_id = 0x%08x", msg.hdr.unit_id, msg.hdr.size, msg.hdr.ctrl_flags, msg.hdr.seq_id); msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK; if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) { pmu_handle_event(pmu, &msg); } else { pmu_response_handle(pmu, &msg); } } return 0; } int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout, u32 *var, u32 val) { struct gk20a *g = gk20a_from_pmu(pmu); unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); unsigned long delay = GR_IDLE_CHECK_DEFAULT; u32 servicedpmuint; servicedpmuint = pwr_falcon_irqstat_halt_true_f() | pwr_falcon_irqstat_exterr_true_f() | pwr_falcon_irqstat_swgen0_true_f(); do { if (*var == val) return 0; if (gk20a_readl(g, pwr_falcon_irqstat_r()) & servicedpmuint) gk20a_pmu_isr(g); usleep_range(delay, delay * 2); delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); } while (time_before(jiffies, end_jiffies) || !tegra_platform_is_silicon()); return -ETIMEDOUT; } static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_pg_stats stats; pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset, (u8 *)&stats, sizeof(struct pmu_pg_stats), 0); gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx", stats.pg_entry_start_timestamp); gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx", stats.pg_exit_start_timestamp); gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx", stats.pg_ingating_start_timestamp); gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx", stats.pg_ungating_start_timestamp); gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x", stats.pg_avg_entry_time_us); gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x", stats.pg_avg_exit_time_us); gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x", stats.pg_ingating_cnt); gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x", stats.pg_ingating_time_us); gk20a_dbg_pmu("pg_ungating_count : 0x%08x", stats.pg_ungating_count); gk20a_dbg_pmu("pg_ungating_time_us 0x%08x: ", stats.pg_ungating_time_us); gk20a_dbg_pmu("pg_gating_cnt : 0x%08x", stats.pg_gating_cnt); gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x", stats.pg_gating_deny_cnt); /* Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset in .nm file, e.g. 0x1000066c. use 0x66c. 
u32 i, val[20]; pmu_copy_from_dmem(pmu, 0x66c, (u8 *)val, sizeof(val), 0); gk20a_dbg_pmu("elpg log begin"); for (i = 0; i < 20; i++) gk20a_dbg_pmu("0x%08x", val[i]); gk20a_dbg_pmu("elpg log end"); */ gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3))); gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x", gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0))); gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x", gk20a_readl(g, pwr_pmu_pg_intren_r(0))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(3))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(4))); gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x", gk20a_readl(g, pwr_pmu_idle_count_r(7))); /* TBD: script can't generate those registers correctly gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x", gk20a_readl(g, pwr_pmu_idle_status_r())); gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x", gk20a_readl(g, pwr_pmu_pg_ctrl_r())); */ } void pmu_dump_falcon_stats(struct pmu_gk20a *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); unsigned int i; gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d", gk20a_readl(g, pwr_falcon_os_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x", gk20a_readl(g, pwr_falcon_cpuctl_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x", gk20a_readl(g, pwr_falcon_idlestate_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x", gk20a_readl(g, pwr_falcon_mailbox0_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x", gk20a_readl(g, pwr_falcon_mailbox1_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x", gk20a_readl(g, pwr_falcon_irqstat_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x", gk20a_readl(g, pwr_falcon_irqmode_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x", gk20a_readl(g, pwr_falcon_irqmask_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x", gk20a_readl(g, pwr_falcon_irqdest_r())); for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++) gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_mailbox_r(i))); for (i = 0; i < pwr_pmu_debug__size_1_v(); i++) gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_debug_r(i))); for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) { gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rstat_f() | pwr_pmu_falcon_icd_cmd_idx_f(i)); gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x", i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); } i = gk20a_readl(g, pwr_pmu_bar0_error_status_r()); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i); if (i != 0) { gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_addr_r())); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_data_r())); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_timeout_r())); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x", gk20a_readl(g, pwr_pmu_bar0_ctl_r())); } i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r()); gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i); i = gk20a_readl(g, pwr_falcon_exterrstat_r()); gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 
0x%x", i); if (pwr_falcon_exterrstat_valid_v(i) == pwr_falcon_exterrstat_valid_true_v()) { gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x", gk20a_readl(g, pwr_falcon_exterraddr_r())); gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x", gk20a_readl(g, mc_enable_r())); } gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x", gk20a_readl(g, pwr_falcon_engctl_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x", gk20a_readl(g, pwr_falcon_curctx_r())); gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x", gk20a_readl(g, pwr_falcon_nxtctx_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); for (i = 0; i < 4; i++) { gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), pwr_pmu_falcon_icd_cmd_opc_rreg_f() | pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP)); gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x", gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); } gk20a_err(dev_from_gk20a(g), "elpg stat: %d\n", pmu->elpg_stat); /* PMU may crash due to FECS crash. Dump FECS status */ gk20a_fecs_dump_falcon_stats(g); printtrace(pmu); } void gk20a_pmu_isr(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_queue *queue; u32 intr, mask; bool recheck = false; gk20a_dbg_fn(""); mutex_lock(&pmu->isr_mutex); if (!pmu->isr_enabled) { mutex_unlock(&pmu->isr_mutex); return; } mask = gk20a_readl(g, pwr_falcon_irqmask_r()) & gk20a_readl(g, pwr_falcon_irqdest_r()); intr = gk20a_readl(g, pwr_falcon_irqstat_r()); gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr); intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask; if (!intr || pmu->pmu_state == PMU_STATE_OFF) { gk20a_writel(g, pwr_falcon_irqsclr_r(), intr); mutex_unlock(&pmu->isr_mutex); return; } if (intr & pwr_falcon_irqstat_halt_true_f()) { gk20a_err(dev_from_gk20a(g), "pmu halt intr not implemented"); pmu_dump_falcon_stats(pmu); if (gk20a_readl(g, pwr_pmu_mailbox_r (PMU_MODE_MISMATCH_STATUS_MAILBOX_R)) == PMU_MODE_MISMATCH_STATUS_VAL) if (g->ops.pmu.dump_secure_fuses) g->ops.pmu.dump_secure_fuses(g); } if (intr & pwr_falcon_irqstat_exterr_true_f()) { gk20a_err(dev_from_gk20a(g), "pmu exterr intr not implemented. 
Clearing interrupt."); pmu_dump_falcon_stats(pmu); gk20a_writel(g, pwr_falcon_exterrstat_r(), gk20a_readl(g, pwr_falcon_exterrstat_r()) & ~pwr_falcon_exterrstat_valid_m()); } if (intr & pwr_falcon_irqstat_swgen0_true_f()) { pmu_process_message(pmu); recheck = true; } gk20a_writel(g, pwr_falcon_irqsclr_r(), intr); if (recheck) { queue = &pmu->queue[PMU_MESSAGE_QUEUE]; if (!pmu_queue_is_empty(pmu, queue)) gk20a_writel(g, pwr_falcon_irqsset_r(), pwr_falcon_irqsset_swgen0_set_f()); } mutex_unlock(&pmu->isr_mutex); } static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, struct pmu_msg *msg, struct pmu_payload *payload, u32 queue_id) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_queue *queue; u32 in_size, out_size; if (!PMU_IS_SW_COMMAND_QUEUE(queue_id)) goto invalid_cmd; queue = &pmu->queue[queue_id]; if (cmd->hdr.size < PMU_CMD_HDR_SIZE) goto invalid_cmd; if (cmd->hdr.size > (queue->size >> 1)) goto invalid_cmd; if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE) goto invalid_cmd; if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id)) goto invalid_cmd; if (payload == NULL) return true; if (payload->in.buf == NULL && payload->out.buf == NULL) goto invalid_cmd; if ((payload->in.buf != NULL && payload->in.size == 0) || (payload->out.buf != NULL && payload->out.size == 0)) goto invalid_cmd; in_size = PMU_CMD_HDR_SIZE; if (payload->in.buf) { in_size += payload->in.offset; in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu); } out_size = PMU_CMD_HDR_SIZE; if (payload->out.buf) { out_size += payload->out.offset; out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu); } if (in_size > cmd->hdr.size || out_size > cmd->hdr.size) goto invalid_cmd; if ((payload->in.offset != 0 && payload->in.buf == NULL) || (payload->out.offset != 0 && payload->out.buf == NULL)) goto invalid_cmd; return true; invalid_cmd: gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n" "queue_id=%d,\n" "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n" "payload in=%p, in_size=%d, in_offset=%d,\n" "payload out=%p, out_size=%d, out_offset=%d", queue_id, cmd->hdr.size, cmd->hdr.unit_id, msg, msg?msg->hdr.unit_id:~0, &payload->in, payload->in.size, payload->in.offset, &payload->out, payload->out.size, payload->out.offset); return false; } static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, u32 queue_id, unsigned long timeout) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_queue *queue; unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); int err; gk20a_dbg_fn(""); queue = &pmu->queue[queue_id]; do { err = pmu_queue_open_write(pmu, queue, cmd->hdr.size); if (err == -EAGAIN && time_before(jiffies, end_jiffies)) usleep_range(1000, 2000); else break; } while (1); if (err) goto clean_up; pmu_queue_push(pmu, queue, cmd, cmd->hdr.size); err = pmu_queue_close(pmu, queue, true); clean_up: if (err) gk20a_err(dev_from_gk20a(g), "fail to write cmd to queue %d", queue_id); else gk20a_dbg_fn("done"); return err; } void gk20a_pmu_surface_describe(struct gk20a *g, struct mem_desc *mem, struct flcn_mem_desc_v0 *fb) { fb->address.lo = u64_lo32(mem->gpu_va); fb->address.hi = u64_hi32(mem->gpu_va); fb->params = ((u32)mem->size & 0xFFFFFF); fb->params |= (GK20A_PMU_DMAIDX_VIRT << 24); } int gk20a_pmu_vidmem_surface_alloc(struct gk20a *g, struct mem_desc *mem, u32 size) { struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; int err; err = gk20a_gmmu_alloc_map_vid(vm, size, mem); if (err) { gk20a_err(g->dev, "memory allocation failed"); return -ENOMEM; } return 0; } int 
gk20a_pmu_sysmem_surface_alloc(struct gk20a *g, struct mem_desc *mem, u32 size) { struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; int err; err = gk20a_gmmu_alloc_map_sys(vm, size, mem); if (err) { gk20a_err(g->dev, "failed to allocate memory\n"); return -ENOMEM; } return 0; } void gk20a_pmu_surface_free(struct gk20a *g, struct mem_desc *mem) { gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, mem); memset(mem, 0, sizeof(struct mem_desc)); } int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_msg *msg, struct pmu_payload *payload, u32 queue_id, pmu_callback callback, void* cb_param, u32 *seq_desc, unsigned long timeout) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_v *pv = &g->ops.pmu_ver; struct pmu_sequence *seq; void *in = NULL, *out = NULL; int err; gk20a_dbg_fn(""); if ((!cmd) || (!seq_desc) || (!pmu->pmu_ready)) { if (!cmd) gk20a_warn(dev_from_gk20a(g), "%s(): PMU cmd buffer is NULL", __func__); else if (!seq_desc) gk20a_warn(dev_from_gk20a(g), "%s(): Seq descriptor is NULL", __func__); else gk20a_warn(dev_from_gk20a(g), "%s(): PMU is not ready", __func__); WARN_ON(1); return -EINVAL; } if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id)) return -EINVAL; err = pmu_seq_acquire(pmu, &seq); if (err) return err; cmd->hdr.seq_id = seq->id; cmd->hdr.ctrl_flags = 0; cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS; cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR; seq->callback = callback; seq->cb_params = cb_param; seq->msg = msg; seq->out_payload = NULL; seq->desc = pmu->next_seq_desc++; if (payload) seq->out_payload = payload->out.buf; *seq_desc = seq->desc; if (payload && payload->in.offset != 0) { pv->set_pmu_allocation_ptr(pmu, &in, ((u8 *)&cmd->cmd + payload->in.offset)); if (payload->in.buf != payload->out.buf) pv->pmu_allocation_set_dmem_size(pmu, in, (u16)payload->in.size); else pv->pmu_allocation_set_dmem_size(pmu, in, (u16)max(payload->in.size, payload->out.size)); *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = gk20a_alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_size(pmu, in)); if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) goto clean_up; if (payload->in.fb_size != 0x0) { seq->in_mem = kzalloc(sizeof(struct mem_desc), GFP_KERNEL); if (!seq->in_mem) { err = -ENOMEM; goto clean_up; } gk20a_pmu_vidmem_surface_alloc(g, seq->in_mem, payload->in.fb_size); gk20a_pmu_surface_describe(g, seq->in_mem, (struct flcn_mem_desc_v0 *) pv->pmu_allocation_get_fb_addr(pmu, in)); gk20a_mem_wr_n(g, seq->in_mem, 0, payload->in.buf, payload->in.fb_size); } else { pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu, in)), payload->in.buf, payload->in.size, 0); } pv->pmu_allocation_set_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq), pv->pmu_allocation_get_dmem_size(pmu, in)); pv->pmu_allocation_set_dmem_offset(pmu, pv->get_pmu_seq_in_a_ptr(seq), pv->pmu_allocation_get_dmem_offset(pmu, in)); } if (payload && payload->out.offset != 0) { pv->set_pmu_allocation_ptr(pmu, &out, ((u8 *)&cmd->cmd + payload->out.offset)); pv->pmu_allocation_set_dmem_size(pmu, out, (u16)payload->out.size); if (payload->in.buf != payload->out.buf) { *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = gk20a_alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_size(pmu, out)); if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, out))) goto clean_up; if (payload->out.fb_size != 0x0) { seq->out_mem = kzalloc(sizeof(struct mem_desc), GFP_KERNEL); if (!seq->out_mem) { err = -ENOMEM; goto clean_up; } gk20a_pmu_vidmem_surface_alloc(g, seq->out_mem, payload->out.fb_size); 
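                                /*
                                 * Note (reader's interpretation): the vidmem
                                 * surface just allocated for the out payload
                                 * is only described into the command's FB
                                 * allocation below; unlike the in path there
                                 * is no gk20a_mem_wr_n() here, since this
                                 * buffer is filled by the PMU when it returns
                                 * the response.
                                 */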
gk20a_pmu_surface_describe(g, seq->out_mem, (struct flcn_mem_desc_v0 *) pv->pmu_allocation_get_fb_addr(pmu, out)); } } else { BUG_ON(in == NULL); seq->out_mem = seq->in_mem; pv->pmu_allocation_set_dmem_offset(pmu, out, pv->pmu_allocation_get_dmem_offset(pmu, in)); } pv->pmu_allocation_set_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq), pv->pmu_allocation_get_dmem_size(pmu, out)); pv->pmu_allocation_set_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq), pv->pmu_allocation_get_dmem_offset(pmu, out)); } seq->state = PMU_SEQ_STATE_USED; err = pmu_write_cmd(pmu, cmd, queue_id, timeout); if (err) seq->state = PMU_SEQ_STATE_PENDING; gk20a_dbg_fn("done"); return 0; clean_up: gk20a_dbg_fn("fail"); if (in) gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, in)); if (out) gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, out)); pmu_seq_release(pmu, seq); return err; } static int gk20a_pmu_enable_elpg_locked(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq, status; u32 gr_engine_id; gk20a_dbg_fn(""); gr_engine_id = gk20a_fifo_get_gr_engine_id(g); memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW; /* no need to wait ack for ELPG enable but set pending to sync with follow up ELPG disable */ pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_ALLOW"); status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, pmu, &seq, ~0); BUG_ON(status != 0); gk20a_dbg_fn("done"); return 0; } int gk20a_pmu_enable_elpg(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct gr_gk20a *gr = &g->gr; int ret = 0; gk20a_dbg_fn(""); if (!support_gk20a_pmu(g->dev)) return ret; mutex_lock(&pmu->elpg_mutex); pmu->elpg_refcnt++; if (pmu->elpg_refcnt <= 0) goto exit_unlock; /* something is not right if we end up in following code path */ if (unlikely(pmu->elpg_refcnt > 1)) { gk20a_warn(dev_from_gk20a(g), "%s(): possible elpg refcnt mismatch. elpg refcnt=%d", __func__, pmu->elpg_refcnt); WARN_ON(1); } /* do NOT enable elpg until golden ctx is created, which is related with the ctx that ELPG save and restore. */ if (unlikely(!gr->ctx_vars.golden_image_initialized)) goto exit_unlock; /* return if ELPG is already on or on_pending or off_on_pending */ if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) goto exit_unlock; ret = gk20a_pmu_enable_elpg_locked(g); exit_unlock: mutex_unlock(&pmu->elpg_mutex); gk20a_dbg_fn("done"); return ret; } int gk20a_pmu_disable_elpg(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq; int ret = 0; u32 gr_engine_id; gk20a_dbg_fn(""); gr_engine_id = gk20a_fifo_get_gr_engine_id(g); if (!support_gk20a_pmu(g->dev)) return ret; mutex_lock(&pmu->elpg_mutex); pmu->elpg_refcnt--; if (pmu->elpg_refcnt > 0) { gk20a_warn(dev_from_gk20a(g), "%s(): possible elpg refcnt mismatch. 
elpg refcnt=%d", __func__, pmu->elpg_refcnt); WARN_ON(1); ret = 0; goto exit_unlock; } /* cancel off_on_pending and return */ if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) { pmu->elpg_stat = PMU_ELPG_STAT_OFF; ret = 0; goto exit_reschedule; } /* wait if on_pending */ else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) { pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), &pmu->elpg_stat, PMU_ELPG_STAT_ON); if (pmu->elpg_stat != PMU_ELPG_STAT_ON) { gk20a_err(dev_from_gk20a(g), "ELPG_ALLOW_ACK failed, elpg_stat=%d", pmu->elpg_stat); pmu_dump_elpg_stats(pmu); pmu_dump_falcon_stats(pmu); ret = -EBUSY; goto exit_unlock; } } /* return if ELPG is already off */ else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) { ret = 0; goto exit_reschedule; } memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, pmu, &seq, ~0); pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), &pmu->elpg_stat, PMU_ELPG_STAT_OFF); if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) { gk20a_err(dev_from_gk20a(g), "ELPG_DISALLOW_ACK failed"); pmu_dump_elpg_stats(pmu); pmu_dump_falcon_stats(pmu); ret = -EBUSY; goto exit_unlock; } exit_reschedule: exit_unlock: mutex_unlock(&pmu->elpg_mutex); gk20a_dbg_fn("done"); return ret; } int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable) { struct pmu_gk20a *pmu = &g->pmu; int err; gk20a_dbg_fn(""); if (enable) err = pmu_perfmon_start_sampling(pmu); else err = pmu_perfmon_stop_sampling(pmu); return err; } int gk20a_pmu_destroy(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; u32 elpg_ingating_time, elpg_ungating_time, gating_cnt; gk20a_dbg_fn(""); if (!support_gk20a_pmu(g->dev)) return 0; /* make sure the pending operations are finished before we continue */ cancel_work_sync(&pmu->pg_init); gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time, &elpg_ungating_time, &gating_cnt); gk20a_pmu_disable_elpg(g); pmu->initialized = false; /* update the s/w ELPG residency counters */ g->pg_ingating_time_us += (u64)elpg_ingating_time; g->pg_ungating_time_us += (u64)elpg_ungating_time; g->pg_gating_cnt += gating_cnt; mutex_lock(&pmu->isr_mutex); pmu->isr_enabled = false; mutex_unlock(&pmu->isr_mutex); pmu->pmu_state = PMU_STATE_OFF; pmu->pmu_ready = false; pmu->perfmon_ready = false; pmu->zbc_ready = false; g->ops.pmu.lspmuwprinitdone = false; g->ops.pmu.fecsbootstrapdone = false; gk20a_dbg_fn("done"); return 0; } int gk20a_pmu_load_norm(struct gk20a *g, u32 *load) { *load = g->pmu.load_shadow; return 0; } int gk20a_pmu_load_update(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; u16 _load = 0; if (!pmu->perfmon_ready) { pmu->load_shadow = 0; return 0; } pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); pmu->load_shadow = _load / 10; pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); return 0; } void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, u32 *total_cycles) { if (!g->power_on || gk20a_busy(g->dev)) { *busy_cycles = 0; *total_cycles = 0; return; } *busy_cycles = pwr_pmu_idle_count_value_v( gk20a_readl(g, pwr_pmu_idle_count_r(1))); rmb(); *total_cycles = pwr_pmu_idle_count_value_v( gk20a_readl(g, 
pwr_pmu_idle_count_r(2))); gk20a_idle(g->dev); } void gk20a_pmu_reset_load_counters(struct gk20a *g) { u32 reg_val = pwr_pmu_idle_count_reset_f(1); if (!g->power_on || gk20a_busy(g->dev)) return; gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val); wmb(); gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val); gk20a_idle(g->dev); } void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_pg_stats stats; pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset, (u8 *)&stats, sizeof(struct pmu_pg_stats), 0); *ingating_time = stats.pg_ingating_time_us; *ungating_time = stats.pg_ungating_time_us; *gating_cnt = stats.pg_gating_cnt; } static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g, u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt) { struct pmu_gk20a *pmu = &g->pmu; if (!pmu->initialized) { *ingating_time = 0; *ungating_time = 0; *gating_cnt = 0; return 0; } g->ops.pmu.pmu_elpg_statistics(g, ingating_time, ungating_time, gating_cnt); return 0; } /* Send an Adaptive Power (AP) related command to PMU */ int gk20a_pmu_ap_send_command(struct gk20a *g, union pmu_ap_cmd *p_ap_cmd, bool b_block) { struct pmu_gk20a *pmu = &g->pmu; /* FIXME: where is the PG structure defined?? */ u32 status = 0; struct pmu_cmd cmd; u32 seq; pmu_callback p_callback = NULL; memset(&cmd, 0, sizeof(struct pmu_cmd)); /* Copy common members */ cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd); cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP; cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id; /* Copy other members of command */ switch (p_ap_cmd->cmn.cmd_id) { case PMU_AP_CMD_ID_INIT: gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT"); cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us = p_ap_cmd->init.pg_sampling_period_us; break; case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL: gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL"); cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id = p_ap_cmd->init_and_enable_ctrl.ctrl_id; memcpy( (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params), (void *)&(p_ap_cmd->init_and_enable_ctrl.params), sizeof(struct pmu_ap_ctrl_init_params)); p_callback = ap_callback_init_and_enable_ctrl; break; case PMU_AP_CMD_ID_ENABLE_CTRL: gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_ENABLE_CTRL"); cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id = p_ap_cmd->enable_ctrl.ctrl_id; break; case PMU_AP_CMD_ID_DISABLE_CTRL: gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_DISABLE_CTRL"); cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id = p_ap_cmd->disable_ctrl.ctrl_id; break; case PMU_AP_CMD_ID_KICK_CTRL: gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_KICK_CTRL"); cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id = p_ap_cmd->kick_ctrl.ctrl_id; cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count = p_ap_cmd->kick_ctrl.skip_count; break; default: gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n", __func__, p_ap_cmd->cmn.cmd_id); return 0x2f; } status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, p_callback, pmu, &seq, ~0); if (status) { gk20a_dbg_pmu( "%s: Unable to submit Adaptive Power Command %d\n", __func__, p_ap_cmd->cmn.cmd_id); goto err_return; } /* TODO: Implement blocking calls (b_block) */ err_return: return status; } static void ap_callback_init_and_enable_ctrl( struct gk20a *g, struct pmu_msg *msg, void *param, u32 seq_desc, u32 status) { /* Define p_ap (i.e pointer to pmu_ap structure) */ WARN_ON(!msg); if (!status) { switch (msg->msg.pg.ap_msg.cmn.msg_id) { case PMU_AP_MSG_ID_INIT_ACK: gk20a_dbg_pmu("reply 
PMU_AP_CMD_ID_INIT"); break; default: gk20a_dbg_pmu( "%s: Invalid Adaptive Power Message: %x\n", __func__, msg->msg.pg.ap_msg.cmn.msg_id); break; } } } int gk20a_aelpg_init(struct gk20a *g) { int status = 0; /* Remove reliance on app_ctrl field. */ union pmu_ap_cmd ap_cmd; /* TODO: Check for elpg being ready? */ ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT; ap_cmd.init.pg_sampling_period_us = g->pmu.aelpg_param[0]; status = gk20a_pmu_ap_send_command(g, &ap_cmd, false); return status; } int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id) { int status = 0; union pmu_ap_cmd ap_cmd; /* TODO: Probably check if ELPG is ready? */ ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL; ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id; ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us = g->pmu.aelpg_param[1]; ap_cmd.init_and_enable_ctrl.params.min_target_saving_us = g->pmu.aelpg_param[2]; ap_cmd.init_and_enable_ctrl.params.power_break_even_us = g->pmu.aelpg_param[3]; ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max = g->pmu.aelpg_param[4]; switch (ctrl_id) { case PMU_AP_CTRL_ID_GRAPHICS: break; default: break; } status = gk20a_pmu_ap_send_command(g, &ap_cmd, true); return status; } #ifdef CONFIG_DEBUG_FS static int elpg_residency_show(struct seq_file *s, void *data) { struct gk20a *g = s->private; u32 ingating_time = 0; u32 ungating_time = 0; u32 gating_cnt; u64 total_ingating, total_ungating, residency, divisor, dividend; int err; /* Don't unnecessarily power on the device */ if (g->power_on) { err = gk20a_busy(g->dev); if (err) return err; gk20a_pmu_get_elpg_residency_gating(g, &ingating_time, &ungating_time, &gating_cnt); gk20a_idle(g->dev); } total_ingating = g->pg_ingating_time_us + (u64)ingating_time; total_ungating = g->pg_ungating_time_us + (u64)ungating_time; divisor = total_ingating + total_ungating; /* We compute the residency on a scale of 1000 */ dividend = total_ingating * 1000; if (divisor) residency = div64_u64(dividend, divisor); else residency = 0; seq_printf(s, "Time in ELPG: %llu us\n" "Time out of ELPG: %llu us\n" "ELPG residency ratio: %llu\n", total_ingating, total_ungating, residency); return 0; } static int elpg_residency_open(struct inode *inode, struct file *file) { return single_open(file, elpg_residency_show, inode->i_private); } static const struct file_operations elpg_residency_fops = { .open = elpg_residency_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int elpg_transitions_show(struct seq_file *s, void *data) { struct gk20a *g = s->private; u32 ingating_time, ungating_time, total_gating_cnt; u32 gating_cnt = 0; int err; if (g->power_on) { err = gk20a_busy(g->dev); if (err) return err; gk20a_pmu_get_elpg_residency_gating(g, &ingating_time, &ungating_time, &gating_cnt); gk20a_idle(g->dev); } total_gating_cnt = g->pg_gating_cnt + gating_cnt; seq_printf(s, "%u\n", total_gating_cnt); return 0; } static int elpg_transitions_open(struct inode *inode, struct file *file) { return single_open(file, elpg_transitions_show, inode->i_private); } static const struct file_operations elpg_transitions_fops = { .open = elpg_transitions_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int falc_trace_show(struct seq_file *s, void *data) { struct gk20a *g = s->private; struct pmu_gk20a *pmu = &g->pmu; u32 i = 0, j = 0, k, l, m; char *trace = pmu->trace_buf.cpu_va; char part_str[40]; u32 *trace1 = pmu->trace_buf.cpu_va; for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { for (j = 0; j < 
0x40; j++) if (trace1[(i / 4) + j]) break; if (j == 0x40) return 0; seq_printf(s, "Index %x: ", trace1[(i / 4)]); l = 0; m = 0; while (find_hex_in_string((trace+i+20+m), g, &k)) { if (k >= 40) break; strncpy(part_str, (trace+i+20+m), k); part_str[k] = 0; seq_printf(s, "%s0x%x", part_str, trace1[(i / 4) + 1 + l]); l++; m += k + 2; } seq_printf(s, "%s", (trace+i+20+m)); } return 0; } static int falc_trace_open(struct inode *inode, struct file *file) { return single_open(file, falc_trace_show, inode->i_private); } static const struct file_operations falc_trace_fops = { .open = falc_trace_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int perfmon_events_enable_show(struct seq_file *s, void *data) { struct gk20a *g = s->private; seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); return 0; } static int perfmon_events_enable_open(struct inode *inode, struct file *file) { return single_open(file, perfmon_events_enable_show, inode->i_private); } static ssize_t perfmon_events_enable_write(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { struct seq_file *s = file->private_data; struct gk20a *g = s->private; unsigned long val = 0; char buf[40]; int buf_size; int err; memset(buf, 0, sizeof(buf)); buf_size = min(count, (sizeof(buf)-1)); if (copy_from_user(buf, userbuf, buf_size)) return -EFAULT; if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; /* Don't turn on gk20a unnecessarily */ if (g->power_on) { err = gk20a_busy(g->dev); if (err) return err; if (val && !g->pmu.perfmon_sampling_enabled) { g->pmu.perfmon_sampling_enabled = true; pmu_perfmon_start_sampling(&(g->pmu)); } else if (!val && g->pmu.perfmon_sampling_enabled) { g->pmu.perfmon_sampling_enabled = false; pmu_perfmon_stop_sampling(&(g->pmu)); } gk20a_idle(g->dev); } else { g->pmu.perfmon_sampling_enabled = val ? 
true : false; } return count; } static const struct file_operations perfmon_events_enable_fops = { .open = perfmon_events_enable_open, .read = seq_read, .write = perfmon_events_enable_write, .llseek = seq_lseek, .release = single_release, }; static int perfmon_events_count_show(struct seq_file *s, void *data) { struct gk20a *g = s->private; seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); return 0; } static int perfmon_events_count_open(struct inode *inode, struct file *file) { return single_open(file, perfmon_events_count_show, inode->i_private); } static const struct file_operations perfmon_events_count_fops = { .open = perfmon_events_count_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int security_show(struct seq_file *s, void *data) { struct gk20a *g = s->private; seq_printf(s, "%d\n", g->pmu.pmu_mode); return 0; } static int security_open(struct inode *inode, struct file *file) { return single_open(file, security_show, inode->i_private); } static const struct file_operations security_fops = { .open = security_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; int gk20a_pmu_debugfs_init(struct device *dev) { struct dentry *d; struct gk20a_platform *platform = dev_get_drvdata(dev); struct gk20a *g = get_gk20a(dev); d = debugfs_create_file( "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g, &elpg_residency_fops); if (!d) goto err_out; d = debugfs_create_file( "elpg_transitions", S_IRUGO, platform->debugfs, g, &elpg_transitions_fops); if (!d) goto err_out; d = debugfs_create_file( "falc_trace", S_IRUGO, platform->debugfs, g, &falc_trace_fops); if (!d) goto err_out; d = debugfs_create_file( "perfmon_events_enable", S_IRUGO, platform->debugfs, g, &perfmon_events_enable_fops); if (!d) goto err_out; d = debugfs_create_file( "perfmon_events_count", S_IRUGO, platform->debugfs, g, &perfmon_events_count_fops); if (!d) goto err_out; d = debugfs_create_file( "pmu_security", S_IRUGO, platform->debugfs, g, &security_fops); if (!d) goto err_out; return 0; err_out: pr_err("%s: Failed to make debugfs node\n", __func__); debugfs_remove_recursive(platform->debugfs); return -ENOMEM; } #endif