/*
 * drivers/video/tegra/host/gk20a/pmu_gk20a.c
 *
 * GK20A PMU (aka. gPMU outside gk20a context)
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <linux/delay.h>	/* for mdelay */
#include <linux/firmware.h>
#include <linux/clk.h>
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/uaccess.h>

#include "gk20a.h"
#include "gr_gk20a.h"
#include "hw_mc_gk20a.h"
#include "hw_pwr_gk20a.h"
#include "hw_top_gk20a.h"

#define GK20A_PMU_UCODE_IMAGE	"gpmu_ucode.bin"

#define gk20a_dbg_pmu(fmt, arg...) \
	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)

static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
		u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
static void pmu_setup_hw(struct work_struct *work);
static void ap_callback_init_and_enable_ctrl(
		struct gk20a *g, struct pmu_msg *msg,
		void *param, u32 seq_desc, u32 status);
static int gk20a_pmu_ap_send_command(struct gk20a *g,
			union pmu_ap_cmd *p_ap_cmd, bool b_block);

static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
{
	return sizeof(struct pmu_cmdline_args_v0);
}

static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
{
	return sizeof(struct pmu_cmdline_args_v1);
}

static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
{
	pmu->args_v1.cpu_freq_hz = freq;
}

static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
{
	pmu->args_v0.cpu_freq_hz = freq;
}

static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
{
	return (void *)(&pmu->args_v1);
}

static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
{
	return (void *)(&pmu->args_v0);
}

static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
{
	return sizeof(struct pmu_allocation_v1);
}

static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
{
	return sizeof(struct pmu_allocation_v0);
}

static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
	void **pmu_alloc_ptr, void *assign_ptr)
{
	struct pmu_allocation_v1 **pmu_a_ptr =
		(struct pmu_allocation_v1 **)pmu_alloc_ptr;
	*pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
}

static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
	void **pmu_alloc_ptr, void *assign_ptr)
{
	struct pmu_allocation_v0 **pmu_a_ptr =
		(struct pmu_allocation_v0 **)pmu_alloc_ptr;
	*pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
}

static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr, u16 size)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	pmu_a_ptr->alloc.dmem.size = size;
}

static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr, u16 size)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	pmu_a_ptr->alloc.dmem.size = size;
}

static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	return pmu_a_ptr->alloc.dmem.size;
}

static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	return pmu_a_ptr->alloc.dmem.size;
}

static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	return pmu_a_ptr->alloc.dmem.offset;
}

static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	return pmu_a_ptr->alloc.dmem.offset;
}

static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	return &pmu_a_ptr->alloc.dmem.offset;
}

static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	return &pmu_a_ptr->alloc.dmem.offset;
}

static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr, u32 offset)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	pmu_a_ptr->alloc.dmem.offset = offset;
}

static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr, u32 offset)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	pmu_a_ptr->alloc.dmem.offset = offset;
}

static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
{
	return (void *)(&(init->pmu_init_v1));
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
	return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
	return init->sw_managed_area_size;
}

static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
{
	return (void *)(&(init->pmu_init_v0));
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v0 *init =
		(struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
	return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v0 *init =
		(struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
	return init->sw_managed_area_size;
}

static u32 get_pmu_perfmon_cmd_start_size_v1(void)
{
	return sizeof(struct pmu_perfmon_cmd_start_v1);
}

static u32 get_pmu_perfmon_cmd_start_size_v0(void)
{
	return sizeof(struct pmu_perfmon_cmd_start_v0);
}

static int get_perfmon_cmd_start_offsetofvar_v1(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_start_v1,
		counter_alloc);
	default:
		return -EINVAL;
		break;
	}
	return 0;
}

static int get_perfmon_cmd_start_offsetofvar_v0(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_start_v0,
		counter_alloc);
	default:
		return -EINVAL;
		break;
	}
	return 0;
}

static u32 get_pmu_perfmon_cmd_init_size_v1(void)
{
	return sizeof(struct pmu_perfmon_cmd_init_v1);
}

static u32 get_pmu_perfmon_cmd_init_size_v0(void)
{
	return sizeof(struct pmu_perfmon_cmd_init_v0);
}

static int get_perfmon_cmd_init_offsetofvar_v1(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_init_v1,
		counter_alloc);
	default:
		return -EINVAL;
		break;
	}
	return 0;
}

static int get_perfmon_cmd_init_offsetofvar_v0(
	enum pmu_perfmon_cmd_start_fields field)
{
	switch (field) {
	case COUNTER_ALLOC:
		return offsetof(struct pmu_perfmon_cmd_init_v0,
		counter_alloc);
	default:
		return -EINVAL;
		break;
	}
	return 0;
}

static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	start->cmd_type = value;
}

static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	start->cmd_type = value;
}

static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	start->group_id = value;
}

static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	start->group_id = value;
}

static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	start->state_id = value;
}

static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	start->state_id = value;
}

static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	start->flags = value;
}

static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	start->flags = value;
}

static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	return start->flags;
}

static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	return start->flags;
}

static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
	u16 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->sample_buffer = value;
}

static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
	u16 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->sample_buffer = value;
}

static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->base_counter_id = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->base_counter_id = value;
}

static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
	u32 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->sample_period_us = value;
}

static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
	u32 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->sample_period_us = value;
}

static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->num_counters = value;
}

static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->num_counters = value;
}

static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->samples_in_moving_avg = value;
}

static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->samples_in_moving_avg = value;
}

static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
	u32 id, void *pmu_init_msg)
{
	struct pmu_init_msg_pmu_v0 *init =
		(struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
	queue->index    = init->queue_info[id].index;
	queue->offset   = init->queue_info[id].offset;
	queue->size = init->queue_info[id].size;
}

static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
	u32 id, void *pmu_init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
	queue->index    = init->queue_info[id].index;
	queue->offset   = init->queue_info[id].offset;
	queue->size = init->queue_info[id].size;
}

static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
{
	return (void *)(&seq->in_v1);
}

static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
{
	return (void *)(&seq->in_v0);
}

static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
{
	return (void *)(&seq->out_v1);
}

static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
{
	return (void *)(&seq->out_v0);
}

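/*
 * Bind the firmware-version-specific accessors in g->ops.pmu_ver according
 * to the app_version field of the PMU ucode descriptor.  APP_VERSION_1 and
 * APP_VERSION_2 share the v1 command/allocation layouts (ZBC table update
 * command id 16); APP_VERSION_0 uses the v0 layouts (command id 14).
 * Unknown versions are rejected with -EINVAL.
 */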
static int gk20a_init_pmu(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	switch (pmu->desc->app_version) {
	case APP_VERSION_1:
	case APP_VERSION_2:
		g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
		g->ops.pmu_ver.get_pmu_cmdline_args_size =
			pmu_cmdline_size_v1;
		g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
			set_pmu_cmdline_args_cpufreq_v1;
		g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
			get_pmu_cmdline_args_ptr_v1;
		g->ops.pmu_ver.get_pmu_allocation_struct_size =
			get_pmu_allocation_size_v1;
		g->ops.pmu_ver.set_pmu_allocation_ptr =
			set_pmu_allocation_ptr_v1;
		g->ops.pmu_ver.pmu_allocation_set_dmem_size =
			pmu_allocation_set_dmem_size_v1;
		g->ops.pmu_ver.pmu_allocation_get_dmem_size =
			pmu_allocation_get_dmem_size_v1;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
			pmu_allocation_get_dmem_offset_v1;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
			pmu_allocation_get_dmem_offset_addr_v1;
		g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
			pmu_allocation_set_dmem_offset_v1;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
			get_pmu_init_msg_pmu_queue_params_v1;
		g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
			get_pmu_msg_pmu_init_msg_ptr_v1;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
			get_pmu_init_msg_pmu_sw_mg_off_v1;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
			get_pmu_init_msg_pmu_sw_mg_size_v1;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
			get_pmu_perfmon_cmd_start_size_v1;
		g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
			get_perfmon_cmd_start_offsetofvar_v1;
		g->ops.pmu_ver.perfmon_start_set_cmd_type =
			perfmon_start_set_cmd_type_v1;
		g->ops.pmu_ver.perfmon_start_set_group_id =
			perfmon_start_set_group_id_v1;
		g->ops.pmu_ver.perfmon_start_set_state_id =
			perfmon_start_set_state_id_v1;
		g->ops.pmu_ver.perfmon_start_set_flags =
			perfmon_start_set_flags_v1;
		g->ops.pmu_ver.perfmon_start_get_flags =
			perfmon_start_get_flags_v1;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
			get_pmu_perfmon_cmd_init_size_v1;
		g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
			get_perfmon_cmd_init_offsetofvar_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
			perfmon_cmd_init_set_sample_buffer_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
			perfmon_cmd_init_set_dec_cnt_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
			perfmon_cmd_init_set_base_cnt_id_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
			perfmon_cmd_init_set_samp_period_us_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
			perfmon_cmd_init_set_num_cnt_v1;
		g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
			perfmon_cmd_init_set_mov_avg_v1;
		g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
			get_pmu_sequence_in_alloc_ptr_v1;
		g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
			get_pmu_sequence_out_alloc_ptr_v1;
		break;
	case APP_VERSION_0:
		g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
		g->ops.pmu_ver.get_pmu_cmdline_args_size =
			pmu_cmdline_size_v0;
		g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
			set_pmu_cmdline_args_cpufreq_v0;
		g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
			get_pmu_cmdline_args_ptr_v0;
		g->ops.pmu_ver.get_pmu_allocation_struct_size =
			get_pmu_allocation_size_v0;
		g->ops.pmu_ver.set_pmu_allocation_ptr =
			set_pmu_allocation_ptr_v0;
		g->ops.pmu_ver.pmu_allocation_set_dmem_size =
			pmu_allocation_set_dmem_size_v0;
		g->ops.pmu_ver.pmu_allocation_get_dmem_size =
			pmu_allocation_get_dmem_size_v0;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
			pmu_allocation_get_dmem_offset_v0;
		g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
			pmu_allocation_get_dmem_offset_addr_v0;
		g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
			pmu_allocation_set_dmem_offset_v0;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
			get_pmu_init_msg_pmu_queue_params_v0;
		g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
			get_pmu_msg_pmu_init_msg_ptr_v0;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
			get_pmu_init_msg_pmu_sw_mg_off_v0;
		g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
			get_pmu_init_msg_pmu_sw_mg_size_v0;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
			get_pmu_perfmon_cmd_start_size_v0;
		g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
			get_perfmon_cmd_start_offsetofvar_v0;
		g->ops.pmu_ver.perfmon_start_set_cmd_type =
			perfmon_start_set_cmd_type_v0;
		g->ops.pmu_ver.perfmon_start_set_group_id =
			perfmon_start_set_group_id_v0;
		g->ops.pmu_ver.perfmon_start_set_state_id =
			perfmon_start_set_state_id_v0;
		g->ops.pmu_ver.perfmon_start_set_flags =
			perfmon_start_set_flags_v0;
		g->ops.pmu_ver.perfmon_start_get_flags =
			perfmon_start_get_flags_v0;
		g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
			get_pmu_perfmon_cmd_init_size_v0;
		g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
			get_perfmon_cmd_init_offsetofvar_v0;
		g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
			perfmon_cmd_init_set_sample_buffer_v0;
		g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
			perfmon_cmd_init_set_dec_cnt_v0;
		g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
			perfmon_cmd_init_set_base_cnt_id_v0;
		g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
			perfmon_cmd_init_set_samp_period_us_v0;
		g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
			perfmon_cmd_init_set_num_cnt_v0;
		g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
			perfmon_cmd_init_set_mov_avg_v0;
		g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
			get_pmu_sequence_in_alloc_ptr_v0;
		g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
			get_pmu_sequence_out_alloc_ptr_v0;
		break;
	default:
		gk20a_err(dev_from_gk20a(pmu->g),
			"unsupported PMU firmware version: %d\n",
			pmu->desc->app_version);
		return -EINVAL;
		break;
	}
	return 0;
}

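/*
 * Copy "size" bytes out of PMU DMEM at offset "src" into "dst" through the
 * indexed DMEMC/DMEMD port with auto-increment on read.  Whole words are
 * read first; trailing bytes come from one extra word read.  "src" must be
 * 4-byte aligned.
 */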
static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
		u32 src, u8 *dst, u32 size, u8 port)
{
	struct gk20a *g = pmu->g;
	u32 i, words, bytes;
	u32 data, addr_mask;
	u32 *dst_u32 = (u32*)dst;

	if (size == 0) {
		gk20a_err(dev_from_gk20a(g),
			"size is zero");
		return;
	}

	if (src & 0x3) {
		gk20a_err(dev_from_gk20a(g),
			"src (0x%08x) not 4-byte aligned", src);
		return;
	}

	mutex_lock(&pmu->pmu_copy_lock);

	words = size >> 2;
	bytes = size & 0x3;

	addr_mask = pwr_falcon_dmemc_offs_m() |
		    pwr_falcon_dmemc_blk_m();

	src &= addr_mask;

	gk20a_writel(g, pwr_falcon_dmemc_r(port),
		src | pwr_falcon_dmemc_aincr_f(1));

	for (i = 0; i < words; i++)
		dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));

	if (bytes > 0) {
		data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
		for (i = 0; i < bytes; i++) {
			dst[(words << 2) + i] = ((u8 *)&data)[i];
		}
	}
	mutex_unlock(&pmu->pmu_copy_lock);
	return;
}

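/*
 * Copy "size" bytes from "src" into PMU DMEM at offset "dst" through the
 * DMEMC/DMEMD port with auto-increment on write, then read DMEMC back to
 * check that the final offset equals dst + ALIGN(size, 4).
 */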
static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
		u32 dst, u8 *src, u32 size, u8 port)
{
	struct gk20a *g = pmu->g;
	u32 i, words, bytes;
	u32 data, addr_mask;
	u32 *src_u32 = (u32*)src;

	if (size == 0) {
		gk20a_err(dev_from_gk20a(g),
			"size is zero");
		return;
	}

	if (dst & 0x3) {
		gk20a_err(dev_from_gk20a(g),
			"dst (0x%08x) not 4-byte aligned", dst);
		return;
	}

	mutex_lock(&pmu->pmu_copy_lock);

	words = size >> 2;
	bytes = size & 0x3;

	addr_mask = pwr_falcon_dmemc_offs_m() |
		    pwr_falcon_dmemc_blk_m();

	dst &= addr_mask;

	gk20a_writel(g, pwr_falcon_dmemc_r(port),
		dst | pwr_falcon_dmemc_aincw_f(1));

	for (i = 0; i < words; i++)
		gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);

	if (bytes > 0) {
		data = 0;
		for (i = 0; i < bytes; i++)
			((u8 *)&data)[i] = src[(words << 2) + i];
		gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
	}

	data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
	size = ALIGN(size, 4);
	if (data != dst + size) {
		gk20a_err(dev_from_gk20a(g),
			"copy failed. bytes written %d, expected %d",
			data - dst, size);
	}
	mutex_unlock(&pmu->pmu_copy_lock);
	return;
}

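/*
 * Poll the falcon idle-state register until both the falcon core and the
 * external units report idle; give up with -EBUSY after roughly 2 seconds.
 */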
static int pmu_idle(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(2000);
	u32 idle_stat;

	/* wait for pmu idle */
	do {
		idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());

		if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
		    pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
			break;
		}

		if (time_after_eq(jiffies, end_jiffies)) {
			gk20a_err(dev_from_gk20a(g),
				"timeout waiting pmu idle : 0x%08x",
				idle_stat);
			return -EBUSY;
		}
		usleep_range(100, 200);
	} while (1);

	gk20a_dbg_fn("done");
	return 0;
}

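/*
 * Mask the PMU bits in the MC interrupt masks and clear the falcon
 * interrupt mask unconditionally.  When enabling, also program the
 * interrupt routing (irqdest), set the falcon interrupt mask (irqmset)
 * and re-enable the PMU bit in mc_intr_mask_0.
 */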
static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
{
	struct gk20a *g = pmu->g;

	gk20a_dbg_fn("");

	gk20a_writel(g, mc_intr_mask_0_r(),
		gk20a_readl(g, mc_intr_mask_0_r()) &
		~mc_intr_mask_0_pmu_enabled_f());
	gk20a_writel(g, mc_intr_mask_1_r(),
		gk20a_readl(g, mc_intr_mask_1_r()) &
		~mc_intr_mask_1_pmu_enabled_f());

	gk20a_writel(g, pwr_falcon_irqmclr_r(),
		pwr_falcon_irqmclr_gptmr_f(1)  |
		pwr_falcon_irqmclr_wdtmr_f(1)  |
		pwr_falcon_irqmclr_mthd_f(1)   |
		pwr_falcon_irqmclr_ctxsw_f(1)  |
		pwr_falcon_irqmclr_halt_f(1)   |
		pwr_falcon_irqmclr_exterr_f(1) |
		pwr_falcon_irqmclr_swgen0_f(1) |
		pwr_falcon_irqmclr_swgen1_f(1) |
		pwr_falcon_irqmclr_ext_f(0xff));

	if (enable) {
		/* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
		gk20a_writel(g, pwr_falcon_irqdest_r(),
			pwr_falcon_irqdest_host_gptmr_f(0)    |
			pwr_falcon_irqdest_host_wdtmr_f(1)    |
			pwr_falcon_irqdest_host_mthd_f(0)     |
			pwr_falcon_irqdest_host_ctxsw_f(0)    |
			pwr_falcon_irqdest_host_halt_f(1)     |
			pwr_falcon_irqdest_host_exterr_f(0)   |
			pwr_falcon_irqdest_host_swgen0_f(1)   |
			pwr_falcon_irqdest_host_swgen1_f(0)   |
			pwr_falcon_irqdest_host_ext_f(0xff)   |
			pwr_falcon_irqdest_target_gptmr_f(1)  |
			pwr_falcon_irqdest_target_wdtmr_f(0)  |
			pwr_falcon_irqdest_target_mthd_f(0)   |
			pwr_falcon_irqdest_target_ctxsw_f(0)  |
			pwr_falcon_irqdest_target_halt_f(0)   |
			pwr_falcon_irqdest_target_exterr_f(0) |
			pwr_falcon_irqdest_target_swgen0_f(0) |
			pwr_falcon_irqdest_target_swgen1_f(0) |
			pwr_falcon_irqdest_target_ext_f(0xff));

		/* 0=disable, 1=enable */
		gk20a_writel(g, pwr_falcon_irqmset_r(),
			pwr_falcon_irqmset_gptmr_f(1)  |
			pwr_falcon_irqmset_wdtmr_f(1)  |
			pwr_falcon_irqmset_mthd_f(0)   |
			pwr_falcon_irqmset_ctxsw_f(0)  |
			pwr_falcon_irqmset_halt_f(1)   |
			pwr_falcon_irqmset_exterr_f(1) |
			pwr_falcon_irqmset_swgen0_f(1) |
			pwr_falcon_irqmset_swgen1_f(1));

		gk20a_writel(g, mc_intr_mask_0_r(),
			gk20a_readl(g, mc_intr_mask_0_r()) |
			mc_intr_mask_0_pmu_enabled_f());
	}

	gk20a_dbg_fn("done");
}

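/*
 * Gate or ungate the PWR engine in mc_enable.  On enable, poll dmactl
 * until the falcon IMEM/DMEM scrub completes, failing with -ETIMEDOUT if
 * scrubbing does not finish in time on silicon.
 */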
static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
{
	struct gk20a *g = pmu->g;

	gk20a_dbg_fn("");

	if (enable) {
		int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
		gk20a_enable(g, mc_enable_pwr_enabled_f());

		do {
			u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
				(pwr_falcon_dmactl_dmem_scrubbing_m() |
				 pwr_falcon_dmactl_imem_scrubbing_m());

			if (!w) {
				gk20a_dbg_fn("done");
				return 0;
			}
			udelay(GR_IDLE_CHECK_DEFAULT);
		} while (--retries || !tegra_platform_is_silicon());

		gk20a_disable(g, mc_enable_pwr_enabled_f());
		gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");

		return -ETIMEDOUT;
	} else {
		gk20a_disable(g, mc_enable_pwr_enabled_f());
		return 0;
	}
}

static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
{
	struct gk20a *g = pmu->g;
	u32 pmc_enable;
	int err;

	gk20a_dbg_fn("");

	if (!enable) {
		pmc_enable = gk20a_readl(g, mc_enable_r());
		if (mc_enable_pwr_v(pmc_enable) !=
		    mc_enable_pwr_disabled_v()) {

			pmu_enable_irq(pmu, false);
			pmu_enable_hw(pmu, false);
		}
	} else {
		err = pmu_enable_hw(pmu, true);
		if (err)
			return err;

		/* TBD: post reset */

		err = pmu_idle(pmu);
		if (err)
			return err;

		pmu_enable_irq(pmu, true);
	}

	gk20a_dbg_fn("done");
	return 0;
}

static int pmu_reset(struct pmu_gk20a *pmu)
{
	int err;

	err = pmu_idle(pmu);
	if (err)
		return err;

	/* TBD: release pmu hw mutex */

	err = pmu_enable(pmu, false);
	if (err)
		return err;

	/* TBD: cancel all sequences */
	/* TBD: init all sequences and state tables */
	/* TBD: restore pre-init message handler */

	err = pmu_enable(pmu, true);
	if (err)
		return err;

	return 0;
}

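/*
 * Load and start the PMU bootloader: point the PMU at its instance block,
 * copy the command line arguments to the top of DMEM, push the boot
 * parameters (code/data offsets, sizes, IMEM entry, args offset) through
 * DMEMD, DMA the bootloader into IMEM in 256-byte blocks, then program the
 * boot vector and start the falcon CPU.
 */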
static int pmu_bootstrap(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct gk20a_platform *platform = platform_get_drvdata(g->dev);
	struct mm_gk20a *mm = &g->mm;
	struct pmu_ucode_desc *desc = pmu->desc;
	u64 addr_code, addr_data, addr_load;
	u32 i, blocks, addr_args;

	gk20a_dbg_fn("");

	gk20a_writel(g, pwr_falcon_itfen_r(),
		gk20a_readl(g, pwr_falcon_itfen_r()) |
		pwr_falcon_itfen_ctxen_enable_f());
	gk20a_writel(g, pwr_pmu_new_instblk_r(),
		pwr_pmu_new_instblk_ptr_f(
			mm->pmu.inst_block.cpu_pa >> 12) |
		pwr_pmu_new_instblk_valid_f(1) |
		pwr_pmu_new_instblk_target_sys_coh_f());

	/* TBD: load all other surfaces */

	g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
		clk_get_rate(platform->clk[1]));

	addr_args = (pwr_falcon_hwcfg_dmem_size_v(
		gk20a_readl(g, pwr_falcon_hwcfg_r()))
			<< GK20A_PMU_DMEM_BLKSIZE2) -
		g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);

	pmu_copy_to_dmem(pmu, addr_args,
			(u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
			g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);

	gk20a_writel(g, pwr_falcon_dmemc_r(0),
		pwr_falcon_dmemc_offs_f(0) |
		pwr_falcon_dmemc_blk_f(0)  |
		pwr_falcon_dmemc_aincw_f(1));

	addr_code = u64_lo32((pmu->ucode.pmu_va +
			desc->app_start_offset +
			desc->app_resident_code_offset) >> 8);
	addr_data = u64_lo32((pmu->ucode.pmu_va +
			desc->app_start_offset +
			desc->app_resident_data_offset) >> 8);
	addr_load = u64_lo32((pmu->ucode.pmu_va +
			desc->bootloader_start_offset) >> 8);

	gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);

	gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
		addr_load - (desc->bootloader_imem_offset >> 8));

	blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;

	for (i = 0; i < blocks; i++) {
		gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
			desc->bootloader_imem_offset + (i << 8));
		gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
			desc->bootloader_imem_offset + (i << 8));
		gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
			pwr_falcon_dmatrfcmd_imem_f(1)  |
			pwr_falcon_dmatrfcmd_write_f(0) |
			pwr_falcon_dmatrfcmd_size_f(6)  |
			pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
	}

	gk20a_writel(g, pwr_falcon_bootvec_r(),
		pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));

	gk20a_writel(g, pwr_falcon_cpuctl_r(),
		pwr_falcon_cpuctl_startcpu_f(1));

	gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);

	return 0;
}

static void pmu_seq_init(struct pmu_gk20a *pmu)
{
	u32 i;

	memset(pmu->seq, 0,
		sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
	memset(pmu->pmu_seq_tbl, 0,
		sizeof(pmu->pmu_seq_tbl));

	for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
		pmu->seq[i].id = i;
}

static int pmu_seq_acquire(struct pmu_gk20a *pmu,
			struct pmu_sequence **pseq)
{
	struct gk20a *g = pmu->g;
	struct pmu_sequence *seq;
	u32 index;

	mutex_lock(&pmu->pmu_seq_lock);
	index = find_first_zero_bit(pmu->pmu_seq_tbl,
				sizeof(pmu->pmu_seq_tbl));
	if (index >= sizeof(pmu->pmu_seq_tbl)) {
		gk20a_err(dev_from_gk20a(g),
			"no free sequence available");
		mutex_unlock(&pmu->pmu_seq_lock);
		return -EAGAIN;
	}
	set_bit(index, pmu->pmu_seq_tbl);
	mutex_unlock(&pmu->pmu_seq_lock);

	seq = &pmu->seq[index];
	seq->state = PMU_SEQ_STATE_PENDING;

	*pseq = seq;
	return 0;
}

static void pmu_seq_release(struct pmu_gk20a *pmu,
			struct pmu_sequence *seq)
{
	struct gk20a *g = pmu->g;
	seq->state	= PMU_SEQ_STATE_FREE;
	seq->desc	= PMU_INVALID_SEQ_DESC;
	seq->callback	= NULL;
	seq->cb_params	= NULL;
	seq->msg	= NULL;
	seq->out_payload = NULL;
	g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
		g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
	g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
		g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);

	clear_bit(seq->id, pmu->pmu_seq_tbl);
}

static int pmu_queue_init(struct pmu_gk20a *pmu,
		u32 id, union pmu_init_msg_pmu *init)
{
	struct gk20a *g = pmu->g;
	struct pmu_queue *queue = &pmu->queue[id];
	queue->id	= id;
	g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);

	queue->mutex_id = id;
	mutex_init(&queue->mutex);

	gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
		id, queue->index, queue->offset, queue->size);

	return 0;
}

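/*
 * Read or update a queue's head pointer.  Command queues use the per-queue
 * pwr_pmu_queue_head registers indexed by queue->index; the message queue
 * uses the single pwr_pmu_msgq_head register.
 */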
static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
			u32 *head, bool set)
{
	struct gk20a *g = pmu->g;

	BUG_ON(!head);

	if (PMU_IS_COMMAND_QUEUE(queue->id)) {

		if (queue->index >= pwr_pmu_queue_head__size_1_v())
			return -EINVAL;

		if (!set)
			*head = pwr_pmu_queue_head_address_v(
				gk20a_readl(g,
					pwr_pmu_queue_head_r(queue->index)));
		else
			gk20a_writel(g,
				pwr_pmu_queue_head_r(queue->index),
				pwr_pmu_queue_head_address_f(*head));
	} else {
		if (!set)
			*head = pwr_pmu_msgq_head_val_v(
				gk20a_readl(g, pwr_pmu_msgq_head_r()));
		else
			gk20a_writel(g,
				pwr_pmu_msgq_head_r(),
				pwr_pmu_msgq_head_val_f(*head));
	}

	return 0;
}

static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
			u32 *tail, bool set)
{
	struct gk20a *g = pmu->g;

	BUG_ON(!tail);

	if (PMU_IS_COMMAND_QUEUE(queue->id)) {

		if (queue->index >= pwr_pmu_queue_tail__size_1_v())
			return -EINVAL;

		if (!set)
			*tail = pwr_pmu_queue_tail_address_v(
				gk20a_readl(g,
					pwr_pmu_queue_tail_r(queue->index)));
		else
			gk20a_writel(g,
				pwr_pmu_queue_tail_r(queue->index),
				pwr_pmu_queue_tail_address_f(*tail));
	} else {
		if (!set)
			*tail = pwr_pmu_msgq_tail_val_v(
				gk20a_readl(g, pwr_pmu_msgq_tail_r()));
		else
			gk20a_writel(g,
				pwr_pmu_msgq_tail_r(),
				pwr_pmu_msgq_tail_val_f(*tail));
	}

	return 0;
}

static inline void pmu_queue_read(struct pmu_gk20a *pmu,
			u32 offset, u8 *dst, u32 size)
{
	pmu_copy_from_dmem(pmu, offset, dst, size, 0);
}

static inline void pmu_queue_write(struct pmu_gk20a *pmu,
			u32 offset, u8 *src, u32 size)
{
	pmu_copy_to_dmem(pmu, offset, src, size, 0);
}

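/*
 * Acquire a PMU hardware mutex.  A token is generated from the mutex-id
 * register, written to the mutex register and read back; if the read-back
 * value matches, the mutex is held and the token is returned to the caller.
 * On contention the token is released and the attempt is retried, up to 40
 * times.  A nested acquire with the owning token only bumps ref_cnt.
 */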
int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
{
	struct gk20a *g = pmu->g;
	struct pmu_mutex *mutex;
	u32 data, owner, max_retry;

	if (!pmu->initialized)
		return 0;

	BUG_ON(!token);
	BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
	BUG_ON(id > pmu->mutex_cnt);

	mutex = &pmu->mutex[id];

	owner = pwr_pmu_mutex_value_v(
		gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));

	if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
		BUG_ON(mutex->ref_cnt == 0);
		gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
		mutex->ref_cnt++;
		return 0;
	}

	max_retry = 40;
	do {
		data = pwr_pmu_mutex_id_value_v(
			gk20a_readl(g, pwr_pmu_mutex_id_r()));
		if (data == pwr_pmu_mutex_id_value_init_v() ||
		    data == pwr_pmu_mutex_id_value_not_avail_v()) {
			gk20a_warn(dev_from_gk20a(g),
				"fail to generate mutex token: val 0x%08x",
				owner);
			usleep_range(20, 40);
			continue;
		}

		owner = data;
		gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
			pwr_pmu_mutex_value_f(owner));

		data = pwr_pmu_mutex_value_v(
			gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));

		if (owner == data) {
			mutex->ref_cnt = 1;
			gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
				mutex->index, *token);
			*token = owner;
			return 0;
		} else {
			gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
				mutex->index);

			data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
			data = set_field(data,
				pwr_pmu_mutex_id_release_value_m(),
				pwr_pmu_mutex_id_release_value_f(owner));
			gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);

			usleep_range(20, 40);
			continue;
		}
	} while (max_retry-- > 0);

	return -EBUSY;
}

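/*
 * Release a PMU hardware mutex.  The caller's token must match the current
 * owner; dropping the last reference restores the initial-lock value and
 * returns the token to the hardware id pool.
 */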
int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
{
	struct gk20a *g = pmu->g;
	struct pmu_mutex *mutex;
	u32 owner, data;

	if (!pmu->initialized)
		return 0;

	BUG_ON(!token);
	BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
	BUG_ON(id > pmu->mutex_cnt);

	mutex = &pmu->mutex[id];

	owner = pwr_pmu_mutex_value_v(
		gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));

	if (*token != owner) {
		gk20a_err(dev_from_gk20a(g),
			"requester 0x%08x NOT match owner 0x%08x",
			*token, owner);
		return -EINVAL;
	}

	if (--mutex->ref_cnt > 0)
		return -EBUSY;

	gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
		pwr_pmu_mutex_value_initial_lock_f());

	data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
	data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
		pwr_pmu_mutex_id_release_value_f(owner));
	gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);

	gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
		mutex->index, *token);

	return 0;
}

static int pmu_queue_lock(struct pmu_gk20a *pmu,
			struct pmu_queue *queue)
{
	int err;

	if (PMU_IS_MESSAGE_QUEUE(queue->id))
		return 0;

	if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
		mutex_lock(&queue->mutex);
		return 0;
	}

	err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
	return err;
}

static int pmu_queue_unlock(struct pmu_gk20a *pmu,
			struct pmu_queue *queue)
{
	int err;

	if (PMU_IS_MESSAGE_QUEUE(queue->id))
		return 0;

	if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
		mutex_unlock(&queue->mutex);
		return 0;
	}

	err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
	return err;
}

/* called by pmu_read_message, no lock */
static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
			struct pmu_queue *queue)
{
	u32 head, tail;

	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
	if (queue->opened && queue->oflag == OFLAG_READ)
		tail = queue->position;
	else
		pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);

	return head == tail;
}

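/*
 * Check whether "size" bytes (aligned to QUEUE_ALIGNMENT) fit into the
 * circular command queue.  If the contiguous space between the head and
 * the end of the queue is too small, *need_rewind tells the caller to
 * rewind the write pointer to the start of the queue first.
 */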
static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
			struct pmu_queue *queue, u32 size, bool *need_rewind)
{
	u32 head, tail, free;
	bool rewind = false;

	size = ALIGN(size, QUEUE_ALIGNMENT);

	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
	pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);

	if (head >= tail) {
		free = queue->offset + queue->size - head;
		free -= PMU_CMD_HDR_SIZE;

		if (size > free) {
			rewind = true;
			head = queue->offset;
		}
	}

	if (head < tail)
		free = tail - head - 1;

	if (need_rewind)
		*need_rewind = rewind;

	return size <= free;
}

static int pmu_queue_push(struct pmu_gk20a *pmu,
			struct pmu_queue *queue, void *data, u32 size)
{
	gk20a_dbg_fn("");

	if (!queue->opened || queue->oflag != OFLAG_WRITE) {
		gk20a_err(dev_from_gk20a(pmu->g),
			"queue not opened for write");
		return -EINVAL;
	}

	pmu_queue_write(pmu, queue->position, data, size);
	queue->position += ALIGN(size, QUEUE_ALIGNMENT);
	return 0;
}

static int pmu_queue_pop(struct pmu_gk20a *pmu,
			struct pmu_queue *queue, void *data, u32 size,
			u32 *bytes_read)
{
	u32 head, tail, used;

	*bytes_read = 0;

	if (!queue->opened || queue->oflag != OFLAG_READ) {
		gk20a_err(dev_from_gk20a(pmu->g),
			"queue not opened for read");
		return -EINVAL;
	}

	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
	tail = queue->position;

	if (head == tail)
		return 0;

	if (head > tail)
		used = head - tail;
	else
		used = queue->offset + queue->size - tail;

	if (size > used) {
		gk20a_warn(dev_from_gk20a(pmu->g),
			"queue size smaller than request read");
		size = used;
	}

	pmu_queue_read(pmu, tail, data, size);
	queue->position += ALIGN(size, QUEUE_ALIGNMENT);
	*bytes_read = size;
	return 0;
}

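/*
 * Reset the queue position back to the queue start.  For a queue opened
 * for write, a PMU_UNIT_REWIND header is pushed first to mark the wrap
 * point for the consumer.
 */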
static void pmu_queue_rewind(struct pmu_gk20a *pmu,
			struct pmu_queue *queue)
{
	struct pmu_cmd cmd;

	gk20a_dbg_fn("");

	if (!queue->opened) {
		gk20a_err(dev_from_gk20a(pmu->g),
			"queue not opened");
		return;
	}

	if (queue->oflag == OFLAG_WRITE) {
		cmd.hdr.unit_id = PMU_UNIT_REWIND;
		cmd.hdr.size = PMU_CMD_HDR_SIZE;
		pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
		gk20a_dbg_pmu("queue %d rewound", queue->id);
	}

	queue->position = queue->offset;
	return;
}

/* open for read and lock the queue */
static int pmu_queue_open_read(struct pmu_gk20a *pmu,
			struct pmu_queue *queue)
{
	int err;

	err = pmu_queue_lock(pmu, queue);
	if (err)
		return err;

	if (queue->opened)
		BUG();

	pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
	queue->oflag = OFLAG_READ;
	queue->opened = true;

	return 0;
}

/* open for write and lock the queue
   make sure there's enough free space for the write */
static int pmu_queue_open_write(struct pmu_gk20a *pmu,
			struct pmu_queue *queue, u32 size)
{
	bool rewind = false;
	int err;

	err = pmu_queue_lock(pmu, queue);
	if (err)
		return err;

	if (queue->opened)
		BUG();

	if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
		gk20a_err(dev_from_gk20a(pmu->g), "queue full");
		return -EAGAIN;
	}

	pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
	queue->oflag = OFLAG_WRITE;
	queue->opened = true;

	if (rewind)
		pmu_queue_rewind(pmu, queue);

	return 0;
}

/* close and unlock the queue */
static int pmu_queue_close(struct pmu_gk20a *pmu,
			struct pmu_queue *queue, bool commit)
{
	if (!queue->opened)
		return 0;

	if (commit) {
		if (queue->oflag == OFLAG_READ) {
			pmu_queue_tail(pmu, queue,
				&queue->position, QUEUE_SET);
		}
		else {
			pmu_queue_head(pmu, queue,
				&queue->position, QUEUE_SET);
		}
	}

	queue->opened = false;

	pmu_queue_unlock(pmu, queue);

	return 0;
}

void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
{
	gk20a_dbg_fn("");

	gk20a_allocator_destroy(&pmu->dmem);
}

int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;

	gk20a_dbg_fn("");

	pmu_enable_hw(pmu, true);

	return 0;
}

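/* one-time software setup: allocate mutex and sequence tracking, load the
   PMU ucode from firmware, allocate DMA buffers for the ucode and the ZBC
   seq buffer and map them into the PMU vm; on later calls (sw_ready) only
   the mutex/sequence state is re-initialized */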
int gk20a_init_pmu_setup_sw(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = &mm->pmu.vm;
	struct device *d = dev_from_gk20a(g);
	int i, err = 0;
	u8 *ptr;
	void *ucode_ptr;
	struct sg_table *sgt_pmu_ucode;
	struct sg_table *sgt_seq_buf;
	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t iova;

	gk20a_dbg_fn("");

	if (pmu->sw_ready) {
		for (i = 0; i < pmu->mutex_cnt; i++) {
			pmu->mutex[i].id    = i;
			pmu->mutex[i].index = i;
		}
		pmu_seq_init(pmu);

		gk20a_dbg_fn("skip init");
		goto skip_init;
	}

	/* no infoRom script from vbios? */

	/* TBD: sysmon subtask */

	if (IS_ENABLED(CONFIG_TEGRA_GK20A_PERFMON))
		pmu->perfmon_sampling_enabled = true;

	pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
	pmu->mutex = kzalloc(pmu->mutex_cnt *
		sizeof(struct pmu_mutex), GFP_KERNEL);
	if (!pmu->mutex) {
		err = -ENOMEM;
		goto err;
	}

	for (i = 0; i < pmu->mutex_cnt; i++) {
		pmu->mutex[i].id    = i;
		pmu->mutex[i].index = i;
	}

	pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
		sizeof(struct pmu_sequence), GFP_KERNEL);
	if (!pmu->seq) {
		err = -ENOMEM;
		goto err_free_mutex;
	}

	pmu_seq_init(pmu);

	if (!g->pmu_fw) {
		g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
		if (!g->pmu_fw) {
			gk20a_err(d, "failed to load pmu ucode!!");
			err = -ENOENT;
			goto err_free_seq;
		}
	}

	gk20a_dbg_fn("firmware loaded");

	pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
	pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
			pmu->desc->descriptor_size);

	INIT_WORK(&pmu->pg_init, pmu_setup_hw);

	dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
	pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
					&iova,
					GFP_KERNEL,
					&attrs);
	if (!pmu->ucode.cpuva) {
		gk20a_err(d, "failed to allocate memory\n");
		err = -ENOMEM;
		goto err_release_fw;
	}

	pmu->ucode.iova = iova;
	pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
					&iova,
					GFP_KERNEL);
	if (!pmu->seq_buf.cpuva) {
		gk20a_err(d, "failed to allocate memory\n");
		err = -ENOMEM;
		goto err_free_pmu_ucode;
	}

	pmu->seq_buf.iova = iova;

	err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
				pmu->ucode.cpuva,
				pmu->ucode.iova,
				GK20A_PMU_UCODE_SIZE_MAX);
	if (err) {
		gk20a_err(d, "failed to allocate sg table\n");
		goto err_free_seq_buf;
	}

	pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
					GK20A_PMU_UCODE_SIZE_MAX,
					0, /* flags */
					gk20a_mem_flag_read_only);
	if (!pmu->ucode.pmu_va) {
		gk20a_err(d, "failed to map pmu ucode memory!!");
		err = -ENOMEM;
		goto err_free_ucode_sgt;
	}

	err = gk20a_get_sgtable(d, &sgt_seq_buf,
				pmu->seq_buf.cpuva,
				pmu->seq_buf.iova,
				GK20A_PMU_SEQ_BUF_SIZE);
	if (err) {
		gk20a_err(d, "failed to allocate sg table\n");
		goto err_unmap_ucode;
	}

	pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
					GK20A_PMU_SEQ_BUF_SIZE,
					0, /* flags */
					gk20a_mem_flag_none);
	if (!pmu->seq_buf.pmu_va) {
		gk20a_err(d, "failed to map pmu seq buffer memory!!");
		err = -ENOMEM;
		goto err_free_seq_buf_sgt;
	}

	ptr = (u8 *)pmu->seq_buf.cpuva;
	if (!ptr) {
		gk20a_err(d, "failed to map cpu ptr for zbc buffer");
		err = -ENOMEM;
		goto err_unmap_seq_buf;
	}

	/* TBD: remove this if ZBC save/restore is handled by PMU
	 * send an empty ZBC sequence for now */
	ptr[0] = 0x16; /* opcode EXIT */
	ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
	ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;

	pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;

	ucode_ptr = pmu->ucode.cpuva;

	for (i = 0; i < (pmu->desc->app_start_offset +
			pmu->desc->app_size) >> 2; i++)
		gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);

	gk20a_free_sgtable(&sgt_pmu_ucode);
	gk20a_free_sgtable(&sgt_seq_buf);

	pmu->sw_ready = true;

skip_init:
	mutex_init(&pmu->elpg_mutex);
	mutex_init(&pmu->isr_mutex);
	mutex_init(&pmu->pmu_copy_lock);
	mutex_init(&pmu->pmu_seq_lock);

	pmu->perfmon_counter.index = 3; /* GR & CE2 */
	pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;

	pmu->remove_support = gk20a_remove_pmu_support;
	err = gk20a_init_pmu(pmu);
	if (err) {
		gk20a_err(d, "failed to set function pointers\n");
		return err;
	}

	gk20a_dbg_fn("done");
	return 0;

 err_unmap_seq_buf:
	gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
		GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
 err_free_seq_buf_sgt:
	gk20a_free_sgtable(&sgt_seq_buf);
 err_unmap_ucode:
	gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
		GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
 err_free_ucode_sgt:
	gk20a_free_sgtable(&sgt_pmu_ucode);
 err_free_seq_buf:
	dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
		pmu->seq_buf.cpuva, pmu->seq_buf.iova);
	pmu->seq_buf.cpuva = NULL;
	pmu->seq_buf.iova = 0;
 err_free_pmu_ucode:
	dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
		pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
	pmu->ucode.cpuva = NULL;
	pmu->ucode.iova = 0;
 err_release_fw:
	release_firmware(g->pmu_fw);
 err_free_seq:
	kfree(pmu->seq);
 err_free_mutex:
	kfree(pmu->mutex);
 err:
	gk20a_dbg_fn("fail");
	return err;
}

static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
			void *param, u32 handle, u32 status);

static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
			void *param, u32 handle, u32 status)
{
	struct pmu_gk20a *pmu = param;
	struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;

	gk20a_dbg_fn("");

	gk20a_dbg_pmu("reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
	if (status != 0) {
		gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
		/* TBD: disable ELPG */
		return;
	}

	if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) {
		gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
	}

	pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
	schedule_work(&pmu->pg_init);
}

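/* first-stage hardware setup: reset the PMU falcon, program the FBIF
   apertures (virtual and physical DMA targets) and bootstrap the ucode */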
int gk20a_init_pmu_setup_hw1(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	int err;

	gk20a_dbg_fn("");

	pmu_reset(pmu);

	/* setup apertures - virtual */
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
		pwr_fbif_transcfg_mem_type_virtual_f());
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
		pwr_fbif_transcfg_mem_type_virtual_f());
	/* setup apertures - physical */
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
		pwr_fbif_transcfg_mem_type_physical_f() |
		pwr_fbif_transcfg_target_local_fb_f());
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
		pwr_fbif_transcfg_mem_type_physical_f() |
		pwr_fbif_transcfg_target_coherent_sysmem_f());
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
		pwr_fbif_transcfg_mem_type_physical_f() |
		pwr_fbif_transcfg_target_noncoherent_sysmem_f());

	/* TBD: load pmu ucode */
	err = pmu_bootstrap(pmu);
	if (err)
		return err;

	return 0;

}

static int gk20a_aelpg_init(struct gk20a *g);
static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);

static void pmu_setup_hw_load_zbc(struct gk20a *g);
static void pmu_setup_hw_enable_elpg(struct gk20a *g);

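/* pg_init work handler: drives PMU bring-up through its states -
   bind the FECS PG buffer, load the ZBC buffer, then enable ELPG */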
static void pmu_setup_hw(struct work_struct *work)
{
	struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
	struct gk20a *g = pmu->g;

	switch (pmu->pmu_state) {
	case PMU_STATE_ELPG_BOOTED:
		gk20a_dbg_pmu("elpg booted");
		gk20a_init_pmu_bind_fecs(g);
		break;
	case PMU_STATE_LOADING_PG_BUF:
		gk20a_dbg_pmu("loaded pg buf");
		pmu_setup_hw_load_zbc(g);
		break;
	case PMU_STATE_LOADING_ZBC:
		gk20a_dbg_pmu("loaded zbc");
		pmu_setup_hw_enable_elpg(g);
		break;
	case PMU_STATE_STARTED:
		gk20a_dbg_pmu("PMU booted");
		break;
	default:
		gk20a_dbg_pmu("invalid state");
		break;
	}
}

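/* allocate and map the FECS powergating buffer (first call only), bind the
   PMU instance block and buffer address to FECS, then request the PMU to
   DMA the buffer in via PMU_PG_CMD_ID_ENG_BUF_LOAD */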
int gk20a_init_pmu_bind_fecs(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = &mm->pmu.vm;
	struct device *d = dev_from_gk20a(g);
	struct pmu_cmd cmd;
	u32 desc;
	int err;
	u32 size;
	struct sg_table *sgt_pg_buf;
	dma_addr_t iova;

	gk20a_dbg_fn("");

	size = 0;
	gk20a_gr_wait_initialized(g);
	err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to query fecs pg buffer size");
		return err;
	}

	if (!pmu->pg_buf.cpuva) {
		pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
						&iova,
						GFP_KERNEL);
		if (!pmu->pg_buf.cpuva) {
			gk20a_err(d, "failed to allocate memory\n");
			return -ENOMEM;
		}

		pmu->pg_buf.iova = iova;
		pmu->pg_buf.size = size;

		err = gk20a_get_sgtable(d, &sgt_pg_buf,
					pmu->pg_buf.cpuva,
					pmu->pg_buf.iova,
					size);
		if (err) {
			gk20a_err(d, "failed to create sg table\n");
			goto err_free_pg_buf;
		}

		pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
					&sgt_pg_buf,
					size,
					0, /* flags */
					gk20a_mem_flag_none);
		if (!pmu->pg_buf.pmu_va) {
			gk20a_err(d, "failed to map fecs pg buffer");
			err = -ENOMEM;
			goto err_free_sgtable;
		}

		gk20a_free_sgtable(&sgt_pg_buf);
	}

	err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to bind pmu inst to gr");
		return err;
	}

	err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to set pg buffer pmu va");
		return err;
	}

	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
	cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
	cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
	cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
	cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
	cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
	cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;

	pmu->buf_loaded = false;
	gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
	pmu->pmu_state = PMU_STATE_LOADING_PG_BUF;
	return err;

err_free_sgtable:
	gk20a_free_sgtable(&sgt_pg_buf);
err_free_pg_buf:
	dma_free_coherent(d, size,
		pmu->pg_buf.cpuva, pmu->pg_buf.iova);
	pmu->pg_buf.cpuva = NULL;
	pmu->pg_buf.iova = 0;
	return err;
}

static void pmu_setup_hw_load_zbc(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_cmd cmd;
	u32 desc;

	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
	cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
	cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
	cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
	cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
	cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
	cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;

	pmu->buf_loaded = false;
	gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC");
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
	pmu->pmu_state = PMU_STATE_LOADING_ZBC;
}

static void pmu_setup_hw_enable_elpg(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;

	/*
	 * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
	 * 7. This prevents PMU stalling on Host register accesses. Once the
	 * cause for this hang is discovered and fixed, this WAR should be
	 * removed.
	 */
	gk20a_writel(g, 0x10a164, 0x109ff);

	pmu->initialized = true;
	pmu->pmu_state = PMU_STATE_STARTED;

	pmu->zbc_ready = true;
	/* Save zbc table after PMU is initialized. */
	gr_gk20a_pmu_save_zbc(g, 0xf);

	if (g->elpg_enabled)
		gk20a_pmu_enable_elpg(g);

	udelay(50);

	/* Enable AELPG */
	if (g->aelpg_enabled) {
		gk20a_aelpg_init(g);
		gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
	}
}

int gk20a_init_pmu_support(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	int err;

	gk20a_dbg_fn("");

	if (pmu->initialized)
		return 0;

	pmu->g = g;

	err = gk20a_init_pmu_reset_enable_hw(g);
	if (err)
		return err;

	if (support_gk20a_pmu()) {
		err = gk20a_init_pmu_setup_sw(g);
		if (err)
			return err;

		err = gk20a_init_pmu_setup_hw1(g);
		if (err)
			return err;
	}

	return err;
}

static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
			void *param, u32 handle, u32 status)
{
	struct pmu_gk20a *pmu = param;
	struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;

	gk20a_dbg_fn("");

	if (status != 0) {
		gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
		/* TBD: disable ELPG */
		return;
	}

	switch (elpg_msg->msg) {
	case PMU_PG_ELPG_MSG_INIT_ACK:
		gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
		break;
	case PMU_PG_ELPG_MSG_ALLOW_ACK:
		gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
		pmu->elpg_stat = PMU_ELPG_STAT_ON;
		break;
	case PMU_PG_ELPG_MSG_DISALLOW_ACK:
		gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
		pmu->elpg_stat = PMU_ELPG_STAT_OFF;
		if (pmu->pmu_state == PMU_STATE_STARTING)
			pmu->pmu_state = PMU_STATE_ELPG_BOOTED;
		schedule_work(&pmu->pg_init);
		break;
	default:
		gk20a_err(dev_from_gk20a(g),
			"unsupported ELPG message : 0x%04x", elpg_msg->msg);
	}

	return;
}

static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
			void *param, u32 handle, u32 status)
{
	struct pmu_gk20a *pmu = param;

	gk20a_dbg_fn("");

	if (status != 0) {
		gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
		/* TBD: disable ELPG */
		return;
	}

	switch (msg->msg.pg.stat.sub_msg_id) {
	case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
		gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
		pmu->stat_dmem_offset = msg->msg.pg.stat.data;
		break;
	default:
		break;
	}
}

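/* program idle filter thresholds and send the initial ELPG sequence:
   INIT, ALLOC_DMEM for PG statistics, then DISALLOW (the PMU ucode
   requires a disallow cmd before the first allow) */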
static int pmu_init_powergating(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_cmd cmd;
	u32 seq;

	gk20a_dbg_fn("");

	if (tegra_cpu_is_asim()) {
		/* TBD: calculate threshold for silicon */
		gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
				PMU_PG_IDLE_THRESHOLD_SIM);
		gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
				PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
	} else {
		/* TBD: calculate threshold for silicon */
		gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
				PMU_PG_IDLE_THRESHOLD);
		gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
				PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
	}

	/* init ELPG */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;

	gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT");
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);

	/* alloc dmem for powergating state log */
	pmu->stat_dmem_offset = 0;
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
	cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
	cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
	cmd.cmd.pg.stat.data = 0;

	gk20a_dbg_pmu("cmd post PMU_PG_STAT_CMD_ALLOC_DMEM");
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			pmu_handle_pg_stat_msg, pmu, &seq, ~0);

	/* disallow ELPG initially
	   PMU ucode requires a disallow cmd before allow cmd */
	pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;

	gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);

	/* start with elpg disabled until first enable call */
	pmu->elpg_refcnt = 0;

	pmu->pmu_state = PMU_STATE_STARTING;

	return 0;
}

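/* configure the idle counters used by perfmon (#3 busy, #6 total, plus
   #1/#2 for raw readings), allocate a DMEM sample buffer and send
   PMU_PERFMON_CMD_ID_INIT with the sampling parameters */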
static int pmu_init_perfmon(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_v *pv = &g->ops.pmu_ver;
	struct pmu_cmd cmd;
	struct pmu_payload payload;
	u32 seq;
	u32 data;
	int err = 0;

	gk20a_dbg_fn("");

	pmu->perfmon_ready = 0;

	/* use counter #3 for GR && CE2 busy cycles */
	gk20a_writel(g, pwr_pmu_idle_mask_r(3),
		pwr_pmu_idle_mask_gr_enabled_f() |
		pwr_pmu_idle_mask_ce_2_enabled_f());

	/* disable idle filtering for counters 3 and 6 */
	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
			pwr_pmu_idle_ctrl_filter_m(),
			pwr_pmu_idle_ctrl_value_busy_f() |
			pwr_pmu_idle_ctrl_filter_disabled_f());
	gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);

	/* use counter #6 for total cycles */
	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
			pwr_pmu_idle_ctrl_filter_m(),
			pwr_pmu_idle_ctrl_value_always_f() |
			pwr_pmu_idle_ctrl_filter_disabled_f());
	gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);

	/*
	 * We don't want to disturb counters #3 and #6, which are used by
	 * perfmon, so we add wiring also to counters #1 and #2 for
	 * exposing raw counter readings.
	 */
	gk20a_writel(g, pwr_pmu_idle_mask_r(1),
		pwr_pmu_idle_mask_gr_enabled_f() |
		pwr_pmu_idle_mask_ce_2_enabled_f());

	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
			pwr_pmu_idle_ctrl_filter_m(),
			pwr_pmu_idle_ctrl_value_busy_f() |
			pwr_pmu_idle_ctrl_filter_disabled_f());
	gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);

	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
			pwr_pmu_idle_ctrl_filter_m(),
			pwr_pmu_idle_ctrl_value_always_f() |
			pwr_pmu_idle_ctrl_filter_disabled_f());
	gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);

	if (!pmu->sample_buffer)
		err = pmu->dmem.alloc(&pmu->dmem,
				      &pmu->sample_buffer, 2 * sizeof(u16));
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"failed to allocate perfmon sample buffer");
		return -ENOMEM;
	}

	/* init PERFMON */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PERFMON;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
	cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
	/* buffer to save counter values for pmu perfmon */
	pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
	(u16)pmu->sample_buffer);
	/* number of sample periods below lower threshold
	   before pmu triggers perfmon decrease event
	   TBD: = 15 */
	pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
	/* index of base counter, aka. always ticking counter */
	pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
	/* microseconds interval between pmu polls perf counters */
	pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
	/* number of perfmon counters
	   counter #3 (GR and CE2) for gk20a */
	pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
	/* moving average window for sample periods
	   TBD: = 3000000 / sample_period_us = 17 */
	pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);

	memset(&payload, 0, sizeof(struct pmu_payload));
	payload.in.buf = &pmu->perfmon_counter;
	payload.in.size = sizeof(struct pmu_perfmon_counter);
	payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);

	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
			NULL, NULL, &seq, ~0);

	return 0;
}

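/* handle the first message after PMU boot: validate the INIT message,
   read the SHA1 GID data, set up the command/message queues from the
   offsets reported by the PMU and initialize the DMEM allocator */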
static int pmu_process_init_msg(struct pmu_gk20a *pmu,
			struct pmu_msg *msg)
{
	struct gk20a *g = pmu->g;
	struct pmu_v *pv = &g->ops.pmu_ver;
	union pmu_init_msg_pmu *init;
	struct pmu_sha1_gid_data gid_data;
	u32 i, tail = 0;

	tail = pwr_pmu_msgq_tail_val_v(
		gk20a_readl(g, pwr_pmu_msgq_tail_r()));

	pmu_copy_from_dmem(pmu, tail,
		(u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);

	if (msg->hdr.unit_id != PMU_UNIT_INIT) {
		gk20a_err(dev_from_gk20a(g),
			"expecting init msg");
		return -EINVAL;
	}

	pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
		(u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);

	if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
		gk20a_err(dev_from_gk20a(g),
			"expecting init msg");
		return -EINVAL;
	}

	tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
	gk20a_writel(g, pwr_pmu_msgq_tail_r(),
		pwr_pmu_msgq_tail_val_f(tail));

	init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
	if (!pmu->gid_info.valid) {

		pmu_copy_from_dmem(pmu,
			pv->get_pmu_init_msg_pmu_sw_mg_off(init),
			(u8 *)&gid_data,
			sizeof(struct pmu_sha1_gid_data), 0);

		pmu->gid_info.valid =
			(*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);

		if (pmu->gid_info.valid) {

			BUG_ON(sizeof(pmu->gid_info.gid) !=
				sizeof(gid_data.gid));

			memcpy(pmu->gid_info.gid, gid_data.gid,
				sizeof(pmu->gid_info.gid));
		}
	}

	for (i = 0; i < PMU_QUEUE_COUNT; i++)
		pmu_queue_init(pmu, i, init);

	if (!pmu->dmem.alloc)
		gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
				pv->get_pmu_init_msg_pmu_sw_mg_off(init),
				pv->get_pmu_init_msg_pmu_sw_mg_size(init),
				PMU_DMEM_ALLOC_ALIGNMENT);

	pmu->pmu_ready = true;

	return 0;
}

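/* pop one message from the queue into *msg; returns true on success,
   false when the queue is empty or on error (error code in *status);
   a PMU_UNIT_REWIND header causes a wrap and re-read */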
static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
			struct pmu_msg *msg, int *status)
{
	struct gk20a *g = pmu->g;
	u32 read_size, bytes_read;
	int err;

	*status = 0;

	if (pmu_queue_is_empty(pmu, queue))
		return false;

	err = pmu_queue_open_read(pmu, queue);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to open queue %d for read", queue->id);
		*status = err;
		return false;
	}

	err = pmu_queue_pop(pmu, queue, &msg->hdr,
			PMU_MSG_HDR_SIZE, &bytes_read);
	if (err || bytes_read != PMU_MSG_HDR_SIZE) {
		gk20a_err(dev_from_gk20a(g),
			"fail to read msg from queue %d", queue->id);
		*status = err | -EINVAL;
		goto clean_up;
	}

	if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
		pmu_queue_rewind(pmu, queue);
		/* read again after rewind */
		err = pmu_queue_pop(pmu, queue, &msg->hdr,
				PMU_MSG_HDR_SIZE, &bytes_read);
		if (err || bytes_read != PMU_MSG_HDR_SIZE) {
			gk20a_err(dev_from_gk20a(g),
				"fail to read msg from queue %d", queue->id);
			*status = err | -EINVAL;
			goto clean_up;
		}
	}

	if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
		gk20a_err(dev_from_gk20a(g),
			"read invalid unit_id %d from queue %d",
			msg->hdr.unit_id, queue->id);
		*status = -EINVAL;
		goto clean_up;
	}

	if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
		read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
		err = pmu_queue_pop(pmu, queue, &msg->msg,
			read_size, &bytes_read);
		if (err || bytes_read != read_size) {
			gk20a_err(dev_from_gk20a(g),
				"fail to read msg from queue %d", queue->id);
			*status = err;
			goto clean_up;
		}
	}

	err = pmu_queue_close(pmu, queue, true);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to close queue %d", queue->id);
		*status = err;
		return false;
	}

	return true;

clean_up:
	err = pmu_queue_close(pmu, queue, false);
	if (err)
		gk20a_err(dev_from_gk20a(g),
			"fail to close queue %d", queue->id);
	return false;
}

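/* match a response to its pending sequence, copy any output payload back
   from DMEM, free the sequence's DMEM allocations and invoke the
   completion callback, if any, before releasing the sequence */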
static int pmu_response_handle(struct pmu_gk20a *pmu,
			struct pmu_msg *msg)
{
	struct gk20a *g = pmu->g;
	struct pmu_sequence *seq;
	struct pmu_v *pv = &g->ops.pmu_ver;
	int ret = 0;

	gk20a_dbg_fn("");

	seq = &pmu->seq[msg->hdr.seq_id];
	if (seq->state != PMU_SEQ_STATE_USED &&
	    seq->state != PMU_SEQ_STATE_CANCELLED) {
		gk20a_err(dev_from_gk20a(g),
			"msg for an unknown sequence %d", seq->id);
		return -EINVAL;
	}

	if (msg->hdr.unit_id == PMU_UNIT_RC &&
	    msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
		gk20a_err(dev_from_gk20a(g),
			"unhandled cmd: seq %d", seq->id);
	}
	else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
		if (seq->msg) {
			if (seq->msg->hdr.size >= msg->hdr.size) {
				memcpy(seq->msg, msg, msg->hdr.size);
				if (pv->pmu_allocation_get_dmem_size(pmu,
				pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
					pmu_copy_from_dmem(pmu,
					pv->pmu_allocation_get_dmem_offset(pmu,
					pv->get_pmu_seq_out_a_ptr(seq)),
					seq->out_payload,
					pv->pmu_allocation_get_dmem_size(pmu,
					pv->get_pmu_seq_out_a_ptr(seq)), 0);
				}
			} else {
				gk20a_err(dev_from_gk20a(g),
					"sequence %d msg buffer too small",
					seq->id);
			}
		}
	} else
		seq->callback = NULL;
	if (pv->pmu_allocation_get_dmem_size(pmu,
			pv->get_pmu_seq_in_a_ptr(seq)) != 0)
		pmu->dmem.free(&pmu->dmem,
		pv->pmu_allocation_get_dmem_offset(pmu,
		pv->get_pmu_seq_in_a_ptr(seq)),
		pv->pmu_allocation_get_dmem_size(pmu,
		pv->get_pmu_seq_in_a_ptr(seq)));
	if (pv->pmu_allocation_get_dmem_size(pmu,
			pv->get_pmu_seq_out_a_ptr(seq)) != 0)
		pmu->dmem.free(&pmu->dmem,
		pv->pmu_allocation_get_dmem_offset(pmu,
		pv->get_pmu_seq_out_a_ptr(seq)),
		pv->pmu_allocation_get_dmem_size(pmu,
		pv->get_pmu_seq_out_a_ptr(seq)));

	if (seq->callback)
		seq->callback(g, msg, seq->cb_params, seq->desc, ret);

	pmu_seq_release(pmu, seq);

	/* TBD: notify client waiting for available dmem */

	gk20a_dbg_fn("done");

	return 0;
}

static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
				 u32 *var, u32 val);

static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
			void *param, u32 handle, u32 status)
{
	struct pmu_gk20a *pmu = param;
	gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE");
	pmu->zbc_save_done = 1;
}

void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_cmd cmd;
	u32 seq;

	if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
		return;

	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
	cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
	cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);

	pmu->zbc_save_done = 0;

	gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE");
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			   pmu_handle_zbc_msg, pmu, &seq, ~0);
	pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
			      &pmu->zbc_save_done, 1);
	if (!pmu->zbc_save_done)
		gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
}

static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_v *pv = &g->ops.pmu_ver;
	struct pmu_cmd cmd;
	struct pmu_payload payload;
	u32 seq;

	/* PERFMON Start */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PERFMON;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
	pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
		PMU_PERFMON_CMD_ID_START);
	pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
		PMU_DOMAIN_GROUP_PSTATE);
	pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
		pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);

	pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
		PMU_PERFMON_FLAG_ENABLE_INCREASE |
		PMU_PERFMON_FLAG_ENABLE_DECREASE |
		PMU_PERFMON_FLAG_CLEAR_PREV);

	memset(&payload, 0, sizeof(struct pmu_payload));

	/* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
	pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
	/* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
	pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
	pmu->perfmon_counter.valid = true;

	payload.in.buf = &pmu->perfmon_counter;
	payload.in.size = sizeof(pmu->perfmon_counter);
	payload.in.offset =
		pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);

	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
			NULL, NULL, &seq, ~0);

	return 0;
}

static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_cmd cmd;
	u32 seq;

	/* PERFMON Stop */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PERFMON;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
	cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;

	gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			NULL, NULL, &seq, ~0);
	return 0;
}

static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
			struct pmu_perfmon_msg *msg)
{
	gk20a_dbg_fn("");

	switch (msg->msg_type) {
	case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
		gk20a_dbg_pmu("perfmon increase event: "
			"state_id %d, ground_id %d, pct %d",
			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
		(pmu->perfmon_events_cnt)++;
		break;
	case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
		gk20a_dbg_pmu("perfmon decrease event: "
			"state_id %d, ground_id %d, pct %d",
			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
		(pmu->perfmon_events_cnt)++;
		break;
	case PMU_PERFMON_MSG_ID_INIT_EVENT:
		pmu->perfmon_ready = 1;
		gk20a_dbg_pmu("perfmon init event");
		break;
	default:
		break;
	}

	/* restart sampling */
	if (pmu->perfmon_sampling_enabled)
		return pmu_perfmon_start_sampling(pmu);
	return 0;
}


static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
{
	int err = 0;

	gk20a_dbg_fn("");

	switch (msg->hdr.unit_id) {
	case PMU_UNIT_PERFMON:
		err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
		break;
	default:
		break;
	}

	return err;
}

static int pmu_process_message(struct pmu_gk20a *pmu)
{
	struct pmu_msg msg;
	int status;

	if (unlikely(!pmu->pmu_ready)) {
		pmu_process_init_msg(pmu, &msg);
		pmu_init_powergating(pmu);
		pmu_init_perfmon(pmu);
		return 0;
	}

	while (pmu_read_message(pmu,
		&pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {

		gk20a_dbg_pmu("read msg hdr: "
				"unit_id = 0x%08x, size = 0x%08x, "
				"ctrl_flags = 0x%08x, seq_id = 0x%08x",
				msg.hdr.unit_id, msg.hdr.size,
				msg.hdr.ctrl_flags, msg.hdr.seq_id);

		msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;

		if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
			pmu_handle_event(pmu, &msg);
		} else {
			pmu_response_handle(pmu, &msg);
		}
	}

	return 0;
}

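/* poll until *var == val or the timeout (in ms) expires, servicing
   pending PMU interrupts while waiting; never times out on pre-silicon */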
static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
				 u32 *var, u32 val)
{
	struct gk20a *g = pmu->g;
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
	unsigned long delay = GR_IDLE_CHECK_DEFAULT;

	do {
		if (*var == val)
			return 0;

		if (gk20a_readl(g, pwr_falcon_irqstat_r()))
			gk20a_pmu_isr(g);

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (time_before(jiffies, end_jiffies) ||
			!tegra_platform_is_silicon());

	return -ETIMEDOUT;
}

static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_pg_stats stats;

	pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
		(u8 *)&stats, sizeof(struct pmu_pg_stats), 0);

	gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
		stats.pg_entry_start_timestamp);
	gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
		stats.pg_exit_start_timestamp);
	gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
		stats.pg_ingating_start_timestamp);
	gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
		stats.pg_ungating_start_timestamp);
	gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
		stats.pg_avg_entry_time_us);
	gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
		stats.pg_avg_exit_time_us);
	gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
		stats.pg_ingating_cnt);
	gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
		stats.pg_ingating_time_us);
	gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
		stats.pg_ungating_count);
	gk20a_dbg_pmu("pg_ungating_time_us 0x%08x: ",
		stats.pg_ungating_time_us);
	gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
		stats.pg_gating_cnt);
	gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
		stats.pg_gating_deny_cnt);

	/*
	   Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
	   in .nm file, e.g. 0x1000066c. use 0x66c.
	u32 i, val[20];
	pmu_copy_from_dmem(pmu, 0x66c,
		(u8 *)val, sizeof(val), 0);
	gk20a_dbg_pmu("elpg log begin");
	for (i = 0; i < 20; i++)
		gk20a_dbg_pmu("0x%08x", val[i]);
	gk20a_dbg_pmu("elpg log end");
	*/

	gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
	gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
	gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
	gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
		gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
	gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
		gk20a_readl(g, pwr_pmu_pg_intren_r(0)));

	gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_count_r(3)));
	gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_count_r(4)));
	gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_count_r(7)));

	/*
	 TBD: script can't generate those registers correctly
	gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_status_r()));
	gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
		gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
	*/
}

static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	int i;

	gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
		gk20a_readl(g, pwr_falcon_os_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
		gk20a_readl(g, pwr_falcon_cpuctl_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
		gk20a_readl(g, pwr_falcon_idlestate_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
		gk20a_readl(g, pwr_falcon_mailbox0_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
		gk20a_readl(g, pwr_falcon_mailbox1_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
		gk20a_readl(g, pwr_falcon_irqstat_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
		gk20a_readl(g, pwr_falcon_irqmode_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
		gk20a_readl(g, pwr_falcon_irqmask_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
		gk20a_readl(g, pwr_falcon_irqdest_r()));

	for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
			i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));

	for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
			i, gk20a_readl(g, pwr_pmu_debug_r(i)));

	for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
		gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
			pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
			pwr_pmu_falcon_icd_cmd_idx_f(i));
		gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
			i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
	}

	i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
	gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
	if (i != 0) {
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
			gk20a_readl(g, pwr_pmu_bar0_addr_r()));
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
			gk20a_readl(g, pwr_pmu_bar0_data_r()));
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
			gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
			gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
	}

	i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
	gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);

	i = gk20a_readl(g, pwr_falcon_exterrstat_r());
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
	if (pwr_falcon_exterrstat_valid_v(i) ==
			pwr_falcon_exterrstat_valid_true_v()) {
		gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
			gk20a_readl(g, pwr_falcon_exterraddr_r()));
		gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
			gk20a_readl(g, mc_enable_r()));
	}

	gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
		gk20a_readl(g, pwr_falcon_engctl_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
		gk20a_readl(g, pwr_falcon_curctx_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
		gk20a_readl(g, pwr_falcon_nxtctx_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	for (i = 0; i < 4; i++) {
		gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
			pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
			pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
		gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
			gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

		gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
			pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
			pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
		gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
			gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
	}
	gk20a_err(dev_from_gk20a(g), "elpg stat: %d\n",
			pmu->elpg_stat);

	/* PMU may crash due to FECS crash. Dump FECS status */
	gk20a_fecs_dump_falcon_stats(g);
}

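/* PMU interrupt service routine: dump falcon state on halt/exterr,
   process the message queue on swgen0, then clear the handled interrupts
   and re-trigger swgen0 if more messages arrived meanwhile */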
void gk20a_pmu_isr(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_queue *queue;
	u32 intr, mask;
	bool recheck = false;

	gk20a_dbg_fn("");

	mutex_lock(&pmu->isr_mutex);

	mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
		gk20a_readl(g, pwr_falcon_irqdest_r());

	intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;

	gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);

	if (!intr) {
		mutex_unlock(&pmu->isr_mutex);
		return;
	}

	if (intr & pwr_falcon_irqstat_halt_true_f()) {
		gk20a_err(dev_from_gk20a(g),
			"pmu halt intr not implemented");
		pmu_dump_falcon_stats(pmu);
	}
	if (intr & pwr_falcon_irqstat_exterr_true_f()) {
		gk20a_err(dev_from_gk20a(g),
			"pmu exterr intr not implemented. Clearing interrupt.");
		pmu_dump_falcon_stats(pmu);

		gk20a_writel(g, pwr_falcon_exterrstat_r(),
			gk20a_readl(g, pwr_falcon_exterrstat_r()) &
				~pwr_falcon_exterrstat_valid_m());
	}
	if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
		pmu_process_message(pmu);
		recheck = true;
	}

	gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);

	if (recheck) {
		queue = &pmu->queue[PMU_MESSAGE_QUEUE];
		if (!pmu_queue_is_empty(pmu, queue))
			gk20a_writel(g, pwr_falcon_irqsset_r(),
				pwr_falcon_irqsset_swgen0_set_f());
	}

	mutex_unlock(&pmu->isr_mutex);
}

static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
			struct pmu_msg *msg, struct pmu_payload *payload,
			u32 queue_id)
{
	struct gk20a *g = pmu->g;
	struct pmu_queue *queue;
	u32 in_size, out_size;

	if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
		goto invalid_cmd;

	queue = &pmu->queue[queue_id];
	if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
		goto invalid_cmd;

	if (cmd->hdr.size > (queue->size >> 1))
		goto invalid_cmd;

	if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
		goto invalid_cmd;

	if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
		goto invalid_cmd;

	if (payload == NULL)
		return true;

	if (payload->in.buf == NULL && payload->out.buf == NULL)
		goto invalid_cmd;

	if ((payload->in.buf != NULL && payload->in.size == 0) ||
	    (payload->out.buf != NULL && payload->out.size == 0))
		goto invalid_cmd;

	in_size = PMU_CMD_HDR_SIZE;
	if (payload->in.buf) {
		in_size += payload->in.offset;
		in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
	}

	out_size = PMU_CMD_HDR_SIZE;
	if (payload->out.buf) {
		out_size += payload->out.offset;
		out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
	}

	if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
		goto invalid_cmd;


	if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
	    (payload->out.offset != 0 && payload->out.buf == NULL))
		goto invalid_cmd;

	return true;

invalid_cmd:
	gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
		"queue_id=%d,\n"
		"cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
		"payload in=%p, in_size=%d, in_offset=%d,\n"
		"payload out=%p, out_size=%d, out_offset=%d",
		queue_id, cmd->hdr.size, cmd->hdr.unit_id,
		msg, msg ? msg->hdr.size : ~0,
		payload ? &payload->in : NULL,
		payload ? payload->in.size : 0,
		payload ? payload->in.offset : 0,
		payload ? &payload->out : NULL,
		payload ? payload->out.size : 0,
		payload ? payload->out.offset : 0);

	return false;
}

static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
			u32 queue_id, unsigned long timeout)
{
	struct gk20a *g = pmu->g;
	struct pmu_queue *queue;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(timeout);
	int err;

	gk20a_dbg_fn("");

	queue = &pmu->queue[queue_id];

	do {
		err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
		if (err == -EAGAIN && time_before(jiffies, end_jiffies))
			usleep_range(1000, 2000);
		else
			break;
	} while (1);

	if (err)
		goto clean_up;

	pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);

	err = pmu_queue_close(pmu, queue, true);

clean_up:
	if (err)
		gk20a_err(dev_from_gk20a(g),
			"fail to write cmd to queue %d", queue_id);
	else
		gk20a_dbg_fn("done");

	return err;
}

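/* validate and submit a command to the PMU: acquire a sequence, stage the
   in/out payloads in DMEM and push the command to the selected queue; the
   callback (if any) runs when the matching response is processed.

   typical usage, mirroring the callers in this file:

	struct pmu_cmd cmd;
	u32 seq;

	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
 */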
int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
		struct pmu_msg *msg, struct pmu_payload *payload,
		u32 queue_id, pmu_callback callback, void* cb_param,
		u32 *seq_desc, unsigned long timeout)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_v *pv = &g->ops.pmu_ver;
	struct pmu_sequence *seq;
	void *in = NULL, *out = NULL;
	int err;

	gk20a_dbg_fn("");

	BUG_ON(!cmd);
	BUG_ON(!seq_desc);
	BUG_ON(!pmu->pmu_ready);

	if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
		return -EINVAL;

	err = pmu_seq_acquire(pmu, &seq);
	if (err)
		return err;

	cmd->hdr.seq_id = seq->id;

	cmd->hdr.ctrl_flags = 0;
	cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
	cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;

	seq->callback = callback;
	seq->cb_params = cb_param;
	seq->msg = msg;
	seq->out_payload = NULL;
	seq->desc = pmu->next_seq_desc++;

	if (payload)
		seq->out_payload = payload->out.buf;

	*seq_desc = seq->desc;

	if (payload && payload->in.offset != 0) {
		pv->set_pmu_allocation_ptr(pmu, &in,
		((u8 *)&cmd->cmd + payload->in.offset));

		if (payload->in.buf != payload->out.buf)
			pv->pmu_allocation_set_dmem_size(pmu, in,
			(u16)payload->in.size);
		else
			pv->pmu_allocation_set_dmem_size(pmu, in,
			(u16)max(payload->in.size, payload->out.size));

		err = pmu->dmem.alloc(&pmu->dmem,
		pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
		pv->pmu_allocation_get_dmem_size(pmu, in));
		if (err)
			goto clean_up;

		pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
		in)),
			payload->in.buf, payload->in.size, 0);
		pv->pmu_allocation_set_dmem_size(pmu,
		pv->get_pmu_seq_in_a_ptr(seq),
		pv->pmu_allocation_get_dmem_size(pmu, in));
		pv->pmu_allocation_set_dmem_offset(pmu,
		pv->get_pmu_seq_in_a_ptr(seq),
		pv->pmu_allocation_get_dmem_offset(pmu, in));
	}

	if (payload && payload->out.offset != 0) {
		pv->set_pmu_allocation_ptr(pmu, &out,
		((u8 *)&cmd->cmd + payload->out.offset));
		pv->pmu_allocation_set_dmem_size(pmu, out,
		(u16)payload->out.size);

		if (payload->out.buf != payload->in.buf) {
			err = pmu->dmem.alloc(&pmu->dmem,
			pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
			pv->pmu_allocation_get_dmem_size(pmu, out));
			if (err)
				goto clean_up;
		} else {
			BUG_ON(in == NULL);
			pv->pmu_allocation_set_dmem_offset(pmu, out,
			pv->pmu_allocation_get_dmem_offset(pmu, in));
		}

		pv->pmu_allocation_set_dmem_size(pmu,
		pv->get_pmu_seq_out_a_ptr(seq),
		pv->pmu_allocation_get_dmem_size(pmu, out));
		pv->pmu_allocation_set_dmem_offset(pmu,
		pv->get_pmu_seq_out_a_ptr(seq),
		pv->pmu_allocation_get_dmem_offset(pmu, out));
	}

	seq->state = PMU_SEQ_STATE_USED;
	err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
	if (err)
		seq->state = PMU_SEQ_STATE_PENDING;

	gk20a_dbg_fn("done");

	return 0;

clean_up:
	gk20a_dbg_fn("fail");
	if (in)
		pmu->dmem.free(&pmu->dmem,
		pv->pmu_allocation_get_dmem_offset(pmu, in),
		pv->pmu_allocation_get_dmem_size(pmu, in));
	if (out)
		pmu->dmem.free(&pmu->dmem,
		pv->pmu_allocation_get_dmem_offset(pmu, out),
		pv->pmu_allocation_get_dmem_size(pmu, out));

	pmu_seq_release(pmu, seq);
	return err;
}

static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_cmd cmd;
	u32 seq, status;

	gk20a_dbg_fn("");

	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;

	/* no need to wait ack for ELPG enable but set pending to sync
	   with follow up ELPG disable */
	pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;

	gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_ALLOW");
	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);

	BUG_ON(status != 0);

	gk20a_dbg_fn("done");
	return 0;
}

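/* reference-counted ELPG enable; ELPG is only allowed once the golden
   context exists and when it is not already on or pending */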
int gk20a_pmu_enable_elpg(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct gr_gk20a *gr = &g->gr;

	int ret = 0;

	gk20a_dbg_fn("");

	mutex_lock(&pmu->elpg_mutex);

	pmu->elpg_refcnt++;
	if (pmu->elpg_refcnt <= 0)
		goto exit_unlock;

	/* something is not right if we end up in following code path */
	if (unlikely(pmu->elpg_refcnt > 1)) {
		gk20a_warn(dev_from_gk20a(g),
		"%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
		__func__, pmu->elpg_refcnt);
		WARN_ON(1);
	}

	/* do NOT enable elpg until golden ctx is created,
	   which is related with the ctx that ELPG save and restore. */
	if (unlikely(!gr->ctx_vars.golden_image_initialized))
		goto exit_unlock;

	/* return if ELPG is already on or on_pending or off_on_pending */
	if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
		goto exit_unlock;

	ret = gk20a_pmu_enable_elpg_locked(g);

exit_unlock:
	mutex_unlock(&pmu->elpg_mutex);
	gk20a_dbg_fn("done");
	return ret;
}

int gk20a_pmu_disable_elpg(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_cmd cmd;
	u32 seq;
	int ret = 0;

	gk20a_dbg_fn("");

	mutex_lock(&pmu->elpg_mutex);

	pmu->elpg_refcnt--;
	if (pmu->elpg_refcnt > 0) {
		gk20a_warn(dev_from_gk20a(g),
		"%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
		__func__, pmu->elpg_refcnt);
		WARN_ON(1);
		ret = 0;
		goto exit_unlock;
	}

	/* cancel off_on_pending and return */
	if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
		pmu->elpg_stat = PMU_ELPG_STAT_OFF;
		ret = 0;
		goto exit_reschedule;
	}
	/* wait if on_pending */
	else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {

		pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
				      &pmu->elpg_stat, PMU_ELPG_STAT_ON);

		if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
			gk20a_err(dev_from_gk20a(g),
				"ELPG_ALLOW_ACK failed, elpg_stat=%d",
				pmu->elpg_stat);
			pmu_dump_elpg_stats(pmu);
			pmu_dump_falcon_stats(pmu);
			ret = -EBUSY;
			goto exit_unlock;
		}
	}
	/* return if ELPG is already off */
	else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
		ret = 0;
		goto exit_reschedule;
	}

	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;

	pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;

	gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);

	pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
			      &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
	if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
		gk20a_err(dev_from_gk20a(g),
			"ELPG_DISALLOW_ACK failed");
		pmu_dump_elpg_stats(pmu);
		pmu_dump_falcon_stats(pmu);
		ret = -EBUSY;
		goto exit_unlock;
	}

exit_reschedule:
exit_unlock:
	mutex_unlock(&pmu->elpg_mutex);
	gk20a_dbg_fn("done");
	return ret;
}

int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
{
	struct pmu_gk20a *pmu = &g->pmu;
	int err;

	gk20a_dbg_fn("");

	if (enable)
		err = pmu_perfmon_start_sampling(pmu);
	else
		err = pmu_perfmon_stop_sampling(pmu);

	return err;
}

int gk20a_pmu_destroy(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;

	gk20a_dbg_fn("");

	if (!support_gk20a_pmu())
		return 0;

	/* make sure the pending operations are finished before we continue */
	cancel_work_sync(&pmu->pg_init);

	gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
		&elpg_ungating_time, &gating_cnt);

	gk20a_pmu_disable_elpg(g);
	pmu->initialized = false;

	/* update the s/w ELPG residency counters */
	g->pg_ingating_time_us += (u64)elpg_ingating_time;
	g->pg_ungating_time_us += (u64)elpg_ungating_time;
	g->pg_gating_cnt += gating_cnt;

	pmu_enable(pmu, false);
	pmu->pmu_state = PMU_STATE_OFF;
	pmu->pmu_ready = false;
	pmu->perfmon_ready = false;
	pmu->zbc_ready = false;

	gk20a_dbg_fn("done");
	return 0;
}

int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
{
	struct pmu_gk20a *pmu = &g->pmu;
	u16 _load = 0;

	if (!pmu->perfmon_ready) {
		*load = 0;
		return 0;
	}

	pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
	*load = _load / 10;

	return 0;
}

void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
				 u32 *total_cycles)
{
	if (!g->power_on) {
		*busy_cycles = 0;
		*total_cycles = 0;
		return;
	}

	gk20a_busy(g->dev);
	*busy_cycles = pwr_pmu_idle_count_value_v(
		gk20a_readl(g, pwr_pmu_idle_count_r(1)));
	rmb();
	*total_cycles = pwr_pmu_idle_count_value_v(
		gk20a_readl(g, pwr_pmu_idle_count_r(2)));
	gk20a_idle(g->dev);
}

void gk20a_pmu_reset_load_counters(struct gk20a *g)
{
	u32 reg_val = pwr_pmu_idle_count_reset_f(1);

	if (!g->power_on)
		return;

	gk20a_busy(g->dev);
	gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
	wmb();
	gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
	gk20a_idle(g->dev);
}

static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
			u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_pg_stats stats;

	if (!pmu->initialized) {
		*ingating_time = 0;
		*ungating_time = 0;
		*gating_cnt = 0;
		return 0;
	}

	pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
		(u8 *)&stats, sizeof(struct pmu_pg_stats), 0);

	*ingating_time = stats.pg_ingating_time_us;
	*ungating_time = stats.pg_ungating_time_us;
	*gating_cnt = stats.pg_gating_cnt;

	return 0;
}

/* Send an Adaptive Power (AP) related command to PMU */
static int gk20a_pmu_ap_send_command(struct gk20a *g,
			union pmu_ap_cmd *p_ap_cmd, bool b_block)
{
	struct pmu_gk20a *pmu = &g->pmu;
	/* FIXME: where is the PG structure defined?? */
	u32 status = 0;
	struct pmu_cmd cmd;
	u32 seq;
	pmu_callback p_callback = NULL;

	memset(&cmd, 0, sizeof(struct pmu_cmd));

	/* Copy common members */
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);

	cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
	cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;

	/* Copy other members of command */
	switch (p_ap_cmd->cmn.cmd_id) {
	case PMU_AP_CMD_ID_INIT:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT");
		cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
			p_ap_cmd->init.pg_sampling_period_us;
		p_callback = ap_callback_init_and_enable_ctrl;
		break;

	case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL");
		cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
		p_ap_cmd->init_and_enable_ctrl.ctrl_id;
		memcpy(
		(void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
			(void *)&(p_ap_cmd->init_and_enable_ctrl.params),
			sizeof(struct pmu_ap_ctrl_init_params));

		p_callback = ap_callback_init_and_enable_ctrl;
		break;

	case PMU_AP_CMD_ID_ENABLE_CTRL:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_ENABLE_CTRL");
		cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
			p_ap_cmd->enable_ctrl.ctrl_id;
		break;

	case PMU_AP_CMD_ID_DISABLE_CTRL:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_DISABLE_CTRL");
		cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
			p_ap_cmd->disable_ctrl.ctrl_id;
		break;

	case PMU_AP_CMD_ID_KICK_CTRL:
		gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_KICK_CTRL");
		cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
			p_ap_cmd->kick_ctrl.ctrl_id;
		cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
			p_ap_cmd->kick_ctrl.skip_count;
		break;

	default:
		gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
			__func__, p_ap_cmd->cmn.cmd_id);
		return 0x2f;
	}

	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			p_callback, pmu, &seq, ~0);

	if (status) {
		gk20a_dbg_pmu(
			"%s: Unable to submit Adaptive Power Command %d\n",
			__func__, p_ap_cmd->cmn.cmd_id);
		goto err_return;
	}

	/* TODO: Implement blocking calls (b_block) */

err_return:
	return status;
}

static void ap_callback_init_and_enable_ctrl(
		struct gk20a *g, struct pmu_msg *msg,
		void *param, u32 seq_desc, u32 status)
{
	/* Define p_ap (i.e pointer to pmu_ap structure) */
	WARN_ON(!msg);

	if (!status) {
		switch (msg->msg.pg.ap_msg.cmn.msg_id) {
		case PMU_AP_MSG_ID_INIT_ACK:
			gk20a_dbg_pmu("reply PMU_AP_CMD_ID_INIT");
			break;

		default:
			gk20a_dbg_pmu(
			"%s: Invalid Adaptive Power Message: %x\n",
			__func__, msg->msg.pg.ap_msg.cmn.msg_id);
			break;
		}
	}
}

static int gk20a_aelpg_init(struct gk20a *g)
{
	int status = 0;

	/* Remove reliance on app_ctrl field. */
	union pmu_ap_cmd ap_cmd;

	/* TODO: Check for elpg being ready? */
	ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
	ap_cmd.init.pg_sampling_period_us =
		APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;

	status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
	return status;
}

static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
{
	int status = 0;
	union pmu_ap_cmd ap_cmd;

	/* TODO: Probably check if ELPG is ready? */

	ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
	ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
	ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
		APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
	ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
		APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
	ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
		APCTRL_POWER_BREAKEVEN_DEFAULT_US;
	ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
		APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;

	switch (ctrl_id) {
	case PMU_AP_CTRL_ID_GRAPHICS:
		break;
	default:
		break;
	}

	status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
	return status;
}

#ifdef CONFIG_DEBUG_FS
static int elpg_residency_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 ingating_time = 0;
	u32 ungating_time = 0;
	u32 gating_cnt;
	u64 total_ingating, total_ungating, residency, divisor, dividend;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		gk20a_busy(g->dev);
		gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
			&ungating_time, &gating_cnt);
		gk20a_idle(g->dev);
	}
	total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
	total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s, "Time in ELPG: %llu us\n"
			"Time out of ELPG: %llu us\n"
			"ELPG residency ratio: %llu\n",
			total_ingating, total_ungating, residency);
	return 0;
}

static int elpg_residency_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_residency_show, inode->i_private);
}

static const struct file_operations elpg_residency_fops = {
	.open		= elpg_residency_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int elpg_transitions_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 ingating_time, ungating_time, total_gating_cnt;
	u32 gating_cnt = 0;

	if (g->power_on) {
		gk20a_busy(g->dev);
		gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
			&ungating_time, &gating_cnt);
		gk20a_idle(g->dev);
	}
	total_gating_cnt = g->pg_gating_cnt + gating_cnt;

	seq_printf(s, "%u\n", total_gating_cnt);
	return 0;
}

static int elpg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_transitions_show, inode->i_private);
}

static const struct file_operations elpg_transitions_fops = {
	.open		= elpg_transitions_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int perfmon_events_enable_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
	return 0;
}

static int perfmon_events_enable_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_enable_show, inode->i_private);
}

static ssize_t perfmon_events_enable_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct seq_file *s = file->private_data;
	struct gk20a *g = s->private;
	unsigned long val = 0;
	char buf[40];
	int buf_size;

	memset(buf, 0, sizeof(buf));
	buf_size = min(count, (sizeof(buf)-1));

	if (copy_from_user(buf, userbuf, buf_size))
		return -EFAULT;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* Don't turn on gk20a unnecessarily */
	if (g->power_on) {
		gk20a_busy(g->dev);
		if (val && !g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = true;
			pmu_perfmon_start_sampling(&(g->pmu));
		} else if (!val && g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = false;
			pmu_perfmon_stop_sampling(&(g->pmu));
		}
		gk20a_idle(g->dev);
	} else {
		g->pmu.perfmon_sampling_enabled = val ? true : false;
	}

	return count;
}

static const struct file_operations perfmon_events_enable_fops = {
	.open		= perfmon_events_enable_open,
	.read		= seq_read,
	.write		= perfmon_events_enable_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
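
/*
 * Userspace usage sketch (the exact path depends on where the platform
 * debugfs directory is created, typically under /sys/kernel/debug):
 *
 *	echo 1 > <debugfs-dir>/perfmon_events_enable	# start perfmon sampling
 *	echo 0 > <debugfs-dir>/perfmon_events_enable	# stop perfmon sampling
 *	cat <debugfs-dir>/perfmon_events_count		# number of events counted
 */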

static int perfmon_events_count_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
	return 0;
}

static int perfmon_events_count_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_count_show, inode->i_private);
}

static const struct file_operations perfmon_events_count_fops = {
	.open		= perfmon_events_count_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

int gk20a_pmu_debugfs_init(struct platform_device *dev)
{
	struct dentry *d;
	struct gk20a_platform *platform = platform_get_drvdata(dev);
	struct gk20a *g = get_gk20a(dev);

	d = debugfs_create_file(
		"elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
						&elpg_residency_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"elpg_transitions", S_IRUGO, platform->debugfs, g,
						&elpg_transitions_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"perfmon_events_enable", S_IRUGO, platform->debugfs, g,
						&perfmon_events_enable_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"perfmon_events_count", S_IRUGO, platform->debugfs, g,
						&perfmon_events_count_fops);
	if (!d)
		goto err_out;
	return 0;

err_out:
	pr_err("%s: Failed to make debugfs node\n", __func__);
	debugfs_remove_recursive(platform->debugfs);
	return -ENOMEM;
}
#endif