author		Bo Yan <byan@nvidia.com>	2014-04-14 15:03:27 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:09:38 -0400
commit		9eb1f57ba2b7072c6c53ca9dc59b10a87d88db23 (patch)
tree		de095fcbae617e938064e45a96b4e176015fe773 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent		2531751f53fc21bdeb0ece4af550ea1e8efb9653 (diff)
gpu: nvgpu: Add GPU driver for GM20B

This moves the GM20B driver to its new location.

Change-Id: I5fde14e114a8db79738a4c61849912b1ae225fb5
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r--	drivers/gpu/nvgpu/gm20b/gr_gm20b.c	593
1 file changed, 593 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
new file mode 100644
index 00000000..54184766
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -0,0 +1,593 @@
/*
 * GM20B GPC MMU
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>

#include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h"

#include "gr_gm20b.h"
#include "hw_gr_gm20b.h"
#include "hw_fb_gm20b.h"
#include "hw_proj_gm20b.h"
#include "hw_ctxsw_prog_gm20b.h"

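/*
 * Mirror the framebuffer MMU configuration into the GPC MMU: carry the
 * page-size, fault, cache and aperture fields over from fb_mmu_ctrl,
 * clear the PM unit/request masks, and copy the debug control/write/read
 * and active-LTC settings verbatim from their FB counterparts.
 */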
static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
{
	u32 temp;

	gk20a_dbg_info("initialize gpc mmu");

	temp = gk20a_readl(g, fb_mmu_ctrl_r());
	temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
		gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
		gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
		gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
		gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
		gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
	gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
	gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
	gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);

	gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
			gk20a_readl(g, fb_mmu_debug_ctrl_r()));
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
			gk20a_readl(g, fb_mmu_debug_wr_r()));
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
			gk20a_readl(g, fb_mmu_debug_rd_r()));

	gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(),
			gk20a_readl(g, fb_fbhub_num_active_ltcs_r()));
}

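/* Reset-time defaults for the bundle circular buffer: size, minimum GPM FIFO depth and token limit. */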
static void gr_gm20b_bundle_cb_defaults(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;

	gr->bundle_cb_default_size =
		gr_scc_bundle_cb_size_div_256b__prod_v();
	gr->min_gpm_fifo_depth =
		gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
	gr->bundle_cb_token_limit =
		gr_pd_ab_dist_cfg2_token_limit_init_v();
}

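/* Hardware default sizes for the beta (attribute) and alpha circular buffers. */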
static void gr_gm20b_cb_size_default(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;

	gr->attrib_cb_default_size =
		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
	gr->alpha_cb_default_size =
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
}

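/*
 * Size the global context buffer for circular-buffer data: both the
 * attribute and alpha CBs are grown to 1.5x their defaults, then scaled
 * by their byte granularity and the maximum TPC count.
 */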
static int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int size;

	gr->attrib_cb_size = gr->attrib_cb_default_size
		+ (gr->attrib_cb_default_size >> 1);
	gr->alpha_cb_size = gr->alpha_cb_default_size
		+ (gr->alpha_cb_default_size >> 1);

	size = gr->attrib_cb_size *
		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
		gr->max_tpc_count;

	size += gr->alpha_cb_size *
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
		gr->max_tpc_count;

	return size;
}

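/*
 * Patch the global attribute CB base address into the context image at
 * the setup, PE pin and MPC VTG base registers, marking each one valid.
 */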
static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g,
					struct channel_ctx_gk20a *ch_ctx,
					u64 addr, bool patch)
{
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
		gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
		gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
		gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
		gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
}

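/*
 * Patch the bundle CB base and size for both the SCC and SWDX units,
 * then program pd_ab_dist_cfg2 with the token limit and a state limit
 * derived from the default bundle CB size, capped at the minimum GPM
 * FIFO depth.
 */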
static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
					struct channel_ctx_gk20a *ch_ctx,
					u64 addr, u64 size, bool patch)
{
	u32 data;

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
		gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
		gr_scc_bundle_cb_size_div_256b_f(size) |
		gr_scc_bundle_cb_size_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
		gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);

	/* data for state_limit */
	data = (g->gr.bundle_cb_default_size *
		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();

	data = min_t(u32, data, g->gr.min_gpm_fifo_depth);

	gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
		g->gr.bundle_cb_token_limit, data);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
		gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
}

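/*
 * Lay out the per-PPC beta (attribute) and alpha circular buffers in the
 * context image: patch size and offset registers for every PPC in every
 * GPC, accumulating offsets into the chunk, and set the constraint-logic
 * and pd_ab_dist_cfg1 limits up front.
 */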
static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
			struct channel_gk20a *c, bool patch)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = NULL;
	u32 attrib_offset_in_chunk = 0;
	u32 alpha_offset_in_chunk = 0;
	u32 pd_ab_max_output;
	u32 gpc_index, ppc_index;
	u32 temp;
	u32 cbm_cfg_size1, cbm_cfg_size2;

	gk20a_dbg_fn("");

	if (patch) {
		int err;
		ch_ctx = &c->ch_ctx;
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
		if (err)
			return err;
	}

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
		gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
		gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
		patch);

	pd_ab_max_output = (gr->alpha_cb_default_size *
		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

	alpha_offset_in_chunk = attrib_offset_in_chunk +
		gr->tpc_count * gr->attrib_cb_size;

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		temp = proj_gpc_stride_v() * gpc_index;
		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {
			cbm_cfg_size1 = gr->attrib_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];
			cbm_cfg_size2 = gr->alpha_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				cbm_cfg_size1, patch);

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				attrib_offset_in_chunk, patch);

			attrib_offset_in_chunk += gr->attrib_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				cbm_cfg_size2, patch);

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				alpha_offset_in_chunk, patch);

			alpha_offset_in_chunk += gr->alpha_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + gpc_index),
				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) |
				gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3),
				patch);
		}
	}

	if (patch)
		gr_gk20a_ctx_patch_write_end(g, ch_ctx);

	return 0;
}

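/* Commit the pagepool as on GK20A, then patch the SWDX RM pagepool copy as well. */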
static void gr_gm20b_commit_global_pagepool(struct gk20a *g,
					struct channel_ctx_gk20a *ch_ctx,
					u64 addr, u32 size, bool patch)
{
	gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(),
		gr_gpcs_swdx_rm_pagepool_total_pages_f(size) |
		gr_gpcs_swdx_rm_pagepool_valid_true_f(), patch);
}

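/*
 * Dispatch software methods for the Maxwell compute (B1C0) and graphics
 * (B197) classes; anything unrecognized fails with -EINVAL.
 */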
static int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
				     u32 class_num, u32 offset, u32 data)
{
	gk20a_dbg_fn("");

	if (class_num == MAXWELL_COMPUTE_B) {
		switch (offset << 2) {
		case NVB1C0_SET_SHADER_EXCEPTIONS:
			gk20a_gr_set_shader_exceptions(g, data);
			break;
		default:
			goto fail;
		}
	}

	if (class_num == MAXWELL_B) {
		switch (offset << 2) {
		case NVB197_SET_SHADER_EXCEPTIONS:
			gk20a_gr_set_shader_exceptions(g, data);
			break;
		case NVB197_SET_CIRCULAR_BUFFER_SIZE:
			g->ops.gr.set_circular_buffer_size(g, data);
			break;
		case NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
			g->ops.gr.set_alpha_circular_buffer_size(g, data);
			break;
		default:
			goto fail;
		}
	}
	return 0;

fail:
	return -EINVAL;
}

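/*
 * Runtime resize of the alpha circular buffer: clamp the requested size,
 * update the constraint logic and pd_ab_dist_cfg1 max output, then write
 * the per-PES-scaled size into every PPC's alpha CB size register.
 */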
static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 pd_ab_max_output;
	u32 alpha_cb_size = data * 4;

	gk20a_dbg_fn("");
	/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
		return; */

	if (alpha_cb_size > gr->alpha_cb_size)
		alpha_cb_size = gr->alpha_cb_size;

	gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
		 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
		 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));

	pd_ab_max_output = alpha_cb_size *
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
			ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
					gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
						gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);
		}
	}
}

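/*
 * Runtime resize of the beta (attribute) circular buffer: clamp the
 * requested size, update the constraint logic, then write the scaled
 * size into each PPC's beta CB register and the SWDX TC shadow copy.
 */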
void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 cb_size = data * 4;

	gk20a_dbg_fn("");

	if (cb_size > gr->attrib_cb_size)
		cb_size = gr->attrib_cb_size;

	gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
		 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
		 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
			ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			val = set_field(val,
				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);

			val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
						ppc_index + gpc_index));

			val = set_field(val,
				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
				gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
					gr->gpc_ppc_count[gpc_index]));
			val = set_field(val,
				gr_gpcs_swdx_tc_beta_cb_size_div3_m(),
				gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size *
					gr->gpc_ppc_count[gpc_index])/3));

			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
						ppc_index + gpc_index), val);
		}
	}
}

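/* Enable the GK20A HWW exceptions, plus the GM20B-specific DS ESR 2 unit and its SPH24 error reporting. */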
static void gr_gm20b_enable_hww_exceptions(struct gk20a *g)
{
	gr_gk20a_enable_hww_exceptions(g);

	gk20a_writel(g, gr_ds_hww_esr_2_r(),
		     gr_ds_hww_esr_2_en_enabled_f() |
		     gr_ds_hww_esr_2_reset_task_f());
	gk20a_writel(g, gr_ds_hww_report_mask_2_r(),
		     gr_ds_hww_report_mask_2_sph24_err_report_f());
}

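/* Enable reporting for the full set of SM warp errors and for the global SM-to-SM fault and multiple-warp-error conditions. */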
static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g)
{
	/* setup sm warp esr report masks */
	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_mmu_fault_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_overflow_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());

	/* setup sm global esr report mask */
	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f());
}

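/* Object classes accepted by the GM20B GR engine. */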
static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
{
	bool valid = false;

	switch (class_num) {
	case MAXWELL_COMPUTE_B:
	case MAXWELL_B:
	case FERMI_TWOD_A:
	case KEPLER_DMA_COPY_A:
	case MAXWELL_DMA_COPY_A:
		valid = true;
		break;

	default:
		break;
	}

	return valid;
}

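/*
 * The SM DSM perf counter and control register lists are inherited from
 * GK20A; only the per-SM register strides change on Maxwell, so the two
 * helpers below override the stride after delegating.
 */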
static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
					  u32 *num_sm_dsm_perf_regs,
					  u32 **sm_dsm_perf_regs,
					  u32 *perf_register_stride)
{
	gr_gk20a_get_sm_dsm_perf_regs(g, num_sm_dsm_perf_regs,
				      sm_dsm_perf_regs,
				      perf_register_stride);
	*perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
}

static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
					       u32 *num_sm_dsm_perf_regs,
					       u32 **sm_dsm_perf_regs,
					       u32 *ctrl_register_stride)
{
	gr_gk20a_get_sm_dsm_perf_ctrl_regs(g, num_sm_dsm_perf_regs,
					   sm_dsm_perf_regs,
					   ctrl_register_stride);
	*ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
}

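/*
 * Initialize floorswept state: assign SM IDs across the enabled GPC/TPC
 * pairs, program the TPC-per-GPC counts and GPC skip tables, set up ROP
 * mapping and the ZROP/CROP active-LTC counts, and seed the CWD TPC/SM
 * ID tables (with a trivial single-TPC mapping off silicon).
 */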
static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	u32 tpc_index, gpc_index;
	u32 tpc_offset, gpc_offset;
	u32 sm_id = 0, gpc_id = 0;
	u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
	u32 tpc_per_gpc;

	gk20a_dbg_fn("");

	for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
			gpc_offset = proj_gpc_stride_v() * gpc_index;
			if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
				tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;

				gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
					     gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
				gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset,
					     gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
				gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
					     gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));

				sm_id_to_gpc_id[sm_id] = gpc_index;
				sm_id++;
			}
		}
	}

	for (tpc_index = 0, gpc_id = 0;
	     tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
	     tpc_index++, gpc_id += 8) {

		if (gpc_id >= gr->gpc_count)
			gpc_id = 0;

		tpc_per_gpc =
			gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
			gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) |
			gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) |
			gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) |
			gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) |
			gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) |
			gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) |
			gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]);

		gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
		gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
	}

	/* gr__setup_pd_mapping stubbed for gk20a */
	gr_gk20a_setup_rop_mapping(g, gr);

	for (gpc_index = 0;
	     gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
	     gpc_index += 4) {

		gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
			     gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) |
			     gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) |
			     gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) |
			     gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
	}

	gk20a_writel(g, gr_cwd_fs_r(),
		     gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
		     gr_cwd_fs_num_tpcs_f(gr->tpc_count));

	gk20a_writel(g, gr_bes_zrop_settings_r(),
		     gr_bes_zrop_settings_num_active_ltcs_f(gr->num_fbps));
	gk20a_writel(g, gr_bes_crop_settings_r(),
		     gr_bes_crop_settings_num_active_ltcs_f(gr->num_fbps));

	gk20a_writel(g, gr_bes_crop_debug3_r(),
		     gk20a_readl(g, gr_be0_crop_debug3_r()) |
		     gr_bes_crop_debug3_comp_vdc_4to2_disable_m());

	if (tegra_platform_is_silicon()) {
		gk20a_writel(g, gr_fe_tpc_fs_r(), gr->pes_tpc_mask[0][0]);

		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0) |
			     gr_cwd_gpc_tpc_id_tpc1_f(1));

		gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0) |
			     gr_cwd_sm_id_tpc1_f(1));
	} else {
		gk20a_writel(g, gr_fe_tpc_fs_r(), 1);

		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0));

		gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0));
	}

	return 0;
}

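/* Populate the GR ops table with GM20B implementations, reusing GK20A code where the hardware is unchanged. */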
void gm20b_init_gr(struct gpu_ops *gops)
{
	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
	gops->gr.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults;
	gops->gr.cb_size_default = gr_gm20b_cb_size_default;
	gops->gr.calc_global_ctx_buffer_size =
		gr_gm20b_calc_global_ctx_buffer_size;
	gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb;
	gops->gr.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb;
	gops->gr.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager;
	gops->gr.commit_global_pagepool = gr_gm20b_commit_global_pagepool;
	gops->gr.handle_sw_method = gr_gm20b_handle_sw_method;
	gops->gr.set_alpha_circular_buffer_size = gr_gm20b_set_alpha_circular_buffer_size;
	gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size;
	gops->gr.enable_hww_exceptions = gr_gm20b_enable_hww_exceptions;
	gops->gr.is_valid_class = gr_gm20b_is_valid_class;
	gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs;
	gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
	gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep;
	gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask;
}