diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 20 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.c | 45 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 1 |
13 files changed, 70 insertions, 21 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 2d1eb388..c55ba146 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -472,6 +472,10 @@ struct gpu_ops { | |||
472 | u32 *priv_addr_table, | 472 | u32 *priv_addr_table, |
473 | u32 *num_registers); | 473 | u32 *num_registers); |
474 | u32 (*get_pmm_per_chiplet_offset)(void); | 474 | u32 (*get_pmm_per_chiplet_offset)(void); |
475 | void (*split_fbpa_broadcast_addr)(struct gk20a *g, u32 addr, | ||
476 | u32 num_fbpas, | ||
477 | u32 *priv_addr_table, | ||
478 | u32 *priv_addr_table_index); | ||
475 | } gr; | 479 | } gr; |
476 | struct { | 480 | struct { |
477 | void (*init_hw)(struct gk20a *g); | 481 | void (*init_hw)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 96bc72af..121f264a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6323,6 +6323,17 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | |||
6323 | return -EINVAL; | 6323 | return -EINVAL; |
6324 | } | 6324 | } |
6325 | 6325 | ||
6326 | void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | ||
6327 | u32 num_fbpas, | ||
6328 | u32 *priv_addr_table, u32 *t) | ||
6329 | { | ||
6330 | u32 fbpa_id; | ||
6331 | |||
6332 | for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) | ||
6333 | priv_addr_table[(*t)++] = pri_fbpa_addr(g, | ||
6334 | pri_fbpa_addr_mask(g, addr), fbpa_id); | ||
6335 | } | ||
6336 | |||
6326 | int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, | 6337 | int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, |
6327 | u32 gpc_num, | 6338 | u32 gpc_num, |
6328 | u32 *priv_addr_table, u32 *t) | 6339 | u32 *priv_addr_table, u32 *t) |
@@ -6356,7 +6367,6 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6356 | u32 broadcast_flags; | 6367 | u32 broadcast_flags; |
6357 | u32 t; | 6368 | u32 t; |
6358 | int err; | 6369 | int err; |
6359 | int fbpa_num; | ||
6360 | 6370 | ||
6361 | t = 0; | 6371 | t = 0; |
6362 | *num_registers = 0; | 6372 | *num_registers = 0; |
@@ -6430,11 +6440,9 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6430 | g->ops.gr.split_ltc_broadcast_addr(g, addr, | 6440 | g->ops.gr.split_ltc_broadcast_addr(g, addr, |
6431 | priv_addr_table, &t); | 6441 | priv_addr_table, &t); |
6432 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) { | 6442 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) { |
6433 | for (fbpa_num = 0; | 6443 | g->ops.gr.split_fbpa_broadcast_addr(g, addr, |
6434 | fbpa_num < nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); | 6444 | nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS), |
6435 | fbpa_num++) | 6445 | priv_addr_table, &t); |
6436 | priv_addr_table[t++] = pri_fbpa_addr(g, | ||
6437 | pri_fbpa_addr_mask(g, addr), fbpa_num); | ||
6438 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) { | 6446 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) { |
6439 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) | 6447 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) |
6440 | for (tpc_num = 0; | 6448 | for (tpc_num = 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index cd58cfa3..02f5e534 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -832,4 +832,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
832 | u32 addr, | 832 | u32 addr, |
833 | u32 *priv_addr_table, | 833 | u32 *priv_addr_table, |
834 | u32 *num_registers); | 834 | u32 *num_registers); |
835 | void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | ||
836 | u32 num_fbpas, | ||
837 | u32 *priv_addr_table, u32 *t); | ||
835 | #endif /*__GR_GK20A_H__*/ | 838 | #endif /*__GR_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index bbe2b100..d6f64bb1 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -324,6 +324,7 @@ static const struct gpu_ops gm20b_ops = { | |||
324 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, | 324 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, |
325 | .get_pmm_per_chiplet_offset = | 325 | .get_pmm_per_chiplet_offset = |
326 | gr_gm20b_get_pmm_per_chiplet_offset, | 326 | gr_gm20b_get_pmm_per_chiplet_offset, |
327 | .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, | ||
327 | }, | 328 | }, |
328 | .fb = { | 329 | .fb = { |
329 | .reset = fb_gk20a_reset, | 330 | .reset = fb_gk20a_reset, |
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 91bc614c..eecb0f09 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -387,6 +387,7 @@ static const struct gpu_ops gp106_ops = { | |||
387 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, | 387 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, |
388 | .get_pmm_per_chiplet_offset = | 388 | .get_pmm_per_chiplet_offset = |
389 | gr_gm20b_get_pmm_per_chiplet_offset, | 389 | gr_gm20b_get_pmm_per_chiplet_offset, |
390 | .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, | ||
390 | }, | 391 | }, |
391 | .fb = { | 392 | .fb = { |
392 | .reset = gp106_fb_reset, | 393 | .reset = gp106_fb_reset, |
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 5c10d79d..d70d6ac0 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -355,6 +355,7 @@ static const struct gpu_ops gp10b_ops = { | |||
355 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, | 355 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, |
356 | .get_pmm_per_chiplet_offset = | 356 | .get_pmm_per_chiplet_offset = |
357 | gr_gm20b_get_pmm_per_chiplet_offset, | 357 | gr_gm20b_get_pmm_per_chiplet_offset, |
358 | .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, | ||
358 | }, | 359 | }, |
359 | .fb = { | 360 | .fb = { |
360 | .reset = fb_gk20a_reset, | 361 | .reset = fb_gk20a_reset, |
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index c6273733..98e61eb0 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #include "gk20a/gk20a.h" | 29 | #include "gk20a/gk20a.h" |
30 | #include "gk20a/gr_gk20a.h" | 30 | #include "gk20a/gr_gk20a.h" |
31 | #include "gk20a/gr_pri_gk20a.h" | ||
31 | 32 | ||
32 | #include "gv100/gr_gv100.h" | 33 | #include "gv100/gr_gv100.h" |
33 | #include "gv11b/subctx_gv11b.h" | 34 | #include "gv11b/subctx_gv11b.h" |
@@ -332,6 +333,23 @@ u32 gr_gv100_get_patch_slots(struct gk20a *g) | |||
332 | return size; | 333 | return size; |
333 | } | 334 | } |
334 | 335 | ||
336 | static u32 gr_gv100_get_active_fpba_mask(struct gk20a *g, u32 num_fbpas) | ||
337 | { | ||
338 | u32 active_fbpa_mask; | ||
339 | |||
340 | /* | ||
341 | * Read active fbpa mask from fuse | ||
342 | * Note that 0:enable and 1:disable in value read from fuse so we've to | ||
343 | * flip the bits. | ||
344 | * Also set unused bits to zero | ||
345 | */ | ||
346 | active_fbpa_mask = nvgpu_readl(g, fuse_status_opt_fbio_r()); | ||
347 | active_fbpa_mask = ~active_fbpa_mask; | ||
348 | active_fbpa_mask = active_fbpa_mask & ((1 << num_fbpas) - 1); | ||
349 | |||
350 | return active_fbpa_mask; | ||
351 | } | ||
352 | |||
335 | int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g, | 353 | int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g, |
336 | struct ctxsw_buf_offset_map_entry *map, | 354 | struct ctxsw_buf_offset_map_entry *map, |
337 | struct aiv_list_gk20a *regs, | 355 | struct aiv_list_gk20a *regs, |
@@ -348,15 +366,7 @@ int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g, | |||
348 | if ((cnt + (regs->count * num_fbpas)) > max_cnt) | 366 | if ((cnt + (regs->count * num_fbpas)) > max_cnt) |
349 | return -EINVAL; | 367 | return -EINVAL; |
350 | 368 | ||
351 | /* | 369 | active_fbpa_mask = gr_gv100_get_active_fpba_mask(g, num_fbpas); |
352 | * Read active fbpa mask from fuse | ||
353 | * Note that 0:enable and 1:disable in value read from fuse so we've to | ||
354 | * flip the bits. | ||
355 | * Also set unused bits to zero | ||
356 | */ | ||
357 | active_fbpa_mask = nvgpu_readl(g, fuse_status_opt_fbio_r()); | ||
358 | active_fbpa_mask = ~active_fbpa_mask; | ||
359 | active_fbpa_mask = active_fbpa_mask & ((1 << num_fbpas) - 1); | ||
360 | 370 | ||
361 | for (idx = 0; idx < regs->count; idx++) { | 371 | for (idx = 0; idx < regs->count; idx++) { |
362 | for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) { | 372 | for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) { |
@@ -383,3 +393,20 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, | |||
383 | return gr_gk20a_add_ctxsw_reg_perf_pma(map, regs, | 393 | return gr_gk20a_add_ctxsw_reg_perf_pma(map, regs, |
384 | count, offset, max_cnt, base, mask); | 394 | count, offset, max_cnt, base, mask); |
385 | } | 395 | } |
396 | |||
397 | void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | ||
398 | u32 num_fbpas, | ||
399 | u32 *priv_addr_table, u32 *t) | ||
400 | { | ||
401 | u32 active_fbpa_mask; | ||
402 | u32 fbpa_id; | ||
403 | |||
404 | active_fbpa_mask = gr_gv100_get_active_fpba_mask(g, num_fbpas); | ||
405 | |||
406 | for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) { | ||
407 | if (active_fbpa_mask & BIT(fbpa_id)) { | ||
408 | priv_addr_table[(*t)++] = pri_fbpa_addr(g, | ||
409 | pri_fbpa_addr_mask(g, addr), fbpa_id); | ||
410 | } | ||
411 | } | ||
412 | } | ||
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 7b107db2..ccc73e28 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -43,4 +43,7 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, | |||
43 | struct aiv_list_gk20a *regs, | 43 | struct aiv_list_gk20a *regs, |
44 | u32 *count, u32 *offset, | 44 | u32 *count, u32 *offset, |
45 | u32 max_cnt, u32 base, u32 mask); | 45 | u32 max_cnt, u32 base, u32 mask); |
46 | void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | ||
47 | u32 num_fbpas, | ||
48 | u32 *priv_addr_table, u32 *t); | ||
46 | #endif | 49 | #endif |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index fef2fb94..fc303e70 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -434,6 +434,7 @@ static const struct gpu_ops gv100_ops = { | |||
434 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, | 434 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, |
435 | .get_pmm_per_chiplet_offset = | 435 | .get_pmm_per_chiplet_offset = |
436 | gr_gv11b_get_pmm_per_chiplet_offset, | 436 | gr_gv11b_get_pmm_per_chiplet_offset, |
437 | .split_fbpa_broadcast_addr = gr_gv100_split_fbpa_broadcast_addr, | ||
437 | }, | 438 | }, |
438 | .fb = { | 439 | .fb = { |
439 | .reset = gv100_fb_reset, | 440 | .reset = gv100_fb_reset, |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index dfb14db7..24366911 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -4557,7 +4557,6 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, | |||
4557 | u32 broadcast_flags; | 4557 | u32 broadcast_flags; |
4558 | u32 t; | 4558 | u32 t; |
4559 | int err; | 4559 | int err; |
4560 | int fbpa_num; | ||
4561 | 4560 | ||
4562 | t = 0; | 4561 | t = 0; |
4563 | *num_registers = 0; | 4562 | *num_registers = 0; |
@@ -4671,11 +4670,9 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, | |||
4671 | g->ops.gr.split_ltc_broadcast_addr(g, addr, | 4670 | g->ops.gr.split_ltc_broadcast_addr(g, addr, |
4672 | priv_addr_table, &t); | 4671 | priv_addr_table, &t); |
4673 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) { | 4672 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) { |
4674 | for (fbpa_num = 0; | 4673 | g->ops.gr.split_fbpa_broadcast_addr(g, addr, |
4675 | fbpa_num < nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); | 4674 | nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS), |
4676 | fbpa_num++) | 4675 | priv_addr_table, &t); |
4677 | priv_addr_table[t++] = pri_fbpa_addr(g, | ||
4678 | pri_fbpa_addr_mask(g, addr), fbpa_num); | ||
4679 | } else if ((addr_type == CTXSW_ADDR_TYPE_LTCS) && | 4676 | } else if ((addr_type == CTXSW_ADDR_TYPE_LTCS) && |
4680 | (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC)) { | 4677 | (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC)) { |
4681 | gr_gv11b_split_pmm_fbp_broadcast_address(g, | 4678 | gr_gv11b_split_pmm_fbp_broadcast_address(g, |
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index d0a564db..49e83c4c 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -407,6 +407,7 @@ static const struct gpu_ops gv11b_ops = { | |||
407 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, | 407 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, |
408 | .get_pmm_per_chiplet_offset = | 408 | .get_pmm_per_chiplet_offset = |
409 | gr_gv11b_get_pmm_per_chiplet_offset, | 409 | gr_gv11b_get_pmm_per_chiplet_offset, |
410 | .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, | ||
410 | }, | 411 | }, |
411 | .fb = { | 412 | .fb = { |
412 | .reset = gv11b_fb_reset, | 413 | .reset = gv11b_fb_reset, |
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 53ba1e14..5a11af7e 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -229,6 +229,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { | |||
229 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, | 229 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, |
230 | .get_pmm_per_chiplet_offset = | 230 | .get_pmm_per_chiplet_offset = |
231 | gr_gm20b_get_pmm_per_chiplet_offset, | 231 | gr_gm20b_get_pmm_per_chiplet_offset, |
232 | .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, | ||
232 | }, | 233 | }, |
233 | .fb = { | 234 | .fb = { |
234 | .reset = fb_gk20a_reset, | 235 | .reset = fb_gk20a_reset, |
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
index 0b9049dd..975bce32 100644
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -265,6 +265,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { | |||
265 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, | 265 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, |
266 | .get_pmm_per_chiplet_offset = | 266 | .get_pmm_per_chiplet_offset = |
267 | gr_gv11b_get_pmm_per_chiplet_offset, | 267 | gr_gv11b_get_pmm_per_chiplet_offset, |
268 | .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, | ||
268 | }, | 269 | }, |
269 | .fb = { | 270 | .fb = { |
270 | .reset = gv11b_fb_reset, | 271 | .reset = gv11b_fb_reset, |