summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b
diff options
context:
space:
mode:
author: seshendra Gadagottu <sgadagottu@nvidia.com> 2017-08-15 18:28:35 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-09-26 20:29:23 -0400
commit: 9825a8ec69d54c725c38015006aed655d10ac567 (patch)
tree: 75a085816bca625618869cdbc50522a29b57ea04 /drivers/gpu/nvgpu/gv11b
parent: 971987f363751076a9bddddf7deb5a5a17e8739a (diff)
gpu: nvgpu: fix handling of EGPC_ETPC_SM addresses
Implemented litter values for the following defines:
GPU_LIT_SMPC_PRI_BASE
GPU_LIT_SMPC_PRI_SHARED_BASE
GPU_LIT_SMPC_PRI_UNIQUE_BASE
GPU_LIT_SMPC_PRI_STRIDE

Added broadcast flags for smpc.
Handled all combinations of broadcast/unicast EGPC, ETPC, SM.

Bug 200337994

Change-Id: I7aa3c4d9ac4e819010061d44fb5a40056762f518
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1539075
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c 106
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.h 2
-rw-r--r-- drivers/gpu/nvgpu/gv11b/hal_gv11b.c 12
3 files changed, 111 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index b96f2bc6..f209012c 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -3409,6 +3409,45 @@ bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr)
3409 pri_is_egpc_addr_shared(g, addr); 3409 pri_is_egpc_addr_shared(g, addr);
3410} 3410}
3411 3411
3412static inline u32 pri_smpc_in_etpc_addr_mask(struct gk20a *g, u32 addr)
3413{
3414 u32 smpc_stride = nvgpu_get_litter_value(g,
3415 GPU_LIT_SMPC_PRI_STRIDE);
3416
3417 return (addr & (smpc_stride - 1));
3418}
3419
3420static u32 pri_smpc_ext_addr(struct gk20a *g, u32 sm_offset, u32 gpc_num,
3421 u32 tpc_num, u32 sm_num)
3422{
3423 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3424 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g,
3425 GPU_LIT_TPC_IN_GPC_BASE);
3426 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
3427 GPU_LIT_TPC_IN_GPC_STRIDE);
3428 u32 egpc_base = g->ops.gr.get_egpc_base(g);
3429 u32 smpc_unique_base = nvgpu_get_litter_value(g,
3430 GPU_LIT_SMPC_PRI_UNIQUE_BASE);
3431 u32 smpc_stride = nvgpu_get_litter_value(g,
3432 GPU_LIT_SMPC_PRI_STRIDE);
3433
3434 return (egpc_base + (gpc_num * gpc_stride) + tpc_in_gpc_base +
3435 (tpc_num * tpc_in_gpc_stride) +
3436 (sm_num * smpc_stride) +
3437 (smpc_unique_base + sm_offset));
3438}
3439
3440static bool pri_is_smpc_addr_in_etpc_shared(struct gk20a *g, u32 addr)
3441{
3442 u32 smpc_shared_base = nvgpu_get_litter_value(g,
3443 GPU_LIT_SMPC_PRI_SHARED_BASE);
3444 u32 smpc_stride = nvgpu_get_litter_value(g,
3445 GPU_LIT_SMPC_PRI_STRIDE);
3446
3447 return (addr >= smpc_shared_base) &&
3448 (addr < smpc_shared_base + smpc_stride);
3449}
3450
3412bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr) 3451bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr)
3413{ 3452{
3414 u32 egpc_addr = 0; 3453 u32 egpc_addr = 0;
@@ -3476,6 +3515,7 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type,
3476 u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags) 3515 u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags)
3477{ 3516{
3478 u32 gpc_addr; 3517 u32 gpc_addr;
3518 u32 tpc_addr;
3479 3519
3480 if (g->ops.gr.is_egpc_addr(g, addr)) { 3520 if (g->ops.gr.is_egpc_addr(g, addr)) {
3481 nvgpu_log_info(g, "addr=0x%x is egpc", addr); 3521 nvgpu_log_info(g, "addr=0x%x is egpc", addr);
@@ -3501,6 +3541,9 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type,
3501 *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); 3541 *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
3502 nvgpu_log_info(g, "tpc=0x%x", *tpc_num); 3542 nvgpu_log_info(g, "tpc=0x%x", *tpc_num);
3503 } 3543 }
3544 tpc_addr = pri_tpccs_addr_mask(addr);
3545 if (pri_is_smpc_addr_in_etpc_shared(g, tpc_addr))
3546 *broadcast_flags |= PRI_BROADCAST_FLAGS_SMPC;
3504 } 3547 }
3505 3548
3506 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, 3549 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
@@ -3511,6 +3554,25 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type,
3511 return -EINVAL; 3554 return -EINVAL;
3512} 3555}
3513 3556
3557static void gv11b_gr_update_priv_addr_table_smpc(struct gk20a *g, u32 gpc_num,
3558 u32 tpc_num, u32 addr,
3559 u32 *priv_addr_table, u32 *t)
3560{
3561 u32 sm_per_tpc, sm_num;
3562
3563 nvgpu_log_info(g, "broadcast flags smpc");
3564
3565 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
3566 for (sm_num = 0; sm_num < sm_per_tpc; sm_num++) {
3567 priv_addr_table[*t] = pri_smpc_ext_addr(g,
3568 pri_smpc_in_etpc_addr_mask(g, addr),
3569 gpc_num, tpc_num, sm_num);
3570 nvgpu_log_info(g, "priv_addr_table[%d]:%#08x",
3571 *t, priv_addr_table[*t]);
3572 (*t)++;
3573 }
3574}
3575
3514void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, 3576void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
3515 u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t) 3577 u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t)
3516{ 3578{
@@ -3531,15 +3593,27 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
3531 for (tpc_num = 0; 3593 for (tpc_num = 0;
3532 tpc_num < g->gr.gpc_tpc_count[gpc_num]; 3594 tpc_num < g->gr.gpc_tpc_count[gpc_num];
3533 tpc_num++) { 3595 tpc_num++) {
3534 priv_addr_table[*t] = 3596 if (broadcast_flags &
3535 pri_etpc_addr(g, 3597 PRI_BROADCAST_FLAGS_SMPC) {
3536 pri_tpccs_addr_mask(addr), 3598 gv11b_gr_update_priv_addr_table_smpc(
3537 gpc_num, tpc_num); 3599 g, gpc_num, tpc_num, addr,
3538 nvgpu_log_info(g, 3600 priv_addr_table, t);
3539 "priv_addr_table[%d]:%#08x", 3601 } else {
3540 *t, priv_addr_table[*t]); 3602 priv_addr_table[*t] =
3541 (*t)++; 3603 pri_etpc_addr(g,
3604 pri_tpccs_addr_mask(addr),
3605 gpc_num, tpc_num);
3606 nvgpu_log_info(g,
3607 "priv_addr_table[%d]:%#08x",
3608 *t, priv_addr_table[*t]);
3609 (*t)++;
3610 }
3542 } 3611 }
3612 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) {
3613 tpc_num = 0;
3614 gv11b_gr_update_priv_addr_table_smpc(
3615 g, gpc_num, tpc_num, addr,
3616 priv_addr_table, t);
3543 } else { 3617 } else {
3544 priv_addr_table[*t] = 3618 priv_addr_table[*t] =
3545 pri_egpc_addr(g, 3619 pri_egpc_addr(g,
@@ -3553,10 +3627,17 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
3553 } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_EGPC)) { 3627 } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_EGPC)) {
3554 if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) { 3628 if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) {
3555 nvgpu_log_info(g, "broadcast flags etpc but not egpc"); 3629 nvgpu_log_info(g, "broadcast flags etpc but not egpc");
3630 gpc_num = 0;
3556 for (tpc_num = 0; 3631 for (tpc_num = 0;
3557 tpc_num < g->gr.gpc_tpc_count[gpc]; 3632 tpc_num < g->gr.gpc_tpc_count[gpc];
3558 tpc_num++) { 3633 tpc_num++) {
3559 priv_addr_table[*t] = 3634 if (broadcast_flags &
3635 PRI_BROADCAST_FLAGS_SMPC)
3636 gv11b_gr_update_priv_addr_table_smpc(
3637 g, gpc_num, tpc_num, addr,
3638 priv_addr_table, t);
3639 else {
3640 priv_addr_table[*t] =
3560 pri_etpc_addr(g, 3641 pri_etpc_addr(g,
3561 pri_tpccs_addr_mask(addr), 3642 pri_tpccs_addr_mask(addr),
3562 gpc, tpc_num); 3643 gpc, tpc_num);
@@ -3564,7 +3645,14 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
3564 "priv_addr_table[%d]:%#08x", 3645 "priv_addr_table[%d]:%#08x",
3565 *t, priv_addr_table[*t]); 3646 *t, priv_addr_table[*t]);
3566 (*t)++; 3647 (*t)++;
3648 }
3567 } 3649 }
3650 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) {
3651 tpc_num = 0;
3652 gpc_num = 0;
3653 gv11b_gr_update_priv_addr_table_smpc(
3654 g, gpc_num, tpc_num, addr,
3655 priv_addr_table, t);
3568 } else { 3656 } else {
3569 priv_addr_table[*t] = addr; 3657 priv_addr_table[*t] = addr;
3570 nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", 3658 nvgpu_log_info(g, "priv_addr_table[%d]:%#08x",
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index 8880fbdd..3f06fe77 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -28,6 +28,8 @@
28#define EGPC_PRI_BASE 0x580000 28#define EGPC_PRI_BASE 0x580000
29#define EGPC_PRI_SHARED_BASE 0x480000 29#define EGPC_PRI_SHARED_BASE 0x480000
30 30
31#define PRI_BROADCAST_FLAGS_SMPC BIT(17)
32
31#define GV11B_ZBC_TYPE_STENCIL T19X_ZBC 33#define GV11B_ZBC_TYPE_STENCIL T19X_ZBC
32#define ZBC_STENCIL_CLEAR_FMT_INVAILD 0 34#define ZBC_STENCIL_CLEAR_FMT_INVAILD 0
33#define ZBC_STENCIL_CLEAR_FMT_U8 1 35#define ZBC_STENCIL_CLEAR_FMT_U8 1
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index f8bdc213..9baa3581 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -166,6 +166,18 @@ static int gv11b_get_litter_value(struct gk20a *g, int value)
166 case GPU_LIT_SM_PRI_STRIDE: 166 case GPU_LIT_SM_PRI_STRIDE:
167 ret = proj_sm_stride_v(); 167 ret = proj_sm_stride_v();
168 break; 168 break;
169 case GPU_LIT_SMPC_PRI_BASE:
170 ret = proj_smpc_base_v();
171 break;
172 case GPU_LIT_SMPC_PRI_SHARED_BASE:
173 ret = proj_smpc_shared_base_v();
174 break;
175 case GPU_LIT_SMPC_PRI_UNIQUE_BASE:
176 ret = proj_smpc_unique_base_v();
177 break;
178 case GPU_LIT_SMPC_PRI_STRIDE:
179 ret = proj_smpc_stride_v();
180 break;
169 /* Even though GV11B doesn't have an FBPA unit, the HW reports one, 181 /* Even though GV11B doesn't have an FBPA unit, the HW reports one,
170 * and the microcode as a result leaves space in the context buffer 182 * and the microcode as a result leaves space in the context buffer
171 * for one, so make sure SW accounts for this also. 183 * for one, so make sure SW accounts for this also.