diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 106 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_proj_gv11b.h | 16 |
4 files changed, 127 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index b96f2bc6..f209012c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -3409,6 +3409,45 @@ bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr) | |||
3409 | pri_is_egpc_addr_shared(g, addr); | 3409 | pri_is_egpc_addr_shared(g, addr); |
3410 | } | 3410 | } |
3411 | 3411 | ||
3412 | static inline u32 pri_smpc_in_etpc_addr_mask(struct gk20a *g, u32 addr) | ||
3413 | { | ||
3414 | u32 smpc_stride = nvgpu_get_litter_value(g, | ||
3415 | GPU_LIT_SMPC_PRI_STRIDE); | ||
3416 | |||
3417 | return (addr & (smpc_stride - 1)); | ||
3418 | } | ||
3419 | |||
3420 | static u32 pri_smpc_ext_addr(struct gk20a *g, u32 sm_offset, u32 gpc_num, | ||
3421 | u32 tpc_num, u32 sm_num) | ||
3422 | { | ||
3423 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3424 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, | ||
3425 | GPU_LIT_TPC_IN_GPC_BASE); | ||
3426 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
3427 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
3428 | u32 egpc_base = g->ops.gr.get_egpc_base(g); | ||
3429 | u32 smpc_unique_base = nvgpu_get_litter_value(g, | ||
3430 | GPU_LIT_SMPC_PRI_UNIQUE_BASE); | ||
3431 | u32 smpc_stride = nvgpu_get_litter_value(g, | ||
3432 | GPU_LIT_SMPC_PRI_STRIDE); | ||
3433 | |||
3434 | return (egpc_base + (gpc_num * gpc_stride) + tpc_in_gpc_base + | ||
3435 | (tpc_num * tpc_in_gpc_stride) + | ||
3436 | (sm_num * smpc_stride) + | ||
3437 | (smpc_unique_base + sm_offset)); | ||
3438 | } | ||
3439 | |||
3440 | static bool pri_is_smpc_addr_in_etpc_shared(struct gk20a *g, u32 addr) | ||
3441 | { | ||
3442 | u32 smpc_shared_base = nvgpu_get_litter_value(g, | ||
3443 | GPU_LIT_SMPC_PRI_SHARED_BASE); | ||
3444 | u32 smpc_stride = nvgpu_get_litter_value(g, | ||
3445 | GPU_LIT_SMPC_PRI_STRIDE); | ||
3446 | |||
3447 | return (addr >= smpc_shared_base) && | ||
3448 | (addr < smpc_shared_base + smpc_stride); | ||
3449 | } | ||
3450 | |||
3412 | bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr) | 3451 | bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr) |
3413 | { | 3452 | { |
3414 | u32 egpc_addr = 0; | 3453 | u32 egpc_addr = 0; |
@@ -3476,6 +3515,7 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, | |||
3476 | u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags) | 3515 | u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags) |
3477 | { | 3516 | { |
3478 | u32 gpc_addr; | 3517 | u32 gpc_addr; |
3518 | u32 tpc_addr; | ||
3479 | 3519 | ||
3480 | if (g->ops.gr.is_egpc_addr(g, addr)) { | 3520 | if (g->ops.gr.is_egpc_addr(g, addr)) { |
3481 | nvgpu_log_info(g, "addr=0x%x is egpc", addr); | 3521 | nvgpu_log_info(g, "addr=0x%x is egpc", addr); |
@@ -3501,6 +3541,9 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, | |||
3501 | *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); | 3541 | *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); |
3502 | nvgpu_log_info(g, "tpc=0x%x", *tpc_num); | 3542 | nvgpu_log_info(g, "tpc=0x%x", *tpc_num); |
3503 | } | 3543 | } |
3544 | tpc_addr = pri_tpccs_addr_mask(addr); | ||
3545 | if (pri_is_smpc_addr_in_etpc_shared(g, tpc_addr)) | ||
3546 | *broadcast_flags |= PRI_BROADCAST_FLAGS_SMPC; | ||
3504 | } | 3547 | } |
3505 | 3548 | ||
3506 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | 3549 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, |
@@ -3511,6 +3554,25 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, | |||
3511 | return -EINVAL; | 3554 | return -EINVAL; |
3512 | } | 3555 | } |
3513 | 3556 | ||
3557 | static void gv11b_gr_update_priv_addr_table_smpc(struct gk20a *g, u32 gpc_num, | ||
3558 | u32 tpc_num, u32 addr, | ||
3559 | u32 *priv_addr_table, u32 *t) | ||
3560 | { | ||
3561 | u32 sm_per_tpc, sm_num; | ||
3562 | |||
3563 | nvgpu_log_info(g, "broadcast flags smpc"); | ||
3564 | |||
3565 | sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | ||
3566 | for (sm_num = 0; sm_num < sm_per_tpc; sm_num++) { | ||
3567 | priv_addr_table[*t] = pri_smpc_ext_addr(g, | ||
3568 | pri_smpc_in_etpc_addr_mask(g, addr), | ||
3569 | gpc_num, tpc_num, sm_num); | ||
3570 | nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", | ||
3571 | *t, priv_addr_table[*t]); | ||
3572 | (*t)++; | ||
3573 | } | ||
3574 | } | ||
3575 | |||
3514 | void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, | 3576 | void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, |
3515 | u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t) | 3577 | u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t) |
3516 | { | 3578 | { |
@@ -3531,15 +3593,27 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, | |||
3531 | for (tpc_num = 0; | 3593 | for (tpc_num = 0; |
3532 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | 3594 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; |
3533 | tpc_num++) { | 3595 | tpc_num++) { |
3534 | priv_addr_table[*t] = | 3596 | if (broadcast_flags & |
3535 | pri_etpc_addr(g, | 3597 | PRI_BROADCAST_FLAGS_SMPC) { |
3536 | pri_tpccs_addr_mask(addr), | 3598 | gv11b_gr_update_priv_addr_table_smpc( |
3537 | gpc_num, tpc_num); | 3599 | g, gpc_num, tpc_num, addr, |
3538 | nvgpu_log_info(g, | 3600 | priv_addr_table, t); |
3539 | "priv_addr_table[%d]:%#08x", | 3601 | } else { |
3540 | *t, priv_addr_table[*t]); | 3602 | priv_addr_table[*t] = |
3541 | (*t)++; | 3603 | pri_etpc_addr(g, |
3604 | pri_tpccs_addr_mask(addr), | ||
3605 | gpc_num, tpc_num); | ||
3606 | nvgpu_log_info(g, | ||
3607 | "priv_addr_table[%d]:%#08x", | ||
3608 | *t, priv_addr_table[*t]); | ||
3609 | (*t)++; | ||
3610 | } | ||
3542 | } | 3611 | } |
3612 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) { | ||
3613 | tpc_num = 0; | ||
3614 | gv11b_gr_update_priv_addr_table_smpc( | ||
3615 | g, gpc_num, tpc_num, addr, | ||
3616 | priv_addr_table, t); | ||
3543 | } else { | 3617 | } else { |
3544 | priv_addr_table[*t] = | 3618 | priv_addr_table[*t] = |
3545 | pri_egpc_addr(g, | 3619 | pri_egpc_addr(g, |
@@ -3553,10 +3627,17 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, | |||
3553 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_EGPC)) { | 3627 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_EGPC)) { |
3554 | if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) { | 3628 | if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) { |
3555 | nvgpu_log_info(g, "broadcast flags etpc but not egpc"); | 3629 | nvgpu_log_info(g, "broadcast flags etpc but not egpc"); |
3630 | gpc_num = 0; | ||
3556 | for (tpc_num = 0; | 3631 | for (tpc_num = 0; |
3557 | tpc_num < g->gr.gpc_tpc_count[gpc]; | 3632 | tpc_num < g->gr.gpc_tpc_count[gpc]; |
3558 | tpc_num++) { | 3633 | tpc_num++) { |
3559 | priv_addr_table[*t] = | 3634 | if (broadcast_flags & |
3635 | PRI_BROADCAST_FLAGS_SMPC) | ||
3636 | gv11b_gr_update_priv_addr_table_smpc( | ||
3637 | g, gpc_num, tpc_num, addr, | ||
3638 | priv_addr_table, t); | ||
3639 | else { | ||
3640 | priv_addr_table[*t] = | ||
3560 | pri_etpc_addr(g, | 3641 | pri_etpc_addr(g, |
3561 | pri_tpccs_addr_mask(addr), | 3642 | pri_tpccs_addr_mask(addr), |
3562 | gpc, tpc_num); | 3643 | gpc, tpc_num); |
@@ -3564,7 +3645,14 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, | |||
3564 | "priv_addr_table[%d]:%#08x", | 3645 | "priv_addr_table[%d]:%#08x", |
3565 | *t, priv_addr_table[*t]); | 3646 | *t, priv_addr_table[*t]); |
3566 | (*t)++; | 3647 | (*t)++; |
3648 | } | ||
3567 | } | 3649 | } |
3650 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) { | ||
3651 | tpc_num = 0; | ||
3652 | gpc_num = 0; | ||
3653 | gv11b_gr_update_priv_addr_table_smpc( | ||
3654 | g, gpc_num, tpc_num, addr, | ||
3655 | priv_addr_table, t); | ||
3568 | } else { | 3656 | } else { |
3569 | priv_addr_table[*t] = addr; | 3657 | priv_addr_table[*t] = addr; |
3570 | nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", | 3658 | nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 8880fbdd..3f06fe77 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h | |||
@@ -28,6 +28,8 @@ | |||
28 | #define EGPC_PRI_BASE 0x580000 | 28 | #define EGPC_PRI_BASE 0x580000 |
29 | #define EGPC_PRI_SHARED_BASE 0x480000 | 29 | #define EGPC_PRI_SHARED_BASE 0x480000 |
30 | 30 | ||
31 | #define PRI_BROADCAST_FLAGS_SMPC BIT(17) | ||
32 | |||
31 | #define GV11B_ZBC_TYPE_STENCIL T19X_ZBC | 33 | #define GV11B_ZBC_TYPE_STENCIL T19X_ZBC |
32 | #define ZBC_STENCIL_CLEAR_FMT_INVAILD 0 | 34 | #define ZBC_STENCIL_CLEAR_FMT_INVAILD 0 |
33 | #define ZBC_STENCIL_CLEAR_FMT_U8 1 | 35 | #define ZBC_STENCIL_CLEAR_FMT_U8 1 |
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index f8bdc213..9baa3581 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c | |||
@@ -166,6 +166,18 @@ static int gv11b_get_litter_value(struct gk20a *g, int value) | |||
166 | case GPU_LIT_SM_PRI_STRIDE: | 166 | case GPU_LIT_SM_PRI_STRIDE: |
167 | ret = proj_sm_stride_v(); | 167 | ret = proj_sm_stride_v(); |
168 | break; | 168 | break; |
169 | case GPU_LIT_SMPC_PRI_BASE: | ||
170 | ret = proj_smpc_base_v(); | ||
171 | break; | ||
172 | case GPU_LIT_SMPC_PRI_SHARED_BASE: | ||
173 | ret = proj_smpc_shared_base_v(); | ||
174 | break; | ||
175 | case GPU_LIT_SMPC_PRI_UNIQUE_BASE: | ||
176 | ret = proj_smpc_unique_base_v(); | ||
177 | break; | ||
178 | case GPU_LIT_SMPC_PRI_STRIDE: | ||
179 | ret = proj_smpc_stride_v(); | ||
180 | break; | ||
169 | /* Even though GV11B doesn't have an FBPA unit, the HW reports one, | 181 | /* Even though GV11B doesn't have an FBPA unit, the HW reports one, |
170 | * and the microcode as a result leaves space in the context buffer | 182 | * and the microcode as a result leaves space in the context buffer |
171 | * for one, so make sure SW accounts for this also. | 183 | * for one, so make sure SW accounts for this also. |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_proj_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_proj_gv11b.h index 7e7ad14a..8406ea21 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_proj_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_proj_gv11b.h | |||
@@ -116,6 +116,22 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void) | |||
116 | { | 116 | { |
117 | return 0x00001800; | 117 | return 0x00001800; |
118 | } | 118 | } |
119 | static inline u32 proj_smpc_base_v(void) | ||
120 | { | ||
121 | return 0x00000200; | ||
122 | } | ||
123 | static inline u32 proj_smpc_shared_base_v(void) | ||
124 | { | ||
125 | return 0x00000300; | ||
126 | } | ||
127 | static inline u32 proj_smpc_unique_base_v(void) | ||
128 | { | ||
129 | return 0x00000600; | ||
130 | } | ||
131 | static inline u32 proj_smpc_stride_v(void) | ||
132 | { | ||
133 | return 0x00000100; | ||
134 | } | ||
119 | static inline u32 proj_host_num_engines_v(void) | 135 | static inline u32 proj_host_num_engines_v(void) |
120 | { | 136 | { |
121 | return 0x00000004; | 137 | return 0x00000004; |