summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2018-04-06 09:04:01 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-04-10 14:23:07 -0400
commit19aa748be53787da6abe435ea7043a7827d0fde0 (patch)
treed4588653f031bb0ca4410e287ce0ef291e455422 /drivers/gpu/nvgpu/gv11b/gr_gv11b.c
parent4314771142e0b68810b8fa86ec45b6f6b4e24651 (diff)
gpu: nvgpu: add support to get unicast addresses on volta
We have new broadcast registers on Volta, and we need to generate correct unicast addresses for them so that we can write those registers to the context image. Add a new GR HAL, create_priv_addr_table(), to do this conversion. Set gr_gk20a_create_priv_addr_table() for older chips. Set gr_gv11b_create_priv_addr_table() for Volta. gr_gv11b_create_priv_addr_table() will use the broadcast flags and then generate the appropriate list of unicast registers for each broadcast register. Bug 200398811 Jira NVGPU-556 Change-Id: Id53a9e56106d200fe560ffc93394cc0e976f455f Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1690027 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c208
1 files changed, 208 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 61649d06..67603739 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -57,6 +57,7 @@
57#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> 57#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
58#include <nvgpu/hw/gv11b/hw_therm_gv11b.h> 58#include <nvgpu/hw/gv11b/hw_therm_gv11b.h>
59#include <nvgpu/hw/gv11b/hw_fb_gv11b.h> 59#include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
60#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
60 61
61#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 62#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
62 63
@@ -4511,3 +4512,210 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
4511 *addr_type = CTXSW_ADDR_TYPE_SYS; 4512 *addr_type = CTXSW_ADDR_TYPE_SYS;
4512 return 0; 4513 return 0;
4513} 4514}
4515
4516static u32 gr_gv11b_pri_pmmgpc_addr(u32 gpc_num, u32 domain_idx, u32 offset)
4517{
4518 return perf_pmmgpc_base_v() +
4519 (gpc_num * (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1)) +
4520 (domain_idx * perf_pmmgpc_perdomain_offset_v()) +
4521 offset;
4522}
4523
4524static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g,
4525 u32 offset, u32 *priv_addr_table, u32 *t,
4526 u32 domain_start, u32 num_domains)
4527{
4528 u32 domain_idx = 0;
4529 u32 fbp_num = 0;
4530 u32 base = 0;
4531
4532 for (fbp_num = 0; fbp_num < g->gr.num_fbps; fbp_num++) {
4533 base = perf_pmmfbp_base_v() +
4534 (fbp_num *
4535 (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1));
4536
4537 for (domain_idx = domain_start;
4538 domain_idx < (domain_start + num_domains);
4539 domain_idx++) {
4540 priv_addr_table[(*t)++] = base +
4541 (domain_idx * perf_pmmgpc_perdomain_offset_v())
4542 + offset;
4543 }
4544 }
4545}
4546
4547
4548int gr_gv11b_create_priv_addr_table(struct gk20a *g,
4549 u32 addr,
4550 u32 *priv_addr_table,
4551 u32 *num_registers)
4552{
4553 int addr_type; /*enum ctxsw_addr_type */
4554 u32 gpc_num, tpc_num, ppc_num, be_num;
4555 u32 priv_addr, gpc_addr;
4556 u32 broadcast_flags;
4557 u32 t;
4558 int err;
4559 int fbpa_num;
4560
4561 t = 0;
4562 *num_registers = 0;
4563
4564 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
4565
4566 err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
4567 &gpc_num, &tpc_num, &ppc_num, &be_num,
4568 &broadcast_flags);
4569 gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
4570 if (err)
4571 return err;
4572
4573 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
4574 (addr_type == CTXSW_ADDR_TYPE_BE)) {
4575 /*
4576 * The BE broadcast registers are included in the compressed PRI
4577 * table. Convert a BE unicast address to a broadcast address
4578 * so that we can look up the offset
4579 */
4580 if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
4581 !(broadcast_flags & PRI_BROADCAST_FLAGS_BE))
4582 priv_addr_table[t++] = pri_be_shared_addr(g, addr);
4583 else
4584 priv_addr_table[t++] = addr;
4585
4586 *num_registers = t;
4587 return 0;
4588 }
4589
4590 /*
4591 * The GPC/TPC unicast registers are included in the compressed PRI
4592 * tables. Convert a GPC/TPC broadcast address to unicast addresses so
4593 * that we can look up the offsets
4594 */
4595 if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
4596 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
4597
4598 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
4599 for (tpc_num = 0;
4600 tpc_num < g->gr.gpc_tpc_count[gpc_num];
4601 tpc_num++)
4602 priv_addr_table[t++] =
4603 pri_tpc_addr(g,
4604 pri_tpccs_addr_mask(addr),
4605 gpc_num, tpc_num);
4606
4607 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
4608 err = gr_gk20a_split_ppc_broadcast_addr(g,
4609 addr, gpc_num, priv_addr_table, &t);
4610 if (err)
4611 return err;
4612 } else {
4613 priv_addr = pri_gpc_addr(g,
4614 pri_gpccs_addr_mask(addr),
4615 gpc_num);
4616
4617 gpc_addr = pri_gpccs_addr_mask(priv_addr);
4618 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
4619 if (tpc_num >= g->gr.gpc_tpc_count[gpc_num])
4620 continue;
4621
4622 priv_addr_table[t++] = priv_addr;
4623 }
4624 }
4625 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PMMGPC) {
4626 u32 pmm_domain_start = 0;
4627 u32 domain_idx = 0;
4628 u32 num_domains = 0;
4629 u32 offset = 0;
4630
4631 if (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCA) {
4632 pmm_domain_start = NV_PERF_PMMGPCTPCA_DOMAIN_START;
4633 num_domains = NV_PERF_PMMGPC_NUM_DOMAINS;
4634 offset = PRI_PMMGS_OFFSET_MASK(addr);
4635 } else if (broadcast_flags &
4636 PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCB) {
4637 pmm_domain_start = NV_PERF_PMMGPCTPCA_DOMAIN_START +
4638 NV_PERF_PMMGPC_NUM_DOMAINS;
4639 num_domains = NV_PERF_PMMGPC_NUM_DOMAINS;
4640 offset = PRI_PMMGS_OFFSET_MASK(addr);
4641 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_GPCS) {
4642 pmm_domain_start = (addr -
4643 (NV_PERF_PMMGPC_GPCS + PRI_PMMS_ADDR_MASK(addr)))/
4644 perf_pmmgpc_perdomain_offset_v();
4645 num_domains = 1;
4646 offset = PRI_PMMS_ADDR_MASK(addr);
4647 } else {
4648 return -EINVAL;
4649 }
4650
4651 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
4652 for (domain_idx = pmm_domain_start;
4653 domain_idx < (pmm_domain_start + num_domains);
4654 domain_idx++) {
4655 priv_addr_table[t++] =
4656 gr_gv11b_pri_pmmgpc_addr(gpc_num,
4657 domain_idx, offset);
4658 }
4659 }
4660 } else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
4661 (addr_type == CTXSW_ADDR_TYPE_ETPC)) &&
4662 g->ops.gr.egpc_etpc_priv_addr_table) {
4663 gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC");
4664 g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num,
4665 broadcast_flags, priv_addr_table, &t);
4666 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
4667 g->ops.gr.split_lts_broadcast_addr(g, addr,
4668 priv_addr_table, &t);
4669 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
4670 g->ops.gr.split_ltc_broadcast_addr(g, addr,
4671 priv_addr_table, &t);
4672 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) {
4673 for (fbpa_num = 0;
4674 fbpa_num < nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
4675 fbpa_num++)
4676 priv_addr_table[t++] = pri_fbpa_addr(g,
4677 pri_fbpa_addr_mask(g, addr), fbpa_num);
4678 } else if ((addr_type == CTXSW_ADDR_TYPE_LTCS) &&
4679 (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC)) {
4680 gr_gv11b_split_pmm_fbp_broadcast_address(g,
4681 PRI_PMMGS_OFFSET_MASK(addr),
4682 priv_addr_table, &t,
4683 NV_PERF_PMMFBP_LTC_DOMAIN_START,
4684 NV_PERF_PMMFBP_LTC_NUM_DOMAINS);
4685 } else if ((addr_type == CTXSW_ADDR_TYPE_ROP) &&
4686 (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_ROP)) {
4687 gr_gv11b_split_pmm_fbp_broadcast_address(g,
4688 PRI_PMMGS_OFFSET_MASK(addr),
4689 priv_addr_table, &t,
4690 NV_PERF_PMMFBP_ROP_DOMAIN_START,
4691 NV_PERF_PMMFBP_ROP_NUM_DOMAINS);
4692 } else if ((addr_type == CTXSW_ADDR_TYPE_FBP) &&
4693 (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPS)) {
4694 u32 domain_start;
4695
4696 domain_start = (addr -
4697 (NV_PERF_PMMFBP_FBPS + PRI_PMMS_ADDR_MASK(addr)))/
4698 perf_pmmgpc_perdomain_offset_v();
4699 gr_gv11b_split_pmm_fbp_broadcast_address(g,
4700 PRI_PMMS_ADDR_MASK(addr),
4701 priv_addr_table, &t,
4702 domain_start, 1);
4703 } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) {
4704 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
4705 for (tpc_num = 0;
4706 tpc_num < g->gr.gpc_tpc_count[gpc_num];
4707 tpc_num++)
4708 priv_addr_table[t++] =
4709 pri_tpc_addr(g,
4710 pri_tpccs_addr_mask(addr),
4711 gpc_num, tpc_num);
4712 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
4713 err = gr_gk20a_split_ppc_broadcast_addr(g,
4714 addr, gpc_num, priv_addr_table, &t);
4715 else
4716 priv_addr_table[t++] = addr;
4717 }
4718
4719 *num_registers = t;
4720 return 0;
4721}