summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2018-04-06 09:04:01 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-04-10 14:23:07 -0400
commit19aa748be53787da6abe435ea7043a7827d0fde0 (patch)
treed4588653f031bb0ca4410e287ce0ef291e455422 /drivers/gpu/nvgpu
parent4314771142e0b68810b8fa86ec45b6f6b4e24651 (diff)
gpu: nvgpu: add support to get unicast addresses on volta
We have new broadcast registers on Volta, and we need to generate correct unicast addresses for them so that we can write those registers to the context image. Add a new GR HAL, create_priv_addr_table(), to do this conversion. Set gr_gk20a_create_priv_addr_table() for older chips. Set gr_gv11b_create_priv_addr_table() for Volta. gr_gv11b_create_priv_addr_table() will use the broadcast flags and then generate the appropriate list of unicast registers for each broadcast register. Bug 200398811 Jira NVGPU-556 Change-Id: Id53a9e56106d200fe560ffc93394cc0e976f455f Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1690027 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c10
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h7
-rw-r--r--drivers/gpu/nvgpu/gm20b/hal_gm20b.c1
-rw-r--r--drivers/gpu/nvgpu/gp106/hal_gp106.c1
-rw-r--r--drivers/gpu/nvgpu/gp10b/hal_gp10b.c1
-rw-r--r--drivers/gpu/nvgpu/gv100/hal_gv100.c1
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c208
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.h4
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h7
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c1
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h26
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h26
-rw-r--r--drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c1
-rw-r--r--drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c1
15 files changed, 293 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 164668cb..edc1c5ff 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -466,6 +466,10 @@ struct gpu_ops {
466 u32 *gpc_num, u32 *tpc_num, 466 u32 *gpc_num, u32 *tpc_num,
467 u32 *ppc_num, u32 *be_num, 467 u32 *ppc_num, u32 *be_num,
468 u32 *broadcast_flags); 468 u32 *broadcast_flags);
469 int (*create_priv_addr_table)(struct gk20a *g,
470 u32 addr,
471 u32 *priv_addr_table,
472 u32 *num_registers);
469 } gr; 473 } gr;
470 struct { 474 struct {
471 void (*init_hw)(struct gk20a *g); 475 void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3912a1df..04d00e55 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6325,7 +6325,7 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
6325 return -EINVAL; 6325 return -EINVAL;
6326} 6326}
6327 6327
6328static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, 6328int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
6329 u32 gpc_num, 6329 u32 gpc_num,
6330 u32 *priv_addr_table, u32 *t) 6330 u32 *priv_addr_table, u32 *t)
6331{ 6331{
@@ -6347,7 +6347,7 @@ static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
6347 * GPC/TPC addresses. The addresses generated by this function can be 6347 * GPC/TPC addresses. The addresses generated by this function can be
6348 * successfully processed by gr_gk20a_find_priv_offset_in_buffer 6348 * successfully processed by gr_gk20a_find_priv_offset_in_buffer
6349 */ 6349 */
6350static int gr_gk20a_create_priv_addr_table(struct gk20a *g, 6350int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6351 u32 addr, 6351 u32 addr,
6352 u32 *priv_addr_table, 6352 u32 *priv_addr_table,
6353 u32 *num_registers) 6353 u32 *num_registers)
@@ -6494,7 +6494,8 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6494 memset(offset_addrs, 0, sizeof(u32) * max_offsets); 6494 memset(offset_addrs, 0, sizeof(u32) * max_offsets);
6495 *num_offsets = 0; 6495 *num_offsets = 0;
6496 6496
6497 gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers); 6497 g->ops.gr.create_priv_addr_table(g, addr, &priv_registers[0],
6498 &num_registers);
6498 6499
6499 if ((max_offsets > 1) && (num_registers > max_offsets)) { 6500 if ((max_offsets > 1) && (num_registers > max_offsets)) {
6500 gk20a_dbg_fn("max_offsets = %d, num_registers = %d", 6501 gk20a_dbg_fn("max_offsets = %d, num_registers = %d",
@@ -6571,7 +6572,8 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
6571 memset(offset_addrs, 0, sizeof(u32) * max_offsets); 6572 memset(offset_addrs, 0, sizeof(u32) * max_offsets);
6572 *num_offsets = 0; 6573 *num_offsets = 0;
6573 6574
6574 gr_gk20a_create_priv_addr_table(g, addr, priv_registers, &num_registers); 6575 g->ops.gr.create_priv_addr_table(g, addr, priv_registers,
6576 &num_registers);
6575 6577
6576 if ((max_offsets > 1) && (num_registers > max_offsets)) { 6578 if ((max_offsets > 1) && (num_registers > max_offsets)) {
6577 err = -EINVAL; 6579 err = -EINVAL;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index ee76148a..cd58cfa3 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -825,4 +825,11 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
825 int *addr_type, 825 int *addr_type,
826 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, 826 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
827 u32 *broadcast_flags); 827 u32 *broadcast_flags);
828int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
829 u32 gpc_num,
830 u32 *priv_addr_table, u32 *t);
831int gr_gk20a_create_priv_addr_table(struct gk20a *g,
832 u32 addr,
833 u32 *priv_addr_table,
834 u32 *num_registers);
828#endif /*__GR_GK20A_H__*/ 835#endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 65e75374..ac1c7123 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -321,6 +321,7 @@ static const struct gpu_ops gm20b_ops = {
321 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, 321 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
322 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, 322 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
323 .decode_priv_addr = gr_gk20a_decode_priv_addr, 323 .decode_priv_addr = gr_gk20a_decode_priv_addr,
324 .create_priv_addr_table = gr_gk20a_create_priv_addr_table,
324 }, 325 },
325 .fb = { 326 .fb = {
326 .reset = fb_gk20a_reset, 327 .reset = fb_gk20a_reset,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 4daa510c..f2ce4050 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -384,6 +384,7 @@ static const struct gpu_ops gp106_ops = {
384 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, 384 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
385 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, 385 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
386 .decode_priv_addr = gr_gk20a_decode_priv_addr, 386 .decode_priv_addr = gr_gk20a_decode_priv_addr,
387 .create_priv_addr_table = gr_gk20a_create_priv_addr_table,
387 }, 388 },
388 .fb = { 389 .fb = {
389 .reset = gp106_fb_reset, 390 .reset = gp106_fb_reset,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 2f122e20..a31418f7 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -352,6 +352,7 @@ static const struct gpu_ops gp10b_ops = {
352 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, 352 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
353 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, 353 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
354 .decode_priv_addr = gr_gk20a_decode_priv_addr, 354 .decode_priv_addr = gr_gk20a_decode_priv_addr,
355 .create_priv_addr_table = gr_gk20a_create_priv_addr_table,
355 }, 356 },
356 .fb = { 357 .fb = {
357 .reset = fb_gk20a_reset, 358 .reset = fb_gk20a_reset,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 5cafcaae..c7c648a7 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -431,6 +431,7 @@ static const struct gpu_ops gv100_ops = {
431 .add_ctxsw_reg_pm_fbpa = gr_gv100_add_ctxsw_reg_pm_fbpa, 431 .add_ctxsw_reg_pm_fbpa = gr_gv100_add_ctxsw_reg_pm_fbpa,
432 .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma, 432 .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma,
433 .decode_priv_addr = gr_gv11b_decode_priv_addr, 433 .decode_priv_addr = gr_gv11b_decode_priv_addr,
434 .create_priv_addr_table = gr_gv11b_create_priv_addr_table,
434 }, 435 },
435 .fb = { 436 .fb = {
436 .reset = gv100_fb_reset, 437 .reset = gv100_fb_reset,
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 61649d06..67603739 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -57,6 +57,7 @@
57#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> 57#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
58#include <nvgpu/hw/gv11b/hw_therm_gv11b.h> 58#include <nvgpu/hw/gv11b/hw_therm_gv11b.h>
59#include <nvgpu/hw/gv11b/hw_fb_gv11b.h> 59#include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
60#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
60 61
61#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 62#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
62 63
@@ -4511,3 +4512,210 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
4511 *addr_type = CTXSW_ADDR_TYPE_SYS; 4512 *addr_type = CTXSW_ADDR_TYPE_SYS;
4512 return 0; 4513 return 0;
4513} 4514}
4515
4516static u32 gr_gv11b_pri_pmmgpc_addr(u32 gpc_num, u32 domain_idx, u32 offset)
4517{
4518 return perf_pmmgpc_base_v() +
4519 (gpc_num * (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1)) +
4520 (domain_idx * perf_pmmgpc_perdomain_offset_v()) +
4521 offset;
4522}
4523
4524static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g,
4525 u32 offset, u32 *priv_addr_table, u32 *t,
4526 u32 domain_start, u32 num_domains)
4527{
4528 u32 domain_idx = 0;
4529 u32 fbp_num = 0;
4530 u32 base = 0;
4531
4532 for (fbp_num = 0; fbp_num < g->gr.num_fbps; fbp_num++) {
4533 base = perf_pmmfbp_base_v() +
4534 (fbp_num *
4535 (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1));
4536
4537 for (domain_idx = domain_start;
4538 domain_idx < (domain_start + num_domains);
4539 domain_idx++) {
4540 priv_addr_table[(*t)++] = base +
4541 (domain_idx * perf_pmmgpc_perdomain_offset_v())
4542 + offset;
4543 }
4544 }
4545}
4546
4547
4548int gr_gv11b_create_priv_addr_table(struct gk20a *g,
4549 u32 addr,
4550 u32 *priv_addr_table,
4551 u32 *num_registers)
4552{
4553 int addr_type; /*enum ctxsw_addr_type */
4554 u32 gpc_num, tpc_num, ppc_num, be_num;
4555 u32 priv_addr, gpc_addr;
4556 u32 broadcast_flags;
4557 u32 t;
4558 int err;
4559 int fbpa_num;
4560
4561 t = 0;
4562 *num_registers = 0;
4563
4564 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
4565
4566 err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
4567 &gpc_num, &tpc_num, &ppc_num, &be_num,
4568 &broadcast_flags);
4569 gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
4570 if (err)
4571 return err;
4572
4573 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
4574 (addr_type == CTXSW_ADDR_TYPE_BE)) {
4575 /*
4576 * The BE broadcast registers are included in the compressed PRI
4577 * table. Convert a BE unicast address to a broadcast address
4578 * so that we can look up the offset
4579 */
4580 if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
4581 !(broadcast_flags & PRI_BROADCAST_FLAGS_BE))
4582 priv_addr_table[t++] = pri_be_shared_addr(g, addr);
4583 else
4584 priv_addr_table[t++] = addr;
4585
4586 *num_registers = t;
4587 return 0;
4588 }
4589
4590 /*
4591 * The GPC/TPC unicast registers are included in the compressed PRI
4592 * tables. Convert a GPC/TPC broadcast address to unicast addresses so
4593 * that we can look up the offsets
4594 */
4595 if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
4596 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
4597
4598 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
4599 for (tpc_num = 0;
4600 tpc_num < g->gr.gpc_tpc_count[gpc_num];
4601 tpc_num++)
4602 priv_addr_table[t++] =
4603 pri_tpc_addr(g,
4604 pri_tpccs_addr_mask(addr),
4605 gpc_num, tpc_num);
4606
4607 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
4608 err = gr_gk20a_split_ppc_broadcast_addr(g,
4609 addr, gpc_num, priv_addr_table, &t);
4610 if (err)
4611 return err;
4612 } else {
4613 priv_addr = pri_gpc_addr(g,
4614 pri_gpccs_addr_mask(addr),
4615 gpc_num);
4616
4617 gpc_addr = pri_gpccs_addr_mask(priv_addr);
4618 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
4619 if (tpc_num >= g->gr.gpc_tpc_count[gpc_num])
4620 continue;
4621
4622 priv_addr_table[t++] = priv_addr;
4623 }
4624 }
4625 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PMMGPC) {
4626 u32 pmm_domain_start = 0;
4627 u32 domain_idx = 0;
4628 u32 num_domains = 0;
4629 u32 offset = 0;
4630
4631 if (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCA) {
4632 pmm_domain_start = NV_PERF_PMMGPCTPCA_DOMAIN_START;
4633 num_domains = NV_PERF_PMMGPC_NUM_DOMAINS;
4634 offset = PRI_PMMGS_OFFSET_MASK(addr);
4635 } else if (broadcast_flags &
4636 PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCB) {
4637 pmm_domain_start = NV_PERF_PMMGPCTPCA_DOMAIN_START +
4638 NV_PERF_PMMGPC_NUM_DOMAINS;
4639 num_domains = NV_PERF_PMMGPC_NUM_DOMAINS;
4640 offset = PRI_PMMGS_OFFSET_MASK(addr);
4641 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_GPCS) {
4642 pmm_domain_start = (addr -
4643 (NV_PERF_PMMGPC_GPCS + PRI_PMMS_ADDR_MASK(addr)))/
4644 perf_pmmgpc_perdomain_offset_v();
4645 num_domains = 1;
4646 offset = PRI_PMMS_ADDR_MASK(addr);
4647 } else {
4648 return -EINVAL;
4649 }
4650
4651 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
4652 for (domain_idx = pmm_domain_start;
4653 domain_idx < (pmm_domain_start + num_domains);
4654 domain_idx++) {
4655 priv_addr_table[t++] =
4656 gr_gv11b_pri_pmmgpc_addr(gpc_num,
4657 domain_idx, offset);
4658 }
4659 }
4660 } else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
4661 (addr_type == CTXSW_ADDR_TYPE_ETPC)) &&
4662 g->ops.gr.egpc_etpc_priv_addr_table) {
4663 gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC");
4664 g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num,
4665 broadcast_flags, priv_addr_table, &t);
4666 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
4667 g->ops.gr.split_lts_broadcast_addr(g, addr,
4668 priv_addr_table, &t);
4669 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
4670 g->ops.gr.split_ltc_broadcast_addr(g, addr,
4671 priv_addr_table, &t);
4672 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) {
4673 for (fbpa_num = 0;
4674 fbpa_num < nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
4675 fbpa_num++)
4676 priv_addr_table[t++] = pri_fbpa_addr(g,
4677 pri_fbpa_addr_mask(g, addr), fbpa_num);
4678 } else if ((addr_type == CTXSW_ADDR_TYPE_LTCS) &&
4679 (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC)) {
4680 gr_gv11b_split_pmm_fbp_broadcast_address(g,
4681 PRI_PMMGS_OFFSET_MASK(addr),
4682 priv_addr_table, &t,
4683 NV_PERF_PMMFBP_LTC_DOMAIN_START,
4684 NV_PERF_PMMFBP_LTC_NUM_DOMAINS);
4685 } else if ((addr_type == CTXSW_ADDR_TYPE_ROP) &&
4686 (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_ROP)) {
4687 gr_gv11b_split_pmm_fbp_broadcast_address(g,
4688 PRI_PMMGS_OFFSET_MASK(addr),
4689 priv_addr_table, &t,
4690 NV_PERF_PMMFBP_ROP_DOMAIN_START,
4691 NV_PERF_PMMFBP_ROP_NUM_DOMAINS);
4692 } else if ((addr_type == CTXSW_ADDR_TYPE_FBP) &&
4693 (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPS)) {
4694 u32 domain_start;
4695
4696 domain_start = (addr -
4697 (NV_PERF_PMMFBP_FBPS + PRI_PMMS_ADDR_MASK(addr)))/
4698 perf_pmmgpc_perdomain_offset_v();
4699 gr_gv11b_split_pmm_fbp_broadcast_address(g,
4700 PRI_PMMS_ADDR_MASK(addr),
4701 priv_addr_table, &t,
4702 domain_start, 1);
4703 } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) {
4704 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
4705 for (tpc_num = 0;
4706 tpc_num < g->gr.gpc_tpc_count[gpc_num];
4707 tpc_num++)
4708 priv_addr_table[t++] =
4709 pri_tpc_addr(g,
4710 pri_tpccs_addr_mask(addr),
4711 gpc_num, tpc_num);
4712 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
4713 err = gr_gk20a_split_ppc_broadcast_addr(g,
4714 addr, gpc_num, priv_addr_table, &t);
4715 else
4716 priv_addr_table[t++] = addr;
4717 }
4718
4719 *num_registers = t;
4720 return 0;
4721}
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index 7d286535..3c581326 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -238,4 +238,8 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
238 int *addr_type, 238 int *addr_type,
239 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, 239 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
240 u32 *broadcast_flags); 240 u32 *broadcast_flags);
241int gr_gv11b_create_priv_addr_table(struct gk20a *g,
242 u32 addr,
243 u32 *priv_addr_table,
244 u32 *num_registers);
241#endif 245#endif
diff --git a/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h
index c71f4c9c..78658bf8 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h
@@ -37,6 +37,13 @@
37#define NV_PERF_PMMGPC_GPCS 0x00278000 37#define NV_PERF_PMMGPC_GPCS 0x00278000
38#define NV_PERF_PMMFBP_FBPS 0x0027C000 38#define NV_PERF_PMMFBP_FBPS 0x0027C000
39 39
40#define NV_PERF_PMMGPCTPCA_DOMAIN_START 2
41#define NV_PERF_PMMFBP_LTC_DOMAIN_START 2
42#define NV_PERF_PMMFBP_ROP_DOMAIN_START 6
43#define NV_PERF_PMMGPC_NUM_DOMAINS 7
44#define NV_PERF_PMMFBP_LTC_NUM_DOMAINS 4
45#define NV_PERF_PMMFBP_ROP_NUM_DOMAINS 2
46
40#define PRI_PMMGS_ADDR_WIDTH 9 47#define PRI_PMMGS_ADDR_WIDTH 9
41#define PRI_PMMS_ADDR_WIDTH 14 48#define PRI_PMMS_ADDR_WIDTH 14
42 49
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index c33844dc..e39df1db 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -404,6 +404,7 @@ static const struct gpu_ops gv11b_ops = {
404 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, 404 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
405 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, 405 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
406 .decode_priv_addr = gr_gv11b_decode_priv_addr, 406 .decode_priv_addr = gr_gv11b_decode_priv_addr,
407 .create_priv_addr_table = gr_gv11b_create_priv_addr_table,
407 }, 408 },
408 .fb = { 409 .fb = {
409 .reset = gv11b_fb_reset, 410 .reset = gv11b_fb_reset,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
index 4fbe37cb..268efc52 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -56,6 +56,30 @@
56#ifndef _hw_perf_gv100_h_ 56#ifndef _hw_perf_gv100_h_
57#define _hw_perf_gv100_h_ 57#define _hw_perf_gv100_h_
58 58
/*
 * PMM range constants for gv100. Each accessor returns a fixed value; the
 * values are the same as those returned by the accessors of the same names
 * in hw_perf_gv11b.h.
 */
/* Returns the constant 0x00000200. */
59static inline u32 perf_pmmgpc_perdomain_offset_v(void)
60{
61 return 0x00000200U;
62}
/* Returns the constant 0x00180000. */
63static inline u32 perf_pmmgpc_base_v(void)
64{
65 return 0x00180000U;
66}
/* Returns the constant 0x00183fff. */
67static inline u32 perf_pmmgpc_extent_v(void)
68{
69 return 0x00183fffU;
70}
/* Returns the constant 0x00240000. */
71static inline u32 perf_pmmsys_base_v(void)
72{
73 return 0x00240000U;
74}
/* Returns the constant 0x00243fff. */
75static inline u32 perf_pmmsys_extent_v(void)
76{
77 return 0x00243fffU;
78}
/* Returns the constant 0x00200000. */
79static inline u32 perf_pmmfbp_base_v(void)
80{
81 return 0x00200000U;
82}
59static inline u32 perf_pmasys_control_r(void) 83static inline u32 perf_pmasys_control_r(void)
60{ 84{
61 return 0x0024a000U; 85 return 0x0024a000U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
index 788a6ab6..1d4b2c16 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -56,6 +56,30 @@
56#ifndef _hw_perf_gv11b_h_ 56#ifndef _hw_perf_gv11b_h_
57#define _hw_perf_gv11b_h_ 57#define _hw_perf_gv11b_h_
58 58
/*
 * PMM range constants for gv11b. Each accessor returns a fixed value;
 * gr_gv11b.c uses them to turn PMM broadcast addresses into unicast ones.
 */
/* Per-domain offset (0x200); gr_gv11b.c multiplies it by a domain index. */
59static inline u32 perf_pmmgpc_perdomain_offset_v(void)
60{
61 return 0x00000200U;
62}
/* Start value (0x00180000) for the GPC PMM unicast addresses. */
63static inline u32 perf_pmmgpc_base_v(void)
64{
65 return 0x00180000U;
66}
/* Returns the constant 0x00183fff. */
67static inline u32 perf_pmmgpc_extent_v(void)
68{
69 return 0x00183fffU;
70}
/*
 * Returns 0x00240000. gr_gv11b.c computes (pmmsys extent - pmmsys base + 1)
 * and uses it as the address stride per GPC and per FBP.
 */
71static inline u32 perf_pmmsys_base_v(void)
72{
73 return 0x00240000U;
74}
/* Returns 0x00243fff; paired with perf_pmmsys_base_v() for the stride. */
75static inline u32 perf_pmmsys_extent_v(void)
76{
77 return 0x00243fffU;
78}
/* Start value (0x00200000) for the FBP PMM unicast addresses. */
79static inline u32 perf_pmmfbp_base_v(void)
80{
81 return 0x00200000U;
82}
59static inline u32 perf_pmasys_control_r(void) 83static inline u32 perf_pmasys_control_r(void)
60{ 84{
61 return 0x0024a000U; 85 return 0x0024a000U;
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 34d0fc16..c56e900e 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -226,6 +226,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
226 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, 226 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
227 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, 227 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
228 .decode_priv_addr = gr_gk20a_decode_priv_addr, 228 .decode_priv_addr = gr_gk20a_decode_priv_addr,
229 .create_priv_addr_table = gr_gk20a_create_priv_addr_table,
229 }, 230 },
230 .fb = { 231 .fb = {
231 .reset = fb_gk20a_reset, 232 .reset = fb_gk20a_reset,
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
index d63b91fc..a4496044 100644
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -262,6 +262,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
262 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, 262 .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
263 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, 263 .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
264 .decode_priv_addr = gr_gv11b_decode_priv_addr, 264 .decode_priv_addr = gr_gv11b_decode_priv_addr,
265 .create_priv_addr_table = gr_gv11b_create_priv_addr_table,
265 }, 266 },
266 .fb = { 267 .fb = {
267 .reset = gv11b_fb_reset, 268 .reset = gv11b_fb_reset,