diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 208 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h | 26 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h | 26 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 1 |
15 files changed, 293 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 164668cb..edc1c5ff 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -466,6 +466,10 @@ struct gpu_ops { | |||
466 | u32 *gpc_num, u32 *tpc_num, | 466 | u32 *gpc_num, u32 *tpc_num, |
467 | u32 *ppc_num, u32 *be_num, | 467 | u32 *ppc_num, u32 *be_num, |
468 | u32 *broadcast_flags); | 468 | u32 *broadcast_flags); |
469 | int (*create_priv_addr_table)(struct gk20a *g, | ||
470 | u32 addr, | ||
471 | u32 *priv_addr_table, | ||
472 | u32 *num_registers); | ||
469 | } gr; | 473 | } gr; |
470 | struct { | 474 | struct { |
471 | void (*init_hw)(struct gk20a *g); | 475 | void (*init_hw)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3912a1df..04d00e55 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -6325,7 +6325,7 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | |||
6325 | return -EINVAL; | 6325 | return -EINVAL; |
6326 | } | 6326 | } |
6327 | 6327 | ||
6328 | static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, | 6328 | int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, |
6329 | u32 gpc_num, | 6329 | u32 gpc_num, |
6330 | u32 *priv_addr_table, u32 *t) | 6330 | u32 *priv_addr_table, u32 *t) |
6331 | { | 6331 | { |
@@ -6347,7 +6347,7 @@ static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, | |||
6347 | * GPC/TPC addresses. The addresses generated by this function can be | 6347 | * GPC/TPC addresses. The addresses generated by this function can be |
6348 | * successfully processed by gr_gk20a_find_priv_offset_in_buffer | 6348 | * successfully processed by gr_gk20a_find_priv_offset_in_buffer |
6349 | */ | 6349 | */ |
6350 | static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | 6350 | int gr_gk20a_create_priv_addr_table(struct gk20a *g, |
6351 | u32 addr, | 6351 | u32 addr, |
6352 | u32 *priv_addr_table, | 6352 | u32 *priv_addr_table, |
6353 | u32 *num_registers) | 6353 | u32 *num_registers) |
@@ -6494,7 +6494,8 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | |||
6494 | memset(offset_addrs, 0, sizeof(u32) * max_offsets); | 6494 | memset(offset_addrs, 0, sizeof(u32) * max_offsets); |
6495 | *num_offsets = 0; | 6495 | *num_offsets = 0; |
6496 | 6496 | ||
6497 | gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers); | 6497 | g->ops.gr.create_priv_addr_table(g, addr, &priv_registers[0], |
6498 | &num_registers); | ||
6498 | 6499 | ||
6499 | if ((max_offsets > 1) && (num_registers > max_offsets)) { | 6500 | if ((max_offsets > 1) && (num_registers > max_offsets)) { |
6500 | gk20a_dbg_fn("max_offsets = %d, num_registers = %d", | 6501 | gk20a_dbg_fn("max_offsets = %d, num_registers = %d", |
@@ -6571,7 +6572,8 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, | |||
6571 | memset(offset_addrs, 0, sizeof(u32) * max_offsets); | 6572 | memset(offset_addrs, 0, sizeof(u32) * max_offsets); |
6572 | *num_offsets = 0; | 6573 | *num_offsets = 0; |
6573 | 6574 | ||
6574 | gr_gk20a_create_priv_addr_table(g, addr, priv_registers, &num_registers); | 6575 | g->ops.gr.create_priv_addr_table(g, addr, priv_registers, |
6576 | &num_registers); | ||
6575 | 6577 | ||
6576 | if ((max_offsets > 1) && (num_registers > max_offsets)) { | 6578 | if ((max_offsets > 1) && (num_registers > max_offsets)) { |
6577 | err = -EINVAL; | 6579 | err = -EINVAL; |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index ee76148a..cd58cfa3 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -825,4 +825,11 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | |||
825 | int *addr_type, | 825 | int *addr_type, |
826 | u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, | 826 | u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, |
827 | u32 *broadcast_flags); | 827 | u32 *broadcast_flags); |
828 | int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, | ||
829 | u32 gpc_num, | ||
830 | u32 *priv_addr_table, u32 *t); | ||
831 | int gr_gk20a_create_priv_addr_table(struct gk20a *g, | ||
832 | u32 addr, | ||
833 | u32 *priv_addr_table, | ||
834 | u32 *num_registers); | ||
828 | #endif /*__GR_GK20A_H__*/ | 835 | #endif /*__GR_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 65e75374..ac1c7123 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -321,6 +321,7 @@ static const struct gpu_ops gm20b_ops = { | |||
321 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, | 321 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, |
322 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, | 322 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, |
323 | .decode_priv_addr = gr_gk20a_decode_priv_addr, | 323 | .decode_priv_addr = gr_gk20a_decode_priv_addr, |
324 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, | ||
324 | }, | 325 | }, |
325 | .fb = { | 326 | .fb = { |
326 | .reset = fb_gk20a_reset, | 327 | .reset = fb_gk20a_reset, |
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 4daa510c..f2ce4050 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c | |||
@@ -384,6 +384,7 @@ static const struct gpu_ops gp106_ops = { | |||
384 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, | 384 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, |
385 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, | 385 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, |
386 | .decode_priv_addr = gr_gk20a_decode_priv_addr, | 386 | .decode_priv_addr = gr_gk20a_decode_priv_addr, |
387 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, | ||
387 | }, | 388 | }, |
388 | .fb = { | 389 | .fb = { |
389 | .reset = gp106_fb_reset, | 390 | .reset = gp106_fb_reset, |
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 2f122e20..a31418f7 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c | |||
@@ -352,6 +352,7 @@ static const struct gpu_ops gp10b_ops = { | |||
352 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, | 352 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, |
353 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, | 353 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, |
354 | .decode_priv_addr = gr_gk20a_decode_priv_addr, | 354 | .decode_priv_addr = gr_gk20a_decode_priv_addr, |
355 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, | ||
355 | }, | 356 | }, |
356 | .fb = { | 357 | .fb = { |
357 | .reset = fb_gk20a_reset, | 358 | .reset = fb_gk20a_reset, |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 5cafcaae..c7c648a7 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -431,6 +431,7 @@ static const struct gpu_ops gv100_ops = { | |||
431 | .add_ctxsw_reg_pm_fbpa = gr_gv100_add_ctxsw_reg_pm_fbpa, | 431 | .add_ctxsw_reg_pm_fbpa = gr_gv100_add_ctxsw_reg_pm_fbpa, |
432 | .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma, | 432 | .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma, |
433 | .decode_priv_addr = gr_gv11b_decode_priv_addr, | 433 | .decode_priv_addr = gr_gv11b_decode_priv_addr, |
434 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, | ||
434 | }, | 435 | }, |
435 | .fb = { | 436 | .fb = { |
436 | .reset = gv100_fb_reset, | 437 | .reset = gv100_fb_reset, |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 61649d06..67603739 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> | 57 | #include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> |
58 | #include <nvgpu/hw/gv11b/hw_therm_gv11b.h> | 58 | #include <nvgpu/hw/gv11b/hw_therm_gv11b.h> |
59 | #include <nvgpu/hw/gv11b/hw_fb_gv11b.h> | 59 | #include <nvgpu/hw/gv11b/hw_fb_gv11b.h> |
60 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> | ||
60 | 61 | ||
61 | #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 | 62 | #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 |
62 | 63 | ||
@@ -4511,3 +4512,210 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr, | |||
4511 | *addr_type = CTXSW_ADDR_TYPE_SYS; | 4512 | *addr_type = CTXSW_ADDR_TYPE_SYS; |
4512 | return 0; | 4513 | return 0; |
4513 | } | 4514 | } |
4515 | |||
4516 | static u32 gr_gv11b_pri_pmmgpc_addr(u32 gpc_num, u32 domain_idx, u32 offset) | ||
4517 | { | ||
4518 | return perf_pmmgpc_base_v() + | ||
4519 | (gpc_num * (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1)) + | ||
4520 | (domain_idx * perf_pmmgpc_perdomain_offset_v()) + | ||
4521 | offset; | ||
4522 | } | ||
4523 | |||
4524 | static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g, | ||
4525 | u32 offset, u32 *priv_addr_table, u32 *t, | ||
4526 | u32 domain_start, u32 num_domains) | ||
4527 | { | ||
4528 | u32 domain_idx = 0; | ||
4529 | u32 fbp_num = 0; | ||
4530 | u32 base = 0; | ||
4531 | |||
4532 | for (fbp_num = 0; fbp_num < g->gr.num_fbps; fbp_num++) { | ||
4533 | base = perf_pmmfbp_base_v() + | ||
4534 | (fbp_num * | ||
4535 | (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1)); | ||
4536 | |||
4537 | for (domain_idx = domain_start; | ||
4538 | domain_idx < (domain_start + num_domains); | ||
4539 | domain_idx++) { | ||
4540 | priv_addr_table[(*t)++] = base + | ||
4541 | (domain_idx * perf_pmmgpc_perdomain_offset_v()) | ||
4542 | + offset; | ||
4543 | } | ||
4544 | } | ||
4545 | } | ||
4546 | |||
4547 | |||
4548 | int gr_gv11b_create_priv_addr_table(struct gk20a *g, | ||
4549 | u32 addr, | ||
4550 | u32 *priv_addr_table, | ||
4551 | u32 *num_registers) | ||
4552 | { | ||
4553 | int addr_type; /*enum ctxsw_addr_type */ | ||
4554 | u32 gpc_num, tpc_num, ppc_num, be_num; | ||
4555 | u32 priv_addr, gpc_addr; | ||
4556 | u32 broadcast_flags; | ||
4557 | u32 t; | ||
4558 | int err; | ||
4559 | int fbpa_num; | ||
4560 | |||
4561 | t = 0; | ||
4562 | *num_registers = 0; | ||
4563 | |||
4564 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
4565 | |||
4566 | err = g->ops.gr.decode_priv_addr(g, addr, &addr_type, | ||
4567 | &gpc_num, &tpc_num, &ppc_num, &be_num, | ||
4568 | &broadcast_flags); | ||
4569 | gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type); | ||
4570 | if (err) | ||
4571 | return err; | ||
4572 | |||
4573 | if ((addr_type == CTXSW_ADDR_TYPE_SYS) || | ||
4574 | (addr_type == CTXSW_ADDR_TYPE_BE)) { | ||
4575 | /* | ||
4576 | * The BE broadcast registers are included in the compressed PRI | ||
4577 | * table. Convert a BE unicast address to a broadcast address | ||
4578 | * so that we can look up the offset | ||
4579 | */ | ||
4580 | if ((addr_type == CTXSW_ADDR_TYPE_BE) && | ||
4581 | !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) | ||
4582 | priv_addr_table[t++] = pri_be_shared_addr(g, addr); | ||
4583 | else | ||
4584 | priv_addr_table[t++] = addr; | ||
4585 | |||
4586 | *num_registers = t; | ||
4587 | return 0; | ||
4588 | } | ||
4589 | |||
4590 | /* | ||
4591 | * The GPC/TPC unicast registers are included in the compressed PRI | ||
4592 | * tables. Convert a GPC/TPC broadcast address to unicast addresses so | ||
4593 | * that we can look up the offsets | ||
4594 | */ | ||
4595 | if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) { | ||
4596 | for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { | ||
4597 | |||
4598 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) | ||
4599 | for (tpc_num = 0; | ||
4600 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | ||
4601 | tpc_num++) | ||
4602 | priv_addr_table[t++] = | ||
4603 | pri_tpc_addr(g, | ||
4604 | pri_tpccs_addr_mask(addr), | ||
4605 | gpc_num, tpc_num); | ||
4606 | |||
4607 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { | ||
4608 | err = gr_gk20a_split_ppc_broadcast_addr(g, | ||
4609 | addr, gpc_num, priv_addr_table, &t); | ||
4610 | if (err) | ||
4611 | return err; | ||
4612 | } else { | ||
4613 | priv_addr = pri_gpc_addr(g, | ||
4614 | pri_gpccs_addr_mask(addr), | ||
4615 | gpc_num); | ||
4616 | |||
4617 | gpc_addr = pri_gpccs_addr_mask(priv_addr); | ||
4618 | tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); | ||
4619 | if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) | ||
4620 | continue; | ||
4621 | |||
4622 | priv_addr_table[t++] = priv_addr; | ||
4623 | } | ||
4624 | } | ||
4625 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PMMGPC) { | ||
4626 | u32 pmm_domain_start = 0; | ||
4627 | u32 domain_idx = 0; | ||
4628 | u32 num_domains = 0; | ||
4629 | u32 offset = 0; | ||
4630 | |||
4631 | if (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCA) { | ||
4632 | pmm_domain_start = NV_PERF_PMMGPCTPCA_DOMAIN_START; | ||
4633 | num_domains = NV_PERF_PMMGPC_NUM_DOMAINS; | ||
4634 | offset = PRI_PMMGS_OFFSET_MASK(addr); | ||
4635 | } else if (broadcast_flags & | ||
4636 | PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCB) { | ||
4637 | pmm_domain_start = NV_PERF_PMMGPCTPCA_DOMAIN_START + | ||
4638 | NV_PERF_PMMGPC_NUM_DOMAINS; | ||
4639 | num_domains = NV_PERF_PMMGPC_NUM_DOMAINS; | ||
4640 | offset = PRI_PMMGS_OFFSET_MASK(addr); | ||
4641 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_GPCS) { | ||
4642 | pmm_domain_start = (addr - | ||
4643 | (NV_PERF_PMMGPC_GPCS + PRI_PMMS_ADDR_MASK(addr)))/ | ||
4644 | perf_pmmgpc_perdomain_offset_v(); | ||
4645 | num_domains = 1; | ||
4646 | offset = PRI_PMMS_ADDR_MASK(addr); | ||
4647 | } else { | ||
4648 | return -EINVAL; | ||
4649 | } | ||
4650 | |||
4651 | for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { | ||
4652 | for (domain_idx = pmm_domain_start; | ||
4653 | domain_idx < (pmm_domain_start + num_domains); | ||
4654 | domain_idx++) { | ||
4655 | priv_addr_table[t++] = | ||
4656 | gr_gv11b_pri_pmmgpc_addr(gpc_num, | ||
4657 | domain_idx, offset); | ||
4658 | } | ||
4659 | } | ||
4660 | } else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) || | ||
4661 | (addr_type == CTXSW_ADDR_TYPE_ETPC)) && | ||
4662 | g->ops.gr.egpc_etpc_priv_addr_table) { | ||
4663 | gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC"); | ||
4664 | g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num, | ||
4665 | broadcast_flags, priv_addr_table, &t); | ||
4666 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) { | ||
4667 | g->ops.gr.split_lts_broadcast_addr(g, addr, | ||
4668 | priv_addr_table, &t); | ||
4669 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) { | ||
4670 | g->ops.gr.split_ltc_broadcast_addr(g, addr, | ||
4671 | priv_addr_table, &t); | ||
4672 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) { | ||
4673 | for (fbpa_num = 0; | ||
4674 | fbpa_num < nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); | ||
4675 | fbpa_num++) | ||
4676 | priv_addr_table[t++] = pri_fbpa_addr(g, | ||
4677 | pri_fbpa_addr_mask(g, addr), fbpa_num); | ||
4678 | } else if ((addr_type == CTXSW_ADDR_TYPE_LTCS) && | ||
4679 | (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC)) { | ||
4680 | gr_gv11b_split_pmm_fbp_broadcast_address(g, | ||
4681 | PRI_PMMGS_OFFSET_MASK(addr), | ||
4682 | priv_addr_table, &t, | ||
4683 | NV_PERF_PMMFBP_LTC_DOMAIN_START, | ||
4684 | NV_PERF_PMMFBP_LTC_NUM_DOMAINS); | ||
4685 | } else if ((addr_type == CTXSW_ADDR_TYPE_ROP) && | ||
4686 | (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_ROP)) { | ||
4687 | gr_gv11b_split_pmm_fbp_broadcast_address(g, | ||
4688 | PRI_PMMGS_OFFSET_MASK(addr), | ||
4689 | priv_addr_table, &t, | ||
4690 | NV_PERF_PMMFBP_ROP_DOMAIN_START, | ||
4691 | NV_PERF_PMMFBP_ROP_NUM_DOMAINS); | ||
4692 | } else if ((addr_type == CTXSW_ADDR_TYPE_FBP) && | ||
4693 | (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPS)) { | ||
4694 | u32 domain_start; | ||
4695 | |||
4696 | domain_start = (addr - | ||
4697 | (NV_PERF_PMMFBP_FBPS + PRI_PMMS_ADDR_MASK(addr)))/ | ||
4698 | perf_pmmgpc_perdomain_offset_v(); | ||
4699 | gr_gv11b_split_pmm_fbp_broadcast_address(g, | ||
4700 | PRI_PMMS_ADDR_MASK(addr), | ||
4701 | priv_addr_table, &t, | ||
4702 | domain_start, 1); | ||
4703 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) { | ||
4704 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) | ||
4705 | for (tpc_num = 0; | ||
4706 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | ||
4707 | tpc_num++) | ||
4708 | priv_addr_table[t++] = | ||
4709 | pri_tpc_addr(g, | ||
4710 | pri_tpccs_addr_mask(addr), | ||
4711 | gpc_num, tpc_num); | ||
4712 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) | ||
4713 | err = gr_gk20a_split_ppc_broadcast_addr(g, | ||
4714 | addr, gpc_num, priv_addr_table, &t); | ||
4715 | else | ||
4716 | priv_addr_table[t++] = addr; | ||
4717 | } | ||
4718 | |||
4719 | *num_registers = t; | ||
4720 | return 0; | ||
4721 | } | ||
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 7d286535..3c581326 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h | |||
@@ -238,4 +238,8 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr, | |||
238 | int *addr_type, | 238 | int *addr_type, |
239 | u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, | 239 | u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, |
240 | u32 *broadcast_flags); | 240 | u32 *broadcast_flags); |
241 | int gr_gv11b_create_priv_addr_table(struct gk20a *g, | ||
242 | u32 addr, | ||
243 | u32 *priv_addr_table, | ||
244 | u32 *num_registers); | ||
241 | #endif | 245 | #endif |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h index c71f4c9c..78658bf8 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h | |||
@@ -37,6 +37,13 @@ | |||
37 | #define NV_PERF_PMMGPC_GPCS 0x00278000 | 37 | #define NV_PERF_PMMGPC_GPCS 0x00278000 |
38 | #define NV_PERF_PMMFBP_FBPS 0x0027C000 | 38 | #define NV_PERF_PMMFBP_FBPS 0x0027C000 |
39 | 39 | ||
40 | #define NV_PERF_PMMGPCTPCA_DOMAIN_START 2 | ||
41 | #define NV_PERF_PMMFBP_LTC_DOMAIN_START 2 | ||
42 | #define NV_PERF_PMMFBP_ROP_DOMAIN_START 6 | ||
43 | #define NV_PERF_PMMGPC_NUM_DOMAINS 7 | ||
44 | #define NV_PERF_PMMFBP_LTC_NUM_DOMAINS 4 | ||
45 | #define NV_PERF_PMMFBP_ROP_NUM_DOMAINS 2 | ||
46 | |||
40 | #define PRI_PMMGS_ADDR_WIDTH 9 | 47 | #define PRI_PMMGS_ADDR_WIDTH 9 |
41 | #define PRI_PMMS_ADDR_WIDTH 14 | 48 | #define PRI_PMMS_ADDR_WIDTH 14 |
42 | 49 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index c33844dc..e39df1db 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c | |||
@@ -404,6 +404,7 @@ static const struct gpu_ops gv11b_ops = { | |||
404 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, | 404 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, |
405 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, | 405 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, |
406 | .decode_priv_addr = gr_gv11b_decode_priv_addr, | 406 | .decode_priv_addr = gr_gv11b_decode_priv_addr, |
407 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, | ||
407 | }, | 408 | }, |
408 | .fb = { | 409 | .fb = { |
409 | .reset = gv11b_fb_reset, | 410 | .reset = gv11b_fb_reset, |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h index 4fbe37cb..268efc52 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -56,6 +56,30 @@ | |||
56 | #ifndef _hw_perf_gv100_h_ | 56 | #ifndef _hw_perf_gv100_h_ |
57 | #define _hw_perf_gv100_h_ | 57 | #define _hw_perf_gv100_h_ |
58 | 58 | ||
59 | static inline u32 perf_pmmgpc_perdomain_offset_v(void) | ||
60 | { | ||
61 | return 0x00000200U; | ||
62 | } | ||
63 | static inline u32 perf_pmmgpc_base_v(void) | ||
64 | { | ||
65 | return 0x00180000U; | ||
66 | } | ||
67 | static inline u32 perf_pmmgpc_extent_v(void) | ||
68 | { | ||
69 | return 0x00183fffU; | ||
70 | } | ||
71 | static inline u32 perf_pmmsys_base_v(void) | ||
72 | { | ||
73 | return 0x00240000U; | ||
74 | } | ||
75 | static inline u32 perf_pmmsys_extent_v(void) | ||
76 | { | ||
77 | return 0x00243fffU; | ||
78 | } | ||
79 | static inline u32 perf_pmmfbp_base_v(void) | ||
80 | { | ||
81 | return 0x00200000U; | ||
82 | } | ||
59 | static inline u32 perf_pmasys_control_r(void) | 83 | static inline u32 perf_pmasys_control_r(void) |
60 | { | 84 | { |
61 | return 0x0024a000U; | 85 | return 0x0024a000U; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h index 788a6ab6..1d4b2c16 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -56,6 +56,30 @@ | |||
56 | #ifndef _hw_perf_gv11b_h_ | 56 | #ifndef _hw_perf_gv11b_h_ |
57 | #define _hw_perf_gv11b_h_ | 57 | #define _hw_perf_gv11b_h_ |
58 | 58 | ||
59 | static inline u32 perf_pmmgpc_perdomain_offset_v(void) | ||
60 | { | ||
61 | return 0x00000200U; | ||
62 | } | ||
63 | static inline u32 perf_pmmgpc_base_v(void) | ||
64 | { | ||
65 | return 0x00180000U; | ||
66 | } | ||
67 | static inline u32 perf_pmmgpc_extent_v(void) | ||
68 | { | ||
69 | return 0x00183fffU; | ||
70 | } | ||
71 | static inline u32 perf_pmmsys_base_v(void) | ||
72 | { | ||
73 | return 0x00240000U; | ||
74 | } | ||
75 | static inline u32 perf_pmmsys_extent_v(void) | ||
76 | { | ||
77 | return 0x00243fffU; | ||
78 | } | ||
79 | static inline u32 perf_pmmfbp_base_v(void) | ||
80 | { | ||
81 | return 0x00200000U; | ||
82 | } | ||
59 | static inline u32 perf_pmasys_control_r(void) | 83 | static inline u32 perf_pmasys_control_r(void) |
60 | { | 84 | { |
61 | return 0x0024a000U; | 85 | return 0x0024a000U; |
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 34d0fc16..c56e900e 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | |||
@@ -226,6 +226,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { | |||
226 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, | 226 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, |
227 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, | 227 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, |
228 | .decode_priv_addr = gr_gk20a_decode_priv_addr, | 228 | .decode_priv_addr = gr_gk20a_decode_priv_addr, |
229 | .create_priv_addr_table = gr_gk20a_create_priv_addr_table, | ||
229 | }, | 230 | }, |
230 | .fb = { | 231 | .fb = { |
231 | .reset = fb_gk20a_reset, | 232 | .reset = fb_gk20a_reset, |
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index d63b91fc..a4496044 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | |||
@@ -262,6 +262,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { | |||
262 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, | 262 | .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, |
263 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, | 263 | .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, |
264 | .decode_priv_addr = gr_gv11b_decode_priv_addr, | 264 | .decode_priv_addr = gr_gv11b_decode_priv_addr, |
265 | .create_priv_addr_table = gr_gv11b_create_priv_addr_table, | ||
265 | }, | 266 | }, |
266 | .fb = { | 267 | .fb = { |
267 | .reset = gv11b_fb_reset, | 268 | .reset = gv11b_fb_reset, |