author	neha <njoshi@nvidia.com>	2016-04-11 08:12:39 -0400
committer	Nirav Patel <nipatel@nvidia.com>	2016-07-22 18:10:22 -0400
commit	f3d89a2997800a185c2b645593fffe342dc332df
tree	7196e71579579b35a1341cf950de3afa2acebdda
parent	51a32d8f2c97ae42fd714078c97af83c7136878f
gpu: nvgpu: Full chip support for ctxsw
nvgpu changes needed to handle the newly added ctxsw lists.
Fix regops support for PPC registers.

Squashed from:

Change-Id: I08e6dec3bb2f7aa51de912c9d1c84a350ce07f72
Signed-off-by: neha <njoshi@nvidia.com>
Reviewed-on: http://git-master/r/1151010
(cherry picked from commit fd03ad9f09e66f78db88fb7ece448e26e0515821)

and:

Change-Id: I75a7f810ee0b613c22ac2cef2d936563d8067f97
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1158888
(cherry picked from commit f00a7fcc57fb937b800e46760087ff6f7637520c)

Bug 200180000
Bug 1771830

Reviewed-on: http://git-master/r/1164397
(cherry picked from commit 7028f051e4f37edeff90a9923f022cec6c645a8f)
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Change-Id: I796ddf93ef37170843a4a6b44190cd6780d25852
Reviewed-on: http://git-master/r/1183588
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
GVS: Gerrit_Virtual_Submit
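
As the NOTE in the new add_ctxsw_buffer_map_entries_pmgpc() below explains, PPC registers can end up in the pm_gpc ctxsw list when a GPC has at most one PPC (numPpc <= 1); such entries are remapped using the PPC-in-GPC base and stride mask instead of the GPC mask. The following is a minimal standalone sketch of that remap rule only, not driver code: PPC_IN_GPC_BASE/STRIDE, the GPC base/mask and the sample offsets are illustrative placeholders (the driver fetches the real per-chip values via nvgpu_get_litter_value()).

#include <stdio.h>

/* Illustrative placeholder values; the driver queries the real per-chip
 * values via nvgpu_get_litter_value(GPU_LIT_PPC_IN_GPC_BASE/_STRIDE).
 */
#define PPC_IN_GPC_BASE   0x3000u
#define PPC_IN_GPC_STRIDE 0x0200u

/* Remap one pm_gpc list entry: a shared-PPC offset uses the PPC stride mask
 * and is rebased onto the PPC-in-GPC aperture; any other offset keeps the
 * GPC mask supplied by the caller.
 */
static unsigned int pmgpc_map_addr(unsigned int reg_addr, unsigned int gpc_base,
				   unsigned int gpc_mask, int is_shared_ppc_addr)
{
	if (is_shared_ppc_addr) {
		unsigned int ppc_mask = PPC_IN_GPC_STRIDE - 1;

		return gpc_base + PPC_IN_GPC_BASE + (reg_addr & ppc_mask);
	}
	return gpc_base + (reg_addr & gpc_mask);
}

int main(void)
{
	/* Sample numbers only: GPC base 0x500000, GPC mask 0x7FFF. */
	printf("ppc entry: 0x%08x\n", pmgpc_map_addr(0x3084u, 0x500000u, 0x7FFFu, 1));
	printf("gpc entry: 0x%08x\n", pmgpc_map_addr(0x0084u, 0x500000u, 0x7FFFu, 0));
	return 0;
}
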
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	113
1 file changed, 110 insertions, 3 deletions
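
The new add_ctxsw_buffer_map_entries_pmsys() in the diff below treats pm_sys list offsets that mask to below 0xFFF as PCFG-relative and rebases them onto NV_PCFG_BASE (0x00088000); larger offsets are kept as-is. A minimal standalone sketch of that rule, not driver code, with sample offsets chosen only for illustration (the caller in the diff passes base = 0 and mask = ~0):

#include <stdio.h>

#define NV_PCFG_BASE 0x00088000u

/* Map one pm_sys list entry: masked offsets below 0xFFF are PCFG-relative
 * and get rebased onto NV_PCFG_BASE; anything larger is used unchanged.
 */
static unsigned int pmsys_map_addr(unsigned int reg_addr, unsigned int base,
				   unsigned int mask)
{
	unsigned int addr = base + (reg_addr & mask);

	return (addr < 0xFFF) ? addr + NV_PCFG_BASE : addr;
}

int main(void)
{
	/* Sample offsets only. */
	printf("0x%08x\n", pmsys_map_addr(0x00000123u, 0, ~0u)); /* -> 0x00088123 */
	printf("0x%08x\n", pmsys_map_addr(0x00140123u, 0, ~0u)); /* -> 0x00140123 */
	return 0;
}
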
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index c5b2ba5c..62d3b231 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -64,6 +64,8 @@
 #define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200
 #define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000
 #define NV_PERF_PMMGPCROUTER_STRIDE 0x0200
+#define NV_PCFG_BASE 0x00088000
+#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020
 #define FE_PWR_MODE_TIMEOUT_MAX 2000
 #define FE_PWR_MODE_TIMEOUT_DEFAULT 10
 #define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000
@@ -6446,6 +6448,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
 	} else
 		*gpc_num = pri_get_gpc_num(g, addr);
 
+	if (pri_is_ppc_addr(g, gpc_addr)) {
+		*addr_type = CTXSW_ADDR_TYPE_PPC;
+		if (pri_is_ppc_addr_shared(g, gpc_addr)) {
+			*broadcast_flags |= PRI_BROADCAST_FLAGS_PPC;
+			return 0;
+		}
+	}
 	if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
 		*addr_type = CTXSW_ADDR_TYPE_TPC;
 		if (pri_is_tpc_addr_shared(g, gpc_addr)) {
@@ -7490,6 +7499,72 @@ static int map_cmp(const void *a, const void *b)
 	return 0;
 }
 
+static int add_ctxsw_buffer_map_entries_pmsys(struct ctxsw_buf_offset_map_entry *map,
+					struct aiv_list_gk20a *regs,
+					u32 *count, u32 *offset,
+					u32 max_cnt, u32 base, u32 mask)
+{
+	u32 idx;
+	u32 cnt = *count;
+	u32 off = *offset;
+
+	if ((cnt + regs->count) > max_cnt)
+		return -EINVAL;
+
+	for (idx = 0; idx < regs->count; idx++) {
+		if ((base + (regs->l[idx].addr & mask)) < 0xFFF)
+			map[cnt].addr = base + (regs->l[idx].addr & mask)
+					+ NV_PCFG_BASE;
+		else
+			map[cnt].addr = base + (regs->l[idx].addr & mask);
+		map[cnt++].offset = off;
+		off += 4;
+	}
+	*count = cnt;
+	*offset = off;
+	return 0;
+}
+
+static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
+					struct ctxsw_buf_offset_map_entry *map,
+					struct aiv_list_gk20a *regs,
+					u32 *count, u32 *offset,
+					u32 max_cnt, u32 base, u32 mask)
+{
+	u32 idx;
+	u32 cnt = *count;
+	u32 off = *offset;
+
+	if ((cnt + regs->count) > max_cnt)
+		return -EINVAL;
+
+	/* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1
+	 * To handle the case of PPC registers getting added into GPC, the below
+	 * code specifically checks for any PPC offsets and adds them using
+	 * proper mask
+	 */
+	for (idx = 0; idx < regs->count; idx++) {
+		/* Check if the address is PPC address */
+		if (pri_is_ppc_addr_shared(g, regs->l[idx].addr & mask)) {
+			u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
+						GPU_LIT_PPC_IN_GPC_BASE);
+			u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
+						GPU_LIT_PPC_IN_GPC_STRIDE);
+			/* Use PPC mask instead of the GPC mask provided */
+			u32 ppcmask = ppc_in_gpc_stride - 1;
+
+			map[cnt].addr = base + ppc_in_gpc_base
+					+ (regs->l[idx].addr & ppcmask);
+		} else
+			map[cnt].addr = base + (regs->l[idx].addr & mask);
+		map[cnt++].offset = off;
+		off += 4;
+	}
+	*count = cnt;
+	*offset = off;
+	return 0;
+}
+
 static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
 					struct aiv_list_gk20a *regs,
 					u32 *count, u32 *offset,
@@ -7577,12 +7652,18 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
 			return -EINVAL;
 
 		base = gpc_base + (gpc_stride * gpc_num);
-		if (add_ctxsw_buffer_map_entries(map,
+		if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
 					&g->gr.ctx_vars.ctxsw_regs.pm_gpc,
 					count, offset, max_cnt, base,
 					(gpc_stride - 1)))
 			return -EINVAL;
 
+		base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
+		if (add_ctxsw_buffer_map_entries(map,
+					&g->gr.ctx_vars.ctxsw_regs.pm_ucgpc,
+					count, offset, max_cnt, base, ~0))
+			return -EINVAL;
+
 		base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num);
 		if (add_ctxsw_buffer_map_entries(map,
 					&g->gr.ctx_vars.ctxsw_regs.perf_gpc,
@@ -7609,6 +7690,12 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
 *| |
 *| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
 *|---------------------------------------------|
+*| |
+*| LIST_compressed_nv_perf_ctx_reg_sysrouter |Space allocated: numRegs words
+*|---------------------------------------------|
+*| |
+*| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words
+*|---------------------------------------------|
 *| PADDING for 256 byte alignment |
 *|---------------------------------------------|<----256 byte aligned
 *| LIST_compressed_nv_perf_fbp_ctx_regs |
@@ -7620,6 +7707,8 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
 *| LIST_compressed_pm_fbpa_ctx_regs |
 *| |Space allocated: numRegs * n words (for n FB units)
 *|---------------------------------------------|
+*| LIST_compressed_pm_rop_ctx_regs |
+*|---------------------------------------------|
 *| LIST_compressed_pm_ltc_ctx_regs |
 *| LTC0 LTS0 |
 *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
@@ -7641,7 +7730,8 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
 *| LIST_pm_ctx_reg_PPC REG1 TPCn |
 *| * numPpcs REGn TPC0 |
 *| LIST_pm_ctx_reg_GPC REGn TPC1 |
-*| LIST_nv_perf_ctx_reg_GPC REGn TPCn |
+*| List_pm_ctx_reg_uc_GPC REGn TPCn |
+*| LIST_nv_perf_ctx_reg_GPC |
 *| ---- |--
 *| GPC1 . |
 *| . |<----
@@ -7679,7 +7769,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 		return -ENOMEM;
 
 	/* Add entries from _LIST_pm_ctx_reg_SYS */
-	if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
+	if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
 				&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
 		goto cleanup;
 
@@ -7688,6 +7778,16 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 				&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
 		goto cleanup;
 
+	/* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
+	if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router,
+				&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
+		goto cleanup;
+
+	/* Add entries from _LIST_nv_perf_pma_ctx_reg*/
+	if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma,
+				&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
+		goto cleanup;
+
 	offset = ALIGN(offset, 256);
 
 	/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
@@ -7714,6 +7814,13 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 					num_fbpas, fbpa_stride, ~0))
 		goto cleanup;
 
+	/* Add entries from _LIST_nv_pm_rop_ctx_regs */
+	if (add_ctxsw_buffer_map_entries(map,
+				&g->gr.ctx_vars.ctxsw_regs.pm_rop,
+				&count, &offset,
+				hwpm_ctxsw_reg_count_max, 0, ~0))
+		goto cleanup;
+
 	/* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
 	if (add_ctxsw_buffer_map_entries_subunits(map,
 				&g->gr.ctx_vars.ctxsw_regs.pm_ltc,