summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
authorTerje Bergstrom <tbergstrom@nvidia.com>2016-04-21 16:45:10 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-06-04 18:37:42 -0400
commit3b566957fec720d7315549ae0d5e98eacd7c247e (patch)
tree52cba1cde99d3a270f695715527acebad23da3ff /drivers/gpu/nvgpu
parentf99de40936236b4e8b00fa847f502c7b94af85c3 (diff)
gpu: nvgpu: Add context reset at golden context init
Part of golden context initialization is in powerup sequence, and part done as part of first channel creation. The sequence is missing a context reset, which causes initialization of golden context to fail on dGPU. Just moving the code to golden context initialization does not work, because iGPU can be rail gated, and part of the sequence is required in GPU boot. Thus a part of context initialization is replicated to golden context init after a context reset. Change-Id: Ife1b167447018317d3a692b706880e0eda073e43 Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/1130698
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 145
1 file changed, 139 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 7e0bd7ce..9a51e11c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1518,6 +1518,12 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1518 struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; 1518 struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
1519 struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem; 1519 struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem;
1520 u32 err = 0; 1520 u32 err = 0;
1521 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
1522 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
1523 unsigned long end_jiffies = jiffies +
1524 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
1525 u32 last_method_data = 0;
1526 int retries = 200;
1521 1527
1522 gk20a_dbg_fn(""); 1528 gk20a_dbg_fn("");
1523 1529
@@ -1529,19 +1535,150 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1529 if (gr->ctx_vars.golden_image_initialized) 1535 if (gr->ctx_vars.golden_image_initialized)
1530 goto clean_up; 1536 goto clean_up;
1531 1537
1538 if (!tegra_platform_is_linsim()) {
1539 gk20a_writel(g, gr_fe_pwr_mode_r(),
1540 gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_force_on_f());
1541 do {
1542 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r()));
1543 if (req == gr_fe_pwr_mode_req_done_v())
1544 break;
1545 udelay(GR_IDLE_CHECK_DEFAULT);
1546 } while (--retries || !tegra_platform_is_silicon());
1547 }
1548
1549 if (!retries)
1550 gk20a_err(g->dev, "timeout forcing FE on");
1551
1552 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
1553 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
1554 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
1555 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
1556 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
1557 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
1558 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
1559 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
1560 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
1561 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
1562 gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
1563 udelay(10);
1564
1565 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
1566 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
1567 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
1568 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
1569 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
1570 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
1571 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
1572 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
1573 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
1574 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());
1575 gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
1576 udelay(10);
1577
1578 if (!tegra_platform_is_linsim()) {
1579 gk20a_writel(g, gr_fe_pwr_mode_r(),
1580 gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_auto_f());
1581
1582 retries = 200;
1583 do {
1584 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r()));
1585 if (req == gr_fe_pwr_mode_req_done_v())
1586 break;
1587 udelay(GR_IDLE_CHECK_DEFAULT);
1588 } while (--retries || !tegra_platform_is_silicon());
1589
1590 if (!retries)
1591 gk20a_err(g->dev, "timeout setting FE power to auto");
1592 }
1593
1594 /* clear scc ram */
1595 gk20a_writel(g, gr_scc_init_r(),
1596 gr_scc_init_ram_trigger_f());
1597
1532 err = gr_gk20a_fecs_ctx_bind_channel(g, c); 1598 err = gr_gk20a_fecs_ctx_bind_channel(g, c);
1533 if (err) 1599 if (err)
1534 goto clean_up; 1600 goto clean_up;
1535 1601
1602 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
1603
1604 /* load ctx init */
1605 for (i = 0; i < sw_ctx_load->count; i++)
1606 gk20a_writel(g, sw_ctx_load->l[i].addr,
1607 sw_ctx_load->l[i].value);
1608
1609 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
1610
1611 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
1612 if (err)
1613 goto clean_up;
1614
1615 /* disable fe_go_idle */
1616 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1617 gr_fe_go_idle_timeout_count_disabled_f());
1618
1619 err = gr_gk20a_commit_global_ctx_buffers(g, c, false);
1620 if (err)
1621 goto clean_up;
1622
1623 /* override a few ctx state registers */
1624 gr_gk20a_commit_global_timeslice(g, c, false);
1625
1626 /* floorsweep anything left */
1627 g->ops.gr.init_fs_state(g);
1628
1629 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
1630 if (err)
1631 goto restore_fe_go_idle;
1632
1536 err = gk20a_init_sw_bundle(g); 1633 err = gk20a_init_sw_bundle(g);
1537 if (err) 1634 if (err)
1538 goto clean_up; 1635 goto clean_up;
1539 1636
1540 err = gr_gk20a_elpg_protected_call(g, 1637restore_fe_go_idle:
1541 gr_gk20a_commit_global_ctx_buffers(g, c, false)); 1638 /* restore fe_go_idle */
1639 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1640 gr_fe_go_idle_timeout_count_prod_f());
1641
1642 if (err || gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT))
1643 goto clean_up;
1644
1645 /* load method init */
1646 if (sw_method_init->count) {
1647 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
1648 sw_method_init->l[0].value);
1649 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
1650 gr_pri_mme_shadow_raw_index_write_trigger_f() |
1651 sw_method_init->l[0].addr);
1652 last_method_data = sw_method_init->l[0].value;
1653 }
1654 for (i = 1; i < sw_method_init->count; i++) {
1655 if (sw_method_init->l[i].value != last_method_data) {
1656 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
1657 sw_method_init->l[i].value);
1658 last_method_data = sw_method_init->l[i].value;
1659 }
1660 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
1661 gr_pri_mme_shadow_raw_index_write_trigger_f() |
1662 sw_method_init->l[i].addr);
1663 }
1664
1665 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
1542 if (err) 1666 if (err)
1543 goto clean_up; 1667 goto clean_up;
1544 1668
1669 kfree(gr->sm_error_states);
1670
1671 /* we need to allocate this after g->ops.gr.init_fs_state() since
1672 * we initialize gr->no_of_sm in this function
1673 */
1674 gr->sm_error_states = kzalloc(
1675 sizeof(struct nvgpu_dbg_gpu_sm_error_state_record)
1676 * gr->no_of_sm, GFP_KERNEL);
1677 if (!gr->sm_error_states) {
1678 err = -ENOMEM;
1679 goto restore_fe_go_idle;
1680 }
1681
1545 if (gk20a_mem_begin(g, gold_mem)) 1682 if (gk20a_mem_begin(g, gold_mem))
1546 goto clean_up; 1683 goto clean_up;
1547 1684
@@ -4666,10 +4803,6 @@ static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
4666 gk20a_writel(g, gr_intr_r(), ~0); 4803 gk20a_writel(g, gr_intr_r(), ~0);
4667 gk20a_writel(g, gr_intr_en_r(), ~0); 4804 gk20a_writel(g, gr_intr_en_r(), ~0);
4668 4805
4669 /* clear scc ram */
4670 gk20a_writel(g, gr_scc_init_r(),
4671 gr_scc_init_ram_trigger_f());
4672
4673 /* load non_ctx init */ 4806 /* load non_ctx init */
4674 for (i = 0; i < sw_non_ctx_load->count; i++) 4807 for (i = 0; i < sw_non_ctx_load->count; i++)
4675 gk20a_writel(g, sw_non_ctx_load->l[i].addr, 4808 gk20a_writel(g, sw_non_ctx_load->l[i].addr,