summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b
diff options
context:
space:
mode:
authorDavid Nieto <dmartineznie@nvidia.com>2017-05-12 14:07:00 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-05-24 07:55:42 -0400
commit2173add7ae7210606afdaa56995a61d012b9a2f1 (patch)
tree3e6f637ab0c4f2e28aa63823105764f39c774a85 /drivers/gpu/nvgpu/gv11b
parent45ca7cb8c5774cfc15015973b1883faa1d93b9e6 (diff)
gpu: nvgpu: per-chip GPCCS exception support
Adding support for ISR handling of GPCCS exceptions and GCC ECC support

JIRA: GPUT19X-83
Change-Id: Ica749dc678f152d536052cf47f2ea2b205a231d6
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1480997
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c122
1 files changed, 120 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 014ba537..764374cc 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -634,6 +634,70 @@ static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
634 return 0; 634 return 0;
635} 635}
636 636
637static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
638 u32 exception)
639{
640 int ret = 0;
641 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
642 int hww_esr;
643 u32 offset = proj_gpc_stride_v() * gpc;
644
645 hww_esr = gk20a_readl(g, gr_gpc0_gpccs_hww_esr_r() + offset);
646
647 if (!(hww_esr & (gr_gpc0_gpccs_hww_esr_ecc_uncorrected_m() |
648 gr_gpc0_gpccs_hww_esr_ecc_corrected_m())))
649 return ret;
650
651 ecc_status = gk20a_readl(g,
652 gr_gpc0_gpccs_falcon_ecc_status_r() + offset);
653 ecc_addr = gk20a_readl(g,
654 gr_gpc0_gpccs_falcon_ecc_address_r() + offset);
655 corrected_cnt = gk20a_readl(g,
656 gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() + offset);
657 uncorrected_cnt = gk20a_readl(g,
658 gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + offset);
659
660 /* clear the interrupt */
661 gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset,
662 gr_gpc0_gpccs_falcon_ecc_status_reset_task_f());
663
664 nvgpu_log(g, gpu_dbg_intr,
665 "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
666
667 if (ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m())
668 nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
669 if (ecc_status &
670 gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m())
671 nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected");
672 if (ecc_status &
673 gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m())
674 nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
675 if (ecc_status &
676 gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m())
677 nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected");
678
679 nvgpu_log(g, gpu_dbg_intr,
680 "ecc error row address: 0x%x",
681 gr_gpc0_gpccs_falcon_ecc_address_row_address_v(ecc_addr));
682
683 nvgpu_log(g, gpu_dbg_intr,
684 "ecc error count corrected: %d, uncorrected %d",
685 gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v(corrected_cnt),
686 gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v(uncorrected_cnt));
687
688 return ret;
689}
690
691static int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc,
692 u32 gpc_exception)
693{
694 if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpccs_m())
695 return gr_gv11b_handle_gpccs_ecc_exception(g, gpc,
696 gpc_exception);
697
698 return 0;
699}
700
637static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) 701static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
638{ 702{
639 struct gr_gk20a *gr = &g->gr; 703 struct gr_gk20a *gr = &g->gr;
@@ -646,7 +710,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
646 gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); 710 gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1);
647 711
648 gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), 712 gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
649 (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1))); 713 (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) |
714 gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1)));
650} 715}
651 716
652static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, 717static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
@@ -1622,6 +1687,55 @@ static int gr_gv11b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
1622 return ret; 1687 return ret;
1623} 1688}
1624 1689
1690static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
1691{
1692 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
1693
1694 if (intr & (gr_fecs_host_int_status_ecc_uncorrected_m() |
1695 gr_fecs_host_int_status_ecc_corrected_m())) {
1696 ecc_status = gk20a_readl(g, gr_fecs_falcon_ecc_status_r());
1697 ecc_addr = gk20a_readl(g,
1698 gr_fecs_falcon_ecc_address_r());
1699 corrected_cnt = gk20a_readl(g,
1700 gr_fecs_falcon_ecc_corrected_err_count_r());
1701 uncorrected_cnt = gk20a_readl(g,
1702 gr_fecs_falcon_ecc_uncorrected_err_count_r());
1703
1704 /* clear the interrupt */
1705 gk20a_writel(g, gr_fecs_falcon_ecc_status_r(),
1706 gr_fecs_falcon_ecc_status_reset_task_f());
1707
1708 nvgpu_log(g, gpu_dbg_intr,
1709 "fecs ecc interrupt intr: 0x%x", intr);
1710
1711 if (ecc_status &
1712 gr_fecs_falcon_ecc_status_corrected_err_imem_m())
1713 nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
1714 if (ecc_status &
1715 gr_fecs_falcon_ecc_status_uncorrected_err_imem_m())
1716 nvgpu_log(g, gpu_dbg_intr,
1717 "imem ecc error uncorrected");
1718 if (ecc_status &
1719 gr_fecs_falcon_ecc_status_corrected_err_dmem_m())
1720 nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
1721 if (ecc_status &
1722 gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m())
1723 nvgpu_log(g, gpu_dbg_intr,
1724 "dmem ecc error uncorrected");
1725
1726 nvgpu_log(g, gpu_dbg_intr,
1727 "ecc error row address: 0x%x",
1728 gr_fecs_falcon_ecc_address_row_address_v(ecc_addr));
1729
1730 nvgpu_log(g, gpu_dbg_intr,
1731 "ecc error count corrected: %d, uncorrected %d",
1732 gr_fecs_falcon_ecc_corrected_err_count_total_v(
1733 corrected_cnt),
1734 gr_fecs_falcon_ecc_uncorrected_err_count_total_v(
1735 uncorrected_cnt));
1736 }
1737}
1738
1625static int gr_gv11b_handle_fecs_error(struct gk20a *g, 1739static int gr_gv11b_handle_fecs_error(struct gk20a *g,
1626 struct channel_gk20a *__ch, 1740 struct channel_gk20a *__ch,
1627 struct gr_gk20a_isr_data *isr_data) 1741 struct gr_gk20a_isr_data *isr_data)
@@ -1680,6 +1794,9 @@ static int gr_gv11b_handle_fecs_error(struct gk20a *g,
1680 gk20a_channel_put(ch); 1794 gk20a_channel_put(ch);
1681 } 1795 }
1682 1796
1797 /* Handle ECC errors */
1798 gr_gv11b_handle_fecs_ecc_error(g, gr_fecs_intr);
1799
1683clean_up: 1800clean_up:
1684 /* handle any remaining interrupts */ 1801 /* handle any remaining interrupts */
1685 return gk20a_gr_handle_fecs_error(g, __ch, isr_data); 1802 return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
@@ -2214,5 +2331,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
2214 gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr; 2331 gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr;
2215 gops->gr.init_elcg_mode = gr_gv11b_init_elcg_mode; 2332 gops->gr.init_elcg_mode = gr_gv11b_init_elcg_mode;
2216 gops->gr.load_tpc_mask = gr_gv11b_load_tpc_mask; 2333 gops->gr.load_tpc_mask = gr_gv11b_load_tpc_mask;
2217 2334 gops->gr.handle_gpc_gpccs_exception =
2335 gr_gv11b_handle_gpc_gpccs_exception;
2218} 2336}