diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 122 |
1 files changed, 120 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 014ba537..764374cc 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -634,6 +634,70 @@ static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
634 | return 0; | 634 | return 0; |
635 | } | 635 | } |
636 | 636 | ||
637 | static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, | ||
638 | u32 exception) | ||
639 | { | ||
640 | int ret = 0; | ||
641 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; | ||
642 | int hww_esr; | ||
643 | u32 offset = proj_gpc_stride_v() * gpc; | ||
644 | |||
645 | hww_esr = gk20a_readl(g, gr_gpc0_gpccs_hww_esr_r() + offset); | ||
646 | |||
647 | if (!(hww_esr & (gr_gpc0_gpccs_hww_esr_ecc_uncorrected_m() | | ||
648 | gr_gpc0_gpccs_hww_esr_ecc_corrected_m()))) | ||
649 | return ret; | ||
650 | |||
651 | ecc_status = gk20a_readl(g, | ||
652 | gr_gpc0_gpccs_falcon_ecc_status_r() + offset); | ||
653 | ecc_addr = gk20a_readl(g, | ||
654 | gr_gpc0_gpccs_falcon_ecc_address_r() + offset); | ||
655 | corrected_cnt = gk20a_readl(g, | ||
656 | gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() + offset); | ||
657 | uncorrected_cnt = gk20a_readl(g, | ||
658 | gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + offset); | ||
659 | |||
660 | /* clear the interrupt */ | ||
661 | gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, | ||
662 | gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); | ||
663 | |||
664 | nvgpu_log(g, gpu_dbg_intr, | ||
665 | "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); | ||
666 | |||
667 | if (ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) | ||
668 | nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); | ||
669 | if (ecc_status & | ||
670 | gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) | ||
671 | nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); | ||
672 | if (ecc_status & | ||
673 | gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m()) | ||
674 | nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); | ||
675 | if (ecc_status & | ||
676 | gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) | ||
677 | nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); | ||
678 | |||
679 | nvgpu_log(g, gpu_dbg_intr, | ||
680 | "ecc error row address: 0x%x", | ||
681 | gr_gpc0_gpccs_falcon_ecc_address_row_address_v(ecc_addr)); | ||
682 | |||
683 | nvgpu_log(g, gpu_dbg_intr, | ||
684 | "ecc error count corrected: %d, uncorrected %d", | ||
685 | gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v(corrected_cnt), | ||
686 | gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v(uncorrected_cnt)); | ||
687 | |||
688 | return ret; | ||
689 | } | ||
690 | |||
691 | static int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, | ||
692 | u32 gpc_exception) | ||
693 | { | ||
694 | if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpccs_m()) | ||
695 | return gr_gv11b_handle_gpccs_ecc_exception(g, gpc, | ||
696 | gpc_exception); | ||
697 | |||
698 | return 0; | ||
699 | } | ||
700 | |||
637 | static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) | 701 | static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) |
638 | { | 702 | { |
639 | struct gr_gk20a *gr = &g->gr; | 703 | struct gr_gk20a *gr = &g->gr; |
@@ -646,7 +710,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) | |||
646 | gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); | 710 | gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); |
647 | 711 | ||
648 | gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), | 712 | gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), |
649 | (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1))); | 713 | (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) |
714 | gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1)); | ||
650 | } | 715 | } |
651 | 716 | ||
652 | static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | 717 | static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, |
@@ -1622,6 +1687,55 @@ static int gr_gv11b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) | |||
1622 | return ret; | 1687 | return ret; |
1623 | } | 1688 | } |
1624 | 1689 | ||
1690 | static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) | ||
1691 | { | ||
1692 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; | ||
1693 | |||
1694 | if (intr & (gr_fecs_host_int_status_ecc_uncorrected_m() | | ||
1695 | gr_fecs_host_int_status_ecc_corrected_m())) { | ||
1696 | ecc_status = gk20a_readl(g, gr_fecs_falcon_ecc_status_r()); | ||
1697 | ecc_addr = gk20a_readl(g, | ||
1698 | gr_fecs_falcon_ecc_address_r()); | ||
1699 | corrected_cnt = gk20a_readl(g, | ||
1700 | gr_fecs_falcon_ecc_corrected_err_count_r()); | ||
1701 | uncorrected_cnt = gk20a_readl(g, | ||
1702 | gr_fecs_falcon_ecc_uncorrected_err_count_r()); | ||
1703 | |||
1704 | /* clear the interrupt */ | ||
1705 | gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), | ||
1706 | gr_fecs_falcon_ecc_status_reset_task_f()); | ||
1707 | |||
1708 | nvgpu_log(g, gpu_dbg_intr, | ||
1709 | "fecs ecc interrupt intr: 0x%x", intr); | ||
1710 | |||
1711 | if (ecc_status & | ||
1712 | gr_fecs_falcon_ecc_status_corrected_err_imem_m()) | ||
1713 | nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); | ||
1714 | if (ecc_status & | ||
1715 | gr_fecs_falcon_ecc_status_uncorrected_err_imem_m()) | ||
1716 | nvgpu_log(g, gpu_dbg_intr, | ||
1717 | "imem ecc error uncorrected"); | ||
1718 | if (ecc_status & | ||
1719 | gr_fecs_falcon_ecc_status_corrected_err_dmem_m()) | ||
1720 | nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); | ||
1721 | if (ecc_status & | ||
1722 | gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) | ||
1723 | nvgpu_log(g, gpu_dbg_intr, | ||
1724 | "dmem ecc error uncorrected"); | ||
1725 | |||
1726 | nvgpu_log(g, gpu_dbg_intr, | ||
1727 | "ecc error row address: 0x%x", | ||
1728 | gr_fecs_falcon_ecc_address_row_address_v(ecc_addr)); | ||
1729 | |||
1730 | nvgpu_log(g, gpu_dbg_intr, | ||
1731 | "ecc error count corrected: %d, uncorrected %d", | ||
1732 | gr_fecs_falcon_ecc_corrected_err_count_total_v( | ||
1733 | corrected_cnt), | ||
1734 | gr_fecs_falcon_ecc_uncorrected_err_count_total_v( | ||
1735 | uncorrected_cnt)); | ||
1736 | } | ||
1737 | } | ||
1738 | |||
1625 | static int gr_gv11b_handle_fecs_error(struct gk20a *g, | 1739 | static int gr_gv11b_handle_fecs_error(struct gk20a *g, |
1626 | struct channel_gk20a *__ch, | 1740 | struct channel_gk20a *__ch, |
1627 | struct gr_gk20a_isr_data *isr_data) | 1741 | struct gr_gk20a_isr_data *isr_data) |
@@ -1680,6 +1794,9 @@ static int gr_gv11b_handle_fecs_error(struct gk20a *g, | |||
1680 | gk20a_channel_put(ch); | 1794 | gk20a_channel_put(ch); |
1681 | } | 1795 | } |
1682 | 1796 | ||
1797 | /* Handle ECC errors */ | ||
1798 | gr_gv11b_handle_fecs_ecc_error(g, gr_fecs_intr); | ||
1799 | |||
1683 | clean_up: | 1800 | clean_up: |
1684 | /* handle any remaining interrupts */ | 1801 | /* handle any remaining interrupts */ |
1685 | return gk20a_gr_handle_fecs_error(g, __ch, isr_data); | 1802 | return gk20a_gr_handle_fecs_error(g, __ch, isr_data); |
@@ -2214,5 +2331,6 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
2214 | gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr; | 2331 | gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr; |
2215 | gops->gr.init_elcg_mode = gr_gv11b_init_elcg_mode; | 2332 | gops->gr.init_elcg_mode = gr_gv11b_init_elcg_mode; |
2216 | gops->gr.load_tpc_mask = gr_gv11b_load_tpc_mask; | 2333 | gops->gr.load_tpc_mask = gr_gv11b_load_tpc_mask; |
2217 | 2334 | gops->gr.handle_gpc_gpccs_exception = | |
2335 | gr_gv11b_handle_gpc_gpccs_exception; | ||
2218 | } | 2336 | } |