path: root/drivers/gpu/nvgpu
author	Deepak Nibade <dnibade@nvidia.com>	2018-01-04 08:02:46 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-01-12 14:05:31 -0500
commit	5b10690479d016dd27d4592d94e7bd12a9277a34 (patch)
tree	663501e778ad0089c1930c7beff8a62dfac34ea6 /drivers/gpu/nvgpu
parent	6170f1eed53b4e6770a6948474ab8a8a5e2621bc (diff)
gpu: nvgpu: handle SM reported MMU_NACK exception
Upon receiving an MMU_FAULT error, the MMU forwards an MMU_NACK to the SM.
If MMU_NACK is masked out, the SM simply releases its semaphores, and if the
semaphores are released before the MMU fault is handled, user space could
incorrectly see the operation as successful.

Fix this by handling the SM-reported MMU_NACK exception:

Enable MMU_NACK reporting in gv11b_gr_set_hww_esr_report_mask.

In the MMU_NACK handling path, we just set the error notifier and clear the
interrupt so that user space sees the error as soon as the semaphores are
released by the SM. The MMU_FAULT handling path will take care of triggering
RC recovery anyway.

Also add the necessary h/w accessors for mmu_nack.

Bug 2040594
Jira NVGPU-473

Change-Id: Ic925c2d3f3069016c57d177713066c29ab39dc3d
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1631708
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
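For context, a minimal standalone sketch (not part of the patch) of how the new warp ESR definitions are combined: the two accessor values are copied from the hw_gr_gv11b.h / hw_gr_gv100.h hunks below, while the stub main() and the printed messages are purely illustrative stand-ins for the real SM exception path.

/*
 * Illustration only -- not part of this change. The two accessors below
 * duplicate the values added in hw_gr_gv11b.h / hw_gr_gv100.h so the sketch
 * compiles on its own; main() stands in for the real SM exception path.
 */
#include <stdio.h>

typedef unsigned int u32;

/* value copied from the new accessor added in the hw headers */
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
{
	return 0x20U;
}

/* value copied from the new report-mask accessor added in the hw headers */
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f(void)
{
	return 0x4000000U;
}

int main(void)
{
	/* hypothetical warp ESR value with the mmu_nack error bit set */
	u32 warp_esr = 0x20U;

	/* same check gr_gv11b_pre_process_sm_exception() now performs */
	if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f())
		printf("MMU_NACK: set error notifier, clear warp ESR, skip CILP\n");

	/* bit that gv11b_gr_set_hww_esr_report_mask() now ORs into the mask */
	printf("mmu_nack report mask bit: 0x%x\n",
		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f());

	return 0;
}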
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--	drivers/gpu/nvgpu/gv11b/gr_gv11b.c	44
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h	10
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h	10
3 files changed, 61 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index baad5e47..d5924169 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1994,6 +1994,39 @@ void gr_gv11b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gv11b);
 }
 
+static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
+			u32 gpc, u32 tpc, u32 sm,
+			u32 warp_esr,
+			struct channel_gk20a *fault_ch)
+{
+	struct tsg_gk20a *tsg;
+	u32 offset;
+
+	if (fault_ch) {
+		tsg = &g->fifo.tsg[fault_ch->tsgid];
+
+		/*
+		 * Upon receiving MMU_FAULT error, MMU will forward MMU_NACK
+		 * to SM. So MMU_FAULT handling path will take care of
+		 * triggering RC recovery
+		 *
+		 * In MMU_NACK handling path, we just set the error notifier
+		 * and clear the interrupt so that the User Space sees the error
+		 * as soon as semaphores are released by SM
+		 */
+		gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+	}
+
+	/* clear interrupt */
+	offset = gk20a_gr_gpc_offset(g, gpc) +
+			gk20a_gr_tpc_offset(g, tpc) +
+			gv11b_gr_sm_offset(g, sm);
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);
+
+	return 0;
+}
+
 /* @brief pre-process work on the SM exceptions to determine if we clear them or not.
  *
  * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
@@ -2013,6 +2046,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 	*early_exit = false;
 	*ignore_debugger = false;
 
+	/*
+	 * We don't need to trigger CILP in case of MMU_NACK
+	 * So just handle MMU_NACK and return
+	 */
+	if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f())
+		return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
+				warp_esr, fault_ch);
+
 	if (fault_ch)
 		cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
 				NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
@@ -2992,7 +3033,8 @@ void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g)
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_report_f() |
-		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f());
+		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f() |
+		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f());
 
 	/* setup sm global esr report mask. vat_alarm_report is not enabled */
 	gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(),
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
index 09cbc793..e669c0f0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -3344,6 +3344,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_repor
 {
 	return 0x400000U;
 }
+static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f(void)
+{
+	return 0x4000000U;
+}
 static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_r(void)
 {
 	return 0x00419d0cU;
@@ -3552,6 +3556,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v(void)
 {
 	return 0x00000000U;
 }
+static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
+{
+	return 0x20U;
+}
 static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void)
 {
 	return 0x0U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 1e82456f..2b9bffdd 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -3940,6 +3940,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_repor
 {
 	return 0x400000U;
 }
+static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f(void)
+{
+	return 0x4000000U;
+}
 static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_r(void)
 {
 	return 0x00419d0cU;
@@ -4240,6 +4244,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void)
 {
 	return 0x0U;
 }
+static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
+{
+	return 0x20U;
+}
 static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_wrap_id_m(void)
 {
 	return 0xffU << 16U;