From 226c671f8e99e7ed274c5c630090c6190a1367a5 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Thu, 12 Feb 2015 10:53:26 -0800 Subject: gpu: nvgpu: More robust recovery Make recovery a more straightforward process. When we detect a fault, we trigger an MMU fault, wait for it to be raised, and then complete recovery. Also reset engines before aborting the channel to ensure no stray sync point increments can happen. Change-Id: Iac685db6534cb64fe62d9fb452391f43100f2999 Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/709060 (cherry picked from commit 95c62ffd9ac30a0d2eb88d033dcc6e6ff25efd6f) Reviewed-on: http://git-master/r/707443 --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 81 +++++++++++++++------------------ drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h | 14 +++++- 4 files changed, 51 insertions(+), 47 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index a32496a2..4e68fe67 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -570,7 +570,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish) if (g->fifo.deferred_reset_pending) { gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" " deferred, running now"); - fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines); + gk20a_fifo_reset_engine(g, g->fifo.mmu_fault_engines); g->fifo.mmu_fault_engines = 0; g->fifo.deferred_reset_pending = false; } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index a872e304..18928142 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -827,7 +827,7 @@ static inline void get_exception_mmu_fault_info( f->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v(); } -static void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) +void 
gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) { gk20a_dbg_fn(""); @@ -877,34 +877,6 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, return true; } -void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, - unsigned long fault_id) { - u32 engine_mmu_id; - - /* reset engines */ - for_each_set_bit(engine_mmu_id, &fault_id, 32) { - u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id); - if (engine_id != ~0) - gk20a_fifo_reset_engine(g, engine_id); - } - - /* clear interrupt */ - gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id); - - /* resume scheduler */ - gk20a_writel(g, fifo_error_sched_disable_r(), - gk20a_readl(g, fifo_error_sched_disable_r())); - - /* Re-enable fifo access */ - gk20a_writel(g, gr_gpfifo_ctl_r(), - gr_gpfifo_ctl_access_enabled_f() | - gr_gpfifo_ctl_semaphore_access_enabled_f()); - - /* It is safe to enable ELPG again. */ - if (support_gk20a_pmu(g->dev) && g->elpg_enabled) - gk20a_pmu_enable_elpg(g); -} - static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, struct channel_gk20a *ch) { @@ -1083,10 +1055,12 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) /* handled during channel free */ g->fifo.deferred_reset_pending = true; - } + } else if (engine_id != ~0) + gk20a_fifo_reset_engine(g, engine_id); /* disable the channel/TSG from hw and increment * syncpoints */ + if (tsg) { struct channel_gk20a *ch = NULL; if (!g->fifo.deferred_reset_pending) @@ -1119,9 +1093,21 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) return verbose; } - /* resetting the engines and clearing the runlists is done in - a separate function to allow deferred reset. 
*/ - fifo_gk20a_finish_mmu_fault_handling(g, fault_id); + /* clear interrupt */ + gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id); + + /* resume scheduler */ + gk20a_writel(g, fifo_error_sched_disable_r(), + gk20a_readl(g, fifo_error_sched_disable_r())); + + /* Re-enable fifo access */ + gk20a_writel(g, gr_gpfifo_ctl_r(), + gr_gpfifo_ctl_access_enabled_f() | + gr_gpfifo_ctl_semaphore_access_enabled_f()); + + /* It is safe to enable ELPG again. */ + if (support_gk20a_pmu(g->dev) && g->elpg_enabled) + gk20a_pmu_enable_elpg(g); return verbose; } @@ -1152,15 +1138,6 @@ static void gk20a_fifo_trigger_mmu_fault(struct gk20a *g, unsigned long engine_id; int ret; - /* - * sched error prevents recovery, and ctxsw error will retrigger - * every 100ms. Disable the sched error to allow recovery. - */ - gk20a_writel(g, fifo_intr_en_0_r(), - 0x7FFFFFFF & ~fifo_intr_en_0_sched_error_m()); - gk20a_writel(g, fifo_intr_0_r(), - fifo_intr_0_sched_error_reset_f()); - /* trigger faults for all bad engines */ for_each_set_bit(engine_id, &engine_ids, 32) { if (engine_id > g->fifo.max_engines) { @@ -1194,9 +1171,6 @@ static void gk20a_fifo_trigger_mmu_fault(struct gk20a *g, /* release mmu fault trigger */ for_each_set_bit(engine_id, &engine_ids, 32) gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0); - - /* Re-enable sched error */ - gk20a_writel(g, fifo_intr_en_0_r(), 0x7FFFFFFF); } static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) @@ -1272,6 +1246,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, unsigned long engine_id, i; unsigned long _engine_ids = __engine_ids; unsigned long engine_ids = 0; + u32 val; if (verbose) gk20a_debug_dump(g->dev); @@ -1302,7 +1277,23 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, } + /* + * sched error prevents recovery, and ctxsw error will retrigger + * every 100ms. Disable the sched error to allow recovery. 
+ */ + val = gk20a_readl(g, fifo_intr_en_0_r()); + val &= ~(fifo_intr_en_0_sched_error_m() | fifo_intr_en_0_mmu_fault_m()); + gk20a_writel(g, fifo_intr_en_0_r(), val); + gk20a_writel(g, fifo_intr_0_r(), + fifo_intr_0_sched_error_reset_f()); + g->ops.fifo.trigger_mmu_fault(g, engine_ids); + gk20a_fifo_handle_mmu_fault(g); + + val = gk20a_readl(g, fifo_intr_en_0_r()); + val |= fifo_intr_en_0_mmu_fault_f(1) + | fifo_intr_en_0_sched_error_f(1); + gk20a_writel(g, fifo_intr_en_0_r(), val); } int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index ecae970f..8fda38f5 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -162,6 +162,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose); void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose); void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose); +void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id); int gk20a_init_fifo_reset_enable_hw(struct gk20a *g); void gk20a_init_fifo(struct gpu_ops *gops); diff --git a/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h index 757ae3f0..a131972e 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -246,10 +246,22 @@ static inline u32 fifo_intr_en_0_r(void) { return 0x00002140; } +static inline u32 fifo_intr_en_0_sched_error_f(u32 v) +{ + return (v & 0x1) << 8; +} static inline u32 fifo_intr_en_0_sched_error_m(void) { return 0x1 << 8; } +static inline u32 fifo_intr_en_0_mmu_fault_f(u32 v) +{ + return (v & 0x1) << 28; +} +static inline u32 fifo_intr_en_0_mmu_fault_m(void) +{ + return 0x1 << 28; +} static inline u32 fifo_intr_en_1_r(void) { return 0x00002528; -- cgit v1.2.2