summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c  35
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  43
2 files changed, 72 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 6d89940a..b96372b4 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1943,14 +1943,42 @@ void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch,
1943void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg, 1943void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
1944 bool verbose, u32 rc_type) 1944 bool verbose, u32 rc_type)
1945{ 1945{
1946 u32 engines; 1946 u32 engines = 0U;
1947 int err;
1947 1948
1948 /* stop context switching to prevent engine assignments from 1949 /* stop context switching to prevent engine assignments from
1949 changing until TSG is recovered */ 1950 changing until TSG is recovered */
1950 nvgpu_mutex_acquire(&g->dbg_sessions_lock); 1951 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1951 gr_gk20a_disable_ctxsw(g);
1952 1952
1953 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true); 1953 /* disable tsg so that it does not get scheduled again */
1954 g->ops.fifo.disable_tsg(tsg);
1955
1956 /*
1957 * stop context switching to prevent engine assignments from
1958 * changing until engine status is checked to make sure tsg
1959 * being recovered is not loaded on the engines
1960 */
1961 err = gr_gk20a_disable_ctxsw(g);
1962
1963 if (err != 0) {
1964 /* if failed to disable ctxsw, just abort tsg */
1965 nvgpu_err(g, "failed to disable ctxsw");
1966 } else {
1967 /* recover engines if tsg is loaded on the engines */
1968 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
1969
1970 /*
1971 * it is ok to enable ctxsw before tsg is recovered. If engines
1972 * is 0, no engine recovery is needed and if it is non zero,
1973 * gk20a_fifo_recover will call get_engines_mask_on_id again.
1974 * By that time if tsg is not on the engine, engine need not
1975 * be reset.
1976 */
1977 err = gr_gk20a_enable_ctxsw(g);
1978 if (err != 0) {
1979 nvgpu_err(g, "failed to enable ctxsw");
1980 }
1981 }
1954 1982
1955 if (engines) { 1983 if (engines) {
1956 gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose, 1984 gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose,
@@ -1963,7 +1991,6 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
1963 gk20a_fifo_abort_tsg(g, tsg, false); 1991 gk20a_fifo_abort_tsg(g, tsg, false);
1964 } 1992 }
1965 1993
1966 gr_gk20a_enable_ctxsw(g);
1967 nvgpu_mutex_release(&g->dbg_sessions_lock); 1994 nvgpu_mutex_release(&g->dbg_sessions_lock);
1968} 1995}
1969 1996
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index a4c1ce58..788ebf45 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -628,7 +628,14 @@ static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
628 .cond.fail = GR_IS_UCODE_OP_EQUAL }, true); 628 .cond.fail = GR_IS_UCODE_OP_EQUAL }, true);
629} 629}
630 630
631/* Stop processing (stall) context switches at FECS. */ 631/**
632 * Stop processing (stall) context switches at FECS:-
633 * If fecs is sent stop_ctxsw method, elpg entry/exit cannot happen
634 * and may timeout. It could manifest as different error signatures
635 * depending on when stop_ctxsw fecs method gets sent with respect
636 * to pmu elpg sequence. It could come as pmu halt or abort or
637 * maybe ext error too.
638*/
632int gr_gk20a_disable_ctxsw(struct gk20a *g) 639int gr_gk20a_disable_ctxsw(struct gk20a *g)
633{ 640{
634 int err = 0; 641 int err = 0;
@@ -638,8 +645,24 @@ int gr_gk20a_disable_ctxsw(struct gk20a *g)
638 nvgpu_mutex_acquire(&g->ctxsw_disable_lock); 645 nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
639 g->ctxsw_disable_count++; 646 g->ctxsw_disable_count++;
640 if (g->ctxsw_disable_count == 1) { 647 if (g->ctxsw_disable_count == 1) {
641 err = gr_gk20a_ctrl_ctxsw(g, 648 err = nvgpu_pg_elpg_disable(g);
649 if (err != 0) {
650 nvgpu_err(g, "failed to disable elpg. not safe to "
651 "stop_ctxsw");
652 /* stop ctxsw command is not sent */
653 g->ctxsw_disable_count--;
654 } else {
655 err = gr_gk20a_ctrl_ctxsw(g,
642 gr_fecs_method_push_adr_stop_ctxsw_v(), NULL); 656 gr_fecs_method_push_adr_stop_ctxsw_v(), NULL);
657 if (err != 0) {
658 nvgpu_err(g, "failed to stop fecs ctxsw");
659 /* stop ctxsw failed */
660 g->ctxsw_disable_count--;
661 }
662 }
663 } else {
664 nvgpu_log_info(g, "ctxsw disabled, ctxsw_disable_count: %d",
665 g->ctxsw_disable_count);
643 } 666 }
644 nvgpu_mutex_release(&g->ctxsw_disable_lock); 667 nvgpu_mutex_release(&g->ctxsw_disable_lock);
645 668
@@ -654,12 +677,28 @@ int gr_gk20a_enable_ctxsw(struct gk20a *g)
654 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); 677 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
655 678
656 nvgpu_mutex_acquire(&g->ctxsw_disable_lock); 679 nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
680
681 if (g->ctxsw_disable_count == 0) {
682 goto ctxsw_already_enabled;
683 }
657 g->ctxsw_disable_count--; 684 g->ctxsw_disable_count--;
658 WARN_ON(g->ctxsw_disable_count < 0); 685 WARN_ON(g->ctxsw_disable_count < 0);
659 if (g->ctxsw_disable_count == 0) { 686 if (g->ctxsw_disable_count == 0) {
660 err = gr_gk20a_ctrl_ctxsw(g, 687 err = gr_gk20a_ctrl_ctxsw(g,
661 gr_fecs_method_push_adr_start_ctxsw_v(), NULL); 688 gr_fecs_method_push_adr_start_ctxsw_v(), NULL);
689 if (err != 0) {
690 nvgpu_err(g, "failed to start fecs ctxsw");
691 } else {
692 if (nvgpu_pg_elpg_enable(g) != 0) {
693 nvgpu_err(g, "failed to enable elpg "
694 "after start_ctxsw");
695 }
696 }
697 } else {
698 nvgpu_log_info(g, "ctxsw_disable_count: %d is not 0 yet",
699 g->ctxsw_disable_count);
662 } 700 }
701ctxsw_already_enabled:
663 nvgpu_mutex_release(&g->ctxsw_disable_lock); 702 nvgpu_mutex_release(&g->ctxsw_disable_lock);
664 703
665 return err; 704 return err;