diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 35 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 43 |
2 files changed, 72 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 6d89940a..b96372b4 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1943,14 +1943,42 @@ void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch, | |||
1943 | void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg, | 1943 | void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg, |
1944 | bool verbose, u32 rc_type) | 1944 | bool verbose, u32 rc_type) |
1945 | { | 1945 | { |
1946 | u32 engines; | 1946 | u32 engines = 0U; |
1947 | int err; | ||
1947 | 1948 | ||
1948 | /* stop context switching to prevent engine assignments from | 1949 | /* stop context switching to prevent engine assignments from |
1949 | changing until TSG is recovered */ | 1950 | changing until TSG is recovered */ |
1950 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 1951 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
1951 | gr_gk20a_disable_ctxsw(g); | ||
1952 | 1952 | ||
1953 | engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true); | 1953 | /* disable tsg so that it does not get scheduled again */ |
1954 | g->ops.fifo.disable_tsg(tsg); | ||
1955 | |||
1956 | /* | ||
1957 | * stop context switching to prevent engine assignments from | ||
1958 | * changing until engine status is checked to make sure tsg | ||
1959 | * being recovered is not loaded on the engines | ||
1960 | */ | ||
1961 | err = gr_gk20a_disable_ctxsw(g); | ||
1962 | |||
1963 | if (err != 0) { | ||
1964 | /* if failed to disable ctxsw, just abort tsg */ | ||
1965 | nvgpu_err(g, "failed to disable ctxsw"); | ||
1966 | } else { | ||
1967 | /* recover engines if tsg is loaded on the engines */ | ||
1968 | engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true); | ||
1969 | |||
1970 | /* | ||
1971 | * it is ok to enable ctxsw before tsg is recovered. If engines | ||
1972 | * is 0, no engine recovery is needed and if it is non zero, | ||
1973 | * gk20a_fifo_recover will call get_engines_mask_on_id again. | ||
1974 | * By that time if tsg is not on the engine, engine need not | ||
1975 | * be reset. | ||
1976 | */ | ||
1977 | err = gr_gk20a_enable_ctxsw(g); | ||
1978 | if (err != 0) { | ||
1979 | nvgpu_err(g, "failed to enable ctxsw"); | ||
1980 | } | ||
1981 | } | ||
1954 | 1982 | ||
1955 | if (engines) { | 1983 | if (engines) { |
1956 | gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose, | 1984 | gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose, |
@@ -1963,7 +1991,6 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg, | |||
1963 | gk20a_fifo_abort_tsg(g, tsg, false); | 1991 | gk20a_fifo_abort_tsg(g, tsg, false); |
1964 | } | 1992 | } |
1965 | 1993 | ||
1966 | gr_gk20a_enable_ctxsw(g); | ||
1967 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 1994 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
1968 | } | 1995 | } |
1969 | 1996 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index a4c1ce58..788ebf45 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -628,7 +628,14 @@ static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret) | |||
628 | .cond.fail = GR_IS_UCODE_OP_EQUAL }, true); | 628 | .cond.fail = GR_IS_UCODE_OP_EQUAL }, true); |
629 | } | 629 | } |
630 | 630 | ||
631 | /* Stop processing (stall) context switches at FECS. */ | 631 | /** |
632 | * Stop processing (stall) context switches at FECS:- | ||
633 | * If fecs is sent stop_ctxsw method, elpg entry/exit cannot happen | ||
634 | * and may timeout. It could manifest as different error signatures | ||
635 | * depending on when stop_ctxsw fecs method gets sent with respect | ||
636 | * to pmu elpg sequence. It could come as pmu halt or abort or | ||
637 | * maybe ext error too. | ||
638 | */ | ||
632 | int gr_gk20a_disable_ctxsw(struct gk20a *g) | 639 | int gr_gk20a_disable_ctxsw(struct gk20a *g) |
633 | { | 640 | { |
634 | int err = 0; | 641 | int err = 0; |
@@ -638,8 +645,24 @@ int gr_gk20a_disable_ctxsw(struct gk20a *g) | |||
638 | nvgpu_mutex_acquire(&g->ctxsw_disable_lock); | 645 | nvgpu_mutex_acquire(&g->ctxsw_disable_lock); |
639 | g->ctxsw_disable_count++; | 646 | g->ctxsw_disable_count++; |
640 | if (g->ctxsw_disable_count == 1) { | 647 | if (g->ctxsw_disable_count == 1) { |
641 | err = gr_gk20a_ctrl_ctxsw(g, | 648 | err = nvgpu_pg_elpg_disable(g); |
649 | if (err != 0) { | ||
650 | nvgpu_err(g, "failed to disable elpg. not safe to " | ||
651 | "stop_ctxsw"); | ||
652 | /* stop ctxsw command is not sent */ | ||
653 | g->ctxsw_disable_count--; | ||
654 | } else { | ||
655 | err = gr_gk20a_ctrl_ctxsw(g, | ||
642 | gr_fecs_method_push_adr_stop_ctxsw_v(), NULL); | 656 | gr_fecs_method_push_adr_stop_ctxsw_v(), NULL); |
657 | if (err != 0) { | ||
658 | nvgpu_err(g, "failed to stop fecs ctxsw"); | ||
659 | /* stop ctxsw failed */ | ||
660 | g->ctxsw_disable_count--; | ||
661 | } | ||
662 | } | ||
663 | } else { | ||
664 | nvgpu_log_info(g, "ctxsw disabled, ctxsw_disable_count: %d", | ||
665 | g->ctxsw_disable_count); | ||
643 | } | 666 | } |
644 | nvgpu_mutex_release(&g->ctxsw_disable_lock); | 667 | nvgpu_mutex_release(&g->ctxsw_disable_lock); |
645 | 668 | ||
@@ -654,12 +677,28 @@ int gr_gk20a_enable_ctxsw(struct gk20a *g) | |||
654 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | 677 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); |
655 | 678 | ||
656 | nvgpu_mutex_acquire(&g->ctxsw_disable_lock); | 679 | nvgpu_mutex_acquire(&g->ctxsw_disable_lock); |
680 | |||
681 | if (g->ctxsw_disable_count == 0) { | ||
682 | goto ctxsw_already_enabled; | ||
683 | } | ||
657 | g->ctxsw_disable_count--; | 684 | g->ctxsw_disable_count--; |
658 | WARN_ON(g->ctxsw_disable_count < 0); | 685 | WARN_ON(g->ctxsw_disable_count < 0); |
659 | if (g->ctxsw_disable_count == 0) { | 686 | if (g->ctxsw_disable_count == 0) { |
660 | err = gr_gk20a_ctrl_ctxsw(g, | 687 | err = gr_gk20a_ctrl_ctxsw(g, |
661 | gr_fecs_method_push_adr_start_ctxsw_v(), NULL); | 688 | gr_fecs_method_push_adr_start_ctxsw_v(), NULL); |
689 | if (err != 0) { | ||
690 | nvgpu_err(g, "failed to start fecs ctxsw"); | ||
691 | } else { | ||
692 | if (nvgpu_pg_elpg_enable(g) != 0) { | ||
693 | nvgpu_err(g, "failed to enable elpg " | ||
694 | "after start_ctxsw"); | ||
695 | } | ||
696 | } | ||
697 | } else { | ||
698 | nvgpu_log_info(g, "ctxsw_disable_count: %d is not 0 yet", | ||
699 | g->ctxsw_disable_count); | ||
662 | } | 700 | } |
701 | ctxsw_already_enabled: | ||
663 | nvgpu_mutex_release(&g->ctxsw_disable_lock); | 702 | nvgpu_mutex_release(&g->ctxsw_disable_lock); |
664 | 703 | ||
665 | return err; | 704 | return err; |