diff options
author:    Richard Zhao <rizhao@nvidia.com>                           2016-08-12 20:10:28 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com>        2016-08-18 18:03:54 -0400
commit:    198b895a881ca067a2411b7367579cb1d594ab5a (patch)
tree:      90e667382a589de9ba4d97bf511ca6bd42efe03c /drivers
parent:    deffbf8ee2017d4ea804f35946673dd0f6e0fcf2 (diff)
gpu: nvgpu: use force_reset_ch in ch wdt handler
- let force_reset_ch pass down err code
- force_reset_ch callback can cover vgpu too.
Bug 1776876
JIRA VFND-2151
Change-Id: I48f7890294c6455247198e0cab5f21f83f61f0e1
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1202255
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 52
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    |  9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h    |  3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h         |  3
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c      |  9
5 files changed, 17 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index a8a39302..41fced99 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1721,10 +1721,6 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
1721 | struct channel_gk20a_job *job; | 1721 | struct channel_gk20a_job *job; |
1722 | struct gk20a *g; | 1722 | struct gk20a *g; |
1723 | struct channel_gk20a *ch; | 1723 | struct channel_gk20a *ch; |
1724 | struct channel_gk20a *failing_ch; | ||
1725 | u32 engine_id; | ||
1726 | int id = -1; | ||
1727 | bool is_tsg = false; | ||
1728 | 1724 | ||
1729 | ch = container_of(to_delayed_work(work), struct channel_gk20a, | 1725 | ch = container_of(to_delayed_work(work), struct channel_gk20a, |
1730 | timeout.wq); | 1726 | timeout.wq); |
@@ -1746,16 +1742,11 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
1746 | ch->timeout.initialized = false; | 1742 | ch->timeout.initialized = false; |
1747 | mutex_unlock(&ch->timeout.lock); | 1743 | mutex_unlock(&ch->timeout.lock); |
1748 | 1744 | ||
1749 | if (gr_gk20a_disable_ctxsw(g)) { | ||
1750 | gk20a_err(dev_from_gk20a(g), "Unable to disable ctxsw!"); | ||
1751 | goto fail_unlock; | ||
1752 | } | ||
1753 | |||
1754 | if (gk20a_fence_is_expired(job->post_fence)) { | 1745 | if (gk20a_fence_is_expired(job->post_fence)) { |
1755 | gk20a_err(dev_from_gk20a(g), | 1746 | gk20a_err(dev_from_gk20a(g), |
1756 | "Timed out fence is expired on c=%d!", | 1747 | "Timed out fence is expired on c=%d!", |
1757 | ch->hw_chid); | 1748 | ch->hw_chid); |
1758 | goto fail_enable_ctxsw; | 1749 | goto fail_unlock; |
1759 | } | 1750 | } |
1760 | 1751 | ||
1761 | gk20a_err(dev_from_gk20a(g), "Confirmed: job on channel %d timed out", | 1752 | gk20a_err(dev_from_gk20a(g), "Confirmed: job on channel %d timed out", |
@@ -1764,43 +1755,9 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
1764 | gk20a_debug_dump(g->dev); | 1755 | gk20a_debug_dump(g->dev); |
1765 | gk20a_gr_debug_dump(g->dev); | 1756 | gk20a_gr_debug_dump(g->dev); |
1766 | 1757 | ||
1767 | /* Get failing engine data */ | 1758 | g->ops.fifo.force_reset_ch(ch, |
1768 | engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg); | 1759 | NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); |
1769 | |||
1770 | if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { | ||
1771 | /* If no failing engine, abort the channels */ | ||
1772 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
1773 | struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; | ||
1774 | |||
1775 | gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); | ||
1776 | gk20a_fifo_abort_tsg(g, ch->tsgid, false); | ||
1777 | } else { | ||
1778 | gk20a_fifo_set_ctx_mmu_error_ch(g, ch); | ||
1779 | gk20a_channel_abort(ch, false); | ||
1780 | } | ||
1781 | } else { | ||
1782 | /* If failing engine, trigger recovery */ | ||
1783 | failing_ch = gk20a_channel_get(&g->fifo.channel[id]); | ||
1784 | if (!failing_ch) | ||
1785 | goto fail_enable_ctxsw; | ||
1786 | |||
1787 | if (failing_ch->hw_chid != ch->hw_chid) { | ||
1788 | gk20a_channel_timeout_start(ch, job); | ||
1789 | |||
1790 | mutex_lock(&failing_ch->timeout.lock); | ||
1791 | failing_ch->timeout.initialized = false; | ||
1792 | mutex_unlock(&failing_ch->timeout.lock); | ||
1793 | } | ||
1794 | |||
1795 | gk20a_fifo_recover(g, BIT(engine_id), | ||
1796 | failing_ch->hw_chid, is_tsg, | ||
1797 | true, failing_ch->timeout_debug_dump); | ||
1798 | |||
1799 | gk20a_channel_put(failing_ch); | ||
1800 | } | ||
1801 | 1760 | ||
1802 | fail_enable_ctxsw: | ||
1803 | gr_gk20a_enable_ctxsw(g); | ||
1804 | fail_unlock: | 1761 | fail_unlock: |
1805 | mutex_unlock(&g->ch_wdt_lock); | 1762 | mutex_unlock(&g->ch_wdt_lock); |
1806 | gk20a_channel_put(ch); | 1763 | gk20a_channel_put(ch); |
@@ -3231,7 +3188,8 @@ long gk20a_channel_ioctl(struct file *filp, | |||
3231 | __func__, cmd); | 3188 | __func__, cmd); |
3232 | break; | 3189 | break; |
3233 | } | 3190 | } |
3234 | err = ch->g->ops.fifo.force_reset_ch(ch, true); | 3191 | err = ch->g->ops.fifo.force_reset_ch(ch, |
3192 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true); | ||
3235 | gk20a_idle(dev); | 3193 | gk20a_idle(dev); |
3236 | break; | 3194 | break; |
3237 | case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL: | 3195 | case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL: |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index b025f4d6..bd31656f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -1746,7 +1746,8 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, | |||
1746 | } | 1746 | } |
1747 | 1747 | ||
1748 | /* force reset channel and tsg (if it's part of one) */ | 1748 | /* force reset channel and tsg (if it's part of one) */ |
1749 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) | 1749 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, |
1750 | u32 err_code, bool verbose) | ||
1750 | { | 1751 | { |
1751 | struct tsg_gk20a *tsg = NULL; | 1752 | struct tsg_gk20a *tsg = NULL; |
1752 | struct channel_gk20a *ch_tsg = NULL; | 1753 | struct channel_gk20a *ch_tsg = NULL; |
@@ -1759,8 +1760,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) | |||
1759 | 1760 | ||
1760 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { | 1761 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { |
1761 | if (gk20a_channel_get(ch_tsg)) { | 1762 | if (gk20a_channel_get(ch_tsg)) { |
1762 | gk20a_set_error_notifier(ch_tsg, | 1763 | gk20a_set_error_notifier(ch_tsg, err_code); |
1763 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); | ||
1764 | gk20a_channel_put(ch_tsg); | 1764 | gk20a_channel_put(ch_tsg); |
1765 | } | 1765 | } |
1766 | } | 1766 | } |
@@ -1768,8 +1768,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) | |||
1768 | mutex_unlock(&tsg->ch_list_lock); | 1768 | mutex_unlock(&tsg->ch_list_lock); |
1769 | gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); | 1769 | gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); |
1770 | } else { | 1770 | } else { |
1771 | gk20a_set_error_notifier(ch, | 1771 | gk20a_set_error_notifier(ch, err_code); |
1772 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); | ||
1773 | gk20a_fifo_recover_ch(g, ch->hw_chid, verbose); | 1772 | gk20a_fifo_recover_ch(g, ch->hw_chid, verbose); |
1774 | } | 1773 | } |
1775 | 1774 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index f5a73a12..17c6dbf6 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -197,7 +197,8 @@ void gk20a_fifo_recover(struct gk20a *g, | |||
197 | bool id_is_known, bool verbose); | 197 | bool id_is_known, bool verbose); |
198 | void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose); | 198 | void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose); |
199 | void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); | 199 | void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); |
200 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose); | 200 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, |
201 | u32 err_code, bool verbose); | ||
201 | void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id); | 202 | void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id); |
202 | int gk20a_init_fifo_reset_enable_hw(struct gk20a *g); | 203 | int gk20a_init_fifo_reset_enable_hw(struct gk20a *g); |
203 | void gk20a_init_fifo(struct gpu_ops *gops); | 204 | void gk20a_init_fifo(struct gpu_ops *gops); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 6f735af9..463317e3 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -363,7 +363,8 @@ struct gpu_ops { | |||
363 | int (*channel_set_timeslice)(struct channel_gk20a *ch, | 363 | int (*channel_set_timeslice)(struct channel_gk20a *ch, |
364 | u32 timeslice); | 364 | u32 timeslice); |
365 | int (*tsg_set_timeslice)(struct tsg_gk20a *tsg, u32 timeslice); | 365 | int (*tsg_set_timeslice)(struct tsg_gk20a *tsg, u32 timeslice); |
366 | int (*force_reset_ch)(struct channel_gk20a *ch, bool verbose); | 366 | int (*force_reset_ch)(struct channel_gk20a *ch, |
367 | u32 err_code, bool verbose); | ||
367 | int (*engine_enum_from_type)(struct gk20a *g, u32 engine_type, | 368 | int (*engine_enum_from_type)(struct gk20a *g, u32 engine_type, |
368 | u32 *inst_id); | 369 | u32 *inst_id); |
369 | void (*device_info_data_parse)(struct gk20a *g, | 370 | void (*device_info_data_parse)(struct gk20a *g, |
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 9a8c319b..90e44e8c 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -634,7 +634,8 @@ static int vgpu_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice) | |||
634 | return err ? err : msg.ret; | 634 | return err ? err : msg.ret; |
635 | } | 635 | } |
636 | 636 | ||
637 | static int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) | 637 | static int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, |
638 | u32 err_code, bool verbose) | ||
638 | { | 639 | { |
639 | struct tsg_gk20a *tsg = NULL; | 640 | struct tsg_gk20a *tsg = NULL; |
640 | struct channel_gk20a *ch_tsg = NULL; | 641 | struct channel_gk20a *ch_tsg = NULL; |
@@ -653,16 +654,14 @@ static int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) | |||
653 | 654 | ||
654 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { | 655 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { |
655 | if (gk20a_channel_get(ch_tsg)) { | 656 | if (gk20a_channel_get(ch_tsg)) { |
656 | gk20a_set_error_notifier(ch_tsg, | 657 | gk20a_set_error_notifier(ch_tsg, err_code); |
657 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); | ||
658 | gk20a_channel_put(ch_tsg); | 658 | gk20a_channel_put(ch_tsg); |
659 | } | 659 | } |
660 | } | 660 | } |
661 | 661 | ||
662 | mutex_unlock(&tsg->ch_list_lock); | 662 | mutex_unlock(&tsg->ch_list_lock); |
663 | } else { | 663 | } else { |
664 | gk20a_set_error_notifier(ch, | 664 | gk20a_set_error_notifier(ch, err_code); |
665 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); | ||
666 | } | 665 | } |
667 | 666 | ||
668 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET; | 667 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET; |