diff options
author | Seema Khowala <seemaj@nvidia.com> | 2017-03-22 12:24:19 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-03-23 20:18:28 -0400 |
commit | 17df1921807a190d24dbd5b0e0f78192c2e3b772 (patch) | |
tree | 9f76ed1e5762e1e2cf57a374fb6cd39facf50af4 /drivers/gpu | |
parent | df94d474a8200fc61969e2fc35d1b2a8d7fa5b8c (diff) |
gpu: nvgpu: gr faults: do not depend on fake mmu fault notifier
Currently NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT is being set in the
error notifier for non-MMU faults too. For fake MMU faults, i.e. the
trigger-MMU-fault cases, make sure the proper notifiers are set and the
driver does not depend on sending the MMU error fault notifier.
This change is needed for t19x fifo recovery too.
NVGPU_CHANNEL_GR_ERROR_SW_METHOD (12), NVGPU_CHANNEL_GR_EXCEPTION (13)
and NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD (37) are new error
notifiers.
JIRA GPUT19X-7
Change-Id: Idee83e842c835bdba9eb18578aad0c372ea74c5d
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: http://git-master/r/1310563
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 101 |
2 files changed, 69 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index b4589eaa..ad69cd79 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -3272,7 +3272,10 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) | |||
3272 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, | 3272 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, |
3273 | u32 hw_chid) | 3273 | u32 hw_chid) |
3274 | { | 3274 | { |
3275 | return g->fifo.channel + hw_chid; | 3275 | if (hw_chid != FIFO_INVAL_CHANNEL_ID) |
3276 | return g->fifo.channel + hw_chid; | ||
3277 | else | ||
3278 | return NULL; | ||
3276 | } | 3279 | } |
3277 | 3280 | ||
3278 | #ifdef CONFIG_DEBUG_FS | 3281 | #ifdef CONFIG_DEBUG_FS |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 9f527edd..5121d6e9 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -5555,14 +5555,40 @@ fail: | |||
5555 | return -EINVAL; | 5555 | return -EINVAL; |
5556 | } | 5556 | } |
5557 | 5557 | ||
5558 | static void gk20a_gr_set_error_notifier(struct gk20a *g, | ||
5559 | struct gr_gk20a_isr_data *isr_data, u32 error_notifier) | ||
5560 | { | ||
5561 | struct fifo_gk20a *f = &g->fifo; | ||
5562 | struct channel_gk20a *ch; | ||
5563 | struct tsg_gk20a *tsg; | ||
5564 | struct channel_gk20a *ch_tsg; | ||
5565 | |||
5566 | if (isr_data->chid != FIFO_INVAL_CHANNEL_ID) { | ||
5567 | ch = &f->channel[isr_data->chid]; | ||
5568 | |||
5569 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
5570 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
5571 | down_read(&tsg->ch_list_lock); | ||
5572 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { | ||
5573 | if (gk20a_channel_get(ch_tsg)) { | ||
5574 | gk20a_set_error_notifier(ch_tsg, | ||
5575 | error_notifier); | ||
5576 | gk20a_channel_put(ch_tsg); | ||
5577 | } | ||
5578 | } | ||
5579 | up_read(&tsg->ch_list_lock); | ||
5580 | } else { | ||
5581 | gk20a_set_error_notifier(ch, error_notifier); | ||
5582 | } | ||
5583 | } | ||
5584 | } | ||
5585 | |||
5558 | static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g, | 5586 | static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g, |
5559 | struct gr_gk20a_isr_data *isr_data) | 5587 | struct gr_gk20a_isr_data *isr_data) |
5560 | { | 5588 | { |
5561 | struct fifo_gk20a *f = &g->fifo; | ||
5562 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
5563 | gk20a_dbg_fn(""); | 5589 | gk20a_dbg_fn(""); |
5564 | gk20a_set_error_notifier(ch, | 5590 | gk20a_gr_set_error_notifier(g, isr_data, |
5565 | NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); | 5591 | NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); |
5566 | gk20a_err(dev_from_gk20a(g), | 5592 | gk20a_err(dev_from_gk20a(g), |
5567 | "gr semaphore timeout\n"); | 5593 | "gr semaphore timeout\n"); |
5568 | return -EINVAL; | 5594 | return -EINVAL; |
@@ -5571,11 +5597,9 @@ static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g, | |||
5571 | static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g, | 5597 | static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g, |
5572 | struct gr_gk20a_isr_data *isr_data) | 5598 | struct gr_gk20a_isr_data *isr_data) |
5573 | { | 5599 | { |
5574 | struct fifo_gk20a *f = &g->fifo; | ||
5575 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
5576 | gk20a_dbg_fn(""); | 5600 | gk20a_dbg_fn(""); |
5577 | gk20a_set_error_notifier(ch, | 5601 | gk20a_gr_set_error_notifier(g, isr_data, |
5578 | NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); | 5602 | NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); |
5579 | /* This is an unrecoverable error, reset is needed */ | 5603 | /* This is an unrecoverable error, reset is needed */ |
5580 | gk20a_err(dev_from_gk20a(g), | 5604 | gk20a_err(dev_from_gk20a(g), |
5581 | "gr semaphore timeout\n"); | 5605 | "gr semaphore timeout\n"); |
@@ -5588,22 +5612,22 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g, | |||
5588 | int ret = g->ops.gr.handle_sw_method(g, isr_data->addr, | 5612 | int ret = g->ops.gr.handle_sw_method(g, isr_data->addr, |
5589 | isr_data->class_num, isr_data->offset, | 5613 | isr_data->class_num, isr_data->offset, |
5590 | isr_data->data_lo); | 5614 | isr_data->data_lo); |
5591 | if (ret) | 5615 | if (ret) { |
5616 | gk20a_gr_set_error_notifier(g, isr_data, | ||
5617 | NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); | ||
5592 | gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x" | 5618 | gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x" |
5593 | ", offset 0x%08x address 0x%08x\n", | 5619 | ", offset 0x%08x address 0x%08x\n", |
5594 | isr_data->class_num, isr_data->offset, isr_data->addr); | 5620 | isr_data->class_num, isr_data->offset, isr_data->addr); |
5595 | 5621 | } | |
5596 | return ret; | 5622 | return ret; |
5597 | } | 5623 | } |
5598 | 5624 | ||
5599 | static int gk20a_gr_handle_illegal_class(struct gk20a *g, | 5625 | static int gk20a_gr_handle_illegal_class(struct gk20a *g, |
5600 | struct gr_gk20a_isr_data *isr_data) | 5626 | struct gr_gk20a_isr_data *isr_data) |
5601 | { | 5627 | { |
5602 | struct fifo_gk20a *f = &g->fifo; | ||
5603 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
5604 | gk20a_dbg_fn(""); | 5628 | gk20a_dbg_fn(""); |
5605 | gk20a_set_error_notifier(ch, | 5629 | gk20a_gr_set_error_notifier(g, isr_data, |
5606 | NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); | 5630 | NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); |
5607 | gk20a_err(dev_from_gk20a(g), | 5631 | gk20a_err(dev_from_gk20a(g), |
5608 | "invalid class 0x%08x, offset 0x%08x", | 5632 | "invalid class 0x%08x, offset 0x%08x", |
5609 | isr_data->class_num, isr_data->offset); | 5633 | isr_data->class_num, isr_data->offset); |
@@ -5626,6 +5650,8 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, | |||
5626 | gr_fecs_intr, isr_data->chid); | 5650 | gr_fecs_intr, isr_data->chid); |
5627 | 5651 | ||
5628 | if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { | 5652 | if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { |
5653 | gk20a_gr_set_error_notifier(g, isr_data, | ||
5654 | NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD); | ||
5629 | gk20a_err(dev_from_gk20a(g), | 5655 | gk20a_err(dev_from_gk20a(g), |
5630 | "firmware method error 0x%08x for offset 0x%04x", | 5656 | "firmware method error 0x%08x for offset 0x%04x", |
5631 | gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)), | 5657 | gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)), |
@@ -5640,35 +5666,34 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, | |||
5640 | static int gk20a_gr_handle_class_error(struct gk20a *g, | 5666 | static int gk20a_gr_handle_class_error(struct gk20a *g, |
5641 | struct gr_gk20a_isr_data *isr_data) | 5667 | struct gr_gk20a_isr_data *isr_data) |
5642 | { | 5668 | { |
5643 | struct fifo_gk20a *f = &g->fifo; | 5669 | u32 gr_class_error; |
5644 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | 5670 | |
5645 | u32 gr_class_error = | ||
5646 | gr_class_error_code_v(gk20a_readl(g, gr_class_error_r())); | ||
5647 | gk20a_dbg_fn(""); | 5671 | gk20a_dbg_fn(""); |
5648 | 5672 | ||
5649 | gk20a_set_error_notifier(ch, | 5673 | gr_class_error = |
5650 | NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); | 5674 | gr_class_error_code_v(gk20a_readl(g, gr_class_error_r())); |
5675 | gk20a_gr_set_error_notifier(g, isr_data, | ||
5676 | NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); | ||
5651 | gk20a_err(dev_from_gk20a(g), | 5677 | gk20a_err(dev_from_gk20a(g), |
5652 | "class error 0x%08x, offset 0x%08x, unhandled intr 0x%08x for channel %u\n", | 5678 | "class error 0x%08x, offset 0x%08x," |
5679 | " unhandled intr 0x%08x for channel %u\n", | ||
5653 | isr_data->class_num, isr_data->offset, | 5680 | isr_data->class_num, isr_data->offset, |
5654 | gr_class_error, ch->hw_chid); | 5681 | gr_class_error, isr_data->chid); |
5682 | |||
5655 | return -EINVAL; | 5683 | return -EINVAL; |
5656 | } | 5684 | } |
5657 | 5685 | ||
5658 | static int gk20a_gr_handle_firmware_method(struct gk20a *g, | 5686 | static int gk20a_gr_handle_firmware_method(struct gk20a *g, |
5659 | struct gr_gk20a_isr_data *isr_data) | 5687 | struct gr_gk20a_isr_data *isr_data) |
5660 | { | 5688 | { |
5661 | struct fifo_gk20a *f = &g->fifo; | ||
5662 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
5663 | |||
5664 | gk20a_dbg_fn(""); | 5689 | gk20a_dbg_fn(""); |
5665 | 5690 | ||
5666 | gk20a_set_error_notifier(ch, | 5691 | gk20a_gr_set_error_notifier(g, isr_data, |
5667 | NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); | 5692 | NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); |
5668 | gk20a_err(dev_from_gk20a(g), | 5693 | gk20a_err(dev_from_gk20a(g), |
5669 | "firmware method 0x%08x, offset 0x%08x for channel %u\n", | 5694 | "firmware method 0x%08x, offset 0x%08x for channel %u\n", |
5670 | isr_data->class_num, isr_data->offset, | 5695 | isr_data->class_num, isr_data->offset, |
5671 | ch->hw_chid); | 5696 | isr_data->chid); |
5672 | return -EINVAL; | 5697 | return -EINVAL; |
5673 | } | 5698 | } |
5674 | 5699 | ||
@@ -6404,7 +6429,7 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6404 | if (ch) | 6429 | if (ch) |
6405 | isr_data.chid = ch->hw_chid; | 6430 | isr_data.chid = ch->hw_chid; |
6406 | else | 6431 | else |
6407 | isr_data.chid = 0xffffffff; | 6432 | isr_data.chid = FIFO_INVAL_CHANNEL_ID; |
6408 | 6433 | ||
6409 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 6434 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
6410 | "channel %d: addr 0x%08x, " | 6435 | "channel %d: addr 0x%08x, " |
@@ -6507,24 +6532,22 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6507 | if (exception & gr_exception_gpc_m() && need_reset == 0) { | 6532 | if (exception & gr_exception_gpc_m() && need_reset == 0) { |
6508 | bool post_event = false; | 6533 | bool post_event = false; |
6509 | 6534 | ||
6510 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending"); | 6535 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
6511 | 6536 | "GPC exception pending"); | |
6512 | 6537 | ||
6513 | fault_ch = gk20a_fifo_channel_from_hw_chid(g, | 6538 | fault_ch = gk20a_fifo_channel_from_hw_chid(g, |
6514 | isr_data.chid); | 6539 | isr_data.chid); |
6515 | 6540 | ||
6541 | /*isr_data.chid can be ~0 and fault_ch can be NULL */ | ||
6516 | /* check if any gpc has an exception */ | 6542 | /* check if any gpc has an exception */ |
6517 | need_reset |= gk20a_gr_handle_gpc_exception(g, | 6543 | need_reset |= gk20a_gr_handle_gpc_exception(g, |
6518 | &post_event, fault_ch, &global_esr); | 6544 | &post_event, fault_ch, &global_esr); |
6519 | 6545 | ||
6520 | /* signal clients waiting on an event */ | 6546 | /* signal clients waiting on an event */ |
6521 | if (gk20a_gr_sm_debugger_attached(g) && post_event && fault_ch) { | 6547 | if (gk20a_gr_sm_debugger_attached(g) && |
6548 | post_event && fault_ch) { | ||
6522 | gk20a_dbg_gpu_post_events(fault_ch); | 6549 | gk20a_dbg_gpu_post_events(fault_ch); |
6523 | } | 6550 | } |
6524 | |||
6525 | if (need_reset && ch) | ||
6526 | gk20a_set_error_notifier(ch, | ||
6527 | NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); | ||
6528 | } | 6551 | } |
6529 | 6552 | ||
6530 | if (exception & gr_exception_ds_m()) { | 6553 | if (exception & gr_exception_ds_m()) { |
@@ -6536,6 +6559,12 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6536 | 6559 | ||
6537 | gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f()); | 6560 | gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f()); |
6538 | gr_intr &= ~gr_intr_exception_pending_f(); | 6561 | gr_intr &= ~gr_intr_exception_pending_f(); |
6562 | |||
6563 | if (need_reset) { | ||
6564 | gk20a_err(dev, "set gr exception notifier"); | ||
6565 | gk20a_gr_set_error_notifier(g, &isr_data, | ||
6566 | NVGPU_CHANNEL_GR_EXCEPTION); | ||
6567 | } | ||
6539 | } | 6568 | } |
6540 | 6569 | ||
6541 | if (need_reset) { | 6570 | if (need_reset) { |