summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2017-03-22 12:24:19 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-03-23 20:18:28 -0400
commit17df1921807a190d24dbd5b0e0f78192c2e3b772 (patch)
tree9f76ed1e5762e1e2cf57a374fb6cd39facf50af4
parentdf94d474a8200fc61969e2fc35d1b2a8d7fa5b8c (diff)
gpu: nvgpu: gr faults: do not depend on fake mmu fault notifier
Currently NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT is also being set in the error notifier for non-MMU faults. For fake MMU faults, i.e. the trigger-MMU-fault cases, make sure the proper notifiers are set and that the driver does not depend on sending the MMU error fault notifier. This change is also needed for t19x FIFO recovery. NVGPU_CHANNEL_GR_ERROR_SW_METHOD (12), NVGPU_CHANNEL_GR_EXCEPTION (13) and NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD (37) are new error notifiers. JIRA GPUT19X-7 Change-Id: Idee83e842c835bdba9eb18578aad0c372ea74c5d Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: http://git-master/r/1310563 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c5
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c101
-rw-r--r--include/uapi/linux/nvgpu.h17
3 files changed, 79 insertions, 44 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index b4589eaa..ad69cd79 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3272,7 +3272,10 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
3272struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, 3272struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
3273 u32 hw_chid) 3273 u32 hw_chid)
3274{ 3274{
3275 return g->fifo.channel + hw_chid; 3275 if (hw_chid != FIFO_INVAL_CHANNEL_ID)
3276 return g->fifo.channel + hw_chid;
3277 else
3278 return NULL;
3276} 3279}
3277 3280
3278#ifdef CONFIG_DEBUG_FS 3281#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 9f527edd..5121d6e9 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5555,14 +5555,40 @@ fail:
5555 return -EINVAL; 5555 return -EINVAL;
5556} 5556}
5557 5557
5558static void gk20a_gr_set_error_notifier(struct gk20a *g,
5559 struct gr_gk20a_isr_data *isr_data, u32 error_notifier)
5560{
5561 struct fifo_gk20a *f = &g->fifo;
5562 struct channel_gk20a *ch;
5563 struct tsg_gk20a *tsg;
5564 struct channel_gk20a *ch_tsg;
5565
5566 if (isr_data->chid != FIFO_INVAL_CHANNEL_ID) {
5567 ch = &f->channel[isr_data->chid];
5568
5569 if (gk20a_is_channel_marked_as_tsg(ch)) {
5570 tsg = &g->fifo.tsg[ch->tsgid];
5571 down_read(&tsg->ch_list_lock);
5572 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
5573 if (gk20a_channel_get(ch_tsg)) {
5574 gk20a_set_error_notifier(ch_tsg,
5575 error_notifier);
5576 gk20a_channel_put(ch_tsg);
5577 }
5578 }
5579 up_read(&tsg->ch_list_lock);
5580 } else {
5581 gk20a_set_error_notifier(ch, error_notifier);
5582 }
5583 }
5584}
5585
5558static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g, 5586static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
5559 struct gr_gk20a_isr_data *isr_data) 5587 struct gr_gk20a_isr_data *isr_data)
5560{ 5588{
5561 struct fifo_gk20a *f = &g->fifo;
5562 struct channel_gk20a *ch = &f->channel[isr_data->chid];
5563 gk20a_dbg_fn(""); 5589 gk20a_dbg_fn("");
5564 gk20a_set_error_notifier(ch, 5590 gk20a_gr_set_error_notifier(g, isr_data,
5565 NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); 5591 NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
5566 gk20a_err(dev_from_gk20a(g), 5592 gk20a_err(dev_from_gk20a(g),
5567 "gr semaphore timeout\n"); 5593 "gr semaphore timeout\n");
5568 return -EINVAL; 5594 return -EINVAL;
@@ -5571,11 +5597,9 @@ static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
5571static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g, 5597static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
5572 struct gr_gk20a_isr_data *isr_data) 5598 struct gr_gk20a_isr_data *isr_data)
5573{ 5599{
5574 struct fifo_gk20a *f = &g->fifo;
5575 struct channel_gk20a *ch = &f->channel[isr_data->chid];
5576 gk20a_dbg_fn(""); 5600 gk20a_dbg_fn("");
5577 gk20a_set_error_notifier(ch, 5601 gk20a_gr_set_error_notifier(g, isr_data,
5578 NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); 5602 NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
5579 /* This is an unrecoverable error, reset is needed */ 5603 /* This is an unrecoverable error, reset is needed */
5580 gk20a_err(dev_from_gk20a(g), 5604 gk20a_err(dev_from_gk20a(g),
5581 "gr semaphore timeout\n"); 5605 "gr semaphore timeout\n");
@@ -5588,22 +5612,22 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g,
5588 int ret = g->ops.gr.handle_sw_method(g, isr_data->addr, 5612 int ret = g->ops.gr.handle_sw_method(g, isr_data->addr,
5589 isr_data->class_num, isr_data->offset, 5613 isr_data->class_num, isr_data->offset,
5590 isr_data->data_lo); 5614 isr_data->data_lo);
5591 if (ret) 5615 if (ret) {
5616 gk20a_gr_set_error_notifier(g, isr_data,
5617 NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
5592 gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x" 5618 gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x"
5593 ", offset 0x%08x address 0x%08x\n", 5619 ", offset 0x%08x address 0x%08x\n",
5594 isr_data->class_num, isr_data->offset, isr_data->addr); 5620 isr_data->class_num, isr_data->offset, isr_data->addr);
5595 5621 }
5596 return ret; 5622 return ret;
5597} 5623}
5598 5624
5599static int gk20a_gr_handle_illegal_class(struct gk20a *g, 5625static int gk20a_gr_handle_illegal_class(struct gk20a *g,
5600 struct gr_gk20a_isr_data *isr_data) 5626 struct gr_gk20a_isr_data *isr_data)
5601{ 5627{
5602 struct fifo_gk20a *f = &g->fifo;
5603 struct channel_gk20a *ch = &f->channel[isr_data->chid];
5604 gk20a_dbg_fn(""); 5628 gk20a_dbg_fn("");
5605 gk20a_set_error_notifier(ch, 5629 gk20a_gr_set_error_notifier(g, isr_data,
5606 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 5630 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
5607 gk20a_err(dev_from_gk20a(g), 5631 gk20a_err(dev_from_gk20a(g),
5608 "invalid class 0x%08x, offset 0x%08x", 5632 "invalid class 0x%08x, offset 0x%08x",
5609 isr_data->class_num, isr_data->offset); 5633 isr_data->class_num, isr_data->offset);
@@ -5626,6 +5650,8 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
5626 gr_fecs_intr, isr_data->chid); 5650 gr_fecs_intr, isr_data->chid);
5627 5651
5628 if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { 5652 if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) {
5653 gk20a_gr_set_error_notifier(g, isr_data,
5654 NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD);
5629 gk20a_err(dev_from_gk20a(g), 5655 gk20a_err(dev_from_gk20a(g),
5630 "firmware method error 0x%08x for offset 0x%04x", 5656 "firmware method error 0x%08x for offset 0x%04x",
5631 gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)), 5657 gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
@@ -5640,35 +5666,34 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
5640static int gk20a_gr_handle_class_error(struct gk20a *g, 5666static int gk20a_gr_handle_class_error(struct gk20a *g,
5641 struct gr_gk20a_isr_data *isr_data) 5667 struct gr_gk20a_isr_data *isr_data)
5642{ 5668{
5643 struct fifo_gk20a *f = &g->fifo; 5669 u32 gr_class_error;
5644 struct channel_gk20a *ch = &f->channel[isr_data->chid]; 5670
5645 u32 gr_class_error =
5646 gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
5647 gk20a_dbg_fn(""); 5671 gk20a_dbg_fn("");
5648 5672
5649 gk20a_set_error_notifier(ch, 5673 gr_class_error =
5650 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 5674 gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
5675 gk20a_gr_set_error_notifier(g, isr_data,
5676 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
5651 gk20a_err(dev_from_gk20a(g), 5677 gk20a_err(dev_from_gk20a(g),
5652 "class error 0x%08x, offset 0x%08x, unhandled intr 0x%08x for channel %u\n", 5678 "class error 0x%08x, offset 0x%08x,"
5679 " unhandled intr 0x%08x for channel %u\n",
5653 isr_data->class_num, isr_data->offset, 5680 isr_data->class_num, isr_data->offset,
5654 gr_class_error, ch->hw_chid); 5681 gr_class_error, isr_data->chid);
5682
5655 return -EINVAL; 5683 return -EINVAL;
5656} 5684}
5657 5685
5658static int gk20a_gr_handle_firmware_method(struct gk20a *g, 5686static int gk20a_gr_handle_firmware_method(struct gk20a *g,
5659 struct gr_gk20a_isr_data *isr_data) 5687 struct gr_gk20a_isr_data *isr_data)
5660{ 5688{
5661 struct fifo_gk20a *f = &g->fifo;
5662 struct channel_gk20a *ch = &f->channel[isr_data->chid];
5663
5664 gk20a_dbg_fn(""); 5689 gk20a_dbg_fn("");
5665 5690
5666 gk20a_set_error_notifier(ch, 5691 gk20a_gr_set_error_notifier(g, isr_data,
5667 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 5692 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
5668 gk20a_err(dev_from_gk20a(g), 5693 gk20a_err(dev_from_gk20a(g),
5669 "firmware method 0x%08x, offset 0x%08x for channel %u\n", 5694 "firmware method 0x%08x, offset 0x%08x for channel %u\n",
5670 isr_data->class_num, isr_data->offset, 5695 isr_data->class_num, isr_data->offset,
5671 ch->hw_chid); 5696 isr_data->chid);
5672 return -EINVAL; 5697 return -EINVAL;
5673} 5698}
5674 5699
@@ -6404,7 +6429,7 @@ int gk20a_gr_isr(struct gk20a *g)
6404 if (ch) 6429 if (ch)
6405 isr_data.chid = ch->hw_chid; 6430 isr_data.chid = ch->hw_chid;
6406 else 6431 else
6407 isr_data.chid = 0xffffffff; 6432 isr_data.chid = FIFO_INVAL_CHANNEL_ID;
6408 6433
6409 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 6434 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
6410 "channel %d: addr 0x%08x, " 6435 "channel %d: addr 0x%08x, "
@@ -6507,24 +6532,22 @@ int gk20a_gr_isr(struct gk20a *g)
6507 if (exception & gr_exception_gpc_m() && need_reset == 0) { 6532 if (exception & gr_exception_gpc_m() && need_reset == 0) {
6508 bool post_event = false; 6533 bool post_event = false;
6509 6534
6510 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending"); 6535 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
6511 6536 "GPC exception pending");
6512 6537
6513 fault_ch = gk20a_fifo_channel_from_hw_chid(g, 6538 fault_ch = gk20a_fifo_channel_from_hw_chid(g,
6514 isr_data.chid); 6539 isr_data.chid);
6515 6540
6541 /*isr_data.chid can be ~0 and fault_ch can be NULL */
6516 /* check if any gpc has an exception */ 6542 /* check if any gpc has an exception */
6517 need_reset |= gk20a_gr_handle_gpc_exception(g, 6543 need_reset |= gk20a_gr_handle_gpc_exception(g,
6518 &post_event, fault_ch, &global_esr); 6544 &post_event, fault_ch, &global_esr);
6519 6545
6520 /* signal clients waiting on an event */ 6546 /* signal clients waiting on an event */
6521 if (gk20a_gr_sm_debugger_attached(g) && post_event && fault_ch) { 6547 if (gk20a_gr_sm_debugger_attached(g) &&
6548 post_event && fault_ch) {
6522 gk20a_dbg_gpu_post_events(fault_ch); 6549 gk20a_dbg_gpu_post_events(fault_ch);
6523 } 6550 }
6524
6525 if (need_reset && ch)
6526 gk20a_set_error_notifier(ch,
6527 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
6528 } 6551 }
6529 6552
6530 if (exception & gr_exception_ds_m()) { 6553 if (exception & gr_exception_ds_m()) {
@@ -6536,6 +6559,12 @@ int gk20a_gr_isr(struct gk20a *g)
6536 6559
6537 gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f()); 6560 gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f());
6538 gr_intr &= ~gr_intr_exception_pending_f(); 6561 gr_intr &= ~gr_intr_exception_pending_f();
6562
6563 if (need_reset) {
6564 gk20a_err(dev, "set gr exception notifier");
6565 gk20a_gr_set_error_notifier(g, &isr_data,
6566 NVGPU_CHANNEL_GR_EXCEPTION);
6567 }
6539 } 6568 }
6540 6569
6541 if (need_reset) { 6570 if (need_reset) {
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 75011998..ca9b49e6 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1470,13 +1470,16 @@ struct nvgpu_notification {
1470 __u32 nanoseconds[2]; /* nanoseconds since Jan. 1, 1970 */ 1470 __u32 nanoseconds[2]; /* nanoseconds since Jan. 1, 1970 */
1471 } time_stamp; /* -0007 */ 1471 } time_stamp; /* -0007 */
1472 __u32 info32; /* info returned depends on method 0008-000b */ 1472 __u32 info32; /* info returned depends on method 0008-000b */
1473#define NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT 8 1473#define NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT 8
1474#define NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY 13 1474#define NVGPU_CHANNEL_GR_ERROR_SW_METHOD 12
1475#define NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT 24 1475#define NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY 13
1476#define NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY 25 1476#define NVGPU_CHANNEL_GR_EXCEPTION 13
1477#define NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT 31 1477#define NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT 24
1478#define NVGPU_CHANNEL_PBDMA_ERROR 32 1478#define NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY 25
1479#define NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR 43 1479#define NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT 31
1480#define NVGPU_CHANNEL_PBDMA_ERROR 32
1481#define NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD 37
1482#define NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR 43
1480#define NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH 80 1483#define NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH 80
1481 __u16 info16; /* info returned depends on method 000c-000d */ 1484 __u16 info16; /* info returned depends on method 000c-000d */
1482 __u16 status; /* user sets bit 15, NV sets status 000e-000f */ 1485 __u16 status; /* user sets bit 15, NV sets status 000e-000f */