summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2017-11-06 08:44:23 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2017-11-27 12:23:11 -0500
commitc6b9177cfff8a41c3c3c78f5c47c7df677ced58c (patch)
treeb402ccda611d85ec88f8557cb26d949617d92466 /drivers/gpu
parenta0cea295e7b7f917c6b52221ab34c3a6111fb224 (diff)
gpu: nvgpu: define error_notifiers in common code
All the Linux-specific error_notifier codes are defined in the Linux-specific header file <uapi/linux/nvgpu.h> and are used throughout the common driver. Since they are defined in a Linux-specific file, all uses of those error_notifiers must be moved into Linux-specific code only. Hence, define new error_notifiers in include/nvgpu/error_notifier.h and use them in the common code. Add a new API, nvgpu_error_notifier_to_channel_notifier(), to convert a common error_notifier of the form NVGPU_ERR_NOTIFIER_* to a Linux-specific error notifier of the form NVGPU_CHANNEL_*. Any future addition to the error notifiers requires updating both forms of error notifier. Move all error-notifier-related metadata from channel_gk20a (common code) to the Linux-specific structure nvgpu_channel_linux, and update all accesses to this data to use the new structure instead of channel_gk20a. Move and rename the APIs below to a Linux-specific file and declare them in error_notifier.h: nvgpu_set_error_notifier_locked(), nvgpu_set_error_notifier(), nvgpu_is_error_notifier_set(). Add the new API nvgpu_set_error_notifier_if_empty() and use it in fifo_vgpu.c. Include <nvgpu/error_notifier.h> wherever the new error_notifier codes are used. NVGPU-426 Change-Id: Iaa5bfc150e6e9ec17d797d445c2d6407afe9f4bd Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1593361 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/nvgpu/common/linux/channel.c131
-rw-r--r--drivers/gpu/nvgpu/common/linux/channel.h10
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_channel.c35
-rw-r--r--drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c34
-rw-r--r--drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c25
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c65
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.h7
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c46
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c21
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/error_notifier.h49
10 files changed, 285 insertions, 138 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index 8366ed88..1ae2d444 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -17,6 +17,7 @@
17#include <nvgpu/enabled.h> 17#include <nvgpu/enabled.h>
18#include <nvgpu/debug.h> 18#include <nvgpu/debug.h>
19#include <nvgpu/ltc.h> 19#include <nvgpu/ltc.h>
20#include <nvgpu/error_notifier.h>
20 21
21/* 22/*
22 * This is required for nvgpu_vm_find_buf() which is used in the tracing 23 * This is required for nvgpu_vm_find_buf() which is used in the tracing
@@ -37,6 +38,124 @@
37#include <trace/events/gk20a.h> 38#include <trace/events/gk20a.h>
38#include <uapi/linux/nvgpu.h> 39#include <uapi/linux/nvgpu.h>
39 40
41/*
42 * API to convert error_notifiers in common code and of the form
43 * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user
44 * space and of the form NVGPU_CHANNEL_*
45 */
46static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
47{
48 switch (error_notifier) {
49 case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
50 return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
51 case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
52 return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
53 case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
54 return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
55 case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
56 return NVGPU_CHANNEL_GR_EXCEPTION;
57 case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
58 return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
59 case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
60 return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
61 case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
62 return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
63 case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
64 return NVGPU_CHANNEL_PBDMA_ERROR;
65 case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
66 return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
67 case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
68 return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
69 case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
70 return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
71 }
72
73 pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
74
75 return error_notifier;
76}
77
78/**
79 * nvgpu_set_error_notifier_locked()
80 * Should be called with priv->error_notifier.mutex held (priv = ch->os_priv)
81 *
82 * error should be of the form NVGPU_ERR_NOTIFIER_*
83 */
84void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
85{
86 struct nvgpu_channel_linux *priv = ch->os_priv;
87
88 error = nvgpu_error_notifier_to_channel_notifier(error);
89
90 if (priv->error_notifier.dmabuf) {
91 struct nvgpu_notification *notification =
92 priv->error_notifier.notification;
93 struct timespec time_data;
94 u64 nsec;
95
96 getnstimeofday(&time_data);
97 nsec = ((u64)time_data.tv_sec) * 1000000000u +
98 (u64)time_data.tv_nsec;
99 notification->time_stamp.nanoseconds[0] =
100 (u32)nsec;
101 notification->time_stamp.nanoseconds[1] =
102 (u32)(nsec >> 32);
103 notification->info32 = error;
104 notification->status = 0xffff;
105
106 nvgpu_err(ch->g,
107 "error notifier set to %d for ch %d", error, ch->chid);
108 }
109}
110
111/* error should be of the form NVGPU_ERR_NOTIFIER_* */
112void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
113{
114 struct nvgpu_channel_linux *priv = ch->os_priv;
115
116 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
117 nvgpu_set_error_notifier_locked(ch, error);
118 nvgpu_mutex_release(&priv->error_notifier.mutex);
119}
120
121void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
122{
123 struct nvgpu_channel_linux *priv = ch->os_priv;
124
125 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
126 if (priv->error_notifier.dmabuf) {
127 struct nvgpu_notification *notification =
128 priv->error_notifier.notification;
129
130 /* Don't overwrite error flag if it is already set */
131 if (notification->status != 0xffff)
132 nvgpu_set_error_notifier_locked(ch, error);
133 }
134 nvgpu_mutex_release(&priv->error_notifier.mutex);
135}
136
137/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */
138bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
139{
140 struct nvgpu_channel_linux *priv = ch->os_priv;
141 bool notifier_set = false;
142
143 error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
144
145 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
146 if (priv->error_notifier.dmabuf) {
147 struct nvgpu_notification *notification =
148 priv->error_notifier.notification;
149 u32 err = notification->info32;
150
151 if (err == error_notifier)
152 notifier_set = true;
153 }
154 nvgpu_mutex_release(&priv->error_notifier.mutex);
155
156 return notifier_set;
157}
158
40static void gk20a_channel_update_runcb_fn(struct work_struct *work) 159static void gk20a_channel_update_runcb_fn(struct work_struct *work)
41{ 160{
42 struct nvgpu_channel_completion_cb *completion_cb = 161 struct nvgpu_channel_completion_cb *completion_cb =
@@ -128,6 +247,7 @@ static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
128static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) 247static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
129{ 248{
130 struct nvgpu_channel_linux *priv; 249 struct nvgpu_channel_linux *priv;
250 int err;
131 251
132 priv = nvgpu_kzalloc(g, sizeof(*priv)); 252 priv = nvgpu_kzalloc(g, sizeof(*priv));
133 if (!priv) 253 if (!priv)
@@ -136,6 +256,12 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
136 ch->os_priv = priv; 256 ch->os_priv = priv;
137 priv->ch = ch; 257 priv->ch = ch;
138 258
259 err = nvgpu_mutex_init(&priv->error_notifier.mutex);
260 if (err) {
261 nvgpu_kfree(g, priv);
262 return err;
263 }
264
139 nvgpu_channel_work_completion_init(ch); 265 nvgpu_channel_work_completion_init(ch);
140 266
141 return 0; 267 return 0;
@@ -143,7 +269,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
143 269
144static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) 270static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
145{ 271{
146 nvgpu_kfree(g, ch->os_priv); 272 struct nvgpu_channel_linux *priv = ch->os_priv;
273
274 nvgpu_mutex_destroy(&priv->error_notifier.mutex);
275 nvgpu_kfree(g, priv);
147} 276}
148 277
149int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) 278int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h
index ba1935f3..a4df75d6 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.h
+++ b/drivers/gpu/nvgpu/common/linux/channel.h
@@ -42,10 +42,20 @@ struct nvgpu_channel_completion_cb {
42 struct work_struct work; 42 struct work_struct work;
43}; 43};
44 44
45struct nvgpu_error_notifier {
46 struct dma_buf *dmabuf;
47 void *vaddr;
48
49 struct nvgpu_notification *notification;
50
51 struct nvgpu_mutex mutex;
52};
53
45struct nvgpu_channel_linux { 54struct nvgpu_channel_linux {
46 struct channel_gk20a *ch; 55 struct channel_gk20a *ch;
47 56
48 struct nvgpu_channel_completion_cb completion_cb; 57 struct nvgpu_channel_completion_cb completion_cb;
58 struct nvgpu_error_notifier error_notifier;
49}; 59};
50 60
51int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); 61int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 0ac50140..67bec31b 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -30,6 +30,7 @@
30#include <nvgpu/list.h> 30#include <nvgpu/list.h>
31#include <nvgpu/debug.h> 31#include <nvgpu/debug.h>
32#include <nvgpu/enabled.h> 32#include <nvgpu/enabled.h>
33#include <nvgpu/error_notifier.h>
33 34
34#include "gk20a/gk20a.h" 35#include "gk20a/gk20a.h"
35#include "gk20a/dbg_gpu_gk20a.h" 36#include "gk20a/dbg_gpu_gk20a.h"
@@ -227,15 +228,17 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
227 228
228static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) 229static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
229{ 230{
230 nvgpu_mutex_acquire(&ch->error_notifier_mutex); 231 struct nvgpu_channel_linux *priv = ch->os_priv;
231 if (ch->error_notifier_ref) { 232
232 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va); 233 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
233 dma_buf_put(ch->error_notifier_ref); 234 if (priv->error_notifier.dmabuf) {
234 ch->error_notifier_ref = NULL; 235 dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
235 ch->error_notifier = NULL; 236 dma_buf_put(priv->error_notifier.dmabuf);
236 ch->error_notifier_va = NULL; 237 priv->error_notifier.dmabuf = NULL;
238 priv->error_notifier.notification = NULL;
239 priv->error_notifier.vaddr = NULL;
237 } 240 }
238 nvgpu_mutex_release(&ch->error_notifier_mutex); 241 nvgpu_mutex_release(&priv->error_notifier.mutex);
239} 242}
240 243
241static int gk20a_init_error_notifier(struct channel_gk20a *ch, 244static int gk20a_init_error_notifier(struct channel_gk20a *ch,
@@ -244,6 +247,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
244 struct dma_buf *dmabuf; 247 struct dma_buf *dmabuf;
245 void *va; 248 void *va;
246 u64 end = args->offset + sizeof(struct nvgpu_notification); 249 u64 end = args->offset + sizeof(struct nvgpu_notification);
250 struct nvgpu_channel_linux *priv = ch->os_priv;
247 251
248 if (!args->mem) { 252 if (!args->mem) {
249 pr_err("gk20a_init_error_notifier: invalid memory handle\n"); 253 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
@@ -273,14 +277,15 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
273 return -ENOMEM; 277 return -ENOMEM;
274 } 278 }
275 279
276 ch->error_notifier = va + args->offset; 280 priv->error_notifier.notification = va + args->offset;
277 ch->error_notifier_va = va; 281 priv->error_notifier.vaddr = va;
278 memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification)); 282 memset(priv->error_notifier.notification, 0,
283 sizeof(struct nvgpu_notification));
279 284
280 /* set channel notifiers pointer */ 285 /* set channel notifiers pointer */
281 nvgpu_mutex_acquire(&ch->error_notifier_mutex); 286 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
282 ch->error_notifier_ref = dmabuf; 287 priv->error_notifier.dmabuf = dmabuf;
283 nvgpu_mutex_release(&ch->error_notifier_mutex); 288 nvgpu_mutex_release(&priv->error_notifier.mutex);
284 289
285 return 0; 290 return 0;
286} 291}
@@ -1361,7 +1366,7 @@ long gk20a_channel_ioctl(struct file *filp,
1361 break; 1366 break;
1362 } 1367 }
1363 err = ch->g->ops.fifo.force_reset_ch(ch, 1368 err = ch->g->ops.fifo.force_reset_ch(ch,
1364 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true); 1369 NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
1365 gk20a_idle(ch->g); 1370 gk20a_idle(ch->g);
1366 break; 1371 break;
1367 case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL: 1372 case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL:
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c
index cdcecca5..7a2a02e9 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c
@@ -25,10 +25,13 @@
25#include <nvgpu/atomic.h> 25#include <nvgpu/atomic.h>
26#include <nvgpu/bug.h> 26#include <nvgpu/bug.h>
27#include <nvgpu/barrier.h> 27#include <nvgpu/barrier.h>
28#include <nvgpu/error_notifier.h>
28 29
29#include "vgpu.h" 30#include "vgpu.h"
30#include "fifo_vgpu.h" 31#include "fifo_vgpu.h"
31 32
33#include "common/linux/channel.h"
34
32#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h> 35#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
33#include <nvgpu/hw/gk20a/hw_ram_gk20a.h> 36#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
34 37
@@ -691,7 +694,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
691 694
692 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { 695 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
693 if (gk20a_channel_get(ch_tsg)) { 696 if (gk20a_channel_get(ch_tsg)) {
694 gk20a_set_error_notifier(ch_tsg, err_code); 697 nvgpu_set_error_notifier(ch_tsg, err_code);
695 ch_tsg->has_timedout = true; 698 ch_tsg->has_timedout = true;
696 gk20a_channel_put(ch_tsg); 699 gk20a_channel_put(ch_tsg);
697 } 700 }
@@ -699,7 +702,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
699 702
700 nvgpu_rwsem_up_read(&tsg->ch_list_lock); 703 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
701 } else { 704 } else {
702 gk20a_set_error_notifier(ch, err_code); 705 nvgpu_set_error_notifier(ch, err_code);
703 ch->has_timedout = true; 706 ch->has_timedout = true;
704 } 707 }
705 708
@@ -716,19 +719,14 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
716static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g, 719static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
717 struct channel_gk20a *ch) 720 struct channel_gk20a *ch)
718{ 721{
719 nvgpu_mutex_acquire(&ch->error_notifier_mutex); 722 /*
720 if (ch->error_notifier_ref) { 723 * If error code is already set, this mmu fault
721 if (ch->error_notifier->status == 0xffff) { 724 * was triggered as part of recovery from other
722 /* If error code is already set, this mmu fault 725 * error condition.
723 * was triggered as part of recovery from other 726 * Don't overwrite error flag.
724 * error condition. 727 */
725 * Don't overwrite error flag. */ 728 nvgpu_set_error_notifier_if_empty(ch,
726 } else { 729 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
727 gk20a_set_error_notifier_locked(ch,
728 NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
729 }
730 }
731 nvgpu_mutex_release(&ch->error_notifier_mutex);
732 730
733 /* mark channel as faulted */ 731 /* mark channel as faulted */
734 ch->has_timedout = true; 732 ch->has_timedout = true;
@@ -778,11 +776,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
778 776
779 switch (info->type) { 777 switch (info->type) {
780 case TEGRA_VGPU_FIFO_INTR_PBDMA: 778 case TEGRA_VGPU_FIFO_INTR_PBDMA:
781 gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR); 779 nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR);
782 break; 780 break;
783 case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT: 781 case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT:
784 gk20a_set_error_notifier(ch, 782 nvgpu_set_error_notifier(ch,
785 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 783 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
786 break; 784 break;
787 case TEGRA_VGPU_FIFO_INTR_MMU_FAULT: 785 case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:
788 vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch); 786 vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch);
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
index dd2ae306..33551d17 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
@@ -20,6 +20,7 @@
20 20
21#include <nvgpu/kmem.h> 21#include <nvgpu/kmem.h>
22#include <nvgpu/bug.h> 22#include <nvgpu/bug.h>
23#include <nvgpu/error_notifier.h>
23 24
24#include "vgpu.h" 25#include "vgpu.h"
25#include "gr_vgpu.h" 26#include "gr_vgpu.h"
@@ -941,31 +942,31 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
941 nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); 942 nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
942 break; 943 break;
943 case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT: 944 case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
944 gk20a_set_error_notifier(ch, 945 nvgpu_set_error_notifier(ch,
945 NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); 946 NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
946 break; 947 break;
947 case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY: 948 case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
948 gk20a_set_error_notifier(ch, 949 nvgpu_set_error_notifier(ch,
949 NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); 950 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
950 case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD: 951 case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
951 break; 952 break;
952 case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS: 953 case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
953 gk20a_set_error_notifier(ch, 954 nvgpu_set_error_notifier(ch,
954 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 955 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
955 break; 956 break;
956 case TEGRA_VGPU_GR_INTR_FECS_ERROR: 957 case TEGRA_VGPU_GR_INTR_FECS_ERROR:
957 break; 958 break;
958 case TEGRA_VGPU_GR_INTR_CLASS_ERROR: 959 case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
959 gk20a_set_error_notifier(ch, 960 nvgpu_set_error_notifier(ch,
960 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 961 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
961 break; 962 break;
962 case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD: 963 case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
963 gk20a_set_error_notifier(ch, 964 nvgpu_set_error_notifier(ch,
964 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 965 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
965 break; 966 break;
966 case TEGRA_VGPU_GR_INTR_EXCEPTION: 967 case TEGRA_VGPU_GR_INTR_EXCEPTION:
967 gk20a_set_error_notifier(ch, 968 nvgpu_set_error_notifier(ch,
968 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 969 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
969 break; 970 break;
970 case TEGRA_VGPU_GR_INTR_SM_EXCEPTION: 971 case TEGRA_VGPU_GR_INTR_SM_EXCEPTION:
971 gk20a_dbg_gpu_post_events(ch); 972 gk20a_dbg_gpu_post_events(ch);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index e01d6cdb..a0415861 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -44,6 +44,7 @@
44#include <nvgpu/ltc.h> 44#include <nvgpu/ltc.h>
45#include <nvgpu/barrier.h> 45#include <nvgpu/barrier.h>
46#include <nvgpu/ctxsw_trace.h> 46#include <nvgpu/ctxsw_trace.h>
47#include <nvgpu/error_notifier.h>
47 48
48#include "gk20a.h" 49#include "gk20a.h"
49#include "dbg_gpu_gk20a.h" 50#include "dbg_gpu_gk20a.h"
@@ -339,37 +340,6 @@ int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
339 return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true); 340 return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true);
340} 341}
341 342
342/**
343 * gk20a_set_error_notifier_locked()
344 * Should be called with ch->error_notifier_mutex held
345 */
346void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error)
347{
348 if (ch->error_notifier_ref) {
349 struct timespec time_data;
350 u64 nsec;
351 getnstimeofday(&time_data);
352 nsec = ((u64)time_data.tv_sec) * 1000000000u +
353 (u64)time_data.tv_nsec;
354 ch->error_notifier->time_stamp.nanoseconds[0] =
355 (u32)nsec;
356 ch->error_notifier->time_stamp.nanoseconds[1] =
357 (u32)(nsec >> 32);
358 ch->error_notifier->info32 = error;
359 ch->error_notifier->status = 0xffff;
360
361 nvgpu_err(ch->g,
362 "error notifier set to %d for ch %d", error, ch->chid);
363 }
364}
365
366void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
367{
368 nvgpu_mutex_acquire(&ch->error_notifier_mutex);
369 gk20a_set_error_notifier_locked(ch, error);
370 nvgpu_mutex_release(&ch->error_notifier_mutex);
371}
372
373static void gk20a_wait_until_counter_is_N( 343static void gk20a_wait_until_counter_is_N(
374 struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value, 344 struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
375 struct nvgpu_cond *c, const char *caller, const char *counter_name) 345 struct nvgpu_cond *c, const char *caller, const char *counter_name)
@@ -1550,7 +1520,7 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
1550 gk20a_gr_debug_dump(g); 1520 gk20a_gr_debug_dump(g);
1551 1521
1552 g->ops.fifo.force_reset_ch(ch, 1522 g->ops.fifo.force_reset_ch(ch,
1553 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); 1523 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, true);
1554} 1524}
1555 1525
1556/** 1526/**
@@ -2210,53 +2180,48 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
2210 err = nvgpu_mutex_init(&c->ioctl_lock); 2180 err = nvgpu_mutex_init(&c->ioctl_lock);
2211 if (err) 2181 if (err)
2212 return err; 2182 return err;
2213 err = nvgpu_mutex_init(&c->error_notifier_mutex);
2214 if (err)
2215 goto fail_1;
2216 err = nvgpu_mutex_init(&c->joblist.cleanup_lock); 2183 err = nvgpu_mutex_init(&c->joblist.cleanup_lock);
2217 if (err) 2184 if (err)
2218 goto fail_2; 2185 goto fail_1;
2219 err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); 2186 err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
2220 if (err) 2187 if (err)
2221 goto fail_3; 2188 goto fail_2;
2222 err = nvgpu_mutex_init(&c->sync_lock); 2189 err = nvgpu_mutex_init(&c->sync_lock);
2223 if (err) 2190 if (err)
2224 goto fail_4; 2191 goto fail_3;
2225#if defined(CONFIG_GK20A_CYCLE_STATS) 2192#if defined(CONFIG_GK20A_CYCLE_STATS)
2226 err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); 2193 err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
2227 if (err) 2194 if (err)
2228 goto fail_5; 2195 goto fail_4;
2229 err = nvgpu_mutex_init(&c->cs_client_mutex); 2196 err = nvgpu_mutex_init(&c->cs_client_mutex);
2230 if (err) 2197 if (err)
2231 goto fail_6; 2198 goto fail_5;
2232#endif 2199#endif
2233 err = nvgpu_mutex_init(&c->event_id_list_lock); 2200 err = nvgpu_mutex_init(&c->event_id_list_lock);
2234 if (err) 2201 if (err)
2235 goto fail_7; 2202 goto fail_6;
2236 err = nvgpu_mutex_init(&c->dbg_s_lock); 2203 err = nvgpu_mutex_init(&c->dbg_s_lock);
2237 if (err) 2204 if (err)
2238 goto fail_8; 2205 goto fail_7;
2239 2206
2240 nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); 2207 nvgpu_list_add(&c->free_chs, &g->fifo.free_chs);
2241 2208
2242 return 0; 2209 return 0;
2243 2210
2244fail_8:
2245 nvgpu_mutex_destroy(&c->event_id_list_lock);
2246fail_7: 2211fail_7:
2212 nvgpu_mutex_destroy(&c->event_id_list_lock);
2213fail_6:
2247#if defined(CONFIG_GK20A_CYCLE_STATS) 2214#if defined(CONFIG_GK20A_CYCLE_STATS)
2248 nvgpu_mutex_destroy(&c->cs_client_mutex); 2215 nvgpu_mutex_destroy(&c->cs_client_mutex);
2249fail_6:
2250 nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
2251fail_5: 2216fail_5:
2217 nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
2218fail_4:
2252#endif 2219#endif
2253 nvgpu_mutex_destroy(&c->sync_lock); 2220 nvgpu_mutex_destroy(&c->sync_lock);
2254fail_4:
2255 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
2256fail_3: 2221fail_3:
2257 nvgpu_mutex_destroy(&c->joblist.cleanup_lock); 2222 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
2258fail_2: 2223fail_2:
2259 nvgpu_mutex_destroy(&c->error_notifier_mutex); 2224 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
2260fail_1: 2225fail_1:
2261 nvgpu_mutex_destroy(&c->ioctl_lock); 2226 nvgpu_mutex_destroy(&c->ioctl_lock);
2262 2227
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 8c9095b2..ff96d0d7 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -273,11 +273,6 @@ struct channel_gk20a {
273 bool timeout_debug_dump; 273 bool timeout_debug_dump;
274 unsigned int timeslice_us; 274 unsigned int timeslice_us;
275 275
276 struct dma_buf *error_notifier_ref;
277 struct nvgpu_notification *error_notifier;
278 void *error_notifier_va;
279 struct nvgpu_mutex error_notifier_mutex;
280
281 struct nvgpu_mutex sync_lock; 276 struct nvgpu_mutex sync_lock;
282 struct gk20a_channel_sync *sync; 277 struct gk20a_channel_sync *sync;
283 278
@@ -335,8 +330,6 @@ bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
335void gk20a_disable_channel(struct channel_gk20a *ch); 330void gk20a_disable_channel(struct channel_gk20a *ch);
336void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt); 331void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
337void gk20a_channel_abort_clean_up(struct channel_gk20a *ch); 332void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
338void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
339void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error);
340void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); 333void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
341int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size, 334int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
342 struct priv_cmd_entry *entry); 335 struct priv_cmd_entry *entry);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 648a8c86..38aecc93 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -39,6 +39,7 @@
39#include <nvgpu/nvhost.h> 39#include <nvgpu/nvhost.h>
40#include <nvgpu/barrier.h> 40#include <nvgpu/barrier.h>
41#include <nvgpu/ctxsw_trace.h> 41#include <nvgpu/ctxsw_trace.h>
42#include <nvgpu/error_notifier.h>
42 43
43#include "gk20a.h" 44#include "gk20a.h"
44#include "mm_gk20a.h" 45#include "mm_gk20a.h"
@@ -557,7 +558,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
557 nvgpu_mutex_destroy(&tsg->event_id_list_lock); 558 nvgpu_mutex_destroy(&tsg->event_id_list_lock);
558 559
559 nvgpu_mutex_destroy(&c->ioctl_lock); 560 nvgpu_mutex_destroy(&c->ioctl_lock);
560 nvgpu_mutex_destroy(&c->error_notifier_mutex);
561 nvgpu_mutex_destroy(&c->joblist.cleanup_lock); 561 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
562 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); 562 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
563 nvgpu_mutex_destroy(&c->sync_lock); 563 nvgpu_mutex_destroy(&c->sync_lock);
@@ -1339,14 +1339,10 @@ static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
1339 if (!refch) 1339 if (!refch)
1340 return verbose; 1340 return verbose;
1341 1341
1342 nvgpu_mutex_acquire(&refch->error_notifier_mutex); 1342 if (nvgpu_is_error_notifier_set(refch,
1343 if (refch->error_notifier_ref) { 1343 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT))
1344 u32 err = refch->error_notifier->info32; 1344 verbose = refch->timeout_debug_dump;
1345 1345
1346 if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
1347 verbose = refch->timeout_debug_dump;
1348 }
1349 nvgpu_mutex_release(&refch->error_notifier_mutex);
1350 return verbose; 1346 return verbose;
1351} 1347}
1352 1348
@@ -1400,8 +1396,8 @@ void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
1400{ 1396{
1401 nvgpu_err(g, 1397 nvgpu_err(g,
1402 "channel %d generated a mmu fault", refch->chid); 1398 "channel %d generated a mmu fault", refch->chid);
1403 gk20a_set_error_notifier(refch, 1399 nvgpu_set_error_notifier(refch,
1404 NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); 1400 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
1405} 1401}
1406 1402
1407void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, 1403void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -1939,7 +1935,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
1939 1935
1940 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { 1936 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
1941 if (gk20a_channel_get(ch_tsg)) { 1937 if (gk20a_channel_get(ch_tsg)) {
1942 gk20a_set_error_notifier(ch_tsg, err_code); 1938 nvgpu_set_error_notifier(ch_tsg, err_code);
1943 gk20a_channel_put(ch_tsg); 1939 gk20a_channel_put(ch_tsg);
1944 } 1940 }
1945 } 1941 }
@@ -1947,7 +1943,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
1947 nvgpu_rwsem_up_read(&tsg->ch_list_lock); 1943 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1948 gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); 1944 gk20a_fifo_recover_tsg(g, ch->tsgid, verbose);
1949 } else { 1945 } else {
1950 gk20a_set_error_notifier(ch, err_code); 1946 nvgpu_set_error_notifier(ch, err_code);
1951 gk20a_fifo_recover_ch(g, ch->chid, verbose); 1947 gk20a_fifo_recover_ch(g, ch->chid, verbose);
1952 } 1948 }
1953 1949
@@ -2108,8 +2104,8 @@ static bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
2108 *verbose = ch->timeout_debug_dump; 2104 *verbose = ch->timeout_debug_dump;
2109 *ms = ch->timeout_accumulated_ms; 2105 *ms = ch->timeout_accumulated_ms;
2110 if (recover) 2106 if (recover)
2111 gk20a_set_error_notifier(ch, 2107 nvgpu_set_error_notifier(ch,
2112 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 2108 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2113 2109
2114 gk20a_channel_put(ch); 2110 gk20a_channel_put(ch);
2115 } 2111 }
@@ -2170,8 +2166,8 @@ bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
2170 gk20a_channel_put(ch); 2166 gk20a_channel_put(ch);
2171 list_for_each_entry(ch, &tsg->ch_list, ch_entry) { 2167 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
2172 if (gk20a_channel_get(ch)) { 2168 if (gk20a_channel_get(ch)) {
2173 gk20a_set_error_notifier(ch, 2169 nvgpu_set_error_notifier(ch,
2174 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 2170 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2175 *verbose |= ch->timeout_debug_dump; 2171 *verbose |= ch->timeout_debug_dump;
2176 gk20a_channel_put(ch); 2172 gk20a_channel_put(ch);
2177 } 2173 }
@@ -2413,7 +2409,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
2413 rc_type = RC_TYPE_PBDMA_FAULT; 2409 rc_type = RC_TYPE_PBDMA_FAULT;
2414 nvgpu_err(g, 2410 nvgpu_err(g,
2415 "semaphore acquire timeout!"); 2411 "semaphore acquire timeout!");
2416 *error_notifier = NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; 2412 *error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT;
2417 } 2413 }
2418 *handled |= pbdma_intr_0_acquire_pending_f(); 2414 *handled |= pbdma_intr_0_acquire_pending_f();
2419 } 2415 }
@@ -2431,7 +2427,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
2431 2427
2432 if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) { 2428 if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
2433 *error_notifier = 2429 *error_notifier =
2434 NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; 2430 NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH;
2435 rc_type = RC_TYPE_PBDMA_FAULT; 2431 rc_type = RC_TYPE_PBDMA_FAULT;
2436 } 2432 }
2437 2433
@@ -2485,7 +2481,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
2485 struct channel_gk20a *ch = &f->channel[id]; 2481 struct channel_gk20a *ch = &f->channel[id];
2486 2482
2487 if (gk20a_channel_get(ch)) { 2483 if (gk20a_channel_get(ch)) {
2488 gk20a_set_error_notifier(ch, error_notifier); 2484 nvgpu_set_error_notifier(ch, error_notifier);
2489 gk20a_fifo_recover_ch(g, id, true); 2485 gk20a_fifo_recover_ch(g, id, true);
2490 gk20a_channel_put(ch); 2486 gk20a_channel_put(ch);
2491 } 2487 }
@@ -2497,7 +2493,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
2497 nvgpu_rwsem_down_read(&tsg->ch_list_lock); 2493 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2498 list_for_each_entry(ch, &tsg->ch_list, ch_entry) { 2494 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
2499 if (gk20a_channel_get(ch)) { 2495 if (gk20a_channel_get(ch)) {
2500 gk20a_set_error_notifier(ch, 2496 nvgpu_set_error_notifier(ch,
2501 error_notifier); 2497 error_notifier);
2502 gk20a_channel_put(ch); 2498 gk20a_channel_put(ch);
2503 } 2499 }
@@ -2514,7 +2510,7 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
2514 u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id)); 2510 u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
2515 2511
2516 u32 handled = 0; 2512 u32 handled = 0;
2517 u32 error_notifier = NVGPU_CHANNEL_PBDMA_ERROR; 2513 u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR;
2518 unsigned int rc_type = RC_TYPE_NO_RC; 2514 unsigned int rc_type = RC_TYPE_NO_RC;
2519 2515
2520 if (pbdma_intr_0) { 2516 if (pbdma_intr_0) {
@@ -2658,8 +2654,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
2658 list_for_each_entry(ch, &tsg->ch_list, ch_entry) { 2654 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
2659 if (!gk20a_channel_get(ch)) 2655 if (!gk20a_channel_get(ch))
2660 continue; 2656 continue;
2661 gk20a_set_error_notifier(ch, 2657 nvgpu_set_error_notifier(ch,
2662 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 2658 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2663 gk20a_channel_put(ch); 2659 gk20a_channel_put(ch);
2664 } 2660 }
2665 nvgpu_rwsem_up_read(&tsg->ch_list_lock); 2661 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
@@ -2671,8 +2667,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
2671 "preempt channel %d timeout", id); 2667 "preempt channel %d timeout", id);
2672 2668
2673 if (gk20a_channel_get(ch)) { 2669 if (gk20a_channel_get(ch)) {
2674 gk20a_set_error_notifier(ch, 2670 nvgpu_set_error_notifier(ch,
2675 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 2671 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2676 gk20a_fifo_recover_ch(g, id, true); 2672 gk20a_fifo_recover_ch(g, id, true);
2677 gk20a_channel_put(ch); 2673 gk20a_channel_put(ch);
2678 } 2674 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 270d36d6..b3969b60 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -40,6 +40,7 @@
40#include <nvgpu/barrier.h> 40#include <nvgpu/barrier.h>
41#include <nvgpu/mm.h> 41#include <nvgpu/mm.h>
42#include <nvgpu/ctxsw_trace.h> 42#include <nvgpu/ctxsw_trace.h>
43#include <nvgpu/error_notifier.h>
43 44
44#include "gk20a.h" 45#include "gk20a.h"
45#include "gr_ctx_gk20a.h" 46#include "gr_ctx_gk20a.h"
@@ -5113,14 +5114,14 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g,
5113 nvgpu_rwsem_down_read(&tsg->ch_list_lock); 5114 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
5114 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { 5115 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
5115 if (gk20a_channel_get(ch_tsg)) { 5116 if (gk20a_channel_get(ch_tsg)) {
5116 gk20a_set_error_notifier(ch_tsg, 5117 nvgpu_set_error_notifier(ch_tsg,
5117 error_notifier); 5118 error_notifier);
5118 gk20a_channel_put(ch_tsg); 5119 gk20a_channel_put(ch_tsg);
5119 } 5120 }
5120 } 5121 }
5121 nvgpu_rwsem_up_read(&tsg->ch_list_lock); 5122 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
5122 } else { 5123 } else {
5123 gk20a_set_error_notifier(ch, error_notifier); 5124 nvgpu_set_error_notifier(ch, error_notifier);
5124 } 5125 }
5125 } 5126 }
5126} 5127}
@@ -5130,7 +5131,7 @@ static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
5130{ 5131{
5131 gk20a_dbg_fn(""); 5132 gk20a_dbg_fn("");
5132 gk20a_gr_set_error_notifier(g, isr_data, 5133 gk20a_gr_set_error_notifier(g, isr_data,
5133 NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); 5134 NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
5134 nvgpu_err(g, 5135 nvgpu_err(g,
5135 "gr semaphore timeout"); 5136 "gr semaphore timeout");
5136 return -EINVAL; 5137 return -EINVAL;
@@ -5141,7 +5142,7 @@ static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
5141{ 5142{
5142 gk20a_dbg_fn(""); 5143 gk20a_dbg_fn("");
5143 gk20a_gr_set_error_notifier(g, isr_data, 5144 gk20a_gr_set_error_notifier(g, isr_data,
5144 NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); 5145 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
5145 /* This is an unrecoverable error, reset is needed */ 5146 /* This is an unrecoverable error, reset is needed */
5146 nvgpu_err(g, 5147 nvgpu_err(g,
5147 "gr semaphore timeout"); 5148 "gr semaphore timeout");
@@ -5156,7 +5157,7 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g,
5156 isr_data->data_lo); 5157 isr_data->data_lo);
5157 if (ret) { 5158 if (ret) {
5158 gk20a_gr_set_error_notifier(g, isr_data, 5159 gk20a_gr_set_error_notifier(g, isr_data,
5159 NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); 5160 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
5160 nvgpu_err(g, "invalid method class 0x%08x" 5161 nvgpu_err(g, "invalid method class 0x%08x"
5161 ", offset 0x%08x address 0x%08x", 5162 ", offset 0x%08x address 0x%08x",
5162 isr_data->class_num, isr_data->offset, isr_data->addr); 5163 isr_data->class_num, isr_data->offset, isr_data->addr);
@@ -5169,7 +5170,7 @@ static int gk20a_gr_handle_illegal_class(struct gk20a *g,
5169{ 5170{
5170 gk20a_dbg_fn(""); 5171 gk20a_dbg_fn("");
5171 gk20a_gr_set_error_notifier(g, isr_data, 5172 gk20a_gr_set_error_notifier(g, isr_data,
5172 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 5173 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5173 nvgpu_err(g, 5174 nvgpu_err(g,
5174 "invalid class 0x%08x, offset 0x%08x", 5175 "invalid class 0x%08x, offset 0x%08x",
5175 isr_data->class_num, isr_data->offset); 5176 isr_data->class_num, isr_data->offset);
@@ -5193,7 +5194,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
5193 5194
5194 if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { 5195 if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) {
5195 gk20a_gr_set_error_notifier(g, isr_data, 5196 gk20a_gr_set_error_notifier(g, isr_data,
5196 NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD); 5197 NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD);
5197 nvgpu_err(g, 5198 nvgpu_err(g,
5198 "firmware method error 0x%08x for offset 0x%04x", 5199 "firmware method error 0x%08x for offset 0x%04x",
5199 gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)), 5200 gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
@@ -5215,7 +5216,7 @@ static int gk20a_gr_handle_class_error(struct gk20a *g,
5215 gr_class_error = 5216 gr_class_error =
5216 gr_class_error_code_v(gk20a_readl(g, gr_class_error_r())); 5217 gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
5217 gk20a_gr_set_error_notifier(g, isr_data, 5218 gk20a_gr_set_error_notifier(g, isr_data,
5218 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 5219 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5219 nvgpu_err(g, "class error 0x%08x, offset 0x%08x," 5220 nvgpu_err(g, "class error 0x%08x, offset 0x%08x,"
5220 "sub channel 0x%08x mme generated %d," 5221 "sub channel 0x%08x mme generated %d,"
5221 " mme pc 0x%08xdata high %d priv status %d" 5222 " mme pc 0x%08xdata high %d priv status %d"
@@ -5244,7 +5245,7 @@ static int gk20a_gr_handle_firmware_method(struct gk20a *g,
5244 gk20a_dbg_fn(""); 5245 gk20a_dbg_fn("");
5245 5246
5246 gk20a_gr_set_error_notifier(g, isr_data, 5247 gk20a_gr_set_error_notifier(g, isr_data,
5247 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); 5248 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5248 nvgpu_err(g, 5249 nvgpu_err(g,
5249 "firmware method 0x%08x, offset 0x%08x for channel %u", 5250 "firmware method 0x%08x, offset 0x%08x for channel %u",
5250 isr_data->class_num, isr_data->offset, 5251 isr_data->class_num, isr_data->offset,
@@ -6024,7 +6025,7 @@ int gk20a_gr_isr(struct gk20a *g)
6024 if (need_reset) { 6025 if (need_reset) {
6025 nvgpu_err(g, "set gr exception notifier"); 6026 nvgpu_err(g, "set gr exception notifier");
6026 gk20a_gr_set_error_notifier(g, &isr_data, 6027 gk20a_gr_set_error_notifier(g, &isr_data,
6027 NVGPU_CHANNEL_GR_EXCEPTION); 6028 NVGPU_ERR_NOTIFIER_GR_EXCEPTION);
6028 } 6029 }
6029 } 6030 }
6030 6031
diff --git a/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h b/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h
new file mode 100644
index 00000000..3e70c379
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h
@@ -0,0 +1,49 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef __NVGPU_ERROR_NOTIFIER_H__
24#define __NVGPU_ERROR_NOTIFIER_H__
25
26#include <nvgpu/types.h>
27
28struct channel_gk20a;
29
30enum {
31 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT = 0,
32 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD,
33 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY,
34 NVGPU_ERR_NOTIFIER_GR_EXCEPTION,
35 NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT,
36 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY,
37 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT,
38 NVGPU_ERR_NOTIFIER_PBDMA_ERROR,
39 NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD,
40 NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR,
41 NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH,
42};
43
44void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error);
45void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error);
46void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error);
47bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier);
48
49#endif /* __NVGPU_ERROR_NOTIFIER_H__ */