From c6b9177cfff8a41c3c3c78f5c47c7df677ced58c Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Mon, 6 Nov 2017 05:44:23 -0800 Subject: gpu: nvgpu: define error_notifiers in common code All the Linux-specific error_notifier codes are defined in a Linux-specific header file but are used throughout the common driver code. Because they are defined in a Linux-specific file, all uses of those error_notifiers would otherwise have to move into Linux-specific code only. Hence define new error_notifiers in include/nvgpu/error_notifier.h and use them in the common code. Add new API nvgpu_error_notifier_to_channel_notifier() to convert a common error_notifier of the form NVGPU_ERR_NOTIFIER_* to a Linux-specific error notifier of the form NVGPU_CHANNEL_*. Any future addition to the error notifiers requires an update to both forms of error notifiers. Move all error notifier related metadata from channel_gk20a (common code) to the Linux-specific structure nvgpu_channel_linux. Update all accesses to this data to use the new structure instead of channel_gk20a. Move and rename the below APIs to a Linux-specific file and declare them in error_notifier.h: nvgpu_set_error_notifier_locked(), nvgpu_set_error_notifier(), nvgpu_is_error_notifier_set(). Add the below new API and use it in fifo_vgpu.c: nvgpu_set_error_notifier_if_empty(). Include <nvgpu/error_notifier.h> wherever the new error_notifier codes are used. NVGPU-426 Change-Id: Iaa5bfc150e6e9ec17d797d445c2d6407afe9f4bd Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1593361 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/channel.c | 131 ++++++++++++++++++++++- drivers/gpu/nvgpu/common/linux/channel.h | 10 ++ drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 35 +++--- drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c | 34 +++--- drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c | 25 ++--- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 65 +++-------- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 7 -- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 46 ++++---- 
drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 21 ++-- drivers/gpu/nvgpu/include/nvgpu/error_notifier.h | 49 +++++++++ 10 files changed, 285 insertions(+), 138 deletions(-) create mode 100644 drivers/gpu/nvgpu/include/nvgpu/error_notifier.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index 8366ed88..1ae2d444 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ b/drivers/gpu/nvgpu/common/linux/channel.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * This is required for nvgpu_vm_find_buf() which is used in the tracing @@ -37,6 +38,124 @@ #include #include +/* + * API to convert error_notifiers in common code and of the form + * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user + * space and of the form NVGPU_CHANNEL_* + */ +static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) +{ + switch (error_notifier) { + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: + return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: + return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: + return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; + case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: + return NVGPU_CHANNEL_GR_EXCEPTION; + case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: + return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: + return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: + return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; + case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: + return NVGPU_CHANNEL_PBDMA_ERROR; + case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: + return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; + case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: + return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; + case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: + return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; + } + + pr_warn("%s: 
invalid error_notifier requested %u\n", __func__, error_notifier); + + return error_notifier; +} + +/** + * nvgpu_set_error_notifier_locked() + * Should be called with ch->error_notifier_mutex held + * + * error should be of the form NVGPU_ERR_NOTIFIER_* + */ +void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + error = nvgpu_error_notifier_to_channel_notifier(error); + + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + struct timespec time_data; + u64 nsec; + + getnstimeofday(&time_data); + nsec = ((u64)time_data.tv_sec) * 1000000000u + + (u64)time_data.tv_nsec; + notification->time_stamp.nanoseconds[0] = + (u32)nsec; + notification->time_stamp.nanoseconds[1] = + (u32)(nsec >> 32); + notification->info32 = error; + notification->status = 0xffff; + + nvgpu_err(ch->g, + "error notifier set to %d for ch %d", error, ch->chid); + } +} + +/* error should be of the form NVGPU_ERR_NOTIFIER_* */ +void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + nvgpu_set_error_notifier_locked(ch, error); + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + + /* Don't overwrite error flag if it is already set */ + if (notification->status != 0xffff) + nvgpu_set_error_notifier_locked(ch, error); + } + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ +bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) +{ + struct 
nvgpu_channel_linux *priv = ch->os_priv; + bool notifier_set = false; + + error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + u32 err = notification->info32; + + if (err == error_notifier) + notifier_set = true; + } + nvgpu_mutex_release(&priv->error_notifier.mutex); + + return notifier_set; +} + static void gk20a_channel_update_runcb_fn(struct work_struct *work) { struct nvgpu_channel_completion_cb *completion_cb = @@ -128,6 +247,7 @@ static void nvgpu_channel_close_linux(struct channel_gk20a *ch) static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) { struct nvgpu_channel_linux *priv; + int err; priv = nvgpu_kzalloc(g, sizeof(*priv)); if (!priv) @@ -136,6 +256,12 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) ch->os_priv = priv; priv->ch = ch; + err = nvgpu_mutex_init(&priv->error_notifier.mutex); + if (err) { + nvgpu_kfree(g, priv); + return err; + } + nvgpu_channel_work_completion_init(ch); return 0; @@ -143,7 +269,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) { - nvgpu_kfree(g, ch->os_priv); + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_destroy(&priv->error_notifier.mutex); + nvgpu_kfree(g, priv); } int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h index ba1935f3..a4df75d6 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.h +++ b/drivers/gpu/nvgpu/common/linux/channel.h @@ -42,10 +42,20 @@ struct nvgpu_channel_completion_cb { struct work_struct work; }; +struct nvgpu_error_notifier { + struct dma_buf *dmabuf; + void *vaddr; + + struct nvgpu_notification 
*notification; + + struct nvgpu_mutex mutex; +}; + struct nvgpu_channel_linux { struct channel_gk20a *ch; struct nvgpu_channel_completion_cb completion_cb; + struct nvgpu_error_notifier error_notifier; }; int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 0ac50140..67bec31b 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" @@ -227,15 +228,17 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) { - nvgpu_mutex_acquire(&ch->error_notifier_mutex); - if (ch->error_notifier_ref) { - dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va); - dma_buf_put(ch->error_notifier_ref); - ch->error_notifier_ref = NULL; - ch->error_notifier = NULL; - ch->error_notifier_va = NULL; + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr); + dma_buf_put(priv->error_notifier.dmabuf); + priv->error_notifier.dmabuf = NULL; + priv->error_notifier.notification = NULL; + priv->error_notifier.vaddr = NULL; } - nvgpu_mutex_release(&ch->error_notifier_mutex); + nvgpu_mutex_release(&priv->error_notifier.mutex); } static int gk20a_init_error_notifier(struct channel_gk20a *ch, @@ -244,6 +247,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch, struct dma_buf *dmabuf; void *va; u64 end = args->offset + sizeof(struct nvgpu_notification); + struct nvgpu_channel_linux *priv = ch->os_priv; if (!args->mem) { pr_err("gk20a_init_error_notifier: invalid memory handle\n"); @@ -273,14 +277,15 @@ static int gk20a_init_error_notifier(struct 
channel_gk20a *ch, return -ENOMEM; } - ch->error_notifier = va + args->offset; - ch->error_notifier_va = va; - memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification)); + priv->error_notifier.notification = va + args->offset; + priv->error_notifier.vaddr = va; + memset(priv->error_notifier.notification, 0, + sizeof(struct nvgpu_notification)); /* set channel notifiers pointer */ - nvgpu_mutex_acquire(&ch->error_notifier_mutex); - ch->error_notifier_ref = dmabuf; - nvgpu_mutex_release(&ch->error_notifier_mutex); + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + priv->error_notifier.dmabuf = dmabuf; + nvgpu_mutex_release(&priv->error_notifier.mutex); return 0; } @@ -1361,7 +1366,7 @@ long gk20a_channel_ioctl(struct file *filp, break; } err = ch->g->ops.fifo.force_reset_ch(ch, - NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true); + NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); gk20a_idle(ch->g); break; case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL: diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c index cdcecca5..7a2a02e9 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c @@ -25,10 +25,13 @@ #include #include #include +#include #include "vgpu.h" #include "fifo_vgpu.h" +#include "common/linux/channel.h" + #include #include @@ -691,7 +694,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch_tsg)) { - gk20a_set_error_notifier(ch_tsg, err_code); + nvgpu_set_error_notifier(ch_tsg, err_code); ch_tsg->has_timedout = true; gk20a_channel_put(ch_tsg); } @@ -699,7 +702,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { - gk20a_set_error_notifier(ch, err_code); + nvgpu_set_error_notifier(ch, err_code); ch->has_timedout = true; } @@ -716,19 +719,14 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, 
static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g, struct channel_gk20a *ch) { - nvgpu_mutex_acquire(&ch->error_notifier_mutex); - if (ch->error_notifier_ref) { - if (ch->error_notifier->status == 0xffff) { - /* If error code is already set, this mmu fault - * was triggered as part of recovery from other - * error condition. - * Don't overwrite error flag. */ - } else { - gk20a_set_error_notifier_locked(ch, - NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); - } - } - nvgpu_mutex_release(&ch->error_notifier_mutex); + /* + * If error code is already set, this mmu fault + * was triggered as part of recovery from other + * error condition. + * Don't overwrite error flag. + */ + nvgpu_set_error_notifier_if_empty(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); /* mark channel as faulted */ ch->has_timedout = true; @@ -778,11 +776,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info) switch (info->type) { case TEGRA_VGPU_FIFO_INTR_PBDMA: - gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR); + nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR); break; case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); break; case TEGRA_VGPU_FIFO_INTR_MMU_FAULT: vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch); diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c index dd2ae306..33551d17 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c @@ -20,6 +20,7 @@ #include #include +#include #include "vgpu.h" #include "gr_vgpu.h" @@ -941,31 +942,31 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info) nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); break; case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); + 
nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT); break; case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD: break; case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_FECS_ERROR: break; case TEGRA_VGPU_GR_INTR_CLASS_ERROR: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_EXCEPTION: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_SM_EXCEPTION: gk20a_dbg_gpu_post_events(ch); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index e01d6cdb..a0415861 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "gk20a.h" #include "dbg_gpu_gk20a.h" @@ -339,37 +340,6 @@ int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch, return ret ? 
ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true); } -/** - * gk20a_set_error_notifier_locked() - * Should be called with ch->error_notifier_mutex held - */ -void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error) -{ - if (ch->error_notifier_ref) { - struct timespec time_data; - u64 nsec; - getnstimeofday(&time_data); - nsec = ((u64)time_data.tv_sec) * 1000000000u + - (u64)time_data.tv_nsec; - ch->error_notifier->time_stamp.nanoseconds[0] = - (u32)nsec; - ch->error_notifier->time_stamp.nanoseconds[1] = - (u32)(nsec >> 32); - ch->error_notifier->info32 = error; - ch->error_notifier->status = 0xffff; - - nvgpu_err(ch->g, - "error notifier set to %d for ch %d", error, ch->chid); - } -} - -void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) -{ - nvgpu_mutex_acquire(&ch->error_notifier_mutex); - gk20a_set_error_notifier_locked(ch, error); - nvgpu_mutex_release(&ch->error_notifier_mutex); -} - static void gk20a_wait_until_counter_is_N( struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value, struct nvgpu_cond *c, const char *caller, const char *counter_name) @@ -1550,7 +1520,7 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) gk20a_gr_debug_dump(g); g->ops.fifo.force_reset_ch(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, true); } /** @@ -2210,53 +2180,48 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) err = nvgpu_mutex_init(&c->ioctl_lock); if (err) return err; - err = nvgpu_mutex_init(&c->error_notifier_mutex); - if (err) - goto fail_1; err = nvgpu_mutex_init(&c->joblist.cleanup_lock); if (err) - goto fail_2; + goto fail_1; err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); if (err) - goto fail_3; + goto fail_2; err = nvgpu_mutex_init(&c->sync_lock); if (err) - goto fail_4; + goto fail_3; #if defined(CONFIG_GK20A_CYCLE_STATS) err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); if (err) - goto fail_5; + 
goto fail_4; err = nvgpu_mutex_init(&c->cs_client_mutex); if (err) - goto fail_6; + goto fail_5; #endif err = nvgpu_mutex_init(&c->event_id_list_lock); if (err) - goto fail_7; + goto fail_6; err = nvgpu_mutex_init(&c->dbg_s_lock); if (err) - goto fail_8; + goto fail_7; nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); return 0; -fail_8: - nvgpu_mutex_destroy(&c->event_id_list_lock); fail_7: + nvgpu_mutex_destroy(&c->event_id_list_lock); +fail_6: #if defined(CONFIG_GK20A_CYCLE_STATS) nvgpu_mutex_destroy(&c->cs_client_mutex); -fail_6: - nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); fail_5: + nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); +fail_4: #endif nvgpu_mutex_destroy(&c->sync_lock); -fail_4: - nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); fail_3: - nvgpu_mutex_destroy(&c->joblist.cleanup_lock); + nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); fail_2: - nvgpu_mutex_destroy(&c->error_notifier_mutex); + nvgpu_mutex_destroy(&c->joblist.cleanup_lock); fail_1: nvgpu_mutex_destroy(&c->ioctl_lock); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 8c9095b2..ff96d0d7 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -273,11 +273,6 @@ struct channel_gk20a { bool timeout_debug_dump; unsigned int timeslice_us; - struct dma_buf *error_notifier_ref; - struct nvgpu_notification *error_notifier; - void *error_notifier_va; - struct nvgpu_mutex error_notifier_mutex; - struct nvgpu_mutex sync_lock; struct gk20a_channel_sync *sync; @@ -335,8 +330,6 @@ bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, void gk20a_disable_channel(struct channel_gk20a *ch); void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt); void gk20a_channel_abort_clean_up(struct channel_gk20a *ch); -void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error); -void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, 
__u32 error); void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size, struct priv_cmd_entry *entry); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 648a8c86..38aecc93 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "gk20a.h" #include "mm_gk20a.h" @@ -557,7 +558,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) nvgpu_mutex_destroy(&tsg->event_id_list_lock); nvgpu_mutex_destroy(&c->ioctl_lock); - nvgpu_mutex_destroy(&c->error_notifier_mutex); nvgpu_mutex_destroy(&c->joblist.cleanup_lock); nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); nvgpu_mutex_destroy(&c->sync_lock); @@ -1339,14 +1339,10 @@ static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g, if (!refch) return verbose; - nvgpu_mutex_acquire(&refch->error_notifier_mutex); - if (refch->error_notifier_ref) { - u32 err = refch->error_notifier->info32; + if (nvgpu_is_error_notifier_set(refch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) + verbose = refch->timeout_debug_dump; - if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT) - verbose = refch->timeout_debug_dump; - } - nvgpu_mutex_release(&refch->error_notifier_mutex); return verbose; } @@ -1400,8 +1396,8 @@ void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, { nvgpu_err(g, "channel %d generated a mmu fault", refch->chid); - gk20a_set_error_notifier(refch, - NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); + nvgpu_set_error_notifier(refch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); } void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, @@ -1939,7 +1935,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch_tsg)) { - gk20a_set_error_notifier(ch_tsg, err_code); + nvgpu_set_error_notifier(ch_tsg, err_code); 
gk20a_channel_put(ch_tsg); } } @@ -1947,7 +1943,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, nvgpu_rwsem_up_read(&tsg->ch_list_lock); gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); } else { - gk20a_set_error_notifier(ch, err_code); + nvgpu_set_error_notifier(ch, err_code); gk20a_fifo_recover_ch(g, ch->chid, verbose); } @@ -2108,8 +2104,8 @@ static bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch, *verbose = ch->timeout_debug_dump; *ms = ch->timeout_accumulated_ms; if (recover) - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); gk20a_channel_put(ch); } @@ -2170,8 +2166,8 @@ bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg, gk20a_channel_put(ch); list_for_each_entry(ch, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch)) { - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); *verbose |= ch->timeout_debug_dump; gk20a_channel_put(ch); } @@ -2413,7 +2409,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id, rc_type = RC_TYPE_PBDMA_FAULT; nvgpu_err(g, "semaphore acquire timeout!"); - *error_notifier = NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; + *error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT; } *handled |= pbdma_intr_0_acquire_pending_f(); } @@ -2431,7 +2427,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id, if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) { *error_notifier = - NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; + NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH; rc_type = RC_TYPE_PBDMA_FAULT; } @@ -2485,7 +2481,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g, struct channel_gk20a *ch = &f->channel[id]; if (gk20a_channel_get(ch)) { - gk20a_set_error_notifier(ch, error_notifier); + nvgpu_set_error_notifier(ch, error_notifier); gk20a_fifo_recover_ch(g, 
id, true); gk20a_channel_put(ch); } @@ -2497,7 +2493,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g, nvgpu_rwsem_down_read(&tsg->ch_list_lock); list_for_each_entry(ch, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch)) { - gk20a_set_error_notifier(ch, + nvgpu_set_error_notifier(ch, error_notifier); gk20a_channel_put(ch); } @@ -2514,7 +2510,7 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f, u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id)); u32 handled = 0; - u32 error_notifier = NVGPU_CHANNEL_PBDMA_ERROR; + u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR; unsigned int rc_type = RC_TYPE_NO_RC; if (pbdma_intr_0) { @@ -2658,8 +2654,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, list_for_each_entry(ch, &tsg->ch_list, ch_entry) { if (!gk20a_channel_get(ch)) continue; - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); gk20a_channel_put(ch); } nvgpu_rwsem_up_read(&tsg->ch_list_lock); @@ -2671,8 +2667,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, "preempt channel %d timeout", id); if (gk20a_channel_get(ch)) { - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); gk20a_fifo_recover_ch(g, id, true); gk20a_channel_put(ch); } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 270d36d6..b3969b60 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "gk20a.h" #include "gr_ctx_gk20a.h" @@ -5113,14 +5114,14 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g, nvgpu_rwsem_down_read(&tsg->ch_list_lock); list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch_tsg)) { - gk20a_set_error_notifier(ch_tsg, + 
nvgpu_set_error_notifier(ch_tsg, error_notifier); gk20a_channel_put(ch_tsg); } } nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { - gk20a_set_error_notifier(ch, error_notifier); + nvgpu_set_error_notifier(ch, error_notifier); } } } @@ -5130,7 +5131,7 @@ static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g, { gk20a_dbg_fn(""); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); + NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT); nvgpu_err(g, "gr semaphore timeout"); return -EINVAL; @@ -5141,7 +5142,7 @@ static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g, { gk20a_dbg_fn(""); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); /* This is an unrecoverable error, reset is needed */ nvgpu_err(g, "gr semaphore timeout"); @@ -5156,7 +5157,7 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g, isr_data->data_lo); if (ret) { gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); nvgpu_err(g, "invalid method class 0x%08x" ", offset 0x%08x address 0x%08x", isr_data->class_num, isr_data->offset, isr_data->addr); @@ -5169,7 +5170,7 @@ static int gk20a_gr_handle_illegal_class(struct gk20a *g, { gk20a_dbg_fn(""); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); nvgpu_err(g, "invalid class 0x%08x, offset 0x%08x", isr_data->class_num, isr_data->offset); @@ -5193,7 +5194,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD); + NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD); nvgpu_err(g, "firmware method error 0x%08x for offset 0x%04x", gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)), @@ -5215,7 +5216,7 @@ static int 
gk20a_gr_handle_class_error(struct gk20a *g, gr_class_error = gr_class_error_code_v(gk20a_readl(g, gr_class_error_r())); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); nvgpu_err(g, "class error 0x%08x, offset 0x%08x," "sub channel 0x%08x mme generated %d," " mme pc 0x%08xdata high %d priv status %d" @@ -5244,7 +5245,7 @@ static int gk20a_gr_handle_firmware_method(struct gk20a *g, gk20a_dbg_fn(""); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); nvgpu_err(g, "firmware method 0x%08x, offset 0x%08x for channel %u", isr_data->class_num, isr_data->offset, @@ -6024,7 +6025,7 @@ int gk20a_gr_isr(struct gk20a *g) if (need_reset) { nvgpu_err(g, "set gr exception notifier"); gk20a_gr_set_error_notifier(g, &isr_data, - NVGPU_CHANNEL_GR_EXCEPTION); + NVGPU_ERR_NOTIFIER_GR_EXCEPTION); } } diff --git a/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h b/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h new file mode 100644 index 00000000..3e70c379 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVGPU_ERROR_NOTIFIER_H__ +#define __NVGPU_ERROR_NOTIFIER_H__ + +#include + +struct channel_gk20a; + +enum { + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT = 0, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY, + NVGPU_ERR_NOTIFIER_GR_EXCEPTION, + NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT, + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT, + NVGPU_ERR_NOTIFIER_PBDMA_ERROR, + NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD, + NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, + NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH, +}; + +void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error); +void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error); +void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error); +bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier); + +#endif /* __NVGPU_ERROR_NOTIFIER_H__ */ -- cgit v1.2.2