diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2017-03-23 14:03:15 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-03-29 12:15:59 -0400 |
commit | 4022b989aa2e91fe77ed52df49d45838f6d8b9bb (patch) | |
tree | 4c8240ac83887c21db902a255306c67041c4525c | |
parent | f04031e5e8837abb2be3feb0ee30e1af54de7845 (diff) |
gpu: nvgpu: Remove direct HW access from ctrl_gk20a.c
ctrl_gk20a.c had some direct accesses to hardware. These violate the
HAL rules, because we don't have per-GPU ctrl, and thus the code
cannot be made GPU independent.
Move all GR accesses to new GR HALs and use existing bus HAL for
accessing timer. Remove #includes of all hardware headers.
JIRA NVGPU-28
Change-Id: I57e67519f62e9bd6c3e725e1bef6e366190f5834
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1327001
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 178 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 178 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 7 |
5 files changed, 214 insertions, 161 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 97125a99..5661b402 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -24,14 +24,7 @@ | |||
24 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
25 | 25 | ||
26 | #include "gk20a.h" | 26 | #include "gk20a.h" |
27 | #include "gr_gk20a.h" | ||
28 | #include "fence_gk20a.h" | 27 | #include "fence_gk20a.h" |
29 | #include "regops_gk20a.h" | ||
30 | |||
31 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
32 | #include <nvgpu/hw/gk20a/hw_fb_gk20a.h> | ||
33 | #include <nvgpu/hw/gk20a/hw_timer_gk20a.h> | ||
34 | |||
35 | 28 | ||
36 | #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7) ? 0xffff : (a >> 32) ? \ | 29 | #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7) ? 0xffff : (a >> 32) ? \ |
37 | (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) | 30 | (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) |
@@ -342,47 +335,16 @@ static int nvgpu_gpu_ioctl_inval_icache( | |||
342 | struct gk20a *g, | 335 | struct gk20a *g, |
343 | struct nvgpu_gpu_inval_icache_args *args) | 336 | struct nvgpu_gpu_inval_icache_args *args) |
344 | { | 337 | { |
345 | |||
346 | int err = 0; | ||
347 | u32 cache_ctrl, regval; | ||
348 | struct channel_gk20a *ch; | 338 | struct channel_gk20a *ch; |
349 | struct nvgpu_dbg_gpu_reg_op ops; | 339 | int err; |
350 | 340 | ||
351 | ch = gk20a_get_channel_from_file(args->channel_fd); | 341 | ch = gk20a_get_channel_from_file(args->channel_fd); |
352 | if (!ch) | 342 | if (!ch) |
353 | return -EINVAL; | 343 | return -EINVAL; |
354 | 344 | ||
355 | ops.op = REGOP(READ_32); | ||
356 | ops.type = REGOP(TYPE_GR_CTX); | ||
357 | ops.status = REGOP(STATUS_SUCCESS); | ||
358 | ops.value_hi = 0; | ||
359 | ops.and_n_mask_lo = 0; | ||
360 | ops.and_n_mask_hi = 0; | ||
361 | ops.offset = gr_pri_gpc0_gcc_dbg_r(); | ||
362 | |||
363 | /* Take the global lock, since we'll be doing global regops */ | 345 | /* Take the global lock, since we'll be doing global regops */ |
364 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 346 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
365 | 347 | err = g->ops.gr.inval_icache(g, ch); | |
366 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
367 | |||
368 | regval = ops.value_lo; | ||
369 | |||
370 | if (!err) { | ||
371 | ops.op = REGOP(WRITE_32); | ||
372 | ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1); | ||
373 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0); | ||
374 | } | ||
375 | |||
376 | if (err) { | ||
377 | gk20a_err(dev_from_gk20a(g), "Failed to access register\n"); | ||
378 | goto end; | ||
379 | } | ||
380 | |||
381 | cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r()); | ||
382 | cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1); | ||
383 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl); | ||
384 | |||
385 | end: | ||
386 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 348 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
387 | return err; | 349 | return err; |
388 | } | 350 | } |
@@ -428,20 +390,10 @@ static int nvgpu_gpu_ioctl_set_debug_mode( | |||
428 | 390 | ||
429 | static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) | 391 | static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) |
430 | { | 392 | { |
431 | int err = 0; | 393 | int err; |
432 | u32 dbgr_control0; | ||
433 | 394 | ||
434 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 395 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
435 | /* assert stop trigger. uniformity assumption: all SMs will have | 396 | err = g->ops.gr.trigger_suspend(g); |
436 | * the same state in dbg_control0. */ | ||
437 | dbgr_control0 = | ||
438 | gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); | ||
439 | dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(); | ||
440 | |||
441 | /* broadcast write */ | ||
442 | gk20a_writel(g, | ||
443 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); | ||
444 | |||
445 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 397 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
446 | return err; | 398 | return err; |
447 | } | 399 | } |
@@ -451,41 +403,16 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, | |||
451 | { | 403 | { |
452 | int err = 0; | 404 | int err = 0; |
453 | struct warpstate *w_state; | 405 | struct warpstate *w_state; |
454 | struct gr_gk20a *gr = &g->gr; | 406 | u32 sm_count, size; |
455 | u32 gpc, tpc, sm_count, sm_id, size; | ||
456 | u32 global_mask; | ||
457 | 407 | ||
458 | sm_count = g->gr.gpc_count * g->gr.tpc_count; | 408 | sm_count = g->gr.gpc_count * g->gr.tpc_count; |
459 | size = sm_count * sizeof(struct warpstate); | 409 | size = sm_count * sizeof(struct warpstate); |
460 | w_state = kzalloc(size, GFP_KERNEL); | 410 | w_state = kzalloc(size, GFP_KERNEL); |
461 | 411 | if (!w_state) | |
462 | /* Wait for the SMs to reach full stop. This condition is: | 412 | return -ENOMEM; |
463 | * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE) | ||
464 | * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp | ||
465 | * masks. | ||
466 | */ | ||
467 | global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() | | ||
468 | gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() | | ||
469 | gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(); | ||
470 | 413 | ||
471 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 414 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
472 | 415 | g->ops.gr.wait_for_pause(g, w_state); | |
473 | /* Lock down all SMs */ | ||
474 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
475 | |||
476 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
477 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
478 | |||
479 | err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false); | ||
480 | |||
481 | if (err) { | ||
482 | gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n"); | ||
483 | goto end; | ||
484 | } | ||
485 | } | ||
486 | |||
487 | /* Read the warp status */ | ||
488 | g->ops.gr.bpt_reg_info(g, w_state); | ||
489 | 416 | ||
490 | /* Copy to user space - pointed by "args->pwarpstate" */ | 417 | /* Copy to user space - pointed by "args->pwarpstate" */ |
491 | if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) { | 418 | if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) { |
@@ -493,7 +420,6 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, | |||
493 | err = -EFAULT; | 420 | err = -EFAULT; |
494 | } | 421 | } |
495 | 422 | ||
496 | end: | ||
497 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 423 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
498 | kfree(w_state); | 424 | kfree(w_state); |
499 | return err; | 425 | return err; |
@@ -504,82 +430,29 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) | |||
504 | int err = 0; | 430 | int err = 0; |
505 | 431 | ||
506 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 432 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
507 | 433 | err = g->ops.gr.resume_from_pause(g); | |
508 | /* Clear the pause mask to tell the GPU we want to resume everyone */ | ||
509 | gk20a_writel(g, | ||
510 | gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0); | ||
511 | |||
512 | /* explicitly re-enable forwarding of SM interrupts upon any resume */ | ||
513 | gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), | ||
514 | gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); | ||
515 | |||
516 | /* Now resume all sms, write a 0 to the stop trigger | ||
517 | * then a 1 to the run trigger */ | ||
518 | gk20a_resume_all_sms(g); | ||
519 | |||
520 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 434 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
521 | return err; | 435 | return err; |
522 | } | 436 | } |
523 | 437 | ||
524 | static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) | 438 | static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) |
525 | { | 439 | { |
526 | int ret = 0; | 440 | return g->ops.gr.clear_sm_errors(g); |
527 | u32 gpc_offset, tpc_offset, gpc, tpc; | ||
528 | struct gr_gk20a *gr = &g->gr; | ||
529 | u32 global_esr; | ||
530 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
531 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
532 | |||
533 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | ||
534 | |||
535 | gpc_offset = gpc_stride * gpc; | ||
536 | |||
537 | /* check if any tpc has an exception */ | ||
538 | for (tpc = 0; tpc < gr->tpc_count; tpc++) { | ||
539 | |||
540 | tpc_offset = tpc_in_gpc_stride * tpc; | ||
541 | |||
542 | global_esr = gk20a_readl(g, | ||
543 | gr_gpc0_tpc0_sm_hww_global_esr_r() + | ||
544 | gpc_offset + tpc_offset); | ||
545 | |||
546 | /* clear the hwws, also causes tpc and gpc | ||
547 | * exceptions to be cleared */ | ||
548 | gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr); | ||
549 | } | ||
550 | } | ||
551 | |||
552 | return ret; | ||
553 | } | 441 | } |
554 | 442 | ||
555 | static int nvgpu_gpu_ioctl_has_any_exception( | 443 | static int nvgpu_gpu_ioctl_has_any_exception( |
556 | struct gk20a *g, | 444 | struct gk20a *g, |
557 | struct nvgpu_gpu_tpc_exception_en_status_args *args) | 445 | struct nvgpu_gpu_tpc_exception_en_status_args *args) |
558 | { | 446 | { |
559 | int err = 0; | 447 | u32 tpc_exception_en; |
560 | struct gr_gk20a *gr = &g->gr; | ||
561 | u32 sm_id, tpc_exception_en = 0; | ||
562 | u32 offset, regval, tpc_offset, gpc_offset; | ||
563 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
564 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
565 | 448 | ||
566 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 449 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
567 | 450 | tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); | |
568 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
569 | |||
570 | tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index; | ||
571 | gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index; | ||
572 | offset = tpc_offset + gpc_offset; | ||
573 | |||
574 | regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + | ||
575 | offset); | ||
576 | /* Each bit represents corresponding enablement state, bit 0 corresponds to SM0 */ | ||
577 | tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id; | ||
578 | } | ||
579 | |||
580 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 451 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
452 | |||
581 | args->tpc_exception_en_sm_mask = tpc_exception_en; | 453 | args->tpc_exception_en_sm_mask = tpc_exception_en; |
582 | return err; | 454 | |
455 | return 0; | ||
583 | } | 456 | } |
584 | 457 | ||
585 | static int gk20a_ctrl_get_num_vsms(struct gk20a *g, | 458 | static int gk20a_ctrl_get_num_vsms(struct gk20a *g, |
@@ -648,8 +521,6 @@ static inline int get_timestamps_zipper(struct gk20a *g, | |||
648 | { | 521 | { |
649 | int err = 0; | 522 | int err = 0; |
650 | unsigned int i = 0; | 523 | unsigned int i = 0; |
651 | u32 gpu_timestamp_hi_new = 0; | ||
652 | u32 gpu_timestamp_hi_old = 0; | ||
653 | 524 | ||
654 | if (gk20a_busy(g)) { | 525 | if (gk20a_busy(g)) { |
655 | gk20a_err(dev_from_gk20a(g), "GPU not powered on\n"); | 526 | gk20a_err(dev_from_gk20a(g), "GPU not powered on\n"); |
@@ -657,25 +528,12 @@ static inline int get_timestamps_zipper(struct gk20a *g, | |||
657 | goto end; | 528 | goto end; |
658 | } | 529 | } |
659 | 530 | ||
660 | /* get zipper reads of gpu and cpu counter values */ | ||
661 | gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r()); | ||
662 | for (i = 0; i < args->count; i++) { | 531 | for (i = 0; i < args->count; i++) { |
663 | u32 gpu_timestamp_lo = 0; | 532 | err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp); |
664 | u32 gpu_timestamp_hi = 0; | 533 | if (err) |
534 | return err; | ||
665 | 535 | ||
666 | gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r()); | ||
667 | args->samples[i].cpu_timestamp = get_cpu_timestamp(); | 536 | args->samples[i].cpu_timestamp = get_cpu_timestamp(); |
668 | rmb(); /* maintain zipper read order */ | ||
669 | gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r()); | ||
670 | |||
671 | /* pick the appropriate gpu counter hi bits */ | ||
672 | gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ? | ||
673 | gpu_timestamp_hi_old : gpu_timestamp_hi_new; | ||
674 | |||
675 | args->samples[i].gpu_timestamp = | ||
676 | ((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo; | ||
677 | |||
678 | gpu_timestamp_hi_old = gpu_timestamp_hi_new; | ||
679 | } | 537 | } |
680 | 538 | ||
681 | end: | 539 | end: |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 4f50ae36..951c8267 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -346,6 +346,12 @@ struct gpu_ops { | |||
346 | struct mem_desc *mem, u64 gpu_va); | 346 | struct mem_desc *mem, u64 gpu_va); |
347 | void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); | 347 | void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); |
348 | void (*load_tpc_mask)(struct gk20a *g); | 348 | void (*load_tpc_mask)(struct gk20a *g); |
349 | int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch); | ||
350 | int (*trigger_suspend)(struct gk20a *g); | ||
351 | int (*wait_for_pause)(struct gk20a *g, struct warpstate *w_state); | ||
352 | int (*resume_from_pause)(struct gk20a *g); | ||
353 | int (*clear_sm_errors)(struct gk20a *g); | ||
354 | u32 (*tpc_enabled_exceptions)(struct gk20a *g); | ||
349 | } gr; | 355 | } gr; |
350 | struct { | 356 | struct { |
351 | void (*init_hw)(struct gk20a *g); | 357 | void (*init_hw)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3df19a4e..172931d7 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -9291,6 +9291,178 @@ static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr, | |||
9291 | { | 9291 | { |
9292 | } | 9292 | } |
9293 | 9293 | ||
9294 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch) | ||
9295 | { | ||
9296 | int err = 0; | ||
9297 | u32 cache_ctrl, regval; | ||
9298 | struct nvgpu_dbg_gpu_reg_op ops; | ||
9299 | |||
9300 | ops.op = REGOP(READ_32); | ||
9301 | ops.type = REGOP(TYPE_GR_CTX); | ||
9302 | ops.status = REGOP(STATUS_SUCCESS); | ||
9303 | ops.value_hi = 0; | ||
9304 | ops.and_n_mask_lo = 0; | ||
9305 | ops.and_n_mask_hi = 0; | ||
9306 | ops.offset = gr_pri_gpc0_gcc_dbg_r(); | ||
9307 | |||
9308 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
9309 | if (err) { | ||
9310 | gk20a_err(dev_from_gk20a(g), "Failed to read register"); | ||
9311 | return err; | ||
9312 | } | ||
9313 | |||
9314 | regval = ops.value_lo; | ||
9315 | |||
9316 | ops.op = REGOP(WRITE_32); | ||
9317 | ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1); | ||
9318 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0); | ||
9319 | if (err) { | ||
9320 | gk20a_err(dev_from_gk20a(g), "Failed to write register"); | ||
9321 | return err; | ||
9322 | } | ||
9323 | |||
9324 | ops.op = REGOP(READ_32); | ||
9325 | ops.offset = gr_pri_gpc0_tpc0_sm_cache_control_r(); | ||
9326 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
9327 | if (err) { | ||
9328 | gk20a_err(dev_from_gk20a(g), "Failed to read register"); | ||
9329 | return err; | ||
9330 | } | ||
9331 | |||
9332 | cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r()); | ||
9333 | cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1); | ||
9334 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl); | ||
9335 | |||
9336 | return 0; | ||
9337 | } | ||
9338 | |||
9339 | int gr_gk20a_trigger_suspend(struct gk20a *g) | ||
9340 | { | ||
9341 | int err = 0; | ||
9342 | u32 dbgr_control0; | ||
9343 | |||
9344 | /* assert stop trigger. uniformity assumption: all SMs will have | ||
9345 | * the same state in dbg_control0. */ | ||
9346 | dbgr_control0 = | ||
9347 | gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); | ||
9348 | dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(); | ||
9349 | |||
9350 | /* broadcast write */ | ||
9351 | gk20a_writel(g, | ||
9352 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); | ||
9353 | |||
9354 | return err; | ||
9355 | } | ||
9356 | |||
9357 | int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state) | ||
9358 | { | ||
9359 | int err = 0; | ||
9360 | struct gr_gk20a *gr = &g->gr; | ||
9361 | u32 gpc, tpc, sm_id; | ||
9362 | u32 global_mask; | ||
9363 | |||
9364 | /* Wait for the SMs to reach full stop. This condition is: | ||
9365 | * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE) | ||
9366 | * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp | ||
9367 | * masks. | ||
9368 | */ | ||
9369 | global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() | | ||
9370 | gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() | | ||
9371 | gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(); | ||
9372 | |||
9373 | /* Lock down all SMs */ | ||
9374 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9375 | |||
9376 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
9377 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
9378 | |||
9379 | err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false); | ||
9380 | |||
9381 | if (err) { | ||
9382 | gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n"); | ||
9383 | return err; | ||
9384 | } | ||
9385 | } | ||
9386 | |||
9387 | /* Read the warp status */ | ||
9388 | g->ops.gr.bpt_reg_info(g, w_state); | ||
9389 | |||
9390 | return 0; | ||
9391 | } | ||
9392 | |||
9393 | int gr_gk20a_resume_from_pause(struct gk20a *g) | ||
9394 | { | ||
9395 | int err = 0; | ||
9396 | |||
9397 | /* Clear the pause mask to tell the GPU we want to resume everyone */ | ||
9398 | gk20a_writel(g, | ||
9399 | gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0); | ||
9400 | |||
9401 | /* explicitly re-enable forwarding of SM interrupts upon any resume */ | ||
9402 | gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), | ||
9403 | gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); | ||
9404 | |||
9405 | /* Now resume all sms, write a 0 to the stop trigger | ||
9406 | * then a 1 to the run trigger */ | ||
9407 | gk20a_resume_all_sms(g); | ||
9408 | |||
9409 | return err; | ||
9410 | } | ||
9411 | |||
9412 | int gr_gk20a_clear_sm_errors(struct gk20a *g) | ||
9413 | { | ||
9414 | int ret = 0; | ||
9415 | u32 gpc_offset, tpc_offset, gpc, tpc; | ||
9416 | struct gr_gk20a *gr = &g->gr; | ||
9417 | u32 global_esr; | ||
9418 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
9419 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
9420 | |||
9421 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | ||
9422 | |||
9423 | gpc_offset = gpc_stride * gpc; | ||
9424 | |||
9425 | /* check if any tpc has an exception */ | ||
9426 | for (tpc = 0; tpc < gr->tpc_count; tpc++) { | ||
9427 | |||
9428 | tpc_offset = tpc_in_gpc_stride * tpc; | ||
9429 | |||
9430 | global_esr = gk20a_readl(g, | ||
9431 | gr_gpc0_tpc0_sm_hww_global_esr_r() + | ||
9432 | gpc_offset + tpc_offset); | ||
9433 | |||
9434 | /* clear the hwws, also causes tpc and gpc | ||
9435 | * exceptions to be cleared */ | ||
9436 | gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr); | ||
9437 | } | ||
9438 | } | ||
9439 | |||
9440 | return ret; | ||
9441 | } | ||
9442 | |||
9443 | u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) | ||
9444 | { | ||
9445 | struct gr_gk20a *gr = &g->gr; | ||
9446 | u32 sm_id, tpc_exception_en = 0; | ||
9447 | u32 offset, regval, tpc_offset, gpc_offset; | ||
9448 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
9449 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
9450 | |||
9451 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9452 | |||
9453 | tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index; | ||
9454 | gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index; | ||
9455 | offset = tpc_offset + gpc_offset; | ||
9456 | |||
9457 | regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + | ||
9458 | offset); | ||
9459 | /* Each bit represents corresponding enablement state, bit 0 corresponds to SM0 */ | ||
9460 | tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id; | ||
9461 | } | ||
9462 | |||
9463 | return tpc_exception_en; | ||
9464 | } | ||
9465 | |||
9294 | void gk20a_init_gr_ops(struct gpu_ops *gops) | 9466 | void gk20a_init_gr_ops(struct gpu_ops *gops) |
9295 | { | 9467 | { |
9296 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; | 9468 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; |
@@ -9376,4 +9548,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
9376 | gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; | 9548 | gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; |
9377 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; | 9549 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; |
9378 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; | 9550 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; |
9551 | gops->gr.inval_icache = gr_gk20a_inval_icache; | ||
9552 | gops->gr.trigger_suspend = gr_gk20a_trigger_suspend; | ||
9553 | gops->gr.wait_for_pause = gr_gk20a_wait_for_pause; | ||
9554 | gops->gr.resume_from_pause = gr_gk20a_resume_from_pause; | ||
9555 | gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors; | ||
9556 | gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions; | ||
9379 | } | 9557 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 2dd1eaf5..33721f08 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -704,6 +704,12 @@ int gr_gk20a_resume_contexts(struct gk20a *g, | |||
704 | struct dbg_session_gk20a *dbg_s, | 704 | struct dbg_session_gk20a *dbg_s, |
705 | int *ctx_resident_ch_fd); | 705 | int *ctx_resident_ch_fd); |
706 | void gk20a_gr_enable_gpc_exceptions(struct gk20a *g); | 706 | void gk20a_gr_enable_gpc_exceptions(struct gk20a *g); |
707 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch); | ||
708 | int gr_gk20a_trigger_suspend(struct gk20a *g); | ||
709 | int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state); | ||
710 | int gr_gk20a_resume_from_pause(struct gk20a *g); | ||
711 | int gr_gk20a_clear_sm_errors(struct gk20a *g); | ||
712 | u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g); | ||
707 | 713 | ||
708 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, | 714 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, |
709 | struct channel_gk20a *c, bool patch); | 715 | struct channel_gk20a *c, bool patch); |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 4f4b8d4a..a43fcdab 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -1609,5 +1609,10 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1609 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; | 1609 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; |
1610 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; | 1610 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; |
1611 | gops->gr.load_tpc_mask = gr_gm20b_load_tpc_mask; | 1611 | gops->gr.load_tpc_mask = gr_gm20b_load_tpc_mask; |
1612 | 1612 | gops->gr.inval_icache = gr_gk20a_inval_icache; | |
1613 | gops->gr.trigger_suspend = gr_gk20a_trigger_suspend; | ||
1614 | gops->gr.wait_for_pause = gr_gk20a_wait_for_pause; | ||
1615 | gops->gr.resume_from_pause = gr_gk20a_resume_from_pause; | ||
1616 | gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors; | ||
1617 | gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions; | ||
1613 | } | 1618 | } |