author     Terje Bergstrom <tbergstrom@nvidia.com>              2017-03-23 14:03:15 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-03-29 12:15:59 -0400
commit     4022b989aa2e91fe77ed52df49d45838f6d8b9bb
tree       4c8240ac83887c21db902a255306c67041c4525c /drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
parent     f04031e5e8837abb2be3feb0ee30e1af54de7845
gpu: nvgpu: Remove direct HW access from ctrl_gk20a.c
ctrl_gk20a.c had several direct accesses to hardware registers. These
violate the HAL rules: there is no per-GPU variant of the ctrl code, so
direct register accesses prevent it from being GPU independent.

Move all GR accesses to new GR HALs, and use the existing bus HAL for
accessing the timer. Remove the #includes of all hardware headers.
JIRA NVGPU-28
Change-Id: I57e67519f62e9bd6c3e725e1bef6e366190f5834
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1327001
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
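For readers unfamiliar with nvgpu's HAL: struct gk20a carries a per-chip table of function pointers (g->ops), and common code is supposed to call through it rather than poke registers directly. Below is a trimmed sketch of the op-table shape this commit relies on. The signatures are inferred from the call sites in the diff, not copied from gk20a.h, whose real struct gpu_ops is far larger.

```c
/*
 * Sketch only: inferred from the g->ops.gr.* and g->ops.bus.* call
 * sites in this diff. The real struct gpu_ops in gk20a.h has many
 * more members, and the exact signatures there may differ.
 */
struct gpu_ops {
        struct {
                int (*inval_icache)(struct gk20a *g,
                                    struct channel_gk20a *ch);
                int (*trigger_suspend)(struct gk20a *g);
                int (*wait_for_pause)(struct gk20a *g,
                                      struct warpstate *w_state);
                int (*resume_from_pause)(struct gk20a *g);
                int (*clear_sm_errors)(struct gk20a *g);
                u32 (*tpc_enabled_exceptions)(struct gk20a *g);
        } gr;
        struct {
                int (*read_ptimer)(struct gk20a *g, u64 *value);
        } bus;
};
```

Each chip's init code fills these pointers in, so ctrl_gk20a.c stays register-free and works unchanged on any GPU that provides the ops.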
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 178 +-
1 file changed, 18 insertions(+), 160 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 97125a99..5661b402 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -24,14 +24,7 @@
 #include <linux/delay.h>
 
 #include "gk20a.h"
-#include "gr_gk20a.h"
 #include "fence_gk20a.h"
-#include "regops_gk20a.h"
-
-#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_timer_gk20a.h>
-
 
 #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7) ? 0xffff : (a >> 32) ? \
         (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
@@ -342,47 +335,16 @@ static int nvgpu_gpu_ioctl_inval_icache(
         struct gk20a *g,
         struct nvgpu_gpu_inval_icache_args *args)
 {
-
-        int err = 0;
-        u32 cache_ctrl, regval;
         struct channel_gk20a *ch;
-        struct nvgpu_dbg_gpu_reg_op ops;
+        int err;
 
         ch = gk20a_get_channel_from_file(args->channel_fd);
         if (!ch)
                 return -EINVAL;
 
-        ops.op = REGOP(READ_32);
-        ops.type = REGOP(TYPE_GR_CTX);
-        ops.status = REGOP(STATUS_SUCCESS);
-        ops.value_hi = 0;
-        ops.and_n_mask_lo = 0;
-        ops.and_n_mask_hi = 0;
-        ops.offset = gr_pri_gpc0_gcc_dbg_r();
-
         /* Take the global lock, since we'll be doing global regops */
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
-
-        regval = ops.value_lo;
-
-        if (!err) {
-                ops.op = REGOP(WRITE_32);
-                ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
-                err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
-        }
-
-        if (err) {
-                gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
-                goto end;
-        }
-
-        cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
-        cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
-        gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
-
-end:
+        err = g->ops.gr.inval_icache(g, ch);
         nvgpu_mutex_release(&g->dbg_sessions_lock);
         return err;
 }
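The deleted register sequence does not disappear; it moves behind the new g->ops.gr.inval_icache hook. A sketch of the gk20a-side implementation, reconstructed from the lines removed above; the helper name and its placement in gr_gk20a.c are assumptions, since that file is outside this diff:

```c
/* Hypothetical gk20a implementation of the inval_icache HAL,
 * reconstructed from the removed ctrl_gk20a.c body. */
static int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch)
{
        int err;
        u32 cache_ctrl, regval;
        struct nvgpu_dbg_gpu_reg_op ops;

        ops.op = REGOP(READ_32);
        ops.type = REGOP(TYPE_GR_CTX);
        ops.status = REGOP(STATUS_SUCCESS);
        ops.value_hi = 0;
        ops.and_n_mask_lo = 0;
        ops.and_n_mask_hi = 0;
        ops.offset = gr_pri_gpc0_gcc_dbg_r();

        /* Read-modify-write the GCC debug register through ctx regops. */
        err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
        regval = ops.value_lo;
        if (!err) {
                ops.op = REGOP(WRITE_32);
                ops.value_lo = set_field(regval,
                                gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
                err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
        }
        if (err) {
                gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
                return err;
        }

        /* Invalidate the SM instruction cache with a direct register
         * poke; legal here because this code is gk20a-specific. */
        cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
        cache_ctrl = set_field(cache_ctrl,
                        gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
        gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);

        return 0;
}
```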
@@ -428,20 +390,10 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
 
 static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g)
 {
-        int err = 0;
-        u32 dbgr_control0;
+        int err;
 
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-        /* assert stop trigger. uniformity assumption: all SMs will have
-         * the same state in dbg_control0. */
-        dbgr_control0 =
-                gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
-        dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
-
-        /* broadcast write */
-        gk20a_writel(g,
-                gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
-
+        err = g->ops.gr.trigger_suspend(g);
         nvgpu_mutex_release(&g->dbg_sessions_lock);
         return err;
 }
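Likewise for trigger_suspend: the stop-trigger broadcast now lives behind g->ops.gr.trigger_suspend. Reconstructed from the removed lines (helper name assumed):

```c
/* Hypothetical gk20a implementation of the trigger_suspend HAL,
 * reconstructed from the removed ctrl_gk20a.c body. */
static int gr_gk20a_trigger_suspend(struct gk20a *g)
{
        u32 dbgr_control0;

        /* assert stop trigger. uniformity assumption: all SMs will have
         * the same state in dbg_control0. */
        dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
        dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();

        /* broadcast write to all TPCs in all GPCs */
        gk20a_writel(g, gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);

        return 0;
}
```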
@@ -451,41 +403,16 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 {
         int err = 0;
         struct warpstate *w_state;
-        struct gr_gk20a *gr = &g->gr;
-        u32 gpc, tpc, sm_count, sm_id, size;
-        u32 global_mask;
+        u32 sm_count, size;
 
         sm_count = g->gr.gpc_count * g->gr.tpc_count;
         size = sm_count * sizeof(struct warpstate);
         w_state = kzalloc(size, GFP_KERNEL);
-
-        /* Wait for the SMs to reach full stop. This condition is:
-         * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
-         * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
-         * masks.
-         */
-        global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
-                gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
-                gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+        if (!w_state)
+                return -ENOMEM;
 
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        /* Lock down all SMs */
-        for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
-
-                gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
-                tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
-
-                err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
-
-                if (err) {
-                        gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
-                        goto end;
-                }
-        }
-
-        /* Read the warp status */
-        g->ops.gr.bpt_reg_info(g, w_state);
+        g->ops.gr.wait_for_pause(g, w_state);
 
         /* Copy to user space - pointed by "args->pwarpstate" */
         if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
@@ -493,7 +420,6 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
                 err = -EFAULT;
         }
 
-end:
         nvgpu_mutex_release(&g->dbg_sessions_lock);
         kfree(w_state);
         return err;
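Two things happen in the wait_for_pause hunks. First, the SM lock-down loop and warp-status read move behind g->ops.gr.wait_for_pause. Second, the rewrite fixes a latent bug: the old code never checked the kzalloc() result before using w_state, while the new code returns -ENOMEM. A sketch of the HAL implementation, reconstructed from the removed lines (helper name assumed):

```c
/* Hypothetical gk20a implementation behind g->ops.gr.wait_for_pause,
 * reconstructed from the removed ctrl_gk20a.c body. */
static int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
{
        struct gr_gk20a *gr = &g->gr;
        u32 gpc, tpc, sm_id;
        u32 global_mask;
        int err;

        /* Wait for the SMs to reach full stop: every SM with valid
         * warps must be in the trap handler (SM_IN_TRAP_MODE) with
         * matching VALID and PAUSED warp masks. */
        global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
                gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
                gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();

        /* Lock down all SMs */
        for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
                gpc = gr->sm_to_cluster[sm_id].gpc_index;
                tpc = gr->sm_to_cluster[sm_id].tpc_index;
                err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
                if (err) {
                        gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
                        return err;
                }
        }

        /* Read the warp status into the caller-supplied array. */
        g->ops.gr.bpt_reg_info(g, w_state);

        return 0;
}
```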
@@ -504,82 +430,29 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g)
         int err = 0;
 
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        /* Clear the pause mask to tell the GPU we want to resume everyone */
-        gk20a_writel(g,
-                gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);
-
-        /* explicitly re-enable forwarding of SM interrupts upon any resume */
-        gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
-                gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
-
-        /* Now resume all sms, write a 0 to the stop trigger
-         * then a 1 to the run trigger */
-        gk20a_resume_all_sms(g);
-
+        err = g->ops.gr.resume_from_pause(g);
         nvgpu_mutex_release(&g->dbg_sessions_lock);
         return err;
 }
 
 static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g)
 {
-        int ret = 0;
-        u32 gpc_offset, tpc_offset, gpc, tpc;
-        struct gr_gk20a *gr = &g->gr;
-        u32 global_esr;
-        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-
-        for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-
-                gpc_offset = gpc_stride * gpc;
-
-                /* check if any tpc has an exception */
-                for (tpc = 0; tpc < gr->tpc_count; tpc++) {
-
-                        tpc_offset = tpc_in_gpc_stride * tpc;
-
-                        global_esr = gk20a_readl(g,
-                                        gr_gpc0_tpc0_sm_hww_global_esr_r() +
-                                        gpc_offset + tpc_offset);
-
-                        /* clear the hwws, also causes tpc and gpc
-                         * exceptions to be cleared */
-                        gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
-                }
-        }
-
-        return ret;
+        return g->ops.gr.clear_sm_errors(g);
 }
 
 static int nvgpu_gpu_ioctl_has_any_exception(
         struct gk20a *g,
         struct nvgpu_gpu_tpc_exception_en_status_args *args)
 {
-        int err = 0;
-        struct gr_gk20a *gr = &g->gr;
-        u32 sm_id, tpc_exception_en = 0;
-        u32 offset, regval, tpc_offset, gpc_offset;
-        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+        u32 tpc_exception_en;
 
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
-
-                tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
-                gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
-                offset = tpc_offset + gpc_offset;
-
-                regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
-                                                                offset);
-                /* Each bit represents corresponding enablement state, bit 0 corrsponds to SM0 */
-                tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
-        }
-
+        tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g);
         nvgpu_mutex_release(&g->dbg_sessions_lock);
+
         args->tpc_exception_en_sm_mask = tpc_exception_en;
-        return err;
+
+        return 0;
 }
 
 static int gk20a_ctrl_get_num_vsms(struct gk20a *g,
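The same pattern repeats for resume_from_pause, clear_sm_errors, and has_any_exception: each ioctl shrinks to locking plus a single HAL call. As one representative example, the per-SM exception-enable walk presumably becomes something like the following (name assumed, body reconstructed from the removed lines):

```c
/* Hypothetical gk20a implementation of tpc_enabled_exceptions,
 * reconstructed from the removed has_any_exception loop. */
static u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g)
{
        struct gr_gk20a *gr = &g->gr;
        u32 sm_id, tpc_exception_en = 0;
        u32 offset, regval;
        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
                                        GPU_LIT_TPC_IN_GPC_STRIDE);

        for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
                offset = tpc_in_gpc_stride *
                                gr->sm_to_cluster[sm_id].tpc_index +
                         gpc_stride * gr->sm_to_cluster[sm_id].gpc_index;
                regval = gk20a_readl(g,
                        gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + offset);
                /* Each bit reports one SM's enable state; bit 0 is SM0. */
                tpc_exception_en |=
                        gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval)
                                << sm_id;
        }

        return tpc_exception_en;
}
```

Note that the litter values (GPU_LIT_GPC_STRIDE and friends) stay: they are already a chip-abstraction mechanism, so they belong in the per-chip implementation, not in common ctrl code.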
@@ -648,8 +521,6 @@ static inline int get_timestamps_zipper(struct gk20a *g,
 {
         int err = 0;
         unsigned int i = 0;
-        u32 gpu_timestamp_hi_new = 0;
-        u32 gpu_timestamp_hi_old = 0;
 
         if (gk20a_busy(g)) {
                 gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
@@ -657,25 +528,12 @@ static inline int get_timestamps_zipper(struct gk20a *g,
                 goto end;
         }
 
-        /* get zipper reads of gpu and cpu counter values */
-        gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
         for (i = 0; i < args->count; i++) {
-                u32 gpu_timestamp_lo = 0;
-                u32 gpu_timestamp_hi = 0;
+                err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp);
+                if (err)
+                        return err;
 
-                gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
                 args->samples[i].cpu_timestamp = get_cpu_timestamp();
-                rmb(); /* maintain zipper read order */
-                gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
-
-                /* pick the appropriate gpu counter hi bits */
-                gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
-                        gpu_timestamp_hi_old : gpu_timestamp_hi_new;
-
-                args->samples[i].gpu_timestamp =
-                        ((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
-
-                gpu_timestamp_hi_old = gpu_timestamp_hi_new;
         }
 
 end:
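The timestamp path is the one case that reuses an existing HAL rather than adding a new one: the open-coded "zipper" read of timer_time_1_r()/timer_time_0_r() becomes g->ops.bus.read_ptimer. A sketch of the rollover-safe read the bus HAL has to provide, adapted from the removed lines; the function name is an assumption, and where the original hoisted the first high-word read out of the sampling loop, the per-call version below is equivalent for a single sample:

```c
/* Hypothetical bus-HAL ptimer read, shaped after the removed zipper
 * logic. Re-reading the high word around the low-word read detects a
 * 32-bit rollover: if the low word is in its upper half (bit 31 set),
 * it cannot have wrapped since the first high-word read, so the older
 * high value is the consistent one; otherwise use the newer one. */
static int gk20a_read_ptimer(struct gk20a *g, u64 *value)
{
        u32 hi_old, hi_new, lo;

        if (!value)
                return -EINVAL;

        hi_old = gk20a_readl(g, timer_time_1_r());
        lo = gk20a_readl(g, timer_time_0_r());
        hi_new = gk20a_readl(g, timer_time_1_r());

        *value = ((u64)((lo & (1UL << 31)) ? hi_old : hi_new) << 32) |
                 (u64)lo;
        return 0;
}
```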