summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com> 2015-02-05 13:05:56 -0500
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-04-04 21:05:22 -0400
commit: 24ddf71b9009291b829e6c30eb1b22e8838f7367 (patch)
tree: 0a2816a1f813c17c32c51390f1d9311f7e95cc4d
parent: 5b6e8995b2a3d399a8cc7fd249301122053666e1 (diff)
gpu: nvgpu: Use busy looping on memory ops
Use busy looping on L2 and TLB maintenance operations. This speeds them up by an order of magnitude. Also add tracepoints to measure the performance of memory ops and interrupt processing. Change-Id: Ic4a8525d3d946b2b8f57b4b8ddcfc61605619399 Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/681640
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mc_gk20a.c 9
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mm_gk20a.c 31
-rw-r--r-- include/trace/events/gk20a.h 67
4 files changed, 103 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 9e19fa53..75775d57 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -26,6 +26,7 @@
26#include <linux/dma-mapping.h> 26#include <linux/dma-mapping.h>
27#include <linux/firmware.h> 27#include <linux/firmware.h>
28#include <linux/nvhost.h> 28#include <linux/nvhost.h>
29#include <trace/events/gk20a.h>
29 30
30#include "gk20a.h" 31#include "gk20a.h"
31#include "kind_gk20a.h" 32#include "kind_gk20a.h"
@@ -4998,6 +4999,8 @@ static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr,
4998{ 4999{
4999 gk20a_dbg_fn(""); 5000 gk20a_dbg_fn("");
5000 5001
5002 trace_gr_gk20a_handle_sw_method(g->dev->name);
5003
5001 if (class_num == KEPLER_COMPUTE_A) { 5004 if (class_num == KEPLER_COMPUTE_A) {
5002 switch (offset << 2) { 5005 switch (offset << 2) {
5003 case NVA0C0_SET_SHADER_EXCEPTIONS: 5006 case NVA0C0_SET_SHADER_EXCEPTIONS:
diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
index 9f9e756b..86fea3a1 100644
--- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
@@ -14,6 +14,7 @@
14 */ 14 */
15 15
16#include <linux/types.h> 16#include <linux/types.h>
17#include <trace/events/gk20a.h>
17 18
18#include "gk20a.h" 19#include "gk20a.h"
19#include "mc_gk20a.h" 20#include "mc_gk20a.h"
@@ -23,6 +24,8 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g)
23{ 24{
24 u32 mc_intr_0; 25 u32 mc_intr_0;
25 26
27 trace_mc_gk20a_intr_stall(g->dev->name);
28
26 if (!g->power_on) 29 if (!g->power_on)
27 return IRQ_NONE; 30 return IRQ_NONE;
28 31
@@ -37,6 +40,8 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g)
37 /* flush previous write */ 40 /* flush previous write */
38 gk20a_readl(g, mc_intr_en_0_r()); 41 gk20a_readl(g, mc_intr_en_0_r());
39 42
43 trace_mc_gk20a_intr_stall_done(g->dev->name);
44
40 return IRQ_WAKE_THREAD; 45 return IRQ_WAKE_THREAD;
41} 46}
42 47
@@ -67,6 +72,8 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
67 72
68 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); 73 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
69 74
75 trace_mc_gk20a_intr_thread_stall(g->dev->name);
76
70 mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); 77 mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
71 78
72 gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); 79 gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
@@ -92,6 +99,8 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
92 /* flush previous write */ 99 /* flush previous write */
93 gk20a_readl(g, mc_intr_en_0_r()); 100 gk20a_readl(g, mc_intr_en_0_r());
94 101
102 trace_mc_gk20a_intr_thread_stall_done(g->dev->name);
103
95 return IRQ_HANDLED; 104 return IRQ_HANDLED;
96} 105}
97 106
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 2874567c..798b6405 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -27,6 +27,7 @@
27#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
28#include <linux/dma-buf.h> 28#include <linux/dma-buf.h>
29#include <uapi/linux/nvgpu.h> 29#include <uapi/linux/nvgpu.h>
30#include <trace/events/gk20a.h>
30 31
31#include "gk20a.h" 32#include "gk20a.h"
32#include "mm_gk20a.h" 33#include "mm_gk20a.h"
@@ -2816,6 +2817,9 @@ int gk20a_mm_fb_flush(struct gk20a *g)
2816 /* Make sure all previous writes are committed to the L2. There's no 2817 /* Make sure all previous writes are committed to the L2. There's no
2817 guarantee that writes are to DRAM. This will be a sysmembar internal 2818 guarantee that writes are to DRAM. This will be a sysmembar internal
2818 to the L2. */ 2819 to the L2. */
2820
2821 trace_gk20a_mm_fb_flush(g->dev->name);
2822
2819 gk20a_writel(g, flush_fb_flush_r(), 2823 gk20a_writel(g, flush_fb_flush_r(),
2820 flush_fb_flush_pending_busy_f()); 2824 flush_fb_flush_pending_busy_f());
2821 2825
@@ -2828,7 +2832,7 @@ int gk20a_mm_fb_flush(struct gk20a *g)
2828 flush_fb_flush_pending_busy_v()) { 2832 flush_fb_flush_pending_busy_v()) {
2829 gk20a_dbg_info("fb_flush 0x%x", data); 2833 gk20a_dbg_info("fb_flush 0x%x", data);
2830 retry--; 2834 retry--;
2831 usleep_range(20, 40); 2835 udelay(5);
2832 } else 2836 } else
2833 break; 2837 break;
2834 } while (retry >= 0 || !tegra_platform_is_silicon()); 2838 } while (retry >= 0 || !tegra_platform_is_silicon());
@@ -2839,6 +2843,8 @@ int gk20a_mm_fb_flush(struct gk20a *g)
2839 ret = -EBUSY; 2843 ret = -EBUSY;
2840 } 2844 }
2841 2845
2846 trace_gk20a_mm_fb_flush_done(g->dev->name);
2847
2842 mutex_unlock(&mm->l2_op_lock); 2848 mutex_unlock(&mm->l2_op_lock);
2843 2849
2844 return ret; 2850 return ret;
@@ -2849,6 +2855,8 @@ static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2849 u32 data; 2855 u32 data;
2850 s32 retry = 200; 2856 s32 retry = 200;
2851 2857
2858 trace_gk20a_mm_l2_invalidate(g->dev->name);
2859
2852 /* Invalidate any clean lines from the L2 so subsequent reads go to 2860 /* Invalidate any clean lines from the L2 so subsequent reads go to
2853 DRAM. Dirty lines are not affected by this operation. */ 2861 DRAM. Dirty lines are not affected by this operation. */
2854 gk20a_writel(g, flush_l2_system_invalidate_r(), 2862 gk20a_writel(g, flush_l2_system_invalidate_r(),
@@ -2864,7 +2872,7 @@ static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2864 gk20a_dbg_info("l2_system_invalidate 0x%x", 2872 gk20a_dbg_info("l2_system_invalidate 0x%x",
2865 data); 2873 data);
2866 retry--; 2874 retry--;
2867 usleep_range(20, 40); 2875 udelay(5);
2868 } else 2876 } else
2869 break; 2877 break;
2870 } while (retry >= 0 || !tegra_platform_is_silicon()); 2878 } while (retry >= 0 || !tegra_platform_is_silicon());
@@ -2872,6 +2880,8 @@ static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2872 if (retry < 0) 2880 if (retry < 0)
2873 gk20a_warn(dev_from_gk20a(g), 2881 gk20a_warn(dev_from_gk20a(g),
2874 "l2_system_invalidate too many retries"); 2882 "l2_system_invalidate too many retries");
2883
2884 trace_gk20a_mm_l2_invalidate_done(g->dev->name);
2875} 2885}
2876 2886
2877void gk20a_mm_l2_invalidate(struct gk20a *g) 2887void gk20a_mm_l2_invalidate(struct gk20a *g)
@@ -2900,6 +2910,8 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
2900 2910
2901 mutex_lock(&mm->l2_op_lock); 2911 mutex_lock(&mm->l2_op_lock);
2902 2912
2913 trace_gk20a_mm_l2_flush(g->dev->name);
2914
2903 /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 2915 /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
2904 as clean, so subsequent reads might hit in the L2. */ 2916 as clean, so subsequent reads might hit in the L2. */
2905 gk20a_writel(g, flush_l2_flush_dirty_r(), 2917 gk20a_writel(g, flush_l2_flush_dirty_r(),
@@ -2914,7 +2926,7 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
2914 flush_l2_flush_dirty_pending_busy_v()) { 2926 flush_l2_flush_dirty_pending_busy_v()) {
2915 gk20a_dbg_info("l2_flush_dirty 0x%x", data); 2927 gk20a_dbg_info("l2_flush_dirty 0x%x", data);
2916 retry--; 2928 retry--;
2917 usleep_range(20, 40); 2929 udelay(5);
2918 } else 2930 } else
2919 break; 2931 break;
2920 } while (retry >= 0 || !tegra_platform_is_silicon()); 2932 } while (retry >= 0 || !tegra_platform_is_silicon());
@@ -2923,6 +2935,8 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
2923 gk20a_warn(dev_from_gk20a(g), 2935 gk20a_warn(dev_from_gk20a(g),
2924 "l2_flush_dirty too many retries"); 2936 "l2_flush_dirty too many retries");
2925 2937
2938 trace_gk20a_mm_l2_flush_done(g->dev->name);
2939
2926 if (invalidate) 2940 if (invalidate)
2927 gk20a_mm_l2_invalidate_locked(g); 2941 gk20a_mm_l2_invalidate_locked(g);
2928 2942
@@ -2964,7 +2978,7 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
2964 u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->mm->g, 2978 u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->mm->g,
2965 vm->pdes.sgt->sgl) >> 12); 2979 vm->pdes.sgt->sgl) >> 12);
2966 u32 data; 2980 u32 data;
2967 s32 retry = 200; 2981 s32 retry = 2000;
2968 static DEFINE_MUTEX(tlb_lock); 2982 static DEFINE_MUTEX(tlb_lock);
2969 2983
2970 gk20a_dbg_fn(""); 2984 gk20a_dbg_fn("");
@@ -2986,11 +3000,14 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
2986 } 3000 }
2987 3001
2988 mutex_lock(&tlb_lock); 3002 mutex_lock(&tlb_lock);
3003
3004 trace_gk20a_mm_tlb_invalidate(g->dev->name);
3005
2989 do { 3006 do {
2990 data = gk20a_readl(g, fb_mmu_ctrl_r()); 3007 data = gk20a_readl(g, fb_mmu_ctrl_r());
2991 if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0) 3008 if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
2992 break; 3009 break;
2993 usleep_range(20, 40); 3010 udelay(2);
2994 retry--; 3011 retry--;
2995 } while (retry >= 0 || !tegra_platform_is_silicon()); 3012 } while (retry >= 0 || !tegra_platform_is_silicon());
2996 3013
@@ -3014,13 +3031,15 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
3014 fb_mmu_ctrl_pri_fifo_empty_false_f()) 3031 fb_mmu_ctrl_pri_fifo_empty_false_f())
3015 break; 3032 break;
3016 retry--; 3033 retry--;
3017 usleep_range(20, 40); 3034 udelay(2);
3018 } while (retry >= 0 || !tegra_platform_is_silicon()); 3035 } while (retry >= 0 || !tegra_platform_is_silicon());
3019 3036
3020 if (retry < 0) 3037 if (retry < 0)
3021 gk20a_warn(dev_from_gk20a(g), 3038 gk20a_warn(dev_from_gk20a(g),
3022 "mmu invalidate too many retries"); 3039 "mmu invalidate too many retries");
3023 3040
3041 trace_gk20a_mm_tlb_invalidate_done(g->dev->name);
3042
3024out: 3043out:
3025 mutex_unlock(&tlb_lock); 3044 mutex_unlock(&tlb_lock);
3026 vm->tlb_dirty = false; 3045 vm->tlb_dirty = false;
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index 32e3100a..096b0559 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * gk20a event logging to ftrace. 2 * gk20a event logging to ftrace.
3 * 3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -65,6 +65,71 @@ DEFINE_EVENT(gk20a, gk20a_gpfifo_submit_wait_for_space_done,
65 TP_ARGS(name) 65 TP_ARGS(name)
66); 66);
67 67
68DEFINE_EVENT(gk20a, gk20a_mm_l2_invalidate,
69 TP_PROTO(const char *name),
70 TP_ARGS(name)
71);
72
73DEFINE_EVENT(gk20a, gk20a_mm_l2_invalidate_done,
74 TP_PROTO(const char *name),
75 TP_ARGS(name)
76);
77
78DEFINE_EVENT(gk20a, gk20a_mm_l2_flush,
79 TP_PROTO(const char *name),
80 TP_ARGS(name)
81);
82
83DEFINE_EVENT(gk20a, gk20a_mm_l2_flush_done,
84 TP_PROTO(const char *name),
85 TP_ARGS(name)
86);
87
88DEFINE_EVENT(gk20a, gk20a_mm_tlb_invalidate,
89 TP_PROTO(const char *name),
90 TP_ARGS(name)
91);
92
93DEFINE_EVENT(gk20a, gk20a_mm_tlb_invalidate_done,
94 TP_PROTO(const char *name),
95 TP_ARGS(name)
96);
97
98DEFINE_EVENT(gk20a, gk20a_mm_fb_flush,
99 TP_PROTO(const char *name),
100 TP_ARGS(name)
101);
102
103DEFINE_EVENT(gk20a, gk20a_mm_fb_flush_done,
104 TP_PROTO(const char *name),
105 TP_ARGS(name)
106);
107
108DEFINE_EVENT(gk20a, mc_gk20a_intr_thread_stall,
109 TP_PROTO(const char *name),
110 TP_ARGS(name)
111);
112
113DEFINE_EVENT(gk20a, mc_gk20a_intr_thread_stall_done,
114 TP_PROTO(const char *name),
115 TP_ARGS(name)
116);
117
118DEFINE_EVENT(gk20a, mc_gk20a_intr_stall,
119 TP_PROTO(const char *name),
120 TP_ARGS(name)
121);
122
123DEFINE_EVENT(gk20a, mc_gk20a_intr_stall_done,
124 TP_PROTO(const char *name),
125 TP_ARGS(name)
126);
127
128DEFINE_EVENT(gk20a, gr_gk20a_handle_sw_method,
129 TP_PROTO(const char *name),
130 TP_ARGS(name)
131);
132
68TRACE_EVENT(gk20a_channel_update, 133TRACE_EVENT(gk20a_channel_update,
69 TP_PROTO(const void *channel), 134 TP_PROTO(const void *channel),
70 TP_ARGS(channel), 135 TP_ARGS(channel),