summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c206
1 files changed, 206 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
new file mode 100644
index 00000000..2eb45a88
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
@@ -0,0 +1,206 @@
1/*
2 * GV11B Cycle stats snapshots support
3 *
4 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <linux/dma-mapping.h>
26#include <linux/dma-buf.h>
27
28#include <nvgpu/bitops.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/lock.h>
31#include <nvgpu/dma.h>
32#include <nvgpu/mm.h>
33
34#include "gk20a/gk20a.h"
35#include "gk20a/css_gr_gk20a.h"
36#include "css_gr_gv11b.h"
37
38#include <nvgpu/log.h>
39#include <nvgpu/bug.h>
40
41#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
42#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
43
44
45/* reports whether the hw queue overflowed */
46static inline bool css_hw_get_overflow_status(struct gk20a *g)
47{
48 const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
49 return st == (gk20a_readl(g, perf_pmasys_control_r()) & st);
50}
51
52/* returns how many pending snapshot entries are pending */
53static inline u32 css_hw_get_pending_snapshots(struct gk20a *g)
54{
55 return gk20a_readl(g, perf_pmasys_mem_bytes_r()) /
56 sizeof(struct gk20a_cs_snapshot_fifo_entry);
57}
58
59/* informs hw how many snapshots have been processed (frees up fifo space) */
60static inline void gv11b_css_hw_set_handled_snapshots(struct gk20a *g, u32 done)
61{
62 if (done > 0) {
63 gk20a_writel(g, perf_pmasys_mem_bump_r(),
64 done * sizeof(struct gk20a_cs_snapshot_fifo_entry));
65 }
66}
67
68/* disable streaming to memory */
69static void gv11b_css_hw_reset_streaming(struct gk20a *g)
70{
71 u32 engine_status;
72
73 /* reset the perfmon */
74 g->ops.mc.reset(g, mc_enable_perfmon_enabled_f());
75
76 /* RBUFEMPTY must be set -- otherwise we'll pick up */
77 /* snapshot that have been queued up from earlier */
78 engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
79
80 /* turn off writes */
81 gk20a_writel(g, perf_pmasys_control_r(),
82 perf_pmasys_control_membuf_clear_status_doit_f());
83
84 /* pointing all pending snapshots as handled */
85 gv11b_css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g));
86}
87
88int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch,
89 struct gk20a_cs_snapshot_client *cs_client)
90{
91 struct gk20a *g = ch->g;
92 struct gr_gk20a *gr = &g->gr;
93 struct gk20a_cs_snapshot *data = gr->cs_data;
94 u32 snapshot_size = cs_client->snapshot_size;
95 int ret;
96
97 u32 virt_addr_lo;
98 u32 virt_addr_hi;
99 u32 inst_pa_page;
100
101 if (data->hw_snapshot)
102 return 0;
103
104 if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
105 snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;
106
107 ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size,
108 &data->hw_memdesc);
109 if (ret)
110 return ret;
111
112 /* perf output buffer may not cross a 4GB boundary - with a separate */
113 /* va smaller than that, it won't but check anyway */
114 if (!data->hw_memdesc.cpu_va ||
115 data->hw_memdesc.size < snapshot_size ||
116 data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) {
117 ret = -EFAULT;
118 goto failed_allocation;
119 }
120
121 data->hw_snapshot =
122 (struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va;
123 data->hw_end = data->hw_snapshot +
124 snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
125 data->hw_get = data->hw_snapshot;
126 memset(data->hw_snapshot, 0xff, snapshot_size);
127
128 virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va);
129 virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va);
130
131 gv11b_css_hw_reset_streaming(g);
132
133 gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
134 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
135 perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
136 gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);
137
138 /* this field is aligned to 4K */
139 inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
140
141 gk20a_writel(g, perf_pmasys_mem_block_r(),
142 perf_pmasys_mem_block_base_f(inst_pa_page) |
143 perf_pmasys_mem_block_valid_true_f() |
144 nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block,
145 perf_pmasys_mem_block_target_sys_ncoh_f(),
146 perf_pmasys_mem_block_target_lfb_f()));
147
148
149 gk20a_dbg_info("cyclestats: buffer for hardware snapshots enabled\n");
150
151 return 0;
152
153failed_allocation:
154 if (data->hw_memdesc.size) {
155 nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
156 memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
157 }
158 data->hw_snapshot = NULL;
159
160 return ret;
161}
162
163void gv11b_css_hw_disable_snapshot(struct gr_gk20a *gr)
164{
165 struct gk20a *g = gr->g;
166 struct gk20a_cs_snapshot *data = gr->cs_data;
167
168 if (!data->hw_snapshot)
169 return;
170
171 gv11b_css_hw_reset_streaming(g);
172
173 gk20a_writel(g, perf_pmasys_outbase_r(), 0);
174 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
175 perf_pmasys_outbaseupper_ptr_f(0));
176 gk20a_writel(g, perf_pmasys_outsize_r(), 0);
177
178 gk20a_writel(g, perf_pmasys_mem_block_r(),
179 perf_pmasys_mem_block_base_f(0) |
180 perf_pmasys_mem_block_valid_false_f() |
181 perf_pmasys_mem_block_target_f(0));
182
183 nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
184 memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
185 data->hw_snapshot = NULL;
186
187 gk20a_dbg_info("cyclestats: buffer for hardware snapshots disabled\n");
188}
189
190int gv11b_css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
191 bool *hw_overflow)
192{
193 struct gk20a *g = ch->g;
194 struct gr_gk20a *gr = &g->gr;
195 struct gk20a_cs_snapshot *css = gr->cs_data;
196
197 if (!css->hw_snapshot)
198 return -EINVAL;
199
200 *pending = css_hw_get_pending_snapshots(g);
201 if (!*pending)
202 return 0;
203
204 *hw_overflow = css_hw_get_overflow_status(g);
205 return 0;
206}