path: root/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
author		seshendra Gadagottu <sgadagottu@nvidia.com>	2017-06-22 19:28:19 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-10-20 22:03:56 -0400
commit		cf70c925cd3d8e6c83feee04316b080d5f258afc (patch)
tree		54c88ee84cff31561accbd72728bfc2a56f7a510 /drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
parent		ed8ac6e005d95e051bd03a182bbe0aa09a3c2266 (diff)
gpu: nvgpu: gv11b: update css ops
Updated the following HAL functions for gv11b CSS and reused them for gv100:
  enable_snapshot
  disable_snapshot
  check_data_available

These changes are needed for the following reasons:
1. Register offsets for perf_pmasys_* changed for gv11b/gv100
   relative to gk20a.
2. The memory type for perf_pmasys_mem_block_target is now selected
   based on the memory aperture used for the hwpm inst_block.

Bug 200327596

Change-Id: I500d17670e2f389d8d0e77884374bcc3504a41f8
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1507546
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c')
-rw-r--r--	drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c	206
1 file changed, 206 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
new file mode 100644
index 00000000..6afd92fa
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
@@ -0,0 +1,206 @@
/*
 * GV11B Cycle stats snapshots support
 *
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/dma-mapping.h>
#include <linux/dma-buf.h>

#include <nvgpu/bitops.h>
#include <nvgpu/kmem.h>
#include <nvgpu/lock.h>
#include <nvgpu/dma.h>

#include "gk20a/gk20a.h"
#include "gk20a/css_gr_gk20a.h"
#include "css_gr_gv11b.h"

#include <nvgpu/log.h>
#include <nvgpu/bug.h>

#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>

/* reports whether the hw queue overflowed */
static inline bool css_hw_get_overflow_status(struct gk20a *g)
{
	const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
	return st == (gk20a_readl(g, perf_pmasys_control_r()) & st);
}

/* returns how many snapshot entries are pending in the hw fifo */
static inline u32 css_hw_get_pending_snapshots(struct gk20a *g)
{
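	/*
	 * The hw reports how many bytes of snapshot data have been written
	 * to the buffer; dividing by the fixed entry size gives the number
	 * of complete entries.
	 */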
	return gk20a_readl(g, perf_pmasys_mem_bytes_r()) /
			sizeof(struct gk20a_cs_snapshot_fifo_entry);
}

/* informs hw how many snapshots have been processed (frees up fifo space) */
static inline void gv11b_css_hw_set_handled_snapshots(struct gk20a *g, u32 done)
{
	if (done > 0) {
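		/*
		 * The bump register is programmed in bytes, so scale the
		 * handled entry count by the entry size.
		 */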
		gk20a_writel(g, perf_pmasys_mem_bump_r(),
			done * sizeof(struct gk20a_cs_snapshot_fifo_entry));
	}
}

/* disable streaming to memory */
static void gv11b_css_hw_reset_streaming(struct gk20a *g)
{
	u32 engine_status;

	/* reset the perfmon */
	g->ops.mc.reset(g, mc_enable_perfmon_enabled_f());

	/* RBUFEMPTY must be set -- otherwise we'll pick up */
	/* snapshots that have been queued up from earlier */
	engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());

	/* turn off writes */
	gk20a_writel(g, perf_pmasys_control_r(),
			perf_pmasys_control_membuf_clear_status_doit_f());

	/* mark all pending snapshots as handled */
	gv11b_css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g));
}

int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch,
				struct gk20a_cs_snapshot_client *cs_client)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;
	struct gk20a_cs_snapshot *data = gr->cs_data;
	u32 snapshot_size = cs_client->snapshot_size;
	int ret;

	u32 virt_addr_lo;
	u32 virt_addr_hi;
	u32 inst_pa_page;

	if (data->hw_snapshot)
		return 0;

	if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
		snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;

	ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size,
			&data->hw_memdesc);
	if (ret)
		return ret;

	/* the perf output buffer may not cross a 4GB boundary - with a */
	/* separate va space smaller than that it won't, but check anyway */
	if (!data->hw_memdesc.cpu_va ||
			data->hw_memdesc.size < snapshot_size ||
			data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) {
		ret = -EFAULT;
		goto failed_allocation;
	}

	data->hw_snapshot =
		(struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va;
	data->hw_end = data->hw_snapshot +
		snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
	data->hw_get = data->hw_snapshot;
	memset(data->hw_snapshot, 0xff, snapshot_size);

	virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va);
	virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va);

	gv11b_css_hw_reset_streaming(g);

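	/*
	 * Program the snapshot buffer base address and size so the PMA
	 * unit streams snapshot entries into the buffer just allocated.
	 */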
	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
	gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);

	/* this field is aligned to 4K */
	inst_pa_page = gk20a_mm_inst_block_addr(g,
			&g->mm.hwpm.inst_block) >> 12;

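	/*
	 * Bind the hwpm instance block to the PMA unit. The target aperture
	 * (non-coherent sysmem vs local FB) follows wherever the inst_block
	 * was actually allocated.
	 */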
	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(inst_pa_page) |
			perf_pmasys_mem_block_valid_true_f() |
			nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block,
				perf_pmasys_mem_block_target_sys_ncoh_f(),
				perf_pmasys_mem_block_target_lfb_f()));


	gk20a_dbg_info("cyclestats: buffer for hardware snapshots enabled\n");

	return 0;

failed_allocation:
	if (data->hw_memdesc.size) {
		nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
		memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
	}
	data->hw_snapshot = NULL;

	return ret;
}

void gv11b_css_hw_disable_snapshot(struct gr_gk20a *gr)
{
	struct gk20a *g = gr->g;
	struct gk20a_cs_snapshot *data = gr->cs_data;

	if (!data->hw_snapshot)
		return;

	gv11b_css_hw_reset_streaming(g);

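	/*
	 * Unbind the snapshot output buffer and the hwpm instance block
	 * from the PMA unit before freeing the memory.
	 */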
	gk20a_writel(g, perf_pmasys_outbase_r(), 0);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(0));
	gk20a_writel(g, perf_pmasys_outsize_r(), 0);

	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(0) |
			perf_pmasys_mem_block_valid_false_f() |
			perf_pmasys_mem_block_target_f(0));

	nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
	memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
	data->hw_snapshot = NULL;

	gk20a_dbg_info("cyclestats: buffer for hardware snapshots disabled\n");
}

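/* reports the number of pending snapshot entries and whether the hw fifo */
/* overflowed; returns -EINVAL if hw snapshots are not enabled */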
int gv11b_css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
					bool *hw_overflow)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;
	struct gk20a_cs_snapshot *css = gr->cs_data;

	if (!css->hw_snapshot)
		return -EINVAL;

	*pending = css_hw_get_pending_snapshots(g);
	if (!*pending)
		return 0;

	*hw_overflow = css_hw_get_overflow_status(g);
	return 0;
}