diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c new file mode 100644 index 00000000..2eb45a88 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | |||
@@ -0,0 +1,206 @@ | |||
1 | /* | ||
2 | * GV11B Cycle stats snapshots support | ||
3 | * | ||
4 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/dma-mapping.h> | ||
26 | #include <linux/dma-buf.h> | ||
27 | |||
28 | #include <nvgpu/bitops.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/lock.h> | ||
31 | #include <nvgpu/dma.h> | ||
32 | #include <nvgpu/mm.h> | ||
33 | |||
34 | #include "gk20a/gk20a.h" | ||
35 | #include "gk20a/css_gr_gk20a.h" | ||
36 | #include "css_gr_gv11b.h" | ||
37 | |||
38 | #include <nvgpu/log.h> | ||
39 | #include <nvgpu/bug.h> | ||
40 | |||
41 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> | ||
42 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> | ||
43 | |||
44 | |||
45 | /* reports whether the hw queue overflowed */ | ||
46 | static inline bool css_hw_get_overflow_status(struct gk20a *g) | ||
47 | { | ||
48 | const u32 st = perf_pmasys_control_membuf_status_overflowed_f(); | ||
49 | return st == (gk20a_readl(g, perf_pmasys_control_r()) & st); | ||
50 | } | ||
51 | |||
52 | /* returns how many pending snapshot entries are pending */ | ||
53 | static inline u32 css_hw_get_pending_snapshots(struct gk20a *g) | ||
54 | { | ||
55 | return gk20a_readl(g, perf_pmasys_mem_bytes_r()) / | ||
56 | sizeof(struct gk20a_cs_snapshot_fifo_entry); | ||
57 | } | ||
58 | |||
59 | /* informs hw how many snapshots have been processed (frees up fifo space) */ | ||
60 | static inline void gv11b_css_hw_set_handled_snapshots(struct gk20a *g, u32 done) | ||
61 | { | ||
62 | if (done > 0) { | ||
63 | gk20a_writel(g, perf_pmasys_mem_bump_r(), | ||
64 | done * sizeof(struct gk20a_cs_snapshot_fifo_entry)); | ||
65 | } | ||
66 | } | ||
67 | |||
68 | /* disable streaming to memory */ | ||
69 | static void gv11b_css_hw_reset_streaming(struct gk20a *g) | ||
70 | { | ||
71 | u32 engine_status; | ||
72 | |||
73 | /* reset the perfmon */ | ||
74 | g->ops.mc.reset(g, mc_enable_perfmon_enabled_f()); | ||
75 | |||
76 | /* RBUFEMPTY must be set -- otherwise we'll pick up */ | ||
77 | /* snapshot that have been queued up from earlier */ | ||
78 | engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r()); | ||
79 | |||
80 | /* turn off writes */ | ||
81 | gk20a_writel(g, perf_pmasys_control_r(), | ||
82 | perf_pmasys_control_membuf_clear_status_doit_f()); | ||
83 | |||
84 | /* pointing all pending snapshots as handled */ | ||
85 | gv11b_css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g)); | ||
86 | } | ||
87 | |||
88 | int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch, | ||
89 | struct gk20a_cs_snapshot_client *cs_client) | ||
90 | { | ||
91 | struct gk20a *g = ch->g; | ||
92 | struct gr_gk20a *gr = &g->gr; | ||
93 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
94 | u32 snapshot_size = cs_client->snapshot_size; | ||
95 | int ret; | ||
96 | |||
97 | u32 virt_addr_lo; | ||
98 | u32 virt_addr_hi; | ||
99 | u32 inst_pa_page; | ||
100 | |||
101 | if (data->hw_snapshot) | ||
102 | return 0; | ||
103 | |||
104 | if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE) | ||
105 | snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE; | ||
106 | |||
107 | ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size, | ||
108 | &data->hw_memdesc); | ||
109 | if (ret) | ||
110 | return ret; | ||
111 | |||
112 | /* perf output buffer may not cross a 4GB boundary - with a separate */ | ||
113 | /* va smaller than that, it won't but check anyway */ | ||
114 | if (!data->hw_memdesc.cpu_va || | ||
115 | data->hw_memdesc.size < snapshot_size || | ||
116 | data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) { | ||
117 | ret = -EFAULT; | ||
118 | goto failed_allocation; | ||
119 | } | ||
120 | |||
121 | data->hw_snapshot = | ||
122 | (struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va; | ||
123 | data->hw_end = data->hw_snapshot + | ||
124 | snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry); | ||
125 | data->hw_get = data->hw_snapshot; | ||
126 | memset(data->hw_snapshot, 0xff, snapshot_size); | ||
127 | |||
128 | virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va); | ||
129 | virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va); | ||
130 | |||
131 | gv11b_css_hw_reset_streaming(g); | ||
132 | |||
133 | gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo); | ||
134 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
135 | perf_pmasys_outbaseupper_ptr_f(virt_addr_hi)); | ||
136 | gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size); | ||
137 | |||
138 | /* this field is aligned to 4K */ | ||
139 | inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; | ||
140 | |||
141 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
142 | perf_pmasys_mem_block_base_f(inst_pa_page) | | ||
143 | perf_pmasys_mem_block_valid_true_f() | | ||
144 | nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, | ||
145 | perf_pmasys_mem_block_target_sys_ncoh_f(), | ||
146 | perf_pmasys_mem_block_target_lfb_f())); | ||
147 | |||
148 | |||
149 | gk20a_dbg_info("cyclestats: buffer for hardware snapshots enabled\n"); | ||
150 | |||
151 | return 0; | ||
152 | |||
153 | failed_allocation: | ||
154 | if (data->hw_memdesc.size) { | ||
155 | nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc); | ||
156 | memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc)); | ||
157 | } | ||
158 | data->hw_snapshot = NULL; | ||
159 | |||
160 | return ret; | ||
161 | } | ||
162 | |||
163 | void gv11b_css_hw_disable_snapshot(struct gr_gk20a *gr) | ||
164 | { | ||
165 | struct gk20a *g = gr->g; | ||
166 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
167 | |||
168 | if (!data->hw_snapshot) | ||
169 | return; | ||
170 | |||
171 | gv11b_css_hw_reset_streaming(g); | ||
172 | |||
173 | gk20a_writel(g, perf_pmasys_outbase_r(), 0); | ||
174 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
175 | perf_pmasys_outbaseupper_ptr_f(0)); | ||
176 | gk20a_writel(g, perf_pmasys_outsize_r(), 0); | ||
177 | |||
178 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
179 | perf_pmasys_mem_block_base_f(0) | | ||
180 | perf_pmasys_mem_block_valid_false_f() | | ||
181 | perf_pmasys_mem_block_target_f(0)); | ||
182 | |||
183 | nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc); | ||
184 | memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc)); | ||
185 | data->hw_snapshot = NULL; | ||
186 | |||
187 | gk20a_dbg_info("cyclestats: buffer for hardware snapshots disabled\n"); | ||
188 | } | ||
189 | |||
190 | int gv11b_css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, | ||
191 | bool *hw_overflow) | ||
192 | { | ||
193 | struct gk20a *g = ch->g; | ||
194 | struct gr_gk20a *gr = &g->gr; | ||
195 | struct gk20a_cs_snapshot *css = gr->cs_data; | ||
196 | |||
197 | if (!css->hw_snapshot) | ||
198 | return -EINVAL; | ||
199 | |||
200 | *pending = css_hw_get_pending_snapshots(g); | ||
201 | if (!*pending) | ||
202 | return 0; | ||
203 | |||
204 | *hw_overflow = css_hw_get_overflow_status(g); | ||
205 | return 0; | ||
206 | } | ||