path: root/include/os/linux
Diffstat (limited to 'include/os/linux')
-rw-r--r--  include/os/linux/cde.c | 1794
-rw-r--r--  include/os/linux/cde.h | 326
-rw-r--r--  include/os/linux/cde_gm20b.c | 59
-rw-r--r--  include/os/linux/cde_gm20b.h | 33
-rw-r--r--  include/os/linux/cde_gp10b.c | 153
-rw-r--r--  include/os/linux/cde_gp10b.h | 40
-rw-r--r--  include/os/linux/channel.h | 102
-rw-r--r--  include/os/linux/clk.c | 286
-rw-r--r--  include/os/linux/clk.h | 22
-rw-r--r--  include/os/linux/comptags.c | 140
-rw-r--r--  include/os/linux/cond.c | 73
-rw-r--r--  include/os/linux/ctxsw_trace.c | 792
-rw-r--r--  include/os/linux/ctxsw_trace.h | 39
-rw-r--r--  include/os/linux/debug.c | 457
-rw-r--r--  include/os/linux/debug_allocator.c | 69
-rw-r--r--  include/os/linux/debug_allocator.h | 21
-rw-r--r--  include/os/linux/debug_bios.c | 60
-rw-r--r--  include/os/linux/debug_bios.h | 21
-rw-r--r--  include/os/linux/debug_cde.c | 53
-rw-r--r--  include/os/linux/debug_cde.h | 21
-rw-r--r--  include/os/linux/debug_ce.c | 30
-rw-r--r--  include/os/linux/debug_ce.h | 21
-rw-r--r--  include/os/linux/debug_clk_gm20b.c | 280
-rw-r--r--  include/os/linux/debug_clk_gm20b.h | 29
-rw-r--r--  include/os/linux/debug_clk_gp106.c | 193
-rw-r--r--  include/os/linux/debug_clk_gp106.h | 29
-rw-r--r--  include/os/linux/debug_clk_gv100.c | 193
-rw-r--r--  include/os/linux/debug_clk_gv100.h | 29
-rw-r--r--  include/os/linux/debug_fecs_trace.c | 151
-rw-r--r--  include/os/linux/debug_fecs_trace.h | 30
-rw-r--r--  include/os/linux/debug_fifo.c | 376
-rw-r--r--  include/os/linux/debug_fifo.h | 22
-rw-r--r--  include/os/linux/debug_gr.c | 31
-rw-r--r--  include/os/linux/debug_gr.h | 21
-rw-r--r--  include/os/linux/debug_hal.c | 95
-rw-r--r--  include/os/linux/debug_hal.h | 22
-rw-r--r--  include/os/linux/debug_kmem.c | 312
-rw-r--r--  include/os/linux/debug_kmem.h | 23
-rw-r--r--  include/os/linux/debug_ltc.c | 94
-rw-r--r--  include/os/linux/debug_ltc.h | 21
-rw-r--r--  include/os/linux/debug_pmgr.c | 104
-rw-r--r--  include/os/linux/debug_pmgr.h | 28
-rw-r--r--  include/os/linux/debug_pmu.c | 484
-rw-r--r--  include/os/linux/debug_pmu.h | 21
-rw-r--r--  include/os/linux/debug_sched.c | 79
-rw-r--r--  include/os/linux/debug_sched.h | 21
-rw-r--r--  include/os/linux/debug_therm_gp106.c | 49
-rw-r--r--  include/os/linux/debug_therm_gp106.h | 29
-rw-r--r--  include/os/linux/debug_xve.c | 177
-rw-r--r--  include/os/linux/debug_xve.h | 21
-rw-r--r--  include/os/linux/dmabuf.c | 219
-rw-r--r--  include/os/linux/dmabuf.h | 62
-rw-r--r--  include/os/linux/dmabuf_vidmem.c | 269
-rw-r--r--  include/os/linux/dmabuf_vidmem.h | 78
-rw-r--r--  include/os/linux/driver_common.c | 400
-rw-r--r--  include/os/linux/driver_common.h | 22
-rw-r--r--  include/os/linux/dt.c | 29
-rw-r--r--  include/os/linux/ecc_linux.h | 49
-rw-r--r--  include/os/linux/ecc_sysfs.c | 80
-rw-r--r--  include/os/linux/firmware.c | 117
-rw-r--r--  include/os/linux/fuse.c | 55
-rw-r--r--  include/os/linux/intr.c | 136
-rw-r--r--  include/os/linux/intr.h | 22
-rw-r--r--  include/os/linux/io.c | 130
-rw-r--r--  include/os/linux/io_usermode.c | 29
-rw-r--r--  include/os/linux/ioctl.c | 297
-rw-r--r--  include/os/linux/ioctl.h | 23
-rw-r--r--  include/os/linux/ioctl_as.c | 427
-rw-r--r--  include/os/linux/ioctl_as.h | 30
-rw-r--r--  include/os/linux/ioctl_channel.c | 1388
-rw-r--r--  include/os/linux/ioctl_channel.h | 57
-rw-r--r--  include/os/linux/ioctl_clk_arb.c | 583
-rw-r--r--  include/os/linux/ioctl_ctrl.c | 2144
-rw-r--r--  include/os/linux/ioctl_ctrl.h | 27
-rw-r--r--  include/os/linux/ioctl_dbg.c | 2210
-rw-r--r--  include/os/linux/ioctl_dbg.h | 38
-rw-r--r--  include/os/linux/ioctl_tsg.c | 750
-rw-r--r--  include/os/linux/ioctl_tsg.h | 28
-rw-r--r--  include/os/linux/kmem.c | 653
-rw-r--r--  include/os/linux/kmem_priv.h | 105
-rw-r--r--  include/os/linux/linux-channel.c | 657
-rw-r--r--  include/os/linux/linux-dma.c | 534
-rw-r--r--  include/os/linux/log.c | 132
-rw-r--r--  include/os/linux/ltc.c | 60
-rw-r--r--  include/os/linux/module.c | 1547
-rw-r--r--  include/os/linux/module.h | 35
-rw-r--r--  include/os/linux/module_usermode.c | 62
-rw-r--r--  include/os/linux/module_usermode.h | 27
-rw-r--r--  include/os/linux/nvgpu_mem.c | 348
-rw-r--r--  include/os/linux/nvhost.c | 295
-rw-r--r--  include/os/linux/nvhost_priv.h | 24
-rw-r--r--  include/os/linux/nvidia_p2p.c | 299
-rw-r--r--  include/os/linux/nvlink.c | 132
-rw-r--r--  include/os/linux/nvlink.h | 22
-rw-r--r--  include/os/linux/os_fence_android.c | 79
-rw-r--r--  include/os/linux/os_fence_android_sema.c | 112
-rw-r--r--  include/os/linux/os_fence_android_syncpt.c | 121
-rw-r--r--  include/os/linux/os_linux.h | 192
-rw-r--r--  include/os/linux/os_ops.c | 61
-rw-r--r--  include/os/linux/os_ops.h | 22
-rw-r--r--  include/os/linux/os_ops_gm20b.c | 47
-rw-r--r--  include/os/linux/os_ops_gm20b.h | 22
-rw-r--r--  include/os/linux/os_ops_gp106.c | 40
-rw-r--r--  include/os/linux/os_ops_gp106.h | 22
-rw-r--r--  include/os/linux/os_ops_gp10b.c | 41
-rw-r--r--  include/os/linux/os_ops_gp10b.h | 22
-rw-r--r--  include/os/linux/os_ops_gv100.c | 40
-rw-r--r--  include/os/linux/os_ops_gv100.h | 22
-rw-r--r--  include/os/linux/os_ops_gv11b.c | 30
-rw-r--r--  include/os/linux/os_ops_gv11b.h | 24
-rw-r--r--  include/os/linux/os_sched.c | 32
-rw-r--r--  include/os/linux/pci.c | 854
-rw-r--r--  include/os/linux/pci.h | 27
-rw-r--r--  include/os/linux/pci_usermode.c | 24
-rw-r--r--  include/os/linux/pci_usermode.h | 23
-rw-r--r--  include/os/linux/platform_gk20a.h | 329
-rw-r--r--  include/os/linux/platform_gk20a_tegra.c | 966
-rw-r--r--  include/os/linux/platform_gk20a_tegra.h | 23
-rw-r--r--  include/os/linux/platform_gp10b.h | 39
-rw-r--r--  include/os/linux/platform_gp10b_tegra.c | 510
-rw-r--r--  include/os/linux/platform_gp10b_tegra.h | 22
-rw-r--r--  include/os/linux/platform_gv11b_tegra.c | 331
-rw-r--r--  include/os/linux/rwsem.c | 39
-rw-r--r--  include/os/linux/scale.c | 428
-rw-r--r--  include/os/linux/scale.h | 66
-rw-r--r--  include/os/linux/sched.c | 666
-rw-r--r--  include/os/linux/sched.h | 36
-rw-r--r--  include/os/linux/sdl.c | 341
-rw-r--r--  include/os/linux/sim.c | 96
-rw-r--r--  include/os/linux/sim_pci.c | 93
-rw-r--r--  include/os/linux/soc.c | 122
-rw-r--r--  include/os/linux/sync_sema_android.c | 418
-rw-r--r--  include/os/linux/sync_sema_android.h | 51
-rw-r--r--  include/os/linux/sysfs.c | 1275
-rw-r--r--  include/os/linux/sysfs.h | 24
-rw-r--r--  include/os/linux/thread.c | 70
-rw-r--r--  include/os/linux/timers.c | 269
-rw-r--r--  include/os/linux/vgpu/fecs_trace_vgpu.c | 225
-rw-r--r--  include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | 103
-rw-r--r--  include/os/linux/vgpu/platform_vgpu_tegra.c | 97
-rw-r--r--  include/os/linux/vgpu/platform_vgpu_tegra.h | 24
-rw-r--r--  include/os/linux/vgpu/sysfs_vgpu.c | 143
-rw-r--r--  include/os/linux/vgpu/vgpu_ivc.c | 77
-rw-r--r--  include/os/linux/vgpu/vgpu_ivm.c | 53
-rw-r--r--  include/os/linux/vgpu/vgpu_linux.c | 525
-rw-r--r--  include/os/linux/vgpu/vgpu_linux.h | 68
-rw-r--r--  include/os/linux/vm.c | 358
-rw-r--r--  include/os/linux/vpr.c | 22
148 files changed, 0 insertions, 31973 deletions
diff --git a/include/os/linux/cde.c b/include/os/linux/cde.c
deleted file mode 100644
index 715513c..0000000
--- a/include/os/linux/cde.c
+++ /dev/null
@@ -1,1794 +0,0 @@
1/*
2 * Color decompression engine support
3 *
4 * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/dma-mapping.h>
20#include <linux/fs.h>
21#include <linux/dma-buf.h>
22#include <uapi/linux/nvgpu.h>
23
24#include <trace/events/gk20a.h>
25
26#include <nvgpu/dma.h>
27#include <nvgpu/gmmu.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/kmem.h>
31#include <nvgpu/log.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/firmware.h>
34#include <nvgpu/os_sched.h>
35#include <nvgpu/channel.h>
36#include <nvgpu/utils.h>
37#include <nvgpu/gk20a.h>
38
39#include <nvgpu/linux/vm.h>
40
41#include "gk20a/mm_gk20a.h"
42#include "gk20a/fence_gk20a.h"
43#include "gk20a/gr_gk20a.h"
44
45#include "cde.h"
46#include "os_linux.h"
47#include "dmabuf.h"
48#include "channel.h"
49#include "cde_gm20b.h"
50#include "cde_gp10b.h"
51
52#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
53#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
54
55static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
56static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
57
58#define CTX_DELETE_TIME 1000
59
60#define MAX_CTX_USE_COUNT 42
61#define MAX_CTX_RETRY_TIME 2000
62
63static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
64{
65 struct nvgpu_mapped_buf *buffer;
66 dma_addr_t addr = 0;
67 struct gk20a *g = gk20a_from_vm(vm);
68
69 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
70 buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
71 if (buffer)
72 addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
73 nvgpu_mutex_release(&vm->update_gmmu_lock);
74
75 return addr;
76}
77
78static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
79{
80 unsigned int i;
81
82 for (i = 0; i < cde_ctx->num_bufs; i++) {
83 struct nvgpu_mem *mem = cde_ctx->mem + i;
84 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
85 }
86
87 nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
88
89 cde_ctx->convert_cmd = NULL;
90 cde_ctx->init_convert_cmd = NULL;
91 cde_ctx->num_bufs = 0;
92 cde_ctx->num_params = 0;
93 cde_ctx->init_cmd_num_entries = 0;
94 cde_ctx->convert_cmd_num_entries = 0;
95 cde_ctx->init_cmd_executed = false;
96}
97
98static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
99__must_hold(&cde_app->mutex)
100{
101 struct nvgpu_os_linux *l = cde_ctx->l;
102 struct gk20a *g = &l->g;
103 struct channel_gk20a *ch = cde_ctx->ch;
104 struct vm_gk20a *vm = ch->vm;
105
106 trace_gk20a_cde_remove_ctx(cde_ctx);
107
108 /* release mapped memory */
109 gk20a_deinit_cde_img(cde_ctx);
110 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
111 cde_ctx->backing_store_vaddr);
112
113 /*
114 * free the channel
115 * gk20a_channel_close() will also unbind the channel from TSG
116 */
117 gk20a_channel_close(ch);
118 nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);
119
120 /* housekeeping on app */
121 nvgpu_list_del(&cde_ctx->list);
122 l->cde_app.ctx_count--;
123 nvgpu_kfree(g, cde_ctx);
124}
125
126static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
127 bool wait_finish)
128__releases(&cde_app->mutex)
129__acquires(&cde_app->mutex)
130{
131 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
132
133 /* permanent contexts do not have deleter works */
134 if (!cde_ctx->is_temporary)
135 return;
136
137 if (wait_finish) {
138 nvgpu_mutex_release(&cde_app->mutex);
139 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
140 nvgpu_mutex_acquire(&cde_app->mutex);
141 } else {
142 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
143 }
144}
145
146static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
147__must_hold(&l->cde_app->mutex)
148{
149 struct gk20a_cde_app *cde_app = &l->cde_app;
150 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
151
152 /* safe to go off the mutex in cancel_deleter since app is
153 * deinitialised; no new jobs are started. deleter works may be only at
154 * waiting for the mutex or before, going to abort */
155
156 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
157 &cde_app->free_contexts, gk20a_cde_ctx, list) {
158 gk20a_cde_cancel_deleter(cde_ctx, true);
159 gk20a_cde_remove_ctx(cde_ctx);
160 }
161
162 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
163 &cde_app->used_contexts, gk20a_cde_ctx, list) {
164 gk20a_cde_cancel_deleter(cde_ctx, true);
165 gk20a_cde_remove_ctx(cde_ctx);
166 }
167}
168
169static void gk20a_cde_stop(struct nvgpu_os_linux *l)
170__must_hold(&l->cde_app->mutex)
171{
172 struct gk20a_cde_app *cde_app = &l->cde_app;
173
174 /* prevent further conversions and delayed works from working */
175 cde_app->initialised = false;
176 /* free all data, empty the list */
177 gk20a_cde_remove_contexts(l);
178}
179
180void gk20a_cde_destroy(struct nvgpu_os_linux *l)
181__acquires(&l->cde_app->mutex)
182__releases(&l->cde_app->mutex)
183{
184 struct gk20a_cde_app *cde_app = &l->cde_app;
185
186 if (!cde_app->initialised)
187 return;
188
189 nvgpu_mutex_acquire(&cde_app->mutex);
190 gk20a_cde_stop(l);
191 nvgpu_mutex_release(&cde_app->mutex);
192
193 nvgpu_mutex_destroy(&cde_app->mutex);
194}
195
196void gk20a_cde_suspend(struct nvgpu_os_linux *l)
197__acquires(&l->cde_app->mutex)
198__releases(&l->cde_app->mutex)
199{
200 struct gk20a_cde_app *cde_app = &l->cde_app;
201 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
202
203 if (!cde_app->initialised)
204 return;
205
206 nvgpu_mutex_acquire(&cde_app->mutex);
207
208 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
209 &cde_app->free_contexts, gk20a_cde_ctx, list) {
210 gk20a_cde_cancel_deleter(cde_ctx, false);
211 }
212
213 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
214 &cde_app->used_contexts, gk20a_cde_ctx, list) {
215 gk20a_cde_cancel_deleter(cde_ctx, false);
216 }
217
218 nvgpu_mutex_release(&cde_app->mutex);
219
220}
221
222static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
223__must_hold(&l->cde_app->mutex)
224{
225 struct gk20a_cde_app *cde_app = &l->cde_app;
226 struct gk20a_cde_ctx *cde_ctx;
227
228 cde_ctx = gk20a_cde_allocate_context(l);
229 if (IS_ERR(cde_ctx))
230 return PTR_ERR(cde_ctx);
231
232 nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
233 cde_app->ctx_count++;
234 if (cde_app->ctx_count > cde_app->ctx_count_top)
235 cde_app->ctx_count_top = cde_app->ctx_count;
236
237 return 0;
238}
239
240static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
241__must_hold(&l->cde_app->mutex)
242{
243 int err;
244 int i;
245
246 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
247 err = gk20a_cde_create_context(l);
248 if (err)
249 goto out;
250 }
251
252 return 0;
253out:
254 gk20a_cde_remove_contexts(l);
255 return err;
256}
257
258static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
259 struct nvgpu_firmware *img,
260 struct gk20a_cde_hdr_buf *buf)
261{
262 struct nvgpu_mem *mem;
263 struct nvgpu_os_linux *l = cde_ctx->l;
264 struct gk20a *g = &l->g;
265 int err;
266
267 /* check that the file can hold the buf */
268 if (buf->data_byte_offset != 0 &&
269 buf->data_byte_offset + buf->num_bytes > img->size) {
270 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
271 cde_ctx->num_bufs);
272 return -EINVAL;
273 }
274
275 /* check that we have enough buf elems available */
276 if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
277 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
278 cde_ctx->num_bufs);
279 return -ENOMEM;
280 }
281
282 /* allocate buf */
283 mem = cde_ctx->mem + cde_ctx->num_bufs;
284 err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
285 if (err) {
286 nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
287 cde_ctx->num_bufs);
288 return -ENOMEM;
289 }
290
291 /* copy the content */
292 if (buf->data_byte_offset != 0)
293 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
294 buf->num_bytes);
295
296 cde_ctx->num_bufs++;
297
298 return 0;
299}
300
301static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
302 int type, s32 shift, u64 mask, u64 value)
303{
304 struct nvgpu_os_linux *l = cde_ctx->l;
305 struct gk20a *g = &l->g;
306 u32 *target_mem_ptr = target;
307 u64 *target_mem_ptr_u64 = target;
308 u64 current_value, new_value;
309
310 value = (shift >= 0) ? value << shift : value >> -shift;
311 value &= mask;
312
313 /* read current data from the location */
314 current_value = 0;
315 if (type == TYPE_PARAM_TYPE_U32) {
316 if (mask != 0xfffffffful)
317 current_value = *target_mem_ptr;
318 } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
319 if (mask != ~0ul)
320 current_value = *target_mem_ptr_u64;
321 } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
322 current_value = *target_mem_ptr_u64;
323 current_value = (u64)(current_value >> 32) |
324 (u64)(current_value << 32);
325 } else {
326 nvgpu_warn(g, "cde: unknown type. type=%d",
327 type);
328 return -EINVAL;
329 }
330
331 current_value &= ~mask;
332 new_value = current_value | value;
333
334 /* store the element data back */
335 if (type == TYPE_PARAM_TYPE_U32)
336 *target_mem_ptr = (u32)new_value;
337 else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
338 *target_mem_ptr_u64 = new_value;
339 else {
340 new_value = (u64)(new_value >> 32) |
341 (u64)(new_value << 32);
342 *target_mem_ptr_u64 = new_value;
343 }
344
345 return 0;
346}
347
348static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
349 struct nvgpu_firmware *img,
350 struct gk20a_cde_hdr_replace *replace)
351{
352 struct nvgpu_mem *source_mem;
353 struct nvgpu_mem *target_mem;
354 struct nvgpu_os_linux *l = cde_ctx->l;
355 struct gk20a *g = &l->g;
356 u32 *target_mem_ptr;
357 u64 vaddr;
358 int err;
359
360 if (replace->target_buf >= cde_ctx->num_bufs ||
361 replace->source_buf >= cde_ctx->num_bufs) {
362 nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
363 replace->target_buf, replace->source_buf,
364 cde_ctx->num_bufs);
365 return -EINVAL;
366 }
367
368 source_mem = cde_ctx->mem + replace->source_buf;
369 target_mem = cde_ctx->mem + replace->target_buf;
370 target_mem_ptr = target_mem->cpu_va;
371
372 if (source_mem->size < (replace->source_byte_offset + 3) ||
373 target_mem->size < (replace->target_byte_offset + 3)) {
374 nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
375 replace->target_byte_offset,
376 replace->source_byte_offset,
377 source_mem->size,
378 target_mem->size);
379 return -EINVAL;
380 }
381
382 /* calculate the target pointer */
383 target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
384
385 /* determine patch value */
386 vaddr = source_mem->gpu_va + replace->source_byte_offset;
387 err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
388 replace->shift, replace->mask,
389 vaddr);
390 if (err) {
391 nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
392 err, replace->target_buf,
393 replace->target_byte_offset,
394 replace->source_buf,
395 replace->source_byte_offset);
396 }
397
398 return err;
399}
400
401static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
402{
403 struct nvgpu_os_linux *l = cde_ctx->l;
404 struct gk20a *g = &l->g;
405 struct nvgpu_mem *target_mem;
406 u32 *target_mem_ptr;
407 u64 new_data;
408 int user_id = 0, err;
409 unsigned int i;
410
411 for (i = 0; i < cde_ctx->num_params; i++) {
412 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
413 target_mem = cde_ctx->mem + param->target_buf;
414 target_mem_ptr = target_mem->cpu_va;
415 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
416
417 switch (param->id) {
418 case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
419 new_data = g->gr.comptags_per_cacheline;
420 break;
421 case TYPE_PARAM_GPU_CONFIGURATION:
422 new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
423 g->gr.cacheline_size;
424 break;
425 case TYPE_PARAM_FIRSTPAGEOFFSET:
426 new_data = cde_ctx->surf_param_offset;
427 break;
428 case TYPE_PARAM_NUMPAGES:
429 new_data = cde_ctx->surf_param_lines;
430 break;
431 case TYPE_PARAM_BACKINGSTORE:
432 new_data = cde_ctx->backing_store_vaddr;
433 break;
434 case TYPE_PARAM_DESTINATION:
435 new_data = cde_ctx->compbit_vaddr;
436 break;
437 case TYPE_PARAM_DESTINATION_SIZE:
438 new_data = cde_ctx->compbit_size;
439 break;
440 case TYPE_PARAM_BACKINGSTORE_SIZE:
441 new_data = g->gr.compbit_store.mem.size;
442 break;
443 case TYPE_PARAM_SOURCE_SMMU_ADDR:
444 new_data = gpuva_to_iova_base(cde_ctx->vm,
445 cde_ctx->surf_vaddr);
446 if (new_data == 0) {
447 nvgpu_warn(g, "cde: failed to find 0x%llx",
448 cde_ctx->surf_vaddr);
449 return -EINVAL;
450 }
451 break;
452 case TYPE_PARAM_BACKINGSTORE_BASE_HW:
453 new_data = g->gr.compbit_store.base_hw;
454 break;
455 case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
456 new_data = g->gr.gobs_per_comptagline_per_slice;
457 break;
458 case TYPE_PARAM_SCATTERBUFFER:
459 new_data = cde_ctx->scatterbuffer_vaddr;
460 break;
461 case TYPE_PARAM_SCATTERBUFFER_SIZE:
462 new_data = cde_ctx->scatterbuffer_size;
463 break;
464 default:
465 user_id = param->id - NUM_RESERVED_PARAMS;
466 if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
467 continue;
468 new_data = cde_ctx->user_param_values[user_id];
469 }
470
471 nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
472 i, param->id, param->target_buf,
473 param->target_byte_offset, new_data,
474 param->data_offset, param->type, param->shift,
475 param->mask);
476
477 new_data += param->data_offset;
478
479 err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
480 param->shift, param->mask, new_data);
481
482 if (err) {
483 nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
484 err, i, param->id, param->target_buf,
485 param->target_byte_offset, new_data);
486 return err;
487 }
488 }
489
490 return 0;
491}
492
493static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
494 struct nvgpu_firmware *img,
495 struct gk20a_cde_hdr_param *param)
496{
497 struct nvgpu_mem *target_mem;
498 struct nvgpu_os_linux *l = cde_ctx->l;
499 struct gk20a *g = &l->g;
500
501 if (param->target_buf >= cde_ctx->num_bufs) {
502 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
503 cde_ctx->num_params, param->target_buf,
504 cde_ctx->num_bufs);
505 return -EINVAL;
506 }
507
508 target_mem = cde_ctx->mem + param->target_buf;
509 if (target_mem->size < (param->target_byte_offset + 3)) {
510 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
511 cde_ctx->num_params, param->target_byte_offset,
512 target_mem->size);
513 return -EINVAL;
514 }
515
516 /* does this parameter fit into our parameter structure */
517 if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
518 nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
519 cde_ctx->num_params);
520 return -ENOMEM;
521 }
522
523 /* is the given id valid? */
524 if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
525 nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
 526 cde_ctx->num_params, param->id,
527 NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
528 return -EINVAL;
529 }
530
531 cde_ctx->params[cde_ctx->num_params] = *param;
532 cde_ctx->num_params++;
533
534 return 0;
535}
536
537static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
538 struct nvgpu_firmware *img,
539 u32 required_class)
540{
541 struct nvgpu_os_linux *l = cde_ctx->l;
542 struct gk20a *g = &l->g;
543 int err;
544
545 /* CDE enabled */
546 cde_ctx->ch->cde = true;
547
548 err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
549 if (err) {
550 nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
551 err);
552 return err;
553 }
554
555 return 0;
556}
557
558static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
559 struct nvgpu_firmware *img,
560 u32 op,
561 struct gk20a_cde_cmd_elem *cmd_elem,
562 u32 num_elems)
563{
564 struct nvgpu_os_linux *l = cde_ctx->l;
565 struct gk20a *g = &l->g;
566 struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem;
567 u32 *num_entries;
568 unsigned int i;
569
570 /* check command type */
571 if (op == TYPE_BUF_COMMAND_INIT) {
572 gpfifo = &cde_ctx->init_convert_cmd;
573 num_entries = &cde_ctx->init_cmd_num_entries;
574 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
575 gpfifo = &cde_ctx->convert_cmd;
576 num_entries = &cde_ctx->convert_cmd_num_entries;
577 } else {
578 nvgpu_warn(g, "cde: unknown command. op=%u",
579 op);
580 return -EINVAL;
581 }
582
583 /* allocate gpfifo entries to be pushed */
584 *gpfifo = nvgpu_kzalloc(g,
585 sizeof(struct nvgpu_gpfifo_entry) * num_elems);
586 if (!*gpfifo) {
587 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
588 return -ENOMEM;
589 }
590
591 gpfifo_elem = *gpfifo;
592 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
593 struct nvgpu_mem *target_mem;
594
595 /* validate the current entry */
596 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
597 nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
598 cmd_elem->target_buf, cde_ctx->num_bufs);
599 return -EINVAL;
600 }
601
602 target_mem = cde_ctx->mem + cmd_elem->target_buf;
 603 if (target_mem->size <
604 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
605 nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
606 target_mem->size,
607 cmd_elem->target_byte_offset,
608 cmd_elem->num_bytes);
609 return -EINVAL;
610 }
611
612 /* store the element into gpfifo */
613 gpfifo_elem->entry0 =
614 u64_lo32(target_mem->gpu_va +
615 cmd_elem->target_byte_offset);
616 gpfifo_elem->entry1 =
617 u64_hi32(target_mem->gpu_va +
618 cmd_elem->target_byte_offset) |
619 pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
620 sizeof(u32));
621 }
622
623 *num_entries = num_elems;
624 return 0;
625}
626
627static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
628{
629 struct nvgpu_os_linux *l = cde_ctx->l;
630 struct gk20a *g = &l->g;
631 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
632 sizeof(struct nvgpu_gpfifo_entry);
633 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
634 sizeof(struct nvgpu_gpfifo_entry);
635 unsigned long total_bytes = init_bytes + conv_bytes;
636 struct nvgpu_gpfifo_entry *combined_cmd;
637
638 /* allocate buffer that has space for both */
639 combined_cmd = nvgpu_kzalloc(g, total_bytes);
640 if (!combined_cmd) {
641 nvgpu_warn(g,
642 "cde: could not allocate memory for gpfifo entries");
643 return -ENOMEM;
644 }
645
646 /* move the original init here and append convert */
647 memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
648 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
649 cde_ctx->convert_cmd, conv_bytes);
650
651 nvgpu_kfree(g, cde_ctx->init_convert_cmd);
652 nvgpu_kfree(g, cde_ctx->convert_cmd);
653
654 cde_ctx->init_convert_cmd = combined_cmd;
655 cde_ctx->convert_cmd = combined_cmd
656 + cde_ctx->init_cmd_num_entries;
657
658 return 0;
659}
660
661static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
662 struct nvgpu_firmware *img)
663{
664 struct nvgpu_os_linux *l = cde_ctx->l;
665 struct gk20a *g = &l->g;
666 struct gk20a_cde_app *cde_app = &l->cde_app;
667 u32 *data = (u32 *)img->data;
668 u32 num_of_elems;
669 struct gk20a_cde_hdr_elem *elem;
670 u32 min_size = 0;
671 int err = 0;
672 unsigned int i;
673
674 min_size += 2 * sizeof(u32);
675 if (img->size < min_size) {
676 nvgpu_warn(g, "cde: invalid image header");
677 return -EINVAL;
678 }
679
680 cde_app->firmware_version = data[0];
681 num_of_elems = data[1];
682
683 min_size += num_of_elems * sizeof(*elem);
684 if (img->size < min_size) {
685 nvgpu_warn(g, "cde: bad image");
686 return -EINVAL;
687 }
688
689 elem = (struct gk20a_cde_hdr_elem *)&data[2];
690 for (i = 0; i < num_of_elems; i++) {
691 int err = 0;
692 switch (elem->type) {
693 case TYPE_BUF:
694 err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
695 break;
696 case TYPE_REPLACE:
697 err = gk20a_init_cde_replace(cde_ctx, img,
698 &elem->replace);
699 break;
700 case TYPE_PARAM:
701 err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
702 break;
703 case TYPE_REQUIRED_CLASS:
704 err = gk20a_init_cde_required_class(cde_ctx, img,
705 elem->required_class);
706 break;
707 case TYPE_COMMAND:
708 {
709 struct gk20a_cde_cmd_elem *cmd = (void *)
710 &img->data[elem->command.data_byte_offset];
711 err = gk20a_init_cde_command(cde_ctx, img,
712 elem->command.op, cmd,
713 elem->command.num_entries);
714 break;
715 }
716 case TYPE_ARRAY:
717 memcpy(&cde_app->arrays[elem->array.id][0],
718 elem->array.data,
719 MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
720 break;
721 default:
722 nvgpu_warn(g, "cde: unknown header element");
723 err = -EINVAL;
724 }
725
726 if (err)
727 goto deinit_image;
728
729 elem++;
730 }
731
732 if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
 733 nvgpu_warn(g, "cde: init command not defined");
734 err = -EINVAL;
735 goto deinit_image;
736 }
737
738 if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
739 nvgpu_warn(g, "cde: convert command not defined");
740 err = -EINVAL;
741 goto deinit_image;
742 }
743
744 err = gk20a_cde_pack_cmdbufs(cde_ctx);
745 if (err)
746 goto deinit_image;
747
748 return 0;
749
750deinit_image:
751 gk20a_deinit_cde_img(cde_ctx);
752 return err;
753}
754
755static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
756 u32 op, struct nvgpu_channel_fence *fence,
757 u32 flags, struct gk20a_fence **fence_out)
758{
759 struct nvgpu_os_linux *l = cde_ctx->l;
760 struct gk20a *g = &l->g;
761 struct nvgpu_gpfifo_entry *gpfifo = NULL;
762 int num_entries = 0;
763
764 /* check command type */
765 if (op == TYPE_BUF_COMMAND_INIT) {
766 /* both init and convert combined */
767 gpfifo = cde_ctx->init_convert_cmd;
768 num_entries = cde_ctx->init_cmd_num_entries
769 + cde_ctx->convert_cmd_num_entries;
770 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
771 gpfifo = cde_ctx->convert_cmd;
772 num_entries = cde_ctx->convert_cmd_num_entries;
773 } else if (op == TYPE_BUF_COMMAND_NOOP) {
774 /* Any non-null gpfifo will suffice with 0 num_entries */
775 gpfifo = cde_ctx->init_convert_cmd;
776 num_entries = 0;
777 } else {
778 nvgpu_warn(g, "cde: unknown buffer");
779 return -EINVAL;
780 }
781
782 if (gpfifo == NULL) {
783 nvgpu_warn(g, "cde: buffer not available");
784 return -ENOSYS;
785 }
786
787 return nvgpu_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo,
788 num_entries, flags, fence, fence_out);
789}
790
791static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
792__acquires(&cde_app->mutex)
793__releases(&cde_app->mutex)
794{
795 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
796 struct gk20a *g = &cde_ctx->l->g;
797
798 nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
799 trace_gk20a_cde_release(cde_ctx);
800
801 nvgpu_mutex_acquire(&cde_app->mutex);
802
803 if (cde_ctx->in_use) {
804 cde_ctx->in_use = false;
805 nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
806 cde_app->ctx_usecount--;
807 } else {
808 nvgpu_log_info(g, "double release cde context %p", cde_ctx);
809 }
810
811 nvgpu_mutex_release(&cde_app->mutex);
812}
813
814static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
815__acquires(&cde_app->mutex)
816__releases(&cde_app->mutex)
817{
818 struct delayed_work *delay_work = to_delayed_work(work);
819 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
820 struct gk20a_cde_ctx, ctx_deleter_work);
821 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
822 struct nvgpu_os_linux *l = cde_ctx->l;
823 struct gk20a *g = &l->g;
824 int err;
825
826 /* someone has just taken it? engine deletion started? */
827 if (cde_ctx->in_use || !cde_app->initialised)
828 return;
829
830 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
831 "cde: attempting to delete temporary %p", cde_ctx);
832
833 err = gk20a_busy(g);
834 if (err) {
835 /* this context would find new use anyway later, so not freeing
836 * here does not leak anything */
837 nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
838 " temp ctx deletion");
839 return;
840 }
841
842 nvgpu_mutex_acquire(&cde_app->mutex);
843 if (cde_ctx->in_use || !cde_app->initialised) {
844 nvgpu_log(g, gpu_dbg_cde_ctx,
845 "cde: context use raced, not deleting %p",
846 cde_ctx);
847 goto out;
848 }
849
850 WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
851 "double pending %p", cde_ctx);
852
853 gk20a_cde_remove_ctx(cde_ctx);
854 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
855 "cde: destroyed %p count=%d use=%d max=%d",
856 cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
857 cde_app->ctx_count_top);
858
859out:
860 nvgpu_mutex_release(&cde_app->mutex);
861 gk20a_idle(g);
862}
863
864static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
865__must_hold(&cde_app->mutex)
866{
867 struct gk20a *g = &l->g;
868 struct gk20a_cde_app *cde_app = &l->cde_app;
869 struct gk20a_cde_ctx *cde_ctx;
870
871 /* exhausted? */
872
873 if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
874 return ERR_PTR(-EAGAIN);
875
876 /* idle context available? */
877
878 if (!nvgpu_list_empty(&cde_app->free_contexts)) {
879 cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
880 gk20a_cde_ctx, list);
881 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
882 "cde: got free %p count=%d use=%d max=%d",
883 cde_ctx, cde_app->ctx_count,
884 cde_app->ctx_usecount,
885 cde_app->ctx_count_top);
886 trace_gk20a_cde_get_context(cde_ctx);
887
888 /* deleter work may be scheduled, but in_use prevents it */
889 cde_ctx->in_use = true;
890 nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
891 cde_app->ctx_usecount++;
892
893 /* cancel any deletions now that ctx is in use */
894 gk20a_cde_cancel_deleter(cde_ctx, true);
895 return cde_ctx;
896 }
897
898 /* no free contexts, get a temporary one */
899
900 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
901 "cde: no free contexts, count=%d",
902 cde_app->ctx_count);
903
904 cde_ctx = gk20a_cde_allocate_context(l);
905 if (IS_ERR(cde_ctx)) {
906 nvgpu_warn(g, "cde: cannot allocate context: %ld",
907 PTR_ERR(cde_ctx));
908 return cde_ctx;
909 }
910
911 trace_gk20a_cde_get_context(cde_ctx);
912 cde_ctx->in_use = true;
913 cde_ctx->is_temporary = true;
914 cde_app->ctx_usecount++;
915 cde_app->ctx_count++;
916 if (cde_app->ctx_count > cde_app->ctx_count_top)
917 cde_app->ctx_count_top = cde_app->ctx_count;
918 nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);
919
920 return cde_ctx;
921}
922
923static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
924__releases(&cde_app->mutex)
925__acquires(&cde_app->mutex)
926{
927 struct gk20a *g = &l->g;
928 struct gk20a_cde_app *cde_app = &l->cde_app;
929 struct gk20a_cde_ctx *cde_ctx = NULL;
930 struct nvgpu_timeout timeout;
931
932 nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
933 NVGPU_TIMER_CPU_TIMER);
934
935 do {
936 cde_ctx = gk20a_cde_do_get_context(l);
937 if (PTR_ERR(cde_ctx) != -EAGAIN)
938 break;
939
940 /* exhausted, retry */
941 nvgpu_mutex_release(&cde_app->mutex);
942 cond_resched();
943 nvgpu_mutex_acquire(&cde_app->mutex);
944 } while (!nvgpu_timeout_expired(&timeout));
945
946 return cde_ctx;
947}
948
949static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
950{
951 struct gk20a *g = &l->g;
952 struct gk20a_cde_ctx *cde_ctx;
953 int ret;
954
955 cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
956 if (!cde_ctx)
957 return ERR_PTR(-ENOMEM);
958
959 cde_ctx->l = l;
960 cde_ctx->dev = dev_from_gk20a(g);
961
962 ret = gk20a_cde_load(cde_ctx);
963 if (ret) {
964 nvgpu_kfree(g, cde_ctx);
965 return ERR_PTR(ret);
966 }
967
968 nvgpu_init_list_node(&cde_ctx->list);
969 cde_ctx->is_temporary = false;
970 cde_ctx->in_use = false;
971 INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
972 gk20a_cde_ctx_deleter_fn);
973
974 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
975 trace_gk20a_cde_allocate_context(cde_ctx);
976 return cde_ctx;
977}
978
979static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm,
980 u32 map_offset, u32 map_size)
981{
982 struct gk20a *g = gk20a_from_vm(vm);
983
984 /*
985 * To be simple we will just make the map size depend on the
986 * iommu'ability of the driver. If there's an IOMMU we can rely on
987 * buffers being contiguous. If not, then we'll use 4k pages since we
988 * know that will work for any buffer.
989 */
990 if (!nvgpu_iommuable(g))
991 return SZ_4K;
992
993 /*
994 * If map size or offset is not 64K aligned then use small pages.
995 */
996 if (map_size & (vm->big_page_size - 1) ||
997 map_offset & (vm->big_page_size - 1))
998 return SZ_4K;
999
1000 return vm->big_page_size;
1001}
1002
1003int gk20a_cde_convert(struct nvgpu_os_linux *l,
1004 struct dma_buf *compbits_scatter_buf,
1005 u64 compbits_byte_offset,
1006 u64 scatterbuffer_byte_offset,
1007 struct nvgpu_channel_fence *fence,
1008 u32 __flags, struct gk20a_cde_param *params,
1009 int num_params, struct gk20a_fence **fence_out)
1010__acquires(&l->cde_app->mutex)
1011__releases(&l->cde_app->mutex)
1012{
1013 struct gk20a *g = &l->g;
1014 struct gk20a_cde_ctx *cde_ctx = NULL;
1015 struct gk20a_comptags comptags;
1016 struct nvgpu_os_buffer os_buf = {
1017 compbits_scatter_buf,
1018 NULL,
1019 dev_from_gk20a(g)
1020 };
1021 u64 mapped_compbits_offset = 0;
1022 u64 compbits_size = 0;
1023 u64 mapped_scatterbuffer_offset = 0;
1024 u64 scatterbuffer_size = 0;
1025 u64 map_vaddr = 0;
1026 u64 map_offset = 0;
1027 u64 map_size = 0;
1028 u8 *surface = NULL;
1029 u64 big_page_mask = 0;
1030 u32 flags;
1031 int err, i;
1032 const s16 compbits_kind = 0;
1033 u32 submit_op;
1034 struct dma_buf_attachment *attachment;
1035
1036 nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
1037 compbits_byte_offset, scatterbuffer_byte_offset);
1038
1039 /* scatter buffer must be after compbits buffer */
1040 if (scatterbuffer_byte_offset &&
1041 scatterbuffer_byte_offset < compbits_byte_offset)
1042 return -EINVAL;
1043
1044 err = gk20a_busy(g);
1045 if (err)
1046 return err;
1047
1048 nvgpu_mutex_acquire(&l->cde_app.mutex);
1049 cde_ctx = gk20a_cde_get_context(l);
1050 nvgpu_mutex_release(&l->cde_app.mutex);
1051 if (IS_ERR(cde_ctx)) {
1052 err = PTR_ERR(cde_ctx);
1053 goto exit_idle;
1054 }
1055
1056 /* First, map the buffer to local va */
1057
1058 /* ensure that the compbits buffer has drvdata */
1059 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
1060 dev_from_gk20a(g));
1061 if (err)
1062 goto exit_idle;
1063
1064 /* compbits don't start at page aligned offset, so we need to align
1065 the region to be mapped */
1066 big_page_mask = cde_ctx->vm->big_page_size - 1;
1067 map_offset = compbits_byte_offset & ~big_page_mask;
1068 map_size = compbits_scatter_buf->size - map_offset;
1069
1070
1071 /* compute compbit start offset from the beginning of the mapped
1072 area */
1073 mapped_compbits_offset = compbits_byte_offset - map_offset;
1074 if (scatterbuffer_byte_offset) {
1075 compbits_size = scatterbuffer_byte_offset -
1076 compbits_byte_offset;
1077 mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
1078 map_offset;
1079 scatterbuffer_size = compbits_scatter_buf->size -
1080 scatterbuffer_byte_offset;
1081 } else {
1082 compbits_size = compbits_scatter_buf->size -
1083 compbits_byte_offset;
1084 }
1085
1086 nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu",
1087 map_offset, map_size);
1088 nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
1089 mapped_compbits_offset, compbits_size);
1090 nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
1091 mapped_scatterbuffer_offset, scatterbuffer_size);
1092
1093
1094 /* map the destination buffer */
1095 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */
1096 err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
1097 NVGPU_VM_MAP_CACHEABLE |
1098 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
1099 gk20a_cde_mapping_page_size(cde_ctx->vm,
1100 map_offset,
1101 map_size),
1102 NV_KIND_INVALID,
1103 compbits_kind, /* incompressible kind */
1104 gk20a_mem_flag_none,
1105 map_offset, map_size,
1106 NULL,
1107 &map_vaddr);
1108 if (err) {
1109 nvgpu_warn(g, "cde: failed to map compbits scatter buf at %lld size %lld",
1110 map_offset, map_size);
1111 dma_buf_put(compbits_scatter_buf);
1112 err = -EINVAL;
1113 goto exit_idle;
1114 }
1115
1116 if (scatterbuffer_byte_offset &&
1117 l->ops.cde.need_scatter_buffer &&
1118 l->ops.cde.need_scatter_buffer(g)) {
1119 struct sg_table *sgt;
1120 void *scatter_buffer;
1121
1122 surface = dma_buf_vmap(compbits_scatter_buf);
1123 if (IS_ERR(surface)) {
1124 nvgpu_warn(g,
1125 "dma_buf_vmap failed");
1126 err = -EINVAL;
1127 goto exit_unmap_vaddr;
1128 }
1129
1130 scatter_buffer = surface + scatterbuffer_byte_offset;
1131
1132 nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
1133 surface, scatter_buffer);
1134 sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf,
1135 &attachment);
1136 if (IS_ERR(sgt)) {
1137 nvgpu_warn(g,
1138 "mm_pin failed");
1139 err = -EINVAL;
1140 goto exit_unmap_surface;
1141 } else {
1142 err = l->ops.cde.populate_scatter_buffer(g, sgt,
1143 compbits_byte_offset, scatter_buffer,
1144 scatterbuffer_size);
1145 WARN_ON(err);
1146
1147 gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
1148 attachment, sgt);
1149 if (err)
1150 goto exit_unmap_surface;
1151 }
1152
1153 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1154 dma_buf_vunmap(compbits_scatter_buf, surface);
1155 surface = NULL;
1156 }
1157
1158 /* store source buffer compression tags */
1159 gk20a_get_comptags(&os_buf, &comptags);
1160 cde_ctx->surf_param_offset = comptags.offset;
1161 cde_ctx->surf_param_lines = comptags.lines;
1162
1163 /* store surface vaddr. This is actually compbit vaddr, but since
1164 compbits live in the same surface, and we can get the alloc base
1165 address by using gpuva_to_iova_base, this will do */
1166 cde_ctx->surf_vaddr = map_vaddr;
1167
1168 /* store information about destination */
1169 cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
1170 cde_ctx->compbit_size = compbits_size;
1171
1172 cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
1173 cde_ctx->scatterbuffer_size = scatterbuffer_size;
1174
1175 /* remove existing argument data */
1176 memset(cde_ctx->user_param_values, 0,
1177 sizeof(cde_ctx->user_param_values));
1178
1179 /* read user space arguments for the conversion */
1180 for (i = 0; i < num_params; i++) {
1181 struct gk20a_cde_param *param = params + i;
1182 int id = param->id - NUM_RESERVED_PARAMS;
1183
1184 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1185 nvgpu_warn(g, "cde: unknown user parameter");
1186 err = -EINVAL;
1187 goto exit_unmap_surface;
1188 }
1189 cde_ctx->user_param_values[id] = param->value;
1190 }
1191
1192 /* patch data */
1193 err = gk20a_cde_patch_params(cde_ctx);
1194 if (err) {
1195 nvgpu_warn(g, "cde: failed to patch parameters");
1196 goto exit_unmap_surface;
1197 }
1198
1199 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
1200 g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
1201 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
1202 cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
1203 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1204 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1205
1206 /* take always the postfence as it is needed for protecting the
1207 * cde context */
1208 flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;
1209
1210 /* gk20a_cde_execute_buffer() will grab a power reference of it's own */
1211 gk20a_idle(g);
1212
1213 if (comptags.lines == 0) {
1214 /*
1215 * Nothing to do on the buffer, but do a null kickoff for
1216 * managing the pre and post fences.
1217 */
1218 submit_op = TYPE_BUF_COMMAND_NOOP;
1219 } else if (!cde_ctx->init_cmd_executed) {
1220 /*
1221 * First time, so include the init pushbuf too in addition to
1222 * the conversion code.
1223 */
1224 submit_op = TYPE_BUF_COMMAND_INIT;
1225 } else {
1226 /*
1227 * The usual condition: execute just the conversion.
1228 */
1229 submit_op = TYPE_BUF_COMMAND_CONVERT;
1230 }
1231 err = gk20a_cde_execute_buffer(cde_ctx, submit_op,
1232 fence, flags, fence_out);
1233
1234 if (comptags.lines != 0 && !err)
1235 cde_ctx->init_cmd_executed = true;
1236
1237 /* unmap the buffers - channel holds references to them now */
1238 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1239
1240 return err;
1241
1242exit_unmap_surface:
1243 if (surface)
1244 dma_buf_vunmap(compbits_scatter_buf, surface);
1245exit_unmap_vaddr:
1246 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1247exit_idle:
1248 gk20a_idle(g);
1249 return err;
1250}
1251
1252static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
1253__acquires(&cde_app->mutex)
1254__releases(&cde_app->mutex)
1255{
1256 struct gk20a_cde_ctx *cde_ctx = data;
1257 struct nvgpu_os_linux *l = cde_ctx->l;
1258 struct gk20a *g = &l->g;
1259 struct gk20a_cde_app *cde_app = &l->cde_app;
1260 bool channel_idle;
1261
1262 channel_gk20a_joblist_lock(ch);
1263 channel_idle = channel_gk20a_joblist_is_empty(ch);
1264 channel_gk20a_joblist_unlock(ch);
1265
1266 if (!channel_idle)
1267 return;
1268
1269 trace_gk20a_cde_finished_ctx_cb(cde_ctx);
1270 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
1271 if (!cde_ctx->in_use)
1272 nvgpu_log_info(g, "double finish cde context %p on channel %p",
1273 cde_ctx, ch);
1274
1275 if (gk20a_channel_check_timedout(ch)) {
1276 if (cde_ctx->is_temporary) {
1277 nvgpu_warn(g,
1278 "cde: channel had timed out"
1279 " (temporary channel)");
1280 /* going to be deleted anyway */
1281 } else {
1282 nvgpu_warn(g,
1283 "cde: channel had timed out"
1284 ", reloading");
1285 /* mark it to be deleted, replace with a new one */
1286 nvgpu_mutex_acquire(&cde_app->mutex);
1287 cde_ctx->is_temporary = true;
1288 if (gk20a_cde_create_context(l)) {
1289 nvgpu_err(g, "cde: can't replace context");
1290 }
1291 nvgpu_mutex_release(&cde_app->mutex);
1292 }
1293 }
1294
1295 /* delete temporary contexts later (watch for doubles) */
1296 if (cde_ctx->is_temporary && cde_ctx->in_use) {
1297 WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
1298 schedule_delayed_work(&cde_ctx->ctx_deleter_work,
1299 msecs_to_jiffies(CTX_DELETE_TIME));
1300 }
1301
1302 if (!gk20a_channel_check_timedout(ch)) {
1303 gk20a_cde_ctx_release(cde_ctx);
1304 }
1305}
1306
1307static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1308{
1309 struct nvgpu_os_linux *l = cde_ctx->l;
1310 struct gk20a *g = &l->g;
1311 struct nvgpu_firmware *img;
1312 struct channel_gk20a *ch;
1313 struct tsg_gk20a *tsg;
1314 struct gr_gk20a *gr = &g->gr;
1315 struct nvgpu_setup_bind_args setup_bind_args;
1316 int err = 0;
1317 u64 vaddr;
1318
1319 img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
1320 if (!img) {
1321 nvgpu_err(g, "cde: could not fetch the firmware");
1322 return -ENOSYS;
1323 }
1324
1325 tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
1326 if (!tsg) {
1327 nvgpu_err(g, "cde: could not create TSG");
1328 err = -ENOMEM;
1329 goto err_get_gk20a_channel;
1330 }
1331
1332 ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
1333 cde_ctx,
1334 -1,
1335 false);
1336 if (!ch) {
1337 nvgpu_warn(g, "cde: gk20a channel not available");
1338 err = -ENOMEM;
1339 goto err_get_gk20a_channel;
1340 }
1341
1342 ch->timeout.enabled = false;
1343
1344 /* bind the channel to the vm */
1345 err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
1346 if (err) {
1347 nvgpu_warn(g, "cde: could not bind vm");
1348 goto err_commit_va;
1349 }
1350
1351 err = gk20a_tsg_bind_channel(tsg, ch);
1352 if (err) {
1353 nvgpu_err(g, "cde: unable to bind to tsg");
1354 goto err_setup_bind;
1355 }
1356
1357 setup_bind_args.num_gpfifo_entries = 1024;
1358 setup_bind_args.num_inflight_jobs = 0;
1359 setup_bind_args.flags = 0;
1360 err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
1361 if (err) {
1362 nvgpu_warn(g, "cde: unable to setup channel");
1363 goto err_setup_bind;
1364 }
1365
1366 /* map backing store to gpu virtual space */
1367 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1368 g->gr.compbit_store.mem.size,
1369 NVGPU_VM_MAP_CACHEABLE,
1370 gk20a_mem_flag_read_only,
1371 false,
1372 gr->compbit_store.mem.aperture);
1373
1374 if (!vaddr) {
1375 nvgpu_warn(g, "cde: cannot map compression bit backing store");
1376 err = -ENOMEM;
1377 goto err_map_backingstore;
1378 }
1379
1380 /* store initialisation data */
1381 cde_ctx->ch = ch;
1382 cde_ctx->tsg = tsg;
1383 cde_ctx->vm = ch->vm;
1384 cde_ctx->backing_store_vaddr = vaddr;
1385
1386 /* initialise the firmware */
1387 err = gk20a_init_cde_img(cde_ctx, img);
1388 if (err) {
1389 nvgpu_warn(g, "cde: image initialisation failed");
1390 goto err_init_cde_img;
1391 }
1392
1393 /* initialisation done */
1394 nvgpu_release_firmware(g, img);
1395
1396 return 0;
1397
1398err_init_cde_img:
1399 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1400err_map_backingstore:
1401err_setup_bind:
1402 nvgpu_vm_put(ch->vm);
1403err_commit_va:
1404err_get_gk20a_channel:
1405 nvgpu_release_firmware(g, img);
1406 nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
1407 return err;
1408}
1409
1410int gk20a_cde_reload(struct nvgpu_os_linux *l)
1411__acquires(&l->cde_app->mutex)
1412__releases(&l->cde_app->mutex)
1413{
1414 struct gk20a *g = &l->g;
1415 struct gk20a_cde_app *cde_app = &l->cde_app;
1416 int err;
1417
1418 if (!cde_app->initialised)
1419 return -ENOSYS;
1420
1421 err = gk20a_busy(g);
1422 if (err)
1423 return err;
1424
1425 nvgpu_mutex_acquire(&cde_app->mutex);
1426
1427 gk20a_cde_stop(l);
1428
1429 err = gk20a_cde_create_contexts(l);
1430 if (!err)
1431 cde_app->initialised = true;
1432
1433 nvgpu_mutex_release(&cde_app->mutex);
1434
1435 gk20a_idle(g);
1436 return err;
1437}
1438
1439int gk20a_init_cde_support(struct nvgpu_os_linux *l)
1440__acquires(&cde_app->mutex)
1441__releases(&cde_app->mutex)
1442{
1443 struct gk20a_cde_app *cde_app = &l->cde_app;
1444 struct gk20a *g = &l->g;
1445 int err;
1446
1447 if (cde_app->initialised)
1448 return 0;
1449
1450 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1451
1452 err = nvgpu_mutex_init(&cde_app->mutex);
1453 if (err)
1454 return err;
1455
1456 nvgpu_mutex_acquire(&cde_app->mutex);
1457
1458 nvgpu_init_list_node(&cde_app->free_contexts);
1459 nvgpu_init_list_node(&cde_app->used_contexts);
1460 cde_app->ctx_count = 0;
1461 cde_app->ctx_count_top = 0;
1462 cde_app->ctx_usecount = 0;
1463
1464 err = gk20a_cde_create_contexts(l);
1465 if (!err)
1466 cde_app->initialised = true;
1467
1468 nvgpu_mutex_release(&cde_app->mutex);
1469 nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1470
1471 if (err)
1472 nvgpu_mutex_destroy(&cde_app->mutex);
1473
1474 return err;
1475}
1476
1477enum cde_launch_patch_id {
1478 PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
1479 PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
1480 PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
1481 PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
1482 PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
1483 PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
1484 PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
1485 PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
1486 PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
1487 PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
1488 PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
1489 PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
1490 PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
1491 PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
1492 PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
1493 PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
1494 PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
1495 PATCH_USER_CONST_XBLOCKS_ID = 1041,
1496 PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
1497 PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
1498 PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
1499 PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
1500 PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
1501 PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
1502 PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
1503 PATCH_H_LAUNCH_WORD1_ID = 1049,
1504 PATCH_H_LAUNCH_WORD2_ID = 1050,
1505 PATCH_V_LAUNCH_WORD1_ID = 1051,
1506 PATCH_V_LAUNCH_WORD2_ID = 1052,
1507 PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
1508 PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
1509 PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
1510 PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
1511};
1512
1513/* maximum number of WRITE_PATCHes in the below function */
1514#define MAX_CDE_LAUNCH_PATCHES 32
1515
1516static int gk20a_buffer_convert_gpu_to_cde_v1(
1517 struct nvgpu_os_linux *l,
1518 struct dma_buf *dmabuf, u32 consumer,
1519 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1520 u64 scatterbuffer_offset,
1521 u32 width, u32 height, u32 block_height_log2,
1522 u32 submit_flags, struct nvgpu_channel_fence *fence_in,
1523 struct gk20a_buffer_state *state)
1524{
1525 struct gk20a *g = &l->g;
1526 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1527 int param = 0;
1528 int err = 0;
1529 struct gk20a_fence *new_fence = NULL;
1530 const int wgx = 8;
1531 const int wgy = 8;
1532 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1533 const int xalign = compbits_per_byte * wgx;
1534 const int yalign = wgy;
1535
1536 /* Compute per launch parameters */
1537 const int xtiles = (width + 7) >> 3;
1538 const int ytiles = (height + 7) >> 3;
1539 const int gridw_h = roundup(xtiles, xalign) / xalign;
1540 const int gridh_h = roundup(ytiles, yalign) / yalign;
1541 const int gridw_v = roundup(ytiles, xalign) / xalign;
1542 const int gridh_v = roundup(xtiles, yalign) / yalign;
1543 const int xblocks = (xtiles + 1) >> 1;
1544 const int voffset = compbits_voffset - compbits_hoffset;
1545
1546 int hprog = -1;
1547 int vprog = -1;
1548
1549 if (l->ops.cde.get_program_numbers)
1550 l->ops.cde.get_program_numbers(g, block_height_log2,
1551 l->cde_app.shader_parameter,
1552 &hprog, &vprog);
1553 else {
1554 nvgpu_warn(g, "cde: chip not supported");
1555 return -ENOSYS;
1556 }
1557
1558 if (hprog < 0 || vprog < 0) {
1559 nvgpu_warn(g, "cde: could not determine programs");
1560 return -ENOSYS;
1561 }
1562
1563 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1564 nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1565 xtiles, ytiles);
1566
1567 nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
1568 width, height, block_height_log2,
1569 compbits_hoffset, compbits_voffset, scatterbuffer_offset);
1570 nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1571 width, height, xtiles, ytiles);
1572 nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
1573 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1574 nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1575 hprog,
1576 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1577 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1578 vprog,
1579 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1580 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1581
1582 /* Write parameters */
1583#define WRITE_PATCH(NAME, VALUE) \
1584 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1585 WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
1586 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1587 block_height_log2);
1588 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1589 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1590 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1591 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1592 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1593
1594 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
1595 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
1596 WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
1597 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
1598 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
1599 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1600
1601 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
1602 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
1603 WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
1604 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
1605 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
1606 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1607
1608 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1609 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1610 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1611 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1612 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1613 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1614 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1615 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1616
1617 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1618 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1619 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1620 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1621 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1622 } else {
1623 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1624 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1625 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1626 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1627 }
1628
1629 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1630 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1631 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1632 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1633 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1634 } else {
1635 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1636 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1637 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1638 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1639 }
1640#undef WRITE_PATCH
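	/*
	 * Expansion note (illustrative, not in the original source): each
	 * WRITE_PATCH(NAME, value) above appends one parameter entry via the
	 * ##_ID token paste, e.g. WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks)
	 * becomes
	 *   params[param++] = (struct gk20a_cde_param)
	 *		{PATCH_USER_CONST_XBLOCKS_ID, 0, xblocks};
	 */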
1641
1642 err = gk20a_cde_convert(l, dmabuf,
1643 compbits_hoffset,
1644 scatterbuffer_offset,
1645 fence_in, submit_flags,
1646 params, param, &new_fence);
1647 if (err)
1648 goto out;
1649
1650 /* compbits generated, update state & fence */
1651 gk20a_fence_put(state->fence);
1652 state->fence = new_fence;
1653 state->valid_compbits |= consumer &
1654 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1655out:
1656 return err;
1657}
1658
1659static int gk20a_buffer_convert_gpu_to_cde(
1660 struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
1661 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1662 u64 scatterbuffer_offset,
1663 u32 width, u32 height, u32 block_height_log2,
1664 u32 submit_flags, struct nvgpu_channel_fence *fence_in,
1665 struct gk20a_buffer_state *state)
1666{
1667 struct gk20a *g = &l->g;
1668 int err = 0;
1669
1670 if (!l->cde_app.initialised)
1671 return -ENOSYS;
1672
1673 nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n",
1674 l->cde_app.firmware_version);
1675
1676 if (l->cde_app.firmware_version == 1) {
1677 err = gk20a_buffer_convert_gpu_to_cde_v1(
1678 l, dmabuf, consumer, offset, compbits_hoffset,
1679 compbits_voffset, scatterbuffer_offset,
1680 width, height, block_height_log2,
1681 submit_flags, fence_in, state);
1682 } else {
1683 nvgpu_err(g, "unsupported CDE firmware version %d",
1684 l->cde_app.firmware_version);
1685 err = -EINVAL;
1686 }
1687
1688 return err;
1689}
1690
1691int gk20a_prepare_compressible_read(
1692 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
1693 u64 compbits_hoffset, u64 compbits_voffset,
1694 u64 scatterbuffer_offset,
1695 u32 width, u32 height, u32 block_height_log2,
1696 u32 submit_flags, struct nvgpu_channel_fence *fence,
1697 u32 *valid_compbits, u32 *zbc_color,
1698 struct gk20a_fence **fence_out)
1699{
1700 struct gk20a *g = &l->g;
1701 int err = 0;
1702 struct gk20a_buffer_state *state;
1703 struct dma_buf *dmabuf;
1704 u32 missing_bits;
1705
1706 dmabuf = dma_buf_get(buffer_fd);
1707 if (IS_ERR(dmabuf))
1708 return -EINVAL;
1709
1710 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1711 if (err) {
1712 dma_buf_put(dmabuf);
1713 return err;
1714 }
1715
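	/* bits that are requested but not yet valid, i.e. request & ~valid_compbits */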
1716 missing_bits = (state->valid_compbits ^ request) & request;
1717
1718 nvgpu_mutex_acquire(&state->lock);
1719
1720 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1721
1722 gk20a_fence_put(state->fence);
1723 state->fence = NULL;
1724 /* state->fence = decompress();
1725 state->valid_compbits = 0; */
1726 err = -EINVAL;
1727 goto out;
1728 } else if (missing_bits) {
1729 u32 missing_cde_bits = missing_bits &
1730 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1731 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1732 missing_cde_bits) {
1733 err = gk20a_buffer_convert_gpu_to_cde(
1734 l, dmabuf,
1735 missing_cde_bits,
1736 offset, compbits_hoffset,
1737 compbits_voffset, scatterbuffer_offset,
1738 width, height, block_height_log2,
1739 submit_flags, fence,
1740 state);
1741 if (err)
1742 goto out;
1743 }
1744 }
1745
1746 if (state->fence && fence_out)
1747 *fence_out = gk20a_fence_get(state->fence);
1748
1749 if (valid_compbits)
1750 *valid_compbits = state->valid_compbits;
1751
1752 if (zbc_color)
1753 *zbc_color = state->zbc_color;
1754
1755out:
1756 nvgpu_mutex_release(&state->lock);
1757 dma_buf_put(dmabuf);
1758 return err;
1759}
1760
1761int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1762 u32 valid_compbits, u64 offset, u32 zbc_color)
1763{
1764 int err;
1765 struct gk20a_buffer_state *state;
1766 struct dma_buf *dmabuf;
1767
1768 dmabuf = dma_buf_get(buffer_fd);
1769 if (IS_ERR(dmabuf)) {
1770 nvgpu_err(g, "invalid dmabuf");
1771 return -EINVAL;
1772 }
1773
1774 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1775 if (err) {
1776 nvgpu_err(g, "could not get state from dmabuf");
1777 dma_buf_put(dmabuf);
1778 return err;
1779 }
1780
1781 nvgpu_mutex_acquire(&state->lock);
1782
1783 /* Update the compbits state. */
1784 state->valid_compbits = valid_compbits;
1785 state->zbc_color = zbc_color;
1786
1787 /* Discard previous compbit job fence. */
1788 gk20a_fence_put(state->fence);
1789 state->fence = NULL;
1790
1791 nvgpu_mutex_release(&state->lock);
1792 dma_buf_put(dmabuf);
1793 return 0;
1794}
diff --git a/include/os/linux/cde.h b/include/os/linux/cde.h
deleted file mode 100644
index 5928b62..0000000
--- a/include/os/linux/cde.h
+++ /dev/null
@@ -1,326 +0,0 @@
1/*
2 * GK20A color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_
21
22#include <nvgpu/nvgpu_mem.h>
23#include <nvgpu/list.h>
24#include <nvgpu/lock.h>
25
26#include <linux/kobject.h>
27#include <linux/workqueue.h>
28
29#define MAX_CDE_BUFS 10
30#define MAX_CDE_PARAMS 64
31#define MAX_CDE_USER_PARAMS 40
32#define MAX_CDE_ARRAY_ENTRIES 9
33
34/*
35 * The size of the context ring buffer that is dedicated for handling cde
36 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
37 * wait on the previous job to that channel, so increasing this value
38 * reduces the likelihood of stalls.
39 */
40#define NUM_CDE_CONTEXTS 4
41
42struct dma_buf;
43struct device;
44struct nvgpu_os_linux;
45struct gk20a;
46struct gk20a_fence;
47struct nvgpu_channel_fence;
48struct channel_gk20a;
49struct vm_gk20a;
50struct nvgpu_gpfifo_entry;
51
52/*
53 * this element defines a buffer that is allocated and mapped into gpu address
54 * space. data_byte_offset defines the beginning of the buffer inside the
55 * firmware. num_bytes defines the size of the buffer in bytes.
56 *
57 * If data_byte_offset is zero, we allocate an empty buffer.
58 */
59
60struct gk20a_cde_hdr_buf {
61 u64 data_byte_offset;
62 u64 num_bytes;
63};
64
65/*
66 * this element defines a constant patch in buffers. It basically
67 * computes the physical address of <source_buf>+source_byte_offset. The
68 * address is then turned into the patch value as per:
69 * value = (current_value & ~mask) | (address << shift) & mask .
70 *
71 * The type field defines the register size as:
72 * 0=u32,
73 * 1=u64 (little endian),
74 * 2=u64 (big endian)
75 */
76
77struct gk20a_cde_hdr_replace {
78 u32 target_buf;
79 u32 source_buf;
80 s32 shift;
81 u32 type;
82 u64 target_byte_offset;
83 u64 source_byte_offset;
84 u64 mask;
85};
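/*
 * Illustrative sketch (not part of the original header): how the mask/shift
 * formula above would be applied when patching. Interpreting a negative shift
 * as a right shift is an assumption of this sketch, based on the s32 type of
 * the shift field.
 */
static inline u64 gk20a_cde_patch_value_sketch(u64 current_value, u64 address,
					       s32 shift, u64 mask)
{
	u64 shifted = (shift >= 0) ? (address << shift) : (address >> (-shift));

	/* value = (current_value & ~mask) | (address << shift) & mask */
	return (current_value & ~mask) | (shifted & mask);
}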
86
87enum {
88 TYPE_PARAM_TYPE_U32 = 0,
89 TYPE_PARAM_TYPE_U64_LITTLE,
90 TYPE_PARAM_TYPE_U64_BIG
91};
92
93/*
94 * this element defines a runtime patch in buffers. Parameters with ids from
95 * 0 to 1024 are reserved for special usage as follows:
96 * 0 = comptags_per_cacheline,
97 * 1 = slices_per_fbp,
98 * 2 = num_fbps
99 * 3 = source buffer first page offset
100 * 4 = source buffer block height log2
101 * 5 = backing store memory address
102 * 6 = destination memory address
103 * 7 = destination size (bytes)
104 * 8 = backing store size (bytes)
105 * 9 = cache line size
106 *
107 * Parameters above id 1024 are user-specified, i.e. they determine where
108 * parameters from user space should be placed in the buffers, what their
109 * type is, etc.
110 *
111 * Once the value is available, we add data_offset to the value.
112 *
113 * The value is then turned into the patch value as per:
114 * value = (current_value & ~mask) | (address << shift) & mask .
115 *
116 * The type field defines the register size as:
117 * 0=u32,
118 * 1=u64 (little endian),
119 * 2=u64 (big endian)
120 */
121
122struct gk20a_cde_hdr_param {
123 u32 id;
124 u32 target_buf;
125 s32 shift;
126 u32 type;
127 s64 data_offset;
128 u64 target_byte_offset;
129 u64 mask;
130};
131
132enum {
133 TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
134 TYPE_PARAM_GPU_CONFIGURATION,
135 TYPE_PARAM_FIRSTPAGEOFFSET,
136 TYPE_PARAM_NUMPAGES,
137 TYPE_PARAM_BACKINGSTORE,
138 TYPE_PARAM_DESTINATION,
139 TYPE_PARAM_DESTINATION_SIZE,
140 TYPE_PARAM_BACKINGSTORE_SIZE,
141 TYPE_PARAM_SOURCE_SMMU_ADDR,
142 TYPE_PARAM_BACKINGSTORE_BASE_HW,
143 TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
144 TYPE_PARAM_SCATTERBUFFER,
145 TYPE_PARAM_SCATTERBUFFER_SIZE,
146 NUM_RESERVED_PARAMS = 1024,
147};
148
149/*
150 * This header element defines a command. The op field determines whether the
151 * element is defining an init (0) or convert command (1). data_byte_offset
152 * denotes the beginning address of command elements in the file.
153 */
154
155struct gk20a_cde_hdr_command {
156 u32 op;
157 u32 num_entries;
158 u64 data_byte_offset;
159};
160
161enum {
162 TYPE_BUF_COMMAND_INIT = 0,
163 TYPE_BUF_COMMAND_CONVERT,
164 TYPE_BUF_COMMAND_NOOP
165};
166
167/*
168 * This command element defines one entry inside a push buffer. target_buf
169 * identifies the buffer holding the pushbuffer entries, target_byte_offset the
170 * offset inside that buffer and num_bytes the size of the entry in bytes.
171 */
172
173struct gk20a_cde_cmd_elem {
174 u32 target_buf;
175 u32 padding;
176 u64 target_byte_offset;
177 u64 num_bytes;
178};
179
180/*
181 * This element is used for storing a small array of data.
182 */
183
184enum {
185 ARRAY_PROGRAM_OFFSET = 0,
186 ARRAY_REGISTER_COUNT,
187 ARRAY_LAUNCH_COMMAND,
188 NUM_CDE_ARRAYS
189};
190
191struct gk20a_cde_hdr_array {
192 u32 id;
193 u32 data[MAX_CDE_ARRAY_ENTRIES];
194};
195
196/*
197 * The following defines a single header element. Each element has a type and
198 * one of the data structures in the union below.
199 */
200
201struct gk20a_cde_hdr_elem {
202 u32 type;
203 u32 padding;
204 union {
205 struct gk20a_cde_hdr_buf buf;
206 struct gk20a_cde_hdr_replace replace;
207 struct gk20a_cde_hdr_param param;
208 u32 required_class;
209 struct gk20a_cde_hdr_command command;
210 struct gk20a_cde_hdr_array array;
211 };
212};
213
214enum {
215 TYPE_BUF = 0,
216 TYPE_REPLACE,
217 TYPE_PARAM,
218 TYPE_REQUIRED_CLASS,
219 TYPE_COMMAND,
220 TYPE_ARRAY
221};
222
223struct gk20a_cde_param {
224 u32 id;
225 u32 padding;
226 u64 value;
227};
228
229struct gk20a_cde_ctx {
230 struct nvgpu_os_linux *l;
231 struct device *dev;
232
233 /* channel related data */
234 struct channel_gk20a *ch;
235 struct tsg_gk20a *tsg;
236 struct vm_gk20a *vm;
237
238 /* buf converter configuration */
239 struct nvgpu_mem mem[MAX_CDE_BUFS];
240 unsigned int num_bufs;
241
242 /* buffer patching params (where should patching be done) */
243 struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
244 unsigned int num_params;
245
246 /* storage for user space parameter values */
247 u32 user_param_values[MAX_CDE_USER_PARAMS];
248
249 u32 surf_param_offset;
250 u32 surf_param_lines;
251 u64 surf_vaddr;
252
253 u64 compbit_vaddr;
254 u64 compbit_size;
255
256 u64 scatterbuffer_vaddr;
257 u64 scatterbuffer_size;
258
259 u64 backing_store_vaddr;
260
261 struct nvgpu_gpfifo_entry *init_convert_cmd;
262 int init_cmd_num_entries;
263
264 struct nvgpu_gpfifo_entry *convert_cmd;
265 int convert_cmd_num_entries;
266
267 struct kobj_attribute attr;
268
269 bool init_cmd_executed;
270
271 struct nvgpu_list_node list;
272 bool is_temporary;
273 bool in_use;
274 struct delayed_work ctx_deleter_work;
275};
276
277static inline struct gk20a_cde_ctx *
278gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
279{
280 return (struct gk20a_cde_ctx *)
281 ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
282};
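/*
 * Note (illustrative, not part of the original header): the helper above is
 * the usual intrusive-list idiom -- an nvgpu_list_node is embedded in
 * gk20a_cde_ctx, and the container is recovered by subtracting the member
 * offset. Callers walking cde_app->free_contexts / used_contexts would then
 * use it roughly like this (the iteration macro name is assumed from the
 * nvgpu list API):
 *
 *	struct gk20a_cde_ctx *cde_ctx;
 *
 *	nvgpu_list_for_each_entry(cde_ctx, &cde_app->used_contexts,
 *				  gk20a_cde_ctx, list) {
 *		// each cde_ctx is the containing context for one list node
 *	}
 */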
283
284struct gk20a_cde_app {
285 bool initialised;
286 struct nvgpu_mutex mutex;
287
288 struct nvgpu_list_node free_contexts;
289 struct nvgpu_list_node used_contexts;
290 unsigned int ctx_count;
291 unsigned int ctx_usecount;
292 unsigned int ctx_count_top;
293
294 u32 firmware_version;
295
296 u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
297
298 u32 shader_parameter;
299};
300
301void gk20a_cde_destroy(struct nvgpu_os_linux *l);
302void gk20a_cde_suspend(struct nvgpu_os_linux *l);
303int gk20a_init_cde_support(struct nvgpu_os_linux *l);
304int gk20a_cde_reload(struct nvgpu_os_linux *l);
305int gk20a_cde_convert(struct nvgpu_os_linux *l,
306 struct dma_buf *compbits_buf,
307 u64 compbits_byte_offset,
308 u64 scatterbuffer_byte_offset,
309 struct nvgpu_channel_fence *fence,
310 u32 __flags, struct gk20a_cde_param *params,
311 int num_params, struct gk20a_fence **fence_out);
312
313int gk20a_prepare_compressible_read(
314 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
315 u64 compbits_hoffset, u64 compbits_voffset,
316 u64 scatterbuffer_offset,
317 u32 width, u32 height, u32 block_height_log2,
318 u32 submit_flags, struct nvgpu_channel_fence *fence,
319 u32 *valid_compbits, u32 *zbc_color,
320 struct gk20a_fence **fence_out);
321int gk20a_mark_compressible_write(
322 struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
323 u32 zbc_color);
324int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);
325
326#endif
diff --git a/include/os/linux/cde_gm20b.c b/include/os/linux/cde_gm20b.c
deleted file mode 100644
index a9a4754..0000000
--- a/include/os/linux/cde_gm20b.c
+++ /dev/null
@@ -1,59 +0,0 @@
1/*
2 * GM20B CDE
3 *
4 * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/gk20a.h>
26
27#include "cde_gm20b.h"
28
29enum programs {
30 PROG_HPASS = 0,
31 PROG_VPASS_LARGE = 1,
32 PROG_VPASS_SMALL = 2,
33 PROG_HPASS_DEBUG = 3,
34 PROG_VPASS_LARGE_DEBUG = 4,
35 PROG_VPASS_SMALL_DEBUG = 5,
36 PROG_PASSTHROUGH = 6,
37};
38
39void gm20b_cde_get_program_numbers(struct gk20a *g,
40 u32 block_height_log2,
41 u32 shader_parameter,
42 int *hprog_out, int *vprog_out)
43{
44 int hprog = PROG_HPASS;
45 int vprog = (block_height_log2 >= 2) ?
46 PROG_VPASS_LARGE : PROG_VPASS_SMALL;
47 if (shader_parameter == 1) {
48 hprog = PROG_PASSTHROUGH;
49 vprog = PROG_PASSTHROUGH;
50 } else if (shader_parameter == 2) {
51 hprog = PROG_HPASS_DEBUG;
52 vprog = (block_height_log2 >= 2) ?
53 PROG_VPASS_LARGE_DEBUG :
54 PROG_VPASS_SMALL_DEBUG;
55 }
56
57 *hprog_out = hprog;
58 *vprog_out = vprog;
59}
diff --git a/include/os/linux/cde_gm20b.h b/include/os/linux/cde_gm20b.h
deleted file mode 100644
index fac8aaf..0000000
--- a/include/os/linux/cde_gm20b.h
+++ /dev/null
@@ -1,33 +0,0 @@
1/*
2 * GM20B CDE
3 *
4 * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_CDE
26#define _NVHOST_GM20B_CDE
27
28void gm20b_cde_get_program_numbers(struct gk20a *g,
29 u32 block_height_log2,
30 u32 shader_parameter,
31 int *hprog_out, int *vprog_out);
32
33#endif
diff --git a/include/os/linux/cde_gp10b.c b/include/os/linux/cde_gp10b.c
deleted file mode 100644
index 6356d33..0000000
--- a/include/os/linux/cde_gp10b.c
+++ /dev/null
@@ -1,153 +0,0 @@
1/*
2 * GP10B CDE
3 *
4 * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/log.h>
26#include <nvgpu/dma.h>
27#include <nvgpu/gk20a.h>
28
29#include "cde_gp10b.h"
30
31enum gp10b_programs {
32 GP10B_PROG_HPASS = 0,
33 GP10B_PROG_HPASS_4K = 1,
34 GP10B_PROG_VPASS = 2,
35 GP10B_PROG_VPASS_4K = 3,
36 GP10B_PROG_HPASS_DEBUG = 4,
37 GP10B_PROG_HPASS_4K_DEBUG = 5,
38 GP10B_PROG_VPASS_DEBUG = 6,
39 GP10B_PROG_VPASS_4K_DEBUG = 7,
40 GP10B_PROG_PASSTHROUGH = 8,
41};
42
43void gp10b_cde_get_program_numbers(struct gk20a *g,
44 u32 block_height_log2,
45 u32 shader_parameter,
46 int *hprog_out, int *vprog_out)
47{
48 int hprog, vprog;
49
50 if (shader_parameter == 1) {
51 hprog = GP10B_PROG_PASSTHROUGH;
52 vprog = GP10B_PROG_PASSTHROUGH;
53 } else {
54 hprog = GP10B_PROG_HPASS;
55 vprog = GP10B_PROG_VPASS;
56 if (shader_parameter == 2) {
57 hprog = GP10B_PROG_HPASS_DEBUG;
58 vprog = GP10B_PROG_VPASS_DEBUG;
59 }
60 if (!nvgpu_iommuable(g)) {
61 if (!g->mm.disable_bigpage) {
62 nvgpu_warn(g,
63 "When no IOMMU big pages cannot be used");
64 }
65 hprog |= 1;
66 vprog |= 1;
67 }
68 }
69
70 *hprog_out = hprog;
71 *vprog_out = vprog;
72}
73
74bool gp10b_need_scatter_buffer(struct gk20a *g)
75{
76 return !nvgpu_iommuable(g);
77}
78
79static u8 parity(u32 a)
80{
81 a ^= a>>16u;
82 a ^= a>>8u;
83 a ^= a>>4u;
84 a &= 0xfu;
85 return (0x6996u >> a) & 1u;
86}
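/*
 * Reference sketch (not driver code) of what parity() above computes: the
 * three XOR folds leave a nibble whose parity equals that of the whole 32-bit
 * word, and 0x6996 is the 16-entry table of nibble parities (bit n of 0x6996
 * is the parity of n). A bit-by-bit equivalent for comparison:
 */
static u8 parity_reference_sketch(u32 a)
{
	u8 p = 0;

	while (a != 0) {
		p ^= (u8)(a & 1u);	/* XOR every bit together */
		a >>= 1;
	}
	return p;
}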
87
88int gp10b_populate_scatter_buffer(struct gk20a *g,
89 struct sg_table *sgt,
90 size_t surface_size,
91 void *scatter_buffer_ptr,
92 size_t scatter_buffer_size)
93{
94 /* map scatter buffer to CPU VA and fill it */
95 const u32 page_size_log2 = 12;
96 const u32 page_size = 1 << page_size_log2;
97 const u32 page_size_shift = page_size_log2 - 7u;
98
99 /* 0011 1111 1111 1111 1111 1110 0100 1000 */
100 const u32 getSliceMaskGP10B = 0x3ffffe48;
101 u8 *scatter_buffer = scatter_buffer_ptr;
102
103 size_t i;
104 struct scatterlist *sg = NULL;
105 u8 d = 0;
106 size_t page = 0;
107 size_t pages_left;
108
109 surface_size = round_up(surface_size, page_size);
110
111 pages_left = surface_size >> page_size_log2;
112 if ((pages_left >> 3) > scatter_buffer_size)
113 return -ENOMEM;
114
115 for_each_sg(sgt->sgl, sg, sgt->nents, i) {
116 unsigned int j;
117 u64 surf_pa = sg_phys(sg);
118 unsigned int n = (int)(sg->length >> page_size_log2);
119
120 nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
121
122 for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) {
123 u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift;
124 u8 scatter_bit = parity(addr);
125 u8 bit = page & 7;
126
127 d |= scatter_bit << bit;
128 if (bit == 7) {
129 scatter_buffer[page >> 3] = d;
130 d = 0;
131 }
132
133 ++page;
134 --pages_left;
135 }
136
137 if (pages_left == 0)
138 break;
139 }
140
141 /* write the last byte in case the number of pages is not divisible by 8 */
142 if ((page & 7) != 0)
143 scatter_buffer[page >> 3] = d;
144
145 if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
146 nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
147 for (i = 0; i < page >> 3; i++) {
148 nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
149 }
150 }
151
152 return 0;
153}
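/*
 * Sizing sketch (not driver code): the scatter buffer built above stores one
 * parity bit per 4 KiB page of the page-rounded surface, packed eight pages
 * per byte. A helper expressing that relationship (rounding the byte count up
 * here is an assumption of this sketch; the function above writes the final
 * partial byte explicitly):
 */
static size_t gp10b_scatter_buffer_bytes_sketch(size_t surface_size)
{
	size_t pages = round_up(surface_size, 4096) >> 12;	/* 4 KiB pages */

	return (pages + 7) >> 3;	/* one bit per page, 8 pages per byte */
}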
diff --git a/include/os/linux/cde_gp10b.h b/include/os/linux/cde_gp10b.h
deleted file mode 100644
index 3ecca2a..0000000
--- a/include/os/linux/cde_gp10b.h
+++ /dev/null
@@ -1,40 +0,0 @@
1/*
2 * GP10B CDE
3 *
4 * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GP10B_CDE
26#define _NVHOST_GP10B_CDE
27
28#include "os_linux.h"
29
30void gp10b_cde_get_program_numbers(struct gk20a *g,
31 u32 block_height_log2,
32 u32 shader_parameter,
33 int *hprog_out, int *vprog_out);
34bool gp10b_need_scatter_buffer(struct gk20a *g);
35int gp10b_populate_scatter_buffer(struct gk20a *g,
36 struct sg_table *sgt,
37 size_t surface_size,
38 void *scatter_buffer_ptr,
39 size_t scatter_buffer_size);
40#endif
diff --git a/include/os/linux/channel.h b/include/os/linux/channel.h
deleted file mode 100644
index e6326fa..0000000
--- a/include/os/linux/channel.h
+++ /dev/null
@@ -1,102 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef NVGPU_LINUX_CHANNEL_H
17#define NVGPU_LINUX_CHANNEL_H
18
19#include <linux/workqueue.h>
20#include <linux/dma-buf.h>
21
22#include <nvgpu/types.h>
23
24struct channel_gk20a;
25struct nvgpu_gpfifo;
26struct nvgpu_submit_gpfifo_args;
27struct nvgpu_channel_fence;
28struct gk20a_fence;
29struct fifo_profile_gk20a;
30struct nvgpu_os_linux;
31
32struct sync_fence;
33struct sync_timeline;
34
35struct nvgpu_channel_completion_cb {
36 /*
37 * Signal channel owner via a callback, if set, in job cleanup with
38 * schedule_work. Means that something finished on the channel (perhaps
39 * more than one job).
40 */
41 void (*fn)(struct channel_gk20a *, void *);
42 void *user_data;
43 /* Make access to the two above atomic */
44 struct nvgpu_spinlock lock;
45 /* Per-channel async work task, cannot reschedule itself */
46 struct work_struct work;
47};
48
49struct nvgpu_error_notifier {
50 struct dma_buf *dmabuf;
51 void *vaddr;
52
53 struct nvgpu_notification *notification;
54
55 struct nvgpu_mutex mutex;
56};
57
58/*
59 * This struct contains fence-related data,
60 * e.g. the sync_timeline for sync_fences.
61 */
62struct nvgpu_os_fence_framework {
63 struct sync_timeline *timeline;
64};
65
66struct nvgpu_usermode_bufs_linux {
67 /*
68 * Common low level info of these is stored in nvgpu_mems in
69 * channel_gk20a; these hold lifetimes for the actual dmabuf and its
70 * dma mapping.
71 */
72 struct nvgpu_usermode_buf_linux {
73 struct dma_buf *dmabuf;
74 struct dma_buf_attachment *attachment;
75 struct sg_table *sgt;
76 } gpfifo, userd;
77};
78
79struct nvgpu_channel_linux {
80 struct channel_gk20a *ch;
81
82 struct nvgpu_os_fence_framework fence_framework;
83
84 struct nvgpu_channel_completion_cb completion_cb;
85 struct nvgpu_error_notifier error_notifier;
86
87 struct dma_buf *cyclestate_buffer_handler;
88
89 struct nvgpu_usermode_bufs_linux usermode;
90};
91
92u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);
93int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
94void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l);
95
96struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
97 void (*update_fn)(struct channel_gk20a *, void *),
98 void *update_fn_data,
99 int runlist_id,
100 bool is_privileged_channel);
101
102#endif
diff --git a/include/os/linux/clk.c b/include/os/linux/clk.c
deleted file mode 100644
index e9796ea..0000000
--- a/include/os/linux/clk.c
+++ /dev/null
@@ -1,286 +0,0 @@
1/*
2 * Linux clock support
3 *
4 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/clk.h>
20
21#include <soc/tegra/tegra-dvfs.h>
22#include <soc/tegra/tegra-bpmp-dvfs.h>
23
24#include "clk.h"
25#include "os_linux.h"
26#include "platform_gk20a.h"
27
28#include <nvgpu/gk20a.h>
29#include <nvgpu/clk_arb.h>
30
31#define HZ_TO_MHZ(x) ((x) / 1000000)
32
33static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain)
34{
35 struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
36 unsigned long ret;
37
38 switch (api_domain) {
39 case CTRL_CLK_DOMAIN_GPCCLK:
40 if (g->clk.tegra_clk)
41 ret = clk_get_rate(g->clk.tegra_clk);
42 else
43 ret = clk_get_rate(platform->clk[0]);
44 break;
45 case CTRL_CLK_DOMAIN_PWRCLK:
46 ret = clk_get_rate(platform->clk[1]);
47 break;
48 default:
49 nvgpu_err(g, "unknown clock: %u", api_domain);
50 ret = 0;
51 break;
52 }
53
54 return ret;
55}
56
57static int nvgpu_linux_clk_set_rate(struct gk20a *g,
58 u32 api_domain, unsigned long rate)
59{
60 struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
61 int ret;
62
63 switch (api_domain) {
64 case CTRL_CLK_DOMAIN_GPCCLK:
65 if (g->clk.tegra_clk)
66 ret = clk_set_rate(g->clk.tegra_clk, rate);
67 else
68 ret = clk_set_rate(platform->clk[0], rate);
69 break;
70 case CTRL_CLK_DOMAIN_PWRCLK:
71 ret = clk_set_rate(platform->clk[1], rate);
72 break;
73 default:
74 nvgpu_err(g, "unknown clock: %u", api_domain);
75 ret = -EINVAL;
76 break;
77 }
78
79 return ret;
80}
81
82static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g)
83{
84 struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
85
86 /*
87 * On Tegra platforms with a GPCPLL bus (gbus), the GPU tegra_clk clock
88 * exposed to the frequency governor is a shared user on the gbus. The gbus
89 * itself can be accessed as the GPU clock parent and incorporates DVFS data.
90 */
91 if (g->clk.tegra_clk)
92 return tegra_dvfs_get_fmax_at_vmin_safe_t(
93 g->clk.tegra_clk_parent);
94
95 if (platform->maxmin_clk_id)
96 return tegra_bpmp_dvfs_get_fmax_at_vmin(
97 platform->maxmin_clk_id);
98
99 return 0;
100}
101
102static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g)
103{
104 struct clk *c;
105
106 c = clk_get_sys("gpu_ref", "gpu_ref");
107 if (IS_ERR(c)) {
108 nvgpu_err(g, "failed to get GPCPLL reference clock");
109 return 0;
110 }
111
112 return clk_get_rate(c);
113}
114
115static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk,
116 unsigned long rate)
117{
118 return tegra_dvfs_predict_mv_at_hz_cur_tfloor(
119 clk->tegra_clk_parent, rate);
120}
121
122static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain)
123{
124 int ret;
125 u16 min_mhz, max_mhz;
126
127 switch (api_domain) {
128 case CTRL_CLK_DOMAIN_GPCCLK:
129 ret = tegra_dvfs_get_maxrate(g->clk.tegra_clk_parent);
130 /* If dvfs not supported */
131 if (ret == 0) {
132 int err = nvgpu_clk_arb_get_arbiter_clk_range(g,
133 NVGPU_CLK_DOMAIN_GPCCLK,
134 &min_mhz, &max_mhz);
135 if (err == 0) {
136 ret = max_mhz * 1000000L;
137 }
138 }
139 break;
140 default:
141 nvgpu_err(g, "unknown clock: %u", api_domain);
142 ret = 0;
143 break;
144 }
145
146 return ret;
147}
148
149/*
150 * This API is used to return a list of supported frequencies by igpu.
151 * Set *num_points as 0 to get the size of the freqs list, returned
152 * by *num_points itself. freqs array must be provided by caller.
153 * If *num_points is non-zero, then freqs array size must atleast
154 * equal *num_points.
155 */
156static int nvgpu_linux_clk_get_f_points(struct gk20a *g,
157 u32 api_domain, u32 *num_points, u16 *freqs)
158{
159 struct device *dev = dev_from_gk20a(g);
160 struct gk20a_platform *platform = gk20a_get_platform(dev);
161 unsigned long *gpu_freq_table;
162 int ret = 0;
163 int num_supported_freq = 0;
164 u32 i;
165
166 switch (api_domain) {
167 case CTRL_CLK_DOMAIN_GPCCLK:
168 ret = platform->get_clk_freqs(dev, &gpu_freq_table,
169 &num_supported_freq);
170
171 if (ret) {
172 return ret;
173 }
174
175 if (num_points == NULL) {
176 return -EINVAL;
177 }
178
179 if (*num_points != 0U) {
180 if (freqs == NULL || (*num_points > (u32)num_supported_freq)) {
181 return -EINVAL;
182 }
183 }
184
185 if (*num_points == 0) {
186 *num_points = num_supported_freq;
187 } else {
188 for (i = 0; i < *num_points; i++) {
189 freqs[i] = HZ_TO_MHZ(gpu_freq_table[i]);
190 }
191 }
192 break;
193 default:
194 nvgpu_err(g, "unknown clock: %u", api_domain);
195 ret = -EINVAL;
196 break;
197 }
198
199 return ret;
200}
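/*
 * Hedged usage sketch (not part of the driver): the op above follows the
 * common two-call query pattern -- first call with *num_points == 0 to learn
 * the table size, then call again with an array of at least that many u16
 * MHz entries. The caller name and error handling here are assumptions.
 */
static int clk_query_gpc_freqs_sketch(struct gk20a *g)
{
	u32 num = 0;
	u16 *freqs;
	int err;

	/* first call: ask only for the number of supported frequencies */
	err = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
						 &num, NULL);
	if (err != 0 || num == 0)
		return err ? err : -ENOENT;

	freqs = nvgpu_kzalloc(g, num * sizeof(*freqs));
	if (freqs == NULL)
		return -ENOMEM;

	/* second call: fill the table with frequencies in MHz */
	err = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
						 &num, freqs);

	nvgpu_kfree(g, freqs);
	return err;
}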
201
202static int nvgpu_clk_get_range(struct gk20a *g, u32 api_domain,
203 u16 *min_mhz, u16 *max_mhz)
204{
205 struct device *dev = dev_from_gk20a(g);
206 struct gk20a_platform *platform = gk20a_get_platform(dev);
207 unsigned long *freqs;
208 int num_freqs;
209 int ret;
210
211 switch (api_domain) {
212 case CTRL_CLK_DOMAIN_GPCCLK:
213 ret = platform->get_clk_freqs(dev, &freqs, &num_freqs);
214
215 if (!ret) {
216 *min_mhz = HZ_TO_MHZ(freqs[0]);
217 *max_mhz = HZ_TO_MHZ(freqs[num_freqs - 1]);
218 }
219 break;
220 default:
221 nvgpu_err(g, "unknown clock: %u", api_domain);
222 ret = -EINVAL;
223 break;
224 }
225
226 return ret;
227}
228
229/* rate_target should be passed in as Hz
230 rounded_rate is returned in Hz */
231static int nvgpu_clk_get_round_rate(struct gk20a *g,
232 u32 api_domain, unsigned long rate_target,
233 unsigned long *rounded_rate)
234{
235 struct device *dev = dev_from_gk20a(g);
236 struct gk20a_platform *platform = gk20a_get_platform(dev);
237 unsigned long *freqs;
238 int num_freqs;
239 int i, ret = 0;
240
241 switch (api_domain) {
242 case CTRL_CLK_DOMAIN_GPCCLK:
243 ret = platform->get_clk_freqs(dev, &freqs, &num_freqs);
244
245 for (i = 0; i < num_freqs; ++i) {
246 if (freqs[i] >= rate_target) {
247 *rounded_rate = freqs[i];
248 return 0;
249 }
250 }
251 *rounded_rate = freqs[num_freqs - 1];
252 break;
253 default:
254 nvgpu_err(g, "unknown clock: %u", api_domain);
255 ret = -EINVAL;
256 break;
257 }
258
259 return ret;
260}
261
262static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk)
263{
264 return clk_prepare_enable(clk->tegra_clk);
265}
266
267static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk)
268{
269 clk_disable_unprepare(clk->tegra_clk);
270}
271
272void nvgpu_linux_init_clk_support(struct gk20a *g)
273{
274 g->ops.clk.get_rate = nvgpu_linux_clk_get_rate;
275 g->ops.clk.set_rate = nvgpu_linux_clk_set_rate;
276 g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe;
277 g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate;
278 g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor;
279 g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate;
280 g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable;
281 g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare;
282 g->ops.clk.clk_domain_get_f_points = nvgpu_linux_clk_get_f_points;
283 g->ops.clk.get_clk_range = nvgpu_clk_get_range;
284 g->ops.clk.clk_get_round_rate = nvgpu_clk_get_round_rate;
285 g->ops.clk.measure_freq = nvgpu_clk_measure_freq;
286}
diff --git a/include/os/linux/clk.h b/include/os/linux/clk.h
deleted file mode 100644
index 614a7fd..0000000
--- a/include/os/linux/clk.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef NVGPU_COMMON_LINUX_CLK_H
18
19struct gk20a;
20void nvgpu_linux_init_clk_support(struct gk20a *g);
21
22#endif
diff --git a/include/os/linux/comptags.c b/include/os/linux/comptags.c
deleted file mode 100644
index ab37197..0000000
--- a/include/os/linux/comptags.c
+++ /dev/null
@@ -1,140 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-buf.h>
18
19#include <nvgpu/comptags.h>
20#include <nvgpu/gk20a.h>
21
22#include <nvgpu/linux/vm.h>
23
24#include "dmabuf.h"
25
26void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
27 struct gk20a_comptags *comptags)
28{
29 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
30 buf->dev);
31
32 if (!comptags)
33 return;
34
35 if (!priv) {
36 memset(comptags, 0, sizeof(*comptags));
37 return;
38 }
39
40 nvgpu_mutex_acquire(&priv->lock);
41 *comptags = priv->comptags;
42 nvgpu_mutex_release(&priv->lock);
43}
44
45int gk20a_alloc_or_get_comptags(struct gk20a *g,
46 struct nvgpu_os_buffer *buf,
47 struct gk20a_comptag_allocator *allocator,
48 struct gk20a_comptags *comptags)
49{
50 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
51 buf->dev);
52 u32 offset;
53 int err;
54 unsigned int ctag_granularity;
55 u32 lines;
56
57 if (!priv)
58 return -ENOSYS;
59
60 nvgpu_mutex_acquire(&priv->lock);
61
62 if (priv->comptags.allocated) {
63 /*
64 * already allocated
65 */
66 *comptags = priv->comptags;
67
68 err = 0;
69 goto exit_locked;
70 }
71
72 ctag_granularity = g->ops.fb.compression_page_size(g);
73 lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
74
75	/* 0-sized buffer? Shouldn't occur, but let's check anyway. */
76 if (lines < 1) {
77 err = -EINVAL;
78 goto exit_locked;
79 }
80
81 /* store the allocator so we can use it when we free the ctags */
82 priv->comptag_allocator = allocator;
83 err = gk20a_comptaglines_alloc(allocator, &offset, lines);
84 if (!err) {
85 priv->comptags.offset = offset;
86 priv->comptags.lines = lines;
87 priv->comptags.needs_clear = true;
88 } else {
89 priv->comptags.offset = 0;
90 priv->comptags.lines = 0;
91 priv->comptags.needs_clear = false;
92 }
93
94 /*
95 * We don't report an error here if comptag alloc failed. The
96 * caller will simply fallback to incompressible kinds. It
97 * would not be safe to re-allocate comptags anyways on
98 * successive calls, as that would break map aliasing.
99 */
100 err = 0;
101 priv->comptags.allocated = true;
102
103 *comptags = priv->comptags;
104
105exit_locked:
106 nvgpu_mutex_release(&priv->lock);
107
108 return err;
109}
110
111bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
112{
113 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
114 buf->dev);
115 bool clear_started = false;
116
117 if (priv) {
118 nvgpu_mutex_acquire(&priv->lock);
119
120 clear_started = priv->comptags.needs_clear;
121
122 if (!clear_started)
123 nvgpu_mutex_release(&priv->lock);
124 }
125
126 return clear_started;
127}
128
129void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf,
130 bool clear_successful)
131{
132 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
133 buf->dev);
134 if (priv) {
135 if (clear_successful)
136 priv->comptags.needs_clear = false;
137
138 nvgpu_mutex_release(&priv->lock);
139 }
140}
diff --git a/include/os/linux/cond.c b/include/os/linux/cond.c
deleted file mode 100644
index 633c34f..0000000
--- a/include/os/linux/cond.c
+++ /dev/null
@@ -1,73 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/wait.h>
18#include <linux/sched.h>
19
20#include <nvgpu/cond.h>
21
22int nvgpu_cond_init(struct nvgpu_cond *cond)
23{
24 init_waitqueue_head(&cond->wq);
25 cond->initialized = true;
26
27 return 0;
28}
29
30void nvgpu_cond_destroy(struct nvgpu_cond *cond)
31{
32 cond->initialized = false;
33}
34
35int nvgpu_cond_signal(struct nvgpu_cond *cond)
36{
37 if (!cond->initialized)
38 return -EINVAL;
39
40 wake_up(&cond->wq);
41
42 return 0;
43}
44
45int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond)
46{
47 if (!cond->initialized)
48 return -EINVAL;
49
50 wake_up_interruptible(&cond->wq);
51
52 return 0;
53}
54
55int nvgpu_cond_broadcast(struct nvgpu_cond *cond)
56{
57 if (!cond->initialized)
58 return -EINVAL;
59
60 wake_up_all(&cond->wq);
61
62 return 0;
63}
64
65int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond)
66{
67 if (!cond->initialized)
68 return -EINVAL;
69
70 wake_up_interruptible_all(&cond->wq);
71
72 return 0;
73}
diff --git a/include/os/linux/ctxsw_trace.c b/include/os/linux/ctxsw_trace.c
deleted file mode 100644
index 2d36d9c..0000000
--- a/include/os/linux/ctxsw_trace.c
+++ /dev/null
@@ -1,792 +0,0 @@
1/*
2 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/wait.h>
18#include <linux/ktime.h>
19#include <linux/uaccess.h>
20#include <linux/poll.h>
21#include <trace/events/gk20a.h>
22#include <uapi/linux/nvgpu.h>
23#include <nvgpu/ctxsw_trace.h>
24#include <nvgpu/kmem.h>
25#include <nvgpu/log.h>
26#include <nvgpu/atomic.h>
27#include <nvgpu/barrier.h>
28#include <nvgpu/gk20a.h>
29#include <nvgpu/channel.h>
30
31#include "gk20a/gr_gk20a.h"
32#include "gk20a/fecs_trace_gk20a.h"
33
34#include "platform_gk20a.h"
35#include "os_linux.h"
36#include "ctxsw_trace.h"
37
38#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
39#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
40
41#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
42
43/* Userland-facing FIFO (one global + eventually one per VM) */
44struct gk20a_ctxsw_dev {
45 struct gk20a *g;
46
47 struct nvgpu_ctxsw_ring_header *hdr;
48 struct nvgpu_gpu_ctxsw_trace_entry *ents;
49 struct nvgpu_gpu_ctxsw_trace_filter filter;
50 bool write_enabled;
51 struct nvgpu_cond readout_wq;
52 size_t size;
53 u32 num_ents;
54
55 nvgpu_atomic_t vma_ref;
56
57 struct nvgpu_mutex write_lock;
58};
59
60
61struct gk20a_ctxsw_trace {
62 struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
63};
64
65static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
66{
67 return (hdr->write_idx == hdr->read_idx);
68}
69
70static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
71{
72 return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
73}
74
75static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
76{
77 return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
78}
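/*
 * Worked example of the index math above (illustrative note): with
 * num_ents == 4, an empty ring has write_idx == read_idx; after three writes
 * (write_idx == 3, read_idx == 0) ring_len() reports 3 and the ring is full,
 * because (3 + 1) % 4 == 0 == read_idx. One slot is always left unused so an
 * empty ring can be told apart from a full one without a separate counter.
 */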
79
80static void nvgpu_set_ctxsw_trace_entry(struct nvgpu_ctxsw_trace_entry *entry_dst,
81 struct nvgpu_gpu_ctxsw_trace_entry *entry_src)
82{
83 entry_dst->tag = entry_src->tag;
84 entry_dst->vmid = entry_src->vmid;
85 entry_dst->seqno = entry_src->seqno;
86 entry_dst->context_id = entry_src->context_id;
87 entry_dst->pid = entry_src->pid;
88 entry_dst->timestamp = entry_src->timestamp;
89}
90
91ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
92 loff_t *off)
93{
94 struct gk20a_ctxsw_dev *dev = filp->private_data;
95 struct gk20a *g = dev->g;
96 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
97 struct nvgpu_ctxsw_trace_entry __user *entry =
98 (struct nvgpu_ctxsw_trace_entry *) buf;
99 struct nvgpu_ctxsw_trace_entry user_entry;
100 size_t copied = 0;
101 int err;
102
103 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
104 "filp=%p buf=%p size=%zu", filp, buf, size);
105
106 nvgpu_mutex_acquire(&dev->write_lock);
107 while (ring_is_empty(hdr)) {
108 nvgpu_mutex_release(&dev->write_lock);
109 if (filp->f_flags & O_NONBLOCK)
110 return -EAGAIN;
111 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
112 !ring_is_empty(hdr), 0);
113 if (err)
114 return err;
115 nvgpu_mutex_acquire(&dev->write_lock);
116 }
117
118 while (size >= sizeof(struct nvgpu_gpu_ctxsw_trace_entry)) {
119 if (ring_is_empty(hdr))
120 break;
121
122 nvgpu_set_ctxsw_trace_entry(&user_entry, &dev->ents[hdr->read_idx]);
123 if (copy_to_user(entry, &user_entry,
124 sizeof(*entry))) {
125 nvgpu_mutex_release(&dev->write_lock);
126 return -EFAULT;
127 }
128
129 hdr->read_idx++;
130 if (hdr->read_idx >= hdr->num_ents)
131 hdr->read_idx = 0;
132
133 entry++;
134 copied += sizeof(*entry);
135 size -= sizeof(*entry);
136 }
137
138 nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
139 hdr->read_idx);
140
141 *off = hdr->read_idx;
142 nvgpu_mutex_release(&dev->write_lock);
143
144 return copied;
145}
146
147static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
148{
149 struct gk20a *g = dev->g;
150
151 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
152 nvgpu_mutex_acquire(&dev->write_lock);
153 dev->write_enabled = true;
154 nvgpu_mutex_release(&dev->write_lock);
155 dev->g->ops.fecs_trace.enable(dev->g);
156 return 0;
157}
158
159static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
160{
161 struct gk20a *g = dev->g;
162
163 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
164 dev->g->ops.fecs_trace.disable(dev->g);
165 nvgpu_mutex_acquire(&dev->write_lock);
166 dev->write_enabled = false;
167 nvgpu_mutex_release(&dev->write_lock);
168 return 0;
169}
170
171static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
172 size_t size)
173{
174 struct gk20a *g = dev->g;
175 void *buf;
176 int err;
177
178 if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
179 return -EBUSY;
180
181 err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
182 if (err)
183 return err;
184
185
186 dev->hdr = buf;
187 dev->ents = (struct nvgpu_gpu_ctxsw_trace_entry *) (dev->hdr + 1);
188 dev->size = size;
189 dev->num_ents = dev->hdr->num_ents;
190
191 nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
192 dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
193 return 0;
194}
195
196int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
197 void **buf, size_t *size)
198{
199 struct nvgpu_ctxsw_ring_header *hdr;
200
201 *size = roundup(*size, PAGE_SIZE);
202 hdr = vmalloc_user(*size);
203 if (!hdr)
204 return -ENOMEM;
205
206 hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
207 hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
208 hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
209 / sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
210 hdr->ent_size = sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
211 hdr->drop_count = 0;
212 hdr->read_idx = 0;
213 hdr->write_idx = 0;
214 hdr->write_seqno = 0;
215
216 *buf = hdr;
217 return 0;
218}
219
220int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
221{
222 struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
223
224 nvgpu_vfree(g, dev->hdr);
225 return 0;
226}
227
228static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
229 struct nvgpu_ctxsw_ring_setup_args *args)
230{
231 struct gk20a *g = dev->g;
232 size_t size = args->size;
233 int ret;
234
235 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
236
237 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
238 return -EINVAL;
239
240 nvgpu_mutex_acquire(&dev->write_lock);
241 ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
242 nvgpu_mutex_release(&dev->write_lock);
243
244 return ret;
245}
246
247static void nvgpu_set_ctxsw_trace_filter_args(struct nvgpu_gpu_ctxsw_trace_filter *filter_dst,
248 struct nvgpu_ctxsw_trace_filter *filter_src)
249{
250 memcpy(filter_dst->tag_bits, filter_src->tag_bits, (NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
251}
252
253static void nvgpu_get_ctxsw_trace_filter_args(struct nvgpu_ctxsw_trace_filter *filter_dst,
254 struct nvgpu_gpu_ctxsw_trace_filter *filter_src)
255{
256 memcpy(filter_dst->tag_bits, filter_src->tag_bits, (NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
257}
258
259static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
260 struct nvgpu_ctxsw_trace_filter_args *args)
261{
262 struct gk20a *g = dev->g;
263
264 nvgpu_mutex_acquire(&dev->write_lock);
265 nvgpu_set_ctxsw_trace_filter_args(&dev->filter, &args->filter);
266 nvgpu_mutex_release(&dev->write_lock);
267
268 if (g->ops.fecs_trace.set_filter)
269 g->ops.fecs_trace.set_filter(g, &dev->filter);
270 return 0;
271}
272
273static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
274 struct nvgpu_ctxsw_trace_filter_args *args)
275{
276 nvgpu_mutex_acquire(&dev->write_lock);
277 nvgpu_get_ctxsw_trace_filter_args(&args->filter, &dev->filter);
278 nvgpu_mutex_release(&dev->write_lock);
279
280 return 0;
281}
282
283static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
284{
285 struct gk20a *g = dev->g;
286 int err;
287
288 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
289
290 err = gk20a_busy(g);
291 if (err)
292 return err;
293
294 if (g->ops.fecs_trace.flush)
295 err = g->ops.fecs_trace.flush(g);
296
297 if (likely(!err))
298 err = g->ops.fecs_trace.poll(g);
299
300 gk20a_idle(g);
301 return err;
302}
303
304int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
305{
306 struct nvgpu_os_linux *l;
307 struct gk20a *g;
308 struct gk20a_ctxsw_trace *trace;
309 struct gk20a_ctxsw_dev *dev;
310 int err;
311 size_t size;
312 u32 n;
313
314 /* only one VM for now */
315 const int vmid = 0;
316
317 l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
318 g = gk20a_get(&l->g);
319 if (!g)
320 return -ENODEV;
321
322 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
323
324 err = gk20a_busy(g);
325 if (err)
326 goto free_ref;
327
328 trace = g->ctxsw_trace;
329 if (!trace) {
330 err = -ENODEV;
331 goto idle;
332 }
333
334 /* Allow only one user for this device */
335 dev = &trace->devs[vmid];
336 nvgpu_mutex_acquire(&dev->write_lock);
337 if (dev->hdr) {
338 err = -EBUSY;
339 goto done;
340 }
341
342 /* By default, allocate ring buffer big enough to accommodate
343 * FECS records with default event filter */
344
345 /* enable all traces by default */
346 NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
347
348 /* compute max number of entries generated with this filter */
349 n = g->ops.fecs_trace.max_entries(g, &dev->filter);
350
351 size = sizeof(struct nvgpu_ctxsw_ring_header) +
352 n * sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
353 nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
354 size, n, sizeof(struct nvgpu_gpu_ctxsw_trace_entry));
355
356 err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
357 if (!err) {
358 filp->private_data = dev;
359 nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
360 filp, dev, size);
361 }
362
363done:
364 nvgpu_mutex_release(&dev->write_lock);
365
366idle:
367 gk20a_idle(g);
368free_ref:
369 if (err)
370 gk20a_put(g);
371 return err;
372}
373
374int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
375{
376 struct gk20a_ctxsw_dev *dev = filp->private_data;
377 struct gk20a *g = dev->g;
378
379 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
380
381 g->ops.fecs_trace.disable(g);
382
383 nvgpu_mutex_acquire(&dev->write_lock);
384 dev->write_enabled = false;
385 nvgpu_mutex_release(&dev->write_lock);
386
387 if (dev->hdr) {
388 dev->g->ops.fecs_trace.free_user_buffer(dev->g);
389 dev->hdr = NULL;
390 }
391 gk20a_put(g);
392 return 0;
393}
394
395long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
396 unsigned long arg)
397{
398 struct gk20a_ctxsw_dev *dev = filp->private_data;
399 struct gk20a *g = dev->g;
400 u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
401 int err = 0;
402
403 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
404
405 if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
406 (_IOC_NR(cmd) == 0) ||
407 (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
408 (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
409 return -EINVAL;
410
411 memset(buf, 0, sizeof(buf));
412 if (_IOC_DIR(cmd) & _IOC_WRITE) {
413 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
414 return -EFAULT;
415 }
416
417 switch (cmd) {
418 case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
419 err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
420 break;
421 case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
422 err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
423 break;
424 case NVGPU_CTXSW_IOCTL_RING_SETUP:
425 err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
426 (struct nvgpu_ctxsw_ring_setup_args *) buf);
427 break;
428 case NVGPU_CTXSW_IOCTL_SET_FILTER:
429 err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
430 (struct nvgpu_ctxsw_trace_filter_args *) buf);
431 break;
432 case NVGPU_CTXSW_IOCTL_GET_FILTER:
433 err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
434 (struct nvgpu_ctxsw_trace_filter_args *) buf);
435 break;
436 case NVGPU_CTXSW_IOCTL_POLL:
437 err = gk20a_ctxsw_dev_ioctl_poll(dev);
438 break;
439 default:
440 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
441 cmd);
442 err = -ENOTTY;
443 }
444
445 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
446 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
447
448 return err;
449}
450
451unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
452{
453 struct gk20a_ctxsw_dev *dev = filp->private_data;
454 struct gk20a *g = dev->g;
455 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
456 unsigned int mask = 0;
457
458 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
459
460 nvgpu_mutex_acquire(&dev->write_lock);
461 poll_wait(filp, &dev->readout_wq.wq, wait);
462 if (!ring_is_empty(hdr))
463 mask |= POLLIN | POLLRDNORM;
464 nvgpu_mutex_release(&dev->write_lock);
465
466 return mask;
467}
468
469static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
470{
471 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
472 struct gk20a *g = dev->g;
473
474 nvgpu_atomic_inc(&dev->vma_ref);
475 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
476 nvgpu_atomic_read(&dev->vma_ref));
477}
478
479static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
480{
481 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
482 struct gk20a *g = dev->g;
483
484 nvgpu_atomic_dec(&dev->vma_ref);
485 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
486 nvgpu_atomic_read(&dev->vma_ref));
487}
488
489static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
490 .open = gk20a_ctxsw_dev_vma_open,
491 .close = gk20a_ctxsw_dev_vma_close,
492};
493
494int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
495 struct vm_area_struct *vma)
496{
497 return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
498}
499
500int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
501{
502 struct gk20a_ctxsw_dev *dev = filp->private_data;
503 struct gk20a *g = dev->g;
504 int ret;
505
506 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
507 vma->vm_start, vma->vm_end);
508
509 ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
510 if (likely(!ret)) {
511 vma->vm_private_data = dev;
512 vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
513 vma->vm_ops->open(vma);
514 }
515
516 return ret;
517}
518
519#ifdef CONFIG_GK20A_CTXSW_TRACE
520static int gk20a_ctxsw_init_devs(struct gk20a *g)
521{
522 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
523 struct gk20a_ctxsw_dev *dev = trace->devs;
524 int err;
525 int i;
526
527 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
528 dev->g = g;
529 dev->hdr = NULL;
530 dev->write_enabled = false;
531 nvgpu_cond_init(&dev->readout_wq);
532 err = nvgpu_mutex_init(&dev->write_lock);
533 if (err)
534 return err;
535 nvgpu_atomic_set(&dev->vma_ref, 0);
536 dev++;
537 }
538 return 0;
539}
540#endif
541
542int gk20a_ctxsw_trace_init(struct gk20a *g)
543{
544#ifdef CONFIG_GK20A_CTXSW_TRACE
545 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
546 int err;
547
548 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
549
550 /* if tracing is not supported, skip this */
551 if (!g->ops.fecs_trace.init)
552 return 0;
553
554 if (likely(trace)) {
555 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
556 return 0;
557 }
558
559 trace = nvgpu_kzalloc(g, sizeof(*trace));
560 if (unlikely(!trace))
561 return -ENOMEM;
562 g->ctxsw_trace = trace;
563
564 err = gk20a_ctxsw_init_devs(g);
565 if (err)
566 goto fail;
567
568 err = g->ops.fecs_trace.init(g);
569 if (unlikely(err))
570 goto fail;
571
572 return 0;
573
574fail:
575 memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
576 nvgpu_kfree(g, trace);
577 g->ctxsw_trace = NULL;
578 return err;
579#else
580 return 0;
581#endif
582}
583
584void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
585{
586#ifdef CONFIG_GK20A_CTXSW_TRACE
587 struct gk20a_ctxsw_trace *trace;
588 struct gk20a_ctxsw_dev *dev;
589 int i;
590
591 if (!g->ctxsw_trace)
592 return;
593
594 trace = g->ctxsw_trace;
595 dev = trace->devs;
596
597 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
598 nvgpu_mutex_destroy(&dev->write_lock);
599 dev++;
600 }
601
602 nvgpu_kfree(g, g->ctxsw_trace);
603 g->ctxsw_trace = NULL;
604
605 g->ops.fecs_trace.deinit(g);
606#endif
607}
608
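/* Append one entry to the per-vmid ring. The record is fully written before
 * the write index is advanced (see the barrier below); entries are dropped
 * when writing is disabled, the ring is full, or the tag is filtered out. */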
609int gk20a_ctxsw_trace_write(struct gk20a *g,
610 struct nvgpu_gpu_ctxsw_trace_entry *entry)
611{
612 struct nvgpu_ctxsw_ring_header *hdr;
613 struct gk20a_ctxsw_dev *dev;
614 int ret = 0;
615 const char *reason;
616 u32 write_idx;
617
618 if (!g->ctxsw_trace)
619 return 0;
620
621 if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
622 return -ENODEV;
623
624 dev = &g->ctxsw_trace->devs[entry->vmid];
625 hdr = dev->hdr;
626
627 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
628 "dev=%p hdr=%p", dev, hdr);
629
630 nvgpu_mutex_acquire(&dev->write_lock);
631
632 if (unlikely(!hdr)) {
633 /* device has been released */
634 ret = -ENODEV;
635 goto done;
636 }
637
638 write_idx = hdr->write_idx;
639 if (write_idx >= dev->num_ents) {
640 nvgpu_err(dev->g,
641 "write_idx=%u out of range [0..%u]",
642 write_idx, dev->num_ents);
643 ret = -ENOSPC;
644 reason = "write_idx out of range";
645 goto disable;
646 }
647
648 entry->seqno = hdr->write_seqno++;
649
650 if (!dev->write_enabled) {
651 ret = -EBUSY;
652 reason = "write disabled";
653 goto drop;
654 }
655
656 if (unlikely(ring_is_full(hdr))) {
657 ret = -ENOSPC;
658 reason = "user fifo full";
659 goto drop;
660 }
661
662 if (!NVGPU_GPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
663 reason = "filtered out";
664 goto filter;
665 }
666
667 nvgpu_log(g, gpu_dbg_ctxsw,
668 "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
669 entry->seqno, entry->context_id, entry->pid,
670 entry->tag, entry->timestamp);
671
672 dev->ents[write_idx] = *entry;
673
674 /* ensure record is written before updating write index */
675 nvgpu_smp_wmb();
676
677 write_idx++;
678 if (unlikely(write_idx >= hdr->num_ents))
679 write_idx = 0;
680 hdr->write_idx = write_idx;
681 nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
682 hdr->read_idx, hdr->write_idx, ring_len(hdr));
683
684 nvgpu_mutex_release(&dev->write_lock);
685 return ret;
686
687disable:
688 g->ops.fecs_trace.disable(g);
689
690drop:
691 hdr->drop_count++;
692
693filter:
694 nvgpu_log(g, gpu_dbg_ctxsw,
695 "dropping seqno=%d context_id=%08x pid=%lld "
696 "tag=%x time=%llx (%s)",
697 entry->seqno, entry->context_id, entry->pid,
698 entry->tag, entry->timestamp, reason);
699
700done:
701 nvgpu_mutex_release(&dev->write_lock);
702 return ret;
703}
704
705void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
706{
707 struct gk20a_ctxsw_dev *dev;
708
709 if (!g->ctxsw_trace)
710 return;
711
712 dev = &g->ctxsw_trace->devs[vmid];
713 nvgpu_cond_signal_interruptible(&dev->readout_wq);
714}
715
716void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
717{
718#ifdef CONFIG_GK20A_CTXSW_TRACE
719 struct nvgpu_gpu_ctxsw_trace_entry entry = {
720 .vmid = 0,
721 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
722 .context_id = 0,
723 .pid = ch->tgid,
724 };
725
726 if (!g->ctxsw_trace)
727 return;
728
729 g->ops.ptimer.read_ptimer(g, &entry.timestamp);
730 gk20a_ctxsw_trace_write(g, &entry);
731 gk20a_ctxsw_trace_wake_up(g, 0);
732#endif
733 trace_gk20a_channel_reset(ch->chid, ch->tsgid);
734}
735
736void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
737{
738#ifdef CONFIG_GK20A_CTXSW_TRACE
739 struct nvgpu_gpu_ctxsw_trace_entry entry = {
740 .vmid = 0,
741 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
742 .context_id = 0,
743 .pid = tsg->tgid,
744 };
745
746 if (!g->ctxsw_trace)
747 return;
748
749 g->ops.ptimer.read_ptimer(g, &entry.timestamp);
750 gk20a_ctxsw_trace_write(g, &entry);
751 gk20a_ctxsw_trace_wake_up(g, 0);
752#endif
753 trace_gk20a_channel_reset(~0, tsg->tsgid);
754}
755
756/*
757 * Convert linux nvgpu ctxsw tags type of the form of NVGPU_CTXSW_TAG_*
758 * into common nvgpu ctxsw tags type of the form of NVGPU_GPU_CTXSW_TAG_*
759 */
760
761u8 nvgpu_gpu_ctxsw_tags_to_common_tags(u8 tags)
762{
763	switch (tags) {
764 case NVGPU_CTXSW_TAG_SOF:
765 return NVGPU_GPU_CTXSW_TAG_SOF;
766 case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
767 return NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST;
768 case NVGPU_CTXSW_TAG_FE_ACK:
769 return NVGPU_GPU_CTXSW_TAG_FE_ACK;
770 case NVGPU_CTXSW_TAG_FE_ACK_WFI:
771 return NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI;
772 case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
773 return NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP;
774 case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
775 return NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP;
776 case NVGPU_CTXSW_TAG_FE_ACK_CILP:
777 return NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP;
778 case NVGPU_CTXSW_TAG_SAVE_END:
779 return NVGPU_GPU_CTXSW_TAG_SAVE_END;
780 case NVGPU_CTXSW_TAG_RESTORE_START:
781 return NVGPU_GPU_CTXSW_TAG_RESTORE_START;
782 case NVGPU_CTXSW_TAG_CONTEXT_START:
783 return NVGPU_GPU_CTXSW_TAG_CONTEXT_START;
784 case NVGPU_CTXSW_TAG_ENGINE_RESET:
785 return NVGPU_GPU_CTXSW_TAG_ENGINE_RESET;
786 case NVGPU_CTXSW_TAG_INVALID_TIMESTAMP:
787 return NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP;
788 }
789
790 WARN_ON(1);
791 return tags;
792}
diff --git a/include/os/linux/ctxsw_trace.h b/include/os/linux/ctxsw_trace.h
deleted file mode 100644
index 88ca7f2..0000000
--- a/include/os/linux/ctxsw_trace.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __CTXSW_TRACE_H__
18#define __CTXSW_TRACE_H__
19
20#include <nvgpu/types.h>
21
22#define GK20A_CTXSW_TRACE_NUM_DEVS 1
23
24struct file;
25struct inode;
26struct poll_table_struct;
27
28struct gk20a;
29
30int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
31int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
32long gk20a_ctxsw_dev_ioctl(struct file *filp,
33 unsigned int cmd, unsigned long arg);
34ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
35 size_t size, loff_t *offs);
36unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
37 struct poll_table_struct *pts);
38
39#endif /* __CTXSW_TRACE_H__ */
diff --git a/include/os/linux/debug.c b/include/os/linux/debug.c
deleted file mode 100644
index b8c4596..0000000
--- a/include/os/linux/debug.c
+++ /dev/null
@@ -1,457 +0,0 @@
1/*
2 * Copyright (C) 2017-2021 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "debug_ce.h"
17#include "debug_fifo.h"
18#include "debug_gr.h"
19#include "debug_allocator.h"
20#include "debug_kmem.h"
21#include "debug_pmu.h"
22#include "debug_sched.h"
23#include "debug_hal.h"
24#include "debug_xve.h"
25#include "debug_ltc.h"
26#include "debug_bios.h"
27#include "os_linux.h"
28#include "platform_gk20a.h"
29
30#include <nvgpu/gk20a.h>
31
32#include <linux/debugfs.h>
33#include <linux/seq_file.h>
34#include <linux/uaccess.h>
35
36#include <nvgpu/debug.h>
37
38unsigned int gk20a_debug_trace_cmdbuf;
39
40static inline void gk20a_debug_write_printk(void *ctx, const char *str,
41 size_t len)
42{
43 pr_info("%s", str);
44}
45
46static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
47 size_t len)
48{
49 seq_write((struct seq_file *)ctx, str, len);
50}
51
52void gk20a_debug_output(struct gk20a_debug_output *o,
53 const char *fmt, ...)
54{
55 va_list args;
56 int len;
57
58 va_start(args, fmt);
59 len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
60 va_end(args);
61 o->fn(o->ctx, o->buf, len);
62}
63
64static int gk20a_gr_dump_regs(struct gk20a *g,
65 struct gk20a_debug_output *o)
66{
67 if (g->ops.gr.dump_gr_regs)
68 gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));
69
70 return 0;
71}
72
73int gk20a_gr_debug_dump(struct gk20a *g)
74{
75 struct gk20a_debug_output o = {
76 .fn = gk20a_debug_write_printk
77 };
78
79 gk20a_gr_dump_regs(g, &o);
80
81 return 0;
82}
83
84static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
85{
86 struct device *dev = s->private;
87 struct gk20a *g = gk20a_get_platform(dev)->g;
88 struct gk20a_debug_output o = {
89 .fn = gk20a_debug_write_to_seqfile,
90 .ctx = s,
91 };
92 int err;
93
94 err = gk20a_busy(g);
95 if (err) {
96 nvgpu_err(g, "failed to power on gpu: %d", err);
97 return -EINVAL;
98 }
99
100 gk20a_gr_dump_regs(g, &o);
101
102 gk20a_idle(g);
103
104 return 0;
105}
106
107void gk20a_debug_dump(struct gk20a *g)
108{
109 struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
110 struct gk20a_debug_output o = {
111 .fn = gk20a_debug_write_printk
112 };
113
114 if (platform->dump_platform_dependencies)
115 platform->dump_platform_dependencies(dev_from_gk20a(g));
116
117 /* HAL only initialized after 1st power-on */
118 if (g->ops.debug.show_dump)
119 g->ops.debug.show_dump(g, &o);
120}
121
122static int gk20a_debug_show(struct seq_file *s, void *unused)
123{
124 struct device *dev = s->private;
125 struct gk20a_debug_output o = {
126 .fn = gk20a_debug_write_to_seqfile,
127 .ctx = s,
128 };
129 struct gk20a *g;
130 int err;
131
132 g = gk20a_get_platform(dev)->g;
133
134 err = gk20a_busy(g);
135 if (err) {
136 nvgpu_err(g, "failed to power on gpu: %d", err);
137		return -EINVAL;
138 }
139
140 /* HAL only initialized after 1st power-on */
141 if (g->ops.debug.show_dump)
142 g->ops.debug.show_dump(g, &o);
143
144 gk20a_idle(g);
145 return 0;
146}
147
148static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
149{
150 return single_open(file, gk20a_gr_debug_show, inode->i_private);
151}
152
153static int gk20a_debug_open(struct inode *inode, struct file *file)
154{
155 return single_open(file, gk20a_debug_show, inode->i_private);
156}
157
158static const struct file_operations gk20a_gr_debug_fops = {
159 .open = gk20a_gr_debug_open,
160 .read = seq_read,
161 .llseek = seq_lseek,
162 .release = single_release,
163};
164
165static const struct file_operations gk20a_debug_fops = {
166 .open = gk20a_debug_open,
167 .read = seq_read,
168 .llseek = seq_lseek,
169 .release = single_release,
170};
171
172void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
173{
174 g->ops.fifo.dump_pbdma_status(g, o);
175 g->ops.fifo.dump_eng_status(g, o);
176
177 gk20a_debug_dump_all_channel_status_ramfc(g, o);
178}
179
180static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
181{
182 char buf[3];
183 struct gk20a *g = file->private_data;
184
185 if (g->mm.disable_bigpage)
186 buf[0] = 'Y';
187 else
188 buf[0] = 'N';
189 buf[1] = '\n';
190 buf[2] = 0x00;
191 return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
192}
193
194static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos)
195{
196 char buf[32];
197 int buf_size;
198 bool bv;
199 struct gk20a *g = file->private_data;
200
201 buf_size = min(count, (sizeof(buf)-1));
202 if (copy_from_user(buf, user_buf, buf_size))
203 return -EFAULT;
204
205 if (strtobool(buf, &bv) == 0) {
206 g->mm.disable_bigpage = bv;
207 gk20a_init_gpu_characteristics(g);
208 }
209
210 return count;
211}
212
213static const struct file_operations disable_bigpage_fops = {
214 .open = simple_open,
215 .read = disable_bigpage_read,
216 .write = disable_bigpage_write,
217};
218
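/* Report cumulative railgate/ungate residency. Time elapsed since the last
 * state transition is credited to whichever state the GPU is currently in. */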
219static int railgate_residency_show(struct seq_file *s, void *data)
220{
221 struct gk20a *g = s->private;
222 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
223 unsigned long time_since_last_state_transition_ms;
224 unsigned long total_rail_gate_time_ms;
225 unsigned long total_rail_ungate_time_ms;
226
227 if (platform && platform->is_railgated && platform->is_railgated(dev_from_gk20a(g))) {
228 time_since_last_state_transition_ms =
229 jiffies_to_msecs(jiffies -
230 g->pstats.last_rail_gate_complete);
231 total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
232 total_rail_gate_time_ms =
233 g->pstats.total_rail_gate_time_ms +
234 time_since_last_state_transition_ms;
235 } else {
236 time_since_last_state_transition_ms =
237 jiffies_to_msecs(jiffies -
238 g->pstats.last_rail_ungate_complete);
239 total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
240 total_rail_ungate_time_ms =
241 g->pstats.total_rail_ungate_time_ms +
242 time_since_last_state_transition_ms;
243 }
244
245 seq_printf(s, "Time with Rails Gated: %lu ms\n"
246 "Time with Rails UnGated: %lu ms\n"
247 "Total railgating cycles: %lu\n",
248 total_rail_gate_time_ms,
249 total_rail_ungate_time_ms,
250 g->pstats.railgating_cycle_count - 1);
251 return 0;
252
253}
254
255static int railgate_residency_open(struct inode *inode, struct file *file)
256{
257 return single_open(file, railgate_residency_show, inode->i_private);
258}
259
260static const struct file_operations railgate_residency_fops = {
261 .open = railgate_residency_open,
262 .read = seq_read,
263 .llseek = seq_lseek,
264 .release = single_release,
265};
266
267static int gk20a_railgating_debugfs_init(struct gk20a *g)
268{
269 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
270 struct dentry *d;
271
272 d = debugfs_create_file(
273 "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
274 &railgate_residency_fops);
275 if (!d)
276 return -ENOMEM;
277
278 return 0;
279}
280static ssize_t timeouts_enabled_read(struct file *file,
281 char __user *user_buf, size_t count, loff_t *ppos)
282{
283 char buf[3];
284 struct gk20a *g = file->private_data;
285
286 if (nvgpu_is_timeouts_enabled(g))
287 buf[0] = 'Y';
288 else
289 buf[0] = 'N';
290 buf[1] = '\n';
291 buf[2] = 0x00;
292 return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
293}
294
295static ssize_t timeouts_enabled_write(struct file *file,
296 const char __user *user_buf, size_t count, loff_t *ppos)
297{
298 char buf[3];
299 int buf_size;
300 bool timeouts_enabled;
301 struct gk20a *g = file->private_data;
302
303 buf_size = min(count, (sizeof(buf)-1));
304 if (copy_from_user(buf, user_buf, buf_size))
305 return -EFAULT;
306
307 if (strtobool(buf, &timeouts_enabled) == 0) {
308 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
309 if (timeouts_enabled == false) {
310 /* requesting to disable timeouts */
311 if (g->timeouts_disabled_by_user == false) {
312 nvgpu_atomic_inc(&g->timeouts_disabled_refcount);
313 g->timeouts_disabled_by_user = true;
314 }
315 } else {
316 /* requesting to enable timeouts */
317 if (g->timeouts_disabled_by_user == true) {
318 nvgpu_atomic_dec(&g->timeouts_disabled_refcount);
319 g->timeouts_disabled_by_user = false;
320 }
321 }
322 nvgpu_mutex_release(&g->dbg_sessions_lock);
323 }
324
325 return count;
326}
327
328static const struct file_operations timeouts_enabled_fops = {
329 .open = simple_open,
330 .read = timeouts_enabled_read,
331 .write = timeouts_enabled_write,
332};
333
334void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
335{
336 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
337 struct device *dev = dev_from_gk20a(g);
338
339 l->debugfs = debugfs_create_dir(dev_name(dev), NULL);
340 if (!l->debugfs)
341 return;
342
343 if (debugfs_symlink)
344 l->debugfs_alias =
345 debugfs_create_symlink(debugfs_symlink,
346 NULL, dev_name(dev));
347
348 debugfs_create_file("status", S_IRUGO, l->debugfs,
349 dev, &gk20a_debug_fops);
350 debugfs_create_file("gr_status", S_IRUGO, l->debugfs,
351 dev, &gk20a_gr_debug_fops);
352 debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
353 l->debugfs, &gk20a_debug_trace_cmdbuf);
354
355 debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
356 l->debugfs, &g->ch_wdt_timeout_ms);
357
358 debugfs_create_u32("disable_syncpoints", S_IRUGO,
359 l->debugfs, &g->disable_syncpoints);
360
361 /* New debug logging API. */
362 debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR,
363 l->debugfs, &g->log_mask);
364 debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
365 l->debugfs, &g->log_trace);
366
367 l->debugfs_ltc_enabled =
368 debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
369 l->debugfs,
370 &g->mm.ltc_enabled_target);
371
372 l->debugfs_gr_idle_timeout_default =
373 debugfs_create_u32("gr_idle_timeout_default_us",
374 S_IRUGO|S_IWUSR, l->debugfs,
375 &g->gr_idle_timeout_default);
376 l->debugfs_timeouts_enabled =
377 debugfs_create_file("timeouts_enabled",
378 S_IRUGO|S_IWUSR,
379 l->debugfs,
380 g,
381 &timeouts_enabled_fops);
382
383 l->debugfs_disable_bigpage =
384 debugfs_create_file("disable_bigpage",
385 S_IRUGO|S_IWUSR,
386 l->debugfs,
387 g,
388 &disable_bigpage_fops);
389
390 l->debugfs_timeslice_low_priority_us =
391 debugfs_create_u32("timeslice_low_priority_us",
392 S_IRUGO|S_IWUSR,
393 l->debugfs,
394 &g->timeslice_low_priority_us);
395 l->debugfs_timeslice_medium_priority_us =
396 debugfs_create_u32("timeslice_medium_priority_us",
397 S_IRUGO|S_IWUSR,
398 l->debugfs,
399 &g->timeslice_medium_priority_us);
400 l->debugfs_timeslice_high_priority_us =
401 debugfs_create_u32("timeslice_high_priority_us",
402 S_IRUGO|S_IWUSR,
403 l->debugfs,
404 &g->timeslice_high_priority_us);
405 l->debugfs_runlist_interleave =
406 debugfs_create_bool("runlist_interleave",
407 S_IRUGO|S_IWUSR,
408 l->debugfs,
409 &g->runlist_interleave);
410 l->debugfs_force_preemption_gfxp =
411 debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
412 l->debugfs,
413 &g->gr.ctx_vars.force_preemption_gfxp);
414
415 l->debugfs_force_preemption_cilp =
416 debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
417 l->debugfs,
418 &g->gr.ctx_vars.force_preemption_cilp);
419
420 l->debugfs_dump_ctxsw_stats =
421 debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
422 S_IRUGO|S_IWUSR, l->debugfs,
423 &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close);
424
425 gr_gk20a_debugfs_init(g);
426 gk20a_pmu_debugfs_init(g);
427 gk20a_railgating_debugfs_init(g);
428#ifdef CONFIG_NVGPU_SUPPORT_CDE
429 gk20a_cde_debugfs_init(g);
430#endif
431 gk20a_ce_debugfs_init(g);
432 nvgpu_alloc_debugfs_init(g);
433 nvgpu_hal_debugfs_init(g);
434 gk20a_fifo_debugfs_init(g);
435 gk20a_sched_debugfs_init(g);
436#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
437 nvgpu_kmem_debugfs_init(g);
438#endif
439 nvgpu_ltc_debugfs_init(g);
440 if (g->pci_vendor_id) {
441 nvgpu_xve_debugfs_init(g);
442 nvgpu_bios_debugfs_init(g);
443 }
444}
445
446void gk20a_debug_deinit(struct gk20a *g)
447{
448 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
449
450 if (!l->debugfs)
451 return;
452
453 gk20a_fifo_debugfs_deinit(g);
454
455 debugfs_remove_recursive(l->debugfs);
456 debugfs_remove(l->debugfs_alias);
457}
diff --git a/include/os/linux/debug_allocator.c b/include/os/linux/debug_allocator.c
deleted file mode 100644
index d63a903..0000000
--- a/include/os/linux/debug_allocator.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_allocator.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/allocator.h>
22
23static int __alloc_show(struct seq_file *s, void *unused)
24{
25 struct nvgpu_allocator *a = s->private;
26
27 nvgpu_alloc_print_stats(a, s, 1);
28
29 return 0;
30}
31
32static int __alloc_open(struct inode *inode, struct file *file)
33{
34 return single_open(file, __alloc_show, inode->i_private);
35}
36
37static const struct file_operations __alloc_fops = {
38 .open = __alloc_open,
39 .read = seq_read,
40 .llseek = seq_lseek,
41 .release = single_release,
42};
43
44void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
45{
46 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
47
48 if (!l->debugfs_allocators)
49 return;
50
51 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
52 l->debugfs_allocators,
53 a, &__alloc_fops);
54}
55
56void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
57{
58}
59
60void nvgpu_alloc_debugfs_init(struct gk20a *g)
61{
62 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
63
64 l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs);
65 if (IS_ERR_OR_NULL(l->debugfs_allocators)) {
66 l->debugfs_allocators = NULL;
67 return;
68 }
69}
diff --git a/include/os/linux/debug_allocator.h b/include/os/linux/debug_allocator.h
deleted file mode 100644
index 1b21cfc..0000000
--- a/include/os/linux/debug_allocator.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
16#define __NVGPU_DEBUG_ALLOCATOR_H__
17
18struct gk20a;
19void nvgpu_alloc_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
diff --git a/include/os/linux/debug_bios.c b/include/os/linux/debug_bios.c
deleted file mode 100644
index f69ccf3..0000000
--- a/include/os/linux/debug_bios.c
+++ /dev/null
@@ -1,60 +0,0 @@
1/*
2 * Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include <nvgpu/types.h>
16
17#include "debug_bios.h"
18#include "os_linux.h"
19
20#include <linux/debugfs.h>
21#include <linux/uaccess.h>
22
23static int bios_version_show(struct seq_file *s, void *unused)
24{
25 struct gk20a *g = s->private;
26
27 seq_printf(s, "Version %02x.%02x.%02x.%02x.%02x\n",
28 (g->bios.vbios_version >> 24) & 0xFF,
29 (g->bios.vbios_version >> 16) & 0xFF,
30 (g->bios.vbios_version >> 8) & 0xFF,
31 (g->bios.vbios_version >> 0) & 0xFF,
32 (g->bios.vbios_oem_version) & 0xFF);
33
34 return 0;
35}
36
37static int bios_version_open(struct inode *inode, struct file *file)
38{
39 return single_open(file, bios_version_show, inode->i_private);
40}
41
42static const struct file_operations bios_version_fops = {
43 .open = bios_version_open,
44 .read = seq_read,
45 .llseek = seq_lseek,
46 .release = single_release,
47};
48
49
50int nvgpu_bios_debugfs_init(struct gk20a *g)
51{
52 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
53 struct dentry *gpu_root = l->debugfs;
54
55 debugfs_create_file("bios", S_IRUGO,
56 gpu_root, g,
57 &bios_version_fops);
58
59 return 0;
60}
diff --git a/include/os/linux/debug_bios.h b/include/os/linux/debug_bios.h
deleted file mode 100644
index f8e7783..0000000
--- a/include/os/linux/debug_bios.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_BIOS_H__
16#define __NVGPU_DEBUG_BIOS_H__
17
18struct gk20a;
19int nvgpu_bios_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_BIOS_H__ */
diff --git a/include/os/linux/debug_cde.c b/include/os/linux/debug_cde.c
deleted file mode 100644
index f0afa6e..0000000
--- a/include/os/linux/debug_cde.c
+++ /dev/null
@@ -1,53 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "platform_gk20a.h"
17#include "os_linux.h"
18
19#include <linux/debugfs.h>
20
21
22static ssize_t gk20a_cde_reload_write(struct file *file,
23 const char __user *userbuf, size_t count, loff_t *ppos)
24{
25 struct nvgpu_os_linux *l = file->private_data;
26 gk20a_cde_reload(l);
27 return count;
28}
29
30static const struct file_operations gk20a_cde_reload_fops = {
31 .open = simple_open,
32 .write = gk20a_cde_reload_write,
33};
34
35void gk20a_cde_debugfs_init(struct gk20a *g)
36{
37 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
38 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
39
40 if (!platform->has_cde)
41 return;
42
43 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
44 l->debugfs, &l->cde_app.shader_parameter);
45 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
46 l->debugfs, &l->cde_app.ctx_count);
47 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
48 l->debugfs, &l->cde_app.ctx_usecount);
49 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
50 l->debugfs, &l->cde_app.ctx_count_top);
51 debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
52 l, &gk20a_cde_reload_fops);
53}
diff --git a/include/os/linux/debug_cde.h b/include/os/linux/debug_cde.h
deleted file mode 100644
index 4895edd..0000000
--- a/include/os/linux/debug_cde.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_CDE_H__
16#define __NVGPU_DEBUG_CDE_H__
17
18struct gk20a;
19void gk20a_cde_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_CDE_H__ */
diff --git a/include/os/linux/debug_ce.c b/include/os/linux/debug_ce.c
deleted file mode 100644
index cea0bb4..0000000
--- a/include/os/linux/debug_ce.c
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_ce.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19
20void gk20a_ce_debugfs_init(struct gk20a *g)
21{
22 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
23
24 debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
25 l->debugfs, &g->ce_app.ctx_count);
26 debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
27 l->debugfs, &g->ce_app.app_state);
28 debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
29 l->debugfs, &g->ce_app.next_ctx_id);
30}
diff --git a/include/os/linux/debug_ce.h b/include/os/linux/debug_ce.h
deleted file mode 100644
index 2a8750c..0000000
--- a/include/os/linux/debug_ce.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_CE_H__
16#define __NVGPU_DEBUG_CE_H__
17
18struct gk20a;
19void gk20a_ce_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_CE_H__ */
diff --git a/include/os/linux/debug_clk_gm20b.c b/include/os/linux/debug_clk_gm20b.c
deleted file mode 100644
index b8b95fd..0000000
--- a/include/os/linux/debug_clk_gm20b.c
+++ /dev/null
@@ -1,280 +0,0 @@
1/*
2 * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include <linux/uaccess.h>
16#include <linux/debugfs.h>
17#include <linux/seq_file.h>
18
19#include <nvgpu/io.h>
20#include <nvgpu/clk_arb.h>
21
22#include "gm20b/clk_gm20b.h"
23#include "os_linux.h"
24#include "platform_gk20a.h"
25
26static int rate_get(void *data, u64 *val)
27{
28 struct gk20a *g = (struct gk20a *)data;
29 struct clk_gk20a *clk = &g->clk;
30
31 *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
32 return 0;
33}
34static int rate_set(void *data, u64 val)
35{
36 struct gk20a *g = (struct gk20a *)data;
37 if (nvgpu_clk_arb_has_active_req(g))
38 return 0;
39 return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val);
40}
41DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
42
43static int pll_reg_show(struct seq_file *s, void *data)
44{
45 struct gk20a *g = s->private;
46 struct nvgpu_clk_pll_debug_data d;
47 u32 reg, m, n, pl, f;
48 int err = 0;
49
50 if (g->ops.clk.get_pll_debug_data) {
51 err = g->ops.clk.get_pll_debug_data(g, &d);
52 if (err)
53 return err;
54 } else {
55 return -EINVAL;
56 }
57
58 seq_printf(s, "bypassctrl = %s, ",
59 d.trim_sys_bypassctrl_val ? "bypass" : "vco");
60 seq_printf(s, "sel_vco = %s, ",
61 d.trim_sys_sel_vco_val ? "vco" : "bypass");
62
63 seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val,
64 d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled",
65 d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked",
66 d.trim_sys_gpcpll_cfg_sync_on ? "sync_on" : "sync_off");
67
68 reg = d.trim_sys_gpcpll_coeff_val;
69 m = d.trim_sys_gpcpll_coeff_mdiv;
70 n = d.trim_sys_gpcpll_coeff_ndiv;
71 pl = d.trim_sys_gpcpll_coeff_pldiv;
72 f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl));
73 seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
74 seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
75
76 seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n",
77 d.trim_sys_gpcpll_dvfs0_val,
78 d.trim_sys_gpcpll_dvfs0_dfs_coeff,
79 d.trim_sys_gpcpll_dvfs0_dfs_det_max,
80 d.trim_sys_gpcpll_dvfs0_dfs_dc_offset);
81
82 return 0;
83}
84
85static int pll_reg_open(struct inode *inode, struct file *file)
86{
87 return single_open(file, pll_reg_show, inode->i_private);
88}
89
90static const struct file_operations pll_reg_fops = {
91 .open = pll_reg_open,
92 .read = seq_read,
93 .llseek = seq_lseek,
94 .release = single_release,
95};
96
97static int pll_reg_raw_show(struct seq_file *s, void *data)
98{
99 struct gk20a *g = s->private;
100 struct nvgpu_clk_pll_debug_data d;
101 u32 reg;
102 int err = 0;
103
104 if (g->ops.clk.get_pll_debug_data) {
105 err = g->ops.clk.get_pll_debug_data(g, &d);
106 if (err)
107 return err;
108 } else {
109 return -EINVAL;
110 }
111
112 seq_puts(s, "GPCPLL REGISTERS:\n");
113 for (reg = d.trim_sys_gpcpll_cfg_reg;
114 reg < d.trim_sys_gpcpll_dvfs2_reg;
115 reg += sizeof(u32))
116 seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
117
118 reg = d.trim_bcast_gpcpll_dvfs2_reg;
119 if (reg)
120 seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
121
122 seq_puts(s, "\nGPC CLK OUT REGISTERS:\n");
123
124 seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg,
125 d.trim_sys_sel_vco_val);
126 seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg,
127 d.trim_sys_gpc2clk_out_val);
128 seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg,
129 d.trim_sys_bypassctrl_val);
130
131 return 0;
132}
133
134static int pll_reg_raw_open(struct inode *inode, struct file *file)
135{
136 return single_open(file, pll_reg_raw_show, inode->i_private);
137}
138
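/* Accept writes of the form "[0x<reg>] = 0x<val>" and forward them to the
 * PLL register write HAL, if one is installed. */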
139static ssize_t pll_reg_raw_write(struct file *file,
140 const char __user *userbuf, size_t count, loff_t *ppos)
141{
142 struct gk20a *g = file->f_path.dentry->d_inode->i_private;
143 char buf[80];
144 u32 reg, val;
145 int err = 0;
146
147 if (sizeof(buf) <= count)
148 return -EINVAL;
149
150 if (copy_from_user(buf, userbuf, count))
151 return -EFAULT;
152
153 /* terminate buffer and trim - white spaces may be appended
154 * at the end when invoked from shell command line */
155 buf[count] = '\0';
156 strim(buf);
157
158 if (sscanf(buf, "[0x%x] = 0x%x", &reg, &val) != 2)
159 return -EINVAL;
160
161	if (g->ops.clk.pll_reg_write)
162		err = g->ops.clk.pll_reg_write(g, reg, val);
163	else
164		err = -EINVAL;
165
166	return err ? err : count;
167}
168
169static const struct file_operations pll_reg_raw_fops = {
170 .open = pll_reg_raw_open,
171 .read = seq_read,
172 .write = pll_reg_raw_write,
173 .llseek = seq_lseek,
174 .release = single_release,
175};
176
177static int monitor_get(void *data, u64 *val)
178{
179 struct gk20a *g = (struct gk20a *)data;
180 int err = 0;
181
182 if (g->ops.clk.get_gpcclk_clock_counter)
183 err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val);
184 else
185 err = -EINVAL;
186
187 return err;
188}
189DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
190
191static int voltage_get(void *data, u64 *val)
192{
193 struct gk20a *g = (struct gk20a *)data;
194 int err = 0;
195
196 if (g->ops.clk.get_voltage)
197 err = g->ops.clk.get_voltage(&g->clk, val);
198 else
199 err = -EINVAL;
200
201 return err;
202}
203DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n");
204
205static int pll_param_show(struct seq_file *s, void *data)
206{
207 struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms();
208
209 seq_printf(s, "ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n",
210 gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope,
211 gpc_pll_params->vco_ctrl);
212 return 0;
213}
214
215static int pll_param_open(struct inode *inode, struct file *file)
216{
217 return single_open(file, pll_param_show, inode->i_private);
218}
219
220static const struct file_operations pll_param_fops = {
221 .open = pll_param_open,
222 .read = seq_read,
223 .llseek = seq_lseek,
224 .release = single_release,
225};
226
227int gm20b_clk_init_debugfs(struct gk20a *g)
228{
229 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
230 struct dentry *d;
231
232 if (!l->debugfs)
233 return -EINVAL;
234
235 d = debugfs_create_file(
236 "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops);
237 if (!d)
238 goto err_out;
239
240 d = debugfs_create_file(
241 "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops);
242 if (!d)
243 goto err_out;
244
245 d = debugfs_create_file("pll_reg_raw",
246 S_IRUGO, l->debugfs, g, &pll_reg_raw_fops);
247 if (!d)
248 goto err_out;
249
250 d = debugfs_create_file(
251 "monitor", S_IRUGO, l->debugfs, g, &monitor_fops);
252 if (!d)
253 goto err_out;
254
255 d = debugfs_create_file(
256 "voltage", S_IRUGO, l->debugfs, g, &voltage_fops);
257 if (!d)
258 goto err_out;
259
260 d = debugfs_create_file(
261 "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops);
262 if (!d)
263 goto err_out;
264
265 d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs,
266 (u32 *)&g->clk.gpc_pll.mode);
267 if (!d)
268 goto err_out;
269
270 d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO,
271 l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq);
272 if (!d)
273 goto err_out;
274
275 return 0;
276
277err_out:
278 pr_err("%s: Failed to make debugfs node\n", __func__);
279 return -ENOMEM;
280}
diff --git a/include/os/linux/debug_clk_gm20b.h b/include/os/linux/debug_clk_gm20b.h
deleted file mode 100644
index 850ad89..0000000
--- a/include/os/linux/debug_clk_gm20b.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __DEBUG_CLK_GM20B_H
18#define __DEBUG_CLK_GM20B_H
19
20#ifdef CONFIG_DEBUG_FS
21int gm20b_clk_init_debugfs(struct gk20a *g);
22#else
23static inline int gm20b_clk_init_debugfs(struct gk20a *g)
24{
25 return 0;
26}
27#endif
28
29#endif
diff --git a/include/os/linux/debug_clk_gp106.c b/include/os/linux/debug_clk_gp106.c
deleted file mode 100644
index 4900c00..0000000
--- a/include/os/linux/debug_clk_gp106.c
+++ /dev/null
@@ -1,193 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/debugfs.h>
18
19#include <nvgpu/clk.h>
20
21#include "os_linux.h"
22
23void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock);
24
25static int gp106_get_rate_show(void *data , u64 *val)
26{
27 struct namemap_cfg *c = (struct namemap_cfg *)data;
28 struct gk20a *g = c->g;
29
30 if (!g->ops.clk.get_rate_cntr)
31 return -EINVAL;
32
33 *val = c->is_counter ? (u64)c->scale * g->ops.clk.get_rate_cntr(g, c) :
34 0 /* TODO PLL read */;
35
36 return 0;
37}
38DEFINE_SIMPLE_ATTRIBUTE(get_rate_fops, gp106_get_rate_show, NULL, "%llu\n");
39
40static int sys_cfc_read(void *data , u64 *val)
41{
42 struct gk20a *g = (struct gk20a *)data;
43 bool bload = boardobjgrpmask_bitget(
44 &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
45 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS);
46
47 /* val = 1 implies CLFC is loaded or enabled */
48 *val = bload ? 1 : 0;
49 return 0;
50}
51static int sys_cfc_write(void *data , u64 val)
52{
53 struct gk20a *g = (struct gk20a *)data;
54 int status;
55 /* val = 1 implies load or enable the CLFC */
56 bool bload = val ? true : false;
57
58 nvgpu_clk_arb_pstate_change_lock(g, true);
59 status = clk_pmu_freq_controller_load(g, bload,
60 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS);
61 nvgpu_clk_arb_pstate_change_lock(g, false);
62
63 return status;
64}
65DEFINE_SIMPLE_ATTRIBUTE(sys_cfc_fops, sys_cfc_read, sys_cfc_write, "%llu\n");
66
67static int ltc_cfc_read(void *data , u64 *val)
68{
69 struct gk20a *g = (struct gk20a *)data;
70 bool bload = boardobjgrpmask_bitget(
71 &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
72 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC);
73
74 /* val = 1 implies CLFC is loaded or enabled */
75 *val = bload ? 1 : 0;
76 return 0;
77}
78static int ltc_cfc_write(void *data , u64 val)
79{
80 struct gk20a *g = (struct gk20a *)data;
81 int status;
82 /* val = 1 implies load or enable the CLFC */
83 bool bload = val ? true : false;
84
85 nvgpu_clk_arb_pstate_change_lock(g, true);
86 status = clk_pmu_freq_controller_load(g, bload,
87 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC);
88 nvgpu_clk_arb_pstate_change_lock(g, false);
89
90 return status;
91}
92DEFINE_SIMPLE_ATTRIBUTE(ltc_cfc_fops, ltc_cfc_read, ltc_cfc_write, "%llu\n");
93
94static int xbar_cfc_read(void *data , u64 *val)
95{
96 struct gk20a *g = (struct gk20a *)data;
97 bool bload = boardobjgrpmask_bitget(
98 &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
99 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR);
100
101 /* val = 1 implies CLFC is loaded or enabled */
102 *val = bload ? 1 : 0;
103 return 0;
104}
105static int xbar_cfc_write(void *data , u64 val)
106{
107 struct gk20a *g = (struct gk20a *)data;
108 int status;
109 /* val = 1 implies load or enable the CLFC */
110 bool bload = val ? true : false;
111
112 nvgpu_clk_arb_pstate_change_lock(g, true);
113 status = clk_pmu_freq_controller_load(g, bload,
114 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR);
115 nvgpu_clk_arb_pstate_change_lock(g, false);
116
117 return status;
118}
119DEFINE_SIMPLE_ATTRIBUTE(xbar_cfc_fops, xbar_cfc_read,
120 xbar_cfc_write, "%llu\n");
121
122static int gpc_cfc_read(void *data , u64 *val)
123{
124 struct gk20a *g = (struct gk20a *)data;
125 bool bload = boardobjgrpmask_bitget(
126 &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
127 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0);
128
129 /* val = 1 implies CLFC is loaded or enabled */
130 *val = bload ? 1 : 0;
131 return 0;
132}
133static int gpc_cfc_write(void *data , u64 val)
134{
135 struct gk20a *g = (struct gk20a *)data;
136 int status;
137 /* val = 1 implies load or enable the CLFC */
138 bool bload = val ? true : false;
139
140 nvgpu_clk_arb_pstate_change_lock(g, true);
141 status = clk_pmu_freq_controller_load(g, bload,
142 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0);
143 nvgpu_clk_arb_pstate_change_lock(g, false);
144
145 return status;
146}
147DEFINE_SIMPLE_ATTRIBUTE(gpc_cfc_fops, gpc_cfc_read, gpc_cfc_write, "%llu\n");
148
149int gp106_clk_init_debugfs(struct gk20a *g)
150{
151 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
152 struct dentry *gpu_root = l->debugfs;
153 struct dentry *clocks_root, *clk_freq_ctlr_root;
154 struct dentry *d;
155 unsigned int i;
156
157 if (NULL == (clocks_root = debugfs_create_dir("clocks", gpu_root)))
158 return -ENOMEM;
159
160 clk_freq_ctlr_root = debugfs_create_dir("clk_freq_ctlr", gpu_root);
161 if (clk_freq_ctlr_root == NULL)
162 return -ENOMEM;
163
164 d = debugfs_create_file("sys", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
165 g, &sys_cfc_fops);
166 d = debugfs_create_file("ltc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
167 g, &ltc_cfc_fops);
168 d = debugfs_create_file("xbar", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
169 g, &xbar_cfc_fops);
170 d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
171 g, &gpc_cfc_fops);
172
173 nvgpu_log(g, gpu_dbg_info, "g=%p", g);
174
175 for (i = 0; i < g->clk.namemap_num; i++) {
176 if (g->clk.clk_namemap[i].is_enable) {
177 d = debugfs_create_file(
178 g->clk.clk_namemap[i].name,
179 S_IRUGO,
180 clocks_root,
181 &g->clk.clk_namemap[i],
182 &get_rate_fops);
183 if (!d)
184 goto err_out;
185 }
186 }
187 return 0;
188
189err_out:
190 pr_err("%s: Failed to make debugfs node\n", __func__);
191 debugfs_remove_recursive(clocks_root);
192 return -ENOMEM;
193}
diff --git a/include/os/linux/debug_clk_gp106.h b/include/os/linux/debug_clk_gp106.h
deleted file mode 100644
index b1d031d..0000000
--- a/include/os/linux/debug_clk_gp106.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __DEBUG_CLK_GP106_H
18#define __DEBUG_CLK_GP106_H
19
20#ifdef CONFIG_DEBUG_FS
21int gp106_clk_init_debugfs(struct gk20a *g);
22#else
23static inline int gp106_clk_init_debugfs(struct gk20a *g)
24{
25 return 0;
26}
27#endif
28
29#endif
diff --git a/include/os/linux/debug_clk_gv100.c b/include/os/linux/debug_clk_gv100.c
deleted file mode 100644
index 623f2b6..0000000
--- a/include/os/linux/debug_clk_gv100.c
+++ /dev/null
@@ -1,193 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/debugfs.h>
18
19#include "gv100/clk_gv100.h"
20
21#include "os_linux.h"
22
23void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock);
24
25static int gv100_get_rate_show(void *data , u64 *val)
26{
27 struct namemap_cfg *c = (struct namemap_cfg *)data;
28 struct gk20a *g = c->g;
29
30 if (!g->ops.clk.get_rate_cntr)
31 return -EINVAL;
32
33 *val = c->is_counter ? (u64)c->scale * g->ops.clk.get_rate_cntr(g, c) :
34 0 /* TODO PLL read */;
35
36 return 0;
37}
38DEFINE_SIMPLE_ATTRIBUTE(get_rate_fops, gv100_get_rate_show, NULL, "%llu\n");
39
40static int sys_cfc_read(void *data , u64 *val)
41{
42 struct gk20a *g = (struct gk20a *)data;
43 bool bload = boardobjgrpmask_bitget(
44 &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
45 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS);
46
47 /* val = 1 implies CLFC is loaded or enabled */
48 *val = bload ? 1 : 0;
49 return 0;
50}
51static int sys_cfc_write(void *data , u64 val)
52{
53 struct gk20a *g = (struct gk20a *)data;
54 int status;
55 /* val = 1 implies load or enable the CLFC */
56 bool bload = val ? true : false;
57
58 nvgpu_clk_arb_pstate_change_lock(g, true);
59 status = clk_pmu_freq_controller_load(g, bload,
60 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS);
61 nvgpu_clk_arb_pstate_change_lock(g, false);
62
63 return status;
64}
65DEFINE_SIMPLE_ATTRIBUTE(sys_cfc_fops, sys_cfc_read, sys_cfc_write, "%llu\n");
66
67static int ltc_cfc_read(void *data , u64 *val)
68{
69 struct gk20a *g = (struct gk20a *)data;
70 bool bload = boardobjgrpmask_bitget(
71 &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
72 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC);
73
74 /* val = 1 implies CLFC is loaded or enabled */
75 *val = bload ? 1 : 0;
76 return 0;
77}
78static int ltc_cfc_write(void *data , u64 val)
79{
80 struct gk20a *g = (struct gk20a *)data;
81 int status;
82 /* val = 1 implies load or enable the CLFC */
83 bool bload = val ? true : false;
84
85 nvgpu_clk_arb_pstate_change_lock(g, true);
86 status = clk_pmu_freq_controller_load(g, bload,
87 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC);
88 nvgpu_clk_arb_pstate_change_lock(g, false);
89
90 return status;
91}
92DEFINE_SIMPLE_ATTRIBUTE(ltc_cfc_fops, ltc_cfc_read, ltc_cfc_write, "%llu\n");
93
94static int xbar_cfc_read(void *data , u64 *val)
95{
96 struct gk20a *g = (struct gk20a *)data;
97 bool bload = boardobjgrpmask_bitget(
98 &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
99 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR);
100
101 /* val = 1 implies CLFC is loaded or enabled */
102 *val = bload ? 1 : 0;
103 return 0;
104}
105static int xbar_cfc_write(void *data , u64 val)
106{
107 struct gk20a *g = (struct gk20a *)data;
108 int status;
109 /* val = 1 implies load or enable the CLFC */
110 bool bload = val ? true : false;
111
112 nvgpu_clk_arb_pstate_change_lock(g, true);
113 status = clk_pmu_freq_controller_load(g, bload,
114 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR);
115 nvgpu_clk_arb_pstate_change_lock(g, false);
116
117 return status;
118}
119DEFINE_SIMPLE_ATTRIBUTE(xbar_cfc_fops, xbar_cfc_read,
120 xbar_cfc_write, "%llu\n");
121
122static int gpc_cfc_read(void *data , u64 *val)
123{
124 struct gk20a *g = (struct gk20a *)data;
125 bool bload = boardobjgrpmask_bitget(
126 &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
127 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0);
128
129 /* val = 1 implies CLFC is loaded or enabled */
130 *val = bload ? 1 : 0;
131 return 0;
132}
133static int gpc_cfc_write(void *data , u64 val)
134{
135 struct gk20a *g = (struct gk20a *)data;
136 int status;
137 /* val = 1 implies load or enable the CLFC */
138 bool bload = val ? true : false;
139
140 nvgpu_clk_arb_pstate_change_lock(g, true);
141 status = clk_pmu_freq_controller_load(g, bload,
142 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0);
143 nvgpu_clk_arb_pstate_change_lock(g, false);
144
145 return status;
146}
147DEFINE_SIMPLE_ATTRIBUTE(gpc_cfc_fops, gpc_cfc_read, gpc_cfc_write, "%llu\n");
148
149int gv100_clk_init_debugfs(struct gk20a *g)
150{
151 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
152 struct dentry *gpu_root = l->debugfs;
153 struct dentry *clocks_root, *clk_freq_ctlr_root;
154 struct dentry *d;
155 unsigned int i;
156
157 if (NULL == (clocks_root = debugfs_create_dir("clocks", gpu_root)))
158 return -ENOMEM;
159
160 clk_freq_ctlr_root = debugfs_create_dir("clk_freq_ctlr", gpu_root);
161 if (clk_freq_ctlr_root == NULL)
162 return -ENOMEM;
163
164 d = debugfs_create_file("sys", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
165 g, &sys_cfc_fops);
166 d = debugfs_create_file("ltc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
167 g, &ltc_cfc_fops);
168 d = debugfs_create_file("xbar", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
169 g, &xbar_cfc_fops);
170 d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
171 g, &gpc_cfc_fops);
172
173 nvgpu_log(g, gpu_dbg_info, "g=%p", g);
174
175 for (i = 0; i < g->clk.namemap_num; i++) {
176 if (g->clk.clk_namemap[i].is_enable) {
177 d = debugfs_create_file(
178 g->clk.clk_namemap[i].name,
179 S_IRUGO,
180 clocks_root,
181 &g->clk.clk_namemap[i],
182 &get_rate_fops);
183 if (!d)
184 goto err_out;
185 }
186 }
187 return 0;
188
189err_out:
190 pr_err("%s: Failed to make debugfs node\n", __func__);
191 debugfs_remove_recursive(clocks_root);
192 return -ENOMEM;
193}
diff --git a/include/os/linux/debug_clk_gv100.h b/include/os/linux/debug_clk_gv100.h
deleted file mode 100644
index 419b4ab..0000000
--- a/include/os/linux/debug_clk_gv100.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __DEBUG_CLK_GV100_H
18#define __DEBUG_CLK_GV100_H
19
20#ifdef CONFIG_DEBUG_FS
21int gv100_clk_init_debugfs(struct gk20a *g);
22#else
23static inline int gv100_clk_init_debugfs(struct gk20a *g)
24{
25 return 0;
26}
27#endif
28
29#endif
diff --git a/include/os/linux/debug_fecs_trace.c b/include/os/linux/debug_fecs_trace.c
deleted file mode 100644
index 7786053..0000000
--- a/include/os/linux/debug_fecs_trace.c
+++ /dev/null
@@ -1,151 +0,0 @@
1/*
2 * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/debugfs.h>
18
19#include <nvgpu/fecs_trace.h>
20
21#include "os_linux.h"
22
23/*
24 * The sequence iterator functions. We simply use the count of the
25 * next line as our internal position.
26 */
27static void *gk20a_fecs_trace_debugfs_ring_seq_start(
28 struct seq_file *s, loff_t *pos)
29{
30 if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
31 return NULL;
32
33 return pos;
34}
35
36static void *gk20a_fecs_trace_debugfs_ring_seq_next(
37 struct seq_file *s, void *v, loff_t *pos)
38{
39 ++(*pos);
40 if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
41 return NULL;
42 return pos;
43}
44
45static void gk20a_fecs_trace_debugfs_ring_seq_stop(
46 struct seq_file *s, void *v)
47{
48}
49
50static int gk20a_fecs_trace_debugfs_ring_seq_show(
51 struct seq_file *s, void *v)
52{
53 loff_t *pos = (loff_t *) v;
54 struct gk20a *g = *(struct gk20a **)s->private;
55 struct gk20a_fecs_trace_record *r =
56 gk20a_fecs_trace_get_record(g, *pos);
57 int i;
58 const u32 invalid_tag = gk20a_fecs_trace_record_ts_tag_invalid_ts_v();
59 u32 tag;
60 u64 timestamp;
61
62 seq_printf(s, "record #%lld (%p)\n", *pos, r);
63 seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo);
64 seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi);
65 if (gk20a_fecs_trace_is_valid_record(r)) {
66 seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr);
67 seq_printf(s, "\tcontext_id=%08x\n", r->context_id);
68 seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr);
69 seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id);
70 for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
71 tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
72 if (tag == invalid_tag)
73 continue;
74 timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
75 timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
76 seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp);
77 }
78 }
79 return 0;
80}
81
82/*
83 * Tie them all together into a set of seq_operations.
84 */
85static const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = {
86 .start = gk20a_fecs_trace_debugfs_ring_seq_start,
87 .next = gk20a_fecs_trace_debugfs_ring_seq_next,
88 .stop = gk20a_fecs_trace_debugfs_ring_seq_stop,
89 .show = gk20a_fecs_trace_debugfs_ring_seq_show
90};
91
92/*
93 * Set up the file operations for our debugfs file. In this case,
94 * all we need is an open function which sets up the sequence ops.
95 */
96
97static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode,
98 struct file *file)
99{
100 struct gk20a **p;
101
102 p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops,
103 sizeof(struct gk20a *));
104 if (!p)
105 return -ENOMEM;
106
107 *p = (struct gk20a *)inode->i_private;
108 return 0;
109};
110
111/*
112 * The file operations structure contains our open function along with
113 * the set of canned seq_ ops.
114 */
115static const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = {
116 .owner = THIS_MODULE,
117 .open = gk20a_ctxsw_debugfs_ring_open,
118 .read = seq_read,
119 .llseek = seq_lseek,
120 .release = seq_release_private
121};
122
123static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val)
124{
125 *val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg);
126 return 0;
127}
128DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops,
129 gk20a_fecs_trace_debugfs_read, NULL, "%llu\n");
130
131static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val)
132{
133 *val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg);
134 return 0;
135}
136DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops,
137 gk20a_fecs_trace_debugfs_write, NULL, "%llu\n");
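As a side note on the two attributes just defined (this describes the standard kernel macro, not anything nvgpu-specific):

/*
 * DEFINE_SIMPLE_ATTRIBUTE(fops, get, set, fmt) generates the file_operations:
 * reads call the getter and format its value with fmt, writes would parse
 * user input and pass it to the setter.  Passing NULL for the setter, as
 * above, leaves both ctxsw_trace_read and ctxsw_trace_write read-only.
 */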
138
139int nvgpu_fecs_trace_init_debugfs(struct gk20a *g)
140{
141 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
142
143 debugfs_create_file("ctxsw_trace_read", 0600, l->debugfs, g,
144 &gk20a_fecs_trace_debugfs_read_fops);
145 debugfs_create_file("ctxsw_trace_write", 0600, l->debugfs, g,
146 &gk20a_fecs_trace_debugfs_write_fops);
147 debugfs_create_file("ctxsw_trace_ring", 0600, l->debugfs, g,
148 &gk20a_fecs_trace_debugfs_ring_fops);
149
150 return 0;
151}
diff --git a/include/os/linux/debug_fecs_trace.h b/include/os/linux/debug_fecs_trace.h
deleted file mode 100644
index 54ebaaf..0000000
--- a/include/os/linux/debug_fecs_trace.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef LINUX_DEBUG_FECS_TRACE_H
18#define LINUX_DEBUG_FECS_TRACE_H
19
20struct gk20a;
21
22#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_GK20A_CTXSW_TRACE)
23int nvgpu_fecs_trace_init_debugfs(struct gk20a *g);
24#else
25static inline int nvgpu_fecs_trace_init_debugfs(struct gk20a *g)
26{
27 return 0;
28}
29#endif
30#endif
diff --git a/include/os/linux/debug_fifo.c b/include/os/linux/debug_fifo.c
deleted file mode 100644
index 98da8bc..0000000
--- a/include/os/linux/debug_fifo.c
+++ /dev/null
@@ -1,376 +0,0 @@
1/*
2 * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_fifo.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/sort.h>
22#include <nvgpu/timers.h>
23#include <nvgpu/channel.h>
24
25void __gk20a_fifo_profile_free(struct nvgpu_ref *ref);
26
27static void *gk20a_fifo_sched_debugfs_seq_start(
28 struct seq_file *s, loff_t *pos)
29{
30 struct gk20a *g = s->private;
31 struct fifo_gk20a *f = &g->fifo;
32
33 if (*pos >= f->num_channels)
34 return NULL;
35
36 return &f->channel[*pos];
37}
38
39static void *gk20a_fifo_sched_debugfs_seq_next(
40 struct seq_file *s, void *v, loff_t *pos)
41{
42 struct gk20a *g = s->private;
43 struct fifo_gk20a *f = &g->fifo;
44
45 ++(*pos);
46 if (*pos >= f->num_channels)
47 return NULL;
48
49 return &f->channel[*pos];
50}
51
52static void gk20a_fifo_sched_debugfs_seq_stop(
53 struct seq_file *s, void *v)
54{
55}
56
57static int gk20a_fifo_sched_debugfs_seq_show(
58 struct seq_file *s, void *v)
59{
60 struct gk20a *g = s->private;
61 struct fifo_gk20a *f = &g->fifo;
62 struct channel_gk20a *ch = v;
63 struct tsg_gk20a *tsg = NULL;
64
65 struct fifo_engine_info_gk20a *engine_info;
66 struct fifo_runlist_info_gk20a *runlist;
67 u32 runlist_id;
68 int ret = SEQ_SKIP;
69 u32 engine_id;
70
71 engine_id = gk20a_fifo_get_gr_engine_id(g);
72 engine_info = (f->engine_info + engine_id);
73 runlist_id = engine_info->runlist_id;
74 runlist = &f->runlist_info[runlist_id];
75
76 if (ch == f->channel) {
77 seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
78 seq_puts(s, " (usecs) (msecs)\n");
79 ret = 0;
80 }
81
82 if (!test_bit(ch->chid, runlist->active_channels))
83 return ret;
84
85 if (gk20a_channel_get(ch)) {
86 tsg = tsg_gk20a_from_ch(ch);
87
88 if (tsg)
89 seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
90 ch->chid,
91 ch->tsgid,
92 ch->tgid,
93 tsg->timeslice_us,
94 ch->timeout_ms_max,
95 tsg->interleave_level,
96 tsg->gr_ctx.graphics_preempt_mode,
97 tsg->gr_ctx.compute_preempt_mode);
98 gk20a_channel_put(ch);
99 }
100 return 0;
101}
102
103static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
104 .start = gk20a_fifo_sched_debugfs_seq_start,
105 .next = gk20a_fifo_sched_debugfs_seq_next,
106 .stop = gk20a_fifo_sched_debugfs_seq_stop,
107 .show = gk20a_fifo_sched_debugfs_seq_show
108};
109
110static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
111 struct file *file)
112{
113 struct gk20a *g = inode->i_private;
114 int err;
115
116 err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
117 if (err)
118 return err;
119
120 nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private);
121
122 ((struct seq_file *)file->private_data)->private = inode->i_private;
123 return 0;
124};
125
126/*
127 * The file operations structure contains our open function along with
128 * the set of canned seq_ ops.
129 */
130static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
131 .owner = THIS_MODULE,
132 .open = gk20a_fifo_sched_debugfs_open,
133 .read = seq_read,
134 .llseek = seq_lseek,
135 .release = seq_release
136};
137
138static int gk20a_fifo_profile_enable(void *data, u64 val)
139{
140 struct gk20a *g = (struct gk20a *) data;
141 struct fifo_gk20a *f = &g->fifo;
142
143
144 nvgpu_mutex_acquire(&f->profile.lock);
145 if (val == 0) {
146 if (f->profile.enabled) {
147 f->profile.enabled = false;
148 nvgpu_ref_put(&f->profile.ref,
149 __gk20a_fifo_profile_free);
150 }
151 } else {
152 if (!f->profile.enabled) {
153			/* Not nvgpu_ref_init() here, as that could race if we
154			 * enable/disable/enable while a kickoff is in flight.
155 */
156 if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
157 f->profile.data = nvgpu_vzalloc(g,
158 FIFO_PROFILING_ENTRIES *
159 sizeof(struct fifo_profile_gk20a));
160 f->profile.sorted = nvgpu_vzalloc(g,
161 FIFO_PROFILING_ENTRIES *
162 sizeof(u64));
163 if (!(f->profile.data && f->profile.sorted)) {
164 nvgpu_vfree(g, f->profile.data);
165 nvgpu_vfree(g, f->profile.sorted);
166 nvgpu_mutex_release(&f->profile.lock);
167 return -ENOMEM;
168 }
169 nvgpu_ref_init(&f->profile.ref);
170 }
171 atomic_set(&f->profile.get.atomic_var, 0);
172 f->profile.enabled = true;
173 }
174 }
175 nvgpu_mutex_release(&f->profile.lock);
176
177 return 0;
178}
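The reference counting above is a little subtle; roughly, the lifecycle implemented by this file is:

/*
 * enable (refcount 0 -> 1): the data/sorted buffers are allocated and
 *                   nvgpu_ref_init() sets the refcount to 1 (or, if a
 *                   previous user still holds the buffers, an extra
 *                   reference is simply taken).
 * gk20a_fifo_profile_acquire(): takes an extra reference for the
 *                   duration of a kickoff, keeping the buffers alive.
 * disable:          drops the "enabled" reference; the buffers are only
 *                   freed by __gk20a_fifo_profile_free() once the last
 *                   in-flight user releases its reference.
 */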
179
180DEFINE_SIMPLE_ATTRIBUTE(
181 gk20a_fifo_profile_enable_debugfs_fops,
182 NULL,
183 gk20a_fifo_profile_enable,
184 "%llu\n"
185);
186
187static int __profile_cmp(const void *a, const void *b)
188{
189 return *((unsigned long long *) a) - *((unsigned long long *) b);
190}
191
192/*
193 * This uses about 800b in the stack, but the function using it is not part
194 * of a callstack where much memory is being used, so it is fine
195 */
196#define PERCENTILE_WIDTH 5
197#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
198
199static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
200 u64 *percentiles, u32 index_end, u32 index_start)
201{
202 unsigned int nelem = 0;
203 unsigned int index;
204 struct fifo_profile_gk20a *profile;
205
206 for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
207 profile = &g->fifo.profile.data[index];
208
209 if (profile->timestamp[index_end] >
210 profile->timestamp[index_start]) {
211 /* This is a valid element */
212 g->fifo.profile.sorted[nelem] =
213 profile->timestamp[index_end] -
214 profile->timestamp[index_start];
215 nelem++;
216 }
217 }
218
219 /* sort it */
220 sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
221 __profile_cmp, NULL);
222
223 /* build ranges */
224 for (index = 0; index < PERCENTILE_RANGES; index++) {
225 percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
226 g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
227 nelem)/100 - 1];
228 }
229 return nelem;
230}
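To make the percentile indexing above concrete, a small worked example (sample count invented for illustration):

/*
 * With nelem = 200 sorted samples and PERCENTILE_WIDTH = 5:
 *   index 0  -> sorted[(5 * 1  * 200) / 100 - 1] = sorted[9]   (~5th  percentile)
 *   index 9  -> sorted[(5 * 10 * 200) / 100 - 1] = sorted[99]  (~50th percentile)
 *   index 19 -> sorted[(5 * 20 * 200) / 100 - 1] = sorted[199] (the maximum)
 * When nelem < PERCENTILE_RANGES (20), every percentile is reported as 0.
 */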
231
232static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
233{
234 struct gk20a *g = s->private;
235 unsigned int get, nelem, index;
236 /*
237	 * 800 B on the stack, but the function is static and only
238	 * called from the debugfs handler.
239 */
240 u64 percentiles_ioctl[PERCENTILE_RANGES];
241 u64 percentiles_kickoff[PERCENTILE_RANGES];
242 u64 percentiles_jobtracking[PERCENTILE_RANGES];
243 u64 percentiles_append[PERCENTILE_RANGES];
244 u64 percentiles_userd[PERCENTILE_RANGES];
245
246 if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
247		seq_puts(s, "Profiling disabled\n");
248 return 0;
249 }
250
251 get = atomic_read(&g->fifo.profile.get.atomic_var);
252
253 __gk20a_fifo_create_stats(g, percentiles_ioctl,
254 PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
255 __gk20a_fifo_create_stats(g, percentiles_kickoff,
256 PROFILE_END, PROFILE_ENTRY);
257 __gk20a_fifo_create_stats(g, percentiles_jobtracking,
258 PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
259 __gk20a_fifo_create_stats(g, percentiles_append,
260 PROFILE_APPEND, PROFILE_JOB_TRACKING);
261 nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
262 PROFILE_END, PROFILE_APPEND);
263
264 seq_printf(s, "Number of kickoffs: %d\n", nelem);
265 seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
266
267 for (index = 0; index < PERCENTILE_RANGES; index++)
268 seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
269 PERCENTILE_WIDTH * (index+1),
270 percentiles_ioctl[index],
271 percentiles_kickoff[index],
272 percentiles_append[index],
273 percentiles_jobtracking[index],
274 percentiles_userd[index]);
275
276 nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
277
278 return 0;
279}
280
281static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
282{
283 return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
284}
285
286static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
287 .open = gk20a_fifo_profile_stats_open,
288 .read = seq_read,
289 .llseek = seq_lseek,
290 .release = single_release,
291};
292
293
294void gk20a_fifo_debugfs_init(struct gk20a *g)
295{
296 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
297 struct dentry *gpu_root = l->debugfs;
298 struct dentry *fifo_root;
299 struct dentry *profile_root;
300
301 fifo_root = debugfs_create_dir("fifo", gpu_root);
302 if (IS_ERR_OR_NULL(fifo_root))
303 return;
304
305 nvgpu_log(g, gpu_dbg_info, "g=%p", g);
306
307 debugfs_create_file("sched", 0600, fifo_root, g,
308 &gk20a_fifo_sched_debugfs_fops);
309
310 profile_root = debugfs_create_dir("profile", fifo_root);
311 if (IS_ERR_OR_NULL(profile_root))
312 return;
313
314 nvgpu_mutex_init(&g->fifo.profile.lock);
315 g->fifo.profile.enabled = false;
316 atomic_set(&g->fifo.profile.get.atomic_var, 0);
317 atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);
318
319 debugfs_create_file("enable", 0600, profile_root, g,
320 &gk20a_fifo_profile_enable_debugfs_fops);
321
322 debugfs_create_file("stats", 0600, profile_root, g,
323 &gk20a_fifo_profile_stats_debugfs_fops);
324
325}
326
327void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx)
328{
329 if (profile)
330 profile->timestamp[idx] = nvgpu_current_time_ns();
331}
332
333void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
334{
335 struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
336 profile.ref);
337 nvgpu_vfree(f->g, f->profile.data);
338 nvgpu_vfree(f->g, f->profile.sorted);
339}
340
341/* Get the next element in the ring buffer of profile entries
342 * and grab a reference to the structure
343 */
344struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
345{
346 struct fifo_gk20a *f = &g->fifo;
347 struct fifo_profile_gk20a *profile;
348 unsigned int index;
349
350 /* If kref is zero, profiling is not enabled */
351 if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
352 return NULL;
353 index = atomic_inc_return(&f->profile.get.atomic_var);
354 profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
355
356 return profile;
357}
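A brief note on the indexing above, since it is easy to misread:

/*
 * atomic_inc_return() hands every caller a unique, monotonically
 * increasing counter value; the "% FIFO_PROFILING_ENTRIES" wrap turns
 * that into a ring-buffer slot, so concurrent kickoffs land in distinct
 * entries and old entries are simply overwritten once the ring wraps.
 */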
358
359/* Free the reference to the structure. This allows deferred cleanups */
360void gk20a_fifo_profile_release(struct gk20a *g,
361 struct fifo_profile_gk20a *profile)
362{
363 nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
364}
365
366void gk20a_fifo_debugfs_deinit(struct gk20a *g)
367{
368 struct fifo_gk20a *f = &g->fifo;
369
370 nvgpu_mutex_acquire(&f->profile.lock);
371 if (f->profile.enabled) {
372 f->profile.enabled = false;
373 nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free);
374 }
375 nvgpu_mutex_release(&f->profile.lock);
376}
diff --git a/include/os/linux/debug_fifo.h b/include/os/linux/debug_fifo.h
deleted file mode 100644
index 46ac853..0000000
--- a/include/os/linux/debug_fifo.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_FIFO_H__
16#define __NVGPU_DEBUG_FIFO_H__
17
18struct gk20a;
19void gk20a_fifo_debugfs_init(struct gk20a *g);
20void gk20a_fifo_debugfs_deinit(struct gk20a *g);
21
22#endif /* __NVGPU_DEBUG_FIFO_H__ */
diff --git a/include/os/linux/debug_gr.c b/include/os/linux/debug_gr.c
deleted file mode 100644
index d54c6d6..0000000
--- a/include/os/linux/debug_gr.c
+++ /dev/null
@@ -1,31 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_gr.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19
20int gr_gk20a_debugfs_init(struct gk20a *g)
21{
22 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
23
24 l->debugfs_gr_default_attrib_cb_size =
25 debugfs_create_u32("gr_default_attrib_cb_size",
26 S_IRUGO|S_IWUSR, l->debugfs,
27 &g->gr.attrib_cb_default_size);
28
29 return 0;
30}
31
diff --git a/include/os/linux/debug_gr.h b/include/os/linux/debug_gr.h
deleted file mode 100644
index 4b46acb..0000000
--- a/include/os/linux/debug_gr.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_GR_H__
16#define __NVGPU_DEBUG_GR_H__
17
18struct gk20a;
19int gr_gk20a_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_GR_H__ */
diff --git a/include/os/linux/debug_hal.c b/include/os/linux/debug_hal.c
deleted file mode 100644
index 031e335..0000000
--- a/include/os/linux/debug_hal.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_hal.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21/* Format and print a single function pointer to the specified seq_file. */
22static void __hal_print_op(struct seq_file *s, void *op_ptr)
23{
24 seq_printf(s, "%pF\n", op_ptr);
25}
26
27/*
28 * Prints an array of function pointer addresses in op_ptrs to the
29 * specified seq_file
30 */
31static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops)
32{
33 int i;
34
35 for (i = 0; i < num_ops; i++)
36 __hal_print_op(s, op_ptrs[i]);
37}
38
39/*
40 * Show file operation, which generates content of the file once. Prints a list
41 * of gpu operations as defined by gops and the corresponding function pointer
42 * destination addresses. Relies on no compiler reordering of struct fields and
43 * the assumption that all members are function pointers.
44 */
45static int __hal_show(struct seq_file *s, void *unused)
46{
47 struct gpu_ops *gops = s->private;
48
49 __hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *));
50
51 return 0;
52}
53
54static int __hal_open(struct inode *inode, struct file *file)
55{
56 return single_open(file, __hal_show, inode->i_private);
57}
58
59static const struct file_operations __hal_fops = {
60 .open = __hal_open,
61 .read = seq_read,
62 .llseek = seq_lseek,
63 .release = single_release,
64};
65
66void nvgpu_hal_debugfs_fini(struct gk20a *g)
67{
68 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
69
70	if (l->debugfs_hal != NULL)
71 debugfs_remove_recursive(l->debugfs_hal);
72}
73
74void nvgpu_hal_debugfs_init(struct gk20a *g)
75{
76 struct dentry *d;
77 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
78
79 if (!l->debugfs)
80 return;
81 l->debugfs_hal = debugfs_create_dir("hal", l->debugfs);
82 if (IS_ERR_OR_NULL(l->debugfs_hal)) {
83 l->debugfs_hal = NULL;
84 return;
85 }
86
87 /* Pass along reference to the gpu_ops struct as private data */
88 d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal,
89 &g->ops, &__hal_fops);
90 if (!d) {
91 nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__);
92 debugfs_remove_recursive(l->debugfs_hal);
93 return;
94 }
95}
diff --git a/include/os/linux/debug_hal.h b/include/os/linux/debug_hal.h
deleted file mode 100644
index eee6f23..0000000
--- a/include/os/linux/debug_hal.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_HAL_H__
16#define __NVGPU_DEBUG_HAL_H__
17
18struct gk20a;
19void nvgpu_hal_debugfs_fini(struct gk20a *g);
20void nvgpu_hal_debugfs_init(struct gk20a *g);
21
22#endif /* __NVGPU_DEBUG_HAL_H__ */
diff --git a/include/os/linux/debug_kmem.c b/include/os/linux/debug_kmem.c
deleted file mode 100644
index a0c7d47..0000000
--- a/include/os/linux/debug_kmem.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/debugfs.h>
15#include <linux/seq_file.h>
16
17#include "os_linux.h"
18#include "debug_kmem.h"
19#include "kmem_priv.h"
20
21/**
22 * to_human_readable_bytes - Determine suffix for passed size.
23 *
24 * @bytes - Number of bytes to generate a suffix for.
25 * @hr_bytes [out] - The human readable number of bytes.
26 * @hr_suffix [out] - The suffix for the HR number of bytes.
27 *
28 * Computes a human readable decomposition of the passed number of bytes. The
29 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
30 * number of bytes is then passed back in @hr_bytes. This returns the following
31 * ranges:
32 *
33 * 0 - 1023 B
34 * 1 - 1023 KB
35 * 1 - 1023 MB
36 * 1 - 1023 GB
37 * 1 - 1023 TB
38 * 1 - ... PB
39 */
40static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
41 const char **hr_suffix)
42{
43 static const char *suffixes[] =
44 { "B", "KB", "MB", "GB", "TB", "PB" };
45
46 u64 suffix_ind = 0;
47
48	while (suffix_ind < ARRAY_SIZE(suffixes) - 1 && bytes >= 1024) {
49 bytes >>= 10;
50 suffix_ind++;
51 }
52
53 /*
54 * Handle case where bytes > 1023PB.
55 */
56 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
57 suffix_ind : ARRAY_SIZE(suffixes) - 1;
58
59 *hr_bytes = bytes;
60 *hr_suffix = suffixes[suffix_ind];
61}
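For a quick sanity check of the decomposition above, a short walk-through of the loop (byte count invented for illustration):

/*
 * Example: bytes = 3,300,000
 *   iteration 1: 3,300,000 >> 10 = 3222  (suffix_ind = 1, "KB")
 *   iteration 2: 3222      >> 10 = 3     (suffix_ind = 2, "MB")
 *   3 < 1024, so the loop stops -> *hr_bytes = 3, *hr_suffix = "MB"
 */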
62
63/**
64 * print_hr_bytes - Print human readable bytes
65 *
66 * @s - A seq_file to print to. May be NULL.
67 * @msg - A message to print before the bytes.
68 * @bytes - Number of bytes.
69 *
70 * Print @msg followed by the human readable decomposition of the passed number
71 * of bytes.
72 *
73 * If @s is NULL then the prints will be made to the kernel log.
74 */
75static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
76{
77 u64 hr_bytes;
78 const char *hr_suffix;
79
80 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
81 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
82}
83
84/**
85 * print_histogram - Build a histogram of the memory usage.
86 *
87 * @tracker The tracker to pull data from.
88 * @s A seq_file to dump info into.
89 */
90static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
91 struct seq_file *s)
92{
93 int i;
94 u64 pot_min, pot_max;
95 u64 nr_buckets;
96 unsigned int *buckets;
97 unsigned int total_allocs;
98 struct nvgpu_rbtree_node *node;
99 static const char histogram_line[] =
100 "++++++++++++++++++++++++++++++++++++++++";
101
102 /*
103 * pot_min is essentially a round down to the nearest power of 2. This
104 * is the start of the histogram. pot_max is just a round up to the
105 * nearest power of two. Each histogram bucket is one power of two so
106 * the histogram buckets are exponential.
107 */
108 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
109 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
110
111 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
112
113 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
114 if (!buckets) {
115 __pstat(s, "OOM: could not allocate bucket storage!?\n");
116 return;
117 }
118
119 /*
120 * Iterate across all of the allocs and determine what bucket they
121 * should go in. Round the size down to the nearest power of two to
122 * find the right bucket.
123 */
124 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
125 while (node) {
126 int b;
127 u64 bucket_min;
128 struct nvgpu_mem_alloc *alloc =
129 nvgpu_mem_alloc_from_rbtree_node(node);
130
131 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
132 if (bucket_min < tracker->min_alloc)
133 bucket_min = tracker->min_alloc;
134
135 b = __ffs(bucket_min) - __ffs(pot_min);
136
137 /*
138		 * Handle the one case where there's an alloc exactly as big as
139 * the maximum bucket size of the largest bucket. Most of the
140 * buckets have an inclusive minimum and exclusive maximum. But
141 * the largest bucket needs to have an _inclusive_ maximum as
142 * well.
143 */
144 if (b == (int)nr_buckets)
145 b--;
146
147 buckets[b]++;
148
149 nvgpu_rbtree_enum_next(&node, node);
150 }
151
152 total_allocs = 0;
153 for (i = 0; i < (int)nr_buckets; i++)
154 total_allocs += buckets[i];
155
156 __pstat(s, "Alloc histogram:\n");
157
158 /*
159 * Actually compute the histogram lines.
160 */
161 for (i = 0; i < (int)nr_buckets; i++) {
162 char this_line[sizeof(histogram_line) + 1];
163 u64 line_length;
164 u64 hr_bytes;
165 const char *hr_suffix;
166
167 memset(this_line, 0, sizeof(this_line));
168
169 /*
170		 * Compute the normalized line length. Can't use floating point
171 * so we will just multiply everything by 1000 and use fixed
172 * point.
173 */
174 line_length = (1000 * buckets[i]) / total_allocs;
175 line_length *= sizeof(histogram_line);
176 line_length /= 1000;
177
178 memset(this_line, '+', line_length);
179
180		__to_human_readable_bytes(1ULL << (__ffs(pot_min) + i),
181 &hr_bytes, &hr_suffix);
182 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
183 hr_bytes, hr_bytes << 1,
184 hr_suffix, buckets[i], this_line);
185 }
186}
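A short illustration of how the power-of-two buckets above fall out (allocation sizes invented for illustration):

/*
 * Example: min_alloc = 96, max_alloc = 9000
 *   pot_min = rounddown_pow_of_two(96)  = 64     (__ffs = 6)
 *   pot_max = roundup_pow_of_two(9000)  = 16384  (__ffs = 14)
 *   nr_buckets = 14 - 6 = 8, covering [64,128), [128,256), ... [8192,16384]
 *   an alloc of 768 rounds down to 512 -> bucket __ffs(512) - __ffs(64) = 3
 */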
187
188/**
189 * nvgpu_kmem_print_stats - Print kmem tracking stats.
190 *
191 * @tracker The tracker to pull data from.
192 * @s A seq_file to dump info into.
193 *
194 * Print stats from a tracker. If @s is non-null then seq_printf() will be
195 * used with @s. Otherwise the stats are pr_info()ed.
196 */
197void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
198 struct seq_file *s)
199{
200 nvgpu_lock_tracker(tracker);
201
202 __pstat(s, "Mem tracker: %s\n\n", tracker->name);
203
204 __pstat(s, "Basic Stats:\n");
205 __pstat(s, " Number of allocs %lld\n",
206 tracker->nr_allocs);
207 __pstat(s, " Number of frees %lld\n",
208 tracker->nr_frees);
209 print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
210 print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
211 print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
212 print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
213 print_hr_bytes(s, " Bytes allocated (real) ",
214 tracker->bytes_alloced_real);
215 print_hr_bytes(s, " Bytes freed (real) ",
216 tracker->bytes_freed_real);
217 __pstat(s, "\n");
218
219 print_histogram(tracker, s);
220
221 nvgpu_unlock_tracker(tracker);
222}
223
224static int __kmem_tracking_show(struct seq_file *s, void *unused)
225{
226 struct nvgpu_mem_alloc_tracker *tracker = s->private;
227
228 nvgpu_kmem_print_stats(tracker, s);
229
230 return 0;
231}
232
233static int __kmem_tracking_open(struct inode *inode, struct file *file)
234{
235 return single_open(file, __kmem_tracking_show, inode->i_private);
236}
237
238static const struct file_operations __kmem_tracking_fops = {
239 .open = __kmem_tracking_open,
240 .read = seq_read,
241 .llseek = seq_lseek,
242 .release = single_release,
243};
244
245static int __kmem_traces_dump_tracker(struct gk20a *g,
246 struct nvgpu_mem_alloc_tracker *tracker,
247 struct seq_file *s)
248{
249 struct nvgpu_rbtree_node *node;
250
251 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
252 while (node) {
253 struct nvgpu_mem_alloc *alloc =
254 nvgpu_mem_alloc_from_rbtree_node(node);
255
256 kmem_print_mem_alloc(g, alloc, s);
257
258 nvgpu_rbtree_enum_next(&node, node);
259 }
260
261 return 0;
262}
263
264static int __kmem_traces_show(struct seq_file *s, void *unused)
265{
266 struct gk20a *g = s->private;
267
268 nvgpu_lock_tracker(g->vmallocs);
269	seq_puts(s, "Outstanding vmallocs:\n");
270 __kmem_traces_dump_tracker(g, g->vmallocs, s);
271 seq_puts(s, "\n");
272 nvgpu_unlock_tracker(g->vmallocs);
273
274 nvgpu_lock_tracker(g->kmallocs);
275	seq_puts(s, "Outstanding kmallocs:\n");
276 __kmem_traces_dump_tracker(g, g->kmallocs, s);
277 nvgpu_unlock_tracker(g->kmallocs);
278
279 return 0;
280}
281
282static int __kmem_traces_open(struct inode *inode, struct file *file)
283{
284 return single_open(file, __kmem_traces_show, inode->i_private);
285}
286
287static const struct file_operations __kmem_traces_fops = {
288 .open = __kmem_traces_open,
289 .read = seq_read,
290 .llseek = seq_lseek,
291 .release = single_release,
292};
293
294void nvgpu_kmem_debugfs_init(struct gk20a *g)
295{
296 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
297 struct dentry *node;
298
299 l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
300 if (IS_ERR_OR_NULL(l->debugfs_kmem))
301 return;
302
303 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
304 l->debugfs_kmem,
305 g->vmallocs, &__kmem_tracking_fops);
306 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
307 l->debugfs_kmem,
308 g->kmallocs, &__kmem_tracking_fops);
309 node = debugfs_create_file("traces", S_IRUGO,
310 l->debugfs_kmem,
311 g, &__kmem_traces_fops);
312}
diff --git a/include/os/linux/debug_kmem.h b/include/os/linux/debug_kmem.h
deleted file mode 100644
index 44322b5..0000000
--- a/include/os/linux/debug_kmem.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_KMEM_H__
16#define __NVGPU_DEBUG_KMEM_H__
17
18struct gk20a;
19#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
20void nvgpu_kmem_debugfs_init(struct gk20a *g);
21#endif
22
23#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/include/os/linux/debug_ltc.c b/include/os/linux/debug_ltc.c
deleted file mode 100644
index 1b4c221..0000000
--- a/include/os/linux/debug_ltc.c
+++ /dev/null
@@ -1,94 +0,0 @@
1/*
2 * Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_ltc.h"
16#include "os_linux.h"
17
18#include <nvgpu/gk20a.h>
19
20#include <linux/debugfs.h>
21#include <linux/uaccess.h>
22
23static ssize_t ltc_intr_illegal_compstat_read(struct file *file,
24 char __user *user_buf, size_t count, loff_t *ppos)
25{
26 char buf[3];
27 struct gk20a *g = file->private_data;
28
29 if (g->ltc_intr_en_illegal_compstat)
30 buf[0] = 'Y';
31 else
32 buf[0] = 'N';
33 buf[1] = '\n';
34 buf[2] = 0x00;
35
36 return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
37}
38
39static ssize_t ltc_intr_illegal_compstat_write(struct file *file,
40 const char __user *user_buf, size_t count, loff_t *ppos)
41{
42 char buf[3];
43 int buf_size;
44 bool intr_illegal_compstat_enabled;
45 struct gk20a *g = file->private_data;
46 int err;
47
48 if (!g->ops.ltc.intr_en_illegal_compstat)
49 return -EINVAL;
50
51 buf_size = min(count, (sizeof(buf)-1));
52 if (copy_from_user(buf, user_buf, buf_size))
53 return -EFAULT;
54
55 err = gk20a_busy(g);
56 if (err)
57 return err;
58
59 if (strtobool(buf, &intr_illegal_compstat_enabled) == 0) {
60 g->ops.ltc.intr_en_illegal_compstat(g,
61 intr_illegal_compstat_enabled);
62 g->ltc_intr_en_illegal_compstat = intr_illegal_compstat_enabled;
63 }
64
65 gk20a_idle(g);
66
67 return buf_size;
68}
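Assuming the per-GPU debugfs directory is mounted in the usual place (the exact path depends on the platform, so <gpu> below is a placeholder), the node can be exercised from the shell:

/*
 * # cat  /sys/kernel/debug/<gpu>/ltc/intr_illegal_compstat_enable   -> "Y" or "N"
 * # echo Y > /sys/kernel/debug/<gpu>/ltc/intr_illegal_compstat_enable
 * # echo 0 > /sys/kernel/debug/<gpu>/ltc/intr_illegal_compstat_enable
 * strtobool() accepts Y/y/1 to enable and N/n/0 to disable.
 */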
69
70static const struct file_operations ltc_intr_illegal_compstat_fops = {
71 .open = simple_open,
72 .read = ltc_intr_illegal_compstat_read,
73 .write = ltc_intr_illegal_compstat_write,
74};
75
76int nvgpu_ltc_debugfs_init(struct gk20a *g)
77{
78 struct dentry *d;
79 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
80 struct dentry *gpu_root = l->debugfs;
81
82 l->debugfs_ltc = debugfs_create_dir("ltc", gpu_root);
83 if (IS_ERR_OR_NULL(l->debugfs_ltc))
84 return -ENODEV;
85
86	/* debugfs node to enable/disable illegal_compstat */
87 d = debugfs_create_file("intr_illegal_compstat_enable", 0600,
88 l->debugfs_ltc, g,
89 &ltc_intr_illegal_compstat_fops);
90 if (!d)
91 return -ENOMEM;
92
93 return 0;
94}
diff --git a/include/os/linux/debug_ltc.h b/include/os/linux/debug_ltc.h
deleted file mode 100644
index 3ad734c..0000000
--- a/include/os/linux/debug_ltc.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_LTC_H__
16#define __NVGPU_DEBUG_LTC_H__
17
18struct gk20a;
19int nvgpu_ltc_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_LTC_H__ */
diff --git a/include/os/linux/debug_pmgr.c b/include/os/linux/debug_pmgr.c
deleted file mode 100644
index c264978..0000000
--- a/include/os/linux/debug_pmgr.c
+++ /dev/null
@@ -1,104 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/debugfs.h>
18
19#include "os_linux.h"
20
21#include "pmgr/pmgr.h"
22
23static int pmgr_pwr_devices_get_power_u64(void *data, u64 *p)
24{
25 struct gk20a *g = (struct gk20a *)data;
26 int err;
27 u32 val;
28
29 err = pmgr_pwr_devices_get_power(g, &val);
30 *p = val;
31
32 return err;
33}
34
35static int pmgr_pwr_devices_get_current_u64(void *data, u64 *p)
36{
37 struct gk20a *g = (struct gk20a *)data;
38 int err;
39 u32 val;
40
41 err = pmgr_pwr_devices_get_current(g, &val);
42 *p = val;
43
44 return err;
45}
46
47static int pmgr_pwr_devices_get_voltage_u64(void *data, u64 *p)
48{
49 struct gk20a *g = (struct gk20a *)data;
50 int err;
51 u32 val;
52
53 err = pmgr_pwr_devices_get_voltage(g, &val);
54 *p = val;
55
56 return err;
57}
58
59DEFINE_SIMPLE_ATTRIBUTE(
60 pmgr_power_ctrl_fops, pmgr_pwr_devices_get_power_u64, NULL, "%llu\n");
61
62DEFINE_SIMPLE_ATTRIBUTE(
63 pmgr_current_ctrl_fops, pmgr_pwr_devices_get_current_u64, NULL, "%llu\n");
64
65DEFINE_SIMPLE_ATTRIBUTE(
66 pmgr_voltage_ctrl_fops, pmgr_pwr_devices_get_voltage_u64, NULL, "%llu\n");
67
68static void pmgr_debugfs_init(struct gk20a *g)
69{
70 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
71 struct dentry *dbgentry;
72
73 dbgentry = debugfs_create_file(
74 "power", S_IRUGO, l->debugfs, g, &pmgr_power_ctrl_fops);
75 if (!dbgentry)
76 nvgpu_err(g, "debugfs entry create failed for power");
77
78 dbgentry = debugfs_create_file(
79 "current", S_IRUGO, l->debugfs, g, &pmgr_current_ctrl_fops);
80 if (!dbgentry)
81 nvgpu_err(g, "debugfs entry create failed for current");
82
83 dbgentry = debugfs_create_file(
84 "voltage", S_IRUGO, l->debugfs, g, &pmgr_voltage_ctrl_fops);
85 if (!dbgentry)
86 nvgpu_err(g, "debugfs entry create failed for voltage");
87}
88
89int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l)
90{
91 struct gk20a *g = &l->g;
92 int ret = 0;
93
94 if (!nvgpu_is_enabled(g, NVGPU_PMU_PSTATE))
95 return ret;
96
97 if (!g->ops.clk.support_pmgr_domain)
98 return ret;
99
100 pmgr_debugfs_init(g);
101
102 return ret;
103}
104
diff --git a/include/os/linux/debug_pmgr.h b/include/os/linux/debug_pmgr.h
deleted file mode 100644
index bd6c556..0000000
--- a/include/os/linux/debug_pmgr.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __LINUX_DEBUG_PMGR_H
18#define __LINUX_DEBUG_PMGR_H
19
20#ifdef CONFIG_DEBUG_FS
21int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l);
22#else
23static inline int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l)
24{
25 return 0;
26}
27#endif
28#endif
diff --git a/include/os/linux/debug_pmu.c b/include/os/linux/debug_pmu.c
deleted file mode 100644
index f3e36d0..0000000
--- a/include/os/linux/debug_pmu.c
+++ /dev/null
@@ -1,484 +0,0 @@
1/*
2 * Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include <nvgpu/enabled.h>
16#include "debug_pmu.h"
17#include "os_linux.h"
18
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21#include <linux/uaccess.h>
22
23static int lpwr_debug_show(struct seq_file *s, void *data)
24{
25 struct gk20a *g = s->private;
26
27 if (g->ops.pmu.pmu_pg_engines_feature_list &&
28 g->ops.pmu.pmu_pg_engines_feature_list(g,
29 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
30 NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) {
31 seq_printf(s, "PSTATE: %u\n"
32 "RPPG Enabled: %u\n"
33 "RPPG ref count: %u\n"
34 "RPPG state: %u\n"
35 "MSCG Enabled: %u\n"
36 "MSCG pstate state: %u\n"
37 "MSCG transition state: %u\n",
38 g->ops.clk_arb.get_current_pstate(g),
39 g->elpg_enabled, g->pmu.elpg_refcnt,
40 g->pmu.elpg_stat, g->mscg_enabled,
41 g->pmu.mscg_stat, g->pmu.mscg_transition_state);
42
43 } else
44 seq_printf(s, "ELPG Enabled: %u\n"
45 "ELPG ref count: %u\n"
46 "ELPG state: %u\n",
47 g->elpg_enabled, g->pmu.elpg_refcnt,
48 g->pmu.elpg_stat);
49
50 return 0;
51
52}
53
54static int lpwr_debug_open(struct inode *inode, struct file *file)
55{
56 return single_open(file, lpwr_debug_show, inode->i_private);
57}
58
59static const struct file_operations lpwr_debug_fops = {
60 .open = lpwr_debug_open,
61 .read = seq_read,
62 .llseek = seq_lseek,
63 .release = single_release,
64};
65
66static int mscg_stat_show(struct seq_file *s, void *data)
67{
68 struct gk20a *g = s->private;
69 u64 total_ingating, total_ungating, residency, divisor, dividend;
70 struct pmu_pg_stats_data pg_stat_data = { 0 };
71 int err;
72
73 /* Don't unnecessarily power on the device */
74 if (g->power_on) {
75 err = gk20a_busy(g);
76 if (err)
77 return err;
78
79 nvgpu_pmu_get_pg_stats(g,
80 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
81 gk20a_idle(g);
82 }
83 total_ingating = g->pg_ingating_time_us +
84 (u64)pg_stat_data.ingating_time;
85 total_ungating = g->pg_ungating_time_us +
86 (u64)pg_stat_data.ungating_time;
87
88 divisor = total_ingating + total_ungating;
89
90 /* We compute the residency on a scale of 1000 */
91 dividend = total_ingating * 1000;
92
93 if (divisor)
94 residency = div64_u64(dividend, divisor);
95 else
96 residency = 0;
97
98 seq_printf(s,
99 "Time in MSCG: %llu us\n"
100 "Time out of MSCG: %llu us\n"
101 "MSCG residency ratio: %llu\n"
102 "MSCG Entry Count: %u\n"
103 "MSCG Avg Entry latency %u\n"
104 "MSCG Avg Exit latency %u\n",
105 total_ingating, total_ungating,
106 residency, pg_stat_data.gating_cnt,
107 pg_stat_data.avg_entry_latency_us,
108 pg_stat_data.avg_exit_latency_us);
109 return 0;
110
111}
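The residency figure above is a fixed-point ratio on a scale of 1000; a quick worked example (numbers invented for illustration):

/*
 * total_ingating = 900,000 us, total_ungating = 100,000 us
 *   divisor   = 1,000,000
 *   dividend  = 900,000 * 1000 = 900,000,000
 *   residency = 900,000,000 / 1,000,000 = 900, i.e. 90.0% of time in MSCG
 */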
112
113static int mscg_stat_open(struct inode *inode, struct file *file)
114{
115 return single_open(file, mscg_stat_show, inode->i_private);
116}
117
118static const struct file_operations mscg_stat_fops = {
119 .open = mscg_stat_open,
120 .read = seq_read,
121 .llseek = seq_lseek,
122 .release = single_release,
123};
124
125static int mscg_transitions_show(struct seq_file *s, void *data)
126{
127 struct gk20a *g = s->private;
128 struct pmu_pg_stats_data pg_stat_data = { 0 };
129 u32 total_gating_cnt;
130 int err;
131
132 if (g->power_on) {
133 err = gk20a_busy(g);
134 if (err)
135 return err;
136
137 nvgpu_pmu_get_pg_stats(g,
138 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
139 gk20a_idle(g);
140 }
141 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
142
143 seq_printf(s, "%u\n", total_gating_cnt);
144 return 0;
145
146}
147
148static int mscg_transitions_open(struct inode *inode, struct file *file)
149{
150 return single_open(file, mscg_transitions_show, inode->i_private);
151}
152
153static const struct file_operations mscg_transitions_fops = {
154 .open = mscg_transitions_open,
155 .read = seq_read,
156 .llseek = seq_lseek,
157 .release = single_release,
158};
159
160static int elpg_stat_show(struct seq_file *s, void *data)
161{
162 struct gk20a *g = s->private;
163 struct pmu_pg_stats_data pg_stat_data = { 0 };
164 u64 total_ingating, total_ungating, residency, divisor, dividend;
165 int err;
166
167 /* Don't unnecessarily power on the device */
168 if (g->power_on) {
169 err = gk20a_busy(g);
170 if (err)
171 return err;
172
173 nvgpu_pmu_get_pg_stats(g,
174 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
175 gk20a_idle(g);
176 }
177 total_ingating = g->pg_ingating_time_us +
178 (u64)pg_stat_data.ingating_time;
179 total_ungating = g->pg_ungating_time_us +
180 (u64)pg_stat_data.ungating_time;
181 divisor = total_ingating + total_ungating;
182
183 /* We compute the residency on a scale of 1000 */
184 dividend = total_ingating * 1000;
185
186 if (divisor)
187 residency = div64_u64(dividend, divisor);
188 else
189 residency = 0;
190
191 seq_printf(s,
192 "Time in ELPG: %llu us\n"
193 "Time out of ELPG: %llu us\n"
194 "ELPG residency ratio: %llu\n"
195 "ELPG Entry Count: %u\n"
196 "ELPG Avg Entry latency %u us\n"
197 "ELPG Avg Exit latency %u us\n",
198 total_ingating, total_ungating,
199 residency, pg_stat_data.gating_cnt,
200 pg_stat_data.avg_entry_latency_us,
201 pg_stat_data.avg_exit_latency_us);
202 return 0;
203
204}
205
206static int elpg_stat_open(struct inode *inode, struct file *file)
207{
208 return single_open(file, elpg_stat_show, inode->i_private);
209}
210
211static const struct file_operations elpg_stat_fops = {
212 .open = elpg_stat_open,
213 .read = seq_read,
214 .llseek = seq_lseek,
215 .release = single_release,
216};
217
218static int elpg_transitions_show(struct seq_file *s, void *data)
219{
220 struct gk20a *g = s->private;
221 struct pmu_pg_stats_data pg_stat_data = { 0 };
222 u32 total_gating_cnt;
223 int err;
224
225 if (g->power_on) {
226 err = gk20a_busy(g);
227 if (err)
228 return err;
229
230 nvgpu_pmu_get_pg_stats(g,
231 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
232 gk20a_idle(g);
233 }
234 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
235
236 seq_printf(s, "%u\n", total_gating_cnt);
237 return 0;
238
239}
240
241static int elpg_transitions_open(struct inode *inode, struct file *file)
242{
243 return single_open(file, elpg_transitions_show, inode->i_private);
244}
245
246static const struct file_operations elpg_transitions_fops = {
247 .open = elpg_transitions_open,
248 .read = seq_read,
249 .llseek = seq_lseek,
250 .release = single_release,
251};
252
253static int falc_trace_show(struct seq_file *s, void *data)
254{
255 struct gk20a *g = s->private;
256 struct nvgpu_pmu *pmu = &g->pmu;
257 u32 i = 0, j = 0, k, l, m;
258 char part_str[40];
259 void *tracebuffer;
260 char *trace;
261 u32 *trace1;
262
263 /* allocate system memory to copy pmu trace buffer */
264 tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
265 if (tracebuffer == NULL)
266 return -ENOMEM;
267
268 /* read pmu traces into system memory buffer */
269 nvgpu_mem_rd_n(g, &pmu->trace_buf,
270 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);
271
272 trace = (char *)tracebuffer;
273 trace1 = (u32 *)tracebuffer;
274
275 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
276 for (j = 0; j < 0x40; j++)
277 if (trace1[(i / 4) + j])
278 break;
279 if (j == 0x40)
280 break;
281 seq_printf(s, "Index %x: ", trace1[(i / 4)]);
282 l = 0;
283 m = 0;
284 while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
285 if (k >= 40)
286 break;
287 strncpy(part_str, (trace+i+20+m), k);
288 part_str[k] = 0;
289 seq_printf(s, "%s0x%x", part_str,
290 trace1[(i / 4) + 1 + l]);
291 l++;
292 m += k + 2;
293 }
294 seq_printf(s, "%s", (trace+i+20+m));
295 }
296
297 nvgpu_kfree(g, tracebuffer);
298 return 0;
299}
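For readers unfamiliar with the PMU trace layout, the loop above appears to assume the following record format (inferred from the parsing code, not from a spec):

/*
 * - the buffer is walked in 0x40-byte records; if the next 0x40 32-bit
 *   words are all zero, the walk stops,
 * - the first 32-bit word of a record is printed as "Index %x",
 * - the text starting at byte offset 20 is a printf-like template; each
 *   hex placeholder located by nvgpu_find_hex_in_string() is replaced
 *   with the next 32-bit argument word of the same record.
 */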
300
301static int falc_trace_open(struct inode *inode, struct file *file)
302{
303 return single_open(file, falc_trace_show, inode->i_private);
304}
305
306static const struct file_operations falc_trace_fops = {
307 .open = falc_trace_open,
308 .read = seq_read,
309 .llseek = seq_lseek,
310 .release = single_release,
311};
312
313static int perfmon_events_enable_show(struct seq_file *s, void *data)
314{
315 struct gk20a *g = s->private;
316
317 seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
318 return 0;
319
320}
321
322static int perfmon_events_enable_open(struct inode *inode, struct file *file)
323{
324 return single_open(file, perfmon_events_enable_show, inode->i_private);
325}
326
327static ssize_t perfmon_events_enable_write(struct file *file,
328 const char __user *userbuf, size_t count, loff_t *ppos)
329{
330 struct seq_file *s = file->private_data;
331 struct gk20a *g = s->private;
332 unsigned long val = 0;
333 char buf[40];
334 int buf_size;
335 int err;
336
337 memset(buf, 0, sizeof(buf));
338 buf_size = min(count, (sizeof(buf)-1));
339
340 if (copy_from_user(buf, userbuf, buf_size))
341 return -EFAULT;
342
343 if (kstrtoul(buf, 10, &val) < 0)
344 return -EINVAL;
345
346 /* Don't turn on gk20a unnecessarily */
347 if (g->power_on) {
348 err = gk20a_busy(g);
349 if (err)
350 return err;
351
352 if (val && !g->pmu.perfmon_sampling_enabled &&
353 nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
354 g->pmu.perfmon_sampling_enabled = true;
355 g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
356 } else if (!val && g->pmu.perfmon_sampling_enabled &&
357 nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
358 g->pmu.perfmon_sampling_enabled = false;
359 g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
360 }
361 gk20a_idle(g);
362 } else {
363 g->pmu.perfmon_sampling_enabled = val ? true : false;
364 }
365
366 return count;
367}
368
369static const struct file_operations perfmon_events_enable_fops = {
370 .open = perfmon_events_enable_open,
371 .read = seq_read,
372 .write = perfmon_events_enable_write,
373 .llseek = seq_lseek,
374 .release = single_release,
375};
376
377static int perfmon_events_count_show(struct seq_file *s, void *data)
378{
379 struct gk20a *g = s->private;
380
381 seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
382 return 0;
383
384}
385
386static int perfmon_events_count_open(struct inode *inode, struct file *file)
387{
388 return single_open(file, perfmon_events_count_show, inode->i_private);
389}
390
391static const struct file_operations perfmon_events_count_fops = {
392 .open = perfmon_events_count_open,
393 .read = seq_read,
394 .llseek = seq_lseek,
395 .release = single_release,
396};
397
398static int security_show(struct seq_file *s, void *data)
399{
400 struct gk20a *g = s->private;
401
402 seq_printf(s, "%d\n", g->pmu.pmu_mode);
403 return 0;
404
405}
406
407static int security_open(struct inode *inode, struct file *file)
408{
409 return single_open(file, security_show, inode->i_private);
410}
411
412static const struct file_operations security_fops = {
413 .open = security_open,
414 .read = seq_read,
415 .llseek = seq_lseek,
416 .release = single_release,
417};
418
419int gk20a_pmu_debugfs_init(struct gk20a *g)
420{
421 struct dentry *d;
422 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
423
424 d = debugfs_create_file(
425 "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g,
426 &lpwr_debug_fops);
427 if (!d)
428 goto err_out;
429
430 d = debugfs_create_file(
431 "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
432 &mscg_stat_fops);
433 if (!d)
434 goto err_out;
435
436 d = debugfs_create_file(
437 "mscg_transitions", S_IRUGO, l->debugfs, g,
438 &mscg_transitions_fops);
439 if (!d)
440 goto err_out;
441
442 d = debugfs_create_file(
443 "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
444 &elpg_stat_fops);
445 if (!d)
446 goto err_out;
447
448 d = debugfs_create_file(
449 "elpg_transitions", S_IRUGO, l->debugfs, g,
450 &elpg_transitions_fops);
451 if (!d)
452 goto err_out;
453
454 d = debugfs_create_file(
455 "pmu_security", S_IRUGO, l->debugfs, g,
456 &security_fops);
457 if (!d)
458 goto err_out;
459
460 /* No access to PMU if virtual */
461 if (!g->is_virtual) {
462 d = debugfs_create_file(
463 "falc_trace", S_IRUGO, l->debugfs, g,
464 &falc_trace_fops);
465 if (!d)
466 goto err_out;
467
468 d = debugfs_create_file(
469 "perfmon_events_enable", S_IRUGO, l->debugfs, g,
470 &perfmon_events_enable_fops);
471 if (!d)
472 goto err_out;
473
474 d = debugfs_create_file(
475 "perfmon_events_count", S_IRUGO, l->debugfs, g,
476 &perfmon_events_count_fops);
477 if (!d)
478 goto err_out;
479 }
480 return 0;
481err_out:
482 pr_err("%s: Failed to make debugfs node\n", __func__);
483 return -ENOMEM;
484}
diff --git a/include/os/linux/debug_pmu.h b/include/os/linux/debug_pmu.h
deleted file mode 100644
index c4e3243..0000000
--- a/include/os/linux/debug_pmu.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_PMU_H__
16#define __NVGPU_DEBUG_PMU_H__
17
18struct gk20a;
19int gk20a_pmu_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/include/os/linux/debug_sched.c b/include/os/linux/debug_sched.c
deleted file mode 100644
index fa43dc4..0000000
--- a/include/os/linux/debug_sched.c
+++ /dev/null
@@ -1,79 +0,0 @@
1/*
2 * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_sched.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
22{
23 struct gk20a *g = s->private;
24 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
25 bool sched_busy = true;
26
27 int n = sched->bitmap_size / sizeof(u64);
28 int i;
29 int err;
30
31 err = gk20a_busy(g);
32 if (err)
33 return err;
34
35 if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
36 sched_busy = false;
37 nvgpu_mutex_release(&sched->busy_lock);
38 }
39
40 seq_printf(s, "control_locked=%d\n", sched->control_locked);
41 seq_printf(s, "busy=%d\n", sched_busy);
42 seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
43
44 nvgpu_mutex_acquire(&sched->status_lock);
45
46 seq_puts(s, "active_tsg_bitmap\n");
47 for (i = 0; i < n; i++)
48 seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
49
50 seq_puts(s, "recent_tsg_bitmap\n");
51 for (i = 0; i < n; i++)
52 seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
53
54 nvgpu_mutex_release(&sched->status_lock);
55
56 gk20a_idle(g);
57
58 return 0;
59}
60
61static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
62{
63 return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
64}
65
66static const struct file_operations gk20a_sched_debugfs_fops = {
67 .open = gk20a_sched_debugfs_open,
68 .read = seq_read,
69 .llseek = seq_lseek,
70 .release = single_release,
71};
72
73void gk20a_sched_debugfs_init(struct gk20a *g)
74{
75 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
76
77 debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs,
78 g, &gk20a_sched_debugfs_fops);
79}
diff --git a/include/os/linux/debug_sched.h b/include/os/linux/debug_sched.h
deleted file mode 100644
index 34a8f55..0000000
--- a/include/os/linux/debug_sched.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_SCHED_H__
16#define __NVGPU_DEBUG_SCHED_H__
17
18struct gk20a;
19void gk20a_sched_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_SCHED_H__ */
diff --git a/include/os/linux/debug_therm_gp106.c b/include/os/linux/debug_therm_gp106.c
deleted file mode 100644
index dfe3946..0000000
--- a/include/os/linux/debug_therm_gp106.c
+++ /dev/null
@@ -1,49 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/debugfs.h>
18
19#include "os_linux.h"
20
21static int therm_get_internal_sensor_curr_temp(void *data, u64 *val)
22{
23 struct gk20a *g = (struct gk20a *)data;
24 u32 readval;
25 int err;
26
27 if (!g->ops.therm.get_internal_sensor_curr_temp)
28 return -EINVAL;
29
30 err = g->ops.therm.get_internal_sensor_curr_temp(g, &readval);
31 if (!err)
32 *val = readval;
33
34 return err;
35}
36DEFINE_SIMPLE_ATTRIBUTE(therm_ctrl_fops, therm_get_internal_sensor_curr_temp, NULL, "%llu\n");
37
38int gp106_therm_init_debugfs(struct gk20a *g)
39{
40 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
41 struct dentry *dbgentry;
42
43 dbgentry = debugfs_create_file(
44 "temp", S_IRUGO, l->debugfs, g, &therm_ctrl_fops);
45 if (!dbgentry)
46 nvgpu_err(g, "debugfs entry create failed for therm_curr_temp");
47
48 return 0;
49}
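
A note on the "temp" node above: DEFINE_SIMPLE_ATTRIBUTE() builds the open/read/release file_operations around a pair of u64 get/set callbacks, with the format string controlling how the value is printed. A minimal sketch of the same pattern, assuming a hypothetical read-only counter (the names here are illustrative, not part of nvgpu):

#include <linux/debugfs.h>
#include <linux/fs.h>

static u64 demo_counter;

/* 'get' callback: fill *val and return 0 on success. */
static int demo_counter_get(void *data, u64 *val)
{
	*val = demo_counter;
	return 0;
}

/* A NULL 'set' callback makes the node effectively read-only. */
DEFINE_SIMPLE_ATTRIBUTE(demo_counter_fops, demo_counter_get, NULL, "%llu\n");

/* Created the same way as the node above:
 * debugfs_create_file("demo", S_IRUGO, parent, NULL, &demo_counter_fops);
 */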
diff --git a/include/os/linux/debug_therm_gp106.h b/include/os/linux/debug_therm_gp106.h
deleted file mode 100644
index 3e9380d..0000000
--- a/include/os/linux/debug_therm_gp106.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __DEBUG_THERM_GP106_H
18#define __DEBUG_THERM_GP106_H
19
20#ifdef CONFIG_DEBUG_FS
21int gp106_therm_init_debugfs(struct gk20a *g);
22#else
23static inline int gp106_therm_init_debugfs(struct gk20a *g)
24{
25 return 0;
26}
27#endif
28
29#endif
diff --git a/include/os/linux/debug_xve.c b/include/os/linux/debug_xve.c
deleted file mode 100644
index 128d316..0000000
--- a/include/os/linux/debug_xve.c
+++ /dev/null
@@ -1,177 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include <nvgpu/types.h>
16#include <nvgpu/xve.h>
17#include <nvgpu/timers.h>
18
19#include "debug_xve.h"
20#include "os_linux.h"
21
22#include <linux/debugfs.h>
23#include <linux/uaccess.h>
24
25static ssize_t xve_link_speed_write(struct file *filp,
26 const char __user *buff,
27 size_t len, loff_t *off)
28{
29 struct gk20a *g = ((struct seq_file *)filp->private_data)->private;
30 char kbuff[16];
31 u32 buff_size, check_len;
32 u32 link_speed = 0;
33 int ret;
34
35 buff_size = min_t(size_t, 16, len);
36
37 memset(kbuff, 0, 16);
38 if (copy_from_user(kbuff, buff, buff_size))
39 return -EFAULT;
40
41 check_len = strlen("Gen1");
42 if (strncmp(kbuff, "Gen1", check_len) == 0)
43 link_speed = GPU_XVE_SPEED_2P5;
44 else if (strncmp(kbuff, "Gen2", check_len) == 0)
45 link_speed = GPU_XVE_SPEED_5P0;
46 else if (strncmp(kbuff, "Gen3", check_len) == 0)
47 link_speed = GPU_XVE_SPEED_8P0;
48 else
49 nvgpu_err(g, "%s: Unknown PCIe speed: %s",
50 __func__, kbuff);
51
52 if (!link_speed)
53 return -EINVAL;
54
55 /* Brief pause... To help rate limit this. */
56 nvgpu_msleep(250);
57
58 /*
59 * And actually set the speed. Yay.
60 */
61 ret = g->ops.xve.set_speed(g, link_speed);
62 if (ret)
63 return ret;
64
65 return len;
66}
67
68static int xve_link_speed_show(struct seq_file *s, void *unused)
69{
70 struct gk20a *g = s->private;
71 u32 speed;
72 int err;
73
74 err = g->ops.xve.get_speed(g, &speed);
75 if (err)
76 return err;
77
78 seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed));
79
80 return 0;
81}
82
83static int xve_link_speed_open(struct inode *inode, struct file *file)
84{
85 return single_open(file, xve_link_speed_show, inode->i_private);
86}
87
88static const struct file_operations xve_link_speed_fops = {
89 .open = xve_link_speed_open,
90 .read = seq_read,
91 .write = xve_link_speed_write,
92 .llseek = seq_lseek,
93 .release = single_release,
94};
95
96static int xve_available_speeds_show(struct seq_file *s, void *unused)
97{
98 struct gk20a *g = s->private;
99 u32 available_speeds;
100
101 g->ops.xve.available_speeds(g, &available_speeds);
102
103 seq_puts(s, "Available PCIe bus speeds:\n");
104 if (available_speeds & GPU_XVE_SPEED_2P5)
105 seq_puts(s, " Gen1\n");
106 if (available_speeds & GPU_XVE_SPEED_5P0)
107 seq_puts(s, " Gen2\n");
108 if (available_speeds & GPU_XVE_SPEED_8P0)
109 seq_puts(s, " Gen3\n");
110
111 return 0;
112}
113
114static int xve_available_speeds_open(struct inode *inode, struct file *file)
115{
116 return single_open(file, xve_available_speeds_show, inode->i_private);
117}
118
119static const struct file_operations xve_available_speeds_fops = {
120 .open = xve_available_speeds_open,
121 .read = seq_read,
122 .llseek = seq_lseek,
123 .release = single_release,
124};
125
126static int xve_link_control_status_show(struct seq_file *s, void *unused)
127{
128 struct gk20a *g = s->private;
129 u32 link_status;
130
131 link_status = g->ops.xve.get_link_control_status(g);
132 seq_printf(s, "0x%08x\n", link_status);
133
134 return 0;
135}
136
137static int xve_link_control_status_open(struct inode *inode, struct file *file)
138{
139 return single_open(file, xve_link_control_status_show, inode->i_private);
140}
141
142static const struct file_operations xve_link_control_status_fops = {
143 .open = xve_link_control_status_open,
144 .read = seq_read,
145 .llseek = seq_lseek,
146 .release = single_release,
147};
148
149int nvgpu_xve_debugfs_init(struct gk20a *g)
150{
151 int err = -ENODEV;
152
153 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
154 struct dentry *gpu_root = l->debugfs;
155
156 l->debugfs_xve = debugfs_create_dir("xve", gpu_root);
157 if (IS_ERR_OR_NULL(l->debugfs_xve))
158 goto fail;
159
160 /*
161 * These are just debug nodes. If they fail to get made it's not worth
162 * worrying the higher level SW.
163 */
164 debugfs_create_file("link_speed", S_IRUGO,
165 l->debugfs_xve, g,
166 &xve_link_speed_fops);
167 debugfs_create_file("available_speeds", S_IRUGO,
168 l->debugfs_xve, g,
169 &xve_available_speeds_fops);
170 debugfs_create_file("link_control_status", S_IRUGO,
171 l->debugfs_xve, g,
172 &xve_link_control_status_fops);
173
174 err = 0;
175fail:
176 return err;
177}
diff --git a/include/os/linux/debug_xve.h b/include/os/linux/debug_xve.h
deleted file mode 100644
index f3b1ac5..0000000
--- a/include/os/linux/debug_xve.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_XVE_H__
16#define __NVGPU_DEBUG_XVE_H__
17
18struct gk20a;
19int nvgpu_xve_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_XVE_H__ */
diff --git a/include/os/linux/dmabuf.c b/include/os/linux/dmabuf.c
deleted file mode 100644
index e8e3313..0000000
--- a/include/os/linux/dmabuf.c
+++ /dev/null
@@ -1,219 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/device.h>
18#include <linux/dma-buf.h>
19#include <linux/scatterlist.h>
20
21#include <nvgpu/comptags.h>
22#include <nvgpu/enabled.h>
23#include <nvgpu/gk20a.h>
24
25#include <nvgpu/linux/vm.h>
26
27#include "gk20a/fence_gk20a.h"
28
29#include "platform_gk20a.h"
30#include "dmabuf.h"
31#include "os_linux.h"
32#include "dmabuf_vidmem.h"
33
34static void gk20a_mm_delete_priv(void *_priv)
35{
36 struct gk20a_buffer_state *s, *s_tmp;
37 struct gk20a_dmabuf_priv *priv = _priv;
38 struct gk20a *g;
39
40 if (!priv)
41 return;
42
43 g = priv->g;
44
45 if (priv->comptags.allocated && priv->comptags.lines) {
46 BUG_ON(!priv->comptag_allocator);
47 gk20a_comptaglines_free(priv->comptag_allocator,
48 priv->comptags.offset,
49 priv->comptags.lines);
50 }
51
52 /* Free buffer states */
53 nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states,
54 gk20a_buffer_state, list) {
55 gk20a_fence_put(s->fence);
56 nvgpu_list_del(&s->list);
57 nvgpu_kfree(g, s);
58 }
59
60 nvgpu_kfree(g, priv);
61}
62
63enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
64 struct dma_buf *dmabuf)
65{
66 struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf);
67 bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY);
68
69 if (buf_owner == NULL) {
70 /* Not nvgpu-allocated, assume system memory */
71 return APERTURE_SYSMEM;
72 } else if (WARN_ON(buf_owner == g && unified_memory)) {
73 /* Looks like our video memory, but this gpu doesn't support
74 * it. Warn about a bug and bail out */
75 nvgpu_warn(g,
76 "dmabuf is our vidmem but we don't have local vidmem");
77 return APERTURE_INVALID;
78 } else if (buf_owner != g) {
79 /* Someone else's vidmem */
80 return APERTURE_INVALID;
81 } else {
82 /* Yay, buf_owner == g */
83 return APERTURE_VIDMEM;
84 }
85}
86
87struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
88 struct dma_buf_attachment **attachment)
89{
90 struct gk20a_dmabuf_priv *priv;
91
92 priv = dma_buf_get_drvdata(dmabuf, dev);
93 if (WARN_ON(!priv))
94 return ERR_PTR(-EINVAL);
95
96 nvgpu_mutex_acquire(&priv->lock);
97
98 if (priv->pin_count == 0) {
99 priv->attach = dma_buf_attach(dmabuf, dev);
100 if (IS_ERR(priv->attach)) {
101 nvgpu_mutex_release(&priv->lock);
102 return (struct sg_table *)priv->attach;
103 }
104
105 priv->sgt = dma_buf_map_attachment(priv->attach,
106 DMA_BIDIRECTIONAL);
107 if (IS_ERR(priv->sgt)) {
108 dma_buf_detach(dmabuf, priv->attach);
109 nvgpu_mutex_release(&priv->lock);
110 return priv->sgt;
111 }
112 }
113
114 priv->pin_count++;
115 nvgpu_mutex_release(&priv->lock);
116 *attachment = priv->attach;
117 return priv->sgt;
118}
119
120void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
121 struct dma_buf_attachment *attachment,
122 struct sg_table *sgt)
123{
124 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
125 dma_addr_t dma_addr;
126
127 if (IS_ERR(priv) || !priv)
128 return;
129
130 nvgpu_mutex_acquire(&priv->lock);
131 WARN_ON(priv->sgt != sgt);
132 WARN_ON(priv->attach != attachment);
133 priv->pin_count--;
134 WARN_ON(priv->pin_count < 0);
135 dma_addr = sg_dma_address(priv->sgt->sgl);
136 if (priv->pin_count == 0) {
137 dma_buf_unmap_attachment(priv->attach, priv->sgt,
138 DMA_BIDIRECTIONAL);
139 dma_buf_detach(dmabuf, priv->attach);
140 }
141 nvgpu_mutex_release(&priv->lock);
142}
143
144int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
145{
146 struct gk20a *g = gk20a_get_platform(dev)->g;
147 struct gk20a_dmabuf_priv *priv;
148
149 priv = dma_buf_get_drvdata(dmabuf, dev);
150 if (likely(priv))
151 return 0;
152
153 nvgpu_mutex_acquire(&g->mm.priv_lock);
154 priv = dma_buf_get_drvdata(dmabuf, dev);
155 if (priv)
156 goto priv_exist_or_err;
157
158 priv = nvgpu_kzalloc(g, sizeof(*priv));
159 if (!priv) {
160 priv = ERR_PTR(-ENOMEM);
161 goto priv_exist_or_err;
162 }
163
164 nvgpu_mutex_init(&priv->lock);
165 nvgpu_init_list_node(&priv->states);
166 priv->g = g;
167 dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
168
169priv_exist_or_err:
170 nvgpu_mutex_release(&g->mm.priv_lock);
171 if (IS_ERR(priv))
172 return -ENOMEM;
173
174 return 0;
175}
176
177int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
178 u64 offset, struct gk20a_buffer_state **state)
179{
180 int err = 0;
181 struct gk20a_dmabuf_priv *priv;
182 struct gk20a_buffer_state *s;
183 struct device *dev = dev_from_gk20a(g);
184
185 if (WARN_ON(offset >= (u64)dmabuf->size))
186 return -EINVAL;
187
188 err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
189 if (err)
190 return err;
191
192 priv = dma_buf_get_drvdata(dmabuf, dev);
193 if (WARN_ON(!priv))
194 return -ENOSYS;
195
196 nvgpu_mutex_acquire(&priv->lock);
197
198 nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list)
199 if (s->offset == offset)
200 goto out;
201
202 /* State not found, create state. */
203 s = nvgpu_kzalloc(g, sizeof(*s));
204 if (!s) {
205 err = -ENOMEM;
206 goto out;
207 }
208
209 s->offset = offset;
210 nvgpu_init_list_node(&s->list);
211 nvgpu_mutex_init(&s->lock);
212 nvgpu_list_add_tail(&s->list, &priv->states);
213
214out:
215 nvgpu_mutex_release(&priv->lock);
216 if (!err)
217 *state = s;
218 return err;
219}
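
For context on the pair above: gk20a_mm_pin()/gk20a_mm_unpin() keep a per-buffer pin_count so that only the first pin attaches and maps the dma-buf and only the last unpin detaches it. A minimal usage sketch, assuming the caller already holds a dma_buf reference and the GPU's struct device (error handling abbreviated):

struct dma_buf_attachment *attach;
struct sg_table *sgt;

sgt = gk20a_mm_pin(dev, dmabuf, &attach);
if (IS_ERR(sgt))
	return PTR_ERR(sgt);

/* ... map the returned scatter-gather list through the GMMU ... */

gk20a_mm_unpin(dev, dmabuf, attach, sgt);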
diff --git a/include/os/linux/dmabuf.h b/include/os/linux/dmabuf.h
deleted file mode 100644
index 8399eaa..0000000
--- a/include/os/linux/dmabuf.h
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __COMMON_LINUX_DMABUF_H__
18#define __COMMON_LINUX_DMABUF_H__
19
20#include <nvgpu/comptags.h>
21#include <nvgpu/list.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/gmmu.h>
24
25struct sg_table;
26struct dma_buf;
27struct dma_buf_attachment;
28struct device;
29
30struct gk20a;
31struct gk20a_buffer_state;
32
33struct gk20a_dmabuf_priv {
34 struct nvgpu_mutex lock;
35
36 struct gk20a *g;
37
38 struct gk20a_comptag_allocator *comptag_allocator;
39 struct gk20a_comptags comptags;
40
41 struct dma_buf_attachment *attach;
42 struct sg_table *sgt;
43
44 int pin_count;
45
46 struct nvgpu_list_node states;
47
48 u64 buffer_id;
49};
50
51struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
52 struct dma_buf_attachment **attachment);
53void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
54 struct dma_buf_attachment *attachment,
55 struct sg_table *sgt);
56
57int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
58
59int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
60 u64 offset, struct gk20a_buffer_state **state);
61
62#endif
diff --git a/include/os/linux/dmabuf_vidmem.c b/include/os/linux/dmabuf_vidmem.c
deleted file mode 100644
index bada5dc..0000000
--- a/include/os/linux/dmabuf_vidmem.c
+++ /dev/null
@@ -1,269 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-buf.h>
18#include <linux/version.h>
19#include <uapi/linux/nvgpu.h>
20
21#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD
22#include <linux/platform/tegra/tegra_fd.h>
23#endif
24
25#include <nvgpu/dma.h>
26#include <nvgpu/enabled.h>
27#include <nvgpu/vidmem.h>
28#include <nvgpu/nvgpu_mem.h>
29#include <nvgpu/page_allocator.h>
30#include <nvgpu/gk20a.h>
31
32#include <nvgpu/linux/vm.h>
33#include <nvgpu/linux/dma.h>
34
35#include "gk20a/mm_gk20a.h"
36#include "dmabuf_vidmem.h"
37
38bool nvgpu_addr_is_vidmem_page_alloc(u64 addr)
39{
40 return !!(addr & 1ULL);
41}
42
43void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr)
44{
45 /* set bit 0 to indicate vidmem allocation */
46 sg_dma_address(sgl) = (addr | 1ULL);
47}
48
49struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl)
50{
51 u64 addr;
52
53 addr = sg_dma_address(sgl);
54
55 if (nvgpu_addr_is_vidmem_page_alloc(addr))
56 addr = addr & ~1ULL;
57 else
58 WARN_ON(1);
59
60 return (struct nvgpu_page_alloc *)(uintptr_t)addr;
61}
62
63static struct sg_table *gk20a_vidbuf_map_dma_buf(
64 struct dma_buf_attachment *attach, enum dma_data_direction dir)
65{
66 struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv;
67
68 return buf->mem->priv.sgt;
69}
70
71static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
72 struct sg_table *sgt,
73 enum dma_data_direction dir)
74{
75}
76
77static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
78{
79 struct nvgpu_vidmem_buf *buf = dmabuf->priv;
80 struct nvgpu_vidmem_linux *linux_buf = buf->priv;
81 struct gk20a *g = buf->g;
82
83 vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB",
84 dmabuf, buf->mem->size >> 10);
85
86 if (linux_buf && linux_buf->dmabuf_priv_delete)
87 linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv);
88
89 nvgpu_kfree(g, linux_buf);
90 nvgpu_vidmem_buf_free(g, buf);
91
92 gk20a_put(g);
93}
94
95static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
96{
97 WARN_ON("Not supported");
98 return NULL;
99}
100
101static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf,
102 unsigned long page_num)
103{
104 WARN_ON("Not supported");
105 return NULL;
106}
107
108static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
109{
110 return -EINVAL;
111}
112
113static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf,
114 struct device *dev, void *priv, void (*delete)(void *priv))
115{
116 struct nvgpu_vidmem_buf *buf = dmabuf->priv;
117 struct nvgpu_vidmem_linux *linux_buf = buf->priv;
118
119 linux_buf->dmabuf_priv = priv;
120 linux_buf->dmabuf_priv_delete = delete;
121
122 return 0;
123}
124
125static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf,
126 struct device *dev)
127{
128 struct nvgpu_vidmem_buf *buf = dmabuf->priv;
129 struct nvgpu_vidmem_linux *linux_buf = buf->priv;
130
131 return linux_buf->dmabuf_priv;
132}
133
134static const struct dma_buf_ops gk20a_vidbuf_ops = {
135 .map_dma_buf = gk20a_vidbuf_map_dma_buf,
136 .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf,
137 .release = gk20a_vidbuf_release,
138#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
139 .map_atomic = gk20a_vidbuf_kmap_atomic,
140 .map = gk20a_vidbuf_kmap,
141#else
142 .kmap_atomic = gk20a_vidbuf_kmap_atomic,
143 .kmap = gk20a_vidbuf_kmap,
144#endif
145 .mmap = gk20a_vidbuf_mmap,
146 .set_drvdata = gk20a_vidbuf_set_private,
147 .get_drvdata = gk20a_vidbuf_get_private,
148};
149
150static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf)
151{
152 DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
153
154 exp_info.priv = buf;
155 exp_info.ops = &gk20a_vidbuf_ops;
156 exp_info.size = buf->mem->size;
157 exp_info.flags = O_RDWR;
158
159 return dma_buf_export(&exp_info);
160}
161
162struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf)
163{
164 struct nvgpu_vidmem_buf *buf = dmabuf->priv;
165
166 if (dmabuf->ops != &gk20a_vidbuf_ops)
167 return NULL;
168
169 return buf->g;
170}
171
172int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
173{
174 struct nvgpu_vidmem_buf *buf = NULL;
175 struct nvgpu_vidmem_linux *priv;
176 int err, fd;
177
178 /*
179 * This ref is released when the dma_buf is closed.
180 */
181 if (!gk20a_get(g))
182 return -ENODEV;
183
184 vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes);
185
186 priv = nvgpu_kzalloc(g, sizeof(*priv));
187 if (!priv) {
188 err = -ENOMEM;
189 goto fail;
190 }
191
192 buf = nvgpu_vidmem_user_alloc(g, bytes);
193 if (IS_ERR(buf)) {
194 err = PTR_ERR(buf);
195 goto fail;
196 }
197
198 priv->dmabuf = gk20a_vidbuf_export(buf);
199 if (IS_ERR(priv->dmabuf)) {
200 err = PTR_ERR(priv->dmabuf);
201 goto fail;
202 }
203
204 buf->priv = priv;
205
206#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD
207 fd = tegra_alloc_fd(current->files, 1024, O_RDWR);
208#else
209 fd = get_unused_fd_flags(O_RDWR);
210#endif
211 if (fd < 0) {
212 /* ->release frees what we have done */
213 dma_buf_put(priv->dmabuf);
214 return fd;
215 }
216
217 /* fclose() on this drops one ref, freeing the dma buf */
218 fd_install(fd, priv->dmabuf->file);
219
220 vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB",
221 priv->dmabuf, buf->mem->size >> 10);
222
223 return fd;
224
225fail:
226 nvgpu_vidmem_buf_free(g, buf);
227 nvgpu_kfree(g, priv);
228 gk20a_put(g);
229
230 vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err);
231 return err;
232}
233
234int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
235 void *buffer, u64 offset, u64 size, u32 cmd)
236{
237 struct nvgpu_vidmem_buf *vidmem_buf;
238 struct nvgpu_mem *mem;
239 int err = 0;
240
241 if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM)
242 return -EINVAL;
243
244 vidmem_buf = dmabuf->priv;
245 mem = vidmem_buf->mem;
246
247 nvgpu_speculation_barrier();
248 switch (cmd) {
249 case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ:
250 nvgpu_mem_rd_n(g, mem, offset, buffer, size);
251 break;
252
253 case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE:
254 nvgpu_mem_wr_n(g, mem, offset, buffer, size);
255 break;
256
257 default:
258 err = -EINVAL;
259 }
260
261 return err;
262}
263
264void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem)
265{
266 nvgpu_free(vidmem->allocator,
267 (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl));
268 nvgpu_free_sgtable(g, &vidmem->priv.sgt);
269}
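
The vidmem helpers above hide a struct nvgpu_page_alloc pointer in sg_dma_address() and mark it by setting bit 0, which is safe only because the allocator returns pointers with at least 2-byte alignment. A short round-trip sketch of that invariant (alloc and sgt are assumed to come from a vidmem allocation):

struct scatterlist *sgl = sgt->sgl;

nvgpu_vidmem_set_page_alloc(sgl, (u64)(uintptr_t)alloc);

/* The tagged address is recognized as a vidmem page alloc ... */
WARN_ON(!nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(sgl)));

/* ... and the original pointer comes back with bit 0 cleared. */
WARN_ON(nvgpu_vidmem_get_page_alloc(sgl) != alloc);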
diff --git a/include/os/linux/dmabuf_vidmem.h b/include/os/linux/dmabuf_vidmem.h
deleted file mode 100644
index 977fd78..0000000
--- a/include/os/linux/dmabuf_vidmem.h
+++ /dev/null
@@ -1,78 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_LINUX_DMABUF_VIDMEM_H__
18#define __NVGPU_LINUX_DMABUF_VIDMEM_H__
19
20#include <nvgpu/types.h>
21
22struct dma_buf;
23
24struct gk20a;
25struct scatterlist;
26
27#ifdef CONFIG_GK20A_VIDMEM
28
29struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf);
30int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes);
31
32void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr);
33struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl);
34
35int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
36 void *buffer, u64 offset, u64 size, u32 cmd);
37
38#else /* !CONFIG_GK20A_VIDMEM */
39
40static inline struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf)
41{
42 return NULL;
43}
44
45static inline int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
46{
47 return -ENOSYS;
48}
49
50static inline void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl,
51 u64 addr)
52{
53}
54
55static inline struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(
56 struct scatterlist *sgl)
57{
58 return NULL;
59}
60
61static inline int nvgpu_vidmem_buf_access_memory(struct gk20a *g,
62 struct dma_buf *dmabuf,
63 void *buffer, u64 offset,
64 u64 size, u32 cmd)
65{
66 return -ENOSYS;
67}
68
69#endif
70
71
72struct nvgpu_vidmem_linux {
73 struct dma_buf *dmabuf;
74 void *dmabuf_priv;
75 void (*dmabuf_priv_delete)(void *);
76};
77
78#endif
diff --git a/include/os/linux/driver_common.c b/include/os/linux/driver_common.c
deleted file mode 100644
index 8f5872d..0000000
--- a/include/os/linux/driver_common.c
+++ /dev/null
@@ -1,400 +0,0 @@
1/*
2 * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/reboot.h>
18#include <linux/dma-mapping.h>
19#include <linux/mm.h>
20#include <linux/slab.h>
21#include <linux/of_platform.h>
22#include <uapi/linux/nvgpu.h>
23
24#include <nvgpu/defaults.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/nvgpu_common.h>
27#include <nvgpu/soc.h>
28#include <nvgpu/bug.h>
29#include <nvgpu/enabled.h>
30#include <nvgpu/debug.h>
31#include <nvgpu/sizes.h>
32#include <nvgpu/gk20a.h>
33
34#include "platform_gk20a.h"
35#include "module.h"
36#include "os_linux.h"
37#include "sysfs.h"
38#include "ioctl.h"
39#include "gk20a/regops_gk20a.h"
40
41#define EMC3D_DEFAULT_RATIO 750
42
43void nvgpu_kernel_restart(void *cmd)
44{
45 kernel_restart(cmd);
46}
47
48static void nvgpu_init_vars(struct gk20a *g)
49{
50 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
51 struct device *dev = dev_from_gk20a(g);
52 struct gk20a_platform *platform = dev_get_drvdata(dev);
53
54 nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq);
55 nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq);
56
57 init_rwsem(&l->busy_lock);
58 nvgpu_rwsem_init(&g->deterministic_busy);
59
60 nvgpu_spinlock_init(&g->mc_enable_lock);
61
62 nvgpu_mutex_init(&platform->railgate_lock);
63 nvgpu_mutex_init(&g->dbg_sessions_lock);
64 nvgpu_mutex_init(&g->client_lock);
65 nvgpu_mutex_init(&g->power_lock);
66 nvgpu_mutex_init(&g->ctxsw_disable_lock);
67 nvgpu_mutex_init(&g->tpc_pg_lock);
68 nvgpu_mutex_init(&g->clk_arb_enable_lock);
69 nvgpu_mutex_init(&g->cg_pg_lock);
70
71 /* Init the clock req count to 0 */
72 nvgpu_atomic_set(&g->clk_arb_global_nr, 0);
73
74 nvgpu_mutex_init(&l->ctrl.privs_lock);
75 nvgpu_init_list_node(&l->ctrl.privs);
76
77 l->regs_saved = l->regs;
78 l->bar1_saved = l->bar1;
79
80 g->emc3d_ratio = EMC3D_DEFAULT_RATIO;
81
82 /* Set DMA parameters to allow larger sgt lists */
83 dev->dma_parms = &l->dma_parms;
84 dma_set_max_seg_size(dev, UINT_MAX);
85
86 /*
87 * A default of 16GB is the largest supported DMA size that is
88 * acceptable to all currently supported Tegra SoCs.
89 */
90 if (!platform->dma_mask)
91 platform->dma_mask = DMA_BIT_MASK(34);
92
93 dma_set_mask(dev, platform->dma_mask);
94 dma_set_coherent_mask(dev, platform->dma_mask);
95
96 nvgpu_init_list_node(&g->profiler_objects);
97
98 nvgpu_init_list_node(&g->boardobj_head);
99 nvgpu_init_list_node(&g->boardobjgrp_head);
100
101 __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints);
102}
103
104static void nvgpu_init_gr_vars(struct gk20a *g)
105{
106 gk20a_init_gr(g);
107
108 nvgpu_log_info(g, "total ram pages : %lu", totalram_pages);
109 g->gr.max_comptag_mem = totalram_size_in_mb;
110}
111
112static void nvgpu_init_timeout(struct gk20a *g)
113{
114 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
115
116 g->timeouts_disabled_by_user = false;
117 nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0);
118
119 if (nvgpu_platform_is_silicon(g)) {
120 g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT;
121 } else if (nvgpu_platform_is_fpga(g)) {
122 g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA;
123 } else {
124 g->gr_idle_timeout_default = (u32)ULONG_MAX;
125 }
126 g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
127 g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
128}
129
130static void nvgpu_init_timeslice(struct gk20a *g)
131{
132 g->runlist_interleave = true;
133
134 g->timeslice_low_priority_us = 1300;
135 g->timeslice_medium_priority_us = 2600;
136 g->timeslice_high_priority_us = 5200;
137
138 g->min_timeslice_us = 1000;
139 g->max_timeslice_us = 50000;
140}
141
142static void nvgpu_init_pm_vars(struct gk20a *g)
143{
144 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
145 u32 i = 0;
146
147 /*
148 * Set up initial power settings. For non-silicon platforms, disable
149 * power features and for silicon platforms, read from platform data
150 */
151 g->slcg_enabled =
152 nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false;
153 g->blcg_enabled =
154 nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false;
155 g->elcg_enabled =
156 nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false;
157 g->elpg_enabled =
158 nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false;
159 g->aelpg_enabled =
160 nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false;
161 g->mscg_enabled =
162 nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false;
163 g->can_elpg =
164 nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false;
165
166 __nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG,
167 nvgpu_platform_is_silicon(g) ? platform->can_elcg : false);
168 __nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG,
169 nvgpu_platform_is_silicon(g) ? platform->can_slcg : false);
170 __nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG,
171 nvgpu_platform_is_silicon(g) ? platform->can_blcg : false);
172
173 g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
174 g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
175#ifdef CONFIG_NVGPU_SUPPORT_CDE
176 g->has_cde = platform->has_cde;
177#endif
178 g->ptimer_src_freq = platform->ptimer_src_freq;
179 g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
180 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
181 g->can_tpc_powergate = platform->can_tpc_powergate;
182
183 for (i = 0; i < MAX_TPC_PG_CONFIGS; i++)
184 g->valid_tpc_mask[i] = platform->valid_tpc_mask[i];
185
186 g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
187 /* if default delay is not set, set default delay to 500msec */
188 if (platform->railgate_delay_init)
189 g->railgate_delay = platform->railgate_delay_init;
190 else
191 g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT;
192 __nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon);
193
194 /* set default values to aelpg parameters */
195 g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
196 g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
197 g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
198 g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US;
199 g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
200
201 __nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm);
202}
203
204static void nvgpu_init_vbios_vars(struct gk20a *g)
205{
206 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
207
208 __nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos);
209 g->vbios_min_version = platform->vbios_min_version;
210}
211
212static void nvgpu_init_ltc_vars(struct gk20a *g)
213{
214 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
215
216 g->ltc_streamid = platform->ltc_streamid;
217}
218
219static void nvgpu_init_mm_vars(struct gk20a *g)
220{
221 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
222
223 g->mm.disable_bigpage = platform->disable_bigpage;
224 __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
225 platform->honors_aperture);
226 __nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY,
227 platform->unified_memory);
228 __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
229 platform->unify_address_spaces);
230 __nvgpu_set_enabled(g, NVGPU_MM_FORCE_128K_PMU_VM,
231 platform->force_128K_pmu_vm);
232
233 nvgpu_mutex_init(&g->mm.tlb_lock);
234 nvgpu_mutex_init(&g->mm.priv_lock);
235}
236
237int nvgpu_probe(struct gk20a *g,
238 const char *debugfs_symlink,
239 const char *interface_name,
240 struct class *class)
241{
242 struct device *dev = dev_from_gk20a(g);
243 struct gk20a_platform *platform = dev_get_drvdata(dev);
244 int err = 0;
245 struct device_node *np = dev->of_node;
246 bool disable_l3_alloc = false;
247
248 nvgpu_init_vars(g);
249 nvgpu_init_gr_vars(g);
250 nvgpu_init_timeout(g);
251 nvgpu_init_timeslice(g);
252 nvgpu_init_pm_vars(g);
253 nvgpu_init_vbios_vars(g);
254 nvgpu_init_ltc_vars(g);
255 err = nvgpu_init_soc_vars(g);
256 if (err) {
257 nvgpu_err(g, "init soc vars failed");
258 return err;
259 }
260
261 /* Initialize the platform interface. */
262 err = platform->probe(dev);
263 if (err) {
264 if (err == -EPROBE_DEFER)
265 nvgpu_info(g, "platform probe failed");
266 else
267 nvgpu_err(g, "platform probe failed");
268 return err;
269 }
270
271 disable_l3_alloc = of_property_read_bool(np, "disable_l3_alloc");
272 if (disable_l3_alloc) {
273 nvgpu_log_info(g, "L3 alloc is disabled\n");
274 __nvgpu_set_enabled(g, NVGPU_DISABLE_L3_SUPPORT, true);
275 }
276
277 nvgpu_init_mm_vars(g);
278
279 /* platform probe can defer; do user init only if probe succeeds */
280 err = gk20a_user_init(dev, interface_name, class);
281 if (err)
282 return err;
283
284 if (platform->late_probe) {
285 err = platform->late_probe(dev);
286 if (err) {
287 nvgpu_err(g, "late probe failed");
288 return err;
289 }
290 }
291
292 nvgpu_create_sysfs(dev);
293 gk20a_debug_init(g, debugfs_symlink);
294
295 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
296 if (!g->dbg_regops_tmp_buf) {
297 nvgpu_err(g, "couldn't allocate regops tmp buf");
298 return -ENOMEM;
299 }
300 g->dbg_regops_tmp_buf_ops =
301 SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
302
303 g->remove_support = gk20a_remove_support;
304
305 nvgpu_ref_init(&g->refcount);
306
307 return 0;
308}
309
310/**
311 * cyclic_delta - Returns delta of cyclic integers a and b.
312 *
313 * @a - First integer
314 * @b - Second integer
315 *
316 * Note: if a is ahead of b, delta is positive.
317 */
318static int cyclic_delta(int a, int b)
319{
320 return a - b;
321}
322
323/**
324 * nvgpu_wait_for_stall_interrupts - Wait for the stalling interrupts to
325 * complete.
326 *
327 * @g - The GPU to wait on.
328 * @timeout - maximum time period to wait for.
329 *
330 * Waits until all stalling interrupt handlers that have been scheduled to run
331 * have completed.
332 */
333int nvgpu_wait_for_stall_interrupts(struct gk20a *g, u32 timeout)
334{
335 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
336 int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count);
337
338 /* wait until all stalling irqs are handled */
339 return NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq,
340 cyclic_delta(stall_irq_threshold,
341 atomic_read(&l->sw_irq_stall_last_handled))
342 <= 0, timeout);
343}
344
345/**
346 * nvgpu_wait_for_nonstall_interrupts - Wait for the nonstalling interrupts to
347 * complete.
348 *
349 * @g - The GPU to wait on.
350 * @timeout - maximum time period to wait for.
351 *
352 * Waits until all non-stalling interrupt handlers that have been scheduled to
353 * run have completed.
354 */
355int nvgpu_wait_for_nonstall_interrupts(struct gk20a *g, u32 timeout)
356{
357 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
358 int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count);
359
360 /* wait until all non-stalling irqs are handled */
361 return NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq,
362 cyclic_delta(nonstall_irq_threshold,
363 atomic_read(&l->sw_irq_nonstall_last_handled))
364 <= 0, timeout);
365}
366
367/**
368 * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete
369 *
370 * @g - The GPU to wait on.
371 *
372 * Waits until all interrupt handlers that have been scheduled to run have
373 * completed.
374 */
375void nvgpu_wait_for_deferred_interrupts(struct gk20a *g)
376{
377 int ret;
378
379 ret = nvgpu_wait_for_stall_interrupts(g, 0U);
380 if (ret != 0) {
381 nvgpu_err(g, "wait for stall interrupts failed %d", ret);
382 }
383
384 ret = nvgpu_wait_for_nonstall_interrupts(g, 0U);
385 if (ret != 0) {
386 nvgpu_err(g, "wait for nonstall interrupts failed %d", ret);
387 }
388}
389
390static void nvgpu_free_gk20a(struct gk20a *g)
391{
392 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
393
394 kfree(l);
395}
396
397void nvgpu_init_gk20a(struct gk20a *g)
398{
399 g->free = nvgpu_free_gk20a;
400}
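
cyclic_delta() above depends on the interrupt counters wrapping in two's complement: as long as the two counters are less than 2^31 apart, a - b still gives the signed distance after wraparound (the kernel is normally built with -fno-strict-overflow, so the subtraction wraps rather than being undefined). A worked example under that assumption:

/* The hardware counter has just wrapped past INT_MAX. */
int a = INT_MIN;	/* e.g. hw_irq_stall_count after wraparound     */
int b = INT_MAX;	/* sw_irq_stall_last_handled, not yet caught up */

/* a - b wraps to 1, so cyclic_delta(a, b) > 0: one irq is still pending. */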
diff --git a/include/os/linux/driver_common.h b/include/os/linux/driver_common.h
deleted file mode 100644
index 6f42f77..0000000
--- a/include/os/linux/driver_common.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef NVGPU_LINUX_DRIVER_COMMON
18#define NVGPU_LINUX_DRIVER_COMMON
19
20void nvgpu_init_gk20a(struct gk20a *g);
21
22#endif
diff --git a/include/os/linux/dt.c b/include/os/linux/dt.c
deleted file mode 100644
index 88e391e..0000000
--- a/include/os/linux/dt.c
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/dt.h>
18#include <linux/of.h>
19
20#include "os_linux.h"
21
22int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name,
23 u32 index, u32 *value)
24{
25 struct device *dev = dev_from_gk20a(g);
26 struct device_node *np = dev->of_node;
27
28 return of_property_read_u32_index(np, name, index, value);
29}
diff --git a/include/os/linux/ecc_linux.h b/include/os/linux/ecc_linux.h
deleted file mode 100644
index 7e0f650..0000000
--- a/include/os/linux/ecc_linux.h
+++ /dev/null
@@ -1,49 +0,0 @@
1/*
2 *
3 * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#ifndef NVGPU_OS_ECC_LINUX_H
25#define NVGPU_OS_ECC_LINUX_H
26
27#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
28
29#include <linux/tegra_l1ss_kernel_interface.h>
30#include <linux/tegra_l1ss_ioctl.h>
31#include <linux/tegra_nv_guard_service_id.h>
32#include <linux/tegra_nv_guard_group_id.h>
33
34#include <nvgpu/nvgpu_err.h>
35
36struct nvgpu_ecc_reporting_linux {
37 struct nvgpu_ecc_reporting common;
38 client_param_t priv;
39};
40
41static inline struct nvgpu_ecc_reporting_linux *get_ecc_reporting_linux(
42 struct nvgpu_ecc_reporting *ecc_report)
43{
44 return container_of(ecc_report, struct nvgpu_ecc_reporting_linux, common);
45}
46
47#endif /* CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING */
48
49#endif
\ No newline at end of file
diff --git a/include/os/linux/ecc_sysfs.c b/include/os/linux/ecc_sysfs.c
deleted file mode 100644
index 73ae3dc..0000000
--- a/include/os/linux/ecc_sysfs.c
+++ /dev/null
@@ -1,80 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/ecc.h>
18#include <nvgpu/gk20a.h>
19
20#include "os_linux.h"
21
22int nvgpu_ecc_sysfs_init(struct gk20a *g)
23{
24 struct device *dev = dev_from_gk20a(g);
25 struct nvgpu_ecc *ecc = &g->ecc;
26 struct dev_ext_attribute *attr;
27 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
28 struct nvgpu_ecc_stat *stat;
29	int i = 0, err = 0;
30
31 attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count);
32 if (!attr)
33 return -ENOMEM;
34
35 nvgpu_list_for_each_entry(stat,
36 &ecc->stats_list, nvgpu_ecc_stat, node) {
37 if (i >= ecc->stats_count) {
38 err = -EINVAL;
39 nvgpu_err(g, "stats_list longer than stats_count %d",
40 ecc->stats_count);
41 break;
42 }
43 sysfs_attr_init(&attr[i].attr.attr);
44 attr[i].attr.attr.name = stat->name;
45 attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
46 attr[i].var = &stat->counter;
47 attr[i].attr.show = device_show_int;
48 err = device_create_file(dev, &attr[i].attr);
49 if (err) {
50 nvgpu_err(g, "sysfs node create failed for %s\n",
51 stat->name);
52 break;
53 }
54 i++;
55 }
56
57 if (err) {
58 while (i-- > 0)
59 device_remove_file(dev, &attr[i].attr);
60 nvgpu_kfree(g, attr);
61 return err;
62 }
63
64 l->ecc_attrs = attr;
65
66 return 0;
67}
68
69void nvgpu_ecc_sysfs_remove(struct gk20a *g)
70{
71 struct device *dev = dev_from_gk20a(g);
72 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
73 struct nvgpu_ecc *ecc = &g->ecc;
74 int i;
75
76 for (i = 0; i < ecc->stats_count; i++)
77 device_remove_file(dev, &l->ecc_attrs[i].attr);
78 nvgpu_kfree(g, l->ecc_attrs);
79 l->ecc_attrs = NULL;
80}
diff --git a/include/os/linux/firmware.c b/include/os/linux/firmware.c
deleted file mode 100644
index 8f0344b..0000000
--- a/include/os/linux/firmware.c
+++ /dev/null
@@ -1,117 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/firmware.h>
18
19#include <nvgpu/kmem.h>
20#include <nvgpu/bug.h>
21#include <nvgpu/firmware.h>
22#include <nvgpu/gk20a.h>
23
24#include "platform_gk20a.h"
25#include "os_linux.h"
26
27static const struct firmware *do_request_firmware(struct device *dev,
28 const char *prefix, const char *fw_name, int flags)
29{
30 const struct firmware *fw;
31 char *fw_path = NULL;
32 int path_len, err;
33
34 if (prefix) {
35 path_len = strlen(prefix) + strlen(fw_name);
36 path_len += 2; /* for the path separator and zero terminator*/
37
38 fw_path = nvgpu_kzalloc(get_gk20a(dev),
39 sizeof(*fw_path) * path_len);
40 if (!fw_path)
41 return NULL;
42
43 sprintf(fw_path, "%s/%s", prefix, fw_name);
44 fw_name = fw_path;
45 }
46
47 if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN)
48 err = request_firmware_direct(&fw, fw_name, dev);
49 else
50 err = request_firmware(&fw, fw_name, dev);
51
52 nvgpu_kfree(get_gk20a(dev), fw_path);
53 if (err)
54 return NULL;
55 return fw;
56}
57
58/* This is a simple wrapper around request_firmware that takes 'fw_name' and
59 * applies an IP specific relative path prefix to it. The caller is
60 * responsible for calling nvgpu_release_firmware later. */
61struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g,
62 const char *fw_name,
63 int flags)
64{
65 struct device *dev = dev_from_gk20a(g);
66 struct nvgpu_firmware *fw;
67 const struct firmware *linux_fw;
68
69 /* current->fs is NULL when calling from SYS_EXIT.
70 Add a check here to prevent crash in request_firmware */
71 if (!current->fs || !fw_name)
72 return NULL;
73
74 fw = nvgpu_kzalloc(g, sizeof(*fw));
75 if (!fw)
76 return NULL;
77
78 linux_fw = do_request_firmware(dev, g->name, fw_name, flags);
79
80#ifdef CONFIG_TEGRA_GK20A
81 /* TO BE REMOVED - Support loading from legacy SOC specific path. */
82 if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) {
83 struct gk20a_platform *platform = gk20a_get_platform(dev);
84 linux_fw = do_request_firmware(dev,
85 platform->soc_name, fw_name, flags);
86 }
87#endif
88
89 if (!linux_fw)
90 goto err;
91
92 fw->data = nvgpu_kmalloc(g, linux_fw->size);
93 if (!fw->data)
94 goto err_release;
95
96 memcpy(fw->data, linux_fw->data, linux_fw->size);
97 fw->size = linux_fw->size;
98
99 release_firmware(linux_fw);
100
101 return fw;
102
103err_release:
104 release_firmware(linux_fw);
105err:
106 nvgpu_kfree(g, fw);
107 return NULL;
108}
109
110void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw)
111{
112	if (!fw)
113 return;
114
115 nvgpu_kfree(g, fw->data);
116 nvgpu_kfree(g, fw);
117}
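
Callers of nvgpu_request_firmware() above receive a driver-owned copy of the image and are responsible for releasing it. A minimal usage sketch (the firmware file name is illustrative):

struct nvgpu_firmware *fw;

fw = nvgpu_request_firmware(g, "fecs.bin", 0);
if (!fw)
	return -ENOENT;

/* ... consume fw->data / fw->size, e.g. copy it into a target nvgpu_mem ... */

nvgpu_release_firmware(g, fw);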
diff --git a/include/os/linux/fuse.c b/include/os/linux/fuse.c
deleted file mode 100644
index 27851f9..0000000
--- a/include/os/linux/fuse.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <soc/tegra/fuse.h>
15
16#include <nvgpu/fuse.h>
17
18int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g)
19{
20 return tegra_sku_info.gpu_speedo_id;
21}
22
23/*
24 * Use tegra_fuse_control_read/write() APIs for fuse offsets up to 0x100
25 * Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100
26 */
27void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val)
28{
29 tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0);
30}
31
32void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val)
33{
34 tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0);
35}
36
37void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val)
38{
39 tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0);
40}
41
42void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val)
43{
44 tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0);
45}
46
47int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val)
48{
49 return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val);
50}
51
52int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val)
53{
54 return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val);
55}
diff --git a/include/os/linux/intr.c b/include/os/linux/intr.c
deleted file mode 100644
index 8838b72..0000000
--- a/include/os/linux/intr.c
+++ /dev/null
@@ -1,136 +0,0 @@
1/*
2 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <trace/events/gk20a.h>
15#include <linux/irqreturn.h>
16
17#include <nvgpu/gk20a.h>
18
19#include <nvgpu/atomic.h>
20#include <nvgpu/unit.h>
21#include "os_linux.h"
22
23irqreturn_t nvgpu_intr_stall(struct gk20a *g)
24{
25 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
26 u32 mc_intr_0;
27
28 trace_mc_gk20a_intr_stall(g->name);
29
30 if (!g->power_on)
31 return IRQ_NONE;
32
33 /* not from gpu when sharing irq with others */
34 mc_intr_0 = g->ops.mc.intr_stall(g);
35 if (unlikely(!mc_intr_0))
36 return IRQ_NONE;
37
38 g->ops.mc.intr_stall_pause(g);
39
40 atomic_inc(&l->hw_irq_stall_count);
41
42 trace_mc_gk20a_intr_stall_done(g->name);
43
44 return IRQ_WAKE_THREAD;
45}
46
47irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g)
48{
49 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
50 int hw_irq_count;
51
52 nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched");
53
54 trace_mc_gk20a_intr_thread_stall(g->name);
55
56 hw_irq_count = atomic_read(&l->hw_irq_stall_count);
57 g->ops.mc.isr_stall(g);
58 g->ops.mc.intr_stall_resume(g);
59 /* sync handled irq counter before re-enabling interrupts */
60 atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count);
61
62 nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq);
63
64 trace_mc_gk20a_intr_thread_stall_done(g->name);
65
66 return IRQ_HANDLED;
67}
68
69irqreturn_t nvgpu_intr_nonstall(struct gk20a *g)
70{
71 u32 non_stall_intr_val;
72 u32 hw_irq_count;
73 int ops_old, ops_new, ops = 0;
74 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
75
76 if (!g->power_on)
77 return IRQ_NONE;
78
79 /* not from gpu when sharing irq with others */
80 non_stall_intr_val = g->ops.mc.intr_nonstall(g);
81 if (unlikely(!non_stall_intr_val))
82 return IRQ_NONE;
83
84 g->ops.mc.intr_nonstall_pause(g);
85
86 ops = g->ops.mc.isr_nonstall(g);
87 if (ops) {
88 do {
89 ops_old = atomic_read(&l->nonstall_ops);
90 ops_new = ops_old | ops;
91 } while (ops_old != atomic_cmpxchg(&l->nonstall_ops,
92 ops_old, ops_new));
93
94 queue_work(l->nonstall_work_queue, &l->nonstall_fn_work);
95 }
96
97 hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count);
98
99 /* sync handled irq counter before re-enabling interrupts */
100 atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count);
101
102 g->ops.mc.intr_nonstall_resume(g);
103
104 nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq);
105
106 return IRQ_HANDLED;
107}
108
109static void mc_gk20a_handle_intr_nonstall(struct gk20a *g, u32 ops)
110{
111 bool semaphore_wakeup, post_events;
112
113 semaphore_wakeup =
114 (((ops & GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE) != 0U) ?
115 true : false);
116 post_events = (((ops & GK20A_NONSTALL_OPS_POST_EVENTS) != 0U) ?
117 true: false);
118
119 if (semaphore_wakeup) {
120 g->ops.semaphore_wakeup(g, post_events);
121 }
122}
123
124void nvgpu_intr_nonstall_cb(struct work_struct *work)
125{
126 struct nvgpu_os_linux *l =
127 container_of(work, struct nvgpu_os_linux, nonstall_fn_work);
128 struct gk20a *g = &l->g;
129
130 do {
131 u32 ops;
132
133 ops = atomic_xchg(&l->nonstall_ops, 0);
134 mc_gk20a_handle_intr_nonstall(g, ops);
135 } while (atomic_read(&l->nonstall_ops) != 0);
136}
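
The nonstall path above accumulates work bits with a compare-and-swap loop so that bits posted by concurrent interrupts are never lost, then drains them all at once with atomic_xchg() in the work-queue callback. A reduced sketch of that accumulate/drain pairing, using a standalone atomic_t rather than the nvgpu structures:

#include <linux/atomic.h>

static atomic_t pending_ops = ATOMIC_INIT(0);

/* Interrupt side: OR new work bits in without losing concurrent updates. */
static void post_ops(int ops)
{
	int ops_old, ops_new;

	do {
		ops_old = atomic_read(&pending_ops);
		ops_new = ops_old | ops;
	} while (ops_old != atomic_cmpxchg(&pending_ops, ops_old, ops_new));
}

/* Worker side: take ownership of everything posted so far. */
static int drain_ops(void)
{
	return atomic_xchg(&pending_ops, 0);
}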
diff --git a/include/os/linux/intr.h b/include/os/linux/intr.h
deleted file mode 100644
index d43cdcc..0000000
--- a/include/os/linux/intr.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#ifndef __NVGPU_LINUX_INTR_H__
15#define __NVGPU_LINUX_INTR_H__
16struct gk20a;
17
18irqreturn_t nvgpu_intr_stall(struct gk20a *g);
19irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g);
20irqreturn_t nvgpu_intr_nonstall(struct gk20a *g);
21void nvgpu_intr_nonstall_cb(struct work_struct *work);
22#endif
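
nvgpu_intr_stall() and nvgpu_intr_thread_stall() are the two halves of a threaded IRQ (the first runs in hard-IRQ context and returns IRQ_WAKE_THREAD, the second does the actual servicing), while nvgpu_intr_nonstall() pairs with the nvgpu_intr_nonstall_cb() work item instead. A hedged sketch of how a probe path would typically wire the stall pair up with devm_request_threaded_irq(); the wrapper names and the irq_stall argument are illustrative, not taken from this tree:

#include <linux/interrupt.h>	/* irqreturn_t, devm_request_threaded_irq() */

static irqreturn_t stall_isr(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;

	return nvgpu_intr_stall(g);		/* hard-IRQ half */
}

static irqreturn_t stall_isr_thread(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;

	return nvgpu_intr_thread_stall(g);	/* threaded half */
}

static int hook_stall_irq(struct device *dev, struct gk20a *g,
			  unsigned int irq_stall)
{
	return devm_request_threaded_irq(dev, irq_stall,
					 stall_isr, stall_isr_thread,
					 0, "nvgpu_stall", g);
}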
diff --git a/include/os/linux/io.c b/include/os/linux/io.c
deleted file mode 100644
index 3e84e88..0000000
--- a/include/os/linux/io.c
+++ /dev/null
@@ -1,130 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/io.h>
15#include <nvgpu/types.h>
16#include <nvgpu/gk20a.h>
17
18#include "os_linux.h"
19
20void nvgpu_writel(struct gk20a *g, u32 r, u32 v)
21{
22 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
23
24 if (unlikely(!l->regs)) {
25 __gk20a_warn_on_no_regs();
26 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
27 } else {
28 writel_relaxed(v, l->regs + r);
29 nvgpu_wmb();
30 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
31 }
32}
33
34void nvgpu_writel_relaxed(struct gk20a *g, u32 r, u32 v)
35{
36 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
37
38 if (unlikely(!l->regs)) {
39 __gk20a_warn_on_no_regs();
40 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
41 } else {
42 writel_relaxed(v, l->regs + r);
43 }
44}
45
46u32 nvgpu_readl(struct gk20a *g, u32 r)
47{
48 u32 v = __nvgpu_readl(g, r);
49
50 if (v == 0xffffffff)
51 __nvgpu_check_gpu_state(g);
52
53 return v;
54}
55
56u32 __nvgpu_readl(struct gk20a *g, u32 r)
57{
58 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
59 u32 v = 0xffffffff;
60
61 if (unlikely(!l->regs)) {
62 __gk20a_warn_on_no_regs();
63 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
64 } else {
65 v = readl(l->regs + r);
66 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
67 }
68
69 return v;
70}
71
72void nvgpu_writel_loop(struct gk20a *g, u32 r, u32 v)
73{
74 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
75
76 if (unlikely(!l->regs)) {
77 __gk20a_warn_on_no_regs();
78 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
79 } else {
80 nvgpu_wmb();
81 do {
82 writel_relaxed(v, l->regs + r);
83 } while (readl(l->regs + r) != v);
84 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
85 }
86}
87
88void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v)
89{
90 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
91
92 if (unlikely(!l->bar1)) {
93 __gk20a_warn_on_no_regs();
94 nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
95 } else {
96 nvgpu_wmb();
97 writel_relaxed(v, l->bar1 + b);
98 nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
99 }
100}
101
102u32 nvgpu_bar1_readl(struct gk20a *g, u32 b)
103{
104 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
105 u32 v = 0xffffffff;
106
107 if (unlikely(!l->bar1)) {
108 __gk20a_warn_on_no_regs();
109 nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
110 } else {
111 v = readl(l->bar1 + b);
112 nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
113 }
114
115 return v;
116}
117
118bool nvgpu_io_exists(struct gk20a *g)
119{
120 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
121
122 return l->regs != NULL;
123}
124
125bool nvgpu_io_valid_reg(struct gk20a *g, u32 r)
126{
127 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
128
129 return r < resource_size(l->regs);
130}
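
Two conventions in io.c are worth keeping in mind when reading callers: every accessor degrades gracefully (warn and log) when l->regs was never mapped, and nvgpu_readl() treats an all-ones readback as a possibly dead bus and calls __nvgpu_check_gpu_state(). A small usage sketch under those semantics; the status register offset and idle bit are hypothetical, and <nvgpu/io.h> plus <nvgpu/gk20a.h> are assumed to be in scope:

/* Poll an engine-idle bit; the dead-bus check already lives inside nvgpu_readl(). */
static bool engine_idle(struct gk20a *g, u32 status_reg, u32 idle_bit)
{
	u32 v = nvgpu_readl(g, status_reg);

	return (v & idle_bit) != 0U;
}

/* Write a value and insist it sticks, e.g. around a reset sequence. */
static void force_reg(struct gk20a *g, u32 reg, u32 val)
{
	nvgpu_writel_loop(g, reg, val);
}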
diff --git a/include/os/linux/io_usermode.c b/include/os/linux/io_usermode.c
deleted file mode 100644
index ffc532f..0000000
--- a/include/os/linux/io_usermode.c
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/io.h>
15#include <nvgpu/types.h>
16#include <nvgpu/gk20a.h>
17
18#include "os_linux.h"
19
20#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
21
22void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v)
23{
24 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
25 void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r());
26
27 writel_relaxed(v, reg);
28 nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v);
29}
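
nvgpu_usermode_writel() rebases the offset against usermode_cfg0_r(), so callers pass the absolute usermode register offset rather than an offset into the mapped window. A hedged example of a caller; the doorbell register argument and work token are assumptions for illustration, not symbols from this tree, and the nvgpu headers are assumed to be in scope:

/* Ring a usermode submit doorbell for a channel (illustrative only). */
static void ring_doorbell(struct gk20a *g, u32 doorbell_reg, u32 work_token)
{
	/* doorbell_reg is an absolute usermode offset, as nvgpu_usermode_writel() expects */
	nvgpu_usermode_writel(g, doorbell_reg, work_token);
}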
diff --git a/include/os/linux/ioctl.c b/include/os/linux/ioctl.c
deleted file mode 100644
index a40df2a..0000000
--- a/include/os/linux/ioctl.c
+++ /dev/null
@@ -1,297 +0,0 @@
1/*
2 * NVGPU IOCTLs
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/file.h>
20
21#include <nvgpu/nvgpu_common.h>
22#include <nvgpu/ctxsw_trace.h>
23#include <nvgpu/gk20a.h>
24
25#include "gk20a/dbg_gpu_gk20a.h"
26
27#include "ioctl_channel.h"
28#include "ioctl_ctrl.h"
29#include "ioctl_as.h"
30#include "ioctl_tsg.h"
31#include "ioctl_dbg.h"
32#include "module.h"
33#include "os_linux.h"
34#include "ctxsw_trace.h"
35#include "platform_gk20a.h"
36
37#define GK20A_NUM_CDEVS 7
38
39const struct file_operations gk20a_channel_ops = {
40 .owner = THIS_MODULE,
41 .release = gk20a_channel_release,
42 .open = gk20a_channel_open,
43#ifdef CONFIG_COMPAT
44 .compat_ioctl = gk20a_channel_ioctl,
45#endif
46 .unlocked_ioctl = gk20a_channel_ioctl,
47};
48
49static const struct file_operations gk20a_ctrl_ops = {
50 .owner = THIS_MODULE,
51 .release = gk20a_ctrl_dev_release,
52 .open = gk20a_ctrl_dev_open,
53 .unlocked_ioctl = gk20a_ctrl_dev_ioctl,
54#ifdef CONFIG_COMPAT
55 .compat_ioctl = gk20a_ctrl_dev_ioctl,
56#endif
57 .mmap = gk20a_ctrl_dev_mmap,
58};
59
60static const struct file_operations gk20a_dbg_ops = {
61 .owner = THIS_MODULE,
62 .release = gk20a_dbg_gpu_dev_release,
63 .open = gk20a_dbg_gpu_dev_open,
64 .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
65 .poll = gk20a_dbg_gpu_dev_poll,
66#ifdef CONFIG_COMPAT
67 .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
68#endif
69};
70
71static const struct file_operations gk20a_as_ops = {
72 .owner = THIS_MODULE,
73 .release = gk20a_as_dev_release,
74 .open = gk20a_as_dev_open,
75#ifdef CONFIG_COMPAT
76 .compat_ioctl = gk20a_as_dev_ioctl,
77#endif
78 .unlocked_ioctl = gk20a_as_dev_ioctl,
79};
80
81/*
82 * Note: We use a different 'open' to trigger handling of the profiler session.
83 * Most of the code is shared between them... Though, at some point if the
84 * code does get too tangled trying to handle each in the same path we can
85 * separate them cleanly.
86 */
87static const struct file_operations gk20a_prof_ops = {
88 .owner = THIS_MODULE,
89 .release = gk20a_dbg_gpu_dev_release,
90 .open = gk20a_prof_gpu_dev_open,
91 .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
92#ifdef CONFIG_COMPAT
93 .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
94#endif
95};
96
97static const struct file_operations gk20a_tsg_ops = {
98 .owner = THIS_MODULE,
99 .release = nvgpu_ioctl_tsg_dev_release,
100 .open = nvgpu_ioctl_tsg_dev_open,
101#ifdef CONFIG_COMPAT
102 .compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
103#endif
104 .unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
105};
106
107#ifdef CONFIG_GK20A_CTXSW_TRACE
108static const struct file_operations gk20a_ctxsw_ops = {
109 .owner = THIS_MODULE,
110 .release = gk20a_ctxsw_dev_release,
111 .open = gk20a_ctxsw_dev_open,
112#ifdef CONFIG_COMPAT
113 .compat_ioctl = gk20a_ctxsw_dev_ioctl,
114#endif
115 .unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
116 .poll = gk20a_ctxsw_dev_poll,
117 .read = gk20a_ctxsw_dev_read,
118 .mmap = gk20a_ctxsw_dev_mmap,
119};
120#endif
121
122static const struct file_operations gk20a_sched_ops = {
123 .owner = THIS_MODULE,
124 .release = gk20a_sched_dev_release,
125 .open = gk20a_sched_dev_open,
126#ifdef CONFIG_COMPAT
127 .compat_ioctl = gk20a_sched_dev_ioctl,
128#endif
129 .unlocked_ioctl = gk20a_sched_dev_ioctl,
130 .poll = gk20a_sched_dev_poll,
131 .read = gk20a_sched_dev_read,
132};
133
134static int gk20a_create_device(
135 struct device *dev, int devno,
136 const char *interface_name, const char *cdev_name,
137 struct cdev *cdev, struct device **out,
138 const struct file_operations *ops,
139 struct class *class)
140{
141 struct device *subdev;
142 int err;
143 struct gk20a *g = gk20a_from_dev(dev);
144
145 nvgpu_log_fn(g, " ");
146
147 cdev_init(cdev, ops);
148 cdev->owner = THIS_MODULE;
149
150 err = cdev_add(cdev, devno, 1);
151 if (err) {
152 dev_err(dev, "failed to add %s cdev\n", cdev_name);
153 return err;
154 }
155
156 subdev = device_create(class, NULL, devno, NULL,
157 interface_name, cdev_name);
158
159 if (IS_ERR(subdev)) {
160		err = PTR_ERR(subdev);
161 cdev_del(cdev);
162 dev_err(dev, "failed to create %s device for %s\n",
163 cdev_name, dev_name(dev));
164 return err;
165 }
166
167 *out = subdev;
168 return 0;
169}
170
171void gk20a_user_deinit(struct device *dev, struct class *class)
172{
173 struct gk20a *g = gk20a_from_dev(dev);
174 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
175
176 if (l->channel.node) {
177 device_destroy(class, l->channel.cdev.dev);
178 cdev_del(&l->channel.cdev);
179 }
180
181 if (l->as_dev.node) {
182 device_destroy(class, l->as_dev.cdev.dev);
183 cdev_del(&l->as_dev.cdev);
184 }
185
186 if (l->ctrl.node) {
187 device_destroy(class, l->ctrl.cdev.dev);
188 cdev_del(&l->ctrl.cdev);
189 }
190
191 if (l->dbg.node) {
192 device_destroy(class, l->dbg.cdev.dev);
193 cdev_del(&l->dbg.cdev);
194 }
195
196 if (l->prof.node) {
197 device_destroy(class, l->prof.cdev.dev);
198 cdev_del(&l->prof.cdev);
199 }
200
201 if (l->tsg.node) {
202 device_destroy(class, l->tsg.cdev.dev);
203 cdev_del(&l->tsg.cdev);
204 }
205
206 if (l->ctxsw.node) {
207 device_destroy(class, l->ctxsw.cdev.dev);
208 cdev_del(&l->ctxsw.cdev);
209 }
210
211 if (l->sched.node) {
212 device_destroy(class, l->sched.cdev.dev);
213 cdev_del(&l->sched.cdev);
214 }
215
216 if (l->cdev_region)
217 unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS);
218}
219
220int gk20a_user_init(struct device *dev, const char *interface_name,
221 struct class *class)
222{
223 int err;
224 dev_t devno;
225 struct gk20a *g = gk20a_from_dev(dev);
226 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
227
228 err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev));
229 if (err) {
230 dev_err(dev, "failed to allocate devno\n");
231 goto fail;
232 }
233 l->cdev_region = devno;
234
235 err = gk20a_create_device(dev, devno++, interface_name, "",
236 &l->channel.cdev, &l->channel.node,
237 &gk20a_channel_ops,
238 class);
239 if (err)
240 goto fail;
241
242 err = gk20a_create_device(dev, devno++, interface_name, "-as",
243 &l->as_dev.cdev, &l->as_dev.node,
244 &gk20a_as_ops,
245 class);
246 if (err)
247 goto fail;
248
249 err = gk20a_create_device(dev, devno++, interface_name, "-ctrl",
250 &l->ctrl.cdev, &l->ctrl.node,
251 &gk20a_ctrl_ops,
252 class);
253 if (err)
254 goto fail;
255
256 err = gk20a_create_device(dev, devno++, interface_name, "-dbg",
257 &l->dbg.cdev, &l->dbg.node,
258 &gk20a_dbg_ops,
259 class);
260 if (err)
261 goto fail;
262
263 err = gk20a_create_device(dev, devno++, interface_name, "-prof",
264 &l->prof.cdev, &l->prof.node,
265 &gk20a_prof_ops,
266 class);
267 if (err)
268 goto fail;
269
270 err = gk20a_create_device(dev, devno++, interface_name, "-tsg",
271 &l->tsg.cdev, &l->tsg.node,
272 &gk20a_tsg_ops,
273 class);
274 if (err)
275 goto fail;
276
277#if defined(CONFIG_GK20A_CTXSW_TRACE)
278 err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw",
279 &l->ctxsw.cdev, &l->ctxsw.node,
280 &gk20a_ctxsw_ops,
281 class);
282 if (err)
283 goto fail;
284#endif
285
286 err = gk20a_create_device(dev, devno++, interface_name, "-sched",
287 &l->sched.cdev, &l->sched.node,
288 &gk20a_sched_ops,
289 class);
290 if (err)
291 goto fail;
292
293 return 0;
294fail:
295 gk20a_user_deinit(dev, &nvgpu_class);
296 return err;
297}
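
gk20a_create_device() follows the standard cdev_init()/cdev_add()/device_create() sequence, and gk20a_user_deinit() only tears down nodes whose .node pointer was actually set, which is what lets the fail: path in gk20a_user_init() reuse it for partial unwinding. A condensed sketch of that add/teardown pairing, reduced to a single node with illustrative names:

#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/module.h>

struct my_node {
	struct cdev cdev;
	struct device *node;
};

static int my_node_add(struct class *class, dev_t devno, struct my_node *n,
		       const struct file_operations *ops)
{
	int err;

	cdev_init(&n->cdev, ops);
	n->cdev.owner = THIS_MODULE;

	err = cdev_add(&n->cdev, devno, 1);
	if (err)
		return err;

	n->node = device_create(class, NULL, devno, NULL, "mydev");
	if (IS_ERR(n->node)) {
		err = PTR_ERR(n->node);
		n->node = NULL;
		cdev_del(&n->cdev);
		return err;
	}
	return 0;
}

static void my_node_del(struct class *class, struct my_node *n)
{
	if (n->node) {			/* skip nodes that were never created */
		device_destroy(class, n->cdev.dev);
		cdev_del(&n->cdev);
		n->node = NULL;
	}
}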
diff --git a/include/os/linux/ioctl.h b/include/os/linux/ioctl.h
deleted file mode 100644
index 7bf1671..0000000
--- a/include/os/linux/ioctl.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef __NVGPU_IOCTL_H__
14#define __NVGPU_IOCTL_H__
15
16struct device;
17struct class;
18
19int gk20a_user_init(struct device *dev, const char *interface_name,
20 struct class *class);
21void gk20a_user_deinit(struct device *dev, struct class *class);
22
23#endif
diff --git a/include/os/linux/ioctl_as.c b/include/os/linux/ioctl_as.c
deleted file mode 100644
index f0cec17..0000000
--- a/include/os/linux/ioctl_as.c
+++ /dev/null
@@ -1,427 +0,0 @@
1/*
2 * GK20A Address Spaces
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/cdev.h>
17#include <linux/uaccess.h>
18#include <linux/fs.h>
19
20#include <trace/events/gk20a.h>
21
22#include <uapi/linux/nvgpu.h>
23
24#include <nvgpu/gmmu.h>
25#include <nvgpu/vm_area.h>
26#include <nvgpu/log2.h>
27#include <nvgpu/gk20a.h>
28#include <nvgpu/channel.h>
29
30#include <nvgpu/linux/vm.h>
31
32#include "platform_gk20a.h"
33#include "ioctl_as.h"
34#include "os_linux.h"
35
36static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags)
37{
38 u32 core_flags = 0;
39
40 if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
41 core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET;
42 if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE)
43 core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE;
44
45 return core_flags;
46}
47
48static int gk20a_as_ioctl_bind_channel(
49 struct gk20a_as_share *as_share,
50 struct nvgpu_as_bind_channel_args *args)
51{
52 int err = 0;
53 struct channel_gk20a *ch;
54 struct gk20a *g = gk20a_from_vm(as_share->vm);
55
56 nvgpu_log_fn(g, " ");
57
58 ch = gk20a_get_channel_from_file(args->channel_fd);
59 if (!ch)
60 return -EINVAL;
61
62 if (gk20a_channel_as_bound(ch)) {
63 err = -EINVAL;
64 goto out;
65 }
66
67 /* this will set channel_gk20a->vm */
68 err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch);
69
70out:
71 gk20a_channel_put(ch);
72 return err;
73}
74
75static int gk20a_as_ioctl_alloc_space(
76 struct gk20a_as_share *as_share,
77 struct nvgpu_as_alloc_space_args *args)
78{
79 struct gk20a *g = gk20a_from_vm(as_share->vm);
80
81 nvgpu_log_fn(g, " ");
82 return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size,
83 &args->o_a.offset,
84 gk20a_as_translate_as_alloc_space_flags(g,
85 args->flags));
86}
87
88static int gk20a_as_ioctl_free_space(
89 struct gk20a_as_share *as_share,
90 struct nvgpu_as_free_space_args *args)
91{
92 struct gk20a *g = gk20a_from_vm(as_share->vm);
93
94 nvgpu_log_fn(g, " ");
95 return nvgpu_vm_area_free(as_share->vm, args->offset);
96}
97
98static int gk20a_as_ioctl_map_buffer_ex(
99 struct gk20a_as_share *as_share,
100 struct nvgpu_as_map_buffer_ex_args *args)
101{
102 struct gk20a *g = gk20a_from_vm(as_share->vm);
103
104 nvgpu_log_fn(g, " ");
105
106 /* unsupported, direct kind control must be used */
107 if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) {
108 struct gk20a *g = as_share->vm->mm->g;
109 nvgpu_log_info(g, "Direct kind control must be requested");
110 return -EINVAL;
111 }
112
113 return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
114 &args->offset, args->flags,
115 args->page_size,
116 args->compr_kind,
117 args->incompr_kind,
118 args->buffer_offset,
119 args->mapping_size,
120 NULL);
121}
122
123static int gk20a_as_ioctl_unmap_buffer(
124 struct gk20a_as_share *as_share,
125 struct nvgpu_as_unmap_buffer_args *args)
126{
127 struct gk20a *g = gk20a_from_vm(as_share->vm);
128
129 nvgpu_log_fn(g, " ");
130
131 nvgpu_vm_unmap(as_share->vm, args->offset, NULL);
132
133 return 0;
134}
135
136static int gk20a_as_ioctl_map_buffer_batch(
137 struct gk20a_as_share *as_share,
138 struct nvgpu_as_map_buffer_batch_args *args)
139{
140 struct gk20a *g = gk20a_from_vm(as_share->vm);
141 u32 i;
142 int err = 0;
143
144 struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
145 (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
146 args->unmaps;
147 struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
148 (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
149 args->maps;
150
151 struct vm_gk20a_mapping_batch batch;
152
153 nvgpu_log_fn(g, " ");
154
155 if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT ||
156 args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT)
157 return -EINVAL;
158
159 nvgpu_vm_mapping_batch_start(&batch);
160
161 for (i = 0; i < args->num_unmaps; ++i) {
162 struct nvgpu_as_unmap_buffer_args unmap_args;
163
164 if (copy_from_user(&unmap_args, &user_unmap_args[i],
165 sizeof(unmap_args))) {
166 err = -EFAULT;
167 break;
168 }
169
170 nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
171 }
172
173 nvgpu_speculation_barrier();
174 if (err) {
175 nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
176
177 args->num_unmaps = i;
178 args->num_maps = 0;
179 return err;
180 }
181
182 for (i = 0; i < args->num_maps; ++i) {
183 s16 compressible_kind;
184 s16 incompressible_kind;
185
186 struct nvgpu_as_map_buffer_ex_args map_args;
187 memset(&map_args, 0, sizeof(map_args));
188
189 if (copy_from_user(&map_args, &user_map_args[i],
190 sizeof(map_args))) {
191 err = -EFAULT;
192 break;
193 }
194
195 if (map_args.flags &
196 NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
197 compressible_kind = map_args.compr_kind;
198 incompressible_kind = map_args.incompr_kind;
199 } else {
200 /* direct kind control must be used */
201 err = -EINVAL;
202 break;
203 }
204
205 err = nvgpu_vm_map_buffer(
206 as_share->vm, map_args.dmabuf_fd,
207 &map_args.offset, map_args.flags, map_args.page_size,
208 compressible_kind, incompressible_kind,
209 map_args.buffer_offset,
210 map_args.mapping_size,
211 &batch);
212 if (err)
213 break;
214 }
215
216 nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
217
218 if (err)
219 args->num_maps = i;
220 /* note: args->num_unmaps will be unmodified, which is ok
221 * since all unmaps are done */
222
223 return err;
224}
225
226static int gk20a_as_ioctl_get_va_regions(
227 struct gk20a_as_share *as_share,
228 struct nvgpu_as_get_va_regions_args *args)
229{
230 unsigned int i;
231 unsigned int write_entries;
232 struct nvgpu_as_va_region __user *user_region_ptr;
233 struct vm_gk20a *vm = as_share->vm;
234 struct gk20a *g = gk20a_from_vm(vm);
235 unsigned int page_sizes = GMMU_PAGE_SIZE_KERNEL;
236
237 nvgpu_log_fn(g, " ");
238
239 if (!vm->big_pages)
240 page_sizes--;
241
242 write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region);
243 if (write_entries > page_sizes)
244 write_entries = page_sizes;
245
246 user_region_ptr =
247 (struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr;
248
249 for (i = 0; i < write_entries; ++i) {
250 struct nvgpu_as_va_region region;
251 struct nvgpu_allocator *vma = vm->vma[i];
252
253 memset(&region, 0, sizeof(struct nvgpu_as_va_region));
254
255 region.page_size = vm->gmmu_page_sizes[i];
256 region.offset = nvgpu_alloc_base(vma);
257 /* No __aeabi_uldivmod() on some platforms... */
258 region.pages = (nvgpu_alloc_end(vma) -
259 nvgpu_alloc_base(vma)) >> ilog2(region.page_size);
260
261 if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
262 return -EFAULT;
263 }
264
265 args->buf_size =
266 page_sizes * sizeof(struct nvgpu_as_va_region);
267
268 return 0;
269}
270
271static int nvgpu_as_ioctl_get_sync_ro_map(
272 struct gk20a_as_share *as_share,
273 struct nvgpu_as_get_sync_ro_map_args *args)
274{
275#ifdef CONFIG_TEGRA_GK20A_NVHOST
276 struct vm_gk20a *vm = as_share->vm;
277 struct gk20a *g = gk20a_from_vm(vm);
278 u64 base_gpuva;
279 u32 sync_size;
280 int err = 0;
281
282 if (!g->ops.fifo.get_sync_ro_map)
283 return -EINVAL;
284
285 if (!nvgpu_has_syncpoints(g))
286 return -EINVAL;
287
288 err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size);
289 if (err)
290 return err;
291
292 args->base_gpuva = base_gpuva;
293 args->sync_size = sync_size;
294
295 return err;
296#else
297 return -EINVAL;
298#endif
299}
300
301int gk20a_as_dev_open(struct inode *inode, struct file *filp)
302{
303 struct nvgpu_os_linux *l;
304 struct gk20a_as_share *as_share;
305 struct gk20a *g;
306 int err;
307
308 l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev);
309 g = &l->g;
310
311 nvgpu_log_fn(g, " ");
312
313 err = gk20a_as_alloc_share(g, 0, 0, &as_share);
314 if (err) {
315 nvgpu_log_fn(g, "failed to alloc share");
316 return err;
317 }
318
319 filp->private_data = as_share;
320 return 0;
321}
322
323int gk20a_as_dev_release(struct inode *inode, struct file *filp)
324{
325 struct gk20a_as_share *as_share = filp->private_data;
326
327 if (!as_share)
328 return 0;
329
330 return gk20a_as_release_share(as_share);
331}
332
333long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
334{
335 int err = 0;
336 struct gk20a_as_share *as_share = filp->private_data;
337 struct gk20a *g = gk20a_from_as(as_share->as);
338
339 u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE];
340
341 nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
342
343 if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) ||
344 (_IOC_NR(cmd) == 0) ||
345 (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) ||
346 (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE))
347 return -EINVAL;
348
349 memset(buf, 0, sizeof(buf));
350 if (_IOC_DIR(cmd) & _IOC_WRITE) {
351 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
352 return -EFAULT;
353 }
354
355 err = gk20a_busy(g);
356 if (err)
357 return err;
358
359 nvgpu_speculation_barrier();
360 switch (cmd) {
361 case NVGPU_AS_IOCTL_BIND_CHANNEL:
362 trace_gk20a_as_ioctl_bind_channel(g->name);
363 err = gk20a_as_ioctl_bind_channel(as_share,
364 (struct nvgpu_as_bind_channel_args *)buf);
365
366 break;
367 case NVGPU32_AS_IOCTL_ALLOC_SPACE:
368 {
369 struct nvgpu32_as_alloc_space_args *args32 =
370 (struct nvgpu32_as_alloc_space_args *)buf;
371 struct nvgpu_as_alloc_space_args args;
372
373 args.pages = args32->pages;
374 args.page_size = args32->page_size;
375 args.flags = args32->flags;
376 args.o_a.offset = args32->o_a.offset;
377 trace_gk20a_as_ioctl_alloc_space(g->name);
378 err = gk20a_as_ioctl_alloc_space(as_share, &args);
379 args32->o_a.offset = args.o_a.offset;
380 break;
381 }
382 case NVGPU_AS_IOCTL_ALLOC_SPACE:
383 trace_gk20a_as_ioctl_alloc_space(g->name);
384 err = gk20a_as_ioctl_alloc_space(as_share,
385 (struct nvgpu_as_alloc_space_args *)buf);
386 break;
387 case NVGPU_AS_IOCTL_FREE_SPACE:
388 trace_gk20a_as_ioctl_free_space(g->name);
389 err = gk20a_as_ioctl_free_space(as_share,
390 (struct nvgpu_as_free_space_args *)buf);
391 break;
392 case NVGPU_AS_IOCTL_MAP_BUFFER_EX:
393 trace_gk20a_as_ioctl_map_buffer(g->name);
394 err = gk20a_as_ioctl_map_buffer_ex(as_share,
395 (struct nvgpu_as_map_buffer_ex_args *)buf);
396 break;
397 case NVGPU_AS_IOCTL_UNMAP_BUFFER:
398 trace_gk20a_as_ioctl_unmap_buffer(g->name);
399 err = gk20a_as_ioctl_unmap_buffer(as_share,
400 (struct nvgpu_as_unmap_buffer_args *)buf);
401 break;
402 case NVGPU_AS_IOCTL_GET_VA_REGIONS:
403 trace_gk20a_as_ioctl_get_va_regions(g->name);
404 err = gk20a_as_ioctl_get_va_regions(as_share,
405 (struct nvgpu_as_get_va_regions_args *)buf);
406 break;
407 case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
408 err = gk20a_as_ioctl_map_buffer_batch(as_share,
409 (struct nvgpu_as_map_buffer_batch_args *)buf);
410 break;
411 case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP:
412 err = nvgpu_as_ioctl_get_sync_ro_map(as_share,
413 (struct nvgpu_as_get_sync_ro_map_args *)buf);
414 break;
415 default:
416 err = -ENOTTY;
417 break;
418 }
419
420 gk20a_idle(g);
421
422 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
423 if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
424 err = -EFAULT;
425
426 return err;
427}
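
gk20a_as_dev_ioctl() uses the usual single-buffer marshaling idiom: validate the command number and size, copy the argument struct into a fixed on-stack buffer when the direction includes _IOC_WRITE, dispatch on cmd, and copy the struct back only when _IOC_READ is set and the handler succeeded. The channel and ctrl ioctls in this tree follow the same skeleton. A stripped-down sketch of it; MYDEV_MAX_ARG_SIZE and mydev_ioctl are placeholders, not nvgpu symbols:

#include <linux/fs.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>

#define MYDEV_MAX_ARG_SIZE 128

static long mydev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	u8 buf[MYDEV_MAX_ARG_SIZE];
	int err = 0;

	if (_IOC_SIZE(cmd) > sizeof(buf))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	switch (_IOC_NR(cmd)) {
	/* per-command handlers work on the struct sitting in buf */
	default:
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;

	return err;
}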
diff --git a/include/os/linux/ioctl_as.h b/include/os/linux/ioctl_as.h
deleted file mode 100644
index b3de378..0000000
--- a/include/os/linux/ioctl_as.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * GK20A Address Spaces
3 *
4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15#ifndef __NVGPU_COMMON_LINUX_AS_H__
16#define __NVGPU_COMMON_LINUX_AS_H__
17
18struct inode;
19struct file;
20
21/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and
22 * num_maps */
23#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256
24
25/* struct file_operations driver interface */
26int gk20a_as_dev_open(struct inode *inode, struct file *filp);
27int gk20a_as_dev_release(struct inode *inode, struct file *filp);
28long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
29
30#endif
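
NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT caps both num_maps and num_unmaps at 256 per NVGPU_AS_IOCTL_MAP_BUFFER_BATCH call, so a userspace client with a larger batch has to split it. A hedged userspace-side sketch of that chunking; it assumes the uapi structs and ioctl numbers from <linux/nvgpu.h> (the install path may differ) and an already-open address-space fd:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* uapi structs and ioctl numbers; path is an assumption */

#define BATCH_LIMIT 256		/* mirrors NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT */

/* Submit a large map batch in chunks of at most BATCH_LIMIT entries. */
static int submit_map_batch(int as_fd,
			    struct nvgpu_as_map_buffer_ex_args *maps,
			    uint32_t count)
{
	struct nvgpu_as_map_buffer_batch_args args;
	uint32_t done = 0;

	while (done < count) {
		uint32_t n = count - done;

		if (n > BATCH_LIMIT)
			n = BATCH_LIMIT;

		memset(&args, 0, sizeof(args));
		args.maps = (uintptr_t)&maps[done];
		args.num_maps = n;

		if (ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_BATCH, &args) != 0)
			return -1;	/* errno carries the failure reason */

		done += n;
	}
	return 0;
}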
diff --git a/include/os/linux/ioctl_channel.c b/include/os/linux/ioctl_channel.c
deleted file mode 100644
index 0f39cc7..0000000
--- a/include/os/linux/ioctl_channel.c
+++ /dev/null
@@ -1,1388 +0,0 @@
1/*
2 * GK20A Graphics channel
3 *
4 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <trace/events/gk20a.h>
20#include <linux/file.h>
21#include <linux/anon_inodes.h>
22#include <linux/dma-buf.h>
23#include <linux/poll.h>
24#include <uapi/linux/nvgpu.h>
25
26#include <nvgpu/semaphore.h>
27#include <nvgpu/timers.h>
28#include <nvgpu/kmem.h>
29#include <nvgpu/log.h>
30#include <nvgpu/list.h>
31#include <nvgpu/debug.h>
32#include <nvgpu/enabled.h>
33#include <nvgpu/error_notifier.h>
34#include <nvgpu/barrier.h>
35#include <nvgpu/nvhost.h>
36#include <nvgpu/os_sched.h>
37#include <nvgpu/gk20a.h>
38#include <nvgpu/channel.h>
39#include <nvgpu/channel_sync.h>
40
41#include "gk20a/dbg_gpu_gk20a.h"
42#include "gk20a/fence_gk20a.h"
43
44#include "platform_gk20a.h"
45#include "ioctl_channel.h"
46#include "channel.h"
47#include "os_linux.h"
48#include "ctxsw_trace.h"
49
50/* the minimal size of client buffer */
51#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
52 (sizeof(struct gk20a_cs_snapshot_fifo) + \
53 sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
54
55static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
56{
57 switch (graphics_preempt_mode) {
58 case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
59 return "WFI";
60 default:
61 return "?";
62 }
63}
64
65static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode)
66{
67 switch (compute_preempt_mode) {
68 case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
69 return "WFI";
70 case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
71 return "CTA";
72 default:
73 return "?";
74 }
75}
76
77static void gk20a_channel_trace_sched_param(
78 void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice,
79 u32 timeout, const char *interleave,
80 const char *graphics_preempt_mode,
81 const char *compute_preempt_mode),
82 struct channel_gk20a *ch)
83{
84 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
85
86 if (!tsg)
87 return;
88
89 (trace)(ch->chid, ch->tsgid, ch->pid,
90 tsg_gk20a_from_ch(ch)->timeslice_us,
91 ch->timeout_ms_max,
92 gk20a_fifo_interleave_level_name(tsg->interleave_level),
93 gr_gk20a_graphics_preempt_mode_name(
94 tsg->gr_ctx.graphics_preempt_mode),
95 gr_gk20a_compute_preempt_mode_name(
96 tsg->gr_ctx.compute_preempt_mode));
97}
98
99/*
100 * Although channels do have pointers back to the gk20a struct that they were
101 * created under in cases where the driver is killed that pointer can be bad.
102 * The channel memory can be freed before the release() function for a given
103 * channel is called. This happens when the driver dies and userspace doesn't
104 * get a chance to call release() until after the entire gk20a driver data is
105 * unloaded and freed.
106 */
107struct channel_priv {
108 struct gk20a *g;
109 struct channel_gk20a *c;
110};
111
112#if defined(CONFIG_GK20A_CYCLE_STATS)
113
114void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
115{
116 struct nvgpu_channel_linux *priv = ch->os_priv;
117
118 /* disable existing cyclestats buffer */
119 nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
120 if (priv->cyclestate_buffer_handler) {
121 dma_buf_vunmap(priv->cyclestate_buffer_handler,
122 ch->cyclestate.cyclestate_buffer);
123 dma_buf_put(priv->cyclestate_buffer_handler);
124 priv->cyclestate_buffer_handler = NULL;
125 ch->cyclestate.cyclestate_buffer = NULL;
126 ch->cyclestate.cyclestate_buffer_size = 0;
127 }
128 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
129}
130
131int gk20a_channel_cycle_stats(struct channel_gk20a *ch, int dmabuf_fd)
132{
133 struct dma_buf *dmabuf;
134 void *virtual_address;
135 struct nvgpu_channel_linux *priv = ch->os_priv;
136
137 /* is it allowed to handle calls for current GPU? */
138 if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS))
139 return -ENOSYS;
140
141 if (dmabuf_fd && !priv->cyclestate_buffer_handler) {
142
143 /* set up new cyclestats buffer */
144 dmabuf = dma_buf_get(dmabuf_fd);
145 if (IS_ERR(dmabuf))
146 return PTR_ERR(dmabuf);
147 virtual_address = dma_buf_vmap(dmabuf);
148 if (!virtual_address)
149 return -ENOMEM;
150
151 priv->cyclestate_buffer_handler = dmabuf;
152 ch->cyclestate.cyclestate_buffer = virtual_address;
153 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
154 return 0;
155
156 } else if (!dmabuf_fd && priv->cyclestate_buffer_handler) {
157 gk20a_channel_free_cycle_stats_buffer(ch);
158 return 0;
159
160 } else if (!dmabuf_fd && !priv->cyclestate_buffer_handler) {
161 /* no request from GL */
162 return 0;
163
164 } else {
165 pr_err("channel already has cyclestats buffer\n");
166 return -EINVAL;
167 }
168}
169
170int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
171{
172 int ret;
173
174 nvgpu_mutex_acquire(&ch->cs_client_mutex);
175 if (ch->cs_client)
176 ret = gr_gk20a_css_flush(ch, ch->cs_client);
177 else
178 ret = -EBADF;
179 nvgpu_mutex_release(&ch->cs_client_mutex);
180
181 return ret;
182}
183
184int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
185 u32 dmabuf_fd,
186 u32 perfmon_id_count,
187 u32 *perfmon_id_start)
188{
189 int ret = 0;
190 struct gk20a *g = ch->g;
191 struct gk20a_cs_snapshot_client_linux *client_linux;
192 struct gk20a_cs_snapshot_client *client;
193
194 nvgpu_mutex_acquire(&ch->cs_client_mutex);
195 if (ch->cs_client) {
196 nvgpu_mutex_release(&ch->cs_client_mutex);
197 return -EEXIST;
198 }
199
200 client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
201 if (!client_linux) {
202 ret = -ENOMEM;
203 goto err;
204 }
205
206 client_linux->dmabuf_fd = dmabuf_fd;
207 client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
208 if (IS_ERR(client_linux->dma_handler)) {
209 ret = PTR_ERR(client_linux->dma_handler);
210 client_linux->dma_handler = NULL;
211 goto err_free;
212 }
213
214 client = &client_linux->cs_client;
215 client->snapshot_size = client_linux->dma_handler->size;
216 if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
217 ret = -ENOMEM;
218 goto err_put;
219 }
220
221 client->snapshot = (struct gk20a_cs_snapshot_fifo *)
222 dma_buf_vmap(client_linux->dma_handler);
223 if (!client->snapshot) {
224 ret = -ENOMEM;
225 goto err_put;
226 }
227
228 ch->cs_client = client;
229
230 ret = gr_gk20a_css_attach(ch,
231 perfmon_id_count,
232 perfmon_id_start,
233 ch->cs_client);
234
235 nvgpu_mutex_release(&ch->cs_client_mutex);
236
237 return ret;
238
239err_put:
240 dma_buf_put(client_linux->dma_handler);
241err_free:
242 nvgpu_kfree(g, client_linux);
243err:
244 nvgpu_mutex_release(&ch->cs_client_mutex);
245 return ret;
246}
247
248int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
249{
250 int ret;
251 struct gk20a_cs_snapshot_client_linux *client_linux;
252
253 nvgpu_mutex_acquire(&ch->cs_client_mutex);
254 if (!ch->cs_client) {
255 nvgpu_mutex_release(&ch->cs_client_mutex);
256 return 0;
257 }
258
259 client_linux = container_of(ch->cs_client,
260 struct gk20a_cs_snapshot_client_linux,
261 cs_client);
262
263 ret = gr_gk20a_css_detach(ch, ch->cs_client);
264
265 if (client_linux->dma_handler) {
266 if (ch->cs_client->snapshot)
267 dma_buf_vunmap(client_linux->dma_handler,
268 ch->cs_client->snapshot);
269 dma_buf_put(client_linux->dma_handler);
270 }
271
272 ch->cs_client = NULL;
273 nvgpu_kfree(ch->g, client_linux);
274
275 nvgpu_mutex_release(&ch->cs_client_mutex);
276
277 return ret;
278}
279#endif
280
281static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
282 struct nvgpu_channel_wdt_args *args)
283{
284 u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT |
285 NVGPU_IOCTL_CHANNEL_ENABLE_WDT);
286
287 if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT)
288 ch->timeout.enabled = false;
289 else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT)
290 ch->timeout.enabled = true;
291 else
292 return -EINVAL;
293
294 if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT)
295 ch->timeout.limit_ms = args->timeout_ms;
296
297 ch->timeout.debug_dump = (args->wdt_status &
298 NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0;
299
300 return 0;
301}
302
303static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
304{
305 struct nvgpu_channel_linux *priv = ch->os_priv;
306
307 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
308 if (priv->error_notifier.dmabuf) {
309 dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
310 dma_buf_put(priv->error_notifier.dmabuf);
311 priv->error_notifier.dmabuf = NULL;
312 priv->error_notifier.notification = NULL;
313 priv->error_notifier.vaddr = NULL;
314 }
315 nvgpu_mutex_release(&priv->error_notifier.mutex);
316}
317
318static int gk20a_init_error_notifier(struct channel_gk20a *ch,
319 struct nvgpu_set_error_notifier *args)
320{
321 struct dma_buf *dmabuf;
322 void *va;
323 u64 end = args->offset + sizeof(struct nvgpu_notification);
324 struct nvgpu_channel_linux *priv = ch->os_priv;
325
326 if (!args->mem) {
327 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
328 return -EINVAL;
329 }
330
331 dmabuf = dma_buf_get(args->mem);
332
333 gk20a_channel_free_error_notifiers(ch);
334
335 if (IS_ERR(dmabuf)) {
336 pr_err("Invalid handle: %d\n", args->mem);
337 return -EINVAL;
338 }
339
340 if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
341 dma_buf_put(dmabuf);
342 nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset");
343 return -EINVAL;
344 }
345
346 nvgpu_speculation_barrier();
347
348 /* map handle */
349 va = dma_buf_vmap(dmabuf);
350 if (!va) {
351 dma_buf_put(dmabuf);
352 pr_err("Cannot map notifier handle\n");
353 return -ENOMEM;
354 }
355
356 priv->error_notifier.notification = va + args->offset;
357 priv->error_notifier.vaddr = va;
358 memset(priv->error_notifier.notification, 0,
359 sizeof(struct nvgpu_notification));
360
361 /* set channel notifiers pointer */
362 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
363 priv->error_notifier.dmabuf = dmabuf;
364 nvgpu_mutex_release(&priv->error_notifier.mutex);
365
366 return 0;
367}
368
369/*
370 * This returns the channel with a reference. The caller must
371 * gk20a_channel_put() the ref back after use.
372 *
373 * NULL is returned if the channel was not found.
374 */
375struct channel_gk20a *gk20a_get_channel_from_file(int fd)
376{
377 struct channel_gk20a *ch;
378 struct channel_priv *priv;
379 struct file *f = fget(fd);
380
381 if (!f)
382 return NULL;
383
384 if (f->f_op != &gk20a_channel_ops) {
385 fput(f);
386 return NULL;
387 }
388
389 priv = (struct channel_priv *)f->private_data;
390 ch = gk20a_channel_get(priv->c);
391 fput(f);
392 return ch;
393}
394
395int gk20a_channel_release(struct inode *inode, struct file *filp)
396{
397 struct channel_priv *priv = filp->private_data;
398 struct channel_gk20a *ch;
399 struct gk20a *g;
400
401 int err;
402
403 /* We could still end up here even if the channel_open failed, e.g.
404 * if we ran out of hw channel IDs.
405 */
406 if (!priv)
407 return 0;
408
409 ch = priv->c;
410 g = priv->g;
411
412 err = gk20a_busy(g);
413 if (err) {
414 nvgpu_err(g, "failed to release a channel!");
415 goto channel_release;
416 }
417
418 trace_gk20a_channel_release(dev_name(dev_from_gk20a(g)));
419
420 gk20a_channel_close(ch);
421 gk20a_channel_free_error_notifiers(ch);
422
423 gk20a_idle(g);
424
425channel_release:
426 gk20a_put(g);
427 nvgpu_kfree(g, filp->private_data);
428 filp->private_data = NULL;
429 return 0;
430}
431
432/* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */
433static int __gk20a_channel_open(struct gk20a *g,
434 struct file *filp, s32 runlist_id)
435{
436 int err;
437 struct channel_gk20a *ch;
438 struct channel_priv *priv;
439
440 nvgpu_log_fn(g, " ");
441
442 g = gk20a_get(g);
443 if (!g)
444 return -ENODEV;
445
446 trace_gk20a_channel_open(dev_name(dev_from_gk20a(g)));
447
448 priv = nvgpu_kzalloc(g, sizeof(*priv));
449 if (!priv) {
450 err = -ENOMEM;
451 goto free_ref;
452 }
453
454 err = gk20a_busy(g);
455 if (err) {
456 nvgpu_err(g, "failed to power on, %d", err);
457 goto fail_busy;
458 }
459	/* All user space channels should be non-privileged */
460 ch = gk20a_open_new_channel(g, runlist_id, false,
461 nvgpu_current_pid(g), nvgpu_current_tid(g));
462 gk20a_idle(g);
463 if (!ch) {
464 nvgpu_err(g,
465 "failed to get f");
466 err = -ENOMEM;
467 goto fail_busy;
468 }
469
470 gk20a_channel_trace_sched_param(
471 trace_gk20a_channel_sched_defaults, ch);
472
473 priv->g = g;
474 priv->c = ch;
475
476 filp->private_data = priv;
477 return 0;
478
479fail_busy:
480 nvgpu_kfree(g, priv);
481free_ref:
482 gk20a_put(g);
483 return err;
484}
485
486int gk20a_channel_open(struct inode *inode, struct file *filp)
487{
488 struct nvgpu_os_linux *l = container_of(inode->i_cdev,
489 struct nvgpu_os_linux, channel.cdev);
490 struct gk20a *g = &l->g;
491 int ret;
492
493 nvgpu_log_fn(g, "start");
494 ret = __gk20a_channel_open(g, filp, -1);
495
496 nvgpu_log_fn(g, "end");
497 return ret;
498}
499
500int gk20a_channel_open_ioctl(struct gk20a *g,
501 struct nvgpu_channel_open_args *args)
502{
503 int err;
504 int fd;
505 struct file *file;
506 char name[64];
507 s32 runlist_id = args->in.runlist_id;
508 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
509
510 err = get_unused_fd_flags(O_RDWR);
511 if (err < 0)
512 return err;
513 fd = err;
514
515 snprintf(name, sizeof(name), "nvhost-%s-fd%d",
516 dev_name(dev_from_gk20a(g)), fd);
517
518 file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR);
519 if (IS_ERR(file)) {
520 err = PTR_ERR(file);
521 goto clean_up;
522 }
523
524 err = __gk20a_channel_open(g, file, runlist_id);
525 if (err)
526 goto clean_up_file;
527
528 fd_install(fd, file);
529 args->out.channel_fd = fd;
530 return 0;
531
532clean_up_file:
533 fput(file);
534clean_up:
535 put_unused_fd(fd);
536 return err;
537}
538
539static u32 nvgpu_setup_bind_user_flags_to_common_flags(u32 user_flags)
540{
541 u32 flags = 0;
542
543 if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_VPR_ENABLED)
544 flags |= NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR;
545
546 if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_DETERMINISTIC)
547 flags |= NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC;
548
549 if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE)
550 flags |= NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE;
551
552 if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT)
553 flags |= NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT;
554
555 return flags;
556}
557
558static void nvgpu_get_setup_bind_args(
559 struct nvgpu_channel_setup_bind_args *channel_setup_bind_args,
560 struct nvgpu_setup_bind_args *setup_bind_args)
561{
562 setup_bind_args->num_gpfifo_entries =
563 channel_setup_bind_args->num_gpfifo_entries;
564 setup_bind_args->num_inflight_jobs =
565 channel_setup_bind_args->num_inflight_jobs;
566 setup_bind_args->userd_dmabuf_fd =
567 channel_setup_bind_args->userd_dmabuf_fd;
568 setup_bind_args->userd_dmabuf_offset =
569 channel_setup_bind_args->userd_dmabuf_offset;
570 setup_bind_args->gpfifo_dmabuf_fd =
571 channel_setup_bind_args->gpfifo_dmabuf_fd;
572 setup_bind_args->gpfifo_dmabuf_offset =
573 channel_setup_bind_args->gpfifo_dmabuf_offset;
574 setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags(
575 channel_setup_bind_args->flags);
576}
577
578static void nvgpu_get_gpfifo_ex_args(
579 struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args,
580 struct nvgpu_setup_bind_args *setup_bind_args)
581{
582 setup_bind_args->num_gpfifo_entries = alloc_gpfifo_ex_args->num_entries;
583 setup_bind_args->num_inflight_jobs =
584 alloc_gpfifo_ex_args->num_inflight_jobs;
585 setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags(
586 alloc_gpfifo_ex_args->flags);
587}
588
589static void nvgpu_get_gpfifo_args(
590 struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args,
591 struct nvgpu_setup_bind_args *setup_bind_args)
592{
593 /*
594 * Kernel can insert one extra gpfifo entry before user
595 * submitted gpfifos and another one after, for internal usage.
596 * Triple the requested size.
597 */
598 setup_bind_args->num_gpfifo_entries =
599 alloc_gpfifo_args->num_entries * 3;
600 setup_bind_args->num_inflight_jobs = 0;
601 setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags(
602 alloc_gpfifo_args->flags);
603}
604
605static void nvgpu_get_fence_args(
606 struct nvgpu_fence *fence_args_in,
607 struct nvgpu_channel_fence *fence_args_out)
608{
609 fence_args_out->id = fence_args_in->id;
610 fence_args_out->value = fence_args_in->value;
611}
612
613static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
614 ulong id, u32 offset,
615 u32 payload, u32 timeout)
616{
617 struct dma_buf *dmabuf;
618 void *data;
619 u32 *semaphore;
620 int ret = 0;
621
622 /* do not wait if channel has timed out */
623 if (gk20a_channel_check_timedout(ch)) {
624 return -ETIMEDOUT;
625 }
626
627 dmabuf = dma_buf_get(id);
628 if (IS_ERR(dmabuf)) {
629 nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id);
630 return -EINVAL;
631 }
632
633 data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
634 if (!data) {
635 nvgpu_err(ch->g, "failed to map notifier memory");
636 ret = -EINVAL;
637 goto cleanup_put;
638 }
639
640 semaphore = data + (offset & ~PAGE_MASK);
641
642 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
643 &ch->semaphore_wq,
644 *semaphore == payload ||
645 gk20a_channel_check_timedout(ch),
646 timeout);
647
648 dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
649cleanup_put:
650 dma_buf_put(dmabuf);
651 return ret;
652}
653
654static int gk20a_channel_wait(struct channel_gk20a *ch,
655 struct nvgpu_wait_args *args)
656{
657 struct dma_buf *dmabuf;
658 struct gk20a *g = ch->g;
659 struct notification *notif;
660 struct timespec tv;
661 u64 jiffies;
662 ulong id;
663 u32 offset;
664 int remain, ret = 0;
665 u64 end;
666
667 nvgpu_log_fn(g, " ");
668
669 if (gk20a_channel_check_timedout(ch)) {
670 return -ETIMEDOUT;
671 }
672
673 switch (args->type) {
674 case NVGPU_WAIT_TYPE_NOTIFIER:
675 id = args->condition.notifier.dmabuf_fd;
676 offset = args->condition.notifier.offset;
677 end = offset + sizeof(struct notification);
678
679 dmabuf = dma_buf_get(id);
680 if (IS_ERR(dmabuf)) {
681 nvgpu_err(g, "invalid notifier nvmap handle 0x%lx",
682 id);
683 return -EINVAL;
684 }
685
686 if (end > dmabuf->size || end < sizeof(struct notification)) {
687 dma_buf_put(dmabuf);
688 nvgpu_err(g, "invalid notifier offset");
689 return -EINVAL;
690 }
691
692 nvgpu_speculation_barrier();
693
694 notif = dma_buf_vmap(dmabuf);
695 if (!notif) {
696 nvgpu_err(g, "failed to map notifier memory");
697 return -ENOMEM;
698 }
699
700 notif = (struct notification *)((uintptr_t)notif + offset);
701
702 /* user should set status pending before
703 * calling this ioctl */
704 remain = NVGPU_COND_WAIT_INTERRUPTIBLE(
705 &ch->notifier_wq,
706 notif->status == 0 ||
707 gk20a_channel_check_timedout(ch),
708 args->timeout);
709
710 if (remain == 0 && notif->status != 0) {
711 ret = -ETIMEDOUT;
712 goto notif_clean_up;
713 } else if (remain < 0) {
714 ret = -EINTR;
715 goto notif_clean_up;
716 }
717
718 /* TBD: fill in correct information */
719 jiffies = get_jiffies_64();
720 jiffies_to_timespec(jiffies, &tv);
721 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
722 notif->timestamp.nanoseconds[1] = tv.tv_sec;
723 notif->info32 = 0xDEADBEEF; /* should be object name */
724 notif->info16 = ch->chid; /* should be method offset */
725
726notif_clean_up:
727 dma_buf_vunmap(dmabuf, notif);
728 return ret;
729
730 case NVGPU_WAIT_TYPE_SEMAPHORE:
731 ret = gk20a_channel_wait_semaphore(ch,
732 args->condition.semaphore.dmabuf_fd,
733 args->condition.semaphore.offset,
734 args->condition.semaphore.payload,
735 args->timeout);
736
737 break;
738
739 default:
740 ret = -EINVAL;
741 break;
742 }
743
744 return ret;
745}
746
747static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
748 struct nvgpu_zcull_bind_args *args)
749{
750 struct gk20a *g = ch->g;
751 struct gr_gk20a *gr = &g->gr;
752
753 nvgpu_log_fn(gr->g, " ");
754
755 return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
756 args->gpu_va, args->mode);
757}
758
759static int gk20a_ioctl_channel_submit_gpfifo(
760 struct channel_gk20a *ch,
761 struct nvgpu_submit_gpfifo_args *args)
762{
763 struct nvgpu_channel_fence fence;
764 struct gk20a_fence *fence_out;
765 struct fifo_profile_gk20a *profile = NULL;
766 u32 submit_flags = 0;
767 int fd = -1;
768 struct gk20a *g = ch->g;
769 struct nvgpu_gpfifo_userdata userdata;
770
771 int ret = 0;
772 nvgpu_log_fn(g, " ");
773
774 profile = gk20a_fifo_profile_acquire(ch->g);
775 gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY);
776
777 if (gk20a_channel_check_timedout(ch)) {
778 return -ETIMEDOUT;
779 }
780
781 nvgpu_get_fence_args(&args->fence, &fence);
782 submit_flags =
783 nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
784
785 /* Try and allocate an fd here*/
786 if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
787 && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) {
788 fd = get_unused_fd_flags(O_RDWR);
789 if (fd < 0)
790 return fd;
791 }
792
793 userdata.entries = (struct nvgpu_gpfifo_entry __user *)
794 (uintptr_t)args->gpfifo;
795 userdata.context = NULL;
796
797 ret = nvgpu_submit_channel_gpfifo_user(ch,
798 userdata, args->num_entries,
799 submit_flags, &fence, &fence_out, profile);
800
801 if (ret) {
802 if (fd != -1)
803 put_unused_fd(fd);
804 goto clean_up;
805 }
806
807 /* Convert fence_out to something we can pass back to user space. */
808 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
809 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
810 ret = gk20a_fence_install_fd(fence_out, fd);
811 if (ret)
812 put_unused_fd(fd);
813 else
814 args->fence.id = fd;
815 } else {
816 args->fence.id = fence_out->syncpt_id;
817 args->fence.value = fence_out->syncpt_value;
818 }
819 }
820 gk20a_fence_put(fence_out);
821
822 gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT);
823 if (profile)
824 gk20a_fifo_profile_release(ch->g, profile);
825
826clean_up:
827 return ret;
828}
829
830/*
831 * Convert linux specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_*
832 * to common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_*
833 */
834u32 nvgpu_get_common_runlist_level(u32 level)
835{
836 nvgpu_speculation_barrier();
837 switch (level) {
838 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
839 return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
840 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
841 return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM;
842 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
843 return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH;
844 default:
845 pr_err("%s: incorrect runlist level\n", __func__);
846 }
847
848 return level;
849}
850
851static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags)
852{
853 u32 flags = 0;
854
855 if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP)
856 flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP;
857
858 if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
859 flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP;
860
861 return flags;
862}
863
864static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch,
865 u32 class_num, u32 user_flags)
866{
867 return ch->g->ops.gr.alloc_obj_ctx(ch, class_num,
868 nvgpu_obj_ctx_user_flags_to_common_flags(user_flags));
869}
870
871/*
872 * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_*
873 * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_*
874 */
875u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags)
876{
877 u32 flags = 0;
878
879 if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI)
880 flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI;
881 if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP)
882 flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
883
884 return flags;
885}
886
887/*
888 * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_*
889 * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_*
890 */
891u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags)
892{
893 u32 flags = 0;
894
895 if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI)
896 flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
897 if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA)
898 flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
899 if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP)
900 flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
901
902 return flags;
903}
904
905/*
906 * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_*
907 * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_*
908 */
909u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode)
910{
911 switch (graphics_preempt_mode) {
912 case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
913 return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI;
914 case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
915 return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
916 }
917
918 return graphics_preempt_mode;
919}
920
921/*
922 * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_*
923 * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_*
924 */
925u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode)
926{
927 switch (compute_preempt_mode) {
928 case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
929 return NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
930 case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
931 return NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
932 case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
933 return NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
934 }
935
936 return compute_preempt_mode;
937}
938
939/*
940 * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_*
941 * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_*
942 */
943static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
944{
945 nvgpu_speculation_barrier();
946 switch (graphics_preempt_mode) {
947 case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
948 return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
949 case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP:
950 return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
951 }
952
953 return graphics_preempt_mode;
954}
955
956/*
957 * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_*
958 * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_*
959 */
960static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode)
961{
962 nvgpu_speculation_barrier();
963 switch (compute_preempt_mode) {
964 case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
965 return NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
966 case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
967 return NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
968 case NVGPU_COMPUTE_PREEMPTION_MODE_CILP:
969 return NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
970 }
971
972 return compute_preempt_mode;
973}
974
975static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch,
976 u32 graphics_preempt_mode, u32 compute_preempt_mode)
977{
978 int err;
979
980 if (ch->g->ops.gr.set_preemption_mode) {
981 err = gk20a_busy(ch->g);
982 if (err) {
983 nvgpu_err(ch->g, "failed to power on, %d", err);
984 return err;
985 }
986 err = ch->g->ops.gr.set_preemption_mode(ch,
987 nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode),
988 nvgpu_get_common_compute_preempt_mode(compute_preempt_mode));
989 gk20a_idle(ch->g);
990 } else {
991 err = -EINVAL;
992 }
993
994 return err;
995}
996
997static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch,
998 struct nvgpu_get_user_syncpoint_args *args)
999{
1000#ifdef CONFIG_TEGRA_GK20A_NVHOST
1001 struct gk20a *g = ch->g;
1002 int err;
1003
1004 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) {
1005 nvgpu_err(g, "user syncpoints not supported");
1006 return -EINVAL;
1007 }
1008
1009 if (!nvgpu_has_syncpoints(g)) {
1010 nvgpu_err(g, "syncpoints not supported");
1011 return -EINVAL;
1012 }
1013
1014 if (g->aggressive_sync_destroy_thresh) {
1015 nvgpu_err(g, "sufficient syncpoints not available");
1016 return -EINVAL;
1017 }
1018
1019 nvgpu_mutex_acquire(&ch->sync_lock);
1020 if (ch->user_sync) {
1021 nvgpu_mutex_release(&ch->sync_lock);
1022 } else {
1023 ch->user_sync = nvgpu_channel_sync_create(ch, true);
1024 if (!ch->user_sync) {
1025 nvgpu_mutex_release(&ch->sync_lock);
1026 return -ENOMEM;
1027 }
1028 nvgpu_mutex_release(&ch->sync_lock);
1029
1030 if (g->ops.fifo.resetup_ramfc) {
1031 err = g->ops.fifo.resetup_ramfc(ch);
1032 if (err)
1033 return err;
1034 }
1035 }
1036
1037 args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync);
1038 args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev,
1039 args->syncpoint_id);
1040 if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS))
1041 args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync);
1042 else
1043 args->gpu_va = 0;
1044
1045 return 0;
1046#else
1047 return -EINVAL;
1048#endif
1049}
1050
1051long gk20a_channel_ioctl(struct file *filp,
1052 unsigned int cmd, unsigned long arg)
1053{
1054 struct channel_priv *priv = filp->private_data;
1055 struct channel_gk20a *ch = priv->c;
1056 struct device *dev = dev_from_gk20a(ch->g);
1057 u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0};
1058 int err = 0;
1059 struct gk20a *g = ch->g;
1060
1061 nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
1062
1063 if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
1064 (_IOC_NR(cmd) == 0) ||
1065 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
1066 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
1067 return -EINVAL;
1068
1069 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1070 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
1071 return -EFAULT;
1072 }
1073
1074 /* take a ref or return timeout if channel refs can't be taken */
1075 ch = gk20a_channel_get(ch);
1076 if (!ch)
1077 return -ETIMEDOUT;
1078
1079 /* protect our sanity for threaded userspace - most of the channel is
1080 * not thread safe */
1081 nvgpu_mutex_acquire(&ch->ioctl_lock);
1082
1083 /* this ioctl call keeps a ref to the file which keeps a ref to the
1084 * channel */
1085
1086 nvgpu_speculation_barrier();
1087 switch (cmd) {
1088 case NVGPU_IOCTL_CHANNEL_OPEN:
1089 err = gk20a_channel_open_ioctl(ch->g,
1090 (struct nvgpu_channel_open_args *)buf);
1091 break;
1092 case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
1093 break;
1094 case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
1095 {
1096 struct nvgpu_alloc_obj_ctx_args *args =
1097 (struct nvgpu_alloc_obj_ctx_args *)buf;
1098
1099 err = gk20a_busy(ch->g);
1100 if (err) {
1101 dev_err(dev,
1102 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1103 __func__, cmd);
1104 break;
1105 }
1106 err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags);
1107 gk20a_idle(ch->g);
1108 break;
1109 }
1110 case NVGPU_IOCTL_CHANNEL_SETUP_BIND:
1111 {
1112 struct nvgpu_channel_setup_bind_args *channel_setup_bind_args =
1113 (struct nvgpu_channel_setup_bind_args *)buf;
1114 struct nvgpu_setup_bind_args setup_bind_args;
1115
1116 nvgpu_get_setup_bind_args(channel_setup_bind_args,
1117 &setup_bind_args);
1118
1119 err = gk20a_busy(ch->g);
1120 if (err) {
1121 dev_err(dev,
1122 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1123 __func__, cmd);
1124 break;
1125 }
1126
1127 if (!is_power_of_2(setup_bind_args.num_gpfifo_entries)) {
1128 err = -EINVAL;
1129 gk20a_idle(ch->g);
1130 break;
1131 }
1132 err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
1133 channel_setup_bind_args->work_submit_token =
1134 setup_bind_args.work_submit_token;
1135 gk20a_idle(ch->g);
1136 break;
1137 }
1138 case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX:
1139 {
1140 struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args =
1141 (struct nvgpu_alloc_gpfifo_ex_args *)buf;
1142 struct nvgpu_setup_bind_args setup_bind_args;
1143
1144 nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &setup_bind_args);
1145
1146 err = gk20a_busy(ch->g);
1147 if (err) {
1148 dev_err(dev,
1149 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1150 __func__, cmd);
1151 break;
1152 }
1153
1154 if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) {
1155 err = -EINVAL;
1156 gk20a_idle(ch->g);
1157 break;
1158 }
1159 err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
1160 gk20a_idle(ch->g);
1161 break;
1162 }
1163 case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
1164 {
1165 struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args =
1166 (struct nvgpu_alloc_gpfifo_args *)buf;
1167 struct nvgpu_setup_bind_args setup_bind_args;
1168
1169 nvgpu_get_gpfifo_args(alloc_gpfifo_args, &setup_bind_args);
1170
1171 err = gk20a_busy(ch->g);
1172 if (err) {
1173 dev_err(dev,
1174 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1175 __func__, cmd);
1176 break;
1177 }
1178
1179 err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
1180 gk20a_idle(ch->g);
1181 break;
1182 }
1183 case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
1184 err = gk20a_ioctl_channel_submit_gpfifo(ch,
1185 (struct nvgpu_submit_gpfifo_args *)buf);
1186 break;
1187 case NVGPU_IOCTL_CHANNEL_WAIT:
1188 err = gk20a_busy(ch->g);
1189 if (err) {
1190 dev_err(dev,
1191 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1192 __func__, cmd);
1193 break;
1194 }
1195
1196		/* waiting is thread-safe; failing to drop this mutex here could
1197		 * deadlock under certain conditions */
1198 nvgpu_mutex_release(&ch->ioctl_lock);
1199
1200 err = gk20a_channel_wait(ch,
1201 (struct nvgpu_wait_args *)buf);
1202
1203 nvgpu_mutex_acquire(&ch->ioctl_lock);
1204
1205 gk20a_idle(ch->g);
1206 break;
1207 case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
1208 err = gk20a_busy(ch->g);
1209 if (err) {
1210 dev_err(dev,
1211 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1212 __func__, cmd);
1213 break;
1214 }
1215 err = gk20a_channel_zcull_bind(ch,
1216 (struct nvgpu_zcull_bind_args *)buf);
1217 gk20a_idle(ch->g);
1218 break;
1219 case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
1220 err = gk20a_busy(ch->g);
1221 if (err) {
1222 dev_err(dev,
1223 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1224 __func__, cmd);
1225 break;
1226 }
1227 err = gk20a_init_error_notifier(ch,
1228 (struct nvgpu_set_error_notifier *)buf);
1229 gk20a_idle(ch->g);
1230 break;
1231 case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
1232 {
1233 u32 timeout =
1234 (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
1235 nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
1236 timeout, ch->chid);
1237 ch->timeout_ms_max = timeout;
1238 gk20a_channel_trace_sched_param(
1239 trace_gk20a_channel_set_timeout, ch);
1240 break;
1241 }
1242 case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
1243 {
1244 u32 timeout =
1245 (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
1246 bool timeout_debug_dump = !((u32)
1247 ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
1248 (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
1249 nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
1250 timeout, ch->chid);
1251 ch->timeout_ms_max = timeout;
1252 ch->timeout_debug_dump = timeout_debug_dump;
1253 gk20a_channel_trace_sched_param(
1254 trace_gk20a_channel_set_timeout, ch);
1255 break;
1256 }
1257 case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
1258 ((struct nvgpu_get_param_args *)buf)->value =
1259 gk20a_channel_check_timedout(ch);
1260 break;
1261 case NVGPU_IOCTL_CHANNEL_ENABLE:
1262 err = gk20a_busy(ch->g);
1263 if (err) {
1264 dev_err(dev,
1265 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1266 __func__, cmd);
1267 break;
1268 }
1269 if (ch->g->ops.fifo.enable_channel)
1270 ch->g->ops.fifo.enable_channel(ch);
1271 else
1272 err = -ENOSYS;
1273 gk20a_idle(ch->g);
1274 break;
1275 case NVGPU_IOCTL_CHANNEL_DISABLE:
1276 err = gk20a_busy(ch->g);
1277 if (err) {
1278 dev_err(dev,
1279 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1280 __func__, cmd);
1281 break;
1282 }
1283 if (ch->g->ops.fifo.disable_channel)
1284 ch->g->ops.fifo.disable_channel(ch);
1285 else
1286 err = -ENOSYS;
1287 gk20a_idle(ch->g);
1288 break;
1289 case NVGPU_IOCTL_CHANNEL_PREEMPT:
1290 err = gk20a_busy(ch->g);
1291 if (err) {
1292 dev_err(dev,
1293 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1294 __func__, cmd);
1295 break;
1296 }
1297 err = gk20a_fifo_preempt(ch->g, ch);
1298 gk20a_idle(ch->g);
1299 break;
1300 case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST:
1301 if (!capable(CAP_SYS_NICE)) {
1302 err = -EPERM;
1303 break;
1304 }
1305 if (!ch->g->ops.fifo.reschedule_runlist) {
1306 err = -ENOSYS;
1307 break;
1308 }
1309 err = gk20a_busy(ch->g);
1310 if (err) {
1311 dev_err(dev,
1312 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1313 __func__, cmd);
1314 break;
1315 }
1316 err = ch->g->ops.fifo.reschedule_runlist(ch,
1317 NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT &
1318 ((struct nvgpu_reschedule_runlist_args *)buf)->flags);
1319 gk20a_idle(ch->g);
1320 break;
1321 case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
1322 err = gk20a_busy(ch->g);
1323 if (err) {
1324 dev_err(dev,
1325 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1326 __func__, cmd);
1327 break;
1328 }
1329 err = ch->g->ops.fifo.force_reset_ch(ch,
1330 NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
1331 gk20a_idle(ch->g);
1332 break;
1333 case NVGPU_IOCTL_CHANNEL_WDT:
1334 err = gk20a_channel_set_wdt_status(ch,
1335 (struct nvgpu_channel_wdt_args *)buf);
1336 break;
1337 case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE:
1338 err = nvgpu_ioctl_channel_set_preemption_mode(ch,
1339 ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode,
1340 ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode);
1341 break;
1342 case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX:
1343 if (ch->g->ops.gr.set_boosted_ctx) {
1344 bool boost =
1345 ((struct nvgpu_boosted_ctx_args *)buf)->boost;
1346
1347 err = gk20a_busy(ch->g);
1348 if (err) {
1349 dev_err(dev,
1350 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1351 __func__, cmd);
1352 break;
1353 }
1354 err = ch->g->ops.gr.set_boosted_ctx(ch, boost);
1355 gk20a_idle(ch->g);
1356 } else {
1357 err = -EINVAL;
1358 }
1359 break;
1360 case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT:
1361 err = gk20a_busy(ch->g);
1362 if (err) {
1363 dev_err(dev,
1364 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1365 __func__, cmd);
1366 break;
1367 }
1368 err = nvgpu_ioctl_channel_get_user_syncpoint(ch,
1369 (struct nvgpu_get_user_syncpoint_args *)buf);
1370 gk20a_idle(ch->g);
1371 break;
1372 default:
1373 dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd);
1374 err = -ENOTTY;
1375 break;
1376 }
1377
1378 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
1379 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
1380
1381 nvgpu_mutex_release(&ch->ioctl_lock);
1382
1383 gk20a_channel_put(ch);
1384
1385 nvgpu_log_fn(g, "end");
1386
1387 return err;
1388}
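
For reference, this dispatcher can be exercised from userspace with a plain ioctl() call; a minimal sketch for one of the simpler commands (the channel fd, the 2000 ms value and the uapi header path are illustrative assumptions, not taken from this file):

/*
 * Set a 2000 ms watchdog timeout on an already-open nvgpu channel fd.
 * NVGPU_IOCTL_CHANNEL_SET_TIMEOUT carries its argument in the write
 * direction, so the dispatcher above copies the struct in before the
 * switch and copies nothing back afterwards.
 */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* exact uapi header path may vary per SDK */

static int set_channel_timeout(int channel_fd)
{
	struct nvgpu_set_timeout_args args = { .timeout = 2000 };

	return ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_SET_TIMEOUT, &args);
}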
diff --git a/include/os/linux/ioctl_channel.h b/include/os/linux/ioctl_channel.h
deleted file mode 100644
index 3e80289..0000000
--- a/include/os/linux/ioctl_channel.h
+++ /dev/null
@@ -1,57 +0,0 @@
1/*
2 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef __NVGPU_IOCTL_CHANNEL_H__
14#define __NVGPU_IOCTL_CHANNEL_H__
15
16#include <linux/fs.h>
17
18#include "gk20a/css_gr_gk20a.h"
19
20struct inode;
21struct file;
22struct gk20a;
23struct nvgpu_channel_open_args;
24
25struct gk20a_cs_snapshot_client_linux {
26 struct gk20a_cs_snapshot_client cs_client;
27
28 u32 dmabuf_fd;
29 struct dma_buf *dma_handler;
30};
31
32int gk20a_channel_open(struct inode *inode, struct file *filp);
33int gk20a_channel_release(struct inode *inode, struct file *filp);
34long gk20a_channel_ioctl(struct file *filp,
35 unsigned int cmd, unsigned long arg);
36int gk20a_channel_open_ioctl(struct gk20a *g,
37 struct nvgpu_channel_open_args *args);
38
39int gk20a_channel_cycle_stats(struct channel_gk20a *ch, int dmabuf_fd);
40void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);
41
42int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
43 u32 dmabuf_fd,
44 u32 perfmon_id_count,
45 u32 *perfmon_id_start);
46int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch);
47int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
48
49extern const struct file_operations gk20a_channel_ops;
50
51u32 nvgpu_get_common_runlist_level(u32 level);
52
53u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags);
54u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags);
55u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode);
56u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode);
57#endif
diff --git a/include/os/linux/ioctl_clk_arb.c b/include/os/linux/ioctl_clk_arb.c
deleted file mode 100644
index 9f32102..0000000
--- a/include/os/linux/ioctl_clk_arb.c
+++ /dev/null
@@ -1,583 +0,0 @@
1/*
2 * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/cdev.h>
18#include <linux/file.h>
19#include <linux/anon_inodes.h>
20#include <linux/uaccess.h>
21#include <linux/poll.h>
22#ifdef CONFIG_DEBUG_FS
23#include <linux/debugfs.h>
24#endif
25#include <uapi/linux/nvgpu.h>
26
27#include <nvgpu/bitops.h>
28#include <nvgpu/lock.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/atomic.h>
31#include <nvgpu/bug.h>
32#include <nvgpu/kref.h>
33#include <nvgpu/log.h>
34#include <nvgpu/barrier.h>
35#include <nvgpu/cond.h>
36#include <nvgpu/list.h>
37#include <nvgpu/clk_arb.h>
38#include <nvgpu/gk20a.h>
39
40#include "clk/clk.h"
41#include "pstate/pstate.h"
42#include "lpwr/lpwr.h"
43#include "volt/volt.h"
44
45#ifdef CONFIG_DEBUG_FS
46#include "os_linux.h"
47#endif
48
49static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
50 struct file *filp)
51{
52 struct nvgpu_clk_dev *dev = filp->private_data;
53 struct nvgpu_clk_session *session = dev->session;
54 struct gk20a *g = session->g;
55 struct nvgpu_clk_arb *arb = g->clk_arb;
56
57 clk_arb_dbg(g, " ");
58
59 nvgpu_spinlock_acquire(&session->session_lock);
60 nvgpu_spinlock_acquire(&arb->requests_lock);
61
62 nvgpu_list_del(&dev->node);
63
64 nvgpu_spinlock_release(&arb->requests_lock);
65 nvgpu_spinlock_release(&session->session_lock);
66
67	/* Drop the extra refcount taken in nvgpu_clk_arb_commit_request_fd
68	 * when events are not supported (iGPU without event support)
69 */
70 if (!arb->clk_arb_events_supported) {
71 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
72 }
73
74 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
75 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
76 return 0;
77}
78
79static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask)
80{
81 unsigned int poll_mask = 0;
82
83 if (nvgpu_poll_mask & NVGPU_POLLIN)
84 poll_mask |= POLLIN;
85 if (nvgpu_poll_mask & NVGPU_POLLPRI)
86 poll_mask |= POLLPRI;
87 if (nvgpu_poll_mask & NVGPU_POLLOUT)
88 poll_mask |= POLLOUT;
89 if (nvgpu_poll_mask & NVGPU_POLLRDNORM)
90 poll_mask |= POLLRDNORM;
91 if (nvgpu_poll_mask & NVGPU_POLLHUP)
92 poll_mask |= POLLHUP;
93
94 return poll_mask;
95}
96
97static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
98{
99 struct nvgpu_clk_dev *dev = filp->private_data;
100
101 clk_arb_dbg(dev->session->g, " ");
102
103 poll_wait(filp, &dev->readout_wq.wq, wait);
104 return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0));
105}
106
107void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev)
108{
109 nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
110}
111
112static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
113 struct file *filp)
114{
115 struct nvgpu_clk_dev *dev = filp->private_data;
116 struct nvgpu_clk_session *session = dev->session;
117 struct nvgpu_clk_arb *arb;
118
119 arb = session->g->clk_arb;
120
121 clk_arb_dbg(session->g, " ");
122
123 if (arb) {
124 nvgpu_spinlock_acquire(&arb->users_lock);
125 nvgpu_list_del(&dev->link);
126 nvgpu_spinlock_release(&arb->users_lock);
127 nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
128 }
129
130 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
131 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
132
133 return 0;
134}
135
136static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event)
137{
138 u32 nvgpu_gpu_event;
139
140 switch (nvgpu_event) {
141 case NVGPU_EVENT_VF_UPDATE:
142 nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE;
143 break;
144 case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE:
145 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE;
146 break;
147 case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE:
148 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE;
149 break;
150 case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED:
151 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED;
152 break;
153 case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED:
154 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED;
155 break;
156 case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD:
157 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD;
158 break;
159 case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD:
160 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD;
161 break;
162 case NVGPU_EVENT_ALARM_GPU_LOST:
163 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST;
164 break;
165 default:
166 /* Control shouldn't come here */
167 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1;
168 break;
169 }
170 return nvgpu_gpu_event;
171}
172
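/*
 * Reading of the helper below: the notification queue is treated as a
 * wrapping ring buffer. If the producer has lapped the consumer, head is
 * first advanced so that at most queue.size entries remain; _WRAPGTEQ then
 * checks (wrap-safe) whether an unread entry exists, and if so the oldest
 * one is consumed, translated into the NVGPU_GPU_EVENT_* space and head is
 * published back with nvgpu_atomic_set().
 */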
173static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
174 struct nvgpu_gpu_event_info *info) {
175
176 u32 tail, head;
177 u32 events = 0;
178 struct nvgpu_clk_notification *p_notif;
179
180 tail = nvgpu_atomic_read(&dev->queue.tail);
181 head = nvgpu_atomic_read(&dev->queue.head);
182
183 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;
184
185 if (_WRAPGTEQ(tail, head) && info) {
186 head++;
187 p_notif = &dev->queue.notifications[head % dev->queue.size];
188 events |= nvgpu_convert_gpu_event(p_notif->notification);
189 info->event_id = ffs(events) - 1;
190 info->timestamp = p_notif->timestamp;
191 nvgpu_atomic_set(&dev->queue.head, head);
192 }
193
194 return events;
195}
196
197static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
198 size_t size, loff_t *off)
199{
200 struct nvgpu_clk_dev *dev = filp->private_data;
201 struct nvgpu_gpu_event_info info;
202 ssize_t err;
203
204 clk_arb_dbg(dev->session->g,
205 "filp=%p, buf=%p, size=%zu", filp, buf, size);
206
207 if ((size - *off) < sizeof(info))
208 return 0;
209
210 memset(&info, 0, sizeof(info));
211 /* Get the oldest event from the queue */
212 while (!__pending_event(dev, &info)) {
213 if (filp->f_flags & O_NONBLOCK)
214 return -EAGAIN;
215 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
216 __pending_event(dev, &info), 0);
217 if (err)
218 return err;
219 if (info.timestamp)
220 break;
221 }
222
223 if (copy_to_user(buf + *off, &info, sizeof(info)))
224 return -EFAULT;
225
226 return sizeof(info);
227}
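
The event fd installed by nvgpu_clk_arb_install_event_fd() further down is consumed with ordinary poll()/read() against the handlers above; a minimal userspace sketch (the fd and header path are assumed, error handling kept trivial):

#include <poll.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* exact uapi header path may vary per SDK */

/* Block until one clock-arbiter notification is available, then read it. */
static int wait_clk_event(int event_fd, struct nvgpu_gpu_event_info *info)
{
	struct pollfd pfd = { .fd = event_fd, .events = POLLIN };

	if (poll(&pfd, 1, -1) < 0)
		return -1;

	/* The read handler requires at least sizeof(*info) bytes per read. */
	if (read(event_fd, info, sizeof(*info)) != sizeof(*info))
		return -1;

	return 0;
}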
228
229static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev,
230 struct nvgpu_gpu_set_event_filter_args *args)
231{
232 struct gk20a *g = dev->session->g;
233 u32 mask;
234
235 nvgpu_log(g, gpu_dbg_fn, " ");
236
237 if (args->flags)
238 return -EINVAL;
239
240 if (args->size != 1)
241 return -EINVAL;
242
243 if (copy_from_user(&mask, (void __user *) args->buffer,
244 args->size * sizeof(u32)))
245 return -EFAULT;
246
247 /* update alarm mask */
248 nvgpu_atomic_set(&dev->enabled_mask, mask);
249
250 return 0;
251}
252
253static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
254 unsigned long arg)
255{
256 struct nvgpu_clk_dev *dev = filp->private_data;
257 struct gk20a *g = dev->session->g;
258 u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
259 int err = 0;
260
261 nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
262
263 if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
264 || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
265 return -EINVAL;
266
267 BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE);
268
269 memset(buf, 0, sizeof(buf));
270 if (_IOC_DIR(cmd) & _IOC_WRITE) {
271 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
272 return -EFAULT;
273 }
274
275 switch (cmd) {
276 case NVGPU_EVENT_IOCTL_SET_FILTER:
277 err = nvgpu_clk_arb_set_event_filter(dev,
278 (struct nvgpu_gpu_set_event_filter_args *)buf);
279 break;
280 default:
281 nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
282 err = -ENOTTY;
283 }
284
285 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
286 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
287
288 return err;
289}
290
291static const struct file_operations completion_dev_ops = {
292 .owner = THIS_MODULE,
293 .release = nvgpu_clk_arb_release_completion_dev,
294 .poll = nvgpu_clk_arb_poll_dev,
295};
296
297static const struct file_operations event_dev_ops = {
298 .owner = THIS_MODULE,
299 .release = nvgpu_clk_arb_release_event_dev,
300 .poll = nvgpu_clk_arb_poll_dev,
301 .read = nvgpu_clk_arb_read_event_dev,
302#ifdef CONFIG_COMPAT
303 .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
304#endif
305 .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
306};
307
308static int nvgpu_clk_arb_install_fd(struct gk20a *g,
309 struct nvgpu_clk_session *session,
310 const struct file_operations *fops,
311 struct nvgpu_clk_dev **_dev)
312{
313 struct file *file;
314 int fd;
315 int err;
316 int status;
317 char name[64];
318 struct nvgpu_clk_dev *dev;
319
320 clk_arb_dbg(g, " ");
321
322 dev = nvgpu_kzalloc(g, sizeof(*dev));
323 if (!dev)
324 return -ENOMEM;
325
326 status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
327 DEFAULT_EVENT_NUMBER);
328 if (status < 0) {
329 err = status;
330 goto fail;
331 }
332
333 fd = get_unused_fd_flags(O_RDWR);
334 if (fd < 0) {
335 err = fd;
336 goto fail;
337 }
338
339 snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
340 file = anon_inode_getfile(name, fops, dev, O_RDWR);
341 if (IS_ERR(file)) {
342 err = PTR_ERR(file);
343 goto fail_fd;
344 }
345
346 fd_install(fd, file);
347
348 nvgpu_cond_init(&dev->readout_wq);
349
350 nvgpu_atomic_set(&dev->poll_mask, 0);
351
352 dev->session = session;
353 nvgpu_ref_init(&dev->refcount);
354
355 nvgpu_ref_get(&session->refcount);
356
357 *_dev = dev;
358
359 return fd;
360
361fail_fd:
362 put_unused_fd(fd);
363fail:
364 nvgpu_kfree(g, dev);
365
366 return err;
367}
368
369int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
370 struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
371{
372 struct nvgpu_clk_arb *arb = g->clk_arb;
373 struct nvgpu_clk_dev *dev;
374 int fd;
375
376 clk_arb_dbg(g, " ");
377
378 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
379 if (fd < 0)
380 return fd;
381
382 /* TODO: alarm mask needs to be set to default value to prevent
383 * failures of legacy tests. This will be removed when sanity is
384 * updated
385 */
386 if (alarm_mask)
387 nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
388 else
389 nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
390
391 dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
392
393 nvgpu_spinlock_acquire(&arb->users_lock);
394 nvgpu_list_add_tail(&dev->link, &arb->users);
395 nvgpu_spinlock_release(&arb->users_lock);
396
397 *event_fd = fd;
398
399 return 0;
400}
401
402int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
403 struct nvgpu_clk_session *session, int *request_fd)
404{
405 struct nvgpu_clk_dev *dev;
406 int fd;
407
408 clk_arb_dbg(g, " ");
409
410 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
411 if (fd < 0)
412 return fd;
413
414 *request_fd = fd;
415
416 return 0;
417}
418
419int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
420 struct nvgpu_clk_session *session, int request_fd)
421{
422 struct nvgpu_clk_arb *arb = g->clk_arb;
423 struct nvgpu_clk_dev *dev;
424 struct fd fd;
425 int err = 0;
426
427 clk_arb_dbg(g, " ");
428
429 fd = fdget(request_fd);
430 if (!fd.file)
431 return -EINVAL;
432
433 if (fd.file->f_op != &completion_dev_ops) {
434 err = -EINVAL;
435 goto fdput_fd;
436 }
437
438 dev = (struct nvgpu_clk_dev *) fd.file->private_data;
439
440 if (!dev || dev->session != session) {
441 err = -EINVAL;
442 goto fdput_fd;
443 }
444
445 clk_arb_dbg(g, "requested target = %u\n",
446 (u32)dev->gpc2clk_target_mhz);
447
448 nvgpu_atomic_inc(&g->clk_arb_global_nr);
449 nvgpu_ref_get(&dev->refcount);
450 nvgpu_spinlock_acquire(&session->session_lock);
451 nvgpu_list_add(&dev->node, &session->targets);
452 nvgpu_spinlock_release(&session->session_lock);
453 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
454
455fdput_fd:
456 fdput(fd);
457 return err;
458}
459
460int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
461 int request_fd, u32 api_domain, u16 target_mhz)
462{
463 struct nvgpu_clk_dev *dev;
464 struct fd fd;
465 int err = 0;
466
467 clk_arb_dbg(session->g,
468 "domain=0x%08x target_mhz=%u", api_domain, target_mhz);
469
470 fd = fdget(request_fd);
471 if (!fd.file)
472 return -EINVAL;
473
474 if (fd.file->f_op != &completion_dev_ops) {
475 err = -EINVAL;
476 goto fdput_fd;
477 }
478
479 dev = fd.file->private_data;
480 if (!dev || dev->session != session) {
481 err = -EINVAL;
482 goto fdput_fd;
483 }
484
485 switch (api_domain) {
486 case NVGPU_CLK_DOMAIN_MCLK:
487 dev->mclk_target_mhz = target_mhz;
488 break;
489
490 case NVGPU_CLK_DOMAIN_GPCCLK:
491 dev->gpc2clk_target_mhz = target_mhz * 2ULL;
492 break;
493
494 default:
495 err = -EINVAL;
496 }
497
498fdput_fd:
499 fdput(fd);
500 return err;
501}
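
Taken together, nvgpu_clk_arb_install_request_fd, nvgpu_clk_arb_set_session_target_mhz and nvgpu_clk_arb_commit_request_fd form the request path of the arbiter; a minimal in-kernel sketch of the sequence (function names and signatures are taken from this file, while the wrapper itself and its error handling are illustrative only):

/* Request a GPCCLK target for an existing arbiter session. */
static int clk_arb_request_gpcclk(struct gk20a *g,
				  struct nvgpu_clk_session *session,
				  u16 gpcclk_mhz)
{
	int fd, err;

	err = nvgpu_clk_arb_install_request_fd(g, session, &fd);
	if (err != 0)
		return err;

	err = nvgpu_clk_arb_set_session_target_mhz(session, fd,
			NVGPU_CLK_DOMAIN_GPCCLK, gpcclk_mhz);
	if (err != 0)
		return err;

	/* Queues the request; completion is signalled through the fd. */
	return nvgpu_clk_arb_commit_request_fd(g, session, fd);
}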
502
503u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
504{
505 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
506 u32 api_domains = 0;
507
508 if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
509 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
510
511 if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
512 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
513
514 return api_domains;
515}
516
517#ifdef CONFIG_DEBUG_FS
518static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
519{
520 struct gk20a *g = s->private;
521 struct nvgpu_clk_arb *arb = g->clk_arb;
522 struct nvgpu_clk_arb_debug *debug;
523
524 u64 num;
525 s64 tmp, avg, std, max, min;
526
527 debug = NV_ACCESS_ONCE(arb->debug);
528 /* Make copy of structure and ensure no reordering */
529 nvgpu_smp_rmb();
530 if (!debug)
531 return -EINVAL;
532
533 std = debug->switch_std;
534 avg = debug->switch_avg;
535 max = debug->switch_max;
536 min = debug->switch_min;
537 num = debug->switch_num;
538
539 tmp = std;
540 do_div(tmp, num);
541 seq_printf(s, "Number of transitions: %lld\n",
542 num);
543 seq_printf(s, "max / min : %lld / %lld usec\n",
544 max, min);
545 seq_printf(s, "avg / std : %lld / %ld usec\n",
546 avg, int_sqrt(tmp));
547
548 return 0;
549}
550
551static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
552{
553 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
554}
555
556static const struct file_operations nvgpu_clk_arb_stats_fops = {
557 .open = nvgpu_clk_arb_stats_open,
558 .read = seq_read,
559 .llseek = seq_lseek,
560 .release = single_release,
561};
562
563
564int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
565{
566 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
567 struct dentry *gpu_root = l->debugfs;
568 struct dentry *d;
569
570 nvgpu_log(g, gpu_dbg_info, "g=%p", g);
571
572 d = debugfs_create_file(
573 "arb_stats",
574 S_IRUGO,
575 gpu_root,
576 g,
577 &nvgpu_clk_arb_stats_fops);
578 if (!d)
579 return -ENOMEM;
580
581 return 0;
582}
583#endif
diff --git a/include/os/linux/ioctl_ctrl.c b/include/os/linux/ioctl_ctrl.c
deleted file mode 100644
index 841d345..0000000
--- a/include/os/linux/ioctl_ctrl.c
+++ /dev/null
@@ -1,2144 +0,0 @@
1/*
2 * Copyright (c) 2011-2021, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/uaccess.h>
18#include <linux/cdev.h>
19#include <linux/file.h>
20#include <linux/anon_inodes.h>
21#include <linux/fs.h>
22#include <linux/pm_runtime.h>
23#include <uapi/linux/nvgpu.h>
24
25#include <nvgpu/bitops.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/bug.h>
28#include <nvgpu/ptimer.h>
29#include <nvgpu/vidmem.h>
30#include <nvgpu/log.h>
31#include <nvgpu/enabled.h>
32#include <nvgpu/sizes.h>
33#include <nvgpu/list.h>
34#include <nvgpu/clk_arb.h>
35#include <nvgpu/gk20a.h>
36#include <nvgpu/channel.h>
37
38#include "ioctl_ctrl.h"
39#include "ioctl_dbg.h"
40#include "ioctl_as.h"
41#include "ioctl_tsg.h"
42#include "ioctl_channel.h"
43#include "gk20a/fence_gk20a.h"
44
45#include "platform_gk20a.h"
46#include "os_linux.h"
47#include "dmabuf.h"
48#include "channel.h"
49#include "dmabuf_vidmem.h"
50
51#define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? \
52 (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
53#define MHZ_TO_HZ(a) ((u64)a * MHZ)
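
The constants in HZ_TO_MHZ appear to implement a saturating fixed-point conversion rather than a 64-bit division; a short reading, for reference:

/*
 * 0x10C8 = 4296 ~= 2^32 / 10^6, so (a * 0x10C8ULL) >> 32 approximates
 * a / 1000000, i.e. Hz to MHz, without a 64-bit divide. Inputs above
 * 0xF414F9CD7 Hz (just under 65535 MHz) saturate to 0xffff, and inputs
 * that still fit in 32 bits take the exact (u32)a / MHZ path instead.
 */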
54
55struct gk20a_ctrl_priv {
56 struct device *dev;
57 struct gk20a *g;
58 struct nvgpu_clk_session *clk_session;
59
60 struct nvgpu_list_node list;
61 struct {
62 struct vm_area_struct *vma;
63 bool vma_mapped;
64 } usermode_vma;
65};
66
67static inline struct gk20a_ctrl_priv *
68gk20a_ctrl_priv_from_list(struct nvgpu_list_node *node)
69{
70 return (struct gk20a_ctrl_priv *)
71 ((uintptr_t)node - offsetof(struct gk20a_ctrl_priv, list));
72}
73
74static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags)
75{
76 u32 core_flags = 0;
77
78 if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED)
79 core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED;
80
81 return core_flags;
82}
83
84int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
85{
86 struct nvgpu_os_linux *l;
87 struct gk20a *g;
88 struct gk20a_ctrl_priv *priv;
89 int err = 0;
90
91 l = container_of(inode->i_cdev,
92 struct nvgpu_os_linux, ctrl.cdev);
93 g = gk20a_get(&l->g);
94 if (!g)
95 return -ENODEV;
96
97 nvgpu_log_fn(g, " ");
98
99 priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv));
100 if (!priv) {
101 err = -ENOMEM;
102 goto free_ref;
103 }
104 filp->private_data = priv;
105 priv->dev = dev_from_gk20a(g);
106 /*
107	 * We don't close the arbiter fds after driver teardown to support
108 * GPU_LOST events, so we store g here, instead of dereferencing the
109 * dev structure on teardown
110 */
111 priv->g = g;
112
113 if (!g->sw_ready) {
114 err = gk20a_busy(g);
115 if (err)
116 goto free_ref;
117 gk20a_idle(g);
118 }
119
120 err = nvgpu_clk_arb_init_session(g, &priv->clk_session);
121free_ref:
122 if (err != 0) {
123 gk20a_put(g);
124 if (priv)
125 nvgpu_kfree(g, priv);
126 } else {
127 nvgpu_mutex_acquire(&l->ctrl.privs_lock);
128 nvgpu_list_add(&priv->list, &l->ctrl.privs);
129 nvgpu_mutex_release(&l->ctrl.privs_lock);
130 }
131
132 return err;
133}
134int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
135{
136 struct gk20a_ctrl_priv *priv = filp->private_data;
137 struct gk20a *g = priv->g;
138 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
139
140 nvgpu_log_fn(g, " ");
141
142 nvgpu_mutex_acquire(&l->ctrl.privs_lock);
143 nvgpu_list_del(&priv->list);
144 nvgpu_mutex_release(&l->ctrl.privs_lock);
145
146 if (priv->clk_session)
147 nvgpu_clk_arb_release_session(g, priv->clk_session);
148
149 gk20a_put(g);
150 nvgpu_kfree(g, priv);
151
152 return 0;
153}
154
155struct nvgpu_flags_mapping {
156 u64 ioctl_flag;
157 int enabled_flag;
158};
159
160static struct nvgpu_flags_mapping flags_mapping[] = {
161 {NVGPU_GPU_FLAGS_CAN_RAILGATE,
162 NVGPU_CAN_RAILGATE},
163 {NVGPU_GPU_FLAGS_HAS_SYNCPOINTS,
164 NVGPU_HAS_SYNCPOINTS},
165 {NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS,
166 NVGPU_SUPPORT_PARTIAL_MAPPINGS},
167 {NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS,
168 NVGPU_SUPPORT_SPARSE_ALLOCS},
169 {NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS,
170 NVGPU_SUPPORT_SYNC_FENCE_FDS},
171 {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS,
172 NVGPU_SUPPORT_CYCLE_STATS},
173 {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT,
174 NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT},
175 {NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS,
176 NVGPU_SUPPORT_USERSPACE_MANAGED_AS},
177 {NVGPU_GPU_FLAGS_SUPPORT_TSG,
178 NVGPU_SUPPORT_TSG},
179 {NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS,
180 NVGPU_SUPPORT_CLOCK_CONTROLS},
181 {NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE,
182 NVGPU_SUPPORT_GET_VOLTAGE},
183 {NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT,
184 NVGPU_SUPPORT_GET_CURRENT},
185 {NVGPU_GPU_FLAGS_SUPPORT_GET_POWER,
186 NVGPU_SUPPORT_GET_POWER},
187 {NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE,
188 NVGPU_SUPPORT_GET_TEMPERATURE},
189 {NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT,
190 NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT},
191 {NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS,
192 NVGPU_SUPPORT_DEVICE_EVENTS},
193 {NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE,
194 NVGPU_SUPPORT_FECS_CTXSW_TRACE},
195 {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
196 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
197 {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
198 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
199 {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS,
200 NVGPU_SUPPORT_DETERMINISTIC_OPTS},
201 {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS,
202 NVGPU_SUPPORT_SYNCPOINT_ADDRESS},
203 {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT,
204 NVGPU_SUPPORT_USER_SYNCPOINT},
205 {NVGPU_GPU_FLAGS_SUPPORT_USERMODE_SUBMIT,
206 NVGPU_SUPPORT_USERMODE_SUBMIT},
207 {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
208 NVGPU_SUPPORT_IO_COHERENCE},
209 {NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
210 NVGPU_SUPPORT_RESCHEDULE_RUNLIST},
211 {NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL,
212 NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL},
213 {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF,
214 NVGPU_ECC_ENABLED_SM_LRF},
215 {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM,
216 NVGPU_ECC_ENABLED_SM_SHM},
217 {NVGPU_GPU_FLAGS_ECC_ENABLED_TEX,
218 NVGPU_ECC_ENABLED_TEX},
219 {NVGPU_GPU_FLAGS_ECC_ENABLED_LTC,
220 NVGPU_ECC_ENABLED_LTC},
221 {NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS,
222 NVGPU_SUPPORT_TSG_SUBCONTEXTS},
223 {NVGPU_GPU_FLAGS_SUPPORT_SCG,
224 NVGPU_SUPPORT_SCG},
225 {NVGPU_GPU_FLAGS_SUPPORT_VPR,
226 NVGPU_SUPPORT_VPR},
227 {NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE,
228 NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE},
229};
230
231static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
232{
233 unsigned int i;
234 u64 ioctl_flags = 0;
235
236 for (i = 0; i < sizeof(flags_mapping)/sizeof(*flags_mapping); i++) {
237 if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag))
238 ioctl_flags |= flags_mapping[i].ioctl_flag;
239 }
240
241 if (!capable(CAP_SYS_NICE)) {
242 ioctl_flags &= ~NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST;
243 }
244
245 return ioctl_flags;
246}
247
248static void nvgpu_set_preemption_mode_flags(struct gk20a *g,
249 struct nvgpu_gpu_characteristics *gpu)
250{
251 struct nvgpu_preemption_modes_rec preemption_mode_rec;
252
253 g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec);
254
255 gpu->graphics_preemption_mode_flags =
256 nvgpu_get_ioctl_graphics_preempt_mode_flags(
257 preemption_mode_rec.graphics_preemption_mode_flags);
258 gpu->compute_preemption_mode_flags =
259 nvgpu_get_ioctl_compute_preempt_mode_flags(
260 preemption_mode_rec.compute_preemption_mode_flags);
261
262 gpu->default_graphics_preempt_mode =
263 nvgpu_get_ioctl_graphics_preempt_mode(
264 preemption_mode_rec.default_graphics_preempt_mode);
265 gpu->default_compute_preempt_mode =
266 nvgpu_get_ioctl_compute_preempt_mode(
267 preemption_mode_rec.default_compute_preempt_mode);
268}
269
270static long
271gk20a_ctrl_ioctl_gpu_characteristics(
272 struct gk20a *g,
273 struct nvgpu_gpu_get_characteristics *request)
274{
275 struct nvgpu_gpu_characteristics gpu;
276 long err = 0;
277
278 if (gk20a_busy(g)) {
279 nvgpu_err(g, "failed to power on gpu");
280 return -EINVAL;
281 }
282
283 memset(&gpu, 0, sizeof(gpu));
284
285 gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
286 gpu.on_board_video_memory_size = 0; /* integrated GPU */
287
288 gpu.num_gpc = g->gr.gpc_count;
289 gpu.max_gpc_count = g->gr.max_gpc_count;
290
291 gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;
292
293 gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
294
295 gpu.compression_page_size = g->ops.fb.compression_page_size(g);
296
297 if (g->ops.gr.get_gpc_mask) {
298 gpu.gpc_mask = g->ops.gr.get_gpc_mask(g);
299 } else {
300 gpu.gpc_mask = BIT32(g->gr.gpc_count) - 1;
301 }
302
303 gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g);
304
305 gpu.arch = g->params.gpu_arch;
306 gpu.impl = g->params.gpu_impl;
307 gpu.rev = g->params.gpu_rev;
308 gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT;
309 gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ?
310 NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0;
311 gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS);
312 gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS);
313 gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS);
314 gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS);
315 gpu.inline_to_memory_class =
316 g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS);
317 gpu.dma_copy_class =
318 g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
319
320 gpu.vbios_version = g->bios.vbios_version;
321 gpu.vbios_oem_version = g->bios.vbios_oem_version;
322
323 gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g);
324 gpu.pde_coverage_bit_count =
325 g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0];
326 gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g);
327
328 gpu.sm_arch_sm_version = g->params.sm_arch_sm_version;
329 gpu.sm_arch_spa_version = g->params.sm_arch_spa_version;
330 gpu.sm_arch_warp_count = g->params.sm_arch_warp_count;
331
332 gpu.max_css_buffer_size = g->gr.max_css_buffer_size;
333
334 gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
335 gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
336 gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
337 gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
338 gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
339 gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST;
340 gpu.gpu_va_bit_count = 40;
341
342 strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname));
343 gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g);
344 gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
345 gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
346 gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
347 gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw;
348 gpu.gr_gobs_per_comptagline_per_slice =
349 g->gr.gobs_per_comptagline_per_slice;
350 gpu.num_ltc = g->ltc_count;
351 gpu.lts_per_ltc = g->gr.slices_per_ltc;
352 gpu.cbc_cache_line_size = g->gr.cacheline_size;
353 gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline;
354
355 if (g->ops.clk.get_maxrate)
356 gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK);
357
358 gpu.local_video_memory_size = g->mm.vidmem.size;
359
360 gpu.pci_vendor_id = g->pci_vendor_id;
361 gpu.pci_device_id = g->pci_device_id;
362 gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id;
363 gpu.pci_subsystem_device_id = g->pci_subsystem_device_id;
364 gpu.pci_class = g->pci_class;
365 gpu.pci_revision = g->pci_revision;
366
367 nvgpu_set_preemption_mode_flags(g, &gpu);
368
369 if (request->gpu_characteristics_buf_size > 0) {
370 size_t write_size = sizeof(gpu);
371
372 nvgpu_speculation_barrier();
373 if (write_size > request->gpu_characteristics_buf_size)
374 write_size = request->gpu_characteristics_buf_size;
375
376 err = copy_to_user((void __user *)(uintptr_t)
377 request->gpu_characteristics_buf_addr,
378 &gpu, write_size);
379 }
380
381 if (err == 0)
382 request->gpu_characteristics_buf_size = sizeof(gpu);
383
384 gk20a_idle(g);
385
386 return err;
387}
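
The size negotiation above (report sizeof(gpu) back, write at most what the caller asked for) lends itself to a two-call pattern from userspace; a hedged sketch, assuming the NVGPU_GPU_IOCTL_GET_CHARACTERISTICS command from the uapi header and an already-open ctrl-device fd:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* exact uapi header path may vary per SDK */

static int query_characteristics(int ctrl_fd,
				 struct nvgpu_gpu_characteristics *out)
{
	struct nvgpu_gpu_get_characteristics req = { 0 };

	/* First call with size 0: the handler only reports sizeof(gpu). */
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req))
		return -1;

	/* Second call: never ask for more than this build understands. */
	if (req.gpu_characteristics_buf_size > sizeof(*out))
		req.gpu_characteristics_buf_size = sizeof(*out);
	req.gpu_characteristics_buf_addr = (uintptr_t)out;

	return ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req);
}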
388
389static int gk20a_ctrl_prepare_compressible_read(
390 struct gk20a *g,
391 struct nvgpu_gpu_prepare_compressible_read_args *args)
392{
393 int ret = -ENOSYS;
394
395#ifdef CONFIG_NVGPU_SUPPORT_CDE
396 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
397 struct nvgpu_channel_fence fence;
398 struct gk20a_fence *fence_out = NULL;
399 int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags(
400 args->submit_flags);
401 int fd = -1;
402
403 fence.id = args->fence.syncpt_id;
404 fence.value = args->fence.syncpt_value;
405
406	/* Try to allocate an fd here */
407 if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
408 && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) {
409 fd = get_unused_fd_flags(O_RDWR);
410 if (fd < 0)
411 return fd;
412 }
413
414 ret = gk20a_prepare_compressible_read(l, args->handle,
415 args->request_compbits, args->offset,
416 args->compbits_hoffset, args->compbits_voffset,
417 args->scatterbuffer_offset,
418 args->width, args->height, args->block_height_log2,
419 submit_flags, &fence, &args->valid_compbits,
420 &args->zbc_color, &fence_out);
421
422 if (ret) {
423 if (fd != -1)
424 put_unused_fd(fd);
425 return ret;
426 }
427
428 /* Convert fence_out to something we can pass back to user space. */
429 if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) {
430 if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
431 if (fence_out) {
432 ret = gk20a_fence_install_fd(fence_out, fd);
433 if (ret)
434 put_unused_fd(fd);
435 else
436 args->fence.fd = fd;
437 } else {
438 args->fence.fd = -1;
439 put_unused_fd(fd);
440 }
441 } else {
442 if (fence_out) {
443 args->fence.syncpt_id = fence_out->syncpt_id;
444 args->fence.syncpt_value =
445 fence_out->syncpt_value;
446 } else {
447 args->fence.syncpt_id = -1;
448 args->fence.syncpt_value = 0;
449 }
450 }
451 }
452 gk20a_fence_put(fence_out);
453#endif
454
455 return ret;
456}
457
458static int gk20a_ctrl_mark_compressible_write(
459 struct gk20a *g,
460 struct nvgpu_gpu_mark_compressible_write_args *args)
461{
462 int ret = -ENOSYS;
463
464#ifdef CONFIG_NVGPU_SUPPORT_CDE
465 ret = gk20a_mark_compressible_write(g, args->handle,
466 args->valid_compbits, args->offset, args->zbc_color);
467#endif
468
469 return ret;
470}
471
472static int gk20a_ctrl_alloc_as(
473 struct gk20a *g,
474 struct nvgpu_alloc_as_args *args)
475{
476 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
477 struct gk20a_as_share *as_share;
478 int err;
479 int fd;
480 struct file *file;
481 char name[64];
482
483 err = get_unused_fd_flags(O_RDWR);
484 if (err < 0)
485 return err;
486 fd = err;
487
488 snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd);
489
490 err = gk20a_as_alloc_share(g, args->big_page_size,
491 gk20a_as_translate_as_alloc_flags(g,
492 args->flags),
493 &as_share);
494 if (err)
495 goto clean_up;
496
497 file = anon_inode_getfile(name, l->as_dev.cdev.ops, as_share, O_RDWR);
498 if (IS_ERR(file)) {
499 err = PTR_ERR(file);
500 goto clean_up_as;
501 }
502
503 fd_install(fd, file);
504
505 args->as_fd = fd;
506 return 0;
507
508clean_up_as:
509 gk20a_as_release_share(as_share);
510clean_up:
511 put_unused_fd(fd);
512 return err;
513}
514
515static int gk20a_ctrl_open_tsg(struct gk20a *g,
516 struct nvgpu_gpu_open_tsg_args *args)
517{
518 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
519 int err;
520 int fd;
521 struct file *file;
522 char name[64];
523
524 err = get_unused_fd_flags(O_RDWR);
525 if (err < 0)
526 return err;
527 fd = err;
528
529 snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd);
530
531 file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR);
532 if (IS_ERR(file)) {
533 err = PTR_ERR(file);
534 goto clean_up;
535 }
536
537 err = nvgpu_ioctl_tsg_open(g, file);
538 if (err)
539 goto clean_up_file;
540
541 fd_install(fd, file);
542 args->tsg_fd = fd;
543 return 0;
544
545clean_up_file:
546 fput(file);
547clean_up:
548 put_unused_fd(fd);
549 return err;
550}
551
552static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
553 struct nvgpu_gpu_get_tpc_masks_args *args)
554{
555 struct gr_gk20a *gr = &g->gr;
556 int err = 0;
557 const u32 gpc_tpc_mask_size = sizeof(u32) * gr->max_gpc_count;
558
559 if (args->mask_buf_size > 0) {
560 size_t write_size = gpc_tpc_mask_size;
561
562 nvgpu_speculation_barrier();
563 if (write_size > args->mask_buf_size)
564 write_size = args->mask_buf_size;
565
566 err = copy_to_user((void __user *)(uintptr_t)
567 args->mask_buf_addr,
568 gr->gpc_tpc_mask, write_size);
569 }
570
571 if (err == 0)
572 args->mask_buf_size = gpc_tpc_mask_size;
573
574 return err;
575}
576
577static int gk20a_ctrl_get_fbp_l2_masks(
578 struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args)
579{
580 struct gr_gk20a *gr = &g->gr;
581 int err = 0;
582 const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count;
583
584 if (args->mask_buf_size > 0) {
585 size_t write_size = fbp_l2_mask_size;
586
587 nvgpu_speculation_barrier();
588 if (write_size > args->mask_buf_size)
589 write_size = args->mask_buf_size;
590
591 err = copy_to_user((void __user *)(uintptr_t)
592 args->mask_buf_addr,
593 gr->fbp_rop_l2_en_mask, write_size);
594 }
595
596 if (err == 0)
597 args->mask_buf_size = fbp_l2_mask_size;
598
599 return err;
600}
601
602static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
603 struct nvgpu_gpu_l2_fb_args *args)
604{
605 int ret;
606 bool always_poweron;
607
608 if ((!args->l2_flush && !args->fb_flush) ||
609 (!args->l2_flush && args->l2_invalidate))
610 return -EINVAL;
611
612 /* Handle this case for joint rails or DGPU */
613 always_poweron = (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
614 !pm_runtime_enabled(dev_from_gk20a(g)));
615
616	/* When not always powered on, exit early if g->power_on is false */
617 if (!always_poweron && !gk20a_check_poweron(g)) {
618 return 0;
619 }
620
621	/* There is a small window between a call to gk20a_idle() having occurred
622	 * and railgate actually being triggered (setting g->power_on = false),
623	 * when l2_flush can race with railgate. It is better to take a busy_lock
624 * to prevent the gk20a_idle() from proceeding. There is a very small
625 * chance that gk20a_idle() might begin before gk20a_busy(). Having
626 * a locked access to g->power_on further reduces the probability of
627 * gk20a_idle() being triggered before gk20a_busy()
628 */
629 ret = gk20a_busy(g);
630
631 if (ret != 0) {
632 nvgpu_err(g, "failed to take power ref");
633 return ret;
634 }
635
636 if (args->l2_flush)
637 g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false);
638
639 if (args->fb_flush)
640 g->ops.mm.fb_flush(g);
641
642 gk20a_idle(g);
643
644 return 0;
645}
646
647static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
648 struct gk20a *g,
649 struct nvgpu_gpu_mmu_debug_mode_args *args)
650{
651 if (gk20a_busy(g)) {
652 nvgpu_err(g, "failed to power on gpu");
653 return -EINVAL;
654 }
655
656 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
657 g->ops.fb.set_debug_mode(g, args->state == 1);
658 nvgpu_mutex_release(&g->dbg_sessions_lock);
659
660 gk20a_idle(g);
661 return 0;
662}
663
664static int nvgpu_gpu_ioctl_set_debug_mode(
665 struct gk20a *g,
666 struct nvgpu_gpu_sm_debug_mode_args *args)
667{
668 struct channel_gk20a *ch;
669 int err;
670
671 ch = gk20a_get_channel_from_file(args->channel_fd);
672 if (!ch)
673 return -EINVAL;
674
675 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
676 if (g->ops.gr.set_sm_debug_mode)
677 err = g->ops.gr.set_sm_debug_mode(g, ch,
678 args->sms, !!args->enable);
679 else
680 err = -ENOSYS;
681 nvgpu_mutex_release(&g->dbg_sessions_lock);
682
683 gk20a_channel_put(ch);
684 return err;
685}
686
687static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g)
688{
689 int err;
690
691 err = gk20a_busy(g);
692 if (err)
693 return err;
694
695 if (g->ops.gr.trigger_suspend) {
696 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
697 err = gr_gk20a_elpg_protected_call(g,
698 g->ops.gr.trigger_suspend(g));
699 nvgpu_mutex_release(&g->dbg_sessions_lock);
700 } else
701 err = -EINVAL;
702
703 gk20a_idle(g);
704
705 return err;
706}
707
708static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
709 struct nvgpu_gpu_wait_pause_args *args)
710{
711 int err;
712 struct warpstate *ioctl_w_state;
713 struct nvgpu_warpstate *w_state = NULL;
714 u32 sm_count, ioctl_size, size, sm_id;
715
716 sm_count = g->gr.gpc_count * g->gr.tpc_count;
717
718 ioctl_size = sm_count * sizeof(struct warpstate);
719 ioctl_w_state = nvgpu_kzalloc(g, ioctl_size);
720 if (!ioctl_w_state)
721 return -ENOMEM;
722
723 size = sm_count * sizeof(struct nvgpu_warpstate);
724 w_state = nvgpu_kzalloc(g, size);
725 if (!w_state) {
726 err = -ENOMEM;
727 goto out_free;
728 }
729
730 err = gk20a_busy(g);
731 if (err)
732 goto out_free;
733
734 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
735 if (g->ops.gr.wait_for_pause) {
736 (void)gr_gk20a_elpg_protected_call(g,
737 g->ops.gr.wait_for_pause(g, w_state));
738 } else {
739 err = -EINVAL;
740 goto out_idle;
741 }
742
743 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
744 ioctl_w_state[sm_id].valid_warps[0] =
745 w_state[sm_id].valid_warps[0];
746 ioctl_w_state[sm_id].valid_warps[1] =
747 w_state[sm_id].valid_warps[1];
748 ioctl_w_state[sm_id].trapped_warps[0] =
749 w_state[sm_id].trapped_warps[0];
750 ioctl_w_state[sm_id].trapped_warps[1] =
751 w_state[sm_id].trapped_warps[1];
752 ioctl_w_state[sm_id].paused_warps[0] =
753 w_state[sm_id].paused_warps[0];
754 ioctl_w_state[sm_id].paused_warps[1] =
755 w_state[sm_id].paused_warps[1];
756 }
757 /* Copy to user space - pointed by "args->pwarpstate" */
758 if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate,
759			ioctl_w_state, ioctl_size)) {
760 nvgpu_log_fn(g, "copy_to_user failed!");
761 err = -EFAULT;
762 }
763
764out_idle:
765 nvgpu_mutex_release(&g->dbg_sessions_lock);
766
767 gk20a_idle(g);
768
769out_free:
770 nvgpu_kfree(g, w_state);
771 nvgpu_kfree(g, ioctl_w_state);
772
773 return err;
774}
775
776static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g)
777{
778 int err;
779
780 err = gk20a_busy(g);
781 if (err)
782 return err;
783
784 if (g->ops.gr.resume_from_pause) {
785 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
786 err = gr_gk20a_elpg_protected_call(g,
787 g->ops.gr.resume_from_pause(g));
788 nvgpu_mutex_release(&g->dbg_sessions_lock);
789 } else
790 err = -EINVAL;
791
792 gk20a_idle(g);
793
794 return err;
795}
796
797static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g)
798{
799 int err;
800
801 err = gk20a_busy(g);
802 if (err)
803 return err;
804
805 if (g->ops.gr.clear_sm_errors) {
806 err = gr_gk20a_elpg_protected_call(g,
807 g->ops.gr.clear_sm_errors(g));
808 } else
809 err = -EINVAL;
810
811 gk20a_idle(g);
812
813 return err;
814}
815
816static int nvgpu_gpu_ioctl_has_any_exception(
817 struct gk20a *g,
818 struct nvgpu_gpu_tpc_exception_en_status_args *args)
819{
820 u32 tpc_exception_en;
821
822 if (g->ops.gr.tpc_enabled_exceptions) {
823 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
824 tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g);
825 nvgpu_mutex_release(&g->dbg_sessions_lock);
826 } else
827 return -EINVAL;
828
829 args->tpc_exception_en_sm_mask = tpc_exception_en;
830
831 return 0;
832}
833
834static int gk20a_ctrl_get_num_vsms(struct gk20a *g,
835 struct nvgpu_gpu_num_vsms *args)
836{
837 struct gr_gk20a *gr = &g->gr;
838 args->num_vsms = gr->no_of_sm;
839 return 0;
840}
841
842static int gk20a_ctrl_vsm_mapping(struct gk20a *g,
843 struct nvgpu_gpu_vsms_mapping *args)
844{
845 int err = 0;
846 struct gr_gk20a *gr = &g->gr;
847 size_t write_size = gr->no_of_sm *
848 sizeof(struct nvgpu_gpu_vsms_mapping_entry);
849 struct nvgpu_gpu_vsms_mapping_entry *vsms_buf;
850 u32 i;
851
852 vsms_buf = nvgpu_kzalloc(g, write_size);
853 if (vsms_buf == NULL)
854 return -ENOMEM;
855
856 for (i = 0; i < gr->no_of_sm; i++) {
857 vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index;
858 if (g->ops.gr.get_nonpes_aware_tpc)
859 vsms_buf[i].tpc_index =
860 g->ops.gr.get_nonpes_aware_tpc(g,
861 gr->sm_to_cluster[i].gpc_index,
862 gr->sm_to_cluster[i].tpc_index);
863 else
864 vsms_buf[i].tpc_index =
865 gr->sm_to_cluster[i].tpc_index;
866 }
867
868 err = copy_to_user((void __user *)(uintptr_t)
869 args->vsms_map_buf_addr,
870 vsms_buf, write_size);
871 nvgpu_kfree(g, vsms_buf);
872
873 return err;
874}
875
876static int nvgpu_gpu_get_cpu_time_correlation_info(
877 struct gk20a *g,
878 struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
879{
880 struct nvgpu_cpu_time_correlation_sample *samples;
881 int err;
882 u32 i;
883
884 if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT ||
885 args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC)
886 return -EINVAL;
887
888 samples = nvgpu_kzalloc(g, args->count *
889 sizeof(struct nvgpu_cpu_time_correlation_sample));
890 if (!samples) {
891 return -ENOMEM;
892 }
893
894 err = g->ops.ptimer.get_timestamps_zipper(g,
895 args->source_id, args->count, samples);
896 if (!err) {
897 for (i = 0; i < args->count; i++) {
898 args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
899 args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
900 }
901 }
902
903 nvgpu_kfree(g, samples);
904
905 return err;
906}
907
908static int nvgpu_gpu_get_gpu_time(
909 struct gk20a *g,
910 struct nvgpu_gpu_get_gpu_time_args *args)
911{
912 u64 time;
913 int err;
914
915 err = gk20a_busy(g);
916 if (err)
917 return err;
918
919 err = g->ops.ptimer.read_ptimer(g, &time);
920 if (!err)
921 args->gpu_timestamp = time;
922
923 gk20a_idle(g);
924 return err;
925}
926
927static int nvgpu_gpu_get_engine_info(
928 struct gk20a *g,
929 struct nvgpu_gpu_get_engine_info_args *args)
930{
931 int err = 0;
932 u32 engine_enum = ENGINE_INVAL_GK20A;
933 u32 report_index = 0;
934 u32 engine_id_idx;
935 const u32 max_buffer_engines = args->engine_info_buf_size /
936 sizeof(struct nvgpu_gpu_get_engine_info_item);
937 struct nvgpu_gpu_get_engine_info_item __user *dst_item_list =
938 (void __user *)(uintptr_t)args->engine_info_buf_addr;
939
940 for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines;
941 ++engine_id_idx) {
942 u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx];
943 const struct fifo_engine_info_gk20a *src_info =
944 &g->fifo.engine_info[active_engine_id];
945 struct nvgpu_gpu_get_engine_info_item dst_info;
946
947 memset(&dst_info, 0, sizeof(dst_info));
948
949 engine_enum = src_info->engine_enum;
950
951 switch (engine_enum) {
952 case ENGINE_GR_GK20A:
953 dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR;
954 break;
955
956 case ENGINE_GRCE_GK20A:
957 dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY;
958 break;
959
960 case ENGINE_ASYNC_CE_GK20A:
961 dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY;
962 break;
963
964 default:
965 nvgpu_err(g, "Unmapped engine enum %u",
966 engine_enum);
967 continue;
968 }
969
970 dst_info.engine_instance = src_info->inst_id;
971 dst_info.runlist_id = src_info->runlist_id;
972
973 if (report_index < max_buffer_engines) {
974 err = copy_to_user(&dst_item_list[report_index],
975 &dst_info, sizeof(dst_info));
976 if (err)
977 goto clean_up;
978 }
979
980 ++report_index;
981 }
982
983 args->engine_info_buf_size =
984 report_index * sizeof(struct nvgpu_gpu_get_engine_info_item);
985
986clean_up:
987 return err;
988}
989
990static int nvgpu_gpu_alloc_vidmem(struct gk20a *g,
991 struct nvgpu_gpu_alloc_vidmem_args *args)
992{
993 u32 align = args->in.alignment ? args->in.alignment : SZ_4K;
994 int fd;
995
996 nvgpu_log_fn(g, " ");
997
998 /* not yet supported */
999 if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK))
1000 return -EINVAL;
1001
1002 /* not yet supported */
1003 if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR))
1004 return -EINVAL;
1005
1006 if (args->in.size & (SZ_4K - 1))
1007 return -EINVAL;
1008
1009 if (!args->in.size)
1010 return -EINVAL;
1011
1012 if (align & (align - 1))
1013 return -EINVAL;
1014
1015 if (align > roundup_pow_of_two(args->in.size)) {
1016 /* log this special case, buddy allocator detail */
1017 nvgpu_warn(g,
1018 "alignment larger than buffer size rounded up to power of 2 is not supported");
1019 return -EINVAL;
1020 }
1021
1022 fd = nvgpu_vidmem_export_linux(g, args->in.size);
1023 if (fd < 0)
1024 return fd;
1025
1026 args->out.dmabuf_fd = fd;
1027
1028 nvgpu_log_fn(g, "done, fd=%d", fd);
1029
1030 return 0;
1031}
1032
1033static int nvgpu_gpu_get_memory_state(struct gk20a *g,
1034 struct nvgpu_gpu_get_memory_state_args *args)
1035{
1036 int err;
1037
1038 nvgpu_log_fn(g, " ");
1039
1040 if (args->reserved[0] || args->reserved[1] ||
1041 args->reserved[2] || args->reserved[3])
1042 return -EINVAL;
1043
1044 err = nvgpu_vidmem_get_space(g, &args->total_free_bytes);
1045
1046 nvgpu_log_fn(g, "done, err=%d, bytes=%lld", err, args->total_free_bytes);
1047
1048 return err;
1049}
1050
1051static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain)
1052{
1053 u32 domain = 0;
1054
1055 if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK)
1056 domain = NVGPU_CLK_DOMAIN_MCLK;
1057 else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK)
1058 domain = NVGPU_CLK_DOMAIN_GPCCLK;
1059 else
1060 domain = NVGPU_CLK_DOMAIN_MAX + 1;
1061
1062 return domain;
1063}
1064
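/*
 * Editor note on the helper below: it reports the VF (frequency) points known
 * to the clock arbiter for one domain. A call with max_entries == 0 only
 * returns the number of points; otherwise the caller must provide room for
 * all of them, and duplicate or out-of-range frequencies are filtered out
 * before being copied back.
 */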
1065static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
1066 struct gk20a_ctrl_priv *priv,
1067 struct nvgpu_gpu_clk_vf_points_args *args)
1068{
1069 struct nvgpu_gpu_clk_vf_point clk_point;
1070 struct nvgpu_gpu_clk_vf_point __user *entry;
1071 struct nvgpu_clk_session *session = priv->clk_session;
1072 u32 clk_domains = 0;
1073 int err;
1074 u16 last_mhz;
1075 u16 *fpoints;
1076 u32 i;
1077 u32 max_points = 0;
1078 u32 num_points = 0;
1079 u16 min_mhz;
1080 u16 max_mhz;
1081
1082 nvgpu_log_fn(g, " ");
1083
1084 if (!session || args->flags)
1085 return -EINVAL;
1086
1087 clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
1088 args->num_entries = 0;
1089
1090 if (!nvgpu_clk_arb_is_valid_domain(g,
1091 nvgpu_gpu_convert_clk_domain(args->clk_domain)))
1092 return -EINVAL;
1093
1094 err = nvgpu_clk_arb_get_arbiter_clk_f_points(g,
1095 nvgpu_gpu_convert_clk_domain(args->clk_domain),
1096 &max_points, NULL);
1097 if (err)
1098 return err;
1099
1100 if (!args->max_entries) {
1101 args->max_entries = max_points;
1102 return 0;
1103 }
1104
1105 if (args->max_entries < max_points)
1106 return -EINVAL;
1107
1108 err = nvgpu_clk_arb_get_arbiter_clk_range(g,
1109 nvgpu_gpu_convert_clk_domain(args->clk_domain),
1110 &min_mhz, &max_mhz);
1111 if (err)
1112 return err;
1113
1114 fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16));
1115 if (!fpoints)
1116 return -ENOMEM;
1117
1118 err = nvgpu_clk_arb_get_arbiter_clk_f_points(g,
1119 nvgpu_gpu_convert_clk_domain(args->clk_domain),
1120 &max_points, fpoints);
1121 if (err)
1122 goto fail;
1123
1124 entry = (struct nvgpu_gpu_clk_vf_point __user *)
1125 (uintptr_t)args->clk_vf_point_entries;
1126
1127 last_mhz = 0;
1128 num_points = 0;
1129 for (i = 0; (i < max_points) && !err; i++) {
1130
1131 /* filter out duplicate frequencies */
1132 if (fpoints[i] == last_mhz)
1133 continue;
1134
1135 /* filter out out-of-range frequencies */
1136 if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz))
1137 continue;
1138
1139 last_mhz = fpoints[i];
1140 clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]);
1141
1142 err = copy_to_user((void __user *)entry, &clk_point,
1143 sizeof(clk_point)) ? -EFAULT : 0;
1144
1145 num_points++;
1146 entry++;
1147 }
1148
1149 args->num_entries = num_points;
1150
1151fail:
1152 nvgpu_kfree(g, fpoints);
1153 return err;
1154}
1155
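/*
 * Editor note on the helper below: it returns the min/max frequency range per
 * clock domain. With flags == 0 the entries are filled for every arbiter
 * domain in turn; with NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS the caller
 * supplies the domains to query in the entry list itself.
 */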
1156static int nvgpu_gpu_clk_get_range(struct gk20a *g,
1157 struct gk20a_ctrl_priv *priv,
1158 struct nvgpu_gpu_clk_range_args *args)
1159{
1160 struct nvgpu_gpu_clk_range clk_range;
1161 struct nvgpu_gpu_clk_range __user *entry;
1162 struct nvgpu_clk_session *session = priv->clk_session;
1163
1164 u32 clk_domains = 0;
1165 u32 num_domains;
1166 u32 num_entries;
1167 u32 i;
1168 int bit;
1169 int err;
1170 u16 min_mhz, max_mhz;
1171
1172 nvgpu_log_fn(g, " ");
1173
1174 if (!session)
1175 return -EINVAL;
1176
1177 clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
1178 num_domains = hweight_long(clk_domains);
1179
1180 if (!args->flags) {
1181 if (!args->num_entries) {
1182 args->num_entries = num_domains;
1183 return 0;
1184 }
1185
1186 if (args->num_entries < num_domains)
1187 return -EINVAL;
1188
1189 args->num_entries = 0;
1190 num_entries = num_domains;
1191
1192 } else {
1193 if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS)
1194 return -EINVAL;
1195
1196 num_entries = args->num_entries;
1197 if (num_entries > num_domains)
1198 return -EINVAL;
1199 }
1200
1201 entry = (struct nvgpu_gpu_clk_range __user *)
1202 (uintptr_t)args->clk_range_entries;
1203
1204 for (i = 0; i < num_entries; i++, entry++) {
1205
1206 if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) {
1207 if (copy_from_user(&clk_range, (void __user *)entry,
1208 sizeof(clk_range)))
1209 return -EFAULT;
1210 } else {
1211 bit = ffs(clk_domains) - 1;
1212 clk_range.clk_domain = bit;
1213 clk_domains &= ~BIT(bit);
1214 }
1215
1216 clk_range.flags = 0;
1217 err = nvgpu_clk_arb_get_arbiter_clk_range(g,
1218 nvgpu_gpu_convert_clk_domain(clk_range.clk_domain),
1219 &min_mhz, &max_mhz);
1220 clk_range.min_hz = MHZ_TO_HZ(min_mhz);
1221 clk_range.max_hz = MHZ_TO_HZ(max_mhz);
1222
1223 if (err)
1224 return err;
1225
1226 err = copy_to_user(entry, &clk_range, sizeof(clk_range));
1227 if (err)
1228 return -EFAULT;
1229 }
1230
1231 args->num_entries = num_entries;
1232
1233 return 0;
1234}
1235
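/*
 * Editor note on the helper below: it sets target frequencies through the
 * clock arbiter. All entries are validated first, then a request fd is
 * installed for the session, the per-domain targets are recorded against it,
 * and the request is committed; the fd is returned in completion_fd so
 * userspace can wait for the change to take effect.
 */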
1236static int nvgpu_gpu_clk_set_info(struct gk20a *g,
1237 struct gk20a_ctrl_priv *priv,
1238 struct nvgpu_gpu_clk_set_info_args *args)
1239{
1240 struct nvgpu_gpu_clk_info clk_info;
1241 struct nvgpu_gpu_clk_info __user *entry;
1242 struct nvgpu_clk_session *session = priv->clk_session;
1243
1244 int fd;
1245 u32 clk_domains = 0;
1246 u16 freq_mhz;
1247 int i;
1248 int ret;
1249
1250 nvgpu_log_fn(g, " ");
1251
1252 if (!session || args->flags)
1253 return -EINVAL;
1254
1255 clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
1256 if (!clk_domains)
1257 return -EINVAL;
1258
1259 entry = (struct nvgpu_gpu_clk_info __user *)
1260 (uintptr_t)args->clk_info_entries;
1261
1262 for (i = 0; i < args->num_entries; i++, entry++) {
1263
1264 if (copy_from_user(&clk_info, entry, sizeof(clk_info)))
1265 return -EFAULT;
1266
1267 if (!nvgpu_clk_arb_is_valid_domain(g,
1268 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain)))
1269 return -EINVAL;
1270 }
1271 nvgpu_speculation_barrier();
1272
1273 entry = (struct nvgpu_gpu_clk_info __user *)
1274 (uintptr_t)args->clk_info_entries;
1275
1276 ret = nvgpu_clk_arb_install_request_fd(g, session, &fd);
1277 if (ret < 0)
1278 return ret;
1279
1280 for (i = 0; i < args->num_entries; i++, entry++) {
1281
1282 if (copy_from_user(&clk_info, (void __user *)entry,
1283 sizeof(clk_info)))
1284 return -EFAULT;
1285 freq_mhz = HZ_TO_MHZ(clk_info.freq_hz);
1286
1287 nvgpu_clk_arb_set_session_target_mhz(session, fd,
1288 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz);
1289 }
1290
1291 nvgpu_speculation_barrier();
1292 ret = nvgpu_clk_arb_commit_request_fd(g, session, fd);
1293 if (ret < 0)
1294 return ret;
1295
1296 args->completion_fd = fd;
1297
1298 return ret;
1299}
1300
1301static int nvgpu_gpu_clk_get_info(struct gk20a *g,
1302 struct gk20a_ctrl_priv *priv,
1303 struct nvgpu_gpu_clk_get_info_args *args)
1304{
1305 struct nvgpu_gpu_clk_info clk_info;
1306 struct nvgpu_gpu_clk_info __user *entry;
1307 struct nvgpu_clk_session *session = priv->clk_session;
1308 u32 clk_domains = 0;
1309 u32 num_domains;
1310 u32 num_entries;
1311 u32 i;
1312 u16 freq_mhz;
1313 int err;
1314 int bit;
1315
1316 nvgpu_log_fn(g, " ");
1317
1318 if (!session)
1319 return -EINVAL;
1320
1321 clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
1322 num_domains = hweight_long(clk_domains);
1323
1324 if (!args->flags) {
1325 if (!args->num_entries) {
1326 args->num_entries = num_domains;
1327 return 0;
1328 }
1329
1330 if (args->num_entries < num_domains)
1331 return -EINVAL;
1332
1333 args->num_entries = 0;
1334 num_entries = num_domains;
1335
1336 } else {
1337 if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS)
1338 return -EINVAL;
1339
1340 num_entries = args->num_entries;
1341 if (num_entries > num_domains * 3)
1342 return -EINVAL;
1343 }
1344
1345 entry = (struct nvgpu_gpu_clk_info __user *)
1346 (uintptr_t)args->clk_info_entries;
1347
1348 for (i = 0; i < num_entries; i++, entry++) {
1349
1350 if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) {
1351 if (copy_from_user(&clk_info, (void __user *)entry,
1352 sizeof(clk_info)))
1353 return -EFAULT;
1354 } else {
1355 bit = ffs(clk_domains) - 1;
1356 clk_info.clk_domain = bit;
1357 clk_domains &= ~BIT(bit);
1358 clk_info.clk_type = args->clk_type;
1359 }
1360
1361 nvgpu_speculation_barrier();
1362 switch (clk_info.clk_type) {
1363 case NVGPU_GPU_CLK_TYPE_TARGET:
1364 err = nvgpu_clk_arb_get_session_target_mhz(session,
1365 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain),
1366 &freq_mhz);
1367 break;
1368 case NVGPU_GPU_CLK_TYPE_ACTUAL:
1369 err = nvgpu_clk_arb_get_arbiter_actual_mhz(g,
1370 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain),
1371 &freq_mhz);
1372 break;
1373 case NVGPU_GPU_CLK_TYPE_EFFECTIVE:
1374 err = nvgpu_clk_arb_get_arbiter_effective_mhz(g,
1375 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain),
1376 &freq_mhz);
1377 break;
1378 default:
1379 freq_mhz = 0;
1380 err = -EINVAL;
1381 break;
1382 }
1383 if (err)
1384 return err;
1385
1386 clk_info.flags = 0;
1387 clk_info.freq_hz = MHZ_TO_HZ(freq_mhz);
1388
1389 err = copy_to_user((void __user *)entry, &clk_info,
1390 sizeof(clk_info));
1391 if (err)
1392 return -EFAULT;
1393 }
1394
1395 nvgpu_speculation_barrier();
1396 args->num_entries = num_entries;
1397
1398 return 0;
1399}
1400
1401static int nvgpu_gpu_get_event_fd(struct gk20a *g,
1402 struct gk20a_ctrl_priv *priv,
1403 struct nvgpu_gpu_get_event_fd_args *args)
1404{
1405 struct nvgpu_clk_session *session = priv->clk_session;
1406
1407 nvgpu_log_fn(g, " ");
1408
1409 if (!session)
1410 return -EINVAL;
1411
1412 return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd,
1413 args->flags);
1414}
1415
1416static int nvgpu_gpu_get_voltage(struct gk20a *g,
1417 struct nvgpu_gpu_get_voltage_args *args)
1418{
1419 int err = -EINVAL;
1420
1421 nvgpu_log_fn(g, " ");
1422
1423 if (args->reserved)
1424 return -EINVAL;
1425
1426 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE))
1427 return -EINVAL;
1428
1429 err = gk20a_busy(g);
1430 if (err)
1431 return err;
1432
1433 nvgpu_speculation_barrier();
1434 switch (args->which) {
1435 case NVGPU_GPU_VOLTAGE_CORE:
1436 err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage);
1437 break;
1438 case NVGPU_GPU_VOLTAGE_SRAM:
1439 err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage);
1440 break;
1441 case NVGPU_GPU_VOLTAGE_BUS:
1442 err = pmgr_pwr_devices_get_voltage(g, &args->voltage);
1443 break;
1444 default:
1445 err = -EINVAL;
1446 }
1447
1448 gk20a_idle(g);
1449
1450 return err;
1451}
1452
1453static int nvgpu_gpu_get_current(struct gk20a *g,
1454 struct nvgpu_gpu_get_current_args *args)
1455{
1456 int err;
1457
1458 nvgpu_log_fn(g, " ");
1459
1460 if (args->reserved[0] || args->reserved[1] || args->reserved[2])
1461 return -EINVAL;
1462
1463 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT))
1464 return -EINVAL;
1465
1466 err = gk20a_busy(g);
1467 if (err)
1468 return err;
1469
1470 err = pmgr_pwr_devices_get_current(g, &args->currnt);
1471
1472 gk20a_idle(g);
1473
1474 return err;
1475}
1476
1477static int nvgpu_gpu_get_power(struct gk20a *g,
1478 struct nvgpu_gpu_get_power_args *args)
1479{
1480 int err;
1481
1482 nvgpu_log_fn(g, " ");
1483
1484 if (args->reserved[0] || args->reserved[1] || args->reserved[2])
1485 return -EINVAL;
1486
1487 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER))
1488 return -EINVAL;
1489
1490 err = gk20a_busy(g);
1491 if (err)
1492 return err;
1493
1494 err = pmgr_pwr_devices_get_power(g, &args->power);
1495
1496 gk20a_idle(g);
1497
1498 return err;
1499}
1500
1501static int nvgpu_gpu_get_temperature(struct gk20a *g,
1502 struct nvgpu_gpu_get_temperature_args *args)
1503{
1504 int err;
1505 u32 temp_f24_8;
1506
1507 nvgpu_log_fn(g, " ");
1508
1509 if (args->reserved[0] || args->reserved[1] || args->reserved[2])
1510 return -EINVAL;
1511
1512 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE))
1513 return -EINVAL;
1514
1515 if (!g->ops.therm.get_internal_sensor_curr_temp)
1516 return -EINVAL;
1517
1518 err = gk20a_busy(g);
1519 if (err)
1520 return err;
1521
1522 err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8);
1523
1524 gk20a_idle(g);
1525
1526 args->temp_f24_8 = (s32)temp_f24_8;
1527
1528 return err;
1529}
1530
1531static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g,
1532 struct nvgpu_gpu_set_therm_alert_limit_args *args)
1533{
1534 int err;
1535
1536 nvgpu_log_fn(g, " ");
1537
1538 if (args->reserved[0] || args->reserved[1] || args->reserved[2])
1539 return -EINVAL;
1540
1541 if (!g->ops.therm.configure_therm_alert)
1542 return -EINVAL;
1543
1544 err = gk20a_busy(g);
1545 if (err)
1546 return err;
1547
1548 err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8);
1549
1550 gk20a_idle(g);
1551
1552 return err;
1553}
1554
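/*
 * Editor note on the helper below: a deterministic channel normally holds a
 * power reference that keeps the GPU from railgating. Allowing railgating
 * drops that reference (gk20a_idle); disallowing it again re-takes the
 * reference with gk20a_busy. Calls that do not change the current state are
 * no-ops.
 */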
1555static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch,
1556 u32 flags)
1557{
1558 int err = 0;
1559 bool allow;
1560 bool disallow;
1561
1562 allow = flags &
1563 NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING;
1564
1565 disallow = flags &
1566 NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING;
1567
1568 /* Can't be both at the same time */
1569 if (allow && disallow)
1570 return -EINVAL;
1571
1572 /* Nothing to do */
1573 if (!allow && !disallow)
1574 return 0;
1575
1576 /*
1577 * Moving into explicit idle or back from it? A call that doesn't
1578 * change the status is a no-op.
1579 */
1580 if (!ch->deterministic_railgate_allowed &&
1581 allow) {
1582 gk20a_idle(ch->g);
1583 } else if (ch->deterministic_railgate_allowed &&
1584 !allow) {
1585 err = gk20a_busy(ch->g);
1586 if (err) {
1587 nvgpu_warn(ch->g,
1588 "cannot busy to restore deterministic ch");
1589 return err;
1590 }
1591 }
1592 ch->deterministic_railgate_allowed = allow;
1593
1594 return err;
1595}
1596
1597static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags)
1598{
1599 if (!ch->deterministic)
1600 return -EINVAL;
1601
1602 return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags);
1603}
1604
1605static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
1606 struct nvgpu_gpu_set_deterministic_opts_args *args)
1607{
1608 int __user *user_channels;
1609 u32 i = 0;
1610 int err = 0;
1611
1612 nvgpu_log_fn(g, " ");
1613
1614 user_channels = (int __user *)(uintptr_t)args->channels;
1615
1616 /* Upper limit; prevent holding deterministic_busy for long */
1617 if (args->num_channels > g->fifo.num_channels) {
1618 err = -EINVAL;
1619 goto out;
1620 }
1621
1622 /* Trivial sanity check first */
1623 if (!access_ok(VERIFY_READ, user_channels,
1624 args->num_channels * sizeof(int))) {
1625 err = -EFAULT;
1626 goto out;
1627 }
1628
1629 nvgpu_rwsem_down_read(&g->deterministic_busy);
1630
1631 /* note: we exit at the first failure */
1632 for (; i < args->num_channels; i++) {
1633 int ch_fd = 0;
1634 struct channel_gk20a *ch;
1635
1636 if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) {
1637 /* User raced with above access_ok */
1638 err = -EFAULT;
1639 break;
1640 }
1641
1642 ch = gk20a_get_channel_from_file(ch_fd);
1643 if (!ch) {
1644 err = -EINVAL;
1645 break;
1646 }
1647
1648 err = nvgpu_gpu_set_deterministic_ch(ch, args->flags);
1649
1650 gk20a_channel_put(ch);
1651
1652 if (err)
1653 break;
1654 }
1655
1656 nvgpu_speculation_barrier();
1657 nvgpu_rwsem_up_read(&g->deterministic_busy);
1658
1659out:
1660 args->num_channels = i;
1661 return err;
1662}
1663
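/*
 * Editor note on the dispatcher below: ioctl arguments are staged through a
 * fixed on-stack buffer, copied in for _IOC_WRITE commands, handed to the
 * per-command helper, and copied back for _IOC_READ commands on success.
 *
 * Illustrative userspace sketch (assumes a /dev/nvhost-ctrl-gpu device node
 * and the nvgpu uapi header; not part of this driver):
 *
 *   struct nvgpu_gpu_get_gpu_time_args args = {0};
 *   int fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);
 *   if (fd >= 0 && ioctl(fd, NVGPU_GPU_IOCTL_GET_GPU_TIME, &args) == 0)
 *           printf("gpu timestamp %llu\n",
 *                  (unsigned long long)args.gpu_timestamp);
 */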
1664long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1665{
1666 struct gk20a_ctrl_priv *priv = filp->private_data;
1667 struct gk20a *g = priv->g;
1668 struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
1669 struct nvgpu_gpu_zcull_get_info_args *get_info_args;
1670 struct nvgpu_gpu_zbc_set_table_args *set_table_args;
1671 struct nvgpu_gpu_zbc_query_table_args *query_table_args;
1672 u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE];
1673 struct gr_zcull_info *zcull_info;
1674 struct zbc_entry *zbc_val;
1675 struct zbc_query_params *zbc_tbl;
1676 int i, err = 0;
1677
1678 nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
1679
1680 if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) ||
1681 (_IOC_NR(cmd) == 0) ||
1682 (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) ||
1683 (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE))
1684 return -EINVAL;
1685
1686 memset(buf, 0, sizeof(buf));
1687 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1688 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
1689 return -EFAULT;
1690 }
1691
1692 if (!g->sw_ready) {
1693 err = gk20a_busy(g);
1694 if (err)
1695 return err;
1696
1697 gk20a_idle(g);
1698 }
1699
1700 nvgpu_speculation_barrier();
1701 switch (cmd) {
1702 case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
1703 get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
1704
1705 get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr);
1706
1707 break;
1708 case NVGPU_GPU_IOCTL_ZCULL_GET_INFO:
1709 get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf;
1710
1711 memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args));
1712
1713 zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info));
1714 if (zcull_info == NULL)
1715 return -ENOMEM;
1716
1717 err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);
1718 if (err) {
1719 nvgpu_kfree(g, zcull_info);
1720 break;
1721 }
1722
1723 get_info_args->width_align_pixels = zcull_info->width_align_pixels;
1724 get_info_args->height_align_pixels = zcull_info->height_align_pixels;
1725 get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots;
1726 get_info_args->aliquot_total = zcull_info->aliquot_total;
1727 get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier;
1728 get_info_args->region_header_size = zcull_info->region_header_size;
1729 get_info_args->subregion_header_size = zcull_info->subregion_header_size;
1730 get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels;
1731 get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels;
1732 get_info_args->subregion_count = zcull_info->subregion_count;
1733
1734 nvgpu_kfree(g, zcull_info);
1735 break;
1736 case NVGPU_GPU_IOCTL_ZBC_SET_TABLE:
1737 set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf;
1738
1739 zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry));
1740 if (zbc_val == NULL)
1741 return -ENOMEM;
1742
1743 zbc_val->format = set_table_args->format;
1744 zbc_val->type = set_table_args->type;
1745
1746 nvgpu_speculation_barrier();
1747 switch (zbc_val->type) {
1748 case GK20A_ZBC_TYPE_COLOR:
1749 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
1750 zbc_val->color_ds[i] = set_table_args->color_ds[i];
1751 zbc_val->color_l2[i] = set_table_args->color_l2[i];
1752 }
1753 break;
1754 case GK20A_ZBC_TYPE_DEPTH:
1755 case T19X_ZBC:
1756 zbc_val->depth = set_table_args->depth;
1757 break;
1758 default:
1759 err = -EINVAL;
1760 }
1761
1762 if (!err) {
1763 err = gk20a_busy(g);
1764 if (!err) {
1765 err = g->ops.gr.zbc_set_table(g, &g->gr,
1766 zbc_val);
1767 gk20a_idle(g);
1768 }
1769 }
1770
1771 if (zbc_val)
1772 nvgpu_kfree(g, zbc_val);
1773 break;
1774 case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE:
1775 query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf;
1776
1777 zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params));
1778 if (zbc_tbl == NULL)
1779 return -ENOMEM;
1780
1781 zbc_tbl->type = query_table_args->type;
1782 zbc_tbl->index_size = query_table_args->index_size;
1783
1784 err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl);
1785
1786 if (!err) {
1787 switch (zbc_tbl->type) {
1788 case GK20A_ZBC_TYPE_COLOR:
1789 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
1790 query_table_args->color_ds[i] = zbc_tbl->color_ds[i];
1791 query_table_args->color_l2[i] = zbc_tbl->color_l2[i];
1792 }
1793 break;
1794 case GK20A_ZBC_TYPE_DEPTH:
1795 case T19X_ZBC:
1796 query_table_args->depth = zbc_tbl->depth;
1797 break;
1798 case GK20A_ZBC_TYPE_INVALID:
1799 query_table_args->index_size = zbc_tbl->index_size;
1800 break;
1801 default:
1802 err = -EINVAL;
1803 }
1804 if (!err) {
1805 query_table_args->format = zbc_tbl->format;
1806 query_table_args->ref_cnt = zbc_tbl->ref_cnt;
1807 }
1808 }
1809
1810 if (zbc_tbl)
1811 nvgpu_kfree(g, zbc_tbl);
1812 break;
1813
1814 case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS:
1815 err = gk20a_ctrl_ioctl_gpu_characteristics(
1816 g, (struct nvgpu_gpu_get_characteristics *)buf);
1817 break;
1818 case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ:
1819 err = gk20a_ctrl_prepare_compressible_read(g,
1820 (struct nvgpu_gpu_prepare_compressible_read_args *)buf);
1821 break;
1822 case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE:
1823 err = gk20a_ctrl_mark_compressible_write(g,
1824 (struct nvgpu_gpu_mark_compressible_write_args *)buf);
1825 break;
1826 case NVGPU_GPU_IOCTL_ALLOC_AS:
1827 err = gk20a_ctrl_alloc_as(g,
1828 (struct nvgpu_alloc_as_args *)buf);
1829 break;
1830 case NVGPU_GPU_IOCTL_OPEN_TSG:
1831 err = gk20a_ctrl_open_tsg(g,
1832 (struct nvgpu_gpu_open_tsg_args *)buf);
1833 break;
1834 case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
1835 err = gk20a_ctrl_get_tpc_masks(g,
1836 (struct nvgpu_gpu_get_tpc_masks_args *)buf);
1837 break;
1838 case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS:
1839 err = gk20a_ctrl_get_fbp_l2_masks(g,
1840 (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf);
1841 break;
1842 case NVGPU_GPU_IOCTL_OPEN_CHANNEL:
1843 /* this uses nvgpu_channel_open_args; nvgpu.h also defines
1844 * ..gpu_open_channel_args for consistency - the two are the same */
1845 err = gk20a_channel_open_ioctl(g,
1846 (struct nvgpu_channel_open_args *)buf);
1847 break;
1848 case NVGPU_GPU_IOCTL_FLUSH_L2:
1849 err = nvgpu_gpu_ioctl_l2_fb_ops(g,
1850 (struct nvgpu_gpu_l2_fb_args *)buf);
1851 break;
1852
1853 case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
1854 err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
1855 (struct nvgpu_gpu_mmu_debug_mode_args *)buf);
1856 break;
1857
1858 case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
1859 err = gr_gk20a_elpg_protected_call(g,
1860 nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf));
1861 break;
1862
1863 case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND:
1864 err = nvgpu_gpu_ioctl_trigger_suspend(g);
1865 break;
1866
1867 case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
1868 err = nvgpu_gpu_ioctl_wait_for_pause(g,
1869 (struct nvgpu_gpu_wait_pause_args *)buf);
1870 break;
1871
1872 case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE:
1873 err = nvgpu_gpu_ioctl_resume_from_pause(g);
1874 break;
1875
1876 case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS:
1877 err = nvgpu_gpu_ioctl_clear_sm_errors(g);
1878 break;
1879
1880 case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
1881 err = nvgpu_gpu_ioctl_has_any_exception(g,
1882 (struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
1883 break;
1884
1885 case NVGPU_GPU_IOCTL_NUM_VSMS:
1886 err = gk20a_ctrl_get_num_vsms(g,
1887 (struct nvgpu_gpu_num_vsms *)buf);
1888 break;
1889 case NVGPU_GPU_IOCTL_VSMS_MAPPING:
1890 err = gk20a_ctrl_vsm_mapping(g,
1891 (struct nvgpu_gpu_vsms_mapping *)buf);
1892 break;
1893
1894 case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO:
1895 err = nvgpu_gpu_get_cpu_time_correlation_info(g,
1896 (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf);
1897 break;
1898
1899 case NVGPU_GPU_IOCTL_GET_GPU_TIME:
1900 err = nvgpu_gpu_get_gpu_time(g,
1901 (struct nvgpu_gpu_get_gpu_time_args *)buf);
1902 break;
1903
1904 case NVGPU_GPU_IOCTL_GET_ENGINE_INFO:
1905 err = nvgpu_gpu_get_engine_info(g,
1906 (struct nvgpu_gpu_get_engine_info_args *)buf);
1907 break;
1908
1909 case NVGPU_GPU_IOCTL_ALLOC_VIDMEM:
1910 err = nvgpu_gpu_alloc_vidmem(g,
1911 (struct nvgpu_gpu_alloc_vidmem_args *)buf);
1912 break;
1913
1914 case NVGPU_GPU_IOCTL_GET_MEMORY_STATE:
1915 err = nvgpu_gpu_get_memory_state(g,
1916 (struct nvgpu_gpu_get_memory_state_args *)buf);
1917 break;
1918
1919 case NVGPU_GPU_IOCTL_CLK_GET_RANGE:
1920 err = nvgpu_gpu_clk_get_range(g, priv,
1921 (struct nvgpu_gpu_clk_range_args *)buf);
1922 break;
1923
1924 case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS:
1925 err = nvgpu_gpu_clk_get_vf_points(g, priv,
1926 (struct nvgpu_gpu_clk_vf_points_args *)buf);
1927 break;
1928
1929 case NVGPU_GPU_IOCTL_CLK_SET_INFO:
1930 err = nvgpu_gpu_clk_set_info(g, priv,
1931 (struct nvgpu_gpu_clk_set_info_args *)buf);
1932 break;
1933
1934 case NVGPU_GPU_IOCTL_CLK_GET_INFO:
1935 err = nvgpu_gpu_clk_get_info(g, priv,
1936 (struct nvgpu_gpu_clk_get_info_args *)buf);
1937 break;
1938
1939 case NVGPU_GPU_IOCTL_GET_EVENT_FD:
1940 err = nvgpu_gpu_get_event_fd(g, priv,
1941 (struct nvgpu_gpu_get_event_fd_args *)buf);
1942 break;
1943
1944 case NVGPU_GPU_IOCTL_GET_VOLTAGE:
1945 err = nvgpu_gpu_get_voltage(g,
1946 (struct nvgpu_gpu_get_voltage_args *)buf);
1947 break;
1948
1949 case NVGPU_GPU_IOCTL_GET_CURRENT:
1950 err = nvgpu_gpu_get_current(g,
1951 (struct nvgpu_gpu_get_current_args *)buf);
1952 break;
1953
1954 case NVGPU_GPU_IOCTL_GET_POWER:
1955 err = nvgpu_gpu_get_power(g,
1956 (struct nvgpu_gpu_get_power_args *)buf);
1957 break;
1958
1959 case NVGPU_GPU_IOCTL_GET_TEMPERATURE:
1960 err = nvgpu_gpu_get_temperature(g,
1961 (struct nvgpu_gpu_get_temperature_args *)buf);
1962 break;
1963
1964 case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT:
1965 err = nvgpu_gpu_set_therm_alert_limit(g,
1966 (struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
1967 break;
1968
1969 case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS:
1970 err = nvgpu_gpu_set_deterministic_opts(g,
1971 (struct nvgpu_gpu_set_deterministic_opts_args *)buf);
1972 break;
1973
1974 default:
1975 nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
1976 err = -ENOTTY;
1977 break;
1978 }
1979
1980 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
1981 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)) ? -EFAULT : 0;
1982
1983 return err;
1984}
1985
1986static void usermode_vma_close(struct vm_area_struct *vma)
1987{
1988 struct gk20a_ctrl_priv *priv = vma->vm_private_data;
1989 struct gk20a *g = priv->g;
1990 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
1991
1992 nvgpu_mutex_acquire(&l->ctrl.privs_lock);
1993 priv->usermode_vma.vma = NULL;
1994 priv->usermode_vma.vma_mapped = false;
1995 nvgpu_mutex_release(&l->ctrl.privs_lock);
1996}
1997
1998struct vm_operations_struct usermode_vma_ops = {
1999 /* no .open - we use VM_DONTCOPY and don't support fork */
2000 .close = usermode_vma_close,
2001};
2002
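/*
 * Editor note on the handler below: it maps the 4K usermode register window
 * into the caller's address space as a non-cached PFN mapping. Only one
 * mapping per ctrl fd is allowed, and the vma is remembered so it can be
 * zapped and restored across poweroff/poweron (see alter_usermode_mapping
 * further down).
 */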
2003int gk20a_ctrl_dev_mmap(struct file *filp, struct vm_area_struct *vma)
2004{
2005 struct gk20a_ctrl_priv *priv = filp->private_data;
2006 struct gk20a *g = priv->g;
2007 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
2008 u64 addr;
2009 int err;
2010
2011 if (g->ops.fifo.usermode_base == NULL)
2012 return -ENOSYS;
2013
2014 if (priv->usermode_vma.vma != NULL)
2015 return -EBUSY;
2016
2017 if (vma->vm_end - vma->vm_start != SZ_4K)
2018 return -EINVAL;
2019
2020 if (vma->vm_pgoff != 0UL)
2021 return -EINVAL;
2022
2023 addr = l->regs_bus_addr + g->ops.fifo.usermode_base(g);
2024
2025 /* Sync with poweron/poweroff, and require valid regs */
2026 err = gk20a_busy(g);
2027 if (err) {
2028 return err;
2029 }
2030
2031 nvgpu_mutex_acquire(&l->ctrl.privs_lock);
2032
2033 vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
2034 VM_DONTDUMP | VM_PFNMAP;
2035 vma->vm_ops = &usermode_vma_ops;
2036 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
2037
2038 err = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
2039 vma->vm_end - vma->vm_start, vma->vm_page_prot);
2040 if (!err) {
2041 priv->usermode_vma.vma = vma;
2042 vma->vm_private_data = priv;
2043 priv->usermode_vma.vma_mapped = true;
2044 }
2045 nvgpu_mutex_release(&l->ctrl.privs_lock);
2046
2047 gk20a_idle(g);
2048
2049 return err;
2050}
2051
2052static int alter_usermode_mapping(struct gk20a *g,
2053 struct gk20a_ctrl_priv *priv,
2054 bool poweroff)
2055{
2056 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
2057 struct vm_area_struct *vma = priv->usermode_vma.vma;
2058 bool vma_mapped = priv->usermode_vma.vma_mapped;
2059 u64 addr;
2060 int err = 0;
2061
2062 if (!vma) {
2063 /* Nothing to do - no mmap called */
2064 return 0;
2065 }
2066
2067 addr = l->regs_bus_addr + g->ops.fifo.usermode_base(g);
2068
2069 /*
2070 * This is a no-op for the following cases:
2071 * a) poweroff and !vma_mapped -> nothing to do, no mapping exists
2072 * b) !poweroff and vma_mapped -> nothing to do, already mapped
2073 */
2074 if (poweroff != vma_mapped) {
2075 return 0;
2076 }
2077
2078 /*
2079 * We use trylock due to lock inversion: here we take mmap_sem while
2080 * holding ctrl_privs_lock, whereas usermode_vma_close acquires them
2081 * in the reverse order. Trylock avoids the potential deadlock.
2082 */
2083 if (!down_write_trylock(&vma->vm_mm->mmap_sem)) {
2084 return -EBUSY;
2085 }
2086
2087 if (poweroff) {
2088 err = zap_vma_ptes(vma, vma->vm_start, SZ_4K);
2089 if (err == 0) {
2090 priv->usermode_vma.vma_mapped = false;
2091 } else {
2092 nvgpu_err(g, "can't remove usermode mapping");
2093 }
2094 } else {
2095 err = io_remap_pfn_range(vma, vma->vm_start,
2096 addr >> PAGE_SHIFT,
2097 SZ_4K, vma->vm_page_prot);
2098 if (err != 0) {
2099 nvgpu_err(g, "can't restore usermode mapping");
2100 } else {
2101 priv->usermode_vma.vma_mapped = true;
2102 }
2103 }
2104
2105 up_write(&vma->vm_mm->mmap_sem);
2106
2107 return err;
2108}
2109
2110static void alter_usermode_mappings(struct gk20a *g, bool poweroff)
2111{
2112 struct gk20a_ctrl_priv *priv;
2113 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
2114 int err = 0;
2115
2116 do {
2117 nvgpu_mutex_acquire(&l->ctrl.privs_lock);
2118 nvgpu_list_for_each_entry(priv, &l->ctrl.privs,
2119 gk20a_ctrl_priv, list) {
2120 err = alter_usermode_mapping(g, priv, poweroff);
2121 if (err != 0) {
2122 break;
2123 }
2124 }
2125 nvgpu_mutex_release(&l->ctrl.privs_lock);
2126
2127 if (err == -EBUSY) {
2128 nvgpu_log_info(g, "ctrl_privs_lock contended. retry altering usermode mappings");
2129 nvgpu_udelay(10);
2130 } else if (err != 0) {
2131 nvgpu_err(g, "can't alter usermode mapping. err = %d", err);
2132 }
2133 } while (err == -EBUSY);
2134}
2135
2136void nvgpu_hide_usermode_for_poweroff(struct gk20a *g)
2137{
2138 alter_usermode_mappings(g, true);
2139}
2140
2141void nvgpu_restore_usermode_for_poweron(struct gk20a *g)
2142{
2143 alter_usermode_mappings(g, false);
2144}
diff --git a/include/os/linux/ioctl_ctrl.h b/include/os/linux/ioctl_ctrl.h
deleted file mode 100644
index 3e1f798..0000000
--- a/include/os/linux/ioctl_ctrl.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __NVGPU_IOCTL_CTRL_H__
17#define __NVGPU_IOCTL_CTRL_H__
18
19int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
20int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
21long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
22int gk20a_ctrl_dev_mmap(struct file *filp, struct vm_area_struct *vma);
23
24void nvgpu_hide_usermode_for_poweroff(struct gk20a *g);
25void nvgpu_restore_usermode_for_poweron(struct gk20a *g);
26
27#endif
diff --git a/include/os/linux/ioctl_dbg.c b/include/os/linux/ioctl_dbg.c
deleted file mode 100644
index b5a1071..0000000
--- a/include/os/linux/ioctl_dbg.c
+++ /dev/null
@@ -1,2210 +0,0 @@
1/*
2 * Tegra GK20A GPU Debugger/Profiler Driver
3 *
4 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/fs.h>
20#include <linux/file.h>
21#include <linux/cdev.h>
22#include <linux/uaccess.h>
23#include <linux/dma-buf.h>
24#include <linux/poll.h>
25#include <uapi/linux/nvgpu.h>
26
27#include <nvgpu/kmem.h>
28#include <nvgpu/log.h>
29#include <nvgpu/vm.h>
30#include <nvgpu/atomic.h>
31#include <nvgpu/cond.h>
32#include <nvgpu/utils.h>
33#include <nvgpu/gk20a.h>
34#include <nvgpu/channel.h>
35#include <nvgpu/tsg.h>
36
37#include <nvgpu/linux/vm.h>
38
39#include "gk20a/gr_gk20a.h"
40#include "gk20a/regops_gk20a.h"
41#include "gk20a/dbg_gpu_gk20a.h"
42#include "os_linux.h"
43#include "platform_gk20a.h"
44#include "ioctl_dbg.h"
45#include "ioctl_channel.h"
46#include "dmabuf_vidmem.h"
47
48struct dbg_session_gk20a_linux {
49 struct device *dev;
50 struct dbg_session_gk20a dbg_s;
51};
52
53struct dbg_session_channel_data_linux {
54 /*
55 * We have to keep a ref to the _file_, not the channel, because
56 * close(channel_fd) is synchronous and would deadlock if we had an
57 * open debug session fd holding a channel ref at that time. Holding a
58 * ref to the file makes close(channel_fd) just drop a kernel ref to
59 * the file; the channel will close when the last file ref is dropped.
60 */
61 struct file *ch_f;
62 struct dbg_session_channel_data ch_data;
63};
64/* turn seriously unwieldy names -> something shorter */
65#define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x
66
67/* silly allocator - just increment id */
68static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
69static int generate_unique_id(void)
70{
71 return nvgpu_atomic_add_return(1, &unique_id);
72}
73
74static int alloc_profiler(struct gk20a *g,
75 struct dbg_profiler_object_data **_prof)
76{
77 struct dbg_profiler_object_data *prof;
78 *_prof = NULL;
79
80 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
81
82 prof = nvgpu_kzalloc(g, sizeof(*prof));
83 if (!prof)
84 return -ENOMEM;
85
86 prof->prof_handle = generate_unique_id();
87 *_prof = prof;
88 return 0;
89}
90
91static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux)
92{
93 struct dbg_session_gk20a_linux *dbg_s_linux;
94 *_dbg_s_linux = NULL;
95
96 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
97
98 dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux));
99 if (!dbg_s_linux)
100 return -ENOMEM;
101
102 dbg_s_linux->dbg_s.id = generate_unique_id();
103 *_dbg_s_linux = dbg_s_linux;
104 return 0;
105}
106
107static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset);
108
109static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
110 struct nvgpu_dbg_gpu_exec_reg_ops_args *args);
111
112static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
113 struct nvgpu_dbg_gpu_powergate_args *args);
114
115static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
116 struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args);
117
118static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
119 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args);
120
121static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(
122 struct dbg_session_gk20a *dbg_s,
123 struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args);
124
125static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
126 struct dbg_session_gk20a *dbg_s,
127 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
128
129static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s,
130 struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
131
132static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux,
133 struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
134
135static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
136 struct nvgpu_dbg_gpu_profiler_reserve_args *args);
137
138static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
139 struct nvgpu_dbg_gpu_perfbuf_map_args *args);
140
141static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
142 struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
143
144static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
145 int timeout_mode);
146
147static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
148 u32 profiler_handle);
149
150static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s);
151
152static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s);
153
154static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
155 u32 profiler_handle);
156
157static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s);
158
159static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
160 struct file *filp, bool is_profiler);
161
162static int nvgpu_set_sm_exception_type_mask_locked(
163 struct dbg_session_gk20a *dbg_s,
164 u32 exception_mask);
165
166unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
167{
168 unsigned int mask = 0;
169 struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data;
170 struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s;
171 struct gk20a *g = dbg_s->g;
172
173 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
174
175 poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait);
176
177 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
178
179 if (dbg_s->dbg_events.events_enabled &&
180 dbg_s->dbg_events.num_pending_events > 0) {
181 nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d",
182 dbg_s->id);
183 nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending",
184 dbg_s->dbg_events.num_pending_events);
185 mask = (POLLPRI | POLLIN);
186 }
187
188 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
189
190 return mask;
191}
192
193int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
194{
195 struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data;
196 struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s;
197 struct gk20a *g = dbg_s->g;
198 struct dbg_profiler_object_data *prof_obj, *tmp_obj;
199
200 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name);
201
202 /* unbind channels */
203 dbg_unbind_all_channels_gk20a(dbg_s);
204
205 /* Powergate/timeout enable is called here because a dbg_session that
206 * disabled powergating or timeouts via ioctl may be killed without ever
207 * calling the corresponding enable ioctl.
208 */
209 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
210 if (dbg_s->is_pg_disabled) {
211 nvgpu_set_powergate_locked(dbg_s, false);
212 }
213 nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
214
215 /* If this session owned the perf buffer, release it */
216 if (g->perfbuf.owner == dbg_s)
217 gk20a_perfbuf_release_locked(g, g->perfbuf.offset);
218
219 /* Per-context profiler objects were released when we called
220 * dbg_unbind_all_channels. We could still have global ones.
221 */
222 nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
223 dbg_profiler_object_data, prof_obj_entry) {
224 if (prof_obj->session_id == dbg_s->id) {
225 if (prof_obj->has_reservation)
226 g->ops.dbg_session_ops.
227 release_profiler_reservation(dbg_s, prof_obj);
228 nvgpu_list_del(&prof_obj->prof_obj_entry);
229 nvgpu_kfree(g, prof_obj);
230 }
231 }
232 nvgpu_mutex_release(&g->dbg_sessions_lock);
233
234 nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
235 nvgpu_mutex_destroy(&dbg_s->ioctl_lock);
236
237 nvgpu_kfree(g, dbg_session_linux);
238 gk20a_put(g);
239
240 return 0;
241}
242
243int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
244{
245 struct nvgpu_os_linux *l = container_of(inode->i_cdev,
246 struct nvgpu_os_linux, prof.cdev);
247 struct gk20a *g = &l->g;
248
249 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
250 return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */);
251}
252
253static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s,
254 struct nvgpu_dbg_gpu_timeout_args *args)
255{
256 int err;
257 struct gk20a *g = dbg_s->g;
258
259 nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable);
260
261 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
262 err = nvgpu_dbg_timeout_enable(dbg_s, args->enable);
263 nvgpu_mutex_release(&g->dbg_sessions_lock);
264
265 return err;
266}
267
268static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
269 struct dbg_session_gk20a *dbg_s,
270 struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
271{
272 struct gk20a *g = dbg_s->g;
273 struct gr_gk20a *gr = &g->gr;
274 struct nvgpu_tsg_sm_error_state *sm_error_state;
275 struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record;
276 struct channel_gk20a *ch;
277 struct tsg_gk20a *tsg;
278 u32 sm_id;
279 int err = 0;
280
281 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
282 if (ch == NULL) {
283 return -EINVAL;
284 }
285
286 tsg = tsg_gk20a_from_ch(ch);
287 if (tsg == NULL) {
288 nvgpu_err(g, "no valid tsg from ch");
289 return -EINVAL;
290 }
291
292 sm_id = args->sm_id;
293 if (sm_id >= gr->no_of_sm) {
294 return -EINVAL;
295 }
296
297 if (tsg->sm_error_states == NULL) {
298 return -EINVAL;
299 }
300
301 nvgpu_speculation_barrier();
302
303 sm_error_state = tsg->sm_error_states + sm_id;
304 sm_error_state_record.hww_global_esr =
305 sm_error_state->hww_global_esr;
306 sm_error_state_record.hww_warp_esr =
307 sm_error_state->hww_warp_esr;
308 sm_error_state_record.hww_warp_esr_pc =
309 sm_error_state->hww_warp_esr_pc;
310 sm_error_state_record.hww_global_esr_report_mask =
311 sm_error_state->hww_global_esr_report_mask;
312 sm_error_state_record.hww_warp_esr_report_mask =
313 sm_error_state->hww_warp_esr_report_mask;
314
315 if (args->sm_error_state_record_size > 0) {
316 size_t write_size = sizeof(*sm_error_state);
317
318 nvgpu_speculation_barrier();
319 if (write_size > args->sm_error_state_record_size)
320 write_size = args->sm_error_state_record_size;
321
322 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
323 err = copy_to_user((void __user *)(uintptr_t)
324 args->sm_error_state_record_mem,
325 &sm_error_state_record,
326 write_size) ? -EFAULT : 0;
327 nvgpu_mutex_release(&g->dbg_sessions_lock);
328 if (err != 0) {
329 nvgpu_err(g, "copy_to_user failed!");
330 return err;
331 }
332
333 args->sm_error_state_record_size = write_size;
334 }
335
336 return 0;
337}
338
339
340static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
341 struct dbg_session_gk20a *dbg_s,
342 struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args)
343{
344 struct gk20a *g = dbg_s->g;
345
346 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
347
348 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
349
350 dbg_s->broadcast_stop_trigger = (args->broadcast != 0);
351
352 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
353
354 return 0;
355}
356
357static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
358 int timeout_mode)
359{
360 struct gk20a *g = dbg_s->g;
361 int err = 0;
362
363 nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d",
364 timeout_mode);
365
366 nvgpu_speculation_barrier();
367 switch (timeout_mode) {
368 case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE:
369 if (dbg_s->is_timeout_disabled == true)
370 nvgpu_atomic_dec(&g->timeouts_disabled_refcount);
371 dbg_s->is_timeout_disabled = false;
372 break;
373
374 case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE:
375 if (dbg_s->is_timeout_disabled == false)
376 nvgpu_atomic_inc(&g->timeouts_disabled_refcount);
377 dbg_s->is_timeout_disabled = true;
378 break;
379
380 default:
381 nvgpu_err(g,
382 "unrecognized dbg gpu timeout mode : 0x%x",
383 timeout_mode);
384 err = -EINVAL;
385 break;
386 }
387
388 if (!err)
389 nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, "
390 "timeouts disabled refcount %d",
391 dbg_s->is_timeout_disabled ? "true" : "false",
392 nvgpu_atomic_read(&g->timeouts_disabled_refcount));
393 return err;
394}
395
396static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
397 struct file *filp, bool is_profiler)
398{
399 struct nvgpu_os_linux *l;
400 struct dbg_session_gk20a_linux *dbg_session_linux;
401 struct dbg_session_gk20a *dbg_s;
402 struct gk20a *g;
403
404 struct device *dev;
405
406 int err;
407
408 if (!is_profiler)
409 l = container_of(inode->i_cdev,
410 struct nvgpu_os_linux, dbg.cdev);
411 else
412 l = container_of(inode->i_cdev,
413 struct nvgpu_os_linux, prof.cdev);
414 g = gk20a_get(&l->g);
415 if (!g)
416 return -ENODEV;
417
418 dev = dev_from_gk20a(g);
419
420 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name);
421
422 err = alloc_session(g, &dbg_session_linux);
423 if (err)
424 goto free_ref;
425
426 dbg_s = &dbg_session_linux->dbg_s;
427
428 filp->private_data = dbg_session_linux;
429 dbg_session_linux->dev = dev;
430 dbg_s->g = g;
431 dbg_s->is_profiler = is_profiler;
432 dbg_s->is_pg_disabled = false;
433 dbg_s->is_timeout_disabled = false;
434
435 nvgpu_cond_init(&dbg_s->dbg_events.wait_queue);
436 nvgpu_init_list_node(&dbg_s->ch_list);
437 err = nvgpu_mutex_init(&dbg_s->ch_list_lock);
438 if (err)
439 goto err_free_session;
440 err = nvgpu_mutex_init(&dbg_s->ioctl_lock);
441 if (err)
442 goto err_destroy_lock;
443 dbg_s->dbg_events.events_enabled = false;
444 dbg_s->dbg_events.num_pending_events = 0;
445
446 return 0;
447
448err_destroy_lock:
449 nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
450err_free_session:
451 nvgpu_kfree(g, dbg_session_linux);
452free_ref:
453 gk20a_put(g);
454 return err;
455}
456
457void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
458{
459 nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue);
460}
461
462static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
463 struct dbg_session_channel_data *ch_data)
464{
465 struct gk20a *g = dbg_s->g;
466 u32 chid;
467 struct dbg_session_data *session_data;
468 struct dbg_profiler_object_data *prof_obj, *tmp_obj;
469 struct dbg_session_channel_data_linux *ch_data_linux;
470
471 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
472
473 chid = ch_data->chid;
474
475 /* If there's a profiler ctx reservation record associated with this
476 * session/channel pair, release it.
477 */
478 nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
479 dbg_profiler_object_data, prof_obj_entry) {
480 if ((prof_obj->session_id == dbg_s->id) &&
481 (prof_obj->ch->chid == chid)) {
482 if (prof_obj->has_reservation) {
483 g->ops.dbg_session_ops.
484 release_profiler_reservation(dbg_s, prof_obj);
485 }
486 nvgpu_list_del(&prof_obj->prof_obj_entry);
487 nvgpu_kfree(g, prof_obj);
488 }
489 }
490
491 nvgpu_list_del(&ch_data->ch_entry);
492
493 session_data = ch_data->session_data;
494 nvgpu_list_del(&session_data->dbg_s_entry);
495 nvgpu_kfree(dbg_s->g, session_data);
496
497 ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux,
498 ch_data);
499
500 fput(ch_data_linux->ch_f);
501 nvgpu_kfree(dbg_s->g, ch_data_linux);
502
503 return 0;
504}
505
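/*
 * Editor note on the helper below: it binds a channel to this debug session
 * by taking a long-lived reference on the channel's file (see the comment on
 * dbg_session_channel_data_linux above), looking up the channel, and linking
 * the new per-channel data into both the session's channel list and the
 * channel's session list under the dbg locks.
 */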
506static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
507 struct nvgpu_dbg_gpu_bind_channel_args *args)
508{
509 struct file *f;
510 struct gk20a *g = dbg_s->g;
511 struct channel_gk20a *ch;
512 struct dbg_session_channel_data_linux *ch_data_linux;
513 struct dbg_session_data *session_data;
514 int err = 0;
515
516 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
517 g->name, args->channel_fd);
518
519 /*
520 * Although gk20a_get_channel_from_file gives us a channel ref, we also
521 * need to hold a ref to the file for the session lifetime. See comment in
522 * struct dbg_session_channel_data.
523 */
524 f = fget(args->channel_fd);
525 if (!f)
526 return -ENODEV;
527
528 ch = gk20a_get_channel_from_file(args->channel_fd);
529 if (!ch) {
530 nvgpu_log_fn(g, "no channel found for fd");
531 err = -EINVAL;
532 goto out_fput;
533 }
534
535 nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid);
536
537 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
538 nvgpu_mutex_acquire(&ch->dbg_s_lock);
539
540 ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux));
541 if (!ch_data_linux) {
542 err = -ENOMEM;
543 goto out_chput;
544 }
545 ch_data_linux->ch_f = f;
546 ch_data_linux->ch_data.channel_fd = args->channel_fd;
547 ch_data_linux->ch_data.chid = ch->chid;
548 ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a;
549 nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry);
550
551 session_data = nvgpu_kzalloc(g, sizeof(*session_data));
552 if (!session_data) {
553 err = -ENOMEM;
554 goto out_kfree;
555 }
556 session_data->dbg_s = dbg_s;
557 nvgpu_init_list_node(&session_data->dbg_s_entry);
558 ch_data_linux->ch_data.session_data = session_data;
559
560 nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list);
561
562 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
563 nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list);
564 nvgpu_mutex_release(&dbg_s->ch_list_lock);
565
566 nvgpu_mutex_release(&ch->dbg_s_lock);
567 nvgpu_mutex_release(&g->dbg_sessions_lock);
568
569 gk20a_channel_put(ch);
570
571 return 0;
572
573out_kfree:
574 nvgpu_kfree(g, ch_data_linux);
575out_chput:
576 gk20a_channel_put(ch);
577 nvgpu_mutex_release(&ch->dbg_s_lock);
578 nvgpu_mutex_release(&g->dbg_sessions_lock);
579out_fput:
580 fput(f);
581 return err;
582}
583
584static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s)
585{
586 struct dbg_session_channel_data *ch_data, *tmp;
587 struct gk20a *g = dbg_s->g;
588
589 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
590 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
591 nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list,
592 dbg_session_channel_data, ch_entry)
593 ch_data->unbind_single_channel(dbg_s, ch_data);
594 nvgpu_mutex_release(&dbg_s->ch_list_lock);
595 nvgpu_mutex_release(&g->dbg_sessions_lock);
596
597 return 0;
598}
599
600/*
601 * Convert common regops op values of the form of NVGPU_DBG_REG_OP_*
602 * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_*
603 */
604static u32 nvgpu_get_regops_op_values_linux(u32 regops_op)
605{
606 switch (regops_op) {
607 case REGOP(READ_32):
608 return REGOP_LINUX(READ_32);
609 case REGOP(WRITE_32):
610 return REGOP_LINUX(WRITE_32);
611 case REGOP(READ_64):
612 return REGOP_LINUX(READ_64);
613 case REGOP(WRITE_64):
614 return REGOP_LINUX(WRITE_64);
615 case REGOP(READ_08):
616 return REGOP_LINUX(READ_08);
617 case REGOP(WRITE_08):
618 return REGOP_LINUX(WRITE_08);
619 }
620
621 return regops_op;
622}
623
624/*
625 * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_*
626 * into common regops op values of the form of NVGPU_DBG_REG_OP_*
627 */
628static u32 nvgpu_get_regops_op_values_common(u32 regops_op)
629{
630 switch (regops_op) {
631 case REGOP_LINUX(READ_32):
632 return REGOP(READ_32);
633 case REGOP_LINUX(WRITE_32):
634 return REGOP(WRITE_32);
635 case REGOP_LINUX(READ_64):
636 return REGOP(READ_64);
637 case REGOP_LINUX(WRITE_64):
638 return REGOP(WRITE_64);
639 case REGOP_LINUX(READ_08):
640 return REGOP(READ_08);
641 case REGOP_LINUX(WRITE_08):
642 return REGOP(WRITE_08);
643 }
644
645 return regops_op;
646}
647
648/*
649 * Convert common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_*
650 * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_*
651 */
652static u32 nvgpu_get_regops_type_values_linux(u32 regops_type)
653{
654 switch (regops_type) {
655 case REGOP(TYPE_GLOBAL):
656 return REGOP_LINUX(TYPE_GLOBAL);
657 case REGOP(TYPE_GR_CTX):
658 return REGOP_LINUX(TYPE_GR_CTX);
659 case REGOP(TYPE_GR_CTX_TPC):
660 return REGOP_LINUX(TYPE_GR_CTX_TPC);
661 case REGOP(TYPE_GR_CTX_SM):
662 return REGOP_LINUX(TYPE_GR_CTX_SM);
663 case REGOP(TYPE_GR_CTX_CROP):
664 return REGOP_LINUX(TYPE_GR_CTX_CROP);
665 case REGOP(TYPE_GR_CTX_ZROP):
666 return REGOP_LINUX(TYPE_GR_CTX_ZROP);
667 case REGOP(TYPE_GR_CTX_QUAD):
668 return REGOP_LINUX(TYPE_GR_CTX_QUAD);
669 }
670
671 return regops_type;
672}
673
674/*
675 * Convert linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_*
676 * into common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_*
677 */
678static u32 nvgpu_get_regops_type_values_common(u32 regops_type)
679{
680 switch (regops_type) {
681 case REGOP_LINUX(TYPE_GLOBAL):
682 return REGOP(TYPE_GLOBAL);
683 case REGOP_LINUX(TYPE_GR_CTX):
684 return REGOP(TYPE_GR_CTX);
685 case REGOP_LINUX(TYPE_GR_CTX_TPC):
686 return REGOP(TYPE_GR_CTX_TPC);
687 case REGOP_LINUX(TYPE_GR_CTX_SM):
688 return REGOP(TYPE_GR_CTX_SM);
689 case REGOP_LINUX(TYPE_GR_CTX_CROP):
690 return REGOP(TYPE_GR_CTX_CROP);
691 case REGOP_LINUX(TYPE_GR_CTX_ZROP):
692 return REGOP(TYPE_GR_CTX_ZROP);
693 case REGOP_LINUX(TYPE_GR_CTX_QUAD):
694 return REGOP(TYPE_GR_CTX_QUAD);
695 }
696
697 return regops_type;
698}
699
700/*
701 * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_*
702 * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_*
703 */
704static u32 nvgpu_get_regops_status_values_linux(u32 regops_status)
705{
706 switch (regops_status) {
707 case REGOP(STATUS_SUCCESS):
708 return REGOP_LINUX(STATUS_SUCCESS);
709 case REGOP(STATUS_INVALID_OP):
710 return REGOP_LINUX(STATUS_INVALID_OP);
711 case REGOP(STATUS_INVALID_TYPE):
712 return REGOP_LINUX(STATUS_INVALID_TYPE);
713 case REGOP(STATUS_INVALID_OFFSET):
714 return REGOP_LINUX(STATUS_INVALID_OFFSET);
715 case REGOP(STATUS_UNSUPPORTED_OP):
716 return REGOP_LINUX(STATUS_UNSUPPORTED_OP);
717 case REGOP(STATUS_INVALID_MASK):
718 return REGOP_LINUX(STATUS_INVALID_MASK);
719 }
720
721 return regops_status;
722}
723
724/*
725 * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_*
726 * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_*
727 */
728static u32 nvgpu_get_regops_status_values_common(u32 regops_status)
729{
730 switch (regops_status) {
731 case REGOP_LINUX(STATUS_SUCCESS):
732 return REGOP(STATUS_SUCCESS);
733 case REGOP_LINUX(STATUS_INVALID_OP):
734 return REGOP(STATUS_INVALID_OP);
735 case REGOP_LINUX(STATUS_INVALID_TYPE):
736 return REGOP(STATUS_INVALID_TYPE);
737 case REGOP_LINUX(STATUS_INVALID_OFFSET):
738 return REGOP(STATUS_INVALID_OFFSET);
739 case REGOP_LINUX(STATUS_UNSUPPORTED_OP):
740 return REGOP(STATUS_UNSUPPORTED_OP);
741 case REGOP_LINUX(STATUS_INVALID_MASK):
742 return REGOP(STATUS_INVALID_MASK);
743 }
744
745 return regops_status;
746}
747
748static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in,
749 struct nvgpu_dbg_reg_op *out, u32 num_ops)
750{
751 u32 i;
752
753 if (in == NULL || out == NULL)
754 return -ENOMEM;
755
756 for (i = 0; i < num_ops; i++) {
757 out[i].op = nvgpu_get_regops_op_values_common(in[i].op);
758 out[i].type = nvgpu_get_regops_type_values_common(in[i].type);
759 out[i].status = nvgpu_get_regops_status_values_common(in[i].status);
760 out[i].quad = in[i].quad;
761 out[i].group_mask = in[i].group_mask;
762 out[i].sub_group_mask = in[i].sub_group_mask;
763 out[i].offset = in[i].offset;
764 out[i].value_lo = in[i].value_lo;
765 out[i].value_hi = in[i].value_hi;
766 out[i].and_n_mask_lo = in[i].and_n_mask_lo;
767 out[i].and_n_mask_hi = in[i].and_n_mask_hi;
768 }
769
770 return 0;
771}
772
773static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in,
774 struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops)
775{
776 u32 i;
777
778 if (in == NULL || out == NULL)
779 return -ENOMEM;
780
781 for (i = 0; i < num_ops; i++) {
782 out[i].op = nvgpu_get_regops_op_values_linux(in[i].op);
783 out[i].type = nvgpu_get_regops_type_values_linux(in[i].type);
784 out[i].status = nvgpu_get_regops_status_values_linux(in[i].status);
785 out[i].quad = in[i].quad;
786 out[i].group_mask = in[i].group_mask;
787 out[i].sub_group_mask = in[i].sub_group_mask;
788 out[i].offset = in[i].offset;
789 out[i].value_lo = in[i].value_lo;
790 out[i].value_hi = in[i].value_hi;
791 out[i].and_n_mask_lo = in[i].and_n_mask_lo;
792 out[i].and_n_mask_hi = in[i].and_n_mask_hi;
793 }
794
795 return 0;
796}
797
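/*
 * Editor note on the handler below: it executes a batch of register
 * operations. The user-supplied ops are processed in fragments of at most
 * g->dbg_regops_tmp_buf_ops entries: each fragment is copied in, translated
 * from the uapi layout to the common one, executed, translated back and
 * copied out. The whole batch runs under dbg_sessions_lock, with powergating
 * disabled for the duration on non-virtual GPUs.
 */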
798static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
799 struct nvgpu_dbg_gpu_exec_reg_ops_args *args)
800{
801 int err = 0, powergate_err = 0;
802 bool is_pg_disabled = false;
803
804 struct gk20a *g = dbg_s->g;
805 struct channel_gk20a *ch;
806
807 bool is_current_ctx;
808
809
810 nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
811
812 if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) {
813 nvgpu_err(g, "regops limit exceeded");
814 return -EINVAL;
815 }
816
817 if (args->num_ops == 0) {
818 /* Nothing to do */
819 return 0;
820 }
821
822 if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
823 nvgpu_err(g, "reg ops work buffer not allocated");
824 return -ENODEV;
825 }
826
827 if (!dbg_s->id) {
828 nvgpu_err(g, "can't call reg_ops on an unbound debugger session");
829 return -EINVAL;
830 }
831
832 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
833 if (!dbg_s->is_profiler && !ch) {
834 nvgpu_err(g, "bind a channel before regops for a debugging session");
835 return -EINVAL;
836 }
837
838 /* since exec_reg_ops sends methods to the ucode, it must take the
839 * global gpu lock to protect against mixing methods from debug sessions
840 * on other channels */
841 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
842
843 if (!dbg_s->is_pg_disabled && !g->is_virtual) {
844 /* In the virtual case, the server will handle
845 * disabling/enabling powergating when processing reg ops
846 */
847 powergate_err = nvgpu_set_powergate_locked(dbg_s, true);
848 if (!powergate_err) {
849 is_pg_disabled = true;
850 }
851 }
852
853 if (!powergate_err) {
854 u64 ops_offset = 0; /* index offset */
855
856 struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL;
857
858 linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops *
859 sizeof(struct nvgpu_dbg_gpu_reg_op));
860
861 if (!linux_fragment)
862 return -ENOMEM;
863
864 while (ops_offset < args->num_ops && !err) {
865 const u64 num_ops =
866 min(args->num_ops - ops_offset,
867 (u64)(g->dbg_regops_tmp_buf_ops));
868 const u64 fragment_size =
869 num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op);
870
871 void __user *const fragment =
872 (void __user *)(uintptr_t)
873 (args->ops +
874 ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op));
875
876 nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu",
877 ops_offset, num_ops);
878
879 nvgpu_log_fn(g, "Copying regops from userspace");
880
881 if (copy_from_user(linux_fragment,
882 fragment, fragment_size)) {
883 nvgpu_err(g, "copy_from_user failed!");
884 err = -EFAULT;
885 break;
886 }
887
888 err = nvgpu_get_regops_data_common(linux_fragment,
889 g->dbg_regops_tmp_buf, num_ops);
890
891 if (err)
892 break;
893
894 err = g->ops.regops.exec_regops(
895 dbg_s, g->dbg_regops_tmp_buf, num_ops, &is_current_ctx);
896
897 if (err) {
898 break;
899 }
900
901 if (ops_offset == 0) {
902 args->gr_ctx_resident = is_current_ctx;
903 }
904
905 err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf,
906 linux_fragment, num_ops);
907
908 if (err)
909 break;
910
911 nvgpu_log_fn(g, "Copying result to userspace");
912
913 if (copy_to_user(fragment, linux_fragment,
914 fragment_size)) {
915 nvgpu_err(g, "copy_to_user failed!");
916 err = -EFAULT;
917 break;
918 }
919
920 ops_offset += num_ops;
921 }
922
923 nvgpu_speculation_barrier();
924 nvgpu_kfree(g, linux_fragment);
925
926 /* enable powergate, if previously disabled */
927 if (is_pg_disabled) {
928 powergate_err = nvgpu_set_powergate_locked(dbg_s,
929 false);
930 }
931 }
932
933 nvgpu_mutex_release(&g->dbg_sessions_lock);
934
935 if (!err && powergate_err)
936 err = powergate_err;
937
938 if (err)
939 nvgpu_err(g, "dbg regops failed");
940
941 return err;
942}
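/*
 * A minimal userspace sketch of the reg_ops path above, assuming a debugger
 * session fd that is already bound to a channel. The op/type/status constants
 * (NVGPU_DBG_GPU_REG_OP_READ_32, NVGPU_DBG_GPU_REG_OP_TYPE_GR_CTX,
 * NVGPU_DBG_GPU_REG_OP_STATUS_SUCCESS) are assumed to come from the nvgpu
 * uapi header, not from this file.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* nvgpu uapi; install path may differ */

static int read_gr_ctx_reg(int dbg_fd, uint32_t reg_offset, uint32_t *value)
{
	struct nvgpu_dbg_gpu_reg_op op;
	struct nvgpu_dbg_gpu_exec_reg_ops_args args;

	memset(&op, 0, sizeof(op));
	op.op = NVGPU_DBG_GPU_REG_OP_READ_32;		/* 32-bit read */
	op.type = NVGPU_DBG_GPU_REG_OP_TYPE_GR_CTX;	/* per-context register */
	op.offset = reg_offset;

	memset(&args, 0, sizeof(args));
	args.ops = (uint64_t)(uintptr_t)&op;	/* user pointer, passed as u64 */
	args.num_ops = 1;			/* the driver fragments large batches itself */

	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_REG_OPS, &args) != 0)
		return -1;

	if (op.status != NVGPU_DBG_GPU_REG_OP_STATUS_SUCCESS)
		return -1;

	*value = op.value_lo;	/* result written back by the driver */
	return 0;
}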
943
944static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
945 struct nvgpu_dbg_gpu_powergate_args *args)
946{
947 int err;
948 struct gk20a *g = dbg_s->g;
949 nvgpu_log_fn(g, "%s powergate mode = %d",
950 g->name, args->mode);
951
952 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
953 if ((args->mode != NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) &&
954 (args->mode != NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE)) {
955 nvgpu_err(g, "invalid powergate mode");
956 err = -EINVAL;
957 goto pg_err_end;
958 }
959
960 err = nvgpu_set_powergate_locked(dbg_s,
961 args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE);
962pg_err_end:
963 nvgpu_mutex_release(&g->dbg_sessions_lock);
964 return err;
965}
966
967static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
968 struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args)
969{
970 int err;
971 struct gk20a *g = dbg_s->g;
972 struct channel_gk20a *ch_gk20a;
973
974 nvgpu_log_fn(g, "%s smpc ctxsw mode = %d",
975 g->name, args->mode);
976
977 err = gk20a_busy(g);
978 if (err) {
979 nvgpu_err(g, "failed to poweron");
980 return err;
981 }
982
983 /* Take the global lock, since we'll be doing global regops */
984 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
985
986 ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s);
987 if (!ch_gk20a) {
988 nvgpu_err(g,
989 "no bound channel for smpc ctxsw mode update");
990 err = -EINVAL;
991 goto clean_up;
992 }
993
994 err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a,
995 args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
996 if (err) {
997 nvgpu_err(g,
998 "error (%d) during smpc ctxsw mode update", err);
999 }
1000
1001 clean_up:
1002 nvgpu_mutex_release(&g->dbg_sessions_lock);
1003 gk20a_idle(g);
1004 return err;
1005}
1006
1007/*
 1008 * Convert Linux hwpm ctxsw mode values of the form NVGPU_DBG_GPU_HWPM_CTXSW_MODE_*
 1009 * into common hwpm ctxsw mode values of the form NVGPU_DBG_HWPM_CTXSW_MODE_*
1010 */
1011
1012static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
1013{
1014 nvgpu_speculation_barrier();
1015 switch (mode){
1016 case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
1017 return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
1018 case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW:
1019 return NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW;
1020 case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
1021 return NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW;
1022 }
1023
1024 return mode;
1025}
1026
1027
1028static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1029 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args)
1030{
1031 int err;
1032 struct gk20a *g = dbg_s->g;
1033 struct channel_gk20a *ch_gk20a;
1034 u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode);
1035
1036 nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode);
1037
 1038	/* Must have a valid reservation to enable/disable hwpm ctxsw.
1039 * Just print an error message for now, but eventually this should
1040 * return an error, at the point where all client sw has been
1041 * cleaned up.
1042 */
1043 if (!dbg_s->has_profiler_reservation) {
1044 nvgpu_err(g,
1045 "session doesn't have a valid reservation");
1046 }
1047
1048 err = gk20a_busy(g);
1049 if (err) {
1050 nvgpu_err(g, "failed to poweron");
1051 return err;
1052 }
1053
1054 /* Take the global lock, since we'll be doing global regops */
1055 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1056
1057 ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1058 if (!ch_gk20a) {
1059 nvgpu_err(g,
1060 "no bound channel for pm ctxsw mode update");
1061 err = -EINVAL;
1062 goto clean_up;
1063 }
1064 if (g->dbg_powergating_disabled_refcount == 0) {
1065 nvgpu_err(g, "powergate is not disabled");
1066 err = -ENOSYS;
1067 goto clean_up;
1068 }
1069 err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0,
1070 mode);
1071
1072 if (err)
1073 nvgpu_err(g,
1074 "error (%d) during pm ctxsw mode update", err);
 1075	/* gk20a would require a WAR to set the core PM_ENABLE bit; it is not
 1076	 * added here since gk20a is deprecated
1077 */
1078 clean_up:
1079 nvgpu_mutex_release(&g->dbg_sessions_lock);
1080 gk20a_idle(g);
1081 return err;
1082}
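/*
 * A minimal userspace sketch of enabling HWPM context switching on a bound
 * debugger session. Per the checks above, powergating must be disabled first
 * (and the session should hold a profiler reservation); the constants are
 * assumed to come from the nvgpu uapi header.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* nvgpu uapi; install path may differ */

static int enable_hwpm_ctxsw(int dbg_fd)
{
	struct nvgpu_dbg_gpu_powergate_args pg;
	struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args pm;

	memset(&pg, 0, sizeof(pg));
	pg.mode = NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE;
	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_POWERGATE, &pg) != 0)
		return -1;	/* the mode update is rejected while powergated */

	memset(&pm, 0, sizeof(pm));
	pm.mode = NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW;
	return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE, &pm);
}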
1083
1084static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(
1085 struct dbg_session_gk20a *dbg_s,
1086 struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args)
1087{
1088 int err;
1089 struct gk20a *g = dbg_s->g;
1090 struct channel_gk20a *ch;
1091 bool enable = (args->mode == NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED);
1092
1093 nvgpu_log_fn(g, "mode=%u", args->mode);
1094
1095 if (args->reserved != 0U) {
1096 return -EINVAL;
1097 }
1098
1099 if ((g->ops.fb.set_mmu_debug_mode == NULL) &&
1100 (g->ops.gr.set_mmu_debug_mode == NULL)) {
1101 return -ENOSYS;
1102 }
1103
1104 err = gk20a_busy(g);
1105 if (err) {
1106 nvgpu_err(g, "failed to poweron");
1107 return err;
1108 }
1109
1110 /* Take the global lock, since we'll be doing global regops */
1111 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1112
1113 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1114 if (!ch) {
1115 nvgpu_err(g, "no bound channel for mmu debug mode");
1116 err = -EINVAL;
1117 goto clean_up;
1118 }
1119
1120 err = nvgpu_tsg_set_mmu_debug_mode(ch, enable);
1121 if (err) {
1122 nvgpu_err(g, "set mmu debug mode failed, err=%d", err);
1123 }
1124
1125clean_up:
1126 nvgpu_mutex_release(&g->dbg_sessions_lock);
1127 gk20a_idle(g);
1128 return err;
1129}
1130
1131static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
1132 struct dbg_session_gk20a *dbg_s,
1133 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args)
1134{
1135 struct gk20a *g = dbg_s->g;
1136 struct channel_gk20a *ch;
1137 int err = 0, action = args->mode;
1138
1139 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode);
1140
1141 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1142 if (!ch)
1143 return -EINVAL;
1144
1145 err = gk20a_busy(g);
1146 if (err) {
1147 nvgpu_err(g, "failed to poweron");
1148 return err;
1149 }
1150
1151 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1152
1153 /* Suspend GPU context switching */
1154 err = gr_gk20a_disable_ctxsw(g);
1155 if (err) {
1156 nvgpu_err(g, "unable to stop gr ctxsw");
1157 /* this should probably be ctx-fatal... */
1158 goto clean_up;
1159 }
1160
1161 nvgpu_speculation_barrier();
1162 switch (action) {
1163 case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
1164 gr_gk20a_suspend_context(ch);
1165 break;
1166
1167 case NVGPU_DBG_GPU_RESUME_ALL_SMS:
1168 gr_gk20a_resume_context(ch);
1169 break;
1170 }
1171
1172 err = gr_gk20a_enable_ctxsw(g);
1173 if (err)
1174 nvgpu_err(g, "unable to restart ctxsw!");
1175
1176clean_up:
1177 nvgpu_mutex_release(&g->dbg_sessions_lock);
1178 gk20a_idle(g);
1179
1180 return err;
1181}
1182
1183static int nvgpu_ioctl_allocate_profiler_object(
1184 struct dbg_session_gk20a_linux *dbg_session_linux,
1185 struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
1186{
1187 int err = 0;
1188 struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s;
1189 struct gk20a *g = get_gk20a(dbg_session_linux->dev);
1190 struct dbg_profiler_object_data *prof_obj;
1191
1192 nvgpu_log_fn(g, "%s", g->name);
1193
1194 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1195
1196 err = alloc_profiler(g, &prof_obj);
1197 if (err)
1198 goto clean_up;
1199
1200 prof_obj->session_id = dbg_s->id;
1201
1202 if (dbg_s->is_profiler)
1203 prof_obj->ch = NULL;
1204 else {
1205 prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1206 if (prof_obj->ch == NULL) {
1207 nvgpu_err(g,
1208 "bind a channel for dbg session");
1209 nvgpu_kfree(g, prof_obj);
1210 err = -EINVAL;
1211 goto clean_up;
1212 }
1213 }
1214
1215 /* Return handle to client */
1216 args->profiler_handle = prof_obj->prof_handle;
1217
1218 nvgpu_init_list_node(&prof_obj->prof_obj_entry);
1219
1220 nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects);
1221clean_up:
1222 nvgpu_mutex_release(&g->dbg_sessions_lock);
1223 return err;
1224}
1225
1226static int nvgpu_ioctl_free_profiler_object(
1227 struct dbg_session_gk20a_linux *dbg_s_linux,
1228 struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
1229{
1230 int err = 0;
1231 struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s;
1232 struct gk20a *g = get_gk20a(dbg_s_linux->dev);
1233 struct dbg_profiler_object_data *prof_obj, *tmp_obj;
1234 bool obj_found = false;
1235
1236 nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x",
1237 g->name, dbg_s->id, args->profiler_handle);
1238
1239 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1240
1241 /* Remove profiler object from the list, if a match is found */
1242 nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
1243 dbg_profiler_object_data, prof_obj_entry) {
1244 if (prof_obj->prof_handle == args->profiler_handle) {
1245 if (prof_obj->session_id != dbg_s->id) {
1246 nvgpu_err(g,
1247 "invalid handle %x",
1248 args->profiler_handle);
1249 err = -EINVAL;
1250 break;
1251 }
1252 if (prof_obj->has_reservation)
1253 g->ops.dbg_session_ops.
1254 release_profiler_reservation(dbg_s, prof_obj);
1255 nvgpu_list_del(&prof_obj->prof_obj_entry);
1256 nvgpu_kfree(g, prof_obj);
1257 obj_found = true;
1258 break;
1259 }
1260 }
1261 if (!obj_found) {
1262 nvgpu_err(g, "profiler %x not found",
1263 args->profiler_handle);
1264 err = -EINVAL;
1265 }
1266
1267 nvgpu_mutex_release(&g->dbg_sessions_lock);
1268 return err;
1269}
1270
1271static struct dbg_profiler_object_data *find_matching_prof_obj(
1272 struct dbg_session_gk20a *dbg_s,
1273 u32 profiler_handle)
1274{
1275 struct gk20a *g = dbg_s->g;
1276 struct dbg_profiler_object_data *prof_obj;
1277
1278 nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
1279 dbg_profiler_object_data, prof_obj_entry) {
1280 if (prof_obj->prof_handle == profiler_handle) {
1281 if (prof_obj->session_id != dbg_s->id) {
1282 nvgpu_err(g,
1283 "invalid handle %x",
1284 profiler_handle);
1285 return NULL;
1286 }
1287 return prof_obj;
1288 }
1289 }
1290 return NULL;
1291}
1292
 1293/* Used where a debugger session bound to a channel can take just that
 1294 * channel's dbg_s lock for performance, but a profiler session (or one
 1295 * without a bound channel) must take the per-gpu dbg_sessions lock. */
1296static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s)
1297{
1298 struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1299
1300 if (dbg_s->is_profiler || !ch)
1301 nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock);
1302 else
1303 nvgpu_mutex_acquire(&ch->dbg_s_lock);
1304}
1305
1306static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s)
1307{
1308 struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1309
1310 if (dbg_s->is_profiler || !ch)
1311 nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock);
1312 else
1313 nvgpu_mutex_release(&ch->dbg_s_lock);
1314}
1315
1316static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
1317{
1318 struct gk20a *g = dbg_s->g;
1319
1320 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
1321
1322 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
1323
1324 dbg_s->dbg_events.events_enabled = true;
1325 dbg_s->dbg_events.num_pending_events = 0;
1326
1327 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
1328}
1329
1330static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
1331{
1332 struct gk20a *g = dbg_s->g;
1333
1334 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
1335
1336 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
1337
1338 dbg_s->dbg_events.events_enabled = false;
1339 dbg_s->dbg_events.num_pending_events = 0;
1340
1341 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
1342}
1343
1344static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
1345{
1346 struct gk20a *g = dbg_s->g;
1347
1348 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
1349
1350 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
1351
1352 if (dbg_s->dbg_events.events_enabled &&
1353 dbg_s->dbg_events.num_pending_events > 0)
1354 dbg_s->dbg_events.num_pending_events--;
1355
1356 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
1357}
1358
1359
1360static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
1361 struct nvgpu_dbg_gpu_events_ctrl_args *args)
1362{
1363 int ret = 0;
1364 struct channel_gk20a *ch;
1365 struct gk20a *g = dbg_s->g;
1366
1367 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd);
1368
1369 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1370 if (!ch) {
1371 nvgpu_err(g, "no channel bound to dbg session");
1372 return -EINVAL;
1373 }
1374
1375 nvgpu_speculation_barrier();
1376 switch (args->cmd) {
1377 case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
1378 gk20a_dbg_gpu_events_enable(dbg_s);
1379 break;
1380
1381 case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE:
1382 gk20a_dbg_gpu_events_disable(dbg_s);
1383 break;
1384
1385 case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR:
1386 gk20a_dbg_gpu_events_clear(dbg_s);
1387 break;
1388
1389 default:
1390 nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x",
1391 args->cmd);
1392 ret = -EINVAL;
1393 break;
1394 }
1395
1396 return ret;
1397}
1398
1399static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
1400 struct nvgpu_dbg_gpu_perfbuf_map_args *args)
1401{
1402 struct gk20a *g = dbg_s->g;
1403 struct mm_gk20a *mm = &g->mm;
1404 int err;
1405 u32 virt_size;
1406 u32 big_page_size = g->ops.mm.get_default_big_page_size();
1407
1408 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1409
1410 if (g->perfbuf.owner) {
1411 nvgpu_mutex_release(&g->dbg_sessions_lock);
1412 return -EBUSY;
1413 }
1414
1415 mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size,
1416 big_page_size << 10,
1417 NV_MM_DEFAULT_KERNEL_SIZE,
1418 NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
1419 false, false, "perfbuf");
1420 if (!mm->perfbuf.vm) {
1421 nvgpu_mutex_release(&g->dbg_sessions_lock);
1422 return -ENOMEM;
1423 }
1424
1425 err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
1426 args->dmabuf_fd,
1427 &args->offset,
1428 0,
1429 SZ_4K,
1430 0,
1431 0,
1432 0,
1433 0,
1434 NULL);
1435 if (err)
1436 goto err_remove_vm;
1437
1438 /* perf output buffer may not cross a 4GB boundary */
1439 virt_size = u64_lo32(args->mapping_size);
1440 if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size - 1)) {
1441 err = -EINVAL;
1442 goto err_unmap;
1443 }
1444
1445 err = g->ops.dbg_session_ops.perfbuffer_enable(g,
1446 args->offset, virt_size);
1447 if (err)
1448 goto err_unmap;
1449
1450 g->perfbuf.owner = dbg_s;
1451 g->perfbuf.offset = args->offset;
1452 nvgpu_mutex_release(&g->dbg_sessions_lock);
1453
1454 return 0;
1455
1456err_unmap:
1457 nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL);
1458err_remove_vm:
1459 nvgpu_vm_put(mm->perfbuf.vm);
1460 nvgpu_mutex_release(&g->dbg_sessions_lock);
1461 return err;
1462}
1463
1464static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
1465 struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
1466{
1467 struct gk20a *g = dbg_s->g;
1468 int err;
1469
1470 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1471 if ((g->perfbuf.owner != dbg_s) ||
1472 (g->perfbuf.offset != args->offset)) {
1473 nvgpu_mutex_release(&g->dbg_sessions_lock);
1474 return -EINVAL;
1475 }
1476
1477 err = gk20a_perfbuf_release_locked(g, args->offset);
1478
1479 nvgpu_mutex_release(&g->dbg_sessions_lock);
1480
1481 return err;
1482}
1483
1484static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
1485 struct nvgpu_dbg_gpu_pc_sampling_args *args)
1486{
1487 struct channel_gk20a *ch;
1488 struct gk20a *g = dbg_s->g;
1489
1490 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1491 if (!ch)
1492 return -EINVAL;
1493
1494 nvgpu_log_fn(g, " ");
1495
1496 return g->ops.gr.update_pc_sampling ?
1497 g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL;
1498}
1499
1500static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
1501 struct dbg_session_gk20a *dbg_s,
1502 struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args)
1503{
1504 struct gk20a *g = dbg_s->g;
1505 struct gr_gk20a *gr = &g->gr;
1506 u32 sm_id;
1507 struct channel_gk20a *ch;
1508 int err = 0;
1509
1510 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1511 if (ch == NULL) {
1512 return -EINVAL;
1513 }
1514
1515 sm_id = args->sm_id;
1516 if (sm_id >= gr->no_of_sm)
1517 return -EINVAL;
1518
1519 nvgpu_speculation_barrier();
1520
1521 err = gk20a_busy(g);
1522 if (err != 0) {
1523 return err;
1524 }
1525
1526 err = gr_gk20a_elpg_protected_call(g,
1527 g->ops.gr.clear_sm_error_state(g, ch, sm_id));
1528
1529 gk20a_idle(g);
1530
1531 return err;
1532}
1533
1534static int
1535nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s,
1536 struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args)
1537{
1538 struct gk20a *g = dbg_s->g;
1539 int err = 0;
1540 int ctx_resident_ch_fd = -1;
1541
1542 err = gk20a_busy(g);
1543 if (err)
1544 return err;
1545
1546 nvgpu_speculation_barrier();
1547 switch (args->action) {
1548 case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS:
1549 err = g->ops.gr.suspend_contexts(g, dbg_s,
1550 &ctx_resident_ch_fd);
1551 break;
1552
1553 case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS:
1554 err = g->ops.gr.resume_contexts(g, dbg_s,
1555 &ctx_resident_ch_fd);
1556 break;
1557 }
1558
1559 if (ctx_resident_ch_fd < 0) {
1560 args->is_resident_context = 0;
1561 } else {
1562 args->is_resident_context = 1;
1563 args->resident_context_fd = ctx_resident_ch_fd;
1564 }
1565
1566 gk20a_idle(g);
1567
1568 return err;
1569}
1570
1571static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
1572 struct nvgpu_dbg_gpu_access_fb_memory_args *args)
1573{
1574 struct gk20a *g = dbg_s->g;
1575 struct dma_buf *dmabuf;
1576 void __user *user_buffer = (void __user *)(uintptr_t)args->buffer;
1577 void *buffer;
1578 u64 size, access_size, offset;
1579 u64 access_limit_size = SZ_4K;
1580 int err = 0;
1581
1582 if ((args->offset & 3) || (!args->size) || (args->size & 3))
1583 return -EINVAL;
1584
1585 dmabuf = dma_buf_get(args->dmabuf_fd);
1586 if (IS_ERR(dmabuf))
1587 return -EINVAL;
1588
1589 if ((args->offset > dmabuf->size) ||
1590 (args->size > dmabuf->size) ||
1591 (args->offset + args->size > dmabuf->size)) {
1592 err = -EINVAL;
1593 goto fail_dmabuf_put;
1594 }
1595
1596 buffer = nvgpu_big_zalloc(g, access_limit_size);
1597 if (!buffer) {
1598 err = -ENOMEM;
1599 goto fail_dmabuf_put;
1600 }
1601
1602 size = args->size;
1603 offset = 0;
1604
1605 err = gk20a_busy(g);
1606 if (err)
1607 goto fail_free_buffer;
1608
1609 while (size) {
1610 /* Max access size of access_limit_size in one loop */
1611 access_size = min(access_limit_size, size);
1612
1613 if (args->cmd ==
1614 NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) {
1615 err = copy_from_user(buffer, user_buffer + offset,
1616 access_size);
1617 if (err)
1618 goto fail_idle;
1619 }
1620
1621 err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer,
1622 args->offset + offset, access_size,
1623 args->cmd);
1624 if (err)
1625 goto fail_idle;
1626
1627 if (args->cmd ==
1628 NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) {
1629 err = copy_to_user(user_buffer + offset,
1630 buffer, access_size);
1631 if (err)
1632 goto fail_idle;
1633 }
1634
1635 size -= access_size;
1636 offset += access_size;
1637 }
1638 nvgpu_speculation_barrier();
1639
1640fail_idle:
1641 gk20a_idle(g);
1642fail_free_buffer:
1643 nvgpu_big_free(g, buffer);
1644fail_dmabuf_put:
1645 dma_buf_put(dmabuf);
1646
1647 return err;
1648}
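/*
 * A minimal userspace sketch of reading vidmem backed by a dmabuf through the
 * handler above. Both offset and size must be non-zero multiples of 4 bytes
 * and lie within the dmabuf; the driver internally splits the transfer into
 * 4 KiB chunks. Constants are assumed to come from the nvgpu uapi header.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* nvgpu uapi; install path may differ */

static int read_fb_memory(int dbg_fd, int dmabuf_fd, uint64_t offset,
			   void *dst, uint64_t size)
{
	struct nvgpu_dbg_gpu_access_fb_memory_args args;

	memset(&args, 0, sizeof(args));
	args.cmd = NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ;
	args.dmabuf_fd = dmabuf_fd;
	args.offset = offset;			/* byte offset into the dmabuf */
	args.size = size;			/* must be a multiple of 4 */
	args.buffer = (uint64_t)(uintptr_t)dst;	/* user pointer, passed as u64 */

	return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY, &args);
}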
1649
1650static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
1651 struct nvgpu_dbg_gpu_profiler_reserve_args *args)
1652{
1653 if (args->acquire)
1654 return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle);
1655
1656 return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle);
1657}
1658
1659static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s,
1660 struct nvgpu_dbg_gpu_timeout_args *args)
1661{
1662 bool status;
1663 struct gk20a *g = dbg_s->g;
1664
1665 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1666 status = nvgpu_is_timeouts_enabled(g);
1667 nvgpu_mutex_release(&g->dbg_sessions_lock);
1668
1669 if (status)
1670 args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE;
1671 else
1672 args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE;
1673}
1674
1675static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
1676{
1677 struct mm_gk20a *mm = &g->mm;
1678 struct vm_gk20a *vm = mm->perfbuf.vm;
1679 int err;
1680
1681 err = g->ops.dbg_session_ops.perfbuffer_disable(g);
1682
1683 nvgpu_vm_unmap(vm, offset, NULL);
1684 nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
1685 nvgpu_vm_put(vm);
1686
1687 g->perfbuf.owner = NULL;
1688 g->perfbuf.offset = 0;
1689 return err;
1690}
1691
1692static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
1693 u32 profiler_handle)
1694{
1695 struct gk20a *g = dbg_s->g;
1696 struct dbg_profiler_object_data *prof_obj;
1697 int err = 0;
1698
1699 nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
1700
1701 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1702
1703 /* Find matching object. */
1704 prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
1705
1706 if (!prof_obj) {
1707 nvgpu_err(g, "object not found");
1708 err = -EINVAL;
1709 goto exit;
1710 }
1711
1712 if (prof_obj->has_reservation)
1713 g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj);
1714 else {
1715 nvgpu_err(g, "No reservation found");
1716 err = -EINVAL;
1717 goto exit;
1718 }
1719exit:
1720 nvgpu_mutex_release(&g->dbg_sessions_lock);
1721 return err;
1722}
1723
1724static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
1725 u32 profiler_handle)
1726{
1727 struct gk20a *g = dbg_s->g;
1728 struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
1729 int err = 0;
1730 struct tsg_gk20a *tsg;
1731
1732 nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
1733
1734 if (g->profiler_reservation_count < 0) {
1735 nvgpu_err(g, "Negative reservation count!");
1736 return -EINVAL;
1737 }
1738
1739 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1740
1741 /* Find matching object. */
1742 my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
1743
1744 if (!my_prof_obj) {
1745 nvgpu_err(g, "object not found");
1746 err = -EINVAL;
1747 goto exit;
1748 }
1749
1750 /* If we already have the reservation, we're done */
1751 if (my_prof_obj->has_reservation) {
1752 err = 0;
1753 goto exit;
1754 }
1755
1756 if (my_prof_obj->ch == NULL) {
1757 /* Global reservations are only allowed if there are no other
1758 * global or per-context reservations currently held
1759 */
1760 if (!g->ops.dbg_session_ops.check_and_set_global_reservation(
1761 dbg_s, my_prof_obj)) {
1762 nvgpu_err(g,
1763 "global reserve: have existing reservation");
1764 err = -EBUSY;
1765 }
1766 } else if (g->global_profiler_reservation_held) {
1767 /* If there's a global reservation,
1768 * we can't take a per-context one.
1769 */
1770 nvgpu_err(g,
1771 "per-ctxt reserve: global reservation in effect");
1772 err = -EBUSY;
1773 } else if ((tsg = tsg_gk20a_from_ch(my_prof_obj->ch)) != NULL) {
1774 /* TSG: check that another channel in the TSG
1775 * doesn't already have the reservation
1776 */
1777 u32 my_tsgid = tsg->tsgid;
1778
1779 nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
1780 dbg_profiler_object_data, prof_obj_entry) {
1781 if (prof_obj->has_reservation &&
1782 (prof_obj->ch->tsgid == my_tsgid)) {
1783 nvgpu_err(g,
1784 "per-ctxt reserve (tsg): already reserved");
1785 err = -EBUSY;
1786 goto exit;
1787 }
1788 }
1789
1790 if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
1791 dbg_s, my_prof_obj)) {
1792 /* Another guest OS has the global reservation */
1793 nvgpu_err(g,
1794 "per-ctxt reserve: global reservation in effect");
1795 err = -EBUSY;
1796 }
1797 } else {
1798 /* channel: check that some other profiler object doesn't
1799 * already have the reservation.
1800 */
1801 struct channel_gk20a *my_ch = my_prof_obj->ch;
1802
1803 nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
1804 dbg_profiler_object_data, prof_obj_entry) {
1805 if (prof_obj->has_reservation &&
1806 (prof_obj->ch == my_ch)) {
1807 nvgpu_err(g,
1808 "per-ctxt reserve (ch): already reserved");
1809 err = -EBUSY;
1810 goto exit;
1811 }
1812 }
1813
1814 if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
1815 dbg_s, my_prof_obj)) {
1816 /* Another guest OS has the global reservation */
1817 nvgpu_err(g,
1818 "per-ctxt reserve: global reservation in effect");
1819 err = -EBUSY;
1820 }
1821 }
1822exit:
1823 nvgpu_mutex_release(&g->dbg_sessions_lock);
1824 return err;
1825}
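/*
 * A minimal userspace sketch of the reservation protocol above: allocate a
 * profiler object on the session, acquire its reservation (global for a
 * profiler session, per-context for a channel-bound one), then release and
 * free it. Constants are assumed to come from the nvgpu uapi header.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* nvgpu uapi; install path may differ */

static int with_profiler_reservation(int dbg_fd)
{
	struct nvgpu_dbg_gpu_profiler_obj_mgt_args obj;
	struct nvgpu_dbg_gpu_profiler_reserve_args res;
	int err;

	memset(&obj, 0, sizeof(obj));
	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE, &obj) != 0)
		return -1;

	memset(&res, 0, sizeof(res));
	res.profiler_handle = obj.profiler_handle;
	res.acquire = 1;	/* 0 releases the reservation */
	err = ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &res);

	/* ... program perf counters while the reservation is held ... */

	if (err == 0) {
		res.acquire = 0;
		ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &res);
	}
	ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PROFILER_FREE, &obj);
	return err;
}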
1826
1827static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
1828 struct nvgpu_dbg_gpu_unbind_channel_args *args)
1829{
1830 struct dbg_session_channel_data *ch_data;
1831 struct gk20a *g = dbg_s->g;
1832 bool channel_found = false;
1833 struct channel_gk20a *ch;
1834 int err;
1835
1836 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
1837 g->name, args->channel_fd);
1838
1839 ch = gk20a_get_channel_from_file(args->channel_fd);
1840 if (!ch) {
1841 nvgpu_log_fn(g, "no channel found for fd");
1842 return -EINVAL;
1843 }
1844
1845 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
1846 nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
1847 dbg_session_channel_data, ch_entry) {
1848 if (ch->chid == ch_data->chid) {
1849 channel_found = true;
1850 break;
1851 }
1852 }
1853 nvgpu_mutex_release(&dbg_s->ch_list_lock);
1854
1855 if (!channel_found) {
 1856		nvgpu_log_fn(g, "channel not bound, fd=%d\n", args->channel_fd);
1857 err = -EINVAL;
1858 goto out;
1859 }
1860
1861 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1862 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
1863 err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
1864 nvgpu_mutex_release(&dbg_s->ch_list_lock);
1865 nvgpu_mutex_release(&g->dbg_sessions_lock);
1866
1867out:
1868 gk20a_channel_put(ch);
1869 return err;
1870}
1871
1872static int nvgpu_set_sm_exception_type_mask_locked(
1873 struct dbg_session_gk20a *dbg_s,
1874 u32 exception_mask)
1875{
1876 struct gk20a *g = dbg_s->g;
1877 int err = 0;
1878 struct channel_gk20a *ch = NULL;
1879
1880 /*
 1881	 * Obtain the first channel from the dbg_session's channel list,
 1882	 * find the TSG associated with that channel, and set its
 1883	 * sm_exception_mask_type
1884 */
1885 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1886 if (ch != NULL) {
1887 struct tsg_gk20a *tsg;
1888
1889 tsg = tsg_gk20a_from_ch(ch);
1890 if (tsg != NULL) {
1891 tsg->sm_exception_mask_type = exception_mask;
1892 goto type_mask_end;
1893 }
1894 }
1895
1896 nvgpu_log_fn(g, "unable to find the TSG\n");
1897 err = -EINVAL;
1898
1899type_mask_end:
1900 return err;
1901}
1902
1903static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
1904 struct dbg_session_gk20a *dbg_s,
1905 struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args *args)
1906{
1907 int err = 0;
1908 struct gk20a *g = dbg_s->g;
1909 u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
1910
1911 nvgpu_speculation_barrier();
1912 switch (args->exception_type_mask) {
1913 case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
1914 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL;
1915 break;
1916 case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_NONE:
1917 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
1918 break;
1919 default:
1920 nvgpu_err(g,
1921 "unrecognized dbg sm exception type mask: 0x%x",
1922 args->exception_type_mask);
1923 err = -EINVAL;
1924 break;
1925 }
1926
1927 if (err != 0) {
1928 return err;
1929 }
1930
1931 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1932 err = nvgpu_set_sm_exception_type_mask_locked(dbg_s,
1933 sm_exception_mask_type);
1934 nvgpu_mutex_release(&g->dbg_sessions_lock);
1935
1936 return err;
1937}
1938
1939#if defined(CONFIG_GK20A_CYCLE_STATS)
1940static int nvgpu_dbg_gpu_cycle_stats(struct dbg_session_gk20a *dbg_s,
1941 struct nvgpu_dbg_gpu_cycle_stats_args *args)
1942{
1943 struct channel_gk20a *ch = NULL;
1944 int err;
1945
1946 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1947 if (ch == NULL) {
1948 return -EINVAL;
1949 }
1950
1951 err = gk20a_busy(ch->g);
1952 if (err != 0) {
1953 return err;
1954 }
1955
1956 err = gk20a_channel_cycle_stats(ch, args->dmabuf_fd);
1957
1958 gk20a_idle(ch->g);
1959 return err;
1960}
1961
1962static int nvgpu_dbg_gpu_cycle_stats_snapshot(struct dbg_session_gk20a *dbg_s,
1963 struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *args)
1964{
1965 struct channel_gk20a *ch = NULL;
1966 int err;
1967
1968 if (!args->dmabuf_fd) {
1969 return -EINVAL;
1970 }
1971
1972 nvgpu_speculation_barrier();
1973
1974 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1975 if (ch == NULL) {
1976 return -EINVAL;
1977 }
1978
1979 /* is it allowed to handle calls for current GPU? */
1980 if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT)) {
1981 return -ENOSYS;
1982 }
1983
1984 err = gk20a_busy(ch->g);
1985 if (err != 0) {
1986 return err;
1987 }
1988
1989 /* handle the command (most frequent cases first) */
1990 switch (args->cmd) {
1991 case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
1992 err = gk20a_flush_cycle_stats_snapshot(ch);
1993 args->extra = 0;
1994 break;
1995
1996 case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
1997 err = gk20a_attach_cycle_stats_snapshot(ch,
1998 args->dmabuf_fd,
1999 args->extra,
2000 &args->extra);
2001 break;
2002
2003 case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
2004 err = gk20a_channel_free_cycle_stats_snapshot(ch);
2005 args->extra = 0;
2006 break;
2007
2008 default:
2009 pr_err("cyclestats: unknown command %u\n", args->cmd);
2010 err = -EINVAL;
2011 break;
2012 }
2013
2014 gk20a_idle(ch->g);
2015 return err;
2016}
2017
2018#endif
2019
2020int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
2021{
2022 struct nvgpu_os_linux *l = container_of(inode->i_cdev,
2023 struct nvgpu_os_linux, dbg.cdev);
2024 struct gk20a *g = &l->g;
2025
2026 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
2027 return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
2028}
2029
2030long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
2031 unsigned long arg)
2032{
2033 struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data;
2034 struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s;
2035 struct gk20a *g = dbg_s->g;
2036 u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE];
2037 int err = 0;
2038
2039 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
2040
2041 if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) ||
2042 (_IOC_NR(cmd) == 0) ||
2043 (_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) ||
2044 (_IOC_SIZE(cmd) > NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE))
2045 return -EINVAL;
2046
2047 memset(buf, 0, sizeof(buf));
2048 if (_IOC_DIR(cmd) & _IOC_WRITE) {
2049 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2050 return -EFAULT;
2051 }
2052
2053 if (!g->sw_ready) {
2054 err = gk20a_busy(g);
2055 if (err)
2056 return err;
2057
2058 gk20a_idle(g);
2059 }
2060
2061 /* protect from threaded user space calls */
2062 nvgpu_mutex_acquire(&dbg_s->ioctl_lock);
2063
2064 nvgpu_speculation_barrier();
2065 switch (cmd) {
2066 case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
2067 err = dbg_bind_channel_gk20a(dbg_s,
2068 (struct nvgpu_dbg_gpu_bind_channel_args *)buf);
2069 break;
2070
2071 case NVGPU_DBG_GPU_IOCTL_REG_OPS:
2072 err = nvgpu_ioctl_channel_reg_ops(dbg_s,
2073 (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf);
2074 break;
2075
2076 case NVGPU_DBG_GPU_IOCTL_POWERGATE:
2077 err = nvgpu_ioctl_powergate_gk20a(dbg_s,
2078 (struct nvgpu_dbg_gpu_powergate_args *)buf);
2079 break;
2080
2081 case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL:
2082 err = gk20a_dbg_gpu_events_ctrl(dbg_s,
2083 (struct nvgpu_dbg_gpu_events_ctrl_args *)buf);
2084 break;
2085
2086 case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
2087 err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
2088 (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf);
2089 break;
2090
2091 case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE:
2092 err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s,
2093 (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf);
2094 break;
2095
2096 case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS:
2097 err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s,
2098 (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
2099 break;
2100
2101 case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP:
2102 err = gk20a_perfbuf_map(dbg_s,
2103 (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf);
2104 break;
2105
2106 case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP:
2107 err = gk20a_perfbuf_unmap(dbg_s,
2108 (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
2109 break;
2110
2111 case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING:
2112 err = gk20a_dbg_pc_sampling(dbg_s,
2113 (struct nvgpu_dbg_gpu_pc_sampling_args *)buf);
2114 break;
2115
2116 case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE:
2117 err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s,
2118 (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf);
2119 break;
2120
2121 case NVGPU_DBG_GPU_IOCTL_TIMEOUT:
2122 err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s,
2123 (struct nvgpu_dbg_gpu_timeout_args *)buf);
2124 break;
2125
2126 case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT:
2127 nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s,
2128 (struct nvgpu_dbg_gpu_timeout_args *)buf);
2129 break;
2130
2131 case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
2132 err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
2133 (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
2134 break;
2135
2136 case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE:
2137 err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s,
2138 (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf);
2139 break;
2140
2141 case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL:
2142 err = dbg_unbind_channel_gk20a(dbg_s,
2143 (struct nvgpu_dbg_gpu_unbind_channel_args *)buf);
2144 break;
2145
2146 case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS:
2147 err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s,
2148 (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf);
2149 break;
2150
2151 case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY:
2152 err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s,
2153 (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf);
2154 break;
2155
2156 case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE:
2157 err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux,
2158 (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
2159 break;
2160
2161 case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE:
2162 err = nvgpu_ioctl_free_profiler_object(dbg_s_linux,
2163 (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
2164 break;
2165
2166 case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE:
2167 err = nvgpu_ioctl_profiler_reserve(dbg_s,
2168 (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf);
2169 break;
2170
2171 case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK:
2172 err = nvgpu_dbg_gpu_set_sm_exception_type_mask(dbg_s,
2173 (struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args *)buf);
2174 break;
2175
2176 case NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE:
2177 err = nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(dbg_s,
2178 (struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *)buf);
2179 break;
2180
2181#ifdef CONFIG_GK20A_CYCLE_STATS
2182 case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS:
2183 err = nvgpu_dbg_gpu_cycle_stats(dbg_s,
2184 (struct nvgpu_dbg_gpu_cycle_stats_args *)buf);
2185 break;
2186
2187 case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT:
2188 err = nvgpu_dbg_gpu_cycle_stats_snapshot(dbg_s,
2189 (struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *)buf);
2190 break;
2191#endif
2192
2193 default:
2194 nvgpu_err(g,
2195 "unrecognized dbg gpu ioctl cmd: 0x%x",
2196 cmd);
2197 err = -ENOTTY;
2198 break;
2199 }
2200
2201 nvgpu_mutex_release(&dbg_s->ioctl_lock);
2202
2203 nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err);
2204
2205 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2206 err = copy_to_user((void __user *)arg,
2207 buf, _IOC_SIZE(cmd));
2208
2209 return err;
2210}
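/*
 * A minimal userspace sketch of driving this dispatcher: open the debugger
 * node and query whether timeouts are enabled. The device path below is an
 * assumption (it varies by platform/chip); constants are assumed to come from
 * the nvgpu uapi header.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* nvgpu uapi; install path may differ */

int main(void)
{
	struct nvgpu_dbg_gpu_timeout_args args;
	int fd = open("/dev/nvhost-dbg-gpu", O_RDWR);	/* assumed node name */

	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT, &args) == 0)
		printf("timeouts %s\n",
		       args.enable == NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE ?
		       "enabled" : "disabled");

	close(fd);
	return 0;
}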
diff --git a/include/os/linux/ioctl_dbg.h b/include/os/linux/ioctl_dbg.h
deleted file mode 100644
index 2e188cc..0000000
--- a/include/os/linux/ioctl_dbg.h
+++ /dev/null
@@ -1,38 +0,0 @@
1/*
2 * Tegra GK20A GPU Debugger Driver
3 *
4 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18#ifndef DBG_GPU_IOCTL_GK20A_H
19#define DBG_GPU_IOCTL_GK20A_H
20
21struct inode;
22struct file;
23typedef struct poll_table_struct poll_table;
24
25/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
26 * of regops */
27#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024
28
29/* module debug driver interface */
30int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp);
31int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp);
32long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
33unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);
34
35/* used by profiler driver interface */
36int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);
37
38#endif
diff --git a/include/os/linux/ioctl_tsg.c b/include/os/linux/ioctl_tsg.c
deleted file mode 100644
index 296b02b..0000000
--- a/include/os/linux/ioctl_tsg.c
+++ /dev/null
@@ -1,750 +0,0 @@
1/*
2 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/fs.h>
18#include <linux/file.h>
19#include <linux/cdev.h>
20#include <linux/uaccess.h>
21#include <linux/poll.h>
22#include <uapi/linux/nvgpu.h>
23#include <linux/anon_inodes.h>
24
25#include <nvgpu/kmem.h>
26#include <nvgpu/log.h>
27#include <nvgpu/os_sched.h>
28#include <nvgpu/gk20a.h>
29#include <nvgpu/channel.h>
30#include <nvgpu/tsg.h>
31
32#include "gv11b/fifo_gv11b.h"
33#include "platform_gk20a.h"
34#include "ioctl_tsg.h"
35#include "ioctl_channel.h"
36#include "os_linux.h"
37
38struct tsg_private {
39 struct gk20a *g;
40 struct tsg_gk20a *tsg;
41};
42
43static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
44{
45 struct channel_gk20a *ch;
46 int err;
47
48 ch = gk20a_get_channel_from_file(ch_fd);
49 if (!ch)
50 return -EINVAL;
51
52 err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);
53
54 gk20a_channel_put(ch);
55 return err;
56}
57
58static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
59 struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg)
60{
61 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
62 struct channel_gk20a *ch;
63 struct gr_gk20a *gr = &g->gr;
64 int err = 0;
65
66 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
67
68 nvgpu_mutex_acquire(&sched->control_lock);
69 if (sched->control_locked) {
70 err = -EPERM;
71 goto mutex_release;
72 }
73 err = gk20a_busy(g);
74 if (err) {
75 nvgpu_err(g, "failed to power on gpu");
76 goto mutex_release;
77 }
78
79 ch = gk20a_get_channel_from_file(arg->channel_fd);
80 if (!ch) {
81 err = -EINVAL;
82 goto idle;
83 }
84
85 if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) {
86 if ((arg->num_active_tpcs > gr->max_tpc_count) ||
87 !(arg->num_active_tpcs)) {
88 nvgpu_err(g, "Invalid num of active TPCs");
89 err = -EINVAL;
90 goto ch_put;
91 }
92 tsg->tpc_num_initialized = true;
93 tsg->num_active_tpcs = arg->num_active_tpcs;
94 tsg->tpc_pg_enabled = true;
95 } else {
 96		tsg->tpc_pg_enabled = false;
		nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled");
97 }
98
99 if (arg->subcontext_id < g->fifo.max_subctx_count) {
100 ch->subctx_id = arg->subcontext_id;
101 } else {
102 err = -EINVAL;
103 goto ch_put;
104 }
105
106 nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d",
107 ch->chid, ch->subctx_id);
108
109 /* Use runqueue selector 1 for all ASYNC ids */
110 if (ch->subctx_id > CHANNEL_INFO_VEID0)
111 ch->runqueue_sel = 1;
112
113 err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);
114ch_put:
115 gk20a_channel_put(ch);
116idle:
117 gk20a_idle(g);
118mutex_release:
119 nvgpu_mutex_release(&sched->control_lock);
120 return err;
121}
122
123static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
124{
125 struct channel_gk20a *ch;
126 int err = 0;
127
128 ch = gk20a_get_channel_from_file(ch_fd);
129 if (!ch)
130 return -EINVAL;
131
132 if (ch->tsgid != tsg->tsgid) {
133 err = -EINVAL;
134 goto out;
135 }
136
137 err = gk20a_tsg_unbind_channel(ch, false);
138 if (err == -EAGAIN) {
139 goto out;
140 }
141
142 /*
 143	 * Mark the channel as timed out: once unbound from the TSG it has no
 144	 * context of its own, so it cannot serve any jobs
145 */
146 gk20a_channel_set_timedout(ch);
147
148out:
149 gk20a_channel_put(ch);
150 return err;
151}
152
153static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg,
154 unsigned int event_id,
155 struct gk20a_event_id_data **event_id_data)
156{
157 struct gk20a_event_id_data *local_event_id_data;
158 bool event_found = false;
159
160 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
161 nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list,
162 gk20a_event_id_data, event_id_node) {
163 if (local_event_id_data->event_id == event_id) {
164 event_found = true;
165 break;
166 }
167 }
168 nvgpu_mutex_release(&tsg->event_id_list_lock);
169
170 if (event_found) {
171 *event_id_data = local_event_id_data;
172 return 0;
173 } else {
174 return -1;
175 }
176}
177
178/*
179 * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific
180 * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs
181 */
182static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id)
183{
184 switch (event_id) {
185 case NVGPU_EVENT_ID_BPT_INT:
186 return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT;
187 case NVGPU_EVENT_ID_BPT_PAUSE:
188 return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE;
189 case NVGPU_EVENT_ID_BLOCKING_SYNC:
190 return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC;
191 case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED:
192 return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED;
193 case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE:
194 return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE;
195 case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN:
196 return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN;
197 }
198
199 return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX;
200}
201
202void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
203 int __event_id)
204{
205 struct gk20a_event_id_data *event_id_data;
206 u32 event_id;
207 int err = 0;
208 struct gk20a *g = tsg->g;
209
210 event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id);
211 if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
212 return;
213
214 err = gk20a_tsg_get_event_data_from_id(tsg, event_id,
215 &event_id_data);
216 if (err)
217 return;
218
219 nvgpu_mutex_acquire(&event_id_data->lock);
220
221 nvgpu_log_info(g,
222 "posting event for event_id=%d on tsg=%d\n",
223 event_id, tsg->tsgid);
224 event_id_data->event_posted = true;
225
226 nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq);
227
228 nvgpu_mutex_release(&event_id_data->lock);
229}
230
231static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
232{
233 unsigned int mask = 0;
234 struct gk20a_event_id_data *event_id_data = filep->private_data;
235 struct gk20a *g = event_id_data->g;
236 u32 event_id = event_id_data->event_id;
237 struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
238
239 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " ");
240
241 poll_wait(filep, &event_id_data->event_id_wq.wq, wait);
242
243 nvgpu_mutex_acquire(&event_id_data->lock);
244
245 if (event_id_data->event_posted) {
246 nvgpu_log_info(g,
247 "found pending event_id=%d on TSG=%d\n",
248 event_id, tsg->tsgid);
249 mask = (POLLPRI | POLLIN);
250 event_id_data->event_posted = false;
251 }
252
253 nvgpu_mutex_release(&event_id_data->lock);
254
255 return mask;
256}
257
258static int gk20a_event_id_release(struct inode *inode, struct file *filp)
259{
260 struct gk20a_event_id_data *event_id_data = filp->private_data;
261 struct gk20a *g = event_id_data->g;
262 struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
263
264 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
265 nvgpu_list_del(&event_id_data->event_id_node);
266 nvgpu_mutex_release(&tsg->event_id_list_lock);
267
268 nvgpu_mutex_destroy(&event_id_data->lock);
269 gk20a_put(g);
270 nvgpu_kfree(g, event_id_data);
271 filp->private_data = NULL;
272
273 return 0;
274}
275
276const struct file_operations gk20a_event_id_ops = {
277 .owner = THIS_MODULE,
278 .poll = gk20a_event_id_poll,
279 .release = gk20a_event_id_release,
280};
281
282static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
283 int event_id,
284 int *fd)
285{
286 int err = 0;
287 int local_fd;
288 struct file *file;
289 char name[64];
290 struct gk20a_event_id_data *event_id_data;
291 struct gk20a *g;
292
293 g = gk20a_get(tsg->g);
294 if (!g)
295 return -ENODEV;
296
297 err = gk20a_tsg_get_event_data_from_id(tsg,
298 event_id, &event_id_data);
299 if (err == 0) {
300 /* We already have event enabled */
301 err = -EINVAL;
302 goto free_ref;
303 }
304
305 err = get_unused_fd_flags(O_RDWR);
306 if (err < 0)
307 goto free_ref;
308 local_fd = err;
309
310 snprintf(name, sizeof(name), "nvgpu-event%d-fd%d",
311 event_id, local_fd);
312
313 event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data));
314 if (!event_id_data) {
315 err = -ENOMEM;
316 goto clean_up;
317 }
318 event_id_data->g = g;
319 event_id_data->id = tsg->tsgid;
320 event_id_data->event_id = event_id;
321
322 nvgpu_cond_init(&event_id_data->event_id_wq);
323 err = nvgpu_mutex_init(&event_id_data->lock);
324 if (err)
325 goto clean_up_free;
326
327 nvgpu_init_list_node(&event_id_data->event_id_node);
328
329 file = anon_inode_getfile(name, &gk20a_event_id_ops,
330 event_id_data, O_RDWR);
331 if (IS_ERR(file)) {
332 err = PTR_ERR(file);
333 goto clean_up_free;
334 }
335
336 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
337 nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list);
338 nvgpu_mutex_release(&tsg->event_id_list_lock);
339
340 fd_install(local_fd, file);
341
342 *fd = local_fd;
343
344 return 0;
345
346clean_up_free:
347 nvgpu_kfree(g, event_id_data);
348clean_up:
349 put_unused_fd(local_fd);
350free_ref:
351 gk20a_put(g);
352 return err;
353}
354
355static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
356 struct nvgpu_event_id_ctrl_args *args)
357{
358 int err = 0;
359 int fd = -1;
360
361 if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
362 return -EINVAL;
363
364 nvgpu_speculation_barrier();
365 switch (args->cmd) {
366 case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
367 err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
368 if (!err)
369 args->event_fd = fd;
370 break;
371
372 default:
373 nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x",
374 args->cmd);
375 err = -EINVAL;
376 break;
377 }
378
379 return err;
380}
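/*
 * A minimal userspace sketch of the event-id flow above: enable an event on a
 * TSG fd, then poll() the returned fd until the event is posted. Constants
 * are assumed to come from the nvgpu uapi header.
 */
#include <poll.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* nvgpu uapi; install path may differ */

static int wait_for_bpt_int(int tsg_fd)
{
	struct nvgpu_event_id_ctrl_args args;
	struct pollfd pfd;

	memset(&args, 0, sizeof(args));
	args.cmd = NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE;
	args.event_id = NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT;
	if (ioctl(tsg_fd, NVGPU_IOCTL_TSG_EVENT_ID_CTRL, &args) != 0)
		return -1;

	pfd.fd = args.event_fd;		/* anon fd installed by the driver */
	pfd.events = POLLPRI | POLLIN;	/* gk20a_event_id_poll() reports both */
	return poll(&pfd, 1, -1) > 0 ? 0 : -1;
}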
381
382int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp)
383{
384 struct tsg_private *priv;
385 struct tsg_gk20a *tsg;
386 struct device *dev;
387 int err;
388
389 g = gk20a_get(g);
390 if (!g)
391 return -ENODEV;
392
393 dev = dev_from_gk20a(g);
394
395 nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev));
396
397 priv = nvgpu_kmalloc(g, sizeof(*priv));
398 if (!priv) {
399 err = -ENOMEM;
400 goto free_ref;
401 }
402
403 err = gk20a_busy(g);
404 if (err) {
405 nvgpu_err(g, "failed to power on, %d", err);
406 goto free_mem;
407 }
408
409 tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
410 gk20a_idle(g);
411 if (!tsg) {
412 err = -ENOMEM;
413 goto free_mem;
414 }
415
416 priv->g = g;
417 priv->tsg = tsg;
418 filp->private_data = priv;
419
420 gk20a_sched_ctrl_tsg_added(g, tsg);
421
422 return 0;
423
424free_mem:
425 nvgpu_kfree(g, priv);
426free_ref:
427 gk20a_put(g);
428 return err;
429}
430
431int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp)
432{
433 struct nvgpu_os_linux *l;
434 struct gk20a *g;
435 int ret;
436
437 l = container_of(inode->i_cdev,
438 struct nvgpu_os_linux, tsg.cdev);
439 g = &l->g;
440
441 nvgpu_log_fn(g, " ");
442
443 ret = gk20a_busy(g);
444 if (ret) {
445 nvgpu_err(g, "failed to power on, %d", ret);
446 return ret;
447 }
448
449 ret = nvgpu_ioctl_tsg_open(&l->g, filp);
450
451 gk20a_idle(g);
452 nvgpu_log_fn(g, "done");
453 return ret;
454}
455
456void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref)
457{
458 struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount);
459 struct gk20a *g = tsg->g;
460
461 gk20a_sched_ctrl_tsg_removed(g, tsg);
462
463 gk20a_tsg_release(ref);
464 gk20a_put(g);
465}
466
467int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp)
468{
469 struct tsg_private *priv = filp->private_data;
470 struct tsg_gk20a *tsg;
471
472 if (!priv) {
473 /* open failed, never got a tsg for this file */
474 return 0;
475 }
476
477 tsg = priv->tsg;
478
479 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
480 nvgpu_kfree(tsg->g, priv);
481 return 0;
482}
483
484static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
485 struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg)
486{
487 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
488 u32 level = arg->level;
489 int err;
490
491 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
492
493 nvgpu_mutex_acquire(&sched->control_lock);
494 if (sched->control_locked) {
495 err = -EPERM;
496 goto done;
497 }
498 err = gk20a_busy(g);
499 if (err) {
500 nvgpu_err(g, "failed to power on gpu");
501 goto done;
502 }
503
504 level = nvgpu_get_common_runlist_level(level);
505 err = gk20a_tsg_set_runlist_interleave(tsg, level);
506
507 gk20a_idle(g);
508done:
509 nvgpu_mutex_release(&sched->control_lock);
510 return err;
511}
512
513static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
514 struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
515{
516 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
517 int err;
518
519 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
520
521 nvgpu_mutex_acquire(&sched->control_lock);
522 if (sched->control_locked) {
523 err = -EPERM;
524 goto done;
525 }
526 err = gk20a_busy(g);
527 if (err) {
528 nvgpu_err(g, "failed to power on gpu");
529 goto done;
530 }
531 err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us);
532 gk20a_idle(g);
533done:
534 nvgpu_mutex_release(&sched->control_lock);
535 return err;
536}
537
538static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
539 struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
540{
541 arg->timeslice_us = gk20a_tsg_get_timeslice(tsg);
542 return 0;
543}
544
545static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
546 struct tsg_gk20a *tsg,
547 struct nvgpu_tsg_read_single_sm_error_state_args *args)
548{
549 struct gr_gk20a *gr = &g->gr;
550 struct nvgpu_tsg_sm_error_state *sm_error_state;
551 struct nvgpu_tsg_sm_error_state_record sm_error_state_record;
552 u32 sm_id;
553 int err = 0;
554
555 sm_id = args->sm_id;
556 if (sm_id >= gr->no_of_sm)
557 return -EINVAL;
558
559 nvgpu_speculation_barrier();
560
561 sm_error_state = tsg->sm_error_states + sm_id;
562 sm_error_state_record.global_esr =
563 sm_error_state->hww_global_esr;
564 sm_error_state_record.warp_esr =
565 sm_error_state->hww_warp_esr;
566 sm_error_state_record.warp_esr_pc =
567 sm_error_state->hww_warp_esr_pc;
568 sm_error_state_record.global_esr_report_mask =
569 sm_error_state->hww_global_esr_report_mask;
570 sm_error_state_record.warp_esr_report_mask =
571 sm_error_state->hww_warp_esr_report_mask;
572
573 if (args->record_size > 0) {
574 size_t write_size = sizeof(*sm_error_state);
575
576 nvgpu_speculation_barrier();
577 if (write_size > args->record_size)
578 write_size = args->record_size;
579
580 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
581 err = copy_to_user((void __user *)(uintptr_t)
582 args->record_mem,
583 &sm_error_state_record,
584 write_size);
585 nvgpu_mutex_release(&g->dbg_sessions_lock);
586 if (err) {
587 nvgpu_err(g, "copy_to_user failed!");
588 return err;
589 }
590
591 args->record_size = write_size;
592 }
593
594 return 0;
595}
596
597long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
598 unsigned long arg)
599{
600 struct tsg_private *priv = filp->private_data;
601 struct tsg_gk20a *tsg = priv->tsg;
602 struct gk20a *g = tsg->g;
603 u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE];
604 int err = 0;
605
606 nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
607
608 if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) ||
609 (_IOC_NR(cmd) == 0) ||
610 (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) ||
611 (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE))
612 return -EINVAL;
613
614 memset(buf, 0, sizeof(buf));
615 if (_IOC_DIR(cmd) & _IOC_WRITE) {
616 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
617 return -EFAULT;
618 }
619
620 if (!g->sw_ready) {
621 err = gk20a_busy(g);
622 if (err)
623 return err;
624
625 gk20a_idle(g);
626 }
627
628 switch (cmd) {
629 case NVGPU_TSG_IOCTL_BIND_CHANNEL:
630 {
631 int ch_fd = *(int *)buf;
632 if (ch_fd < 0) {
633 err = -EINVAL;
634 break;
635 }
636 err = gk20a_tsg_bind_channel_fd(tsg, ch_fd);
637 break;
638 }
639
640 case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX:
641 {
642 err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg,
643 (struct nvgpu_tsg_bind_channel_ex_args *)buf);
644 break;
645 }
646
647 case NVGPU_TSG_IOCTL_UNBIND_CHANNEL:
648 {
649 int ch_fd = *(int *)buf;
650
651 if (ch_fd < 0) {
652 err = -EINVAL;
653 break;
654 }
655 err = gk20a_busy(g);
656 if (err) {
657 nvgpu_err(g,
658 "failed to host gk20a for ioctl cmd: 0x%x", cmd);
659 break;
660 }
661 err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd);
662 gk20a_idle(g);
663 break;
664 }
665
666 case NVGPU_IOCTL_TSG_ENABLE:
667 {
668 err = gk20a_busy(g);
669 if (err) {
670 nvgpu_err(g,
671 "failed to host gk20a for ioctl cmd: 0x%x", cmd);
672 return err;
673 }
674 g->ops.fifo.enable_tsg(tsg);
675 gk20a_idle(g);
676 break;
677 }
678
679 case NVGPU_IOCTL_TSG_DISABLE:
680 {
681 err = gk20a_busy(g);
682 if (err) {
683 nvgpu_err(g,
684 "failed to host gk20a for ioctl cmd: 0x%x", cmd);
685 return err;
686 }
687 g->ops.fifo.disable_tsg(tsg);
688 gk20a_idle(g);
689 break;
690 }
691
692 case NVGPU_IOCTL_TSG_PREEMPT:
693 {
694 err = gk20a_busy(g);
695 if (err) {
696 nvgpu_err(g,
697 "failed to host gk20a for ioctl cmd: 0x%x", cmd);
698 return err;
699 }
700 /* preempt TSG */
701 err = g->ops.fifo.preempt_tsg(g, tsg);
702 gk20a_idle(g);
703 break;
704 }
705
706 case NVGPU_IOCTL_TSG_EVENT_ID_CTRL:
707 {
708 err = gk20a_tsg_event_id_ctrl(g, tsg,
709 (struct nvgpu_event_id_ctrl_args *)buf);
710 break;
711 }
712
713 case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
714 err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg,
715 (struct nvgpu_runlist_interleave_args *)buf);
716 break;
717
718 case NVGPU_IOCTL_TSG_SET_TIMESLICE:
719 {
720 err = gk20a_tsg_ioctl_set_timeslice(g, tsg,
721 (struct nvgpu_timeslice_args *)buf);
722 break;
723 }
724 case NVGPU_IOCTL_TSG_GET_TIMESLICE:
725 {
726 err = gk20a_tsg_ioctl_get_timeslice(g, tsg,
727 (struct nvgpu_timeslice_args *)buf);
728 break;
729 }
730
731 case NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE:
732 {
733 err = gk20a_tsg_ioctl_read_single_sm_error_state(g, tsg,
734 (struct nvgpu_tsg_read_single_sm_error_state_args *)buf);
735 break;
736 }
737
738 default:
739 nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
740 cmd);
741 err = -ENOTTY;
742 break;
743 }
744
 745	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
 746	    copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
 747		err = -EFAULT;
748
749 return err;
750}
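A minimal userspace sketch of driving the two timeslice commands handled above, assuming an already-open TSG fd and the installed uapi header; the helper name and the include path are illustrative, and error handling is trimmed.

#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* installed uapi header; exact path is an assumption */

static int example_set_and_read_timeslice(int tsg_fd, unsigned int us)
{
	struct nvgpu_timeslice_args args = { .timeslice_us = us };

	/* _IOC_WRITE direction: args is copied into the kernel-side buffer. */
	if (ioctl(tsg_fd, NVGPU_IOCTL_TSG_SET_TIMESLICE, &args) != 0)
		return -1;

	/* _IOC_READ direction: the kernel copies args back on success. */
	if (ioctl(tsg_fd, NVGPU_IOCTL_TSG_GET_TIMESLICE, &args) != 0)
		return -1;

	return (int)args.timeslice_us;
}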
diff --git a/include/os/linux/ioctl_tsg.h b/include/os/linux/ioctl_tsg.h
deleted file mode 100644
index 67399fd..0000000
--- a/include/os/linux/ioctl_tsg.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef NVGPU_IOCTL_TSG_H
14#define NVGPU_IOCTL_TSG_H
15
16struct inode;
17struct file;
18struct gk20a;
19struct nvgpu_ref;
20
21int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp);
22int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp);
23int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp);
24long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp,
25 unsigned int cmd, unsigned long arg);
26void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref);
27
28#endif
diff --git a/include/os/linux/kmem.c b/include/os/linux/kmem.c
deleted file mode 100644
index 395cc45..0000000
--- a/include/os/linux/kmem.c
+++ /dev/null
@@ -1,653 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/mm.h>
18#include <linux/slab.h>
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21#include <linux/vmalloc.h>
22#include <linux/stacktrace.h>
23
24#include <nvgpu/lock.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/atomic.h>
27#include <nvgpu/bug.h>
28#include <nvgpu/gk20a.h>
29
30#include "kmem_priv.h"
31
32/*
33 * Statically declared because this needs to be shared across all nvgpu driver
34 * instances. This makes sure that all kmem caches are _definitely_ uniquely
35 * named.
36 */
37static atomic_t kmem_cache_id;
38
39void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
40{
41 void *p;
42
43 if (size > PAGE_SIZE) {
44 if (clear)
45 p = nvgpu_vzalloc(g, size);
46 else
47 p = nvgpu_vmalloc(g, size);
48 } else {
49 if (clear)
50 p = nvgpu_kzalloc(g, size);
51 else
52 p = nvgpu_kmalloc(g, size);
53 }
54
55 return p;
56}
57
58void nvgpu_big_free(struct gk20a *g, void *p)
59{
60 /*
61 * This will have to be fixed eventually. Allocs that use
62 * nvgpu_big_[mz]alloc() will need to remember the size of the alloc
63 * when freeing.
64 */
65 if (is_vmalloc_addr(p))
66 nvgpu_vfree(g, p);
67 else
68 nvgpu_kfree(g, p);
69}
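A minimal sketch of the intended pairing: a caller that does not know up front whether its buffer will cross PAGE_SIZE allocates through __nvgpu_big_alloc(), which picks kmalloc vs. vmalloc, and always releases through nvgpu_big_free(), which re-detects the origin via is_vmalloc_addr(). The caller name and table layout are illustrative.

static int example_copy_table(struct gk20a *g, const u32 *src, size_t n)
{
	u32 *tbl = __nvgpu_big_alloc(g, n * sizeof(*tbl), false);

	if (!tbl)
		return -ENOMEM;

	memcpy(tbl, src, n * sizeof(*tbl));
	/* ... hand tbl to whatever needs a private copy ... */
	nvgpu_big_free(g, tbl);
	return 0;
}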
70
71void *__nvgpu_kmalloc(struct gk20a *g, size_t size, void *ip)
72{
73 void *alloc;
74
75#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
76 alloc = __nvgpu_track_kmalloc(g, size, ip);
77#else
78 alloc = kmalloc(size, GFP_KERNEL);
79#endif
80
81 kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
82 size, alloc, GFP_KERNEL);
83
84 return alloc;
85}
86
87void *__nvgpu_kzalloc(struct gk20a *g, size_t size, void *ip)
88{
89 void *alloc;
90
91#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
92 alloc = __nvgpu_track_kzalloc(g, size, ip);
93#else
94 alloc = kzalloc(size, GFP_KERNEL);
95#endif
96
97 kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
98 size, alloc, GFP_KERNEL);
99
100 return alloc;
101}
102
103void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, void *ip)
104{
105 void *alloc;
106
107#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
108 alloc = __nvgpu_track_kcalloc(g, n, size, ip);
109#else
110 alloc = kcalloc(n, size, GFP_KERNEL);
111#endif
112
113 kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
114 n * size, alloc, GFP_KERNEL);
115
116 return alloc;
117}
118
119void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, void *ip)
120{
121 void *alloc;
122
123#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
124 alloc = __nvgpu_track_vmalloc(g, size, ip);
125#else
126 alloc = vmalloc(size);
127#endif
128
129 kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc);
130
131 return alloc;
132}
133
134void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, void *ip)
135{
136 void *alloc;
137
138#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
139 alloc = __nvgpu_track_vzalloc(g, size, ip);
140#else
141 alloc = vzalloc(size);
142#endif
143
144 kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc);
145
146 return alloc;
147}
148
149void __nvgpu_kfree(struct gk20a *g, void *addr)
150{
151 kmem_dbg(g, "kfree: addr=0x%p", addr);
152#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
153 __nvgpu_track_kfree(g, addr);
154#else
155 kfree(addr);
156#endif
157}
158
159void __nvgpu_vfree(struct gk20a *g, void *addr)
160{
161 kmem_dbg(g, "vfree: addr=0x%p", addr);
162#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
163 __nvgpu_track_vfree(g, addr);
164#else
165 vfree(addr);
166#endif
167}
168
169#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
170
171void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
172{
173 nvgpu_mutex_acquire(&tracker->lock);
174}
175
176void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
177{
178 nvgpu_mutex_release(&tracker->lock);
179}
180
181void kmem_print_mem_alloc(struct gk20a *g,
182 struct nvgpu_mem_alloc *alloc,
183 struct seq_file *s)
184{
185#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
186 int i;
187
188 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
189 alloc->addr, alloc->size);
190 for (i = 0; i < alloc->stack_length; i++)
191 __pstat(s, " %3d [<%p>] %pS\n", i,
192 (void *)alloc->stack[i],
193 (void *)alloc->stack[i]);
194 __pstat(s, "\n");
195#else
196 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
197 alloc->addr, alloc->size, alloc->ip);
198#endif
199}
200
201static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
202 struct nvgpu_mem_alloc *alloc)
203{
204 alloc->allocs_entry.key_start = alloc->addr;
205 alloc->allocs_entry.key_end = alloc->addr + alloc->size;
206
207 nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
208 return 0;
209}
210
211static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
212 struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
213{
214 struct nvgpu_mem_alloc *alloc;
215 struct nvgpu_rbtree_node *node = NULL;
216
217 nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
218 if (!node)
219 return NULL;
220
221 alloc = nvgpu_mem_alloc_from_rbtree_node(node);
222
223 nvgpu_rbtree_unlink(node, &tracker->allocs);
224
225 return alloc;
226}
227
228static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
229 unsigned long size, unsigned long real_size,
230 u64 addr, void *ip)
231{
232 int ret;
233 struct nvgpu_mem_alloc *alloc;
234#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
235 struct stack_trace stack_trace;
236#endif
237
238 alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
239 if (!alloc)
240 return -ENOMEM;
241
242 alloc->owner = tracker;
243 alloc->size = size;
244 alloc->real_size = real_size;
245 alloc->addr = addr;
246 alloc->ip = ip;
247
248#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
249 stack_trace.max_entries = MAX_STACK_TRACE;
250 stack_trace.nr_entries = 0;
251 stack_trace.entries = alloc->stack;
252 /*
253 * This 4 here skips the 2 function calls that happen for all traced
254 * allocs due to nvgpu:
255 *
256 * __nvgpu_save_kmem_alloc+0x7c/0x128
257 * __nvgpu_track_kzalloc+0xcc/0xf8
258 *
259 * And the function calls that get made by the stack trace code itself.
 260	 * If the trace saving code changes, this will likely have to change
261 * as well.
262 */
263 stack_trace.skip = 4;
264 save_stack_trace(&stack_trace);
265 alloc->stack_length = stack_trace.nr_entries;
266#endif
267
268 nvgpu_lock_tracker(tracker);
269 tracker->bytes_alloced += size;
270 tracker->bytes_alloced_real += real_size;
271 tracker->nr_allocs++;
272
273 /* Keep track of this for building a histogram later on. */
274 if (tracker->max_alloc < size)
275 tracker->max_alloc = size;
276 if (tracker->min_alloc > size)
277 tracker->min_alloc = size;
278
279 ret = nvgpu_add_alloc(tracker, alloc);
280 if (ret) {
281 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
282 kfree(alloc);
283 nvgpu_unlock_tracker(tracker);
284 return ret;
285 }
286 nvgpu_unlock_tracker(tracker);
287
288 return 0;
289}
290
291static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
292 u64 addr)
293{
294 struct nvgpu_mem_alloc *alloc;
295
296 nvgpu_lock_tracker(tracker);
297 alloc = nvgpu_rem_alloc(tracker, addr);
298 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
299 nvgpu_unlock_tracker(tracker);
300 return -EINVAL;
301 }
302
303 memset((void *)alloc->addr, 0, alloc->size);
304
305 tracker->nr_frees++;
306 tracker->bytes_freed += alloc->size;
307 tracker->bytes_freed_real += alloc->real_size;
308 nvgpu_unlock_tracker(tracker);
309
310 return 0;
311}
312
313static void __nvgpu_check_valloc_size(unsigned long size)
314{
315 WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
316}
317
318static void __nvgpu_check_kalloc_size(size_t size)
319{
320 WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
321}
322
323void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
324 void *ip)
325{
326 void *alloc = vmalloc(size);
327
328 if (!alloc)
329 return NULL;
330
331 __nvgpu_check_valloc_size(size);
332
333 /*
 334	 * Ignore the return value. If this fails, let's not cause any issues
335 * for the rest of the driver.
336 */
337 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
338 (u64)(uintptr_t)alloc, ip);
339
340 return alloc;
341}
342
343void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
344 void *ip)
345{
346 void *alloc = vzalloc(size);
347
348 if (!alloc)
349 return NULL;
350
351 __nvgpu_check_valloc_size(size);
352
353 /*
 354	 * Ignore the return value. If this fails, let's not cause any issues
355 * for the rest of the driver.
356 */
357 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
358 (u64)(uintptr_t)alloc, ip);
359
360 return alloc;
361}
362
363void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, void *ip)
364{
365 void *alloc = kmalloc(size, GFP_KERNEL);
366
367 if (!alloc)
368 return NULL;
369
370 __nvgpu_check_kalloc_size(size);
371
372 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
373 (u64)(uintptr_t)alloc, ip);
374
375 return alloc;
376}
377
378void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, void *ip)
379{
380 void *alloc = kzalloc(size, GFP_KERNEL);
381
382 if (!alloc)
383 return NULL;
384
385 __nvgpu_check_kalloc_size(size);
386
387 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
388 (u64)(uintptr_t)alloc, ip);
389
390 return alloc;
391}
392
393void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
394 void *ip)
395{
396 void *alloc = kcalloc(n, size, GFP_KERNEL);
397
398 if (!alloc)
399 return NULL;
400
401 __nvgpu_check_kalloc_size(n * size);
402
403 __nvgpu_save_kmem_alloc(g->kmallocs, n * size,
404 roundup_pow_of_two(n * size),
405 (u64)(uintptr_t)alloc, ip);
406
407 return alloc;
408}
409
410void __nvgpu_track_vfree(struct gk20a *g, void *addr)
411{
412 /*
413 * Often it is accepted practice to pass NULL pointers into free
414 * functions to save code.
415 */
416 if (!addr)
417 return;
418
419 __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);
420
421 vfree(addr);
422}
423
424void __nvgpu_track_kfree(struct gk20a *g, void *addr)
425{
426 if (!addr)
427 return;
428
429 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
430
431 kfree(addr);
432}
433
434static int __do_check_for_outstanding_allocs(
435 struct gk20a *g,
436 struct nvgpu_mem_alloc_tracker *tracker,
437 const char *type, bool silent)
438{
439 struct nvgpu_rbtree_node *node;
440 int count = 0;
441
442 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
443 while (node) {
444 struct nvgpu_mem_alloc *alloc =
445 nvgpu_mem_alloc_from_rbtree_node(node);
446
447 if (!silent)
448 kmem_print_mem_alloc(g, alloc, NULL);
449
450 count++;
451 nvgpu_rbtree_enum_next(&node, node);
452 }
453
454 return count;
455}
456
457/**
458 * check_for_outstanding_allocs - Count and display outstanding allocs
459 *
460 * @g - The GPU.
461 * @silent - If set don't print anything about the allocs.
462 *
463 * Dump (or just count) the number of allocations left outstanding.
464 */
465static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
466{
467 int count = 0;
468
469 count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
470 silent);
471 count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
472 silent);
473
474 return count;
475}
476
477static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
478 void (*force_free_func)(const void *))
479{
480 struct nvgpu_rbtree_node *node;
481
482 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
483 while (node) {
484 struct nvgpu_mem_alloc *alloc =
485 nvgpu_mem_alloc_from_rbtree_node(node);
486
487 if (force_free_func)
488 force_free_func((void *)alloc->addr);
489
490 nvgpu_rbtree_unlink(node, &tracker->allocs);
491 kfree(alloc);
492
493 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
494 }
495}
496
497/**
498 * nvgpu_kmem_cleanup - Cleanup the kmem tracking
499 *
500 * @g - The GPU.
501 * @force_free - If set will also free leaked objects if possible.
502 *
503 * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free
504 * is non-zero then the allocation made by nvgpu is also freed. This is risky,
505 * though, as it is possible that the memory is still in use by other parts of
506 * the GPU driver not aware that this has happened.
507 *
508 * In theory it should be fine if the GPU driver has been deinitialized and
509 * there are no bugs in that code. However, if there are any bugs in that code
 510 * then they could well manifest as odd crashes an indeterminate amount of time
511 * in the future. So use @force_free at your own risk.
512 */
513static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
514{
515 do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
516 do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
517}
518
519void nvgpu_kmem_fini(struct gk20a *g, int flags)
520{
521 int count;
522 bool silent, force_free;
523
524 if (!flags)
525 return;
526
527 silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
528 force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
529
530 count = check_for_outstanding_allocs(g, silent);
531 nvgpu_kmem_cleanup(g, force_free);
532
533 /*
534 * If we leak objects we can either BUG() out or just WARN(). In general
 535 * it doesn't make sense to BUG() here since leaking a few objects
 536 * won't crash the kernel, but doing so can be helpful during development.
537 *
538 * If neither flag is set then we just silently do nothing.
539 */
540 if (count > 0) {
541 if (flags & NVGPU_KMEM_FINI_WARN) {
542 WARN(1, "Letting %d allocs leak!!\n", count);
543 } else if (flags & NVGPU_KMEM_FINI_BUG) {
544 nvgpu_err(g, "Letting %d allocs leak!!", count);
545 BUG();
546 }
547 }
548}
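A minimal sketch of the flag combinations accepted above, assuming a driver-teardown call site; the function name is illustrative.

static void example_kmem_teardown(struct gk20a *g)
{
	/* Dump every outstanding alloc and WARN (but keep running) if any remain. */
	nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS | NVGPU_KMEM_FINI_WARN);

	/* Passing 0 instead would skip the leak check entirely (early return above). */
}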
549
550int nvgpu_kmem_init(struct gk20a *g)
551{
552 int err;
553
554 g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
555 g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
556
557 if (!g->vmallocs || !g->kmallocs) {
558 err = -ENOMEM;
559 goto fail;
560 }
561
562 g->vmallocs->name = "vmalloc";
563 g->kmallocs->name = "kmalloc";
564
565 g->vmallocs->allocs = NULL;
566 g->kmallocs->allocs = NULL;
567
568 nvgpu_mutex_init(&g->vmallocs->lock);
569 nvgpu_mutex_init(&g->kmallocs->lock);
570
571 g->vmallocs->min_alloc = PAGE_SIZE;
572 g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
573
574 /*
575 * This needs to go after all the other initialization since they use
576 * the nvgpu_kzalloc() API.
577 */
578 g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
579 sizeof(struct nvgpu_mem_alloc));
580 g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
581 sizeof(struct nvgpu_mem_alloc));
582
583 if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
584 err = -ENOMEM;
585 if (g->vmallocs->allocs_cache)
586 nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
587 if (g->kmallocs->allocs_cache)
588 nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
589 goto fail;
590 }
591
592 return 0;
593
594fail:
595 if (g->vmallocs)
596 kfree(g->vmallocs);
597 if (g->kmallocs)
598 kfree(g->kmallocs);
599 return err;
600}
601
602#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */
603
604int nvgpu_kmem_init(struct gk20a *g)
605{
606 return 0;
607}
608
609void nvgpu_kmem_fini(struct gk20a *g, int flags)
610{
611}
612#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
613
614struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
615{
616 struct nvgpu_kmem_cache *cache =
617 nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));
618
619 if (!cache)
620 return NULL;
621
622 cache->g = g;
623
624 snprintf(cache->name, sizeof(cache->name),
625 "nvgpu-cache-0x%p-%d-%d", g, (int)size,
626 atomic_inc_return(&kmem_cache_id));
627 cache->cache = kmem_cache_create(cache->name,
628 size, size, 0, NULL);
629 if (!cache->cache) {
630 nvgpu_kfree(g, cache);
631 return NULL;
632 }
633
634 return cache;
635}
636
637void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
638{
639 struct gk20a *g = cache->g;
640
641 kmem_cache_destroy(cache->cache);
642 nvgpu_kfree(g, cache);
643}
644
645void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
646{
647 return kmem_cache_alloc(cache->cache, GFP_KERNEL);
648}
649
650void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
651{
652 kmem_cache_free(cache->cache, ptr);
653}
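A minimal lifecycle sketch for the four cache helpers above; struct example_node and the caller name are stand-ins for a real fixed-size object.

struct example_node {
	u64 key;
	u64 value;
};

static int example_cache_roundtrip(struct gk20a *g)
{
	struct nvgpu_kmem_cache *cache =
		nvgpu_kmem_cache_create(g, sizeof(struct example_node));
	struct example_node *node;

	if (!cache)
		return -ENOMEM;

	node = nvgpu_kmem_cache_alloc(cache);
	if (node)
		nvgpu_kmem_cache_free(cache, node);

	nvgpu_kmem_cache_destroy(cache);
	return node ? 0 : -ENOMEM;
}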
diff --git a/include/os/linux/kmem_priv.h b/include/os/linux/kmem_priv.h
deleted file mode 100644
index a41762a..0000000
--- a/include/os/linux/kmem_priv.h
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __KMEM_PRIV_H__
18#define __KMEM_PRIV_H__
19
20#include <nvgpu/rbtree.h>
21#include <nvgpu/lock.h>
22
23struct seq_file;
24
25#define __pstat(s, fmt, msg...) \
26 do { \
27 if (s) \
28 seq_printf(s, fmt, ##msg); \
29 else \
30 pr_info(fmt, ##msg); \
31 } while (0)
32
33#define MAX_STACK_TRACE 20
34
35/*
36 * Linux specific version of the nvgpu_kmem_cache struct. This type is
37 * completely opaque to the rest of the driver.
38 */
39struct nvgpu_kmem_cache {
40 struct gk20a *g;
41 struct kmem_cache *cache;
42
43 /*
44 * Memory to hold the kmem_cache unique name. Only necessary on our
45 * k3.10 kernel when not using the SLUB allocator but it's easier to
46 * just carry this on to newer kernels.
47 */
48 char name[128];
49};
50
51#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
52
53struct nvgpu_mem_alloc {
54 struct nvgpu_mem_alloc_tracker *owner;
55
56 void *ip;
57#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
58 unsigned long stack[MAX_STACK_TRACE];
59 int stack_length;
60#endif
61
62 u64 addr;
63
64 unsigned long size;
65 unsigned long real_size;
66
67 struct nvgpu_rbtree_node allocs_entry;
68};
69
70static inline struct nvgpu_mem_alloc *
71nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
72{
73 return (struct nvgpu_mem_alloc *)
74 ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry));
75};
76
77/*
78 * Linux specific tracking of vmalloc, kmalloc, etc.
79 */
80struct nvgpu_mem_alloc_tracker {
81 const char *name;
82 struct nvgpu_kmem_cache *allocs_cache;
83 struct nvgpu_rbtree_node *allocs;
84 struct nvgpu_mutex lock;
85
86 u64 bytes_alloced;
87 u64 bytes_freed;
88 u64 bytes_alloced_real;
89 u64 bytes_freed_real;
90 u64 nr_allocs;
91 u64 nr_frees;
92
93 unsigned long min_alloc;
94 unsigned long max_alloc;
95};
96
97void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
98void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
99
100void kmem_print_mem_alloc(struct gk20a *g,
101 struct nvgpu_mem_alloc *alloc,
102 struct seq_file *s);
103#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
104
105#endif /* __KMEM_PRIV_H__ */
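A minimal sketch of reading the tracker counters under their lock; the helper name is illustrative, and the outstanding-bytes calculation assumes the alloced/freed counters are only updated under the same lock, as in kmem.c above.

static u64 example_outstanding_bytes(struct nvgpu_mem_alloc_tracker *tracker)
{
	u64 outstanding;

	nvgpu_lock_tracker(tracker);
	outstanding = tracker->bytes_alloced - tracker->bytes_freed;
	nvgpu_unlock_tracker(tracker);

	return outstanding;
}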
diff --git a/include/os/linux/linux-channel.c b/include/os/linux/linux-channel.c
deleted file mode 100644
index d035baf..0000000
--- a/include/os/linux/linux-channel.c
+++ /dev/null
@@ -1,657 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/enabled.h>
18#include <nvgpu/debug.h>
19#include <nvgpu/error_notifier.h>
20#include <nvgpu/os_sched.h>
21#include <nvgpu/gk20a.h>
22#include <nvgpu/channel.h>
23#include <nvgpu/dma.h>
24
25/*
26 * This is required for nvgpu_vm_find_buf() which is used in the tracing
27 * code. Once we can get and access userspace buffers without requiring
28 * direct dma_buf usage this can be removed.
29 */
30#include <nvgpu/linux/vm.h>
31
32#include "channel.h"
33#include "ioctl_channel.h"
34#include "os_linux.h"
35#include "dmabuf.h"
36
37#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
38
39#include <linux/uaccess.h>
40#include <linux/dma-buf.h>
41#include <trace/events/gk20a.h>
42#include <uapi/linux/nvgpu.h>
43
44#include "sync_sema_android.h"
45
46u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
47{
48 u32 flags = 0;
49
50 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT)
51 flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT;
52
53 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
54 flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
55
56 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT)
57 flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT;
58
59 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
60 flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE;
61
62 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI)
63 flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI;
64
65 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING)
66 flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING;
67
68 return flags;
69}
70
71/*
72 * API to convert error_notifiers in common code and of the form
73 * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user
74 * space and of the form NVGPU_CHANNEL_*
75 */
76static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
77{
78 switch (error_notifier) {
79 case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
80 return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
81 case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
82 return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
83 case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
84 return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
85 case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
86 return NVGPU_CHANNEL_GR_EXCEPTION;
87 case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
88 return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
89 case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
90 return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
91 case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
92 return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
93 case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
94 return NVGPU_CHANNEL_PBDMA_ERROR;
95 case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
96 return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
97 case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
98 return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
99 case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
100 return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
101 }
102
103 pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
104
105 return error_notifier;
106}
107
108/**
109 * nvgpu_set_error_notifier_locked()
110 * Should be called with ch->error_notifier_mutex held
111 *
112 * error should be of the form NVGPU_ERR_NOTIFIER_*
113 */
114void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
115{
116 struct nvgpu_channel_linux *priv = ch->os_priv;
117
118 error = nvgpu_error_notifier_to_channel_notifier(error);
119
120 if (priv->error_notifier.dmabuf) {
121 struct nvgpu_notification *notification =
122 priv->error_notifier.notification;
123 struct timespec time_data;
124 u64 nsec;
125
126 getnstimeofday(&time_data);
127 nsec = ((u64)time_data.tv_sec) * 1000000000u +
128 (u64)time_data.tv_nsec;
129 notification->time_stamp.nanoseconds[0] =
130 (u32)nsec;
131 notification->time_stamp.nanoseconds[1] =
132 (u32)(nsec >> 32);
133 notification->info32 = error;
134 notification->status = 0xffff;
135
136 nvgpu_err(ch->g,
137 "error notifier set to %d for ch %d", error, ch->chid);
138 }
139}
140
141/* error should be of the form NVGPU_ERR_NOTIFIER_* */
142void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
143{
144 struct nvgpu_channel_linux *priv = ch->os_priv;
145
146 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
147 nvgpu_set_error_notifier_locked(ch, error);
148 nvgpu_mutex_release(&priv->error_notifier.mutex);
149}
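A minimal sketch of how common (non-Linux) code reports an error through the wrapper above, always in the NVGPU_ERR_NOTIFIER_* form; the conversion to the Linux NVGPU_CHANNEL_* value happens inside the wrapper, and the caller name is illustrative.

static void example_report_idle_timeout(struct channel_gk20a *ch)
{
	/* Common code always passes the NVGPU_ERR_NOTIFIER_* form. */
	nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
}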
150
151void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
152{
153 struct nvgpu_channel_linux *priv = ch->os_priv;
154
155 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
156 if (priv->error_notifier.dmabuf) {
157 struct nvgpu_notification *notification =
158 priv->error_notifier.notification;
159
160 /* Don't overwrite error flag if it is already set */
161 if (notification->status != 0xffff)
162 nvgpu_set_error_notifier_locked(ch, error);
163 }
164 nvgpu_mutex_release(&priv->error_notifier.mutex);
165}
166
167/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */
168bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
169{
170 struct nvgpu_channel_linux *priv = ch->os_priv;
171 bool notifier_set = false;
172
173 error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
174
175 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
176 if (priv->error_notifier.dmabuf) {
177 struct nvgpu_notification *notification =
178 priv->error_notifier.notification;
179 u32 err = notification->info32;
180
181 if (err == error_notifier)
182 notifier_set = true;
183 }
184 nvgpu_mutex_release(&priv->error_notifier.mutex);
185
186 return notifier_set;
187}
188
189static void gk20a_channel_update_runcb_fn(struct work_struct *work)
190{
191 struct nvgpu_channel_completion_cb *completion_cb =
192 container_of(work, struct nvgpu_channel_completion_cb, work);
193 struct nvgpu_channel_linux *priv =
194 container_of(completion_cb,
195 struct nvgpu_channel_linux, completion_cb);
196 struct channel_gk20a *ch = priv->ch;
197 void (*fn)(struct channel_gk20a *, void *);
198 void *user_data;
199
200 nvgpu_spinlock_acquire(&completion_cb->lock);
201 fn = completion_cb->fn;
202 user_data = completion_cb->user_data;
203 nvgpu_spinlock_release(&completion_cb->lock);
204
205 if (fn)
206 fn(ch, user_data);
207}
208
209static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch)
210{
211 struct nvgpu_channel_linux *priv = ch->os_priv;
212
213 priv->completion_cb.fn = NULL;
214 priv->completion_cb.user_data = NULL;
215 nvgpu_spinlock_init(&priv->completion_cb.lock);
216 INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn);
217}
218
219static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch)
220{
221 struct nvgpu_channel_linux *priv = ch->os_priv;
222
223 nvgpu_spinlock_acquire(&priv->completion_cb.lock);
224 priv->completion_cb.fn = NULL;
225 priv->completion_cb.user_data = NULL;
226 nvgpu_spinlock_release(&priv->completion_cb.lock);
227 cancel_work_sync(&priv->completion_cb.work);
228}
229
230static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch)
231{
232 struct nvgpu_channel_linux *priv = ch->os_priv;
233
234 if (priv->completion_cb.fn)
235 schedule_work(&priv->completion_cb.work);
236}
237
238static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch)
239{
240 struct nvgpu_channel_linux *priv = ch->os_priv;
241
242 if (priv->completion_cb.fn)
243 cancel_work_sync(&priv->completion_cb.work);
244}
245
246struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
247 void (*update_fn)(struct channel_gk20a *, void *),
248 void *update_fn_data,
249 int runlist_id,
250 bool is_privileged_channel)
251{
252 struct channel_gk20a *ch;
253 struct nvgpu_channel_linux *priv;
254
255 ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel,
256 nvgpu_current_pid(g), nvgpu_current_tid(g));
257
258 if (ch) {
259 priv = ch->os_priv;
260 nvgpu_spinlock_acquire(&priv->completion_cb.lock);
261 priv->completion_cb.fn = update_fn;
262 priv->completion_cb.user_data = update_fn_data;
263 nvgpu_spinlock_release(&priv->completion_cb.lock);
264 }
265
266 return ch;
267}
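A minimal sketch of a kernel-internal user passing a completion callback, assuming -1 selects the default runlist as the in-tree callers do; both function names are illustrative.

static void example_work_done(struct channel_gk20a *ch, void *data)
{
	/* Runs from the completion_cb work item when the channel signals completion. */
}

static struct channel_gk20a *example_open_cb_channel(struct gk20a *g, void *data)
{
	/* -1: let the driver pick the default runlist (assumption). */
	return gk20a_open_new_channel_with_cb(g, example_work_done, data, -1, false);
}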
268
269static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
270{
271}
272
273static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
274{
275 nvgpu_channel_work_completion_clear(ch);
276
277#if defined(CONFIG_GK20A_CYCLE_STATS)
278 gk20a_channel_free_cycle_stats_buffer(ch);
279 gk20a_channel_free_cycle_stats_snapshot(ch);
280#endif
281}
282
283static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
284{
285 struct nvgpu_channel_linux *priv;
286 int err;
287
288 priv = nvgpu_kzalloc(g, sizeof(*priv));
289 if (!priv)
290 return -ENOMEM;
291
292 ch->os_priv = priv;
293 priv->ch = ch;
294
295#ifdef CONFIG_SYNC
296 ch->has_os_fence_framework_support = true;
297#endif
298
299 err = nvgpu_mutex_init(&priv->error_notifier.mutex);
300 if (err) {
301 nvgpu_kfree(g, priv);
302 return err;
303 }
304
305 nvgpu_channel_work_completion_init(ch);
306
307 return 0;
308}
309
310static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
311{
312 struct nvgpu_channel_linux *priv = ch->os_priv;
313
314 nvgpu_mutex_destroy(&priv->error_notifier.mutex);
315 nvgpu_kfree(g, priv);
316
317 ch->os_priv = NULL;
318
319#ifdef CONFIG_SYNC
320 ch->has_os_fence_framework_support = false;
321#endif
322}
323
324static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch,
325 const char *fmt, ...)
326{
327 struct nvgpu_channel_linux *priv = ch->os_priv;
328 struct nvgpu_os_fence_framework *fence_framework;
329 char name[30];
330 va_list args;
331
332 fence_framework = &priv->fence_framework;
333
334 va_start(args, fmt);
335 vsnprintf(name, sizeof(name), fmt, args);
336 va_end(args);
337
338 fence_framework->timeline = gk20a_sync_timeline_create(name);
339
340 if (!fence_framework->timeline)
341 return -EINVAL;
342
343 return 0;
344}
345static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch)
346{
347 struct nvgpu_channel_linux *priv = ch->os_priv;
348 struct nvgpu_os_fence_framework *fence_framework;
349
350 fence_framework = &priv->fence_framework;
351
352 gk20a_sync_timeline_signal(fence_framework->timeline);
353}
354
355static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch)
356{
357 struct nvgpu_channel_linux *priv = ch->os_priv;
358 struct nvgpu_os_fence_framework *fence_framework;
359
360 fence_framework = &priv->fence_framework;
361
362 gk20a_sync_timeline_destroy(fence_framework->timeline);
363 fence_framework->timeline = NULL;
364}
365
366static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch)
367{
368 struct nvgpu_channel_linux *priv = ch->os_priv;
369 struct nvgpu_os_fence_framework *fence_framework;
370
371 fence_framework = &priv->fence_framework;
372
373 return (fence_framework->timeline != NULL);
374}
375
376static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest,
377 struct nvgpu_gpfifo_userdata userdata, u32 start, u32 length)
378{
379 struct nvgpu_gpfifo_entry __user *user_gpfifo = userdata.entries;
380 unsigned long n;
381
382 n = copy_from_user(dest, user_gpfifo + start,
383 length * sizeof(struct nvgpu_gpfifo_entry));
384
385 return n == 0 ? 0 : -EFAULT;
386}
387
388int nvgpu_usermode_buf_from_dmabuf(struct gk20a *g, int dmabuf_fd,
389 struct nvgpu_mem *mem, struct nvgpu_usermode_buf_linux *buf)
390{
391 struct device *dev = dev_from_gk20a(g);
392 struct dma_buf *dmabuf;
393 struct sg_table *sgt;
394 struct dma_buf_attachment *attachment;
395 int err;
396
397 dmabuf = dma_buf_get(dmabuf_fd);
398 if (IS_ERR(dmabuf)) {
399 return PTR_ERR(dmabuf);
400 }
401
402 if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
403 err = -EINVAL;
404 goto put_dmabuf;
405 }
406
407 err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
408 if (err != 0) {
409 goto put_dmabuf;
410 }
411
412 sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
413 if (IS_ERR(sgt)) {
414 nvgpu_warn(g, "Failed to pin dma_buf!");
415 err = PTR_ERR(sgt);
416 goto put_dmabuf;
417 }
418
419 buf->dmabuf = dmabuf;
420 buf->attachment = attachment;
421 buf->sgt = sgt;
422
423 /*
424 * This mem is unmapped and freed in a common path; for Linux, we'll
425 * also need to unref the dmabuf stuff (above) but the sgt here is only
426 * borrowed, so it cannot be freed by nvgpu_mem_*.
427 */
428 mem->mem_flags = NVGPU_MEM_FLAG_FOREIGN_SGT;
429 mem->aperture = APERTURE_SYSMEM;
430 mem->skip_wmb = 0;
431 mem->size = dmabuf->size;
432
433 mem->priv.flags = 0;
434 mem->priv.pages = NULL;
435 mem->priv.sgt = sgt;
436
437 return 0;
438put_dmabuf:
439 dma_buf_put(dmabuf);
440 return err;
441}
442
443void nvgpu_channel_free_usermode_buffers(struct channel_gk20a *c)
444{
445 struct nvgpu_channel_linux *priv = c->os_priv;
446 struct gk20a *g = c->g;
447 struct device *dev = dev_from_gk20a(g);
448
449 if (priv->usermode.gpfifo.dmabuf != NULL) {
450 gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
451 priv->usermode.gpfifo.attachment,
452 priv->usermode.gpfifo.sgt);
453 dma_buf_put(priv->usermode.gpfifo.dmabuf);
454 priv->usermode.gpfifo.dmabuf = NULL;
455 }
456
457 if (priv->usermode.userd.dmabuf != NULL) {
458 gk20a_mm_unpin(dev, priv->usermode.userd.dmabuf,
459 priv->usermode.userd.attachment,
460 priv->usermode.userd.sgt);
461 dma_buf_put(priv->usermode.userd.dmabuf);
462 priv->usermode.userd.dmabuf = NULL;
463 }
464}
465
466static int nvgpu_channel_alloc_usermode_buffers(struct channel_gk20a *c,
467 struct nvgpu_setup_bind_args *args)
468{
469 struct nvgpu_channel_linux *priv = c->os_priv;
470 struct gk20a *g = c->g;
471 struct device *dev = dev_from_gk20a(g);
472 size_t gpfifo_size;
473 int err;
474
475 if (args->gpfifo_dmabuf_fd == 0 || args->userd_dmabuf_fd == 0) {
476 return -EINVAL;
477 }
478
479 if (args->gpfifo_dmabuf_offset != 0 ||
480 args->userd_dmabuf_offset != 0) {
481 /* TODO - not yet supported */
482 return -EINVAL;
483 }
484
485 err = nvgpu_usermode_buf_from_dmabuf(g, args->gpfifo_dmabuf_fd,
486 &c->usermode_gpfifo, &priv->usermode.gpfifo);
487 if (err < 0) {
488 return err;
489 }
490
491 gpfifo_size = max_t(u32, SZ_4K,
492 args->num_gpfifo_entries *
493 nvgpu_get_gpfifo_entry_size());
494
495 if (c->usermode_gpfifo.size < gpfifo_size) {
496 err = -EINVAL;
497 goto free_gpfifo;
498 }
499
500 c->usermode_gpfifo.gpu_va = nvgpu_gmmu_map(c->vm, &c->usermode_gpfifo,
501 c->usermode_gpfifo.size, 0, gk20a_mem_flag_none,
502 false, c->usermode_gpfifo.aperture);
503
504 if (c->usermode_gpfifo.gpu_va == 0) {
505 err = -ENOMEM;
506 goto unmap_free_gpfifo;
507 }
508
509 err = nvgpu_usermode_buf_from_dmabuf(g, args->userd_dmabuf_fd,
510 &c->usermode_userd, &priv->usermode.userd);
511 if (err < 0) {
512 goto unmap_free_gpfifo;
513 }
514
515 args->work_submit_token = g->fifo.channel_base + c->chid;
516
517 return 0;
518unmap_free_gpfifo:
519 nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo);
520free_gpfifo:
521 gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
522 priv->usermode.gpfifo.attachment,
523 priv->usermode.gpfifo.sgt);
524 dma_buf_put(priv->usermode.gpfifo.dmabuf);
525 priv->usermode.gpfifo.dmabuf = NULL;
526 return err;
527}
528
529int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
530{
531 struct gk20a *g = &l->g;
532 struct fifo_gk20a *f = &g->fifo;
533 int chid;
534 int err;
535
536 for (chid = 0; chid < (int)f->num_channels; chid++) {
537 struct channel_gk20a *ch = &f->channel[chid];
538
539 err = nvgpu_channel_alloc_linux(g, ch);
540 if (err)
541 goto err_clean;
542 }
543
544 g->os_channel.open = nvgpu_channel_open_linux;
545 g->os_channel.close = nvgpu_channel_close_linux;
546 g->os_channel.work_completion_signal =
547 nvgpu_channel_work_completion_signal;
548 g->os_channel.work_completion_cancel_sync =
549 nvgpu_channel_work_completion_cancel_sync;
550
551 g->os_channel.os_fence_framework_inst_exists =
552 nvgpu_channel_fence_framework_exists;
553 g->os_channel.init_os_fence_framework =
554 nvgpu_channel_init_os_fence_framework;
555 g->os_channel.signal_os_fence_framework =
556 nvgpu_channel_signal_os_fence_framework;
557 g->os_channel.destroy_os_fence_framework =
558 nvgpu_channel_destroy_os_fence_framework;
559
560 g->os_channel.copy_user_gpfifo =
561 nvgpu_channel_copy_user_gpfifo;
562
563 g->os_channel.alloc_usermode_buffers =
564 nvgpu_channel_alloc_usermode_buffers;
565
566 g->os_channel.free_usermode_buffers =
567 nvgpu_channel_free_usermode_buffers;
568
569 return 0;
570
571err_clean:
572 for (; chid >= 0; chid--) {
573 struct channel_gk20a *ch = &f->channel[chid];
574
575 nvgpu_channel_free_linux(g, ch);
576 }
577 return err;
578}
579
580void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l)
581{
582 struct gk20a *g = &l->g;
583 struct fifo_gk20a *f = &g->fifo;
584 unsigned int chid;
585
586 for (chid = 0; chid < f->num_channels; chid++) {
587 struct channel_gk20a *ch = &f->channel[chid];
588
589 nvgpu_channel_free_linux(g, ch);
590 }
591
592 g->os_channel.os_fence_framework_inst_exists = NULL;
593 g->os_channel.init_os_fence_framework = NULL;
594 g->os_channel.signal_os_fence_framework = NULL;
595 g->os_channel.destroy_os_fence_framework = NULL;
596}
597
598u32 nvgpu_get_gpfifo_entry_size(void)
599{
600 return sizeof(struct nvgpu_gpfifo_entry);
601}
602
603#ifdef CONFIG_DEBUG_FS
604static void trace_write_pushbuffer(struct channel_gk20a *c,
605 struct nvgpu_gpfifo_entry *g)
606{
607 void *mem = NULL;
608 unsigned int words;
609 u64 offset;
610 struct dma_buf *dmabuf = NULL;
611
612 if (gk20a_debug_trace_cmdbuf) {
613 u64 gpu_va = (u64)g->entry0 |
614 (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
615 int err;
616
617 words = pbdma_gp_entry1_length_v(g->entry1);
618 err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
619 if (!err)
620 mem = dma_buf_vmap(dmabuf);
621 }
622
623 if (mem) {
624 u32 i;
625 /*
626 * Write in batches of 128 as there seems to be a limit
 627			 * on how much you can output to ftrace at once.
628 */
629 for (i = 0; i < words; i += 128U) {
630 trace_gk20a_push_cmdbuf(
631 c->g->name,
632 0,
633 min(words - i, 128U),
634 offset + i * sizeof(u32),
635 mem);
636 }
637 dma_buf_vunmap(dmabuf, mem);
638 }
639}
640
641void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
642{
643 struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va;
644 u32 n = c->gpfifo.entry_num;
645 u32 start = c->gpfifo.put;
646 u32 i;
647
648 if (!gk20a_debug_trace_cmdbuf)
649 return;
650
651 if (!gp)
652 return;
653
654 for (i = 0; i < count; i++)
655 trace_write_pushbuffer(c, &gp[(start + i) % n]);
656}
657#endif
diff --git a/include/os/linux/linux-dma.c b/include/os/linux/linux-dma.c
deleted file mode 100644
index d704b2a..0000000
--- a/include/os/linux/linux-dma.c
+++ /dev/null
@@ -1,534 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-mapping.h>
18#include <linux/version.h>
19
20#include <nvgpu/log.h>
21#include <nvgpu/dma.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/bug.h>
24#include <nvgpu/gmmu.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/enabled.h>
27#include <nvgpu/vidmem.h>
28#include <nvgpu/gk20a.h>
29
30#include <nvgpu/linux/dma.h>
31
32#include "platform_gk20a.h"
33#include "os_linux.h"
34#include "dmabuf_vidmem.h"
35
36#ifdef __DMA_ATTRS_LONGS
37#define NVGPU_DEFINE_DMA_ATTRS(x) \
38 struct dma_attrs x = { \
39 .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \
40 }
41#define NVGPU_DMA_ATTR(attrs) &attrs
42#else
43#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0
44#define NVGPU_DMA_ATTR(attrs) attrs
45#endif
46
47/*
48 * Enough to hold all the possible flags in string form. When a new flag is
49 * added it must be added here as well!!
50 */
51#define NVGPU_DMA_STR_SIZE \
52 sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS")
53
54/*
55 * The returned string is kmalloc()ed here but must be freed by the caller.
56 */
57static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags)
58{
59 char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE);
60 int bytes_available = NVGPU_DMA_STR_SIZE;
61
62 /*
 63	 * Return the empty buffer if there are no flags. Makes it easier on the
64 * calling code to just print it instead of any if (NULL) type logic.
65 */
66 if (!flags)
67 return buf;
68
69#define APPEND_FLAG(flag, str_flag) \
70 do { \
71 if (flags & flag) { \
72 strncat(buf, str_flag, bytes_available); \
73 bytes_available -= strlen(str_flag); \
74 } \
75 } while (0)
76
77 APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING ");
78 APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS ");
79#undef APPEND_FLAG
80
81 return buf;
82}
83
84/**
85 * __dma_dbg - Debug print for DMA allocs and frees.
86 *
87 * @g - The GPU.
88 * @size - The requested size of the alloc (size_t).
89 * @flags - The flags (unsigned long).
90 * @type - A string describing the type (i.e: sysmem or vidmem).
91 * @what - A string with 'alloc' or 'free'.
92 *
93 * @flags is the DMA flags. If there are none or it doesn't make sense to print
94 * flags just pass 0.
95 *
96 * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function.
97 */
98static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags,
99 const char *type, const char *what,
100 const char *func, int line)
101{
102 char *flags_str = NULL;
103
104 /*
105 * Don't bother making the flags_str if debugging is
106 * not enabled. This saves a malloc and a free.
107 */
108 if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma))
109 return;
110
111 flags_str = nvgpu_dma_flags_to_str(g, flags);
112
113 __nvgpu_log_dbg(g, gpu_dbg_dma,
114 func, line,
115 "DMA %s: [%s] size=%-7zu "
116 "aligned=%-7zu total=%-10llukB %s",
117 what, type,
118 size, PAGE_ALIGN(size),
119 g->dma_memory_used >> 10,
120 flags_str);
121
122 if (flags_str)
123 nvgpu_kfree(g, flags_str);
124}
125
126#define dma_dbg_alloc(g, size, flags, type) \
127 __dma_dbg(g, size, flags, type, "alloc", __func__, __LINE__)
128#define dma_dbg_free(g, size, flags, type) \
129 __dma_dbg(g, size, flags, type, "free", __func__, __LINE__)
130
131/*
132 * For after the DMA alloc is done.
133 */
134#define __dma_dbg_done(g, size, type, what) \
135 nvgpu_log(g, gpu_dbg_dma, \
136 "DMA %s: [%s] size=%-7zu Done!", \
137 what, type, size); \
138
139#define dma_dbg_alloc_done(g, size, type) \
140 __dma_dbg_done(g, size, type, "alloc")
141#define dma_dbg_free_done(g, size, type) \
142 __dma_dbg_done(g, size, type, "free")
143
144#if defined(CONFIG_GK20A_VIDMEM)
145static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
146 size_t size)
147{
148 u64 addr = 0;
149
150 if (at)
151 addr = nvgpu_alloc_fixed(allocator, at, size, 0);
152 else
153 addr = nvgpu_alloc(allocator, size);
154
155 return addr;
156}
157#endif
158
159#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
160static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
161 unsigned long flags)
162#define ATTR_ARG(x) *x
163#else
164static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
165 unsigned long flags)
166#define ATTR_ARG(x) x
167#endif
168{
169 if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
170 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
171 if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
172 dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
173#undef ATTR_ARG
174}
175
176int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
177 size_t size, struct nvgpu_mem *mem)
178{
179 struct device *d = dev_from_gk20a(g);
180 int err;
181 dma_addr_t iova;
182 NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
183 void *alloc_ret;
184
185 if (nvgpu_mem_is_valid(mem)) {
186 nvgpu_warn(g, "memory leak !!");
187 WARN_ON(1);
188 }
189
190 /*
191 * WAR for IO coherent chips: the DMA API does not seem to generate
192 * mappings that work correctly. Unclear why - Bug ID: 2040115.
193 *
 194	 * Basically we just tell the DMA API to skip creating a kernel mapping
 195	 * (NO_KERNEL_MAPPING) and then create a vmap() ourselves.
196 */
197 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
198 flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
199
200 /*
201 * Before the debug print so we see this in the total. But during
202 * cleanup in the fail path this has to be subtracted.
203 */
204 g->dma_memory_used += PAGE_ALIGN(size);
205
206 dma_dbg_alloc(g, size, flags, "sysmem");
207
208 /*
209 * Save the old size but for actual allocation purposes the size is
210 * going to be page aligned.
211 */
212 mem->size = size;
213 size = PAGE_ALIGN(size);
214
215 nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
216
217 alloc_ret = dma_alloc_attrs(d, size, &iova,
218 GFP_KERNEL|__GFP_ZERO,
219 NVGPU_DMA_ATTR(dma_attrs));
220 if (!alloc_ret)
221 return -ENOMEM;
222
223 if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
224 mem->priv.pages = alloc_ret;
225 err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
226 mem->priv.pages,
227 iova, size);
228 } else {
229 mem->cpu_va = alloc_ret;
230 err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va,
231 iova, size, flags);
232 }
233 if (err)
234 goto fail_free_dma;
235
236 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
237 mem->cpu_va = vmap(mem->priv.pages,
238 size >> PAGE_SHIFT,
239 0, PAGE_KERNEL);
240 if (!mem->cpu_va) {
241 err = -ENOMEM;
242 goto fail_free_sgt;
243 }
244 }
245
246 mem->aligned_size = size;
247 mem->aperture = APERTURE_SYSMEM;
248 mem->priv.flags = flags;
249
250 dma_dbg_alloc_done(g, mem->size, "sysmem");
251
252 return 0;
253
254fail_free_sgt:
255 nvgpu_free_sgtable(g, &mem->priv.sgt);
256fail_free_dma:
257 dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
258 mem->cpu_va = NULL;
259 mem->priv.sgt = NULL;
260 mem->size = 0;
 261	g->dma_memory_used -= size; /* aligned_size is not yet set on this path */
262 return err;
263}
264
265int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
266 size_t size, struct nvgpu_mem *mem, u64 at)
267{
268#if defined(CONFIG_GK20A_VIDMEM)
269 u64 addr;
270 int err;
271 struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
272 &g->mm.vidmem.allocator :
273 &g->mm.vidmem.bootstrap_allocator;
274 u64 before_pending;
275
276 if (nvgpu_mem_is_valid(mem)) {
277 nvgpu_warn(g, "memory leak !!");
278 WARN_ON(1);
279 }
280
281 dma_dbg_alloc(g, size, flags, "vidmem");
282
283 mem->size = size;
284 size = PAGE_ALIGN(size);
285
286 if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
287 return -ENOSYS;
288
289 /*
290 * Our own allocator doesn't have any flags yet, and we can't
291 * kernel-map these, so require explicit flags.
292 */
293 WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
294
295 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
296 before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
297 addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
298 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
299 if (!addr) {
300 /*
301 * If memory is known to be freed soon, let the user know that
302 * it may be available after a while.
303 */
304 if (before_pending)
305 return -EAGAIN;
306 else
307 return -ENOMEM;
308 }
309
310 if (at)
311 mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;
312
313 mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
314 if (!mem->priv.sgt) {
315 err = -ENOMEM;
316 goto fail_physfree;
317 }
318
319 err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
320 if (err)
321 goto fail_kfree;
322
323 nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
324 sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);
325
326 mem->aligned_size = size;
327 mem->aperture = APERTURE_VIDMEM;
328 mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
329 mem->allocator = vidmem_alloc;
330 mem->priv.flags = flags;
331
332 nvgpu_init_list_node(&mem->clear_list_entry);
333
334 dma_dbg_alloc_done(g, mem->size, "vidmem");
335
336 return 0;
337
338fail_kfree:
339 nvgpu_kfree(g, mem->priv.sgt);
340fail_physfree:
341 nvgpu_free(&g->mm.vidmem.allocator, addr);
342 mem->size = 0;
343 return err;
344#else
345 return -ENOSYS;
346#endif
347}
348
349void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
350{
351 struct device *d = dev_from_gk20a(g);
352
353 g->dma_memory_used -= mem->aligned_size;
354
355 dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");
356
357 if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
358 !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
359 (mem->cpu_va || mem->priv.pages)) {
360 /*
361 * Free side of WAR for bug 2040115.
362 */
363 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
364 vunmap(mem->cpu_va);
365
366 if (mem->priv.flags) {
367 NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
368
369 nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags);
370
371 if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
372 dma_free_attrs(d, mem->aligned_size, mem->priv.pages,
373 sg_dma_address(mem->priv.sgt->sgl),
374 NVGPU_DMA_ATTR(dma_attrs));
375 } else {
376 dma_free_attrs(d, mem->aligned_size, mem->cpu_va,
377 sg_dma_address(mem->priv.sgt->sgl),
378 NVGPU_DMA_ATTR(dma_attrs));
379 }
380 } else {
381 dma_free_coherent(d, mem->aligned_size, mem->cpu_va,
382 sg_dma_address(mem->priv.sgt->sgl));
383 }
384 mem->cpu_va = NULL;
385 mem->priv.pages = NULL;
386 }
387
388 /*
389 * When this flag is set we expect that pages is still populated but not
390 * by the DMA API.
391 */
392 if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA)
393 nvgpu_kfree(g, mem->priv.pages);
394
395 if ((mem->mem_flags & NVGPU_MEM_FLAG_FOREIGN_SGT) == 0 &&
396 mem->priv.sgt != NULL) {
397 nvgpu_free_sgtable(g, &mem->priv.sgt);
398 }
399
400 dma_dbg_free_done(g, mem->size, "sysmem");
401
402 mem->size = 0;
403 mem->aligned_size = 0;
404 mem->aperture = APERTURE_INVALID;
405}
406
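A minimal sysmem round trip through the alloc and free paths above, assuming SZ_64K as a stand-in size; the caller name is illustrative and the nvgpu_mem storage is owned by the caller.

static int example_sysmem_buffer(struct gk20a *g, struct nvgpu_mem *mem)
{
	int err;

	err = nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING,
					SZ_64K, mem);
	if (err)
		return err;

	/* ... map mem into a GPU VM or hand it to hardware here ... */

	nvgpu_dma_free_sys(g, mem);
	return 0;
}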
407void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
408{
409#if defined(CONFIG_GK20A_VIDMEM)
410 size_t mem_size = mem->size;
411
412 dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
413
414 /* Sanity check - only this supported when allocating. */
415 WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
416
417 if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
418 int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
419
420 /*
421 * If there's an error here then that means we can't clear the
422 * vidmem. That's too bad; however, we still own the nvgpu_mem
423 * buf so we have to free that.
424 *
425 * We don't need to worry about the vidmem allocator itself
426 * since when that gets cleaned up in the driver shutdown path
427 * all the outstanding allocs are force freed.
428 */
429 if (err)
430 nvgpu_kfree(g, mem);
431 } else {
432 nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
433 nvgpu_free(mem->allocator,
434 (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
435 nvgpu_free_sgtable(g, &mem->priv.sgt);
436
437 mem->size = 0;
438 mem->aligned_size = 0;
439 mem->aperture = APERTURE_INVALID;
440 }
441
442 dma_dbg_free_done(g, mem_size, "vidmem");
443#endif
444}
445
446int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt,
447 void *cpuva, u64 iova, size_t size, unsigned long flags)
448{
449 int err = 0;
450 struct sg_table *tbl;
451 NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
452
453 tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
454 if (!tbl) {
455 err = -ENOMEM;
456 goto fail;
457 }
458
459 nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
460 err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova,
461 size, NVGPU_DMA_ATTR(dma_attrs));
462 if (err)
463 goto fail;
464
465 sg_dma_address(tbl->sgl) = iova;
466 *sgt = tbl;
467
468 return 0;
469
470fail:
471 if (tbl)
472 nvgpu_kfree(g, tbl);
473
474 return err;
475}
476
477int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt,
478 void *cpuva, u64 iova, size_t size)
479{
480 return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0);
481}
482
483int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
484 struct page **pages, u64 iova, size_t size)
485{
486 int err = 0;
487 struct sg_table *tbl;
488
489 tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
490 if (!tbl) {
491 err = -ENOMEM;
492 goto fail;
493 }
494
495 err = sg_alloc_table_from_pages(tbl, pages,
496 DIV_ROUND_UP(size, PAGE_SIZE),
497 0, size, GFP_KERNEL);
498 if (err)
499 goto fail;
500
501 sg_dma_address(tbl->sgl) = iova;
502 *sgt = tbl;
503
504 return 0;
505
506fail:
507 if (tbl)
508 nvgpu_kfree(g, tbl);
509
510 return err;
511}
512
513void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt)
514{
515 sg_free_table(*sgt);
516 nvgpu_kfree(g, *sgt);
517 *sgt = NULL;
518}
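
A minimal usage sketch of the two helpers above, assuming a coherent buffer previously allocated through the DMA API (cpu_va, iova and size are placeholders):

struct sg_table *sgt = NULL;
int err;

/* Describe an existing DMA-API allocation as an sg_table. */
err = nvgpu_get_sgtable(g, &sgt, cpu_va, iova, size);
if (err)
	return err;

/* ... walk sgt->sgl, e.g. to program GPU page tables ... */

nvgpu_free_sgtable(g, &sgt);	/* frees the table and NULLs the pointer */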
519
520bool nvgpu_iommuable(struct gk20a *g)
521{
522#ifdef CONFIG_TEGRA_GK20A
523 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
524
525 /*
526 * Check against the nvgpu device to see if it's been marked as
527 * IOMMU'able.
528 */
529 if (!device_is_iommuable(l->dev))
530 return false;
531#endif
532
533 return true;
534}
diff --git a/include/os/linux/log.c b/include/os/linux/log.c
deleted file mode 100644
index bd9f67d..0000000
--- a/include/os/linux/log.c
+++ /dev/null
@@ -1,132 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/device.h>
19
20#include <nvgpu/log.h>
21#include <nvgpu/gk20a.h>
22
23#include "platform_gk20a.h"
24#include "os_linux.h"
25
26/*
27 * Define a length for log buffers. This is the buffer that the 'fmt, ...' part
28 * of __nvgpu_log_msg() and __nvgpu_log_dbg() prints into. This buffer lives on
29 * the stack, so it must not be overly large since kernel stack space is
30 * limited, but at the same time we don't want it to be too restrictive.
31 */
32#define LOG_BUFFER_LENGTH 160
33
34/*
35 * Annoying quirk of Linux: this has to be a string literal since the printk()
36 * function and friends use the preprocessor to concatenate stuff to the start
37 * of this string when printing.
38 */
39#define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n"
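
To illustrate the quirk: pr_info(fmt, ...) expands to printk(KERN_INFO fmt, ...), so the log-level prefix is attached by string-literal concatenation at compile time, which only works when fmt is a literal such as LOG_FMT. Roughly:

pr_info(LOG_FMT, name, func_name, line, log_type, log);
/* expands (roughly) to printk("\001" "6" "nvgpu: %s %33s:%-4d [%s] %s\n", ...) */

/* Passing a pointer instead of a literal would not compile:
 *	const char *fmt = LOG_FMT;
 *	pr_info(fmt, name, func_name, line, log_type, log);
 */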
40
41static const char *log_types[] = {
42 "ERR",
43 "WRN",
44 "DBG",
45 "INFO",
46};
47
48int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask)
49{
50 return !!(g->log_mask & log_mask);
51}
52
53static inline const char *nvgpu_log_name(struct gk20a *g)
54{
55 return dev_name(dev_from_gk20a(g));
56}
57
58#ifdef CONFIG_GK20A_TRACE_PRINTK
59static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name,
60 const char *func_name, int line,
61 const char *log_type, const char *log)
62{
63 trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log);
64}
65#endif
66
67static void __nvgpu_really_print_log(u32 trace, const char *gpu_name,
68 const char *func_name, int line,
69 enum nvgpu_log_type type, const char *log)
70{
71 const char *name = gpu_name ? gpu_name : "";
72 const char *log_type = log_types[type];
73
74#ifdef CONFIG_GK20A_TRACE_PRINTK
75 if (trace)
76 return __nvgpu_trace_printk_log(trace, name, func_name,
77 line, log_type, log);
78#endif
79 switch (type) {
80 case NVGPU_DEBUG:
81 /*
82 * We could use pr_debug() here but we control debug enablement
83 * separately from the Linux kernel. Perhaps this is a bug in
84 * nvgpu.
85 */
86 pr_info(LOG_FMT, name, func_name, line, log_type, log);
87 break;
88 case NVGPU_INFO:
89 pr_info(LOG_FMT, name, func_name, line, log_type, log);
90 break;
91 case NVGPU_WARNING:
92 pr_warn(LOG_FMT, name, func_name, line, log_type, log);
93 break;
94 case NVGPU_ERROR:
95 pr_err(LOG_FMT, name, func_name, line, log_type, log);
96 break;
97 }
98}
99
100__attribute__((format (printf, 5, 6)))
101void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line,
102 enum nvgpu_log_type type, const char *fmt, ...)
103{
104 char log[LOG_BUFFER_LENGTH];
105 va_list args;
106
107 va_start(args, fmt);
108 vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
109 va_end(args);
110
111 __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "",
112 func_name, line, type, log);
113}
114
115__attribute__((format (printf, 5, 6)))
116void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask,
117 const char *func_name, int line,
118 const char *fmt, ...)
119{
120 char log[LOG_BUFFER_LENGTH];
121 va_list args;
122
123 if ((log_mask & g->log_mask) == 0)
124 return;
125
126 va_start(args, fmt);
127 vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
128 va_end(args);
129
130 __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g),
131 func_name, line, NVGPU_DEBUG, log);
132}
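
A short usage sketch of the entry points above (callers normally reach them through the logging macros; gpu_dbg_shutdown is one of the debug mask bits used elsewhere in this tree, and err is a placeholder):

/* Debug print: emitted only when gpu_dbg_shutdown is set in g->log_mask. */
__nvgpu_log_dbg(g, gpu_dbg_shutdown, __func__, __LINE__,
		"tearing down, power_on=%d", g->power_on);

/* Unconditional error print. */
__nvgpu_log_msg(g, __func__, __LINE__, NVGPU_ERROR, "unexpected err=%d", err);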
diff --git a/include/os/linux/ltc.c b/include/os/linux/ltc.c
deleted file mode 100644
index baeb20b..0000000
--- a/include/os/linux/ltc.c
+++ /dev/null
@@ -1,60 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/ltc.h>
24#include <nvgpu/dma.h>
25#include <nvgpu/nvgpu_mem.h>
26#include <nvgpu/gk20a.h>
27
28#include "gk20a/gr_gk20a.h"
29
30int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size,
31 bool vidmem_alloc)
32{
33 struct gr_gk20a *gr = &g->gr;
34 unsigned long flags = 0;
35
36 if (nvgpu_mem_is_valid(&gr->compbit_store.mem))
37 return 0;
38
39 if (vidmem_alloc) {
40 /*
41 * The backing store MUST be physically contiguous and allocated in
42 * one chunk.
43 * The vidmem allocation API does not support a FORCE_CONTIGUOUS-like
44 * flag for requesting contiguous memory.
45 * However, this allocation goes through the vidmem bootstrap allocator,
46 * which always allocates contiguous memory.
47 */
48 return nvgpu_dma_alloc_vid(g,
49 compbit_backing_size,
50 &gr->compbit_store.mem);
51 } else {
52 if (!nvgpu_iommuable(g))
53 flags = NVGPU_DMA_FORCE_CONTIGUOUS;
54
55 return nvgpu_dma_alloc_flags_sys(g,
56 flags,
57 compbit_backing_size,
58 &gr->compbit_store.mem);
59 }
60}
diff --git a/include/os/linux/module.c b/include/os/linux/module.c
deleted file mode 100644
index fdbab46..0000000
--- a/include/os/linux/module.c
+++ /dev/null
@@ -1,1547 +0,0 @@
1/*
2 * GK20A Graphics
3 *
4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/module.h>
20#include <linux/of.h>
21#include <linux/of_device.h>
22#include <linux/of_platform.h>
23#include <linux/of_address.h>
24#include <linux/interrupt.h>
25#include <linux/pm_runtime.h>
26#include <linux/reset.h>
27#include <linux/reboot.h>
28#include <linux/notifier.h>
29#include <linux/platform/tegra/common.h>
30#include <linux/pci.h>
31
32#include <uapi/linux/nvgpu.h>
33#include <dt-bindings/soc/gm20b-fuse.h>
34#include <dt-bindings/soc/gp10b-fuse.h>
35#include <dt-bindings/soc/gv11b-fuse.h>
36
37#include <soc/tegra/fuse.h>
38
39#include <nvgpu/hal_init.h>
40#include <nvgpu/dma.h>
41#include <nvgpu/kmem.h>
42#include <nvgpu/nvgpu_common.h>
43#include <nvgpu/soc.h>
44#include <nvgpu/enabled.h>
45#include <nvgpu/debug.h>
46#include <nvgpu/ctxsw_trace.h>
47#include <nvgpu/vidmem.h>
48#include <nvgpu/sim.h>
49#include <nvgpu/clk_arb.h>
50#include <nvgpu/timers.h>
51#include <nvgpu/channel.h>
52#include <nvgpu/nvgpu_err.h>
53
54#include "platform_gk20a.h"
55#include "sysfs.h"
56#include "vgpu/vgpu_linux.h"
57#include "scale.h"
58#include "pci.h"
59#include "module.h"
60#include "module_usermode.h"
61#include "intr.h"
62#include "ioctl.h"
63#include "ioctl_ctrl.h"
64
65#include "os_linux.h"
66#include "os_ops.h"
67#include "ctxsw_trace.h"
68#include "driver_common.h"
69#include "channel.h"
70#include "debug_pmgr.h"
71
72#ifdef CONFIG_NVGPU_SUPPORT_CDE
73#include "cde.h"
74#endif
75
76#define CLASS_NAME "nvidia-gpu"
77/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
78
79#define GK20A_WAIT_FOR_IDLE_MS 2000
80
81#define CREATE_TRACE_POINTS
82#include <trace/events/gk20a.h>
83
84static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb,
85 unsigned long event, void *unused)
86{
87 struct gk20a *g = container_of(nb, struct gk20a, nvgpu_reboot_nb);
88
89 __nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true);
90 return NOTIFY_DONE;
91}
92
93struct device_node *nvgpu_get_node(struct gk20a *g)
94{
95 struct device *dev = dev_from_gk20a(g);
96
97 if (dev_is_pci(dev)) {
98 struct pci_bus *bus = to_pci_dev(dev)->bus;
99
100 while (!pci_is_root_bus(bus))
101 bus = bus->parent;
102
103 return bus->bridge->parent->of_node;
104 }
105
106 return dev->of_node;
107}
108
109void gk20a_busy_noresume(struct gk20a *g)
110{
111 pm_runtime_get_noresume(dev_from_gk20a(g));
112}
113
114/*
115 * Check if the device can go busy.
116 */
117static int nvgpu_can_busy(struct gk20a *g)
118{
119 /* Can't do anything if the system is rebooting/shutting down. */
120 if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING))
121 return 0;
122
123 /* Can't do anything if the driver is restarting. */
124 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
125 return 0;
126
127 return 1;
128}
129
130int gk20a_busy(struct gk20a *g)
131{
132 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
133 int ret = 0;
134 struct device *dev;
135
136 if (!g)
137 return -ENODEV;
138
139 atomic_inc(&g->usage_count.atomic_var);
140
141 down_read(&l->busy_lock);
142
143 if (!nvgpu_can_busy(g)) {
144 ret = -ENODEV;
145 atomic_dec(&g->usage_count.atomic_var);
146 goto fail;
147 }
148
149 dev = dev_from_gk20a(g);
150
151 if (pm_runtime_enabled(dev)) {
152 /* Increment usage count and attempt to resume device */
153 ret = pm_runtime_get_sync(dev);
154 if (ret < 0) {
155 /* Mark suspended so runtime pm will retry later */
156 pm_runtime_set_suspended(dev);
157 pm_runtime_put_noidle(dev);
158 atomic_dec(&g->usage_count.atomic_var);
159 goto fail;
160 }
161 } else {
162 ret = gk20a_gpu_is_virtual(dev) ?
163 vgpu_pm_finalize_poweron(dev) :
164 gk20a_pm_finalize_poweron(dev);
165 if (ret) {
166 atomic_dec(&g->usage_count.atomic_var);
167 goto fail;
168 }
169 }
170
171fail:
172 up_read(&l->busy_lock);
173
174 return ret < 0 ? ret : 0;
175}
176
177void gk20a_idle_nosuspend(struct gk20a *g)
178{
179 pm_runtime_put_noidle(dev_from_gk20a(g));
180}
181
182void gk20a_idle(struct gk20a *g)
183{
184 struct device *dev;
185
186 atomic_dec(&g->usage_count.atomic_var);
187
188 dev = dev_from_gk20a(g);
189
190 if (!(dev && nvgpu_can_busy(g)))
191 return;
192
193 if (pm_runtime_enabled(dev)) {
194 pm_runtime_mark_last_busy(dev);
195 pm_runtime_put_sync_autosuspend(dev);
196 }
197}
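
A minimal sketch of the usual bracket around GPU work, assuming a hypothetical helper: every successful gk20a_busy() must be balanced by gk20a_idle().

static int example_touch_gpu(struct gk20a *g)
{
	int err = gk20a_busy(g);	/* powers on / unrailgates if needed */

	if (err)
		return err;

	/* ... safe to access GPU registers or submit work here ... */

	gk20a_idle(g);			/* lets runtime PM railgate again */
	return 0;
}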
198
199/*
200 * Undoes gk20a_lockout_registers().
201 */
202static int gk20a_restore_registers(struct gk20a *g)
203{
204 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
205
206 l->regs = l->regs_saved;
207 l->bar1 = l->bar1_saved;
208
209 nvgpu_restore_usermode_registers(g);
210
211 return 0;
212}
213
214int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l)
215{
216 struct gk20a *g = &l->g;
217 int err;
218
219 if (l->init_done)
220 return 0;
221
222 err = nvgpu_init_channel_support_linux(l);
223 if (err) {
224 nvgpu_err(g, "failed to init linux channel support");
225 return err;
226 }
227
228 if (l->ops.clk.init_debugfs) {
229 err = l->ops.clk.init_debugfs(g);
230 if (err) {
231 nvgpu_err(g, "failed to init linux clk debugfs");
232 return err;
233 }
234 }
235
236 if (l->ops.therm.init_debugfs) {
237 err = l->ops.therm.init_debugfs(g);
238 if (err) {
239 nvgpu_err(g, "failed to init linux therm debugfs");
240 return err;
241 }
242 }
243
244 if (l->ops.fecs_trace.init_debugfs) {
245 err = l->ops.fecs_trace.init_debugfs(g);
246 if (err) {
247 nvgpu_err(g, "failed to init linux fecs trace debugfs");
248 return err;
249 }
250 }
251
252 err = nvgpu_pmgr_init_debugfs_linux(l);
253 if (err) {
254 nvgpu_err(g, "failed to init linux pmgr debugfs");
255 return err;
256 }
257
258 l->init_done = true;
259
260 return 0;
261}
262
263bool gk20a_check_poweron(struct gk20a *g)
264{
265 bool ret;
266
267 nvgpu_mutex_acquire(&g->power_lock);
268 ret = g->power_on;
269 nvgpu_mutex_release(&g->power_lock);
270
271 return ret;
272}
273
274int gk20a_pm_finalize_poweron(struct device *dev)
275{
276 struct gk20a *g = get_gk20a(dev);
277 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
278 struct gk20a_platform *platform = gk20a_get_platform(dev);
279 int err = 0;
280
281 nvgpu_log_fn(g, " ");
282
283 nvgpu_mutex_acquire(&g->power_lock);
284
285 if (g->power_on)
286 goto done;
287
288 trace_gk20a_finalize_poweron(dev_name(dev));
289
290 /* Increment platform power refcount */
291 if (platform->busy) {
292 err = platform->busy(dev);
293 if (err < 0) {
294 nvgpu_err(g, "failed to poweron platform dependency");
295 goto done;
296 }
297 }
298
299 err = gk20a_restore_registers(g);
300 if (err)
301 goto done;
302
303 nvgpu_restore_usermode_for_poweron(g);
304
305 /* Enable interrupt workqueue */
306 if (!l->nonstall_work_queue) {
307 l->nonstall_work_queue = alloc_workqueue("%s",
308 WQ_HIGHPRI, 1, "mc_nonstall");
309 INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb);
310 }
311
312 err = nvgpu_detect_chip(g);
313 if (err)
314 goto done;
315
316 if (g->sim) {
317 if (g->sim->sim_init_late)
318 g->sim->sim_init_late(g);
319 }
320
321 err = gk20a_finalize_poweron(g);
322 if (err)
323 goto done;
324
325 err = nvgpu_init_os_linux_ops(l);
326 if (err)
327 goto done;
328
329 err = nvgpu_finalize_poweron_linux(l);
330 if (err)
331 goto done;
332
333 nvgpu_init_mm_ce_context(g);
334
335 nvgpu_vidmem_thread_unpause(&g->mm);
336
337 /* Initialise scaling: it will initialise the scaling driver only once */
338 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) &&
339 nvgpu_platform_is_silicon(g)) {
340 gk20a_scale_init(dev);
341 if (platform->initscale)
342 platform->initscale(dev);
343 }
344
345 trace_gk20a_finalize_poweron_done(dev_name(dev));
346
347 enable_irq(g->irq_stall);
348 if (g->irq_stall != g->irq_nonstall)
349 enable_irq(g->irq_nonstall);
350 g->irqs_enabled = 1;
351
352 gk20a_scale_resume(dev_from_gk20a(g));
353
354#ifdef CONFIG_NVGPU_SUPPORT_CDE
355 if (platform->has_cde)
356 gk20a_init_cde_support(l);
357#endif
358
359#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
360 nvgpu_enable_ecc_reporting(g);
361#endif
362
363 err = gk20a_sched_ctrl_init(g);
364 if (err) {
365 nvgpu_err(g, "failed to init sched control");
366 goto done;
367 }
368
369 g->sw_ready = true;
370
371done:
372 if (err) {
373 g->power_on = false;
374
375#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
376 nvgpu_disable_ecc_reporting(g);
377#endif
378 }
379
380 nvgpu_mutex_release(&g->power_lock);
381 return err;
382}
383
384/*
385 * Locks out the driver from accessing GPU registers. This prevents access to
386 * these registers after the GPU has been clock or power gated. This should help
387 * find annoying bugs where register reads and writes are silently dropped
388 * after the GPU has been turned off. On older chips these reads and writes can
389 * also lock the entire CPU up.
390 */
391static int gk20a_lockout_registers(struct gk20a *g)
392{
393 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
394
395 l->regs = NULL;
396 l->bar1 = NULL;
397
398 nvgpu_lockout_usermode_registers(g);
399
400 return 0;
401}
402
403static int gk20a_pm_prepare_poweroff(struct device *dev)
404{
405 struct gk20a *g = get_gk20a(dev);
406#ifdef CONFIG_NVGPU_SUPPORT_CDE
407 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
408#endif
409 struct gk20a_platform *platform = gk20a_get_platform(dev);
410 bool irqs_enabled;
411 int ret = 0;
412
413 nvgpu_log_fn(g, " ");
414
415 nvgpu_mutex_acquire(&g->power_lock);
416
417 if (!g->power_on)
418 goto done;
419
420 /* disable IRQs and wait for completion */
421 irqs_enabled = g->irqs_enabled;
422 if (irqs_enabled) {
423 disable_irq(g->irq_stall);
424 if (g->irq_stall != g->irq_nonstall)
425 disable_irq(g->irq_nonstall);
426 g->irqs_enabled = 0;
427 }
428
429 gk20a_scale_suspend(dev);
430
431#ifdef CONFIG_NVGPU_SUPPORT_CDE
432 gk20a_cde_suspend(l);
433#endif
434
435 ret = gk20a_prepare_poweroff(g);
436 if (ret)
437 goto error;
438
439 /* Decrement platform power refcount */
440 if (platform->idle)
441 platform->idle(dev);
442
443 /* Stop CPU from accessing the GPU registers. */
444 gk20a_lockout_registers(g);
445
446#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
447 nvgpu_disable_ecc_reporting(g);
448#endif
449
450 nvgpu_hide_usermode_for_poweroff(g);
451 nvgpu_mutex_release(&g->power_lock);
452 return 0;
453
454error:
455 /* re-enable IRQs if previously enabled */
456 if (irqs_enabled) {
457 enable_irq(g->irq_stall);
458 if (g->irq_stall != g->irq_nonstall)
459 enable_irq(g->irq_nonstall);
460 g->irqs_enabled = 1;
461 }
462
463 gk20a_scale_resume(dev);
464done:
465 nvgpu_mutex_release(&g->power_lock);
466
467 return ret;
468}
469
470static struct of_device_id tegra_gk20a_of_match[] = {
471#ifdef CONFIG_TEGRA_GK20A
472 { .compatible = "nvidia,tegra210-gm20b",
473 .data = &gm20b_tegra_platform },
474 { .compatible = "nvidia,tegra186-gp10b",
475 .data = &gp10b_tegra_platform },
476 { .compatible = "nvidia,gv11b",
477 .data = &gv11b_tegra_platform },
478#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
479 { .compatible = "nvidia,gv11b-vgpu",
480 .data = &gv11b_vgpu_tegra_platform},
481#endif
482#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
483 { .compatible = "nvidia,tegra124-gk20a-vgpu",
484 .data = &vgpu_tegra_platform },
485#endif
486#endif
487
488 { },
489};
490MODULE_DEVICE_TABLE(of, tegra_gk20a_of_match);
491
492#ifdef CONFIG_PM
493/**
494 * __gk20a_do_idle() - force the GPU to idle and railgate
495 *
496 * On success, this call MUST be balanced by the caller with __gk20a_do_unidle().
497 *
498 * Acquires two locks: &l->busy_lock and &platform->railgate_lock.
499 * On success, we return holding these locks.
500 * On failure, we release these locks before returning.
501 */
502int __gk20a_do_idle(struct gk20a *g, bool force_reset)
503{
504 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
505 struct device *dev = dev_from_gk20a(g);
506 struct gk20a_platform *platform = dev_get_drvdata(dev);
507 struct nvgpu_timeout timeout;
508 int ref_cnt;
509 int target_ref_cnt = 0;
510 bool is_railgated;
511 int err = 0;
512
513 /*
514 * Hold back deterministic submits and changes to deterministic
515 * channels - this must be outside the power busy locks.
516 */
517 gk20a_channel_deterministic_idle(g);
518
519 /* acquire busy lock to block other busy() calls */
520 down_write(&l->busy_lock);
521
522 /* acquire railgate lock to prevent unrailgate in midst of do_idle() */
523 nvgpu_mutex_acquire(&platform->railgate_lock);
524
525 /* check if it is already railgated? */
526 if (platform->is_railgated(dev))
527 return 0;
528
529 /*
530 * release railgate_lock, prevent suspend by incrementing usage counter,
531 * re-acquire railgate_lock
532 */
533 nvgpu_mutex_release(&platform->railgate_lock);
534 pm_runtime_get_sync(dev);
535
536 /*
537 * One refcount is taken in this API.
538 * If the user has disabled rail gating, we take
539 * one extra refcount.
540 */
541 if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE))
542 target_ref_cnt = 1;
543 else
544 target_ref_cnt = 2;
545 nvgpu_mutex_acquire(&platform->railgate_lock);
546
547 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
548 NVGPU_TIMER_CPU_TIMER);
549
550 /* check and wait until GPU is idle (with a timeout) */
551 do {
552 nvgpu_usleep_range(1000, 1100);
553 ref_cnt = atomic_read(&dev->power.usage_count);
554 } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout));
555
556 if (ref_cnt != target_ref_cnt) {
557 nvgpu_err(g, "failed to idle - refcount %d != target %d",
558 ref_cnt, target_ref_cnt);
559 goto fail_drop_usage_count;
560 }
561
562 /* check if global force_reset flag is set */
563 force_reset |= platform->force_reset_in_do_idle;
564
565 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
566 NVGPU_TIMER_CPU_TIMER);
567
568 if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) && !force_reset) {
569 /*
570 * Case 1 : GPU railgate is supported
571 *
572 * if GPU is now idle, we will have only one ref count,
573 * drop this ref which will rail gate the GPU
574 */
575 pm_runtime_put_sync(dev);
576
577 /* add sufficient delay to allow GPU to rail gate */
578 nvgpu_msleep(g->railgate_delay);
579
580 /* check in loop if GPU is railgated or not */
581 do {
582 nvgpu_usleep_range(1000, 1100);
583 is_railgated = platform->is_railgated(dev);
584 } while (!is_railgated && !nvgpu_timeout_expired(&timeout));
585
586 if (is_railgated) {
587 return 0;
588 } else {
589 nvgpu_err(g, "failed to idle in timeout");
590 goto fail_timeout;
591 }
592 } else {
593 /*
594 * Case 2 : GPU railgate is not supported or we explicitly
595 * do not want to depend on runtime PM
596 *
597 * if GPU is now idle, call prepare_poweroff() to save the
598 * state and then do explicit railgate
599 *
600 * __gk20a_do_unidle() needs to unrailgate, call
601 * finalize_poweron(), and then call pm_runtime_put_sync()
602 * to balance the GPU usage counter
603 */
604
605 /* Save the GPU state */
606 err = gk20a_pm_prepare_poweroff(dev);
607 if (err)
608 goto fail_drop_usage_count;
609
610 /* railgate GPU */
611 platform->railgate(dev);
612
613 nvgpu_udelay(10);
614
615 g->forced_reset = true;
616 return 0;
617 }
618
619fail_drop_usage_count:
620 pm_runtime_put_noidle(dev);
621fail_timeout:
622 nvgpu_mutex_release(&platform->railgate_lock);
623 up_write(&l->busy_lock);
624 gk20a_channel_deterministic_unidle(g);
625 return -EBUSY;
626}
627
628/**
629 * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called
630 * from outside of GPU driver
631 *
632 * On success, this call MUST be balanced by the caller with gk20a_do_unidle().
633 */
634static int gk20a_do_idle(void *_g)
635{
636 struct gk20a *g = (struct gk20a *)_g;
637
638 return __gk20a_do_idle(g, true);
639}
640
641/**
642 * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle()
643 */
644int __gk20a_do_unidle(struct gk20a *g)
645{
646 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
647 struct device *dev = dev_from_gk20a(g);
648 struct gk20a_platform *platform = dev_get_drvdata(dev);
649 int err;
650
651 if (g->forced_reset) {
652 /*
653 * If we did a forced-reset/railgate
654 * then unrailgate the GPU here first
655 */
656 platform->unrailgate(dev);
657
658 /* restore the GPU state */
659 err = gk20a_pm_finalize_poweron(dev);
660 if (err)
661 return err;
662
663 /* balance GPU usage counter */
664 pm_runtime_put_sync(dev);
665
666 g->forced_reset = false;
667 }
668
669 /* release the lock and open up all other busy() calls */
670 nvgpu_mutex_release(&platform->railgate_lock);
671 up_write(&l->busy_lock);
672
673 gk20a_channel_deterministic_unidle(g);
674
675 return 0;
676}
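
As documented above, a successful __gk20a_do_idle() returns with busy_lock and railgate_lock held; a sketch of the balanced pattern (illustrative only, assuming the caller already has g and err in scope):

err = __gk20a_do_idle(g, true);
if (err)
	return err;		/* locks were already dropped on the failure path */

/* GPU is idle and railgated (or force-reset) here; do the maintenance work. */

err = __gk20a_do_unidle(g);	/* unrailgates if needed and releases both locks */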
677
678/**
679 * gk20a_do_unidle() - wrap up for __gk20a_do_unidle()
680 */
681static int gk20a_do_unidle(void *_g)
682{
683 struct gk20a *g = (struct gk20a *)_g;
684
685 return __gk20a_do_unidle(g);
686}
687#endif
688
689void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i,
690 struct resource **out)
691{
692 struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
693
694 if (!r)
695 return NULL;
696 if (out)
697 *out = r;
698 return devm_ioremap_resource(&dev->dev, r);
699}
700
701void __iomem *nvgpu_devm_ioremap(struct device *dev, resource_size_t offset,
702 resource_size_t size)
703{
704 return devm_ioremap(dev, offset, size);
705}
706
707u64 nvgpu_resource_addr(struct platform_device *dev, int i)
708{
709 struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
710
711 if (!r)
712 return 0;
713
714 return r->start;
715}
716
717static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
718{
719 struct gk20a *g = dev_id;
720
721 return nvgpu_intr_stall(g);
722}
723
724static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
725{
726 struct gk20a *g = dev_id;
727
728 return nvgpu_intr_nonstall(g);
729}
730
731static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
732{
733 struct gk20a *g = dev_id;
734
735 return nvgpu_intr_thread_stall(g);
736}
737
738void gk20a_remove_support(struct gk20a *g)
739{
740 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
741 struct sim_nvgpu_linux *sim_linux;
742
743 tegra_unregister_idle_unidle(gk20a_do_idle);
744
745 nvgpu_kfree(g, g->dbg_regops_tmp_buf);
746
747 nvgpu_remove_channel_support_linux(l);
748
749 if (g->pmu.remove_support)
750 g->pmu.remove_support(&g->pmu);
751
752 if (g->acr.remove_support != NULL) {
753 g->acr.remove_support(&g->acr);
754 }
755
756 if (g->gr.remove_support)
757 g->gr.remove_support(&g->gr);
758
759 if (g->mm.remove_ce_support)
760 g->mm.remove_ce_support(&g->mm);
761
762 if (g->fifo.remove_support)
763 g->fifo.remove_support(&g->fifo);
764
765 if (g->mm.remove_support)
766 g->mm.remove_support(&g->mm);
767
768 if (g->sim) {
769 sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
770 if (g->sim->remove_support)
771 g->sim->remove_support(g);
772 if (sim_linux->remove_support_linux)
773 sim_linux->remove_support_linux(g);
774 }
775
776 nvgpu_remove_usermode_support(g);
777
778 nvgpu_free_enabled_flags(g);
779
780 gk20a_lockout_registers(g);
781}
782
783static int gk20a_init_support(struct platform_device *pdev)
784{
785 struct device *dev = &pdev->dev;
786 struct gk20a *g = get_gk20a(dev);
787 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
788 int err = -ENOMEM;
789
790 tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g);
791
792 l->regs = nvgpu_devm_ioremap_resource(pdev,
793 GK20A_BAR0_IORESOURCE_MEM,
794 &l->reg_mem);
795 if (IS_ERR(l->regs)) {
796 nvgpu_err(g, "failed to remap gk20a registers");
797 err = PTR_ERR(l->regs);
798 goto fail;
799 }
800
801 l->regs_bus_addr = nvgpu_resource_addr(pdev,
802 GK20A_BAR0_IORESOURCE_MEM);
803 if (!l->regs_bus_addr) {
804 nvgpu_err(g, "failed to read register bus offset");
805 err = -ENODEV;
806 goto fail;
807 }
808
809 l->bar1 = nvgpu_devm_ioremap_resource(pdev,
810 GK20A_BAR1_IORESOURCE_MEM,
811 &l->bar1_mem);
812 if (IS_ERR(l->bar1)) {
813 nvgpu_err(g, "failed to remap gk20a bar1");
814 err = PTR_ERR(l->bar1);
815 goto fail;
816 }
817
818 err = nvgpu_init_sim_support_linux(g, pdev);
819 if (err)
820 goto fail;
821 err = nvgpu_init_sim_support(g);
822 if (err)
823 goto fail_sim;
824
825 nvgpu_init_usermode_support(g);
826 return 0;
827
828fail_sim:
829 nvgpu_remove_sim_support_linux(g);
830fail:
831 if (l->regs)
832 l->regs = NULL;
833
834 if (l->bar1)
835 l->bar1 = NULL;
836
837 return err;
838}
839
840static int gk20a_pm_railgate(struct device *dev)
841{
842 struct gk20a_platform *platform = dev_get_drvdata(dev);
843 int ret = 0;
844 struct gk20a *g = get_gk20a(dev);
845
846 /* return early if platform didn't implement railgate */
847 if (!platform->railgate)
848 return 0;
849
850 /* if platform is already railgated, then just return */
851 if (platform->is_railgated && platform->is_railgated(dev))
852 return ret;
853
854#ifdef CONFIG_DEBUG_FS
855 g->pstats.last_rail_gate_start = jiffies;
856
857 if (g->pstats.railgating_cycle_count >= 1)
858 g->pstats.total_rail_ungate_time_ms =
859 g->pstats.total_rail_ungate_time_ms +
860 jiffies_to_msecs(g->pstats.last_rail_gate_start -
861 g->pstats.last_rail_ungate_complete);
862#endif
863
864 ret = platform->railgate(dev);
865 if (ret) {
866 nvgpu_err(g, "failed to railgate platform, err=%d", ret);
867 return ret;
868 }
869
870#ifdef CONFIG_DEBUG_FS
871 g->pstats.last_rail_gate_complete = jiffies;
872#endif
873 ret = tegra_fuse_clock_disable();
874 if (ret)
875 nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret);
876
877 return ret;
878}
879
880static int gk20a_pm_unrailgate(struct device *dev)
881{
882 struct gk20a_platform *platform = dev_get_drvdata(dev);
883 int ret = 0;
884 struct gk20a *g = get_gk20a(dev);
885
886 /* return early if platform didn't implement unrailgate */
887 if (!platform->unrailgate)
888 return 0;
889
890 ret = tegra_fuse_clock_enable();
891 if (ret) {
892 nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret);
893 return ret;
894 }
895#ifdef CONFIG_DEBUG_FS
896 g->pstats.last_rail_ungate_start = jiffies;
897 if (g->pstats.railgating_cycle_count >= 1)
898 g->pstats.total_rail_gate_time_ms =
899 g->pstats.total_rail_gate_time_ms +
900 jiffies_to_msecs(g->pstats.last_rail_ungate_start -
901 g->pstats.last_rail_gate_complete);
902
903 g->pstats.railgating_cycle_count++;
904#endif
905
906 trace_gk20a_pm_unrailgate(dev_name(dev));
907
908 nvgpu_mutex_acquire(&platform->railgate_lock);
909 ret = platform->unrailgate(dev);
910 nvgpu_mutex_release(&platform->railgate_lock);
911
912#ifdef CONFIG_DEBUG_FS
913 g->pstats.last_rail_ungate_complete = jiffies;
914#endif
915
916 return ret;
917}
918
919/*
920 * Remove association of the driver with OS interrupt handler
921 */
922void nvgpu_free_irq(struct gk20a *g)
923{
924 struct device *dev = dev_from_gk20a(g);
925
926 devm_free_irq(dev, g->irq_stall, g);
927 if (g->irq_stall != g->irq_nonstall)
928 devm_free_irq(dev, g->irq_nonstall, g);
929}
930
931/*
932 * Idle the GPU in preparation for shutdown/remove.
933 * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW
934 * state to prevent further activity on the driver SW side.
935 * On driver removal, quiesce() should be called after start_unload().
936 */
937int nvgpu_quiesce(struct gk20a *g)
938{
939 int err;
940 struct device *dev = dev_from_gk20a(g);
941
942 if (g->power_on) {
943 err = gk20a_wait_for_idle(g);
944 if (err) {
945 nvgpu_err(g, "failed to idle GPU, err=%d", err);
946 return err;
947 }
948
949 err = gk20a_fifo_disable_all_engine_activity(g, true);
950 if (err) {
951 nvgpu_err(g,
952 "failed to disable engine activity, err=%d",
953 err);
954 return err;
955 }
956
957 err = gk20a_fifo_wait_engine_idle(g);
958 if (err) {
959 nvgpu_err(g, "failed to idle engines, err=%d",
960 err);
961 return err;
962 }
963 }
964
965 if (gk20a_gpu_is_virtual(dev))
966 err = vgpu_pm_prepare_poweroff(dev);
967 else
968 err = gk20a_pm_prepare_poweroff(dev);
969
970 if (err)
971 nvgpu_err(g, "failed to prepare for poweroff, err=%d",
972 err);
973
974 return err;
975}
976
977static void gk20a_pm_shutdown(struct platform_device *pdev)
978{
979 struct gk20a_platform *platform = platform_get_drvdata(pdev);
980 struct gk20a *g = platform->g;
981 int err;
982
983 nvgpu_info(g, "shutting down");
984
985 /* vgpu has nothing to clean up currently */
986 if (gk20a_gpu_is_virtual(&pdev->dev))
987 return;
988
989 if (!g->power_on)
990 goto finish;
991
992 gk20a_driver_start_unload(g);
993
994 /* If GPU is already railgated,
995 * just prevent more requests, and return */
996 if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
997 __pm_runtime_disable(&pdev->dev, false);
998 nvgpu_info(g, "already railgated, shut down complete");
999 return;
1000 }
1001
1002 /* Prevent more requests by disabling Runtime PM */
1003 __pm_runtime_disable(&pdev->dev, false);
1004
1005 err = nvgpu_quiesce(g);
1006 if (err)
1007 goto finish;
1008
1009 err = gk20a_pm_railgate(&pdev->dev);
1010 if (err)
1011 nvgpu_err(g, "failed to railgate, err=%d", err);
1012
1013finish:
1014 nvgpu_info(g, "shut down complete");
1015}
1016
1017#ifdef CONFIG_PM
1018static int gk20a_pm_runtime_resume(struct device *dev)
1019{
1020 int err = 0;
1021
1022 err = gk20a_pm_unrailgate(dev);
1023 if (err)
1024 goto fail;
1025
1026 if (gk20a_gpu_is_virtual(dev))
1027 err = vgpu_pm_finalize_poweron(dev);
1028 else
1029 err = gk20a_pm_finalize_poweron(dev);
1030 if (err)
1031 goto fail_poweron;
1032
1033 return 0;
1034
1035fail_poweron:
1036 gk20a_pm_railgate(dev);
1037fail:
1038 return err;
1039}
1040
1041static int gk20a_pm_runtime_suspend(struct device *dev)
1042{
1043 int err = 0;
1044 struct gk20a *g = get_gk20a(dev);
1045
1046 if (!g)
1047 return 0;
1048
1049 if (gk20a_gpu_is_virtual(dev))
1050 err = vgpu_pm_prepare_poweroff(dev);
1051 else
1052 err = gk20a_pm_prepare_poweroff(dev);
1053 if (err) {
1054 nvgpu_err(g, "failed to power off, err=%d", err);
1055 goto fail;
1056 }
1057
1058 err = gk20a_pm_railgate(dev);
1059 if (err)
1060 goto fail;
1061
1062 return 0;
1063
1064fail:
1065 gk20a_pm_finalize_poweron(dev);
1066 pm_runtime_mark_last_busy(dev);
1067 return err;
1068}
1069
1070static int gk20a_pm_suspend(struct device *dev)
1071{
1072 struct gk20a_platform *platform = dev_get_drvdata(dev);
1073 struct gk20a *g = get_gk20a(dev);
1074 int ret = 0;
1075 int usage_count;
1076 struct nvgpu_timeout timeout;
1077
1078 if (!g->power_on) {
1079 if (platform->suspend)
1080 ret = platform->suspend(dev);
1081
1082 if (ret)
1083 return ret;
1084
1085 if (!pm_runtime_enabled(dev))
1086 ret = gk20a_pm_railgate(dev);
1087
1088 return ret;
1089 }
1090
1091 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
1092 NVGPU_TIMER_CPU_TIMER);
1093 /*
1094 * Hold back deterministic submits and changes to deterministic
1095 * channels - this must be outside the power busy locks.
1096 */
1097 gk20a_channel_deterministic_idle(g);
1098
1099 /* check and wait until GPU is idle (with a timeout) */
1100 do {
1101 nvgpu_usleep_range(1000, 1100);
1102 usage_count = nvgpu_atomic_read(&g->usage_count);
1103 } while (usage_count != 0 && !nvgpu_timeout_expired(&timeout));
1104
1105 if (usage_count != 0) {
1106 nvgpu_err(g, "failed to idle - usage_count %d", usage_count);
1107 ret = -EINVAL;
1108 goto fail_idle;
1109 }
1110
1111 ret = gk20a_pm_runtime_suspend(dev);
1112 if (ret)
1113 goto fail_idle;
1114
1115 if (platform->suspend)
1116 ret = platform->suspend(dev);
1117 if (ret)
1118 goto fail_suspend;
1119
1120 g->suspended = true;
1121
1122 return 0;
1123
1124fail_suspend:
1125 gk20a_pm_runtime_resume(dev);
1126fail_idle:
1127 gk20a_channel_deterministic_unidle(g);
1128 return ret;
1129}
1130
1131static int gk20a_pm_resume(struct device *dev)
1132{
1133 struct gk20a_platform *platform = dev_get_drvdata(dev);
1134 struct gk20a *g = get_gk20a(dev);
1135 int ret = 0;
1136
1137 if (!g->suspended) {
1138 if (platform->resume)
1139 ret = platform->resume(dev);
1140 if (ret)
1141 return ret;
1142
1143 if (!pm_runtime_enabled(dev))
1144 ret = gk20a_pm_unrailgate(dev);
1145
1146 return ret;
1147 }
1148
1149 if (platform->resume)
1150 ret = platform->resume(dev);
1151 if (ret)
1152 return ret;
1153
1154 ret = gk20a_pm_runtime_resume(dev);
1155 if (ret)
1156 return ret;
1157
1158 g->suspended = false;
1159
1160 gk20a_channel_deterministic_unidle(g);
1161
1162 return ret;
1163}
1164
1165static const struct dev_pm_ops gk20a_pm_ops = {
1166 .runtime_resume = gk20a_pm_runtime_resume,
1167 .runtime_suspend = gk20a_pm_runtime_suspend,
1168 .resume = gk20a_pm_resume,
1169 .suspend = gk20a_pm_suspend,
1170};
1171#endif
1172
1173static int gk20a_pm_init(struct device *dev)
1174{
1175 struct gk20a *g = get_gk20a(dev);
1176 int err = 0;
1177
1178 nvgpu_log_fn(g, " ");
1179
1180 /*
1181 * Initialise runtime PM. When railgating is disabled,
1182 * set the autosuspend delay to a negative value, which
1183 * prevents runtime suspend of the device.
1184 */
1185 if (g->railgate_delay && nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE))
1186 pm_runtime_set_autosuspend_delay(dev,
1187 g->railgate_delay);
1188 else
1189 pm_runtime_set_autosuspend_delay(dev, -1);
1190
1191 pm_runtime_use_autosuspend(dev);
1192 pm_runtime_enable(dev);
1193
1194 return err;
1195}
1196
1197static int gk20a_pm_deinit(struct device *dev)
1198{
1199 pm_runtime_dont_use_autosuspend(dev);
1200 pm_runtime_disable(dev);
1201 return 0;
1202}
1203
1204/*
1205 * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING.
1206 */
1207void gk20a_driver_start_unload(struct gk20a *g)
1208{
1209 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
1210
1211 nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n");
1212
1213 down_write(&l->busy_lock);
1214 __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
1215 /* GR SW ready needs to be invalidated at this time with the busy lock
1216 * held to prevent a race condition in the gr/mm code */
1217 g->gr.sw_ready = false;
1218 g->sw_ready = false;
1219 up_write(&l->busy_lock);
1220
1221 if (g->is_virtual)
1222 return;
1223
1224 gk20a_wait_for_idle(g);
1225
1226 nvgpu_wait_for_deferred_interrupts(g);
1227
1228 if (l->nonstall_work_queue) {
1229 cancel_work_sync(&l->nonstall_fn_work);
1230 destroy_workqueue(l->nonstall_work_queue);
1231 l->nonstall_work_queue = NULL;
1232 }
1233}
1234
1235static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
1236{
1237 gk20a_get_platform(&pdev->dev)->g = gk20a;
1238}
1239
1240static int nvgpu_read_fuse_overrides(struct gk20a *g)
1241{
1242 struct device_node *np = nvgpu_get_node(g);
1243 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
1244 u32 *fuses;
1245 int count, i;
1246
1247 if (!np) /* may be pcie device */
1248 return 0;
1249
1250 count = of_property_count_elems_of_size(np, "fuse-overrides", 8);
1251 if (count <= 0)
1252 return count;
1253
1254 fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2);
1255 if (!fuses)
1256 return -ENOMEM;
1257 of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2);
1258 for (i = 0; i < count; i++) {
1259 u32 fuse, value;
1260
1261 fuse = fuses[2 * i];
1262 value = fuses[2 * i + 1];
1263 switch (fuse) {
1264 case GM20B_FUSE_OPT_TPC_DISABLE:
1265 g->tpc_fs_mask_user = ~value;
1266 break;
1267 case GP10B_FUSE_OPT_ECC_EN:
1268 g->gr.fecs_feature_override_ecc_val = value;
1269 break;
1270 case GV11B_FUSE_OPT_TPC_DISABLE:
1271 if (platform->set_tpc_pg_mask != NULL)
1272 platform->set_tpc_pg_mask(dev_from_gk20a(g),
1273 value);
1274 break;
1275 default:
1276 nvgpu_err(g, "ignore unknown fuse override %08x", fuse);
1277 break;
1278 }
1279 }
1280
1281 nvgpu_kfree(g, fuses);
1282
1283 return 0;
1284}
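
The property parsed above is a flat array of <fuse-id value> u32 pairs (hence the element size of 8 bytes); a hypothetical device-tree fragment, using the dt-bindings macros included at the top of this file and an illustrative node:

gpu {
	/* ... */
	fuse-overrides = <GP10B_FUSE_OPT_ECC_EN 1>,
			 <GM20B_FUSE_OPT_TPC_DISABLE 0x1>;
};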
1285
1286static int gk20a_probe(struct platform_device *dev)
1287{
1288 struct nvgpu_os_linux *l = NULL;
1289 struct gk20a *gk20a;
1290 int err;
1291 struct gk20a_platform *platform = NULL;
1292 struct device_node *np;
1293
1294 if (dev->dev.of_node) {
1295 const struct of_device_id *match;
1296
1297 match = of_match_device(tegra_gk20a_of_match, &dev->dev);
1298 if (match)
1299 platform = (struct gk20a_platform *)match->data;
1300 } else
1301 platform = (struct gk20a_platform *)dev->dev.platform_data;
1302
1303 if (!platform) {
1304 dev_err(&dev->dev, "no platform data\n");
1305 return -ENODATA;
1306 }
1307
1308 platform_set_drvdata(dev, platform);
1309
1310 if (gk20a_gpu_is_virtual(&dev->dev))
1311 return vgpu_probe(dev);
1312
1313 l = kzalloc(sizeof(*l), GFP_KERNEL);
1314 if (!l) {
1315 dev_err(&dev->dev, "couldn't allocate gk20a support");
1316 return -ENOMEM;
1317 }
1318
1319 hash_init(l->ecc_sysfs_stats_htable);
1320
1321 gk20a = &l->g;
1322
1323 nvgpu_log_fn(gk20a, " ");
1324
1325 nvgpu_init_gk20a(gk20a);
1326 set_gk20a(dev, gk20a);
1327 l->dev = &dev->dev;
1328 gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK;
1329
1330 nvgpu_kmem_init(gk20a);
1331
1332 err = nvgpu_init_enabled_flags(gk20a);
1333 if (err)
1334 goto return_err;
1335
1336 np = nvgpu_get_node(gk20a);
1337 if (of_dma_is_coherent(np)) {
1338 __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
1339 __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
1340 }
1341
1342 if (nvgpu_platform_is_simulation(gk20a))
1343 __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
1344
1345 gk20a->irq_stall = platform_get_irq(dev, 0);
1346 gk20a->irq_nonstall = platform_get_irq(dev, 1);
1347 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) {
1348 err = -ENXIO;
1349 goto return_err;
1350 }
1351
1352 err = devm_request_threaded_irq(&dev->dev,
1353 gk20a->irq_stall,
1354 gk20a_intr_isr_stall,
1355 gk20a_intr_thread_stall,
1356 0, "gk20a_stall", gk20a);
1357 if (err) {
1358 dev_err(&dev->dev,
1359 "failed to request stall intr irq @ %d\n",
1360 gk20a->irq_stall);
1361 goto return_err;
1362 }
1363 err = devm_request_irq(&dev->dev,
1364 gk20a->irq_nonstall,
1365 gk20a_intr_isr_nonstall,
1366 0, "gk20a_nonstall", gk20a);
1367 if (err) {
1368 dev_err(&dev->dev,
1369 "failed to request non-stall intr irq @ %d\n",
1370 gk20a->irq_nonstall);
1371 goto return_err;
1372 }
1373 disable_irq(gk20a->irq_stall);
1374 if (gk20a->irq_stall != gk20a->irq_nonstall)
1375 disable_irq(gk20a->irq_nonstall);
1376
1377 err = gk20a_init_support(dev);
1378 if (err)
1379 goto return_err;
1380
1381 err = nvgpu_read_fuse_overrides(gk20a);
1382
1383#ifdef CONFIG_RESET_CONTROLLER
1384 platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
1385 if (IS_ERR(platform->reset_control))
1386 platform->reset_control = NULL;
1387#endif
1388
1389 err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class);
1390 if (err)
1391 goto return_err;
1392
1393 err = gk20a_pm_init(&dev->dev);
1394 if (err) {
1395 dev_err(&dev->dev, "pm init failed");
1396 goto return_err;
1397 }
1398
1399#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
1400 nvgpu_init_ecc_reporting(gk20a);
1401#endif
1402
1403 gk20a->nvgpu_reboot_nb.notifier_call =
1404 nvgpu_kernel_shutdown_notification;
1405 err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb);
1406 if (err)
1407 goto return_err;
1408
1409 return 0;
1410
1411return_err:
1412 nvgpu_free_enabled_flags(gk20a);
1413
1414 /*
1415 * Last since the above allocs may use data structures in here.
1416 */
1417 nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP);
1418
1419 kfree(l);
1420
1421 return err;
1422}
1423
1424int nvgpu_remove(struct device *dev, struct class *class)
1425{
1426 struct gk20a *g = get_gk20a(dev);
1427#ifdef CONFIG_NVGPU_SUPPORT_CDE
1428 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
1429#endif
1430 struct gk20a_platform *platform = gk20a_get_platform(dev);
1431 int err;
1432
1433 nvgpu_log_fn(g, " ");
1434
1435 err = nvgpu_quiesce(g);
1436 WARN(err, "gpu failed to idle during driver removal");
1437
1438 if (nvgpu_mem_is_valid(&g->syncpt_mem))
1439 nvgpu_dma_free(g, &g->syncpt_mem);
1440
1441#ifdef CONFIG_NVGPU_SUPPORT_CDE
1442 if (platform->has_cde)
1443 gk20a_cde_destroy(l);
1444#endif
1445
1446#ifdef CONFIG_GK20A_CTXSW_TRACE
1447 gk20a_ctxsw_trace_cleanup(g);
1448#endif
1449
1450 gk20a_sched_ctrl_cleanup(g);
1451
1452 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
1453 gk20a_scale_exit(dev);
1454
1455 nvgpu_clk_arb_cleanup_arbiter(g);
1456
1457 gk20a_user_deinit(dev, class);
1458
1459 gk20a_debug_deinit(g);
1460
1461 nvgpu_remove_sysfs(dev);
1462
1463 if (platform->secure_buffer.destroy)
1464 platform->secure_buffer.destroy(g,
1465 &platform->secure_buffer);
1466
1467 if (platform->remove)
1468 platform->remove(dev);
1469
1470 nvgpu_mutex_destroy(&g->clk_arb_enable_lock);
1471
1472 nvgpu_log_fn(g, "removed");
1473
1474 return err;
1475}
1476
1477static int __exit gk20a_remove(struct platform_device *pdev)
1478{
1479 int err;
1480 struct device *dev = &pdev->dev;
1481 struct gk20a *g = get_gk20a(dev);
1482
1483 if (gk20a_gpu_is_virtual(dev))
1484 return vgpu_remove(pdev);
1485
1486 err = nvgpu_remove(dev, &nvgpu_class);
1487
1488 unregister_reboot_notifier(&g->nvgpu_reboot_nb);
1489
1490 set_gk20a(pdev, NULL);
1491
1492 gk20a_put(g);
1493
1494 gk20a_pm_deinit(dev);
1495
1496 return err;
1497}
1498
1499static struct platform_driver gk20a_driver = {
1500 .probe = gk20a_probe,
1501 .remove = __exit_p(gk20a_remove),
1502 .shutdown = gk20a_pm_shutdown,
1503 .driver = {
1504 .owner = THIS_MODULE,
1505 .name = "gk20a",
1506 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
1507#ifdef CONFIG_OF
1508 .of_match_table = tegra_gk20a_of_match,
1509#endif
1510#ifdef CONFIG_PM
1511 .pm = &gk20a_pm_ops,
1512#endif
1513 .suppress_bind_attrs = true,
1514 }
1515};
1516
1517struct class nvgpu_class = {
1518 .owner = THIS_MODULE,
1519 .name = CLASS_NAME,
1520};
1521
1522static int __init gk20a_init(void)
1523{
1524
1525 int ret;
1526
1527 ret = class_register(&nvgpu_class);
1528 if (ret)
1529 return ret;
1530
1531 ret = nvgpu_pci_init();
1532 if (ret)
1533 return ret;
1534
1535 return platform_driver_register(&gk20a_driver);
1536}
1537
1538static void __exit gk20a_exit(void)
1539{
1540 nvgpu_pci_exit();
1541 platform_driver_unregister(&gk20a_driver);
1542 class_unregister(&nvgpu_class);
1543}
1544
1545MODULE_LICENSE("GPL v2");
1546module_init(gk20a_init);
1547module_exit(gk20a_exit);
diff --git a/include/os/linux/module.h b/include/os/linux/module.h
deleted file mode 100644
index 76c7274..0000000
--- a/include/os/linux/module.h
+++ /dev/null
@@ -1,35 +0,0 @@
1/*
2 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
14#define __NVGPU_COMMON_LINUX_MODULE_H__
15
16struct gk20a;
17struct device;
18struct nvgpu_os_linux;
19
20int gk20a_pm_finalize_poweron(struct device *dev);
21int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
22void gk20a_remove_support(struct gk20a *g);
23void gk20a_driver_start_unload(struct gk20a *g);
24int nvgpu_quiesce(struct gk20a *g);
25int nvgpu_remove(struct device *dev, struct class *class);
26void nvgpu_free_irq(struct gk20a *g);
27struct device_node *nvgpu_get_node(struct gk20a *g);
28void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i,
29 struct resource **out);
30void __iomem *nvgpu_devm_ioremap(struct device *dev, resource_size_t offset,
31 resource_size_t size);
32u64 nvgpu_resource_addr(struct platform_device *dev, int i);
33extern struct class nvgpu_class;
34
35#endif
diff --git a/include/os/linux/module_usermode.c b/include/os/linux/module_usermode.c
deleted file mode 100644
index ea01c1b..0000000
--- a/include/os/linux/module_usermode.c
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/types.h>
18
19#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
20
21#include "os_linux.h"
22
23/*
24 * Locks out the driver from accessing GPU registers. This prevents access to
25 * these registers after the GPU has been clock or power gated. This should help
26 * find annoying bugs where register reads and writes are silently dropped
27 * after the GPU has been turned off. On older chips these reads and writes can
28 * also lock the entire CPU up.
29 */
30void nvgpu_lockout_usermode_registers(struct gk20a *g)
31{
32 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
33
34 l->usermode_regs = NULL;
35}
36
37/*
38 * Undoes nvgpu_lockout_usermode_registers().
39 */
40void nvgpu_restore_usermode_registers(struct gk20a *g)
41{
42 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
43
44 l->usermode_regs = l->usermode_regs_saved;
45}
46
47void nvgpu_remove_usermode_support(struct gk20a *g)
48{
49 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
50
51 if (l->usermode_regs) {
52 l->usermode_regs = NULL;
53 }
54}
55
56void nvgpu_init_usermode_support(struct gk20a *g)
57{
58 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
59
60 l->usermode_regs = l->regs + usermode_cfg0_r();
61 l->usermode_regs_saved = l->usermode_regs;
62}
diff --git a/include/os/linux/module_usermode.h b/include/os/linux/module_usermode.h
deleted file mode 100644
index b17053c..0000000
--- a/include/os/linux/module_usermode.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_MODULE_T19X_H__
18#define __NVGPU_MODULE_T19X_H__
19
20struct gk20a;
21
22void nvgpu_init_usermode_support(struct gk20a *g);
23void nvgpu_remove_usermode_support(struct gk20a *g);
24void nvgpu_lockout_usermode_registers(struct gk20a *g);
25void nvgpu_restore_usermode_registers(struct gk20a *g);
26
27#endif
diff --git a/include/os/linux/nvgpu_mem.c b/include/os/linux/nvgpu_mem.c
deleted file mode 100644
index d6a3189..0000000
--- a/include/os/linux/nvgpu_mem.c
+++ /dev/null
@@ -1,348 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/dma.h>
18#include <nvgpu/gmmu.h>
19#include <nvgpu/nvgpu_mem.h>
20#include <nvgpu/page_allocator.h>
21#include <nvgpu/log.h>
22#include <nvgpu/bug.h>
23#include <nvgpu/enabled.h>
24#include <nvgpu/kmem.h>
25#include <nvgpu/vidmem.h>
26#include <nvgpu/gk20a.h>
27
28#include <nvgpu/linux/dma.h>
29
30#include <linux/vmalloc.h>
31#include <linux/dma-mapping.h>
32
33#include "os_linux.h"
34#include "dmabuf_vidmem.h"
35
36#include "gk20a/mm_gk20a.h"
37#include "platform_gk20a.h"
38
39static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
40{
41 struct device *dev = dev_from_gk20a(g);
42 struct gk20a_platform *platform = gk20a_get_platform(dev);
43 u64 ipa = sg_phys((struct scatterlist *)sgl);
44
45 if (platform->phys_addr)
46 return platform->phys_addr(g, ipa);
47
48 return ipa;
49}
50
51/*
52 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
53 * and/or become private to this file once all bad usages of Linux SGLs are
54 * cleaned up in the driver.
55 */
56u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
57{
58 if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
59 !nvgpu_iommuable(g))
60 return g->ops.mm.gpu_phys_addr(g, NULL,
61 __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));
62
63 if (sg_dma_address(sgl) == 0)
64 return g->ops.mm.gpu_phys_addr(g, NULL,
65 __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));
66
67 if (sg_dma_address(sgl) == DMA_ERROR_CODE)
68 return 0;
69
70 return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
71}
72
73/*
74 * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM
75 * allocation.
76 */
77static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
78{
79 return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
80}
81
82/*
83 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
84 * allocation.
85 *
86 * Note: this API does not make sense to use for _VIDMEM_ buffers with greater
87 * than one scatterlist chunk. If there's more than one scatterlist chunk then
88 * the buffer will not be contiguous. As such the base address probably isn't
89 * very useful. This is true for SYSMEM as well, if there's no IOMMU.
90 *
91 * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's
92 * an IOMMU present and enabled for the GPU.
93 *
94 * %attrs can be NULL. If it is not NULL then it may be inspected to determine
95 * if the address needs to be modified before writing into a PTE.
96 */
97u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
98{
99 struct nvgpu_page_alloc *alloc;
100
101 if (mem->aperture == APERTURE_SYSMEM)
102 return nvgpu_mem_get_addr_sysmem(g, mem);
103
104 /*
105 * Otherwise get the vidmem address.
106 */
107 alloc = mem->vidmem_alloc;
108
109 /* This API should not be used with > 1 chunk */
110 WARN_ON(alloc->nr_chunks != 1);
111
112 return alloc->base;
113}
114
115/*
116 * This should only be used on contiguous buffers regardless of whether
117 * there's an IOMMU present/enabled. This applies to both SYSMEM and
118 * VIDMEM.
119 */
120u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
121{
122 /*
123 * For a VIDMEM buf, this is identical to simply get_addr() so just fall
124 * back to that.
125 */
126 if (mem->aperture == APERTURE_VIDMEM)
127 return nvgpu_mem_get_addr(g, mem);
128
129 return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl);
130}
131
132/*
133 * Be careful how you use this! You are responsible for correctly freeing this
134 * memory.
135 */
136int nvgpu_mem_create_from_mem(struct gk20a *g,
137 struct nvgpu_mem *dest, struct nvgpu_mem *src,
138 u64 start_page, int nr_pages)
139{
140 int ret;
141 u64 start = start_page * PAGE_SIZE;
142 u64 size = nr_pages * PAGE_SIZE;
143 dma_addr_t new_iova;
144
145 if (src->aperture != APERTURE_SYSMEM)
146 return -EINVAL;
147
148 /* Some silly things a caller might do... */
149 if (size > src->size)
150 return -EINVAL;
151 if ((start + size) > src->size)
152 return -EINVAL;
153
154 dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
155 dest->aperture = src->aperture;
156 dest->skip_wmb = src->skip_wmb;
157 dest->size = size;
158
159 /*
160 * Re-use the CPU mapping only if the mapping was made by the DMA API.
161 *
162 * Bug 2040115: the DMA API wrapper makes the mapping that we should
163 * re-use.
164 */
165 if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
166 nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
167 dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
168
169 dest->priv.pages = src->priv.pages + start_page;
170 dest->priv.flags = src->priv.flags;
171
172 new_iova = sg_dma_address(src->priv.sgt->sgl) ?
173 sg_dma_address(src->priv.sgt->sgl) + start : 0;
174
175 /*
176 * Make a new SG table that is based only on the subset of pages that
177 * is passed to us. This table gets freed by the dma free routines.
178 */
179 if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
180 ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
181 src->priv.pages + start_page,
182 new_iova, size);
183 else
184 ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
185 new_iova, size);
186
187 return ret;
188}
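
/*
 * Editor's illustrative sketch, not part of the original file: carving a
 * sub-range view out of an existing SYSMEM buffer with the helper above.
 * The function name is hypothetical, and nvgpu_dma_free() is assumed to be
 * the matching teardown path for the sub-buffer created here.
 */
static int example_make_window(struct gk20a *g, struct nvgpu_mem *src,
			       struct nvgpu_mem *window)
{
	int err;

	/* A one-page view starting at page 2 of the source buffer. */
	err = nvgpu_mem_create_from_mem(g, window, src, 2, 1);
	if (err != 0)
		return err;

	/* ... use window like any other SYSMEM nvgpu_mem ... */

	nvgpu_dma_free(g, window);
	return 0;
}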
189
190int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
191 struct page **pages, int nr_pages)
192{
193 struct sg_table *sgt;
194 struct page **our_pages =
195 nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
196
197 if (!our_pages)
198 return -ENOMEM;
199
200 memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);
201
202 if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
203 nr_pages * PAGE_SIZE)) {
204 nvgpu_kfree(g, our_pages);
205 return -ENOMEM;
206 }
207
208 /*
209 * If we are making an SGT from physical pages we can be reasonably
210 * certain that this should bypass the SMMU - thus we set the DMA (aka
211 * IOVA) address to 0. This tells the GMMU mapping code to not make a
212 * mapping directed to the SMMU.
213 */
214 sg_dma_address(sgt->sgl) = 0;
215
216 dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
217 dest->aperture = APERTURE_SYSMEM;
218 dest->skip_wmb = 0;
219 dest->size = PAGE_SIZE * nr_pages;
220
221 dest->priv.flags = 0;
222 dest->priv.pages = our_pages;
223 dest->priv.sgt = sgt;
224
225 return 0;
226}
227
228#ifdef CONFIG_TEGRA_GK20A_NVHOST
229int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
230 u64 src_phys, int nr_pages)
231{
232 struct page **pages =
233 nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
234 int i, ret = 0;
235
236 if (!pages)
237 return -ENOMEM;
238
239 for (i = 0; i < nr_pages; i++)
240 pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);
241
242 ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
243 nvgpu_kfree(g, pages);
244
245 return ret;
246}
247#endif
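
/*
 * Editor's illustrative sketch, not part of the original file: wrapping a
 * physically contiguous aperture in an nvgpu_mem via the helper above
 * (available when CONFIG_TEGRA_GK20A_NVHOST is set). The function name and
 * the carveout arguments are hypothetical.
 */
static int example_wrap_carveout(struct gk20a *g, struct nvgpu_mem *mem,
				 u64 carveout_base, u64 carveout_size)
{
	return __nvgpu_mem_create_from_phys(g, mem, carveout_base,
					    (int)(carveout_size >> PAGE_SHIFT));
}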
248
249static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
250{
251 return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
252}
253
254static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
255{
256 return (u64)__nvgpu_sgl_phys(g, sgl);
257}
258
259static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl)
260{
261 return (u64)sg_dma_address((struct scatterlist *)sgl);
262}
263
264static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl)
265{
266 return (u64)((struct scatterlist *)sgl)->length;
267}
268
269static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
270 struct nvgpu_sgl *sgl,
271 struct nvgpu_gmmu_attrs *attrs)
272{
273 if (sg_dma_address((struct scatterlist *)sgl) == 0)
274 return g->ops.mm.gpu_phys_addr(g, attrs,
275 __nvgpu_sgl_phys(g, sgl));
276
277 if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
278 return 0;
279
280 return nvgpu_mem_iommu_translate(g,
281 sg_dma_address((struct scatterlist *)sgl));
282}
283
284static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
285 struct nvgpu_sgt *sgt)
286{
287 if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
288 return false;
289 return true;
290}
291
292static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
293{
294 /*
295	 * Free only the nvgpu_sgt wrapper itself; the underlying Linux
296	 * SGT/SGL must be freed separately.
297 */
298 nvgpu_kfree(g, sgt);
299}
300
301static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
302 .sgl_next = nvgpu_mem_linux_sgl_next,
303 .sgl_phys = nvgpu_mem_linux_sgl_phys,
304 .sgl_dma = nvgpu_mem_linux_sgl_dma,
305 .sgl_length = nvgpu_mem_linux_sgl_length,
306 .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
307 .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
308 .sgt_free = nvgpu_mem_linux_sgl_free,
309};
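
/*
 * Editor's illustrative sketch, not part of the original file: walking an
 * nvgpu_sgt through the ops table above. The function name is hypothetical;
 * production code would normally go through generic nvgpu_sgt helpers rather
 * than calling the ops directly.
 */
static u64 example_total_sgt_length(struct nvgpu_sgt *sgt)
{
	struct nvgpu_sgl *sgl;
	u64 total = 0;

	for (sgl = sgt->sgl; sgl != NULL; sgl = sgt->ops->sgl_next(sgl))
		total += sgt->ops->sgl_length(sgl);

	return total;
}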
310
311static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
312 struct gk20a *g,
313 struct scatterlist *linux_sgl)
314{
315 struct nvgpu_page_alloc *vidmem_alloc;
316
317 vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
318 if (!vidmem_alloc)
319 return NULL;
320
321 return &vidmem_alloc->sgt;
322}
323
324struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
325{
326 struct nvgpu_sgt *nvgpu_sgt;
327 struct scatterlist *linux_sgl = sgt->sgl;
328
329 if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
330 return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
331
332 nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
333 if (!nvgpu_sgt)
334 return NULL;
335
336 nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
337
338 nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl;
339 nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
340
341 return nvgpu_sgt;
342}
343
344struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
345 struct nvgpu_mem *mem)
346{
347 return nvgpu_linux_sgt_create(g, mem->priv.sgt);
348}
diff --git a/include/os/linux/nvhost.c b/include/os/linux/nvhost.c
deleted file mode 100644
index a9341c7..0000000
--- a/include/os/linux/nvhost.c
+++ /dev/null
@@ -1,295 +0,0 @@
1/*
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/nvhost.h>
18#include <linux/nvhost_t194.h>
19#include <uapi/linux/nvhost_ioctl.h>
20#include <linux/of_platform.h>
21
22#include <nvgpu/gk20a.h>
23#include <nvgpu/nvhost.h>
24#include <nvgpu/enabled.h>
25
26#include "nvhost_priv.h"
27
28#include "os_linux.h"
29#include "module.h"
30
31int nvgpu_get_nvhost_dev(struct gk20a *g)
32{
33 struct device_node *np = nvgpu_get_node(g);
34 struct platform_device *host1x_pdev = NULL;
35 const __be32 *host1x_ptr;
36
37 host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
38 if (host1x_ptr) {
39 struct device_node *host1x_node =
40 of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
41
42 host1x_pdev = of_find_device_by_node(host1x_node);
43 if (!host1x_pdev) {
44 nvgpu_warn(g, "host1x device not available");
45 return -EPROBE_DEFER;
46 }
47
48 } else {
49 if (nvgpu_has_syncpoints(g)) {
50 nvgpu_warn(g, "host1x reference not found. assuming no syncpoints support");
51 __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false);
52 }
53 return 0;
54 }
55
56 g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev));
57 if (!g->nvhost_dev)
58 return -ENOMEM;
59
60 g->nvhost_dev->host1x_pdev = host1x_pdev;
61
62 return 0;
63}
64
65void nvgpu_free_nvhost_dev(struct gk20a *g)
66{
67 nvgpu_kfree(g, g->nvhost_dev);
68}
69
70int nvgpu_nvhost_module_busy_ext(
71 struct nvgpu_nvhost_dev *nvhost_dev)
72{
73 return nvhost_module_busy_ext(nvhost_dev->host1x_pdev);
74}
75
76void nvgpu_nvhost_module_idle_ext(
77 struct nvgpu_nvhost_dev *nvhost_dev)
78{
79 nvhost_module_idle_ext(nvhost_dev->host1x_pdev);
80}
81
82void nvgpu_nvhost_debug_dump_device(
83 struct nvgpu_nvhost_dev *nvhost_dev)
84{
85 nvhost_debug_dump_device(nvhost_dev->host1x_pdev);
86}
87
88const char *nvgpu_nvhost_syncpt_get_name(
89 struct nvgpu_nvhost_dev *nvhost_dev, int id)
90{
91 return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id);
92}
93
94bool nvgpu_nvhost_syncpt_is_valid_pt_ext(
95 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
96{
97 return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id);
98}
99
100int nvgpu_nvhost_syncpt_is_expired_ext(
101 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
102{
103 return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev,
104 id, thresh);
105}
106
107u32 nvgpu_nvhost_syncpt_incr_max_ext(
108 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs)
109{
110 return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs);
111}
112
113int nvgpu_nvhost_intr_register_notifier(
114 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh,
115 void (*callback)(void *, int), void *private_data)
116{
117 return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev,
118 id, thresh,
119 callback, private_data);
120}
121
122void nvgpu_nvhost_syncpt_set_min_eq_max_ext(
123 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
124{
125 nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id);
126}
127
128void nvgpu_nvhost_syncpt_put_ref_ext(
129 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
130{
131 nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id);
132}
133
134u32 nvgpu_nvhost_get_syncpt_host_managed(
135 struct nvgpu_nvhost_dev *nvhost_dev,
136 u32 param, const char *syncpt_name)
137{
138 return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev,
139 param, syncpt_name);
140}
141
142u32 nvgpu_nvhost_get_syncpt_client_managed(
143 struct nvgpu_nvhost_dev *nvhost_dev,
144 const char *syncpt_name)
145{
146 return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev,
147 syncpt_name);
148}
149
150int nvgpu_nvhost_syncpt_wait_timeout_ext(
151 struct nvgpu_nvhost_dev *nvhost_dev, u32 id,
152 u32 thresh, u32 timeout, u32 *value, struct timespec *ts)
153{
154 return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev,
155 id, thresh, timeout, value, ts);
156}
157
158int nvgpu_nvhost_syncpt_read_ext_check(
159 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val)
160{
161 return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val);
162}
163
164u32 nvgpu_nvhost_syncpt_read_maxval(
165 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
166{
167 return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id);
168}
169
170void nvgpu_nvhost_syncpt_set_safe_state(
171 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
172{
173 u32 val;
174
175 /*
176	 * Add a large number of increments to the current value so that all
177	 * waiters on this syncpoint are released.
178	 *
179	 * We don't expect any case where more than 0x10000 increments are
180	 * pending.
181 */
182 val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id);
183 val += 0x10000;
184
185 nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
186 nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val);
187}
188
189int nvgpu_nvhost_create_symlink(struct gk20a *g)
190{
191 struct device *dev = dev_from_gk20a(g);
192 int err = 0;
193
194 if (g->nvhost_dev &&
195 (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
196 err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
197 &dev->kobj,
198 dev_name(dev));
199 }
200
201 return err;
202}
203
204void nvgpu_nvhost_remove_symlink(struct gk20a *g)
205{
206 struct device *dev = dev_from_gk20a(g);
207
208 if (g->nvhost_dev &&
209 (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
210 sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
211 dev_name(dev));
212 }
213}
214
215#ifdef CONFIG_SYNC
216u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt)
217{
218 return nvhost_sync_pt_id(pt);
219}
220
221u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt)
222{
223 return nvhost_sync_pt_thresh(pt);
224}
225
226struct sync_fence *nvgpu_nvhost_sync_fdget(int fd)
227{
228 return nvhost_sync_fdget(fd);
229}
230
231int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence)
232{
233 return nvhost_sync_num_pts(fence);
234}
235
236struct sync_fence *nvgpu_nvhost_sync_create_fence(
237 struct nvgpu_nvhost_dev *nvhost_dev,
238 u32 id, u32 thresh, const char *name)
239{
240 struct nvhost_ctrl_sync_fence_info pt = {
241 .id = id,
242 .thresh = thresh,
243 };
244
245 return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name);
246}
247#endif /* CONFIG_SYNC */
248
249#ifdef CONFIG_TEGRA_T19X_GRHOST
250int nvgpu_nvhost_syncpt_unit_interface_get_aperture(
251 struct nvgpu_nvhost_dev *nvhost_dev,
252 u64 *base, size_t *size)
253{
254 return nvhost_syncpt_unit_interface_get_aperture(
255 nvhost_dev->host1x_pdev, (phys_addr_t *)base, size);
256}
257
258u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id)
259{
260 return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
261}
262
263int nvgpu_nvhost_syncpt_init(struct gk20a *g)
264{
265 int err = 0;
266
267 if (!nvgpu_has_syncpoints(g))
268 return -ENOSYS;
269
270 err = nvgpu_get_nvhost_dev(g);
271 if (err) {
272 nvgpu_err(g, "host1x device not available");
273 __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false);
274 return -ENOSYS;
275 }
276
277 err = nvgpu_nvhost_syncpt_unit_interface_get_aperture(
278 g->nvhost_dev,
279 &g->syncpt_unit_base,
280 &g->syncpt_unit_size);
281 if (err) {
282 nvgpu_err(g, "Failed to get syncpt interface");
283 __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false);
284 return -ENOSYS;
285 }
286
287 g->syncpt_size =
288 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1);
289 nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n",
290 g->syncpt_unit_base, g->syncpt_unit_size,
291 g->syncpt_size);
292
293 return 0;
294}
295#endif
diff --git a/include/os/linux/nvhost_priv.h b/include/os/linux/nvhost_priv.h
deleted file mode 100644
index c03390a..0000000
--- a/include/os/linux/nvhost_priv.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_NVHOST_PRIV_H__
18#define __NVGPU_NVHOST_PRIV_H__
19
20struct nvgpu_nvhost_dev {
21 struct platform_device *host1x_pdev;
22};
23
24#endif /* __NVGPU_NVHOST_PRIV_H__ */
diff --git a/include/os/linux/nvidia_p2p.c b/include/os/linux/nvidia_p2p.c
deleted file mode 100644
index 87db8c5..0000000
--- a/include/os/linux/nvidia_p2p.c
+++ /dev/null
@@ -1,299 +0,0 @@
1/*
2 * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/slab.h>
24#include <linux/nv-p2p.h>
25
26static void nvidia_p2p_mn_release(struct mmu_notifier *mn,
27 struct mm_struct *mm)
28{
29 struct nvidia_p2p_page_table *page_table = container_of(mn,
30 struct nvidia_p2p_page_table,
31 mn);
32
33 page_table->free_callback(page_table->data);
34}
35
36static void nvidia_p2p_mn_invl_range_start(struct mmu_notifier *mn,
37 struct mm_struct *mm, unsigned long start, unsigned long end)
38{
39 struct nvidia_p2p_page_table *page_table = container_of(mn,
40 struct nvidia_p2p_page_table,
41 mn);
42 u64 vaddr = 0;
43 u64 size = 0;
44
45 vaddr = page_table->vaddr;
46 size = page_table->size;
47
48 if (vaddr >= start && vaddr <= end) {
49 mmu_notifier_unregister_no_release(&page_table->mn, page_table->mm);
50 page_table->free_callback(page_table->data);
51 }
52}
53
54static struct mmu_notifier_ops nvidia_p2p_mmu_ops = {
55 .release = nvidia_p2p_mn_release,
56 .invalidate_range_start = nvidia_p2p_mn_invl_range_start,
57};
58
59int nvidia_p2p_get_pages(u64 vaddr, u64 size,
60 struct nvidia_p2p_page_table **page_table,
61 void (*free_callback)(void *data), void *data)
62{
63 int ret = 0;
64 int user_pages = 0;
65 int locked = 0;
66 int nr_pages = size >> PAGE_SHIFT;
67 struct page **pages;
68
69 if (nr_pages <= 0) {
70 return -EINVAL;
71 }
72
73 *page_table = kzalloc(sizeof(**page_table), GFP_KERNEL);
74 if (!*page_table) {
75 return -ENOMEM;
76 }
77
78 pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
79 if (!pages) {
80 ret = -ENOMEM;
81 goto free_page_table;
82 }
83 down_read(&current->mm->mmap_sem);
84 locked = 1;
85 user_pages = get_user_pages_locked(vaddr & PAGE_MASK, nr_pages,
86 FOLL_WRITE | FOLL_FORCE,
87 pages, &locked);
88 up_read(&current->mm->mmap_sem);
89 if (user_pages != nr_pages) {
90 ret = user_pages < 0 ? user_pages : -ENOMEM;
91 goto free_pages;
92 }
93
94 (*page_table)->version = NVIDIA_P2P_PAGE_TABLE_VERSION;
95 (*page_table)->pages = pages;
96 (*page_table)->entries = user_pages;
97 (*page_table)->page_size = NVIDIA_P2P_PAGE_SIZE_4KB;
98 (*page_table)->size = size;
99
100 (*page_table)->mn.ops = &nvidia_p2p_mmu_ops;
101 (*page_table)->mm = current->mm;
102 (*page_table)->free_callback = free_callback;
103 (*page_table)->data = data;
104 (*page_table)->vaddr = vaddr;
105 mutex_init(&(*page_table)->lock);
106 (*page_table)->mapped = NVIDIA_P2P_PINNED;
107
108 ret = mmu_notifier_register(&(*page_table)->mn, (*page_table)->mm);
109 if (ret) {
110 goto free_pages;
111 }
112
113 return 0;
114free_pages:
115 while (--user_pages >= 0) {
116 put_page(pages[user_pages]);
117 }
118 kfree(pages);
119free_page_table:
120 kfree(*page_table);
121 *page_table = NULL;
122 return ret;
123}
124EXPORT_SYMBOL(nvidia_p2p_get_pages);
125
126int nvidia_p2p_put_pages(struct nvidia_p2p_page_table *page_table)
127{
128 if (!page_table) {
129 return -EINVAL;
130 }
131
132 mmu_notifier_unregister(&page_table->mn, page_table->mm);
133
134 return 0;
135}
136EXPORT_SYMBOL(nvidia_p2p_put_pages);
137
138int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table)
139{
140 int user_pages = 0;
141 struct page **pages = NULL;
142
143 if (!page_table) {
144 return 0;
145 }
146
147 mutex_lock(&page_table->lock);
148
149 if (page_table->mapped & NVIDIA_P2P_MAPPED) {
150 WARN(1, "Attempting to free unmapped pages");
151 }
152
153 if (page_table->mapped & NVIDIA_P2P_PINNED) {
154 pages = page_table->pages;
155 user_pages = page_table->entries;
156
157 while (--user_pages >= 0) {
158 put_page(pages[user_pages]);
159 }
160
161 kfree(pages);
162 page_table->mapped &= (u32)~NVIDIA_P2P_PINNED;
163 }
164
165 mutex_unlock(&page_table->lock);
166
167 return 0;
168}
169EXPORT_SYMBOL(nvidia_p2p_free_page_table);
170
171int nvidia_p2p_dma_map_pages(struct device *dev,
172 struct nvidia_p2p_page_table *page_table,
173 struct nvidia_p2p_dma_mapping **dma_mapping,
174 enum dma_data_direction direction)
175{
176 struct sg_table *sgt = NULL;
177 struct scatterlist *sg;
178 struct page **pages = NULL;
179 u32 nr_pages = 0;
180 int ret = 0;
181 int i, count;
182
183 if (!page_table) {
184 return -EINVAL;
185 }
186
187 mutex_lock(&page_table->lock);
188
189 pages = page_table->pages;
190 nr_pages = page_table->entries;
191 if (nr_pages <= 0) {
192 mutex_unlock(&page_table->lock);
193 return -EINVAL;
194 }
195
196 *dma_mapping = kzalloc(sizeof(**dma_mapping), GFP_KERNEL);
197 if (!*dma_mapping) {
198 mutex_unlock(&page_table->lock);
199 return -ENOMEM;
200 }
201 sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
202 if (!sgt) {
203 ret = -ENOMEM;
204 goto free_dma_mapping;
205 }
206 ret = sg_alloc_table_from_pages(sgt, pages,
207 nr_pages, 0, page_table->size, GFP_KERNEL);
208 if (ret) {
209 goto free_sgt;
210 }
211
212 (*dma_mapping)->version = NVIDIA_P2P_DMA_MAPPING_VERSION;
213 (*dma_mapping)->sgt = sgt;
214 (*dma_mapping)->dev = dev;
215 (*dma_mapping)->direction = direction;
216 (*dma_mapping)->page_table = page_table;
217
218 count = dma_map_sg(dev, sgt->sgl, sgt->nents, direction);
219	if (count < 1) {
220		ret = -ENOMEM;
221		goto free_sg_table;
222	}
222
223 (*dma_mapping)->entries = count;
224
225 (*dma_mapping)->hw_address = kcalloc(count, sizeof(u64), GFP_KERNEL);
226 if (!((*dma_mapping)->hw_address)) {
227 ret = -ENOMEM;
228 goto unmap_sg;
229 }
230 (*dma_mapping)->hw_len = kcalloc(count, sizeof(u64), GFP_KERNEL);
231 if (!((*dma_mapping)->hw_len)) {
232 ret = -ENOMEM;
233 goto free_hw_address;
234 }
235
236 for_each_sg(sgt->sgl, sg, count, i) {
237 (*dma_mapping)->hw_address[i] = sg_dma_address(sg);
238 (*dma_mapping)->hw_len[i] = sg_dma_len(sg);
239 }
240 (*dma_mapping)->page_table->mapped |= NVIDIA_P2P_MAPPED;
241 mutex_unlock(&page_table->lock);
242
243 return 0;
244free_hw_address:
245 kfree((*dma_mapping)->hw_address);
246unmap_sg:
247 dma_unmap_sg(dev, sgt->sgl,
248 sgt->nents, direction);
249free_sg_table:
250 sg_free_table(sgt);
251free_sgt:
252 kfree(sgt);
253free_dma_mapping:
254 kfree(*dma_mapping);
255 *dma_mapping = NULL;
256 mutex_unlock(&page_table->lock);
257
258 return ret;
259}
260EXPORT_SYMBOL(nvidia_p2p_dma_map_pages);
261
262int nvidia_p2p_dma_unmap_pages(struct nvidia_p2p_dma_mapping *dma_mapping)
263{
264 struct nvidia_p2p_page_table *page_table = NULL;
265
266 if (!dma_mapping) {
267 return -EINVAL;
268 }
269
270 page_table = dma_mapping->page_table;
271 if (!page_table) {
272 return -EFAULT;
273 }
274
275 mutex_lock(&page_table->lock);
276 if (page_table->mapped & NVIDIA_P2P_MAPPED) {
277 kfree(dma_mapping->hw_len);
278 kfree(dma_mapping->hw_address);
279 if (dma_mapping->entries)
280 dma_unmap_sg(dma_mapping->dev,
281 dma_mapping->sgt->sgl,
282 dma_mapping->sgt->nents,
283 dma_mapping->direction);
284 sg_free_table(dma_mapping->sgt);
285 kfree(dma_mapping->sgt);
286 kfree(dma_mapping);
287 page_table->mapped &= (u32)~NVIDIA_P2P_MAPPED;
288 }
289 mutex_unlock(&page_table->lock);
290
291 return 0;
292}
293EXPORT_SYMBOL(nvidia_p2p_dma_unmap_pages);
294
295int nvidia_p2p_free_dma_mapping(struct nvidia_p2p_dma_mapping *dma_mapping)
296{
297 return nvidia_p2p_dma_unmap_pages(dma_mapping);
298}
299EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping);
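
/*
 * Editor's illustrative sketch, not part of the original file: the expected
 * lifecycle of the P2P helpers above, with a hypothetical caller, a
 * hypothetical free callback, and abbreviated error handling.
 */
static void example_p2p_teardown_cb(void *data)
{
	/* Called if the owning mm releases the pinned range underneath us. */
}

static int example_p2p_roundtrip(struct device *dev, u64 vaddr, u64 size)
{
	struct nvidia_p2p_page_table *pt = NULL;
	struct nvidia_p2p_dma_mapping *map = NULL;
	int err;

	err = nvidia_p2p_get_pages(vaddr, size, &pt,
				   example_p2p_teardown_cb, NULL);
	if (err)
		return err;

	err = nvidia_p2p_dma_map_pages(dev, pt, &map, DMA_BIDIRECTIONAL);
	if (err)
		goto put_pages;

	/* map->hw_address[i] / map->hw_len[i] now describe the DMA view. */

	nvidia_p2p_dma_unmap_pages(map);
put_pages:
	nvidia_p2p_put_pages(pt);
	nvidia_p2p_free_page_table(pt);
	return err;
}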
diff --git a/include/os/linux/nvlink.c b/include/os/linux/nvlink.c
deleted file mode 100644
index dd7c02c..0000000
--- a/include/os/linux/nvlink.c
+++ /dev/null
@@ -1,132 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifdef CONFIG_TEGRA_NVLINK
18#include <linux/platform/tegra/tegra-nvlink.h>
19#endif
20
21#include <nvgpu/gk20a.h>
22#include <nvgpu/nvlink.h>
23#include <nvgpu/enabled.h>
24#include "module.h"
25
26#ifdef CONFIG_TEGRA_NVLINK
27int nvgpu_nvlink_read_dt_props(struct gk20a *g)
28{
29 struct device_node *np;
30 struct nvlink_device *ndev = g->nvlink.priv;
31 u32 local_dev_id;
32 u32 local_link_id;
33 u32 remote_dev_id;
34 u32 remote_link_id;
35 bool is_master;
36
37 /* Parse DT */
38 np = nvgpu_get_node(g);
39 if (!np)
40 goto fail;
41
42 np = of_get_child_by_name(np, "nvidia,nvlink");
43 if (!np)
44 goto fail;
45
46 np = of_get_child_by_name(np, "endpoint");
47 if (!np)
48 goto fail;
49
50 /* Parse DT structure to detect endpoint topology */
51 of_property_read_u32(np, "local_dev_id", &local_dev_id);
52 of_property_read_u32(np, "local_link_id", &local_link_id);
53 of_property_read_u32(np, "remote_dev_id", &remote_dev_id);
54 of_property_read_u32(np, "remote_link_id", &remote_link_id);
55 is_master = of_property_read_bool(np, "is_master");
56
57 /* Check that we are in dGPU mode */
58 if (local_dev_id != NVLINK_ENDPT_GV100) {
59 nvgpu_err(g, "Local nvlink device is not dGPU");
60 return -EINVAL;
61 }
62
63 ndev->is_master = is_master;
64 ndev->device_id = local_dev_id;
65 ndev->link.link_id = local_link_id;
66 ndev->link.remote_dev_info.device_id = remote_dev_id;
67 ndev->link.remote_dev_info.link_id = remote_link_id;
68
69 return 0;
70
71fail:
72 nvgpu_info(g, "nvlink endpoint not found or invaling in DT");
73 return -ENODEV;
74}
75#endif /* CONFIG_TEGRA_NVLINK */
76
77void nvgpu_mss_nvlink_init_credits(struct gk20a *g)
78{
79 /* MSS_NVLINK_1_BASE */
80 void __iomem *soc1 = ioremap(0x01f20010, 4096);
81 /* MSS_NVLINK_2_BASE */
82 void __iomem *soc2 = ioremap(0x01f40010, 4096);
83 /* MSS_NVLINK_3_BASE */
84 void __iomem *soc3 = ioremap(0x01f60010, 4096);
85 /* MSS_NVLINK_4_BASE */
86 void __iomem *soc4 = ioremap(0x01f80010, 4096);
87 u32 val;
88
89 nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits");
90
91 val = readl_relaxed(soc1);
92 writel_relaxed(val, soc1);
93 val = readl_relaxed(soc1 + 4);
94 writel_relaxed(val, soc1 + 4);
95
96 val = readl_relaxed(soc2);
97 writel_relaxed(val, soc2);
98 val = readl_relaxed(soc2 + 4);
99 writel_relaxed(val, soc2 + 4);
100
101 val = readl_relaxed(soc3);
102 writel_relaxed(val, soc3);
103 val = readl_relaxed(soc3 + 4);
104 writel_relaxed(val, soc3 + 4);
105
106 val = readl_relaxed(soc4);
107 writel_relaxed(val, soc4);
108 val = readl_relaxed(soc4 + 4);
109 writel_relaxed(val, soc4 + 4);
110}
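
/*
 * Editor's illustrative sketch, not part of the original file: the same
 * read/write-back sequence expressed as a loop over the four MSS_NVLINK
 * register bases. The function name is hypothetical; this variant also
 * unmaps the temporary mappings when it is done with them.
 */
static void example_init_credits_loop(struct gk20a *g)
{
	static const unsigned long bases[] = {
		0x01f20010, 0x01f40010, 0x01f60010, 0x01f80010,
	};
	unsigned int i;

	nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits");

	for (i = 0; i < ARRAY_SIZE(bases); i++) {
		void __iomem *r = ioremap(bases[i], 4096);

		if (!r)
			continue;

		writel_relaxed(readl_relaxed(r), r);
		writel_relaxed(readl_relaxed(r + 4), r + 4);
		iounmap(r);
	}
}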
111
112int nvgpu_nvlink_deinit(struct gk20a *g)
113{
114#ifdef CONFIG_TEGRA_NVLINK
115 struct nvlink_device *ndev = g->nvlink.priv;
116 int err;
117
118 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK))
119 return -ENODEV;
120
121 err = nvlink_shutdown(ndev);
122 if (err) {
123 nvgpu_err(g, "failed to shut down nvlink");
124 return err;
125 }
126
127 nvgpu_nvlink_remove(g);
128
129 return 0;
130#endif
131 return -ENODEV;
132}
diff --git a/include/os/linux/nvlink.h b/include/os/linux/nvlink.h
deleted file mode 100644
index 4dc54f6..0000000
--- a/include/os/linux/nvlink.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef NVGPU_OS_LINUX_NVLINK_H
18#define NVGPU_OS_LINUX_NVLINK_H
19struct gk20a;
20int nvgpu_nvlink_deinit(struct gk20a *g);
21
22#endif
diff --git a/include/os/linux/os_fence_android.c b/include/os/linux/os_fence_android.c
deleted file mode 100644
index 013989e..0000000
--- a/include/os/linux/os_fence_android.c
+++ /dev/null
@@ -1,79 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#include <nvgpu/types.h>
17#include <nvgpu/os_fence.h>
18#include <nvgpu/linux/os_fence_android.h>
19#include <nvgpu/gk20a.h>
20#include <nvgpu/channel.h>
21
22#include "../drivers/staging/android/sync.h"
23
24inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s)
25{
26 struct sync_fence *fence = (struct sync_fence *)s->priv;
27 return fence;
28}
29
30static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out)
31{
32 fence_out->priv = NULL;
33 fence_out->g = NULL;
34 fence_out->ops = NULL;
35}
36
37void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
38 struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
39 struct sync_fence *fence)
40{
41 fence_out->g = g;
42 fence_out->ops = fops;
43 fence_out->priv = (void *)fence;
44}
45
46void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
47{
48 struct sync_fence *fence = nvgpu_get_sync_fence(s);
49
50 sync_fence_put(fence);
51
52 nvgpu_os_fence_clear(s);
53}
54
55void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd)
56{
57 struct sync_fence *fence = nvgpu_get_sync_fence(s);
58
59 sync_fence_get(fence);
60 sync_fence_install(fence, fd);
61}
62
63int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
64 struct channel_gk20a *c, int fd)
65{
66 int err = -ENOSYS;
67
68#ifdef CONFIG_TEGRA_GK20A_NVHOST
69 err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
70#endif
71
72 if (err)
73 err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
74
75 if (err)
76 nvgpu_err(c->g, "error obtaining fence from fd %d", fd);
77
78 return err;
79}
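
/*
 * Editor's illustrative sketch, not part of the original file: how a
 * hypothetical caller might drive the ops installed by the fdget/create
 * helpers above. Error handling is abbreviated.
 */
static int example_wait_on_fd(struct channel_gk20a *c, int fd,
			      struct priv_cmd_entry *wait_cmd)
{
	struct nvgpu_os_fence fence;
	int err;

	err = nvgpu_os_fence_fdget(&fence, c, fd);
	if (err)
		return err;

	/* Emit wait commands for every point backing the fence. */
	err = fence.ops->program_waits(&fence, wait_cmd, c, 0);

	fence.ops->drop_ref(&fence);
	return err;
}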
diff --git a/include/os/linux/os_fence_android_sema.c b/include/os/linux/os_fence_android_sema.c
deleted file mode 100644
index eb60600..0000000
--- a/include/os/linux/os_fence_android_sema.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/errno.h>
18
19#include <nvgpu/types.h>
20#include <nvgpu/os_fence.h>
21#include <nvgpu/linux/os_fence_android.h>
22#include <nvgpu/semaphore.h>
23#include <nvgpu/gk20a.h>
24#include <nvgpu/channel.h>
25#include <nvgpu/channel_sync.h>
26
27#include "gk20a/mm_gk20a.h"
28
29#include "sync_sema_android.h"
30
31#include "../drivers/staging/android/sync.h"
32
33int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
34 struct priv_cmd_entry *wait_cmd,
35 struct channel_gk20a *c,
36 int max_wait_cmds)
37{
38 int err;
39 int wait_cmd_size;
40 int num_wait_cmds;
41 int i;
42 struct nvgpu_semaphore *sema;
43 struct sync_fence *sync_fence = nvgpu_get_sync_fence(s);
44
45 wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size();
46
47 num_wait_cmds = sync_fence->num_fences;
48 if (num_wait_cmds == 0)
49 return 0;
50
51 if (max_wait_cmds && num_wait_cmds > max_wait_cmds)
52 return -EINVAL;
53
54 err = gk20a_channel_alloc_priv_cmdbuf(c,
55 wait_cmd_size * num_wait_cmds,
56 wait_cmd);
57 if (err) {
58 return err;
59 }
60
61 for (i = 0; i < num_wait_cmds; i++) {
62 struct sync_pt *pt = sync_pt_from_fence(
63 sync_fence->cbs[i].sync_pt);
64
65 sema = gk20a_sync_pt_sema(pt);
66 channel_sync_semaphore_gen_wait_cmd(c, sema, wait_cmd,
67 wait_cmd_size, i);
68 }
69
70 return 0;
71}
72
73static const struct nvgpu_os_fence_ops sema_ops = {
74 .program_waits = nvgpu_os_fence_sema_wait_gen_cmd,
75 .drop_ref = nvgpu_os_fence_android_drop_ref,
76 .install_fence = nvgpu_os_fence_android_install_fd,
77};
78
79int nvgpu_os_fence_sema_create(
80 struct nvgpu_os_fence *fence_out,
81 struct channel_gk20a *c,
82 struct nvgpu_semaphore *sema)
83{
84 struct sync_fence *fence;
85
86 fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x",
87 nvgpu_semaphore_gpu_ro_va(sema));
88
89 if (!fence) {
90 nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x",
91 (u32)nvgpu_semaphore_gpu_ro_va(sema));
92
93 return -ENOMEM;
94 }
95
96 nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);
97
98 return 0;
99}
100
101int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out,
102 struct channel_gk20a *c, int fd)
103{
104 struct sync_fence *fence = gk20a_sync_fence_fdget(fd);
105
106 if (!fence)
107 return -EINVAL;
108
109 nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);
110
111 return 0;
112}
diff --git a/include/os/linux/os_fence_android_syncpt.c b/include/os/linux/os_fence_android_syncpt.c
deleted file mode 100644
index 368a03c..0000000
--- a/include/os/linux/os_fence_android_syncpt.c
+++ /dev/null
@@ -1,121 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/err.h>
18#include <nvgpu/errno.h>
19
20#include <nvgpu/types.h>
21#include <nvgpu/os_fence.h>
22#include <nvgpu/linux/os_fence_android.h>
23#include <nvgpu/nvhost.h>
24#include <nvgpu/atomic.h>
25#include <nvgpu/gk20a.h>
26#include <nvgpu/channel.h>
27#include <nvgpu/channel_sync.h>
28
29#include "gk20a/mm_gk20a.h"
30
31#include "../drivers/staging/android/sync.h"
32
33int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s,
34 struct priv_cmd_entry *wait_cmd,
35 struct channel_gk20a *c,
36 int max_wait_cmds)
37{
38 int err;
39 int wait_cmd_size;
40 int num_wait_cmds;
41 int i;
42 u32 wait_id;
43 struct sync_pt *pt;
44
45 struct sync_fence *sync_fence = (struct sync_fence *)s->priv;
46
47 if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds)
48 return -EINVAL;
49
50 /* validate syncpt ids */
51 for (i = 0; i < sync_fence->num_fences; i++) {
52 pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
53 wait_id = nvgpu_nvhost_sync_pt_id(pt);
54 if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
55 c->g->nvhost_dev, wait_id)) {
56 return -EINVAL;
57 }
58 }
59
60 num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
61 if (num_wait_cmds == 0)
62 return 0;
63
64 wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
65 err = gk20a_channel_alloc_priv_cmdbuf(c,
66 wait_cmd_size * num_wait_cmds, wait_cmd);
67 if (err) {
68 return err;
69 }
70
71 for (i = 0; i < sync_fence->num_fences; i++) {
72 struct sync_pt *pt = sync_pt_from_fence(
73 sync_fence->cbs[i].sync_pt);
74 u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
75 u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);
76
77 err = channel_sync_syncpt_gen_wait_cmd(c, wait_id, wait_value,
78 wait_cmd, wait_cmd_size, i, true);
79 }
80
81 WARN_ON(i != num_wait_cmds);
82
83 return 0;
84}
85
86static const struct nvgpu_os_fence_ops syncpt_ops = {
87 .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd,
88 .drop_ref = nvgpu_os_fence_android_drop_ref,
89 .install_fence = nvgpu_os_fence_android_install_fd,
90};
91
92int nvgpu_os_fence_syncpt_create(
93 struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
94 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
95{
96 struct sync_fence *fence = nvgpu_nvhost_sync_create_fence(
97 nvhost_dev, id, thresh, "fence");
98
99 if (IS_ERR(fence)) {
100 nvgpu_err(c->g, "error %d during construction of fence.", (int)PTR_ERR(fence));
101 return PTR_ERR(fence);
102 }
103
104 nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
105
106 return 0;
107}
108
109int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out,
110 struct channel_gk20a *c, int fd)
111{
112 struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd);
113
114 if (fence == NULL) {
115 return -ENOMEM;
116 }
117
118 nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
119
120 return 0;
121}
diff --git a/include/os/linux/os_linux.h b/include/os/linux/os_linux.h
deleted file mode 100644
index adcfdb2..0000000
--- a/include/os/linux/os_linux.h
+++ /dev/null
@@ -1,192 +0,0 @@
1/*
2 * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef NVGPU_OS_LINUX_H
18#define NVGPU_OS_LINUX_H
19
20#include <linux/cdev.h>
21#include <linux/iommu.h>
22#include <linux/hashtable.h>
23
24#include <nvgpu/gk20a.h>
25
26#include "cde.h"
27#include "sched.h"
28#include "ecc_linux.h"
29
30struct nvgpu_os_linux_ops {
31 struct {
32 void (*get_program_numbers)(struct gk20a *g,
33 u32 block_height_log2,
34 u32 shader_parameter,
35 int *hprog, int *vprog);
36 bool (*need_scatter_buffer)(struct gk20a *g);
37 int (*populate_scatter_buffer)(struct gk20a *g,
38 struct sg_table *sgt,
39 size_t surface_size,
40 void *scatter_buffer_ptr,
41 size_t scatter_buffer_size);
42 } cde;
43
44 struct {
45 int (*init_debugfs)(struct gk20a *g);
46 } clk;
47
48 struct {
49 int (*init_debugfs)(struct gk20a *g);
50 } therm;
51
52 struct {
53 int (*init_debugfs)(struct gk20a *g);
54 } fecs_trace;
55};
56
57struct nvgpu_os_linux {
58 struct gk20a g;
59 struct device *dev;
60
61 struct {
62 struct cdev cdev;
63 struct device *node;
64 } channel;
65
66 struct {
67 struct cdev cdev;
68 struct device *node;
69 /* see gk20a_ctrl_priv */
70 struct nvgpu_list_node privs;
71 /* guards modifications to the list and its contents */
72 struct nvgpu_mutex privs_lock;
73 } ctrl;
74
75 struct {
76 struct cdev cdev;
77 struct device *node;
78 } as_dev;
79
80 struct {
81 struct cdev cdev;
82 struct device *node;
83 } dbg;
84
85 struct {
86 struct cdev cdev;
87 struct device *node;
88 } prof;
89
90 struct {
91 struct cdev cdev;
92 struct device *node;
93 } tsg;
94
95 struct {
96 struct cdev cdev;
97 struct device *node;
98 } ctxsw;
99
100 struct {
101 struct cdev cdev;
102 struct device *node;
103 } sched;
104
105 dev_t cdev_region;
106
107 struct devfreq *devfreq;
108
109 struct device_dma_parameters dma_parms;
110
111 atomic_t hw_irq_stall_count;
112 atomic_t hw_irq_nonstall_count;
113
114 struct nvgpu_cond sw_irq_stall_last_handled_wq;
115 atomic_t sw_irq_stall_last_handled;
116
117 atomic_t nonstall_ops;
118
119 struct nvgpu_cond sw_irq_nonstall_last_handled_wq;
120 atomic_t sw_irq_nonstall_last_handled;
121
122 struct work_struct nonstall_fn_work;
123 struct workqueue_struct *nonstall_work_queue;
124
125 struct resource *reg_mem;
126 void __iomem *regs;
127 void __iomem *regs_saved;
128
129 struct resource *bar1_mem;
130 void __iomem *bar1;
131 void __iomem *bar1_saved;
132
133 void __iomem *usermode_regs;
134 void __iomem *usermode_regs_saved;
135
136 u64 regs_bus_addr;
137
138#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
139 struct nvgpu_ecc_reporting_linux ecc_reporting_linux;
140#endif
141
142 struct nvgpu_os_linux_ops ops;
143
144#ifdef CONFIG_DEBUG_FS
145 struct dentry *debugfs;
146 struct dentry *debugfs_alias;
147
148 struct dentry *debugfs_ltc_enabled;
149 struct dentry *debugfs_timeouts_enabled;
150 struct dentry *debugfs_gr_idle_timeout_default;
151 struct dentry *debugfs_disable_bigpage;
152 struct dentry *debugfs_gr_default_attrib_cb_size;
153
154 struct dentry *debugfs_timeslice_low_priority_us;
155 struct dentry *debugfs_timeslice_medium_priority_us;
156 struct dentry *debugfs_timeslice_high_priority_us;
157 struct dentry *debugfs_runlist_interleave;
158 struct dentry *debugfs_allocators;
159 struct dentry *debugfs_xve;
160 struct dentry *debugfs_kmem;
161 struct dentry *debugfs_hal;
162 struct dentry *debugfs_ltc;
163
164 struct dentry *debugfs_force_preemption_cilp;
165 struct dentry *debugfs_force_preemption_gfxp;
166 struct dentry *debugfs_dump_ctxsw_stats;
167#endif
168 DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
169 struct dev_ext_attribute *ecc_attrs;
170
171 struct gk20a_cde_app cde_app;
172
173 struct rw_semaphore busy_lock;
174
175 bool init_done;
176};
177
178static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)
179{
180 return container_of(g, struct nvgpu_os_linux, g);
181}
182
183static inline struct device *dev_from_gk20a(struct gk20a *g)
184{
185 return nvgpu_os_linux_from_gk20a(g)->dev;
186}
187
188#define INTERFACE_NAME "nvhost%s-gpu"
189
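/*
 * Editor's note (not in the original file): totalram_pages is in PAGE_SIZE
 * units; shifting right by (10 - (PAGE_SHIFT - 10)) == (20 - PAGE_SHIFT)
 * converts pages to MiB (e.g. with 4K pages, 256 pages per MiB).
 */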
190#define totalram_size_in_mb (totalram_pages >> (10 - (PAGE_SHIFT - 10)))
191
192#endif
diff --git a/include/os/linux/os_ops.c b/include/os/linux/os_ops.c
deleted file mode 100644
index f1ab4b1..0000000
--- a/include/os/linux/os_ops.c
+++ /dev/null
@@ -1,61 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include "os_linux.h"
18
19#include "os_ops_gm20b.h"
20#include "os_ops_gp10b.h"
21#include "os_ops_gp106.h"
22#include "os_ops_gv11b.h"
23#include "os_ops_gv100.h"
24
25#if defined(CONFIG_TEGRA_GPU_NEXT)
26#include "nvgpu_gpuid_next.h"
27#endif
28
29int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l)
30{
31 struct gk20a *g = &l->g;
32 u32 ver = g->params.gpu_arch + g->params.gpu_impl;
33
34 switch (ver) {
35 case GK20A_GPUID_GM20B:
36 case GK20A_GPUID_GM20B_B:
37 nvgpu_gm20b_init_os_ops(l);
38 break;
39 case NVGPU_GPUID_GP10B:
40 nvgpu_gp10b_init_os_ops(l);
41 break;
42 case NVGPU_GPUID_GP106:
43 nvgpu_gp106_init_os_ops(l);
44 break;
45 case NVGPU_GPUID_GV100:
46 nvgpu_gv100_init_os_ops(l);
47 break;
48 case NVGPU_GPUID_GV11B:
49 nvgpu_gv11b_init_os_ops(l);
50 break;
51#if defined(CONFIG_TEGRA_GPU_NEXT)
52 case NVGPU_GPUID_NEXT:
53 NVGPU_NEXT_INIT_OS_OPS(l);
54 break;
55#endif
56 default:
57 break;
58 }
59
60 return 0;
61}
diff --git a/include/os/linux/os_ops.h b/include/os/linux/os_ops.h
deleted file mode 100644
index af3ce0a..0000000
--- a/include/os/linux/os_ops.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __LINUX_OS_OPS_H
18#define __LINUX_OS_OPS_H
19
20int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l);
21
22#endif
diff --git a/include/os/linux/os_ops_gm20b.c b/include/os/linux/os_ops_gm20b.c
deleted file mode 100644
index 77aee39..0000000
--- a/include/os/linux/os_ops_gm20b.c
+++ /dev/null
@@ -1,47 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include "os_linux.h"
18
19#include "cde_gm20b.h"
20#include "debug_clk_gm20b.h"
21#include "debug_fecs_trace.h"
22
23
24static struct nvgpu_os_linux_ops gm20b_os_linux_ops = {
25#ifdef CONFIG_NVGPU_SUPPORT_CDE
26 .cde = {
27 .get_program_numbers = gm20b_cde_get_program_numbers,
28 },
29#endif
30 .clk = {
31 .init_debugfs = gm20b_clk_init_debugfs,
32 },
33
34 .fecs_trace = {
35 .init_debugfs = nvgpu_fecs_trace_init_debugfs,
36 },
37};
38
39void nvgpu_gm20b_init_os_ops(struct nvgpu_os_linux *l)
40{
41#ifdef CONFIG_NVGPU_SUPPORT_CDE
42 l->ops.cde = gm20b_os_linux_ops.cde;
43#endif
44 l->ops.clk = gm20b_os_linux_ops.clk;
45
46 l->ops.fecs_trace = gm20b_os_linux_ops.fecs_trace;
47}
diff --git a/include/os/linux/os_ops_gm20b.h b/include/os/linux/os_ops_gm20b.h
deleted file mode 100644
index 7d27e40..0000000
--- a/include/os/linux/os_ops_gm20b.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __LINUX_OS_OPS_GM20B_H
18#define __LINUX_OS_OPS_GM20B_H
19
20void nvgpu_gm20b_init_os_ops(struct nvgpu_os_linux *l);
21
22#endif
diff --git a/include/os/linux/os_ops_gp106.c b/include/os/linux/os_ops_gp106.c
deleted file mode 100644
index 14f1b00..0000000
--- a/include/os/linux/os_ops_gp106.c
+++ /dev/null
@@ -1,40 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include "os_linux.h"
18
19#include "debug_clk_gp106.h"
20#include "debug_therm_gp106.h"
21#include "debug_fecs_trace.h"
22
23static struct nvgpu_os_linux_ops gp106_os_linux_ops = {
24 .clk = {
25 .init_debugfs = gp106_clk_init_debugfs,
26 },
27 .therm = {
28 .init_debugfs = gp106_therm_init_debugfs,
29 },
30 .fecs_trace = {
31 .init_debugfs = nvgpu_fecs_trace_init_debugfs,
32 },
33};
34
35void nvgpu_gp106_init_os_ops(struct nvgpu_os_linux *l)
36{
37 l->ops.clk = gp106_os_linux_ops.clk;
38 l->ops.therm = gp106_os_linux_ops.therm;
39 l->ops.fecs_trace = gp106_os_linux_ops.fecs_trace;
40}
diff --git a/include/os/linux/os_ops_gp106.h b/include/os/linux/os_ops_gp106.h
deleted file mode 100644
index 7d423d5..0000000
--- a/include/os/linux/os_ops_gp106.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __LINUX_OS_OPS_GP106_H
18#define __LINUX_OS_OPS_GP106_H
19
20void nvgpu_gp106_init_os_ops(struct nvgpu_os_linux *l);
21
22#endif
diff --git a/include/os/linux/os_ops_gp10b.c b/include/os/linux/os_ops_gp10b.c
deleted file mode 100644
index e2891f7..0000000
--- a/include/os/linux/os_ops_gp10b.c
+++ /dev/null
@@ -1,41 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include "os_linux.h"
18
19#include "cde_gp10b.h"
20#include "debug_fecs_trace.h"
21
22static struct nvgpu_os_linux_ops gp10b_os_linux_ops = {
23#ifdef CONFIG_NVGPU_SUPPORT_CDE
24 .cde = {
25 .get_program_numbers = gp10b_cde_get_program_numbers,
26 .need_scatter_buffer = gp10b_need_scatter_buffer,
27 .populate_scatter_buffer = gp10b_populate_scatter_buffer,
28 },
29#endif
30 .fecs_trace = {
31 .init_debugfs = nvgpu_fecs_trace_init_debugfs,
32 },
33};
34
35void nvgpu_gp10b_init_os_ops(struct nvgpu_os_linux *l)
36{
37#ifdef CONFIG_NVGPU_SUPPORT_CDE
38 l->ops.cde = gp10b_os_linux_ops.cde;
39#endif
40 l->ops.fecs_trace = gp10b_os_linux_ops.fecs_trace;
41}
diff --git a/include/os/linux/os_ops_gp10b.h b/include/os/linux/os_ops_gp10b.h
deleted file mode 100644
index 0be1bca..0000000
--- a/include/os/linux/os_ops_gp10b.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __LINUX_OS_OPS_GP10B_H
18#define __LINUX_OS_OPS_GP10B_H
19
20void nvgpu_gp10b_init_os_ops(struct nvgpu_os_linux *l);
21
22#endif
diff --git a/include/os/linux/os_ops_gv100.c b/include/os/linux/os_ops_gv100.c
deleted file mode 100644
index 9d92bdf..0000000
--- a/include/os/linux/os_ops_gv100.c
+++ /dev/null
@@ -1,40 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include "os_linux.h"
18
19#include "debug_clk_gv100.h"
20#include "debug_therm_gp106.h"
21#include "debug_fecs_trace.h"
22
23static struct nvgpu_os_linux_ops gv100_os_linux_ops = {
24 .clk = {
25 .init_debugfs = gv100_clk_init_debugfs,
26 },
27 .therm = {
28 .init_debugfs = gp106_therm_init_debugfs,
29 },
30 .fecs_trace = {
31 .init_debugfs = nvgpu_fecs_trace_init_debugfs,
32 },
33};
34
35void nvgpu_gv100_init_os_ops(struct nvgpu_os_linux *l)
36{
37 l->ops.clk = gv100_os_linux_ops.clk;
38 l->ops.therm = gv100_os_linux_ops.therm;
39 l->ops.fecs_trace = gv100_os_linux_ops.fecs_trace;
40}
diff --git a/include/os/linux/os_ops_gv100.h b/include/os/linux/os_ops_gv100.h
deleted file mode 100644
index 43923b2..0000000
--- a/include/os/linux/os_ops_gv100.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __LINUX_OS_OPS_GV100_H
18#define __LINUX_OS_OPS_GV100_H
19
20void nvgpu_gv100_init_os_ops(struct nvgpu_os_linux *l);
21
22#endif
diff --git a/include/os/linux/os_ops_gv11b.c b/include/os/linux/os_ops_gv11b.c
deleted file mode 100644
index a82ad0a..0000000
--- a/include/os/linux/os_ops_gv11b.c
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include "os_linux.h"
18
19#include "debug_fecs_trace.h"
20
21static struct nvgpu_os_linux_ops gv11b_os_linux_ops = {
22 .fecs_trace = {
23 .init_debugfs = nvgpu_fecs_trace_init_debugfs,
24 },
25};
26
27void nvgpu_gv11b_init_os_ops(struct nvgpu_os_linux *l)
28{
29 l->ops.fecs_trace = gv11b_os_linux_ops.fecs_trace;
30}
diff --git a/include/os/linux/os_ops_gv11b.h b/include/os/linux/os_ops_gv11b.h
deleted file mode 100644
index eef6c4a..0000000
--- a/include/os/linux/os_ops_gv11b.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef LINUX_OS_OPS_GV11B_H
18#define LINUX_OS_OPS_GV11B_H
19
20struct nvgpu_os_linux;
21
22void nvgpu_gv11b_init_os_ops(struct nvgpu_os_linux *l);
23
24#endif
diff --git a/include/os/linux/os_sched.c b/include/os/linux/os_sched.c
deleted file mode 100644
index 9a25da1..0000000
--- a/include/os/linux/os_sched.c
+++ /dev/null
@@ -1,32 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/os_sched.h>
15
16#include <linux/sched.h>
17
18int nvgpu_current_tid(struct gk20a *g)
19{
20 return current->pid;
21}
22
23int nvgpu_current_pid(struct gk20a *g)
24{
25 return current->tgid;
26}
27
28void __nvgpu_print_current(struct gk20a *g, const char *func_name, int line,
29 void *ctx, enum nvgpu_log_type type)
30{
31 __nvgpu_log_msg(g, func_name, line, type, current->comm);
32}
diff --git a/include/os/linux/pci.c b/include/os/linux/pci.c
deleted file mode 100644
index 07071d1..0000000
--- a/include/os/linux/pci.c
+++ /dev/null
@@ -1,854 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/pci.h>
18#include <linux/interrupt.h>
19#include <linux/pm_runtime.h>
20#include <linux/of_platform.h>
21#include <linux/of_address.h>
22
23#include <nvgpu/nvhost.h>
24#include <nvgpu/nvgpu_common.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/enabled.h>
27#include <nvgpu/nvlink.h>
28#include <nvgpu/soc.h>
29#include <nvgpu/sim.h>
30#include <nvgpu/gk20a.h>
31
32#include "nvlink.h"
33#include "clk/clk.h"
34#include "clk/clk_mclk.h"
35#include "module.h"
36#include "intr.h"
37#include "sysfs.h"
38#include "os_linux.h"
39#include "platform_gk20a.h"
40
41#include "pci.h"
42#include "pci_usermode.h"
43
44#include "driver_common.h"
45
46#define PCI_INTERFACE_NAME "card-%s%%s"
47
48static int nvgpu_pci_tegra_probe(struct device *dev)
49{
50 return 0;
51}
52
53static int nvgpu_pci_tegra_remove(struct device *dev)
54{
55 return 0;
56}
57
58static bool nvgpu_pci_tegra_is_railgated(struct device *pdev)
59{
60 return false;
61}
62
63static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate)
64{
65 long ret = (long)rate;
66
67 if (rate == UINT_MAX)
68 ret = BOOT_GPC2CLK_MHZ * 1000000UL;
69
70 return ret;
71}
72
73static struct gk20a_platform nvgpu_pci_device[] = {
74 { /* DEVICE=0x1c35 */
75 /* ptimer src frequency in hz */
76 .ptimer_src_freq = 31250000,
77
78 .probe = nvgpu_pci_tegra_probe,
79 .remove = nvgpu_pci_tegra_remove,
80
81 /* power management configuration */
82 .railgate_delay_init = 500,
83 .can_railgate_init = false,
84 .can_elpg_init = true,
85 .enable_elpg = true,
86 .enable_elcg = false,
87 .enable_slcg = true,
88 .enable_blcg = true,
89 .enable_mscg = true,
90 .can_slcg = true,
91 .can_blcg = true,
92 .can_elcg = true,
93
94 .disable_aspm = true,
95
96 /* power management callbacks */
97 .is_railgated = nvgpu_pci_tegra_is_railgated,
98 .clk_round_rate = nvgpu_pci_clk_round_rate,
99
100 .ch_wdt_timeout_ms = 7000,
101
102 .honors_aperture = true,
103 .dma_mask = DMA_BIT_MASK(40),
104 .vbios_min_version = 0x86063000,
105 .hardcode_sw_threshold = true,
106 .ina3221_dcb_index = 0,
107 .ina3221_i2c_address = 0x84,
108 .ina3221_i2c_port = 0x2,
109 },
110 { /* DEVICE=0x1c36 */
111 /* ptimer src frequency in hz */
112 .ptimer_src_freq = 31250000,
113
114 .probe = nvgpu_pci_tegra_probe,
115 .remove = nvgpu_pci_tegra_remove,
116
117 /* power management configuration */
118 .railgate_delay_init = 500,
119 .can_railgate_init = false,
120 .can_elpg_init = true,
121 .enable_elpg = true,
122 .enable_elcg = false,
123 .enable_slcg = true,
124 .enable_blcg = true,
125 .enable_mscg = true,
126 .can_slcg = true,
127 .can_blcg = true,
128 .can_elcg = true,
129
130 .disable_aspm = true,
131
132 /* power management callbacks */
133 .is_railgated = nvgpu_pci_tegra_is_railgated,
134 .clk_round_rate = nvgpu_pci_clk_round_rate,
135
136 .ch_wdt_timeout_ms = 7000,
137
138 .honors_aperture = true,
139 .dma_mask = DMA_BIT_MASK(40),
140 .vbios_min_version = 0x86062d00,
141 .hardcode_sw_threshold = true,
142 .ina3221_dcb_index = 0,
143 .ina3221_i2c_address = 0x84,
144 .ina3221_i2c_port = 0x2,
145 },
146 { /* DEVICE=0x1c37 */
147 /* ptimer src frequency in hz */
148 .ptimer_src_freq = 31250000,
149
150 .probe = nvgpu_pci_tegra_probe,
151 .remove = nvgpu_pci_tegra_remove,
152
153 /* power management configuration */
154 .railgate_delay_init = 500,
155 .can_railgate_init = false,
156 .can_elpg_init = true,
157 .enable_elpg = true,
158 .enable_elcg = false,
159 .enable_slcg = true,
160 .enable_blcg = true,
161 .enable_mscg = true,
162 .can_slcg = true,
163 .can_blcg = true,
164 .can_elcg = true,
165
166 .disable_aspm = true,
167
168 /* power management callbacks */
169 .is_railgated = nvgpu_pci_tegra_is_railgated,
170 .clk_round_rate = nvgpu_pci_clk_round_rate,
171
172 .ch_wdt_timeout_ms = 7000,
173
174 .honors_aperture = true,
175 .dma_mask = DMA_BIT_MASK(40),
176 .vbios_min_version = 0x86063000,
177 .hardcode_sw_threshold = true,
178 .ina3221_dcb_index = 0,
179 .ina3221_i2c_address = 0x84,
180 .ina3221_i2c_port = 0x2,
181 },
182 { /* DEVICE=0x1c75 */
183 /* ptimer src frequency in hz */
184 .ptimer_src_freq = 31250000,
185
186 .probe = nvgpu_pci_tegra_probe,
187 .remove = nvgpu_pci_tegra_remove,
188
189 /* power management configuration */
190 .railgate_delay_init = 500,
191 .can_railgate_init = false,
192 .can_elpg_init = true,
193 .enable_elpg = true,
194 .enable_elcg = false,
195 .enable_slcg = true,
196 .enable_blcg = true,
197 .enable_mscg = true,
198 .can_slcg = true,
199 .can_blcg = true,
200 .can_elcg = true,
201
202 .disable_aspm = true,
203
204 /* power management callbacks */
205 .is_railgated = nvgpu_pci_tegra_is_railgated,
206 .clk_round_rate = nvgpu_pci_clk_round_rate,
207
208 .ch_wdt_timeout_ms = 7000,
209
210 .honors_aperture = true,
211 .dma_mask = DMA_BIT_MASK(40),
212 .vbios_min_version = 0x86065300,
213 .hardcode_sw_threshold = false,
214 .ina3221_dcb_index = 1,
215 .ina3221_i2c_address = 0x80,
216 .ina3221_i2c_port = 0x1,
217 },
218 { /* DEVICE=PG503 SKU 201 */
219 /* ptimer src frequency in hz */
220 .ptimer_src_freq = 31250000,
221
222 .probe = nvgpu_pci_tegra_probe,
223 .remove = nvgpu_pci_tegra_remove,
224
225 /* power management configuration */
226 .railgate_delay_init = 500,
227 .can_railgate_init = false,
228 .can_elpg_init = false,
229 .enable_elpg = false,
230 .enable_elcg = false,
231 .enable_slcg = false,
232 .enable_blcg = false,
233 .enable_mscg = false,
234 .can_slcg = false,
235 .can_blcg = false,
236 .can_elcg = false,
237
238 .disable_aspm = true,
239
240 /* power management callbacks */
241 .is_railgated = nvgpu_pci_tegra_is_railgated,
242 .clk_round_rate = nvgpu_pci_clk_round_rate,
243
244 .ch_wdt_timeout_ms = 7000,
245
246 .honors_aperture = true,
247 .dma_mask = DMA_BIT_MASK(40),
248 .vbios_min_version = 0x88001e00,
249 .hardcode_sw_threshold = false,
250 .run_preos = true,
251 },
252 { /* DEVICE=PG503 SKU 200 ES */
253 /* ptimer src frequency in hz */
254 .ptimer_src_freq = 31250000,
255
256 .probe = nvgpu_pci_tegra_probe,
257 .remove = nvgpu_pci_tegra_remove,
258
259 /* power management configuration */
260 .railgate_delay_init = 500,
261 .can_railgate_init = false,
262 .can_elpg_init = false,
263 .enable_elpg = false,
264 .enable_elcg = false,
265 .enable_slcg = false,
266 .enable_blcg = false,
267 .enable_mscg = false,
268 .can_slcg = false,
269 .can_blcg = false,
270 .can_elcg = false,
271
272 .disable_aspm = true,
273
274 /* power management callbacks */
275 .is_railgated = nvgpu_pci_tegra_is_railgated,
276 .clk_round_rate = nvgpu_pci_clk_round_rate,
277
278 .ch_wdt_timeout_ms = 7000,
279
280 .honors_aperture = true,
281 .dma_mask = DMA_BIT_MASK(40),
282 .vbios_min_version = 0x88001e00,
283 .hardcode_sw_threshold = false,
284 .run_preos = true,
285 },
286 {
287 /* ptimer src frequency in hz */
288 .ptimer_src_freq = 31250000,
289
290 .probe = nvgpu_pci_tegra_probe,
291 .remove = nvgpu_pci_tegra_remove,
292
293 /* power management configuration */
294 .railgate_delay_init = 500,
295 .can_railgate_init = false,
296 .can_elpg_init = false,
297 .enable_elpg = false,
298 .enable_elcg = false,
299 .enable_slcg = false,
300 .enable_blcg = false,
301 .enable_mscg = false,
302 .can_slcg = false,
303 .can_blcg = false,
304 .can_elcg = false,
305
306 .disable_aspm = true,
307
308 /* power management callbacks */
309 .is_railgated = nvgpu_pci_tegra_is_railgated,
310 .clk_round_rate = nvgpu_pci_clk_round_rate,
311
312 .ch_wdt_timeout_ms = 7000,
313
314 .honors_aperture = true,
315 .dma_mask = DMA_BIT_MASK(40),
316 .vbios_min_version = 0x88000126,
317 .hardcode_sw_threshold = false,
318 .run_preos = true,
319 .has_syncpoints = true,
320 },
321 { /* SKU250 */
322 /* ptimer src frequency in hz */
323 .ptimer_src_freq = 31250000,
324
325 .probe = nvgpu_pci_tegra_probe,
326 .remove = nvgpu_pci_tegra_remove,
327
328 /* power management configuration */
329 .railgate_delay_init = 500,
330 .can_railgate_init = false,
331 .can_elpg_init = false,
332 .enable_elpg = false,
333 .enable_elcg = false,
334 .enable_slcg = true,
335 .enable_blcg = true,
336 .enable_mscg = false,
337 .can_slcg = true,
338 .can_blcg = true,
339 .can_elcg = false,
340
341 .disable_aspm = true,
342
343 /* power management callbacks */
344 .is_railgated = nvgpu_pci_tegra_is_railgated,
345 .clk_round_rate = nvgpu_pci_clk_round_rate,
346
347 .ch_wdt_timeout_ms = 7000,
348
349 .honors_aperture = true,
350 .dma_mask = DMA_BIT_MASK(40),
351 .vbios_min_version = 0x1,
352 .hardcode_sw_threshold = false,
353 .run_preos = true,
354 .has_syncpoints = true,
355 },
356 { /* SKU 0x1e3f */
357 /* ptimer src frequency in hz */
358 .ptimer_src_freq = 31250000,
359
360 .probe = nvgpu_pci_tegra_probe,
361 .remove = nvgpu_pci_tegra_remove,
362
363 /* power management configuration */
364 .railgate_delay_init = 500,
365 .can_railgate_init = false,
366 .can_elpg_init = false,
367 .enable_elpg = false,
368 .enable_elcg = false,
369 .enable_slcg = false,
370 .enable_blcg = false,
371 .enable_mscg = false,
372 .can_slcg = false,
373 .can_blcg = false,
374 .can_elcg = false,
375
376 .disable_aspm = true,
377
378 /* power management callbacks */
379 .is_railgated = nvgpu_pci_tegra_is_railgated,
380 .clk_round_rate = nvgpu_pci_clk_round_rate,
381
382 /*
383	 * WAR: PCIE X1 is very slow; set to a very high value until nvlink is up
384 */
385 .ch_wdt_timeout_ms = 30000,
386
387 .honors_aperture = true,
388 .dma_mask = DMA_BIT_MASK(40),
389 .vbios_min_version = 0x1,
390 .hardcode_sw_threshold = false,
391 .unified_memory = false,
392 },
393
394};
395
396static struct pci_device_id nvgpu_pci_table[] = {
397 {
398 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35),
399 .class = PCI_BASE_CLASS_DISPLAY << 16,
400 .class_mask = 0xff << 16,
401 .driver_data = 0,
402 },
403 {
404 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36),
405 .class = PCI_BASE_CLASS_DISPLAY << 16,
406 .class_mask = 0xff << 16,
407 .driver_data = 1,
408 },
409 {
410 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37),
411 .class = PCI_BASE_CLASS_DISPLAY << 16,
412 .class_mask = 0xff << 16,
413 .driver_data = 2,
414 },
415 {
416 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75),
417 .class = PCI_BASE_CLASS_DISPLAY << 16,
418 .class_mask = 0xff << 16,
419 .driver_data = 3,
420 },
421 {
422 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1),
423 .class = PCI_BASE_CLASS_DISPLAY << 16,
424 .class_mask = 0xff << 16,
425 .driver_data = 4,
426 },
427 {
428 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0),
429 .class = PCI_BASE_CLASS_DISPLAY << 16,
430 .class_mask = 0xff << 16,
431 .driver_data = 5,
432 },
433 {
434 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe),
435 .class = PCI_BASE_CLASS_DISPLAY << 16,
436 .class_mask = 0xff << 16,
437 .driver_data = 6,
438 },
439 {
440 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1),
441 .class = PCI_BASE_CLASS_DISPLAY << 16,
442 .class_mask = 0xff << 16,
443 .driver_data = 7,
444 },
445 {
446 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f),
447 .class = PCI_BASE_CLASS_DISPLAY << 16,
448 .class_mask = 0xff << 16,
449 .driver_data = 8,
450 },
451 {}
452};
453
454static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id)
455{
456 struct gk20a *g = dev_id;
457 irqreturn_t ret_stall;
458 irqreturn_t ret_nonstall;
459
460 ret_stall = nvgpu_intr_stall(g);
461 ret_nonstall = nvgpu_intr_nonstall(g);
462
463#if defined(CONFIG_PCI_MSI)
464 /* Send MSI EOI */
465 if (g->ops.xve.rearm_msi && g->msi_enabled)
466 g->ops.xve.rearm_msi(g);
467#endif
468
469 return (ret_stall == IRQ_NONE) ? ret_nonstall : IRQ_WAKE_THREAD;
470}
471
472static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id)
473{
474 struct gk20a *g = dev_id;
475
476 return nvgpu_intr_thread_stall(g);
477}
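
The two handlers above implement the usual hard-IRQ/threaded-IRQ split: the primary ISR handles the quick non-stalling work and returns IRQ_WAKE_THREAD when a stalling interrupt needs deferred handling, which makes the core invoke the threaded handler in process context. A minimal sketch of that contract, with hypothetical demo_* names standing in for the nvgpu handlers:

#include <linux/interrupt.h>

static irqreturn_t demo_hardirq(int irq, void *data)
{
	/* fast top half: acknowledge/queue work, must not sleep */
	return IRQ_WAKE_THREAD;		/* ask the core to run demo_thread_fn */
}

static irqreturn_t demo_thread_fn(int irq, void *data)
{
	/* slow bottom half: runs in a dedicated kernel thread, may sleep */
	return IRQ_HANDLED;
}

/*
 * Registration pairs the two handlers, mirroring the
 * devm_request_threaded_irq() call made later in nvgpu_pci_probe():
 *
 *	err = request_threaded_irq(irq, demo_hardirq, demo_thread_fn,
 *				   IRQF_SHARED, "demo", data);
 */
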
478
479static int nvgpu_pci_init_support(struct pci_dev *pdev)
480{
481 int err = 0;
482 struct gk20a *g = get_gk20a(&pdev->dev);
483 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
484 struct device *dev = &pdev->dev;
485
486 l->regs = nvgpu_devm_ioremap(dev, pci_resource_start(pdev, 0),
487 pci_resource_len(pdev, 0));
488 if (IS_ERR(l->regs)) {
489 nvgpu_err(g, "failed to remap gk20a registers");
490 err = PTR_ERR(l->regs);
491 goto fail;
492 }
493
494 l->regs_bus_addr = pci_resource_start(pdev, 0);
495 if (!l->regs_bus_addr) {
496 nvgpu_err(g, "failed to read register bus offset");
497 err = -ENODEV;
498 goto fail;
499 }
500
501 l->bar1 = nvgpu_devm_ioremap(dev, pci_resource_start(pdev, 1),
502 pci_resource_len(pdev, 1));
503 if (IS_ERR(l->bar1)) {
504 nvgpu_err(g, "failed to remap gk20a bar1");
505 err = PTR_ERR(l->bar1);
506 goto fail;
507 }
508
509 err = nvgpu_init_sim_support_linux_pci(g);
510 if (err)
511 goto fail;
512 err = nvgpu_init_sim_support_pci(g);
513 if (err)
514 goto fail_sim;
515
516 nvgpu_pci_init_usermode_support(l);
517
518 return 0;
519
520 fail_sim:
521 nvgpu_remove_sim_support_linux_pci(g);
522 fail:
523 if (l->regs)
524 l->regs = NULL;
525
526 if (l->bar1)
527 l->bar1 = NULL;
528
529 return err;
530}
531
532static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode)
533{
534 if (mode)
535 *mode = S_IRUGO | S_IWUGO;
536 return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev));
537}
538
539static struct class nvgpu_pci_class = {
540 .owner = THIS_MODULE,
541 .name = "nvidia-pci-gpu",
542 .devnode = nvgpu_pci_devnode,
543};
544
545#ifdef CONFIG_PM
546static int nvgpu_pci_pm_runtime_resume(struct device *dev)
547{
548 return gk20a_pm_finalize_poweron(dev);
549}
550
551static int nvgpu_pci_pm_runtime_suspend(struct device *dev)
552{
553 return 0;
554}
555
556static int nvgpu_pci_pm_resume(struct device *dev)
557{
558 return gk20a_pm_finalize_poweron(dev);
559}
560
561static int nvgpu_pci_pm_suspend(struct device *dev)
562{
563 return 0;
564}
565
566static const struct dev_pm_ops nvgpu_pci_pm_ops = {
567 .runtime_resume = nvgpu_pci_pm_runtime_resume,
568 .runtime_suspend = nvgpu_pci_pm_runtime_suspend,
569 .resume = nvgpu_pci_pm_resume,
570 .suspend = nvgpu_pci_pm_suspend,
571};
572#endif
573
574static int nvgpu_pci_pm_init(struct device *dev)
575{
576#ifdef CONFIG_PM
577 struct gk20a *g = get_gk20a(dev);
578
579 if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) {
580 pm_runtime_disable(dev);
581 } else {
582 if (g->railgate_delay)
583 pm_runtime_set_autosuspend_delay(dev,
584 g->railgate_delay);
585
586 /*
587 * set gpu dev's use_autosuspend flag to allow
588 * runtime power management of GPU
589 */
590 pm_runtime_use_autosuspend(dev);
591
592 /*
593 * runtime PM for PCI devices is forbidden
594 * by default, so unblock RTPM of GPU
595 */
596 pm_runtime_put_noidle(dev);
597 pm_runtime_allow(dev);
598 }
599#endif
600 return 0;
601}
602
603static int nvgpu_pci_pm_deinit(struct device *dev)
604{
605#ifdef CONFIG_PM
606 struct gk20a *g = get_gk20a(dev);
607
608 if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE))
609 pm_runtime_enable(dev);
610 else
611 pm_runtime_forbid(dev);
612#endif
613 return 0;
614}
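
nvgpu_pci_pm_init() and nvgpu_pci_pm_deinit() above are deliberately balanced: every runtime-PM state change made at init has a matching call at deinit. A condensed sketch of that pairing under the same CONFIG_PM assumption (hypothetical demo_* helpers; the real code also programs the autosuspend delay):

#include <linux/pm_runtime.h>

static void demo_pm_init(struct device *dev, bool can_railgate)
{
	if (!can_railgate) {
		pm_runtime_disable(dev);	/* balanced by pm_runtime_enable() below */
	} else {
		pm_runtime_use_autosuspend(dev);
		pm_runtime_put_noidle(dev);	/* drop a usage count without idling */
		pm_runtime_allow(dev);		/* lift the PCI core's default forbid */
	}
}

static void demo_pm_deinit(struct device *dev, bool can_railgate)
{
	if (!can_railgate)
		pm_runtime_enable(dev);
	else
		pm_runtime_forbid(dev);		/* balances pm_runtime_allow() above */
}
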
615
616static int nvgpu_pci_probe(struct pci_dev *pdev,
617 const struct pci_device_id *pent)
618{
619 struct gk20a_platform *platform = NULL;
620 struct nvgpu_os_linux *l;
621 struct gk20a *g;
622 int err;
623 char nodefmt[64];
624 struct device_node *np;
625
626 /* make sure driver_data is a sane index */
627 if (pent->driver_data >= sizeof(nvgpu_pci_device) /
628 sizeof(nvgpu_pci_device[0])) {
629 return -EINVAL;
630 }
631
632 l = kzalloc(sizeof(*l), GFP_KERNEL);
633 if (!l) {
634 dev_err(&pdev->dev, "couldn't allocate gk20a support");
635 return -ENOMEM;
636 }
637
638 hash_init(l->ecc_sysfs_stats_htable);
639
640 g = &l->g;
641
642 g->log_mask = NVGPU_DEFAULT_DBG_MASK;
643
644 nvgpu_init_gk20a(g);
645
646 nvgpu_kmem_init(g);
647
648 /* Allocate memory to hold platform data*/
649	platform = (struct gk20a_platform *)nvgpu_kzalloc(g,
650 sizeof(struct gk20a_platform));
651 if (!platform) {
652 dev_err(&pdev->dev, "couldn't allocate platform data");
653 err = -ENOMEM;
654 goto err_free_l;
655 }
656
657 /* copy detected device data to allocated platform space*/
658 memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data],
659 sizeof(struct gk20a_platform));
660
661 pci_set_drvdata(pdev, platform);
662
663 err = nvgpu_init_enabled_flags(g);
664 if (err)
665 goto err_free_platform;
666
667 platform->g = g;
668 l->dev = &pdev->dev;
669
670 np = nvgpu_get_node(g);
671 if (of_dma_is_coherent(np)) {
672 __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
673 __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
674 }
675
676 err = pci_enable_device(pdev);
677 if (err)
678 goto err_free_platform;
679 pci_set_master(pdev);
680
681 g->pci_vendor_id = pdev->vendor;
682 g->pci_device_id = pdev->device;
683 g->pci_subsystem_vendor_id = pdev->subsystem_vendor;
684 g->pci_subsystem_device_id = pdev->subsystem_device;
685 g->pci_class = (pdev->class >> 8) & 0xFFFFU; // we only want base/sub
686 g->pci_revision = pdev->revision;
687
688 g->ina3221_dcb_index = platform->ina3221_dcb_index;
689 g->ina3221_i2c_address = platform->ina3221_i2c_address;
690 g->ina3221_i2c_port = platform->ina3221_i2c_port;
691 g->hardcode_sw_threshold = platform->hardcode_sw_threshold;
692
693#if defined(CONFIG_PCI_MSI)
694 err = pci_enable_msi(pdev);
695 if (err) {
696 nvgpu_err(g,
697 "MSI could not be enabled, falling back to legacy");
698 g->msi_enabled = false;
699 } else
700 g->msi_enabled = true;
701#endif
702
703 g->irq_stall = pdev->irq;
704 g->irq_nonstall = pdev->irq;
705 if (g->irq_stall < 0) {
706 err = -ENXIO;
707 goto err_disable_msi;
708 }
709
710 err = devm_request_threaded_irq(&pdev->dev,
711 g->irq_stall,
712 nvgpu_pci_isr,
713 nvgpu_pci_intr_thread,
714#if defined(CONFIG_PCI_MSI)
715 g->msi_enabled ? 0 :
716#endif
717 IRQF_SHARED, "nvgpu", g);
718 if (err) {
719 nvgpu_err(g,
720 "failed to request irq @ %d", g->irq_stall);
721 goto err_disable_msi;
722 }
723 disable_irq(g->irq_stall);
724
725 err = nvgpu_pci_init_support(pdev);
726 if (err)
727 goto err_free_irq;
728
729 if (strchr(dev_name(&pdev->dev), '%')) {
730 nvgpu_err(g, "illegal character in device name");
731 err = -EINVAL;
732 goto err_free_irq;
733 }
734
735 snprintf(nodefmt, sizeof(nodefmt),
736 PCI_INTERFACE_NAME, dev_name(&pdev->dev));
737
738 err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class);
739 if (err)
740 goto err_free_irq;
741
742 err = nvgpu_pci_pm_init(&pdev->dev);
743 if (err) {
744 nvgpu_err(g, "pm init failed");
745 goto err_free_irq;
746 }
747
748 err = nvgpu_nvlink_probe(g);
749 /*
750	 * ENODEV is a legal error which means there is no NVLINK;
751	 * any other error is fatal.
752 */
753 if (err) {
754 if (err != -ENODEV) {
755 nvgpu_err(g, "fatal error probing nvlink, bailing out");
756 goto err_free_irq;
757 }
758 /* Enable Semaphore SHIM on nvlink only for now. */
759 __nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false);
760 __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false);
761 } else {
762 err = nvgpu_nvhost_syncpt_init(g);
763 if (err) {
764 if (err != -ENOSYS) {
765 nvgpu_err(g, "syncpt init failed");
766 goto err_free_irq;
767 }
768 }
769 }
770
771 return 0;
772
773err_free_irq:
774 nvgpu_free_irq(g);
775err_disable_msi:
776#if defined(CONFIG_PCI_MSI)
777 if (g->msi_enabled)
778 pci_disable_msi(pdev);
779#endif
780err_free_platform:
781 nvgpu_kfree(g, platform);
782err_free_l:
783 kfree(l);
784 return err;
785}
786
787static void nvgpu_pci_remove(struct pci_dev *pdev)
788{
789 struct gk20a *g = get_gk20a(&pdev->dev);
790 struct device *dev = dev_from_gk20a(g);
791 int err;
792
793 /* no support yet for unbind if DGPU is in VGPU mode */
794 if (gk20a_gpu_is_virtual(dev))
795 return;
796
797 err = nvgpu_nvlink_deinit(g);
798 WARN(err, "gpu failed to remove nvlink");
799
800 gk20a_driver_start_unload(g);
801
802 err = nvgpu_quiesce(g);
803 /* TODO: handle failure to idle */
804 WARN(err, "gpu failed to idle during driver removal");
805
806 nvgpu_free_irq(g);
807
808 nvgpu_remove(dev, &nvgpu_pci_class);
809
810#if defined(CONFIG_PCI_MSI)
811 if (g->msi_enabled)
812 pci_disable_msi(pdev);
813 else {
814 /* IRQ does not need to be enabled in MSI as the line is not
815 * shared
816 */
817 enable_irq(g->irq_stall);
818 }
819#endif
820 nvgpu_pci_pm_deinit(&pdev->dev);
821
822 /* free allocated platform data space */
823 gk20a_get_platform(&pdev->dev)->g = NULL;
824 nvgpu_kfree(g, gk20a_get_platform(&pdev->dev));
825
826 gk20a_put(g);
827}
828
829static struct pci_driver nvgpu_pci_driver = {
830 .name = "nvgpu",
831 .id_table = nvgpu_pci_table,
832 .probe = nvgpu_pci_probe,
833 .remove = nvgpu_pci_remove,
834#ifdef CONFIG_PM
835 .driver.pm = &nvgpu_pci_pm_ops,
836#endif
837};
838
839int __init nvgpu_pci_init(void)
840{
841 int ret;
842
843 ret = class_register(&nvgpu_pci_class);
844 if (ret)
845 return ret;
846
847 return pci_register_driver(&nvgpu_pci_driver);
848}
849
850void __exit nvgpu_pci_exit(void)
851{
852 pci_unregister_driver(&nvgpu_pci_driver);
853 class_unregister(&nvgpu_pci_class);
854}
diff --git a/include/os/linux/pci.h b/include/os/linux/pci.h
deleted file mode 100644
index cc6b77b..0000000
--- a/include/os/linux/pci.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef NVGPU_PCI_H
17#define NVGPU_PCI_H
18
19#ifdef CONFIG_GK20A_PCI
20int nvgpu_pci_init(void);
21void nvgpu_pci_exit(void);
22#else
23static inline int nvgpu_pci_init(void) { return 0; }
24static inline void nvgpu_pci_exit(void) {}
25#endif
26
27#endif
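
The #ifdef CONFIG_GK20A_PCI block above is the usual stub-header pattern: the inline no-ops let generic module code call the PCI entry points unconditionally. A hedged sketch with a hypothetical demo_module_init()/demo_module_exit() caller:

#include <linux/module.h>

#include "pci.h"

static int __init demo_module_init(void)
{
	int err;

	/* with CONFIG_GK20A_PCI=n this resolves to the inline stub and
	 * returns 0, so no #ifdef is needed at the call site */
	err = nvgpu_pci_init();
	if (err)
		return err;

	/* ... register the platform-bus driver, etc. ... */
	return 0;
}

static void __exit demo_module_exit(void)
{
	nvgpu_pci_exit();	/* likewise a no-op when PCI support is compiled out */
}
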
diff --git a/include/os/linux/pci_usermode.c b/include/os/linux/pci_usermode.c
deleted file mode 100644
index 270b834..0000000
--- a/include/os/linux/pci_usermode.c
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/types.h>
15
16#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
17
18#include "os_linux.h"
19
20void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l)
21{
22 l->usermode_regs = l->regs + usermode_cfg0_r();
23 l->usermode_regs_saved = l->usermode_regs;
24}
diff --git a/include/os/linux/pci_usermode.h b/include/os/linux/pci_usermode.h
deleted file mode 100644
index 25a08d2..0000000
--- a/include/os/linux/pci_usermode.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __NVGPU_PCI_USERMODE_H__
17#define __NVGPU_PCI_USERMODE_H__
18
19struct nvgpu_os_linux;
20
21void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l);
22
23#endif
diff --git a/include/os/linux/platform_gk20a.h b/include/os/linux/platform_gk20a.h
deleted file mode 100644
index adec860..0000000
--- a/include/os/linux/platform_gk20a.h
+++ /dev/null
@@ -1,329 +0,0 @@
1/*
2 * GK20A Platform (SoC) Interface
3 *
4 * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _GK20A_PLATFORM_H_
17#define _GK20A_PLATFORM_H_
18
19#include <linux/device.h>
20
21#include <nvgpu/lock.h>
22#include <nvgpu/gk20a.h>
23
24#define GK20A_CLKS_MAX 4
25
26struct gk20a;
27struct channel_gk20a;
28struct gr_ctx_buffer_desc;
29struct gk20a_scale_profile;
30
31struct secure_page_buffer {
32 void (*destroy)(struct gk20a *, struct secure_page_buffer *);
33 size_t size;
34 dma_addr_t phys;
35 size_t used;
36};
37
38struct gk20a_platform {
39 /* Populated by the gk20a driver before probing the platform. */
40 struct gk20a *g;
41
42 /* Should be populated at probe. */
43 bool can_railgate_init;
44
45 /* Should be populated at probe. */
46 bool can_tpc_powergate;
47
48 /* Should be populated at probe. */
49 bool can_elpg_init;
50
51 /* Should be populated at probe. */
52 bool has_syncpoints;
53
54 /* channel limit after which to start aggressive sync destroy */
55 unsigned int aggressive_sync_destroy_thresh;
56
57 /* flag to set sync destroy aggressiveness */
58 bool aggressive_sync_destroy;
59
60 /* set if ASPM should be disabled on boot; only makes sense for PCI */
61 bool disable_aspm;
62
63 /* Set if the platform can unify the small/large address spaces. */
64 bool unify_address_spaces;
65
66 /* Clock configuration is stored here. Platform probe is responsible
67 * for filling this data. */
68 struct clk *clk[GK20A_CLKS_MAX];
69 int num_clks;
70 int maxmin_clk_id;
71
72#ifdef CONFIG_RESET_CONTROLLER
73 /* Reset control for device */
74 struct reset_control *reset_control;
75#endif
76 /* valid TPC-MASK */
77 u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS];
78
79 /* Delay before rail gated */
80 int railgate_delay_init;
81
82 /* init value for slowdown factor */
83 u8 ldiv_slowdown_factor_init;
84
85 /* Second Level Clock Gating: true = enable false = disable */
86 bool enable_slcg;
87
88	/* Block Level Clock Gating: true = enable false = disable */
89 bool enable_blcg;
90
91	/* Engine Level Clock Gating: true = enable false = disable */
92 bool enable_elcg;
93
94 /* Should be populated at probe. */
95 bool can_slcg;
96
97 /* Should be populated at probe. */
98 bool can_blcg;
99
100 /* Should be populated at probe. */
101 bool can_elcg;
102
103	/* Engine Level Power Gating: true = enable false = disable */
104 bool enable_elpg;
105
106	/* Adaptive ELPG: true = enable false = disable */
107 bool enable_aelpg;
108
109 /* PMU Perfmon: true = enable false = disable */
110 bool enable_perfmon;
111
112	/* Memory System Clock Gating: true = enable false = disable */
113 bool enable_mscg;
114
115 /* Timeout for per-channel watchdog (in mS) */
116 u32 ch_wdt_timeout_ms;
117
118 /* Disable big page support */
119 bool disable_bigpage;
120
121 /*
122 * gk20a_do_idle() API can take GPU either into rail gate or CAR reset
123 * This flag can be used to force CAR reset case instead of rail gate
124 */
125 bool force_reset_in_do_idle;
126
127	/* guest/vm id, needed for IPA to PA translation */
128 int vmid;
129
130 /* Initialize the platform interface of the gk20a driver.
131 *
132 * The platform implementation of this function must
133 * - set the power and clocks of the gk20a device to a known
134 * state, and
135 * - populate the gk20a_platform structure (a pointer to the
136 * structure can be obtained by calling gk20a_get_platform).
137 *
138 * After this function is finished, the driver will initialise
139 * pm runtime and genpd based on the platform configuration.
140 */
141 int (*probe)(struct device *dev);
142
143 /* Second stage initialisation - called once all power management
144 * initialisations are done.
145 */
146 int (*late_probe)(struct device *dev);
147
148 /* Remove device after power management has been done
149 */
150 int (*remove)(struct device *dev);
151
152 /* Poweron platform dependencies */
153 int (*busy)(struct device *dev);
154
155 /* Powerdown platform dependencies */
156 void (*idle)(struct device *dev);
157
158 /* Preallocated VPR buffer for kernel */
159 size_t secure_buffer_size;
160 struct secure_page_buffer secure_buffer;
161
162 /* Device is going to be suspended */
163 int (*suspend)(struct device *);
164
165 /* Device is going to be resumed */
166 int (*resume)(struct device *);
167
168 /* Called to turn off the device */
169 int (*railgate)(struct device *dev);
170
171 /* Called to turn on the device */
172 int (*unrailgate)(struct device *dev);
173 struct nvgpu_mutex railgate_lock;
174
175 /* Called to check state of device */
176 bool (*is_railgated)(struct device *dev);
177
178 /* get supported frequency list */
179 int (*get_clk_freqs)(struct device *pdev,
180 unsigned long **freqs, int *num_freqs);
181
182 /* clk related supported functions */
183 long (*clk_round_rate)(struct device *dev,
184 unsigned long rate);
185
186 /* Called to register GPCPLL with common clk framework */
187 int (*clk_register)(struct gk20a *g);
188
189 /* platform specific scale init quirks */
190 void (*initscale)(struct device *dev);
191
192 /* Postscale callback is called after frequency change */
193 void (*postscale)(struct device *dev,
194 unsigned long freq);
195
196 /* Pre callback is called before frequency change */
197 void (*prescale)(struct device *dev);
198
199 /* Set TPC_PG_MASK during probe */
200 void (*set_tpc_pg_mask)(struct device *dev, u32 tpc_pg_mask);
201
202 /* Devfreq governor name. If scaling is enabled, we request
203 * this governor to be used in scaling */
204 const char *devfreq_governor;
205
206 /* Quality of service notifier callback. If this is set, the scaling
207 * routines will register a callback to Qos. Each time we receive
208 * a new value, this callback gets called. */
209 int (*qos_notify)(struct notifier_block *nb,
210 unsigned long n, void *p);
211
212 /* Called as part of debug dump. If the gpu gets hung, this function
213 * is responsible for delivering all necessary debug data of other
214 * hw units which may interact with the gpu without direct supervision
215 * of the CPU.
216 */
217 void (*dump_platform_dependencies)(struct device *dev);
218
219 /* Defined when SMMU stage-2 is enabled, and we need to use physical
220 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
221 * configuration, when dGPU is in pass-through mode.
222 */
223 u64 (*phys_addr)(struct gk20a *g, u64 ipa);
224
225 /* Callbacks to assert/deassert GPU reset */
226 int (*reset_assert)(struct device *dev);
227 int (*reset_deassert)(struct device *dev);
228 struct clk *clk_reset;
229 struct dvfs_rail *gpu_rail;
230
231 bool virtual_dev;
232#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
233 void *vgpu_priv;
234#endif
235 /* source frequency for ptimer in hz */
236 u32 ptimer_src_freq;
237
238#ifdef CONFIG_NVGPU_SUPPORT_CDE
239 bool has_cde;
240#endif
241
242 /* soc name for finding firmware files */
243 const char *soc_name;
244
245 /* false if vidmem aperture actually points to sysmem */
246 bool honors_aperture;
247 /* unified or split memory with separate vidmem? */
248 bool unified_memory;
249 /* WAR for gm20b chips. */
250 bool force_128K_pmu_vm;
251
252 /*
253 * DMA mask for Linux (both coh and non-coh). If not set defaults to
254 * 0x3ffffffff (i.e a 34 bit mask).
255 */
256 u64 dma_mask;
257
258 /* minimum supported VBIOS version */
259 u32 vbios_min_version;
260
261 /* true if we run preos microcode on this board */
262 bool run_preos;
263
264 /* true if we need to program sw threshold for
265 * power limits
266 */
267 bool hardcode_sw_threshold;
268
269 /* i2c device index, port and address for INA3221 */
270 u32 ina3221_dcb_index;
271 u32 ina3221_i2c_address;
272 u32 ina3221_i2c_port;
273
274 /* stream id to use */
275 u32 ltc_streamid;
276
277 /* synchronized access to platform->clk_get_freqs */
278 struct nvgpu_mutex clk_get_freq_lock;
279};
280
281static inline struct gk20a_platform *gk20a_get_platform(
282 struct device *dev)
283{
284 return (struct gk20a_platform *)dev_get_drvdata(dev);
285}
286
287#ifdef CONFIG_TEGRA_GK20A
288extern struct gk20a_platform gm20b_tegra_platform;
289extern struct gk20a_platform gp10b_tegra_platform;
290extern struct gk20a_platform gv11b_tegra_platform;
291#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
292extern struct gk20a_platform vgpu_tegra_platform;
293extern struct gk20a_platform gv11b_vgpu_tegra_platform;
294#endif
295#endif
296
297int gk20a_tegra_busy(struct device *dev);
298void gk20a_tegra_idle(struct device *dev);
299void gk20a_tegra_debug_dump(struct device *pdev);
300
301static inline struct gk20a *get_gk20a(struct device *dev)
302{
303 return gk20a_get_platform(dev)->g;
304}
305static inline struct gk20a *gk20a_from_dev(struct device *dev)
306{
307 if (!dev)
308 return NULL;
309
310 return ((struct gk20a_platform *)dev_get_drvdata(dev))->g;
311}
312static inline bool gk20a_gpu_is_virtual(struct device *dev)
313{
314 struct gk20a_platform *platform = dev_get_drvdata(dev);
315
316 return platform->virtual_dev;
317}
318
319static inline int support_gk20a_pmu(struct device *dev)
320{
321 if (IS_ENABLED(CONFIG_GK20A_PMU)) {
322 /* gPMU is not supported for vgpu */
323 return !gk20a_gpu_is_virtual(dev);
324 }
325
326 return 0;
327}
328
329#endif
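
The inline helpers above all walk the same drvdata chain set up at probe time: dev_get_drvdata() returns the struct gk20a_platform, and its ->g member points at the gk20a instance. A minimal sketch of a hypothetical platform callback (demo_platform_busy is not part of the driver) using those helpers:

static int demo_platform_busy(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);	/* dev_get_drvdata() */
	struct gk20a *g = platform->g;					/* same result as get_gk20a(dev) */

	if (gk20a_gpu_is_virtual(dev))
		return 0;	/* e.g. skip hardware access on a vGPU */

	(void)g;		/* a real callback would poke the GPU here */
	return 0;
}
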
diff --git a/include/os/linux/platform_gk20a_tegra.c b/include/os/linux/platform_gk20a_tegra.c
deleted file mode 100644
index c39e4f0..0000000
--- a/include/os/linux/platform_gk20a_tegra.c
+++ /dev/null
@@ -1,966 +0,0 @@
1/*
2 * GK20A Tegra Platform Interface
3 *
4 * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/clkdev.h>
17#include <linux/of_platform.h>
18#include <linux/debugfs.h>
19#include <linux/platform_data/tegra_edp.h>
20#include <linux/delay.h>
21#include <uapi/linux/nvgpu.h>
22#include <linux/dma-buf.h>
23#include <linux/dma-attrs.h>
24#include <linux/nvmap.h>
25#include <linux/reset.h>
26#if defined(CONFIG_TEGRA_DVFS)
27#include <linux/tegra_soctherm.h>
28#endif
29#include <linux/platform/tegra/common.h>
30#include <linux/platform/tegra/mc.h>
31#include <linux/clk/tegra.h>
32#if defined(CONFIG_COMMON_CLK)
33#include <soc/tegra/tegra-dvfs.h>
34#endif
35#ifdef CONFIG_TEGRA_BWMGR
36#include <linux/platform/tegra/emc_bwmgr.h>
37#endif
38
39#include <linux/platform/tegra/tegra_emc.h>
40#include <soc/tegra/chip-id.h>
41
42#include <nvgpu/kmem.h>
43#include <nvgpu/bug.h>
44#include <nvgpu/enabled.h>
45#include <nvgpu/gk20a.h>
46#include <nvgpu/nvhost.h>
47
48#include <nvgpu/linux/dma.h>
49
50#include "gm20b/clk_gm20b.h"
51
52#include "scale.h"
53#include "platform_gk20a.h"
54#include "clk.h"
55#include "os_linux.h"
56
57#include "../../../arch/arm/mach-tegra/iomap.h"
58#include <soc/tegra/pmc.h>
59
60#define TEGRA_GK20A_BW_PER_FREQ 32
61#define TEGRA_GM20B_BW_PER_FREQ 64
62#define TEGRA_DDR3_BW_PER_FREQ 16
63#define TEGRA_DDR4_BW_PER_FREQ 16
64#define MC_CLIENT_GPU 34
65#define PMC_GPU_RG_CNTRL_0 0x2d4
66
67#ifdef CONFIG_COMMON_CLK
68#define GPU_RAIL_NAME "vdd-gpu"
69#else
70#define GPU_RAIL_NAME "vdd_gpu"
71#endif
72
73extern struct device tegra_vpr_dev;
74
75#ifdef CONFIG_TEGRA_BWMGR
76struct gk20a_emc_params {
77 unsigned long bw_ratio;
78 unsigned long freq_last_set;
79 struct tegra_bwmgr_client *bwmgr_cl;
80};
81#else
82struct gk20a_emc_params {
83 unsigned long bw_ratio;
84 unsigned long freq_last_set;
85};
86#endif
87
88#define MHZ_TO_HZ(x) ((x) * 1000000)
89#define HZ_TO_MHZ(x) ((x) / 1000000)
90
91static void gk20a_tegra_secure_page_destroy(struct gk20a *g,
92 struct secure_page_buffer *secure_buffer)
93{
94 DEFINE_DMA_ATTRS(attrs);
95 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
96 dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
97 (void *)(uintptr_t)secure_buffer->phys,
98 secure_buffer->phys, __DMA_ATTR(attrs));
99
100 secure_buffer->destroy = NULL;
101}
102
103static int gk20a_tegra_secure_alloc(struct gk20a *g,
104 struct gr_ctx_buffer_desc *desc,
105 size_t size)
106{
107 struct device *dev = dev_from_gk20a(g);
108 struct gk20a_platform *platform = dev_get_drvdata(dev);
109 struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
110 dma_addr_t phys;
111 struct sg_table *sgt;
112 struct page *page;
113 int err = 0;
114 size_t aligned_size = PAGE_ALIGN(size);
115
116 if (nvgpu_mem_is_valid(&desc->mem))
117 return 0;
118
119 /* We ran out of preallocated memory */
120 if (secure_buffer->used + aligned_size > secure_buffer->size) {
121 nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used",
122 size, secure_buffer->used, secure_buffer->size);
123 return -ENOMEM;
124 }
125
126 phys = secure_buffer->phys + secure_buffer->used;
127
128 sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt));
129 if (!sgt) {
130 nvgpu_err(platform->g, "failed to allocate memory");
131 return -ENOMEM;
132 }
133 err = sg_alloc_table(sgt, 1, GFP_KERNEL);
134 if (err) {
135 nvgpu_err(platform->g, "failed to allocate sg_table");
136 goto fail_sgt;
137 }
138 page = phys_to_page(phys);
139 sg_set_page(sgt->sgl, page, size, 0);
140 /* This bypasses SMMU for VPR during gmmu_map. */
141 sg_dma_address(sgt->sgl) = 0;
142
143 desc->destroy = NULL;
144
145 desc->mem.priv.sgt = sgt;
146 desc->mem.size = size;
147 desc->mem.aperture = APERTURE_SYSMEM;
148
149 secure_buffer->used += aligned_size;
150
151 return err;
152
153fail_sgt:
154 nvgpu_kfree(platform->g, sgt);
155 return err;
156}
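
gk20a_tegra_secure_alloc() above is a bump allocator over the preallocated VPR carveout: the request is page-aligned, rejected if it would overflow the buffer, and otherwise served from the next physical offset while 'used' advances. Individual allocations are never freed; the whole carveout is released by gk20a_tegra_secure_page_destroy(). A self-contained userspace illustration of the same arithmetic, with made-up addresses and sizes:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

struct carveout {
	uint64_t phys;	/* base physical address of the carveout */
	size_t size;	/* total preallocated size */
	size_t used;	/* bump cursor */
};

static int carveout_alloc(struct carveout *c, size_t size, uint64_t *phys)
{
	size_t aligned = PAGE_ALIGN(size);

	if (c->used + aligned > c->size)
		return -1;		/* out of preallocated space */

	*phys = c->phys + c->used;	/* hand out the next chunk */
	c->used += aligned;
	return 0;
}

int main(void)
{
	struct carveout c = { .phys = 0x80000000ULL, .size = 1UL << 20, .used = 0 };
	uint64_t p;

	if (carveout_alloc(&c, 6000, &p) == 0)	/* request rounds up to 8192 bytes */
		printf("chunk at 0x%llx, used %zu of %zu\n",
		       (unsigned long long)p, c.used, c.size);
	return 0;
}
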
157
158/*
159 * gk20a_tegra_get_emc_rate()
160 *
161 * This function returns the minimum emc clock based on gpu frequency
162 */
163
164static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g,
165 struct gk20a_emc_params *emc_params)
166{
167 unsigned long gpu_freq, gpu_fmax_at_vmin;
168 unsigned long emc_rate, emc_scale;
169
170 gpu_freq = clk_get_rate(g->clk.tegra_clk);
171 gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t(
172 clk_get_parent(g->clk.tegra_clk));
173
174 /* When scaling emc, account for the gpu load when the
175 * gpu frequency is less than or equal to fmax@vmin. */
176 if (gpu_freq <= gpu_fmax_at_vmin)
177 emc_scale = min(g->pmu.load_avg, g->emc3d_ratio);
178 else
179 emc_scale = g->emc3d_ratio;
180
181 emc_rate =
182 (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000;
183
184 return MHZ_TO_HZ(emc_rate);
185}
186
187/*
188 * gk20a_tegra_prescale(profile, freq)
189 *
190 * This function informs EDP about changed constraints.
191 */
192
193static void gk20a_tegra_prescale(struct device *dev)
194{
195 struct gk20a *g = get_gk20a(dev);
196 u32 avg = 0;
197
198 nvgpu_pmu_load_norm(g, &avg);
199 tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk));
200}
201
202/*
203 * gk20a_tegra_calibrate_emc()
204 *
205 */
206
207static void gk20a_tegra_calibrate_emc(struct device *dev,
208 struct gk20a_emc_params *emc_params)
209{
210 enum tegra_chipid cid = tegra_get_chip_id();
211 long gpu_bw, emc_bw;
212
213 /* store gpu bw based on soc */
214 switch (cid) {
215 case TEGRA210:
216 gpu_bw = TEGRA_GM20B_BW_PER_FREQ;
217 break;
218 case TEGRA124:
219 case TEGRA132:
220 gpu_bw = TEGRA_GK20A_BW_PER_FREQ;
221 break;
222 default:
223 gpu_bw = 0;
224 break;
225 }
226
227 /* TODO detect DDR type.
228 * Okay for now since DDR3 and DDR4 have the same BW ratio */
229 emc_bw = TEGRA_DDR3_BW_PER_FREQ;
230
231 /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq
232 * NOTE the ratio must come out as an integer */
233 emc_params->bw_ratio = (gpu_bw / emc_bw);
234}
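
Together, gk20a_tegra_calibrate_emc() and gk20a_tegra_get_emc_rate() derive the EMC floor from the GPU clock: bw_ratio is the integer ratio of per-frequency GPU bandwidth to per-frequency EMC bandwidth (64/16 = 4 for GM20B with DDR3/DDR4), and the rate formula scales the GPU frequency in MHz by that ratio and by emc_scale, which the divide by 1000 suggests is a per-mille factor. A runnable userspace illustration with assumed numbers (998 MHz GPU clock, emc_scale of 750):

#include <stdio.h>

#define MHZ_TO_HZ(x)	((x) * 1000000UL)
#define HZ_TO_MHZ(x)	((x) / 1000000UL)

int main(void)
{
	unsigned long gpu_bw = 64, emc_bw = 16;		/* TEGRA_GM20B_BW_PER_FREQ / TEGRA_DDR3_BW_PER_FREQ */
	unsigned long bw_ratio = gpu_bw / emc_bw;	/* 4 */

	unsigned long gpu_freq = MHZ_TO_HZ(998UL);	/* assumed GPU rate, in Hz */
	unsigned long emc_scale = 750;			/* assumed load/emc3d_ratio factor */

	unsigned long emc_rate_mhz =
		HZ_TO_MHZ(gpu_freq) * bw_ratio * emc_scale / 1000;

	/* 998 * 4 * 750 / 1000 = 2994 MHz; the caller later clamps this to
	 * tegra_bwmgr_get_max_emc_rate() before programming the floor */
	printf("EMC floor: %lu MHz\n", emc_rate_mhz);
	return 0;
}
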
235
236#ifdef CONFIG_TEGRA_BWMGR
237#ifdef CONFIG_TEGRA_DVFS
238static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb)
239{
240 struct gk20a_scale_profile *profile = platform->g->scale_profile;
241 struct gk20a_emc_params *params;
242 unsigned long rate;
243
244 if (!profile || !profile->private_data)
245 return;
246
247 params = (struct gk20a_emc_params *)profile->private_data;
248 rate = (enb) ? params->freq_last_set : 0;
249 tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR);
250}
251#endif
252
253static void gm20b_tegra_postscale(struct device *dev, unsigned long freq)
254{
255 struct gk20a_platform *platform = dev_get_drvdata(dev);
256 struct gk20a_scale_profile *profile = platform->g->scale_profile;
257 struct gk20a_emc_params *emc_params;
258 unsigned long emc_rate;
259
260 if (!profile || !profile->private_data)
261 return;
262
263 emc_params = profile->private_data;
264 emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params);
265
266 if (emc_rate > tegra_bwmgr_get_max_emc_rate())
267 emc_rate = tegra_bwmgr_get_max_emc_rate();
268
269 emc_params->freq_last_set = emc_rate;
270 if (platform->is_railgated && platform->is_railgated(dev))
271 return;
272
273 tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate,
274 TEGRA_BWMGR_SET_EMC_FLOOR);
275
276}
277
278#endif
279
280#if defined(CONFIG_TEGRA_DVFS)
281/*
282 * gk20a_tegra_is_railgated()
283 *
284 * Check status of gk20a power rail
285 */
286
287static bool gk20a_tegra_is_railgated(struct device *dev)
288{
289 struct gk20a *g = get_gk20a(dev);
290 struct gk20a_platform *platform = dev_get_drvdata(dev);
291 bool ret = false;
292
293 if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
294 ret = !tegra_dvfs_is_rail_up(platform->gpu_rail);
295
296 return ret;
297}
298
299/*
300 * gm20b_tegra_railgate()
301 *
302 * Gate (disable) gm20b power rail
303 */
304
305static int gm20b_tegra_railgate(struct device *dev)
306{
307 struct gk20a *g = get_gk20a(dev);
308 struct gk20a_platform *platform = dev_get_drvdata(dev);
309 int ret = 0;
310
311 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) ||
312 !tegra_dvfs_is_rail_up(platform->gpu_rail))
313 return 0;
314
315 tegra_mc_flush(MC_CLIENT_GPU);
316
317 udelay(10);
318
319 /* enable clamp */
320 tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0);
321 tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);
322
323 udelay(10);
324
325 platform->reset_assert(dev);
326
327 udelay(10);
328
329 /*
330 * GPCPLL is already disabled before entering this function; reference
331 * clocks are enabled until now - disable them just before rail gating
332 */
333 clk_disable_unprepare(platform->clk_reset);
334 clk_disable_unprepare(platform->clk[0]);
335 clk_disable_unprepare(platform->clk[1]);
336 if (platform->clk[3])
337 clk_disable_unprepare(platform->clk[3]);
338
339 udelay(10);
340
341 tegra_soctherm_gpu_tsens_invalidate(1);
342
343 if (tegra_dvfs_is_rail_up(platform->gpu_rail)) {
344 ret = tegra_dvfs_rail_power_down(platform->gpu_rail);
345 if (ret)
346 goto err_power_off;
347 } else
348 pr_info("No GPU regulator?\n");
349
350#ifdef CONFIG_TEGRA_BWMGR
351 gm20b_bwmgr_set_rate(platform, false);
352#endif
353
354 return 0;
355
356err_power_off:
357 nvgpu_err(platform->g, "Could not railgate GPU");
358 return ret;
359}
360
361
362/*
363 * gm20b_tegra_unrailgate()
364 *
365 * Ungate (enable) gm20b power rail
366 */
367
368static int gm20b_tegra_unrailgate(struct device *dev)
369{
370 struct gk20a_platform *platform = dev_get_drvdata(dev);
371 struct gk20a *g = platform->g;
372 int ret = 0;
373 bool first = false;
374
375 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
376 return 0;
377
378 ret = tegra_dvfs_rail_power_up(platform->gpu_rail);
379 if (ret)
380 return ret;
381
382#ifdef CONFIG_TEGRA_BWMGR
383 gm20b_bwmgr_set_rate(platform, true);
384#endif
385
386 tegra_soctherm_gpu_tsens_invalidate(0);
387
388 if (!platform->clk_reset) {
389 platform->clk_reset = clk_get(dev, "gpu_gate");
390 if (IS_ERR(platform->clk_reset)) {
391 nvgpu_err(g, "fail to get gpu reset clk");
392 goto err_clk_on;
393 }
394 }
395
396 if (!first) {
397 ret = clk_prepare_enable(platform->clk_reset);
398 if (ret) {
399 nvgpu_err(g, "could not turn on gpu_gate");
400 goto err_clk_on;
401 }
402
403 ret = clk_prepare_enable(platform->clk[0]);
404 if (ret) {
405 nvgpu_err(g, "could not turn on gpu pll");
406 goto err_clk_on;
407 }
408 ret = clk_prepare_enable(platform->clk[1]);
409 if (ret) {
410 nvgpu_err(g, "could not turn on pwr clock");
411 goto err_clk_on;
412 }
413
414 if (platform->clk[3]) {
415 ret = clk_prepare_enable(platform->clk[3]);
416 if (ret) {
417 nvgpu_err(g, "could not turn on fuse clock");
418 goto err_clk_on;
419 }
420 }
421 }
422
423 udelay(10);
424
425 platform->reset_assert(dev);
426
427 udelay(10);
428
429 tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0);
430 tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);
431
432 udelay(10);
433
434 clk_disable(platform->clk_reset);
435 platform->reset_deassert(dev);
436 clk_enable(platform->clk_reset);
437
438 /* Flush MC after boot/railgate/SC7 */
439 tegra_mc_flush(MC_CLIENT_GPU);
440
441 udelay(10);
442
443 tegra_mc_flush_done(MC_CLIENT_GPU);
444
445 udelay(10);
446
447 return 0;
448
449err_clk_on:
450 tegra_dvfs_rail_power_down(platform->gpu_rail);
451
452 return ret;
453}
454#endif
455
456
457static struct {
458 char *name;
459 unsigned long default_rate;
460} tegra_gk20a_clocks[] = {
461 {"gpu_ref", UINT_MAX},
462 {"pll_p_out5", 204000000},
463 {"emc", UINT_MAX},
464 {"fuse", UINT_MAX},
465};
466
467
468
469/*
470 * gk20a_tegra_get_clocks()
471 *
472 * This function finds clocks in tegra platform and populates
473 * the clock information to gk20a platform data.
474 */
475
476static int gk20a_tegra_get_clocks(struct device *dev)
477{
478 struct gk20a_platform *platform = dev_get_drvdata(dev);
479 char devname[16];
480 unsigned int i;
481 int ret = 0;
482
483 BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks));
484
485 snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev));
486
487 platform->num_clks = 0;
488 for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
489 long rate = tegra_gk20a_clocks[i].default_rate;
490 struct clk *c;
491
492 c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
493 if (IS_ERR(c)) {
494 ret = PTR_ERR(c);
495 goto err_get_clock;
496 }
497 rate = clk_round_rate(c, rate);
498 clk_set_rate(c, rate);
499 platform->clk[i] = c;
500 }
501 platform->num_clks = i;
502
503 return 0;
504
505err_get_clock:
506
507 while (i--)
508 clk_put(platform->clk[i]);
509 return ret;
510}
511
512#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
513static int gm20b_tegra_reset_assert(struct device *dev)
514{
515 struct gk20a_platform *platform = gk20a_get_platform(dev);
516
517 if (!platform->reset_control) {
518 WARN(1, "Reset control not initialized\n");
519 return -ENOSYS;
520 }
521
522 return reset_control_assert(platform->reset_control);
523}
524
525static int gm20b_tegra_reset_deassert(struct device *dev)
526{
527 struct gk20a_platform *platform = gk20a_get_platform(dev);
528
529 if (!platform->reset_control) {
530 WARN(1, "Reset control not initialized\n");
531 return -ENOSYS;
532 }
533
534 return reset_control_deassert(platform->reset_control);
535}
536#endif
537
538static void gk20a_tegra_scale_init(struct device *dev)
539{
540 struct gk20a_platform *platform = gk20a_get_platform(dev);
541 struct gk20a_scale_profile *profile = platform->g->scale_profile;
542 struct gk20a_emc_params *emc_params;
543 struct gk20a *g = platform->g;
544
545 if (!profile)
546 return;
547
548 if (profile->private_data)
549 return;
550
551 emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params));
552 if (!emc_params)
553 return;
554
555 emc_params->freq_last_set = -1;
556 gk20a_tegra_calibrate_emc(dev, emc_params);
557
558#ifdef CONFIG_TEGRA_BWMGR
559 emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
560 if (!emc_params->bwmgr_cl) {
561 nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__);
562 return;
563 }
564#endif
565
566 profile->private_data = emc_params;
567}
568
569static void gk20a_tegra_scale_exit(struct device *dev)
570{
571 struct gk20a_platform *platform = dev_get_drvdata(dev);
572 struct gk20a_scale_profile *profile = platform->g->scale_profile;
573 struct gk20a_emc_params *emc_params;
574
575 if (!profile)
576 return;
577
578 emc_params = profile->private_data;
579#ifdef CONFIG_TEGRA_BWMGR
580 tegra_bwmgr_unregister(emc_params->bwmgr_cl);
581#endif
582
583 nvgpu_kfree(platform->g, profile->private_data);
584}
585
586void gk20a_tegra_debug_dump(struct device *dev)
587{
588#ifdef CONFIG_TEGRA_GK20A_NVHOST
589 struct gk20a_platform *platform = gk20a_get_platform(dev);
590 struct gk20a *g = platform->g;
591
592 if (g->nvhost_dev)
593 nvgpu_nvhost_debug_dump_device(g->nvhost_dev);
594#endif
595}
596
597int gk20a_tegra_busy(struct device *dev)
598{
599#ifdef CONFIG_TEGRA_GK20A_NVHOST
600 struct gk20a_platform *platform = gk20a_get_platform(dev);
601 struct gk20a *g = platform->g;
602
603 if (g->nvhost_dev)
604 return nvgpu_nvhost_module_busy_ext(g->nvhost_dev);
605#endif
606 return 0;
607}
608
609void gk20a_tegra_idle(struct device *dev)
610{
611#ifdef CONFIG_TEGRA_GK20A_NVHOST
612 struct gk20a_platform *platform = gk20a_get_platform(dev);
613 struct gk20a *g = platform->g;
614
615 if (g->nvhost_dev)
616 nvgpu_nvhost_module_idle_ext(g->nvhost_dev);
617#endif
618}
619
620int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform)
621{
622 struct gk20a *g = platform->g;
623 struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
624 DEFINE_DMA_ATTRS(attrs);
625 dma_addr_t iova;
626
627 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
628 return 0;
629
630 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
631 (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova,
632 GFP_KERNEL, __DMA_ATTR(attrs));
633 /* Some platforms disable VPR. In that case VPR allocations always
634 * fail. Just disable VPR usage in nvgpu in that case. */
635 if (dma_mapping_error(&tegra_vpr_dev, iova))
636 return 0;
637
638 secure_buffer->size = platform->secure_buffer_size;
639 secure_buffer->phys = iova;
640 secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
641
642 g->ops.secure_alloc = gk20a_tegra_secure_alloc;
643 __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true);
644
645 return 0;
646}
647
648#ifdef CONFIG_COMMON_CLK
649static struct clk *gk20a_clk_get(struct gk20a *g)
650{
651 if (!g->clk.tegra_clk) {
652 struct clk *clk, *clk_parent;
653 char clk_dev_id[32];
654 struct device *dev = dev_from_gk20a(g);
655
656 snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev));
657
658 clk = clk_get_sys(clk_dev_id, "gpu");
659 if (IS_ERR(clk)) {
660 nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n",
661 clk_dev_id);
662 return NULL;
663 }
664
665 clk_parent = clk_get_parent(clk);
666 if (IS_ERR_OR_NULL(clk_parent)) {
667			nvgpu_err(g, "fail to get tegra gpu clk parent %s/gpu\n",
668 clk_dev_id);
669 return NULL;
670 }
671
672 g->clk.tegra_clk = clk;
673 g->clk.tegra_clk_parent = clk_parent;
674 }
675
676 return g->clk.tegra_clk;
677}
678
679static int gm20b_clk_prepare_ops(struct clk_hw *hw)
680{
681 struct clk_gk20a *clk = to_clk_gk20a(hw);
682 return gm20b_clk_prepare(clk);
683}
684
685static void gm20b_clk_unprepare_ops(struct clk_hw *hw)
686{
687 struct clk_gk20a *clk = to_clk_gk20a(hw);
688 gm20b_clk_unprepare(clk);
689}
690
691static int gm20b_clk_is_prepared_ops(struct clk_hw *hw)
692{
693 struct clk_gk20a *clk = to_clk_gk20a(hw);
694 return gm20b_clk_is_prepared(clk);
695}
696
697static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate)
698{
699 struct clk_gk20a *clk = to_clk_gk20a(hw);
700 return gm20b_recalc_rate(clk, parent_rate);
701}
702
703static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate,
704 unsigned long parent_rate)
705{
706 struct clk_gk20a *clk = to_clk_gk20a(hw);
707 return gm20b_gpcclk_set_rate(clk, rate, parent_rate);
708}
709
710static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate,
711 unsigned long *parent_rate)
712{
713 struct clk_gk20a *clk = to_clk_gk20a(hw);
714 return gm20b_round_rate(clk, rate, parent_rate);
715}
716
717static const struct clk_ops gm20b_clk_ops = {
718 .prepare = gm20b_clk_prepare_ops,
719 .unprepare = gm20b_clk_unprepare_ops,
720 .is_prepared = gm20b_clk_is_prepared_ops,
721 .recalc_rate = gm20b_recalc_rate_ops,
722 .set_rate = gm20b_gpcclk_set_rate_ops,
723 .round_rate = gm20b_round_rate_ops,
724};
725
726static int gm20b_register_gpcclk(struct gk20a *g)
727{
728 const char *parent_name = "pllg_ref";
729 struct clk_gk20a *clk = &g->clk;
730 struct clk_init_data init;
731 struct clk *c;
732 int err = 0;
733
734 /* make sure the clock is available */
735 if (!gk20a_clk_get(g))
736 return -ENOSYS;
737
738 err = gm20b_init_clk_setup_sw(g);
739 if (err)
740 return err;
741
742 init.name = "gpcclk";
743 init.ops = &gm20b_clk_ops;
744 init.parent_names = &parent_name;
745 init.num_parents = 1;
746 init.flags = 0;
747
748 /* Data in .init is copied by clk_register(), so stack variable OK */
749 clk->hw.init = &init;
750 c = clk_register(dev_from_gk20a(g), &clk->hw);
751 if (IS_ERR(c)) {
752 nvgpu_err(g, "Failed to register GPCPLL clock");
753 return -EINVAL;
754 }
755
756 clk->g = g;
757 clk_register_clkdev(c, "gpcclk", "gpcclk");
758
759 return err;
760}
761#endif /* CONFIG_COMMON_CLK */
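
gm20b_register_gpcclk() above publishes the GPCPLL to the common clock framework and adds a clkdev lookup with clk_register_clkdev(c, "gpcclk", "gpcclk"). A hedged sketch of a hypothetical in-kernel consumer (demo_use_gpcclk is illustrative only) driving it through the standard clk API, which lands in the gm20b_clk_ops callbacks defined above:

#include <linux/clk.h>
#include <linux/err.h>

static int demo_use_gpcclk(void)
{
	struct clk *c = clk_get_sys("gpcclk", "gpcclk");
	int err;

	if (IS_ERR(c))
		return PTR_ERR(c);

	err = clk_prepare_enable(c);			/* -> gm20b_clk_prepare_ops() */
	if (err) {
		clk_put(c);
		return err;
	}

	/* round and set a target GPC clock; the rate is illustrative */
	clk_set_rate(c, clk_round_rate(c, 998000000UL));	/* -> round_rate/set_rate ops */

	clk_disable_unprepare(c);			/* -> gm20b_clk_unprepare_ops() */
	clk_put(c);
	return 0;
}
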
762
763static int gk20a_tegra_probe(struct device *dev)
764{
765 struct gk20a_platform *platform = dev_get_drvdata(dev);
766 struct device_node *np = dev->of_node;
767 bool joint_xpu_rail = false;
768 int ret;
769 struct gk20a *g = platform->g;
770
771#ifdef CONFIG_COMMON_CLK
772 /* DVFS is not guaranteed to be initialized at the time of probe on
773 * kernels with Common Clock Framework enabled.
774 */
775 if (!platform->gpu_rail) {
776 platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME);
777 if (!platform->gpu_rail) {
778 nvgpu_log_info(g, "deferring probe no gpu_rail");
779 return -EPROBE_DEFER;
780 }
781 }
782
783 if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) {
784 nvgpu_log_info(g, "deferring probe gpu_rail not ready");
785 return -EPROBE_DEFER;
786 }
787#endif
788
789#ifdef CONFIG_TEGRA_GK20A_NVHOST
790 ret = nvgpu_get_nvhost_dev(platform->g);
791 if (ret)
792 return ret;
793#endif
794
795#ifdef CONFIG_OF
796 joint_xpu_rail = of_property_read_bool(of_chosen,
797 "nvidia,tegra-joint_xpu_rail");
798#endif
799
800 if (joint_xpu_rail) {
801 nvgpu_log_info(g, "XPU rails are joint\n");
802 platform->can_railgate_init = false;
803 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false);
804 }
805
806 platform->g->clk.gpc_pll.id = GK20A_GPC_PLL;
807 if (tegra_get_chip_id() == TEGRA210) {
808 /* WAR for bug 1547668: Disable railgating and scaling
809 irrespective of platform data if the rework was not made. */
810 np = of_find_node_by_path("/gpu-dvfs-rework");
811 if (!(np && of_device_is_available(np))) {
812 platform->devfreq_governor = "";
813 dev_warn(dev, "board does not support scaling");
814 }
815 platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1;
816 if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p)
817 platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1;
818 }
819
820 if (tegra_get_chip_id() == TEGRA132)
821 platform->soc_name = "tegra13x";
822
823 gk20a_tegra_get_clocks(dev);
824 nvgpu_linux_init_clk_support(platform->g);
825 ret = gk20a_tegra_init_secure_alloc(platform);
826 if (ret)
827 return ret;
828
829 if (platform->clk_register) {
830 ret = platform->clk_register(platform->g);
831 if (ret)
832 return ret;
833 }
834
835 return 0;
836}
837
838static int gk20a_tegra_late_probe(struct device *dev)
839{
840 return 0;
841}
842
843static int gk20a_tegra_remove(struct device *dev)
844{
845 /* deinitialise tegra specific scaling quirks */
846 gk20a_tegra_scale_exit(dev);
847
848#ifdef CONFIG_TEGRA_GK20A_NVHOST
849 nvgpu_free_nvhost_dev(get_gk20a(dev));
850#endif
851
852 return 0;
853}
854
855static int gk20a_tegra_suspend(struct device *dev)
856{
857 tegra_edp_notify_gpu_load(0, 0);
858 return 0;
859}
860
861#if defined(CONFIG_COMMON_CLK)
862static long gk20a_round_clk_rate(struct device *dev, unsigned long rate)
863{
864 struct gk20a_platform *platform = gk20a_get_platform(dev);
865 struct gk20a *g = platform->g;
866
867 /* make sure the clock is available */
868 if (!gk20a_clk_get(g))
869 return rate;
870
871 return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate);
872}
873
874static int gk20a_clk_get_freqs(struct device *dev,
875 unsigned long **freqs, int *num_freqs)
876{
877 struct gk20a_platform *platform = gk20a_get_platform(dev);
878 struct gk20a *g = platform->g;
879
880 /* make sure the clock is available */
881 if (!gk20a_clk_get(g))
882 return -ENOSYS;
883
884 return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk),
885 freqs, num_freqs);
886}
887#endif
888
889struct gk20a_platform gm20b_tegra_platform = {
890 .has_syncpoints = true,
891 .aggressive_sync_destroy_thresh = 64,
892
893 /* power management configuration */
894 .railgate_delay_init = 500,
895 .can_railgate_init = true,
896 .can_elpg_init = true,
897 .enable_slcg = true,
898 .enable_blcg = true,
899 .enable_elcg = true,
900 .can_slcg = true,
901 .can_blcg = true,
902 .can_elcg = true,
903 .enable_elpg = true,
904 .enable_aelpg = true,
905 .enable_perfmon = true,
906 .ptimer_src_freq = 19200000,
907
908 .force_reset_in_do_idle = false,
909
910 .ch_wdt_timeout_ms = 5000,
911
912 .probe = gk20a_tegra_probe,
913 .late_probe = gk20a_tegra_late_probe,
914 .remove = gk20a_tegra_remove,
915 /* power management callbacks */
916 .suspend = gk20a_tegra_suspend,
917
918#if defined(CONFIG_TEGRA_DVFS)
919 .railgate = gm20b_tegra_railgate,
920 .unrailgate = gm20b_tegra_unrailgate,
921 .is_railgated = gk20a_tegra_is_railgated,
922#endif
923
924 .busy = gk20a_tegra_busy,
925 .idle = gk20a_tegra_idle,
926
927#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
928 .reset_assert = gm20b_tegra_reset_assert,
929 .reset_deassert = gm20b_tegra_reset_deassert,
930#else
931 .reset_assert = gk20a_tegra_reset_assert,
932 .reset_deassert = gk20a_tegra_reset_deassert,
933#endif
934
935#if defined(CONFIG_COMMON_CLK)
936 .clk_round_rate = gk20a_round_clk_rate,
937 .get_clk_freqs = gk20a_clk_get_freqs,
938#endif
939
940#ifdef CONFIG_COMMON_CLK
941 .clk_register = gm20b_register_gpcclk,
942#endif
943
944 /* frequency scaling configuration */
945 .initscale = gk20a_tegra_scale_init,
946 .prescale = gk20a_tegra_prescale,
947#ifdef CONFIG_TEGRA_BWMGR
948 .postscale = gm20b_tegra_postscale,
949#endif
950 .devfreq_governor = "nvhost_podgov",
951 .qos_notify = gk20a_scale_qos_notify,
952
953 .dump_platform_dependencies = gk20a_tegra_debug_dump,
954
955#ifdef CONFIG_NVGPU_SUPPORT_CDE
956 .has_cde = true,
957#endif
958
959 .soc_name = "tegra21x",
960
961 .unified_memory = true,
962 .dma_mask = DMA_BIT_MASK(34),
963 .force_128K_pmu_vm = true,
964
965 .secure_buffer_size = 335872,
966};
diff --git a/include/os/linux/platform_gk20a_tegra.h b/include/os/linux/platform_gk20a_tegra.h
deleted file mode 100644
index f7d5040..0000000
--- a/include/os/linux/platform_gk20a_tegra.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * GK20A Platform (SoC) Interface
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_
17#define _NVGPU_PLATFORM_GK20A_TEGRA_H_
18
19struct gk20a_platform;
20
21int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform);
22
23#endif
diff --git a/include/os/linux/platform_gp10b.h b/include/os/linux/platform_gp10b.h
deleted file mode 100644
index d256d12..0000000
--- a/include/os/linux/platform_gp10b.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * GP10B Platform (SoC) Interface
3 *
4 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _GP10B_PLATFORM_H_
26#define _GP10B_PLATFORM_H_
27
28struct device;
29
30int gp10b_tegra_get_clocks(struct device *dev);
31int gp10b_tegra_reset_assert(struct device *dev);
32int gp10b_tegra_reset_deassert(struct device *dev);
33void gp10b_tegra_scale_init(struct device *dev);
34long gp10b_round_clk_rate(struct device *dev, unsigned long rate);
35int gp10b_clk_get_freqs(struct device *dev,
36 unsigned long **freqs, int *num_freqs);
37void gp10b_tegra_prescale(struct device *dev);
38void gp10b_tegra_postscale(struct device *pdev, unsigned long freq);
39#endif
diff --git a/include/os/linux/platform_gp10b_tegra.c b/include/os/linux/platform_gp10b_tegra.c
deleted file mode 100644
index 9bf8d63..0000000
--- a/include/os/linux/platform_gp10b_tegra.c
+++ /dev/null
@@ -1,510 +0,0 @@
1/*
2 * GP10B Tegra Platform Interface
3 *
4 * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/of_platform.h>
17#include <linux/debugfs.h>
18#include <linux/dma-buf.h>
19#include <linux/nvmap.h>
20#include <linux/reset.h>
21#include <linux/platform/tegra/emc_bwmgr.h>
22
23#include <uapi/linux/nvgpu.h>
24
25#include <soc/tegra/tegra_bpmp.h>
26#include <soc/tegra/tegra_powergate.h>
27#include <soc/tegra/tegra-bpmp-dvfs.h>
28
29#include <dt-bindings/memory/tegra-swgroup.h>
30
31#include <nvgpu/kmem.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/enabled.h>
34#include <nvgpu/hashtable.h>
35#include <nvgpu/gk20a.h>
36#include <nvgpu/nvhost.h>
37
38#include "os_linux.h"
39
40#include "clk.h"
41
42#include "platform_gk20a.h"
43#include "platform_gk20a_tegra.h"
44#include "platform_gp10b.h"
45#include "platform_gp10b_tegra.h"
46#include "scale.h"
47
48/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
49#define GP10B_FREQ_SELECT_STEP 8
50/* Allow limited set of frequencies to be available */
51#define GP10B_NUM_SUPPORTED_FREQS 15
52/* Max number of freq supported in h/w */
53#define GP10B_MAX_SUPPORTED_FREQS 120
54static unsigned long
55gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP];
56
57static bool freq_table_init_complete;
58static int num_supported_freq;
59
60#define TEGRA_GP10B_BW_PER_FREQ 64
61#define TEGRA_DDR4_BW_PER_FREQ 16
62
63#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)
64
65#define GPCCLK_INIT_RATE 1000000000
66
67static struct {
68 char *name;
69 unsigned long default_rate;
70} tegra_gp10b_clocks[] = {
71 {"gpu", GPCCLK_INIT_RATE},
72 {"gpu_sys", 204000000} };
73
74/*
75 * gp10b_tegra_get_clocks()
76 *
77 * This function finds the clocks on the Tegra platform and populates
78 * the clock information in the gp10b platform data.
79 */
80
81int gp10b_tegra_get_clocks(struct device *dev)
82{
83 struct gk20a_platform *platform = dev_get_drvdata(dev);
84 unsigned int i;
85
86 platform->num_clks = 0;
87 for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) {
88 long rate = tegra_gp10b_clocks[i].default_rate;
89 struct clk *c;
90
91 c = clk_get(dev, tegra_gp10b_clocks[i].name);
92 if (IS_ERR(c)) {
93 nvgpu_err(platform->g, "cannot get clock %s",
94 tegra_gp10b_clocks[i].name);
95 } else {
96 clk_set_rate(c, rate);
97 platform->clk[i] = c;
98 }
99 }
100 platform->num_clks = i;
101
102 if (platform->clk[0]) {
103 i = tegra_bpmp_dvfs_get_clk_id(dev->of_node,
104 tegra_gp10b_clocks[0].name);
105 if (i > 0)
106 platform->maxmin_clk_id = i;
107 }
108
109 return 0;
110}
111
112void gp10b_tegra_scale_init(struct device *dev)
113{
114 struct gk20a_platform *platform = gk20a_get_platform(dev);
115 struct gk20a_scale_profile *profile = platform->g->scale_profile;
116 struct tegra_bwmgr_client *bwmgr_handle;
117
118 if (!profile)
119 return;
120
121 if ((struct tegra_bwmgr_client *)profile->private_data)
122 return;
123
124 bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
125 if (!bwmgr_handle)
126 return;
127
128 profile->private_data = (void *)bwmgr_handle;
129}
130
131static void gp10b_tegra_scale_exit(struct device *dev)
132{
133 struct gk20a_platform *platform = gk20a_get_platform(dev);
134 struct gk20a_scale_profile *profile = platform->g->scale_profile;
135
136 if (profile && profile->private_data)
137 tegra_bwmgr_unregister(
138 (struct tegra_bwmgr_client *)profile->private_data);
139}
140
141static int gp10b_tegra_probe(struct device *dev)
142{
143 struct gk20a_platform *platform = dev_get_drvdata(dev);
144 bool joint_xpu_rail = false;
145 struct gk20a *g = platform->g;
146	int ret;
147
148#ifdef CONFIG_TEGRA_GK20A_NVHOST
149	ret = nvgpu_get_nvhost_dev(platform->g);
150	if (ret)
151		return ret;
152#endif
153
154 ret = gk20a_tegra_init_secure_alloc(platform);
155 if (ret)
156 return ret;
157
158 platform->disable_bigpage = !device_is_iommuable(dev);
159
160 platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
161 = false;
162 platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
163 = false;
164
165 platform->g->gr.ctx_vars.force_preemption_gfxp = false;
166 platform->g->gr.ctx_vars.force_preemption_cilp = false;
167
168#ifdef CONFIG_OF
169 joint_xpu_rail = of_property_read_bool(of_chosen,
170 "nvidia,tegra-joint_xpu_rail");
171#endif
172
173 if (joint_xpu_rail) {
174 nvgpu_log_info(g, "XPU rails are joint\n");
175 platform->can_railgate_init = false;
176 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false);
177 }
178
179 gp10b_tegra_get_clocks(dev);
180 nvgpu_linux_init_clk_support(platform->g);
181
182 nvgpu_mutex_init(&platform->clk_get_freq_lock);
183
184 platform->g->ops.clk.support_clk_freq_controller = true;
185
186 return 0;
187}
188
189static int gp10b_tegra_late_probe(struct device *dev)
190{
191 return 0;
192}
193
194static int gp10b_tegra_remove(struct device *dev)
195{
196 struct gk20a_platform *platform = gk20a_get_platform(dev);
197
198 /* deinitialise tegra specific scaling quirks */
199 gp10b_tegra_scale_exit(dev);
200
201#ifdef CONFIG_TEGRA_GK20A_NVHOST
202 nvgpu_free_nvhost_dev(get_gk20a(dev));
203#endif
204
205 nvgpu_mutex_destroy(&platform->clk_get_freq_lock);
206
207 return 0;
208}
209
210static bool gp10b_tegra_is_railgated(struct device *dev)
211{
212 bool ret = false;
213
214 if (tegra_bpmp_running())
215 ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU);
216
217 return ret;
218}
219
220static int gp10b_tegra_railgate(struct device *dev)
221{
222 struct gk20a_platform *platform = gk20a_get_platform(dev);
223 struct gk20a_scale_profile *profile = platform->g->scale_profile;
224
225 /* remove emc frequency floor */
226 if (profile)
227 tegra_bwmgr_set_emc(
228 (struct tegra_bwmgr_client *)profile->private_data,
229 0, TEGRA_BWMGR_SET_EMC_FLOOR);
230
231 if (tegra_bpmp_running() &&
232 tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) {
233 int i;
234 for (i = 0; i < platform->num_clks; i++) {
235 if (platform->clk[i])
236 clk_disable_unprepare(platform->clk[i]);
237 }
238 tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU);
239 }
240 return 0;
241}
242
243static int gp10b_tegra_unrailgate(struct device *dev)
244{
245 int ret = 0;
246 struct gk20a_platform *platform = gk20a_get_platform(dev);
247 struct gk20a_scale_profile *profile = platform->g->scale_profile;
248
249 if (tegra_bpmp_running()) {
250 int i;
251 ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU);
252 for (i = 0; i < platform->num_clks; i++) {
253 if (platform->clk[i])
254 clk_prepare_enable(platform->clk[i]);
255 }
256 }
257
258	/* to start with, set emc frequency floor to max rate */
259 if (profile)
260 tegra_bwmgr_set_emc(
261 (struct tegra_bwmgr_client *)profile->private_data,
262 tegra_bwmgr_get_max_emc_rate(),
263 TEGRA_BWMGR_SET_EMC_FLOOR);
264 return ret;
265}
266
267static int gp10b_tegra_suspend(struct device *dev)
268{
269 return 0;
270}
271
272int gp10b_tegra_reset_assert(struct device *dev)
273{
274 struct gk20a_platform *platform = gk20a_get_platform(dev);
275 int ret = 0;
276
277 if (!platform->reset_control)
278 return -EINVAL;
279
280 ret = reset_control_assert(platform->reset_control);
281
282 return ret;
283}
284
285int gp10b_tegra_reset_deassert(struct device *dev)
286{
287 struct gk20a_platform *platform = gk20a_get_platform(dev);
288 int ret = 0;
289
290 if (!platform->reset_control)
291 return -EINVAL;
292
293 ret = reset_control_deassert(platform->reset_control);
294
295 return ret;
296}
297
298void gp10b_tegra_prescale(struct device *dev)
299{
300 struct gk20a *g = get_gk20a(dev);
301 u32 avg = 0;
302
303 nvgpu_log_fn(g, " ");
304
305 nvgpu_pmu_load_norm(g, &avg);
306
307 nvgpu_log_fn(g, "done");
308}
309
310void gp10b_tegra_postscale(struct device *pdev,
311 unsigned long freq)
312{
313 struct gk20a_platform *platform = gk20a_get_platform(pdev);
314 struct gk20a_scale_profile *profile = platform->g->scale_profile;
315 struct gk20a *g = get_gk20a(pdev);
316 unsigned long emc_rate;
317
318 nvgpu_log_fn(g, " ");
319 if (profile && profile->private_data &&
320 !platform->is_railgated(pdev)) {
321 unsigned long emc_scale;
322
323 if (freq <= gp10b_freq_table[0])
324 emc_scale = 0;
325 else
326 emc_scale = g->emc3d_ratio;
327
328 emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000;
329
330 if (emc_rate > tegra_bwmgr_get_max_emc_rate())
331 emc_rate = tegra_bwmgr_get_max_emc_rate();
332
333 tegra_bwmgr_set_emc(
334 (struct tegra_bwmgr_client *)profile->private_data,
335 emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR);
336 }
337 nvgpu_log_fn(g, "done");
338}
339
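For reference, a standalone sketch of the EMC floor arithmetic used in gp10b_tegra_postscale() above. The sample numbers (a 1 GHz gpcclk target and an emc3d_ratio of 500) are illustrative only, not values taken from the driver.

/* Back-of-the-envelope check of the EMC floor formula; compile with any
 * C compiler and run. */
#include <stdio.h>

#define TEGRA_GP10B_BW_PER_FREQ 64
#define TEGRA_DDR4_BW_PER_FREQ  16
#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)

int main(void)
{
	unsigned long long freq = 1000000000ULL;  /* gpcclk target in Hz (assumed) */
	unsigned long long emc_scale = 500;       /* emc3d_ratio, per mille (assumed) */
	unsigned long long emc_rate;

	/* Same arithmetic as gp10b_tegra_postscale(): freq * ratio * scale/1000 */
	emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000;
	printf("EMC floor: %llu Hz\n", emc_rate);  /* prints 2000000000 Hz */
	return 0;
}
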
340long gp10b_round_clk_rate(struct device *dev, unsigned long rate)
341{
342 struct gk20a *g = get_gk20a(dev);
343 struct gk20a_scale_profile *profile = g->scale_profile;
344 unsigned long *freq_table = profile->devfreq_profile.freq_table;
345 int max_states = profile->devfreq_profile.max_state;
346 int i;
347
348 for (i = 0; i < max_states; ++i)
349 if (freq_table[i] >= rate)
350 return freq_table[i];
351
352 return freq_table[max_states - 1];
353}
354
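A minimal userspace sketch of the round-up behaviour in gp10b_round_clk_rate() above: pick the first table entry at or above the requested rate, otherwise fall back to the highest entry. The table contents here are illustrative, not the real devfreq table.

#include <stdio.h>

static unsigned long round_up_rate(const unsigned long *table, int n,
				   unsigned long rate)
{
	int i;

	/* first entry >= rate wins; otherwise return the max entry */
	for (i = 0; i < n; i++)
		if (table[i] >= rate)
			return table[i];
	return table[n - 1];
}

int main(void)
{
	unsigned long table[] = { 114750000UL, 216750000UL, 318750000UL,
				  420750000UL, 522750000UL };

	printf("%lu\n", round_up_rate(table, 5, 400000000UL));
	/* prints 420750000 */
	return 0;
}
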
355int gp10b_clk_get_freqs(struct device *dev,
356 unsigned long **freqs, int *num_freqs)
357{
358 struct gk20a_platform *platform = gk20a_get_platform(dev);
359 struct gk20a *g = platform->g;
360 unsigned long max_rate;
361 unsigned long new_rate = 0, prev_rate = 0;
362 int i, freq_counter = 0;
363 int sel_freq_cnt;
364 unsigned long loc_freq_table[GP10B_MAX_SUPPORTED_FREQS];
365
366 nvgpu_mutex_acquire(&platform->clk_get_freq_lock);
367
368 if (freq_table_init_complete) {
369
370 *freqs = gp10b_freq_table;
371 *num_freqs = num_supported_freq;
372
373 nvgpu_mutex_release(&platform->clk_get_freq_lock);
374
375 return 0;
376 }
377
378 max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1));
379
380 /*
381 * Walk the h/w frequency table and update the local table
382 */
383 for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) {
384 prev_rate = new_rate;
385 new_rate = clk_round_rate(platform->clk[0],
386 prev_rate + 1);
387 loc_freq_table[i] = new_rate;
388 if (new_rate == max_rate)
389 break;
390 }
391 freq_counter = i + 1;
392 WARN_ON(freq_counter == GP10B_MAX_SUPPORTED_FREQS);
393
394 /*
395 * If the number of achievable frequencies is less than or
396 * equal to GP10B_NUM_SUPPORTED_FREQS, select all frequencies
397 * else, select one out of every 8 frequencies
398 */
399 if (freq_counter <= GP10B_NUM_SUPPORTED_FREQS) {
400 for (sel_freq_cnt = 0; sel_freq_cnt < freq_counter; ++sel_freq_cnt)
401 gp10b_freq_table[sel_freq_cnt] =
402 loc_freq_table[sel_freq_cnt];
403 } else {
404 /*
405 * Walk the h/w frequency table and only select
406 * GP10B_FREQ_SELECT_STEP'th frequencies and
407 * add MAX freq to last
408 */
409 sel_freq_cnt = 0;
410 for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) {
411 new_rate = loc_freq_table[i];
412
413 if (i % GP10B_FREQ_SELECT_STEP == 0 ||
414 new_rate == max_rate) {
415 gp10b_freq_table[sel_freq_cnt++] =
416 new_rate;
417
418 if (new_rate == max_rate)
419 break;
420 }
421 }
422 WARN_ON(sel_freq_cnt == GP10B_MAX_SUPPORTED_FREQS);
423 }
424
425 /* Fill freq table */
426 *freqs = gp10b_freq_table;
427 *num_freqs = sel_freq_cnt;
428 num_supported_freq = sel_freq_cnt;
429
430 freq_table_init_complete = true;
431
432 nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n",
433 gp10b_freq_table[0], max_rate, *num_freqs);
434
435 nvgpu_mutex_release(&platform->clk_get_freq_lock);
436
437 return 0;
438}
439
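The frequency decimation in gp10b_clk_get_freqs() keeps every GP10B_FREQ_SELECT_STEP'th rate from the hardware table and always includes the maximum rate. A minimal standalone sketch of that selection step follows; the helper name and the synthetic table are illustrative only.

#include <stdio.h>

#define FREQ_SELECT_STEP 8

/* Select every FREQ_SELECT_STEP'th rate and always keep the last (max) one. */
static int pick_freqs(const unsigned long *in, int n, unsigned long *out)
{
	int i, count = 0;

	for (i = 0; i < n; i++) {
		if (i % FREQ_SELECT_STEP == 0 || i == n - 1)
			out[count++] = in[i];
	}
	return count;
}

int main(void)
{
	unsigned long table[40], picked[40];
	int i, n;

	for (i = 0; i < 40; i++)
		table[i] = 100000000UL + i * 12750000UL;  /* fake h/w table */

	n = pick_freqs(table, 40, picked);
	for (i = 0; i < n; i++)
		printf("%lu\n", picked[i]);  /* 6 rates: indices 0,8,16,24,32,39 */
	return 0;
}
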
440struct gk20a_platform gp10b_tegra_platform = {
441 .has_syncpoints = true,
442
443 /* power management configuration */
444 .railgate_delay_init = 500,
445
446 /* ldiv slowdown factor */
447 .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16,
448
449 /* power management configuration */
450 .can_railgate_init = true,
451 .enable_elpg = true,
452 .can_elpg_init = true,
453 .enable_blcg = true,
454 .enable_slcg = true,
455 .enable_elcg = true,
456 .can_slcg = true,
457 .can_blcg = true,
458 .can_elcg = true,
459 .enable_aelpg = true,
460 .enable_perfmon = true,
461
462	/* ptimer src frequency in hz */
463 .ptimer_src_freq = 31250000,
464
465 .ch_wdt_timeout_ms = 5000,
466
467 .probe = gp10b_tegra_probe,
468 .late_probe = gp10b_tegra_late_probe,
469 .remove = gp10b_tegra_remove,
470
471 /* power management callbacks */
472 .suspend = gp10b_tegra_suspend,
473 .railgate = gp10b_tegra_railgate,
474 .unrailgate = gp10b_tegra_unrailgate,
475 .is_railgated = gp10b_tegra_is_railgated,
476
477 .busy = gk20a_tegra_busy,
478 .idle = gk20a_tegra_idle,
479
480 .dump_platform_dependencies = gk20a_tegra_debug_dump,
481
482#ifdef CONFIG_NVGPU_SUPPORT_CDE
483 .has_cde = true,
484#endif
485
486 .clk_round_rate = gp10b_round_clk_rate,
487 .get_clk_freqs = gp10b_clk_get_freqs,
488
489 /* frequency scaling configuration */
490 .initscale = gp10b_tegra_scale_init,
491 .prescale = gp10b_tegra_prescale,
492 .postscale = gp10b_tegra_postscale,
493 .devfreq_governor = "nvhost_podgov",
494
495 .qos_notify = gk20a_scale_qos_notify,
496
497 .reset_assert = gp10b_tegra_reset_assert,
498 .reset_deassert = gp10b_tegra_reset_deassert,
499
500 .force_reset_in_do_idle = false,
501
502 .soc_name = "tegra18x",
503
504 .unified_memory = true,
505 .dma_mask = DMA_BIT_MASK(36),
506
507 .ltc_streamid = TEGRA_SID_GPUB,
508
509 .secure_buffer_size = 401408,
510};
diff --git a/include/os/linux/platform_gp10b_tegra.h b/include/os/linux/platform_gp10b_tegra.h
deleted file mode 100644
index 85b46b9..0000000
--- a/include/os/linux/platform_gp10b_tegra.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef _PLATFORM_GP10B_TEGRA_H_
18#define _PLATFORM_GP10B_TEGRA_H_
19
20#include "gp10b/gr_gp10b.h"
21
22#endif
diff --git a/include/os/linux/platform_gv11b_tegra.c b/include/os/linux/platform_gv11b_tegra.c
deleted file mode 100644
index 7900eaa..0000000
--- a/include/os/linux/platform_gv11b_tegra.c
+++ /dev/null
@@ -1,331 +0,0 @@
1/*
2 * GV11B Tegra Platform Interface
3 *
4 * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/of_platform.h>
20#include <linux/debugfs.h>
21#include <linux/dma-buf.h>
22#include <linux/nvmap.h>
23#include <linux/reset.h>
24#include <linux/hashtable.h>
25#include <linux/clk.h>
26#include <linux/platform/tegra/emc_bwmgr.h>
27
28#include <nvgpu/gk20a.h>
29#include <nvgpu/nvhost.h>
30
31#include <uapi/linux/nvgpu.h>
32
33#include <soc/tegra/tegra_bpmp.h>
34#include <soc/tegra/tegra_powergate.h>
35
36#include "platform_gk20a.h"
37#include "clk.h"
38#include "scale.h"
39
40#include "platform_gp10b.h"
41#include "platform_gp10b_tegra.h"
42
43#include "os_linux.h"
44#include "platform_gk20a_tegra.h"
45#include "gv11b/gr_gv11b.h"
46
47#define EMC3D_GV11B_RATIO 500
48
49void gv11b_tegra_scale_init(struct device *dev)
50{
51 struct gk20a_platform *platform = gk20a_get_platform(dev);
52 struct gk20a_scale_profile *profile = platform->g->scale_profile;
53
54 if (!profile)
55 return;
56
57 platform->g->emc3d_ratio = EMC3D_GV11B_RATIO;
58
59 gp10b_tegra_scale_init(dev);
60}
61
62static void gv11b_tegra_scale_exit(struct device *dev)
63{
64 struct gk20a_platform *platform = gk20a_get_platform(dev);
65 struct gk20a_scale_profile *profile = platform->g->scale_profile;
66
67 if (profile)
68 tegra_bwmgr_unregister(
69 (struct tegra_bwmgr_client *)profile->private_data);
70}
71
72static int gv11b_tegra_probe(struct device *dev)
73{
74 struct gk20a_platform *platform = dev_get_drvdata(dev);
75 int err;
76 bool joint_xpu_rail = false;
77 struct gk20a *g = platform->g;
78
79 err = nvgpu_nvhost_syncpt_init(platform->g);
80 if (err) {
81 if (err != -ENOSYS)
82 return err;
83 }
84
85 err = gk20a_tegra_init_secure_alloc(platform);
86 if (err)
87 return err;
88
89 platform->disable_bigpage = !device_is_iommuable(dev);
90
91 platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
92 = false;
93 platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
94 = false;
95
96 platform->g->gr.ctx_vars.force_preemption_gfxp = false;
97 platform->g->gr.ctx_vars.force_preemption_cilp = false;
98
99#ifdef CONFIG_OF
100 joint_xpu_rail = of_property_read_bool(of_chosen,
101 "nvidia,tegra-joint_xpu_rail");
102#endif
103
104 if (joint_xpu_rail) {
105 nvgpu_log_info(g, "XPU rails are joint\n");
106 platform->can_railgate_init = false;
107 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false);
108 }
109
110 gp10b_tegra_get_clocks(dev);
111 nvgpu_linux_init_clk_support(platform->g);
112
113 nvgpu_mutex_init(&platform->clk_get_freq_lock);
114
115 platform->g->ops.clk.support_clk_freq_controller = true;
116
117 return 0;
118}
119
120static int gv11b_tegra_late_probe(struct device *dev)
121{
122 return 0;
123}
124
125
126static int gv11b_tegra_remove(struct device *dev)
127{
128 struct gk20a_platform *platform = gk20a_get_platform(dev);
129
130 gv11b_tegra_scale_exit(dev);
131
132#ifdef CONFIG_TEGRA_GK20A_NVHOST
133 nvgpu_free_nvhost_dev(get_gk20a(dev));
134#endif
135
136 nvgpu_mutex_destroy(&platform->clk_get_freq_lock);
137
138 return 0;
139}
140
141static bool gv11b_tegra_is_railgated(struct device *dev)
142{
143 bool ret = false;
144#ifdef TEGRA194_POWER_DOMAIN_GPU
145 struct gk20a *g = get_gk20a(dev);
146
147 if (tegra_bpmp_running()) {
148 nvgpu_log(g, gpu_dbg_info, "bpmp running");
149 ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU);
150
151 nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no");
152 } else {
153 nvgpu_log(g, gpu_dbg_info, "bpmp not running");
154 }
155#endif
156 return ret;
157}
158
159static int gv11b_tegra_railgate(struct device *dev)
160{
161#ifdef TEGRA194_POWER_DOMAIN_GPU
162 struct gk20a_platform *platform = gk20a_get_platform(dev);
163 struct gk20a_scale_profile *profile = platform->g->scale_profile;
164 struct gk20a *g = get_gk20a(dev);
165 int i;
166
167 /* remove emc frequency floor */
168 if (profile)
169 tegra_bwmgr_set_emc(
170 (struct tegra_bwmgr_client *)profile->private_data,
171 0, TEGRA_BWMGR_SET_EMC_FLOOR);
172
173 if (tegra_bpmp_running()) {
174 nvgpu_log(g, gpu_dbg_info, "bpmp running");
175 if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) {
176 nvgpu_log(g, gpu_dbg_info, "powergate is not powered");
177 return 0;
178 }
179 nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare");
180 for (i = 0; i < platform->num_clks; i++) {
181 if (platform->clk[i])
182 clk_disable_unprepare(platform->clk[i]);
183 }
184 nvgpu_log(g, gpu_dbg_info, "powergate_partition");
185 tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU);
186 } else {
187 nvgpu_log(g, gpu_dbg_info, "bpmp not running");
188 }
189#endif
190 return 0;
191}
192
193static int gv11b_tegra_unrailgate(struct device *dev)
194{
195 int ret = 0;
196#ifdef TEGRA194_POWER_DOMAIN_GPU
197 struct gk20a_platform *platform = gk20a_get_platform(dev);
198 struct gk20a *g = get_gk20a(dev);
199 struct gk20a_scale_profile *profile = platform->g->scale_profile;
200 int i;
201
202 if (tegra_bpmp_running()) {
203 nvgpu_log(g, gpu_dbg_info, "bpmp running");
204 ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU);
205 if (ret) {
206 nvgpu_log(g, gpu_dbg_info,
207 "unpowergate partition failed");
208 return ret;
209 }
210 nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable");
211 for (i = 0; i < platform->num_clks; i++) {
212 if (platform->clk[i])
213 clk_prepare_enable(platform->clk[i]);
214 }
215 } else {
216 nvgpu_log(g, gpu_dbg_info, "bpmp not running");
217 }
218
219	/* to start with, set emc frequency floor to max rate */
220 if (profile)
221 tegra_bwmgr_set_emc(
222 (struct tegra_bwmgr_client *)profile->private_data,
223 tegra_bwmgr_get_max_emc_rate(),
224 TEGRA_BWMGR_SET_EMC_FLOOR);
225#endif
226 return ret;
227}
228
229static int gv11b_tegra_suspend(struct device *dev)
230{
231 return 0;
232}
233
234static bool is_tpc_mask_valid(struct gk20a_platform *platform, u32 tpc_pg_mask)
235{
236 u32 i;
237 bool valid = false;
238
239 for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) {
240 if (tpc_pg_mask == platform->valid_tpc_mask[i]) {
241 valid = true;
242 break;
243 }
244 }
245 return valid;
246}
247
248static void gv11b_tegra_set_tpc_pg_mask(struct device *dev, u32 tpc_pg_mask)
249{
250 struct gk20a_platform *platform = gk20a_get_platform(dev);
251 struct gk20a *g = get_gk20a(dev);
252
253 if (is_tpc_mask_valid(platform, tpc_pg_mask)) {
254 g->tpc_pg_mask = tpc_pg_mask;
255 }
256
257}
258
259struct gk20a_platform gv11b_tegra_platform = {
260 .has_syncpoints = true,
261
262	/* ptimer src frequency in hz */
263 .ptimer_src_freq = 31250000,
264
265 .ch_wdt_timeout_ms = 5000,
266
267 .probe = gv11b_tegra_probe,
268 .late_probe = gv11b_tegra_late_probe,
269 .remove = gv11b_tegra_remove,
270 .railgate_delay_init = 500,
271 .can_railgate_init = true,
272
273 .can_tpc_powergate = true,
274 .valid_tpc_mask[0] = 0x0,
275 .valid_tpc_mask[1] = 0x1,
276 .valid_tpc_mask[2] = 0x2,
277 .valid_tpc_mask[3] = 0x4,
278 .valid_tpc_mask[4] = 0x8,
279 .valid_tpc_mask[5] = 0x5,
280 .valid_tpc_mask[6] = 0x6,
281 .valid_tpc_mask[7] = 0x9,
282 .valid_tpc_mask[8] = 0xa,
283
284 .set_tpc_pg_mask = gv11b_tegra_set_tpc_pg_mask,
285
286 .can_slcg = true,
287 .can_blcg = true,
288 .can_elcg = true,
289 .enable_slcg = true,
290 .enable_blcg = true,
291 .enable_elcg = true,
292 .enable_perfmon = true,
293
294 /* power management configuration */
295 .enable_elpg = true,
296 .can_elpg_init = true,
297 .enable_aelpg = true,
298
299 /* power management callbacks */
300 .suspend = gv11b_tegra_suspend,
301 .railgate = gv11b_tegra_railgate,
302 .unrailgate = gv11b_tegra_unrailgate,
303 .is_railgated = gv11b_tegra_is_railgated,
304
305 .busy = gk20a_tegra_busy,
306 .idle = gk20a_tegra_idle,
307
308 .clk_round_rate = gp10b_round_clk_rate,
309 .get_clk_freqs = gp10b_clk_get_freqs,
310
311 /* frequency scaling configuration */
312 .initscale = gv11b_tegra_scale_init,
313 .prescale = gp10b_tegra_prescale,
314 .postscale = gp10b_tegra_postscale,
315 .devfreq_governor = "nvhost_podgov",
316
317 .qos_notify = gk20a_scale_qos_notify,
318
319 .dump_platform_dependencies = gk20a_tegra_debug_dump,
320
321 .soc_name = "tegra19x",
322
323 .honors_aperture = true,
324 .unified_memory = true,
325 .dma_mask = DMA_BIT_MASK(38),
326
327 .reset_assert = gp10b_tegra_reset_assert,
328 .reset_deassert = gp10b_tegra_reset_deassert,
329
330 .secure_buffer_size = 667648,
331};
diff --git a/include/os/linux/rwsem.c b/include/os/linux/rwsem.c
deleted file mode 100644
index 297ddf1..0000000
--- a/include/os/linux/rwsem.c
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/rwsem.h>
15
16void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem)
17{
18 init_rwsem(&rwsem->rwsem);
19}
20
21void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem)
22{
23 up_read(&rwsem->rwsem);
24}
25
26void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem)
27{
28 down_read(&rwsem->rwsem);
29}
30
31void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem)
32{
33 up_write(&rwsem->rwsem);
34}
35
36void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem)
37{
38 down_write(&rwsem->rwsem);
39}
diff --git a/include/os/linux/scale.c b/include/os/linux/scale.c
deleted file mode 100644
index f8f0ef9..0000000
--- a/include/os/linux/scale.c
+++ /dev/null
@@ -1,428 +0,0 @@
1/*
2 * gk20a clock scaling profile
3 *
4 * Copyright (c) 2013-2023, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/devfreq.h>
20#include <linux/export.h>
21#include <soc/tegra/chip-id.h>
22#include <linux/pm_qos.h>
23
24#include <governor.h>
25
26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h>
28#include <nvgpu/gk20a.h>
29#include <nvgpu/clk_arb.h>
30
31#include "platform_gk20a.h"
32#include "scale.h"
33#include "os_linux.h"
34
35/*
36 * gk20a_scale_qos_notify()
37 *
38 * This function is called when the minimum QoS requirement for the device
39 * has changed. The function calls postscaling callback if it is defined.
40 */
41
42#if defined(CONFIG_GK20A_PM_QOS) && defined(CONFIG_COMMON_CLK)
43int gk20a_scale_qos_notify(struct notifier_block *nb,
44 unsigned long n, void *p)
45{
46 struct gk20a_scale_profile *profile =
47 container_of(nb, struct gk20a_scale_profile,
48 qos_notify_block);
49 struct gk20a *g = get_gk20a(profile->dev);
50 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
51 struct devfreq *devfreq = l->devfreq;
52
53 if (!devfreq)
54 return NOTIFY_OK;
55
56 mutex_lock(&devfreq->lock);
57 /* check for pm_qos min and max frequency requirement */
58 profile->qos_min_freq =
59 (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
60 profile->qos_max_freq =
61 (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
62
63 if (profile->qos_min_freq > profile->qos_max_freq) {
64 nvgpu_err(g,
65 "QoS: setting invalid limit, min_freq=%lu max_freq=%lu",
66 profile->qos_min_freq, profile->qos_max_freq);
67 profile->qos_min_freq = profile->qos_max_freq;
68 }
69
70 update_devfreq(devfreq);
71 mutex_unlock(&devfreq->lock);
72
73 return NOTIFY_OK;
74}
75#elif defined(CONFIG_GK20A_PM_QOS)
76int gk20a_scale_qos_notify(struct notifier_block *nb,
77 unsigned long n, void *p)
78{
79 struct gk20a_scale_profile *profile =
80 container_of(nb, struct gk20a_scale_profile,
81 qos_notify_block);
82 struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
83 struct gk20a *g = get_gk20a(profile->dev);
84 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
85 unsigned long freq;
86
87 if (!platform->postscale)
88 return NOTIFY_OK;
89
90 /* get the frequency requirement. if devfreq is enabled, check if it
91 * has higher demand than qos */
92 freq = platform->clk_round_rate(profile->dev,
93 (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS));
94 if (l->devfreq)
95 freq = max(l->devfreq->previous_freq, freq);
96
97 /* Update gpu load because we may scale the emc target
98 * if the gpu load changed. */
99 nvgpu_pmu_load_update(g);
100 platform->postscale(profile->dev, freq);
101
102 return NOTIFY_OK;
103}
104#else
105int gk20a_scale_qos_notify(struct notifier_block *nb,
106 unsigned long n, void *p)
107{
108 return 0;
109}
110#endif
111
112/*
113 * gk20a_scale_make_freq_table(profile)
114 *
115 * This function initialises the frequency table for the given device profile
116 */
117
118static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
119{
120 struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
121 int num_freqs, err;
122 unsigned long *freqs;
123
124 if (platform->get_clk_freqs) {
125 /* get gpu frequency table */
126 err = platform->get_clk_freqs(profile->dev, &freqs,
127 &num_freqs);
128
129 if (err)
130 return -ENOSYS;
131 } else
132 return -ENOSYS;
133
134 profile->devfreq_profile.freq_table = (unsigned long *)freqs;
135 profile->devfreq_profile.max_state = num_freqs;
136
137 return 0;
138}
139
140/*
141 * gk20a_scale_target(dev, *freq, flags)
142 *
143 * This function scales the clock
144 */
145
146static int gk20a_scale_target(struct device *dev, unsigned long *freq,
147 u32 flags)
148{
149 struct gk20a_platform *platform = dev_get_drvdata(dev);
150 struct gk20a *g = platform->g;
151 struct gk20a_scale_profile *profile = g->scale_profile;
152 unsigned long local_freq = *freq;
153 unsigned long rounded_rate;
154#ifdef CONFIG_GK20A_PM_QOS
155 unsigned long min_freq = 0, max_freq = 0;
156#endif
157
158 if (nvgpu_clk_arb_has_active_req(g))
159 return 0;
160
161#ifdef CONFIG_GK20A_PM_QOS
162 /*
163	 * devfreq takes care of min/max freq clipping in update_devfreq() and
164	 * then invokes devfreq->profile->target(), so we only need to do freq
165	 * clipping based on the pm_qos constraint
166 */
167 min_freq = profile->qos_min_freq;
168 max_freq = profile->qos_max_freq;
169
170 if (min_freq > max_freq)
171 min_freq = max_freq;
172
173 /* Clip requested frequency */
174 if (local_freq < min_freq)
175 local_freq = min_freq;
176
177 if (local_freq > max_freq)
178 local_freq = max_freq;
179#endif
180
181 /* set the final frequency */
182 rounded_rate = platform->clk_round_rate(dev, local_freq);
183
184 /* Check for duplicate request */
185 if (rounded_rate == g->last_freq)
186 return 0;
187
188 if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate)
189 *freq = rounded_rate;
190 else {
191 g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
192 *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
193 }
194
195 g->last_freq = *freq;
196
197 /* postscale will only scale emc (dram clock) if evaluating
198 * gk20a_tegra_get_emc_rate() produces a new or different emc
199	 * target because the load and/or gpufreq has changed */
200 if (platform->postscale)
201 platform->postscale(dev, rounded_rate);
202
203 return 0;
204}
205
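The pm_qos handling in gk20a_scale_target() collapses an inverted qos window (min above max) to the max bound and then clamps the requested frequency into the window. A minimal sketch of that clipping, with illustrative bounds:

#include <stdio.h>

static unsigned long clip_freq(unsigned long freq,
			       unsigned long min_freq,
			       unsigned long max_freq)
{
	/* an inverted qos request collapses the window to max_freq */
	if (min_freq > max_freq)
		min_freq = max_freq;

	if (freq < min_freq)
		freq = min_freq;
	if (freq > max_freq)
		freq = max_freq;
	return freq;
}

int main(void)
{
	printf("%lu\n", clip_freq(1300000000UL, 230000000UL, 1000000000UL));
	/* prints 1000000000: the request is clipped to the qos max */
	return 0;
}
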
206/*
207 * update_load_estimate_busy_cycles(dev)
208 *
209 * Update the load estimate using pmu idle counters. The result is
210 * normalised over the time elapsed since the last query.
211 */
212
213static void update_load_estimate_busy_cycles(struct device *dev)
214{
215 struct gk20a *g = get_gk20a(dev);
216 struct gk20a_scale_profile *profile = g->scale_profile;
217 unsigned long dt;
218 u32 busy_cycles_norm;
219 ktime_t t;
220
221 t = ktime_get();
222 dt = ktime_us_delta(t, profile->last_event_time);
223
224 profile->dev_stat.total_time = dt;
225 profile->last_event_time = t;
226 nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm);
227 profile->dev_stat.busy_time =
228 (busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX;
229}
230
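For illustration, the normalisation above scales the PMU busy-cycle reading by the length of the sampling window. A standalone sketch follows; PMU_BUSY_CYCLES_NORM_MAX is assumed to be 1000 here purely for the example, and the sample counter values are made up.

#include <stdio.h>

#define PMU_BUSY_CYCLES_NORM_MAX 1000ULL  /* assumed value for this sketch */

int main(void)
{
	unsigned long long dt = 25000;       /* window length in us */
	unsigned long long busy_norm = 420;  /* normalised busy cycles from PMU */
	unsigned long long busy_time;

	/* busy_time is the share of the window the GPU was busy */
	busy_time = (busy_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX;
	printf("busy %llu us of %llu us (%.0f%%)\n",
	       busy_time, dt, 100.0 * busy_time / dt);  /* 10500 us, 42% */
	return 0;
}
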
231/*
232 * gk20a_scale_suspend(dev)
233 *
234 * This function informs devfreq of suspend
235 */
236
237void gk20a_scale_suspend(struct device *dev)
238{
239 struct gk20a *g = get_gk20a(dev);
240 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
241 struct devfreq *devfreq = l->devfreq;
242
243 if (!devfreq)
244 return;
245
246 devfreq_suspend_device(devfreq);
247}
248
249/*
250 * gk20a_scale_resume(dev)
251 *
252 * This function informs devfreq of resume
253 */
254
255void gk20a_scale_resume(struct device *dev)
256{
257 struct gk20a *g = get_gk20a(dev);
258 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
259 struct devfreq *devfreq = l->devfreq;
260
261 if (!devfreq)
262 return;
263
264 g->last_freq = 0;
265 devfreq_resume_device(devfreq);
266}
267
268/*
269 * gk20a_scale_get_dev_status(dev, *stat)
270 *
271 * This function queries the current device status.
272 */
273
274static int gk20a_scale_get_dev_status(struct device *dev,
275 struct devfreq_dev_status *stat)
276{
277 struct gk20a *g = get_gk20a(dev);
278 struct gk20a_scale_profile *profile = g->scale_profile;
279 struct gk20a_platform *platform = dev_get_drvdata(dev);
280
281 /* inform edp about new constraint */
282 if (platform->prescale)
283 platform->prescale(dev);
284
285 /* Make sure there are correct values for the current frequency */
286 profile->dev_stat.current_frequency =
287 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
288
289 /* Update load estimate */
290 update_load_estimate_busy_cycles(dev);
291
292 /* Copy the contents of the current device status */
293 *stat = profile->dev_stat;
294
295 /* Finally, clear out the local values */
296 profile->dev_stat.total_time = 0;
297 profile->dev_stat.busy_time = 0;
298
299 return 0;
300}
301
302/*
303 * get_cur_freq(struct device *dev, unsigned long *freq)
304 *
305 * This function gets the current GPU clock rate.
306 */
307
308static int get_cur_freq(struct device *dev, unsigned long *freq)
309{
310 struct gk20a *g = get_gk20a(dev);
311 *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
312 return 0;
313}
314
315
316/*
317 * gk20a_scale_init(dev)
318 */
319
320void gk20a_scale_init(struct device *dev)
321{
322 struct gk20a_platform *platform = dev_get_drvdata(dev);
323 struct gk20a *g = platform->g;
324 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
325 struct gk20a_scale_profile *profile;
326 int err;
327
328 if (g->scale_profile)
329 return;
330
331 if (!platform->devfreq_governor && !platform->qos_notify)
332 return;
333
334 profile = nvgpu_kzalloc(g, sizeof(*profile));
335 if (!profile)
336 return;
337
338 profile->dev = dev;
339 profile->dev_stat.busy = false;
340
341 /* Create frequency table */
342 err = gk20a_scale_make_freq_table(profile);
343 if (err || !profile->devfreq_profile.max_state)
344 goto err_get_freqs;
345
346 profile->qos_min_freq = 0;
347 profile->qos_max_freq = UINT_MAX;
348
349 /* Store device profile so we can access it if devfreq governor
350 * init needs that */
351 g->scale_profile = profile;
352
353 if (platform->devfreq_governor) {
354 struct devfreq *devfreq;
355
356 profile->devfreq_profile.initial_freq =
357 profile->devfreq_profile.freq_table[0];
358 profile->devfreq_profile.target = gk20a_scale_target;
359 profile->devfreq_profile.get_dev_status =
360 gk20a_scale_get_dev_status;
361 profile->devfreq_profile.get_cur_freq = get_cur_freq;
362 profile->devfreq_profile.polling_ms = 25;
363
364 devfreq = devm_devfreq_add_device(dev,
365 &profile->devfreq_profile,
366 platform->devfreq_governor, NULL);
367
368 if (IS_ERR_OR_NULL(devfreq))
369 devfreq = NULL;
370
371 l->devfreq = devfreq;
372 }
373
374#ifdef CONFIG_GK20A_PM_QOS
375 /* Should we register QoS callback for this device? */
376 if (platform->qos_notify) {
377 profile->qos_notify_block.notifier_call =
378 platform->qos_notify;
379
380 pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
381 &profile->qos_notify_block);
382 pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
383 &profile->qos_notify_block);
384 }
385#endif
386
387 return;
388
389err_get_freqs:
390 nvgpu_kfree(g, profile);
391}
392
393void gk20a_scale_exit(struct device *dev)
394{
395 struct gk20a_platform *platform = dev_get_drvdata(dev);
396 struct gk20a *g = platform->g;
397
398#ifdef CONFIG_GK20A_PM_QOS
399 if (platform->qos_notify) {
400 pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
401 &g->scale_profile->qos_notify_block);
402 pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
403 &g->scale_profile->qos_notify_block);
404 }
405#endif
406
407 nvgpu_kfree(g, g->scale_profile);
408 g->scale_profile = NULL;
409}
410
411/*
412 * gk20a_scale_hw_init(dev)
413 *
414 * Initialize hardware portion of the device
415 */
416
417void gk20a_scale_hw_init(struct device *dev)
418{
419 struct gk20a_platform *platform = dev_get_drvdata(dev);
420 struct gk20a_scale_profile *profile = platform->g->scale_profile;
421
422	/* make sure that scaling has been initialised */
423 if (!profile)
424 return;
425
426 profile->dev_stat.total_time = 0;
427 profile->last_event_time = ktime_get();
428}
diff --git a/include/os/linux/scale.h b/include/os/linux/scale.h
deleted file mode 100644
index c1e6fe8..0000000
--- a/include/os/linux/scale.h
+++ /dev/null
@@ -1,66 +0,0 @@
1/*
2 * gk20a clock scaling profile
3 *
4 * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef GK20A_SCALE_H
20#define GK20A_SCALE_H
21
22#include <linux/devfreq.h>
23
24struct clk;
25
26struct gk20a_scale_profile {
27 struct device *dev;
28 ktime_t last_event_time;
29 struct devfreq_dev_profile devfreq_profile;
30 struct devfreq_dev_status dev_stat;
31 struct notifier_block qos_notify_block;
32 unsigned long qos_min_freq;
33 unsigned long qos_max_freq;
34 void *private_data;
35};
36
37/* Initialization and de-initialization for module */
38void gk20a_scale_init(struct device *);
39void gk20a_scale_exit(struct device *);
40void gk20a_scale_hw_init(struct device *dev);
41
42#if defined(CONFIG_GK20A_DEVFREQ)
43/*
44 * call when performing submit to notify scaling mechanism that the module is
45 * in use
46 */
47void gk20a_scale_notify_busy(struct device *);
48void gk20a_scale_notify_idle(struct device *);
49
50void gk20a_scale_suspend(struct device *);
51void gk20a_scale_resume(struct device *);
52int gk20a_scale_qos_notify(struct notifier_block *nb,
53 unsigned long n, void *p);
54#else
55static inline void gk20a_scale_notify_busy(struct device *dev) {}
56static inline void gk20a_scale_notify_idle(struct device *dev) {}
57static inline void gk20a_scale_suspend(struct device *dev) {}
58static inline void gk20a_scale_resume(struct device *dev) {}
59static inline int gk20a_scale_qos_notify(struct notifier_block *nb,
60 unsigned long n, void *p)
61{
62 return -ENOSYS;
63}
64#endif
65
66#endif
diff --git a/include/os/linux/sched.c b/include/os/linux/sched.c
deleted file mode 100644
index 30c58a1..0000000
--- a/include/os/linux/sched.c
+++ /dev/null
@@ -1,666 +0,0 @@
1/*
2 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#include <asm/barrier.h>
17#include <linux/wait.h>
18#include <linux/uaccess.h>
19#include <linux/poll.h>
20#include <uapi/linux/nvgpu.h>
21
22#include <nvgpu/kmem.h>
23#include <nvgpu/log.h>
24#include <nvgpu/bug.h>
25#include <nvgpu/barrier.h>
26#include <nvgpu/gk20a.h>
27
28#include "gk20a/gr_gk20a.h"
29#include "sched.h"
30#include "os_linux.h"
31#include "ioctl_tsg.h"
32
33#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
34#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
35
36ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
37 size_t size, loff_t *off)
38{
39 struct gk20a *g = filp->private_data;
40 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
41 struct nvgpu_sched_event_arg event = { 0 };
42 int err;
43
44 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched,
45 "filp=%p buf=%p size=%zu", filp, buf, size);
46
47 if (size < sizeof(event))
48 return -EINVAL;
49 size = sizeof(event);
50
51 nvgpu_mutex_acquire(&sched->status_lock);
52 while (!sched->status) {
53 nvgpu_mutex_release(&sched->status_lock);
54 if (filp->f_flags & O_NONBLOCK)
55 return -EAGAIN;
56 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq,
57 sched->status, 0);
58 if (err)
59 return err;
60 nvgpu_mutex_acquire(&sched->status_lock);
61 }
62
63 event.reserved = 0;
64 event.status = sched->status;
65
66 if (copy_to_user(buf, &event, size)) {
67 nvgpu_mutex_release(&sched->status_lock);
68 return -EFAULT;
69 }
70
71 sched->status = 0;
72
73 nvgpu_mutex_release(&sched->status_lock);
74
75 return size;
76}
77
78unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait)
79{
80 struct gk20a *g = filp->private_data;
81 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
82 unsigned int mask = 0;
83
84 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
85
86 nvgpu_mutex_acquire(&sched->status_lock);
87 poll_wait(filp, &sched->readout_wq.wq, wait);
88 if (sched->status)
89 mask |= POLLIN | POLLRDNORM;
90 nvgpu_mutex_release(&sched->status_lock);
91
92 return mask;
93}
94
95static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a *g,
96 struct nvgpu_sched_get_tsgs_args *arg)
97{
98 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
99
100 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
101 arg->size, arg->buffer);
102
103 if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
104 arg->size = sched->bitmap_size;
105 return -ENOSPC;
106 }
107
108 nvgpu_mutex_acquire(&sched->status_lock);
109 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
110 sched->active_tsg_bitmap, sched->bitmap_size)) {
111 nvgpu_mutex_release(&sched->status_lock);
112 return -EFAULT;
113 }
114 nvgpu_mutex_release(&sched->status_lock);
115
116 return 0;
117}
118
119static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a *g,
120 struct nvgpu_sched_get_tsgs_args *arg)
121{
122 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
123
124 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
125 arg->size, arg->buffer);
126
127 if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
128 arg->size = sched->bitmap_size;
129 return -ENOSPC;
130 }
131
132 nvgpu_mutex_acquire(&sched->status_lock);
133 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
134 sched->recent_tsg_bitmap, sched->bitmap_size)) {
135 nvgpu_mutex_release(&sched->status_lock);
136 return -EFAULT;
137 }
138
139 memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
140 nvgpu_mutex_release(&sched->status_lock);
141
142 return 0;
143}
144
145static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a *g,
146 struct nvgpu_sched_get_tsgs_by_pid_args *arg)
147{
148 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
149 struct fifo_gk20a *f = &g->fifo;
150 struct tsg_gk20a *tsg;
151 u64 *bitmap;
152 unsigned int tsgid;
153 /* pid at user level corresponds to kernel tgid */
154 pid_t tgid = (pid_t)arg->pid;
155 int err = 0;
156
157 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx",
158 (pid_t)arg->pid, arg->size, arg->buffer);
159
160 if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
161 arg->size = sched->bitmap_size;
162 return -ENOSPC;
163 }
164
165 bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
166 if (!bitmap)
167 return -ENOMEM;
168
169 nvgpu_mutex_acquire(&sched->status_lock);
170 for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
171 if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) {
172 tsg = &f->tsg[tsgid];
173 if (tsg->tgid == tgid)
174 NVGPU_SCHED_SET(tsgid, bitmap);
175 }
176 }
177 nvgpu_mutex_release(&sched->status_lock);
178
179 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
180 bitmap, sched->bitmap_size))
181 err = -EFAULT;
182
183 nvgpu_kfree(g, bitmap);
184
185 return err;
186}
187
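The per-pid query above walks every TSG id and sets a bit for each active TSG owned by the requested tgid. A userspace sketch of that bitmap filtering follows; the bit helpers and the sample ownership table are illustrative, not the NVGPU_SCHED_* macros.

#include <stdio.h>
#include <stdint.h>

#define BIT_SET(map, n)   ((map)[(n) / 64] |= 1ULL << ((n) % 64))
#define BIT_ISSET(map, n) ((map)[(n) / 64] &  (1ULL << ((n) % 64)))

int main(void)
{
	uint64_t active[1] = { 0x2f };                 /* TSGs 0-3 and 5 active */
	int owner_tgid[8] = { 10, 42, 42, 7, 0, 42, 0, 0 };
	uint64_t by_pid[1] = { 0 };
	int tsgid, want = 42;

	/* collect the active TSGs belonging to tgid 'want' */
	for (tsgid = 0; tsgid < 8; tsgid++)
		if (BIT_ISSET(active, tsgid) && owner_tgid[tsgid] == want)
			BIT_SET(by_pid, tsgid);

	printf("tsgs of tgid %d: 0x%llx\n", want,
	       (unsigned long long)by_pid[0]);  /* 0x26: TSGs 1, 2 and 5 */
	return 0;
}
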
188static int gk20a_sched_dev_ioctl_get_params(struct gk20a *g,
189 struct nvgpu_sched_tsg_get_params_args *arg)
190{
191 struct fifo_gk20a *f = &g->fifo;
192 struct tsg_gk20a *tsg;
193 u32 tsgid = arg->tsgid;
194
195 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
196
197 if (tsgid >= f->num_channels)
198 return -EINVAL;
199
200 nvgpu_speculation_barrier();
201
202 tsg = &f->tsg[tsgid];
203 if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
204 return -ENXIO;
205
206 arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */
207 arg->runlist_interleave = tsg->interleave_level;
208 arg->timeslice = gk20a_tsg_get_timeslice(tsg);
209
210 arg->graphics_preempt_mode =
211 tsg->gr_ctx.graphics_preempt_mode;
212 arg->compute_preempt_mode =
213 tsg->gr_ctx.compute_preempt_mode;
214
215 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
216
217 return 0;
218}
219
220static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
221 struct gk20a *g,
222 struct nvgpu_sched_tsg_timeslice_args *arg)
223{
224 struct fifo_gk20a *f = &g->fifo;
225 struct tsg_gk20a *tsg;
226 u32 tsgid = arg->tsgid;
227 int err;
228
229 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
230
231 if (tsgid >= f->num_channels)
232 return -EINVAL;
233
234 nvgpu_speculation_barrier();
235
236 tsg = &f->tsg[tsgid];
237 if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
238 return -ENXIO;
239
240 err = gk20a_busy(g);
241 if (err)
242 goto done;
243
244 err = gk20a_tsg_set_timeslice(tsg, arg->timeslice);
245
246 gk20a_idle(g);
247
248done:
249 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
250
251 return err;
252}
253
254static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
255 struct gk20a *g,
256 struct nvgpu_sched_tsg_runlist_interleave_args *arg)
257{
258 struct fifo_gk20a *f = &g->fifo;
259 struct tsg_gk20a *tsg;
260 u32 tsgid = arg->tsgid;
261 int err;
262
263 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
264
265 if (tsgid >= f->num_channels)
266 return -EINVAL;
267
268 nvgpu_speculation_barrier();
269
270 tsg = &f->tsg[tsgid];
271 if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
272 return -ENXIO;
273
274 err = gk20a_busy(g);
275 if (err)
276 goto done;
277
278 err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave);
279
280 gk20a_idle(g);
281
282done:
283 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
284
285 return err;
286}
287
288static int gk20a_sched_dev_ioctl_lock_control(struct gk20a *g)
289{
290 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
291
292 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
293
294 nvgpu_mutex_acquire(&sched->control_lock);
295 sched->control_locked = true;
296 nvgpu_mutex_release(&sched->control_lock);
297 return 0;
298}
299
300static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a *g)
301{
302 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
303
304 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
305
306 nvgpu_mutex_acquire(&sched->control_lock);
307 sched->control_locked = false;
308 nvgpu_mutex_release(&sched->control_lock);
309 return 0;
310}
311
312static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a *g,
313 struct nvgpu_sched_api_version_args *args)
314{
315 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
316
317 args->version = NVGPU_SCHED_API_VERSION;
318 return 0;
319}
320
321static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a *g,
322 struct nvgpu_sched_tsg_refcount_args *arg)
323{
324 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
325 struct fifo_gk20a *f = &g->fifo;
326 struct tsg_gk20a *tsg;
327 u32 tsgid = arg->tsgid;
328
329 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
330
331 if (tsgid >= f->num_channels)
332 return -EINVAL;
333
334 nvgpu_speculation_barrier();
335
336 tsg = &f->tsg[tsgid];
337 if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
338 return -ENXIO;
339
340 nvgpu_mutex_acquire(&sched->status_lock);
341 if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
342 nvgpu_warn(g, "tsgid=%d already referenced", tsgid);
343 /* unlock status_lock as nvgpu_ioctl_tsg_release locks it */
344 nvgpu_mutex_release(&sched->status_lock);
345 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
346 return -ENXIO;
347 }
348
349 	/* keep a reference on the TSG; it will be released by the
350 	 * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or on close
351 	 */
352 NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap);
353 nvgpu_mutex_release(&sched->status_lock);
354
355 return 0;
356}
357
358static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a *g,
359 struct nvgpu_sched_tsg_refcount_args *arg)
360{
361 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
362 struct fifo_gk20a *f = &g->fifo;
363 struct tsg_gk20a *tsg;
364 u32 tsgid = arg->tsgid;
365
366 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
367
368 if (tsgid >= f->num_channels)
369 return -EINVAL;
370
371 nvgpu_speculation_barrier();
372
373 nvgpu_mutex_acquire(&sched->status_lock);
374 if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
375 nvgpu_mutex_release(&sched->status_lock);
376 		nvgpu_warn(g, "tsgid=%u not previously referenced", tsgid);
377 return -ENXIO;
378 }
379 NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap);
380 nvgpu_mutex_release(&sched->status_lock);
381
382 tsg = &f->tsg[tsgid];
383 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
384
385 return 0;
386}
387
388int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
389{
390 struct nvgpu_os_linux *l = container_of(inode->i_cdev,
391 struct nvgpu_os_linux, sched.cdev);
392 struct gk20a *g;
393 struct nvgpu_sched_ctrl *sched;
394 int err = 0;
395
396 g = gk20a_get(&l->g);
397 if (!g)
398 return -ENODEV;
399 sched = &g->sched_ctrl;
400
401 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g);
402
403 if (!sched->sw_ready) {
404 err = gk20a_busy(g);
405 if (err)
406 goto free_ref;
407
408 gk20a_idle(g);
409 }
410
411 if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) {
412 err = -EBUSY;
413 goto free_ref;
414 }
415
416 memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
417 sched->bitmap_size);
418 memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size);
419
420 filp->private_data = g;
421 nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched);
422
423free_ref:
424 if (err)
425 gk20a_put(g);
426 return err;
427}
428
429long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
430 unsigned long arg)
431{
432 struct gk20a *g = filp->private_data;
433 	u8 buf[NVGPU_SCHED_IOCTL_MAX_ARG_SIZE];
434 int err = 0;
435
436 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd));
437
438 if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) ||
439 (_IOC_NR(cmd) == 0) ||
440 (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) ||
441 (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE))
442 return -EINVAL;
443
444 memset(buf, 0, sizeof(buf));
445 if (_IOC_DIR(cmd) & _IOC_WRITE) {
446 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
447 return -EFAULT;
448 }
449
450 nvgpu_speculation_barrier();
451 switch (cmd) {
452 case NVGPU_SCHED_IOCTL_GET_TSGS:
453 err = gk20a_sched_dev_ioctl_get_tsgs(g,
454 (struct nvgpu_sched_get_tsgs_args *)buf);
455 break;
456 case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS:
457 err = gk20a_sched_dev_ioctl_get_recent_tsgs(g,
458 (struct nvgpu_sched_get_tsgs_args *)buf);
459 break;
460 case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID:
461 err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(g,
462 (struct nvgpu_sched_get_tsgs_by_pid_args *)buf);
463 break;
464 case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS:
465 err = gk20a_sched_dev_ioctl_get_params(g,
466 (struct nvgpu_sched_tsg_get_params_args *)buf);
467 break;
468 case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE:
469 err = gk20a_sched_dev_ioctl_tsg_set_timeslice(g,
470 (struct nvgpu_sched_tsg_timeslice_args *)buf);
471 break;
472 case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
473 err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(g,
474 (struct nvgpu_sched_tsg_runlist_interleave_args *)buf);
475 break;
476 case NVGPU_SCHED_IOCTL_LOCK_CONTROL:
477 err = gk20a_sched_dev_ioctl_lock_control(g);
478 break;
479 case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL:
480 err = gk20a_sched_dev_ioctl_unlock_control(g);
481 break;
482 case NVGPU_SCHED_IOCTL_GET_API_VERSION:
483 err = gk20a_sched_dev_ioctl_get_api_version(g,
484 (struct nvgpu_sched_api_version_args *)buf);
485 break;
486 case NVGPU_SCHED_IOCTL_GET_TSG:
487 err = gk20a_sched_dev_ioctl_get_tsg(g,
488 (struct nvgpu_sched_tsg_refcount_args *)buf);
489 break;
490 case NVGPU_SCHED_IOCTL_PUT_TSG:
491 err = gk20a_sched_dev_ioctl_put_tsg(g,
492 (struct nvgpu_sched_tsg_refcount_args *)buf);
493 break;
494 default:
495 nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
496 err = -ENOTTY;
497 }
498
499 /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on
500 * purpose with NULL buffer and/or zero size to discover TSG bitmap
501 * size. We need to update user arguments in this case too, even
502 * if we return an error.
503 */
504 if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) {
505 if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
506 err = -EFAULT;
507 }
508
509 return err;
510}
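/*
 * Illustrative userspace sketch (added for exposition; not part of the
 * original driver). The comment above the final copy_to_user() notes that
 * NVGPU_SCHED_IOCTL_GET_TSGS may be issued on purpose with a zero size to
 * discover the TSG bitmap size, so a client would typically call the ioctl
 * twice. The device node path and the "size"/"buffer" member names of
 * struct nvgpu_sched_get_tsgs_args are assumptions here; the exact layout
 * lives in the nvgpu uapi headers.
 */
#if 0	/* example only, never built as part of this file */
#include <fcntl.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static void sched_dump_tsg_bitmap(void)
{
	struct nvgpu_sched_get_tsgs_args args = { 0 };
	int fd = open("/dev/nvhost-sched-gpu", O_RDWR);	/* assumed path */

	/* size == 0: the driver returns -ENOSPC but copies back the size */
	(void)ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args);

	/* a second call with a right-sized buffer fetches the bitmap itself */
	args.buffer = (unsigned long)malloc(args.size);	/* assumed members */
	(void)ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args);
}
#endif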
511
512int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
513{
514 struct gk20a *g = filp->private_data;
515 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
516 struct fifo_gk20a *f = &g->fifo;
517 struct tsg_gk20a *tsg;
518 unsigned int tsgid;
519
520 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched);
521
522 /* release any reference to TSGs */
523 for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
524 if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
525 tsg = &f->tsg[tsgid];
526 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
527 }
528 }
529
530 /* unlock control */
531 nvgpu_mutex_acquire(&sched->control_lock);
532 sched->control_locked = false;
533 nvgpu_mutex_release(&sched->control_lock);
534
535 nvgpu_mutex_release(&sched->busy_lock);
536 gk20a_put(g);
537 return 0;
538}
539
540void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
541{
542 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
543 int err;
544
545 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
546
547 if (!sched->sw_ready) {
548 err = gk20a_busy(g);
549 if (err) {
550 WARN_ON(err);
551 return;
552 }
553
554 gk20a_idle(g);
555 }
556
557 nvgpu_mutex_acquire(&sched->status_lock);
558 NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap);
559 NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap);
560 sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN;
561 nvgpu_mutex_release(&sched->status_lock);
562 nvgpu_cond_signal_interruptible(&sched->readout_wq);
563}
564
565void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
566{
567 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
568
569 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
570
571 nvgpu_mutex_acquire(&sched->status_lock);
572 NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap);
573
574 	/* clear recent_tsg_bitmap as well: if the app manager did not
575 	 * notice that the TSG was previously added, there is no need to
576 	 * notify it when the TSG has been released in the meantime. If the
577 	 * TSG gets reallocated, the app manager will be notified as usual.
578 	 */
579 NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap);
580
581 	/* do not set event_pending: we only want to notify the app manager
582 	 * when TSGs are added, so that it can apply sched params
583 	 */
584 nvgpu_mutex_release(&sched->status_lock);
585}
586
587int gk20a_sched_ctrl_init(struct gk20a *g)
588{
589 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
590 struct fifo_gk20a *f = &g->fifo;
591 int err;
592
593 if (sched->sw_ready)
594 return 0;
595
596 sched->bitmap_size = roundup(f->num_channels, 64) / 8;
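	/*
	 * Note added for exposition (not in the original source): the bitmap
	 * holds one bit per possible TSG ID, rounded up to a 64-bit boundary.
	 * For example, with f->num_channels == 512 this yields
	 * roundup(512, 64) / 8 == 64 bytes, i.e. 512 usable bits.
	 */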
597 sched->status = 0;
598
599 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu",
600 g, sched, sched->bitmap_size);
601
602 sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
603 if (!sched->active_tsg_bitmap)
604 return -ENOMEM;
605
606 sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
607 if (!sched->recent_tsg_bitmap) {
608 err = -ENOMEM;
609 goto free_active;
610 }
611
612 sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
613 if (!sched->ref_tsg_bitmap) {
614 err = -ENOMEM;
615 goto free_recent;
616 }
617
618 nvgpu_cond_init(&sched->readout_wq);
619
620 err = nvgpu_mutex_init(&sched->status_lock);
621 if (err)
622 goto free_ref;
623
624 err = nvgpu_mutex_init(&sched->control_lock);
625 if (err)
626 goto free_status_lock;
627
628 err = nvgpu_mutex_init(&sched->busy_lock);
629 if (err)
630 goto free_control_lock;
631
632 sched->sw_ready = true;
633
634 return 0;
635
636free_control_lock:
637 nvgpu_mutex_destroy(&sched->control_lock);
638free_status_lock:
639 nvgpu_mutex_destroy(&sched->status_lock);
640free_ref:
641 nvgpu_kfree(g, sched->ref_tsg_bitmap);
642free_recent:
643 nvgpu_kfree(g, sched->recent_tsg_bitmap);
644free_active:
645 nvgpu_kfree(g, sched->active_tsg_bitmap);
646
647 return err;
648}
649
650void gk20a_sched_ctrl_cleanup(struct gk20a *g)
651{
652 struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
653
654 nvgpu_kfree(g, sched->active_tsg_bitmap);
655 nvgpu_kfree(g, sched->recent_tsg_bitmap);
656 nvgpu_kfree(g, sched->ref_tsg_bitmap);
657 sched->active_tsg_bitmap = NULL;
658 sched->recent_tsg_bitmap = NULL;
659 sched->ref_tsg_bitmap = NULL;
660
661 nvgpu_mutex_destroy(&sched->status_lock);
662 nvgpu_mutex_destroy(&sched->control_lock);
663 nvgpu_mutex_destroy(&sched->busy_lock);
664
665 sched->sw_ready = false;
666}
diff --git a/include/os/linux/sched.h b/include/os/linux/sched.h
deleted file mode 100644
index e88f37f..0000000
--- a/include/os/linux/sched.h
+++ /dev/null
@@ -1,36 +0,0 @@
1/*
2 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __NVGPU_SCHED_H
17#define __NVGPU_SCHED_H
18
19struct gk20a;
20struct gpu_ops;
21struct tsg_gk20a;
22struct poll_table_struct;
23
24int gk20a_sched_dev_release(struct inode *inode, struct file *filp);
25int gk20a_sched_dev_open(struct inode *inode, struct file *filp);
26long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long);
27ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *);
28unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *);
29
30void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
31void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
32int gk20a_sched_ctrl_init(struct gk20a *);
33
34void gk20a_sched_ctrl_cleanup(struct gk20a *g);
35
36#endif /* __NVGPU_SCHED_H */
diff --git a/include/os/linux/sdl.c b/include/os/linux/sdl.c
deleted file mode 100644
index c4dccdc..0000000
--- a/include/os/linux/sdl.c
+++ /dev/null
@@ -1,341 +0,0 @@
1/*
2 * Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/gk20a.h>
18#include <nvgpu/types.h>
19#include <nvgpu/nvgpu_err.h>
20#include <nvgpu/timers.h>
21#include <nvgpu/bug.h>
22
23#include "ecc_linux.h"
24#include "os_linux.h"
25#include "module.h"
26
27/* This look-up table initializes the list of hw units and their errors.
28 * It also specifies the error injection mechanism supported for each error.
29 * In case of hw error injection support, this initialization will be overridden
30 * by the values provided from the HAL layers of the corresponding hw units.
31 */
32static struct nvgpu_err_hw_module gv11b_err_lut[] = {
33 {
34 .name = "sm",
35 .hw_unit = (u32)NVGPU_ERR_MODULE_SM,
36 .num_errs = 21U,
37 .base_ecc_service_id =
38 NVGUARD_SERVICE_IGPU_SM_SWERR_L1_TAG_ECC_CORRECTED,
39 .errs = (struct nvgpu_err_desc[]) {
40 GPU_NONCRITERR("l1_tag_ecc_corrected",
41 GPU_SM_L1_TAG_ECC_CORRECTED, 0, 0),
42 GPU_CRITERR("l1_tag_ecc_uncorrected",
43 GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, 0),
44 GPU_NONCRITERR("cbu_ecc_corrected", 0, 0, 0),
45 GPU_CRITERR("cbu_ecc_uncorrected",
46 GPU_SM_CBU_ECC_UNCORRECTED, 0, 0),
47 GPU_NONCRITERR("lrf_ecc_corrected", 0, 0, 0),
48 GPU_CRITERR("lrf_ecc_uncorrected",
49 GPU_SM_LRF_ECC_UNCORRECTED, 0, 0),
50 GPU_NONCRITERR("l1_data_ecc_corrected", 0, 0, 0),
51 GPU_CRITERR("l1_data_ecc_uncorrected",
52 GPU_SM_L1_DATA_ECC_UNCORRECTED, 0, 0),
53 GPU_NONCRITERR("icache_l0_data_ecc_corrected", 0, 0, 0),
54 GPU_CRITERR("icache_l0_data_ecc_uncorrected",
55 GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED, 0, 0),
56 GPU_NONCRITERR("icache_l1_data_ecc_corrected", 0, 0, 0),
57 GPU_CRITERR("icache_l1_data_ecc_uncorrected",
58 GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED, 0, 0),
59 GPU_NONCRITERR("icache_l0_predecode_ecc_corrected", 0, 0, 0),
60 GPU_CRITERR("icache_l0_predecode_ecc_uncorrected",
61 GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED, 0, 0),
62 GPU_NONCRITERR("l1_tag_miss_fifo_ecc_corrected", 0, 0, 0),
63 GPU_CRITERR("l1_tag_miss_fifo_ecc_uncorrected",
64 GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0, 0),
65 GPU_NONCRITERR("l1_tag_s2r_pixprf_ecc_corrected", 0, 0, 0),
66 GPU_CRITERR("l1_tag_s2r_pixprf_ecc_uncorrected",
67 GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0, 0),
68 GPU_CRITERR("machine_check_error", 0, 0, 0),
69 GPU_NONCRITERR("icache_l1_predecode_ecc_corrected", 0, 0, 0),
70 GPU_CRITERR("icache_l1_predecode_ecc_uncorrected",
71 GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED, 0, 0),
72 },
73 },
74 {
75 .name = "fecs",
76 .hw_unit = (u32)NVGPU_ERR_MODULE_FECS,
77 .num_errs = 4U,
78 .base_ecc_service_id =
79 NVGUARD_SERVICE_IGPU_FECS_SWERR_FALCON_IMEM_ECC_CORRECTED,
80 .errs = (struct nvgpu_err_desc[]) {
81 GPU_NONCRITERR("falcon_imem_ecc_corrected",
82 GPU_FECS_FALCON_IMEM_ECC_CORRECTED, 0, 0),
83 GPU_CRITERR("falcon_imem_ecc_uncorrected",
84 GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, 0, 0),
85 GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0),
86 GPU_CRITERR("falcon_dmem_ecc_uncorrected",
87 GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, 0, 0),
88 },
89 },
90 {
91 .name = "pmu",
92 .hw_unit = NVGPU_ERR_MODULE_PMU,
93 .num_errs = 4U,
94 .base_ecc_service_id =
95 NVGUARD_SERVICE_IGPU_PMU_SWERR_FALCON_IMEM_ECC_CORRECTED,
96 .errs = (struct nvgpu_err_desc[]) {
97 GPU_NONCRITERR("falcon_imem_ecc_corrected",
98 GPU_PMU_FALCON_IMEM_ECC_CORRECTED, 0, 0),
99 GPU_CRITERR("falcon_imem_ecc_uncorrected",
100 GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, 0, 0),
101 GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0),
102 GPU_CRITERR("falcon_dmem_ecc_uncorrected",
103 GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, 0, 0),
104 },
105 },
106};
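/*
 * Note added for exposition (not in the original source):
 * nvgpu_report_ecc_error_linux() below indexes this table directly with the
 * hw_unit value, so the NVGPU_ERR_MODULE_* identifiers used above are assumed
 * to line up with the array positions (SM, FECS, PMU in order).
 */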
107
108static void nvgpu_init_err_msg_header(struct gpu_err_header *header)
109{
110 header->version.major = (u16)1U;
111 header->version.minor = (u16)0U;
112 header->sub_err_type = 0U;
113 header->sub_unit_id = 0UL;
114 header->address = 0UL;
115 header->timestamp_ns = 0UL;
116}
117
118static void nvgpu_init_ecc_err_msg(struct gpu_ecc_error_info *err_info)
119{
120 nvgpu_init_err_msg_header(&err_info->header);
121 err_info->err_cnt = 0UL;
122}
123
124static void nvgpu_report_ecc_error_linux(struct gk20a *g, u32 hw_unit, u32 inst,
125 u32 err_id, u64 err_addr, u64 err_count)
126{
127 int err = 0;
128 u32 s_id = 0;
129 u8 err_status = 0;
130 u8 err_info_size = 0;
131 u64 timestamp = 0ULL;
132 int err_threshold_counter = 0;
133 struct gpu_ecc_error_info err_pkt;
134 struct nvgpu_err_desc *err_desc = NULL;
135 struct nvgpu_err_hw_module *hw_module = NULL;
136 nv_guard_request_t req;
137
138 memset(&req, 0, sizeof(req));
139 nvgpu_init_ecc_err_msg(&err_pkt);
140 if (hw_unit >= sizeof(gv11b_err_lut)/sizeof(gv11b_err_lut[0])) {
141 err = -EINVAL;
142 goto done;
143 }
144
145 hw_module = &gv11b_err_lut[hw_unit];
146 if (err_id >= hw_module->num_errs) {
147 nvgpu_err(g, "invalid err_id (%u) for hw module (%u)",
148 err_id, hw_module->hw_unit);
149 err = -EINVAL;
150 goto done;
151 }
152 err_desc = &hw_module->errs[err_id];
153 timestamp = (u64)nvgpu_current_time_ns();
154
155 err_pkt.header.timestamp_ns = timestamp;
156 err_pkt.header.sub_unit_id = inst;
157 err_pkt.header.address = err_addr;
158 err_pkt.err_cnt = err_count;
159 err_info_size = sizeof(err_pkt);
160
161 s_id = hw_module->base_ecc_service_id + err_id;
162
163 if (err_desc->is_critical) {
164 err_status = NVGUARD_ERROR_DETECTED;
165 } else {
166 err_status = NVGUARD_NO_ERROR;
167 }
168
169 nvgpu_atomic_inc(&err_desc->err_count);
170 err_threshold_counter = nvgpu_atomic_cmpxchg(&err_desc->err_count,
171 err_desc->err_threshold + 1, 0);
172
173 if (unlikely(err_threshold_counter != err_desc->err_threshold + 1)) {
174 goto done;
175 }
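	/*
	 * Note added for exposition (not in the original source): the atomic
	 * increment and cmpxchg above reset err_count to zero only once it
	 * reaches err_threshold + 1, so the L1SS notification below is
	 * submitted once per (err_threshold + 1) occurrences of this error;
	 * intermediate occurrences return early via the goto above.
	 */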
176
177 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting hw: %s, desc:%s, count:%llu",
178 hw_module->name, err_desc->name, err_count);
179
180 req.srv_id_cmd = NVGUARD_SERVICESTATUS_NOTIFICATION;
181 req.srv_status.srv_id = (nv_guard_service_id_t)s_id;
182 req.srv_status.status = err_status;
183 req.srv_status.timestamp = timestamp;
184 req.srv_status.error_info_size = err_info_size;
185 memcpy(req.srv_status.error_info, (u8*)&err_pkt, err_info_size);
186
187 /*
188 	 * l1ss_submit_rq may fail due to kmalloc failures but may succeed in
189 * subsequent calls
190 */
191 err = l1ss_submit_rq(&req, true);
192 if (err != 0) {
193 nvgpu_err(g, "Error returned from L1SS submit %d", err);
194 }
195
196 if (err_desc->is_critical) {
197 nvgpu_quiesce(g);
198 }
199
200done:
201 return;
202}
203
204static void nvgpu_report_ecc_error_empty(struct gk20a *g, u32 hw_unit, u32 inst,
205 u32 err_id, u64 err_addr, u64 err_count) {
206 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting empty");
207}
208
209const struct nvgpu_ecc_reporting_ops default_disabled_ecc_report_ops = {
210 .report_ecc_err = nvgpu_report_ecc_error_empty,
211};
212
213const struct nvgpu_ecc_reporting_ops ecc_enable_report_ops = {
214 .report_ecc_err = nvgpu_report_ecc_error_linux,
215};
216
217static int nvgpu_l1ss_callback(l1ss_cli_callback_param param, void *data)
218{
219 struct gk20a *g = (struct gk20a *)data;
220 struct nvgpu_os_linux *l = NULL;
221 struct nvgpu_ecc_reporting_linux *ecc_reporting_linux = NULL;
222 int err = 0;
223 /* Ensure we have a valid gk20a struct before proceeding */
224 if ((g == NULL) || (gk20a_get(g) == NULL)) {
225 return -ENODEV;
226 }
227
228 l = nvgpu_os_linux_from_gk20a(g);
229 ecc_reporting_linux = &l->ecc_reporting_linux;
230
231 nvgpu_spinlock_acquire(&ecc_reporting_linux->common.lock);
232 if (param == L1SS_READY) {
233 if (!ecc_reporting_linux->common.ecc_reporting_service_enabled) {
234 ecc_reporting_linux->common.ecc_reporting_service_enabled = true;
235 ecc_reporting_linux->common.ops = &ecc_enable_report_ops;
236 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled");
237 }
238 } else if (param == L1SS_NOT_READY) {
239 if (ecc_reporting_linux->common.ecc_reporting_service_enabled) {
240 ecc_reporting_linux->common.ecc_reporting_service_enabled = false;
241 ecc_reporting_linux->common.ops = &default_disabled_ecc_report_ops;
242 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled");
243 }
244 } else {
245 err = -EINVAL;
246 }
247 nvgpu_spinlock_release(&ecc_reporting_linux->common.lock);
248
249 gk20a_put(g);
250
251 return err;
252}
253
254void nvgpu_init_ecc_reporting(struct gk20a *g)
255{
256 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
257 struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
258 int err = 0;
259 /* This will invoke the registration API */
260 nvgpu_spinlock_init(&ecc_report_linux->common.lock);
261 ecc_report_linux->priv.id = (NVGUARD_GROUPID_IGPU & NVGUARD_GROUPINDEX_FIELDMASK);
262 ecc_report_linux->priv.cli_callback = nvgpu_l1ss_callback;
263 ecc_report_linux->priv.data = g;
264 ecc_report_linux->common.ops = &default_disabled_ecc_report_ops;
265
266 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting Init");
267
268 /*
269 * err == 0 indicates service is available but not active yet.
270 	 * err == 1 indicates service is available and active.
271 	 * Any other value indicates an error.
272 */
273 err = l1ss_register_client(&ecc_report_linux->priv);
274 if (err == 0) {
275 ecc_report_linux->common.ecc_reporting_service_enabled = false;
276 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init success");
277 } else if (err == 1) {
278 ecc_report_linux->common.ecc_reporting_service_enabled = true;
279 		/* The actual ops will be installed by nvgpu_enable_ecc_reporting(),
280 		 * which is called as part of gk20a_busy()
281 		 */
282 } else {
283 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init failure %d", err);
284 }
285}
286
287void nvgpu_deinit_ecc_reporting(struct gk20a *g)
288{
289 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
290 struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
291
292 if (ecc_report_linux->common.ecc_reporting_service_enabled) {
293 ecc_report_linux->common.ecc_reporting_service_enabled = false;
294 l1ss_deregister_client(ecc_report_linux->priv.id);
295 memset(ecc_report_linux, 0, sizeof(*ecc_report_linux));
296 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting de-init success");
297 }
298
299}
300
301void nvgpu_enable_ecc_reporting(struct gk20a *g)
302{
303 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
304 struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
305 struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
306
307 nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
308 if (error_reporting->ecc_reporting_service_enabled) {
309 error_reporting->ops = &ecc_enable_report_ops;
310 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled");
311 }
312 nvgpu_spinlock_release(&ecc_report_linux->common.lock);
313}
314
315void nvgpu_disable_ecc_reporting(struct gk20a *g)
316{
317 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
318 struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
319 struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
320
321 nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
322 error_reporting->ops = &default_disabled_ecc_report_ops;
323 nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled");
324 nvgpu_spinlock_release(&ecc_report_linux->common.lock);
325}
326
327void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
328 u32 err_id, u64 err_addr, u64 err_count)
329{
330 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
331 struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
332 struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
333 void (*report_ecc_err_func)(struct gk20a *g, u32 hw_unit, u32 inst,
334 u32 err_id, u64 err_addr, u64 err_count);
335
336 nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
337 report_ecc_err_func = error_reporting->ops->report_ecc_err;
338 nvgpu_spinlock_release(&ecc_report_linux->common.lock);
339
340 report_ecc_err_func(g, hw_unit, inst, err_id, err_addr, err_count);
341}
diff --git a/include/os/linux/sim.c b/include/os/linux/sim.c
deleted file mode 100644
index 792ce80..0000000
--- a/include/os/linux/sim.c
+++ /dev/null
@@ -1,96 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/io.h>
18#include <linux/highmem.h>
19#include <linux/platform_device.h>
20
21#include <nvgpu/log.h>
22#include <nvgpu/linux/vm.h>
23#include <nvgpu/bitops.h>
24#include <nvgpu/nvgpu_mem.h>
25#include <nvgpu/dma.h>
26#include <nvgpu/soc.h>
27#include <nvgpu/hw_sim.h>
28#include <nvgpu/sim.h>
29#include <nvgpu/gk20a.h>
30
31#include "platform_gk20a.h"
32#include "os_linux.h"
33#include "module.h"
34
35void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v)
36{
37 struct sim_nvgpu_linux *sim_linux =
38 container_of(sim, struct sim_nvgpu_linux, sim);
39
40 writel(v, sim_linux->regs + r);
41}
42
43u32 sim_readl(struct sim_nvgpu *sim, u32 r)
44{
45 struct sim_nvgpu_linux *sim_linux =
46 container_of(sim, struct sim_nvgpu_linux, sim);
47
48 return readl(sim_linux->regs + r);
49}
50
51void nvgpu_remove_sim_support_linux(struct gk20a *g)
52{
53 struct sim_nvgpu_linux *sim_linux;
54
55 if (!g->sim)
56 return;
57
58 sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
59 if (sim_linux->regs) {
60 sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
61 iounmap(sim_linux->regs);
62 sim_linux->regs = NULL;
63 }
64 nvgpu_kfree(g, sim_linux);
65 g->sim = NULL;
66}
67
68int nvgpu_init_sim_support_linux(struct gk20a *g,
69 struct platform_device *dev)
70{
71 struct sim_nvgpu_linux *sim_linux;
72 int err = -ENOMEM;
73
74 if (!nvgpu_platform_is_simulation(g))
75 return 0;
76
77 sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
78 if (!sim_linux)
79 return err;
80 g->sim = &sim_linux->sim;
81 g->sim->g = g;
82 sim_linux->regs = nvgpu_devm_ioremap_resource(dev,
83 GK20A_SIM_IORESOURCE_MEM,
84 &sim_linux->reg_mem);
85 if (IS_ERR(sim_linux->regs)) {
86 nvgpu_err(g, "failed to remap gk20a sim regs");
87 err = PTR_ERR(sim_linux->regs);
88 goto fail;
89 }
90 sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux;
91 return 0;
92
93fail:
94 nvgpu_remove_sim_support_linux(g);
95 return err;
96}
diff --git a/include/os/linux/sim_pci.c b/include/os/linux/sim_pci.c
deleted file mode 100644
index 340f1fa..0000000
--- a/include/os/linux/sim_pci.c
+++ /dev/null
@@ -1,93 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/io.h>
18#include <linux/highmem.h>
19#include <linux/platform_device.h>
20
21#include <nvgpu/log.h>
22#include <nvgpu/linux/vm.h>
23#include <nvgpu/bitops.h>
24#include <nvgpu/nvgpu_mem.h>
25#include <nvgpu/dma.h>
26#include <nvgpu/hw_sim_pci.h>
27#include <nvgpu/sim.h>
28#include <nvgpu/io.h>
29#include <nvgpu/gk20a.h>
30
31#include "os_linux.h"
32#include "module.h"
33
34static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base)
35{
36 u32 cfg;
37 bool is_simulation = false;
38
39 cfg = nvgpu_readl(g, sim_base + sim_config_r());
40 if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v())
41 is_simulation = true;
42
43 return is_simulation;
44}
45
46void nvgpu_remove_sim_support_linux_pci(struct gk20a *g)
47{
48 struct sim_nvgpu_linux *sim_linux;
49 bool is_simulation;
50
51 is_simulation = _nvgpu_pci_is_simulation(g, sim_r());
52
53 if (!is_simulation) {
54 return;
55 }
56
57 if (!g->sim) {
58 nvgpu_warn(g, "sim_gk20a not allocated");
59 return;
60 }
61 sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
62
63 if (sim_linux->regs) {
64 sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
65 sim_linux->regs = NULL;
66 }
67 nvgpu_kfree(g, sim_linux);
68 g->sim = NULL;
69}
70
71int nvgpu_init_sim_support_linux_pci(struct gk20a *g)
72{
73 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
74 struct sim_nvgpu_linux *sim_linux;
75 int err = -ENOMEM;
76 bool is_simulation;
77
78 is_simulation = _nvgpu_pci_is_simulation(g, sim_r());
79 __nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation);
80
81 if (!is_simulation)
82 return 0;
83
84 sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
85 if (!sim_linux)
86 return err;
87 g->sim = &sim_linux->sim;
88 g->sim->g = g;
89 sim_linux->regs = l->regs + sim_r();
90 sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci;
91
92 return 0;
93}
diff --git a/include/os/linux/soc.c b/include/os/linux/soc.c
deleted file mode 100644
index 1b27d6f..0000000
--- a/include/os/linux/soc.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <soc/tegra/chip-id.h>
15#include <soc/tegra/fuse.h>
16#include <soc/tegra/tegra_bpmp.h>
17#ifdef CONFIG_TEGRA_HV_MANAGER
18#include <soc/tegra/virt/syscalls.h>
19#endif
20
21#include <nvgpu/soc.h>
22#include "os_linux.h"
23#include "platform_gk20a.h"
24
25bool nvgpu_platform_is_silicon(struct gk20a *g)
26{
27 return tegra_platform_is_silicon();
28}
29
30bool nvgpu_platform_is_simulation(struct gk20a *g)
31{
32 return tegra_platform_is_vdk();
33}
34
35bool nvgpu_platform_is_fpga(struct gk20a *g)
36{
37 return tegra_platform_is_fpga();
38}
39
40bool nvgpu_is_hypervisor_mode(struct gk20a *g)
41{
42 return is_tegra_hypervisor_mode();
43}
44
45bool nvgpu_is_bpmp_running(struct gk20a *g)
46{
47 return tegra_bpmp_running();
48}
49
50bool nvgpu_is_soc_t194_a01(struct gk20a *g)
51{
52 return ((tegra_get_chip_id() == TEGRA194 &&
53 tegra_chip_get_revision() == TEGRA194_REVISION_A01) ?
54 true : false);
55}
56
57#ifdef CONFIG_TEGRA_HV_MANAGER
58/* When nvlink is enabled on dGPU, we need to use physical memory addresses.
59 * There is no SMMU translation. However, the device initially enumerates as a
60 * PCIe device. As such, when allocation memory for this PCIe device, the DMA
61 * framework ends up allocating memory using SMMU (if enabled in device tree).
62 * As a result, when we switch to nvlink, we need to use underlying physical
63 * addresses, even if memory mappings exist in SMMU.
64 * In addition, when stage-2 SMMU translation is enabled (for instance when HV
65 * is enabled), the addresses we get from dma_alloc are IPAs. We need to
66 * convert them to PA.
67 */
68static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa)
69{
70 struct device *dev = dev_from_gk20a(g);
71 struct gk20a_platform *platform = gk20a_get_platform(dev);
72 struct hyp_ipa_pa_info info;
73 int err;
74 u64 pa = 0ULL;
75
76 err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa);
77 if (err < 0) {
78 /* WAR for bug 2096877
79 * hyp_read_ipa_pa_info only looks up RAM mappings.
80 * assume one to one IPA:PA mapping for syncpt aperture
81 */
82 u64 start = g->syncpt_unit_base;
83 u64 end = g->syncpt_unit_base + g->syncpt_unit_size;
84 if ((ipa >= start) && (ipa < end)) {
85 pa = ipa;
86 nvgpu_log(g, gpu_dbg_map_v,
87 "ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n",
88 ipa, platform->vmid, pa);
89 } else {
90 nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d",
91 ipa, platform->vmid, err);
92 }
93 } else {
94 pa = info.base + info.offset;
95 nvgpu_log(g, gpu_dbg_map_v,
96 "ipa=%llx vmid=%d -> pa=%llx "
97 "base=%llx offset=%llx size=%llx\n",
98 ipa, platform->vmid, pa, info.base,
99 info.offset, info.size);
100 }
101 return pa;
102}
103#endif
104
105int nvgpu_init_soc_vars(struct gk20a *g)
106{
107#ifdef CONFIG_TEGRA_HV_MANAGER
108 struct device *dev = dev_from_gk20a(g);
109 struct gk20a_platform *platform = gk20a_get_platform(dev);
110 int err;
111
112 if (nvgpu_is_hypervisor_mode(g)) {
113 err = hyp_read_gid(&platform->vmid);
114 if (err) {
115 nvgpu_err(g, "failed to read vmid");
116 return err;
117 }
118 platform->phys_addr = nvgpu_tegra_hv_ipa_pa;
119 }
120#endif
121 return 0;
122}
diff --git a/include/os/linux/sync_sema_android.c b/include/os/linux/sync_sema_android.c
deleted file mode 100644
index 59e3b7a..0000000
--- a/include/os/linux/sync_sema_android.c
+++ /dev/null
@@ -1,418 +0,0 @@
1/*
2 * Semaphore Sync Framework Integration
3 *
4 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18#include <linux/file.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/module.h>
22#include <nvgpu/lock.h>
23
24#include <nvgpu/kmem.h>
25#include <nvgpu/semaphore.h>
26#include <nvgpu/bug.h>
27#include <nvgpu/kref.h>
28#include <nvgpu/channel.h>
29#include "../linux/channel.h"
30
31#include "../drivers/staging/android/sync.h"
32
33#include "sync_sema_android.h"
34
35static const struct sync_timeline_ops gk20a_sync_timeline_ops;
36
37struct gk20a_sync_timeline {
38 struct sync_timeline obj;
39 u32 max;
40 u32 min;
41};
42
43/**
44 * The sync framework dups pts when merging fences. We share a single
45 * refcounted gk20a_sync_pt for each duped pt.
46 */
47struct gk20a_sync_pt {
48 struct gk20a *g;
49 struct nvgpu_ref refcount;
50 u32 thresh;
51 struct nvgpu_semaphore *sema;
52 struct gk20a_sync_timeline *obj;
53
54 /*
55 * Use a spin lock here since it will have better performance
56 * than a mutex - there should be very little contention on this
57 * lock.
58 */
59 struct nvgpu_spinlock lock;
60};
61
62struct gk20a_sync_pt_inst {
63 struct sync_pt pt;
64 struct gk20a_sync_pt *shared;
65};
66
67/**
68 * Compares sync pt values a and b, both of which will trigger either before
69 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
70 * ref). Supplying ref allows us to handle wrapping correctly.
71 *
72 * Returns -1 if a < b (a triggers before b)
73 * 0 if a = b (a and b trigger at the same time)
74 * 1 if a > b (b triggers before a)
75 */
76static int __gk20a_sync_pt_compare_ref(
77 u32 ref,
78 u32 a,
79 u32 b)
80{
81 /*
82 * We normalize both a and b by subtracting ref from them.
83 * Denote the normalized values by a_n and b_n. Note that because
84 * of wrapping, a_n and/or b_n may be negative.
85 *
86 * The normalized values a_n and b_n satisfy:
87 * - a positive value triggers before a negative value
88 * - a smaller positive value triggers before a greater positive value
89 * - a smaller negative value (greater in absolute value) triggers
90 * before a greater negative value (smaller in absolute value).
91 *
92 * Thus we can just stick to unsigned arithmetic and compare
93 * (u32)a_n to (u32)b_n.
94 *
95 * Just to reiterate the possible cases:
96 *
97 * 1A) ...ref..a....b....
98 * 1B) ...ref..b....a....
99 * 2A) ...b....ref..a.... b_n < 0
100 * 2B) ...a....ref..b.... a_n > 0
101 * 3A) ...a....b....ref.. a_n < 0, b_n < 0
102 * 3B) ...b....a....ref.. a_n < 0, b_n < 0
103 */
104 u32 a_n = a - ref;
105 u32 b_n = b - ref;
106 if (a_n < b_n)
107 return -1;
108 else if (a_n > b_n)
109 return 1;
110 else
111 return 0;
112}
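/*
 * Worked example added for exposition (not in the original source):
 * with ref = 0xfffffff0, a = 0xfffffffa and b = 0x00000005,
 * a_n = a - ref = 0x0a and b_n = b - ref = 0x15 (unsigned wrap-around),
 * so a_n < b_n and the function returns -1: a triggers before b even
 * though a > b when compared as raw u32 values.
 */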
113
114static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
115{
116 struct gk20a_sync_pt_inst *pti =
117 container_of(pt, struct gk20a_sync_pt_inst, pt);
118 return pti->shared;
119}
120static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
121{
122 if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
123 return NULL;
124 return (struct gk20a_sync_timeline *)obj;
125}
126
127static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
128{
129 struct gk20a_sync_pt *pt =
130 container_of(ref, struct gk20a_sync_pt, refcount);
131 struct gk20a *g = pt->g;
132
133 if (pt->sema)
134 nvgpu_semaphore_put(pt->sema);
135 nvgpu_kfree(g, pt);
136}
137
138static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
139 struct gk20a *g,
140 struct gk20a_sync_timeline *obj,
141 struct nvgpu_semaphore *sema)
142{
143 struct gk20a_sync_pt *shared;
144
145 shared = nvgpu_kzalloc(g, sizeof(*shared));
146 if (!shared)
147 return NULL;
148
149 nvgpu_ref_init(&shared->refcount);
150 shared->g = g;
151 shared->obj = obj;
152 shared->sema = sema;
153 shared->thresh = ++obj->max; /* sync framework has a lock */
154
155 nvgpu_spinlock_init(&shared->lock);
156
157 nvgpu_semaphore_get(sema);
158
159 return shared;
160}
161
162static struct sync_pt *gk20a_sync_pt_create_inst(
163 struct gk20a *g,
164 struct gk20a_sync_timeline *obj,
165 struct nvgpu_semaphore *sema)
166{
167 struct gk20a_sync_pt_inst *pti;
168
169 pti = (struct gk20a_sync_pt_inst *)
170 sync_pt_create(&obj->obj, sizeof(*pti));
171 if (!pti)
172 return NULL;
173
174 pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
175 if (!pti->shared) {
176 sync_pt_free(&pti->pt);
177 return NULL;
178 }
179 return &pti->pt;
180}
181
182static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
183{
184 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
185 if (pt)
186 nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
187}
188
189static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
190{
191 struct gk20a_sync_pt_inst *pti;
192 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
193
194 pti = (struct gk20a_sync_pt_inst *)
195 sync_pt_create(&pt->obj->obj, sizeof(*pti));
196 if (!pti)
197 return NULL;
198 pti->shared = pt;
199 nvgpu_ref_get(&pt->refcount);
200 return &pti->pt;
201}
202
203/*
204 * This function must be able to run on the same sync_pt concurrently. This
205 * requires a lock to protect access to the sync_pt's internal data structures
206 * which are modified as a side effect of calling this function.
207 */
208static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
209{
210 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
211 struct gk20a_sync_timeline *obj = pt->obj;
212 bool signaled = true;
213
214 nvgpu_spinlock_acquire(&pt->lock);
215 if (!pt->sema)
216 goto done;
217
218 	/* Acquired == not released yet == active == not signaled. */
219 signaled = !nvgpu_semaphore_is_acquired(pt->sema);
220
221 if (signaled) {
222 /* Update min if necessary. */
223 if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
224 obj->min) == 1)
225 obj->min = pt->thresh;
226
227 /* Release the semaphore to the pool. */
228 nvgpu_semaphore_put(pt->sema);
229 pt->sema = NULL;
230 }
231done:
232 nvgpu_spinlock_release(&pt->lock);
233
234 return signaled;
235}
236
237static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
238{
239 bool a_expired;
240 bool b_expired;
241 struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
242 struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);
243
244 if (WARN_ON(pt_a->obj != pt_b->obj))
245 return 0;
246
247 /* Early out */
248 if (a == b)
249 return 0;
250
251 a_expired = gk20a_sync_pt_has_signaled(a);
252 b_expired = gk20a_sync_pt_has_signaled(b);
253 if (a_expired && !b_expired) {
254 /* Easy, a was earlier */
255 return -1;
256 } else if (!a_expired && b_expired) {
257 /* Easy, b was earlier */
258 return 1;
259 }
260
261 /* Both a and b are expired (trigger before min) or not
262 * expired (trigger after min), so we can use min
263 * as a reference value for __gk20a_sync_pt_compare_ref.
264 */
265 return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
266 pt_a->thresh, pt_b->thresh);
267}
268
269static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
270{
271 return obj->min;
272}
273
274static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
275 char *str, int size)
276{
277 struct gk20a_sync_timeline *obj =
278 (struct gk20a_sync_timeline *)timeline;
279 snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
280}
281
282static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
283 char *str, int size)
284{
285 struct nvgpu_semaphore *s = pt->sema;
286
287 snprintf(str, size, "S: pool=%llu [v=%u,r_v=%u]",
288 s->location.pool->page_idx,
289 nvgpu_semaphore_get_value(s),
290 nvgpu_semaphore_read(s));
291}
292
293static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
294 int size)
295{
296 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
297
298 if (pt->sema) {
299 gk20a_sync_pt_value_str_for_sema(pt, str, size);
300 return;
301 }
302
303 snprintf(str, size, "%d", pt->thresh);
304}
305
306static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
307 .driver_name = "nvgpu_semaphore",
308 .dup = gk20a_sync_pt_dup_inst,
309 .has_signaled = gk20a_sync_pt_has_signaled,
310 .compare = gk20a_sync_pt_compare,
311 .free_pt = gk20a_sync_pt_free_inst,
312 .timeline_value_str = gk20a_sync_timeline_value_str,
313 .pt_value_str = gk20a_sync_pt_value_str,
314};
315
316/* Public API */
317
318struct sync_fence *gk20a_sync_fence_fdget(int fd)
319{
320 struct sync_fence *fence = sync_fence_fdget(fd);
321 int i;
322
323 if (!fence)
324 return NULL;
325
326 for (i = 0; i < fence->num_fences; i++) {
327 struct sync_pt *spt = sync_pt_from_fence(fence->cbs[i].sync_pt);
328 struct sync_timeline *t;
329
330 if (spt == NULL) {
331 sync_fence_put(fence);
332 return NULL;
333 }
334
335 t = sync_pt_parent(spt);
336 if (t->ops != &gk20a_sync_timeline_ops) {
337 sync_fence_put(fence);
338 return NULL;
339 }
340 }
341
342 return fence;
343}
344
345struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
346{
347 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
348 struct nvgpu_semaphore *sema;
349
350 nvgpu_spinlock_acquire(&pt->lock);
351 sema = pt->sema;
352 if (sema)
353 nvgpu_semaphore_get(sema);
354 nvgpu_spinlock_release(&pt->lock);
355
356 return sema;
357}
358
359void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
360{
361 sync_timeline_signal(timeline, 0);
362}
363
364void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
365{
366 sync_timeline_destroy(timeline);
367}
368
369struct sync_timeline *gk20a_sync_timeline_create(
370 const char *name)
371{
372 struct gk20a_sync_timeline *obj;
373
374 obj = (struct gk20a_sync_timeline *)
375 sync_timeline_create(&gk20a_sync_timeline_ops,
376 sizeof(struct gk20a_sync_timeline),
377 name);
378 if (!obj)
379 return NULL;
380 obj->max = 0;
381 obj->min = 0;
382 return &obj->obj;
383}
384
385struct sync_fence *gk20a_sync_fence_create(
386 struct channel_gk20a *c,
387 struct nvgpu_semaphore *sema,
388 const char *fmt, ...)
389{
390 char name[30];
391 va_list args;
392 struct sync_pt *pt;
393 struct sync_fence *fence;
394 struct gk20a *g = c->g;
395
396 struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
397 struct nvgpu_os_fence_framework *fence_framework = NULL;
398 struct gk20a_sync_timeline *timeline = NULL;
399
400 fence_framework = &os_channel_priv->fence_framework;
401
402 timeline = to_gk20a_timeline(fence_framework->timeline);
403
404 pt = gk20a_sync_pt_create_inst(g, timeline, sema);
405 if (pt == NULL)
406 return NULL;
407
408 va_start(args, fmt);
409 vsnprintf(name, sizeof(name), fmt, args);
410 va_end(args);
411
412 fence = sync_fence_create(name, pt);
413 if (fence == NULL) {
414 sync_pt_free(pt);
415 return NULL;
416 }
417 return fence;
418}
diff --git a/include/os/linux/sync_sema_android.h b/include/os/linux/sync_sema_android.h
deleted file mode 100644
index 4fca7be..0000000
--- a/include/os/linux/sync_sema_android.h
+++ /dev/null
@@ -1,51 +0,0 @@
1/*
2 * Semaphore Sync Framework Integration
3 *
4 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _GK20A_SYNC_H_
20#define _GK20A_SYNC_H_
21
22struct sync_timeline;
23struct sync_fence;
24struct sync_pt;
25struct nvgpu_semaphore;
26struct fence;
27
28#ifdef CONFIG_SYNC
29struct sync_timeline *gk20a_sync_timeline_create(const char *name);
30void gk20a_sync_timeline_destroy(struct sync_timeline *);
31void gk20a_sync_timeline_signal(struct sync_timeline *);
32struct sync_fence *gk20a_sync_fence_create(
33 struct channel_gk20a *c,
34 struct nvgpu_semaphore *,
35 const char *fmt, ...);
36struct sync_fence *gk20a_sync_fence_fdget(int fd);
37struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt);
38#else
39static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {}
40static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {}
41static inline struct sync_fence *gk20a_sync_fence_fdget(int fd)
42{
43 return NULL;
44}
45static inline struct sync_timeline *gk20a_sync_timeline_create(
46 const char *name) {
47 return NULL;
48}
49#endif
50
51#endif
diff --git a/include/os/linux/sysfs.c b/include/os/linux/sysfs.c
deleted file mode 100644
index 221ea0c..0000000
--- a/include/os/linux/sysfs.c
+++ /dev/null
@@ -1,1275 +0,0 @@
1/*
2 * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/device.h>
18#include <linux/pm_runtime.h>
19#include <linux/fb.h>
20
21#include <nvgpu/kmem.h>
22#include <nvgpu/nvhost.h>
23#include <nvgpu/ptimer.h>
24#include <nvgpu/power_features/cg.h>
25#include <nvgpu/power_features/pg.h>
26
27#include "os_linux.h"
28#include "sysfs.h"
29#include "platform_gk20a.h"
30#include "gk20a/gr_gk20a.h"
31#include "gv11b/gr_gv11b.h"
32
33#define PTIMER_FP_FACTOR 1000000
34
35#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
36
37#define TPC_MASK_FOR_ALL_ACTIVE_TPCs (u32) 0x0
38
39static ssize_t elcg_enable_store(struct device *dev,
40 struct device_attribute *attr, const char *buf, size_t count)
41{
42 struct gk20a *g = get_gk20a(dev);
43 unsigned long val = 0;
44 int err;
45
46 if (kstrtoul(buf, 10, &val) < 0)
47 return -EINVAL;
48
49 err = gk20a_busy(g);
50 if (err)
51 return err;
52
53 if (val) {
54 nvgpu_cg_elcg_set_elcg_enabled(g, true);
55 } else {
56 nvgpu_cg_elcg_set_elcg_enabled(g, false);
57 }
58
59 gk20a_idle(g);
60
61 nvgpu_info(g, "ELCG is %s.", val ? "enabled" :
62 "disabled");
63
64 return count;
65}
66
67static ssize_t elcg_enable_read(struct device *dev,
68 struct device_attribute *attr, char *buf)
69{
70 struct gk20a *g = get_gk20a(dev);
71
72 return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0);
73}
74
75static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store);
76
77static ssize_t blcg_enable_store(struct device *dev,
78 struct device_attribute *attr, const char *buf, size_t count)
79{
80 struct gk20a *g = get_gk20a(dev);
81 unsigned long val = 0;
82 int err;
83
84 if (kstrtoul(buf, 10, &val) < 0)
85 return -EINVAL;
86
87 err = gk20a_busy(g);
88 if (err)
89 return err;
90
91 if (val) {
92 nvgpu_cg_blcg_set_blcg_enabled(g, true);
93 } else {
94 nvgpu_cg_blcg_set_blcg_enabled(g, false);
95 }
96
97 gk20a_idle(g);
98
99 nvgpu_info(g, "BLCG is %s.", val ? "enabled" :
100 "disabled");
101
102 return count;
103}
104
105static ssize_t blcg_enable_read(struct device *dev,
106 struct device_attribute *attr, char *buf)
107{
108 struct gk20a *g = get_gk20a(dev);
109
110 return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0);
111}
112
113
114static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store);
115
116static ssize_t slcg_enable_store(struct device *dev,
117 struct device_attribute *attr, const char *buf, size_t count)
118{
119 struct gk20a *g = get_gk20a(dev);
120 unsigned long val = 0;
121 int err;
122
123 if (kstrtoul(buf, 10, &val) < 0)
124 return -EINVAL;
125
126 err = gk20a_busy(g);
127 if (err) {
128 return err;
129 }
130
131 if (val) {
132 nvgpu_cg_slcg_set_slcg_enabled(g, true);
133 } else {
134 nvgpu_cg_slcg_set_slcg_enabled(g, false);
135 }
136
137 /*
138 * TODO: slcg_therm_load_gating is not enabled anywhere during
139 * init. Therefore, it would be incongruous to add it here. Once
140 * it is added to init, we should add it here too.
141 */
142 gk20a_idle(g);
143
144 nvgpu_info(g, "SLCG is %s.", val ? "enabled" :
145 "disabled");
146
147 return count;
148}
149
150static ssize_t slcg_enable_read(struct device *dev,
151 struct device_attribute *attr, char *buf)
152{
153 struct gk20a *g = get_gk20a(dev);
154
155 return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 1 : 0);
156}
157
158static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store);
159
160static ssize_t ptimer_scale_factor_show(struct device *dev,
161 struct device_attribute *attr,
162 char *buf)
163{
164 struct gk20a *g = get_gk20a(dev);
165 struct gk20a_platform *platform = dev_get_drvdata(dev);
166 u32 src_freq_hz = platform->ptimer_src_freq;
167 u32 scaling_factor_fp;
168 ssize_t res;
169
170 if (!src_freq_hz) {
171 nvgpu_err(g, "reference clk_m rate is not set correctly");
172 return -EINVAL;
173 }
174
175 scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) /
176 ((u32)(src_freq_hz) /
177 (u32)(PTIMER_FP_FACTOR));
178 res = snprintf(buf,
179 PAGE_SIZE,
180 "%u.%u\n",
181 scaling_factor_fp / PTIMER_FP_FACTOR,
182 scaling_factor_fp % PTIMER_FP_FACTOR);
183
184 return res;
185
186}
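/*
 * Worked example added for exposition (not in the original source), with
 * purely illustrative numbers: if PTIMER_REF_FREQ_HZ were 31250000 and the
 * source clock 19200000 Hz, then src_freq_hz / PTIMER_FP_FACTOR == 19 and
 * scaling_factor_fp == 31250000 / 19 == 1644736, which the snprintf above
 * prints as "1.644736".
 */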
187
188static DEVICE_ATTR(ptimer_scale_factor,
189 S_IRUGO,
190 ptimer_scale_factor_show,
191 NULL);
192
193static ssize_t ptimer_ref_freq_show(struct device *dev,
194 struct device_attribute *attr,
195 char *buf)
196{
197 struct gk20a *g = get_gk20a(dev);
198 struct gk20a_platform *platform = dev_get_drvdata(dev);
199 u32 src_freq_hz = platform->ptimer_src_freq;
200 ssize_t res;
201
202 if (!src_freq_hz) {
203 nvgpu_err(g, "reference clk_m rate is not set correctly");
204 return -EINVAL;
205 }
206
207 res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ);
208
209 return res;
210
211}
212
213static DEVICE_ATTR(ptimer_ref_freq,
214 S_IRUGO,
215 ptimer_ref_freq_show,
216 NULL);
217
218static ssize_t ptimer_src_freq_show(struct device *dev,
219 struct device_attribute *attr,
220 char *buf)
221{
222 struct gk20a *g = get_gk20a(dev);
223 struct gk20a_platform *platform = dev_get_drvdata(dev);
224 u32 src_freq_hz = platform->ptimer_src_freq;
225 ssize_t res;
226
227 if (!src_freq_hz) {
228 nvgpu_err(g, "reference clk_m rate is not set correctly");
229 return -EINVAL;
230 }
231
232 res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz);
233
234 return res;
235
236}
237
238static DEVICE_ATTR(ptimer_src_freq,
239 S_IRUGO,
240 ptimer_src_freq_show,
241 NULL);
242
243
244static ssize_t gpu_powered_on_show(struct device *dev,
245 struct device_attribute *attr,
246 char *buf)
247{
248 struct gk20a *g = get_gk20a(dev);
249
250 return snprintf(buf, PAGE_SIZE, "%u\n", g->power_on);
251}
252
253static DEVICE_ATTR(gpu_powered_on, S_IRUGO, gpu_powered_on_show, NULL);
254
255#if defined(CONFIG_PM)
256static ssize_t railgate_enable_store(struct device *dev,
257 struct device_attribute *attr, const char *buf, size_t count)
258{
259 unsigned long railgate_enable = 0;
260 /* dev is guaranteed to be valid here. Ok to de-reference */
261 struct gk20a *g = get_gk20a(dev);
262 struct gk20a_platform *platform = dev_get_drvdata(dev);
263 bool enabled = nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE);
264 int err;
265
266 if (kstrtoul(buf, 10, &railgate_enable) < 0)
267 return -EINVAL;
268
269 /* convert to boolean */
270 railgate_enable = !!railgate_enable;
271
272 /* writing same value should be treated as nop and successful */
273 if (railgate_enable == enabled)
274 goto out;
275
276 if (!platform->can_railgate_init) {
277 nvgpu_err(g, "Railgating is not supported");
278 return -EINVAL;
279 }
280
281 if (railgate_enable) {
282 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, true);
283 pm_runtime_set_autosuspend_delay(dev, g->railgate_delay);
284 } else {
285 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false);
286 pm_runtime_set_autosuspend_delay(dev, -1);
287 }
288 /* wake-up system to make rail-gating setting effective */
289 err = gk20a_busy(g);
290 if (err)
291 return err;
292 gk20a_idle(g);
293
294out:
295 nvgpu_info(g, "railgate is %s.",
296 nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ?
297 "enabled" : "disabled");
298
299 return count;
300}
301
302static ssize_t railgate_enable_read(struct device *dev,
303 struct device_attribute *attr, char *buf)
304{
305 struct gk20a *g = get_gk20a(dev);
306
307 return snprintf(buf, PAGE_SIZE, "%d\n",
308 nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ? 1 : 0);
309}
310
311static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read,
312 railgate_enable_store);
313#endif
314
315static ssize_t railgate_delay_store(struct device *dev,
316 struct device_attribute *attr,
317 const char *buf, size_t count)
318{
319 int railgate_delay = 0, ret = 0;
320 struct gk20a *g = get_gk20a(dev);
321 int err;
322
323 if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) {
324 nvgpu_info(g, "does not support power-gating");
325 return count;
326 }
327
328 ret = sscanf(buf, "%d", &railgate_delay);
329 if (ret == 1 && railgate_delay >= 0) {
330 g->railgate_delay = railgate_delay;
331 pm_runtime_set_autosuspend_delay(dev, g->railgate_delay);
332 } else
333 nvgpu_err(g, "Invalid powergate delay");
334
335 /* wake-up system to make rail-gating delay effective immediately */
336 err = gk20a_busy(g);
337 if (err)
338 return err;
339 gk20a_idle(g);
340
341 return count;
342}
343static ssize_t railgate_delay_show(struct device *dev,
344 struct device_attribute *attr, char *buf)
345{
346 struct gk20a *g = get_gk20a(dev);
347
348 return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay);
349}
350static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show,
351 railgate_delay_store);
352
353static ssize_t is_railgated_show(struct device *dev,
354 struct device_attribute *attr, char *buf)
355{
356 struct gk20a_platform *platform = dev_get_drvdata(dev);
357 	bool is_railgated = false;
358
359 if (platform->is_railgated)
360 is_railgated = platform->is_railgated(dev);
361
362 return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no");
363}
364static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL);
365
366static ssize_t counters_show(struct device *dev,
367 struct device_attribute *attr, char *buf)
368{
369 struct gk20a *g = get_gk20a(dev);
370 u32 busy_cycles, total_cycles;
371 ssize_t res;
372
373 nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
374
375 res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles);
376
377 return res;
378}
379static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL);
380
381static ssize_t counters_show_reset(struct device *dev,
382 struct device_attribute *attr, char *buf)
383{
384 ssize_t res = counters_show(dev, attr, buf);
385 struct gk20a *g = get_gk20a(dev);
386
387 nvgpu_pmu_reset_load_counters(g);
388
389 return res;
390}
391static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL);
392
393static ssize_t gk20a_load_show(struct device *dev,
394 struct device_attribute *attr,
395 char *buf)
396{
397 struct gk20a *g = get_gk20a(dev);
398 u32 busy_time;
399 ssize_t res;
400 int err;
401
402 if (!g->power_on) {
403 busy_time = 0;
404 } else {
405 err = gk20a_busy(g);
406 if (err)
407 return err;
408
409 nvgpu_pmu_load_update(g);
410 nvgpu_pmu_load_norm(g, &busy_time);
411 gk20a_idle(g);
412 }
413
414 res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time);
415
416 return res;
417}
418static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL);
419
420static ssize_t elpg_enable_store(struct device *dev,
421 struct device_attribute *attr, const char *buf, size_t count)
422{
423 struct gk20a *g = get_gk20a(dev);
424 unsigned long val = 0;
425 int err;
426
427 if (kstrtoul(buf, 10, &val) < 0)
428 return -EINVAL;
429
430 if (!g->power_on) {
431 return -EINVAL;
432 } else {
433 err = gk20a_busy(g);
434 if (err)
435 return -EAGAIN;
436		/*
437		 * Since elpg is refcounted, we should not call enable/disable
438		 * again if it is already in the requested state.
439		 */
440 if (val != 0) {
441 nvgpu_pg_elpg_set_elpg_enabled(g, true);
442 } else {
443 nvgpu_pg_elpg_set_elpg_enabled(g, false);
444 }
445 gk20a_idle(g);
446 }
447 nvgpu_info(g, "ELPG is %s.", val ? "enabled" :
448 "disabled");
449
450 return count;
451}
452
453static ssize_t elpg_enable_read(struct device *dev,
454 struct device_attribute *attr, char *buf)
455{
456 struct gk20a *g = get_gk20a(dev);
457
458 return snprintf(buf, PAGE_SIZE, "%d\n",
459 nvgpu_pg_elpg_is_enabled(g) ? 1 : 0);
460}
461
462static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store);
463
464static ssize_t ldiv_slowdown_factor_store(struct device *dev,
465 struct device_attribute *attr, const char *buf, size_t count)
466{
467 struct gk20a *g = get_gk20a(dev);
468 unsigned long val = 0;
469 int err;
470
471 if (kstrtoul(buf, 10, &val) < 0) {
472 nvgpu_err(g, "parse error for input SLOWDOWN factor\n");
473 return -EINVAL;
474 }
475
476 if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) {
477 nvgpu_err(g, "Invalid SLOWDOWN factor\n");
478 return -EINVAL;
479 }
480
481 if (val == g->ldiv_slowdown_factor)
482 return count;
483
484 if (!g->power_on) {
485 g->ldiv_slowdown_factor = val;
486 } else {
487 err = gk20a_busy(g);
488 if (err)
489 return -EAGAIN;
490
491 g->ldiv_slowdown_factor = val;
492
493 if (g->ops.pmu.pmu_pg_init_param)
494 g->ops.pmu.pmu_pg_init_param(g,
495 PMU_PG_ELPG_ENGINE_ID_GRAPHICS);
496
497 gk20a_idle(g);
498 }
499
500 nvgpu_info(g, "ldiv_slowdown_factor is %x\n", g->ldiv_slowdown_factor);
501
502 return count;
503}
504
505static ssize_t ldiv_slowdown_factor_read(struct device *dev,
506 struct device_attribute *attr, char *buf)
507{
508 struct gk20a *g = get_gk20a(dev);
509
510 return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor);
511}
512
513static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW,
514 ldiv_slowdown_factor_read, ldiv_slowdown_factor_store);
515
516static ssize_t mscg_enable_store(struct device *dev,
517 struct device_attribute *attr, const char *buf, size_t count)
518{
519 struct gk20a *g = get_gk20a(dev);
520 struct nvgpu_pmu *pmu = &g->pmu;
521 unsigned long val = 0;
522 int err;
523
524 if (kstrtoul(buf, 10, &val) < 0)
525 return -EINVAL;
526
527 if (!g->power_on) {
528 g->mscg_enabled = val ? true : false;
529 } else {
530 err = gk20a_busy(g);
531 if (err)
532 return -EAGAIN;
533		/*
534		 * Since elpg is refcounted, we should not call enable/disable
535		 * again if it is already in the requested state.
536		 */
537 if (val && !g->mscg_enabled) {
538 g->mscg_enabled = true;
539 if (g->ops.pmu.pmu_is_lpwr_feature_supported(g,
540 PMU_PG_LPWR_FEATURE_MSCG)) {
541 if (!ACCESS_ONCE(pmu->mscg_stat)) {
542 WRITE_ONCE(pmu->mscg_stat,
543 PMU_MSCG_ENABLED);
544 /* make status visible */
545 smp_mb();
546 }
547 }
548
549 } else if (!val && g->mscg_enabled) {
550 if (g->ops.pmu.pmu_is_lpwr_feature_supported(g,
551 PMU_PG_LPWR_FEATURE_MSCG)) {
552 nvgpu_pmu_pg_global_enable(g, false);
553 WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED);
554 /* make status visible */
555 smp_mb();
556 g->mscg_enabled = false;
557 if (nvgpu_pg_elpg_is_enabled(g)) {
558 nvgpu_pg_elpg_enable(g);
559 }
560 }
561 g->mscg_enabled = false;
562 }
563 gk20a_idle(g);
564 }
565 nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? "enabled" :
566 "disabled");
567
568 return count;
569}
570
571static ssize_t mscg_enable_read(struct device *dev,
572 struct device_attribute *attr, char *buf)
573{
574 struct gk20a *g = get_gk20a(dev);
575
576 return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0);
577}
578
579static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store);
580
581static ssize_t aelpg_param_store(struct device *dev,
582 struct device_attribute *attr, const char *buf, size_t count)
583{
584 struct gk20a *g = get_gk20a(dev);
585 int status = 0;
586 union pmu_ap_cmd ap_cmd;
587 int *paramlist = (int *)g->pmu.aelpg_param;
588 u32 defaultparam[5] = {
589 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US,
590 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US,
591 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US,
592 APCTRL_POWER_BREAKEVEN_DEFAULT_US,
593 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT
594 };
595
596 /* Get each parameter value from input string*/
597 sscanf(buf, "%d %d %d %d %d", &paramlist[0], &paramlist[1],
598 &paramlist[2], &paramlist[3], &paramlist[4]);
599
600 /* If parameter value is 0 then reset to SW default values*/
601 if ((paramlist[0] | paramlist[1] | paramlist[2]
602 | paramlist[3] | paramlist[4]) == 0x00) {
603 memcpy(paramlist, defaultparam, sizeof(defaultparam));
604 }
605
606	/* If AELPG is enabled and the PMU is ready, post the values to the
607	 * PMU now; otherwise they are stored here and posted later.
608	 */
609 if (g->aelpg_enabled && g->pmu.pmu_ready) {
610 /* Disable AELPG */
611 ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL;
612 ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
613 status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
614
615 /* Enable AELPG */
616 nvgpu_aelpg_init(g);
617 nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
618 }
619
620 return count;
621}
622
623static ssize_t aelpg_param_read(struct device *dev,
624 struct device_attribute *attr, char *buf)
625{
626 struct gk20a *g = get_gk20a(dev);
627
628 return snprintf(buf, PAGE_SIZE,
629 "%d %d %d %d %d\n", g->pmu.aelpg_param[0],
630 g->pmu.aelpg_param[1], g->pmu.aelpg_param[2],
631 g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]);
632}
633
634static DEVICE_ATTR(aelpg_param, ROOTRW,
635 aelpg_param_read, aelpg_param_store);
636
637static ssize_t aelpg_enable_store(struct device *dev,
638 struct device_attribute *attr, const char *buf, size_t count)
639{
640 struct gk20a *g = get_gk20a(dev);
641 unsigned long val = 0;
642 int status = 0;
643 union pmu_ap_cmd ap_cmd;
644 int err;
645
646 if (kstrtoul(buf, 10, &val) < 0)
647 return -EINVAL;
648
649 err = gk20a_busy(g);
650 if (err)
651 return err;
652
653 if (g->pmu.pmu_ready) {
654 if (val && !g->aelpg_enabled) {
655 g->aelpg_enabled = true;
656 /* Enable AELPG */
657 ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL;
658 ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
659 status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
660 } else if (!val && g->aelpg_enabled) {
661 g->aelpg_enabled = false;
662 /* Disable AELPG */
663 ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL;
664 ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
665 status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
666 }
667 } else {
668 nvgpu_info(g, "PMU is not ready, AELPG request failed");
669 }
670 gk20a_idle(g);
671
672 nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" :
673 "disabled");
674
675 return count;
676}
677
678static ssize_t aelpg_enable_read(struct device *dev,
679 struct device_attribute *attr, char *buf)
680{
681 struct gk20a *g = get_gk20a(dev);
682
683 return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 1 : 0);
684}
685
686static DEVICE_ATTR(aelpg_enable, ROOTRW,
687 aelpg_enable_read, aelpg_enable_store);
688
689
690static ssize_t allow_all_enable_read(struct device *dev,
691 struct device_attribute *attr, char *buf)
692{
693 struct gk20a *g = get_gk20a(dev);
694
695 return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0);
696}
697
698static ssize_t allow_all_enable_store(struct device *dev,
699 struct device_attribute *attr, const char *buf, size_t count)
700{
701 struct gk20a *g = get_gk20a(dev);
702 unsigned long val = 0;
703 int err;
704
705 if (kstrtoul(buf, 10, &val) < 0)
706 return -EINVAL;
707
708	err = gk20a_busy(g);
709	if (err)
710		return err;
711	g->allow_all = (val ? true : false);
712	gk20a_idle(g);
711
712 return count;
713}
714
715static DEVICE_ATTR(allow_all, ROOTRW,
716 allow_all_enable_read, allow_all_enable_store);
717
718static ssize_t emc3d_ratio_store(struct device *dev,
719 struct device_attribute *attr, const char *buf, size_t count)
720{
721 struct gk20a *g = get_gk20a(dev);
722 unsigned long val = 0;
723
724 if (kstrtoul(buf, 10, &val) < 0)
725 return -EINVAL;
726
727 g->emc3d_ratio = val;
728
729 return count;
730}
731
732static ssize_t emc3d_ratio_read(struct device *dev,
733 struct device_attribute *attr, char *buf)
734{
735 struct gk20a *g = get_gk20a(dev);
736
737 return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio);
738}
739
740static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store);
741
742static ssize_t fmax_at_vmin_safe_read(struct device *dev,
743 struct device_attribute *attr, char *buf)
744{
745 struct gk20a *g = get_gk20a(dev);
746 unsigned long gpu_fmax_at_vmin_hz = 0;
747
748 if (g->ops.clk.get_fmax_at_vmin_safe)
749 gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g);
750
751 return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz));
752}
753
754static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL);
755
756#ifdef CONFIG_PM
757static ssize_t force_idle_store(struct device *dev,
758 struct device_attribute *attr, const char *buf, size_t count)
759{
760 struct gk20a *g = get_gk20a(dev);
761 unsigned long val = 0;
762 int err = 0;
763
764 if (kstrtoul(buf, 10, &val) < 0)
765 return -EINVAL;
766
767 if (val) {
768 if (g->forced_idle)
769 return count; /* do nothing */
770 else {
771 err = __gk20a_do_idle(g, false);
772 if (!err) {
773 g->forced_idle = 1;
774 nvgpu_info(g, "gpu is idle : %d",
775 g->forced_idle);
776 }
777 }
778 } else {
779 if (!g->forced_idle)
780 return count; /* do nothing */
781 else {
782 err = __gk20a_do_unidle(g);
783 if (!err) {
784 g->forced_idle = 0;
785 nvgpu_info(g, "gpu is idle : %d",
786 g->forced_idle);
787 }
788 }
789 }
790
791 return count;
792}
793
794static ssize_t force_idle_read(struct device *dev,
795 struct device_attribute *attr, char *buf)
796{
797 struct gk20a *g = get_gk20a(dev);
798
799 return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 1 : 0);
800}
801
802static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
803#endif
804
805static bool is_tpc_mask_valid(struct gk20a *g, u32 tpc_mask)
806{
807 u32 i;
808 bool valid = false;
809
810 for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) {
811 if (tpc_mask == g->valid_tpc_mask[i]) {
812 valid = true;
813 break;
814 }
815 }
816 return valid;
817}
818
819static ssize_t tpc_pg_mask_read(struct device *dev,
820 struct device_attribute *attr, char *buf)
821{
822 struct gk20a *g = get_gk20a(dev);
823
824 return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask);
825}
826
827static ssize_t tpc_pg_mask_store(struct device *dev,
828 struct device_attribute *attr, const char *buf, size_t count)
829{
830 struct gk20a *g = get_gk20a(dev);
831 struct gr_gk20a *gr = &g->gr;
832 unsigned long val = 0;
833
834 nvgpu_mutex_acquire(&g->tpc_pg_lock);
835
836 if (kstrtoul(buf, 10, &val) < 0) {
837 nvgpu_err(g, "invalid value");
838 nvgpu_mutex_release(&g->tpc_pg_lock);
839 return -EINVAL;
840 }
841
842 if (val == g->tpc_pg_mask) {
843 nvgpu_info(g, "no value change, same mask already set");
844 goto exit;
845 }
846
847 if (gr->ctx_vars.golden_image_size) {
848 nvgpu_err(g, "golden image size already initialized");
849 nvgpu_mutex_release(&g->tpc_pg_lock);
850 return -ENODEV;
851 }
852
853	/* check that the value from userspace is one of the
854	 * valid TPC configurations.
855	 */
856 if (is_tpc_mask_valid(g, (u32)val)) {
857 g->tpc_pg_mask = val;
858 } else {
859 nvgpu_err(g, "TPC-PG mask is invalid");
860 nvgpu_mutex_release(&g->tpc_pg_lock);
861 return -EINVAL;
862 }
863exit:
864 nvgpu_mutex_release(&g->tpc_pg_lock);
865
866 return count;
867}
868
869static DEVICE_ATTR(tpc_pg_mask, ROOTRW, tpc_pg_mask_read, tpc_pg_mask_store);
870
871static ssize_t tpc_fs_mask_store(struct device *dev,
872 struct device_attribute *attr, const char *buf, size_t count)
873{
874 struct gk20a *g = get_gk20a(dev);
875 unsigned long val = 0;
876
877 if (kstrtoul(buf, 10, &val) < 0)
878 return -EINVAL;
879
880 if (!g->gr.gpc_tpc_mask)
881 return -ENODEV;
882
883 if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) {
884 g->gr.gpc_tpc_mask[0] = val;
885 g->tpc_fs_mask_user = val;
886
887 g->ops.gr.set_gpc_tpc_mask(g, 0);
888
889 nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image);
890 g->gr.ctx_vars.local_golden_image = NULL;
891 g->gr.ctx_vars.golden_image_initialized = false;
892 g->gr.ctx_vars.golden_image_size = 0;
893 /* Cause next poweron to reinit just gr */
894 g->gr.sw_ready = false;
895 }
896
897 return count;
898}
899
900static ssize_t tpc_fs_mask_read(struct device *dev,
901 struct device_attribute *attr, char *buf)
902{
903 struct gk20a *g = get_gk20a(dev);
904 struct gr_gk20a *gr = &g->gr;
905 u32 gpc_index;
906 u32 tpc_fs_mask = 0;
907 int err = 0;
908
909 err = gk20a_busy(g);
910 if (err)
911 return err;
912
913 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
914 if (g->ops.gr.get_gpc_tpc_mask)
915 tpc_fs_mask |=
916 g->ops.gr.get_gpc_tpc_mask(g, gpc_index) <<
917 (gr->max_tpc_per_gpc_count * gpc_index);
918 }
919
920 gk20a_idle(g);
921
922 return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask);
923}
924
925static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store);
926
927static ssize_t min_timeslice_us_read(struct device *dev,
928 struct device_attribute *attr, char *buf)
929{
930 struct gk20a *g = get_gk20a(dev);
931
932 return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us);
933}
934
935static ssize_t min_timeslice_us_store(struct device *dev,
936 struct device_attribute *attr, const char *buf, size_t count)
937{
938 struct gk20a *g = get_gk20a(dev);
939 unsigned long val;
940
941 if (kstrtoul(buf, 10, &val) < 0)
942 return -EINVAL;
943
944 if (val > g->max_timeslice_us)
945 return -EINVAL;
946
947 g->min_timeslice_us = val;
948
949 return count;
950}
951
952static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read,
953 min_timeslice_us_store);
954
955static ssize_t max_timeslice_us_read(struct device *dev,
956 struct device_attribute *attr, char *buf)
957{
958 struct gk20a *g = get_gk20a(dev);
959
960 return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us);
961}
962
963static ssize_t max_timeslice_us_store(struct device *dev,
964 struct device_attribute *attr, const char *buf, size_t count)
965{
966 struct gk20a *g = get_gk20a(dev);
967 unsigned long val;
968
969 if (kstrtoul(buf, 10, &val) < 0)
970 return -EINVAL;
971
972 if (val < g->min_timeslice_us)
973 return -EINVAL;
974
975 g->max_timeslice_us = val;
976
977 return count;
978}
979
980static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read,
981 max_timeslice_us_store);
982
983static ssize_t czf_bypass_store(struct device *dev,
984 struct device_attribute *attr, const char *buf, size_t count)
985{
986 struct gk20a *g = get_gk20a(dev);
987 unsigned long val;
988
989 if (kstrtoul(buf, 10, &val) < 0)
990 return -EINVAL;
991
992 if (val >= 4)
993 return -EINVAL;
994
995 g->gr.czf_bypass = val;
996
997 return count;
998}
999
1000static ssize_t czf_bypass_read(struct device *dev,
1001 struct device_attribute *attr, char *buf)
1002{
1003 struct gk20a *g = get_gk20a(dev);
1004
1005 return sprintf(buf, "%d\n", g->gr.czf_bypass);
1006}
1007
1008static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store);
1009
1010static ssize_t pd_max_batches_store(struct device *dev,
1011 struct device_attribute *attr, const char *buf, size_t count)
1012{
1013 struct gk20a *g = get_gk20a(dev);
1014 unsigned long val;
1015
1016 if (kstrtoul(buf, 10, &val) < 0)
1017 return -EINVAL;
1018
1019 if (val > 64)
1020 return -EINVAL;
1021
1022 g->gr.pd_max_batches = val;
1023
1024 return count;
1025}
1026
1027static ssize_t pd_max_batches_read(struct device *dev,
1028 struct device_attribute *attr, char *buf)
1029{
1030 struct gk20a *g = get_gk20a(dev);
1031
1032 return sprintf(buf, "%d\n", g->gr.pd_max_batches);
1033}
1034
1035static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store);
1036
1037static ssize_t gfxp_wfi_timeout_count_store(struct device *dev,
1038 struct device_attribute *attr, const char *buf, size_t count)
1039{
1040 struct gk20a *g = get_gk20a(dev);
1041 struct gr_gk20a *gr = &g->gr;
1042 unsigned long val = 0;
1043 int err = -1;
1044
1045 if (kstrtoul(buf, 10, &val) < 0)
1046 return -EINVAL;
1047
1048 if (g->ops.gr.get_max_gfxp_wfi_timeout_count) {
1049 if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g))
1050 return -EINVAL;
1051 }
1052
1053 gr->gfxp_wfi_timeout_count = val;
1054
1055 if (g->ops.gr.init_preemption_state && g->power_on) {
1056 err = gk20a_busy(g);
1057 if (err)
1058 return err;
1059
1060 err = gr_gk20a_elpg_protected_call(g,
1061 g->ops.gr.init_preemption_state(g));
1062
1063 gk20a_idle(g);
1064
1065 if (err)
1066 return err;
1067 }
1068 return count;
1069}
1070
1071static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev,
1072 struct device_attribute *attr, const char *buf, size_t count)
1073{
1074 struct gk20a *g = get_gk20a(dev);
1075 struct gr_gk20a *gr = &g->gr;
1076 int err = -1;
1077
1078 if (count > 0 && buf[0] == 's')
1079 /* sysclk */
1080 gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK;
1081 else
1082 /* usec */
1083 gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC;
1084
1085 if (g->ops.gr.init_preemption_state && g->power_on) {
1086 err = gk20a_busy(g);
1087 if (err)
1088 return err;
1089
1090 err = gr_gk20a_elpg_protected_call(g,
1091 g->ops.gr.init_preemption_state(g));
1092
1093 gk20a_idle(g);
1094
1095 if (err)
1096 return err;
1097 }
1098
1099 return count;
1100}
1101
1102static ssize_t gfxp_wfi_timeout_count_read(struct device *dev,
1103 struct device_attribute *attr, char *buf)
1104{
1105 struct gk20a *g = get_gk20a(dev);
1106 struct gr_gk20a *gr = &g->gr;
1107 u32 val = gr->gfxp_wfi_timeout_count;
1108
1109 return snprintf(buf, PAGE_SIZE, "%d\n", val);
1110}
1111
1112static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev,
1113 struct device_attribute *attr, char *buf)
1114{
1115 struct gk20a *g = get_gk20a(dev);
1116 struct gr_gk20a *gr = &g->gr;
1117
1118 if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC)
1119 return snprintf(buf, PAGE_SIZE, "usec\n");
1120 else
1121 return snprintf(buf, PAGE_SIZE, "sysclk\n");
1122}
1123
1124static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH),
1125 gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store);
1126
1127static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH),
1128 gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store);
1129
1130static ssize_t comptag_mem_deduct_store(struct device *dev,
1131 struct device_attribute *attr,
1132 const char *buf, size_t count)
1133{
1134 struct gk20a *g = get_gk20a(dev);
1135 unsigned long val;
1136
1137 if (kstrtoul(buf, 10, &val) < 0)
1138 return -EINVAL;
1139
1140 if (val >= totalram_size_in_mb) {
1141		dev_err(dev, "comptag_mem_deduct cannot be set above %lu",
1142 totalram_size_in_mb);
1143 return -EINVAL;
1144 }
1145
1146 g->gr.comptag_mem_deduct = val;
1147 /* Deduct the part taken by the running system */
1148 g->gr.max_comptag_mem -= val;
1149
1150 return count;
1151}
1152
1153static ssize_t comptag_mem_deduct_show(struct device *dev,
1154 struct device_attribute *attr, char *buf)
1155{
1156 struct gk20a *g = get_gk20a(dev);
1157
1158 return sprintf(buf, "%d\n", g->gr.comptag_mem_deduct);
1159}
1160
1161static DEVICE_ATTR(comptag_mem_deduct, ROOTRW,
1162 comptag_mem_deduct_show, comptag_mem_deduct_store);
1163
1164void nvgpu_remove_sysfs(struct device *dev)
1165{
1166 device_remove_file(dev, &dev_attr_elcg_enable);
1167 device_remove_file(dev, &dev_attr_blcg_enable);
1168 device_remove_file(dev, &dev_attr_slcg_enable);
1169 device_remove_file(dev, &dev_attr_ptimer_scale_factor);
1170 device_remove_file(dev, &dev_attr_ptimer_ref_freq);
1171 device_remove_file(dev, &dev_attr_ptimer_src_freq);
1172 device_remove_file(dev, &dev_attr_elpg_enable);
1173 device_remove_file(dev, &dev_attr_mscg_enable);
1174 device_remove_file(dev, &dev_attr_emc3d_ratio);
1175 device_remove_file(dev, &dev_attr_ldiv_slowdown_factor);
1176
1177 device_remove_file(dev, &dev_attr_fmax_at_vmin_safe);
1178
1179 device_remove_file(dev, &dev_attr_counters);
1180 device_remove_file(dev, &dev_attr_counters_reset);
1181 device_remove_file(dev, &dev_attr_load);
1182 device_remove_file(dev, &dev_attr_railgate_delay);
1183 device_remove_file(dev, &dev_attr_is_railgated);
1184#ifdef CONFIG_PM
1185 device_remove_file(dev, &dev_attr_force_idle);
1186 device_remove_file(dev, &dev_attr_railgate_enable);
1187#endif
1188 device_remove_file(dev, &dev_attr_aelpg_param);
1189 device_remove_file(dev, &dev_attr_aelpg_enable);
1190 device_remove_file(dev, &dev_attr_allow_all);
1191 device_remove_file(dev, &dev_attr_tpc_fs_mask);
1192 device_remove_file(dev, &dev_attr_tpc_pg_mask);
1193 device_remove_file(dev, &dev_attr_min_timeslice_us);
1194 device_remove_file(dev, &dev_attr_max_timeslice_us);
1195
1196#ifdef CONFIG_TEGRA_GK20A_NVHOST
1197 nvgpu_nvhost_remove_symlink(get_gk20a(dev));
1198#endif
1199
1200 device_remove_file(dev, &dev_attr_czf_bypass);
1201 device_remove_file(dev, &dev_attr_pd_max_batches);
1202 device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count);
1203 device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit);
1204 device_remove_file(dev, &dev_attr_gpu_powered_on);
1205
1206 device_remove_file(dev, &dev_attr_comptag_mem_deduct);
1207
1208 if (strcmp(dev_name(dev), "gpu.0")) {
1209 struct kobject *kobj = &dev->kobj;
1210 struct device *parent = container_of((kobj->parent),
1211 struct device, kobj);
1212 sysfs_remove_link(&parent->kobj, "gpu.0");
1213 }
1214}
1215
1216int nvgpu_create_sysfs(struct device *dev)
1217{
1218 struct gk20a *g = get_gk20a(dev);
1219 int error = 0;
1220
1221 error |= device_create_file(dev, &dev_attr_elcg_enable);
1222 error |= device_create_file(dev, &dev_attr_blcg_enable);
1223 error |= device_create_file(dev, &dev_attr_slcg_enable);
1224 error |= device_create_file(dev, &dev_attr_ptimer_scale_factor);
1225 error |= device_create_file(dev, &dev_attr_ptimer_ref_freq);
1226 error |= device_create_file(dev, &dev_attr_ptimer_src_freq);
1227 error |= device_create_file(dev, &dev_attr_elpg_enable);
1228 error |= device_create_file(dev, &dev_attr_mscg_enable);
1229 error |= device_create_file(dev, &dev_attr_emc3d_ratio);
1230 error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor);
1231
1232 error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe);
1233
1234 error |= device_create_file(dev, &dev_attr_counters);
1235 error |= device_create_file(dev, &dev_attr_counters_reset);
1236 error |= device_create_file(dev, &dev_attr_load);
1237 error |= device_create_file(dev, &dev_attr_railgate_delay);
1238 error |= device_create_file(dev, &dev_attr_is_railgated);
1239#ifdef CONFIG_PM
1240 error |= device_create_file(dev, &dev_attr_force_idle);
1241 error |= device_create_file(dev, &dev_attr_railgate_enable);
1242#endif
1243 error |= device_create_file(dev, &dev_attr_aelpg_param);
1244 error |= device_create_file(dev, &dev_attr_aelpg_enable);
1245 error |= device_create_file(dev, &dev_attr_allow_all);
1246 error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
1247 error |= device_create_file(dev, &dev_attr_tpc_pg_mask);
1248 error |= device_create_file(dev, &dev_attr_min_timeslice_us);
1249 error |= device_create_file(dev, &dev_attr_max_timeslice_us);
1250
1251#ifdef CONFIG_TEGRA_GK20A_NVHOST
1252 error |= nvgpu_nvhost_create_symlink(g);
1253#endif
1254
1255 error |= device_create_file(dev, &dev_attr_czf_bypass);
1256 error |= device_create_file(dev, &dev_attr_pd_max_batches);
1257 error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count);
1258 error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit);
1259 error |= device_create_file(dev, &dev_attr_gpu_powered_on);
1260
1261 error |= device_create_file(dev, &dev_attr_comptag_mem_deduct);
1262
1263 if (strcmp(dev_name(dev), "gpu.0")) {
1264 struct kobject *kobj = &dev->kobj;
1265 struct device *parent = container_of((kobj->parent),
1266 struct device, kobj);
1267 error |= sysfs_create_link(&parent->kobj,
1268 &dev->kobj, "gpu.0");
1269 }
1270
1271 if (error)
1272 nvgpu_err(g, "Failed to create sysfs attributes!\n");
1273
1274 return error;
1275}
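
A minimal sketch of how the create/remove pair above is typically wired into a driver's probe and removal paths (the example_* names are hypothetical; error handling trimmed):

static int example_platform_probe(struct device *dev)
{
	int err;

	/* ... core driver initialization ... */

	err = nvgpu_create_sysfs(dev);	/* registers all attributes above */
	if (err)
		return err;

	return 0;
}

static void example_platform_remove(struct device *dev)
{
	nvgpu_remove_sysfs(dev);	/* removes the same attribute set */
	/* ... core driver teardown ... */
}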
diff --git a/include/os/linux/sysfs.h b/include/os/linux/sysfs.h
deleted file mode 100644
index 8092584..0000000
--- a/include/os/linux/sysfs.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef NVGPU_SYSFS_H
17#define NVGPU_SYSFS_H
18
19struct device;
20
21int nvgpu_create_sysfs(struct device *dev);
22void nvgpu_remove_sysfs(struct device *dev);
23
24#endif
diff --git a/include/os/linux/thread.c b/include/os/linux/thread.c
deleted file mode 100644
index c56bff6..0000000
--- a/include/os/linux/thread.c
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kthread.h>
18
19#include <nvgpu/thread.h>
20#include <nvgpu/timers.h>
21
22int nvgpu_thread_proxy(void *threaddata)
23{
24 struct nvgpu_thread *thread = threaddata;
25 int ret = thread->fn(thread->data);
26
27 thread->running = false;
28 return ret;
29}
30
31int nvgpu_thread_create(struct nvgpu_thread *thread,
32 void *data,
33 int (*threadfn)(void *data), const char *name)
34{
35 struct task_struct *task = kthread_create(nvgpu_thread_proxy,
36 thread, name);
37 if (IS_ERR(task))
38 return PTR_ERR(task);
39
40 thread->task = task;
41 thread->fn = threadfn;
42 thread->data = data;
43 thread->running = true;
44 wake_up_process(task);
45 return 0;
46};
47
48void nvgpu_thread_stop(struct nvgpu_thread *thread)
49{
50 if (thread->task) {
51 kthread_stop(thread->task);
52 thread->task = NULL;
53 }
54};
55
56bool nvgpu_thread_should_stop(struct nvgpu_thread *thread)
57{
58 return kthread_should_stop();
59};
60
61bool nvgpu_thread_is_running(struct nvgpu_thread *thread)
62{
63 return ACCESS_ONCE(thread->running);
64};
65
66void nvgpu_thread_join(struct nvgpu_thread *thread)
67{
68 while (ACCESS_ONCE(thread->running))
69 nvgpu_msleep(10);
70};
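
A usage sketch of the thread API above (the example_* names are hypothetical; the worker receives the thread object as its data pointer so it can poll nvgpu_thread_should_stop(), and the 10 ms poll interval is arbitrary):

static struct nvgpu_thread example_thread;

static int example_worker(void *data)
{
	struct nvgpu_thread *thread = data;

	while (!nvgpu_thread_should_stop(thread)) {
		/* ... periodic work ... */
		nvgpu_msleep(10);	/* arbitrary poll interval */
	}
	return 0;
}

static int example_start(void)
{
	return nvgpu_thread_create(&example_thread, &example_thread,
				   example_worker, "nvgpu_example");
}

static void example_stop(void)
{
	/* kthread_stop() inside nvgpu_thread_stop() waits for the worker to exit */
	nvgpu_thread_stop(&example_thread);
}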
diff --git a/include/os/linux/timers.c b/include/os/linux/timers.c
deleted file mode 100644
index 018fd2d..0000000
--- a/include/os/linux/timers.c
+++ /dev/null
@@ -1,269 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/ktime.h>
18#include <linux/delay.h>
19
20#include <nvgpu/timers.h>
21#include <nvgpu/soc.h>
22#include <nvgpu/gk20a.h>
23
24#include "platform_gk20a.h"
25
26/*
27 * Returns 1 if the platform is pre-Si and should ignore the timeout checking.
28 * Setting %NVGPU_TIMER_NO_PRE_SI will make this always return 0 (i.e. do the
29 * timeout check regardless of platform).
30 */
31static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout)
32{
33 if (timeout->flags & NVGPU_TIMER_NO_PRE_SI)
34 return 0;
35
36 return !nvgpu_platform_is_silicon(timeout->g);
37}
38
39/**
40 * nvgpu_timeout_init - Init timer.
41 *
42 * @g - nvgpu device.
43 * @timeout - The timer.
44 * @duration - Timeout in milliseconds or number of retries.
45 * @flags - Flags for timer.
46 *
47 * This configures the timeout to start now, i.e. when this
48 * function is called. Available flags to pass to @flags:
49 *
50 * %NVGPU_TIMER_CPU_TIMER
51 * %NVGPU_TIMER_RETRY_TIMER
52 * %NVGPU_TIMER_NO_PRE_SI
53 * %NVGPU_TIMER_SILENT_TIMEOUT
54 *
55 * If neither %NVGPU_TIMER_CPU_TIMER nor %NVGPU_TIMER_RETRY_TIMER is passed then
56 * a CPU timer is used by default.
57 */
58int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout,
59 u32 duration, unsigned long flags)
60{
61 if (flags & ~NVGPU_TIMER_FLAG_MASK)
62 return -EINVAL;
63
64 memset(timeout, 0, sizeof(*timeout));
65
66 timeout->g = g;
67 timeout->flags = flags;
68
69 if (flags & NVGPU_TIMER_RETRY_TIMER)
70 timeout->retries.max = duration;
71 else
72 timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(),
73 (s64)NSEC_PER_MSEC * duration));
74
75 return 0;
76}
77
78static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout,
79 void *caller,
80 const char *fmt, va_list args)
81{
82 struct gk20a *g = timeout->g;
83 ktime_t now = ktime_get();
84
85 if (nvgpu_timeout_is_pre_silicon(timeout))
86 return 0;
87
88 if (ktime_after(now, ns_to_ktime(timeout->time))) {
89 if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) {
90 char buf[128];
91
92 vsnprintf(buf, sizeof(buf), fmt, args);
93
94 nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf);
95 }
96
97 return -ETIMEDOUT;
98 }
99
100 return 0;
101}
102
103static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout,
104 void *caller,
105 const char *fmt, va_list args)
106{
107 struct gk20a *g = timeout->g;
108
109 if (nvgpu_timeout_is_pre_silicon(timeout))
110 return 0;
111
112 if (timeout->retries.attempted >= timeout->retries.max) {
113 if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) {
114 char buf[128];
115
116 vsnprintf(buf, sizeof(buf), fmt, args);
117
118 nvgpu_err(g, "No more retries @ %pF %s", caller, buf);
119 }
120
121 return -ETIMEDOUT;
122 }
123
124 timeout->retries.attempted++;
125
126 return 0;
127}
128
129/**
130 * __nvgpu_timeout_expired_msg - Check if a timeout has expired.
131 *
132 * @timeout - The timeout to check.
133 * @caller - Address of the caller of this function.
134 * @fmt - The fmt string.
135 *
136 * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise.
137 *
138 * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout
139 * then a message is printed based on %fmt.
140 */
141int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout,
142 void *caller, const char *fmt, ...)
143{
144 int ret;
145 va_list args;
146
147 va_start(args, fmt);
148 if (timeout->flags & NVGPU_TIMER_RETRY_TIMER)
149 ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt,
150 args);
151 else
152 ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt,
153 args);
154 va_end(args);
155
156 return ret;
157}
158
159/**
160 * nvgpu_timeout_peek_expired - Check the status of a timeout.
161 *
162 * @timeout - The timeout to check.
163 *
164 * Returns non-zero if the timeout is expired, zero otherwise. In the case of
165 * retry timers this will not increment the underlying retry count, and no
166 * message is printed if the timeout has expired.
167 *
168 * This function honors the pre-Si check as well.
169 */
170int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout)
171{
172 if (nvgpu_timeout_is_pre_silicon(timeout))
173 return 0;
174
175 if (timeout->flags & NVGPU_TIMER_RETRY_TIMER)
176 return timeout->retries.attempted >= timeout->retries.max;
177 else
178 return ktime_after(ktime_get(), ns_to_ktime(timeout->time));
179}
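
A minimal polling sketch using nvgpu_timeout_init() and nvgpu_timeout_peek_expired() above (poll_hw_ready() is a hypothetical readiness check; a ~10 ms CPU-timer budget is assumed):

static int example_wait_for_hw(struct gk20a *g)
{
	struct nvgpu_timeout timeout;
	int err;

	err = nvgpu_timeout_init(g, &timeout, 10, NVGPU_TIMER_CPU_TIMER);
	if (err)
		return err;

	while (!poll_hw_ready(g)) {	/* hypothetical condition being waited on */
		if (nvgpu_timeout_peek_expired(&timeout))
			return -ETIMEDOUT;
		nvgpu_udelay(10);	/* short busy-wait between polls */
	}

	return 0;
}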
180
181/**
182 * nvgpu_udelay - Delay for some number of microseconds.
183 *
184 * @usecs - Microseconds to wait for.
185 *
186 * Wait for at least @usecs microseconds. This is not guaranteed to be perfectly
187 * accurate. This is normally backed by a busy-loop, so waits should
188 * be kept short, below 100us. If longer delays are necessary then
189 * nvgpu_msleep() should be preferred.
190 *
191 * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This
192 * function will attempt to not use a busy-loop.
193 */
194void nvgpu_udelay(unsigned int usecs)
195{
196 udelay(usecs);
197}
198
199/**
200 * nvgpu_usleep_range - Sleep for a range of microseconds.
201 *
202 * @min_us - Minimum wait time.
203 * @max_us - Maximum wait time.
204 *
205 * Wait for some number of microseconds between @min_us and @max_us. This,
206 * unlike nvgpu_udelay(), will attempt to sleep for the passed number of
207 * microseconds instead of busy looping. Not all platforms support this,
208 * and in that case this reduces to nvgpu_udelay(min_us).
209 *
210 * Linux note: this is not safe to use in atomic context. If you are in
211 * atomic context you must use nvgpu_udelay().
212 */
213void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us)
214{
215 usleep_range(min_us, max_us);
216}
217
218/**
219 * nvgpu_msleep - Sleep for some milliseconds.
220 *
221 * @msecs - Sleep for at least this many milliseconds.
222 *
223 * Sleep for at least @msecs of milliseconds. For small @msecs (less than 20 ms
224 * or so) the sleep will be significantly longer due to scheduling overhead and
225 * mechanics.
226 */
227void nvgpu_msleep(unsigned int msecs)
228{
229 msleep(msecs);
230}
231
232/**
233 * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock.
234 *
235 * Return a clock in millisecond units. The start time of the clock is
236 * unspecified; the time returned can be compared with older ones to measure
237 * durations. The source clock does not jump when the system clock is adjusted.
238 */
239s64 nvgpu_current_time_ms(void)
240{
241 return ktime_to_ms(ktime_get());
242}
243
244/**
245 * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock.
246 *
247 * Return a clock in nanosecond units. The start time of the clock is
248 * unspecified; the time returned can be compared with older ones to measure
249 * durations. The source clock does not jump when the system clock is adjusted.
250 */
251s64 nvgpu_current_time_ns(void)
252{
253 return ktime_to_ns(ktime_get());
254}
255
256/**
257 * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp.
258 *
259 * Return a "high resolution" time stamp. It does not really matter exactly what
260 * it is, so long as it generally returns unique values and monotonically
261 * increases; wrap-around _is_ possible, though, in a system running for long
262 * enough.
263 *
264 * Note: what high resolution means is system dependent.
265 */
266u64 nvgpu_hr_timestamp(void)
267{
268 return get_cycles();
269}
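
A short sketch of measuring a duration with the monotonic clock helpers above (do_some_work() is a hypothetical operation being timed):

static void example_measure(struct gk20a *g)
{
	s64 start_ms = nvgpu_current_time_ms();

	do_some_work(g);	/* hypothetical operation being timed */

	nvgpu_info(g, "operation took %lld ms",
		   (long long)(nvgpu_current_time_ms() - start_ms));
}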
diff --git a/include/os/linux/vgpu/fecs_trace_vgpu.c b/include/os/linux/vgpu/fecs_trace_vgpu.c
deleted file mode 100644
index 02a381e..0000000
--- a/include/os/linux/vgpu/fecs_trace_vgpu.c
+++ /dev/null
@@ -1,225 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <uapi/linux/nvgpu.h>
18
19#include <nvgpu/kmem.h>
20#include <nvgpu/bug.h>
21#include <nvgpu/enabled.h>
22#include <nvgpu/ctxsw_trace.h>
23#include <nvgpu/vgpu/vgpu_ivm.h>
24#include <nvgpu/vgpu/tegra_vgpu.h>
25#include <nvgpu/vgpu/vgpu.h>
26#include <nvgpu/gk20a.h>
27
28#include "os/linux/os_linux.h"
29#include "gk20a/fecs_trace_gk20a.h"
30#include "vgpu/fecs_trace_vgpu.h"
31
32struct vgpu_fecs_trace {
33 struct tegra_hv_ivm_cookie *cookie;
34 struct nvgpu_ctxsw_ring_header *header;
35 struct nvgpu_gpu_ctxsw_trace_entry *entries;
36 int num_entries;
37 bool enabled;
38 void *buf;
39};
40
41int vgpu_fecs_trace_init(struct gk20a *g)
42{
43 struct device *dev = dev_from_gk20a(g);
44 struct device_node *np = dev->of_node;
45 struct of_phandle_args args;
46 struct vgpu_fecs_trace *vcst;
47 u32 mempool;
48 int err;
49
50 nvgpu_log_fn(g, " ");
51
52 vcst = nvgpu_kzalloc(g, sizeof(*vcst));
53 if (!vcst)
54 return -ENOMEM;
55
56 err = of_parse_phandle_with_fixed_args(np,
57 "mempool-fecs-trace", 1, 0, &args);
58 if (err) {
59 nvgpu_info(g, "does not support fecs trace");
60 goto fail;
61 }
62 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
63
64 mempool = args.args[0];
65 vcst->cookie = vgpu_ivm_mempool_reserve(mempool);
66 if (IS_ERR(vcst->cookie)) {
67 nvgpu_info(g,
68 "mempool %u reserve failed", mempool);
69 vcst->cookie = NULL;
70 err = -EINVAL;
71 goto fail;
72 }
73
74 vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie),
75 vgpu_ivm_get_size(vcst->cookie));
76 if (!vcst->buf) {
77 nvgpu_info(g, "ioremap_cache failed");
78 err = -EINVAL;
79 goto fail;
80 }
81 vcst->header = vcst->buf;
82 vcst->num_entries = vcst->header->num_ents;
83	if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) {
84		nvgpu_err(g, "entry size mismatch");
85		err = -EINVAL;	/* otherwise the fail path would return 0 */
86		goto fail;
87	}
87 vcst->entries = vcst->buf + sizeof(*vcst->header);
88 g->fecs_trace = (struct gk20a_fecs_trace *)vcst;
89
90 return 0;
91fail:
92 iounmap(vcst->buf);
93 if (vcst->cookie)
94 vgpu_ivm_mempool_unreserve(vcst->cookie);
95 nvgpu_kfree(g, vcst);
96 return err;
97}
98
99int vgpu_fecs_trace_deinit(struct gk20a *g)
100{
101 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
102
103 iounmap(vcst->buf);
104 vgpu_ivm_mempool_unreserve(vcst->cookie);
105 nvgpu_kfree(g, vcst);
106 return 0;
107}
108
109int vgpu_fecs_trace_enable(struct gk20a *g)
110{
111 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
112 struct tegra_vgpu_cmd_msg msg = {
113 .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE,
114 .handle = vgpu_get_handle(g),
115 };
116 int err;
117
118 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
119 err = err ? err : msg.ret;
120 WARN_ON(err);
121 vcst->enabled = !err;
122 return err;
123}
124
125int vgpu_fecs_trace_disable(struct gk20a *g)
126{
127 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
128 struct tegra_vgpu_cmd_msg msg = {
129 .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE,
130 .handle = vgpu_get_handle(g),
131 };
132 int err;
133
134 vcst->enabled = false;
135 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
136 err = err ? err : msg.ret;
137 WARN_ON(err);
138 return err;
139}
140
141bool vgpu_fecs_trace_is_enabled(struct gk20a *g)
142{
143 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
144
145 return (vcst && vcst->enabled);
146}
147
148int vgpu_fecs_trace_poll(struct gk20a *g)
149{
150 struct tegra_vgpu_cmd_msg msg = {
151 .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL,
152 .handle = vgpu_get_handle(g),
153 };
154 int err;
155
156 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
157 err = err ? err : msg.ret;
158 WARN_ON(err);
159 return err;
160}
161
162int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size)
163{
164 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
165
166 *buf = vcst->buf;
167 *size = vgpu_ivm_get_size(vcst->cookie);
168 return 0;
169}
170
171int vgpu_free_user_buffer(struct gk20a *g)
172{
173 return 0;
174}
175
176int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma)
177{
178 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
179 unsigned long size = vgpu_ivm_get_size(vcst->cookie);
180 unsigned long vsize = vma->vm_end - vma->vm_start;
181
182 size = min(size, vsize);
183 size = round_up(size, PAGE_SIZE);
184
185 return remap_pfn_range(vma, vma->vm_start,
186 vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT,
187 size,
188 vma->vm_page_prot);
189}
190
191#ifdef CONFIG_GK20A_CTXSW_TRACE
192int vgpu_fecs_trace_max_entries(struct gk20a *g,
193 struct nvgpu_gpu_ctxsw_trace_filter *filter)
194{
195 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
196
197 return vcst->header->num_ents;
198}
199
200#if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE
201#error "FECS trace filter size mismatch!"
202#endif
203
204int vgpu_fecs_trace_set_filter(struct gk20a *g,
205 struct nvgpu_gpu_ctxsw_trace_filter *filter)
206{
207 struct tegra_vgpu_cmd_msg msg = {
208 .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER,
209 .handle = vgpu_get_handle(g),
210 };
211 struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter;
212 int err;
213
214 memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits));
215 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
216 err = err ? err : msg.ret;
217 WARN_ON(err);
218 return err;
219}
220
221void vgpu_fecs_trace_data_update(struct gk20a *g)
222{
223 gk20a_ctxsw_trace_wake_up(g, 0);
224}
225#endif /* CONFIG_GK20A_CTXSW_TRACE */
diff --git a/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c
deleted file mode 100644
index 0304bcc..0000000
--- a/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c
+++ /dev/null
@@ -1,103 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/platform_device.h>
18
19#include <nvgpu/nvhost.h>
20#include <nvgpu/gk20a.h>
21
22#include "vgpu/clk_vgpu.h"
23#include "os/linux/platform_gk20a.h"
24#include "os/linux/os_linux.h"
25#include "os/linux/vgpu/vgpu_linux.h"
26#include "os/linux/vgpu/platform_vgpu_tegra.h"
27
28static int gv11b_vgpu_probe(struct device *dev)
29{
30 struct platform_device *pdev = to_platform_device(dev);
31 struct gk20a_platform *platform = dev_get_drvdata(dev);
32 struct resource *r;
33 void __iomem *regs;
34 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g);
35 struct gk20a *g = platform->g;
36 int ret;
37
38 r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode");
39 if (!r) {
40 nvgpu_err(g, "failed to get usermode regs");
41 return -ENXIO;
42 }
43 regs = devm_ioremap_resource(dev, r);
44 if (IS_ERR(regs)) {
45 nvgpu_err(g, "failed to map usermode regs");
46 return PTR_ERR(regs);
47 }
48 l->usermode_regs = regs;
49
50#ifdef CONFIG_TEGRA_GK20A_NVHOST
51 ret = nvgpu_get_nvhost_dev(g);
52 if (ret) {
53 l->usermode_regs = NULL;
54 return ret;
55 }
56
57 ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev,
58 &g->syncpt_unit_base,
59 &g->syncpt_unit_size);
60 if (ret) {
61 nvgpu_err(g, "Failed to get syncpt interface");
62 return -ENOSYS;
63 }
64 g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1);
65 nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n",
66 g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size);
67#endif
68 vgpu_init_clk_support(platform->g);
69
70 return 0;
71}
72
73struct gk20a_platform gv11b_vgpu_tegra_platform = {
74 .has_syncpoints = true,
75
76 /* power management configuration */
77 .can_railgate_init = false,
78 .can_elpg_init = false,
79 .enable_slcg = false,
80 .enable_blcg = false,
81 .enable_elcg = false,
82 .enable_elpg = false,
83 .enable_aelpg = false,
84 .can_slcg = false,
85 .can_blcg = false,
86 .can_elcg = false,
87
88 .ch_wdt_timeout_ms = 5000,
89
90 .probe = gv11b_vgpu_probe,
91
92 .clk_round_rate = vgpu_plat_clk_round_rate,
93 .get_clk_freqs = vgpu_plat_clk_get_freqs,
94
95 /* frequency scaling configuration */
96 .devfreq_governor = "userspace",
97
98 .virtual_dev = true,
99
100 /* power management callbacks */
101 .suspend = vgpu_tegra_suspend,
102 .resume = vgpu_tegra_resume,
103};
diff --git a/include/os/linux/vgpu/platform_vgpu_tegra.c b/include/os/linux/vgpu/platform_vgpu_tegra.c
deleted file mode 100644
index 948323e..0000000
--- a/include/os/linux/vgpu/platform_vgpu_tegra.c
+++ /dev/null
@@ -1,97 +0,0 @@
1/*
2 * Tegra Virtualized GPU Platform Interface
3 *
4 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <nvgpu/nvhost.h>
20#include <nvgpu/gk20a.h>
21
22#include "os/linux/platform_gk20a.h"
23#include "vgpu/clk_vgpu.h"
24#include "vgpu_linux.h"
25
26static int gk20a_tegra_probe(struct device *dev)
27{
28#ifdef CONFIG_TEGRA_GK20A_NVHOST
29 struct gk20a_platform *platform = dev_get_drvdata(dev);
30 int ret;
31
32 ret = nvgpu_get_nvhost_dev(platform->g);
33 if (ret)
34 return ret;
35
36 vgpu_init_clk_support(platform->g);
37 return 0;
38#else
39 return 0;
40#endif
41}
42
43long vgpu_plat_clk_round_rate(struct device *dev, unsigned long rate)
44{
45 /* server will handle frequency rounding */
46 return rate;
47}
48
49int vgpu_plat_clk_get_freqs(struct device *dev, unsigned long **freqs,
50 int *num_freqs)
51{
52 struct gk20a_platform *platform = gk20a_get_platform(dev);
53 struct gk20a *g = platform->g;
54
55 return vgpu_clk_get_freqs(g, freqs, num_freqs);
56}
57
58int vgpu_plat_clk_cap_rate(struct device *dev, unsigned long rate)
59{
60 struct gk20a_platform *platform = gk20a_get_platform(dev);
61 struct gk20a *g = platform->g;
62
63 return vgpu_clk_cap_rate(g, rate);
64}
65
66struct gk20a_platform vgpu_tegra_platform = {
67 .has_syncpoints = true,
68 .aggressive_sync_destroy_thresh = 64,
69
70 /* power management configuration */
71 .can_railgate_init = false,
72 .can_elpg_init = false,
73 .enable_slcg = false,
74 .enable_blcg = false,
75 .enable_elcg = false,
76 .enable_elpg = false,
77 .enable_aelpg = false,
78 .can_slcg = false,
79 .can_blcg = false,
80 .can_elcg = false,
81
82 .ch_wdt_timeout_ms = 5000,
83
84 .probe = gk20a_tegra_probe,
85
86 .clk_round_rate = vgpu_plat_clk_round_rate,
87 .get_clk_freqs = vgpu_plat_clk_get_freqs,
88
89 /* frequency scaling configuration */
90 .devfreq_governor = "userspace",
91
92 .virtual_dev = true,
93
94 /* power management callbacks */
95 .suspend = vgpu_tegra_suspend,
96 .resume = vgpu_tegra_resume,
97};
diff --git a/include/os/linux/vgpu/platform_vgpu_tegra.h b/include/os/linux/vgpu/platform_vgpu_tegra.h
deleted file mode 100644
index fef346d..0000000
--- a/include/os/linux/vgpu/platform_vgpu_tegra.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef _VGPU_PLATFORM_H_
18#define _VGPU_PLATFORM_H_
19
20long vgpu_plat_clk_round_rate(struct device *dev, unsigned long rate);
21int vgpu_plat_clk_get_freqs(struct device *dev, unsigned long **freqs,
22 int *num_freqs);
23int vgpu_plat_clk_cap_rate(struct device *dev, unsigned long rate);
24#endif
diff --git a/include/os/linux/vgpu/sysfs_vgpu.c b/include/os/linux/vgpu/sysfs_vgpu.c
deleted file mode 100644
index ade5d82..0000000
--- a/include/os/linux/vgpu/sysfs_vgpu.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/device.h>
18#include <nvgpu/vgpu/vgpu.h>
19
20#include "os/linux/platform_gk20a.h"
21#include "os/linux/os_linux.h"
22#include "vgpu/ecc_vgpu.h"
23
24static ssize_t vgpu_load_show(struct device *dev,
25 struct device_attribute *attr,
26 char *buf)
27{
28 struct gk20a *g = get_gk20a(dev);
29 struct tegra_vgpu_cmd_msg msg = {0};
30 struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load;
31 int err;
32
33 msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD;
34 msg.handle = vgpu_get_handle(g);
35 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
36 if (err)
37 return err;
38
39 return snprintf(buf, PAGE_SIZE, "%u\n", p->load);
40}
41static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL);
42
43static ssize_t vgpu_ecc_stat_show(struct device *dev,
44 struct device_attribute *attr,
45 char *buf)
46{
47 struct gk20a *g = get_gk20a(dev);
48 struct tegra_vgpu_cmd_msg msg = {0};
49 struct tegra_vgpu_ecc_counter_params *p = &msg.params.ecc_counter;
50 struct dev_ext_attribute *ext_attr = container_of(attr,
51 struct dev_ext_attribute, attr);
52 struct vgpu_ecc_stat *ecc_stat = ext_attr->var;
53 int err;
54
55 p->ecc_id = ecc_stat->ecc_id;
56
57 msg.cmd = TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE;
58 msg.handle = vgpu_get_handle(g);
59 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
60 err = err ? err : msg.ret;
61 if (unlikely(err)) {
62 nvgpu_err(g, "ecc: cannot get ECC counter value: %d", err);
63 return err;
64 }
65
66 return snprintf(buf, PAGE_SIZE, "%u\n", p->value);
67}
68
69static int vgpu_create_ecc_sysfs(struct device *dev)
70{
71 struct gk20a *g = get_gk20a(dev);
72 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
73 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
74 struct vgpu_ecc_stat *stats;
75 struct dev_ext_attribute *attrs;
76 int err, i, count;
77
78 err = vgpu_ecc_get_info(g);
79 if (unlikely(err)) {
80 nvgpu_err(g, "ecc: cannot get ECC info: %d", err);
81 return err;
82 }
83
84 stats = priv->ecc_stats;
85 count = priv->ecc_stats_count;
86
87 attrs = nvgpu_kzalloc(g, count * sizeof(*attrs));
88 if (unlikely(!attrs)) {
89 nvgpu_err(g, "ecc: no memory");
90 vgpu_ecc_remove_info(g);
91 return -ENOMEM;
92 }
93
94 for (i = 0; i < count; i++) {
95 sysfs_attr_init(&attrs[i].attr.attr);
96 attrs[i].attr.attr.name = stats[i].name;
97 attrs[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
98 attrs[i].attr.show = vgpu_ecc_stat_show;
99 attrs[i].attr.store = NULL;
100 attrs[i].var = &stats[i];
101
102 err = device_create_file(dev, &attrs[i].attr);
103 if (unlikely(err)) {
104 nvgpu_warn(g, "ecc: cannot create file \"%s\": %d",
105 stats[i].name, err);
106 }
107 }
108
109 l->ecc_attrs = attrs;
110 return 0;
111}
112
113static void vgpu_remove_ecc_sysfs(struct device *dev)
114{
115 struct gk20a *g = get_gk20a(dev);
116 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
117 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
118 int i;
119
120 if (l->ecc_attrs) {
121 for (i = 0; i < priv->ecc_stats_count; i++)
122 device_remove_file(dev, &l->ecc_attrs[i].attr);
123
124 nvgpu_kfree(g, l->ecc_attrs);
125 l->ecc_attrs = NULL;
126 }
127
128 vgpu_ecc_remove_info(g);
129}
130
131void vgpu_create_sysfs(struct device *dev)
132{
133 if (device_create_file(dev, &dev_attr_load))
134 dev_err(dev, "Failed to create vgpu sysfs attributes!\n");
135
136 vgpu_create_ecc_sysfs(dev);
137}
138
139void vgpu_remove_sysfs(struct device *dev)
140{
141 device_remove_file(dev, &dev_attr_load);
142 vgpu_remove_ecc_sysfs(dev);
143}
diff --git a/include/os/linux/vgpu/vgpu_ivc.c b/include/os/linux/vgpu/vgpu_ivc.c
deleted file mode 100644
index 950f0d4..0000000
--- a/include/os/linux/vgpu/vgpu_ivc.c
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/types.h>
18#include <linux/tegra_gr_comm.h>
19
20#include "os/linux/os_linux.h"
21
22int vgpu_ivc_init(struct gk20a *g, u32 elems,
23 const size_t *queue_sizes, u32 queue_start, u32 num_queues)
24{
25 struct platform_device *pdev = to_platform_device(dev_from_gk20a(g));
26
27 return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start,
28 num_queues);
29}
30
31void vgpu_ivc_deinit(u32 queue_start, u32 num_queues)
32{
33 tegra_gr_comm_deinit(queue_start, num_queues);
34}
35
36void vgpu_ivc_release(void *handle)
37{
38 tegra_gr_comm_release(handle);
39}
40
41u32 vgpu_ivc_get_server_vmid(void)
42{
43 return tegra_gr_comm_get_server_vmid();
44}
45
46int vgpu_ivc_recv(u32 index, void **handle, void **data,
47 size_t *size, u32 *sender)
48{
49 return tegra_gr_comm_recv(index, handle, data, size, sender);
50}
51
52int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size)
53{
54 return tegra_gr_comm_send(peer, index, data, size);
55}
56
57int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle,
58 void **data, size_t *size)
59{
60 return tegra_gr_comm_sendrecv(peer, index, handle, data, size);
61}
62
63u32 vgpu_ivc_get_peer_self(void)
64{
65 return TEGRA_GR_COMM_ID_SELF;
66}
67
68void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr,
69 size_t *size)
70{
71 return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size);
72}
73
74void vgpu_ivc_oob_put_ptr(void *handle)
75{
76 tegra_gr_comm_oob_put_ptr(handle);
77}
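Every function above is a thin pass-through to the tegra_gr_comm IVC transport. For context, here is a hedged sketch of how a synchronous command round trip can be layered on these wrappers, in the style of the driver's vgpu_comm_sendrecv() (defined elsewhere); the queue index constant and the reply copy-back are simplified assumptions rather than the exact implementation.

static int example_cmd_sendrecv(struct tegra_vgpu_cmd_msg *msg,
				size_t size_in, size_t size_out)
{
	void *handle = NULL;
	void *data = msg;
	size_t size = size_in;
	int err;

	/* Send the command to the server peer and block for its reply. */
	err = vgpu_ivc_sendrecv(vgpu_ivc_get_peer_self(), TEGRA_VGPU_QUEUE_CMD,
				&handle, &data, &size);
	if (!err) {
		/* Copy the reply back to the caller and release the IVC
		 * frame returned by the transport. */
		memcpy(msg, data, min(size, size_out));
		vgpu_ivc_release(handle);
	}

	return err;
}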
diff --git a/include/os/linux/vgpu/vgpu_ivm.c b/include/os/linux/vgpu/vgpu_ivm.c
deleted file mode 100644
index bbd444d..0000000
--- a/include/os/linux/vgpu/vgpu_ivm.c
+++ /dev/null
@@ -1,53 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/vgpu/vgpu_ivm.h>
18
19#include <linux/tegra-ivc.h>
20
21#include "os/linux/os_linux.h"
22
23struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id)
24{
25 return tegra_hv_mempool_reserve(id);
26}
27
28int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie)
29{
30 return tegra_hv_mempool_unreserve(cookie);
31}
32
33u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie)
34{
35 return cookie->ipa;
36}
37
38u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie)
39{
40 return cookie->size;
41}
42
43void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie)
44{
45 return ioremap_cache(vgpu_ivm_get_ipa(cookie),
46 vgpu_ivm_get_size(cookie));
47}
48
49void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie,
50 void *addr)
51{
52 iounmap(addr);
53}
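These helpers wrap the Tegra hypervisor mempool API, and callers are expected to pair them: reserve, map, use the mapping, then unmap and unreserve in reverse order. A short usage sketch with illustrative naming and trimmed error handling:

static void *example_map_mempool(unsigned int id,
				 struct tegra_hv_ivm_cookie **cookie_out)
{
	struct tegra_hv_ivm_cookie *cookie;
	void *ptr;

	cookie = vgpu_ivm_mempool_reserve(id);
	if (IS_ERR_OR_NULL(cookie))
		return NULL;

	ptr = vgpu_ivm_mempool_map(cookie);	/* ioremap_cache() of ipa/size */
	if (!ptr) {
		vgpu_ivm_mempool_unreserve(cookie);
		return NULL;
	}

	*cookie_out = cookie;
	return ptr;
}

Teardown mirrors this: vgpu_ivm_mempool_unmap(cookie, ptr) followed by vgpu_ivm_mempool_unreserve(cookie).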
diff --git a/include/os/linux/vgpu/vgpu_linux.c b/include/os/linux/vgpu/vgpu_linux.c
deleted file mode 100644
index 80bcfff..0000000
--- a/include/os/linux/vgpu/vgpu_linux.c
+++ /dev/null
@@ -1,525 +0,0 @@
1/*
2 * Virtualized GPU for Linux
3 *
4 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/mm.h>
20#include <linux/slab.h>
21#include <linux/dma-mapping.h>
22#include <linux/pm_runtime.h>
23#include <linux/pm_qos.h>
24#include <linux/platform_device.h>
25#include <soc/tegra/chip-id.h>
26
27#include <nvgpu/kmem.h>
28#include <nvgpu/bug.h>
29#include <nvgpu/enabled.h>
30#include <nvgpu/debug.h>
31#include <nvgpu/soc.h>
32#include <nvgpu/ctxsw_trace.h>
33#include <nvgpu/defaults.h>
34#include <nvgpu/ltc.h>
35#include <nvgpu/channel.h>
36#include <nvgpu/clk_arb.h>
37
38#include "vgpu_linux.h"
39#include "vgpu/fecs_trace_vgpu.h"
40#include "vgpu/clk_vgpu.h"
41#include "gk20a/regops_gk20a.h"
42#include "gm20b/hal_gm20b.h"
43
44#include "os/linux/module.h"
45#include "os/linux/os_linux.h"
46#include "os/linux/ioctl.h"
47#include "os/linux/scale.h"
48#include "os/linux/driver_common.h"
49#include "os/linux/platform_gk20a.h"
50#include "os/linux/vgpu/platform_vgpu_tegra.h"
51
52struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g)
53{
54 struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g));
55
56 return (struct vgpu_priv_data *)plat->vgpu_priv;
57}
58
59static void vgpu_remove_support(struct gk20a *g)
60{
61 vgpu_remove_support_common(g);
62}
63
64static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
65{
66 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
67 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
68
69 nvgpu_mutex_init(&g->power_lock);
70 nvgpu_mutex_init(&g->ctxsw_disable_lock);
71 nvgpu_mutex_init(&g->clk_arb_enable_lock);
72 nvgpu_mutex_init(&g->cg_pg_lock);
73
74 nvgpu_mutex_init(&priv->vgpu_clk_get_freq_lock);
75
76 nvgpu_mutex_init(&l->ctrl.privs_lock);
77 nvgpu_init_list_node(&l->ctrl.privs);
78
79 l->regs_saved = l->regs;
80 l->bar1_saved = l->bar1;
81
82 nvgpu_atomic_set(&g->clk_arb_global_nr, 0);
83
84 g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
85 g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
86 __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints);
87 g->ptimer_src_freq = platform->ptimer_src_freq;
88 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
89 g->railgate_delay = platform->railgate_delay_init;
90
91 __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
92 platform->unify_address_spaces);
93}
94
95static int vgpu_init_support(struct platform_device *pdev)
96{
97 struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
98 struct gk20a *g = get_gk20a(&pdev->dev);
99 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
100 void __iomem *regs;
101 int err = 0;
102
103 if (!r) {
104 nvgpu_err(g, "failed to get gk20a bar1");
105 err = -ENXIO;
106 goto fail;
107 }
108
109 if (r->name && !strcmp(r->name, "/vgpu")) {
110 regs = devm_ioremap_resource(&pdev->dev, r);
111 if (IS_ERR(regs)) {
112 nvgpu_err(g, "failed to remap gk20a bar1");
113 err = PTR_ERR(regs);
114 goto fail;
115 }
116 l->bar1 = regs;
117 l->bar1_mem = r;
118 }
119
120 nvgpu_mutex_init(&g->dbg_sessions_lock);
121 nvgpu_mutex_init(&g->client_lock);
122
123 nvgpu_init_list_node(&g->profiler_objects);
124
125 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
126 if (!g->dbg_regops_tmp_buf) {
127 nvgpu_err(g, "couldn't allocate regops tmp buf");
128 return -ENOMEM;
129 }
130 g->dbg_regops_tmp_buf_ops =
131 SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
132
133 g->remove_support = vgpu_remove_support;
134 return 0;
135
136 fail:
137 vgpu_remove_support(g);
138 return err;
139}
140
141int vgpu_pm_prepare_poweroff(struct device *dev)
142{
143 struct gk20a *g = get_gk20a(dev);
144 int ret = 0;
145
146 nvgpu_log_fn(g, " ");
147
148 nvgpu_mutex_acquire(&g->power_lock);
149
150 if (!g->power_on)
151 goto done;
152
153 if (g->ops.fifo.channel_suspend)
154 ret = g->ops.fifo.channel_suspend(g);
155 if (ret)
156 goto done;
157
158 g->power_on = false;
159 done:
160 nvgpu_mutex_release(&g->power_lock);
161
162 return ret;
163}
164
165int vgpu_pm_finalize_poweron(struct device *dev)
166{
167 struct gk20a *g = get_gk20a(dev);
168 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
169 int err = 0;
170
171 nvgpu_log_fn(g, " ");
172
173 nvgpu_mutex_acquire(&g->power_lock);
174
175 if (g->power_on)
176 goto done;
177
178 g->power_on = true;
179
180 vgpu_detect_chip(g);
181 err = vgpu_init_hal(g);
182 if (err)
183 goto done;
184
185 if (g->ops.ltc.init_fs_state)
186 g->ops.ltc.init_fs_state(g);
187
188 err = nvgpu_init_ltc_support(g);
189 if (err) {
190 nvgpu_err(g, "failed to init ltc");
191 goto done;
192 }
193
194 err = vgpu_init_mm_support(g);
195 if (err) {
196 nvgpu_err(g, "failed to init gk20a mm");
197 goto done;
198 }
199
200 err = vgpu_init_fifo_support(g);
201 if (err) {
202 nvgpu_err(g, "failed to init gk20a fifo");
203 goto done;
204 }
205
206 err = vgpu_init_gr_support(g);
207 if (err) {
208 nvgpu_err(g, "failed to init gk20a gr");
209 goto done;
210 }
211
212 err = nvgpu_clk_arb_init_arbiter(g);
213 if (err) {
214 nvgpu_err(g, "failed to init clk arb");
215 goto done;
216 }
217
218 err = g->ops.chip_init_gpu_characteristics(g);
219 if (err) {
220 nvgpu_err(g, "failed to init gk20a gpu characteristics");
221 goto done;
222 }
223
224 err = nvgpu_finalize_poweron_linux(l);
225 if (err)
226 goto done;
227
228#ifdef CONFIG_GK20A_CTXSW_TRACE
229 gk20a_ctxsw_trace_init(g);
230#endif
231 gk20a_sched_ctrl_init(g);
232 gk20a_channel_resume(g);
233
234 g->sw_ready = true;
235
236done:
237 if (err)
238 g->power_on = false;
239
240 nvgpu_mutex_release(&g->power_lock);
241 return err;
242}
243
244static int vgpu_qos_notify(struct notifier_block *nb,
245 unsigned long n, void *data)
246{
247 struct gk20a_scale_profile *profile =
248 container_of(nb, struct gk20a_scale_profile,
249 qos_notify_block);
250 struct gk20a *g = get_gk20a(profile->dev);
251 u32 max_freq;
252 int err;
253
254 nvgpu_log_fn(g, " ");
255
256 max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS);
257 err = vgpu_plat_clk_cap_rate(profile->dev, max_freq);
258 if (err)
259 nvgpu_err(g, "%s failed, err=%d", __func__, err);
260
261	return NOTIFY_OK; /* let the remaining notifier callbacks in the chain run */
262}
263
264static int vgpu_pm_qos_init(struct device *dev)
265{
266 struct gk20a *g = get_gk20a(dev);
267 struct gk20a_scale_profile *profile = g->scale_profile;
268
269 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) {
270 if (!profile)
271 return -EINVAL;
272 } else {
273 profile = nvgpu_kzalloc(g, sizeof(*profile));
274 if (!profile)
275 return -ENOMEM;
276 g->scale_profile = profile;
277 }
278
279 profile->dev = dev;
280 profile->qos_notify_block.notifier_call = vgpu_qos_notify;
281 pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
282 &profile->qos_notify_block);
283 return 0;
284}
285
286static void vgpu_pm_qos_remove(struct device *dev)
287{
288 struct gk20a *g = get_gk20a(dev);
289
290 pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
291 &g->scale_profile->qos_notify_block);
292 nvgpu_kfree(g, g->scale_profile);
293 g->scale_profile = NULL;
294}
295
296static int vgpu_pm_init(struct device *dev)
297{
298 struct gk20a *g = get_gk20a(dev);
299 struct gk20a_platform *platform = gk20a_get_platform(dev);
300 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
301 unsigned long *freqs;
302 int num_freqs;
303 int err = 0;
304
305 nvgpu_log_fn(g, " ");
306
307 if (nvgpu_platform_is_simulation(g))
308 return 0;
309
310 __pm_runtime_disable(dev, false);
311
312 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
313 gk20a_scale_init(dev);
314
315 if (l->devfreq) {
316 /* set min/max frequency based on frequency table */
317 err = platform->get_clk_freqs(dev, &freqs, &num_freqs);
318 if (err)
319 return err;
320
321 if (num_freqs < 1)
322 return -EINVAL;
323
324 l->devfreq->min_freq = freqs[0];
325 l->devfreq->max_freq = freqs[num_freqs - 1];
326 }
327
328 err = vgpu_pm_qos_init(dev);
329 if (err)
330 return err;
331
332 return err;
333}
334
335int vgpu_probe(struct platform_device *pdev)
336{
337 struct nvgpu_os_linux *l;
338 struct gk20a *gk20a;
339 int err;
340 struct device *dev = &pdev->dev;
341 struct gk20a_platform *platform = gk20a_get_platform(dev);
342 struct vgpu_priv_data *priv;
343
344 if (!platform) {
345 dev_err(dev, "no platform data\n");
346 return -ENODATA;
347 }
348
349 l = kzalloc(sizeof(*l), GFP_KERNEL);
350 if (!l) {
351 dev_err(dev, "couldn't allocate gk20a support");
352 return -ENOMEM;
353 }
354 gk20a = &l->g;
355
356 nvgpu_log_fn(gk20a, " ");
357
358 nvgpu_init_gk20a(gk20a);
359
360 nvgpu_kmem_init(gk20a);
361
362 err = nvgpu_init_enabled_flags(gk20a);
363 if (err) {
364 kfree(gk20a);
365 return err;
366 }
367
368 l->dev = dev;
369 if (tegra_platform_is_vdk())
370 __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
371
372 gk20a->is_virtual = true;
373
374 priv = nvgpu_kzalloc(gk20a, sizeof(*priv));
375 if (!priv) {
376 kfree(gk20a);
377 return -ENOMEM;
378 }
379
380 platform->g = gk20a;
381 platform->vgpu_priv = priv;
382
383 err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class);
384 if (err)
385 return err;
386
387 vgpu_init_support(pdev);
388
389 vgpu_init_vars(gk20a, platform);
390
391 init_rwsem(&l->busy_lock);
392
393 nvgpu_spinlock_init(&gk20a->mc_enable_lock);
394
395 gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
396
397 /* Initialize the platform interface. */
398 err = platform->probe(dev);
399 if (err) {
400 if (err == -EPROBE_DEFER)
401 nvgpu_info(gk20a, "platform probe failed");
402 else
403 nvgpu_err(gk20a, "platform probe failed");
404 return err;
405 }
406
407 if (platform->late_probe) {
408 err = platform->late_probe(dev);
409 if (err) {
410 nvgpu_err(gk20a, "late probe failed");
411 return err;
412 }
413 }
414
415 err = vgpu_comm_init(gk20a);
416 if (err) {
417 nvgpu_err(gk20a, "failed to init comm interface");
418 return -ENOSYS;
419 }
420
421 priv->virt_handle = vgpu_connect();
422 if (!priv->virt_handle) {
423 nvgpu_err(gk20a, "failed to connect to server node");
424 vgpu_comm_deinit();
425 return -ENOSYS;
426 }
427
428 err = vgpu_get_constants(gk20a);
429 if (err) {
430 vgpu_comm_deinit();
431 return err;
432 }
433
434 err = vgpu_pm_init(dev);
435 if (err) {
436 nvgpu_err(gk20a, "pm init failed");
437 return err;
438 }
439
440 err = nvgpu_thread_create(&priv->intr_handler, gk20a,
441 vgpu_intr_thread, "gk20a");
442 if (err)
443 return err;
444
445 gk20a_debug_init(gk20a, "gpu.0");
446
447 /* Set DMA parameters to allow larger sgt lists */
448 dev->dma_parms = &l->dma_parms;
449 dma_set_max_seg_size(dev, UINT_MAX);
450
451 gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT;
452 gk20a->timeouts_disabled_by_user = false;
453 nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0);
454
455 vgpu_create_sysfs(dev);
456 gk20a_init_gr(gk20a);
457
458 nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages);
459 gk20a->gr.max_comptag_mem = totalram_size_in_mb;
460
461 nvgpu_ref_init(&gk20a->refcount);
462
463 return 0;
464}
465
466int vgpu_remove(struct platform_device *pdev)
467{
468 struct device *dev = &pdev->dev;
469 struct gk20a *g = get_gk20a(dev);
470
471 nvgpu_log_fn(g, " ");
472
473 vgpu_pm_qos_remove(dev);
474 if (g->remove_support)
475 g->remove_support(g);
476
477 vgpu_comm_deinit();
478 gk20a_sched_ctrl_cleanup(g);
479 gk20a_user_deinit(dev, &nvgpu_class);
480 vgpu_remove_sysfs(dev);
481 gk20a_get_platform(dev)->g = NULL;
482 gk20a_put(g);
483
484 return 0;
485}
486
487bool vgpu_is_reduced_bar1(struct gk20a *g)
488{
489 struct fifo_gk20a *f = &g->fifo;
490 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
491
492 return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size;
493}
494
495int vgpu_tegra_suspend(struct device *dev)
496{
497 struct tegra_vgpu_cmd_msg msg = {};
498 struct gk20a *g = get_gk20a(dev);
499 int err = 0;
500
501 msg.cmd = TEGRA_VGPU_CMD_SUSPEND;
502 msg.handle = vgpu_get_handle(g);
503 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
504 err = err ? err : msg.ret;
505 if (err)
506 nvgpu_err(g, "vGPU suspend failed\n");
507
508 return err;
509}
510
511int vgpu_tegra_resume(struct device *dev)
512{
513 struct tegra_vgpu_cmd_msg msg = {};
514 struct gk20a *g = get_gk20a(dev);
515 int err = 0;
516
517 msg.cmd = TEGRA_VGPU_CMD_RESUME;
518 msg.handle = vgpu_get_handle(g);
519 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
520 err = err ? err : msg.ret;
521 if (err)
522 nvgpu_err(g, "vGPU resume failed\n");
523
524 return err;
525}
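The probe, remove, suspend and resume entry points above are exported for the module code to register; the real hookup lives in os/linux/module.c and the gk20a platform data rather than in this file. As a hedged sketch only, the general shape of wiring such entry points into a platform driver looks like this (structure and names here are illustrative, not the driver's actual registration):

static const struct dev_pm_ops vgpu_pm_ops_sketch = {
	.suspend = vgpu_tegra_suspend,
	.resume  = vgpu_tegra_resume,
};

static struct platform_driver vgpu_driver_sketch = {
	.probe  = vgpu_probe,
	.remove = vgpu_remove,
	.driver = {
		.name = "vgpu-sketch",
		.pm   = &vgpu_pm_ops_sketch,
	},
};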
diff --git a/include/os/linux/vgpu/vgpu_linux.h b/include/os/linux/vgpu/vgpu_linux.h
deleted file mode 100644
index ff7d3a6..0000000
--- a/include/os/linux/vgpu/vgpu_linux.h
+++ /dev/null
@@ -1,68 +0,0 @@
1/*
2 * Virtualized GPU Linux Interfaces
3 *
4 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef __VGPU_LINUX_H__
20#define __VGPU_LINUX_H__
21
22struct device;
23struct platform_device;
24
25#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
26
27#include <nvgpu/vgpu/vgpu.h>
28
29int vgpu_pm_prepare_poweroff(struct device *dev);
30int vgpu_pm_finalize_poweron(struct device *dev);
31int vgpu_probe(struct platform_device *dev);
32int vgpu_remove(struct platform_device *dev);
33
34void vgpu_create_sysfs(struct device *dev);
35void vgpu_remove_sysfs(struct device *dev);
36
37int vgpu_tegra_suspend(struct device *dev);
38int vgpu_tegra_resume(struct device *dev);
39#else
40/* define placeholders for functions used outside of vgpu */
41
42static inline int vgpu_pm_prepare_poweroff(struct device *dev)
43{
44 return -ENOSYS;
45}
46static inline int vgpu_pm_finalize_poweron(struct device *dev)
47{
48 return -ENOSYS;
49}
50static inline int vgpu_probe(struct platform_device *dev)
51{
52 return -ENOSYS;
53}
54static inline int vgpu_remove(struct platform_device *dev)
55{
56 return -ENOSYS;
57}
58static inline int vgpu_tegra_suspend(struct device *dev)
59{
60 return -ENOSYS;
61}
62static inline int vgpu_tegra_resume(struct device *dev)
63{
64 return -ENOSYS;
65}
66#endif
67
68#endif
diff --git a/include/os/linux/vm.c b/include/os/linux/vm.c
deleted file mode 100644
index 8956cce..0000000
--- a/include/os/linux/vm.c
+++ /dev/null
@@ -1,358 +0,0 @@
1/*
2 * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-buf.h>
18#include <linux/scatterlist.h>
19#include <uapi/linux/nvgpu.h>
20
21#include <nvgpu/log.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/rbtree.h>
24#include <nvgpu/vm_area.h>
25#include <nvgpu/nvgpu_mem.h>
26#include <nvgpu/page_allocator.h>
27#include <nvgpu/vidmem.h>
28#include <nvgpu/utils.h>
29#include <nvgpu/gk20a.h>
30
31#include <nvgpu/linux/vm.h>
32#include <nvgpu/linux/nvgpu_mem.h>
33
34#include "gk20a/mm_gk20a.h"
35
36#include "platform_gk20a.h"
37#include "os_linux.h"
38#include "dmabuf.h"
39#include "dmabuf_vidmem.h"
40
41static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags)
42{
43 u32 core_flags = 0;
44
45 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
46 core_flags |= NVGPU_VM_MAP_FIXED_OFFSET;
47 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE)
48 core_flags |= NVGPU_VM_MAP_CACHEABLE;
49 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT)
50 core_flags |= NVGPU_VM_MAP_IO_COHERENT;
51 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE)
52 core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE;
53 if (!nvgpu_is_enabled(g, NVGPU_DISABLE_L3_SUPPORT)) {
54 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC)
55 core_flags |= NVGPU_VM_MAP_L3_ALLOC;
56 }
57 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)
58 core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL;
59 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC)
60 core_flags |= NVGPU_VM_MAP_PLATFORM_ATOMIC;
61
62 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS)
63 nvgpu_warn(g, "Ignoring deprecated flag: "
64 "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS");
65
66 return core_flags;
67}
68
69static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
70 struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
71{
72 struct nvgpu_rbtree_node *node = NULL;
73 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
74
75 nvgpu_rbtree_enum_start(0, &node, root);
76
77 while (node) {
78 struct nvgpu_mapped_buf *mapped_buffer =
79 mapped_buffer_from_rbtree_node(node);
80
81 if (mapped_buffer->os_priv.dmabuf == dmabuf &&
82 mapped_buffer->kind == kind)
83 return mapped_buffer;
84
85 nvgpu_rbtree_enum_next(&node, node);
86 }
87
88 return NULL;
89}
90
91int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
92 struct dma_buf **dmabuf,
93 u64 *offset)
94{
95 struct nvgpu_mapped_buf *mapped_buffer;
96 struct gk20a *g = gk20a_from_vm(vm);
97
98 nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va);
99
100 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
101
102 mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
103 if (!mapped_buffer) {
104 nvgpu_mutex_release(&vm->update_gmmu_lock);
105 return -EINVAL;
106 }
107
108 *dmabuf = mapped_buffer->os_priv.dmabuf;
109 *offset = gpu_va - mapped_buffer->addr;
110
111 nvgpu_mutex_release(&vm->update_gmmu_lock);
112
113 return 0;
114}
115
116u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf)
117{
118 return os_buf->dmabuf->size;
119}
120
121/*
122 * vm->update_gmmu_lock must be held. This checks to see if we already have
123 * mapped the passed buffer into this VM. If so, return the existing
124 * mapping so it can be reused.
125 */
126struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
127 struct nvgpu_os_buffer *os_buf,
128 u64 map_addr,
129 u32 flags,
130 int kind)
131{
132 struct gk20a *g = gk20a_from_vm(vm);
133 struct nvgpu_mapped_buf *mapped_buffer = NULL;
134
135 if (flags & NVGPU_VM_MAP_FIXED_OFFSET) {
136 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr);
137 if (!mapped_buffer)
138 return NULL;
139
140 if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf ||
141 mapped_buffer->kind != (u32)kind)
142 return NULL;
143 } else {
144 mapped_buffer =
145 __nvgpu_vm_find_mapped_buf_reverse(vm,
146 os_buf->dmabuf,
147 kind);
148 if (!mapped_buffer)
149 return NULL;
150 }
151
152 if (mapped_buffer->flags != flags)
153 return NULL;
154
155 /*
156 * If we find the mapping here then that means we have mapped it already
157 * and the prior pin and get must be undone.
158 */
159 gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment,
160 mapped_buffer->os_priv.sgt);
161 dma_buf_put(os_buf->dmabuf);
162
163 nvgpu_log(g, gpu_dbg_map,
164 "gv: 0x%04x_%08x + 0x%-7zu "
165 "[dma: 0x%010llx, pa: 0x%010llx] "
166 "pgsz=%-3dKb as=%-2d "
167 "flags=0x%x apt=%s (reused)",
168 u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
169 os_buf->dmabuf->size,
170 (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl),
171 (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
172 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
173 vm_aspace_id(vm),
174 mapped_buffer->flags,
175 nvgpu_aperture_str(g,
176 gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
177
178 return mapped_buffer;
179}
180
181int nvgpu_vm_map_linux(struct vm_gk20a *vm,
182 struct dma_buf *dmabuf,
183 u64 map_addr,
184 u32 flags,
185 u32 page_size,
186 s16 compr_kind,
187 s16 incompr_kind,
188 int rw_flag,
189 u64 buffer_offset,
190 u64 mapping_size,
191 struct vm_gk20a_mapping_batch *batch,
192 u64 *gpu_va)
193{
194 struct gk20a *g = gk20a_from_vm(vm);
195 struct device *dev = dev_from_gk20a(g);
196 struct nvgpu_os_buffer os_buf;
197 struct sg_table *sgt;
198 struct nvgpu_sgt *nvgpu_sgt = NULL;
199 struct nvgpu_mapped_buf *mapped_buffer = NULL;
200 struct dma_buf_attachment *attachment;
201 int err = 0;
202
203 sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
204 if (IS_ERR(sgt)) {
205 nvgpu_warn(g, "Failed to pin dma_buf!");
206 return PTR_ERR(sgt);
207 }
208 os_buf.dmabuf = dmabuf;
209 os_buf.attachment = attachment;
210 os_buf.dev = dev;
211
212 if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
213 err = -EINVAL;
214 goto clean_up;
215 }
216
217 nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
218 if (!nvgpu_sgt) {
219 err = -ENOMEM;
220 goto clean_up;
221 }
222
223 mapped_buffer = nvgpu_vm_map(vm,
224 &os_buf,
225 nvgpu_sgt,
226 map_addr,
227 mapping_size,
228 buffer_offset,
229 rw_flag,
230 flags,
231 compr_kind,
232 incompr_kind,
233 batch,
234 gk20a_dmabuf_aperture(g, dmabuf));
235
236 nvgpu_sgt_free(g, nvgpu_sgt);
237
238 if (IS_ERR(mapped_buffer)) {
239 err = PTR_ERR(mapped_buffer);
240 goto clean_up;
241 }
242
243 mapped_buffer->os_priv.dmabuf = dmabuf;
244 mapped_buffer->os_priv.attachment = attachment;
245 mapped_buffer->os_priv.sgt = sgt;
246
247 *gpu_va = mapped_buffer->addr;
248 return 0;
249
250clean_up:
251 gk20a_mm_unpin(dev, dmabuf, attachment, sgt);
252
253 return err;
254}
255
256int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
257 int dmabuf_fd,
258 u64 *map_addr,
259 u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
260 u32 page_size,
261 s16 compr_kind,
262 s16 incompr_kind,
263 u64 buffer_offset,
264 u64 mapping_size,
265 struct vm_gk20a_mapping_batch *batch)
266{
267 struct gk20a *g = gk20a_from_vm(vm);
268 struct dma_buf *dmabuf;
269 u64 ret_va;
270 int err = 0;
271
272 /* get ref to the mem handle (released on unmap_locked) */
273 dmabuf = dma_buf_get(dmabuf_fd);
274 if (IS_ERR(dmabuf)) {
275 nvgpu_warn(g, "%s: fd %d is not a dmabuf",
276 __func__, dmabuf_fd);
277 return PTR_ERR(dmabuf);
278 }
279
280 /*
281 * For regular maps we do not accept either an input address or a
282 * buffer_offset.
283 */
284 if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) &&
285 (buffer_offset || *map_addr)) {
286 nvgpu_err(g,
287 "Regular map with addr/buf offset is not supported!");
288 dma_buf_put(dmabuf);
289 return -EINVAL;
290 }
291
292 /*
293 * Map size is always the buffer size for non-fixed mappings, so
294 * userspace should leave map size as zero for non-fixed maps.
295 */
296 if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
297 nvgpu_err(g, "map_size && non-fixed-mapping!");
298 dma_buf_put(dmabuf);
299 return -EINVAL;
300 }
301
302	/* Verify that we're not overflowing the buffer, i.e. reject when
303	 * (buffer_offset + mapping_size) > dmabuf->size.
304	 *
305	 * Since buffer_offset + mapping_size could overflow, first check
306	 * that mapping_size does not exceed dmabuf->size, at which point we
307	 * can subtract mapping_size from both sides for the final comparison.
308	 */
309 if ((mapping_size > dmabuf->size) ||
310 (buffer_offset > (dmabuf->size - mapping_size))) {
311 nvgpu_err(g,
312 "buf size %llx < (offset(%llx) + map_size(%llx))",
313 (u64)dmabuf->size, buffer_offset, mapping_size);
314 dma_buf_put(dmabuf);
315 return -EINVAL;
316 }
317
318 err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
319 if (err) {
320 dma_buf_put(dmabuf);
321 return err;
322 }
323
324 err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr,
325 nvgpu_vm_translate_linux_flags(g, flags),
326 page_size,
327 compr_kind, incompr_kind,
328 gk20a_mem_flag_none,
329 buffer_offset,
330 mapping_size,
331 batch,
332 &ret_va);
333
334 if (!err)
335 *map_addr = ret_va;
336 else
337 dma_buf_put(dmabuf);
338
339 return err;
340}
341
342/*
343 * This is the function call-back for freeing OS specific components of an
344 * nvgpu_mapped_buf. This should most likely never be called outside of the
345 * core MM framework!
346 *
347 * Note: the VM lock will be held.
348 */
349void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
350{
351 struct vm_gk20a *vm = mapped_buffer->vm;
352
353 gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
354 mapped_buffer->os_priv.attachment,
355 mapped_buffer->os_priv.sgt);
356
357 dma_buf_put(mapped_buffer->os_priv.dmabuf);
358}
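The bounds check in nvgpu_vm_map_buffer() above deliberately avoids computing buffer_offset + mapping_size, since that sum can wrap around in 64 bits and make an invalid range look valid. The same idiom, reduced to a self-contained helper for illustration (hypothetical name, plain C):

#include <stdbool.h>
#include <stdint.h>

/* Return true when [offset, offset + size) fits inside a buffer of
 * buf_size bytes, without ever evaluating offset + size. */
static bool range_fits(uint64_t buf_size, uint64_t offset, uint64_t size)
{
	if (size > buf_size)
		return false;
	return offset <= buf_size - size;
}

Here range_fits(dmabuf->size, buffer_offset, mapping_size) expresses the same condition the function checks before mapping.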
diff --git a/include/os/linux/vpr.c b/include/os/linux/vpr.c
deleted file mode 100644
index 3a98125..0000000
--- a/include/os/linux/vpr.c
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/vpr.h>
15
16#include <linux/init.h>
17#include <linux/platform/tegra/common.h>
18
19bool nvgpu_is_vpr_resize_enabled(void)
20{
21 return tegra_is_vpr_resize_supported();
22}