author     Terje Bergstrom <tbergstrom@nvidia.com>              2018-04-18 15:59:00 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-06-15 20:47:31 -0400
commit     2a2c16af5f9f1ccfc93a13e820d5381e5c881e92 (patch)
tree       2e5d7b042270a649978e5bb540857012c85fb5b5 /drivers/gpu/nvgpu/common
parent     98d996f4ffb0137d119b5849cae46d7b7e5693e1 (diff)
gpu: nvgpu: Move Linux files away from common
Move all Linux source code files to drivers/gpu/nvgpu/os/linux from
drivers/gpu/nvgpu/common/linux. This changes the meaning of common to
be OS independent.

JIRA NVGPU-598
JIRA NVGPU-601

Change-Id: Ib7f2a43d3688bb0d0b7dcc48469a6783fd988ce9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1747714
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
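The user-visible effect of the move is in include paths: Linux-specific headers that used to be reached through common/linux now live under os/linux, and whatever remains under common is expected to be OS independent. A minimal sketch of what that means for an including file, assuming a caller that pulls in the Linux-only os_linux.h header (the actual include fixups happen in parts of this patch outside the drivers/gpu/nvgpu/common subtree shown below):

/*
 * Illustrative sketch only; paths are relative to drivers/gpu/nvgpu.
 *
 * Before this change, an OS-specific header was reached through the
 * "common" directory even though it was Linux-only:
 */
#include "common/linux/os_linux.h"

/*
 * After this change, the same header is found under os/linux, and
 * "common" carries only OS-independent code:
 */
#include "os/linux/os_linux.h"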
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde.c | 1786
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde.h | 326
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde_gm20b.c | 64
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde_gm20b.h | 32
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde_gp10b.c | 161
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde_gp10b.h | 32
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ce2.c | 155
-rw-r--r--  drivers/gpu/nvgpu/common/linux/channel.c | 1021
-rw-r--r--  drivers/gpu/nvgpu/common/linux/channel.h | 96
-rw-r--r--  drivers/gpu/nvgpu/common/linux/clk.c | 165
-rw-r--r--  drivers/gpu/nvgpu/common/linux/clk.h | 22
-rw-r--r--  drivers/gpu/nvgpu/common/linux/comptags.c | 140
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cond.c | 73
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ctxsw_trace.c | 730
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ctxsw_trace.h | 39
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug.c | 452
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_allocator.c | 69
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_allocator.h | 21
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_cde.c | 53
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_cde.h | 21
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_ce.c | 30
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_ce.h | 21
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_clk.c | 271
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_fifo.c | 378
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_fifo.h | 22
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_gr.c | 31
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_gr.h | 21
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_hal.c | 95
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_hal.h | 22
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_kmem.c | 312
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_kmem.h | 23
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_pmu.c | 481
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_pmu.h | 21
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_sched.c | 80
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_sched.h | 21
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_xve.c | 176
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_xve.h | 21
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dma.c | 694
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dmabuf.c | 218
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dmabuf.h | 62
-rw-r--r--  drivers/gpu/nvgpu/common/linux/driver_common.c | 334
-rw-r--r--  drivers/gpu/nvgpu/common/linux/driver_common.h | 22
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dt.c | 29
-rw-r--r--  drivers/gpu/nvgpu/common/linux/firmware.c | 117
-rw-r--r--  drivers/gpu/nvgpu/common/linux/fuse.c | 55
-rw-r--r--  drivers/gpu/nvgpu/common/linux/intr.c | 122
-rw-r--r--  drivers/gpu/nvgpu/common/linux/intr.h | 22
-rw-r--r--  drivers/gpu/nvgpu/common/linux/io.c | 118
-rw-r--r--  drivers/gpu/nvgpu/common/linux/io_usermode.c | 29
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl.c | 296
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl.h | 23
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_as.c | 423
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_as.h | 30
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 1388
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_channel.h | 50
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c | 562
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 1962
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h | 23
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_dbg.c | 2003
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_dbg.h | 54
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_tsg.c | 677
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_tsg.h | 28
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem.c | 654
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem_priv.h | 105
-rw-r--r--  drivers/gpu/nvgpu/common/linux/log.c | 132
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module.c | 1365
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module.h | 32
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module_usermode.c | 62
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module_usermode.h | 27
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 613
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvhost.c | 294
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvhost_priv.h | 24
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvlink.c | 106
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_fence_android.c | 79
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c | 111
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c | 121
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_linux.h | 166
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_sched.c | 26
-rw-r--r--  drivers/gpu/nvgpu/common/linux/pci.c | 861
-rw-r--r--  drivers/gpu/nvgpu/common/linux/pci.h | 27
-rw-r--r--  drivers/gpu/nvgpu/common/linux/pci_usermode.c | 24
-rw-r--r--  drivers/gpu/nvgpu/common/linux/pci_usermode.h | 23
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c | 269
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h | 37
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gk20a.h | 317
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c | 957
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h | 23
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c | 607
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h | 23
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c | 588
-rw-r--r--  drivers/gpu/nvgpu/common/linux/rwsem.c | 39
-rw-r--r--  drivers/gpu/nvgpu/common/linux/scale.c | 428
-rw-r--r--  drivers/gpu/nvgpu/common/linux/scale.h | 66
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sched.c | 676
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sched.h | 55
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sim.c | 95
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sim_pci.c | 91
-rw-r--r--  drivers/gpu/nvgpu/common/linux/soc.c | 122
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sync_sema_android.c | 419
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sync_sema_android.h | 51
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sysfs.c | 1205
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sysfs.h | 24
-rw-r--r--  drivers/gpu/nvgpu/common/linux/thread.c | 63
-rw-r--r--  drivers/gpu/nvgpu/common/linux/timers.c | 270
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c | 168
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h | 27
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c | 224
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | 97
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c | 69
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c | 50
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c | 77
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c | 53
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c | 475
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h | 57
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vidmem.c | 262
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c | 332
116 files changed, 0 insertions, 29993 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
deleted file mode 100644
index 32b333f1..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde.c
+++ /dev/null
@@ -1,1786 +0,0 @@
1/*
2 * Color decompression engine support
3 *
4 * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/dma-mapping.h>
20#include <linux/fs.h>
21#include <linux/dma-buf.h>
22#include <uapi/linux/nvgpu.h>
23
24#include <trace/events/gk20a.h>
25
26#include <nvgpu/dma.h>
27#include <nvgpu/gmmu.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/kmem.h>
31#include <nvgpu/log.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/firmware.h>
34#include <nvgpu/os_sched.h>
35
36#include <nvgpu/linux/vm.h>
37
38#include "gk20a/gk20a.h"
39#include "gk20a/channel_gk20a.h"
40#include "gk20a/mm_gk20a.h"
41#include "gk20a/fence_gk20a.h"
42#include "gk20a/gr_gk20a.h"
43
44#include "cde.h"
45#include "os_linux.h"
46#include "dmabuf.h"
47#include "channel.h"
48#include "cde_gm20b.h"
49#include "cde_gp10b.h"
50
51#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
52#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
53
54static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
55static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
56
57#define CTX_DELETE_TIME 1000
58
59#define MAX_CTX_USE_COUNT 42
60#define MAX_CTX_RETRY_TIME 2000
61
62static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
63{
64 struct nvgpu_mapped_buf *buffer;
65 dma_addr_t addr = 0;
66 struct gk20a *g = gk20a_from_vm(vm);
67
68 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
69 buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
70 if (buffer)
71 addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
72 nvgpu_mutex_release(&vm->update_gmmu_lock);
73
74 return addr;
75}
76
77static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
78{
79 unsigned int i;
80
81 for (i = 0; i < cde_ctx->num_bufs; i++) {
82 struct nvgpu_mem *mem = cde_ctx->mem + i;
83 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
84 }
85
86 nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
87
88 cde_ctx->convert_cmd = NULL;
89 cde_ctx->init_convert_cmd = NULL;
90 cde_ctx->num_bufs = 0;
91 cde_ctx->num_params = 0;
92 cde_ctx->init_cmd_num_entries = 0;
93 cde_ctx->convert_cmd_num_entries = 0;
94 cde_ctx->init_cmd_executed = false;
95}
96
97static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
98__must_hold(&cde_app->mutex)
99{
100 struct nvgpu_os_linux *l = cde_ctx->l;
101 struct gk20a *g = &l->g;
102 struct channel_gk20a *ch = cde_ctx->ch;
103 struct vm_gk20a *vm = ch->vm;
104
105 trace_gk20a_cde_remove_ctx(cde_ctx);
106
107 /* release mapped memory */
108 gk20a_deinit_cde_img(cde_ctx);
109 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
110 cde_ctx->backing_store_vaddr);
111
112 /*
113 * free the channel
114 * gk20a_channel_close() will also unbind the channel from TSG
115 */
116 gk20a_channel_close(ch);
117 nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);
118
119 /* housekeeping on app */
120 nvgpu_list_del(&cde_ctx->list);
121 l->cde_app.ctx_count--;
122 nvgpu_kfree(g, cde_ctx);
123}
124
125static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
126 bool wait_finish)
127__releases(&cde_app->mutex)
128__acquires(&cde_app->mutex)
129{
130 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
131
132 /* permanent contexts do not have deleter works */
133 if (!cde_ctx->is_temporary)
134 return;
135
136 if (wait_finish) {
137 nvgpu_mutex_release(&cde_app->mutex);
138 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
139 nvgpu_mutex_acquire(&cde_app->mutex);
140 } else {
141 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
142 }
143}
144
145static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
146__must_hold(&l->cde_app->mutex)
147{
148 struct gk20a_cde_app *cde_app = &l->cde_app;
149 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
150
151 /* safe to go off the mutex in cancel_deleter since app is
152 * deinitialised; no new jobs are started. deleter works may be only at
153 * waiting for the mutex or before, going to abort */
154
155 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
156 &cde_app->free_contexts, gk20a_cde_ctx, list) {
157 gk20a_cde_cancel_deleter(cde_ctx, true);
158 gk20a_cde_remove_ctx(cde_ctx);
159 }
160
161 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
162 &cde_app->used_contexts, gk20a_cde_ctx, list) {
163 gk20a_cde_cancel_deleter(cde_ctx, true);
164 gk20a_cde_remove_ctx(cde_ctx);
165 }
166}
167
168static void gk20a_cde_stop(struct nvgpu_os_linux *l)
169__must_hold(&l->cde_app->mutex)
170{
171 struct gk20a_cde_app *cde_app = &l->cde_app;
172
173 /* prevent further conversions and delayed works from working */
174 cde_app->initialised = false;
175 /* free all data, empty the list */
176 gk20a_cde_remove_contexts(l);
177}
178
179void gk20a_cde_destroy(struct nvgpu_os_linux *l)
180__acquires(&l->cde_app->mutex)
181__releases(&l->cde_app->mutex)
182{
183 struct gk20a_cde_app *cde_app = &l->cde_app;
184
185 if (!cde_app->initialised)
186 return;
187
188 nvgpu_mutex_acquire(&cde_app->mutex);
189 gk20a_cde_stop(l);
190 nvgpu_mutex_release(&cde_app->mutex);
191
192 nvgpu_mutex_destroy(&cde_app->mutex);
193}
194
195void gk20a_cde_suspend(struct nvgpu_os_linux *l)
196__acquires(&l->cde_app->mutex)
197__releases(&l->cde_app->mutex)
198{
199 struct gk20a_cde_app *cde_app = &l->cde_app;
200 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
201
202 if (!cde_app->initialised)
203 return;
204
205 nvgpu_mutex_acquire(&cde_app->mutex);
206
207 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
208 &cde_app->free_contexts, gk20a_cde_ctx, list) {
209 gk20a_cde_cancel_deleter(cde_ctx, false);
210 }
211
212 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
213 &cde_app->used_contexts, gk20a_cde_ctx, list) {
214 gk20a_cde_cancel_deleter(cde_ctx, false);
215 }
216
217 nvgpu_mutex_release(&cde_app->mutex);
218
219}
220
221static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
222__must_hold(&l->cde_app->mutex)
223{
224 struct gk20a_cde_app *cde_app = &l->cde_app;
225 struct gk20a_cde_ctx *cde_ctx;
226
227 cde_ctx = gk20a_cde_allocate_context(l);
228 if (IS_ERR(cde_ctx))
229 return PTR_ERR(cde_ctx);
230
231 nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
232 cde_app->ctx_count++;
233 if (cde_app->ctx_count > cde_app->ctx_count_top)
234 cde_app->ctx_count_top = cde_app->ctx_count;
235
236 return 0;
237}
238
239static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
240__must_hold(&l->cde_app->mutex)
241{
242 int err;
243 int i;
244
245 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
246 err = gk20a_cde_create_context(l);
247 if (err)
248 goto out;
249 }
250
251 return 0;
252out:
253 gk20a_cde_remove_contexts(l);
254 return err;
255}
256
257static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
258 struct nvgpu_firmware *img,
259 struct gk20a_cde_hdr_buf *buf)
260{
261 struct nvgpu_mem *mem;
262 struct nvgpu_os_linux *l = cde_ctx->l;
263 struct gk20a *g = &l->g;
264 int err;
265
266 /* check that the file can hold the buf */
267 if (buf->data_byte_offset != 0 &&
268 buf->data_byte_offset + buf->num_bytes > img->size) {
269 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
270 cde_ctx->num_bufs);
271 return -EINVAL;
272 }
273
274 /* check that we have enough buf elems available */
275 if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
276 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
277 cde_ctx->num_bufs);
278 return -ENOMEM;
279 }
280
281 /* allocate buf */
282 mem = cde_ctx->mem + cde_ctx->num_bufs;
283 err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
284 if (err) {
285 nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
286 cde_ctx->num_bufs);
287 return -ENOMEM;
288 }
289
290 /* copy the content */
291 if (buf->data_byte_offset != 0)
292 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
293 buf->num_bytes);
294
295 cde_ctx->num_bufs++;
296
297 return 0;
298}
299
300static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
301 int type, s32 shift, u64 mask, u64 value)
302{
303 struct nvgpu_os_linux *l = cde_ctx->l;
304 struct gk20a *g = &l->g;
305 u32 *target_mem_ptr = target;
306 u64 *target_mem_ptr_u64 = target;
307 u64 current_value, new_value;
308
309 value = (shift >= 0) ? value << shift : value >> -shift;
310 value &= mask;
311
312 /* read current data from the location */
313 current_value = 0;
314 if (type == TYPE_PARAM_TYPE_U32) {
315 if (mask != 0xfffffffful)
316 current_value = *target_mem_ptr;
317 } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
318 if (mask != ~0ul)
319 current_value = *target_mem_ptr_u64;
320 } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
321 current_value = *target_mem_ptr_u64;
322 current_value = (u64)(current_value >> 32) |
323 (u64)(current_value << 32);
324 } else {
325 nvgpu_warn(g, "cde: unknown type. type=%d",
326 type);
327 return -EINVAL;
328 }
329
330 current_value &= ~mask;
331 new_value = current_value | value;
332
333 /* store the element data back */
334 if (type == TYPE_PARAM_TYPE_U32)
335 *target_mem_ptr = (u32)new_value;
336 else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
337 *target_mem_ptr_u64 = new_value;
338 else {
339 new_value = (u64)(new_value >> 32) |
340 (u64)(new_value << 32);
341 *target_mem_ptr_u64 = new_value;
342 }
343
344 return 0;
345}
346
347static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
348 struct nvgpu_firmware *img,
349 struct gk20a_cde_hdr_replace *replace)
350{
351 struct nvgpu_mem *source_mem;
352 struct nvgpu_mem *target_mem;
353 struct nvgpu_os_linux *l = cde_ctx->l;
354 struct gk20a *g = &l->g;
355 u32 *target_mem_ptr;
356 u64 vaddr;
357 int err;
358
359 if (replace->target_buf >= cde_ctx->num_bufs ||
360 replace->source_buf >= cde_ctx->num_bufs) {
361 nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
362 replace->target_buf, replace->source_buf,
363 cde_ctx->num_bufs);
364 return -EINVAL;
365 }
366
367 source_mem = cde_ctx->mem + replace->source_buf;
368 target_mem = cde_ctx->mem + replace->target_buf;
369 target_mem_ptr = target_mem->cpu_va;
370
371 if (source_mem->size < (replace->source_byte_offset + 3) ||
372 target_mem->size < (replace->target_byte_offset + 3)) {
373 nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
374 replace->target_byte_offset,
375 replace->source_byte_offset,
376 source_mem->size,
377 target_mem->size);
378 return -EINVAL;
379 }
380
381 /* calculate the target pointer */
382 target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
383
384 /* determine patch value */
385 vaddr = source_mem->gpu_va + replace->source_byte_offset;
386 err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
387 replace->shift, replace->mask,
388 vaddr);
389 if (err) {
390 nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
391 err, replace->target_buf,
392 replace->target_byte_offset,
393 replace->source_buf,
394 replace->source_byte_offset);
395 }
396
397 return err;
398}
399
400static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
401{
402 struct nvgpu_os_linux *l = cde_ctx->l;
403 struct gk20a *g = &l->g;
404 struct nvgpu_mem *target_mem;
405 u32 *target_mem_ptr;
406 u64 new_data;
407 int user_id = 0, err;
408 unsigned int i;
409
410 for (i = 0; i < cde_ctx->num_params; i++) {
411 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
412 target_mem = cde_ctx->mem + param->target_buf;
413 target_mem_ptr = target_mem->cpu_va;
414 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
415
416 switch (param->id) {
417 case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
418 new_data = g->gr.comptags_per_cacheline;
419 break;
420 case TYPE_PARAM_GPU_CONFIGURATION:
421 new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
422 g->gr.cacheline_size;
423 break;
424 case TYPE_PARAM_FIRSTPAGEOFFSET:
425 new_data = cde_ctx->surf_param_offset;
426 break;
427 case TYPE_PARAM_NUMPAGES:
428 new_data = cde_ctx->surf_param_lines;
429 break;
430 case TYPE_PARAM_BACKINGSTORE:
431 new_data = cde_ctx->backing_store_vaddr;
432 break;
433 case TYPE_PARAM_DESTINATION:
434 new_data = cde_ctx->compbit_vaddr;
435 break;
436 case TYPE_PARAM_DESTINATION_SIZE:
437 new_data = cde_ctx->compbit_size;
438 break;
439 case TYPE_PARAM_BACKINGSTORE_SIZE:
440 new_data = g->gr.compbit_store.mem.size;
441 break;
442 case TYPE_PARAM_SOURCE_SMMU_ADDR:
443 new_data = gpuva_to_iova_base(cde_ctx->vm,
444 cde_ctx->surf_vaddr);
445 if (new_data == 0) {
446 nvgpu_warn(g, "cde: failed to find 0x%llx",
447 cde_ctx->surf_vaddr);
448 return -EINVAL;
449 }
450 break;
451 case TYPE_PARAM_BACKINGSTORE_BASE_HW:
452 new_data = g->gr.compbit_store.base_hw;
453 break;
454 case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
455 new_data = g->gr.gobs_per_comptagline_per_slice;
456 break;
457 case TYPE_PARAM_SCATTERBUFFER:
458 new_data = cde_ctx->scatterbuffer_vaddr;
459 break;
460 case TYPE_PARAM_SCATTERBUFFER_SIZE:
461 new_data = cde_ctx->scatterbuffer_size;
462 break;
463 default:
464 user_id = param->id - NUM_RESERVED_PARAMS;
465 if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
466 continue;
467 new_data = cde_ctx->user_param_values[user_id];
468 }
469
470 nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
471 i, param->id, param->target_buf,
472 param->target_byte_offset, new_data,
473 param->data_offset, param->type, param->shift,
474 param->mask);
475
476 new_data += param->data_offset;
477
478 err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
479 param->shift, param->mask, new_data);
480
481 if (err) {
482 nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
483 err, i, param->id, param->target_buf,
484 param->target_byte_offset, new_data);
485 return err;
486 }
487 }
488
489 return 0;
490}
491
492static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
493 struct nvgpu_firmware *img,
494 struct gk20a_cde_hdr_param *param)
495{
496 struct nvgpu_mem *target_mem;
497 struct nvgpu_os_linux *l = cde_ctx->l;
498 struct gk20a *g = &l->g;
499
500 if (param->target_buf >= cde_ctx->num_bufs) {
501 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
502 cde_ctx->num_params, param->target_buf,
503 cde_ctx->num_bufs);
504 return -EINVAL;
505 }
506
507 target_mem = cde_ctx->mem + param->target_buf;
508 if (target_mem->size < (param->target_byte_offset + 3)) {
509 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
510 cde_ctx->num_params, param->target_byte_offset,
511 target_mem->size);
512 return -EINVAL;
513 }
514
515 /* does this parameter fit into our parameter structure */
516 if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
517 nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
518 cde_ctx->num_params);
519 return -ENOMEM;
520 }
521
522 /* is the given id valid? */
523 if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
524 nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
525 param->id, cde_ctx->num_params,
526 NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
527 return -EINVAL;
528 }
529
530 cde_ctx->params[cde_ctx->num_params] = *param;
531 cde_ctx->num_params++;
532
533 return 0;
534}
535
536static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
537 struct nvgpu_firmware *img,
538 u32 required_class)
539{
540 struct nvgpu_os_linux *l = cde_ctx->l;
541 struct gk20a *g = &l->g;
542 int err;
543
544 /* CDE enabled */
545 cde_ctx->ch->cde = true;
546
547 err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
548 if (err) {
549 nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
550 err);
551 return err;
552 }
553
554 return 0;
555}
556
557static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
558 struct nvgpu_firmware *img,
559 u32 op,
560 struct gk20a_cde_cmd_elem *cmd_elem,
561 u32 num_elems)
562{
563 struct nvgpu_os_linux *l = cde_ctx->l;
564 struct gk20a *g = &l->g;
565 struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem;
566 u32 *num_entries;
567 unsigned int i;
568
569 /* check command type */
570 if (op == TYPE_BUF_COMMAND_INIT) {
571 gpfifo = &cde_ctx->init_convert_cmd;
572 num_entries = &cde_ctx->init_cmd_num_entries;
573 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
574 gpfifo = &cde_ctx->convert_cmd;
575 num_entries = &cde_ctx->convert_cmd_num_entries;
576 } else {
577 nvgpu_warn(g, "cde: unknown command. op=%u",
578 op);
579 return -EINVAL;
580 }
581
582 /* allocate gpfifo entries to be pushed */
583 *gpfifo = nvgpu_kzalloc(g,
584 sizeof(struct nvgpu_gpfifo_entry) * num_elems);
585 if (!*gpfifo) {
586 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
587 return -ENOMEM;
588 }
589
590 gpfifo_elem = *gpfifo;
591 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
592 struct nvgpu_mem *target_mem;
593
594 /* validate the current entry */
595 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
596 nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
597 cmd_elem->target_buf, cde_ctx->num_bufs);
598 return -EINVAL;
599 }
600
601 target_mem = cde_ctx->mem + cmd_elem->target_buf;
 602 if (target_mem->size <
603 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
604 nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
605 target_mem->size,
606 cmd_elem->target_byte_offset,
607 cmd_elem->num_bytes);
608 return -EINVAL;
609 }
610
611 /* store the element into gpfifo */
612 gpfifo_elem->entry0 =
613 u64_lo32(target_mem->gpu_va +
614 cmd_elem->target_byte_offset);
615 gpfifo_elem->entry1 =
616 u64_hi32(target_mem->gpu_va +
617 cmd_elem->target_byte_offset) |
618 pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
619 sizeof(u32));
620 }
621
622 *num_entries = num_elems;
623 return 0;
624}
625
626static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
627{
628 struct nvgpu_os_linux *l = cde_ctx->l;
629 struct gk20a *g = &l->g;
630 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
631 sizeof(struct nvgpu_gpfifo_entry);
632 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
633 sizeof(struct nvgpu_gpfifo_entry);
634 unsigned long total_bytes = init_bytes + conv_bytes;
635 struct nvgpu_gpfifo_entry *combined_cmd;
636
637 /* allocate buffer that has space for both */
638 combined_cmd = nvgpu_kzalloc(g, total_bytes);
639 if (!combined_cmd) {
640 nvgpu_warn(g,
641 "cde: could not allocate memory for gpfifo entries");
642 return -ENOMEM;
643 }
644
645 /* move the original init here and append convert */
646 memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
647 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
648 cde_ctx->convert_cmd, conv_bytes);
649
650 nvgpu_kfree(g, cde_ctx->init_convert_cmd);
651 nvgpu_kfree(g, cde_ctx->convert_cmd);
652
653 cde_ctx->init_convert_cmd = combined_cmd;
654 cde_ctx->convert_cmd = combined_cmd
655 + cde_ctx->init_cmd_num_entries;
656
657 return 0;
658}
659
660static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
661 struct nvgpu_firmware *img)
662{
663 struct nvgpu_os_linux *l = cde_ctx->l;
664 struct gk20a *g = &l->g;
665 struct gk20a_cde_app *cde_app = &l->cde_app;
666 u32 *data = (u32 *)img->data;
667 u32 num_of_elems;
668 struct gk20a_cde_hdr_elem *elem;
669 u32 min_size = 0;
670 int err = 0;
671 unsigned int i;
672
673 min_size += 2 * sizeof(u32);
674 if (img->size < min_size) {
675 nvgpu_warn(g, "cde: invalid image header");
676 return -EINVAL;
677 }
678
679 cde_app->firmware_version = data[0];
680 num_of_elems = data[1];
681
682 min_size += num_of_elems * sizeof(*elem);
683 if (img->size < min_size) {
684 nvgpu_warn(g, "cde: bad image");
685 return -EINVAL;
686 }
687
688 elem = (struct gk20a_cde_hdr_elem *)&data[2];
689 for (i = 0; i < num_of_elems; i++) {
690 int err = 0;
691 switch (elem->type) {
692 case TYPE_BUF:
693 err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
694 break;
695 case TYPE_REPLACE:
696 err = gk20a_init_cde_replace(cde_ctx, img,
697 &elem->replace);
698 break;
699 case TYPE_PARAM:
700 err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
701 break;
702 case TYPE_REQUIRED_CLASS:
703 err = gk20a_init_cde_required_class(cde_ctx, img,
704 elem->required_class);
705 break;
706 case TYPE_COMMAND:
707 {
708 struct gk20a_cde_cmd_elem *cmd = (void *)
709 &img->data[elem->command.data_byte_offset];
710 err = gk20a_init_cde_command(cde_ctx, img,
711 elem->command.op, cmd,
712 elem->command.num_entries);
713 break;
714 }
715 case TYPE_ARRAY:
716 memcpy(&cde_app->arrays[elem->array.id][0],
717 elem->array.data,
718 MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
719 break;
720 default:
721 nvgpu_warn(g, "cde: unknown header element");
722 err = -EINVAL;
723 }
724
725 if (err)
726 goto deinit_image;
727
728 elem++;
729 }
730
731 if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
732 nvgpu_warn(g, "cde: convert command not defined");
733 err = -EINVAL;
734 goto deinit_image;
735 }
736
737 if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
738 nvgpu_warn(g, "cde: convert command not defined");
739 err = -EINVAL;
740 goto deinit_image;
741 }
742
743 err = gk20a_cde_pack_cmdbufs(cde_ctx);
744 if (err)
745 goto deinit_image;
746
747 return 0;
748
749deinit_image:
750 gk20a_deinit_cde_img(cde_ctx);
751 return err;
752}
753
754static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
755 u32 op, struct nvgpu_channel_fence *fence,
756 u32 flags, struct gk20a_fence **fence_out)
757{
758 struct nvgpu_os_linux *l = cde_ctx->l;
759 struct gk20a *g = &l->g;
760 struct nvgpu_gpfifo_entry *gpfifo = NULL;
761 int num_entries = 0;
762
763 /* check command type */
764 if (op == TYPE_BUF_COMMAND_INIT) {
765 /* both init and convert combined */
766 gpfifo = cde_ctx->init_convert_cmd;
767 num_entries = cde_ctx->init_cmd_num_entries
768 + cde_ctx->convert_cmd_num_entries;
769 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
770 gpfifo = cde_ctx->convert_cmd;
771 num_entries = cde_ctx->convert_cmd_num_entries;
772 } else if (op == TYPE_BUF_COMMAND_NOOP) {
773 /* Any non-null gpfifo will suffice with 0 num_entries */
774 gpfifo = cde_ctx->init_convert_cmd;
775 num_entries = 0;
776 } else {
777 nvgpu_warn(g, "cde: unknown buffer");
778 return -EINVAL;
779 }
780
781 if (gpfifo == NULL) {
782 nvgpu_warn(g, "cde: buffer not available");
783 return -ENOSYS;
784 }
785
786 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
787 num_entries, flags, fence, fence_out,
788 NULL);
789}
790
791static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
792__acquires(&cde_app->mutex)
793__releases(&cde_app->mutex)
794{
795 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
796 struct gk20a *g = &cde_ctx->l->g;
797
798 nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
799 trace_gk20a_cde_release(cde_ctx);
800
801 nvgpu_mutex_acquire(&cde_app->mutex);
802
803 if (cde_ctx->in_use) {
804 cde_ctx->in_use = false;
805 nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
806 cde_app->ctx_usecount--;
807 } else {
808 nvgpu_log_info(g, "double release cde context %p", cde_ctx);
809 }
810
811 nvgpu_mutex_release(&cde_app->mutex);
812}
813
814static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
815__acquires(&cde_app->mutex)
816__releases(&cde_app->mutex)
817{
818 struct delayed_work *delay_work = to_delayed_work(work);
819 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
820 struct gk20a_cde_ctx, ctx_deleter_work);
821 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
822 struct nvgpu_os_linux *l = cde_ctx->l;
823 struct gk20a *g = &l->g;
824 int err;
825
826 /* someone has just taken it? engine deletion started? */
827 if (cde_ctx->in_use || !cde_app->initialised)
828 return;
829
830 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
831 "cde: attempting to delete temporary %p", cde_ctx);
832
833 err = gk20a_busy(g);
834 if (err) {
835 /* this context would find new use anyway later, so not freeing
836 * here does not leak anything */
837 nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
838 " temp ctx deletion");
839 return;
840 }
841
842 nvgpu_mutex_acquire(&cde_app->mutex);
843 if (cde_ctx->in_use || !cde_app->initialised) {
844 nvgpu_log(g, gpu_dbg_cde_ctx,
845 "cde: context use raced, not deleting %p",
846 cde_ctx);
847 goto out;
848 }
849
850 WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
851 "double pending %p", cde_ctx);
852
853 gk20a_cde_remove_ctx(cde_ctx);
854 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
855 "cde: destroyed %p count=%d use=%d max=%d",
856 cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
857 cde_app->ctx_count_top);
858
859out:
860 nvgpu_mutex_release(&cde_app->mutex);
861 gk20a_idle(g);
862}
863
864static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
865__must_hold(&cde_app->mutex)
866{
867 struct gk20a *g = &l->g;
868 struct gk20a_cde_app *cde_app = &l->cde_app;
869 struct gk20a_cde_ctx *cde_ctx;
870
871 /* exhausted? */
872
873 if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
874 return ERR_PTR(-EAGAIN);
875
876 /* idle context available? */
877
878 if (!nvgpu_list_empty(&cde_app->free_contexts)) {
879 cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
880 gk20a_cde_ctx, list);
881 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
882 "cde: got free %p count=%d use=%d max=%d",
883 cde_ctx, cde_app->ctx_count,
884 cde_app->ctx_usecount,
885 cde_app->ctx_count_top);
886 trace_gk20a_cde_get_context(cde_ctx);
887
888 /* deleter work may be scheduled, but in_use prevents it */
889 cde_ctx->in_use = true;
890 nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
891 cde_app->ctx_usecount++;
892
893 /* cancel any deletions now that ctx is in use */
894 gk20a_cde_cancel_deleter(cde_ctx, true);
895 return cde_ctx;
896 }
897
898 /* no free contexts, get a temporary one */
899
900 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
901 "cde: no free contexts, count=%d",
902 cde_app->ctx_count);
903
904 cde_ctx = gk20a_cde_allocate_context(l);
905 if (IS_ERR(cde_ctx)) {
906 nvgpu_warn(g, "cde: cannot allocate context: %ld",
907 PTR_ERR(cde_ctx));
908 return cde_ctx;
909 }
910
911 trace_gk20a_cde_get_context(cde_ctx);
912 cde_ctx->in_use = true;
913 cde_ctx->is_temporary = true;
914 cde_app->ctx_usecount++;
915 cde_app->ctx_count++;
916 if (cde_app->ctx_count > cde_app->ctx_count_top)
917 cde_app->ctx_count_top = cde_app->ctx_count;
918 nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);
919
920 return cde_ctx;
921}
922
923static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
924__releases(&cde_app->mutex)
925__acquires(&cde_app->mutex)
926{
927 struct gk20a *g = &l->g;
928 struct gk20a_cde_app *cde_app = &l->cde_app;
929 struct gk20a_cde_ctx *cde_ctx = NULL;
930 struct nvgpu_timeout timeout;
931
932 nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
933 NVGPU_TIMER_CPU_TIMER);
934
935 do {
936 cde_ctx = gk20a_cde_do_get_context(l);
937 if (PTR_ERR(cde_ctx) != -EAGAIN)
938 break;
939
940 /* exhausted, retry */
941 nvgpu_mutex_release(&cde_app->mutex);
942 cond_resched();
943 nvgpu_mutex_acquire(&cde_app->mutex);
944 } while (!nvgpu_timeout_expired(&timeout));
945
946 return cde_ctx;
947}
948
949static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
950{
951 struct gk20a *g = &l->g;
952 struct gk20a_cde_ctx *cde_ctx;
953 int ret;
954
955 cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
956 if (!cde_ctx)
957 return ERR_PTR(-ENOMEM);
958
959 cde_ctx->l = l;
960 cde_ctx->dev = dev_from_gk20a(g);
961
962 ret = gk20a_cde_load(cde_ctx);
963 if (ret) {
964 nvgpu_kfree(g, cde_ctx);
965 return ERR_PTR(ret);
966 }
967
968 nvgpu_init_list_node(&cde_ctx->list);
969 cde_ctx->is_temporary = false;
970 cde_ctx->in_use = false;
971 INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
972 gk20a_cde_ctx_deleter_fn);
973
974 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
975 trace_gk20a_cde_allocate_context(cde_ctx);
976 return cde_ctx;
977}
978
979int gk20a_cde_convert(struct nvgpu_os_linux *l,
980 struct dma_buf *compbits_scatter_buf,
981 u64 compbits_byte_offset,
982 u64 scatterbuffer_byte_offset,
983 struct nvgpu_channel_fence *fence,
984 u32 __flags, struct gk20a_cde_param *params,
985 int num_params, struct gk20a_fence **fence_out)
986__acquires(&l->cde_app->mutex)
987__releases(&l->cde_app->mutex)
988{
989 struct gk20a *g = &l->g;
990 struct gk20a_cde_ctx *cde_ctx = NULL;
991 struct gk20a_comptags comptags;
992 struct nvgpu_os_buffer os_buf = {
993 compbits_scatter_buf,
994 NULL,
995 dev_from_gk20a(g)
996 };
997 u64 mapped_compbits_offset = 0;
998 u64 compbits_size = 0;
999 u64 mapped_scatterbuffer_offset = 0;
1000 u64 scatterbuffer_size = 0;
1001 u64 map_vaddr = 0;
1002 u64 map_offset = 0;
1003 u64 map_size = 0;
1004 u8 *surface = NULL;
1005 u64 big_page_mask = 0;
1006 u32 flags;
1007 int err, i;
1008 const s16 compbits_kind = 0;
1009 u32 submit_op;
1010 struct dma_buf_attachment *attachment;
1011
1012 nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
1013 compbits_byte_offset, scatterbuffer_byte_offset);
1014
1015 /* scatter buffer must be after compbits buffer */
1016 if (scatterbuffer_byte_offset &&
1017 scatterbuffer_byte_offset < compbits_byte_offset)
1018 return -EINVAL;
1019
1020 err = gk20a_busy(g);
1021 if (err)
1022 return err;
1023
1024 nvgpu_mutex_acquire(&l->cde_app.mutex);
1025 cde_ctx = gk20a_cde_get_context(l);
1026 nvgpu_mutex_release(&l->cde_app.mutex);
1027 if (IS_ERR(cde_ctx)) {
1028 err = PTR_ERR(cde_ctx);
1029 goto exit_idle;
1030 }
1031
1032 /* First, map the buffer to local va */
1033
1034 /* ensure that the compbits buffer has drvdata */
1035 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
1036 dev_from_gk20a(g));
1037 if (err)
1038 goto exit_idle;
1039
1040 /* compbits don't start at page aligned offset, so we need to align
1041 the region to be mapped */
1042 big_page_mask = cde_ctx->vm->big_page_size - 1;
1043 map_offset = compbits_byte_offset & ~big_page_mask;
1044 map_size = compbits_scatter_buf->size - map_offset;
1045
1046
1047 /* compute compbit start offset from the beginning of the mapped
1048 area */
1049 mapped_compbits_offset = compbits_byte_offset - map_offset;
1050 if (scatterbuffer_byte_offset) {
1051 compbits_size = scatterbuffer_byte_offset -
1052 compbits_byte_offset;
1053 mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
1054 map_offset;
1055 scatterbuffer_size = compbits_scatter_buf->size -
1056 scatterbuffer_byte_offset;
1057 } else {
1058 compbits_size = compbits_scatter_buf->size -
1059 compbits_byte_offset;
1060 }
1061
1062 nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu",
1063 map_offset, map_size);
1064 nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
1065 mapped_compbits_offset, compbits_size);
1066 nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
1067 mapped_scatterbuffer_offset, scatterbuffer_size);
1068
1069
1070 /* map the destination buffer */
1071 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */
1072 err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
1073 NVGPU_VM_MAP_CACHEABLE |
1074 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
1075 NVGPU_KIND_INVALID,
1076 compbits_kind, /* incompressible kind */
1077 gk20a_mem_flag_none,
1078 map_offset, map_size,
1079 NULL,
1080 &map_vaddr);
1081 if (err) {
1082 dma_buf_put(compbits_scatter_buf);
1083 err = -EINVAL;
1084 goto exit_idle;
1085 }
1086
1087 if (scatterbuffer_byte_offset &&
1088 l->ops.cde.need_scatter_buffer &&
1089 l->ops.cde.need_scatter_buffer(g)) {
1090 struct sg_table *sgt;
1091 void *scatter_buffer;
1092
1093 surface = dma_buf_vmap(compbits_scatter_buf);
1094 if (IS_ERR(surface)) {
1095 nvgpu_warn(g,
1096 "dma_buf_vmap failed");
1097 err = -EINVAL;
1098 goto exit_unmap_vaddr;
1099 }
1100
1101 scatter_buffer = surface + scatterbuffer_byte_offset;
1102
1103 nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
1104 surface, scatter_buffer);
1105 sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf,
1106 &attachment);
1107 if (IS_ERR(sgt)) {
1108 nvgpu_warn(g,
1109 "mm_pin failed");
1110 err = -EINVAL;
1111 goto exit_unmap_surface;
1112 } else {
1113 err = l->ops.cde.populate_scatter_buffer(g, sgt,
1114 compbits_byte_offset, scatter_buffer,
1115 scatterbuffer_size);
1116 WARN_ON(err);
1117
1118 gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
1119 attachment, sgt);
1120 if (err)
1121 goto exit_unmap_surface;
1122 }
1123
1124 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1125 dma_buf_vunmap(compbits_scatter_buf, surface);
1126 surface = NULL;
1127 }
1128
1129 /* store source buffer compression tags */
1130 gk20a_get_comptags(&os_buf, &comptags);
1131 cde_ctx->surf_param_offset = comptags.offset;
1132 cde_ctx->surf_param_lines = comptags.lines;
1133
1134 /* store surface vaddr. This is actually compbit vaddr, but since
1135 compbits live in the same surface, and we can get the alloc base
1136 address by using gpuva_to_iova_base, this will do */
1137 cde_ctx->surf_vaddr = map_vaddr;
1138
1139 /* store information about destination */
1140 cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
1141 cde_ctx->compbit_size = compbits_size;
1142
1143 cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
1144 cde_ctx->scatterbuffer_size = scatterbuffer_size;
1145
1146 /* remove existing argument data */
1147 memset(cde_ctx->user_param_values, 0,
1148 sizeof(cde_ctx->user_param_values));
1149
1150 /* read user space arguments for the conversion */
1151 for (i = 0; i < num_params; i++) {
1152 struct gk20a_cde_param *param = params + i;
1153 int id = param->id - NUM_RESERVED_PARAMS;
1154
1155 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1156 nvgpu_warn(g, "cde: unknown user parameter");
1157 err = -EINVAL;
1158 goto exit_unmap_surface;
1159 }
1160 cde_ctx->user_param_values[id] = param->value;
1161 }
1162
1163 /* patch data */
1164 err = gk20a_cde_patch_params(cde_ctx);
1165 if (err) {
1166 nvgpu_warn(g, "cde: failed to patch parameters");
1167 goto exit_unmap_surface;
1168 }
1169
1170 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
1171 g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
1172 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
1173 cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
1174 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1175 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1176
1177 /* take always the postfence as it is needed for protecting the
1178 * cde context */
1179 flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;
1180
 1181 /* gk20a_cde_execute_buffer() will grab a power reference of its own */
1182 gk20a_idle(g);
1183
1184 if (comptags.lines == 0) {
1185 /*
1186 * Nothing to do on the buffer, but do a null kickoff for
1187 * managing the pre and post fences.
1188 */
1189 submit_op = TYPE_BUF_COMMAND_NOOP;
1190 } else if (!cde_ctx->init_cmd_executed) {
1191 /*
1192 * First time, so include the init pushbuf too in addition to
1193 * the conversion code.
1194 */
1195 submit_op = TYPE_BUF_COMMAND_INIT;
1196 } else {
1197 /*
1198 * The usual condition: execute just the conversion.
1199 */
1200 submit_op = TYPE_BUF_COMMAND_CONVERT;
1201 }
1202 err = gk20a_cde_execute_buffer(cde_ctx, submit_op,
1203 fence, flags, fence_out);
1204
1205 if (comptags.lines != 0 && !err)
1206 cde_ctx->init_cmd_executed = true;
1207
1208 /* unmap the buffers - channel holds references to them now */
1209 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1210
1211 return err;
1212
1213exit_unmap_surface:
1214 if (surface)
1215 dma_buf_vunmap(compbits_scatter_buf, surface);
1216exit_unmap_vaddr:
1217 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1218exit_idle:
1219 gk20a_idle(g);
1220 return err;
1221}
1222
1223static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
1224__acquires(&cde_app->mutex)
1225__releases(&cde_app->mutex)
1226{
1227 struct gk20a_cde_ctx *cde_ctx = data;
1228 struct nvgpu_os_linux *l = cde_ctx->l;
1229 struct gk20a *g = &l->g;
1230 struct gk20a_cde_app *cde_app = &l->cde_app;
1231 bool channel_idle;
1232
1233 channel_gk20a_joblist_lock(ch);
1234 channel_idle = channel_gk20a_joblist_is_empty(ch);
1235 channel_gk20a_joblist_unlock(ch);
1236
1237 if (!channel_idle)
1238 return;
1239
1240 trace_gk20a_cde_finished_ctx_cb(cde_ctx);
1241 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
1242 if (!cde_ctx->in_use)
1243 nvgpu_log_info(g, "double finish cde context %p on channel %p",
1244 cde_ctx, ch);
1245
1246 if (ch->has_timedout) {
1247 if (cde_ctx->is_temporary) {
1248 nvgpu_warn(g,
1249 "cde: channel had timed out"
1250 " (temporary channel)");
1251 /* going to be deleted anyway */
1252 } else {
1253 nvgpu_warn(g,
1254 "cde: channel had timed out"
1255 ", reloading");
1256 /* mark it to be deleted, replace with a new one */
1257 nvgpu_mutex_acquire(&cde_app->mutex);
1258 cde_ctx->is_temporary = true;
1259 if (gk20a_cde_create_context(l)) {
1260 nvgpu_err(g, "cde: can't replace context");
1261 }
1262 nvgpu_mutex_release(&cde_app->mutex);
1263 }
1264 }
1265
1266 /* delete temporary contexts later (watch for doubles) */
1267 if (cde_ctx->is_temporary && cde_ctx->in_use) {
1268 WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
1269 schedule_delayed_work(&cde_ctx->ctx_deleter_work,
1270 msecs_to_jiffies(CTX_DELETE_TIME));
1271 }
1272
1273 if (!ch->has_timedout)
1274 gk20a_cde_ctx_release(cde_ctx);
1275}
1276
1277static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1278{
1279 struct nvgpu_os_linux *l = cde_ctx->l;
1280 struct gk20a *g = &l->g;
1281 struct nvgpu_firmware *img;
1282 struct channel_gk20a *ch;
1283 struct tsg_gk20a *tsg;
1284 struct gr_gk20a *gr = &g->gr;
1285 struct nvgpu_gpfifo_args gpfifo_args;
1286 int err = 0;
1287 u64 vaddr;
1288
1289 img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
1290 if (!img) {
1291 nvgpu_err(g, "cde: could not fetch the firmware");
1292 return -ENOSYS;
1293 }
1294
1295 tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
1296 if (!tsg) {
1297 nvgpu_err(g, "cde: could not create TSG");
1298 err = -ENOMEM;
1299 goto err_get_gk20a_channel;
1300 }
1301
1302 ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
1303 cde_ctx,
1304 -1,
1305 false);
1306 if (!ch) {
1307 nvgpu_warn(g, "cde: gk20a channel not available");
1308 err = -ENOMEM;
1309 goto err_get_gk20a_channel;
1310 }
1311
1312 ch->timeout.enabled = false;
1313
1314 /* bind the channel to the vm */
1315 err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
1316 if (err) {
1317 nvgpu_warn(g, "cde: could not bind vm");
1318 goto err_commit_va;
1319 }
1320
1321 err = gk20a_tsg_bind_channel(tsg, ch);
1322 if (err) {
1323 nvgpu_err(g, "cde: unable to bind to tsg");
1324 goto err_alloc_gpfifo;
1325 }
1326
1327 gpfifo_args.num_entries = 1024;
1328 gpfifo_args.num_inflight_jobs = 0;
1329 gpfifo_args.flags = 0;
1330 /* allocate gpfifo (1024 should be more than enough) */
1331 err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args);
1332 if (err) {
1333 nvgpu_warn(g, "cde: unable to allocate gpfifo");
1334 goto err_alloc_gpfifo;
1335 }
1336
1337 /* map backing store to gpu virtual space */
1338 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1339 g->gr.compbit_store.mem.size,
1340 NVGPU_VM_MAP_CACHEABLE,
1341 gk20a_mem_flag_read_only,
1342 false,
1343 gr->compbit_store.mem.aperture);
1344
1345 if (!vaddr) {
1346 nvgpu_warn(g, "cde: cannot map compression bit backing store");
1347 err = -ENOMEM;
1348 goto err_map_backingstore;
1349 }
1350
1351 /* store initialisation data */
1352 cde_ctx->ch = ch;
1353 cde_ctx->tsg = tsg;
1354 cde_ctx->vm = ch->vm;
1355 cde_ctx->backing_store_vaddr = vaddr;
1356
1357 /* initialise the firmware */
1358 err = gk20a_init_cde_img(cde_ctx, img);
1359 if (err) {
1360 nvgpu_warn(g, "cde: image initialisation failed");
1361 goto err_init_cde_img;
1362 }
1363
1364 /* initialisation done */
1365 nvgpu_release_firmware(g, img);
1366
1367 return 0;
1368
1369err_init_cde_img:
1370 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1371err_map_backingstore:
1372err_alloc_gpfifo:
1373 nvgpu_vm_put(ch->vm);
1374err_commit_va:
1375err_get_gk20a_channel:
1376 nvgpu_release_firmware(g, img);
1377 nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
1378 return err;
1379}
1380
1381int gk20a_cde_reload(struct nvgpu_os_linux *l)
1382__acquires(&l->cde_app->mutex)
1383__releases(&l->cde_app->mutex)
1384{
1385 struct gk20a *g = &l->g;
1386 struct gk20a_cde_app *cde_app = &l->cde_app;
1387 int err;
1388
1389 if (!cde_app->initialised)
1390 return -ENOSYS;
1391
1392 err = gk20a_busy(g);
1393 if (err)
1394 return err;
1395
1396 nvgpu_mutex_acquire(&cde_app->mutex);
1397
1398 gk20a_cde_stop(l);
1399
1400 err = gk20a_cde_create_contexts(l);
1401 if (!err)
1402 cde_app->initialised = true;
1403
1404 nvgpu_mutex_release(&cde_app->mutex);
1405
1406 gk20a_idle(g);
1407 return err;
1408}
1409
1410int gk20a_init_cde_support(struct nvgpu_os_linux *l)
1411__acquires(&cde_app->mutex)
1412__releases(&cde_app->mutex)
1413{
1414 struct gk20a_cde_app *cde_app = &l->cde_app;
1415 struct gk20a *g = &l->g;
1416 int err;
1417
1418 if (cde_app->initialised)
1419 return 0;
1420
1421 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1422
1423 err = nvgpu_mutex_init(&cde_app->mutex);
1424 if (err)
1425 return err;
1426
1427 nvgpu_mutex_acquire(&cde_app->mutex);
1428
1429 nvgpu_init_list_node(&cde_app->free_contexts);
1430 nvgpu_init_list_node(&cde_app->used_contexts);
1431 cde_app->ctx_count = 0;
1432 cde_app->ctx_count_top = 0;
1433 cde_app->ctx_usecount = 0;
1434
1435 err = gk20a_cde_create_contexts(l);
1436 if (!err)
1437 cde_app->initialised = true;
1438
1439 nvgpu_mutex_release(&cde_app->mutex);
1440 nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1441
1442 if (err)
1443 nvgpu_mutex_destroy(&cde_app->mutex);
1444
1445 return err;
1446}
1447
1448enum cde_launch_patch_id {
1449 PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
1450 PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
1451 PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
1452 PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
1453 PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
1454 PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
1455 PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
1456 PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
1457 PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
1458 PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
1459 PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
1460 PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
1461 PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
1462 PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
1463 PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
1464 PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
1465 PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
1466 PATCH_USER_CONST_XBLOCKS_ID = 1041,
1467 PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
1468 PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
1469 PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
1470 PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
1471 PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
1472 PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
1473 PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
1474 PATCH_H_LAUNCH_WORD1_ID = 1049,
1475 PATCH_H_LAUNCH_WORD2_ID = 1050,
1476 PATCH_V_LAUNCH_WORD1_ID = 1051,
1477 PATCH_V_LAUNCH_WORD2_ID = 1052,
1478 PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
1479 PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
1480 PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
1481 PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
1482};
1483
1484/* maximum number of WRITE_PATCHes in the below function */
1485#define MAX_CDE_LAUNCH_PATCHES 32
1486
1487static int gk20a_buffer_convert_gpu_to_cde_v1(
1488 struct nvgpu_os_linux *l,
1489 struct dma_buf *dmabuf, u32 consumer,
1490 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1491 u64 scatterbuffer_offset,
1492 u32 width, u32 height, u32 block_height_log2,
1493 u32 submit_flags, struct nvgpu_channel_fence *fence_in,
1494 struct gk20a_buffer_state *state)
1495{
1496 struct gk20a *g = &l->g;
1497 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1498 int param = 0;
1499 int err = 0;
1500 struct gk20a_fence *new_fence = NULL;
1501 const int wgx = 8;
1502 const int wgy = 8;
1503 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1504 const int xalign = compbits_per_byte * wgx;
1505 const int yalign = wgy;
1506
1507 /* Compute per launch parameters */
1508 const int xtiles = (width + 7) >> 3;
1509 const int ytiles = (height + 7) >> 3;
1510 const int gridw_h = roundup(xtiles, xalign) / xalign;
1511 const int gridh_h = roundup(ytiles, yalign) / yalign;
1512 const int gridw_v = roundup(ytiles, xalign) / xalign;
1513 const int gridh_v = roundup(xtiles, yalign) / yalign;
1514 const int xblocks = (xtiles + 1) >> 1;
1515 const int voffset = compbits_voffset - compbits_hoffset;
1516
1517 int hprog = -1;
1518 int vprog = -1;
1519
1520 if (l->ops.cde.get_program_numbers)
1521 l->ops.cde.get_program_numbers(g, block_height_log2,
1522 l->cde_app.shader_parameter,
1523 &hprog, &vprog);
1524 else {
1525 nvgpu_warn(g, "cde: chip not supported");
1526 return -ENOSYS;
1527 }
1528
1529 if (hprog < 0 || vprog < 0) {
1530 nvgpu_warn(g, "cde: could not determine programs");
1531 return -ENOSYS;
1532 }
1533
1534 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1535 nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1536 xtiles, ytiles);
1537
1538 nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
1539 width, height, block_height_log2,
1540 compbits_hoffset, compbits_voffset, scatterbuffer_offset);
1541 nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1542 width, height, xtiles, ytiles);
1543 nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
1544 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1545 nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1546 hprog,
1547 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1548 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1549 vprog,
1550 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1551 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1552
1553 /* Write parameters */
1554#define WRITE_PATCH(NAME, VALUE) \
1555 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1556 WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
1557 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1558 block_height_log2);
1559 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1560 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1561 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1562 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1563 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1564
1565 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
1566 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
1567 WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
1568 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
1569 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
1570 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1571
1572 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
1573 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
1574 WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
1575 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
1576 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
1577 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1578
1579 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1580 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1581 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1582 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1583 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1584 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1585 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1586 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1587
1588 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1589 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1590 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1591 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1592 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1593 } else {
1594 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1595 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1596 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1597 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1598 }
1599
1600 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1601 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1602 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1603 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1604 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1605 } else {
1606 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1607 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1608 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1609 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1610 }
1611#undef WRITE_PATCH
1612
1613 err = gk20a_cde_convert(l, dmabuf,
1614 compbits_hoffset,
1615 scatterbuffer_offset,
1616 fence_in, submit_flags,
1617 params, param, &new_fence);
1618 if (err)
1619 goto out;
1620
1621 /* compbits generated, update state & fence */
1622 gk20a_fence_put(state->fence);
1623 state->fence = new_fence;
1624 state->valid_compbits |= consumer &
1625 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1626out:
1627 return err;
1628}
1629
1630static int gk20a_buffer_convert_gpu_to_cde(
1631 struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
1632 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1633 u64 scatterbuffer_offset,
1634 u32 width, u32 height, u32 block_height_log2,
1635 u32 submit_flags, struct nvgpu_channel_fence *fence_in,
1636 struct gk20a_buffer_state *state)
1637{
1638 struct gk20a *g = &l->g;
1639 int err = 0;
1640
1641 if (!l->cde_app.initialised)
1642 return -ENOSYS;
1643
1644 nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n",
1645 l->cde_app.firmware_version);
1646
1647 if (l->cde_app.firmware_version == 1) {
1648 err = gk20a_buffer_convert_gpu_to_cde_v1(
1649 l, dmabuf, consumer, offset, compbits_hoffset,
1650 compbits_voffset, scatterbuffer_offset,
1651 width, height, block_height_log2,
1652 submit_flags, fence_in, state);
1653 } else {
1654 nvgpu_err(g, "unsupported CDE firmware version %d",
1655 l->cde_app.firmware_version);
1656 err = -EINVAL;
1657 }
1658
1659 return err;
1660}
1661
1662int gk20a_prepare_compressible_read(
1663 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
1664 u64 compbits_hoffset, u64 compbits_voffset,
1665 u64 scatterbuffer_offset,
1666 u32 width, u32 height, u32 block_height_log2,
1667 u32 submit_flags, struct nvgpu_channel_fence *fence,
1668 u32 *valid_compbits, u32 *zbc_color,
1669 struct gk20a_fence **fence_out)
1670{
1671 struct gk20a *g = &l->g;
1672 int err = 0;
1673 struct gk20a_buffer_state *state;
1674 struct dma_buf *dmabuf;
1675 u32 missing_bits;
1676
1677 dmabuf = dma_buf_get(buffer_fd);
1678 if (IS_ERR(dmabuf))
1679 return -EINVAL;
1680
1681 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1682 if (err) {
1683 dma_buf_put(dmabuf);
1684 return err;
1685 }
1686
1687	nvgpu_mutex_acquire(&state->lock);
1688
1689	missing_bits = (state->valid_compbits ^ request) & request;
1690
1691 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1692
1693 gk20a_fence_put(state->fence);
1694 state->fence = NULL;
1695 /* state->fence = decompress();
1696 state->valid_compbits = 0; */
1697 err = -EINVAL;
1698 goto out;
1699 } else if (missing_bits) {
1700 u32 missing_cde_bits = missing_bits &
1701 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1702 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1703 missing_cde_bits) {
1704 err = gk20a_buffer_convert_gpu_to_cde(
1705 l, dmabuf,
1706 missing_cde_bits,
1707 offset, compbits_hoffset,
1708 compbits_voffset, scatterbuffer_offset,
1709 width, height, block_height_log2,
1710 submit_flags, fence,
1711 state);
1712 if (err)
1713 goto out;
1714 }
1715 }
1716
1717 if (state->fence && fence_out)
1718 *fence_out = gk20a_fence_get(state->fence);
1719
1720 if (valid_compbits)
1721 *valid_compbits = state->valid_compbits;
1722
1723 if (zbc_color)
1724 *zbc_color = state->zbc_color;
1725
1726out:
1727 nvgpu_mutex_release(&state->lock);
1728 dma_buf_put(dmabuf);
1729 return err;
1730}
1731
1732int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1733 u32 valid_compbits, u64 offset, u32 zbc_color)
1734{
1735 int err;
1736 struct gk20a_buffer_state *state;
1737 struct dma_buf *dmabuf;
1738
1739 dmabuf = dma_buf_get(buffer_fd);
1740 if (IS_ERR(dmabuf)) {
1741 nvgpu_err(g, "invalid dmabuf");
1742 return -EINVAL;
1743 }
1744
1745 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1746 if (err) {
1747 nvgpu_err(g, "could not get state from dmabuf");
1748 dma_buf_put(dmabuf);
1749 return err;
1750 }
1751
1752 nvgpu_mutex_acquire(&state->lock);
1753
1754 /* Update the compbits state. */
1755 state->valid_compbits = valid_compbits;
1756 state->zbc_color = zbc_color;
1757
1758 /* Discard previous compbit job fence. */
1759 gk20a_fence_put(state->fence);
1760 state->fence = NULL;
1761
1762 nvgpu_mutex_release(&state->lock);
1763 dma_buf_put(dmabuf);
1764 return 0;
1765}
1766
1767int nvgpu_cde_init_ops(struct nvgpu_os_linux *l)
1768{
1769 struct gk20a *g = &l->g;
1770 u32 ver = g->params.gpu_arch + g->params.gpu_impl;
1771
1772 switch (ver) {
1773 case GK20A_GPUID_GM20B:
1774 case GK20A_GPUID_GM20B_B:
1775 l->ops.cde = gm20b_cde_ops.cde;
1776 break;
1777 case NVGPU_GPUID_GP10B:
1778 l->ops.cde = gp10b_cde_ops.cde;
1779 break;
1780 default:
1781 /* CDE is optional, so today ignoring unknown chip is fine */
1782 break;
1783 }
1784
1785 return 0;
1786}
diff --git a/drivers/gpu/nvgpu/common/linux/cde.h b/drivers/gpu/nvgpu/common/linux/cde.h
deleted file mode 100644
index 5928b624..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde.h
+++ /dev/null
@@ -1,326 +0,0 @@
1/*
2 * GK20A color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_
21
22#include <nvgpu/nvgpu_mem.h>
23#include <nvgpu/list.h>
24#include <nvgpu/lock.h>
25
26#include <linux/kobject.h>
27#include <linux/workqueue.h>
28
29#define MAX_CDE_BUFS 10
30#define MAX_CDE_PARAMS 64
31#define MAX_CDE_USER_PARAMS 40
32#define MAX_CDE_ARRAY_ENTRIES 9
33
34/*
35 * The size of the context ring buffer that is dedicated for handling cde
36 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
37 * wait on the previous job submitted to that channel, so increasing this value
38 * reduces the likelihood of stalls.
39 */
40#define NUM_CDE_CONTEXTS 4
41
42struct dma_buf;
43struct device;
44struct nvgpu_os_linux;
45struct gk20a;
46struct gk20a_fence;
47struct nvgpu_channel_fence;
48struct channel_gk20a;
49struct vm_gk20a;
50struct nvgpu_gpfifo_entry;
51
52/*
53 * this element defines a buffer that is allocated and mapped into gpu address
54 * space. data_byte_offset defines the beginning of the buffer inside the
55 * firmware. num_bytes defines how many bytes the buffer contains.
56 *
57 * If data_byte_offset is zero, we allocate an empty buffer.
58 */
59
60struct gk20a_cde_hdr_buf {
61 u64 data_byte_offset;
62 u64 num_bytes;
63};
64
65/*
66 * this element defines a constant patch in a buffer. It basically
67 * computes the physical address of <source_buf>+source_byte_offset. The
68 * address is then combined into the patch value as per:
69 * value = (current_value & ~mask) | (address << shift) & mask .
70 *
71 * The type field defines the register size as:
72 * 0=u32,
73 * 1=u64 (little endian),
74 * 2=u64 (big endian)
75 */
76
77struct gk20a_cde_hdr_replace {
78 u32 target_buf;
79 u32 source_buf;
80 s32 shift;
81 u32 type;
82 u64 target_byte_offset;
83 u64 source_byte_offset;
84 u64 mask;
85};
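/*
 * A minimal sketch of how the replace formula above is applied.
 * gk20a_cde_apply_patch() is a hypothetical helper, and a negative shift is
 * assumed to select a right shift; the real driver resolves the source
 * buffer GPU address and performs the store according to the type field.
 * The same formula is reused by the runtime parameter patching described
 * further below.
 */
static inline u64 gk20a_cde_apply_patch(u64 current_value, u64 address,
					s32 shift, u64 mask)
{
	/* assumption: a negative shift means shift right */
	u64 shifted = (shift >= 0) ? (address << shift) : (address >> -shift);

	return (current_value & ~mask) | (shifted & mask);
}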
86
87enum {
88 TYPE_PARAM_TYPE_U32 = 0,
89 TYPE_PARAM_TYPE_U64_LITTLE,
90 TYPE_PARAM_TYPE_U64_BIG
91};
92
93/*
94 * this element defines a runtime patching in buffers. Parameters with id from
95 * 0 to 1024 are reserved for special usage as follows:
96 * 0 = comptags_per_cacheline,
97 * 1 = slices_per_fbp,
98 * 2 = num_fbps
99 * 3 = source buffer first page offset
100 * 4 = source buffer block height log2
101 * 5 = backing store memory address
102 * 6 = destination memory address
103 * 7 = destination size (bytes)
104 * 8 = backing store size (bytes)
105 * 9 = cache line size
106 *
107 * Parameters above id 1024 are user-specified, i.e. they determine where
108 * parameters from user space should be placed in buffers, what their
109 * type is, etc.
110 *
111 * Once the value is available, we add data_offset to the value.
112 *
113 * The value address is then modified into patch value as per:
114 * value = (current_value & ~mask) | (address << shift) & mask .
115 *
116 * The type field defines the register size as:
117 * 0=u32,
118 * 1=u64 (little endian),
119 * 2=u64 (big endian)
120 */
121
122struct gk20a_cde_hdr_param {
123 u32 id;
124 u32 target_buf;
125 s32 shift;
126 u32 type;
127 s64 data_offset;
128 u64 target_byte_offset;
129 u64 mask;
130};
131
132enum {
133 TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
134 TYPE_PARAM_GPU_CONFIGURATION,
135 TYPE_PARAM_FIRSTPAGEOFFSET,
136 TYPE_PARAM_NUMPAGES,
137 TYPE_PARAM_BACKINGSTORE,
138 TYPE_PARAM_DESTINATION,
139 TYPE_PARAM_DESTINATION_SIZE,
140 TYPE_PARAM_BACKINGSTORE_SIZE,
141 TYPE_PARAM_SOURCE_SMMU_ADDR,
142 TYPE_PARAM_BACKINGSTORE_BASE_HW,
143 TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
144 TYPE_PARAM_SCATTERBUFFER,
145 TYPE_PARAM_SCATTERBUFFER_SIZE,
146 NUM_RESERVED_PARAMS = 1024,
147};
148
149/*
150 * This header element defines a command. The op field determines whether the
151 * element is defining an init (0) or convert command (1). data_byte_offset
152 * denotes the beginning address of command elements in the file.
153 */
154
155struct gk20a_cde_hdr_command {
156 u32 op;
157 u32 num_entries;
158 u64 data_byte_offset;
159};
160
161enum {
162 TYPE_BUF_COMMAND_INIT = 0,
163 TYPE_BUF_COMMAND_CONVERT,
164 TYPE_BUF_COMMAND_NOOP
165};
166
167/*
168 * This command element defines one entry inside the push buffer. target_buf
169 * defines the buffer including the pushbuffer entries, target_byte_offset the
170 * offset inside the buffer and num_bytes the number of words in the buffer.
171 */
172
173struct gk20a_cde_cmd_elem {
174 u32 target_buf;
175 u32 padding;
176 u64 target_byte_offset;
177 u64 num_bytes;
178};
179
180/*
181 * This element is used for storing a small array of data.
182 */
183
184enum {
185 ARRAY_PROGRAM_OFFSET = 0,
186 ARRAY_REGISTER_COUNT,
187 ARRAY_LAUNCH_COMMAND,
188 NUM_CDE_ARRAYS
189};
190
191struct gk20a_cde_hdr_array {
192 u32 id;
193 u32 data[MAX_CDE_ARRAY_ENTRIES];
194};
195
196/*
197 * The following defines a single header element. Each element has a type and
198 * one of the data structures defined above.
199 */
200
201struct gk20a_cde_hdr_elem {
202 u32 type;
203 u32 padding;
204 union {
205 struct gk20a_cde_hdr_buf buf;
206 struct gk20a_cde_hdr_replace replace;
207 struct gk20a_cde_hdr_param param;
208 u32 required_class;
209 struct gk20a_cde_hdr_command command;
210 struct gk20a_cde_hdr_array array;
211 };
212};
213
214enum {
215 TYPE_BUF = 0,
216 TYPE_REPLACE,
217 TYPE_PARAM,
218 TYPE_REQUIRED_CLASS,
219 TYPE_COMMAND,
220 TYPE_ARRAY
221};
222
223struct gk20a_cde_param {
224 u32 id;
225 u32 padding;
226 u64 value;
227};
228
229struct gk20a_cde_ctx {
230 struct nvgpu_os_linux *l;
231 struct device *dev;
232
233 /* channel related data */
234 struct channel_gk20a *ch;
235 struct tsg_gk20a *tsg;
236 struct vm_gk20a *vm;
237
238 /* buf converter configuration */
239 struct nvgpu_mem mem[MAX_CDE_BUFS];
240 unsigned int num_bufs;
241
242 /* buffer patching params (where should patching be done) */
243 struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
244 unsigned int num_params;
245
246 /* storage for user space parameter values */
247 u32 user_param_values[MAX_CDE_USER_PARAMS];
248
249 u32 surf_param_offset;
250 u32 surf_param_lines;
251 u64 surf_vaddr;
252
253 u64 compbit_vaddr;
254 u64 compbit_size;
255
256 u64 scatterbuffer_vaddr;
257 u64 scatterbuffer_size;
258
259 u64 backing_store_vaddr;
260
261 struct nvgpu_gpfifo_entry *init_convert_cmd;
262 int init_cmd_num_entries;
263
264 struct nvgpu_gpfifo_entry *convert_cmd;
265 int convert_cmd_num_entries;
266
267 struct kobj_attribute attr;
268
269 bool init_cmd_executed;
270
271 struct nvgpu_list_node list;
272 bool is_temporary;
273 bool in_use;
274 struct delayed_work ctx_deleter_work;
275};
276
277static inline struct gk20a_cde_ctx *
278gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
279{
280 return (struct gk20a_cde_ctx *)
281 ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
282}
283
284struct gk20a_cde_app {
285 bool initialised;
286 struct nvgpu_mutex mutex;
287
288 struct nvgpu_list_node free_contexts;
289 struct nvgpu_list_node used_contexts;
290 unsigned int ctx_count;
291 unsigned int ctx_usecount;
292 unsigned int ctx_count_top;
293
294 u32 firmware_version;
295
296 u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
297
298 u32 shader_parameter;
299};
300
301void gk20a_cde_destroy(struct nvgpu_os_linux *l);
302void gk20a_cde_suspend(struct nvgpu_os_linux *l);
303int gk20a_init_cde_support(struct nvgpu_os_linux *l);
304int gk20a_cde_reload(struct nvgpu_os_linux *l);
305int gk20a_cde_convert(struct nvgpu_os_linux *l,
306 struct dma_buf *compbits_buf,
307 u64 compbits_byte_offset,
308 u64 scatterbuffer_byte_offset,
309 struct nvgpu_channel_fence *fence,
310 u32 __flags, struct gk20a_cde_param *params,
311 int num_params, struct gk20a_fence **fence_out);
312
313int gk20a_prepare_compressible_read(
314 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
315 u64 compbits_hoffset, u64 compbits_voffset,
316 u64 scatterbuffer_offset,
317 u32 width, u32 height, u32 block_height_log2,
318 u32 submit_flags, struct nvgpu_channel_fence *fence,
319 u32 *valid_compbits, u32 *zbc_color,
320 struct gk20a_fence **fence_out);
321int gk20a_mark_compressible_write(
322 struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
323 u32 zbc_color);
324int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);
325
326#endif
diff --git a/drivers/gpu/nvgpu/common/linux/cde_gm20b.c b/drivers/gpu/nvgpu/common/linux/cde_gm20b.c
deleted file mode 100644
index 1cd15c54..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde_gm20b.c
+++ /dev/null
@@ -1,64 +0,0 @@
1/*
2 * GM20B CDE
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "cde_gm20b.h"
27
28enum programs {
29 PROG_HPASS = 0,
30 PROG_VPASS_LARGE = 1,
31 PROG_VPASS_SMALL = 2,
32 PROG_HPASS_DEBUG = 3,
33 PROG_VPASS_LARGE_DEBUG = 4,
34 PROG_VPASS_SMALL_DEBUG = 5,
35 PROG_PASSTHROUGH = 6,
36};
37
38static void gm20b_cde_get_program_numbers(struct gk20a *g,
39 u32 block_height_log2,
40 u32 shader_parameter,
41 int *hprog_out, int *vprog_out)
42{
43 int hprog = PROG_HPASS;
44 int vprog = (block_height_log2 >= 2) ?
45 PROG_VPASS_LARGE : PROG_VPASS_SMALL;
46 if (shader_parameter == 1) {
47 hprog = PROG_PASSTHROUGH;
48 vprog = PROG_PASSTHROUGH;
49 } else if (shader_parameter == 2) {
50 hprog = PROG_HPASS_DEBUG;
51 vprog = (block_height_log2 >= 2) ?
52 PROG_VPASS_LARGE_DEBUG :
53 PROG_VPASS_SMALL_DEBUG;
54 }
55
56 *hprog_out = hprog;
57 *vprog_out = vprog;
58}
59
60struct nvgpu_os_linux_ops gm20b_cde_ops = {
61 .cde = {
62 .get_program_numbers = gm20b_cde_get_program_numbers,
63 },
64};
diff --git a/drivers/gpu/nvgpu/common/linux/cde_gm20b.h b/drivers/gpu/nvgpu/common/linux/cde_gm20b.h
deleted file mode 100644
index 640d6ab6..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde_gm20b.h
+++ /dev/null
@@ -1,32 +0,0 @@
1/*
2 * GM20B CDE
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_CDE
26#define _NVHOST_GM20B_CDE
27
28#include "os_linux.h"
29
30extern struct nvgpu_os_linux_ops gm20b_cde_ops;
31
32#endif
diff --git a/drivers/gpu/nvgpu/common/linux/cde_gp10b.c b/drivers/gpu/nvgpu/common/linux/cde_gp10b.c
deleted file mode 100644
index 5c0e79a7..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde_gp10b.c
+++ /dev/null
@@ -1,161 +0,0 @@
1/*
2 * GP10B CDE
3 *
4 * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "cde_gp10b.h"
27
28#include <nvgpu/log.h>
29#include <nvgpu/dma.h>
30
31enum gp10b_programs {
32 GP10B_PROG_HPASS = 0,
33 GP10B_PROG_HPASS_4K = 1,
34 GP10B_PROG_VPASS = 2,
35 GP10B_PROG_VPASS_4K = 3,
36 GP10B_PROG_HPASS_DEBUG = 4,
37 GP10B_PROG_HPASS_4K_DEBUG = 5,
38 GP10B_PROG_VPASS_DEBUG = 6,
39 GP10B_PROG_VPASS_4K_DEBUG = 7,
40 GP10B_PROG_PASSTHROUGH = 8,
41};
42
43void gp10b_cde_get_program_numbers(struct gk20a *g,
44 u32 block_height_log2,
45 u32 shader_parameter,
46 int *hprog_out, int *vprog_out)
47{
48 int hprog, vprog;
49
50 if (shader_parameter == 1) {
51 hprog = GP10B_PROG_PASSTHROUGH;
52 vprog = GP10B_PROG_PASSTHROUGH;
53 } else {
54 hprog = GP10B_PROG_HPASS;
55 vprog = GP10B_PROG_VPASS;
56 if (shader_parameter == 2) {
57 hprog = GP10B_PROG_HPASS_DEBUG;
58 vprog = GP10B_PROG_VPASS_DEBUG;
59 }
60 if (!nvgpu_iommuable(g)) {
61 if (!g->mm.disable_bigpage) {
62 nvgpu_warn(g,
63					"Without an IOMMU, big pages cannot be used");
64 }
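			/*
			 * The *_4K programs immediately follow their non-4K
			 * counterparts in the enum above, so setting bit 0
			 * switches both passes to the 4K variants.
			 */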
65 hprog |= 1;
66 vprog |= 1;
67 }
68 }
69
70 *hprog_out = hprog;
71 *vprog_out = vprog;
72}
73
74bool gp10b_need_scatter_buffer(struct gk20a *g)
75{
76 return !nvgpu_iommuable(g);
77}
78
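/*
 * Returns the parity (XOR of all bits) of a 32-bit word: the word is folded
 * down to a nibble and 0x6996 (binary 0110 1001 1001 0110) serves as a
 * 16-entry lookup table of nibble parities.
 */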
79static u8 parity(u32 a)
80{
81 a ^= a>>16u;
82 a ^= a>>8u;
83 a ^= a>>4u;
84 a &= 0xfu;
85 return (0x6996u >> a) & 1u;
86}
87
88int gp10b_populate_scatter_buffer(struct gk20a *g,
89 struct sg_table *sgt,
90 size_t surface_size,
91 void *scatter_buffer_ptr,
92 size_t scatter_buffer_size)
93{
94 /* map scatter buffer to CPU VA and fill it */
95 const u32 page_size_log2 = 12;
96 const u32 page_size = 1 << page_size_log2;
97 const u32 page_size_shift = page_size_log2 - 7u;
98
99 /* 0011 1111 1111 1111 1111 1110 0100 1000 */
100 const u32 getSliceMaskGP10B = 0x3ffffe48;
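	/*
	 * One parity bit is produced per 4K page of the surface and packed
	 * eight bits per byte into the scatter buffer, hence the
	 * (pages_left >> 3) size check below.
	 */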
101 u8 *scatter_buffer = scatter_buffer_ptr;
102
103 size_t i;
104 struct scatterlist *sg = NULL;
105 u8 d = 0;
106 size_t page = 0;
107 size_t pages_left;
108
109 surface_size = round_up(surface_size, page_size);
110
111 pages_left = surface_size >> page_size_log2;
112 if ((pages_left >> 3) > scatter_buffer_size)
113 return -ENOMEM;
114
115 for_each_sg(sgt->sgl, sg, sgt->nents, i) {
116 unsigned int j;
117 u64 surf_pa = sg_phys(sg);
118 unsigned int n = (int)(sg->length >> page_size_log2);
119
120 nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
121
122		for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) {
123 u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift;
124 u8 scatter_bit = parity(addr);
125 u8 bit = page & 7;
126
127 d |= scatter_bit << bit;
128 if (bit == 7) {
129 scatter_buffer[page >> 3] = d;
130 d = 0;
131 }
132
133 ++page;
134 --pages_left;
135 }
136
137 if (pages_left == 0)
138 break;
139 }
140
141 /* write the last byte in case the number of pages is not divisible by 8 */
142 if ((page & 7) != 0)
143 scatter_buffer[page >> 3] = d;
144
145 if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
146 nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
147 for (i = 0; i < page >> 3; i++) {
148 nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
149 }
150 }
151
152 return 0;
153}
154
155struct nvgpu_os_linux_ops gp10b_cde_ops = {
156 .cde = {
157 .get_program_numbers = gp10b_cde_get_program_numbers,
158 .need_scatter_buffer = gp10b_need_scatter_buffer,
159 .populate_scatter_buffer = gp10b_populate_scatter_buffer,
160 },
161};
diff --git a/drivers/gpu/nvgpu/common/linux/cde_gp10b.h b/drivers/gpu/nvgpu/common/linux/cde_gp10b.h
deleted file mode 100644
index 52e9f292..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde_gp10b.h
+++ /dev/null
@@ -1,32 +0,0 @@
1/*
2 * GP10B CDE
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GP10B_CDE
26#define _NVHOST_GP10B_CDE
27
28#include "os_linux.h"
29
30extern struct nvgpu_os_linux_ops gp10b_cde_ops;
31
32#endif
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c
deleted file mode 100644
index 165f33db..00000000
--- a/drivers/gpu/nvgpu/common/linux/ce2.c
+++ /dev/null
@@ -1,155 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/types.h>
18
19#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
20
21#include "gk20a/ce2_gk20a.h"
22#include "gk20a/gk20a.h"
23#include "channel.h"
24
25static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
26{
27 /* there is no local memory available,
28 don't allow local memory related CE flags */
29 if (!g->mm.vidmem.size) {
30 launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
31 NVGPU_CE_DST_LOCATION_LOCAL_FB);
32 }
33 return launch_flags;
34}
35
36int gk20a_ce_execute_ops(struct gk20a *g,
37 u32 ce_ctx_id,
38 u64 src_buf,
39 u64 dst_buf,
40 u64 size,
41 unsigned int payload,
42 int launch_flags,
43 int request_operation,
44 u32 submit_flags,
45 struct gk20a_fence **gk20a_fence_out)
46{
47 int ret = -EPERM;
48 struct gk20a_ce_app *ce_app = &g->ce_app;
49 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
50 bool found = false;
51 u32 *cmd_buf_cpu_va;
52 u64 cmd_buf_gpu_va = 0;
53 u32 methodSize;
54 u32 cmd_buf_read_offset;
55 u32 dma_copy_class;
56 struct nvgpu_gpfifo_entry gpfifo;
57 struct nvgpu_channel_fence fence = {0, 0};
58 struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
59
60	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
61 goto end;
62
63 nvgpu_mutex_acquire(&ce_app->app_mutex);
64
65 nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
66 &ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
67 if (ce_ctx->ctx_id == ce_ctx_id) {
68 found = true;
69 break;
70 }
71 }
72
73 nvgpu_mutex_release(&ce_app->app_mutex);
74
75 if (!found) {
76 ret = -EINVAL;
77 goto end;
78 }
79
80 if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
81 ret = -ENODEV;
82 goto end;
83 }
84
85 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
86
87 ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;
88
89 cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
90 (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));
91
92 cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
93
94 if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
95 struct gk20a_fence **prev_post_fence =
96 &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];
97
98 ret = gk20a_fence_wait(g, *prev_post_fence,
99 gk20a_get_gr_idle_timeout(g));
100
101 gk20a_fence_put(*prev_post_fence);
102 *prev_post_fence = NULL;
103 if (ret)
104 goto noop;
105 }
106
107	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset * sizeof(u32)));
108
109 dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
110 methodSize = gk20a_ce_prepare_submit(src_buf,
111 dst_buf,
112 size,
113 &cmd_buf_cpu_va[cmd_buf_read_offset],
114 NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
115 payload,
116 gk20a_get_valid_launch_flags(g, launch_flags),
117 request_operation,
118 dma_copy_class);
119
120 if (methodSize) {
121 /* store the element into gpfifo */
122 gpfifo.entry0 =
123 u64_lo32(cmd_buf_gpu_va);
124 gpfifo.entry1 =
125 (u64_hi32(cmd_buf_gpu_va) |
126 pbdma_gp_entry1_length_f(methodSize));
127
128		/* always take the postfence as it is needed for protecting the CE context */
129 submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
130
131 nvgpu_smp_wmb();
132
133 ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
134 1, submit_flags, &fence,
135 &ce_cmd_buf_fence_out, NULL);
136
137 if (!ret) {
138 ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
139 ce_cmd_buf_fence_out;
140 if (gk20a_fence_out) {
141 gk20a_fence_get(ce_cmd_buf_fence_out);
142 *gk20a_fence_out = ce_cmd_buf_fence_out;
143 }
144
145 /* Next available command buffer queue Index */
146 ++ce_ctx->cmd_buf_read_queue_offset;
147 }
148 } else {
149 ret = -ENOMEM;
150 }
151noop:
152 nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
153end:
154 return ret;
155}
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
deleted file mode 100644
index 7810bc21..00000000
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ /dev/null
@@ -1,1021 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/enabled.h>
18#include <nvgpu/debug.h>
19#include <nvgpu/ltc.h>
20#include <nvgpu/error_notifier.h>
21#include <nvgpu/os_sched.h>
22
23/*
24 * This is required for nvgpu_vm_find_buf() which is used in the tracing
25 * code. Once we can get and access userspace buffers without requiring
26 * direct dma_buf usage this can be removed.
27 */
28#include <nvgpu/linux/vm.h>
29
30#include "gk20a/gk20a.h"
31
32#include "channel.h"
33#include "ioctl_channel.h"
34#include "os_linux.h"
35
36#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
37
38#include <linux/uaccess.h>
39#include <linux/dma-buf.h>
40#include <trace/events/gk20a.h>
41#include <uapi/linux/nvgpu.h>
42
43#include "sync_sema_android.h"
44
45u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
46{
47 u32 flags = 0;
48
49 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT)
50 flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT;
51
52 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
53 flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
54
55 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT)
56 flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT;
57
58 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
59 flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE;
60
61 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI)
62 flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI;
63
64 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING)
65 flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING;
66
67 return flags;
68}
69
70/*
71 * API to convert error_notifiers in common code and of the form
72 * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user
73 * space and of the form NVGPU_CHANNEL_*
74 */
75static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
76{
77 switch (error_notifier) {
78 case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
79 return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
80 case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
81 return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
82 case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
83 return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
84 case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
85 return NVGPU_CHANNEL_GR_EXCEPTION;
86 case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
87 return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
88 case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
89 return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
90 case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
91 return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
92 case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
93 return NVGPU_CHANNEL_PBDMA_ERROR;
94 case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
95 return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
96 case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
97 return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
98 case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
99 return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
100 }
101
102 pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
103
104 return error_notifier;
105}
106
107/**
108 * nvgpu_set_error_notifier_locked()
109 * Should be called with ch->error_notifier_mutex held
110 *
111 * error should be of the form NVGPU_ERR_NOTIFIER_*
112 */
113void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
114{
115 struct nvgpu_channel_linux *priv = ch->os_priv;
116
117 error = nvgpu_error_notifier_to_channel_notifier(error);
118
119 if (priv->error_notifier.dmabuf) {
120 struct nvgpu_notification *notification =
121 priv->error_notifier.notification;
122 struct timespec time_data;
123 u64 nsec;
124
125 getnstimeofday(&time_data);
126 nsec = ((u64)time_data.tv_sec) * 1000000000u +
127 (u64)time_data.tv_nsec;
128 notification->time_stamp.nanoseconds[0] =
129 (u32)nsec;
130 notification->time_stamp.nanoseconds[1] =
131 (u32)(nsec >> 32);
132 notification->info32 = error;
133 notification->status = 0xffff;
134
135 nvgpu_err(ch->g,
136 "error notifier set to %d for ch %d", error, ch->chid);
137 }
138}
139
140/* error should be of the form NVGPU_ERR_NOTIFIER_* */
141void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
142{
143 struct nvgpu_channel_linux *priv = ch->os_priv;
144
145 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
146 nvgpu_set_error_notifier_locked(ch, error);
147 nvgpu_mutex_release(&priv->error_notifier.mutex);
148}
149
150void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
151{
152 struct nvgpu_channel_linux *priv = ch->os_priv;
153
154 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
155 if (priv->error_notifier.dmabuf) {
156 struct nvgpu_notification *notification =
157 priv->error_notifier.notification;
158
159 /* Don't overwrite error flag if it is already set */
160 if (notification->status != 0xffff)
161 nvgpu_set_error_notifier_locked(ch, error);
162 }
163 nvgpu_mutex_release(&priv->error_notifier.mutex);
164}
165
166/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */
167bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
168{
169 struct nvgpu_channel_linux *priv = ch->os_priv;
170 bool notifier_set = false;
171
172 error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
173
174 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
175 if (priv->error_notifier.dmabuf) {
176 struct nvgpu_notification *notification =
177 priv->error_notifier.notification;
178 u32 err = notification->info32;
179
180 if (err == error_notifier)
181 notifier_set = true;
182 }
183 nvgpu_mutex_release(&priv->error_notifier.mutex);
184
185 return notifier_set;
186}
187
188static void gk20a_channel_update_runcb_fn(struct work_struct *work)
189{
190 struct nvgpu_channel_completion_cb *completion_cb =
191 container_of(work, struct nvgpu_channel_completion_cb, work);
192 struct nvgpu_channel_linux *priv =
193 container_of(completion_cb,
194 struct nvgpu_channel_linux, completion_cb);
195 struct channel_gk20a *ch = priv->ch;
196 void (*fn)(struct channel_gk20a *, void *);
197 void *user_data;
198
199 nvgpu_spinlock_acquire(&completion_cb->lock);
200 fn = completion_cb->fn;
201 user_data = completion_cb->user_data;
202 nvgpu_spinlock_release(&completion_cb->lock);
203
204 if (fn)
205 fn(ch, user_data);
206}
207
208static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch)
209{
210 struct nvgpu_channel_linux *priv = ch->os_priv;
211
212 priv->completion_cb.fn = NULL;
213 priv->completion_cb.user_data = NULL;
214 nvgpu_spinlock_init(&priv->completion_cb.lock);
215 INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn);
216}
217
218static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch)
219{
220 struct nvgpu_channel_linux *priv = ch->os_priv;
221
222 nvgpu_spinlock_acquire(&priv->completion_cb.lock);
223 priv->completion_cb.fn = NULL;
224 priv->completion_cb.user_data = NULL;
225 nvgpu_spinlock_release(&priv->completion_cb.lock);
226 cancel_work_sync(&priv->completion_cb.work);
227}
228
229static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch)
230{
231 struct nvgpu_channel_linux *priv = ch->os_priv;
232
233 if (priv->completion_cb.fn)
234 schedule_work(&priv->completion_cb.work);
235}
236
237static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch)
238{
239 struct nvgpu_channel_linux *priv = ch->os_priv;
240
241 if (priv->completion_cb.fn)
242 cancel_work_sync(&priv->completion_cb.work);
243}
244
245struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
246 void (*update_fn)(struct channel_gk20a *, void *),
247 void *update_fn_data,
248 int runlist_id,
249 bool is_privileged_channel)
250{
251 struct channel_gk20a *ch;
252 struct nvgpu_channel_linux *priv;
253
254 ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel,
255 nvgpu_current_pid(g), nvgpu_current_tid(g));
256
257 if (ch) {
258 priv = ch->os_priv;
259 nvgpu_spinlock_acquire(&priv->completion_cb.lock);
260 priv->completion_cb.fn = update_fn;
261 priv->completion_cb.user_data = update_fn_data;
262 nvgpu_spinlock_release(&priv->completion_cb.lock);
263 }
264
265 return ch;
266}
267
268static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
269{
270}
271
272static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
273{
274 nvgpu_channel_work_completion_clear(ch);
275
276#if defined(CONFIG_GK20A_CYCLE_STATS)
277 gk20a_channel_free_cycle_stats_buffer(ch);
278 gk20a_channel_free_cycle_stats_snapshot(ch);
279#endif
280}
281
282static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
283{
284 struct nvgpu_channel_linux *priv;
285 int err;
286
287 priv = nvgpu_kzalloc(g, sizeof(*priv));
288 if (!priv)
289 return -ENOMEM;
290
291 ch->os_priv = priv;
292 priv->ch = ch;
293
294#ifdef CONFIG_SYNC
295 ch->has_os_fence_framework_support = true;
296#endif
297
298 err = nvgpu_mutex_init(&priv->error_notifier.mutex);
299 if (err) {
300 nvgpu_kfree(g, priv);
301 return err;
302 }
303
304 nvgpu_channel_work_completion_init(ch);
305
306 return 0;
307}
308
309static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
310{
311 struct nvgpu_channel_linux *priv = ch->os_priv;
312
313 nvgpu_mutex_destroy(&priv->error_notifier.mutex);
314 nvgpu_kfree(g, priv);
315
316 ch->os_priv = NULL;
317
318#ifdef CONFIG_SYNC
319 ch->has_os_fence_framework_support = false;
320#endif
321}
322
323static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch,
324 const char *fmt, ...)
325{
326 struct nvgpu_channel_linux *priv = ch->os_priv;
327 struct nvgpu_os_fence_framework *fence_framework;
328 char name[30];
329 va_list args;
330
331 fence_framework = &priv->fence_framework;
332
333 va_start(args, fmt);
334 vsnprintf(name, sizeof(name), fmt, args);
335 va_end(args);
336
337 fence_framework->timeline = gk20a_sync_timeline_create(name);
338
339 if (!fence_framework->timeline)
340 return -EINVAL;
341
342 return 0;
343}
344static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch)
345{
346 struct nvgpu_channel_linux *priv = ch->os_priv;
347 struct nvgpu_os_fence_framework *fence_framework;
348
349 fence_framework = &priv->fence_framework;
350
351 gk20a_sync_timeline_signal(fence_framework->timeline);
352}
353
354static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch)
355{
356 struct nvgpu_channel_linux *priv = ch->os_priv;
357 struct nvgpu_os_fence_framework *fence_framework;
358
359 fence_framework = &priv->fence_framework;
360
361 gk20a_sync_timeline_destroy(fence_framework->timeline);
362 fence_framework->timeline = NULL;
363}
364
365static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch)
366{
367 struct nvgpu_channel_linux *priv = ch->os_priv;
368 struct nvgpu_os_fence_framework *fence_framework;
369
370 fence_framework = &priv->fence_framework;
371
372 return (fence_framework->timeline != NULL);
373}
374
375int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
376{
377 struct gk20a *g = &l->g;
378 struct fifo_gk20a *f = &g->fifo;
379 int chid;
380 int err;
381
382 for (chid = 0; chid < (int)f->num_channels; chid++) {
383 struct channel_gk20a *ch = &f->channel[chid];
384
385 err = nvgpu_channel_alloc_linux(g, ch);
386 if (err)
387 goto err_clean;
388 }
389
390 g->os_channel.open = nvgpu_channel_open_linux;
391 g->os_channel.close = nvgpu_channel_close_linux;
392 g->os_channel.work_completion_signal =
393 nvgpu_channel_work_completion_signal;
394 g->os_channel.work_completion_cancel_sync =
395 nvgpu_channel_work_completion_cancel_sync;
396
397 g->os_channel.os_fence_framework_inst_exists =
398 nvgpu_channel_fence_framework_exists;
399 g->os_channel.init_os_fence_framework =
400 nvgpu_channel_init_os_fence_framework;
401 g->os_channel.signal_os_fence_framework =
402 nvgpu_channel_signal_os_fence_framework;
403 g->os_channel.destroy_os_fence_framework =
404 nvgpu_channel_destroy_os_fence_framework;
405
406 return 0;
407
408err_clean:
409 for (; chid >= 0; chid--) {
410 struct channel_gk20a *ch = &f->channel[chid];
411
412 nvgpu_channel_free_linux(g, ch);
413 }
414 return err;
415}
416
417void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l)
418{
419 struct gk20a *g = &l->g;
420 struct fifo_gk20a *f = &g->fifo;
421 unsigned int chid;
422
423 for (chid = 0; chid < f->num_channels; chid++) {
424 struct channel_gk20a *ch = &f->channel[chid];
425
426 nvgpu_channel_free_linux(g, ch);
427 }
428
429 g->os_channel.os_fence_framework_inst_exists = NULL;
430 g->os_channel.init_os_fence_framework = NULL;
431 g->os_channel.signal_os_fence_framework = NULL;
432 g->os_channel.destroy_os_fence_framework = NULL;
433}
434
435u32 nvgpu_get_gpfifo_entry_size(void)
436{
437 return sizeof(struct nvgpu_gpfifo_entry);
438}
439
440#ifdef CONFIG_DEBUG_FS
441static void trace_write_pushbuffer(struct channel_gk20a *c,
442 struct nvgpu_gpfifo_entry *g)
443{
444 void *mem = NULL;
445 unsigned int words;
446 u64 offset;
447 struct dma_buf *dmabuf = NULL;
448
449 if (gk20a_debug_trace_cmdbuf) {
450 u64 gpu_va = (u64)g->entry0 |
451 (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
452 int err;
453
454 words = pbdma_gp_entry1_length_v(g->entry1);
455 err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
456 if (!err)
457 mem = dma_buf_vmap(dmabuf);
458 }
459
460 if (mem) {
461 u32 i;
462 /*
463 * Write in batches of 128 as there seems to be a limit
464 * of how much you can output to ftrace at once.
465 */
466 for (i = 0; i < words; i += 128U) {
467 trace_gk20a_push_cmdbuf(
468 c->g->name,
469 0,
470 min(words - i, 128U),
471 offset + i * sizeof(u32),
472 mem);
473 }
474 dma_buf_vunmap(dmabuf, mem);
475 }
476}
477#endif
478
479static void trace_write_pushbuffer_range(struct channel_gk20a *c,
480 struct nvgpu_gpfifo_entry *g,
481 struct nvgpu_gpfifo_entry __user *user_gpfifo,
482 int offset,
483 int count)
484{
485#ifdef CONFIG_DEBUG_FS
486 u32 size;
487 int i;
488 struct nvgpu_gpfifo_entry *gp;
489 bool gpfifo_allocated = false;
490
491 if (!gk20a_debug_trace_cmdbuf)
492 return;
493
494 if (!g && !user_gpfifo)
495 return;
496
497 if (!g) {
498 size = count * sizeof(struct nvgpu_gpfifo_entry);
499 if (size) {
500 g = nvgpu_big_malloc(c->g, size);
501 if (!g)
502 return;
503
504 if (copy_from_user(g, user_gpfifo, size)) {
505 nvgpu_big_free(c->g, g);
506 return;
507 }
508 }
509 gpfifo_allocated = true;
510 }
511
512 gp = g + offset;
513 for (i = 0; i < count; i++, gp++)
514 trace_write_pushbuffer(c, gp);
515
516 if (gpfifo_allocated)
517 nvgpu_big_free(c->g, g);
518#endif
519}
520
521/*
522 * Handle the submit synchronization - pre-fences and post-fences.
523 */
524static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
525 struct nvgpu_channel_fence *fence,
526 struct channel_gk20a_job *job,
527 struct priv_cmd_entry **wait_cmd,
528 struct priv_cmd_entry **incr_cmd,
529 struct gk20a_fence **post_fence,
530 bool register_irq,
531 u32 flags)
532{
533 struct gk20a *g = c->g;
534 bool need_sync_fence = false;
535 bool new_sync_created = false;
536 int wait_fence_fd = -1;
537 int err = 0;
538 bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI);
539 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
540
541 if (g->aggressive_sync_destroy_thresh) {
542 nvgpu_mutex_acquire(&c->sync_lock);
543 if (!c->sync) {
544 c->sync = gk20a_channel_sync_create(c, false);
545 if (!c->sync) {
546 err = -ENOMEM;
547 nvgpu_mutex_release(&c->sync_lock);
548 goto fail;
549 }
550 new_sync_created = true;
551 }
552 nvgpu_atomic_inc(&c->sync->refcount);
553 nvgpu_mutex_release(&c->sync_lock);
554 }
555
556 if (g->ops.fifo.resetup_ramfc && new_sync_created) {
557 err = g->ops.fifo.resetup_ramfc(c);
558 if (err)
559 goto fail;
560 }
561
562 /*
563 * Optionally insert syncpt/semaphore wait in the beginning of gpfifo
564 * submission when user requested and the wait hasn't expired.
565 */
566 if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) {
567 int max_wait_cmds = c->deterministic ? 1 : 0;
568
569 if (!pre_alloc_enabled)
570 job->wait_cmd = nvgpu_kzalloc(g,
571 sizeof(struct priv_cmd_entry));
572
573 if (!job->wait_cmd) {
574 err = -ENOMEM;
575 goto fail;
576 }
577
578 if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
579 wait_fence_fd = fence->id;
580 err = c->sync->wait_fd(c->sync, wait_fence_fd,
581 job->wait_cmd, max_wait_cmds);
582 } else {
583 err = c->sync->wait_syncpt(c->sync, fence->id,
584 fence->value,
585 job->wait_cmd);
586 }
587
588 if (err)
589 goto clean_up_wait_cmd;
590
591 if (job->wait_cmd->valid)
592 *wait_cmd = job->wait_cmd;
593 }
594
595 if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) &&
596 (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE))
597 need_sync_fence = true;
598
599 /*
600 * Always generate an increment at the end of a GPFIFO submission. This
601 * is used to keep track of method completion for idle railgating. The
602 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
603 */
604 job->post_fence = gk20a_alloc_fence(c);
605 if (!job->post_fence) {
606 err = -ENOMEM;
607 goto clean_up_wait_cmd;
608 }
609 if (!pre_alloc_enabled)
610 job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));
611
612 if (!job->incr_cmd) {
613 err = -ENOMEM;
614 goto clean_up_post_fence;
615 }
616
617 if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
618 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
619 job->post_fence, need_wfi, need_sync_fence,
620 register_irq);
621 else
622 err = c->sync->incr(c->sync, job->incr_cmd,
623 job->post_fence, need_sync_fence,
624 register_irq);
625 if (!err) {
626 *incr_cmd = job->incr_cmd;
627 *post_fence = job->post_fence;
628 } else
629 goto clean_up_incr_cmd;
630
631 return 0;
632
633clean_up_incr_cmd:
634 free_priv_cmdbuf(c, job->incr_cmd);
635 if (!pre_alloc_enabled)
636 job->incr_cmd = NULL;
637clean_up_post_fence:
638 gk20a_fence_put(job->post_fence);
639 job->post_fence = NULL;
640clean_up_wait_cmd:
641 free_priv_cmdbuf(c, job->wait_cmd);
642 if (!pre_alloc_enabled)
643 job->wait_cmd = NULL;
644fail:
645 *wait_cmd = NULL;
646 return err;
647}
648
649static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
650 struct priv_cmd_entry *cmd)
651{
652 struct gk20a *g = c->g;
653 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
654 struct nvgpu_gpfifo_entry x = {
655 .entry0 = u64_lo32(cmd->gva),
656 .entry1 = u64_hi32(cmd->gva) |
657 pbdma_gp_entry1_length_f(cmd->size)
658 };
659
660 nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
661 &x, sizeof(x));
662
663 if (cmd->mem->aperture == APERTURE_SYSMEM)
664 trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0,
665 cmd->mem->cpu_va + cmd->off * sizeof(u32));
666
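	/* entry_num is a power of two, so this mask wraps the ring's put index */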
667 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
668}
669
670/*
671 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
672 * splitting into two memcpys to handle wrap-around.
673 */
674static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
675 struct nvgpu_gpfifo_entry *kern_gpfifo,
676 struct nvgpu_gpfifo_entry __user *user_gpfifo,
677 u32 num_entries)
678{
679 /* byte offsets */
680 u32 gpfifo_size =
681 c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
682 u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
683 u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
684 u32 end = start + len; /* exclusive */
685 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
686 struct nvgpu_gpfifo_entry *cpu_src;
687 int err;
688
689 if (user_gpfifo && !c->gpfifo.pipe) {
690 /*
691 * This path (from userspace to sysmem) is special in order to
692 * avoid two copies unnecessarily (from user to pipe, then from
693 * pipe to gpu sysmem buffer).
694 */
695 if (end > gpfifo_size) {
696 /* wrap-around */
697 int length0 = gpfifo_size - start;
698 int length1 = len - length0;
699 void __user *user2 = (u8 __user *)user_gpfifo + length0;
700
701 err = copy_from_user(gpfifo_mem->cpu_va + start,
702 user_gpfifo, length0);
703 if (err)
704 return err;
705
706 err = copy_from_user(gpfifo_mem->cpu_va,
707 user2, length1);
708 if (err)
709 return err;
710 } else {
711 err = copy_from_user(gpfifo_mem->cpu_va + start,
712 user_gpfifo, len);
713 if (err)
714 return err;
715 }
716
717 trace_write_pushbuffer_range(c, NULL, user_gpfifo,
718 0, num_entries);
719 goto out;
720 } else if (user_gpfifo) {
721 /* from userspace to vidmem, use the common copy path below */
722 err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
723 if (err)
724 return err;
725
726 cpu_src = c->gpfifo.pipe;
727 } else {
728 /* from kernel to either sysmem or vidmem, don't need
729 * copy_from_user so use the common path below */
730 cpu_src = kern_gpfifo;
731 }
732
733 if (end > gpfifo_size) {
734 /* wrap-around */
735 int length0 = gpfifo_size - start;
736 int length1 = len - length0;
737 void *src2 = (u8 *)cpu_src + length0;
738
739 nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
740 nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
741 } else {
742 nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
743
744 }
745
746 trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
747
748out:
749 c->gpfifo.put = (c->gpfifo.put + num_entries) &
750 (c->gpfifo.entry_num - 1);
751
752 return 0;
753}
754
755int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
756 struct nvgpu_gpfifo_entry *gpfifo,
757 struct nvgpu_submit_gpfifo_args *args,
758 u32 num_entries,
759 u32 flags,
760 struct nvgpu_channel_fence *fence,
761 struct gk20a_fence **fence_out,
762 struct fifo_profile_gk20a *profile)
763{
764 struct gk20a *g = c->g;
765 struct priv_cmd_entry *wait_cmd = NULL;
766 struct priv_cmd_entry *incr_cmd = NULL;
767 struct gk20a_fence *post_fence = NULL;
768 struct channel_gk20a_job *job = NULL;
769 /* we might need two extra gpfifo entries - one for pre fence
770 * and one for post fence. */
771 const int extra_entries = 2;
772 bool skip_buffer_refcounting = (flags &
773 NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING);
774 int err = 0;
775 bool need_job_tracking;
776 bool need_deferred_cleanup = false;
777 struct nvgpu_gpfifo_entry __user *user_gpfifo = args ?
778 (struct nvgpu_gpfifo_entry __user *)(uintptr_t)args->gpfifo : NULL;
779
780 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
781 return -ENODEV;
782
783 if (c->has_timedout)
784 return -ETIMEDOUT;
785
786 if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
787 return -ENOMEM;
788
789 /* fifo not large enough for request. Return error immediately.
790 * Kernel can insert gpfifo entries before and after user gpfifos.
791	 * So, extra_entries are added to the user request. Also, HW with a fifo of size N
792	 * can accept only N-1 entries, hence the condition below */
793 if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
794 nvgpu_err(g, "not enough gpfifo space allocated");
795 return -ENOMEM;
796 }
797
798 if (!gpfifo && !args)
799 return -EINVAL;
800
801 if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
802 NVGPU_SUBMIT_FLAGS_FENCE_GET)) &&
803 !fence)
804 return -EINVAL;
805
806 /* an address space needs to have been bound at this point. */
807 if (!gk20a_channel_as_bound(c)) {
808 nvgpu_err(g,
809 "not bound to an address space at time of gpfifo"
810 " submission.");
811 return -EINVAL;
812 }
813
814 gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY);
815
816 /* update debug settings */
817 nvgpu_ltc_sync_enabled(g);
818
819 nvgpu_log_info(g, "channel %d", c->chid);
820
821 /*
822 * Job tracking is necessary for any of the following conditions:
823 * - pre- or post-fence functionality
824 * - channel wdt
825 * - GPU rail-gating with non-deterministic channels
826 * - buffer refcounting
827 *
828 * If none of the conditions are met, then job tracking is not
829 * required and a fast submit can be done (ie. only need to write
830 * out userspace GPFIFO entries and update GP_PUT).
831 */
832 need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) ||
833 (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) ||
834 c->timeout.enabled ||
835 (g->can_railgate && !c->deterministic) ||
836 !skip_buffer_refcounting;
837
838 if (need_job_tracking) {
839 bool need_sync_framework = false;
840
841 /*
842 * If the channel is to have deterministic latency and
843 * job tracking is required, the channel must have
844 * pre-allocated resources. Otherwise, we fail the submit here
845 */
846 if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
847 return -EINVAL;
848
849 need_sync_framework =
850 gk20a_channel_sync_needs_sync_framework(g) ||
851 (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE &&
852 flags & NVGPU_SUBMIT_FLAGS_FENCE_GET);
853
854 /*
855 * Deferred clean-up is necessary for any of the following
856 * conditions:
857 * - channel's deterministic flag is not set
858 * - dependency on sync framework, which could make the
859 * behavior of the clean-up operation non-deterministic
860 * (should not be performed in the submit path)
861 * - channel wdt
862 * - GPU rail-gating with non-deterministic channels
863 * - buffer refcounting
864 *
865 * If none of the conditions are met, then deferred clean-up
866 * is not required, and we clean-up one job-tracking
867 * resource in the submit path.
868 */
869 need_deferred_cleanup = !c->deterministic ||
870 need_sync_framework ||
871 c->timeout.enabled ||
872 (g->can_railgate &&
873 !c->deterministic) ||
874 !skip_buffer_refcounting;
875
876 /*
877 * For deterministic channels, we don't allow deferred clean_up
878 * processing to occur. In cases we hit this, we fail the submit
879 */
880 if (c->deterministic && need_deferred_cleanup)
881 return -EINVAL;
882
883 if (!c->deterministic) {
884 /*
885 * Get a power ref unless this is a deterministic
886 * channel that holds them during the channel lifetime.
887 * This one is released by gk20a_channel_clean_up_jobs,
888 * via syncpt or sema interrupt, whichever is used.
889 */
890 err = gk20a_busy(g);
891 if (err) {
892 nvgpu_err(g,
893				"failed to power on gk20a to submit gpfifo, process %s",
894 current->comm);
895 return err;
896 }
897 }
898
899 if (!need_deferred_cleanup) {
900 /* clean up a single job */
901 gk20a_channel_clean_up_jobs(c, false);
902 }
903 }
904
905
906 /* Grab access to HW to deal with do_idle */
907 if (c->deterministic)
908 nvgpu_rwsem_down_read(&g->deterministic_busy);
909
910 if (c->deterministic && c->deterministic_railgate_allowed) {
911 /*
912 * Nope - this channel has dropped its own power ref. As
913 * deterministic submits don't hold power on per each submitted
914 * job like normal ones do, the GPU might railgate any time now
915 * and thus submit is disallowed.
916 */
917 err = -EINVAL;
918 goto clean_up;
919 }
920
921 trace_gk20a_channel_submit_gpfifo(g->name,
922 c->chid,
923 num_entries,
924 flags,
925 fence ? fence->id : 0,
926 fence ? fence->value : 0);
927
928 nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
929 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
930
931 /*
932 * Make sure we have enough space for gpfifo entries. Check cached
933 * values first and then read from HW. If no space, return EAGAIN
 934	 * and let userspace decide whether to retry the request.
935 */
936 if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
937 if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
938 err = -EAGAIN;
939 goto clean_up;
940 }
941 }
942
943 if (c->has_timedout) {
944 err = -ETIMEDOUT;
945 goto clean_up;
946 }
947
948 if (need_job_tracking) {
949 err = channel_gk20a_alloc_job(c, &job);
950 if (err)
951 goto clean_up;
952
953 err = gk20a_submit_prepare_syncs(c, fence, job,
954 &wait_cmd, &incr_cmd,
955 &post_fence,
956 need_deferred_cleanup,
957 flags);
958 if (err)
959 goto clean_up_job;
960 }
961
962 gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING);
963
964 if (wait_cmd)
965 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
966
967 if (gpfifo || user_gpfifo)
968 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
969 num_entries);
970 if (err)
971 goto clean_up_job;
972
973 /*
974 * And here's where we add the incr_cmd we generated earlier. It should
975 * always run!
976 */
977 if (incr_cmd)
978 gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
979
980 if (fence_out)
981 *fence_out = gk20a_fence_get(post_fence);
982
983 if (need_job_tracking)
984 /* TODO! Check for errors... */
985 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
986 gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND);
987
988 g->ops.fifo.userd_gp_put(g, c);
989
990 /* No hw access beyond this point */
991 if (c->deterministic)
992 nvgpu_rwsem_up_read(&g->deterministic_busy);
993
994 trace_gk20a_channel_submitted_gpfifo(g->name,
995 c->chid,
996 num_entries,
997 flags,
998 post_fence ? post_fence->syncpt_id : 0,
999 post_fence ? post_fence->syncpt_value : 0);
1000
1001 nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
1002 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1003
1004 gk20a_fifo_profile_snapshot(profile, PROFILE_END);
1005
1006 nvgpu_log_fn(g, "done");
1007 return err;
1008
1009clean_up_job:
1010 channel_gk20a_free_job(c, job);
1011clean_up:
1012 nvgpu_log_fn(g, "fail");
1013 gk20a_fence_put(post_fence);
1014 if (c->deterministic)
1015 nvgpu_rwsem_up_read(&g->deterministic_busy);
1016 else if (need_deferred_cleanup)
1017 gk20a_idle(g);
1018
1019 return err;
1020}
1021
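The fast-submit decision above boils down to two predicates, need_job_tracking and need_deferred_cleanup. A minimal sketch restating them as standalone helpers (the helper names are hypothetical and not part of the original file; the fields and flags mirror those used in gk20a_submit_channel_gpfifo() above):

/*
 * Sketch only: restates the submit-path decision logic as helpers.
 * submit_needs_job_tracking()/submit_needs_deferred_cleanup() are
 * hypothetical names, not part of the original file.
 */
static bool submit_needs_job_tracking(struct channel_gk20a *c,
				      struct gk20a *g, u32 flags,
				      bool skip_buffer_refcounting)
{
	return (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) ||
	       (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) ||
	       c->timeout.enabled ||
	       (g->can_railgate && !c->deterministic) ||
	       !skip_buffer_refcounting;
}

static bool submit_needs_deferred_cleanup(struct channel_gk20a *c,
					  struct gk20a *g,
					  bool need_sync_framework,
					  bool skip_buffer_refcounting)
{
	return !c->deterministic ||
	       need_sync_framework ||
	       c->timeout.enabled ||
	       (g->can_railgate && !c->deterministic) ||
	       !skip_buffer_refcounting;
}

Job tracking is skipped entirely only when the first predicate is false; the second predicate matters only once job tracking is needed, and deterministic channels reject the submit if it evaluates true.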
diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h
deleted file mode 100644
index 4a58b10c..00000000
--- a/drivers/gpu/nvgpu/common/linux/channel.h
+++ /dev/null
@@ -1,96 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __NVGPU_CHANNEL_H__
17#define __NVGPU_CHANNEL_H__
18
19#include <linux/workqueue.h>
20#include <linux/dma-buf.h>
21
22#include <nvgpu/types.h>
23
24struct channel_gk20a;
25struct nvgpu_gpfifo;
26struct nvgpu_submit_gpfifo_args;
27struct nvgpu_channel_fence;
28struct gk20a_fence;
29struct fifo_profile_gk20a;
30struct nvgpu_os_linux;
31
32struct sync_fence;
33struct sync_timeline;
34
35struct nvgpu_channel_completion_cb {
36 /*
37 * Signal channel owner via a callback, if set, in job cleanup with
38 * schedule_work. Means that something finished on the channel (perhaps
39 * more than one job).
40 */
41 void (*fn)(struct channel_gk20a *, void *);
42 void *user_data;
43 /* Make access to the two above atomic */
44 struct nvgpu_spinlock lock;
45 /* Per-channel async work task, cannot reschedule itself */
46 struct work_struct work;
47};
48
49struct nvgpu_error_notifier {
50 struct dma_buf *dmabuf;
51 void *vaddr;
52
53 struct nvgpu_notification *notification;
54
55 struct nvgpu_mutex mutex;
56};
57
58/*
59 * This struct contains fence_related data.
60 * e.g. sync_timeline for sync_fences.
61 */
62struct nvgpu_os_fence_framework {
63 struct sync_timeline *timeline;
64};
65
66struct nvgpu_channel_linux {
67 struct channel_gk20a *ch;
68
69 struct nvgpu_os_fence_framework fence_framework;
70
71 struct nvgpu_channel_completion_cb completion_cb;
72 struct nvgpu_error_notifier error_notifier;
73
74 struct dma_buf *cyclestate_buffer_handler;
75};
76
77u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);
78int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
79void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l);
80
81struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
82 void (*update_fn)(struct channel_gk20a *, void *),
83 void *update_fn_data,
84 int runlist_id,
85 bool is_privileged_channel);
86
87int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
88 struct nvgpu_gpfifo_entry *gpfifo,
89 struct nvgpu_submit_gpfifo_args *args,
90 u32 num_entries,
91 u32 flags,
92 struct nvgpu_channel_fence *fence,
93 struct gk20a_fence **fence_out,
94 struct fifo_profile_gk20a *profile);
95
96#endif /* __NVGPU_CHANNEL_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/clk.c b/drivers/gpu/nvgpu/common/linux/clk.c
deleted file mode 100644
index 414b17c4..00000000
--- a/drivers/gpu/nvgpu/common/linux/clk.c
+++ /dev/null
@@ -1,165 +0,0 @@
1/*
2 * Linux clock support
3 *
4 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/clk.h>
20
21#include <soc/tegra/tegra-dvfs.h>
22#include <soc/tegra/tegra-bpmp-dvfs.h>
23
24#include "clk.h"
25#include "os_linux.h"
26#include "platform_gk20a.h"
27
28#include "gk20a/gk20a.h"
29
30static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain)
31{
32 struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
33 unsigned long ret;
34
35 switch (api_domain) {
36 case CTRL_CLK_DOMAIN_GPCCLK:
37 if (g->clk.tegra_clk)
38 ret = g->clk.cached_rate ?
39 g->clk.cached_rate :
40 clk_get_rate(g->clk.tegra_clk);
41 else
42 ret = platform->cached_rate ?
43 platform->cached_rate :
44 clk_get_rate(platform->clk[0]);
45 break;
46 case CTRL_CLK_DOMAIN_PWRCLK:
47 ret = clk_get_rate(platform->clk[1]);
48 break;
49 default:
50 nvgpu_err(g, "unknown clock: %u", api_domain);
51 ret = 0;
52 break;
53 }
54
55 return ret;
56}
57
58static int nvgpu_linux_clk_set_rate(struct gk20a *g,
59 u32 api_domain, unsigned long rate)
60{
61 struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
62 int ret;
63
64 switch (api_domain) {
65 case CTRL_CLK_DOMAIN_GPCCLK:
66 if (g->clk.tegra_clk) {
67 ret = clk_set_rate(g->clk.tegra_clk, rate);
68 if (!ret)
69 g->clk.cached_rate = rate;
70 } else {
71 ret = clk_set_rate(platform->clk[0], rate);
72 if (!ret)
73 platform->cached_rate = rate;
74 }
75 break;
76 case CTRL_CLK_DOMAIN_PWRCLK:
77 ret = clk_set_rate(platform->clk[1], rate);
78 break;
79 default:
80 nvgpu_err(g, "unknown clock: %u", api_domain);
81 ret = -EINVAL;
82 break;
83 }
84
85 return ret;
86}
87
88static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g)
89{
90 struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
91
92 /*
 93	 * On Tegra platforms with a GPCPLL bus (gbus), the GPU tegra_clk clock
 94	 * exposed to the frequency governor is a shared user on the gbus. The
 95	 * gbus can be accessed as the GPU clock parent and incorporates DVFS data.
96 */
97 if (g->clk.tegra_clk)
98 return tegra_dvfs_get_fmax_at_vmin_safe_t(
99 clk_get_parent(g->clk.tegra_clk));
100
101 if (platform->maxmin_clk_id)
102 return tegra_bpmp_dvfs_get_fmax_at_vmin(
103 platform->maxmin_clk_id);
104
105 return 0;
106}
107
108static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g)
109{
110 struct clk *c;
111
112 c = clk_get_sys("gpu_ref", "gpu_ref");
113 if (IS_ERR(c)) {
114 nvgpu_err(g, "failed to get GPCPLL reference clock");
115 return 0;
116 }
117
118 return clk_get_rate(c);
119}
120
121static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk,
122 unsigned long rate)
123{
124 return tegra_dvfs_predict_mv_at_hz_cur_tfloor(
125 clk_get_parent(clk->tegra_clk), rate);
126}
127
128static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain)
129{
130 int ret;
131
132 switch (api_domain) {
133 case CTRL_CLK_DOMAIN_GPCCLK:
134 ret = tegra_dvfs_get_maxrate(clk_get_parent(g->clk.tegra_clk));
135 break;
136 default:
137 nvgpu_err(g, "unknown clock: %u", api_domain);
138 ret = 0;
139 break;
140 }
141
142 return ret;
143}
144
145static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk)
146{
147 return clk_prepare_enable(clk->tegra_clk);
148}
149
150static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk)
151{
152 clk_disable_unprepare(clk->tegra_clk);
153}
154
155void nvgpu_linux_init_clk_support(struct gk20a *g)
156{
157 g->ops.clk.get_rate = nvgpu_linux_clk_get_rate;
158 g->ops.clk.set_rate = nvgpu_linux_clk_set_rate;
159 g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe;
160 g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate;
161 g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor;
162 g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate;
163 g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable;
164 g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare;
165}
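Common (OS-independent) code reaches the Linux clock framework only through the g->ops.clk hooks installed above. A hedged caller-side sketch, assuming the HAL has already been populated by nvgpu_linux_init_clk_support():

	/* Sketch: query GPCCLK and request half the current rate. */
	unsigned long rate = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);

	if (g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rate / 2))
		nvgpu_err(g, "failed to set GPCCLK rate");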
diff --git a/drivers/gpu/nvgpu/common/linux/clk.h b/drivers/gpu/nvgpu/common/linux/clk.h
deleted file mode 100644
index 614a7fd7..00000000
--- a/drivers/gpu/nvgpu/common/linux/clk.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef NVGPU_COMMON_LINUX_CLK_H
18
19struct gk20a;
20void nvgpu_linux_init_clk_support(struct gk20a *g);
21
22#endif
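Note that the include guard above opens with #ifndef NVGPU_COMMON_LINUX_CLK_H but never defines the macro, so repeated inclusion is not actually prevented. A conventional guard would read as follows (hypothetical fix, not part of the original header):

#ifndef NVGPU_COMMON_LINUX_CLK_H
#define NVGPU_COMMON_LINUX_CLK_H

struct gk20a;
void nvgpu_linux_init_clk_support(struct gk20a *g);

#endif /* NVGPU_COMMON_LINUX_CLK_H */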
diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c
deleted file mode 100644
index 353f6363..00000000
--- a/drivers/gpu/nvgpu/common/linux/comptags.c
+++ /dev/null
@@ -1,140 +0,0 @@
1/*
 2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-buf.h>
18
19#include <nvgpu/comptags.h>
20
21#include <nvgpu/linux/vm.h>
22
23#include "gk20a/gk20a.h"
24#include "dmabuf.h"
25
26void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
27 struct gk20a_comptags *comptags)
28{
29 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
30 buf->dev);
31
32 if (!comptags)
33 return;
34
35 if (!priv) {
36 memset(comptags, 0, sizeof(*comptags));
37 return;
38 }
39
40 nvgpu_mutex_acquire(&priv->lock);
41 *comptags = priv->comptags;
42 nvgpu_mutex_release(&priv->lock);
43}
44
45int gk20a_alloc_or_get_comptags(struct gk20a *g,
46 struct nvgpu_os_buffer *buf,
47 struct gk20a_comptag_allocator *allocator,
48 struct gk20a_comptags *comptags)
49{
50 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
51 buf->dev);
52 u32 offset;
53 int err;
54 unsigned int ctag_granularity;
55 u32 lines;
56
57 if (!priv)
58 return -ENOSYS;
59
60 nvgpu_mutex_acquire(&priv->lock);
61
62 if (priv->comptags.allocated) {
63 /*
64 * already allocated
65 */
66 *comptags = priv->comptags;
67
68 err = 0;
69 goto exit_locked;
70 }
71
72 ctag_granularity = g->ops.fb.compression_page_size(g);
73 lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
74
 75	/* 0-sized buffer? Shouldn't occur, but let's check anyway. */
76 if (lines < 1) {
77 err = -EINVAL;
78 goto exit_locked;
79 }
80
81 /* store the allocator so we can use it when we free the ctags */
82 priv->comptag_allocator = allocator;
83 err = gk20a_comptaglines_alloc(allocator, &offset, lines);
84 if (!err) {
85 priv->comptags.offset = offset;
86 priv->comptags.lines = lines;
87 priv->comptags.needs_clear = true;
88 } else {
89 priv->comptags.offset = 0;
90 priv->comptags.lines = 0;
91 priv->comptags.needs_clear = false;
92 }
93
94 /*
95 * We don't report an error here if comptag alloc failed. The
 96	 * caller will simply fall back to incompressible kinds. It
 97	 * would not be safe to re-allocate comptags anyway on
98 * successive calls, as that would break map aliasing.
99 */
100 err = 0;
101 priv->comptags.allocated = true;
102
103 *comptags = priv->comptags;
104
105exit_locked:
106 nvgpu_mutex_release(&priv->lock);
107
108 return err;
109}
110
111bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
112{
113 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
114 buf->dev);
115 bool clear_started = false;
116
117 if (priv) {
118 nvgpu_mutex_acquire(&priv->lock);
119
120 clear_started = priv->comptags.needs_clear;
121
122 if (!clear_started)
123 nvgpu_mutex_release(&priv->lock);
124 }
125
126 return clear_started;
127}
128
129void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf,
130 bool clear_successful)
131{
132 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
133 buf->dev);
134 if (priv) {
135 if (clear_successful)
136 priv->comptags.needs_clear = false;
137
138 nvgpu_mutex_release(&priv->lock);
139 }
140}
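The clear helpers above implement a small hand-off protocol: gk20a_comptags_start_clear() returns with priv->lock still held whenever it reports that a clear is pending, and gk20a_comptags_finish_clear() is what drops that lock, so the two must always be paired. A minimal usage sketch under that assumption (clear_comptags_hw() is a hypothetical clear routine, not part of this file):

	if (gk20a_comptags_start_clear(buf)) {
		int err = clear_comptags_hw(g, buf);	/* hypothetical */

		/* Drops priv->lock; clears needs_clear only on success. */
		gk20a_comptags_finish_clear(buf, err == 0);
	}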
diff --git a/drivers/gpu/nvgpu/common/linux/cond.c b/drivers/gpu/nvgpu/common/linux/cond.c
deleted file mode 100644
index 633c34fd..00000000
--- a/drivers/gpu/nvgpu/common/linux/cond.c
+++ /dev/null
@@ -1,73 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/wait.h>
18#include <linux/sched.h>
19
20#include <nvgpu/cond.h>
21
22int nvgpu_cond_init(struct nvgpu_cond *cond)
23{
24 init_waitqueue_head(&cond->wq);
25 cond->initialized = true;
26
27 return 0;
28}
29
30void nvgpu_cond_destroy(struct nvgpu_cond *cond)
31{
32 cond->initialized = false;
33}
34
35int nvgpu_cond_signal(struct nvgpu_cond *cond)
36{
37 if (!cond->initialized)
38 return -EINVAL;
39
40 wake_up(&cond->wq);
41
42 return 0;
43}
44
45int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond)
46{
47 if (!cond->initialized)
48 return -EINVAL;
49
50 wake_up_interruptible(&cond->wq);
51
52 return 0;
53}
54
55int nvgpu_cond_broadcast(struct nvgpu_cond *cond)
56{
57 if (!cond->initialized)
58 return -EINVAL;
59
60 wake_up_all(&cond->wq);
61
62 return 0;
63}
64
65int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond)
66{
67 if (!cond->initialized)
68 return -EINVAL;
69
70 wake_up_interruptible_all(&cond->wq);
71
72 return 0;
73}
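These wrappers map struct nvgpu_cond onto a Linux waitqueue. A short producer/consumer sketch, assuming the NVGPU_COND_WAIT_INTERRUPTIBLE() macro used elsewhere in this driver (see ctxsw_trace.c below) and that nvgpu_cond_init() has been called during setup:

static struct nvgpu_cond example_cond;	/* nvgpu_cond_init() at setup time */
static bool example_ready;

static int example_consumer(void)
{
	/* Sleeps interruptibly until example_ready is true (0 = no timeout). */
	return NVGPU_COND_WAIT_INTERRUPTIBLE(&example_cond, example_ready, 0);
}

static void example_producer(void)
{
	example_ready = true;
	nvgpu_cond_signal_interruptible(&example_cond);
}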
diff --git a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c
deleted file mode 100644
index a335988a..00000000
--- a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c
+++ /dev/null
@@ -1,730 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/wait.h>
18#include <linux/ktime.h>
19#include <linux/uaccess.h>
20#include <linux/poll.h>
21#include <trace/events/gk20a.h>
22#include <uapi/linux/nvgpu.h>
23
24#include "gk20a/gk20a.h"
25#include "gk20a/gr_gk20a.h"
26
27#include <nvgpu/kmem.h>
28#include <nvgpu/log.h>
29#include <nvgpu/atomic.h>
30#include <nvgpu/barrier.h>
31
32#include "platform_gk20a.h"
33#include "os_linux.h"
34#include "ctxsw_trace.h"
35
36#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
37#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
38
39#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
40
41/* Userland-facing FIFO (one global + eventually one per VM) */
42struct gk20a_ctxsw_dev {
43 struct gk20a *g;
44
45 struct nvgpu_ctxsw_ring_header *hdr;
46 struct nvgpu_ctxsw_trace_entry *ents;
47 struct nvgpu_ctxsw_trace_filter filter;
48 bool write_enabled;
49 struct nvgpu_cond readout_wq;
50 size_t size;
51 u32 num_ents;
52
53 nvgpu_atomic_t vma_ref;
54
55 struct nvgpu_mutex write_lock;
56};
57
58
59struct gk20a_ctxsw_trace {
60 struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
61};
62
63static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
64{
65 return (hdr->write_idx == hdr->read_idx);
66}
67
68static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
69{
70 return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
71}
72
73static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
74{
75 return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
76}
77
78ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
79 loff_t *off)
80{
81 struct gk20a_ctxsw_dev *dev = filp->private_data;
82 struct gk20a *g = dev->g;
83 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
84 struct nvgpu_ctxsw_trace_entry __user *entry =
85 (struct nvgpu_ctxsw_trace_entry *) buf;
86 size_t copied = 0;
87 int err;
88
89 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
90 "filp=%p buf=%p size=%zu", filp, buf, size);
91
92 nvgpu_mutex_acquire(&dev->write_lock);
93 while (ring_is_empty(hdr)) {
94 nvgpu_mutex_release(&dev->write_lock);
95 if (filp->f_flags & O_NONBLOCK)
96 return -EAGAIN;
97 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
98 !ring_is_empty(hdr), 0);
99 if (err)
100 return err;
101 nvgpu_mutex_acquire(&dev->write_lock);
102 }
103
104 while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
105 if (ring_is_empty(hdr))
106 break;
107
108 if (copy_to_user(entry, &dev->ents[hdr->read_idx],
109 sizeof(*entry))) {
110 nvgpu_mutex_release(&dev->write_lock);
111 return -EFAULT;
112 }
113
114 hdr->read_idx++;
115 if (hdr->read_idx >= hdr->num_ents)
116 hdr->read_idx = 0;
117
118 entry++;
119 copied += sizeof(*entry);
120 size -= sizeof(*entry);
121 }
122
123 nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
124 hdr->read_idx);
125
126 *off = hdr->read_idx;
127 nvgpu_mutex_release(&dev->write_lock);
128
129 return copied;
130}
131
132static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
133{
134 struct gk20a *g = dev->g;
135
136 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
137 nvgpu_mutex_acquire(&dev->write_lock);
138 dev->write_enabled = true;
139 nvgpu_mutex_release(&dev->write_lock);
140 dev->g->ops.fecs_trace.enable(dev->g);
141 return 0;
142}
143
144static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
145{
146 struct gk20a *g = dev->g;
147
148 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
149 dev->g->ops.fecs_trace.disable(dev->g);
150 nvgpu_mutex_acquire(&dev->write_lock);
151 dev->write_enabled = false;
152 nvgpu_mutex_release(&dev->write_lock);
153 return 0;
154}
155
156static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
157 size_t size)
158{
159 struct gk20a *g = dev->g;
160 void *buf;
161 int err;
162
163 if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
164 return -EBUSY;
165
166 err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
167 if (err)
168 return err;
169
170
171 dev->hdr = buf;
172 dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
173 dev->size = size;
174 dev->num_ents = dev->hdr->num_ents;
175
176 nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
177 dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
178 return 0;
179}
180
181int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
182 void **buf, size_t *size)
183{
184 struct nvgpu_ctxsw_ring_header *hdr;
185
186 *size = roundup(*size, PAGE_SIZE);
187 hdr = vmalloc_user(*size);
188 if (!hdr)
189 return -ENOMEM;
190
191 hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
192 hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
193 hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
194 / sizeof(struct nvgpu_ctxsw_trace_entry);
195 hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
196 hdr->drop_count = 0;
197 hdr->read_idx = 0;
198 hdr->write_idx = 0;
199 hdr->write_seqno = 0;
200
201 *buf = hdr;
202 return 0;
203}
204
205int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
206{
207 struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
208
209 nvgpu_vfree(g, dev->hdr);
210 return 0;
211}
212
213static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
214 struct nvgpu_ctxsw_ring_setup_args *args)
215{
216 struct gk20a *g = dev->g;
217 size_t size = args->size;
218 int ret;
219
220 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
221
222 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
223 return -EINVAL;
224
225 nvgpu_mutex_acquire(&dev->write_lock);
226 ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
227 nvgpu_mutex_release(&dev->write_lock);
228
229 return ret;
230}
231
232static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
233 struct nvgpu_ctxsw_trace_filter_args *args)
234{
235 struct gk20a *g = dev->g;
236
237 nvgpu_mutex_acquire(&dev->write_lock);
238 dev->filter = args->filter;
239 nvgpu_mutex_release(&dev->write_lock);
240
241 if (g->ops.fecs_trace.set_filter)
242 g->ops.fecs_trace.set_filter(g, &dev->filter);
243 return 0;
244}
245
246static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
247 struct nvgpu_ctxsw_trace_filter_args *args)
248{
249 nvgpu_mutex_acquire(&dev->write_lock);
250 args->filter = dev->filter;
251 nvgpu_mutex_release(&dev->write_lock);
252
253 return 0;
254}
255
256static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
257{
258 struct gk20a *g = dev->g;
259 int err;
260
261 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
262
263 err = gk20a_busy(g);
264 if (err)
265 return err;
266
267 if (g->ops.fecs_trace.flush)
268 err = g->ops.fecs_trace.flush(g);
269
270 if (likely(!err))
271 err = g->ops.fecs_trace.poll(g);
272
273 gk20a_idle(g);
274 return err;
275}
276
277int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
278{
279 struct nvgpu_os_linux *l;
280 struct gk20a *g;
281 struct gk20a_ctxsw_trace *trace;
282 struct gk20a_ctxsw_dev *dev;
283 int err;
284 size_t size;
285 u32 n;
286
287 /* only one VM for now */
288 const int vmid = 0;
289
290 l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
291 g = gk20a_get(&l->g);
292 if (!g)
293 return -ENODEV;
294
295 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
296
297 if (!capable(CAP_SYS_ADMIN)) {
298 err = -EPERM;
299 goto free_ref;
300 }
301
302 err = gk20a_busy(g);
303 if (err)
304 goto free_ref;
305
306 trace = g->ctxsw_trace;
307 if (!trace) {
308 err = -ENODEV;
309 goto idle;
310 }
311
312 /* Allow only one user for this device */
313 dev = &trace->devs[vmid];
314 nvgpu_mutex_acquire(&dev->write_lock);
315 if (dev->hdr) {
316 err = -EBUSY;
317 goto done;
318 }
319
 320	/* By default, allocate a ring buffer big enough to accommodate
 321	 * FECS records with the default event filter. */
322
323 /* enable all traces by default */
324 NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
325
326 /* compute max number of entries generated with this filter */
327 n = g->ops.fecs_trace.max_entries(g, &dev->filter);
328
329 size = sizeof(struct nvgpu_ctxsw_ring_header) +
330 n * sizeof(struct nvgpu_ctxsw_trace_entry);
331 nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
332 size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
333
334 err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
335 if (!err) {
336 filp->private_data = dev;
337 nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
338 filp, dev, size);
339 }
340
341done:
342 nvgpu_mutex_release(&dev->write_lock);
343
344idle:
345 gk20a_idle(g);
346free_ref:
347 if (err)
348 gk20a_put(g);
349 return err;
350}
351
352int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
353{
354 struct gk20a_ctxsw_dev *dev = filp->private_data;
355 struct gk20a *g = dev->g;
356
357 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
358
359 g->ops.fecs_trace.disable(g);
360
361 nvgpu_mutex_acquire(&dev->write_lock);
362 dev->write_enabled = false;
363 nvgpu_mutex_release(&dev->write_lock);
364
365 if (dev->hdr) {
366 dev->g->ops.fecs_trace.free_user_buffer(dev->g);
367 dev->hdr = NULL;
368 }
369 gk20a_put(g);
370 return 0;
371}
372
373long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
374 unsigned long arg)
375{
376 struct gk20a_ctxsw_dev *dev = filp->private_data;
377 struct gk20a *g = dev->g;
378 u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
379 int err = 0;
380
381 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
382
383 if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
384 (_IOC_NR(cmd) == 0) ||
385 (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
386 (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
387 return -EINVAL;
388
389 memset(buf, 0, sizeof(buf));
390 if (_IOC_DIR(cmd) & _IOC_WRITE) {
391 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
392 return -EFAULT;
393 }
394
395 switch (cmd) {
396 case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
397 err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
398 break;
399 case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
400 err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
401 break;
402 case NVGPU_CTXSW_IOCTL_RING_SETUP:
403 err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
404 (struct nvgpu_ctxsw_ring_setup_args *) buf);
405 break;
406 case NVGPU_CTXSW_IOCTL_SET_FILTER:
407 err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
408 (struct nvgpu_ctxsw_trace_filter_args *) buf);
409 break;
410 case NVGPU_CTXSW_IOCTL_GET_FILTER:
411 err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
412 (struct nvgpu_ctxsw_trace_filter_args *) buf);
413 break;
414 case NVGPU_CTXSW_IOCTL_POLL:
415 err = gk20a_ctxsw_dev_ioctl_poll(dev);
416 break;
417 default:
418 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
419 cmd);
420 err = -ENOTTY;
421 }
422
423 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
424 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
425
426 return err;
427}
428
429unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
430{
431 struct gk20a_ctxsw_dev *dev = filp->private_data;
432 struct gk20a *g = dev->g;
433 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
434 unsigned int mask = 0;
435
436 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
437
438 nvgpu_mutex_acquire(&dev->write_lock);
439 poll_wait(filp, &dev->readout_wq.wq, wait);
440 if (!ring_is_empty(hdr))
441 mask |= POLLIN | POLLRDNORM;
442 nvgpu_mutex_release(&dev->write_lock);
443
444 return mask;
445}
446
447static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
448{
449 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
450 struct gk20a *g = dev->g;
451
452 nvgpu_atomic_inc(&dev->vma_ref);
453 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
454 nvgpu_atomic_read(&dev->vma_ref));
455}
456
457static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
458{
459 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
460 struct gk20a *g = dev->g;
461
462 nvgpu_atomic_dec(&dev->vma_ref);
463 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
464 nvgpu_atomic_read(&dev->vma_ref));
465}
466
467static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
468 .open = gk20a_ctxsw_dev_vma_open,
469 .close = gk20a_ctxsw_dev_vma_close,
470};
471
472int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
473 struct vm_area_struct *vma)
474{
475 return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
476}
477
478int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
479{
480 struct gk20a_ctxsw_dev *dev = filp->private_data;
481 struct gk20a *g = dev->g;
482 int ret;
483
484 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
485 vma->vm_start, vma->vm_end);
486
487 ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
488 if (likely(!ret)) {
489 vma->vm_private_data = dev;
490 vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
491 vma->vm_ops->open(vma);
492 }
493
494 return ret;
495}
496
497#ifdef CONFIG_GK20A_CTXSW_TRACE
498static int gk20a_ctxsw_init_devs(struct gk20a *g)
499{
500 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
501 struct gk20a_ctxsw_dev *dev = trace->devs;
502 int err;
503 int i;
504
505 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
506 dev->g = g;
507 dev->hdr = NULL;
508 dev->write_enabled = false;
509 nvgpu_cond_init(&dev->readout_wq);
510 err = nvgpu_mutex_init(&dev->write_lock);
511 if (err)
512 return err;
513 nvgpu_atomic_set(&dev->vma_ref, 0);
514 dev++;
515 }
516 return 0;
517}
518#endif
519
520int gk20a_ctxsw_trace_init(struct gk20a *g)
521{
522#ifdef CONFIG_GK20A_CTXSW_TRACE
523 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
524 int err;
525
526 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
527
528 /* if tracing is not supported, skip this */
529 if (!g->ops.fecs_trace.init)
530 return 0;
531
532 if (likely(trace))
533 return 0;
534
535 trace = nvgpu_kzalloc(g, sizeof(*trace));
536 if (unlikely(!trace))
537 return -ENOMEM;
538 g->ctxsw_trace = trace;
539
540 err = gk20a_ctxsw_init_devs(g);
541 if (err)
542 goto fail;
543
544 err = g->ops.fecs_trace.init(g);
545 if (unlikely(err))
546 goto fail;
547
548 return 0;
549
550fail:
551 memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
552 nvgpu_kfree(g, trace);
553 g->ctxsw_trace = NULL;
554 return err;
555#else
556 return 0;
557#endif
558}
559
560void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
561{
562#ifdef CONFIG_GK20A_CTXSW_TRACE
563 struct gk20a_ctxsw_trace *trace;
564 struct gk20a_ctxsw_dev *dev;
565 int i;
566
567 if (!g->ctxsw_trace)
568 return;
569
570 trace = g->ctxsw_trace;
571 dev = trace->devs;
572
573 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
574 nvgpu_mutex_destroy(&dev->write_lock);
575 dev++;
576 }
577
578 nvgpu_kfree(g, g->ctxsw_trace);
579 g->ctxsw_trace = NULL;
580
581 g->ops.fecs_trace.deinit(g);
582#endif
583}
584
585int gk20a_ctxsw_trace_write(struct gk20a *g,
586 struct nvgpu_ctxsw_trace_entry *entry)
587{
588 struct nvgpu_ctxsw_ring_header *hdr;
589 struct gk20a_ctxsw_dev *dev;
590 int ret = 0;
591 const char *reason;
592 u32 write_idx;
593
594 if (!g->ctxsw_trace)
595 return 0;
596
597 if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
598 return -ENODEV;
599
600 dev = &g->ctxsw_trace->devs[entry->vmid];
601 hdr = dev->hdr;
602
603 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
604 "dev=%p hdr=%p", dev, hdr);
605
606 nvgpu_mutex_acquire(&dev->write_lock);
607
608 if (unlikely(!hdr)) {
609 /* device has been released */
610 ret = -ENODEV;
611 goto done;
612 }
613
614 write_idx = hdr->write_idx;
615 if (write_idx >= dev->num_ents) {
616 nvgpu_err(dev->g,
617 "write_idx=%u out of range [0..%u]",
618 write_idx, dev->num_ents);
619 ret = -ENOSPC;
620 reason = "write_idx out of range";
621 goto disable;
622 }
623
624 entry->seqno = hdr->write_seqno++;
625
626 if (!dev->write_enabled) {
627 ret = -EBUSY;
628 reason = "write disabled";
629 goto drop;
630 }
631
632 if (unlikely(ring_is_full(hdr))) {
633 ret = -ENOSPC;
634 reason = "user fifo full";
635 goto drop;
636 }
637
638 if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
639 reason = "filtered out";
640 goto filter;
641 }
642
643 nvgpu_log(g, gpu_dbg_ctxsw,
644 "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
645 entry->seqno, entry->context_id, entry->pid,
646 entry->tag, entry->timestamp);
647
648 dev->ents[write_idx] = *entry;
649
650 /* ensure record is written before updating write index */
651 nvgpu_smp_wmb();
652
653 write_idx++;
654 if (unlikely(write_idx >= hdr->num_ents))
655 write_idx = 0;
656 hdr->write_idx = write_idx;
657 nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
658 hdr->read_idx, hdr->write_idx, ring_len(hdr));
659
660 nvgpu_mutex_release(&dev->write_lock);
661 return ret;
662
663disable:
664 g->ops.fecs_trace.disable(g);
665
666drop:
667 hdr->drop_count++;
668
669filter:
670 nvgpu_log(g, gpu_dbg_ctxsw,
671 "dropping seqno=%d context_id=%08x pid=%lld "
672 "tag=%x time=%llx (%s)",
673 entry->seqno, entry->context_id, entry->pid,
674 entry->tag, entry->timestamp, reason);
675
676done:
677 nvgpu_mutex_release(&dev->write_lock);
678 return ret;
679}
680
681void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
682{
683 struct gk20a_ctxsw_dev *dev;
684
685 if (!g->ctxsw_trace)
686 return;
687
688 dev = &g->ctxsw_trace->devs[vmid];
689 nvgpu_cond_signal_interruptible(&dev->readout_wq);
690}
691
692void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
693{
694#ifdef CONFIG_GK20A_CTXSW_TRACE
695 struct nvgpu_ctxsw_trace_entry entry = {
696 .vmid = 0,
697 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
698 .context_id = 0,
699 .pid = ch->tgid,
700 };
701
702 if (!g->ctxsw_trace)
703 return;
704
705 g->ops.ptimer.read_ptimer(g, &entry.timestamp);
706 gk20a_ctxsw_trace_write(g, &entry);
707 gk20a_ctxsw_trace_wake_up(g, 0);
708#endif
709 trace_gk20a_channel_reset(ch->chid, ch->tsgid);
710}
711
712void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
713{
714#ifdef CONFIG_GK20A_CTXSW_TRACE
715 struct nvgpu_ctxsw_trace_entry entry = {
716 .vmid = 0,
717 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
718 .context_id = 0,
719 .pid = tsg->tgid,
720 };
721
722 if (!g->ctxsw_trace)
723 return;
724
725 g->ops.ptimer.read_ptimer(g, &entry.timestamp);
726 gk20a_ctxsw_trace_write(g, &entry);
727 gk20a_ctxsw_trace_wake_up(g, 0);
728#endif
729 trace_gk20a_channel_reset(~0, tsg->tsgid);
730}
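The ring helpers near the top of this file treat the buffer as full when advancing write_idx would land on read_idx, so a ring with num_ents slots holds at most num_ents - 1 entries before records start being dropped (drop_count++). A small self-check of that invariant, sketched against the header fields used above:

static void ctxsw_ring_invariant_example(void)
{
	/* With num_ents = 4 the ring holds at most 3 entries. */
	struct nvgpu_ctxsw_ring_header hdr = { .num_ents = 4 };

	hdr.read_idx = 0;
	hdr.write_idx = 3;	/* three records written, none read */

	WARN_ON(!ring_is_full(&hdr));	/* (3 + 1) % 4 == read_idx */
	WARN_ON(ring_len(&hdr) != 3);	/* (3 - 0) % 4 */
}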
diff --git a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h
deleted file mode 100644
index 88ca7f25..00000000
--- a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __CTXSW_TRACE_H__
18#define __CTXSW_TRACE_H__
19
20#include <nvgpu/types.h>
21
22#define GK20A_CTXSW_TRACE_NUM_DEVS 1
23
24struct file;
25struct inode;
26struct poll_table_struct;
27
28struct gk20a;
29
30int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
31int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
32long gk20a_ctxsw_dev_ioctl(struct file *filp,
33 unsigned int cmd, unsigned long arg);
34ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
35 size_t size, loff_t *offs);
36unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
37 struct poll_table_struct *pts);
38
39#endif /* __CTXSW_TRACE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c
deleted file mode 100644
index 8738f3e7..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug.c
+++ /dev/null
@@ -1,452 +0,0 @@
1/*
2 * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "debug_ce.h"
17#include "debug_fifo.h"
18#include "debug_gr.h"
19#include "debug_allocator.h"
20#include "debug_kmem.h"
21#include "debug_pmu.h"
22#include "debug_sched.h"
23#include "debug_hal.h"
24#include "debug_xve.h"
25#include "os_linux.h"
26#include "platform_gk20a.h"
27
28#include "gk20a/gk20a.h"
29
30#include <linux/debugfs.h>
31#include <linux/seq_file.h>
32#include <linux/uaccess.h>
33
34#include <nvgpu/debug.h>
35
36unsigned int gk20a_debug_trace_cmdbuf;
37
38static inline void gk20a_debug_write_printk(void *ctx, const char *str,
39 size_t len)
40{
41 pr_info("%s", str);
42}
43
44static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
45 size_t len)
46{
47 seq_write((struct seq_file *)ctx, str, len);
48}
49
50void gk20a_debug_output(struct gk20a_debug_output *o,
51 const char *fmt, ...)
52{
53 va_list args;
54 int len;
55
56 va_start(args, fmt);
57 len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
58 va_end(args);
59 o->fn(o->ctx, o->buf, len);
60}
61
62static int gk20a_gr_dump_regs(struct gk20a *g,
63 struct gk20a_debug_output *o)
64{
65 if (g->ops.gr.dump_gr_regs)
66 gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));
67
68 return 0;
69}
70
71int gk20a_gr_debug_dump(struct gk20a *g)
72{
73 struct gk20a_debug_output o = {
74 .fn = gk20a_debug_write_printk
75 };
76
77 gk20a_gr_dump_regs(g, &o);
78
79 return 0;
80}
81
82static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
83{
84 struct device *dev = s->private;
85 struct gk20a *g = gk20a_get_platform(dev)->g;
86 struct gk20a_debug_output o = {
87 .fn = gk20a_debug_write_to_seqfile,
88 .ctx = s,
89 };
90 int err;
91
92 err = gk20a_busy(g);
93 if (err) {
94 nvgpu_err(g, "failed to power on gpu: %d", err);
95 return -EINVAL;
96 }
97
98 gk20a_gr_dump_regs(g, &o);
99
100 gk20a_idle(g);
101
102 return 0;
103}
104
105void gk20a_debug_dump(struct gk20a *g)
106{
107 struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
108 struct gk20a_debug_output o = {
109 .fn = gk20a_debug_write_printk
110 };
111
112 if (platform->dump_platform_dependencies)
113 platform->dump_platform_dependencies(dev_from_gk20a(g));
114
115 /* HAL only initialized after 1st power-on */
116 if (g->ops.debug.show_dump)
117 g->ops.debug.show_dump(g, &o);
118}
119
120static int gk20a_debug_show(struct seq_file *s, void *unused)
121{
122 struct device *dev = s->private;
123 struct gk20a_debug_output o = {
124 .fn = gk20a_debug_write_to_seqfile,
125 .ctx = s,
126 };
127 struct gk20a *g;
128 int err;
129
130 g = gk20a_get_platform(dev)->g;
131
132 err = gk20a_busy(g);
133 if (err) {
134 nvgpu_err(g, "failed to power on gpu: %d", err);
135 return -EFAULT;
136 }
137
138 /* HAL only initialized after 1st power-on */
139 if (g->ops.debug.show_dump)
140 g->ops.debug.show_dump(g, &o);
141
142 gk20a_idle(g);
143 return 0;
144}
145
146static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
147{
148 return single_open(file, gk20a_gr_debug_show, inode->i_private);
149}
150
151static int gk20a_debug_open(struct inode *inode, struct file *file)
152{
153 return single_open(file, gk20a_debug_show, inode->i_private);
154}
155
156static const struct file_operations gk20a_gr_debug_fops = {
157 .open = gk20a_gr_debug_open,
158 .read = seq_read,
159 .llseek = seq_lseek,
160 .release = single_release,
161};
162
163static const struct file_operations gk20a_debug_fops = {
164 .open = gk20a_debug_open,
165 .read = seq_read,
166 .llseek = seq_lseek,
167 .release = single_release,
168};
169
170void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
171{
172 g->ops.fifo.dump_pbdma_status(g, o);
173 g->ops.fifo.dump_eng_status(g, o);
174
175 gk20a_debug_dump_all_channel_status_ramfc(g, o);
176}
177
178static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
179{
180 char buf[3];
181 struct gk20a *g = file->private_data;
182
183 if (g->mm.disable_bigpage)
184 buf[0] = 'Y';
185 else
186 buf[0] = 'N';
187 buf[1] = '\n';
188 buf[2] = 0x00;
189 return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
190}
191
192static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos)
193{
194 char buf[32];
195 int buf_size;
196 bool bv;
197 struct gk20a *g = file->private_data;
198
199 buf_size = min(count, (sizeof(buf)-1));
200 if (copy_from_user(buf, user_buf, buf_size))
201 return -EFAULT;
202
203 if (strtobool(buf, &bv) == 0) {
204 g->mm.disable_bigpage = bv;
205 gk20a_init_gpu_characteristics(g);
206 }
207
208 return count;
209}
210
211static struct file_operations disable_bigpage_fops = {
212 .open = simple_open,
213 .read = disable_bigpage_read,
214 .write = disable_bigpage_write,
215};
216
217static int railgate_residency_show(struct seq_file *s, void *data)
218{
219 struct gk20a *g = s->private;
220 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
221 unsigned long time_since_last_state_transition_ms;
222 unsigned long total_rail_gate_time_ms;
223 unsigned long total_rail_ungate_time_ms;
224
225 if (platform->is_railgated(dev_from_gk20a(g))) {
226 time_since_last_state_transition_ms =
227 jiffies_to_msecs(jiffies -
228 g->pstats.last_rail_gate_complete);
229 total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
230 total_rail_gate_time_ms =
231 g->pstats.total_rail_gate_time_ms +
232 time_since_last_state_transition_ms;
233 } else {
234 time_since_last_state_transition_ms =
235 jiffies_to_msecs(jiffies -
236 g->pstats.last_rail_ungate_complete);
237 total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
238 total_rail_ungate_time_ms =
239 g->pstats.total_rail_ungate_time_ms +
240 time_since_last_state_transition_ms;
241 }
242
243 seq_printf(s, "Time with Rails Gated: %lu ms\n"
244 "Time with Rails UnGated: %lu ms\n"
245 "Total railgating cycles: %lu\n",
246 total_rail_gate_time_ms,
247 total_rail_ungate_time_ms,
248 g->pstats.railgating_cycle_count - 1);
249 return 0;
250
251}
252
253static int railgate_residency_open(struct inode *inode, struct file *file)
254{
255 return single_open(file, railgate_residency_show, inode->i_private);
256}
257
258static const struct file_operations railgate_residency_fops = {
259 .open = railgate_residency_open,
260 .read = seq_read,
261 .llseek = seq_lseek,
262 .release = single_release,
263};
264
265static int gk20a_railgating_debugfs_init(struct gk20a *g)
266{
267 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
268 struct dentry *d;
269
270 d = debugfs_create_file(
271 "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
272 &railgate_residency_fops);
273 if (!d)
274 return -ENOMEM;
275
276 return 0;
277}
278static ssize_t timeouts_enabled_read(struct file *file,
279 char __user *user_buf, size_t count, loff_t *ppos)
280{
281 char buf[3];
282 struct gk20a *g = file->private_data;
283
284 if (nvgpu_is_timeouts_enabled(g))
285 buf[0] = 'Y';
286 else
287 buf[0] = 'N';
288 buf[1] = '\n';
289 buf[2] = 0x00;
290 return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
291}
292
293static ssize_t timeouts_enabled_write(struct file *file,
294 const char __user *user_buf, size_t count, loff_t *ppos)
295{
296 char buf[3];
297 int buf_size;
298 bool timeouts_enabled;
299 struct gk20a *g = file->private_data;
300
301 buf_size = min(count, (sizeof(buf)-1));
302 if (copy_from_user(buf, user_buf, buf_size))
303 return -EFAULT;
304
305 if (strtobool(buf, &timeouts_enabled) == 0) {
306 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
307 if (timeouts_enabled == false) {
308 /* requesting to disable timeouts */
309 if (g->timeouts_disabled_by_user == false) {
310 nvgpu_atomic_inc(&g->timeouts_disabled_refcount);
311 g->timeouts_disabled_by_user = true;
312 }
313 } else {
314 /* requesting to enable timeouts */
315 if (g->timeouts_disabled_by_user == true) {
316 nvgpu_atomic_dec(&g->timeouts_disabled_refcount);
317 g->timeouts_disabled_by_user = false;
318 }
319 }
320 nvgpu_mutex_release(&g->dbg_sessions_lock);
321 }
322
323 return count;
324}
325
326static const struct file_operations timeouts_enabled_fops = {
327 .open = simple_open,
328 .read = timeouts_enabled_read,
329 .write = timeouts_enabled_write,
330};
331
332void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
333{
334 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
335 struct device *dev = dev_from_gk20a(g);
336
337 l->debugfs = debugfs_create_dir(dev_name(dev), NULL);
338 if (!l->debugfs)
339 return;
340
341 if (debugfs_symlink)
342 l->debugfs_alias =
343 debugfs_create_symlink(debugfs_symlink,
344 NULL, dev_name(dev));
345
346 debugfs_create_file("status", S_IRUGO, l->debugfs,
347 dev, &gk20a_debug_fops);
348 debugfs_create_file("gr_status", S_IRUGO, l->debugfs,
349 dev, &gk20a_gr_debug_fops);
350 debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
351 l->debugfs, &gk20a_debug_trace_cmdbuf);
352
353 debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
354 l->debugfs, &g->ch_wdt_timeout_ms);
355
356 debugfs_create_u32("disable_syncpoints", S_IRUGO,
357 l->debugfs, &g->disable_syncpoints);
358
359 /* New debug logging API. */
360 debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR,
361 l->debugfs, &g->log_mask);
362 debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
363 l->debugfs, &g->log_trace);
364
365 l->debugfs_ltc_enabled =
366 debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
367 l->debugfs,
368 &g->mm.ltc_enabled_target);
369
370 l->debugfs_gr_idle_timeout_default =
371 debugfs_create_u32("gr_idle_timeout_default_us",
372 S_IRUGO|S_IWUSR, l->debugfs,
373 &g->gr_idle_timeout_default);
374 l->debugfs_timeouts_enabled =
375 debugfs_create_file("timeouts_enabled",
376 S_IRUGO|S_IWUSR,
377 l->debugfs,
378 g,
379 &timeouts_enabled_fops);
380
381 l->debugfs_disable_bigpage =
382 debugfs_create_file("disable_bigpage",
383 S_IRUGO|S_IWUSR,
384 l->debugfs,
385 g,
386 &disable_bigpage_fops);
387
388 l->debugfs_timeslice_low_priority_us =
389 debugfs_create_u32("timeslice_low_priority_us",
390 S_IRUGO|S_IWUSR,
391 l->debugfs,
392 &g->timeslice_low_priority_us);
393 l->debugfs_timeslice_medium_priority_us =
394 debugfs_create_u32("timeslice_medium_priority_us",
395 S_IRUGO|S_IWUSR,
396 l->debugfs,
397 &g->timeslice_medium_priority_us);
398 l->debugfs_timeslice_high_priority_us =
399 debugfs_create_u32("timeslice_high_priority_us",
400 S_IRUGO|S_IWUSR,
401 l->debugfs,
402 &g->timeslice_high_priority_us);
403 l->debugfs_runlist_interleave =
404 debugfs_create_bool("runlist_interleave",
405 S_IRUGO|S_IWUSR,
406 l->debugfs,
407 &g->runlist_interleave);
408 l->debugfs_force_preemption_gfxp =
409 debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
410 l->debugfs,
411 &g->gr.ctx_vars.force_preemption_gfxp);
412
413 l->debugfs_force_preemption_cilp =
414 debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
415 l->debugfs,
416 &g->gr.ctx_vars.force_preemption_cilp);
417
418 l->debugfs_dump_ctxsw_stats =
419 debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
420 S_IRUGO|S_IWUSR, l->debugfs,
421 &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close);
422
423 gr_gk20a_debugfs_init(g);
424 gk20a_pmu_debugfs_init(g);
425 gk20a_railgating_debugfs_init(g);
426#ifdef CONFIG_NVGPU_SUPPORT_CDE
427 gk20a_cde_debugfs_init(g);
428#endif
429 gk20a_ce_debugfs_init(g);
430 nvgpu_alloc_debugfs_init(g);
431 nvgpu_hal_debugfs_init(g);
432 gk20a_fifo_debugfs_init(g);
433 gk20a_sched_debugfs_init(g);
434#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
435 nvgpu_kmem_debugfs_init(g);
436#endif
437 if (g->pci_vendor_id)
438 nvgpu_xve_debugfs_init(g);
439}
440
441void gk20a_debug_deinit(struct gk20a *g)
442{
443 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
444
445 if (!l->debugfs)
446 return;
447
448 gk20a_fifo_debugfs_deinit(g);
449
450 debugfs_remove_recursive(l->debugfs);
451 debugfs_remove(l->debugfs_alias);
452}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
deleted file mode 100644
index d63a9030..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_allocator.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_allocator.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/allocator.h>
22
23static int __alloc_show(struct seq_file *s, void *unused)
24{
25 struct nvgpu_allocator *a = s->private;
26
27 nvgpu_alloc_print_stats(a, s, 1);
28
29 return 0;
30}
31
32static int __alloc_open(struct inode *inode, struct file *file)
33{
34 return single_open(file, __alloc_show, inode->i_private);
35}
36
37static const struct file_operations __alloc_fops = {
38 .open = __alloc_open,
39 .read = seq_read,
40 .llseek = seq_lseek,
41 .release = single_release,
42};
43
44void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
45{
46 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
47
48 if (!l->debugfs_allocators)
49 return;
50
51 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
52 l->debugfs_allocators,
53 a, &__alloc_fops);
54}
55
56void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
57{
58}
59
60void nvgpu_alloc_debugfs_init(struct gk20a *g)
61{
62 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
63
64 l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs);
65 if (IS_ERR_OR_NULL(l->debugfs_allocators)) {
66 l->debugfs_allocators = NULL;
67 return;
68 }
69}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
deleted file mode 100644
index 1b21cfc5..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_allocator.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
16#define __NVGPU_DEBUG_ALLOCATOR_H__
17
18struct gk20a;
19void nvgpu_alloc_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
deleted file mode 100644
index f0afa6ee..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_cde.c
+++ /dev/null
@@ -1,53 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "platform_gk20a.h"
17#include "os_linux.h"
18
19#include <linux/debugfs.h>
20
21
22static ssize_t gk20a_cde_reload_write(struct file *file,
23 const char __user *userbuf, size_t count, loff_t *ppos)
24{
25 struct nvgpu_os_linux *l = file->private_data;
26 gk20a_cde_reload(l);
27 return count;
28}
29
30static const struct file_operations gk20a_cde_reload_fops = {
31 .open = simple_open,
32 .write = gk20a_cde_reload_write,
33};
34
35void gk20a_cde_debugfs_init(struct gk20a *g)
36{
37 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
38 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
39
40 if (!platform->has_cde)
41 return;
42
43 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
44 l->debugfs, &l->cde_app.shader_parameter);
45 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
46 l->debugfs, &l->cde_app.ctx_count);
47 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
48 l->debugfs, &l->cde_app.ctx_usecount);
49 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
50 l->debugfs, &l->cde_app.ctx_count_top);
51 debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
52 l, &gk20a_cde_reload_fops);
53}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h
deleted file mode 100644
index 4895edd6..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_cde.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_CDE_H__
16#define __NVGPU_DEBUG_CDE_H__
17
18struct gk20a;
19void gk20a_cde_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_CDE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c
deleted file mode 100644
index cea0bb47..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_ce.c
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_ce.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19
20void gk20a_ce_debugfs_init(struct gk20a *g)
21{
22 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
23
24 debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
25 l->debugfs, &g->ce_app.ctx_count);
26 debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
27 l->debugfs, &g->ce_app.app_state);
28 debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
29 l->debugfs, &g->ce_app.next_ctx_id);
30}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h
deleted file mode 100644
index 2a8750c4..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_ce.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_CE_H__
16#define __NVGPU_DEBUG_CE_H__
17
18struct gk20a;
19void gk20a_ce_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_CE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_clk.c b/drivers/gpu/nvgpu/common/linux/debug_clk.c
deleted file mode 100644
index 2484d44b..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_clk.c
+++ /dev/null
@@ -1,271 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include <linux/uaccess.h>
16#include <linux/debugfs.h>
17#include <linux/seq_file.h>
18
19#include "gm20b/clk_gm20b.h"
20#include "os_linux.h"
21#include "platform_gk20a.h"
22
23static int rate_get(void *data, u64 *val)
24{
25 struct gk20a *g = (struct gk20a *)data;
26 struct clk_gk20a *clk = &g->clk;
27
28 *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
29 return 0;
30}
31static int rate_set(void *data, u64 val)
32{
33 struct gk20a *g = (struct gk20a *)data;
34 return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val);
35}
36DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
37
38static int pll_reg_show(struct seq_file *s, void *data)
39{
40 struct gk20a *g = s->private;
41 struct nvgpu_clk_pll_debug_data d;
42 u32 reg, m, n, pl, f;
43 int err = 0;
44
45 if (g->ops.clk.get_pll_debug_data) {
46 err = g->ops.clk.get_pll_debug_data(g, &d);
47 if (err)
48 return err;
49 } else {
50 return -EINVAL;
51 }
52
53 seq_printf(s, "bypassctrl = %s, ",
54 d.trim_sys_bypassctrl_val ? "bypass" : "vco");
55 seq_printf(s, "sel_vco = %s, ",
56 d.trim_sys_sel_vco_val ? "vco" : "bypass");
57
58 seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val,
59 d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled",
60 d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked",
61 d.trim_sys_gpcpll_cfg_sync_on ? "sync_on" : "sync_off");
62
63 reg = d.trim_sys_gpcpll_coeff_val;
64 m = d.trim_sys_gpcpll_coeff_mdiv;
65 n = d.trim_sys_gpcpll_coeff_ndiv;
66 pl = d.trim_sys_gpcpll_coeff_pldiv;
67 f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl));
68 seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
69 seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
70
71 seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n",
72 d.trim_sys_gpcpll_dvfs0_val,
73 d.trim_sys_gpcpll_dvfs0_dfs_coeff,
74 d.trim_sys_gpcpll_dvfs0_dfs_det_max,
75 d.trim_sys_gpcpll_dvfs0_dfs_dc_offset);
76
77 return 0;
78}
79
80static int pll_reg_open(struct inode *inode, struct file *file)
81{
82 return single_open(file, pll_reg_show, inode->i_private);
83}
84
85static const struct file_operations pll_reg_fops = {
86 .open = pll_reg_open,
87 .read = seq_read,
88 .llseek = seq_lseek,
89 .release = single_release,
90};
91
92static int pll_reg_raw_show(struct seq_file *s, void *data)
93{
94 struct gk20a *g = s->private;
95 struct nvgpu_clk_pll_debug_data d;
96 u32 reg;
97 int err = 0;
98
99 if (g->ops.clk.get_pll_debug_data) {
100 err = g->ops.clk.get_pll_debug_data(g, &d);
101 if (err)
102 return err;
103 } else {
104 return -EINVAL;
105 }
106
107 seq_puts(s, "GPCPLL REGISTERS:\n");
108 for (reg = d.trim_sys_gpcpll_cfg_reg;
109 reg <= d.trim_sys_gpcpll_dvfs2_reg;
110 reg += sizeof(u32))
111 seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
112
113 seq_puts(s, "\nGPC CLK OUT REGISTERS:\n");
114
115 seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg,
116 d.trim_sys_sel_vco_val);
117 seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg,
118 d.trim_sys_gpc2clk_out_val);
119 seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg,
120 d.trim_sys_bypassctrl_val);
121
122 return 0;
123}
124
125static int pll_reg_raw_open(struct inode *inode, struct file *file)
126{
127 return single_open(file, pll_reg_raw_show, inode->i_private);
128}
129
130static ssize_t pll_reg_raw_write(struct file *file,
131 const char __user *userbuf, size_t count, loff_t *ppos)
132{
133 struct gk20a *g = file->f_path.dentry->d_inode->i_private;
134 char buf[80];
135 u32 reg, val;
136 int err = 0;
137
138 if (sizeof(buf) <= count)
139 return -EINVAL;
140
141 if (copy_from_user(buf, userbuf, count))
142 return -EFAULT;
143
144 /* terminate buffer and trim - white spaces may be appended
145 * at the end when invoked from shell command line */
146 buf[count] = '\0';
147 strim(buf);
148
149 if (sscanf(buf, "[0x%x] = 0x%x", &reg, &val) != 2)
150 return -EINVAL;
151
152	if (g->ops.clk.pll_reg_write)
153		err = g->ops.clk.pll_reg_write(g, reg, val);
154	else
155		err = -EINVAL;
156
157	return err ? err : count;
158}
159
160static const struct file_operations pll_reg_raw_fops = {
161 .open = pll_reg_raw_open,
162 .read = seq_read,
163 .write = pll_reg_raw_write,
164 .llseek = seq_lseek,
165 .release = single_release,
166};
167
168static int monitor_get(void *data, u64 *val)
169{
170 struct gk20a *g = (struct gk20a *)data;
171 int err = 0;
172
173 if (g->ops.clk.get_gpcclk_clock_counter)
174 err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val);
175 else
176 err = -EINVAL;
177
178 return err;
179}
180DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
181
182static int voltage_get(void *data, u64 *val)
183{
184 struct gk20a *g = (struct gk20a *)data;
185 int err = 0;
186
187 if (g->ops.clk.get_voltage)
188 err = g->ops.clk.get_voltage(&g->clk, val);
189 else
190 err = -EINVAL;
191
192 return err;
193}
194DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n");
195
196static int pll_param_show(struct seq_file *s, void *data)
197{
198 struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms();
199
200 seq_printf(s, "ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n",
201 gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope,
202 gpc_pll_params->vco_ctrl);
203 return 0;
204}
205
206static int pll_param_open(struct inode *inode, struct file *file)
207{
208 return single_open(file, pll_param_show, inode->i_private);
209}
210
211static const struct file_operations pll_param_fops = {
212 .open = pll_param_open,
213 .read = seq_read,
214 .llseek = seq_lseek,
215 .release = single_release,
216};
217
218int gm20b_clk_init_debugfs(struct gk20a *g)
219{
220 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
221 struct dentry *d;
222
223 if (!l->debugfs)
224 return -EINVAL;
225
226 d = debugfs_create_file(
227 "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops);
228 if (!d)
229 goto err_out;
230
231 d = debugfs_create_file(
232 "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops);
233 if (!d)
234 goto err_out;
235
236 d = debugfs_create_file("pll_reg_raw",
237 S_IRUGO, l->debugfs, g, &pll_reg_raw_fops);
238 if (!d)
239 goto err_out;
240
241 d = debugfs_create_file(
242 "monitor", S_IRUGO, l->debugfs, g, &monitor_fops);
243 if (!d)
244 goto err_out;
245
246 d = debugfs_create_file(
247 "voltage", S_IRUGO, l->debugfs, g, &voltage_fops);
248 if (!d)
249 goto err_out;
250
251 d = debugfs_create_file(
252 "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops);
253 if (!d)
254 goto err_out;
255
256 d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs,
257 (u32 *)&g->clk.gpc_pll.mode);
258 if (!d)
259 goto err_out;
260
261 d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO,
262 l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq);
263 if (!d)
264 goto err_out;
265
266 return 0;
267
268err_out:
269 pr_err("%s: Failed to make debugfs node\n", __func__);
270 return -ENOMEM;
271}
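Note: pll_reg_show() derives the PLL output frequency from the GPCPLL coefficients as f = clk_in * N / (M * PLdiv) and reports the GPU clock as f / 2 (gpcclk is half of gpc2clk). A minimal sketch with hypothetical coefficient values; the function name is illustrative and it assumes nvgpu_pl_to_div() has already converted the PL field to a linear post-divider:

    /* Hypothetical example of the arithmetic used in pll_reg_show(). */
    static u32 example_gpcpll_gpu_khz(u32 clk_in_khz, u32 m, u32 n, u32 pl_div)
    {
            u32 gpc2clk_khz = clk_in_khz * n / (m * pl_div);

            return gpc2clk_khz / 2;	/* reported as "gpu_f" */
    }

    /*
     * e.g. clk_in = 38400 kHz, M = 1, N = 52, PLdiv = 1:
     *   gpc2clk = 1996800 kHz, gpu_f = 998400 kHz (~998 MHz).
     */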
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
deleted file mode 100644
index 2b5674c0..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c
+++ /dev/null
@@ -1,378 +0,0 @@
1/*
2 * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_fifo.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/sort.h>
22#include <nvgpu/timers.h>
23
24void __gk20a_fifo_profile_free(struct nvgpu_ref *ref);
25
26static void *gk20a_fifo_sched_debugfs_seq_start(
27 struct seq_file *s, loff_t *pos)
28{
29 struct gk20a *g = s->private;
30 struct fifo_gk20a *f = &g->fifo;
31
32 if (*pos >= f->num_channels)
33 return NULL;
34
35 return &f->channel[*pos];
36}
37
38static void *gk20a_fifo_sched_debugfs_seq_next(
39 struct seq_file *s, void *v, loff_t *pos)
40{
41 struct gk20a *g = s->private;
42 struct fifo_gk20a *f = &g->fifo;
43
44 ++(*pos);
45 if (*pos >= f->num_channels)
46 return NULL;
47
48 return &f->channel[*pos];
49}
50
51static void gk20a_fifo_sched_debugfs_seq_stop(
52 struct seq_file *s, void *v)
53{
54}
55
56static int gk20a_fifo_sched_debugfs_seq_show(
57 struct seq_file *s, void *v)
58{
59 struct gk20a *g = s->private;
60 struct fifo_gk20a *f = &g->fifo;
61 struct channel_gk20a *ch = v;
62 struct tsg_gk20a *tsg = NULL;
63
64 struct fifo_engine_info_gk20a *engine_info;
65 struct fifo_runlist_info_gk20a *runlist;
66 u32 runlist_id;
67 int ret = SEQ_SKIP;
68 u32 engine_id;
69
70 engine_id = gk20a_fifo_get_gr_engine_id(g);
71 engine_info = (f->engine_info + engine_id);
72 runlist_id = engine_info->runlist_id;
73 runlist = &f->runlist_info[runlist_id];
74
75 if (ch == f->channel) {
76 seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
77 seq_puts(s, " (usecs) (msecs)\n");
78 ret = 0;
79 }
80
81 if (!test_bit(ch->chid, runlist->active_channels))
82 return ret;
83
84 if (gk20a_channel_get(ch)) {
85 tsg = tsg_gk20a_from_ch(ch);
86
87 if (tsg)
88 seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
89 ch->chid,
90 ch->tsgid,
91 ch->tgid,
92 tsg->timeslice_us,
93 ch->timeout_ms_max,
94 tsg->interleave_level,
95 tsg->gr_ctx.graphics_preempt_mode,
96 tsg->gr_ctx.compute_preempt_mode);
97 gk20a_channel_put(ch);
98 }
99 return 0;
100}
101
102static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
103 .start = gk20a_fifo_sched_debugfs_seq_start,
104 .next = gk20a_fifo_sched_debugfs_seq_next,
105 .stop = gk20a_fifo_sched_debugfs_seq_stop,
106 .show = gk20a_fifo_sched_debugfs_seq_show
107};
108
109static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
110 struct file *file)
111{
112 struct gk20a *g = inode->i_private;
113 int err;
114
115 if (!capable(CAP_SYS_ADMIN))
116 return -EPERM;
117
118 err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
119 if (err)
120 return err;
121
122 nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private);
123
124 ((struct seq_file *)file->private_data)->private = inode->i_private;
125 return 0;
126};
127
128/*
129 * The file operations structure contains our open function along with
130 * a set of the canned seq_ ops.
131 */
132static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
133 .owner = THIS_MODULE,
134 .open = gk20a_fifo_sched_debugfs_open,
135 .read = seq_read,
136 .llseek = seq_lseek,
137 .release = seq_release
138};
139
140static int gk20a_fifo_profile_enable(void *data, u64 val)
141{
142 struct gk20a *g = (struct gk20a *) data;
143 struct fifo_gk20a *f = &g->fifo;
144
145
146 nvgpu_mutex_acquire(&f->profile.lock);
147 if (val == 0) {
148 if (f->profile.enabled) {
149 f->profile.enabled = false;
150 nvgpu_ref_put(&f->profile.ref,
151 __gk20a_fifo_profile_free);
152 }
153 } else {
154 if (!f->profile.enabled) {
155			/* not kref_init() as it can have a race condition if
156 * we enable/disable/enable while kickoff is happening
157 */
158 if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
159 f->profile.data = nvgpu_vzalloc(g,
160 FIFO_PROFILING_ENTRIES *
161 sizeof(struct fifo_profile_gk20a));
162 f->profile.sorted = nvgpu_vzalloc(g,
163 FIFO_PROFILING_ENTRIES *
164 sizeof(u64));
165 if (!(f->profile.data && f->profile.sorted)) {
166 nvgpu_vfree(g, f->profile.data);
167 nvgpu_vfree(g, f->profile.sorted);
168 nvgpu_mutex_release(&f->profile.lock);
169 return -ENOMEM;
170 }
171 nvgpu_ref_init(&f->profile.ref);
172 }
173 atomic_set(&f->profile.get.atomic_var, 0);
174 f->profile.enabled = true;
175 }
176 }
177 nvgpu_mutex_release(&f->profile.lock);
178
179 return 0;
180}
181
182DEFINE_SIMPLE_ATTRIBUTE(
183 gk20a_fifo_profile_enable_debugfs_fops,
184 NULL,
185 gk20a_fifo_profile_enable,
186 "%llu\n"
187);
188
189static int __profile_cmp(const void *a, const void *b)
190{
191 return *((unsigned long long *) a) - *((unsigned long long *) b);
192}
193
194/*
195 * This uses about 800 bytes of stack, but the function using it is not part
196 * of a call stack where much memory is being used, so it is fine.
197 */
198#define PERCENTILE_WIDTH 5
199#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
200
201static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
202 u64 *percentiles, u32 index_end, u32 index_start)
203{
204 unsigned int nelem = 0;
205 unsigned int index;
206 struct fifo_profile_gk20a *profile;
207
208 for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
209 profile = &g->fifo.profile.data[index];
210
211 if (profile->timestamp[index_end] >
212 profile->timestamp[index_start]) {
213 /* This is a valid element */
214 g->fifo.profile.sorted[nelem] =
215 profile->timestamp[index_end] -
216 profile->timestamp[index_start];
217 nelem++;
218 }
219 }
220
221 /* sort it */
222 sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
223 __profile_cmp, NULL);
224
225 /* build ranges */
226 for (index = 0; index < PERCENTILE_RANGES; index++) {
227 percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
228 g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
229 nelem)/100 - 1];
230 }
231 return nelem;
232}
233
234static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
235{
236 struct gk20a *g = s->private;
237 unsigned int get, nelem, index;
238 /*
239	 * 800 B on the stack, but the function is static and only
240	 * called from the debugfs handler
241 */
242 u64 percentiles_ioctl[PERCENTILE_RANGES];
243 u64 percentiles_kickoff[PERCENTILE_RANGES];
244 u64 percentiles_jobtracking[PERCENTILE_RANGES];
245 u64 percentiles_append[PERCENTILE_RANGES];
246 u64 percentiles_userd[PERCENTILE_RANGES];
247
248 if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
249 seq_printf(s, "Profiling disabled\n");
250 return 0;
251 }
252
253 get = atomic_read(&g->fifo.profile.get.atomic_var);
254
255 __gk20a_fifo_create_stats(g, percentiles_ioctl,
256 PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
257 __gk20a_fifo_create_stats(g, percentiles_kickoff,
258 PROFILE_END, PROFILE_ENTRY);
259 __gk20a_fifo_create_stats(g, percentiles_jobtracking,
260 PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
261 __gk20a_fifo_create_stats(g, percentiles_append,
262 PROFILE_APPEND, PROFILE_JOB_TRACKING);
263 nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
264 PROFILE_END, PROFILE_APPEND);
265
266 seq_printf(s, "Number of kickoffs: %d\n", nelem);
267 seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
268
269 for (index = 0; index < PERCENTILE_RANGES; index++)
270 seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
271 PERCENTILE_WIDTH * (index+1),
272 percentiles_ioctl[index],
273 percentiles_kickoff[index],
274 percentiles_append[index],
275 percentiles_jobtracking[index],
276 percentiles_userd[index]);
277
278 nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
279
280 return 0;
281}
282
283static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
284{
285 return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
286}
287
288static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
289 .open = gk20a_fifo_profile_stats_open,
290 .read = seq_read,
291 .llseek = seq_lseek,
292 .release = single_release,
293};
294
295
296void gk20a_fifo_debugfs_init(struct gk20a *g)
297{
298 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
299 struct dentry *gpu_root = l->debugfs;
300 struct dentry *fifo_root;
301 struct dentry *profile_root;
302
303 fifo_root = debugfs_create_dir("fifo", gpu_root);
304 if (IS_ERR_OR_NULL(fifo_root))
305 return;
306
307 nvgpu_log(g, gpu_dbg_info, "g=%p", g);
308
309 debugfs_create_file("sched", 0600, fifo_root, g,
310 &gk20a_fifo_sched_debugfs_fops);
311
312 profile_root = debugfs_create_dir("profile", fifo_root);
313 if (IS_ERR_OR_NULL(profile_root))
314 return;
315
316 nvgpu_mutex_init(&g->fifo.profile.lock);
317 g->fifo.profile.enabled = false;
318 atomic_set(&g->fifo.profile.get.atomic_var, 0);
319 atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);
320
321 debugfs_create_file("enable", 0600, profile_root, g,
322 &gk20a_fifo_profile_enable_debugfs_fops);
323
324 debugfs_create_file("stats", 0600, profile_root, g,
325 &gk20a_fifo_profile_stats_debugfs_fops);
326
327}
328
329void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx)
330{
331 if (profile)
332 profile->timestamp[idx] = nvgpu_current_time_ns();
333}
334
335void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
336{
337 struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
338 profile.ref);
339 nvgpu_vfree(f->g, f->profile.data);
340 nvgpu_vfree(f->g, f->profile.sorted);
341}
342
343/* Get the next element in the ring buffer of profile entries
344 * and grab a reference to the structure
345 */
346struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
347{
348 struct fifo_gk20a *f = &g->fifo;
349 struct fifo_profile_gk20a *profile;
350 unsigned int index;
351
352 /* If kref is zero, profiling is not enabled */
353 if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
354 return NULL;
355 index = atomic_inc_return(&f->profile.get.atomic_var);
356 profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
357
358 return profile;
359}
360
361/* Free the reference to the structure. This allows deferred cleanups */
362void gk20a_fifo_profile_release(struct gk20a *g,
363 struct fifo_profile_gk20a *profile)
364{
365 nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
366}
367
368void gk20a_fifo_debugfs_deinit(struct gk20a *g)
369{
370 struct fifo_gk20a *f = &g->fifo;
371
372 nvgpu_mutex_acquire(&f->profile.lock);
373 if (f->profile.enabled) {
374 f->profile.enabled = false;
375 nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free);
376 }
377 nvgpu_mutex_release(&f->profile.lock);
378}
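Note: a worked example (hypothetical sample count) of the percentile indexing in __gk20a_fifo_create_stats() above. With PERCENTILE_WIDTH = 5 there are 20 buckets, and bucket i holds the sample at the 5*(i+1)-th percentile of the sorted array:

    /*
     * With nelem == 1000 valid samples:
     *   index 0  -> sorted[(5 *  1 * 1000) / 100 - 1] = sorted[49]   (5th percentile)
     *   index 9  -> sorted[(5 * 10 * 1000) / 100 - 1] = sorted[499]  (median)
     *   index 19 -> sorted[(5 * 20 * 1000) / 100 - 1] = sorted[999]  (maximum)
     * Buckets are reported as 0 when fewer than PERCENTILE_RANGES samples exist.
     */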
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
deleted file mode 100644
index 46ac853e..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_fifo.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_FIFO_H__
16#define __NVGPU_DEBUG_FIFO_H__
17
18struct gk20a;
19void gk20a_fifo_debugfs_init(struct gk20a *g);
20void gk20a_fifo_debugfs_deinit(struct gk20a *g);
21
22#endif /* __NVGPU_DEBUG_FIFO_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c
deleted file mode 100644
index d54c6d63..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_gr.c
+++ /dev/null
@@ -1,31 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_gr.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19
20int gr_gk20a_debugfs_init(struct gk20a *g)
21{
22 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
23
24 l->debugfs_gr_default_attrib_cb_size =
25 debugfs_create_u32("gr_default_attrib_cb_size",
26 S_IRUGO|S_IWUSR, l->debugfs,
27 &g->gr.attrib_cb_default_size);
28
29 return 0;
30}
31
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.h b/drivers/gpu/nvgpu/common/linux/debug_gr.h
deleted file mode 100644
index 4b46acbb..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_gr.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_GR_H__
16#define __NVGPU_DEBUG_GR_H__
17
18struct gk20a;
19int gr_gk20a_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_GR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_hal.c b/drivers/gpu/nvgpu/common/linux/debug_hal.c
deleted file mode 100644
index 031e335e..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_hal.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_hal.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21/* Format and print a single function pointer to the specified seq_file. */
22static void __hal_print_op(struct seq_file *s, void *op_ptr)
23{
24 seq_printf(s, "%pF\n", op_ptr);
25}
26
27/*
28 * Prints an array of function pointer addresses in op_ptrs to the
29 * specified seq_file
30 */
31static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops)
32{
33 int i;
34
35 for (i = 0; i < num_ops; i++)
36 __hal_print_op(s, op_ptrs[i]);
37}
38
39/*
40 * Show file operation, which generates content of the file once. Prints a list
41 * of gpu operations as defined by gops and the corresponding function pointer
 42 * destination addresses. Relies on the compiler not reordering struct fields
 43 * and on the assumption that all members are function pointers.
44 */
45static int __hal_show(struct seq_file *s, void *unused)
46{
47 struct gpu_ops *gops = s->private;
48
49 __hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *));
50
51 return 0;
52}
53
54static int __hal_open(struct inode *inode, struct file *file)
55{
56 return single_open(file, __hal_show, inode->i_private);
57}
58
59static const struct file_operations __hal_fops = {
60 .open = __hal_open,
61 .read = seq_read,
62 .llseek = seq_lseek,
63 .release = single_release,
64};
65
66void nvgpu_hal_debugfs_fini(struct gk20a *g)
67{
68 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
69
 70	if (l->debugfs_hal != NULL)
71 debugfs_remove_recursive(l->debugfs_hal);
72}
73
74void nvgpu_hal_debugfs_init(struct gk20a *g)
75{
76 struct dentry *d;
77 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
78
79 if (!l->debugfs)
80 return;
81 l->debugfs_hal = debugfs_create_dir("hal", l->debugfs);
82 if (IS_ERR_OR_NULL(l->debugfs_hal)) {
83 l->debugfs_hal = NULL;
84 return;
85 }
86
87 /* Pass along reference to the gpu_ops struct as private data */
88 d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal,
89 &g->ops, &__hal_fops);
90 if (!d) {
91 nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__);
92 debugfs_remove_recursive(l->debugfs_hal);
93 return;
94 }
95}
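Note: a minimal sketch of the cast that __hal_show() relies on, using a hypothetical two-member ops structure (the names are illustrative). It is only valid because every member is a function pointer and the fields are not reordered:

    struct example_ops {
            int (*init)(struct gk20a *g);
            void (*fini)(struct gk20a *g);
    };

    static void example_print_ops(struct seq_file *s, struct example_ops *ops)
    {
            void **ptrs = (void **)ops;
            size_t i;

            for (i = 0; i < sizeof(*ops) / sizeof(void *); i++)
                    seq_printf(s, "%pF\n", ptrs[i]);
    }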
diff --git a/drivers/gpu/nvgpu/common/linux/debug_hal.h b/drivers/gpu/nvgpu/common/linux/debug_hal.h
deleted file mode 100644
index eee6f234..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_hal.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_HAL_H__
16#define __NVGPU_DEBUG_HAL_H__
17
18struct gk20a;
19void nvgpu_hal_debugfs_fini(struct gk20a *g);
20void nvgpu_hal_debugfs_init(struct gk20a *g);
21
22#endif /* __NVGPU_DEBUG_HAL_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
deleted file mode 100644
index a0c7d47d..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_kmem.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/debugfs.h>
15#include <linux/seq_file.h>
16
17#include "os_linux.h"
18#include "debug_kmem.h"
19#include "kmem_priv.h"
20
21/**
22 * to_human_readable_bytes - Determine suffix for passed size.
23 *
24 * @bytes - Number of bytes to generate a suffix for.
25 * @hr_bytes [out] - The human readable number of bytes.
26 * @hr_suffix [out] - The suffix for the HR number of bytes.
27 *
28 * Computes a human readable decomposition of the passed number of bytes. The
29 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
30 * number of bytes is then passed back in @hr_bytes. This returns the following
31 * ranges:
32 *
33 * 0 - 1023 B
34 * 1 - 1023 KB
35 * 1 - 1023 MB
36 * 1 - 1023 GB
37 * 1 - 1023 TB
38 * 1 - ... PB
39 */
40static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
41 const char **hr_suffix)
42{
43 static const char *suffixes[] =
44 { "B", "KB", "MB", "GB", "TB", "PB" };
45
46 u64 suffix_ind = 0;
47
48 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
49 bytes >>= 10;
50 suffix_ind++;
51 }
52
53 /*
54 * Handle case where bytes > 1023PB.
55 */
56 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
57 suffix_ind : ARRAY_SIZE(suffixes) - 1;
58
59 *hr_bytes = bytes;
60 *hr_suffix = suffixes[suffix_ind];
61}
62
63/**
64 * print_hr_bytes - Print human readable bytes
65 *
66 * @s - A seq_file to print to. May be NULL.
67 * @msg - A message to print before the bytes.
68 * @bytes - Number of bytes.
69 *
70 * Print @msg followed by the human readable decomposition of the passed number
71 * of bytes.
72 *
 73 * If @s is NULL then the output is printed to the kernel log.
74 */
75static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
76{
77 u64 hr_bytes;
78 const char *hr_suffix;
79
80 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
81 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
82}
83
84/**
85 * print_histogram - Build a histogram of the memory usage.
86 *
87 * @tracker The tracking to pull data from.
88 * @s A seq_file to dump info into.
89 */
90static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
91 struct seq_file *s)
92{
93 int i;
94 u64 pot_min, pot_max;
95 u64 nr_buckets;
96 unsigned int *buckets;
97 unsigned int total_allocs;
98 struct nvgpu_rbtree_node *node;
99 static const char histogram_line[] =
100 "++++++++++++++++++++++++++++++++++++++++";
101
102 /*
103 * pot_min is essentially a round down to the nearest power of 2. This
104 * is the start of the histogram. pot_max is just a round up to the
105 * nearest power of two. Each histogram bucket is one power of two so
106 * the histogram buckets are exponential.
107 */
108 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
109 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
110
111 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
112
113 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
114 if (!buckets) {
115 __pstat(s, "OOM: could not allocate bucket storage!?\n");
116 return;
117 }
118
119 /*
120 * Iterate across all of the allocs and determine what bucket they
121 * should go in. Round the size down to the nearest power of two to
122 * find the right bucket.
123 */
124 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
125 while (node) {
126 int b;
127 u64 bucket_min;
128 struct nvgpu_mem_alloc *alloc =
129 nvgpu_mem_alloc_from_rbtree_node(node);
130
131 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
132 if (bucket_min < tracker->min_alloc)
133 bucket_min = tracker->min_alloc;
134
135 b = __ffs(bucket_min) - __ffs(pot_min);
136
137 /*
138	 * Handle the one case where there's an alloc exactly as big as
139 * the maximum bucket size of the largest bucket. Most of the
140 * buckets have an inclusive minimum and exclusive maximum. But
141 * the largest bucket needs to have an _inclusive_ maximum as
142 * well.
143 */
144 if (b == (int)nr_buckets)
145 b--;
146
147 buckets[b]++;
148
149 nvgpu_rbtree_enum_next(&node, node);
150 }
151
152 total_allocs = 0;
153 for (i = 0; i < (int)nr_buckets; i++)
154 total_allocs += buckets[i];
155
156 __pstat(s, "Alloc histogram:\n");
157
158 /*
159 * Actually compute the histogram lines.
160 */
161 for (i = 0; i < (int)nr_buckets; i++) {
162 char this_line[sizeof(histogram_line) + 1];
163 u64 line_length;
164 u64 hr_bytes;
165 const char *hr_suffix;
166
167 memset(this_line, 0, sizeof(this_line));
168
169 /*
170		 * Compute the normalized line length. Can't use floating point
171 * so we will just multiply everything by 1000 and use fixed
172 * point.
173 */
174 line_length = (1000 * buckets[i]) / total_allocs;
175 line_length *= sizeof(histogram_line);
176 line_length /= 1000;
177
178 memset(this_line, '+', line_length);
179
180 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
181 &hr_bytes, &hr_suffix);
182 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
183 hr_bytes, hr_bytes << 1,
184 hr_suffix, buckets[i], this_line);
185 }
186}
187
188/**
189 * nvgpu_kmem_print_stats - Print kmem tracking stats.
190 *
191 * @tracker The tracking to pull data from.
192 * @s A seq_file to dump info into.
193 *
194 * Print stats from a tracker. If @s is non-null then seq_printf() will be
195 * used with @s. Otherwise the stats are pr_info()ed.
196 */
197void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
198 struct seq_file *s)
199{
200 nvgpu_lock_tracker(tracker);
201
202 __pstat(s, "Mem tracker: %s\n\n", tracker->name);
203
204 __pstat(s, "Basic Stats:\n");
205 __pstat(s, " Number of allocs %lld\n",
206 tracker->nr_allocs);
207 __pstat(s, " Number of frees %lld\n",
208 tracker->nr_frees);
209 print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
210 print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
211 print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
212 print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
213 print_hr_bytes(s, " Bytes allocated (real) ",
214 tracker->bytes_alloced_real);
215 print_hr_bytes(s, " Bytes freed (real) ",
216 tracker->bytes_freed_real);
217 __pstat(s, "\n");
218
219 print_histogram(tracker, s);
220
221 nvgpu_unlock_tracker(tracker);
222}
223
224static int __kmem_tracking_show(struct seq_file *s, void *unused)
225{
226 struct nvgpu_mem_alloc_tracker *tracker = s->private;
227
228 nvgpu_kmem_print_stats(tracker, s);
229
230 return 0;
231}
232
233static int __kmem_tracking_open(struct inode *inode, struct file *file)
234{
235 return single_open(file, __kmem_tracking_show, inode->i_private);
236}
237
238static const struct file_operations __kmem_tracking_fops = {
239 .open = __kmem_tracking_open,
240 .read = seq_read,
241 .llseek = seq_lseek,
242 .release = single_release,
243};
244
245static int __kmem_traces_dump_tracker(struct gk20a *g,
246 struct nvgpu_mem_alloc_tracker *tracker,
247 struct seq_file *s)
248{
249 struct nvgpu_rbtree_node *node;
250
251 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
252 while (node) {
253 struct nvgpu_mem_alloc *alloc =
254 nvgpu_mem_alloc_from_rbtree_node(node);
255
256 kmem_print_mem_alloc(g, alloc, s);
257
258 nvgpu_rbtree_enum_next(&node, node);
259 }
260
261 return 0;
262}
263
264static int __kmem_traces_show(struct seq_file *s, void *unused)
265{
266 struct gk20a *g = s->private;
267
268 nvgpu_lock_tracker(g->vmallocs);
269	seq_puts(s, "Outstanding vmallocs:\n");
270 __kmem_traces_dump_tracker(g, g->vmallocs, s);
271 seq_puts(s, "\n");
272 nvgpu_unlock_tracker(g->vmallocs);
273
274 nvgpu_lock_tracker(g->kmallocs);
275	seq_puts(s, "Outstanding kmallocs:\n");
276 __kmem_traces_dump_tracker(g, g->kmallocs, s);
277 nvgpu_unlock_tracker(g->kmallocs);
278
279 return 0;
280}
281
282static int __kmem_traces_open(struct inode *inode, struct file *file)
283{
284 return single_open(file, __kmem_traces_show, inode->i_private);
285}
286
287static const struct file_operations __kmem_traces_fops = {
288 .open = __kmem_traces_open,
289 .read = seq_read,
290 .llseek = seq_lseek,
291 .release = single_release,
292};
293
294void nvgpu_kmem_debugfs_init(struct gk20a *g)
295{
296 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
297 struct dentry *node;
298
299 l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
300 if (IS_ERR_OR_NULL(l->debugfs_kmem))
301 return;
302
303 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
304 l->debugfs_kmem,
305 g->vmallocs, &__kmem_tracking_fops);
306 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
307 l->debugfs_kmem,
308 g->kmallocs, &__kmem_tracking_fops);
309 node = debugfs_create_file("traces", S_IRUGO,
310 l->debugfs_kmem,
311 g, &__kmem_traces_fops);
312}
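Note: a worked example (hypothetical sizes) of the bucket and bar-length math in print_histogram() above. With min_alloc = 96 and max_alloc = 6000, pot_min = 64 and pot_max = 8192, so nr_buckets = __ffs(8192) - __ffs(64) = 13 - 6 = 7 power-of-two buckets. The bar length uses fixed-point scaling because floating point is unavailable in the kernel:

    /*
     * buckets[i] = 300 allocs out of total_allocs = 1200:
     *   line_length = (1000 * 300) / 1200 = 250
     *   line_length = 250 * sizeof(histogram_line)   (41, including the NUL)
     *   line_length = 10250 / 1000 = 10 '+' characters
     */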
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
deleted file mode 100644
index 44322b53..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_kmem.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_KMEM_H__
16#define __NVGPU_DEBUG_KMEM_H__
17
18struct gk20a;
19#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
20void nvgpu_kmem_debugfs_init(struct gk20a *g);
21#endif
22
23#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
deleted file mode 100644
index f4ed992d..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_pmu.c
+++ /dev/null
@@ -1,481 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include <nvgpu/enabled.h>
16#include "debug_pmu.h"
17#include "os_linux.h"
18
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21#include <linux/uaccess.h>
22
23static int lpwr_debug_show(struct seq_file *s, void *data)
24{
25 struct gk20a *g = s->private;
26
27 if (g->ops.pmu.pmu_pg_engines_feature_list &&
28 g->ops.pmu.pmu_pg_engines_feature_list(g,
29 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
30 NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) {
31 seq_printf(s, "PSTATE: %u\n"
32 "RPPG Enabled: %u\n"
33 "RPPG ref count: %u\n"
34 "RPPG state: %u\n"
35 "MSCG Enabled: %u\n"
36 "MSCG pstate state: %u\n"
37 "MSCG transition state: %u\n",
38 g->ops.clk_arb.get_current_pstate(g),
39 g->elpg_enabled, g->pmu.elpg_refcnt,
40 g->pmu.elpg_stat, g->mscg_enabled,
41 g->pmu.mscg_stat, g->pmu.mscg_transition_state);
42
43 } else
44 seq_printf(s, "ELPG Enabled: %u\n"
45 "ELPG ref count: %u\n"
46 "ELPG state: %u\n",
47 g->elpg_enabled, g->pmu.elpg_refcnt,
48 g->pmu.elpg_stat);
49
50 return 0;
51
52}
53
54static int lpwr_debug_open(struct inode *inode, struct file *file)
55{
56 return single_open(file, lpwr_debug_show, inode->i_private);
57}
58
59static const struct file_operations lpwr_debug_fops = {
60 .open = lpwr_debug_open,
61 .read = seq_read,
62 .llseek = seq_lseek,
63 .release = single_release,
64};
65
66static int mscg_stat_show(struct seq_file *s, void *data)
67{
68 struct gk20a *g = s->private;
69 u64 total_ingating, total_ungating, residency, divisor, dividend;
70 struct pmu_pg_stats_data pg_stat_data = { 0 };
71 int err;
72
73 /* Don't unnecessarily power on the device */
74 if (g->power_on) {
75 err = gk20a_busy(g);
76 if (err)
77 return err;
78
79 nvgpu_pmu_get_pg_stats(g,
80 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
81 gk20a_idle(g);
82 }
83 total_ingating = g->pg_ingating_time_us +
84 (u64)pg_stat_data.ingating_time;
85 total_ungating = g->pg_ungating_time_us +
86 (u64)pg_stat_data.ungating_time;
87
88 divisor = total_ingating + total_ungating;
89
90 /* We compute the residency on a scale of 1000 */
91 dividend = total_ingating * 1000;
92
93 if (divisor)
94 residency = div64_u64(dividend, divisor);
95 else
96 residency = 0;
97
98 seq_printf(s,
99 "Time in MSCG: %llu us\n"
100 "Time out of MSCG: %llu us\n"
101 "MSCG residency ratio: %llu\n"
102 "MSCG Entry Count: %u\n"
103		"MSCG Avg Entry latency %u us\n"
104		"MSCG Avg Exit latency %u us\n",
105 total_ingating, total_ungating,
106 residency, pg_stat_data.gating_cnt,
107 pg_stat_data.avg_entry_latency_us,
108 pg_stat_data.avg_exit_latency_us);
109 return 0;
110
111}
112
113static int mscg_stat_open(struct inode *inode, struct file *file)
114{
115 return single_open(file, mscg_stat_show, inode->i_private);
116}
117
118static const struct file_operations mscg_stat_fops = {
119 .open = mscg_stat_open,
120 .read = seq_read,
121 .llseek = seq_lseek,
122 .release = single_release,
123};
124
125static int mscg_transitions_show(struct seq_file *s, void *data)
126{
127 struct gk20a *g = s->private;
128 struct pmu_pg_stats_data pg_stat_data = { 0 };
129 u32 total_gating_cnt;
130 int err;
131
132 if (g->power_on) {
133 err = gk20a_busy(g);
134 if (err)
135 return err;
136
137 nvgpu_pmu_get_pg_stats(g,
138 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
139 gk20a_idle(g);
140 }
141 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
142
143 seq_printf(s, "%u\n", total_gating_cnt);
144 return 0;
145
146}
147
148static int mscg_transitions_open(struct inode *inode, struct file *file)
149{
150 return single_open(file, mscg_transitions_show, inode->i_private);
151}
152
153static const struct file_operations mscg_transitions_fops = {
154 .open = mscg_transitions_open,
155 .read = seq_read,
156 .llseek = seq_lseek,
157 .release = single_release,
158};
159
160static int elpg_stat_show(struct seq_file *s, void *data)
161{
162 struct gk20a *g = s->private;
163 struct pmu_pg_stats_data pg_stat_data = { 0 };
164 u64 total_ingating, total_ungating, residency, divisor, dividend;
165 int err;
166
167 /* Don't unnecessarily power on the device */
168 if (g->power_on) {
169 err = gk20a_busy(g);
170 if (err)
171 return err;
172
173 nvgpu_pmu_get_pg_stats(g,
174 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
175 gk20a_idle(g);
176 }
177 total_ingating = g->pg_ingating_time_us +
178 (u64)pg_stat_data.ingating_time;
179 total_ungating = g->pg_ungating_time_us +
180 (u64)pg_stat_data.ungating_time;
181 divisor = total_ingating + total_ungating;
182
183 /* We compute the residency on a scale of 1000 */
184 dividend = total_ingating * 1000;
185
186 if (divisor)
187 residency = div64_u64(dividend, divisor);
188 else
189 residency = 0;
190
191 seq_printf(s,
192 "Time in ELPG: %llu us\n"
193 "Time out of ELPG: %llu us\n"
194 "ELPG residency ratio: %llu\n"
195 "ELPG Entry Count: %u\n"
196 "ELPG Avg Entry latency %u us\n"
197 "ELPG Avg Exit latency %u us\n",
198 total_ingating, total_ungating,
199 residency, pg_stat_data.gating_cnt,
200 pg_stat_data.avg_entry_latency_us,
201 pg_stat_data.avg_exit_latency_us);
202 return 0;
203
204}
205
206static int elpg_stat_open(struct inode *inode, struct file *file)
207{
208 return single_open(file, elpg_stat_show, inode->i_private);
209}
210
211static const struct file_operations elpg_stat_fops = {
212 .open = elpg_stat_open,
213 .read = seq_read,
214 .llseek = seq_lseek,
215 .release = single_release,
216};
217
218static int elpg_transitions_show(struct seq_file *s, void *data)
219{
220 struct gk20a *g = s->private;
221 struct pmu_pg_stats_data pg_stat_data = { 0 };
222 u32 total_gating_cnt;
223 int err;
224
225 if (g->power_on) {
226 err = gk20a_busy(g);
227 if (err)
228 return err;
229
230 nvgpu_pmu_get_pg_stats(g,
231 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
232 gk20a_idle(g);
233 }
234 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
235
236 seq_printf(s, "%u\n", total_gating_cnt);
237 return 0;
238
239}
240
241static int elpg_transitions_open(struct inode *inode, struct file *file)
242{
243 return single_open(file, elpg_transitions_show, inode->i_private);
244}
245
246static const struct file_operations elpg_transitions_fops = {
247 .open = elpg_transitions_open,
248 .read = seq_read,
249 .llseek = seq_lseek,
250 .release = single_release,
251};
252
253static int falc_trace_show(struct seq_file *s, void *data)
254{
255 struct gk20a *g = s->private;
256 struct nvgpu_pmu *pmu = &g->pmu;
257 u32 i = 0, j = 0, k, l, m;
258 char part_str[40];
259 void *tracebuffer;
260 char *trace;
261 u32 *trace1;
262
263 /* allocate system memory to copy pmu trace buffer */
264 tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
265 if (tracebuffer == NULL)
266 return -ENOMEM;
267
268 /* read pmu traces into system memory buffer */
269 nvgpu_mem_rd_n(g, &pmu->trace_buf,
270 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);
271
272 trace = (char *)tracebuffer;
273 trace1 = (u32 *)tracebuffer;
274
275 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
276 for (j = 0; j < 0x40; j++)
277 if (trace1[(i / 4) + j])
278 break;
279 if (j == 0x40)
280 break;
281 seq_printf(s, "Index %x: ", trace1[(i / 4)]);
282 l = 0;
283 m = 0;
284 while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
285 if (k >= 40)
286 break;
287 strncpy(part_str, (trace+i+20+m), k);
288 part_str[k] = 0;
289 seq_printf(s, "%s0x%x", part_str,
290 trace1[(i / 4) + 1 + l]);
291 l++;
292 m += k + 2;
293 }
294 seq_printf(s, "%s", (trace+i+20+m));
295 }
296
297 nvgpu_kfree(g, tracebuffer);
298 return 0;
299}
300
301static int falc_trace_open(struct inode *inode, struct file *file)
302{
303 return single_open(file, falc_trace_show, inode->i_private);
304}
305
306static const struct file_operations falc_trace_fops = {
307 .open = falc_trace_open,
308 .read = seq_read,
309 .llseek = seq_lseek,
310 .release = single_release,
311};
312
313static int perfmon_events_enable_show(struct seq_file *s, void *data)
314{
315 struct gk20a *g = s->private;
316
317 seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
318 return 0;
319
320}
321
322static int perfmon_events_enable_open(struct inode *inode, struct file *file)
323{
324 return single_open(file, perfmon_events_enable_show, inode->i_private);
325}
326
327static ssize_t perfmon_events_enable_write(struct file *file,
328 const char __user *userbuf, size_t count, loff_t *ppos)
329{
330 struct seq_file *s = file->private_data;
331 struct gk20a *g = s->private;
332 unsigned long val = 0;
333 char buf[40];
334 int buf_size;
335 int err;
336
337 memset(buf, 0, sizeof(buf));
338 buf_size = min(count, (sizeof(buf)-1));
339
340 if (copy_from_user(buf, userbuf, buf_size))
341 return -EFAULT;
342
343 if (kstrtoul(buf, 10, &val) < 0)
344 return -EINVAL;
345
346 /* Don't turn on gk20a unnecessarily */
347 if (g->power_on) {
348 err = gk20a_busy(g);
349 if (err)
350 return err;
351
352 if (val && !g->pmu.perfmon_sampling_enabled &&
353 nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
354 g->pmu.perfmon_sampling_enabled = true;
355 g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
356 } else if (!val && g->pmu.perfmon_sampling_enabled &&
357 nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
358 g->pmu.perfmon_sampling_enabled = false;
359 g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
360 }
361 gk20a_idle(g);
362 } else {
363 g->pmu.perfmon_sampling_enabled = val ? true : false;
364 }
365
366 return count;
367}
368
369static const struct file_operations perfmon_events_enable_fops = {
370 .open = perfmon_events_enable_open,
371 .read = seq_read,
372 .write = perfmon_events_enable_write,
373 .llseek = seq_lseek,
374 .release = single_release,
375};
376
377static int perfmon_events_count_show(struct seq_file *s, void *data)
378{
379 struct gk20a *g = s->private;
380
381 seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
382 return 0;
383
384}
385
386static int perfmon_events_count_open(struct inode *inode, struct file *file)
387{
388 return single_open(file, perfmon_events_count_show, inode->i_private);
389}
390
391static const struct file_operations perfmon_events_count_fops = {
392 .open = perfmon_events_count_open,
393 .read = seq_read,
394 .llseek = seq_lseek,
395 .release = single_release,
396};
397
398static int security_show(struct seq_file *s, void *data)
399{
400 struct gk20a *g = s->private;
401
402 seq_printf(s, "%d\n", g->pmu.pmu_mode);
403 return 0;
404
405}
406
407static int security_open(struct inode *inode, struct file *file)
408{
409 return single_open(file, security_show, inode->i_private);
410}
411
412static const struct file_operations security_fops = {
413 .open = security_open,
414 .read = seq_read,
415 .llseek = seq_lseek,
416 .release = single_release,
417};
418
419int gk20a_pmu_debugfs_init(struct gk20a *g)
420{
421 struct dentry *d;
422 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
423
424 d = debugfs_create_file(
425 "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g,
426 &lpwr_debug_fops);
427 if (!d)
428 goto err_out;
429
430 d = debugfs_create_file(
431 "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
432 &mscg_stat_fops);
433 if (!d)
434 goto err_out;
435
436 d = debugfs_create_file(
437 "mscg_transitions", S_IRUGO, l->debugfs, g,
438 &mscg_transitions_fops);
439 if (!d)
440 goto err_out;
441
442 d = debugfs_create_file(
443 "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
444 &elpg_stat_fops);
445 if (!d)
446 goto err_out;
447
448 d = debugfs_create_file(
449 "elpg_transitions", S_IRUGO, l->debugfs, g,
450 &elpg_transitions_fops);
451 if (!d)
452 goto err_out;
453
454 d = debugfs_create_file(
455 "falc_trace", S_IRUGO, l->debugfs, g,
456 &falc_trace_fops);
457 if (!d)
458 goto err_out;
459
460 d = debugfs_create_file(
461 "perfmon_events_enable", S_IRUGO, l->debugfs, g,
462 &perfmon_events_enable_fops);
463 if (!d)
464 goto err_out;
465
466 d = debugfs_create_file(
467 "perfmon_events_count", S_IRUGO, l->debugfs, g,
468 &perfmon_events_count_fops);
469 if (!d)
470 goto err_out;
471
472 d = debugfs_create_file(
473 "pmu_security", S_IRUGO, l->debugfs, g,
474 &security_fops);
475 if (!d)
476 goto err_out;
477 return 0;
478err_out:
479 pr_err("%s: Failed to make debugfs node\n", __func__);
480 return -ENOMEM;
481}
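Note: the "residency ratio" printed by mscg_stat_show() and elpg_stat_show() is on a scale of 1000, i.e. tenths of a percent. A worked example with hypothetical gating times:

    /*
     * total_ingating = 7500 us, total_ungating = 2500 us:
     *   residency = (7500 * 1000) / (7500 + 2500) = 750
     * i.e. the engine was power-gated 75.0% of the time.
     */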
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
deleted file mode 100644
index c4e3243d..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_pmu.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_PMU_H__
16#define __NVGPU_DEBUG_PMU_H__
17
18struct gk20a;
19int gk20a_pmu_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c
deleted file mode 100644
index 5b7cbddf..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_sched.c
+++ /dev/null
@@ -1,80 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_sched.h"
16#include "os_linux.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
22{
23 struct gk20a *g = s->private;
24 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
25 struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
26 bool sched_busy = true;
27
28 int n = sched->bitmap_size / sizeof(u64);
29 int i;
30 int err;
31
32 err = gk20a_busy(g);
33 if (err)
34 return err;
35
36 if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
37 sched_busy = false;
38 nvgpu_mutex_release(&sched->busy_lock);
39 }
40
41 seq_printf(s, "control_locked=%d\n", sched->control_locked);
42 seq_printf(s, "busy=%d\n", sched_busy);
43 seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
44
45 nvgpu_mutex_acquire(&sched->status_lock);
46
47 seq_puts(s, "active_tsg_bitmap\n");
48 for (i = 0; i < n; i++)
49 seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
50
51 seq_puts(s, "recent_tsg_bitmap\n");
52 for (i = 0; i < n; i++)
53 seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
54
55 nvgpu_mutex_release(&sched->status_lock);
56
57 gk20a_idle(g);
58
59 return 0;
60}
61
62static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
63{
64 return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
65}
66
67static const struct file_operations gk20a_sched_debugfs_fops = {
68 .open = gk20a_sched_debugfs_open,
69 .read = seq_read,
70 .llseek = seq_lseek,
71 .release = single_release,
72};
73
74void gk20a_sched_debugfs_init(struct gk20a *g)
75{
76 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
77
78 debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs,
79 g, &gk20a_sched_debugfs_fops);
80}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h
deleted file mode 100644
index 34a8f55f..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_sched.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_SCHED_H__
16#define __NVGPU_DEBUG_SCHED_H__
17
18struct gk20a;
19void gk20a_sched_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_SCHED_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_xve.c b/drivers/gpu/nvgpu/common/linux/debug_xve.c
deleted file mode 100644
index 743702a2..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_xve.c
+++ /dev/null
@@ -1,176 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include <nvgpu/types.h>
16#include <nvgpu/xve.h>
17
18#include "debug_xve.h"
19#include "os_linux.h"
20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23
24static ssize_t xve_link_speed_write(struct file *filp,
25 const char __user *buff,
26 size_t len, loff_t *off)
27{
28 struct gk20a *g = ((struct seq_file *)filp->private_data)->private;
29 char kbuff[16];
30 u32 buff_size, check_len;
31 u32 link_speed = 0;
32 int ret;
33
34 buff_size = min_t(size_t, 16, len);
35
36 memset(kbuff, 0, 16);
37 if (copy_from_user(kbuff, buff, buff_size))
38 return -EFAULT;
39
40 check_len = strlen("Gen1");
41 if (strncmp(kbuff, "Gen1", check_len) == 0)
42 link_speed = GPU_XVE_SPEED_2P5;
43 else if (strncmp(kbuff, "Gen2", check_len) == 0)
44 link_speed = GPU_XVE_SPEED_5P0;
45 else if (strncmp(kbuff, "Gen3", check_len) == 0)
46 link_speed = GPU_XVE_SPEED_8P0;
47 else
48 nvgpu_err(g, "%s: Unknown PCIe speed: %s",
49 __func__, kbuff);
50
51 if (!link_speed)
52 return -EINVAL;
53
54 /* Brief pause... To help rate limit this. */
55 nvgpu_msleep(250);
56
57 /*
58 * And actually set the speed. Yay.
59 */
60 ret = g->ops.xve.set_speed(g, link_speed);
61 if (ret)
62 return ret;
63
64 return len;
65}
66
67static int xve_link_speed_show(struct seq_file *s, void *unused)
68{
69 struct gk20a *g = s->private;
70 u32 speed;
71 int err;
72
73 err = g->ops.xve.get_speed(g, &speed);
74 if (err)
75 return err;
76
77 seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed));
78
79 return 0;
80}
81
82static int xve_link_speed_open(struct inode *inode, struct file *file)
83{
84 return single_open(file, xve_link_speed_show, inode->i_private);
85}
86
87static const struct file_operations xve_link_speed_fops = {
88 .open = xve_link_speed_open,
89 .read = seq_read,
90 .write = xve_link_speed_write,
91 .llseek = seq_lseek,
92 .release = single_release,
93};
94
95static int xve_available_speeds_show(struct seq_file *s, void *unused)
96{
97 struct gk20a *g = s->private;
98 u32 available_speeds;
99
100 g->ops.xve.available_speeds(g, &available_speeds);
101
102 seq_puts(s, "Available PCIe bus speeds:\n");
103 if (available_speeds & GPU_XVE_SPEED_2P5)
104 seq_puts(s, " Gen1\n");
105 if (available_speeds & GPU_XVE_SPEED_5P0)
106 seq_puts(s, " Gen2\n");
107 if (available_speeds & GPU_XVE_SPEED_8P0)
108 seq_puts(s, " Gen3\n");
109
110 return 0;
111}
112
113static int xve_available_speeds_open(struct inode *inode, struct file *file)
114{
115 return single_open(file, xve_available_speeds_show, inode->i_private);
116}
117
118static const struct file_operations xve_available_speeds_fops = {
119 .open = xve_available_speeds_open,
120 .read = seq_read,
121 .llseek = seq_lseek,
122 .release = single_release,
123};
124
125static int xve_link_control_status_show(struct seq_file *s, void *unused)
126{
127 struct gk20a *g = s->private;
128 u32 link_status;
129
130 link_status = g->ops.xve.get_link_control_status(g);
131 seq_printf(s, "0x%08x\n", link_status);
132
133 return 0;
134}
135
136static int xve_link_control_status_open(struct inode *inode, struct file *file)
137{
138 return single_open(file, xve_link_control_status_show, inode->i_private);
139}
140
141static const struct file_operations xve_link_control_status_fops = {
142 .open = xve_link_control_status_open,
143 .read = seq_read,
144 .llseek = seq_lseek,
145 .release = single_release,
146};
147
148int nvgpu_xve_debugfs_init(struct gk20a *g)
149{
150 int err = -ENODEV;
151
152 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
153 struct dentry *gpu_root = l->debugfs;
154
155 l->debugfs_xve = debugfs_create_dir("xve", gpu_root);
156 if (IS_ERR_OR_NULL(l->debugfs_xve))
157 goto fail;
158
159 /*
160 * These are just debug nodes. If they fail to get made it's not worth
161 * worrying the higher level SW.
162 */
163 debugfs_create_file("link_speed", S_IRUGO,
164 l->debugfs_xve, g,
165 &xve_link_speed_fops);
166 debugfs_create_file("available_speeds", S_IRUGO,
167 l->debugfs_xve, g,
168 &xve_available_speeds_fops);
169 debugfs_create_file("link_control_status", S_IRUGO,
170 l->debugfs_xve, g,
171 &xve_link_control_status_fops);
172
173 err = 0;
174fail:
175 return err;
176}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_xve.h b/drivers/gpu/nvgpu/common/linux/debug_xve.h
deleted file mode 100644
index f3b1ac54..00000000
--- a/drivers/gpu/nvgpu/common/linux/debug_xve.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_XVE_H__
16#define __NVGPU_DEBUG_XVE_H__
17
18struct gk20a;
19int nvgpu_xve_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_XVE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
deleted file mode 100644
index f513dcd6..00000000
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ /dev/null
@@ -1,694 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-mapping.h>
18#include <linux/version.h>
19
20#include <nvgpu/log.h>
21#include <nvgpu/dma.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/bug.h>
24#include <nvgpu/gmmu.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/enabled.h>
27#include <nvgpu/vidmem.h>
28
29#include <nvgpu/linux/dma.h>
30#include <nvgpu/linux/vidmem.h>
31
32#include "gk20a/gk20a.h"
33
34#include "platform_gk20a.h"
35#include "os_linux.h"
36
37#ifdef __DMA_ATTRS_LONGS
38#define NVGPU_DEFINE_DMA_ATTRS(x) \
39 struct dma_attrs x = { \
40 .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \
41 }
42#define NVGPU_DMA_ATTR(attrs) &attrs
43#else
44#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0
45#define NVGPU_DMA_ATTR(attrs) attrs
46#endif
47
48/*
49 * Enough to hold all the possible flags in string form. When a new flag is
50 * added it must be added here as well!!
51 */
52#define NVGPU_DMA_STR_SIZE \
53 sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS")
54
55/*
56 * The returned string is kmalloc()ed here but must be freed by the caller.
57 */
58static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags)
59{
60 char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE);
61 int bytes_available = NVGPU_DMA_STR_SIZE;
62
63 /*
64 * Return the empty buffer if there's no flags. Makes it easier on the
65 * calling code to just print it instead of any if (NULL) type logic.
66 */
67 if (!flags)
68 return buf;
69
70#define APPEND_FLAG(flag, str_flag) \
71 do { \
72 if (flags & flag) { \
73 strncat(buf, str_flag, bytes_available); \
74 bytes_available -= strlen(str_flag); \
75 } \
76 } while (0)
77
78 APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING ");
79 APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS ");
80#undef APPEND_FLAG
81
82 return buf;
83}
84
85/**
86 * __dma_dbg - Debug print for DMA allocs and frees.
87 *
88 * @g - The GPU.
89 * @size - The requested size of the alloc (size_t).
90 * @flags - The flags (unsigned long).
91 * @type - A string describing the type (i.e., sysmem or vidmem).
92 * @what - A string with 'alloc' or 'free'.
93 *
94 * @flags is the DMA flags. If there are none or it doesn't make sense to print
95 * flags just pass 0.
96 *
97 * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function.
98 */
99static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags,
100 const char *type, const char *what)
101{
102 char *flags_str = NULL;
103
104 /*
105 * Don't bother making the flags_str if debugging is
106 * not enabled. This saves a malloc and a free.
107 */
108 if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma))
109 return;
110
111 flags_str = nvgpu_dma_flags_to_str(g, flags);
112
113 __nvgpu_log_dbg(g, gpu_dbg_dma,
114 __func__, __LINE__,
115 "DMA %s: [%s] size=%-7zu "
116 "aligned=%-7zu total=%-10llukB %s",
117 what, type,
118 size, PAGE_ALIGN(size),
119 g->dma_memory_used >> 10,
120 flags_str);
121
122 if (flags_str)
123 nvgpu_kfree(g, flags_str);
124}
125
126#define dma_dbg_alloc(g, size, flags, type) \
127 __dma_dbg(g, size, flags, type, "alloc")
128#define dma_dbg_free(g, size, flags, type) \
129 __dma_dbg(g, size, flags, type, "free")
130
131/*
132 * For after the DMA alloc is done.
133 */
134#define __dma_dbg_done(g, size, type, what) \
135 nvgpu_log(g, gpu_dbg_dma, \
136 "DMA %s: [%s] size=%-7zu Done!", \
137 what, type, size); \
138
139#define dma_dbg_alloc_done(g, size, type) \
140 __dma_dbg_done(g, size, type, "alloc")
141#define dma_dbg_free_done(g, size, type) \
142 __dma_dbg_done(g, size, type, "free")
143
144#if defined(CONFIG_GK20A_VIDMEM)
145static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
146 size_t size)
147{
148 u64 addr = 0;
149
150 if (at)
151 addr = nvgpu_alloc_fixed(allocator, at, size, 0);
152 else
153 addr = nvgpu_alloc(allocator, size);
154
155 return addr;
156}
157#endif
158
159#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
160static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
161 unsigned long flags)
162#define ATTR_ARG(x) *x
163#else
164static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
165 unsigned long flags)
166#define ATTR_ARG(x) x
167#endif
168{
169 if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
170 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
171 if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
172 dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
173#undef ATTR_ARG
174}
175
176int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
177{
178 return nvgpu_dma_alloc_flags(g, 0, size, mem);
179}
180
181int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
182 struct nvgpu_mem *mem)
183{
184 if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
185 /*
186 * Force the no-kernel-mapping flag on because we don't support
187 * the lack of it for vidmem - the user should not care when
188 * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a
189 * difference, the user should use the flag explicitly anyway.
190 *
191 * Incoming flags are ignored here, since bits other than the
192 * no-kernel-mapping flag are ignored by the vidmem mapping
193 * functions anyway.
194 */
195 int err = nvgpu_dma_alloc_flags_vid(g,
196 NVGPU_DMA_NO_KERNEL_MAPPING,
197 size, mem);
198
199 if (!err)
200 return 0;
201 /*
202 * Fall back to sysmem (which may then also fail) in case
203 * vidmem is exhausted.
204 */
205 }
206
207 return nvgpu_dma_alloc_flags_sys(g, flags, size, mem);
208}
209
210int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
211{
212 return nvgpu_dma_alloc_flags_sys(g, 0, size, mem);
213}
214
215int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
216 size_t size, struct nvgpu_mem *mem)
217{
218 struct device *d = dev_from_gk20a(g);
219 int err;
220 dma_addr_t iova;
221 NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
222 void *alloc_ret;
223
224 if (nvgpu_mem_is_valid(mem)) {
225 nvgpu_warn(g, "memory leak !!");
226 WARN_ON(1);
227 }
228
229 /*
230 * WAR for IO coherent chips: the DMA API does not seem to generate
231 * mappings that work correctly. Unclear why - Bug ID: 2040115.
232 *
233 * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
234 * and then make a vmap() ourselves.
235 */
236 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
237 flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
238
239 /*
240 * Before the debug print so we see this in the total. But during
241 * cleanup in the fail path this has to be subtracted.
242 */
243 g->dma_memory_used += PAGE_ALIGN(size);
244
245 dma_dbg_alloc(g, size, flags, "sysmem");
246
247 /*
248 * Save the old size but for actual allocation purposes the size is
249 * going to be page aligned.
250 */
251 mem->size = size;
252 size = PAGE_ALIGN(size);
253
254 nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
255
256 alloc_ret = dma_alloc_attrs(d, size, &iova,
257 GFP_KERNEL|__GFP_ZERO,
258 NVGPU_DMA_ATTR(dma_attrs));
259 if (!alloc_ret)
260 return -ENOMEM;
261
262 if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
263 mem->priv.pages = alloc_ret;
264 err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
265 mem->priv.pages,
266 iova, size);
267 } else {
268 mem->cpu_va = alloc_ret;
269 err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va,
270 iova, size, flags);
271 }
272 if (err)
273 goto fail_free_dma;
274
275 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
276 mem->cpu_va = vmap(mem->priv.pages,
277 size >> PAGE_SHIFT,
278 0, PAGE_KERNEL);
279 if (!mem->cpu_va) {
280 err = -ENOMEM;
281 goto fail_free_sgt;
282 }
283 }
284
285 mem->aligned_size = size;
286 mem->aperture = APERTURE_SYSMEM;
287 mem->priv.flags = flags;
288
289 dma_dbg_alloc_done(g, mem->size, "sysmem");
290
291 return 0;
292
293fail_free_sgt:
294 nvgpu_free_sgtable(g, &mem->priv.sgt);
295fail_free_dma:
296 dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
297 mem->cpu_va = NULL;
298 mem->priv.sgt = NULL;
299 mem->size = 0;
300 g->dma_memory_used -= size;
301 return err;
302}
303
304int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
305{
306 return nvgpu_dma_alloc_flags_vid(g,
307 NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
308}
309
310int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags,
311 size_t size, struct nvgpu_mem *mem)
312{
313 return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0);
314}
315
316int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
317 size_t size, struct nvgpu_mem *mem, u64 at)
318{
319#if defined(CONFIG_GK20A_VIDMEM)
320 u64 addr;
321 int err;
322 struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
323 &g->mm.vidmem.allocator :
324 &g->mm.vidmem.bootstrap_allocator;
325 int before_pending;
326
327 if (nvgpu_mem_is_valid(mem)) {
328 nvgpu_warn(g, "memory leak !!");
329 WARN_ON(1);
330 }
331
332 dma_dbg_alloc(g, size, flags, "vidmem");
333
334 mem->size = size;
335 size = PAGE_ALIGN(size);
336
337 if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
338 return -ENOSYS;
339
340 /*
341 * Our own allocator doesn't have any flags yet, and we can't
342 * kernel-map these, so require explicit flags.
343 */
344 WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
345
346 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
347 before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
348 addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
349 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
350 if (!addr) {
351 /*
352 * If memory is known to be freed soon, let the user know that
353 * it may be available after a while.
354 */
355 if (before_pending)
356 return -EAGAIN;
357 else
358 return -ENOMEM;
359 }
360
361 if (at)
362 mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;
363
364 mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
365 if (!mem->priv.sgt) {
366 err = -ENOMEM;
367 goto fail_physfree;
368 }
369
370 err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
371 if (err)
372 goto fail_kfree;
373
374 nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
375 sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);
376
377 mem->aligned_size = size;
378 mem->aperture = APERTURE_VIDMEM;
379 mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
380 mem->allocator = vidmem_alloc;
381 mem->priv.flags = flags;
382
383 nvgpu_init_list_node(&mem->clear_list_entry);
384
385 dma_dbg_alloc_done(g, mem->size, "vidmem");
386
387 return 0;
388
389fail_kfree:
390 nvgpu_kfree(g, mem->priv.sgt);
391fail_physfree:
392 nvgpu_free(&g->mm.vidmem.allocator, addr);
393 mem->size = 0;
394 return err;
395#else
396 return -ENOSYS;
397#endif
398}
399
400int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
401 struct nvgpu_mem *mem)
402{
403 return nvgpu_dma_alloc_map_flags(vm, 0, size, mem);
404}
405
406int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
407 size_t size, struct nvgpu_mem *mem)
408{
409 if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) {
410 /*
411 * Force the no-kernel-mapping flag on because we don't support
412 * the lack of it for vidmem - the user should not care when
413 * using nvgpu_dma_alloc_map and it's vidmem, or if there's a
414 * difference, the user should use the flag explicitly anyway.
415 */
416 int err = nvgpu_dma_alloc_map_flags_vid(vm,
417 flags | NVGPU_DMA_NO_KERNEL_MAPPING,
418 size, mem);
419
420 if (!err)
421 return 0;
422 /*
423 * Fall back to sysmem (which may then also fail) in case
424 * vidmem is exhausted.
425 */
426 }
427
428 return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem);
429}
430
431int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
432 struct nvgpu_mem *mem)
433{
434 return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem);
435}
436
437int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
438 size_t size, struct nvgpu_mem *mem)
439{
440 int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem);
441
442 if (err)
443 return err;
444
445 mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
446 gk20a_mem_flag_none, false,
447 mem->aperture);
448 if (!mem->gpu_va) {
449 err = -ENOMEM;
450 goto fail_free;
451 }
452
453 return 0;
454
455fail_free:
456 nvgpu_dma_free(vm->mm->g, mem);
457 return err;
458}
459
460int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size,
461 struct nvgpu_mem *mem)
462{
463 return nvgpu_dma_alloc_map_flags_vid(vm,
464 NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
465}
466
467int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
468 size_t size, struct nvgpu_mem *mem)
469{
470 int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem);
471
472 if (err)
473 return err;
474
475 mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
476 gk20a_mem_flag_none, false,
477 mem->aperture);
478 if (!mem->gpu_va) {
479 err = -ENOMEM;
480 goto fail_free;
481 }
482
483 return 0;
484
485fail_free:
486 nvgpu_dma_free(vm->mm->g, mem);
487 return err;
488}
489
490static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
491{
492 struct device *d = dev_from_gk20a(g);
493
494 g->dma_memory_used -= mem->aligned_size;
495
496 dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");
497
498 if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
499 !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
500 (mem->cpu_va || mem->priv.pages)) {
501 /*
502 * Free side of WAR for bug 2040115.
503 */
504 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
505 vunmap(mem->cpu_va);
506
507 if (mem->priv.flags) {
508 NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
509
510 nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags);
511
512 if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
513 dma_free_attrs(d, mem->aligned_size, mem->priv.pages,
514 sg_dma_address(mem->priv.sgt->sgl),
515 NVGPU_DMA_ATTR(dma_attrs));
516 } else {
517 dma_free_attrs(d, mem->aligned_size, mem->cpu_va,
518 sg_dma_address(mem->priv.sgt->sgl),
519 NVGPU_DMA_ATTR(dma_attrs));
520 }
521 } else {
522 dma_free_coherent(d, mem->aligned_size, mem->cpu_va,
523 sg_dma_address(mem->priv.sgt->sgl));
524 }
525 mem->cpu_va = NULL;
526 mem->priv.pages = NULL;
527 }
528
529 /*
530 * When this flag is set we expect that pages is still populated but not
531 * by the DMA API.
532 */
533 if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA)
534 nvgpu_kfree(g, mem->priv.pages);
535
536 if (mem->priv.sgt)
537 nvgpu_free_sgtable(g, &mem->priv.sgt);
538
539 dma_dbg_free_done(g, mem->size, "sysmem");
540
541 mem->size = 0;
542 mem->aligned_size = 0;
543 mem->aperture = APERTURE_INVALID;
544}
545
546static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
547{
548#if defined(CONFIG_GK20A_VIDMEM)
549 size_t mem_size = mem->size;
550
551 dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
552
553 /* Sanity check - only this supported when allocating. */
554 WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
555
556 if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
557 int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
558
559 /*
560 * If there's an error here then that means we can't clear the
561 * vidmem. That's too bad; however, we still own the nvgpu_mem
562 * buf so we have to free that.
563 *
564 * We don't need to worry about the vidmem allocator itself
565 * since when that gets cleaned up in the driver shutdown path
566 * all the outstanding allocs are force freed.
567 */
568 if (err)
569 nvgpu_kfree(g, mem);
570 } else {
571 nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
572 nvgpu_free(mem->allocator,
573 (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
574 nvgpu_free_sgtable(g, &mem->priv.sgt);
575
576 mem->size = 0;
577 mem->aligned_size = 0;
578 mem->aperture = APERTURE_INVALID;
579 }
580
581 dma_dbg_free_done(g, mem_size, "vidmem");
582#endif
583}
584
585void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem)
586{
587 switch (mem->aperture) {
588 case APERTURE_SYSMEM:
589 return nvgpu_dma_free_sys(g, mem);
590 case APERTURE_VIDMEM:
591 return nvgpu_dma_free_vid(g, mem);
592 default:
593 break; /* like free() on "null" memory */
594 }
595}
596
597void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
598{
599 if (mem->gpu_va)
600 nvgpu_gmmu_unmap(vm, mem, mem->gpu_va);
601 mem->gpu_va = 0;
602
603 nvgpu_dma_free(vm->mm->g, mem);
604}
605
606int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt,
607 void *cpuva, u64 iova, size_t size, unsigned long flags)
608{
609 int err = 0;
610 struct sg_table *tbl;
611 NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
612
613 tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
614 if (!tbl) {
615 err = -ENOMEM;
616 goto fail;
617 }
618
619 nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
620 err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova,
621 size, NVGPU_DMA_ATTR(dma_attrs));
622 if (err)
623 goto fail;
624
625 sg_dma_address(tbl->sgl) = iova;
626 *sgt = tbl;
627
628 return 0;
629
630fail:
631 if (tbl)
632 nvgpu_kfree(g, tbl);
633
634 return err;
635}
636
637int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt,
638 void *cpuva, u64 iova, size_t size)
639{
640 return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0);
641}
642
643int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
644 struct page **pages, u64 iova, size_t size)
645{
646 int err = 0;
647 struct sg_table *tbl;
648
649 tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
650 if (!tbl) {
651 err = -ENOMEM;
652 goto fail;
653 }
654
655 err = sg_alloc_table_from_pages(tbl, pages,
656 DIV_ROUND_UP(size, PAGE_SIZE),
657 0, size, GFP_KERNEL);
658 if (err)
659 goto fail;
660
661 sg_dma_address(tbl->sgl) = iova;
662 *sgt = tbl;
663
664 return 0;
665
666fail:
667 if (tbl)
668 nvgpu_kfree(g, tbl);
669
670 return err;
671}
672
673void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt)
674{
675 sg_free_table(*sgt);
676 nvgpu_kfree(g, *sgt);
677 *sgt = NULL;
678}
679
680bool nvgpu_iommuable(struct gk20a *g)
681{
682#ifdef CONFIG_TEGRA_GK20A
683 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
684
685 /*
686 * Check against the nvgpu device to see if it's been marked as
687 * IOMMU'able.
688 */
689 if (!device_is_iommuable(l->dev))
690 return false;
691#endif
692
693 return true;
694}
diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c
deleted file mode 100644
index 129739f0..00000000
--- a/drivers/gpu/nvgpu/common/linux/dmabuf.c
+++ /dev/null
@@ -1,218 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/device.h>
18#include <linux/dma-buf.h>
19#include <linux/scatterlist.h>
20
21#include <nvgpu/comptags.h>
22#include <nvgpu/enabled.h>
23
24#include <nvgpu/linux/vm.h>
25#include <nvgpu/linux/vidmem.h>
26
27#include "gk20a/gk20a.h"
28
29#include "platform_gk20a.h"
30#include "dmabuf.h"
31#include "os_linux.h"
32
33static void gk20a_mm_delete_priv(void *_priv)
34{
35 struct gk20a_buffer_state *s, *s_tmp;
36 struct gk20a_dmabuf_priv *priv = _priv;
37 struct gk20a *g;
38
39 if (!priv)
40 return;
41
42 g = priv->g;
43
44 if (priv->comptags.allocated && priv->comptags.lines) {
45 BUG_ON(!priv->comptag_allocator);
46 gk20a_comptaglines_free(priv->comptag_allocator,
47 priv->comptags.offset,
48 priv->comptags.lines);
49 }
50
51 /* Free buffer states */
52 nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states,
53 gk20a_buffer_state, list) {
54 gk20a_fence_put(s->fence);
55 nvgpu_list_del(&s->list);
56 nvgpu_kfree(g, s);
57 }
58
59 nvgpu_kfree(g, priv);
60}
61
62enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
63 struct dma_buf *dmabuf)
64{
65 struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf);
66 bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY);
67
68 if (buf_owner == NULL) {
69 /* Not nvgpu-allocated, assume system memory */
70 return APERTURE_SYSMEM;
71 } else if (WARN_ON(buf_owner == g && unified_memory)) {
72 /* Looks like our video memory, but this gpu doesn't support
73 * it. Warn about a bug and bail out */
74 nvgpu_warn(g,
75 "dmabuf is our vidmem but we don't have local vidmem");
76 return APERTURE_INVALID;
77 } else if (buf_owner != g) {
78 /* Someone else's vidmem */
79 return APERTURE_INVALID;
80 } else {
81 /* Yay, buf_owner == g */
82 return APERTURE_VIDMEM;
83 }
84}
85
86struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
87 struct dma_buf_attachment **attachment)
88{
89 struct gk20a_dmabuf_priv *priv;
90
91 priv = dma_buf_get_drvdata(dmabuf, dev);
92 if (WARN_ON(!priv))
93 return ERR_PTR(-EINVAL);
94
95 nvgpu_mutex_acquire(&priv->lock);
96
97 if (priv->pin_count == 0) {
98 priv->attach = dma_buf_attach(dmabuf, dev);
99 if (IS_ERR(priv->attach)) {
100 nvgpu_mutex_release(&priv->lock);
101 return (struct sg_table *)priv->attach;
102 }
103
104 priv->sgt = dma_buf_map_attachment(priv->attach,
105 DMA_BIDIRECTIONAL);
106 if (IS_ERR(priv->sgt)) {
107 dma_buf_detach(dmabuf, priv->attach);
108 nvgpu_mutex_release(&priv->lock);
109 return priv->sgt;
110 }
111 }
112
113 priv->pin_count++;
114 nvgpu_mutex_release(&priv->lock);
115 *attachment = priv->attach;
116 return priv->sgt;
117}
118
119void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
120 struct dma_buf_attachment *attachment,
121 struct sg_table *sgt)
122{
123 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
124 dma_addr_t dma_addr;
125
126 if (IS_ERR(priv) || !priv)
127 return;
128
129 nvgpu_mutex_acquire(&priv->lock);
130 WARN_ON(priv->sgt != sgt);
131 WARN_ON(priv->attach != attachment);
132 priv->pin_count--;
133 WARN_ON(priv->pin_count < 0);
134 dma_addr = sg_dma_address(priv->sgt->sgl);
135 if (priv->pin_count == 0) {
136 dma_buf_unmap_attachment(priv->attach, priv->sgt,
137 DMA_BIDIRECTIONAL);
138 dma_buf_detach(dmabuf, priv->attach);
139 }
140 nvgpu_mutex_release(&priv->lock);
141}
142
143int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
144{
145 struct gk20a *g = gk20a_get_platform(dev)->g;
146 struct gk20a_dmabuf_priv *priv;
147
148 priv = dma_buf_get_drvdata(dmabuf, dev);
149 if (likely(priv))
150 return 0;
151
152 nvgpu_mutex_acquire(&g->mm.priv_lock);
153 priv = dma_buf_get_drvdata(dmabuf, dev);
154 if (priv)
155 goto priv_exist_or_err;
156
157 priv = nvgpu_kzalloc(g, sizeof(*priv));
158 if (!priv) {
159 priv = ERR_PTR(-ENOMEM);
160 goto priv_exist_or_err;
161 }
162
163 nvgpu_mutex_init(&priv->lock);
164 nvgpu_init_list_node(&priv->states);
165 priv->g = g;
166 dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
167
168priv_exist_or_err:
169 nvgpu_mutex_release(&g->mm.priv_lock);
170 if (IS_ERR(priv))
171 return -ENOMEM;
172
173 return 0;
174}
175
176int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
177 u64 offset, struct gk20a_buffer_state **state)
178{
179 int err = 0;
180 struct gk20a_dmabuf_priv *priv;
181 struct gk20a_buffer_state *s;
182 struct device *dev = dev_from_gk20a(g);
183
184 if (WARN_ON(offset >= (u64)dmabuf->size))
185 return -EINVAL;
186
187 err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
188 if (err)
189 return err;
190
191 priv = dma_buf_get_drvdata(dmabuf, dev);
192 if (WARN_ON(!priv))
193 return -ENOSYS;
194
195 nvgpu_mutex_acquire(&priv->lock);
196
197 nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list)
198 if (s->offset == offset)
199 goto out;
200
201 /* State not found, create state. */
202 s = nvgpu_kzalloc(g, sizeof(*s));
203 if (!s) {
204 err = -ENOMEM;
205 goto out;
206 }
207
208 s->offset = offset;
209 nvgpu_init_list_node(&s->list);
210 nvgpu_mutex_init(&s->lock);
211 nvgpu_list_add_tail(&s->list, &priv->states);
212
213out:
214 nvgpu_mutex_release(&priv->lock);
215 if (!err)
216 *state = s;
217 return err;
218}
diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.h b/drivers/gpu/nvgpu/common/linux/dmabuf.h
deleted file mode 100644
index 8399eaaf..00000000
--- a/drivers/gpu/nvgpu/common/linux/dmabuf.h
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __COMMON_LINUX_DMABUF_H__
18#define __COMMON_LINUX_DMABUF_H__
19
20#include <nvgpu/comptags.h>
21#include <nvgpu/list.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/gmmu.h>
24
25struct sg_table;
26struct dma_buf;
27struct dma_buf_attachment;
28struct device;
29
30struct gk20a;
31struct gk20a_buffer_state;
32
33struct gk20a_dmabuf_priv {
34 struct nvgpu_mutex lock;
35
36 struct gk20a *g;
37
38 struct gk20a_comptag_allocator *comptag_allocator;
39 struct gk20a_comptags comptags;
40
41 struct dma_buf_attachment *attach;
42 struct sg_table *sgt;
43
44 int pin_count;
45
46 struct nvgpu_list_node states;
47
48 u64 buffer_id;
49};
50
51struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
52 struct dma_buf_attachment **attachment);
53void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
54 struct dma_buf_attachment *attachment,
55 struct sg_table *sgt);
56
57int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
58
59int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
60 u64 offset, struct gk20a_buffer_state **state);
61
62#endif
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
deleted file mode 100644
index 8f33c5d2..00000000
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ /dev/null
@@ -1,334 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/reboot.h>
18#include <linux/dma-mapping.h>
19#include <linux/mm.h>
20#include <uapi/linux/nvgpu.h>
21
22#include <nvgpu/defaults.h>
23#include <nvgpu/kmem.h>
24#include <nvgpu/nvgpu_common.h>
25#include <nvgpu/soc.h>
26#include <nvgpu/bug.h>
27#include <nvgpu/enabled.h>
28#include <nvgpu/debug.h>
29#include <nvgpu/sizes.h>
30
31#include "gk20a/gk20a.h"
32#include "platform_gk20a.h"
33#include "module.h"
34#include "os_linux.h"
35#include "sysfs.h"
36#include "ioctl.h"
37#include "gk20a/regops_gk20a.h"
38
39#define EMC3D_DEFAULT_RATIO 750
40
41void nvgpu_kernel_restart(void *cmd)
42{
43 kernel_restart(cmd);
44}
45
46static void nvgpu_init_vars(struct gk20a *g)
47{
48 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
49 struct device *dev = dev_from_gk20a(g);
50 struct gk20a_platform *platform = dev_get_drvdata(dev);
51
52 nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq);
53 nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq);
54
55 init_rwsem(&l->busy_lock);
56 nvgpu_rwsem_init(&g->deterministic_busy);
57
58 nvgpu_spinlock_init(&g->mc_enable_lock);
59
60 nvgpu_mutex_init(&platform->railgate_lock);
61 nvgpu_mutex_init(&g->dbg_sessions_lock);
62 nvgpu_mutex_init(&g->client_lock);
63 nvgpu_mutex_init(&g->poweron_lock);
64 nvgpu_mutex_init(&g->poweroff_lock);
65 nvgpu_mutex_init(&g->ctxsw_disable_lock);
66
67 l->regs_saved = l->regs;
68 l->bar1_saved = l->bar1;
69
70 g->emc3d_ratio = EMC3D_DEFAULT_RATIO;
71
72 /* Set DMA parameters to allow larger sgt lists */
73 dev->dma_parms = &l->dma_parms;
74 dma_set_max_seg_size(dev, UINT_MAX);
75
76 /*
77 * A default of 16GB is the largest supported DMA size that is
78 * acceptable to all currently supported Tegra SoCs.
79 */
80 if (!platform->dma_mask)
81 platform->dma_mask = DMA_BIT_MASK(34);
82
83 dma_set_mask(dev, platform->dma_mask);
84 dma_set_coherent_mask(dev, platform->dma_mask);
85
86 nvgpu_init_list_node(&g->profiler_objects);
87
88 nvgpu_init_list_node(&g->boardobj_head);
89 nvgpu_init_list_node(&g->boardobjgrp_head);
90}
91
92static void nvgpu_init_gr_vars(struct gk20a *g)
93{
94 gk20a_init_gr(g);
95
96 nvgpu_log_info(g, "total ram pages : %lu", totalram_pages);
97 g->gr.max_comptag_mem = totalram_pages
98 >> (10 - (PAGE_SHIFT - 10));
99}
100
101static void nvgpu_init_timeout(struct gk20a *g)
102{
103 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
104
105 g->timeouts_disabled_by_user = false;
106 nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0);
107
108 if (nvgpu_platform_is_silicon(g)) {
109 g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT;
110 } else if (nvgpu_platform_is_fpga(g)) {
111 g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA;
112 } else {
113 g->gr_idle_timeout_default = (u32)ULONG_MAX;
114 }
115 g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
116 g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
117}
118
119static void nvgpu_init_timeslice(struct gk20a *g)
120{
121 g->runlist_interleave = true;
122
123 g->timeslice_low_priority_us = 1300;
124 g->timeslice_medium_priority_us = 2600;
125 g->timeslice_high_priority_us = 5200;
126
127 g->min_timeslice_us = 1000;
128 g->max_timeslice_us = 50000;
129}
130
131static void nvgpu_init_pm_vars(struct gk20a *g)
132{
133 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
134
135 /*
136 * Set up initial power settings. For non-silicon platforms, disable
137 * power features; for silicon platforms, read them from platform data.
138 */
139 g->slcg_enabled =
140 nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false;
141 g->blcg_enabled =
142 nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false;
143 g->elcg_enabled =
144 nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false;
145 g->elpg_enabled =
146 nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false;
147 g->aelpg_enabled =
148 nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false;
149 g->mscg_enabled =
150 nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false;
151 g->can_elpg =
152 nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false;
153
154 __nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG,
155 nvgpu_platform_is_silicon(g) ? platform->can_elcg : false);
156 __nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG,
157 nvgpu_platform_is_silicon(g) ? platform->can_slcg : false);
158 __nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG,
159 nvgpu_platform_is_silicon(g) ? platform->can_blcg : false);
160
161 g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
162 g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
163 g->has_syncpoints = platform->has_syncpoints;
164#ifdef CONFIG_NVGPU_SUPPORT_CDE
165 g->has_cde = platform->has_cde;
166#endif
167 g->ptimer_src_freq = platform->ptimer_src_freq;
168 g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
169 g->can_railgate = platform->can_railgate_init;
170 g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
171 /* if the platform does not set a railgate delay, default to 500 msec */
172 if (platform->railgate_delay_init)
173 g->railgate_delay = platform->railgate_delay_init;
174 else
175 g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT;
176 __nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon);
177
178 /* set default values to aelpg parameters */
179 g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
180 g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
181 g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
182 g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US;
183 g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
184
185 __nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm);
186}
187
188static void nvgpu_init_vbios_vars(struct gk20a *g)
189{
190 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
191
192 __nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos);
193 g->vbios_min_version = platform->vbios_min_version;
194}
195
196static void nvgpu_init_ltc_vars(struct gk20a *g)
197{
198 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
199
200 g->ltc_streamid = platform->ltc_streamid;
201}
202
203static void nvgpu_init_mm_vars(struct gk20a *g)
204{
205 struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
206
207 g->mm.disable_bigpage = platform->disable_bigpage;
208 __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
209 platform->honors_aperture);
210 __nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY,
211 platform->unified_memory);
212 __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
213 platform->unify_address_spaces);
214
215 nvgpu_mutex_init(&g->mm.tlb_lock);
216 nvgpu_mutex_init(&g->mm.priv_lock);
217}
218
219int nvgpu_probe(struct gk20a *g,
220 const char *debugfs_symlink,
221 const char *interface_name,
222 struct class *class)
223{
224 struct device *dev = dev_from_gk20a(g);
225 struct gk20a_platform *platform = dev_get_drvdata(dev);
226 int err = 0;
227
228 nvgpu_init_vars(g);
229 nvgpu_init_gr_vars(g);
230 nvgpu_init_timeout(g);
231 nvgpu_init_timeslice(g);
232 nvgpu_init_pm_vars(g);
233 nvgpu_init_vbios_vars(g);
234 nvgpu_init_ltc_vars(g);
235 err = nvgpu_init_soc_vars(g);
236 if (err) {
237 nvgpu_err(g, "init soc vars failed");
238 return err;
239 }
240
241 /* Initialize the platform interface. */
242 err = platform->probe(dev);
243 if (err) {
244 if (err == -EPROBE_DEFER)
245 nvgpu_info(g, "platform probe failed");
246 else
247 nvgpu_err(g, "platform probe failed");
248 return err;
249 }
250
251 nvgpu_init_mm_vars(g);
252
253 /* platform probe can defer; do user init only if probe succeeds */
254 err = gk20a_user_init(dev, interface_name, class);
255 if (err)
256 return err;
257
258 if (platform->late_probe) {
259 err = platform->late_probe(dev);
260 if (err) {
261 nvgpu_err(g, "late probe failed");
262 return err;
263 }
264 }
265
266 nvgpu_create_sysfs(dev);
267 gk20a_debug_init(g, debugfs_symlink);
268
269 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
270 if (!g->dbg_regops_tmp_buf) {
271 nvgpu_err(g, "couldn't allocate regops tmp buf");
272 return -ENOMEM;
273 }
274 g->dbg_regops_tmp_buf_ops =
275 SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
276
277 g->remove_support = gk20a_remove_support;
278
279 nvgpu_ref_init(&g->refcount);
280
281 return 0;
282}
283
284/**
285 * cyclic_delta - Returns delta of cyclic integers a and b.
286 *
287 * @a - First integer
288 * @b - Second integer
289 *
290 * Note: if a is ahead of b, delta is positive.
291 */
292static int cyclic_delta(int a, int b)
293{
294 return a - b;
295}
296
297/**
298 * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete
299 *
300 * @g - The GPU to wait on.
301 *
302 * Waits until all interrupt handlers that have been scheduled to run have
303 * completed.
304 */
305void nvgpu_wait_for_deferred_interrupts(struct gk20a *g)
306{
307 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
308 int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count);
309 int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count);
310
311 /* wait until all stalling irqs are handled */
312 NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq,
313 cyclic_delta(stall_irq_threshold,
314 atomic_read(&l->sw_irq_stall_last_handled))
315 <= 0, 0);
316
317 /* wait until all non-stalling irqs are handled */
318 NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq,
319 cyclic_delta(nonstall_irq_threshold,
320 atomic_read(&l->sw_irq_nonstall_last_handled))
321 <= 0, 0);
322}
323
324static void nvgpu_free_gk20a(struct gk20a *g)
325{
326 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
327
328 kfree(l);
329}
330
331void nvgpu_init_gk20a(struct gk20a *g)
332{
333 g->free = nvgpu_free_gk20a;
334}
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.h b/drivers/gpu/nvgpu/common/linux/driver_common.h
deleted file mode 100644
index 6f42f775..00000000
--- a/drivers/gpu/nvgpu/common/linux/driver_common.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef NVGPU_LINUX_DRIVER_COMMON
18#define NVGPU_LINUX_DRIVER_COMMON
19
20void nvgpu_init_gk20a(struct gk20a *g);
21
22#endif
diff --git a/drivers/gpu/nvgpu/common/linux/dt.c b/drivers/gpu/nvgpu/common/linux/dt.c
deleted file mode 100644
index 88e391e3..00000000
--- a/drivers/gpu/nvgpu/common/linux/dt.c
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/dt.h>
18#include <linux/of.h>
19
20#include "os_linux.h"
21
22int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name,
23 u32 index, u32 *value)
24{
25 struct device *dev = dev_from_gk20a(g);
26 struct device_node *np = dev->of_node;
27
28 return of_property_read_u32_index(np, name, index, value);
29}
diff --git a/drivers/gpu/nvgpu/common/linux/firmware.c b/drivers/gpu/nvgpu/common/linux/firmware.c
deleted file mode 100644
index 9a4dc653..00000000
--- a/drivers/gpu/nvgpu/common/linux/firmware.c
+++ /dev/null
@@ -1,117 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/firmware.h>
18
19#include <nvgpu/kmem.h>
20#include <nvgpu/bug.h>
21#include <nvgpu/firmware.h>
22
23#include "gk20a/gk20a.h"
24#include "platform_gk20a.h"
25#include "os_linux.h"
26
27static const struct firmware *do_request_firmware(struct device *dev,
28 const char *prefix, const char *fw_name, int flags)
29{
30 const struct firmware *fw;
31 char *fw_path = NULL;
32 int path_len, err;
33
34 if (prefix) {
35 path_len = strlen(prefix) + strlen(fw_name);
36 path_len += 2; /* for the path separator and zero terminator */
37
38 fw_path = nvgpu_kzalloc(get_gk20a(dev),
39 sizeof(*fw_path) * path_len);
40 if (!fw_path)
41 return NULL;
42
43 sprintf(fw_path, "%s/%s", prefix, fw_name);
44 fw_name = fw_path;
45 }
46
47 if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN)
48 err = request_firmware_direct(&fw, fw_name, dev);
49 else
50 err = request_firmware(&fw, fw_name, dev);
51
52 nvgpu_kfree(get_gk20a(dev), fw_path);
53 if (err)
54 return NULL;
55 return fw;
56}
57
58/* This is a simple wrapper around request_firmware that takes 'fw_name' and
59 * applies an IP specific relative path prefix to it. The caller is
60 * responsible for calling nvgpu_release_firmware later. */
61struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g,
62 const char *fw_name,
63 int flags)
64{
65 struct device *dev = dev_from_gk20a(g);
66 struct nvgpu_firmware *fw;
67 const struct firmware *linux_fw;
68
69 /* current->fs is NULL when calling from SYS_EXIT.
70 Add a check here to prevent crash in request_firmware */
71 if (!current->fs || !fw_name)
72 return NULL;
73
74 fw = nvgpu_kzalloc(g, sizeof(*fw));
75 if (!fw)
76 return NULL;
77
78 linux_fw = do_request_firmware(dev, g->name, fw_name, flags);
79
80#ifdef CONFIG_TEGRA_GK20A
81 /* TO BE REMOVED - Support loading from legacy SOC specific path. */
82 if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) {
83 struct gk20a_platform *platform = gk20a_get_platform(dev);
84 linux_fw = do_request_firmware(dev,
85 platform->soc_name, fw_name, flags);
86 }
87#endif
88
89 if (!linux_fw)
90 goto err;
91
92 fw->data = nvgpu_kmalloc(g, linux_fw->size);
93 if (!fw->data)
94 goto err_release;
95
96 memcpy(fw->data, linux_fw->data, linux_fw->size);
97 fw->size = linux_fw->size;
98
99 release_firmware(linux_fw);
100
101 return fw;
102
103err_release:
104 release_firmware(linux_fw);
105err:
106 nvgpu_kfree(g, fw);
107 return NULL;
108}
109
110void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw)
111{
112 if (!fw)
113 return;
114
115 nvgpu_kfree(g, fw->data);
116 nvgpu_kfree(g, fw);
117}
diff --git a/drivers/gpu/nvgpu/common/linux/fuse.c b/drivers/gpu/nvgpu/common/linux/fuse.c
deleted file mode 100644
index 27851f92..00000000
--- a/drivers/gpu/nvgpu/common/linux/fuse.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <soc/tegra/fuse.h>
15
16#include <nvgpu/fuse.h>
17
18int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g)
19{
20 return tegra_sku_info.gpu_speedo_id;
21}
22
23/*
24 * Use tegra_fuse_control_read/write() APIs for fuse offsets up to 0x100
25 * Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100
26 */
27void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val)
28{
29 tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0);
30}
31
32void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val)
33{
34 tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0);
35}
36
37void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val)
38{
39 tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0);
40}
41
42void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val)
43{
44 tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0);
45}
46
47int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val)
48{
49 return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val);
50}
51
52int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val)
53{
54 return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val);
55}
diff --git a/drivers/gpu/nvgpu/common/linux/intr.c b/drivers/gpu/nvgpu/common/linux/intr.c
deleted file mode 100644
index 7ffc7e87..00000000
--- a/drivers/gpu/nvgpu/common/linux/intr.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/*
2 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <trace/events/gk20a.h>
15#include <linux/irqreturn.h>
16
17#include "gk20a/gk20a.h"
18#include "gk20a/mc_gk20a.h"
19
20#include <nvgpu/atomic.h>
21#include <nvgpu/unit.h>
22#include "os_linux.h"
23
24irqreturn_t nvgpu_intr_stall(struct gk20a *g)
25{
26 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
27 u32 mc_intr_0;
28
29 trace_mc_gk20a_intr_stall(g->name);
30
31 if (!g->power_on)
32 return IRQ_NONE;
33
34 /* not from gpu when sharing irq with others */
35 mc_intr_0 = g->ops.mc.intr_stall(g);
36 if (unlikely(!mc_intr_0))
37 return IRQ_NONE;
38
39 g->ops.mc.intr_stall_pause(g);
40
41 atomic_inc(&l->hw_irq_stall_count);
42
43 trace_mc_gk20a_intr_stall_done(g->name);
44
45 return IRQ_WAKE_THREAD;
46}
47
48irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g)
49{
50 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
51 int hw_irq_count;
52
53 nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched");
54
55 trace_mc_gk20a_intr_thread_stall(g->name);
56
57 hw_irq_count = atomic_read(&l->hw_irq_stall_count);
58 g->ops.mc.isr_stall(g);
59 g->ops.mc.intr_stall_resume(g);
60 /* sync handled irq counter before re-enabling interrupts */
61 atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count);
62
63 nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq);
64
65 trace_mc_gk20a_intr_thread_stall_done(g->name);
66
67 return IRQ_HANDLED;
68}
69
70irqreturn_t nvgpu_intr_nonstall(struct gk20a *g)
71{
72 u32 non_stall_intr_val;
73 u32 hw_irq_count;
74 int ops_old, ops_new, ops = 0;
75 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
76
77 if (!g->power_on)
78 return IRQ_NONE;
79
80 /* not from gpu when sharing irq with others */
81 non_stall_intr_val = g->ops.mc.intr_nonstall(g);
82 if (unlikely(!non_stall_intr_val))
83 return IRQ_NONE;
84
85 g->ops.mc.intr_nonstall_pause(g);
86
87 ops = g->ops.mc.isr_nonstall(g);
88 if (ops) {
89 do {
90 ops_old = atomic_read(&l->nonstall_ops);
91 ops_new = ops_old | ops;
92 } while (ops_old != atomic_cmpxchg(&l->nonstall_ops,
93 ops_old, ops_new));
94
95 queue_work(l->nonstall_work_queue, &l->nonstall_fn_work);
96 }
97
98 hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count);
99
100 /* sync handled irq counter before re-enabling interrupts */
101 atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count);
102
103 g->ops.mc.intr_nonstall_resume(g);
104
105 nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq);
106
107 return IRQ_HANDLED;
108}
109
110void nvgpu_intr_nonstall_cb(struct work_struct *work)
111{
112 struct nvgpu_os_linux *l =
113 container_of(work, struct nvgpu_os_linux, nonstall_fn_work);
114 struct gk20a *g = &l->g;
115
116 do {
117 u32 ops;
118
119 ops = atomic_xchg(&l->nonstall_ops, 0);
120 mc_gk20a_handle_intr_nonstall(g, ops);
121 } while (atomic_read(&l->nonstall_ops) != 0);
122}
diff --git a/drivers/gpu/nvgpu/common/linux/intr.h b/drivers/gpu/nvgpu/common/linux/intr.h
deleted file mode 100644
index d43cdccb..00000000
--- a/drivers/gpu/nvgpu/common/linux/intr.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#ifndef __NVGPU_LINUX_INTR_H__
15#define __NVGPU_LINUX_INTR_H__
16struct gk20a;
17
18irqreturn_t nvgpu_intr_stall(struct gk20a *g);
19irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g);
20irqreturn_t nvgpu_intr_nonstall(struct gk20a *g);
21void nvgpu_intr_nonstall_cb(struct work_struct *work);
22#endif
diff --git a/drivers/gpu/nvgpu/common/linux/io.c b/drivers/gpu/nvgpu/common/linux/io.c
deleted file mode 100644
index c06512a5..00000000
--- a/drivers/gpu/nvgpu/common/linux/io.c
+++ /dev/null
@@ -1,118 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/io.h>
15#include <nvgpu/types.h>
16
17#include "os_linux.h"
18#include "gk20a/gk20a.h"
19
20void nvgpu_writel(struct gk20a *g, u32 r, u32 v)
21{
22 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
23
24 if (unlikely(!l->regs)) {
25 __gk20a_warn_on_no_regs();
26 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
27 } else {
28 writel_relaxed(v, l->regs + r);
29 nvgpu_wmb();
30 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
31 }
32}
33
34u32 nvgpu_readl(struct gk20a *g, u32 r)
35{
36 u32 v = __nvgpu_readl(g, r);
37
38 if (v == 0xffffffff)
39 __nvgpu_check_gpu_state(g);
40
41 return v;
42}
43
44u32 __nvgpu_readl(struct gk20a *g, u32 r)
45{
46 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
47 u32 v = 0xffffffff;
48
49 if (unlikely(!l->regs)) {
50 __gk20a_warn_on_no_regs();
51 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
52 } else {
53 v = readl(l->regs + r);
54 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
55 }
56
57 return v;
58}
59
60void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v)
61{
62 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
63
64 if (unlikely(!l->regs)) {
65 __gk20a_warn_on_no_regs();
66 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
67 } else {
68 nvgpu_wmb();
69 do {
70 writel_relaxed(v, l->regs + r);
71 } while (readl(l->regs + r) != v);
72 nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
73 }
74}
75
76void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v)
77{
78 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
79
80 if (unlikely(!l->bar1)) {
81 __gk20a_warn_on_no_regs();
82 nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
83 } else {
84 nvgpu_wmb();
85 writel_relaxed(v, l->bar1 + b);
86 nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
87 }
88}
89
90u32 nvgpu_bar1_readl(struct gk20a *g, u32 b)
91{
92 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
93 u32 v = 0xffffffff;
94
95 if (unlikely(!l->bar1)) {
96 __gk20a_warn_on_no_regs();
97 nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
98 } else {
99 v = readl(l->bar1 + b);
100 nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
101 }
102
103 return v;
104}
105
106bool nvgpu_io_exists(struct gk20a *g)
107{
108 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
109
110 return l->regs != NULL;
111}
112
113bool nvgpu_io_valid_reg(struct gk20a *g, u32 r)
114{
115 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
116
117 return r < resource_size(l->regs);
118}
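
nvgpu_writel_check() above guarantees a register write took effect by re-writing until a read-back returns the value that was written. A small sketch of that write-then-verify loop, with a plain variable standing in for the MMIO register so it runs anywhere; mmio_write()/mmio_read() are invented stand-ins for writel_relaxed()/readl(), not kernel API.

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg;	/* stand-in for a memory-mapped register */

static void mmio_write(uint32_t v) { fake_reg = v; }
static uint32_t mmio_read(void)    { return fake_reg; }

/* keep writing until the read-back confirms the value stuck */
static void write_check(uint32_t v)
{
	do {
		mmio_write(v);
	} while (mmio_read() != v);
}

int main(void)
{
	write_check(0xcafef00d);
	printf("reg = 0x%x\n", mmio_read());
	return 0;
}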
diff --git a/drivers/gpu/nvgpu/common/linux/io_usermode.c b/drivers/gpu/nvgpu/common/linux/io_usermode.c
deleted file mode 100644
index a7b728dd..00000000
--- a/drivers/gpu/nvgpu/common/linux/io_usermode.c
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/io.h>
15#include <nvgpu/types.h>
16
17#include "common/linux/os_linux.h"
18#include "gk20a/gk20a.h"
19
20#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
21
22void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v)
23{
24 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
25 void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r());
26
27 writel_relaxed(v, reg);
28 nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v);
29}
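
The usermode doorbell write above turns a full register address r into an offset inside the usermode aperture by subtracting the aperture's first register (usermode_cfg0_r()). A tiny sketch of that address math follows; the constants are made up for illustration and do not correspond to the real hw_usermode_gv11b definitions.

#include <stdint.h>
#include <stdio.h>

#define USERMODE_CFG0      0x00810000u	/* illustrative aperture base register */
#define USERMODE_DOORBELL  0x00810090u	/* illustrative register inside the aperture */

int main(void)
{
	uint32_t offset = USERMODE_DOORBELL - USERMODE_CFG0;

	/* l->usermode_regs + offset is the address the driver pokes */
	printf("doorbell offset in usermode aperture: 0x%x\n", offset);
	return 0;
}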
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl.c b/drivers/gpu/nvgpu/common/linux/ioctl.c
deleted file mode 100644
index 359e5103..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl.c
+++ /dev/null
@@ -1,296 +0,0 @@
1/*
2 * NVGPU IOCTLs
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/file.h>
20
21#include <nvgpu/nvgpu_common.h>
22#include <nvgpu/ctxsw_trace.h>
23
24#include "gk20a/gk20a.h"
25#include "gk20a/dbg_gpu_gk20a.h"
26
27#include "ioctl_channel.h"
28#include "ioctl_ctrl.h"
29#include "ioctl_as.h"
30#include "ioctl_tsg.h"
31#include "ioctl_dbg.h"
32#include "module.h"
33#include "os_linux.h"
34#include "ctxsw_trace.h"
35#include "platform_gk20a.h"
36
37#define GK20A_NUM_CDEVS 7
38
39const struct file_operations gk20a_channel_ops = {
40 .owner = THIS_MODULE,
41 .release = gk20a_channel_release,
42 .open = gk20a_channel_open,
43#ifdef CONFIG_COMPAT
44 .compat_ioctl = gk20a_channel_ioctl,
45#endif
46 .unlocked_ioctl = gk20a_channel_ioctl,
47};
48
49static const struct file_operations gk20a_ctrl_ops = {
50 .owner = THIS_MODULE,
51 .release = gk20a_ctrl_dev_release,
52 .open = gk20a_ctrl_dev_open,
53 .unlocked_ioctl = gk20a_ctrl_dev_ioctl,
54#ifdef CONFIG_COMPAT
55 .compat_ioctl = gk20a_ctrl_dev_ioctl,
56#endif
57};
58
59static const struct file_operations gk20a_dbg_ops = {
60 .owner = THIS_MODULE,
61 .release = gk20a_dbg_gpu_dev_release,
62 .open = gk20a_dbg_gpu_dev_open,
63 .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
64 .poll = gk20a_dbg_gpu_dev_poll,
65#ifdef CONFIG_COMPAT
66 .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
67#endif
68};
69
70static const struct file_operations gk20a_as_ops = {
71 .owner = THIS_MODULE,
72 .release = gk20a_as_dev_release,
73 .open = gk20a_as_dev_open,
74#ifdef CONFIG_COMPAT
75 .compat_ioctl = gk20a_as_dev_ioctl,
76#endif
77 .unlocked_ioctl = gk20a_as_dev_ioctl,
78};
79
80/*
81 * Note: We use a different 'open' to trigger handling of the profiler session.
82 * Most of the code is shared between them... Though, at some point if the
83 * code does get too tangled trying to handle each in the same path we can
84 * separate them cleanly.
85 */
86static const struct file_operations gk20a_prof_ops = {
87 .owner = THIS_MODULE,
88 .release = gk20a_dbg_gpu_dev_release,
89 .open = gk20a_prof_gpu_dev_open,
90 .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
91#ifdef CONFIG_COMPAT
92 .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
93#endif
94};
95
96static const struct file_operations gk20a_tsg_ops = {
97 .owner = THIS_MODULE,
98 .release = nvgpu_ioctl_tsg_dev_release,
99 .open = nvgpu_ioctl_tsg_dev_open,
100#ifdef CONFIG_COMPAT
101 .compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
102#endif
103 .unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
104};
105
106#ifdef CONFIG_GK20A_CTXSW_TRACE
107static const struct file_operations gk20a_ctxsw_ops = {
108 .owner = THIS_MODULE,
109 .release = gk20a_ctxsw_dev_release,
110 .open = gk20a_ctxsw_dev_open,
111#ifdef CONFIG_COMPAT
112 .compat_ioctl = gk20a_ctxsw_dev_ioctl,
113#endif
114 .unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
115 .poll = gk20a_ctxsw_dev_poll,
116 .read = gk20a_ctxsw_dev_read,
117 .mmap = gk20a_ctxsw_dev_mmap,
118};
119#endif
120
121static const struct file_operations gk20a_sched_ops = {
122 .owner = THIS_MODULE,
123 .release = gk20a_sched_dev_release,
124 .open = gk20a_sched_dev_open,
125#ifdef CONFIG_COMPAT
126 .compat_ioctl = gk20a_sched_dev_ioctl,
127#endif
128 .unlocked_ioctl = gk20a_sched_dev_ioctl,
129 .poll = gk20a_sched_dev_poll,
130 .read = gk20a_sched_dev_read,
131};
132
133static int gk20a_create_device(
134 struct device *dev, int devno,
135 const char *interface_name, const char *cdev_name,
136 struct cdev *cdev, struct device **out,
137 const struct file_operations *ops,
138 struct class *class)
139{
140 struct device *subdev;
141 int err;
142 struct gk20a *g = gk20a_from_dev(dev);
143
144 nvgpu_log_fn(g, " ");
145
146 cdev_init(cdev, ops);
147 cdev->owner = THIS_MODULE;
148
149 err = cdev_add(cdev, devno, 1);
150 if (err) {
151 dev_err(dev, "failed to add %s cdev\n", cdev_name);
152 return err;
153 }
154
155 subdev = device_create(class, NULL, devno, NULL,
156 interface_name, cdev_name);
157
158 if (IS_ERR(subdev)) {
159 		err = PTR_ERR(subdev);
160 cdev_del(cdev);
161 dev_err(dev, "failed to create %s device for %s\n",
162 cdev_name, dev_name(dev));
163 return err;
164 }
165
166 *out = subdev;
167 return 0;
168}
169
170void gk20a_user_deinit(struct device *dev, struct class *class)
171{
172 struct gk20a *g = gk20a_from_dev(dev);
173 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
174
175 if (l->channel.node) {
176 device_destroy(class, l->channel.cdev.dev);
177 cdev_del(&l->channel.cdev);
178 }
179
180 if (l->as_dev.node) {
181 device_destroy(class, l->as_dev.cdev.dev);
182 cdev_del(&l->as_dev.cdev);
183 }
184
185 if (l->ctrl.node) {
186 device_destroy(class, l->ctrl.cdev.dev);
187 cdev_del(&l->ctrl.cdev);
188 }
189
190 if (l->dbg.node) {
191 device_destroy(class, l->dbg.cdev.dev);
192 cdev_del(&l->dbg.cdev);
193 }
194
195 if (l->prof.node) {
196 device_destroy(class, l->prof.cdev.dev);
197 cdev_del(&l->prof.cdev);
198 }
199
200 if (l->tsg.node) {
201 device_destroy(class, l->tsg.cdev.dev);
202 cdev_del(&l->tsg.cdev);
203 }
204
205 if (l->ctxsw.node) {
206 device_destroy(class, l->ctxsw.cdev.dev);
207 cdev_del(&l->ctxsw.cdev);
208 }
209
210 if (l->sched.node) {
211 device_destroy(class, l->sched.cdev.dev);
212 cdev_del(&l->sched.cdev);
213 }
214
215 if (l->cdev_region)
216 unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS);
217}
218
219int gk20a_user_init(struct device *dev, const char *interface_name,
220 struct class *class)
221{
222 int err;
223 dev_t devno;
224 struct gk20a *g = gk20a_from_dev(dev);
225 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
226
227 err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev));
228 if (err) {
229 dev_err(dev, "failed to allocate devno\n");
230 goto fail;
231 }
232 l->cdev_region = devno;
233
234 err = gk20a_create_device(dev, devno++, interface_name, "",
235 &l->channel.cdev, &l->channel.node,
236 &gk20a_channel_ops,
237 class);
238 if (err)
239 goto fail;
240
241 err = gk20a_create_device(dev, devno++, interface_name, "-as",
242 &l->as_dev.cdev, &l->as_dev.node,
243 &gk20a_as_ops,
244 class);
245 if (err)
246 goto fail;
247
248 err = gk20a_create_device(dev, devno++, interface_name, "-ctrl",
249 &l->ctrl.cdev, &l->ctrl.node,
250 &gk20a_ctrl_ops,
251 class);
252 if (err)
253 goto fail;
254
255 err = gk20a_create_device(dev, devno++, interface_name, "-dbg",
256 &l->dbg.cdev, &l->dbg.node,
257 &gk20a_dbg_ops,
258 class);
259 if (err)
260 goto fail;
261
262 err = gk20a_create_device(dev, devno++, interface_name, "-prof",
263 &l->prof.cdev, &l->prof.node,
264 &gk20a_prof_ops,
265 class);
266 if (err)
267 goto fail;
268
269 err = gk20a_create_device(dev, devno++, interface_name, "-tsg",
270 &l->tsg.cdev, &l->tsg.node,
271 &gk20a_tsg_ops,
272 class);
273 if (err)
274 goto fail;
275
276#if defined(CONFIG_GK20A_CTXSW_TRACE)
277 err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw",
278 &l->ctxsw.cdev, &l->ctxsw.node,
279 &gk20a_ctxsw_ops,
280 class);
281 if (err)
282 goto fail;
283#endif
284
285 err = gk20a_create_device(dev, devno++, interface_name, "-sched",
286 &l->sched.cdev, &l->sched.node,
287 &gk20a_sched_ops,
288 class);
289 if (err)
290 goto fail;
291
292 return 0;
293fail:
294 gk20a_user_deinit(dev, &nvgpu_class);
295 return err;
296}
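
gk20a_user_init()/gk20a_user_deinit() follow the standard character-device pattern: reserve a range of minors once, then for each node do cdev_init() + cdev_add() + device_create(), and on any failure unwind everything through the same teardown path. The sketch below shows that pattern against stock kernel APIs in a generic module; it is not nvgpu code, and all "demo" names are invented for the example.

#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>

#define DEMO_NUM_CDEVS 2

static dev_t demo_region;
static struct cdev demo_cdev[DEMO_NUM_CDEVS];
static struct class *demo_class;

static const struct file_operations demo_fops = {
	.owner = THIS_MODULE,
};

static int __init demo_init(void)
{
	int i, err;

	err = alloc_chrdev_region(&demo_region, 0, DEMO_NUM_CDEVS, "demo");
	if (err)
		return err;

	demo_class = class_create(THIS_MODULE, "demo");
	if (IS_ERR(demo_class)) {
		err = PTR_ERR(demo_class);
		goto fail_region;
	}

	for (i = 0; i < DEMO_NUM_CDEVS; i++) {
		dev_t devno = demo_region + i;
		struct device *node;

		cdev_init(&demo_cdev[i], &demo_fops);
		demo_cdev[i].owner = THIS_MODULE;

		err = cdev_add(&demo_cdev[i], devno, 1);
		if (err)
			goto fail_nodes;

		node = device_create(demo_class, NULL, devno, NULL,
				     "demo%d", i);
		if (IS_ERR(node)) {
			err = PTR_ERR(node);	/* the new pointer, not some parent dev */
			cdev_del(&demo_cdev[i]);
			goto fail_nodes;
		}
	}
	return 0;

fail_nodes:
	while (--i >= 0) {
		device_destroy(demo_class, demo_region + i);
		cdev_del(&demo_cdev[i]);
	}
	class_destroy(demo_class);
fail_region:
	unregister_chrdev_region(demo_region, DEMO_NUM_CDEVS);
	return err;
}

static void __exit demo_exit(void)
{
	int i;

	for (i = 0; i < DEMO_NUM_CDEVS; i++) {
		device_destroy(demo_class, demo_region + i);
		cdev_del(&demo_cdev[i]);
	}
	class_destroy(demo_class);
	unregister_chrdev_region(demo_region, DEMO_NUM_CDEVS);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");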
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl.h b/drivers/gpu/nvgpu/common/linux/ioctl.h
deleted file mode 100644
index 7bf16711..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef __NVGPU_IOCTL_H__
14#define __NVGPU_IOCTL_H__
15
16struct device;
17struct class;
18
19int gk20a_user_init(struct device *dev, const char *interface_name,
20 struct class *class);
21void gk20a_user_deinit(struct device *dev, struct class *class);
22
23#endif
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
deleted file mode 100644
index 47f612cc..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c
+++ /dev/null
@@ -1,423 +0,0 @@
1/*
2 * GK20A Address Spaces
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/cdev.h>
17#include <linux/uaccess.h>
18#include <linux/fs.h>
19
20#include <trace/events/gk20a.h>
21
22#include <uapi/linux/nvgpu.h>
23
24#include <nvgpu/gmmu.h>
25#include <nvgpu/vm_area.h>
26#include <nvgpu/log2.h>
27
28#include <nvgpu/linux/vm.h>
29
30#include "gk20a/gk20a.h"
31#include "platform_gk20a.h"
32#include "ioctl_as.h"
33#include "os_linux.h"
34
35static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags)
36{
37 u32 core_flags = 0;
38
39 if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
40 core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET;
41 if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE)
42 core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE;
43
44 return core_flags;
45}
46
47static int gk20a_as_ioctl_bind_channel(
48 struct gk20a_as_share *as_share,
49 struct nvgpu_as_bind_channel_args *args)
50{
51 int err = 0;
52 struct channel_gk20a *ch;
53 struct gk20a *g = gk20a_from_vm(as_share->vm);
54
55 nvgpu_log_fn(g, " ");
56
57 ch = gk20a_get_channel_from_file(args->channel_fd);
58 if (!ch)
59 return -EINVAL;
60
61 if (gk20a_channel_as_bound(ch)) {
62 err = -EINVAL;
63 goto out;
64 }
65
66 /* this will set channel_gk20a->vm */
67 err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch);
68
69out:
70 gk20a_channel_put(ch);
71 return err;
72}
73
74static int gk20a_as_ioctl_alloc_space(
75 struct gk20a_as_share *as_share,
76 struct nvgpu_as_alloc_space_args *args)
77{
78 struct gk20a *g = gk20a_from_vm(as_share->vm);
79
80 nvgpu_log_fn(g, " ");
81 return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size,
82 &args->o_a.offset,
83 gk20a_as_translate_as_alloc_space_flags(g,
84 args->flags));
85}
86
87static int gk20a_as_ioctl_free_space(
88 struct gk20a_as_share *as_share,
89 struct nvgpu_as_free_space_args *args)
90{
91 struct gk20a *g = gk20a_from_vm(as_share->vm);
92
93 nvgpu_log_fn(g, " ");
94 return nvgpu_vm_area_free(as_share->vm, args->offset);
95}
96
97static int gk20a_as_ioctl_map_buffer_ex(
98 struct gk20a_as_share *as_share,
99 struct nvgpu_as_map_buffer_ex_args *args)
100{
101 struct gk20a *g = gk20a_from_vm(as_share->vm);
102
103 nvgpu_log_fn(g, " ");
104
105 /* unsupported, direct kind control must be used */
106 if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) {
107 struct gk20a *g = as_share->vm->mm->g;
108 nvgpu_log_info(g, "Direct kind control must be requested");
109 return -EINVAL;
110 }
111
112 return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
113 &args->offset, args->flags,
114 args->compr_kind,
115 args->incompr_kind,
116 args->buffer_offset,
117 args->mapping_size,
118 NULL);
119}
120
121static int gk20a_as_ioctl_unmap_buffer(
122 struct gk20a_as_share *as_share,
123 struct nvgpu_as_unmap_buffer_args *args)
124{
125 struct gk20a *g = gk20a_from_vm(as_share->vm);
126
127 nvgpu_log_fn(g, " ");
128
129 nvgpu_vm_unmap(as_share->vm, args->offset, NULL);
130
131 return 0;
132}
133
134static int gk20a_as_ioctl_map_buffer_batch(
135 struct gk20a_as_share *as_share,
136 struct nvgpu_as_map_buffer_batch_args *args)
137{
138 struct gk20a *g = gk20a_from_vm(as_share->vm);
139 u32 i;
140 int err = 0;
141
142 struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
143 (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
144 args->unmaps;
145 struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
146 (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
147 args->maps;
148
149 struct vm_gk20a_mapping_batch batch;
150
151 nvgpu_log_fn(g, " ");
152
153 if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT ||
154 args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT)
155 return -EINVAL;
156
157 nvgpu_vm_mapping_batch_start(&batch);
158
159 for (i = 0; i < args->num_unmaps; ++i) {
160 struct nvgpu_as_unmap_buffer_args unmap_args;
161
162 if (copy_from_user(&unmap_args, &user_unmap_args[i],
163 sizeof(unmap_args))) {
164 err = -EFAULT;
165 break;
166 }
167
168 nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
169 }
170
171 if (err) {
172 nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
173
174 args->num_unmaps = i;
175 args->num_maps = 0;
176 return err;
177 }
178
179 for (i = 0; i < args->num_maps; ++i) {
180 s16 compressible_kind;
181 s16 incompressible_kind;
182
183 struct nvgpu_as_map_buffer_ex_args map_args;
184 memset(&map_args, 0, sizeof(map_args));
185
186 if (copy_from_user(&map_args, &user_map_args[i],
187 sizeof(map_args))) {
188 err = -EFAULT;
189 break;
190 }
191
192 if (map_args.flags &
193 NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
194 compressible_kind = map_args.compr_kind;
195 incompressible_kind = map_args.incompr_kind;
196 } else {
197 /* direct kind control must be used */
198 err = -EINVAL;
199 break;
200 }
201
202 err = nvgpu_vm_map_buffer(
203 as_share->vm, map_args.dmabuf_fd,
204 &map_args.offset, map_args.flags,
205 compressible_kind, incompressible_kind,
206 map_args.buffer_offset,
207 map_args.mapping_size,
208 &batch);
209 if (err)
210 break;
211 }
212
213 nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
214
215 if (err)
216 args->num_maps = i;
217 /* note: args->num_unmaps will be unmodified, which is ok
218 * since all unmaps are done */
219
220 return err;
221}
222
223static int gk20a_as_ioctl_get_va_regions(
224 struct gk20a_as_share *as_share,
225 struct nvgpu_as_get_va_regions_args *args)
226{
227 unsigned int i;
228 unsigned int write_entries;
229 struct nvgpu_as_va_region __user *user_region_ptr;
230 struct vm_gk20a *vm = as_share->vm;
231 struct gk20a *g = gk20a_from_vm(vm);
232 unsigned int page_sizes = gmmu_page_size_kernel;
233
234 nvgpu_log_fn(g, " ");
235
236 if (!vm->big_pages)
237 page_sizes--;
238
239 write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region);
240 if (write_entries > page_sizes)
241 write_entries = page_sizes;
242
243 user_region_ptr =
244 (struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr;
245
246 for (i = 0; i < write_entries; ++i) {
247 struct nvgpu_as_va_region region;
248 struct nvgpu_allocator *vma = vm->vma[i];
249
250 memset(&region, 0, sizeof(struct nvgpu_as_va_region));
251
252 region.page_size = vm->gmmu_page_sizes[i];
253 region.offset = nvgpu_alloc_base(vma);
254 /* No __aeabi_uldivmod() on some platforms... */
255 region.pages = (nvgpu_alloc_end(vma) -
256 nvgpu_alloc_base(vma)) >> ilog2(region.page_size);
257
258 if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
259 return -EFAULT;
260 }
261
262 args->buf_size =
263 page_sizes * sizeof(struct nvgpu_as_va_region);
264
265 return 0;
266}
267
268static int nvgpu_as_ioctl_get_sync_ro_map(
269 struct gk20a_as_share *as_share,
270 struct nvgpu_as_get_sync_ro_map_args *args)
271{
272#ifdef CONFIG_TEGRA_GK20A_NVHOST
273 struct vm_gk20a *vm = as_share->vm;
274 struct gk20a *g = gk20a_from_vm(vm);
275 u64 base_gpuva;
276 u32 sync_size;
277 int err = 0;
278
279 if (!g->ops.fifo.get_sync_ro_map)
280 return -EINVAL;
281
282 if (!gk20a_platform_has_syncpoints(g))
283 return -EINVAL;
284
285 err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size);
286 if (err)
287 return err;
288
289 args->base_gpuva = base_gpuva;
290 args->sync_size = sync_size;
291
292 return err;
293#else
294 return -EINVAL;
295#endif
296}
297
298int gk20a_as_dev_open(struct inode *inode, struct file *filp)
299{
300 struct nvgpu_os_linux *l;
301 struct gk20a_as_share *as_share;
302 struct gk20a *g;
303 int err;
304
305 l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev);
306 g = &l->g;
307
308 nvgpu_log_fn(g, " ");
309
310 err = gk20a_as_alloc_share(g, 0, 0, &as_share);
311 if (err) {
312 nvgpu_log_fn(g, "failed to alloc share");
313 return err;
314 }
315
316 filp->private_data = as_share;
317 return 0;
318}
319
320int gk20a_as_dev_release(struct inode *inode, struct file *filp)
321{
322 struct gk20a_as_share *as_share = filp->private_data;
323
324 if (!as_share)
325 return 0;
326
327 return gk20a_as_release_share(as_share);
328}
329
330long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
331{
332 int err = 0;
333 struct gk20a_as_share *as_share = filp->private_data;
334 struct gk20a *g = gk20a_from_as(as_share->as);
335
336 u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE];
337
338 nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
339
340 if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) ||
341 (_IOC_NR(cmd) == 0) ||
342 (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) ||
343 (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE))
344 return -EINVAL;
345
346 memset(buf, 0, sizeof(buf));
347 if (_IOC_DIR(cmd) & _IOC_WRITE) {
348 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
349 return -EFAULT;
350 }
351
352 err = gk20a_busy(g);
353 if (err)
354 return err;
355
356 switch (cmd) {
357 case NVGPU_AS_IOCTL_BIND_CHANNEL:
358 trace_gk20a_as_ioctl_bind_channel(g->name);
359 err = gk20a_as_ioctl_bind_channel(as_share,
360 (struct nvgpu_as_bind_channel_args *)buf);
361
362 break;
363 case NVGPU32_AS_IOCTL_ALLOC_SPACE:
364 {
365 struct nvgpu32_as_alloc_space_args *args32 =
366 (struct nvgpu32_as_alloc_space_args *)buf;
367 struct nvgpu_as_alloc_space_args args;
368
369 args.pages = args32->pages;
370 args.page_size = args32->page_size;
371 args.flags = args32->flags;
372 args.o_a.offset = args32->o_a.offset;
373 trace_gk20a_as_ioctl_alloc_space(g->name);
374 err = gk20a_as_ioctl_alloc_space(as_share, &args);
375 args32->o_a.offset = args.o_a.offset;
376 break;
377 }
378 case NVGPU_AS_IOCTL_ALLOC_SPACE:
379 trace_gk20a_as_ioctl_alloc_space(g->name);
380 err = gk20a_as_ioctl_alloc_space(as_share,
381 (struct nvgpu_as_alloc_space_args *)buf);
382 break;
383 case NVGPU_AS_IOCTL_FREE_SPACE:
384 trace_gk20a_as_ioctl_free_space(g->name);
385 err = gk20a_as_ioctl_free_space(as_share,
386 (struct nvgpu_as_free_space_args *)buf);
387 break;
388 case NVGPU_AS_IOCTL_MAP_BUFFER_EX:
389 trace_gk20a_as_ioctl_map_buffer(g->name);
390 err = gk20a_as_ioctl_map_buffer_ex(as_share,
391 (struct nvgpu_as_map_buffer_ex_args *)buf);
392 break;
393 case NVGPU_AS_IOCTL_UNMAP_BUFFER:
394 trace_gk20a_as_ioctl_unmap_buffer(g->name);
395 err = gk20a_as_ioctl_unmap_buffer(as_share,
396 (struct nvgpu_as_unmap_buffer_args *)buf);
397 break;
398 case NVGPU_AS_IOCTL_GET_VA_REGIONS:
399 trace_gk20a_as_ioctl_get_va_regions(g->name);
400 err = gk20a_as_ioctl_get_va_regions(as_share,
401 (struct nvgpu_as_get_va_regions_args *)buf);
402 break;
403 case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
404 err = gk20a_as_ioctl_map_buffer_batch(as_share,
405 (struct nvgpu_as_map_buffer_batch_args *)buf);
406 break;
407 case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP:
408 err = nvgpu_as_ioctl_get_sync_ro_map(as_share,
409 (struct nvgpu_as_get_sync_ro_map_args *)buf);
410 break;
411 default:
412 err = -ENOTTY;
413 break;
414 }
415
416 gk20a_idle(g);
417
418 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
419 if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
420 err = -EFAULT;
421
422 return err;
423}
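
gk20a_as_dev_ioctl() above marshals every command the same way: validate the ioctl number fields, copy the argument struct into an on-stack buffer when the direction includes _IOC_WRITE, dispatch on cmd, and copy the buffer back out when the direction includes _IOC_READ. A condensed sketch of that convention, with DEMO_* constants and demo_handle_one() invented for illustration rather than taken from the driver:

#include <linux/fs.h>
#include <linux/ioctl.h>
#include <linux/uaccess.h>

#define DEMO_IOCTL_MAGIC	'D'
#define DEMO_IOCTL_LAST		4
#define DEMO_IOCTL_MAX_ARG	64

static int demo_handle_one(unsigned int cmd, void *buf)
{
	return 0;	/* per-command work would go here */
}

static long demo_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	u8 buf[DEMO_IOCTL_MAX_ARG] = { 0 };
	int err;

	/* reject anything outside the expected magic, range, and size */
	if (_IOC_TYPE(cmd) != DEMO_IOCTL_MAGIC ||
	    _IOC_NR(cmd) == 0 ||
	    _IOC_NR(cmd) > DEMO_IOCTL_LAST ||
	    _IOC_SIZE(cmd) > DEMO_IOCTL_MAX_ARG)
		return -EINVAL;

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	err = demo_handle_one(cmd, buf);

	if (err == 0 && (_IOC_DIR(cmd) & _IOC_READ)) {
		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}

	return err;
}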
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.h b/drivers/gpu/nvgpu/common/linux/ioctl_as.h
deleted file mode 100644
index b3de3782..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_as.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * GK20A Address Spaces
3 *
4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15#ifndef __NVGPU_COMMON_LINUX_AS_H__
16#define __NVGPU_COMMON_LINUX_AS_H__
17
18struct inode;
19struct file;
20
21/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and
22 * num_maps */
23#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256
24
25/* struct file_operations driver interface */
26int gk20a_as_dev_open(struct inode *inode, struct file *filp);
27int gk20a_as_dev_release(struct inode *inode, struct file *filp);
28long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
29
30#endif
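
The batch map/unmap ioctl in ioctl_as.c relies on the limit defined above: the element count is checked against NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT first, and then each entry of the user-space array is copied in one struct at a time so only a single small argument lives on the kernel stack. A minimal sketch of that per-element copy loop, using invented demo_* names rather than driver API:

#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/types.h>

#define DEMO_BATCH_LIMIT 256

struct demo_unmap_args {
	u64 offset;
};

static int demo_unmap_one(u64 offset)
{
	return 0;	/* real unmap work would go here */
}

static int demo_unmap_batch(const struct demo_unmap_args __user *user_args,
			    u32 num)
{
	u32 i;

	if (num > DEMO_BATCH_LIMIT)
		return -EINVAL;

	for (i = 0; i < num; i++) {
		struct demo_unmap_args args;

		/* copy one element at a time from the user-space array */
		if (copy_from_user(&args, &user_args[i], sizeof(args)))
			return -EFAULT;

		demo_unmap_one(args.offset);
	}

	return 0;
}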
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
deleted file mode 100644
index b04bb9de..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ /dev/null
@@ -1,1388 +0,0 @@
1/*
2 * GK20A Graphics channel
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <trace/events/gk20a.h>
20#include <linux/file.h>
21#include <linux/anon_inodes.h>
22#include <linux/dma-buf.h>
23#include <linux/poll.h>
24#include <uapi/linux/nvgpu.h>
25
26#include <nvgpu/semaphore.h>
27#include <nvgpu/timers.h>
28#include <nvgpu/kmem.h>
29#include <nvgpu/log.h>
30#include <nvgpu/list.h>
31#include <nvgpu/debug.h>
32#include <nvgpu/enabled.h>
33#include <nvgpu/error_notifier.h>
34#include <nvgpu/barrier.h>
35#include <nvgpu/nvhost.h>
36#include <nvgpu/os_sched.h>
37
38#include "gk20a/gk20a.h"
39#include "gk20a/dbg_gpu_gk20a.h"
40#include "gk20a/fence_gk20a.h"
41
42#include "platform_gk20a.h"
43#include "ioctl_channel.h"
44#include "channel.h"
45#include "os_linux.h"
46#include "ctxsw_trace.h"
47
48/* the minimal size of client buffer */
49#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
50 (sizeof(struct gk20a_cs_snapshot_fifo) + \
51 sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
52
53static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
54{
55 switch (graphics_preempt_mode) {
56 case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
57 return "WFI";
58 default:
59 return "?";
60 }
61}
62
63static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode)
64{
65 switch (compute_preempt_mode) {
66 case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
67 return "WFI";
68 case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
69 return "CTA";
70 default:
71 return "?";
72 }
73}
74
75static void gk20a_channel_trace_sched_param(
76 void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice,
77 u32 timeout, const char *interleave,
78 const char *graphics_preempt_mode,
79 const char *compute_preempt_mode),
80 struct channel_gk20a *ch)
81{
82 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
83
84 if (!tsg)
85 return;
86
87 (trace)(ch->chid, ch->tsgid, ch->pid,
88 tsg_gk20a_from_ch(ch)->timeslice_us,
89 ch->timeout_ms_max,
90 gk20a_fifo_interleave_level_name(tsg->interleave_level),
91 gr_gk20a_graphics_preempt_mode_name(
92 tsg->gr_ctx.graphics_preempt_mode),
93 gr_gk20a_compute_preempt_mode_name(
94 tsg->gr_ctx.compute_preempt_mode));
95}
96
97/*
98 * Although channels do have pointers back to the gk20a struct that they were
99 * created under, in cases where the driver is killed that pointer can be bad.
100 * The channel memory can be freed before the release() function for a given
101 * channel is called. This happens when the driver dies and userspace doesn't
102 * get a chance to call release() until after the entire gk20a driver data is
103 * unloaded and freed.
104 */
105struct channel_priv {
106 struct gk20a *g;
107 struct channel_gk20a *c;
108};
109
110#if defined(CONFIG_GK20A_CYCLE_STATS)
111
112void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
113{
114 struct nvgpu_channel_linux *priv = ch->os_priv;
115
116 /* disable existing cyclestats buffer */
117 nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
118 if (priv->cyclestate_buffer_handler) {
119 dma_buf_vunmap(priv->cyclestate_buffer_handler,
120 ch->cyclestate.cyclestate_buffer);
121 dma_buf_put(priv->cyclestate_buffer_handler);
122 priv->cyclestate_buffer_handler = NULL;
123 ch->cyclestate.cyclestate_buffer = NULL;
124 ch->cyclestate.cyclestate_buffer_size = 0;
125 }
126 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
127}
128
129static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
130 struct nvgpu_cycle_stats_args *args)
131{
132 struct dma_buf *dmabuf;
133 void *virtual_address;
134 struct nvgpu_channel_linux *priv = ch->os_priv;
135
136 /* is it allowed to handle calls for current GPU? */
137 if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS))
138 return -ENOSYS;
139
140 if (args->dmabuf_fd && !priv->cyclestate_buffer_handler) {
141
142 /* set up new cyclestats buffer */
143 dmabuf = dma_buf_get(args->dmabuf_fd);
144 if (IS_ERR(dmabuf))
145 return PTR_ERR(dmabuf);
146 virtual_address = dma_buf_vmap(dmabuf);
147 if (!virtual_address)
148 return -ENOMEM;
149
150 priv->cyclestate_buffer_handler = dmabuf;
151 ch->cyclestate.cyclestate_buffer = virtual_address;
152 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
153 return 0;
154
155 } else if (!args->dmabuf_fd && priv->cyclestate_buffer_handler) {
156 gk20a_channel_free_cycle_stats_buffer(ch);
157 return 0;
158
159 } else if (!args->dmabuf_fd && !priv->cyclestate_buffer_handler) {
160 		/* no request from GL */
161 return 0;
162
163 } else {
164 pr_err("channel already has cyclestats buffer\n");
165 return -EINVAL;
166 }
167}
168
169static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
170{
171 int ret;
172
173 nvgpu_mutex_acquire(&ch->cs_client_mutex);
174 if (ch->cs_client)
175 ret = gr_gk20a_css_flush(ch, ch->cs_client);
176 else
177 ret = -EBADF;
178 nvgpu_mutex_release(&ch->cs_client_mutex);
179
180 return ret;
181}
182
183static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
184 u32 dmabuf_fd,
185 u32 perfmon_id_count,
186 u32 *perfmon_id_start)
187{
188 int ret = 0;
189 struct gk20a *g = ch->g;
190 struct gk20a_cs_snapshot_client_linux *client_linux;
191 struct gk20a_cs_snapshot_client *client;
192
193 nvgpu_mutex_acquire(&ch->cs_client_mutex);
194 if (ch->cs_client) {
195 nvgpu_mutex_release(&ch->cs_client_mutex);
196 return -EEXIST;
197 }
198
199 client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
200 if (!client_linux) {
201 ret = -ENOMEM;
202 goto err;
203 }
204
205 client_linux->dmabuf_fd = dmabuf_fd;
206 client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
207 if (IS_ERR(client_linux->dma_handler)) {
208 ret = PTR_ERR(client_linux->dma_handler);
209 client_linux->dma_handler = NULL;
210 goto err_free;
211 }
212
213 client = &client_linux->cs_client;
214 client->snapshot_size = client_linux->dma_handler->size;
215 if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
216 ret = -ENOMEM;
217 goto err_put;
218 }
219
220 client->snapshot = (struct gk20a_cs_snapshot_fifo *)
221 dma_buf_vmap(client_linux->dma_handler);
222 if (!client->snapshot) {
223 ret = -ENOMEM;
224 goto err_put;
225 }
226
227 ch->cs_client = client;
228
229 ret = gr_gk20a_css_attach(ch,
230 perfmon_id_count,
231 perfmon_id_start,
232 ch->cs_client);
233
234 nvgpu_mutex_release(&ch->cs_client_mutex);
235
236 return ret;
237
238err_put:
239 dma_buf_put(client_linux->dma_handler);
240err_free:
241 nvgpu_kfree(g, client_linux);
242err:
243 nvgpu_mutex_release(&ch->cs_client_mutex);
244 return ret;
245}
246
247int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
248{
249 int ret;
250 struct gk20a_cs_snapshot_client_linux *client_linux;
251
252 nvgpu_mutex_acquire(&ch->cs_client_mutex);
253 if (!ch->cs_client) {
254 nvgpu_mutex_release(&ch->cs_client_mutex);
255 return 0;
256 }
257
258 client_linux = container_of(ch->cs_client,
259 struct gk20a_cs_snapshot_client_linux,
260 cs_client);
261
262 ret = gr_gk20a_css_detach(ch, ch->cs_client);
263
264 if (client_linux->dma_handler) {
265 if (ch->cs_client->snapshot)
266 dma_buf_vunmap(client_linux->dma_handler,
267 ch->cs_client->snapshot);
268 dma_buf_put(client_linux->dma_handler);
269 }
270
271 ch->cs_client = NULL;
272 nvgpu_kfree(ch->g, client_linux);
273
274 nvgpu_mutex_release(&ch->cs_client_mutex);
275
276 return ret;
277}
278
279static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
280 struct nvgpu_cycle_stats_snapshot_args *args)
281{
282 int ret;
283
284 /* is it allowed to handle calls for current GPU? */
285 if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT))
286 return -ENOSYS;
287
288 if (!args->dmabuf_fd)
289 return -EINVAL;
290
291 /* handle the command (most frequent cases first) */
292 switch (args->cmd) {
293 case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
294 ret = gk20a_flush_cycle_stats_snapshot(ch);
295 args->extra = 0;
296 break;
297
298 case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
299 ret = gk20a_attach_cycle_stats_snapshot(ch,
300 args->dmabuf_fd,
301 args->extra,
302 &args->extra);
303 break;
304
305 case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
306 ret = gk20a_channel_free_cycle_stats_snapshot(ch);
307 args->extra = 0;
308 break;
309
310 default:
311 pr_err("cyclestats: unknown command %u\n", args->cmd);
312 ret = -EINVAL;
313 break;
314 }
315
316 return ret;
317}
318#endif
319
320static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
321 struct nvgpu_channel_wdt_args *args)
322{
323 u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT |
324 NVGPU_IOCTL_CHANNEL_ENABLE_WDT);
325
326 if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT)
327 ch->timeout.enabled = false;
328 else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT)
329 ch->timeout.enabled = true;
330 else
331 return -EINVAL;
332
333 if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT)
334 ch->timeout.limit_ms = args->timeout_ms;
335
336 ch->timeout.debug_dump = (args->wdt_status &
337 NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0;
338
339 return 0;
340}
341
342static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
343{
344 struct nvgpu_channel_linux *priv = ch->os_priv;
345
346 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
347 if (priv->error_notifier.dmabuf) {
348 dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
349 dma_buf_put(priv->error_notifier.dmabuf);
350 priv->error_notifier.dmabuf = NULL;
351 priv->error_notifier.notification = NULL;
352 priv->error_notifier.vaddr = NULL;
353 }
354 nvgpu_mutex_release(&priv->error_notifier.mutex);
355}
356
357static int gk20a_init_error_notifier(struct channel_gk20a *ch,
358 struct nvgpu_set_error_notifier *args)
359{
360 struct dma_buf *dmabuf;
361 void *va;
362 u64 end = args->offset + sizeof(struct nvgpu_notification);
363 struct nvgpu_channel_linux *priv = ch->os_priv;
364
365 if (!args->mem) {
366 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
367 return -EINVAL;
368 }
369
370 dmabuf = dma_buf_get(args->mem);
371
372 gk20a_channel_free_error_notifiers(ch);
373
374 if (IS_ERR(dmabuf)) {
375 pr_err("Invalid handle: %d\n", args->mem);
376 return -EINVAL;
377 }
378
379 if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
380 dma_buf_put(dmabuf);
381 nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset");
382 return -EINVAL;
383 }
384
385 nvgpu_speculation_barrier();
386
387 /* map handle */
388 va = dma_buf_vmap(dmabuf);
389 if (!va) {
390 dma_buf_put(dmabuf);
391 pr_err("Cannot map notifier handle\n");
392 return -ENOMEM;
393 }
394
395 priv->error_notifier.notification = va + args->offset;
396 priv->error_notifier.vaddr = va;
397 memset(priv->error_notifier.notification, 0,
398 sizeof(struct nvgpu_notification));
399
400 /* set channel notifiers pointer */
401 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
402 priv->error_notifier.dmabuf = dmabuf;
403 nvgpu_mutex_release(&priv->error_notifier.mutex);
404
405 return 0;
406}
407
408/*
409 * This returns the channel with a reference. The caller must
410 * gk20a_channel_put() the ref back after use.
411 *
412 * NULL is returned if the channel was not found.
413 */
414struct channel_gk20a *gk20a_get_channel_from_file(int fd)
415{
416 struct channel_gk20a *ch;
417 struct channel_priv *priv;
418 struct file *f = fget(fd);
419
420 if (!f)
421 return NULL;
422
423 if (f->f_op != &gk20a_channel_ops) {
424 fput(f);
425 return NULL;
426 }
427
428 priv = (struct channel_priv *)f->private_data;
429 ch = gk20a_channel_get(priv->c);
430 fput(f);
431 return ch;
432}
433
434int gk20a_channel_release(struct inode *inode, struct file *filp)
435{
436 struct channel_priv *priv = filp->private_data;
437 struct channel_gk20a *ch;
438 struct gk20a *g;
439
440 int err;
441
442 /* We could still end up here even if the channel_open failed, e.g.
443 * if we ran out of hw channel IDs.
444 */
445 if (!priv)
446 return 0;
447
448 ch = priv->c;
449 g = priv->g;
450
451 err = gk20a_busy(g);
452 if (err) {
453 nvgpu_err(g, "failed to release a channel!");
454 goto channel_release;
455 }
456
457 trace_gk20a_channel_release(dev_name(dev_from_gk20a(g)));
458
459 gk20a_channel_close(ch);
460 gk20a_channel_free_error_notifiers(ch);
461
462 gk20a_idle(g);
463
464channel_release:
465 gk20a_put(g);
466 nvgpu_kfree(g, filp->private_data);
467 filp->private_data = NULL;
468 return 0;
469}
470
471/* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */
472static int __gk20a_channel_open(struct gk20a *g,
473 struct file *filp, s32 runlist_id)
474{
475 int err;
476 struct channel_gk20a *ch;
477 struct channel_priv *priv;
478
479 nvgpu_log_fn(g, " ");
480
481 g = gk20a_get(g);
482 if (!g)
483 return -ENODEV;
484
485 trace_gk20a_channel_open(dev_name(dev_from_gk20a(g)));
486
487 priv = nvgpu_kzalloc(g, sizeof(*priv));
488 if (!priv) {
489 err = -ENOMEM;
490 goto free_ref;
491 }
492
493 err = gk20a_busy(g);
494 if (err) {
495 nvgpu_err(g, "failed to power on, %d", err);
496 goto fail_busy;
497 }
498 	/* All user space channels should be non-privileged */
499 ch = gk20a_open_new_channel(g, runlist_id, false,
500 nvgpu_current_pid(g), nvgpu_current_tid(g));
501 gk20a_idle(g);
502 if (!ch) {
503 nvgpu_err(g,
504 "failed to get f");
505 err = -ENOMEM;
506 goto fail_busy;
507 }
508
509 gk20a_channel_trace_sched_param(
510 trace_gk20a_channel_sched_defaults, ch);
511
512 priv->g = g;
513 priv->c = ch;
514
515 filp->private_data = priv;
516 return 0;
517
518fail_busy:
519 nvgpu_kfree(g, priv);
520free_ref:
521 gk20a_put(g);
522 return err;
523}
524
525int gk20a_channel_open(struct inode *inode, struct file *filp)
526{
527 struct nvgpu_os_linux *l = container_of(inode->i_cdev,
528 struct nvgpu_os_linux, channel.cdev);
529 struct gk20a *g = &l->g;
530 int ret;
531
532 nvgpu_log_fn(g, "start");
533 ret = __gk20a_channel_open(g, filp, -1);
534
535 nvgpu_log_fn(g, "end");
536 return ret;
537}
538
539int gk20a_channel_open_ioctl(struct gk20a *g,
540 struct nvgpu_channel_open_args *args)
541{
542 int err;
543 int fd;
544 struct file *file;
545 char name[64];
546 s32 runlist_id = args->in.runlist_id;
547 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
548
549 err = get_unused_fd_flags(O_RDWR);
550 if (err < 0)
551 return err;
552 fd = err;
553
554 snprintf(name, sizeof(name), "nvhost-%s-fd%d",
555 dev_name(dev_from_gk20a(g)), fd);
556
557 file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR);
558 if (IS_ERR(file)) {
559 err = PTR_ERR(file);
560 goto clean_up;
561 }
562
563 err = __gk20a_channel_open(g, file, runlist_id);
564 if (err)
565 goto clean_up_file;
566
567 fd_install(fd, file);
568 args->out.channel_fd = fd;
569 return 0;
570
571clean_up_file:
572 fput(file);
573clean_up:
574 put_unused_fd(fd);
575 return err;
576}
577
578static u32 nvgpu_gpfifo_user_flags_to_common_flags(u32 user_flags)
579{
580 u32 flags = 0;
581
582 if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED)
583 flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_VPR;
584
585 if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC)
586 flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC;
587
588 if (user_flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE)
589 flags |= NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE;
590
591 return flags;
592}
593
594static void nvgpu_get_gpfifo_ex_args(
595 struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args,
596 struct nvgpu_gpfifo_args *gpfifo_args)
597{
598 gpfifo_args->num_entries = alloc_gpfifo_ex_args->num_entries;
599 gpfifo_args->num_inflight_jobs = alloc_gpfifo_ex_args->num_inflight_jobs;
600 gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags(
601 alloc_gpfifo_ex_args->flags);
602}
603
604static void nvgpu_get_gpfifo_args(
605 struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args,
606 struct nvgpu_gpfifo_args *gpfifo_args)
607{
608 /*
609 * Kernel can insert one extra gpfifo entry before user
610 * submitted gpfifos and another one after, for internal usage.
611 * Triple the requested size.
612 */
613 gpfifo_args->num_entries = alloc_gpfifo_args->num_entries * 3;
614 gpfifo_args->num_inflight_jobs = 0;
615 gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags(
616 alloc_gpfifo_args->flags);
617}
618
619static void nvgpu_get_fence_args(
620 struct nvgpu_fence *fence_args_in,
621 struct nvgpu_channel_fence *fence_args_out)
622{
623 fence_args_out->id = fence_args_in->id;
624 fence_args_out->value = fence_args_in->value;
625}
626
627static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
628 ulong id, u32 offset,
629 u32 payload, u32 timeout)
630{
631 struct dma_buf *dmabuf;
632 void *data;
633 u32 *semaphore;
634 int ret = 0;
635
636 /* do not wait if channel has timed out */
637 if (ch->has_timedout)
638 return -ETIMEDOUT;
639
640 dmabuf = dma_buf_get(id);
641 if (IS_ERR(dmabuf)) {
642 nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id);
643 return -EINVAL;
644 }
645
646 data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
647 if (!data) {
648 nvgpu_err(ch->g, "failed to map notifier memory");
649 ret = -EINVAL;
650 goto cleanup_put;
651 }
652
653 semaphore = data + (offset & ~PAGE_MASK);
654
655 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
656 &ch->semaphore_wq,
657 *semaphore == payload || ch->has_timedout,
658 timeout);
659
660 dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
661cleanup_put:
662 dma_buf_put(dmabuf);
663 return ret;
664}
665
666static int gk20a_channel_wait(struct channel_gk20a *ch,
667 struct nvgpu_wait_args *args)
668{
669 struct dma_buf *dmabuf;
670 struct gk20a *g = ch->g;
671 struct notification *notif;
672 struct timespec tv;
673 u64 jiffies;
674 ulong id;
675 u32 offset;
676 int remain, ret = 0;
677 u64 end;
678
679 nvgpu_log_fn(g, " ");
680
681 if (ch->has_timedout)
682 return -ETIMEDOUT;
683
684 switch (args->type) {
685 case NVGPU_WAIT_TYPE_NOTIFIER:
686 id = args->condition.notifier.dmabuf_fd;
687 offset = args->condition.notifier.offset;
688 end = offset + sizeof(struct notification);
689
690 dmabuf = dma_buf_get(id);
691 if (IS_ERR(dmabuf)) {
692 nvgpu_err(g, "invalid notifier nvmap handle 0x%lx",
693 id);
694 return -EINVAL;
695 }
696
697 if (end > dmabuf->size || end < sizeof(struct notification)) {
698 dma_buf_put(dmabuf);
699 nvgpu_err(g, "invalid notifier offset");
700 return -EINVAL;
701 }
702
703 nvgpu_speculation_barrier();
704
705 notif = dma_buf_vmap(dmabuf);
706 if (!notif) {
707 nvgpu_err(g, "failed to map notifier memory");
708 return -ENOMEM;
709 }
710
711 notif = (struct notification *)((uintptr_t)notif + offset);
712
713 /* user should set status pending before
714 * calling this ioctl */
715 remain = NVGPU_COND_WAIT_INTERRUPTIBLE(
716 &ch->notifier_wq,
717 notif->status == 0 || ch->has_timedout,
718 args->timeout);
719
720 if (remain == 0 && notif->status != 0) {
721 ret = -ETIMEDOUT;
722 goto notif_clean_up;
723 } else if (remain < 0) {
724 ret = -EINTR;
725 goto notif_clean_up;
726 }
727
728 /* TBD: fill in correct information */
729 jiffies = get_jiffies_64();
730 jiffies_to_timespec(jiffies, &tv);
731 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
732 notif->timestamp.nanoseconds[1] = tv.tv_sec;
733 notif->info32 = 0xDEADBEEF; /* should be object name */
734 notif->info16 = ch->chid; /* should be method offset */
735
736notif_clean_up:
737 dma_buf_vunmap(dmabuf, notif);
738 return ret;
739
740 case NVGPU_WAIT_TYPE_SEMAPHORE:
741 ret = gk20a_channel_wait_semaphore(ch,
742 args->condition.semaphore.dmabuf_fd,
743 args->condition.semaphore.offset,
744 args->condition.semaphore.payload,
745 args->timeout);
746
747 break;
748
749 default:
750 ret = -EINVAL;
751 break;
752 }
753
754 return ret;
755}
756
757static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
758 struct nvgpu_zcull_bind_args *args)
759{
760 struct gk20a *g = ch->g;
761 struct gr_gk20a *gr = &g->gr;
762
763 nvgpu_log_fn(gr->g, " ");
764
765 return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
766 args->gpu_va, args->mode);
767}
768
769static int gk20a_ioctl_channel_submit_gpfifo(
770 struct channel_gk20a *ch,
771 struct nvgpu_submit_gpfifo_args *args)
772{
773 struct nvgpu_channel_fence fence;
774 struct gk20a_fence *fence_out;
775 struct fifo_profile_gk20a *profile = NULL;
776 u32 submit_flags = 0;
777 int fd = -1;
778 struct gk20a *g = ch->g;
779
780 int ret = 0;
781 nvgpu_log_fn(g, " ");
782
783 profile = gk20a_fifo_profile_acquire(ch->g);
784 gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY);
785
786 if (ch->has_timedout)
787 return -ETIMEDOUT;
788
789 nvgpu_get_fence_args(&args->fence, &fence);
790 submit_flags =
791 nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
792
793 	/* Try to allocate an fd here */
794 if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
795 && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) {
796 fd = get_unused_fd_flags(O_RDWR);
797 if (fd < 0)
798 return fd;
799 }
800
801 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
802 submit_flags, &fence,
803 &fence_out, profile);
804
805 if (ret) {
806 if (fd != -1)
807 put_unused_fd(fd);
808 goto clean_up;
809 }
810
811 /* Convert fence_out to something we can pass back to user space. */
812 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
813 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
814 ret = gk20a_fence_install_fd(fence_out, fd);
815 if (ret)
816 put_unused_fd(fd);
817 else
818 args->fence.id = fd;
819 } else {
820 args->fence.id = fence_out->syncpt_id;
821 args->fence.value = fence_out->syncpt_value;
822 }
823 }
824 gk20a_fence_put(fence_out);
825
826 gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT);
827 if (profile)
828 gk20a_fifo_profile_release(ch->g, profile);
829
830clean_up:
831 return ret;
832}
833
834/*
835 * Convert linux specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_*
836 * to common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_*
837 */
838u32 nvgpu_get_common_runlist_level(u32 level)
839{
840 switch (level) {
841 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
842 return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
843 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
844 return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM;
845 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
846 return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH;
847 default:
848 pr_err("%s: incorrect runlist level\n", __func__);
849 }
850
851 return level;
852}
853
854static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags)
855{
856 u32 flags = 0;
857
858 if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP)
859 flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP;
860
861 if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
862 flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP;
863
864 return flags;
865}
866
867static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch,
868 u32 class_num, u32 user_flags)
869{
870 return ch->g->ops.gr.alloc_obj_ctx(ch, class_num,
871 nvgpu_obj_ctx_user_flags_to_common_flags(user_flags));
872}
873
874/*
875 * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_*
876 * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_*
877 */
878u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags)
879{
880 u32 flags = 0;
881
882 if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI)
883 flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI;
884 if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP)
885 flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
886
887 return flags;
888}
889
890/*
891 * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_*
892 * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_*
893 */
894u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags)
895{
896 u32 flags = 0;
897
898 if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI)
899 flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
900 if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA)
901 flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
902 if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP)
903 flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
904
905 return flags;
906}
907
908/*
909 * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_*
910 * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_*
911 */
912u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode)
913{
914 switch (graphics_preempt_mode) {
915 case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
916 return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI;
917 case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
918 return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
919 }
920
921 return graphics_preempt_mode;
922}
923
924/*
925 * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_*
926 * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_*
927 */
928u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode)
929{
930 switch (compute_preempt_mode) {
931 case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
932 return NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
933 case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
934 return NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
935 case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
936 return NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
937 }
938
939 return compute_preempt_mode;
940}
941
942/*
943 * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_*
944 * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_*
945 */
946static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
947{
948 switch (graphics_preempt_mode) {
949 case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
950 return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
951 case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP:
952 return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
953 }
954
955 return graphics_preempt_mode;
956}
957
958/*
959 * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_*
960 * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_*
961 */
962static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode)
963{
964 switch (compute_preempt_mode) {
965 case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
966 return NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
967 case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
968 return NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
969 case NVGPU_COMPUTE_PREEMPTION_MODE_CILP:
970 return NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
971 }
972
973 return compute_preempt_mode;
974}
975
976static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch,
977 u32 graphics_preempt_mode, u32 compute_preempt_mode)
978{
979 int err;
980
981 if (ch->g->ops.gr.set_preemption_mode) {
982 err = gk20a_busy(ch->g);
983 if (err) {
984 nvgpu_err(ch->g, "failed to power on, %d", err);
985 return err;
986 }
987 err = ch->g->ops.gr.set_preemption_mode(ch,
988 nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode),
989 nvgpu_get_common_compute_preempt_mode(compute_preempt_mode));
990 gk20a_idle(ch->g);
991 } else {
992 err = -EINVAL;
993 }
994
995 return err;
996}
997
998static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch,
999 struct nvgpu_get_user_syncpoint_args *args)
1000{
1001#ifdef CONFIG_TEGRA_GK20A_NVHOST
1002 struct gk20a *g = ch->g;
1003 int err;
1004
1005 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) {
1006 nvgpu_err(g, "user syncpoints not supported");
1007 return -EINVAL;
1008 }
1009
1010 if (!gk20a_platform_has_syncpoints(g)) {
1011 nvgpu_err(g, "syncpoints not supported");
1012 return -EINVAL;
1013 }
1014
1015 if (g->aggressive_sync_destroy_thresh) {
1016 nvgpu_err(g, "sufficient syncpoints not available");
1017 return -EINVAL;
1018 }
1019
1020 nvgpu_mutex_acquire(&ch->sync_lock);
1021 if (ch->user_sync) {
1022 nvgpu_mutex_release(&ch->sync_lock);
1023 } else {
1024 ch->user_sync = gk20a_channel_sync_create(ch, true);
1025 if (!ch->user_sync) {
1026 nvgpu_mutex_release(&ch->sync_lock);
1027 return -ENOMEM;
1028 }
1029 nvgpu_mutex_release(&ch->sync_lock);
1030
1031 if (g->ops.fifo.resetup_ramfc) {
1032 err = g->ops.fifo.resetup_ramfc(ch);
1033 if (err)
1034 return err;
1035 }
1036 }
1037
1038 args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync);
1039 args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev,
1040 args->syncpoint_id);
1041 if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS))
1042 args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync);
1043 else
1044 args->gpu_va = 0;
1045
1046 return 0;
1047#else
1048 return -EINVAL;
1049#endif
1050}
1051
1052long gk20a_channel_ioctl(struct file *filp,
1053 unsigned int cmd, unsigned long arg)
1054{
1055 struct channel_priv *priv = filp->private_data;
1056 struct channel_gk20a *ch = priv->c;
1057 struct device *dev = dev_from_gk20a(ch->g);
1058 u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0};
1059 int err = 0;
1060 struct gk20a *g = ch->g;
1061
1062 nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
1063
1064 if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
1065 (_IOC_NR(cmd) == 0) ||
1066 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
1067 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
1068 return -EINVAL;
1069
1070 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1071 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
1072 return -EFAULT;
1073 }
1074
1075 /* take a ref or return timeout if channel refs can't be taken */
1076 ch = gk20a_channel_get(ch);
1077 if (!ch)
1078 return -ETIMEDOUT;
1079
1080 /* protect our sanity for threaded userspace - most of the channel is
1081 * not thread safe */
1082 nvgpu_mutex_acquire(&ch->ioctl_lock);
1083
1084 /* this ioctl call keeps a ref to the file which keeps a ref to the
1085 * channel */
1086
1087 switch (cmd) {
1088 case NVGPU_IOCTL_CHANNEL_OPEN:
1089 err = gk20a_channel_open_ioctl(ch->g,
1090 (struct nvgpu_channel_open_args *)buf);
1091 break;
1092 case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
1093 break;
1094 case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
1095 {
1096 struct nvgpu_alloc_obj_ctx_args *args =
1097 (struct nvgpu_alloc_obj_ctx_args *)buf;
1098
1099 err = gk20a_busy(ch->g);
1100 if (err) {
1101 dev_err(dev,
1102 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1103 __func__, cmd);
1104 break;
1105 }
1106 err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags);
1107 gk20a_idle(ch->g);
1108 break;
1109 }
1110 case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX:
1111 {
1112 struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args =
1113 (struct nvgpu_alloc_gpfifo_ex_args *)buf;
1114 struct nvgpu_gpfifo_args gpfifo_args;
1115
1116 nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &gpfifo_args);
1117
1118 err = gk20a_busy(ch->g);
1119 if (err) {
1120 dev_err(dev,
1121 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1122 __func__, cmd);
1123 break;
1124 }
1125
1126 if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) {
1127 err = -EINVAL;
1128 gk20a_idle(ch->g);
1129 break;
1130 }
1131 err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args);
1132 gk20a_idle(ch->g);
1133 break;
1134 }
1135 case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
1136 {
1137 struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args =
1138 (struct nvgpu_alloc_gpfifo_args *)buf;
1139 struct nvgpu_gpfifo_args gpfifo_args;
1140
1141 nvgpu_get_gpfifo_args(alloc_gpfifo_args, &gpfifo_args);
1142
1143 err = gk20a_busy(ch->g);
1144 if (err) {
1145 dev_err(dev,
1146 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1147 __func__, cmd);
1148 break;
1149 }
1150
1151 err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args);
1152 gk20a_idle(ch->g);
1153 break;
1154 }
1155 case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
1156 err = gk20a_ioctl_channel_submit_gpfifo(ch,
1157 (struct nvgpu_submit_gpfifo_args *)buf);
1158 break;
1159 case NVGPU_IOCTL_CHANNEL_WAIT:
1160 err = gk20a_busy(ch->g);
1161 if (err) {
1162 dev_err(dev,
1163 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1164 __func__, cmd);
1165 break;
1166 }
1167
1168		/* waiting is thread-safe; not dropping this mutex could
1169		 * deadlock under certain conditions */
1170 nvgpu_mutex_release(&ch->ioctl_lock);
1171
1172 err = gk20a_channel_wait(ch,
1173 (struct nvgpu_wait_args *)buf);
1174
1175 nvgpu_mutex_acquire(&ch->ioctl_lock);
1176
1177 gk20a_idle(ch->g);
1178 break;
1179 case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
1180 err = gk20a_busy(ch->g);
1181 if (err) {
1182 dev_err(dev,
1183 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1184 __func__, cmd);
1185 break;
1186 }
1187 err = gk20a_channel_zcull_bind(ch,
1188 (struct nvgpu_zcull_bind_args *)buf);
1189 gk20a_idle(ch->g);
1190 break;
1191 case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
1192 err = gk20a_busy(ch->g);
1193 if (err) {
1194 dev_err(dev,
1195 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1196 __func__, cmd);
1197 break;
1198 }
1199 err = gk20a_init_error_notifier(ch,
1200 (struct nvgpu_set_error_notifier *)buf);
1201 gk20a_idle(ch->g);
1202 break;
1203#ifdef CONFIG_GK20A_CYCLE_STATS
1204 case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
1205 err = gk20a_busy(ch->g);
1206 if (err) {
1207 dev_err(dev,
1208 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1209 __func__, cmd);
1210 break;
1211 }
1212 err = gk20a_channel_cycle_stats(ch,
1213 (struct nvgpu_cycle_stats_args *)buf);
1214 gk20a_idle(ch->g);
1215 break;
1216#endif
1217 case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
1218 {
1219 u32 timeout =
1220 (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
1221 nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
1222 timeout, ch->chid);
1223 ch->timeout_ms_max = timeout;
1224 gk20a_channel_trace_sched_param(
1225 trace_gk20a_channel_set_timeout, ch);
1226 break;
1227 }
1228 case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
1229 {
1230 u32 timeout =
1231 (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
1232 bool timeout_debug_dump = !((u32)
1233 ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
1234 (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
1235 nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
1236 timeout, ch->chid);
1237 ch->timeout_ms_max = timeout;
1238 ch->timeout_debug_dump = timeout_debug_dump;
1239 gk20a_channel_trace_sched_param(
1240 trace_gk20a_channel_set_timeout, ch);
1241 break;
1242 }
1243 case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
1244 ((struct nvgpu_get_param_args *)buf)->value =
1245 ch->has_timedout;
1246 break;
1247 case NVGPU_IOCTL_CHANNEL_ENABLE:
1248 err = gk20a_busy(ch->g);
1249 if (err) {
1250 dev_err(dev,
1251 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1252 __func__, cmd);
1253 break;
1254 }
1255 if (ch->g->ops.fifo.enable_channel)
1256 ch->g->ops.fifo.enable_channel(ch);
1257 else
1258 err = -ENOSYS;
1259 gk20a_idle(ch->g);
1260 break;
1261 case NVGPU_IOCTL_CHANNEL_DISABLE:
1262 err = gk20a_busy(ch->g);
1263 if (err) {
1264 dev_err(dev,
1265 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1266 __func__, cmd);
1267 break;
1268 }
1269 if (ch->g->ops.fifo.disable_channel)
1270 ch->g->ops.fifo.disable_channel(ch);
1271 else
1272 err = -ENOSYS;
1273 gk20a_idle(ch->g);
1274 break;
1275 case NVGPU_IOCTL_CHANNEL_PREEMPT:
1276 err = gk20a_busy(ch->g);
1277 if (err) {
1278 dev_err(dev,
1279 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1280 __func__, cmd);
1281 break;
1282 }
1283 err = gk20a_fifo_preempt(ch->g, ch);
1284 gk20a_idle(ch->g);
1285 break;
1286 case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST:
1287 if (!capable(CAP_SYS_NICE)) {
1288 err = -EPERM;
1289 break;
1290 }
1291 if (!ch->g->ops.fifo.reschedule_runlist) {
1292 err = -ENOSYS;
1293 break;
1294 }
1295 err = gk20a_busy(ch->g);
1296 if (err) {
1297 dev_err(dev,
1298 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1299 __func__, cmd);
1300 break;
1301 }
1302 err = ch->g->ops.fifo.reschedule_runlist(ch,
1303 NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT &
1304 ((struct nvgpu_reschedule_runlist_args *)buf)->flags);
1305 gk20a_idle(ch->g);
1306 break;
1307 case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
1308 err = gk20a_busy(ch->g);
1309 if (err) {
1310 dev_err(dev,
1311 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1312 __func__, cmd);
1313 break;
1314 }
1315 err = ch->g->ops.fifo.force_reset_ch(ch,
1316 NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
1317 gk20a_idle(ch->g);
1318 break;
1319#ifdef CONFIG_GK20A_CYCLE_STATS
1320 case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
1321 err = gk20a_busy(ch->g);
1322 if (err) {
1323 dev_err(dev,
1324 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1325 __func__, cmd);
1326 break;
1327 }
1328 err = gk20a_channel_cycle_stats_snapshot(ch,
1329 (struct nvgpu_cycle_stats_snapshot_args *)buf);
1330 gk20a_idle(ch->g);
1331 break;
1332#endif
1333 case NVGPU_IOCTL_CHANNEL_WDT:
1334 err = gk20a_channel_set_wdt_status(ch,
1335 (struct nvgpu_channel_wdt_args *)buf);
1336 break;
1337 case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE:
1338 err = nvgpu_ioctl_channel_set_preemption_mode(ch,
1339 ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode,
1340 ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode);
1341 break;
1342 case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX:
1343 if (ch->g->ops.gr.set_boosted_ctx) {
1344 bool boost =
1345 ((struct nvgpu_boosted_ctx_args *)buf)->boost;
1346
1347 err = gk20a_busy(ch->g);
1348 if (err) {
1349 dev_err(dev,
1350 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1351 __func__, cmd);
1352 break;
1353 }
1354 err = ch->g->ops.gr.set_boosted_ctx(ch, boost);
1355 gk20a_idle(ch->g);
1356 } else {
1357 err = -EINVAL;
1358 }
1359 break;
1360 case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT:
1361 err = gk20a_busy(ch->g);
1362 if (err) {
1363 dev_err(dev,
1364 "%s: failed to host gk20a for ioctl cmd: 0x%x",
1365 __func__, cmd);
1366 break;
1367 }
1368 err = nvgpu_ioctl_channel_get_user_syncpoint(ch,
1369 (struct nvgpu_get_user_syncpoint_args *)buf);
1370 gk20a_idle(ch->g);
1371 break;
1372 default:
1373 dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd);
1374 err = -ENOTTY;
1375 break;
1376 }
1377
1378 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
1379 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
1380
1381 nvgpu_mutex_release(&ch->ioctl_lock);
1382
1383 gk20a_channel_put(ch);
1384
1385 nvgpu_log_fn(g, "end");
1386
1387 return err;
1388}
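For reference, a minimal userspace-side sketch (not part of this commit) of driving the dispatcher above, using NVGPU_IOCTL_CHANNEL_SET_TIMEOUT from the switch: the command encodes _IOC_WRITE, so gk20a_channel_ioctl() copies the args in, applies the timeout under ch->ioctl_lock, and copies nothing back out. The uapi header path and how the channel fd was obtained are assumptions.

#include <sys/ioctl.h>
#include <stdio.h>
#include <linux/nvgpu.h>	/* assumed location of the installed uapi header */

static int set_channel_timeout(int channel_fd, unsigned int timeout_ms)
{
	struct nvgpu_set_timeout_args args = { .timeout = timeout_ms };

	/* gk20a_channel_ioctl() validates the command, copies args from
	 * user space, and stores args.timeout into ch->timeout_ms_max. */
	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_SET_TIMEOUT, &args) < 0) {
		perror("NVGPU_IOCTL_CHANNEL_SET_TIMEOUT");
		return -1;
	}
	return 0;
}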
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
deleted file mode 100644
index 48cff1ea..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
+++ /dev/null
@@ -1,50 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef __NVGPU_IOCTL_CHANNEL_H__
14#define __NVGPU_IOCTL_CHANNEL_H__
15
16#include <linux/fs.h>
17
18#include "gk20a/css_gr_gk20a.h"
19
20struct inode;
21struct file;
22struct gk20a;
23struct nvgpu_channel_open_args;
24
25struct gk20a_cs_snapshot_client_linux {
26 struct gk20a_cs_snapshot_client cs_client;
27
28 u32 dmabuf_fd;
29 struct dma_buf *dma_handler;
30};
31
32int gk20a_channel_open(struct inode *inode, struct file *filp);
33int gk20a_channel_release(struct inode *inode, struct file *filp);
34long gk20a_channel_ioctl(struct file *filp,
35 unsigned int cmd, unsigned long arg);
36int gk20a_channel_open_ioctl(struct gk20a *g,
37 struct nvgpu_channel_open_args *args);
38
39int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
40void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);
41
42extern const struct file_operations gk20a_channel_ops;
43
44u32 nvgpu_get_common_runlist_level(u32 level);
45
46u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags);
47u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags);
48u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode);
49u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode);
50#endif
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
deleted file mode 100644
index a7c6a607..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
+++ /dev/null
@@ -1,562 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/cdev.h>
18#include <linux/file.h>
19#include <linux/anon_inodes.h>
20#include <linux/uaccess.h>
21#include <linux/poll.h>
22#ifdef CONFIG_DEBUG_FS
23#include <linux/debugfs.h>
24#endif
25#include <uapi/linux/nvgpu.h>
26
27#include <nvgpu/bitops.h>
28#include <nvgpu/lock.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/atomic.h>
31#include <nvgpu/bug.h>
32#include <nvgpu/kref.h>
33#include <nvgpu/log.h>
34#include <nvgpu/barrier.h>
35#include <nvgpu/cond.h>
36#include <nvgpu/list.h>
37#include <nvgpu/clk_arb.h>
38
39#include "gk20a/gk20a.h"
40#include "clk/clk.h"
41#include "pstate/pstate.h"
42#include "lpwr/lpwr.h"
43#include "volt/volt.h"
44
45#ifdef CONFIG_DEBUG_FS
46#include "common/linux/os_linux.h"
47#endif
48
49static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
50 struct file *filp)
51{
52 struct nvgpu_clk_dev *dev = filp->private_data;
53 struct nvgpu_clk_session *session = dev->session;
54
55
56 clk_arb_dbg(session->g, " ");
57
58 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
59 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
60 return 0;
61}
62
63static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask)
64{
65 unsigned int poll_mask = 0;
66
67 if (nvgpu_poll_mask & NVGPU_POLLIN)
68 poll_mask |= POLLIN;
69 if (nvgpu_poll_mask & NVGPU_POLLPRI)
70 poll_mask |= POLLPRI;
71 if (nvgpu_poll_mask & NVGPU_POLLOUT)
72 poll_mask |= POLLOUT;
73 if (nvgpu_poll_mask & NVGPU_POLLRDNORM)
74 poll_mask |= POLLRDNORM;
75 if (nvgpu_poll_mask & NVGPU_POLLHUP)
76 poll_mask |= POLLHUP;
77
78 return poll_mask;
79}
80
81static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
82{
83 struct nvgpu_clk_dev *dev = filp->private_data;
84
85 clk_arb_dbg(dev->session->g, " ");
86
87 poll_wait(filp, &dev->readout_wq.wq, wait);
88 return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0));
89}
90
91void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev)
92{
93 nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
94}
95
96static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
97 struct file *filp)
98{
99 struct nvgpu_clk_dev *dev = filp->private_data;
100 struct nvgpu_clk_session *session = dev->session;
101 struct nvgpu_clk_arb *arb;
102
103 arb = session->g->clk_arb;
104
105 clk_arb_dbg(session->g, " ");
106
107 if (arb) {
108 nvgpu_spinlock_acquire(&arb->users_lock);
109 nvgpu_list_del(&dev->link);
110 nvgpu_spinlock_release(&arb->users_lock);
111 nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
112 }
113
114 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
115 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
116
117 return 0;
118}
119
120static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event)
121{
122 u32 nvgpu_gpu_event;
123
124 switch (nvgpu_event) {
125 case NVGPU_EVENT_VF_UPDATE:
126 nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE;
127 break;
128 case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE:
129 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE;
130 break;
131 case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE:
132 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE;
133 break;
134 case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED:
135 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED;
136 break;
137 case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED:
138 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED;
139 break;
140 case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD:
141 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD;
142 break;
143 case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD:
144 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD;
145 break;
146 case NVGPU_EVENT_ALARM_GPU_LOST:
147 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST;
148 break;
149 default:
150 /* Control shouldn't come here */
151 nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1;
152 break;
153 }
154 return nvgpu_gpu_event;
155}
156
157static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
158 struct nvgpu_gpu_event_info *info) {
159
160 u32 tail, head;
161 u32 events = 0;
162 struct nvgpu_clk_notification *p_notif;
163
164 tail = nvgpu_atomic_read(&dev->queue.tail);
165 head = nvgpu_atomic_read(&dev->queue.head);
166
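	/* If the reader has fallen more than queue.size entries behind the
	 * writer, the oldest entries have already been overwritten; pull
	 * head forward so only the newest queue.size events are reported. */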
167 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;
168
169 if (_WRAPGTEQ(tail, head) && info) {
170 head++;
171 p_notif = &dev->queue.notifications[head % dev->queue.size];
172 events |= nvgpu_convert_gpu_event(p_notif->notification);
173 info->event_id = ffs(events) - 1;
174 info->timestamp = p_notif->timestamp;
175 nvgpu_atomic_set(&dev->queue.head, head);
176 }
177
178 return events;
179}
180
181static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
182 size_t size, loff_t *off)
183{
184 struct nvgpu_clk_dev *dev = filp->private_data;
185 struct nvgpu_gpu_event_info info;
186 ssize_t err;
187
188 clk_arb_dbg(dev->session->g,
189 "filp=%p, buf=%p, size=%zu", filp, buf, size);
190
191 if ((size - *off) < sizeof(info))
192 return 0;
193
194 memset(&info, 0, sizeof(info));
195 /* Get the oldest event from the queue */
196 while (!__pending_event(dev, &info)) {
197 if (filp->f_flags & O_NONBLOCK)
198 return -EAGAIN;
199 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
200 __pending_event(dev, &info), 0);
201 if (err)
202 return err;
203 if (info.timestamp)
204 break;
205 }
206
207 if (copy_to_user(buf + *off, &info, sizeof(info)))
208 return -EFAULT;
209
210 return sizeof(info);
211}
212
213static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev,
214 struct nvgpu_gpu_set_event_filter_args *args)
215{
216 struct gk20a *g = dev->session->g;
217 u32 mask;
218
219 nvgpu_log(g, gpu_dbg_fn, " ");
220
221 if (args->flags)
222 return -EINVAL;
223
224 if (args->size != 1)
225 return -EINVAL;
226
227 if (copy_from_user(&mask, (void __user *) args->buffer,
228 args->size * sizeof(u32)))
229 return -EFAULT;
230
231 /* update alarm mask */
232 nvgpu_atomic_set(&dev->enabled_mask, mask);
233
234 return 0;
235}
236
237static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
238 unsigned long arg)
239{
240 struct nvgpu_clk_dev *dev = filp->private_data;
241 struct gk20a *g = dev->session->g;
242 u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
243 int err = 0;
244
245 nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
246
247 if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
248 || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
249 return -EINVAL;
250
251 BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE);
252
253 memset(buf, 0, sizeof(buf));
254 if (_IOC_DIR(cmd) & _IOC_WRITE) {
255 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
256 return -EFAULT;
257 }
258
259 switch (cmd) {
260 case NVGPU_EVENT_IOCTL_SET_FILTER:
261 err = nvgpu_clk_arb_set_event_filter(dev,
262 (struct nvgpu_gpu_set_event_filter_args *)buf);
263 break;
264 default:
265 nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
266 err = -ENOTTY;
267 }
268
269 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
270 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
271
272 return err;
273}
274
275static const struct file_operations completion_dev_ops = {
276 .owner = THIS_MODULE,
277 .release = nvgpu_clk_arb_release_completion_dev,
278 .poll = nvgpu_clk_arb_poll_dev,
279};
280
281static const struct file_operations event_dev_ops = {
282 .owner = THIS_MODULE,
283 .release = nvgpu_clk_arb_release_event_dev,
284 .poll = nvgpu_clk_arb_poll_dev,
285 .read = nvgpu_clk_arb_read_event_dev,
286#ifdef CONFIG_COMPAT
287 .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
288#endif
289 .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
290};
291
292static int nvgpu_clk_arb_install_fd(struct gk20a *g,
293 struct nvgpu_clk_session *session,
294 const struct file_operations *fops,
295 struct nvgpu_clk_dev **_dev)
296{
297 struct file *file;
298 int fd;
299 int err;
300 int status;
301 char name[64];
302 struct nvgpu_clk_dev *dev;
303
304 clk_arb_dbg(g, " ");
305
306 dev = nvgpu_kzalloc(g, sizeof(*dev));
307 if (!dev)
308 return -ENOMEM;
309
310 status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
311 DEFAULT_EVENT_NUMBER);
312 if (status < 0) {
313 err = status;
314 goto fail;
315 }
316
317 fd = get_unused_fd_flags(O_RDWR);
318 if (fd < 0) {
319 err = fd;
320 goto fail;
321 }
322
323 snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
324 file = anon_inode_getfile(name, fops, dev, O_RDWR);
325 if (IS_ERR(file)) {
326 err = PTR_ERR(file);
327 goto fail_fd;
328 }
329
330 fd_install(fd, file);
331
332 nvgpu_cond_init(&dev->readout_wq);
333
334 nvgpu_atomic_set(&dev->poll_mask, 0);
335
336 dev->session = session;
337 nvgpu_ref_init(&dev->refcount);
338
339 nvgpu_ref_get(&session->refcount);
340
341 *_dev = dev;
342
343 return fd;
344
345fail_fd:
346 put_unused_fd(fd);
347fail:
348 nvgpu_kfree(g, dev);
349
350 return err;
351}
352
353int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
354 struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
355{
356 struct nvgpu_clk_arb *arb = g->clk_arb;
357 struct nvgpu_clk_dev *dev;
358 int fd;
359
360 clk_arb_dbg(g, " ");
361
362 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
363 if (fd < 0)
364 return fd;
365
 366	/* TODO: the alarm mask needs to be set to a default value to prevent
 367	 * failures in legacy tests. This will be removed once the sanity
 368	 * tests are updated.
 369	 */
370 if (alarm_mask)
371 nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
372 else
373 nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
374
375 dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
376
377 nvgpu_spinlock_acquire(&arb->users_lock);
378 nvgpu_list_add_tail(&dev->link, &arb->users);
379 nvgpu_spinlock_release(&arb->users_lock);
380
381 *event_fd = fd;
382
383 return 0;
384}
385
386int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
387 struct nvgpu_clk_session *session, int *request_fd)
388{
389 struct nvgpu_clk_dev *dev;
390 int fd;
391
392 clk_arb_dbg(g, " ");
393
394 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
395 if (fd < 0)
396 return fd;
397
398 *request_fd = fd;
399
400 return 0;
401}
402
403int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
404 struct nvgpu_clk_session *session, int request_fd)
405{
406 struct nvgpu_clk_arb *arb = g->clk_arb;
407 struct nvgpu_clk_dev *dev;
408 struct fd fd;
409 int err = 0;
410
411 clk_arb_dbg(g, " ");
412
413 fd = fdget(request_fd);
414 if (!fd.file)
415 return -EINVAL;
416
417 if (fd.file->f_op != &completion_dev_ops) {
418 err = -EINVAL;
419 goto fdput_fd;
420 }
421
422 dev = (struct nvgpu_clk_dev *) fd.file->private_data;
423
424 if (!dev || dev->session != session) {
425 err = -EINVAL;
426 goto fdput_fd;
427 }
428 nvgpu_ref_get(&dev->refcount);
429 nvgpu_spinlock_acquire(&session->session_lock);
430 nvgpu_list_add(&dev->node, &session->targets);
431 nvgpu_spinlock_release(&session->session_lock);
432 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
433
434fdput_fd:
435 fdput(fd);
436 return err;
437}
438
439int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
440 int request_fd, u32 api_domain, u16 target_mhz)
441{
442 struct nvgpu_clk_dev *dev;
443 struct fd fd;
444 int err = 0;
445
446 clk_arb_dbg(session->g,
447 "domain=0x%08x target_mhz=%u", api_domain, target_mhz);
448
449 fd = fdget(request_fd);
450 if (!fd.file)
451 return -EINVAL;
452
453 if (fd.file->f_op != &completion_dev_ops) {
454 err = -EINVAL;
455 goto fdput_fd;
456 }
457
458 dev = fd.file->private_data;
459 if (!dev || dev->session != session) {
460 err = -EINVAL;
461 goto fdput_fd;
462 }
463
464 switch (api_domain) {
465 case NVGPU_CLK_DOMAIN_MCLK:
466 dev->mclk_target_mhz = target_mhz;
467 break;
468
469 case NVGPU_CLK_DOMAIN_GPCCLK:
470 dev->gpc2clk_target_mhz = target_mhz * 2ULL;
471 break;
472
473 default:
474 err = -EINVAL;
475 }
476
477fdput_fd:
478 fdput(fd);
479 return err;
480}
481
482u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
483{
484 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
485 u32 api_domains = 0;
486
487 if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
488 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
489
490 if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
491 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
492
493 return api_domains;
494}
495
496#ifdef CONFIG_DEBUG_FS
497static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
498{
499 struct gk20a *g = s->private;
500 struct nvgpu_clk_arb *arb = g->clk_arb;
501 struct nvgpu_clk_arb_debug *debug;
502
503 u64 num;
504 s64 tmp, avg, std, max, min;
505
506 debug = NV_ACCESS_ONCE(arb->debug);
507 /* Make copy of structure and ensure no reordering */
508 nvgpu_smp_rmb();
509 if (!debug)
510 return -EINVAL;
511
512 std = debug->switch_std;
513 avg = debug->switch_avg;
514 max = debug->switch_max;
515 min = debug->switch_min;
516 num = debug->switch_num;
517
518 tmp = std;
519 do_div(tmp, num);
520 seq_printf(s, "Number of transitions: %lld\n",
521 num);
522 seq_printf(s, "max / min : %lld / %lld usec\n",
523 max, min);
524 seq_printf(s, "avg / std : %lld / %ld usec\n",
525 avg, int_sqrt(tmp));
526
527 return 0;
528}
529
530static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
531{
532 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
533}
534
535static const struct file_operations nvgpu_clk_arb_stats_fops = {
536 .open = nvgpu_clk_arb_stats_open,
537 .read = seq_read,
538 .llseek = seq_lseek,
539 .release = single_release,
540};
541
542
543int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
544{
545 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
546 struct dentry *gpu_root = l->debugfs;
547 struct dentry *d;
548
549 nvgpu_log(g, gpu_dbg_info, "g=%p", g);
550
551 d = debugfs_create_file(
552 "arb_stats",
553 S_IRUGO,
554 gpu_root,
555 g,
556 &nvgpu_clk_arb_stats_fops);
557 if (!d)
558 return -ENOMEM;
559
560 return 0;
561}
562#endif
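A minimal userspace sketch (not part of this commit) of consuming the event fd that nvgpu_clk_arb_install_event_fd() installs: poll() wakes when the arbiter posts an event, and each read() returns one struct nvgpu_gpu_event_info as produced by nvgpu_clk_arb_read_event_dev() above. The uapi header path and how the event fd was obtained are assumptions.

#include <poll.h>
#include <unistd.h>
#include <stdio.h>
#include <linux/nvgpu.h>	/* assumed location of the installed uapi header */

static int wait_for_clk_event(int event_fd)
{
	struct nvgpu_gpu_event_info info;
	struct pollfd pfd = { .fd = event_fd, .events = POLLIN };

	if (poll(&pfd, 1, -1) < 0)
		return -1;

	/* The .read op above copies out exactly one event per call. */
	if (read(event_fd, &info, sizeof(info)) != (ssize_t)sizeof(info))
		return -1;

	printf("clk event %u at %llu\n", info.event_id,
	       (unsigned long long)info.timestamp);
	return 0;
}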
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
deleted file mode 100644
index 73a8131d..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ /dev/null
@@ -1,1962 +0,0 @@
1/*
2 * Copyright (c) 2011-2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/uaccess.h>
18#include <linux/cdev.h>
19#include <linux/file.h>
20#include <linux/anon_inodes.h>
21#include <linux/fs.h>
22#include <uapi/linux/nvgpu.h>
23
24#include <nvgpu/bitops.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/bug.h>
27#include <nvgpu/ptimer.h>
28#include <nvgpu/vidmem.h>
29#include <nvgpu/log.h>
30#include <nvgpu/enabled.h>
31#include <nvgpu/sizes.h>
32
33#include <nvgpu/linux/vidmem.h>
34
35#include "ioctl_ctrl.h"
36#include "ioctl_dbg.h"
37#include "ioctl_as.h"
38#include "ioctl_tsg.h"
39#include "ioctl_channel.h"
40#include "gk20a/gk20a.h"
41#include "gk20a/fence_gk20a.h"
42
43#include "platform_gk20a.h"
44#include "os_linux.h"
45#include "dmabuf.h"
46#include "channel.h"
47
48#define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? \
49 (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
50#define MHZ_TO_HZ(a) ((u64)a * MHZ)
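/*
 * Note on the conversion above: for values that do not fit in 32 bits,
 * HZ_TO_MHZ() avoids a 64-bit division by multiplying by 0x10C8
 * (~2^32 / 10^6) and shifting right by 32; inputs above 0xF414F9CD7
 * (~65.5 GHz) saturate to 0xffff, and 32-bit inputs divide by MHZ
 * directly.
 */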
51
52struct gk20a_ctrl_priv {
53 struct device *dev;
54 struct gk20a *g;
55 struct nvgpu_clk_session *clk_session;
56};
57
58static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags)
59{
60 u32 core_flags = 0;
61
62 if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED)
63 core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED;
64
65 return core_flags;
66}
67
68int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
69{
70 struct nvgpu_os_linux *l;
71 struct gk20a *g;
72 struct gk20a_ctrl_priv *priv;
73 int err = 0;
74
75 l = container_of(inode->i_cdev,
76 struct nvgpu_os_linux, ctrl.cdev);
77 g = gk20a_get(&l->g);
78 if (!g)
79 return -ENODEV;
80
81 nvgpu_log_fn(g, " ");
82
83 priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv));
84 if (!priv) {
85 err = -ENOMEM;
86 goto free_ref;
87 }
88 filp->private_data = priv;
89 priv->dev = dev_from_gk20a(g);
90 /*
 91	 * We don't close the arbiter fds after driver teardown, to support
 92	 * GPU_LOST events, so we store g here instead of dereferencing the
 93	 * dev structure on teardown.
94 */
95 priv->g = g;
96
97 if (!g->sw_ready) {
98 err = gk20a_busy(g);
99 if (err)
100 goto free_ref;
101 gk20a_idle(g);
102 }
103
104 err = nvgpu_clk_arb_init_session(g, &priv->clk_session);
105free_ref:
106 if (err)
107 gk20a_put(g);
108 return err;
109}
110int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
111{
112 struct gk20a_ctrl_priv *priv = filp->private_data;
113 struct gk20a *g = priv->g;
114
115 nvgpu_log_fn(g, " ");
116
117 if (priv->clk_session)
118 nvgpu_clk_arb_release_session(g, priv->clk_session);
119
120 gk20a_put(g);
121 nvgpu_kfree(g, priv);
122
123 return 0;
124}
125
126struct nvgpu_flags_mapping {
127 u64 ioctl_flag;
128 int enabled_flag;
129};
130
131static struct nvgpu_flags_mapping flags_mapping[] = {
132 {NVGPU_GPU_FLAGS_HAS_SYNCPOINTS,
133 NVGPU_HAS_SYNCPOINTS},
134 {NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS,
135 NVGPU_SUPPORT_PARTIAL_MAPPINGS},
136 {NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS,
137 NVGPU_SUPPORT_SPARSE_ALLOCS},
138 {NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS,
139 NVGPU_SUPPORT_SYNC_FENCE_FDS},
140 {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS,
141 NVGPU_SUPPORT_CYCLE_STATS},
142 {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT,
143 NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT},
144 {NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS,
145 NVGPU_SUPPORT_USERSPACE_MANAGED_AS},
146 {NVGPU_GPU_FLAGS_SUPPORT_TSG,
147 NVGPU_SUPPORT_TSG},
148 {NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS,
149 NVGPU_SUPPORT_CLOCK_CONTROLS},
150 {NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE,
151 NVGPU_SUPPORT_GET_VOLTAGE},
152 {NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT,
153 NVGPU_SUPPORT_GET_CURRENT},
154 {NVGPU_GPU_FLAGS_SUPPORT_GET_POWER,
155 NVGPU_SUPPORT_GET_POWER},
156 {NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE,
157 NVGPU_SUPPORT_GET_TEMPERATURE},
158 {NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT,
159 NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT},
160 {NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS,
161 NVGPU_SUPPORT_DEVICE_EVENTS},
162 {NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE,
163 NVGPU_SUPPORT_FECS_CTXSW_TRACE},
164 {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
165 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
166 {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
167 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
168 {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS,
169 NVGPU_SUPPORT_DETERMINISTIC_OPTS},
170 {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS,
171 NVGPU_SUPPORT_SYNCPOINT_ADDRESS},
172 {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT,
173 NVGPU_SUPPORT_USER_SYNCPOINT},
174 {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
175 NVGPU_SUPPORT_IO_COHERENCE},
176 {NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
177 NVGPU_SUPPORT_RESCHEDULE_RUNLIST},
178 {NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL,
179 NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL},
180 {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF,
181 NVGPU_ECC_ENABLED_SM_LRF},
182 {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM,
183 NVGPU_ECC_ENABLED_SM_SHM},
184 {NVGPU_GPU_FLAGS_ECC_ENABLED_TEX,
185 NVGPU_ECC_ENABLED_TEX},
186 {NVGPU_GPU_FLAGS_ECC_ENABLED_LTC,
187 NVGPU_ECC_ENABLED_LTC},
188 {NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS,
189 NVGPU_SUPPORT_TSG_SUBCONTEXTS},
190 {NVGPU_GPU_FLAGS_SUPPORT_SCG,
191 NVGPU_SUPPORT_SCG},
192 {NVGPU_GPU_FLAGS_SUPPORT_VPR,
193 NVGPU_SUPPORT_VPR},
194};
195
196static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
197{
198 unsigned int i;
199 u64 ioctl_flags = 0;
200
201 for (i = 0; i < sizeof(flags_mapping)/sizeof(*flags_mapping); i++) {
202 if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag))
203 ioctl_flags |= flags_mapping[i].ioctl_flag;
204 }
205
206 return ioctl_flags;
207}
208
209static void nvgpu_set_preemption_mode_flags(struct gk20a *g,
210 struct nvgpu_gpu_characteristics *gpu)
211{
212 struct nvgpu_preemption_modes_rec preemption_mode_rec;
213
214 g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec);
215
216 gpu->graphics_preemption_mode_flags =
217 nvgpu_get_ioctl_graphics_preempt_mode_flags(
218 preemption_mode_rec.graphics_preemption_mode_flags);
219 gpu->compute_preemption_mode_flags =
220 nvgpu_get_ioctl_compute_preempt_mode_flags(
221 preemption_mode_rec.compute_preemption_mode_flags);
222
223 gpu->default_graphics_preempt_mode =
224 nvgpu_get_ioctl_graphics_preempt_mode(
225 preemption_mode_rec.default_graphics_preempt_mode);
226 gpu->default_compute_preempt_mode =
227 nvgpu_get_ioctl_compute_preempt_mode(
228 preemption_mode_rec.default_compute_preempt_mode);
229}
230
231static long
232gk20a_ctrl_ioctl_gpu_characteristics(
233 struct gk20a *g,
234 struct nvgpu_gpu_get_characteristics *request)
235{
236 struct nvgpu_gpu_characteristics gpu;
237 long err = 0;
238
239 if (gk20a_busy(g)) {
240 nvgpu_err(g, "failed to power on gpu");
241 return -EINVAL;
242 }
243
244 memset(&gpu, 0, sizeof(gpu));
245
246 gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
247 gpu.on_board_video_memory_size = 0; /* integrated GPU */
248
249 gpu.num_gpc = g->gr.gpc_count;
250 gpu.max_gpc_count = g->gr.max_gpc_count;
251
252 gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;
253
254 gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
255
256 gpu.compression_page_size = g->ops.fb.compression_page_size(g);
257
258 gpu.gpc_mask = (1 << g->gr.gpc_count)-1;
259
260 gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g);
261
262 gpu.arch = g->params.gpu_arch;
263 gpu.impl = g->params.gpu_impl;
264 gpu.rev = g->params.gpu_rev;
265 gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT;
266 gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ?
267 NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0;
268 gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS);
269 gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS);
270 gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS);
271 gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS);
272 gpu.inline_to_memory_class =
273 g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS);
274 gpu.dma_copy_class =
275 g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
276
277 gpu.vbios_version = g->bios.vbios_version;
278 gpu.vbios_oem_version = g->bios.vbios_oem_version;
279
280 gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g);
281 gpu.pde_coverage_bit_count =
282 g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0];
283 gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g);
284
285 gpu.sm_arch_sm_version = g->params.sm_arch_sm_version;
286 gpu.sm_arch_spa_version = g->params.sm_arch_spa_version;
287 gpu.sm_arch_warp_count = g->params.sm_arch_warp_count;
288
289 gpu.max_css_buffer_size = g->gr.max_css_buffer_size;
290
291 gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
292 gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
293 gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
294 gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
295 gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
296 gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST;
297 gpu.gpu_va_bit_count = 40;
298
299 strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname));
300 gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g);
301 gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
302 gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
303 gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
304 gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw;
305 gpu.gr_gobs_per_comptagline_per_slice =
306 g->gr.gobs_per_comptagline_per_slice;
307 gpu.num_ltc = g->ltc_count;
308 gpu.lts_per_ltc = g->gr.slices_per_ltc;
309 gpu.cbc_cache_line_size = g->gr.cacheline_size;
310 gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline;
311
312 if (g->ops.clk.get_maxrate)
313 gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK);
314
315 gpu.local_video_memory_size = g->mm.vidmem.size;
316
317 gpu.pci_vendor_id = g->pci_vendor_id;
318 gpu.pci_device_id = g->pci_device_id;
319 gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id;
320 gpu.pci_subsystem_device_id = g->pci_subsystem_device_id;
321 gpu.pci_class = g->pci_class;
322 gpu.pci_revision = g->pci_revision;
323
324 nvgpu_set_preemption_mode_flags(g, &gpu);
325
326 if (request->gpu_characteristics_buf_size > 0) {
327 size_t write_size = sizeof(gpu);
328
329 if (write_size > request->gpu_characteristics_buf_size)
330 write_size = request->gpu_characteristics_buf_size;
331
332 err = copy_to_user((void __user *)(uintptr_t)
333 request->gpu_characteristics_buf_addr,
334 &gpu, write_size);
335 }
336
337 if (err == 0)
338 request->gpu_characteristics_buf_size = sizeof(gpu);
339
340 gk20a_idle(g);
341
342 return err;
343}
344
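/*
 * A hypothetical userspace sketch (not part of this file) of calling the
 * handler above.  The field names follow the handler; the ioctl name
 * NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, the uapi header path and the
 * already-open control-node fd are assumptions.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nvgpu.h>

static int query_characteristics(int ctrl_fd,
				 struct nvgpu_gpu_characteristics *out)
{
	struct nvgpu_gpu_get_characteristics req;

	memset(&req, 0, sizeof(req));
	req.gpu_characteristics_buf_addr = (__u64)(uintptr_t)out;
	req.gpu_characteristics_buf_size = sizeof(*out);

	/* The handler copies min(sizeof(gpu), buf_size) bytes and then
	 * writes the full structure size back into buf_size. */
	return ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req);
}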
345static int gk20a_ctrl_prepare_compressible_read(
346 struct gk20a *g,
347 struct nvgpu_gpu_prepare_compressible_read_args *args)
348{
349 int ret = -ENOSYS;
350
351#ifdef CONFIG_NVGPU_SUPPORT_CDE
352 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
353 struct nvgpu_channel_fence fence;
354 struct gk20a_fence *fence_out = NULL;
355 int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags(
356 args->submit_flags);
357 int fd = -1;
358
359 fence.id = args->fence.syncpt_id;
360 fence.value = args->fence.syncpt_value;
361
 362	/* Try to allocate an fd here */
363 if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
364 && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) {
365 fd = get_unused_fd_flags(O_RDWR);
366 if (fd < 0)
367 return fd;
368 }
369
370 ret = gk20a_prepare_compressible_read(l, args->handle,
371 args->request_compbits, args->offset,
372 args->compbits_hoffset, args->compbits_voffset,
373 args->scatterbuffer_offset,
374 args->width, args->height, args->block_height_log2,
375 submit_flags, &fence, &args->valid_compbits,
376 &args->zbc_color, &fence_out);
377
378 if (ret) {
379 if (fd != -1)
380 put_unused_fd(fd);
381 return ret;
382 }
383
384 /* Convert fence_out to something we can pass back to user space. */
385 if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) {
386 if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
387 if (fence_out) {
388 ret = gk20a_fence_install_fd(fence_out, fd);
389 if (ret)
390 put_unused_fd(fd);
391 else
392 args->fence.fd = fd;
393 } else {
394 args->fence.fd = -1;
395 put_unused_fd(fd);
396 }
397 } else {
398 if (fence_out) {
399 args->fence.syncpt_id = fence_out->syncpt_id;
400 args->fence.syncpt_value =
401 fence_out->syncpt_value;
402 } else {
403 args->fence.syncpt_id = -1;
404 args->fence.syncpt_value = 0;
405 }
406 }
407 }
408 gk20a_fence_put(fence_out);
409#endif
410
411 return ret;
412}
413
414static int gk20a_ctrl_mark_compressible_write(
415 struct gk20a *g,
416 struct nvgpu_gpu_mark_compressible_write_args *args)
417{
418 int ret = -ENOSYS;
419
420#ifdef CONFIG_NVGPU_SUPPORT_CDE
421 ret = gk20a_mark_compressible_write(g, args->handle,
422 args->valid_compbits, args->offset, args->zbc_color);
423#endif
424
425 return ret;
426}
427
428static int gk20a_ctrl_alloc_as(
429 struct gk20a *g,
430 struct nvgpu_alloc_as_args *args)
431{
432 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
433 struct gk20a_as_share *as_share;
434 int err;
435 int fd;
436 struct file *file;
437 char name[64];
438
439 err = get_unused_fd_flags(O_RDWR);
440 if (err < 0)
441 return err;
442 fd = err;
443
444 snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd);
445
446 file = anon_inode_getfile(name, l->as_dev.cdev.ops, NULL, O_RDWR);
447 if (IS_ERR(file)) {
448 err = PTR_ERR(file);
449 goto clean_up;
450 }
451
452 err = gk20a_as_alloc_share(g, args->big_page_size,
453 gk20a_as_translate_as_alloc_flags(g,
454 args->flags),
455 &as_share);
456 if (err)
457 goto clean_up_file;
458
459 fd_install(fd, file);
460 file->private_data = as_share;
461
462 args->as_fd = fd;
463 return 0;
464
465clean_up_file:
466 fput(file);
467clean_up:
468 put_unused_fd(fd);
469 return err;
470}
471
472static int gk20a_ctrl_open_tsg(struct gk20a *g,
473 struct nvgpu_gpu_open_tsg_args *args)
474{
475 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
476 int err;
477 int fd;
478 struct file *file;
479 char name[64];
480
481 err = get_unused_fd_flags(O_RDWR);
482 if (err < 0)
483 return err;
484 fd = err;
485
486 snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd);
487
488 file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR);
489 if (IS_ERR(file)) {
490 err = PTR_ERR(file);
491 goto clean_up;
492 }
493
494 err = nvgpu_ioctl_tsg_open(g, file);
495 if (err)
496 goto clean_up_file;
497
498 fd_install(fd, file);
499 args->tsg_fd = fd;
500 return 0;
501
502clean_up_file:
503 fput(file);
504clean_up:
505 put_unused_fd(fd);
506 return err;
507}
508
509static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
510 struct nvgpu_gpu_get_tpc_masks_args *args)
511{
512 struct gr_gk20a *gr = &g->gr;
513 int err = 0;
514 const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count;
515
516 if (args->mask_buf_size > 0) {
517 size_t write_size = gpc_tpc_mask_size;
518
519 if (write_size > args->mask_buf_size)
520 write_size = args->mask_buf_size;
521
522 err = copy_to_user((void __user *)(uintptr_t)
523 args->mask_buf_addr,
524 gr->gpc_tpc_mask, write_size);
525 }
526
527 if (err == 0)
528 args->mask_buf_size = gpc_tpc_mask_size;
529
530 return err;
531}
532
533static int gk20a_ctrl_get_fbp_l2_masks(
534 struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args)
535{
536 struct gr_gk20a *gr = &g->gr;
537 int err = 0;
538 const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count;
539
540 if (args->mask_buf_size > 0) {
541 size_t write_size = fbp_l2_mask_size;
542
543 if (write_size > args->mask_buf_size)
544 write_size = args->mask_buf_size;
545
546 err = copy_to_user((void __user *)(uintptr_t)
547 args->mask_buf_addr,
548 gr->fbp_rop_l2_en_mask, write_size);
549 }
550
551 if (err == 0)
552 args->mask_buf_size = fbp_l2_mask_size;
553
554 return err;
555}
556
557static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
558 struct nvgpu_gpu_l2_fb_args *args)
559{
560 int err = 0;
561
562 if ((!args->l2_flush && !args->fb_flush) ||
563 (!args->l2_flush && args->l2_invalidate))
564 return -EINVAL;
565
566 if (args->l2_flush)
567 g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false);
568
569 if (args->fb_flush)
570 g->ops.mm.fb_flush(g);
571
572 return err;
573}
574
575/* Invalidate i-cache for kepler & maxwell */
576static int nvgpu_gpu_ioctl_inval_icache(
577 struct gk20a *g,
578 struct nvgpu_gpu_inval_icache_args *args)
579{
580 struct channel_gk20a *ch;
581 int err;
582
583 ch = gk20a_get_channel_from_file(args->channel_fd);
584 if (!ch)
585 return -EINVAL;
586
587 /* Take the global lock, since we'll be doing global regops */
588 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
589 err = g->ops.gr.inval_icache(g, ch);
590 nvgpu_mutex_release(&g->dbg_sessions_lock);
591
592 gk20a_channel_put(ch);
593 return err;
594}
595
596static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
597 struct gk20a *g,
598 struct nvgpu_gpu_mmu_debug_mode_args *args)
599{
600 if (gk20a_busy(g)) {
601 nvgpu_err(g, "failed to power on gpu");
602 return -EINVAL;
603 }
604
605 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
606 g->ops.fb.set_debug_mode(g, args->state == 1);
607 nvgpu_mutex_release(&g->dbg_sessions_lock);
608
609 gk20a_idle(g);
610 return 0;
611}
612
613static int nvgpu_gpu_ioctl_set_debug_mode(
614 struct gk20a *g,
615 struct nvgpu_gpu_sm_debug_mode_args *args)
616{
617 struct channel_gk20a *ch;
618 int err;
619
620 ch = gk20a_get_channel_from_file(args->channel_fd);
621 if (!ch)
622 return -EINVAL;
623
624 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
625 if (g->ops.gr.set_sm_debug_mode)
626 err = g->ops.gr.set_sm_debug_mode(g, ch,
627 args->sms, !!args->enable);
628 else
629 err = -ENOSYS;
630 nvgpu_mutex_release(&g->dbg_sessions_lock);
631
632 gk20a_channel_put(ch);
633 return err;
634}
635
636static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g)
637{
638 int err;
639
640 err = gk20a_busy(g);
641 if (err)
642 return err;
643
644 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
645 err = g->ops.gr.trigger_suspend(g);
646 nvgpu_mutex_release(&g->dbg_sessions_lock);
647
648 gk20a_idle(g);
649
650 return err;
651}
652
653static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
654 struct nvgpu_gpu_wait_pause_args *args)
655{
656 int err;
657 struct warpstate *ioctl_w_state;
658 struct nvgpu_warpstate *w_state = NULL;
659 u32 sm_count, ioctl_size, size, sm_id;
660
661 sm_count = g->gr.gpc_count * g->gr.tpc_count;
662
663 ioctl_size = sm_count * sizeof(struct warpstate);
664 ioctl_w_state = nvgpu_kzalloc(g, ioctl_size);
665 if (!ioctl_w_state)
666 return -ENOMEM;
667
668 size = sm_count * sizeof(struct nvgpu_warpstate);
669 w_state = nvgpu_kzalloc(g, size);
670 if (!w_state) {
671 err = -ENOMEM;
672 goto out_free;
673 }
674
675 err = gk20a_busy(g);
676 if (err)
677 goto out_free;
678
679 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
680 g->ops.gr.wait_for_pause(g, w_state);
681
682 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
683 ioctl_w_state[sm_id].valid_warps[0] =
684 w_state[sm_id].valid_warps[0];
685 ioctl_w_state[sm_id].valid_warps[1] =
686 w_state[sm_id].valid_warps[1];
687 ioctl_w_state[sm_id].trapped_warps[0] =
688 w_state[sm_id].trapped_warps[0];
689 ioctl_w_state[sm_id].trapped_warps[1] =
690 w_state[sm_id].trapped_warps[1];
691 ioctl_w_state[sm_id].paused_warps[0] =
692 w_state[sm_id].paused_warps[0];
693 ioctl_w_state[sm_id].paused_warps[1] =
694 w_state[sm_id].paused_warps[1];
695 }
 696	/* Copy the converted state to user space - pointed to by "args->pwarpstate" */
 697	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate,
 698			ioctl_w_state, ioctl_size)) {
699 nvgpu_log_fn(g, "copy_to_user failed!");
700 err = -EFAULT;
701 }
702
703 nvgpu_mutex_release(&g->dbg_sessions_lock);
704
705 gk20a_idle(g);
706
707out_free:
708 nvgpu_kfree(g, w_state);
709 nvgpu_kfree(g, ioctl_w_state);
710
711 return err;
712}
713
714static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g)
715{
716 int err;
717
718 err = gk20a_busy(g);
719 if (err)
720 return err;
721
722 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
723 err = g->ops.gr.resume_from_pause(g);
724 nvgpu_mutex_release(&g->dbg_sessions_lock);
725
726 gk20a_idle(g);
727
728 return err;
729}
730
731static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g)
732{
733 int err;
734
735 err = gk20a_busy(g);
736 if (err)
737 return err;
738
739 err = g->ops.gr.clear_sm_errors(g);
740
741 gk20a_idle(g);
742
743 return err;
744}
745
746static int nvgpu_gpu_ioctl_has_any_exception(
747 struct gk20a *g,
748 struct nvgpu_gpu_tpc_exception_en_status_args *args)
749{
750 u32 tpc_exception_en;
751
752 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
753 tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g);
754 nvgpu_mutex_release(&g->dbg_sessions_lock);
755
756 args->tpc_exception_en_sm_mask = tpc_exception_en;
757
758 return 0;
759}
760
761static int gk20a_ctrl_get_num_vsms(struct gk20a *g,
762 struct nvgpu_gpu_num_vsms *args)
763{
764 struct gr_gk20a *gr = &g->gr;
765 args->num_vsms = gr->no_of_sm;
766 return 0;
767}
768
769static int gk20a_ctrl_vsm_mapping(struct gk20a *g,
770 struct nvgpu_gpu_vsms_mapping *args)
771{
772 int err = 0;
773 struct gr_gk20a *gr = &g->gr;
774 size_t write_size = gr->no_of_sm *
775 sizeof(struct nvgpu_gpu_vsms_mapping_entry);
776 struct nvgpu_gpu_vsms_mapping_entry *vsms_buf;
777 u32 i;
778
779 vsms_buf = nvgpu_kzalloc(g, write_size);
780 if (vsms_buf == NULL)
781 return -ENOMEM;
782
783 for (i = 0; i < gr->no_of_sm; i++) {
784 vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index;
785 if (g->ops.gr.get_nonpes_aware_tpc)
786 vsms_buf[i].tpc_index =
787 g->ops.gr.get_nonpes_aware_tpc(g,
788 gr->sm_to_cluster[i].gpc_index,
789 gr->sm_to_cluster[i].tpc_index);
790 else
791 vsms_buf[i].tpc_index =
792 gr->sm_to_cluster[i].tpc_index;
793 }
794
795 err = copy_to_user((void __user *)(uintptr_t)
796 args->vsms_map_buf_addr,
797 vsms_buf, write_size);
798 nvgpu_kfree(g, vsms_buf);
799
800 return err;
801}
802
803static int nvgpu_gpu_get_cpu_time_correlation_info(
804 struct gk20a *g,
805 struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
806{
807 struct nvgpu_cpu_time_correlation_sample *samples;
808 int err;
809 u32 i;
810
811 if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT ||
812 args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC)
813 return -EINVAL;
814
815 samples = nvgpu_kzalloc(g, args->count *
816 sizeof(struct nvgpu_cpu_time_correlation_sample));
817 if (!samples) {
818 return -ENOMEM;
819 }
820
821 err = g->ops.ptimer.get_timestamps_zipper(g,
822 args->source_id, args->count, samples);
823 if (!err) {
824 for (i = 0; i < args->count; i++) {
825 args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
826 args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
827 }
828 }
829
830 nvgpu_kfree(g, samples);
831
832 return err;
833}
834
835static int nvgpu_gpu_get_gpu_time(
836 struct gk20a *g,
837 struct nvgpu_gpu_get_gpu_time_args *args)
838{
839 u64 time;
840 int err;
841
842 err = gk20a_busy(g);
843 if (err)
844 return err;
845
846 err = g->ops.ptimer.read_ptimer(g, &time);
847 if (!err)
848 args->gpu_timestamp = time;
849
850 gk20a_idle(g);
851 return err;
852}
853
854static int nvgpu_gpu_get_engine_info(
855 struct gk20a *g,
856 struct nvgpu_gpu_get_engine_info_args *args)
857{
858 int err = 0;
859 u32 engine_enum = ENGINE_INVAL_GK20A;
860 u32 report_index = 0;
861 u32 engine_id_idx;
862 const u32 max_buffer_engines = args->engine_info_buf_size /
863 sizeof(struct nvgpu_gpu_get_engine_info_item);
864 struct nvgpu_gpu_get_engine_info_item __user *dst_item_list =
865 (void __user *)(uintptr_t)args->engine_info_buf_addr;
866
867 for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines;
868 ++engine_id_idx) {
869 u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx];
870 const struct fifo_engine_info_gk20a *src_info =
871 &g->fifo.engine_info[active_engine_id];
872 struct nvgpu_gpu_get_engine_info_item dst_info;
873
874 memset(&dst_info, 0, sizeof(dst_info));
875
876 engine_enum = src_info->engine_enum;
877
878 switch (engine_enum) {
879 case ENGINE_GR_GK20A:
880 dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR;
881 break;
882
883 case ENGINE_GRCE_GK20A:
884 dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY;
885 break;
886
887 case ENGINE_ASYNC_CE_GK20A:
888 dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY;
889 break;
890
891 default:
892 nvgpu_err(g, "Unmapped engine enum %u",
893 engine_enum);
894 continue;
895 }
896
897 dst_info.engine_instance = src_info->inst_id;
898 dst_info.runlist_id = src_info->runlist_id;
899
900 if (report_index < max_buffer_engines) {
901 err = copy_to_user(&dst_item_list[report_index],
902 &dst_info, sizeof(dst_info));
903 if (err)
904 goto clean_up;
905 }
906
907 ++report_index;
908 }
909
910 args->engine_info_buf_size =
911 report_index * sizeof(struct nvgpu_gpu_get_engine_info_item);
912
913clean_up:
914 return err;
915}
916
917static int nvgpu_gpu_alloc_vidmem(struct gk20a *g,
918 struct nvgpu_gpu_alloc_vidmem_args *args)
919{
920 u32 align = args->in.alignment ? args->in.alignment : SZ_4K;
921 int fd;
922
923 nvgpu_log_fn(g, " ");
924
925 /* not yet supported */
926 if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK))
927 return -EINVAL;
928
929 /* not yet supported */
930 if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR))
931 return -EINVAL;
932
933 if (args->in.size & (SZ_4K - 1))
934 return -EINVAL;
935
936 if (!args->in.size)
937 return -EINVAL;
938
939 if (align & (align - 1))
940 return -EINVAL;
941
942 if (align > roundup_pow_of_two(args->in.size)) {
943 /* log this special case, buddy allocator detail */
944 nvgpu_warn(g,
945 "alignment larger than buffer size rounded up to power of 2 is not supported");
946 return -EINVAL;
947 }
948
949 fd = nvgpu_vidmem_export_linux(g, args->in.size);
950 if (fd < 0)
951 return fd;
952
953 args->out.dmabuf_fd = fd;
954
955 nvgpu_log_fn(g, "done, fd=%d", fd);
956
957 return 0;
958}
959
960static int nvgpu_gpu_get_memory_state(struct gk20a *g,
961 struct nvgpu_gpu_get_memory_state_args *args)
962{
963 int err;
964
965 nvgpu_log_fn(g, " ");
966
967 if (args->reserved[0] || args->reserved[1] ||
968 args->reserved[2] || args->reserved[3])
969 return -EINVAL;
970
971 err = nvgpu_vidmem_get_space(g, &args->total_free_bytes);
972
973 nvgpu_log_fn(g, "done, err=%d, bytes=%lld", err, args->total_free_bytes);
974
975 return err;
976}
977
978static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain)
979{
980 u32 domain = 0;
981
982 if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK)
983 domain = NVGPU_CLK_DOMAIN_MCLK;
984 else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK)
985 domain = NVGPU_CLK_DOMAIN_GPCCLK;
986 else
987 domain = NVGPU_CLK_DOMAIN_MAX + 1;
988
989 return domain;
990}
991
992static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
993 struct gk20a_ctrl_priv *priv,
994 struct nvgpu_gpu_clk_vf_points_args *args)
995{
996 struct nvgpu_gpu_clk_vf_point clk_point;
997 struct nvgpu_gpu_clk_vf_point __user *entry;
998 struct nvgpu_clk_session *session = priv->clk_session;
999 u32 clk_domains = 0;
1000 int err;
1001 u16 last_mhz;
1002 u16 *fpoints;
1003 u32 i;
1004 u32 max_points = 0;
1005 u32 num_points = 0;
1006 u16 min_mhz;
1007 u16 max_mhz;
1008
1009 nvgpu_log_fn(g, " ");
1010
1011 if (!session || args->flags)
1012 return -EINVAL;
1013
1014 clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
1015 args->num_entries = 0;
1016
1017 if (!nvgpu_clk_arb_is_valid_domain(g,
1018 nvgpu_gpu_convert_clk_domain(args->clk_domain)))
1019 return -EINVAL;
1020
1021 err = nvgpu_clk_arb_get_arbiter_clk_f_points(g,
1022 nvgpu_gpu_convert_clk_domain(args->clk_domain),
1023 &max_points, NULL);
1024 if (err)
1025 return err;
1026
1027 if (!args->max_entries) {
1028 args->max_entries = max_points;
1029 return 0;
1030 }
1031
1032 if (args->max_entries < max_points)
1033 return -EINVAL;
1034
1035 err = nvgpu_clk_arb_get_arbiter_clk_range(g,
1036 nvgpu_gpu_convert_clk_domain(args->clk_domain),
1037 &min_mhz, &max_mhz);
1038 if (err)
1039 return err;
1040
1041 fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16));
1042 if (!fpoints)
1043 return -ENOMEM;
1044
1045 err = nvgpu_clk_arb_get_arbiter_clk_f_points(g,
1046 nvgpu_gpu_convert_clk_domain(args->clk_domain),
1047 &max_points, fpoints);
1048 if (err)
1049 goto fail;
1050
1051 entry = (struct nvgpu_gpu_clk_vf_point __user *)
1052 (uintptr_t)args->clk_vf_point_entries;
1053
1054 last_mhz = 0;
1055 num_points = 0;
1056 for (i = 0; (i < max_points) && !err; i++) {
1057
1058 /* filter out duplicate frequencies */
1059 if (fpoints[i] == last_mhz)
1060 continue;
1061
1062 /* filter out out-of-range frequencies */
1063 if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz))
1064 continue;
1065
1066 last_mhz = fpoints[i];
1067 clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]);
1068
1069 err = copy_to_user((void __user *)entry, &clk_point,
1070 sizeof(clk_point));
1071
1072 num_points++;
1073 entry++;
1074 }
1075
1076 args->num_entries = num_points;
1077
1078fail:
1079 nvgpu_kfree(g, fpoints);
1080 return err;
1081}
1082
1083static int nvgpu_gpu_clk_get_range(struct gk20a *g,
1084 struct gk20a_ctrl_priv *priv,
1085 struct nvgpu_gpu_clk_range_args *args)
1086{
1087 struct nvgpu_gpu_clk_range clk_range;
1088 struct nvgpu_gpu_clk_range __user *entry;
1089 struct nvgpu_clk_session *session = priv->clk_session;
1090
1091 u32 clk_domains = 0;
1092 u32 num_domains;
1093 u32 num_entries;
1094 u32 i;
1095 int bit;
1096 int err;
1097 u16 min_mhz, max_mhz;
1098
1099 nvgpu_log_fn(g, " ");
1100
1101 if (!session)
1102 return -EINVAL;
1103
1104 clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
1105 num_domains = hweight_long(clk_domains);
1106
1107 if (!args->flags) {
1108 if (!args->num_entries) {
1109 args->num_entries = num_domains;
1110 return 0;
1111 }
1112
1113 if (args->num_entries < num_domains)
1114 return -EINVAL;
1115
1116 args->num_entries = 0;
1117 num_entries = num_domains;
1118
1119 } else {
1120 if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS)
1121 return -EINVAL;
1122
1123 num_entries = args->num_entries;
1124 if (num_entries > num_domains)
1125 return -EINVAL;
1126 }
1127
1128 entry = (struct nvgpu_gpu_clk_range __user *)
1129 (uintptr_t)args->clk_range_entries;
1130
1131 for (i = 0; i < num_entries; i++, entry++) {
1132
1133 if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) {
1134 if (copy_from_user(&clk_range, (void __user *)entry,
1135 sizeof(clk_range)))
1136 return -EFAULT;
1137 } else {
1138 bit = ffs(clk_domains) - 1;
1139 clk_range.clk_domain = bit;
1140 clk_domains &= ~BIT(bit);
1141 }
1142
1143 clk_range.flags = 0;
1144 err = nvgpu_clk_arb_get_arbiter_clk_range(g,
1145 nvgpu_gpu_convert_clk_domain(clk_range.clk_domain),
1146 &min_mhz, &max_mhz);
1147 clk_range.min_hz = MHZ_TO_HZ(min_mhz);
1148 clk_range.max_hz = MHZ_TO_HZ(max_mhz);
1149
1150 if (err)
1151 return err;
1152
1153 err = copy_to_user(entry, &clk_range, sizeof(clk_range));
1154 if (err)
1155 return -EFAULT;
1156 }
1157
1158 args->num_entries = num_entries;
1159
1160 return 0;
1161}
1162
1163static int nvgpu_gpu_clk_set_info(struct gk20a *g,
1164 struct gk20a_ctrl_priv *priv,
1165 struct nvgpu_gpu_clk_set_info_args *args)
1166{
1167 struct nvgpu_gpu_clk_info clk_info;
1168 struct nvgpu_gpu_clk_info __user *entry;
1169 struct nvgpu_clk_session *session = priv->clk_session;
1170
1171 int fd;
1172 u32 clk_domains = 0;
1173 u16 freq_mhz;
1174 int i;
1175 int ret;
1176
1177 nvgpu_log_fn(g, " ");
1178
1179 if (!session || args->flags)
1180 return -EINVAL;
1181
1182 clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
1183 if (!clk_domains)
1184 return -EINVAL;
1185
1186 entry = (struct nvgpu_gpu_clk_info __user *)
1187 (uintptr_t)args->clk_info_entries;
1188
1189 for (i = 0; i < args->num_entries; i++, entry++) {
1190
1191 if (copy_from_user(&clk_info, entry, sizeof(clk_info)))
1192 return -EFAULT;
1193
1194 if (!nvgpu_clk_arb_is_valid_domain(g,
1195 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain)))
1196 return -EINVAL;
1197 }
1198
1199 entry = (struct nvgpu_gpu_clk_info __user *)
1200 (uintptr_t)args->clk_info_entries;
1201
1202 ret = nvgpu_clk_arb_install_request_fd(g, session, &fd);
1203 if (ret < 0)
1204 return ret;
1205
1206 for (i = 0; i < args->num_entries; i++, entry++) {
1207
1208 if (copy_from_user(&clk_info, (void __user *)entry,
1209 sizeof(clk_info)))
1210 return -EFAULT;
1211 freq_mhz = HZ_TO_MHZ(clk_info.freq_hz);
1212
1213 nvgpu_clk_arb_set_session_target_mhz(session, fd,
1214 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz);
1215 }
1216
1217 ret = nvgpu_clk_arb_commit_request_fd(g, session, fd);
1218 if (ret < 0)
1219 return ret;
1220
1221 args->completion_fd = fd;
1222
1223 return ret;
1224}
1225
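/*
 * Read per-domain clock info from the arbiter. Each entry's clk_type selects
 * the target, actual or effective frequency; with no flags set one entry per
 * domain is filled in, and a query with num_entries == 0 only reports how
 * many entries are needed. The num_domains * 3 bound likely corresponds to
 * one entry per clock type per domain. Illustrative (hypothetical) userspace
 * sequence:
 *
 *	args.flags = 0; args.num_entries = 0;
 *	args.clk_type = NVGPU_GPU_CLK_TYPE_ACTUAL;
 *	ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_INFO, &args);    query count
 *	allocate args.num_entries entries and set clk_info_entries
 *	ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_INFO, &args);    read info
 */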
1226static int nvgpu_gpu_clk_get_info(struct gk20a *g,
1227 struct gk20a_ctrl_priv *priv,
1228 struct nvgpu_gpu_clk_get_info_args *args)
1229{
1230 struct nvgpu_gpu_clk_info clk_info;
1231 struct nvgpu_gpu_clk_info __user *entry;
1232 struct nvgpu_clk_session *session = priv->clk_session;
1233 u32 clk_domains = 0;
1234 u32 num_domains;
1235 u32 num_entries;
1236 u32 i;
1237 u16 freq_mhz;
1238 int err;
1239 int bit;
1240
1241 nvgpu_log_fn(g, " ");
1242
1243 if (!session)
1244 return -EINVAL;
1245
1246 clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
1247 num_domains = hweight_long(clk_domains);
1248
1249 if (!args->flags) {
1250 if (!args->num_entries) {
1251 args->num_entries = num_domains;
1252 return 0;
1253 }
1254
1255 if (args->num_entries < num_domains)
1256 return -EINVAL;
1257
1258 args->num_entries = 0;
1259 num_entries = num_domains;
1260
1261 } else {
1262 if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS)
1263 return -EINVAL;
1264
1265 num_entries = args->num_entries;
1266 if (num_entries > num_domains * 3)
1267 return -EINVAL;
1268 }
1269
1270 entry = (struct nvgpu_gpu_clk_info __user *)
1271 (uintptr_t)args->clk_info_entries;
1272
1273 for (i = 0; i < num_entries; i++, entry++) {
1274
1275 if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) {
1276 if (copy_from_user(&clk_info, (void __user *)entry,
1277 sizeof(clk_info)))
1278 return -EFAULT;
1279 } else {
1280 bit = ffs(clk_domains) - 1;
1281 clk_info.clk_domain = bit;
1282 clk_domains &= ~BIT(bit);
1283 clk_info.clk_type = args->clk_type;
1284 }
1285
1286 switch (clk_info.clk_type) {
1287 case NVGPU_GPU_CLK_TYPE_TARGET:
1288 err = nvgpu_clk_arb_get_session_target_mhz(session,
1289 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain),
1290 &freq_mhz);
1291 break;
1292 case NVGPU_GPU_CLK_TYPE_ACTUAL:
1293 err = nvgpu_clk_arb_get_arbiter_actual_mhz(g,
1294 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain),
1295 &freq_mhz);
1296 break;
1297 case NVGPU_GPU_CLK_TYPE_EFFECTIVE:
1298 err = nvgpu_clk_arb_get_arbiter_effective_mhz(g,
1299 nvgpu_gpu_convert_clk_domain(clk_info.clk_domain),
1300 &freq_mhz);
1301 break;
1302 default:
1303 freq_mhz = 0;
1304 err = -EINVAL;
1305 break;
1306 }
1307 if (err)
1308 return err;
1309
1310 clk_info.flags = 0;
1311 clk_info.freq_hz = MHZ_TO_HZ(freq_mhz);
1312
1313 err = copy_to_user((void __user *)entry, &clk_info,
1314 sizeof(clk_info));
1315 if (err)
1316 return -EFAULT;
1317 }
1318
1319 args->num_entries = num_entries;
1320
1321 return 0;
1322}
1323
1324static int nvgpu_gpu_get_event_fd(struct gk20a *g,
1325 struct gk20a_ctrl_priv *priv,
1326 struct nvgpu_gpu_get_event_fd_args *args)
1327{
1328 struct nvgpu_clk_session *session = priv->clk_session;
1329
1330 nvgpu_log_fn(g, " ");
1331
1332 if (!session)
1333 return -EINVAL;
1334
1335 return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd,
1336 args->flags);
1337}
1338
1339static int nvgpu_gpu_get_voltage(struct gk20a *g,
1340 struct nvgpu_gpu_get_voltage_args *args)
1341{
1342 int err = -EINVAL;
1343
1344 nvgpu_log_fn(g, " ");
1345
1346 if (args->reserved)
1347 return -EINVAL;
1348
1349 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE))
1350 return -EINVAL;
1351
1352 err = gk20a_busy(g);
1353 if (err)
1354 return err;
1355
1356 switch (args->which) {
1357 case NVGPU_GPU_VOLTAGE_CORE:
1358 err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage);
1359 break;
1360 case NVGPU_GPU_VOLTAGE_SRAM:
1361 err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage);
1362 break;
1363 case NVGPU_GPU_VOLTAGE_BUS:
1364 err = pmgr_pwr_devices_get_voltage(g, &args->voltage);
1365 break;
1366 default:
1367 err = -EINVAL;
1368 }
1369
1370 gk20a_idle(g);
1371
1372 return err;
1373}
1374
1375static int nvgpu_gpu_get_current(struct gk20a *g,
1376 struct nvgpu_gpu_get_current_args *args)
1377{
1378 int err;
1379
1380 nvgpu_log_fn(g, " ");
1381
1382 if (args->reserved[0] || args->reserved[1] || args->reserved[2])
1383 return -EINVAL;
1384
1385 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT))
1386 return -EINVAL;
1387
1388 err = gk20a_busy(g);
1389 if (err)
1390 return err;
1391
1392 err = pmgr_pwr_devices_get_current(g, &args->currnt);
1393
1394 gk20a_idle(g);
1395
1396 return err;
1397}
1398
1399static int nvgpu_gpu_get_power(struct gk20a *g,
1400 struct nvgpu_gpu_get_power_args *args)
1401{
1402 int err;
1403
1404 nvgpu_log_fn(g, " ");
1405
1406 if (args->reserved[0] || args->reserved[1] || args->reserved[2])
1407 return -EINVAL;
1408
1409 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER))
1410 return -EINVAL;
1411
1412 err = gk20a_busy(g);
1413 if (err)
1414 return err;
1415
1416 err = pmgr_pwr_devices_get_power(g, &args->power);
1417
1418 gk20a_idle(g);
1419
1420 return err;
1421}
1422
1423static int nvgpu_gpu_get_temperature(struct gk20a *g,
1424 struct nvgpu_gpu_get_temperature_args *args)
1425{
1426 int err;
1427 u32 temp_f24_8;
1428
1429 nvgpu_log_fn(g, " ");
1430
1431 if (args->reserved[0] || args->reserved[1] || args->reserved[2])
1432 return -EINVAL;
1433
1434 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE))
1435 return -EINVAL;
1436
1437 if (!g->ops.therm.get_internal_sensor_curr_temp)
1438 return -EINVAL;
1439
1440 err = gk20a_busy(g);
1441 if (err)
1442 return err;
1443
1444 err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8);
1445
1446 gk20a_idle(g);
1447
1448 args->temp_f24_8 = (s32)temp_f24_8;
1449
1450 return err;
1451}
1452
1453static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g,
1454 struct nvgpu_gpu_set_therm_alert_limit_args *args)
1455{
1456 int err;
1457
1458 nvgpu_log_fn(g, " ");
1459
1460 if (args->reserved[0] || args->reserved[1] || args->reserved[2])
1461 return -EINVAL;
1462
1463 if (!g->ops.therm.configure_therm_alert)
1464 return -EINVAL;
1465
1466 err = gk20a_busy(g);
1467 if (err)
1468 return err;
1469
1470 err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8);
1471
1472 gk20a_idle(g);
1473
1474 return err;
1475}
1476
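/*
 * Toggle whether a deterministic channel allows the GPU to be railgated.
 * While railgating is disallowed the channel holds a power reference;
 * ALLOW_RAILGATING drops that reference (gk20a_idle) and DISALLOW_RAILGATING
 * takes it back (gk20a_busy). Passing neither flag is a no-op.
 */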
1477static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch,
1478 u32 flags)
1479{
1480 int err = 0;
1481 bool allow;
1482 bool disallow;
1483
1484 allow = flags &
1485 NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING;
1486
1487 disallow = flags &
1488 NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING;
1489
1490 /* Can't be both at the same time */
1491 if (allow && disallow)
1492 return -EINVAL;
1493
1494 /* Nothing to do */
1495 if (!allow && !disallow)
1496 return 0;
1497
1498 /*
1499 * Moving into explicit idle or back from it? A call that doesn't
1500 * change the status is a no-op.
1501 */
1502 if (!ch->deterministic_railgate_allowed &&
1503 allow) {
1504 gk20a_idle(ch->g);
1505 } else if (ch->deterministic_railgate_allowed &&
1506 !allow) {
1507 err = gk20a_busy(ch->g);
1508 if (err) {
1509 nvgpu_warn(ch->g,
1510 "cannot busy to restore deterministic ch");
1511 return err;
1512 }
1513 }
1514 ch->deterministic_railgate_allowed = allow;
1515
1516 return err;
1517}
1518
1519static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags)
1520{
1521 if (!ch->deterministic)
1522 return -EINVAL;
1523
1524 return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags);
1525}
1526
1527static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
1528 struct nvgpu_gpu_set_deterministic_opts_args *args)
1529{
1530 int __user *user_channels;
1531 u32 i = 0;
1532 int err = 0;
1533
1534 nvgpu_log_fn(g, " ");
1535
1536 user_channels = (int __user *)(uintptr_t)args->channels;
1537
1538 /* Upper limit; prevent holding deterministic_busy for long */
1539 if (args->num_channels > g->fifo.num_channels) {
1540 err = -EINVAL;
1541 goto out;
1542 }
1543
1544 /* Trivial sanity check first */
1545 if (!access_ok(VERIFY_READ, user_channels,
1546 args->num_channels * sizeof(int))) {
1547 err = -EFAULT;
1548 goto out;
1549 }
1550
1551 nvgpu_rwsem_down_read(&g->deterministic_busy);
1552
1553 /* note: we exit at the first failure */
1554 for (; i < args->num_channels; i++) {
1555 int ch_fd = 0;
1556 struct channel_gk20a *ch;
1557
1558 if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) {
1559 /* User raced with above access_ok */
1560 err = -EFAULT;
1561 break;
1562 }
1563
1564 ch = gk20a_get_channel_from_file(ch_fd);
1565 if (!ch) {
1566 err = -EINVAL;
1567 break;
1568 }
1569
1570 err = nvgpu_gpu_set_deterministic_ch(ch, args->flags);
1571
1572 gk20a_channel_put(ch);
1573
1574 if (err)
1575 break;
1576 }
1577
1578 nvgpu_rwsem_up_read(&g->deterministic_busy);
1579
1580out:
1581 args->num_channels = i;
1582 return err;
1583}
1584
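/*
 * Copy the saved error state (ESR values and report masks) of one SM into
 * the caller's record buffer, clamping the copy to the caller-supplied
 * record_size and reporting back how many bytes were written.
 */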
1585static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g,
1586 struct nvgpu_gpu_read_single_sm_error_state_args *args)
1587{
1588 struct gr_gk20a *gr = &g->gr;
1589 struct nvgpu_gr_sm_error_state *sm_error_state;
1590 struct nvgpu_gpu_sm_error_state_record sm_error_state_record;
1591 u32 sm_id;
1592 int err = 0;
1593
1594 sm_id = args->sm_id;
1595 if (sm_id >= gr->no_of_sm)
1596 return -EINVAL;
1597
1598 nvgpu_speculation_barrier();
1599
1600 sm_error_state = gr->sm_error_states + sm_id;
1601 sm_error_state_record.global_esr =
1602 sm_error_state->hww_global_esr;
1603 sm_error_state_record.warp_esr =
1604 sm_error_state->hww_warp_esr;
1605 sm_error_state_record.warp_esr_pc =
1606 sm_error_state->hww_warp_esr_pc;
1607 sm_error_state_record.global_esr_report_mask =
1608 sm_error_state->hww_global_esr_report_mask;
1609 sm_error_state_record.warp_esr_report_mask =
1610 sm_error_state->hww_warp_esr_report_mask;
1611
1612 if (args->record_size > 0) {
1613		size_t write_size = sizeof(sm_error_state_record);
1614
1615 if (write_size > args->record_size)
1616 write_size = args->record_size;
1617
1618 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1619 err = copy_to_user((void __user *)(uintptr_t)
1620 args->record_mem,
1621 &sm_error_state_record,
1622 write_size);
1623 nvgpu_mutex_release(&g->dbg_sessions_lock);
1624 if (err) {
1625 nvgpu_err(g, "copy_to_user failed!");
1626			return -EFAULT;
1627 }
1628
1629 args->record_size = write_size;
1630 }
1631
1632 return 0;
1633}
1634
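/*
 * Main ioctl dispatcher for the ctrl device node. The command number and
 * argument size are validated, write-direction arguments are copied into a
 * stack buffer, the command is dispatched, and read-direction results are
 * copied back to userspace only if the handler succeeded.
 */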
1635long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1636{
1637 struct gk20a_ctrl_priv *priv = filp->private_data;
1638 struct gk20a *g = priv->g;
1639 struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
1640 struct nvgpu_gpu_zcull_get_info_args *get_info_args;
1641 struct nvgpu_gpu_zbc_set_table_args *set_table_args;
1642 struct nvgpu_gpu_zbc_query_table_args *query_table_args;
1643 u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE];
1644 struct gr_zcull_info *zcull_info;
1645 struct zbc_entry *zbc_val;
1646 struct zbc_query_params *zbc_tbl;
1647 int i, err = 0;
1648
1649 nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
1650
1651 if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) ||
1652 (_IOC_NR(cmd) == 0) ||
1653 (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) ||
1654 (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE))
1655 return -EINVAL;
1656
1657 memset(buf, 0, sizeof(buf));
1658 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1659 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
1660 return -EFAULT;
1661 }
1662
1663 if (!g->sw_ready) {
1664 err = gk20a_busy(g);
1665 if (err)
1666 return err;
1667
1668 gk20a_idle(g);
1669 }
1670
1671 switch (cmd) {
1672 case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
1673 get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
1674
1675 get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr);
1676
1677 break;
1678 case NVGPU_GPU_IOCTL_ZCULL_GET_INFO:
1679 get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf;
1680
1681 memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args));
1682
1683 zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info));
1684 if (zcull_info == NULL)
1685 return -ENOMEM;
1686
1687 err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);
1688 if (err) {
1689 nvgpu_kfree(g, zcull_info);
1690 break;
1691 }
1692
1693 get_info_args->width_align_pixels = zcull_info->width_align_pixels;
1694 get_info_args->height_align_pixels = zcull_info->height_align_pixels;
1695 get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots;
1696 get_info_args->aliquot_total = zcull_info->aliquot_total;
1697 get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier;
1698 get_info_args->region_header_size = zcull_info->region_header_size;
1699 get_info_args->subregion_header_size = zcull_info->subregion_header_size;
1700 get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels;
1701 get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels;
1702 get_info_args->subregion_count = zcull_info->subregion_count;
1703
1704 nvgpu_kfree(g, zcull_info);
1705 break;
1706 case NVGPU_GPU_IOCTL_ZBC_SET_TABLE:
1707 set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf;
1708
1709 zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry));
1710 if (zbc_val == NULL)
1711 return -ENOMEM;
1712
1713 zbc_val->format = set_table_args->format;
1714 zbc_val->type = set_table_args->type;
1715
1716 switch (zbc_val->type) {
1717 case GK20A_ZBC_TYPE_COLOR:
1718 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
1719 zbc_val->color_ds[i] = set_table_args->color_ds[i];
1720 zbc_val->color_l2[i] = set_table_args->color_l2[i];
1721 }
1722 break;
1723 case GK20A_ZBC_TYPE_DEPTH:
1724 case T19X_ZBC:
1725 zbc_val->depth = set_table_args->depth;
1726 break;
1727 default:
1728 err = -EINVAL;
1729 }
1730
1731 if (!err) {
1732 err = gk20a_busy(g);
1733 if (!err) {
1734 err = g->ops.gr.zbc_set_table(g, &g->gr,
1735 zbc_val);
1736 gk20a_idle(g);
1737 }
1738 }
1739
1740 if (zbc_val)
1741 nvgpu_kfree(g, zbc_val);
1742 break;
1743 case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE:
1744 query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf;
1745
1746 zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params));
1747 if (zbc_tbl == NULL)
1748 return -ENOMEM;
1749
1750 zbc_tbl->type = query_table_args->type;
1751 zbc_tbl->index_size = query_table_args->index_size;
1752
1753 err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl);
1754
1755 if (!err) {
1756 switch (zbc_tbl->type) {
1757 case GK20A_ZBC_TYPE_COLOR:
1758 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
1759 query_table_args->color_ds[i] = zbc_tbl->color_ds[i];
1760 query_table_args->color_l2[i] = zbc_tbl->color_l2[i];
1761 }
1762 break;
1763 case GK20A_ZBC_TYPE_DEPTH:
1764 case T19X_ZBC:
1765 query_table_args->depth = zbc_tbl->depth;
1766 break;
1767 case GK20A_ZBC_TYPE_INVALID:
1768 query_table_args->index_size = zbc_tbl->index_size;
1769 break;
1770 default:
1771 err = -EINVAL;
1772 }
1773 if (!err) {
1774 query_table_args->format = zbc_tbl->format;
1775 query_table_args->ref_cnt = zbc_tbl->ref_cnt;
1776 }
1777 }
1778
1779 if (zbc_tbl)
1780 nvgpu_kfree(g, zbc_tbl);
1781 break;
1782
1783 case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS:
1784 err = gk20a_ctrl_ioctl_gpu_characteristics(
1785 g, (struct nvgpu_gpu_get_characteristics *)buf);
1786 break;
1787 case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ:
1788 err = gk20a_ctrl_prepare_compressible_read(g,
1789 (struct nvgpu_gpu_prepare_compressible_read_args *)buf);
1790 break;
1791 case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE:
1792 err = gk20a_ctrl_mark_compressible_write(g,
1793 (struct nvgpu_gpu_mark_compressible_write_args *)buf);
1794 break;
1795 case NVGPU_GPU_IOCTL_ALLOC_AS:
1796 err = gk20a_ctrl_alloc_as(g,
1797 (struct nvgpu_alloc_as_args *)buf);
1798 break;
1799 case NVGPU_GPU_IOCTL_OPEN_TSG:
1800 err = gk20a_ctrl_open_tsg(g,
1801 (struct nvgpu_gpu_open_tsg_args *)buf);
1802 break;
1803 case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
1804 err = gk20a_ctrl_get_tpc_masks(g,
1805 (struct nvgpu_gpu_get_tpc_masks_args *)buf);
1806 break;
1807 case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS:
1808 err = gk20a_ctrl_get_fbp_l2_masks(g,
1809 (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf);
1810 break;
1811 case NVGPU_GPU_IOCTL_OPEN_CHANNEL:
1812		/* this arg type is used here, but ..gpu_open_channel_args in nvgpu.h
1813		 * has the same layout; kept for consistency */
1814 err = gk20a_channel_open_ioctl(g,
1815 (struct nvgpu_channel_open_args *)buf);
1816 break;
1817 case NVGPU_GPU_IOCTL_FLUSH_L2:
1818 err = nvgpu_gpu_ioctl_l2_fb_ops(g,
1819 (struct nvgpu_gpu_l2_fb_args *)buf);
1820 break;
1821 case NVGPU_GPU_IOCTL_INVAL_ICACHE:
1822 err = gr_gk20a_elpg_protected_call(g,
1823 nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf));
1824 break;
1825
1826 case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
1827 err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
1828 (struct nvgpu_gpu_mmu_debug_mode_args *)buf);
1829 break;
1830
1831 case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
1832 err = gr_gk20a_elpg_protected_call(g,
1833 nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf));
1834 break;
1835
1836 case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND:
1837 err = nvgpu_gpu_ioctl_trigger_suspend(g);
1838 break;
1839
1840 case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
1841 err = nvgpu_gpu_ioctl_wait_for_pause(g,
1842 (struct nvgpu_gpu_wait_pause_args *)buf);
1843 break;
1844
1845 case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE:
1846 err = nvgpu_gpu_ioctl_resume_from_pause(g);
1847 break;
1848
1849 case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS:
1850 err = nvgpu_gpu_ioctl_clear_sm_errors(g);
1851 break;
1852
1853 case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
1854 err = nvgpu_gpu_ioctl_has_any_exception(g,
1855 (struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
1856 break;
1857
1858 case NVGPU_GPU_IOCTL_NUM_VSMS:
1859 err = gk20a_ctrl_get_num_vsms(g,
1860 (struct nvgpu_gpu_num_vsms *)buf);
1861 break;
1862 case NVGPU_GPU_IOCTL_VSMS_MAPPING:
1863 err = gk20a_ctrl_vsm_mapping(g,
1864 (struct nvgpu_gpu_vsms_mapping *)buf);
1865 break;
1866
1867 case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO:
1868 err = nvgpu_gpu_get_cpu_time_correlation_info(g,
1869 (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf);
1870 break;
1871
1872 case NVGPU_GPU_IOCTL_GET_GPU_TIME:
1873 err = nvgpu_gpu_get_gpu_time(g,
1874 (struct nvgpu_gpu_get_gpu_time_args *)buf);
1875 break;
1876
1877 case NVGPU_GPU_IOCTL_GET_ENGINE_INFO:
1878 err = nvgpu_gpu_get_engine_info(g,
1879 (struct nvgpu_gpu_get_engine_info_args *)buf);
1880 break;
1881
1882 case NVGPU_GPU_IOCTL_ALLOC_VIDMEM:
1883 err = nvgpu_gpu_alloc_vidmem(g,
1884 (struct nvgpu_gpu_alloc_vidmem_args *)buf);
1885 break;
1886
1887 case NVGPU_GPU_IOCTL_GET_MEMORY_STATE:
1888 err = nvgpu_gpu_get_memory_state(g,
1889 (struct nvgpu_gpu_get_memory_state_args *)buf);
1890 break;
1891
1892 case NVGPU_GPU_IOCTL_CLK_GET_RANGE:
1893 err = nvgpu_gpu_clk_get_range(g, priv,
1894 (struct nvgpu_gpu_clk_range_args *)buf);
1895 break;
1896
1897 case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS:
1898 err = nvgpu_gpu_clk_get_vf_points(g, priv,
1899 (struct nvgpu_gpu_clk_vf_points_args *)buf);
1900 break;
1901
1902 case NVGPU_GPU_IOCTL_CLK_SET_INFO:
1903 err = nvgpu_gpu_clk_set_info(g, priv,
1904 (struct nvgpu_gpu_clk_set_info_args *)buf);
1905 break;
1906
1907 case NVGPU_GPU_IOCTL_CLK_GET_INFO:
1908 err = nvgpu_gpu_clk_get_info(g, priv,
1909 (struct nvgpu_gpu_clk_get_info_args *)buf);
1910 break;
1911
1912 case NVGPU_GPU_IOCTL_GET_EVENT_FD:
1913 err = nvgpu_gpu_get_event_fd(g, priv,
1914 (struct nvgpu_gpu_get_event_fd_args *)buf);
1915 break;
1916
1917 case NVGPU_GPU_IOCTL_GET_VOLTAGE:
1918 err = nvgpu_gpu_get_voltage(g,
1919 (struct nvgpu_gpu_get_voltage_args *)buf);
1920 break;
1921
1922 case NVGPU_GPU_IOCTL_GET_CURRENT:
1923 err = nvgpu_gpu_get_current(g,
1924 (struct nvgpu_gpu_get_current_args *)buf);
1925 break;
1926
1927 case NVGPU_GPU_IOCTL_GET_POWER:
1928 err = nvgpu_gpu_get_power(g,
1929 (struct nvgpu_gpu_get_power_args *)buf);
1930 break;
1931
1932 case NVGPU_GPU_IOCTL_GET_TEMPERATURE:
1933 err = nvgpu_gpu_get_temperature(g,
1934 (struct nvgpu_gpu_get_temperature_args *)buf);
1935 break;
1936
1937 case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT:
1938 err = nvgpu_gpu_set_therm_alert_limit(g,
1939 (struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
1940 break;
1941
1942 case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS:
1943 err = nvgpu_gpu_set_deterministic_opts(g,
1944 (struct nvgpu_gpu_set_deterministic_opts_args *)buf);
1945 break;
1946
1947 case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
1948 err = nvgpu_gpu_read_single_sm_error_state(g,
1949 (struct nvgpu_gpu_read_single_sm_error_state_args *)buf);
1950 break;
1951
1952 default:
1953 nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
1954 err = -ENOTTY;
1955 break;
1956 }
1957
1958	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
1959		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}
1960
1961 return err;
1962}
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h
deleted file mode 100644
index 8b4a5e59..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __NVGPU_IOCTL_CTRL_H__
17#define __NVGPU_IOCTL_CTRL_H__
18
19int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
20int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
21long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
22
23#endif
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c
deleted file mode 100644
index 31e7e2cb..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c
+++ /dev/null
@@ -1,2003 +0,0 @@
1/*
2 * Tegra GK20A GPU Debugger/Profiler Driver
3 *
4 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/fs.h>
20#include <linux/file.h>
21#include <linux/cdev.h>
22#include <linux/uaccess.h>
23#include <linux/dma-buf.h>
24#include <uapi/linux/nvgpu.h>
25
26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h>
28#include <nvgpu/vm.h>
29#include <nvgpu/atomic.h>
30#include <nvgpu/cond.h>
31
32#include <nvgpu/linux/vidmem.h>
33#include <nvgpu/linux/vm.h>
34
35#include "gk20a/gk20a.h"
36#include "gk20a/gr_gk20a.h"
37#include "gk20a/regops_gk20a.h"
38#include "gk20a/dbg_gpu_gk20a.h"
39#include "os_linux.h"
40#include "platform_gk20a.h"
41#include "ioctl_dbg.h"
42
43/* turn seriously unwieldy names -> something shorter */
44#define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x
45
46/* silly allocator - just increment id */
47static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
48static int generate_unique_id(void)
49{
50 return nvgpu_atomic_add_return(1, &unique_id);
51}
52
53static int alloc_profiler(struct gk20a *g,
54 struct dbg_profiler_object_data **_prof)
55{
56 struct dbg_profiler_object_data *prof;
57 *_prof = NULL;
58
59 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
60
61 prof = nvgpu_kzalloc(g, sizeof(*prof));
62 if (!prof)
63 return -ENOMEM;
64
65 prof->prof_handle = generate_unique_id();
66 *_prof = prof;
67 return 0;
68}
69
70static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux)
71{
72 struct dbg_session_gk20a_linux *dbg_s_linux;
73 *_dbg_s_linux = NULL;
74
75 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
76
77 dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux));
78 if (!dbg_s_linux)
79 return -ENOMEM;
80
81 dbg_s_linux->dbg_s.id = generate_unique_id();
82 *_dbg_s_linux = dbg_s_linux;
83 return 0;
84}
85
86static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
87 struct gr_gk20a *gr);
88
89static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset);
90
91static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
92 struct nvgpu_dbg_gpu_exec_reg_ops_args *args);
93
94static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
95 struct nvgpu_dbg_gpu_powergate_args *args);
96
97static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
98 struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args);
99
100static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
101 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args);
102
103static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
104 struct dbg_session_gk20a *dbg_s,
105 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
106
107static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s,
108 struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
109
110static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux,
111 struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
112
113static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
114 struct nvgpu_dbg_gpu_profiler_reserve_args *args);
115
116static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
117 struct nvgpu_dbg_gpu_perfbuf_map_args *args);
118
119static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
120 struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
121
122static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
123 int timeout_mode);
124
125static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
126 u32 profiler_handle);
127
128static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s);
129
130static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s);
131
132static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
133 u32 profiler_handle);
134
135static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s);
136
137static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
138 struct file *filp, bool is_profiler);
139
140unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
141{
142 unsigned int mask = 0;
143 struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data;
144 struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s;
145 struct gk20a *g = dbg_s->g;
146
147 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
148
149 poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait);
150
151 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
152
153 if (dbg_s->dbg_events.events_enabled &&
154 dbg_s->dbg_events.num_pending_events > 0) {
155 nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d",
156 dbg_s->id);
157 nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending",
158 dbg_s->dbg_events.num_pending_events);
159 mask = (POLLPRI | POLLIN);
160 }
161
162 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
163
164 return mask;
165}
166
167int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
168{
169 struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data;
170 struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s;
171 struct gk20a *g = dbg_s->g;
172 struct dbg_profiler_object_data *prof_obj, *tmp_obj;
173
174 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name);
175
176 /* unbind channels */
177 dbg_unbind_all_channels_gk20a(dbg_s);
178
179	/* Powergate/timeout enable is called here because a dbg_session that
180	 * disabled powergate/timeouts via ioctl may be killed without ever
181	 * calling the corresponding enable ioctl.
182	 */
183 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
184 g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false);
185 nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
186
187 /* If this session owned the perf buffer, release it */
188 if (g->perfbuf.owner == dbg_s)
189 gk20a_perfbuf_release_locked(g, g->perfbuf.offset);
190
191 /* Per-context profiler objects were released when we called
192 * dbg_unbind_all_channels. We could still have global ones.
193 */
194 nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
195 dbg_profiler_object_data, prof_obj_entry) {
196 if (prof_obj->session_id == dbg_s->id) {
197 if (prof_obj->has_reservation)
198 g->ops.dbg_session_ops.
199 release_profiler_reservation(dbg_s, prof_obj);
200 nvgpu_list_del(&prof_obj->prof_obj_entry);
201 nvgpu_kfree(g, prof_obj);
202 }
203 }
204 nvgpu_mutex_release(&g->dbg_sessions_lock);
205
206 nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
207 nvgpu_mutex_destroy(&dbg_s->ioctl_lock);
208
209 nvgpu_kfree(g, dbg_session_linux);
210 gk20a_put(g);
211
212 return 0;
213}
214
215int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
216{
217 struct nvgpu_os_linux *l = container_of(inode->i_cdev,
218 struct nvgpu_os_linux, prof.cdev);
219 struct gk20a *g = &l->g;
220
221 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
222 return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */);
223}
224
225static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s,
226 struct nvgpu_dbg_gpu_timeout_args *args)
227{
228 int err;
229 struct gk20a *g = dbg_s->g;
230
231 nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable);
232
233 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
234 err = nvgpu_dbg_timeout_enable(dbg_s, args->enable);
235 nvgpu_mutex_release(&g->dbg_sessions_lock);
236
237 return err;
238}
239
240static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
241 struct dbg_session_gk20a *dbg_s,
242 struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args)
243{
244 struct gk20a *g = dbg_s->g;
245 struct gr_gk20a *gr = &g->gr;
246 u32 sm_id;
247 struct channel_gk20a *ch;
248 struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record;
249 struct nvgpu_gr_sm_error_state sm_error_state;
250 int err = 0;
251
252 /* Not currently supported in the virtual case */
253 if (g->is_virtual)
254 return -ENOSYS;
255
256 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
257 if (!ch)
258 return -EINVAL;
259
260 sm_id = args->sm_id;
261 if (sm_id >= gr->no_of_sm)
262 return -EINVAL;
263
264 nvgpu_speculation_barrier();
265
266 if (args->sm_error_state_record_size > 0) {
267 size_t read_size = sizeof(sm_error_state_record);
268
269 if (read_size > args->sm_error_state_record_size)
270 read_size = args->sm_error_state_record_size;
271
272 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
273 err = copy_from_user(&sm_error_state_record,
274 (void __user *)(uintptr_t)
275 args->sm_error_state_record_mem,
276 read_size);
277 nvgpu_mutex_release(&g->dbg_sessions_lock);
278 if (err)
279			return -EFAULT;
280 }
281
282 err = gk20a_busy(g);
283 if (err)
284 return err;
285
286 sm_error_state.hww_global_esr =
287 sm_error_state_record.hww_global_esr;
288 sm_error_state.hww_warp_esr =
289 sm_error_state_record.hww_warp_esr;
290 sm_error_state.hww_warp_esr_pc =
291 sm_error_state_record.hww_warp_esr_pc;
292 sm_error_state.hww_global_esr_report_mask =
293 sm_error_state_record.hww_global_esr_report_mask;
294 sm_error_state.hww_warp_esr_report_mask =
295 sm_error_state_record.hww_warp_esr_report_mask;
296
297 err = gr_gk20a_elpg_protected_call(g,
298 g->ops.gr.update_sm_error_state(g, ch,
299 sm_id, &sm_error_state));
300
301 gk20a_idle(g);
302
303 return err;
304}
305
306
307static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
308 struct dbg_session_gk20a *dbg_s,
309 struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
310{
311 struct gk20a *g = dbg_s->g;
312 struct gr_gk20a *gr = &g->gr;
313 struct nvgpu_gr_sm_error_state *sm_error_state;
314 struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record;
315 u32 sm_id;
316 int err = 0;
317
318 sm_id = args->sm_id;
319 if (sm_id >= gr->no_of_sm)
320 return -EINVAL;
321
322 nvgpu_speculation_barrier();
323
324 sm_error_state = gr->sm_error_states + sm_id;
325 sm_error_state_record.hww_global_esr =
326 sm_error_state->hww_global_esr;
327 sm_error_state_record.hww_warp_esr =
328 sm_error_state->hww_warp_esr;
329 sm_error_state_record.hww_warp_esr_pc =
330 sm_error_state->hww_warp_esr_pc;
331 sm_error_state_record.hww_global_esr_report_mask =
332 sm_error_state->hww_global_esr_report_mask;
333 sm_error_state_record.hww_warp_esr_report_mask =
334 sm_error_state->hww_warp_esr_report_mask;
335
336 if (args->sm_error_state_record_size > 0) {
337		size_t write_size = sizeof(sm_error_state_record);
338
339 if (write_size > args->sm_error_state_record_size)
340 write_size = args->sm_error_state_record_size;
341
342 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
343 err = copy_to_user((void __user *)(uintptr_t)
344 args->sm_error_state_record_mem,
345 &sm_error_state_record,
346 write_size);
347 nvgpu_mutex_release(&g->dbg_sessions_lock);
348 if (err) {
349 nvgpu_err(g, "copy_to_user failed!");
350			return -EFAULT;
351 }
352
353 args->sm_error_state_record_size = write_size;
354 }
355
356 return 0;
357}
358
359
360static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
361 struct dbg_session_gk20a *dbg_s,
362 struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args)
363{
364 struct gk20a *g = dbg_s->g;
365
366 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
367
368 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
369
370 dbg_s->broadcast_stop_trigger = (args->broadcast != 0);
371
372 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
373
374 return 0;
375}
376
377static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
378 int timeout_mode)
379{
380 struct gk20a *g = dbg_s->g;
381 int err = 0;
382
383 nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d",
384 timeout_mode);
385
386 switch (timeout_mode) {
387 case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE:
388 if (dbg_s->is_timeout_disabled == true)
389 nvgpu_atomic_dec(&g->timeouts_disabled_refcount);
390 dbg_s->is_timeout_disabled = false;
391 break;
392
393 case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE:
394 if (dbg_s->is_timeout_disabled == false)
395 nvgpu_atomic_inc(&g->timeouts_disabled_refcount);
396 dbg_s->is_timeout_disabled = true;
397 break;
398
399 default:
400 nvgpu_err(g,
401 "unrecognized dbg gpu timeout mode : 0x%x",
402 timeout_mode);
403 err = -EINVAL;
404 break;
405 }
406
407 if (!err)
408 nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, "
409 "timeouts disabled refcount %d",
410 dbg_s->is_timeout_disabled ? "true" : "false",
411 nvgpu_atomic_read(&g->timeouts_disabled_refcount));
412 return err;
413}
414
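/*
 * Common open path for the dbg and prof device nodes. Takes a reference on
 * the gk20a instance, allocates a per-fd session, and initializes its event
 * wait queue, channel list and locks; failures unwind in reverse order.
 */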
415static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
416 struct file *filp, bool is_profiler)
417{
418 struct nvgpu_os_linux *l;
419 struct dbg_session_gk20a_linux *dbg_session_linux;
420 struct dbg_session_gk20a *dbg_s;
421 struct gk20a *g;
422
423 struct device *dev;
424
425 int err;
426
427 if (!is_profiler)
428 l = container_of(inode->i_cdev,
429 struct nvgpu_os_linux, dbg.cdev);
430 else
431 l = container_of(inode->i_cdev,
432 struct nvgpu_os_linux, prof.cdev);
433 g = gk20a_get(&l->g);
434 if (!g)
435 return -ENODEV;
436
437 dev = dev_from_gk20a(g);
438
439 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name);
440
441 err = alloc_session(g, &dbg_session_linux);
442 if (err)
443 goto free_ref;
444
445 dbg_s = &dbg_session_linux->dbg_s;
446
447 filp->private_data = dbg_session_linux;
448 dbg_session_linux->dev = dev;
449 dbg_s->g = g;
450 dbg_s->is_profiler = is_profiler;
451 dbg_s->is_pg_disabled = false;
452 dbg_s->is_timeout_disabled = false;
453
454 nvgpu_cond_init(&dbg_s->dbg_events.wait_queue);
455 nvgpu_init_list_node(&dbg_s->ch_list);
456 err = nvgpu_mutex_init(&dbg_s->ch_list_lock);
457 if (err)
458 goto err_free_session;
459 err = nvgpu_mutex_init(&dbg_s->ioctl_lock);
460 if (err)
461 goto err_destroy_lock;
462 dbg_s->dbg_events.events_enabled = false;
463 dbg_s->dbg_events.num_pending_events = 0;
464
465 return 0;
466
467err_destroy_lock:
468 nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
469err_free_session:
470 nvgpu_kfree(g, dbg_session_linux);
471free_ref:
472 gk20a_put(g);
473 return err;
474}
475
476void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
477{
478 nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue);
479}
480
481static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
482 struct dbg_session_channel_data *ch_data)
483{
484 struct gk20a *g = dbg_s->g;
485 int chid;
486 struct dbg_session_data *session_data;
487 struct dbg_profiler_object_data *prof_obj, *tmp_obj;
488 struct dbg_session_channel_data_linux *ch_data_linux;
489
490 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
491
492 chid = ch_data->chid;
493
494 /* If there's a profiler ctx reservation record associated with this
495 * session/channel pair, release it.
496 */
497 nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
498 dbg_profiler_object_data, prof_obj_entry) {
499 if ((prof_obj->session_id == dbg_s->id) &&
500 (prof_obj->ch->chid == chid)) {
501 if (prof_obj->has_reservation) {
502 g->ops.dbg_session_ops.
503 release_profiler_reservation(dbg_s, prof_obj);
504 }
505 nvgpu_list_del(&prof_obj->prof_obj_entry);
506 nvgpu_kfree(g, prof_obj);
507 }
508 }
509
510 nvgpu_list_del(&ch_data->ch_entry);
511
512 session_data = ch_data->session_data;
513 nvgpu_list_del(&session_data->dbg_s_entry);
514 nvgpu_kfree(dbg_s->g, session_data);
515
516 ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux,
517 ch_data);
518
519 fput(ch_data_linux->ch_f);
520 nvgpu_kfree(dbg_s->g, ch_data_linux);
521
522 return 0;
523}
524
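/*
 * Bind a channel fd to this debug session. A reference on the channel file
 * is held for the session lifetime; per-channel and per-session tracking
 * data are allocated and linked under dbg_sessions_lock and the channel's
 * dbg_s_lock.
 */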
525static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
526 struct nvgpu_dbg_gpu_bind_channel_args *args)
527{
528 struct file *f;
529 struct gk20a *g = dbg_s->g;
530 struct channel_gk20a *ch;
531 struct dbg_session_channel_data_linux *ch_data_linux;
532 struct dbg_session_data *session_data;
533 int err = 0;
534
535 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
536 g->name, args->channel_fd);
537
538 /*
539	 * Although gk20a_get_channel_from_file gives us a channel ref, we need
540	 * to hold a ref to the file during the session lifetime. See comment in
541 * struct dbg_session_channel_data.
542 */
543 f = fget(args->channel_fd);
544 if (!f)
545 return -ENODEV;
546
547 ch = gk20a_get_channel_from_file(args->channel_fd);
548 if (!ch) {
549 nvgpu_log_fn(g, "no channel found for fd");
550 err = -EINVAL;
551 goto out_fput;
552 }
553
554 nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid);
555
556 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
557 nvgpu_mutex_acquire(&ch->dbg_s_lock);
558
559 ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux));
560 if (!ch_data_linux) {
561 err = -ENOMEM;
562 goto out_chput;
563 }
564 ch_data_linux->ch_f = f;
565 ch_data_linux->ch_data.channel_fd = args->channel_fd;
566 ch_data_linux->ch_data.chid = ch->chid;
567 ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a;
568 nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry);
569
570 session_data = nvgpu_kzalloc(g, sizeof(*session_data));
571 if (!session_data) {
572 err = -ENOMEM;
573 goto out_kfree;
574 }
575 session_data->dbg_s = dbg_s;
576 nvgpu_init_list_node(&session_data->dbg_s_entry);
577 ch_data_linux->ch_data.session_data = session_data;
578
579 nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list);
580
581 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
582 nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list);
583 nvgpu_mutex_release(&dbg_s->ch_list_lock);
584
585 nvgpu_mutex_release(&ch->dbg_s_lock);
586 nvgpu_mutex_release(&g->dbg_sessions_lock);
587
588 gk20a_channel_put(ch);
589
590 return 0;
591
592out_kfree:
593 nvgpu_kfree(g, ch_data_linux);
594out_chput:
595 gk20a_channel_put(ch);
596 nvgpu_mutex_release(&ch->dbg_s_lock);
597 nvgpu_mutex_release(&g->dbg_sessions_lock);
598out_fput:
599 fput(f);
600 return err;
601}
602
603static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s)
604{
605 struct dbg_session_channel_data *ch_data, *tmp;
606 struct gk20a *g = dbg_s->g;
607
608 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
609 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
610 nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list,
611 dbg_session_channel_data, ch_entry)
612 ch_data->unbind_single_channel(dbg_s, ch_data);
613 nvgpu_mutex_release(&dbg_s->ch_list_lock);
614 nvgpu_mutex_release(&g->dbg_sessions_lock);
615
616 return 0;
617}
618
619/*
620 * Convert common regops op values of the form of NVGPU_DBG_REG_OP_*
621 * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_*
622 */
623static u32 nvgpu_get_regops_op_values_linux(u32 regops_op)
624{
625 switch (regops_op) {
626 case REGOP(READ_32):
627 return REGOP_LINUX(READ_32);
628 case REGOP(WRITE_32):
629 return REGOP_LINUX(WRITE_32);
630 case REGOP(READ_64):
631 return REGOP_LINUX(READ_64);
632 case REGOP(WRITE_64):
633 return REGOP_LINUX(WRITE_64);
634 case REGOP(READ_08):
635 return REGOP_LINUX(READ_08);
636 case REGOP(WRITE_08):
637 return REGOP_LINUX(WRITE_08);
638 }
639
640 return regops_op;
641}
642
643/*
644 * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_*
645 * into common regops op values of the form of NVGPU_DBG_REG_OP_*
646 */
647static u32 nvgpu_get_regops_op_values_common(u32 regops_op)
648{
649 switch (regops_op) {
650 case REGOP_LINUX(READ_32):
651 return REGOP(READ_32);
652 case REGOP_LINUX(WRITE_32):
653 return REGOP(WRITE_32);
654 case REGOP_LINUX(READ_64):
655 return REGOP(READ_64);
656 case REGOP_LINUX(WRITE_64):
657 return REGOP(WRITE_64);
658 case REGOP_LINUX(READ_08):
659 return REGOP(READ_08);
660 case REGOP_LINUX(WRITE_08):
661 return REGOP(WRITE_08);
662 }
663
664 return regops_op;
665}
666
667/*
668 * Convert common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_*
669 * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_*
670 */
671static u32 nvgpu_get_regops_type_values_linux(u32 regops_type)
672{
673 switch (regops_type) {
674 case REGOP(TYPE_GLOBAL):
675 return REGOP_LINUX(TYPE_GLOBAL);
676 case REGOP(TYPE_GR_CTX):
677 return REGOP_LINUX(TYPE_GR_CTX);
678 case REGOP(TYPE_GR_CTX_TPC):
679 return REGOP_LINUX(TYPE_GR_CTX_TPC);
680 case REGOP(TYPE_GR_CTX_SM):
681 return REGOP_LINUX(TYPE_GR_CTX_SM);
682 case REGOP(TYPE_GR_CTX_CROP):
683 return REGOP_LINUX(TYPE_GR_CTX_CROP);
684 case REGOP(TYPE_GR_CTX_ZROP):
685 return REGOP_LINUX(TYPE_GR_CTX_ZROP);
686 case REGOP(TYPE_GR_CTX_QUAD):
687 return REGOP_LINUX(TYPE_GR_CTX_QUAD);
688 }
689
690 return regops_type;
691}
692
693/*
694 * Convert linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_*
695 * into common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_*
696 */
697static u32 nvgpu_get_regops_type_values_common(u32 regops_type)
698{
699 switch (regops_type) {
700 case REGOP_LINUX(TYPE_GLOBAL):
701 return REGOP(TYPE_GLOBAL);
702 case REGOP_LINUX(TYPE_GR_CTX):
703 return REGOP(TYPE_GR_CTX);
704 case REGOP_LINUX(TYPE_GR_CTX_TPC):
705 return REGOP(TYPE_GR_CTX_TPC);
706 case REGOP_LINUX(TYPE_GR_CTX_SM):
707 return REGOP(TYPE_GR_CTX_SM);
708 case REGOP_LINUX(TYPE_GR_CTX_CROP):
709 return REGOP(TYPE_GR_CTX_CROP);
710 case REGOP_LINUX(TYPE_GR_CTX_ZROP):
711 return REGOP(TYPE_GR_CTX_ZROP);
712 case REGOP_LINUX(TYPE_GR_CTX_QUAD):
713 return REGOP(TYPE_GR_CTX_QUAD);
714 }
715
716 return regops_type;
717}
718
719/*
720 * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_*
721 * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_*
722 */
723static u32 nvgpu_get_regops_status_values_linux(u32 regops_status)
724{
725 switch (regops_status) {
726 case REGOP(STATUS_SUCCESS):
727 return REGOP_LINUX(STATUS_SUCCESS);
728 case REGOP(STATUS_INVALID_OP):
729 return REGOP_LINUX(STATUS_INVALID_OP);
730 case REGOP(STATUS_INVALID_TYPE):
731 return REGOP_LINUX(STATUS_INVALID_TYPE);
732 case REGOP(STATUS_INVALID_OFFSET):
733 return REGOP_LINUX(STATUS_INVALID_OFFSET);
734 case REGOP(STATUS_UNSUPPORTED_OP):
735 return REGOP_LINUX(STATUS_UNSUPPORTED_OP);
736	case REGOP(STATUS_INVALID_MASK):
737 return REGOP_LINUX(STATUS_INVALID_MASK);
738 }
739
740 return regops_status;
741}
742
743/*
744 * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_*
745 * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_*
746 */
747static u32 nvgpu_get_regops_status_values_common(u32 regops_status)
748{
749 switch (regops_status) {
750 case REGOP_LINUX(STATUS_SUCCESS):
751 return REGOP(STATUS_SUCCESS);
752 case REGOP_LINUX(STATUS_INVALID_OP):
753 return REGOP(STATUS_INVALID_OP);
754 case REGOP_LINUX(STATUS_INVALID_TYPE):
755 return REGOP(STATUS_INVALID_TYPE);
756 case REGOP_LINUX(STATUS_INVALID_OFFSET):
757 return REGOP(STATUS_INVALID_OFFSET);
758 case REGOP_LINUX(STATUS_UNSUPPORTED_OP):
759 return REGOP(STATUS_UNSUPPORTED_OP);
760	case REGOP_LINUX(STATUS_INVALID_MASK):
761 return REGOP(STATUS_INVALID_MASK);
762 }
763
764 return regops_status;
765}
766
767static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in,
768 struct nvgpu_dbg_reg_op *out, u32 num_ops)
769{
770 u32 i;
771
772	if (in == NULL || out == NULL)
773 return -ENOMEM;
774
775 for (i = 0; i < num_ops; i++) {
776 out[i].op = nvgpu_get_regops_op_values_common(in[i].op);
777 out[i].type = nvgpu_get_regops_type_values_common(in[i].type);
778 out[i].status = nvgpu_get_regops_status_values_common(in[i].status);
779 out[i].quad = in[i].quad;
780 out[i].group_mask = in[i].group_mask;
781 out[i].sub_group_mask = in[i].sub_group_mask;
782 out[i].offset = in[i].offset;
783 out[i].value_lo = in[i].value_lo;
784 out[i].value_hi = in[i].value_hi;
785 out[i].and_n_mask_lo = in[i].and_n_mask_lo;
786 out[i].and_n_mask_hi = in[i].and_n_mask_hi;
787 }
788
789 return 0;
790}
791
792static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in,
793 struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops)
794{
795 u32 i;
796
797	if (in == NULL || out == NULL)
798 return -ENOMEM;
799
800 for (i = 0; i < num_ops; i++) {
801 out[i].op = nvgpu_get_regops_op_values_linux(in[i].op);
802 out[i].type = nvgpu_get_regops_type_values_linux(in[i].type);
803 out[i].status = nvgpu_get_regops_status_values_linux(in[i].status);
804 out[i].quad = in[i].quad;
805 out[i].group_mask = in[i].group_mask;
806 out[i].sub_group_mask = in[i].sub_group_mask;
807 out[i].offset = in[i].offset;
808 out[i].value_lo = in[i].value_lo;
809 out[i].value_hi = in[i].value_hi;
810 out[i].and_n_mask_lo = in[i].and_n_mask_lo;
811 out[i].and_n_mask_hi = in[i].and_n_mask_hi;
812 }
813
814 return 0;
815}
816
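/*
 * Execute a batch of register operations. User ops are processed in
 * fragments of at most g->dbg_regops_tmp_buf_ops entries: each fragment is
 * copied in, converted from the Linux to the common regop layout, executed,
 * converted back and copied out. Powergating is disabled for the duration
 * (except in the virtual case) and everything runs under dbg_sessions_lock.
 */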
817static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
818 struct nvgpu_dbg_gpu_exec_reg_ops_args *args)
819{
820 int err = 0, powergate_err = 0;
821 bool is_pg_disabled = false;
822
823 struct gk20a *g = dbg_s->g;
824 struct channel_gk20a *ch;
825
826 nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
827
828 if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) {
829 nvgpu_err(g, "regops limit exceeded");
830 return -EINVAL;
831 }
832
833 if (args->num_ops == 0) {
834 /* Nothing to do */
835 return 0;
836 }
837
838 if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
839 nvgpu_err(g, "reg ops work buffer not allocated");
840 return -ENODEV;
841 }
842
843 if (!dbg_s->id) {
844 nvgpu_err(g, "can't call reg_ops on an unbound debugger session");
845 return -EINVAL;
846 }
847
848 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
849 if (!dbg_s->is_profiler && !ch) {
850 nvgpu_err(g, "bind a channel before regops for a debugging session");
851 return -EINVAL;
852 }
853
854 /* be sure that ctx info is in place */
855 if (!g->is_virtual &&
856 !gr_context_info_available(dbg_s, &g->gr)) {
857 nvgpu_err(g, "gr context data not available");
858 return -ENODEV;
859 }
860
861 /* since exec_reg_ops sends methods to the ucode, it must take the
862 * global gpu lock to protect against mixing methods from debug sessions
863 * on other channels */
864 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
865
866 if (!dbg_s->is_pg_disabled && !g->is_virtual) {
867 /* In the virtual case, the server will handle
868 * disabling/enabling powergating when processing reg ops
869 */
870 powergate_err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
871 true);
872 is_pg_disabled = true;
873 }
874
875 if (!powergate_err) {
876 u64 ops_offset = 0; /* index offset */
877
878 struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL;
879
880 linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops *
881 sizeof(struct nvgpu_dbg_gpu_reg_op));
882
883			if (!linux_fragment) {
884				err = -ENOMEM;
				goto fragment_done;
			}
885
886 while (ops_offset < args->num_ops && !err) {
887 const u64 num_ops =
888 min(args->num_ops - ops_offset,
889 (u64)(g->dbg_regops_tmp_buf_ops));
890 const u64 fragment_size =
891 num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op);
892
893 void __user *const fragment =
894 (void __user *)(uintptr_t)
895 (args->ops +
896 ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op));
897
898 nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu",
899 ops_offset, num_ops);
900
901 nvgpu_log_fn(g, "Copying regops from userspace");
902
903 if (copy_from_user(linux_fragment,
904 fragment, fragment_size)) {
905 nvgpu_err(g, "copy_from_user failed!");
906 err = -EFAULT;
907 break;
908 }
909
910 err = nvgpu_get_regops_data_common(linux_fragment,
911 g->dbg_regops_tmp_buf, num_ops);
912
913 if (err)
914 break;
915
916 err = g->ops.dbg_session_ops.exec_reg_ops(
917 dbg_s, g->dbg_regops_tmp_buf, num_ops);
918
919 err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf,
920 linux_fragment, num_ops);
921
922 if (err)
923 break;
924
925 nvgpu_log_fn(g, "Copying result to userspace");
926
927 if (copy_to_user(fragment, linux_fragment,
928 fragment_size)) {
929 nvgpu_err(g, "copy_to_user failed!");
930 err = -EFAULT;
931 break;
932 }
933
934 ops_offset += num_ops;
935 }
936
937 nvgpu_kfree(g, linux_fragment);
938
fragment_done:
939 /* enable powergate, if previously disabled */
940 if (is_pg_disabled) {
941 powergate_err =
942 g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
943 false);
944 }
945 }
946
947 nvgpu_mutex_release(&g->dbg_sessions_lock);
948
949 if (!err && powergate_err)
950 err = powergate_err;
951
952 if (err)
953 nvgpu_err(g, "dbg regops failed");
954
955 return err;
956}
957
958static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
959 struct nvgpu_dbg_gpu_powergate_args *args)
960{
961 int err;
962 struct gk20a *g = dbg_s->g;
963 nvgpu_log_fn(g, "%s powergate mode = %d",
964 g->name, args->mode);
965
966 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
967 if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) {
968 err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, true);
969 } else if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE) {
970 err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false);
971 } else {
972 nvgpu_err(g, "invalid powergate mode");
973 err = -EINVAL;
974 }
975 nvgpu_mutex_release(&g->dbg_sessions_lock);
976 return err;
977}
978
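/*
 * Enable or disable SMPC context-switching for the session's bound channel,
 * then apply the SMPC WAR via g->ops.regops.apply_smpc_war. Runs with a
 * power reference held and under dbg_sessions_lock since global regops are
 * issued.
 */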
979static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
980 struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args)
981{
982 int err;
983 struct gk20a *g = dbg_s->g;
984 struct channel_gk20a *ch_gk20a;
985
986 nvgpu_log_fn(g, "%s smpc ctxsw mode = %d",
987 g->name, args->mode);
988
989 err = gk20a_busy(g);
990 if (err) {
991 nvgpu_err(g, "failed to poweron");
992 return err;
993 }
994
995 /* Take the global lock, since we'll be doing global regops */
996 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
997
998 ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s);
999 if (!ch_gk20a) {
1000 nvgpu_err(g,
1001 "no bound channel for smpc ctxsw mode update");
1002 err = -EINVAL;
1003 goto clean_up;
1004 }
1005
1006 err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a,
1007 args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
1008 if (err) {
1009 nvgpu_err(g,
1010 "error (%d) during smpc ctxsw mode update", err);
1011 goto clean_up;
1012 }
1013
1014 err = g->ops.regops.apply_smpc_war(dbg_s);
1015 clean_up:
1016 nvgpu_mutex_release(&g->dbg_sessions_lock);
1017 gk20a_idle(g);
1018 return err;
1019}
1020
1021static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1022 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args)
1023{
1024 int err;
1025 struct gk20a *g = dbg_s->g;
1026 struct channel_gk20a *ch_gk20a;
1027
1028 nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode);
1029
1030	/* A valid reservation is required to enable/disable hwpm ctxsw.
1031	 * Just print an error message for now, but eventually this should
1032	 * return an error, at the point where all client sw has been
1033	 * cleaned up.
1034 */
1035 if (!dbg_s->has_profiler_reservation) {
1036 nvgpu_err(g,
1037 "session doesn't have a valid reservation");
1038 }
1039
1040 err = gk20a_busy(g);
1041 if (err) {
1042 nvgpu_err(g, "failed to poweron");
1043 return err;
1044 }
1045
1046 /* Take the global lock, since we'll be doing global regops */
1047 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1048
1049 ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1050 if (!ch_gk20a) {
1051 nvgpu_err(g,
1052 "no bound channel for pm ctxsw mode update");
1053 err = -EINVAL;
1054 goto clean_up;
1055 }
1056 if (!dbg_s->is_pg_disabled) {
1057 nvgpu_err(g, "powergate is not disabled");
1058 err = -ENOSYS;
1059 goto clean_up;
1060 }
1061 err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0,
1062 args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
1063 if (err)
1064 nvgpu_err(g,
1065 "error (%d) during pm ctxsw mode update", err);
1066	/* gk20a would require a WAR to set the core PM_ENABLE bit; it is not
1067	 * added here since gk20a is being deprecated
1068	 */
1069 clean_up:
1070 nvgpu_mutex_release(&g->dbg_sessions_lock);
1071 gk20a_idle(g);
1072 return err;
1073}
1074
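/*
 * Suspend or resume all SMs for the session's bound channel. GR context
 * switching is disabled around the operation so the channel's context can be
 * suspended/resumed consistently, then re-enabled; runs under
 * dbg_sessions_lock with a power reference held.
 */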
1075static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
1076 struct dbg_session_gk20a *dbg_s,
1077 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args)
1078{
1079 struct gk20a *g = dbg_s->g;
1080 struct channel_gk20a *ch;
1081 int err = 0, action = args->mode;
1082
1083 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode);
1084
1085 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1086 if (!ch)
1087 return -EINVAL;
1088
1089 err = gk20a_busy(g);
1090 if (err) {
1091 nvgpu_err(g, "failed to poweron");
1092 return err;
1093 }
1094
1095 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1096
1097 /* Suspend GPU context switching */
1098 err = gr_gk20a_disable_ctxsw(g);
1099 if (err) {
1100 nvgpu_err(g, "unable to stop gr ctxsw");
1101 /* this should probably be ctx-fatal... */
1102 goto clean_up;
1103 }
1104
1105 switch (action) {
1106 case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
1107 gr_gk20a_suspend_context(ch);
1108 break;
1109
1110 case NVGPU_DBG_GPU_RESUME_ALL_SMS:
1111 gr_gk20a_resume_context(ch);
1112 break;
1113 }
1114
1115 err = gr_gk20a_enable_ctxsw(g);
1116 if (err)
1117 nvgpu_err(g, "unable to restart ctxsw!");
1118
1119clean_up:
1120 nvgpu_mutex_release(&g->dbg_sessions_lock);
1121 gk20a_idle(g);
1122
1123 return err;
1124}
1125
1126static int nvgpu_ioctl_allocate_profiler_object(
1127 struct dbg_session_gk20a_linux *dbg_session_linux,
1128 struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
1129{
1130 int err = 0;
1131 struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s;
1132 struct gk20a *g = get_gk20a(dbg_session_linux->dev);
1133 struct dbg_profiler_object_data *prof_obj;
1134
1135 nvgpu_log_fn(g, "%s", g->name);
1136
1137 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1138
1139 err = alloc_profiler(g, &prof_obj);
1140 if (err)
1141 goto clean_up;
1142
1143 prof_obj->session_id = dbg_s->id;
1144
1145 if (dbg_s->is_profiler)
1146 prof_obj->ch = NULL;
1147 else {
1148 prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1149 if (prof_obj->ch == NULL) {
1150 nvgpu_err(g,
1151 "bind a channel for dbg session");
1152 nvgpu_kfree(g, prof_obj);
1153 err = -EINVAL;
1154 goto clean_up;
1155 }
1156 }
1157
1158 /* Return handle to client */
1159 args->profiler_handle = prof_obj->prof_handle;
1160
1161 nvgpu_init_list_node(&prof_obj->prof_obj_entry);
1162
1163 nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects);
1164clean_up:
1165 nvgpu_mutex_release(&g->dbg_sessions_lock);
1166 return err;
1167}
1168
1169static int nvgpu_ioctl_free_profiler_object(
1170 struct dbg_session_gk20a_linux *dbg_s_linux,
1171 struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
1172{
1173 int err = 0;
1174 struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s;
1175 struct gk20a *g = get_gk20a(dbg_s_linux->dev);
1176 struct dbg_profiler_object_data *prof_obj, *tmp_obj;
1177 bool obj_found = false;
1178
1179 nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x",
1180 g->name, dbg_s->id, args->profiler_handle);
1181
1182 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1183
1184 /* Remove profiler object from the list, if a match is found */
1185 nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
1186 dbg_profiler_object_data, prof_obj_entry) {
1187 if (prof_obj->prof_handle == args->profiler_handle) {
1188 if (prof_obj->session_id != dbg_s->id) {
1189 nvgpu_err(g,
1190 "invalid handle %x",
1191 args->profiler_handle);
1192 err = -EINVAL;
1193 break;
1194 }
1195 if (prof_obj->has_reservation)
1196 g->ops.dbg_session_ops.
1197 release_profiler_reservation(dbg_s, prof_obj);
1198 nvgpu_list_del(&prof_obj->prof_obj_entry);
1199 nvgpu_kfree(g, prof_obj);
1200 obj_found = true;
1201 break;
1202 }
1203 }
1204 if (!obj_found) {
1205 nvgpu_err(g, "profiler %x not found",
1206 args->profiler_handle);
1207 err = -EINVAL;
1208 }
1209
1210 nvgpu_mutex_release(&g->dbg_sessions_lock);
1211 return err;
1212}
1213
1214static struct dbg_profiler_object_data *find_matching_prof_obj(
1215 struct dbg_session_gk20a *dbg_s,
1216 u32 profiler_handle)
1217{
1218 struct gk20a *g = dbg_s->g;
1219 struct dbg_profiler_object_data *prof_obj;
1220
1221 nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
1222 dbg_profiler_object_data, prof_obj_entry) {
1223 if (prof_obj->prof_handle == profiler_handle) {
1224 if (prof_obj->session_id != dbg_s->id) {
1225 nvgpu_err(g,
1226 "invalid handle %x",
1227 profiler_handle);
1228 return NULL;
1229 }
1230 return prof_obj;
1231 }
1232 }
1233 return NULL;
1234}
1235
1236/* used in scenarios where the debugger session can take just the inter-session
1237 * lock for performance, but the profiler session must take the per-gpu lock
1238 * since it might not have an associated channel. */
1239static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s)
1240{
1241 struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1242
1243 if (dbg_s->is_profiler || !ch)
1244 nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock);
1245 else
1246 nvgpu_mutex_acquire(&ch->dbg_s_lock);
1247}
1248
1249static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s)
1250{
1251 struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1252
1253 if (dbg_s->is_profiler || !ch)
1254 nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock);
1255 else
1256 nvgpu_mutex_release(&ch->dbg_s_lock);
1257}
1258
1259static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
1260{
1261 struct gk20a *g = dbg_s->g;
1262
1263 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
1264
1265 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
1266
1267 dbg_s->dbg_events.events_enabled = true;
1268 dbg_s->dbg_events.num_pending_events = 0;
1269
1270 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
1271}
1272
1273static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
1274{
1275 struct gk20a *g = dbg_s->g;
1276
1277 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
1278
1279 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
1280
1281 dbg_s->dbg_events.events_enabled = false;
1282 dbg_s->dbg_events.num_pending_events = 0;
1283
1284 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
1285}
1286
1287static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
1288{
1289 struct gk20a *g = dbg_s->g;
1290
1291 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
1292
1293 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
1294
1295 if (dbg_s->dbg_events.events_enabled &&
1296 dbg_s->dbg_events.num_pending_events > 0)
1297 dbg_s->dbg_events.num_pending_events--;
1298
1299 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
1300}
1301
1302
1303static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
1304 struct nvgpu_dbg_gpu_events_ctrl_args *args)
1305{
1306 int ret = 0;
1307 struct channel_gk20a *ch;
1308 struct gk20a *g = dbg_s->g;
1309
1310 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd);
1311
1312 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1313 if (!ch) {
1314 nvgpu_err(g, "no channel bound to dbg session");
1315 return -EINVAL;
1316 }
1317
1318 switch (args->cmd) {
1319 case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
1320 gk20a_dbg_gpu_events_enable(dbg_s);
1321 break;
1322
1323 case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE:
1324 gk20a_dbg_gpu_events_disable(dbg_s);
1325 break;
1326
1327 case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR:
1328 gk20a_dbg_gpu_events_clear(dbg_s);
1329 break;
1330
1331 default:
1332 nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x",
1333 args->cmd);
1334 ret = -EINVAL;
1335 break;
1336 }
1337
1338 return ret;
1339}
1340
1341static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
1342 struct nvgpu_dbg_gpu_perfbuf_map_args *args)
1343{
1344 struct gk20a *g = dbg_s->g;
1345 struct mm_gk20a *mm = &g->mm;
1346 int err;
1347 u32 virt_size;
1348 u32 big_page_size = g->ops.mm.get_default_big_page_size();
1349
1350 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1351
1352 if (g->perfbuf.owner) {
1353 nvgpu_mutex_release(&g->dbg_sessions_lock);
1354 return -EBUSY;
1355 }
1356
1357 mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size,
1358 big_page_size << 10,
1359 NV_MM_DEFAULT_KERNEL_SIZE,
1360 NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
1361 false, false, "perfbuf");
1362 if (!mm->perfbuf.vm) {
1363 nvgpu_mutex_release(&g->dbg_sessions_lock);
1364 return -ENOMEM;
1365 }
1366
1367 err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
1368 args->dmabuf_fd,
1369 &args->offset,
1370 0,
1371 0,
1372 0,
1373 0,
1374 args->mapping_size,
1375 NULL);
1376 if (err)
1377 goto err_remove_vm;
1378
1379 /* perf output buffer may not cross a 4GB boundary */
1380 virt_size = u64_lo32(args->mapping_size);
1381 if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) {
1382 err = -EINVAL;
1383 goto err_unmap;
1384 }
1385
1386 err = g->ops.dbg_session_ops.perfbuffer_enable(g,
1387 args->offset, virt_size);
1388 if (err)
1389 goto err_unmap;
1390
1391 g->perfbuf.owner = dbg_s;
1392 g->perfbuf.offset = args->offset;
1393 nvgpu_mutex_release(&g->dbg_sessions_lock);
1394
1395 return 0;
1396
1397err_unmap:
1398 nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL);
1399err_remove_vm:
1400 nvgpu_vm_put(mm->perfbuf.vm);
1401 nvgpu_mutex_release(&g->dbg_sessions_lock);
1402 return err;
1403}
1404
1405static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
1406 struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
1407{
1408 struct gk20a *g = dbg_s->g;
1409 int err;
1410
1411 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1412 if ((g->perfbuf.owner != dbg_s) ||
1413 (g->perfbuf.offset != args->offset)) {
1414 nvgpu_mutex_release(&g->dbg_sessions_lock);
1415 return -EINVAL;
1416 }
1417
1418 err = gk20a_perfbuf_release_locked(g, args->offset);
1419
1420 nvgpu_mutex_release(&g->dbg_sessions_lock);
1421
1422 return err;
1423}
1424
1425static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
1426 struct nvgpu_dbg_gpu_pc_sampling_args *args)
1427{
1428 struct channel_gk20a *ch;
1429 struct gk20a *g = dbg_s->g;
1430
1431 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1432 if (!ch)
1433 return -EINVAL;
1434
1435 nvgpu_log_fn(g, " ");
1436
1437 return g->ops.gr.update_pc_sampling ?
1438 g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL;
1439}
1440
1441static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
1442 struct dbg_session_gk20a *dbg_s,
1443 struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args)
1444{
1445 struct gk20a *g = dbg_s->g;
1446 struct gr_gk20a *gr = &g->gr;
1447 u32 sm_id;
1448 struct channel_gk20a *ch;
1449 int err = 0;
1450
1451 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1452 if (!ch)
1453 return -EINVAL;
1454
1455 sm_id = args->sm_id;
1456 if (sm_id >= gr->no_of_sm)
1457 return -EINVAL;
1458
1459 nvgpu_speculation_barrier();
1460
1461 err = gk20a_busy(g);
1462 if (err)
1463 return err;
1464
1465 err = gr_gk20a_elpg_protected_call(g,
1466 g->ops.gr.clear_sm_error_state(g, ch, sm_id));
1467
1468 gk20a_idle(g);
1469
1470 return err;
1471}
1472
1473static int
1474nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s,
1475 struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args)
1476{
1477 struct gk20a *g = dbg_s->g;
1478 int err = 0;
1479 int ctx_resident_ch_fd = -1;
1480
1481 err = gk20a_busy(g);
1482 if (err)
1483 return err;
1484
1485 switch (args->action) {
1486 case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS:
1487 err = g->ops.gr.suspend_contexts(g, dbg_s,
1488 &ctx_resident_ch_fd);
1489 break;
1490
1491 case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS:
1492 err = g->ops.gr.resume_contexts(g, dbg_s,
1493 &ctx_resident_ch_fd);
1494 break;
1495 }
1496
1497 if (ctx_resident_ch_fd < 0) {
1498 args->is_resident_context = 0;
1499 } else {
1500 args->is_resident_context = 1;
1501 args->resident_context_fd = ctx_resident_ch_fd;
1502 }
1503
1504 gk20a_idle(g);
1505
1506 return err;
1507}
1508
1509static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
1510 struct nvgpu_dbg_gpu_access_fb_memory_args *args)
1511{
1512 struct gk20a *g = dbg_s->g;
1513 struct dma_buf *dmabuf;
1514 void __user *user_buffer = (void __user *)(uintptr_t)args->buffer;
1515 void *buffer;
1516 u64 size, access_size, offset;
1517 u64 access_limit_size = SZ_4K;
1518 int err = 0;
1519
1520 if ((args->offset & 3) || (!args->size) || (args->size & 3))
1521 return -EINVAL;
1522
1523 dmabuf = dma_buf_get(args->dmabuf_fd);
1524 if (IS_ERR(dmabuf))
1525 return -EINVAL;
1526
1527 if ((args->offset > dmabuf->size) ||
1528 (args->size > dmabuf->size) ||
1529 (args->offset + args->size > dmabuf->size)) {
1530 err = -EINVAL;
1531 goto fail_dmabuf_put;
1532 }
1533
1534 buffer = nvgpu_big_zalloc(g, access_limit_size);
1535 if (!buffer) {
1536 err = -ENOMEM;
1537 goto fail_dmabuf_put;
1538 }
1539
1540 size = args->size;
1541 offset = 0;
1542
1543 err = gk20a_busy(g);
1544 if (err)
1545 goto fail_free_buffer;
1546
1547 while (size) {
1548 /* Max access size of access_limit_size in one loop */
1549 access_size = min(access_limit_size, size);
1550
1551 if (args->cmd ==
1552 NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) {
1553 err = copy_from_user(buffer, user_buffer + offset,
1554 access_size);
1555 if (err)
1556 goto fail_idle;
1557 }
1558
1559 err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer,
1560 args->offset + offset, access_size,
1561 args->cmd);
1562 if (err)
1563 goto fail_idle;
1564
1565 if (args->cmd ==
1566 NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) {
1567 err = copy_to_user(user_buffer + offset,
1568 buffer, access_size);
1569 if (err)
1570 goto fail_idle;
1571 }
1572
1573 size -= access_size;
1574 offset += access_size;
1575 }
1576
1577fail_idle:
1578 gk20a_idle(g);
1579fail_free_buffer:
1580 nvgpu_big_free(g, buffer);
1581fail_dmabuf_put:
1582 dma_buf_put(dmabuf);
1583
1584 return err;
1585}
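
A hedged userspace sketch of driving the FB-memory ioctl above (not part of the deleted file): the ioctl name and the nvgpu_dbg_gpu_access_fb_memory_args fields (cmd, dmabuf_fd, offset, size, buffer) come from the code, while the helper name, includes (sys/ioctl.h plus the nvgpu uapi header) and error handling are illustrative assumptions.

	/* Hypothetical caller: read len bytes from a vidmem dmabuf via the
	 * debugger session fd.  offset must be 4-byte aligned and len a
	 * non-zero multiple of 4, as checked at the top of the handler. */
	static int read_fb_memory(int dbg_fd, int dmabuf_fd, __u64 offset,
				  void *dst, __u64 len)
	{
		struct nvgpu_dbg_gpu_access_fb_memory_args args = {
			.cmd       = NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ,
			.dmabuf_fd = dmabuf_fd,
			.offset    = offset,
			.size      = len,
			.buffer    = (__u64)(uintptr_t)dst,
		};

		return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY, &args);
	}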
1586
1587static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
1588 struct nvgpu_dbg_gpu_profiler_reserve_args *args)
1589{
1590 if (args->acquire)
1591 return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle);
1592
1593 return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle);
1594}
1595
1596static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s,
1597 struct nvgpu_dbg_gpu_timeout_args *args)
1598{
1599 bool status;
1600 struct gk20a *g = dbg_s->g;
1601
1602 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1603 status = nvgpu_is_timeouts_enabled(g);
1604 nvgpu_mutex_release(&g->dbg_sessions_lock);
1605
1606 if (status)
1607 args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE;
1608 else
1609 args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE;
1610}
1611
1612/* In order to perform a context relative op the context has
1613 * to be created already... which would imply that the
1614 * context switch mechanism has already been put in place.
 1615  * So by the time we perform such an operation it should always
1616 * be possible to query for the appropriate context offsets, etc.
1617 *
 1618  * But note: while the dbg_gpu bind requires a channel fd,
1619 * it doesn't require an allocated gr/compute obj at that point...
1620 */
1621static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
1622 struct gr_gk20a *gr)
1623{
1624 int err;
1625
1626 nvgpu_mutex_acquire(&gr->ctx_mutex);
1627 err = !gr->ctx_vars.golden_image_initialized;
1628 nvgpu_mutex_release(&gr->ctx_mutex);
1629 if (err)
1630 return false;
1631 return true;
1632
1633}
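
A minimal caller-side sketch of the check above (illustration only, not code from this file); the surrounding caller and the -EAGAIN choice are assumptions:

	/* Hypothetical guard before a context-relative reg op. */
	if (!gr_context_info_available(dbg_s, &g->gr))
		return -EAGAIN;	/* golden ctx image not initialized yet */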
1634
1635static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
1636{
1637 struct mm_gk20a *mm = &g->mm;
1638 struct vm_gk20a *vm = mm->perfbuf.vm;
1639 int err;
1640
1641 err = g->ops.dbg_session_ops.perfbuffer_disable(g);
1642
1643 nvgpu_vm_unmap(vm, offset, NULL);
1644 nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
1645 nvgpu_vm_put(vm);
1646
1647 g->perfbuf.owner = NULL;
1648 g->perfbuf.offset = 0;
1649 return err;
1650}
1651
1652static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
1653 u32 profiler_handle)
1654{
1655 struct gk20a *g = dbg_s->g;
1656 struct dbg_profiler_object_data *prof_obj;
1657 int err = 0;
1658
1659 nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
1660
1661 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1662
1663 /* Find matching object. */
1664 prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
1665
1666 if (!prof_obj) {
1667 nvgpu_err(g, "object not found");
1668 err = -EINVAL;
1669 goto exit;
1670 }
1671
1672 if (prof_obj->has_reservation)
1673 g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj);
1674 else {
1675 nvgpu_err(g, "No reservation found");
1676 err = -EINVAL;
1677 goto exit;
1678 }
1679exit:
1680 nvgpu_mutex_release(&g->dbg_sessions_lock);
1681 return err;
1682}
1683
1684static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
1685 u32 profiler_handle)
1686{
1687 struct gk20a *g = dbg_s->g;
1688 struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
1689 int err = 0;
1690
1691 nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
1692
1693 if (g->profiler_reservation_count < 0) {
1694 nvgpu_err(g, "Negative reservation count!");
1695 return -EINVAL;
1696 }
1697
1698 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1699
1700 /* Find matching object. */
1701 my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
1702
1703 if (!my_prof_obj) {
1704 nvgpu_err(g, "object not found");
1705 err = -EINVAL;
1706 goto exit;
1707 }
1708
1709 /* If we already have the reservation, we're done */
1710 if (my_prof_obj->has_reservation) {
1711 err = 0;
1712 goto exit;
1713 }
1714
1715 if (my_prof_obj->ch == NULL) {
1716 /* Global reservations are only allowed if there are no other
1717 * global or per-context reservations currently held
1718 */
1719 if (!g->ops.dbg_session_ops.check_and_set_global_reservation(
1720 dbg_s, my_prof_obj)) {
1721 nvgpu_err(g,
1722 "global reserve: have existing reservation");
1723 err = -EBUSY;
1724 }
1725 } else if (g->global_profiler_reservation_held) {
1726 /* If there's a global reservation,
1727 * we can't take a per-context one.
1728 */
1729 nvgpu_err(g,
1730 "per-ctxt reserve: global reservation in effect");
1731 err = -EBUSY;
1732 } else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) {
1733 /* TSG: check that another channel in the TSG
1734 * doesn't already have the reservation
1735 */
1736 int my_tsgid = my_prof_obj->ch->tsgid;
1737
1738 nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
1739 dbg_profiler_object_data, prof_obj_entry) {
1740 if (prof_obj->has_reservation &&
1741 (prof_obj->ch->tsgid == my_tsgid)) {
1742 nvgpu_err(g,
1743 "per-ctxt reserve (tsg): already reserved");
1744 err = -EBUSY;
1745 goto exit;
1746 }
1747 }
1748
1749 if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
1750 dbg_s, my_prof_obj)) {
1751 /* Another guest OS has the global reservation */
1752 nvgpu_err(g,
1753 "per-ctxt reserve: global reservation in effect");
1754 err = -EBUSY;
1755 }
1756 } else {
1757 /* channel: check that some other profiler object doesn't
1758 * already have the reservation.
1759 */
1760 struct channel_gk20a *my_ch = my_prof_obj->ch;
1761
1762 nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
1763 dbg_profiler_object_data, prof_obj_entry) {
1764 if (prof_obj->has_reservation &&
1765 (prof_obj->ch == my_ch)) {
1766 nvgpu_err(g,
1767 "per-ctxt reserve (ch): already reserved");
1768 err = -EBUSY;
1769 goto exit;
1770 }
1771 }
1772
1773 if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
1774 dbg_s, my_prof_obj)) {
1775 /* Another guest OS has the global reservation */
1776 nvgpu_err(g,
1777 "per-ctxt reserve: global reservation in effect");
1778 err = -EBUSY;
1779 }
1780 }
1781exit:
1782 nvgpu_mutex_release(&g->dbg_sessions_lock);
1783 return err;
1784}
1785
1786static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
1787 struct nvgpu_dbg_gpu_unbind_channel_args *args)
1788{
1789 struct dbg_session_channel_data *ch_data;
1790 struct gk20a *g = dbg_s->g;
1791 bool channel_found = false;
1792 struct channel_gk20a *ch;
1793 int err;
1794
1795 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
1796 g->name, args->channel_fd);
1797
1798 ch = gk20a_get_channel_from_file(args->channel_fd);
1799 if (!ch) {
1800 nvgpu_log_fn(g, "no channel found for fd");
1801 return -EINVAL;
1802 }
1803
1804 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
1805 nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
1806 dbg_session_channel_data, ch_entry) {
1807 if (ch->chid == ch_data->chid) {
1808 channel_found = true;
1809 break;
1810 }
1811 }
1812 nvgpu_mutex_release(&dbg_s->ch_list_lock);
1813
1814 if (!channel_found) {
 1815 		nvgpu_log_fn(g, "channel not bound, fd=%d\n", args->channel_fd);
1816 err = -EINVAL;
1817 goto out;
1818 }
1819
1820 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1821 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
1822 err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
1823 nvgpu_mutex_release(&dbg_s->ch_list_lock);
1824 nvgpu_mutex_release(&g->dbg_sessions_lock);
1825
1826out:
1827 gk20a_channel_put(ch);
1828 return err;
1829}
1830
1831int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
1832{
1833 struct nvgpu_os_linux *l = container_of(inode->i_cdev,
1834 struct nvgpu_os_linux, dbg.cdev);
1835 struct gk20a *g = &l->g;
1836
1837 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
1838 return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
1839}
1840
1841long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
1842 unsigned long arg)
1843{
1844 struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data;
1845 struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s;
1846 struct gk20a *g = dbg_s->g;
1847 u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE];
1848 int err = 0;
1849
1850 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
1851
1852 if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) ||
1853 (_IOC_NR(cmd) == 0) ||
1854 (_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) ||
1855 (_IOC_SIZE(cmd) > NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE))
1856 return -EINVAL;
1857
1858 memset(buf, 0, sizeof(buf));
1859 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1860 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
1861 return -EFAULT;
1862 }
1863
1864 if (!g->sw_ready) {
1865 err = gk20a_busy(g);
1866 if (err)
1867 return err;
1868
1869 gk20a_idle(g);
1870 }
1871
1872 /* protect from threaded user space calls */
1873 nvgpu_mutex_acquire(&dbg_s->ioctl_lock);
1874
1875 switch (cmd) {
1876 case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
1877 err = dbg_bind_channel_gk20a(dbg_s,
1878 (struct nvgpu_dbg_gpu_bind_channel_args *)buf);
1879 break;
1880
1881 case NVGPU_DBG_GPU_IOCTL_REG_OPS:
1882 err = nvgpu_ioctl_channel_reg_ops(dbg_s,
1883 (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf);
1884 break;
1885
1886 case NVGPU_DBG_GPU_IOCTL_POWERGATE:
1887 err = nvgpu_ioctl_powergate_gk20a(dbg_s,
1888 (struct nvgpu_dbg_gpu_powergate_args *)buf);
1889 break;
1890
1891 case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL:
1892 err = gk20a_dbg_gpu_events_ctrl(dbg_s,
1893 (struct nvgpu_dbg_gpu_events_ctrl_args *)buf);
1894 break;
1895
1896 case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
1897 err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
1898 (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf);
1899 break;
1900
1901 case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE:
1902 err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s,
1903 (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf);
1904 break;
1905
1906 case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS:
1907 err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s,
1908 (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
1909 break;
1910
1911 case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP:
1912 err = gk20a_perfbuf_map(dbg_s,
1913 (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf);
1914 break;
1915
1916 case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP:
1917 err = gk20a_perfbuf_unmap(dbg_s,
1918 (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
1919 break;
1920
1921 case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING:
1922 err = gk20a_dbg_pc_sampling(dbg_s,
1923 (struct nvgpu_dbg_gpu_pc_sampling_args *)buf);
1924 break;
1925
1926 case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE:
1927 err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s,
1928 (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf);
1929 break;
1930
1931 case NVGPU_DBG_GPU_IOCTL_TIMEOUT:
1932 err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s,
1933 (struct nvgpu_dbg_gpu_timeout_args *)buf);
1934 break;
1935
1936 case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT:
1937 nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s,
1938 (struct nvgpu_dbg_gpu_timeout_args *)buf);
1939 break;
1940
1941 case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
1942 err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
1943 (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
1944 break;
1945
1946 case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE:
1947 err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s,
1948 (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf);
1949 break;
1950
1951 case NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE:
1952 err = nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(dbg_s,
1953 (struct nvgpu_dbg_gpu_write_single_sm_error_state_args *)buf);
1954 break;
1955
1956 case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL:
1957 err = dbg_unbind_channel_gk20a(dbg_s,
1958 (struct nvgpu_dbg_gpu_unbind_channel_args *)buf);
1959 break;
1960
1961 case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS:
1962 err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s,
1963 (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf);
1964 break;
1965
1966 case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY:
1967 err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s,
1968 (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf);
1969 break;
1970
1971 case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE:
1972 err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux,
1973 (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
1974 break;
1975
1976 case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE:
1977 err = nvgpu_ioctl_free_profiler_object(dbg_s_linux,
1978 (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
1979 break;
1980
1981 case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE:
1982 err = nvgpu_ioctl_profiler_reserve(dbg_s,
1983 (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf);
1984 break;
1985
1986 default:
1987 nvgpu_err(g,
1988 "unrecognized dbg gpu ioctl cmd: 0x%x",
1989 cmd);
1990 err = -ENOTTY;
1991 break;
1992 }
1993
1994 nvgpu_mutex_release(&dbg_s->ioctl_lock);
1995
1996 nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err);
1997
1998 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
1999 err = copy_to_user((void __user *)arg,
2000 buf, _IOC_SIZE(cmd));
2001
2002 return err;
2003}
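
The dispatcher above copies the argument struct in from userspace for _IOC_WRITE commands and back out for _IOC_READ commands, so one round trip looks like the hedged sketch below; the device node path is an assumption, while the ioctl and argument struct come from the uapi header this driver uses.

	/* Hypothetical client: query whether engine timeouts are enabled.
	 * The dbg node path is platform specific and assumed here. */
	int dbg_fd = open("/dev/nvhost-dbg-gpu", O_RDWR);	/* path: assumption */
	struct nvgpu_dbg_gpu_timeout_args targs = {0};

	if (dbg_fd >= 0 &&
	    ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT, &targs) == 0)
		printf("timeouts %s\n",
		       targs.enable == NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE ?
		       "enabled" : "disabled");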
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h
deleted file mode 100644
index bd76045b..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h
+++ /dev/null
@@ -1,54 +0,0 @@
1/*
2 * Tegra GK20A GPU Debugger Driver
3 *
4 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18#ifndef DBG_GPU_IOCTL_GK20A_H
19#define DBG_GPU_IOCTL_GK20A_H
20#include <linux/poll.h>
21
22#include "gk20a/dbg_gpu_gk20a.h"
23
24/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
25 * of regops */
26#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024
27
28struct dbg_session_gk20a_linux {
29 struct device *dev;
30 struct dbg_session_gk20a dbg_s;
31};
32
33struct dbg_session_channel_data_linux {
34 /*
35 * We have to keep a ref to the _file_, not the channel, because
36 * close(channel_fd) is synchronous and would deadlock if we had an
37 * open debug session fd holding a channel ref at that time. Holding a
38 * ref to the file makes close(channel_fd) just drop a kernel ref to
39 * the file; the channel will close when the last file ref is dropped.
40 */
41 struct file *ch_f;
42 struct dbg_session_channel_data ch_data;
43};
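
A hedged sketch of the ref pattern the comment above describes, using the standard fget()/fput() helpers; ch_data_linux is a placeholder name, and the real bind/unbind code in ioctl_dbg.c may differ in detail.

	/* bind: take a ref on the channel's file, not on the channel */
	struct file *f = fget(channel_fd);
	if (!f)
		return -EINVAL;
	ch_data_linux->ch_f = f;

	/* unbind / session close: drop the file ref; the channel itself
	 * goes away only when the last file ref is dropped */
	fput(ch_data_linux->ch_f);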
44
45/* module debug driver interface */
46int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp);
47int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp);
48long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
49unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);
50
51/* used by profiler driver interface */
52int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);
53
54#endif \ No newline at end of file
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c
deleted file mode 100644
index 4ef99ded..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c
+++ /dev/null
@@ -1,677 +0,0 @@
1/*
2 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/fs.h>
18#include <linux/file.h>
19#include <linux/cdev.h>
20#include <linux/uaccess.h>
21#include <linux/poll.h>
22#include <uapi/linux/nvgpu.h>
23#include <linux/anon_inodes.h>
24
25#include <nvgpu/kmem.h>
26#include <nvgpu/log.h>
27#include <nvgpu/os_sched.h>
28
29#include "gk20a/gk20a.h"
30#include "gk20a/tsg_gk20a.h"
31#include "gv11b/fifo_gv11b.h"
32#include "platform_gk20a.h"
33#include "ioctl_tsg.h"
34#include "ioctl_channel.h"
35#include "os_linux.h"
36
37struct tsg_private {
38 struct gk20a *g;
39 struct tsg_gk20a *tsg;
40};
41
42static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
43{
44 struct channel_gk20a *ch;
45 int err;
46
47 ch = gk20a_get_channel_from_file(ch_fd);
48 if (!ch)
49 return -EINVAL;
50
51 err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);
52
53 gk20a_channel_put(ch);
54 return err;
55}
56
57static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
58 struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg)
59{
60 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
61 struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
62 struct channel_gk20a *ch;
63 struct gr_gk20a *gr = &g->gr;
64 int err = 0;
65
66 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
67
68 nvgpu_mutex_acquire(&sched->control_lock);
69 if (sched->control_locked) {
70 err = -EPERM;
71 goto mutex_release;
72 }
73 err = gk20a_busy(g);
74 if (err) {
75 nvgpu_err(g, "failed to power on gpu");
76 goto mutex_release;
77 }
78
79 ch = gk20a_get_channel_from_file(arg->channel_fd);
80 if (!ch) {
81 err = -EINVAL;
82 goto idle;
83 }
84
85 if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) {
86 if ((arg->num_active_tpcs > gr->max_tpc_count) ||
87 !(arg->num_active_tpcs)) {
88 nvgpu_err(g, "Invalid num of active TPCs");
89 err = -EINVAL;
90 goto ch_put;
91 }
92 tsg->tpc_num_initialized = true;
93 tsg->num_active_tpcs = arg->num_active_tpcs;
94 tsg->tpc_pg_enabled = true;
95 } else {
96 tsg->tpc_pg_enabled = false; nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled");
97 }
98
99 if (arg->subcontext_id < g->fifo.max_subctx_count) {
100 ch->subctx_id = arg->subcontext_id;
101 } else {
102 err = -EINVAL;
103 goto ch_put;
104 }
105
106 nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d",
107 ch->chid, ch->subctx_id);
108
109 /* Use runqueue selector 1 for all ASYNC ids */
110 if (ch->subctx_id > CHANNEL_INFO_VEID0)
111 ch->runqueue_sel = 1;
112
113 err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);
114ch_put:
115 gk20a_channel_put(ch);
116idle:
117 gk20a_idle(g);
118mutex_release:
119 nvgpu_mutex_release(&sched->control_lock);
120 return err;
121}
122
123static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
124{
125 struct channel_gk20a *ch;
126 int err = 0;
127
128 ch = gk20a_get_channel_from_file(ch_fd);
129 if (!ch)
130 return -EINVAL;
131
132 if (ch->tsgid != tsg->tsgid) {
133 err = -EINVAL;
134 goto out;
135 }
136
137 err = gk20a_tsg_unbind_channel(ch);
138
139 /*
140 * Mark the channel timedout since channel unbound from TSG
141 * has no context of its own so it can't serve any job
142 */
143 ch->has_timedout = true;
144
145out:
146 gk20a_channel_put(ch);
147 return err;
148}
149
150static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg,
151 unsigned int event_id,
152 struct gk20a_event_id_data **event_id_data)
153{
154 struct gk20a_event_id_data *local_event_id_data;
155 bool event_found = false;
156
157 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
158 nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list,
159 gk20a_event_id_data, event_id_node) {
160 if (local_event_id_data->event_id == event_id) {
161 event_found = true;
162 break;
163 }
164 }
165 nvgpu_mutex_release(&tsg->event_id_list_lock);
166
167 if (event_found) {
168 *event_id_data = local_event_id_data;
169 return 0;
170 } else {
171 return -1;
172 }
173}
174
175/*
176 * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific
177 * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs
178 */
179static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id)
180{
181 switch (event_id) {
182 case NVGPU_EVENT_ID_BPT_INT:
183 return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT;
184 case NVGPU_EVENT_ID_BPT_PAUSE:
185 return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE;
186 case NVGPU_EVENT_ID_BLOCKING_SYNC:
187 return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC;
188 case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED:
189 return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED;
190 case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE:
191 return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE;
192 case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN:
193 return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN;
194 }
195
196 return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX;
197}
198
199void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
200 int __event_id)
201{
202 struct gk20a_event_id_data *event_id_data;
203 u32 event_id;
204 int err = 0;
205 struct gk20a *g = tsg->g;
206
207 event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id);
208 if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
209 return;
210
211 err = gk20a_tsg_get_event_data_from_id(tsg, event_id,
212 &event_id_data);
213 if (err)
214 return;
215
216 nvgpu_mutex_acquire(&event_id_data->lock);
217
218 nvgpu_log_info(g,
219 "posting event for event_id=%d on tsg=%d\n",
220 event_id, tsg->tsgid);
221 event_id_data->event_posted = true;
222
223 nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq);
224
225 nvgpu_mutex_release(&event_id_data->lock);
226}
227
228static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
229{
230 unsigned int mask = 0;
231 struct gk20a_event_id_data *event_id_data = filep->private_data;
232 struct gk20a *g = event_id_data->g;
233 u32 event_id = event_id_data->event_id;
234 struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
235
236 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " ");
237
238 poll_wait(filep, &event_id_data->event_id_wq.wq, wait);
239
240 nvgpu_mutex_acquire(&event_id_data->lock);
241
242 if (event_id_data->event_posted) {
243 nvgpu_log_info(g,
244 "found pending event_id=%d on TSG=%d\n",
245 event_id, tsg->tsgid);
246 mask = (POLLPRI | POLLIN);
247 event_id_data->event_posted = false;
248 }
249
250 nvgpu_mutex_release(&event_id_data->lock);
251
252 return mask;
253}
254
255static int gk20a_event_id_release(struct inode *inode, struct file *filp)
256{
257 struct gk20a_event_id_data *event_id_data = filp->private_data;
258 struct gk20a *g = event_id_data->g;
259 struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
260
261 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
262 nvgpu_list_del(&event_id_data->event_id_node);
263 nvgpu_mutex_release(&tsg->event_id_list_lock);
264
265 nvgpu_mutex_destroy(&event_id_data->lock);
266 gk20a_put(g);
267 nvgpu_kfree(g, event_id_data);
268 filp->private_data = NULL;
269
270 return 0;
271}
272
273const struct file_operations gk20a_event_id_ops = {
274 .owner = THIS_MODULE,
275 .poll = gk20a_event_id_poll,
276 .release = gk20a_event_id_release,
277};
278
279static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
280 int event_id,
281 int *fd)
282{
283 int err = 0;
284 int local_fd;
285 struct file *file;
286 char name[64];
287 struct gk20a_event_id_data *event_id_data;
288 struct gk20a *g;
289
290 g = gk20a_get(tsg->g);
291 if (!g)
292 return -ENODEV;
293
294 err = gk20a_tsg_get_event_data_from_id(tsg,
295 event_id, &event_id_data);
296 if (err == 0) {
 297 		/* We already have this event enabled */
298 err = -EINVAL;
299 goto free_ref;
300 }
301
302 err = get_unused_fd_flags(O_RDWR);
303 if (err < 0)
304 goto free_ref;
305 local_fd = err;
306
307 snprintf(name, sizeof(name), "nvgpu-event%d-fd%d",
308 event_id, local_fd);
309
310 file = anon_inode_getfile(name, &gk20a_event_id_ops,
311 NULL, O_RDWR);
312 if (IS_ERR(file)) {
313 err = PTR_ERR(file);
314 goto clean_up;
315 }
316
317 event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data));
318 if (!event_id_data) {
319 err = -ENOMEM;
320 goto clean_up_file;
321 }
322 event_id_data->g = g;
323 event_id_data->id = tsg->tsgid;
324 event_id_data->event_id = event_id;
325
326 nvgpu_cond_init(&event_id_data->event_id_wq);
327 err = nvgpu_mutex_init(&event_id_data->lock);
328 if (err)
329 goto clean_up_free;
330
331 nvgpu_init_list_node(&event_id_data->event_id_node);
332
333 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
334 nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list);
335 nvgpu_mutex_release(&tsg->event_id_list_lock);
336
337 fd_install(local_fd, file);
338 file->private_data = event_id_data;
339
340 *fd = local_fd;
341
342 return 0;
343
344clean_up_free:
345 nvgpu_kfree(g, event_id_data);
346clean_up_file:
347 fput(file);
348clean_up:
349 put_unused_fd(local_fd);
350free_ref:
351 gk20a_put(g);
352 return err;
353}
354
355static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
356 struct nvgpu_event_id_ctrl_args *args)
357{
358 int err = 0;
359 int fd = -1;
360
361 if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
362 return -EINVAL;
363
364 switch (args->cmd) {
365 case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
366 err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
367 if (!err)
368 args->event_fd = fd;
369 break;
370
371 default:
372 nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x",
373 args->cmd);
374 err = -EINVAL;
375 break;
376 }
377
378 return err;
379}
380
381int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp)
382{
383 struct tsg_private *priv;
384 struct tsg_gk20a *tsg;
385 struct device *dev;
386 int err;
387
388 g = gk20a_get(g);
389 if (!g)
390 return -ENODEV;
391
392 dev = dev_from_gk20a(g);
393
394 nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev));
395
396 priv = nvgpu_kmalloc(g, sizeof(*priv));
397 if (!priv) {
398 err = -ENOMEM;
399 goto free_ref;
400 }
401
402 tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
403 if (!tsg) {
404 nvgpu_kfree(g, priv);
405 err = -ENOMEM;
406 goto free_ref;
407 }
408
409 priv->g = g;
410 priv->tsg = tsg;
411 filp->private_data = priv;
412
413 gk20a_sched_ctrl_tsg_added(g, tsg);
414
415 return 0;
416
417free_ref:
418 gk20a_put(g);
419 return err;
420}
421
422int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp)
423{
424 struct nvgpu_os_linux *l;
425 struct gk20a *g;
426 int ret;
427
428 l = container_of(inode->i_cdev,
429 struct nvgpu_os_linux, tsg.cdev);
430 g = &l->g;
431
432 nvgpu_log_fn(g, " ");
433
434 ret = gk20a_busy(g);
435 if (ret) {
436 nvgpu_err(g, "failed to power on, %d", ret);
437 return ret;
438 }
439
440 ret = nvgpu_ioctl_tsg_open(&l->g, filp);
441
442 gk20a_idle(g);
443 nvgpu_log_fn(g, "done");
444 return ret;
445}
446
447void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref)
448{
449 struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount);
450 struct gk20a *g = tsg->g;
451
452 gk20a_sched_ctrl_tsg_removed(g, tsg);
453
454 gk20a_tsg_release(ref);
455 gk20a_put(g);
456}
457
458int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp)
459{
460 struct tsg_private *priv = filp->private_data;
461 struct tsg_gk20a *tsg = priv->tsg;
462
463 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
464 nvgpu_kfree(tsg->g, priv);
465 return 0;
466}
467
468static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
469 struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg)
470{
471 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
472 struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
473 u32 level = arg->level;
474 int err;
475
476 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
477
478 nvgpu_mutex_acquire(&sched->control_lock);
479 if (sched->control_locked) {
480 err = -EPERM;
481 goto done;
482 }
483 err = gk20a_busy(g);
484 if (err) {
485 nvgpu_err(g, "failed to power on gpu");
486 goto done;
487 }
488
489 level = nvgpu_get_common_runlist_level(level);
490 err = gk20a_tsg_set_runlist_interleave(tsg, level);
491
492 gk20a_idle(g);
493done:
494 nvgpu_mutex_release(&sched->control_lock);
495 return err;
496}
497
498static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
499 struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
500{
501 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
502 struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
503 int err;
504
505 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
506
507 nvgpu_mutex_acquire(&sched->control_lock);
508 if (sched->control_locked) {
509 err = -EPERM;
510 goto done;
511 }
512 err = gk20a_busy(g);
513 if (err) {
514 nvgpu_err(g, "failed to power on gpu");
515 goto done;
516 }
517 err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us);
518 gk20a_idle(g);
519done:
520 nvgpu_mutex_release(&sched->control_lock);
521 return err;
522}
523
524static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
525 struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
526{
527 arg->timeslice_us = gk20a_tsg_get_timeslice(tsg);
528 return 0;
529}
530
531long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
532 unsigned long arg)
533{
534 struct tsg_private *priv = filp->private_data;
535 struct tsg_gk20a *tsg = priv->tsg;
536 struct gk20a *g = tsg->g;
537 u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE];
538 int err = 0;
539
540 nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
541
542 if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) ||
543 (_IOC_NR(cmd) == 0) ||
544 (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) ||
545 (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE))
546 return -EINVAL;
547
548 memset(buf, 0, sizeof(buf));
549 if (_IOC_DIR(cmd) & _IOC_WRITE) {
550 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
551 return -EFAULT;
552 }
553
554 if (!g->sw_ready) {
555 err = gk20a_busy(g);
556 if (err)
557 return err;
558
559 gk20a_idle(g);
560 }
561
562 switch (cmd) {
563 case NVGPU_TSG_IOCTL_BIND_CHANNEL:
564 {
565 int ch_fd = *(int *)buf;
566 if (ch_fd < 0) {
567 err = -EINVAL;
568 break;
569 }
570 err = gk20a_tsg_bind_channel_fd(tsg, ch_fd);
571 break;
572 }
573
574 case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX:
575 {
576 err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg,
577 (struct nvgpu_tsg_bind_channel_ex_args *)buf);
578 break;
579 }
580
581 case NVGPU_TSG_IOCTL_UNBIND_CHANNEL:
582 {
583 int ch_fd = *(int *)buf;
584
585 if (ch_fd < 0) {
586 err = -EINVAL;
587 break;
588 }
589 err = gk20a_busy(g);
590 if (err) {
591 nvgpu_err(g,
 592 			   "failed to power on gpu for ioctl cmd: 0x%x", cmd);
593 break;
594 }
595 err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd);
596 gk20a_idle(g);
597 break;
598 }
599
600 case NVGPU_IOCTL_TSG_ENABLE:
601 {
602 err = gk20a_busy(g);
603 if (err) {
604 nvgpu_err(g,
 605 			   "failed to power on gpu for ioctl cmd: 0x%x", cmd);
606 return err;
607 }
608 g->ops.fifo.enable_tsg(tsg);
609 gk20a_idle(g);
610 break;
611 }
612
613 case NVGPU_IOCTL_TSG_DISABLE:
614 {
615 err = gk20a_busy(g);
616 if (err) {
617 nvgpu_err(g,
 618 			   "failed to power on gpu for ioctl cmd: 0x%x", cmd);
619 return err;
620 }
621 g->ops.fifo.disable_tsg(tsg);
622 gk20a_idle(g);
623 break;
624 }
625
626 case NVGPU_IOCTL_TSG_PREEMPT:
627 {
628 err = gk20a_busy(g);
629 if (err) {
630 nvgpu_err(g,
 631 			   "failed to power on gpu for ioctl cmd: 0x%x", cmd);
632 return err;
633 }
634 /* preempt TSG */
635 err = g->ops.fifo.preempt_tsg(g, tsg->tsgid);
636 gk20a_idle(g);
637 break;
638 }
639
640 case NVGPU_IOCTL_TSG_EVENT_ID_CTRL:
641 {
642 err = gk20a_tsg_event_id_ctrl(g, tsg,
643 (struct nvgpu_event_id_ctrl_args *)buf);
644 break;
645 }
646
647 case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
648 err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg,
649 (struct nvgpu_runlist_interleave_args *)buf);
650 break;
651
652 case NVGPU_IOCTL_TSG_SET_TIMESLICE:
653 {
654 err = gk20a_tsg_ioctl_set_timeslice(g, tsg,
655 (struct nvgpu_timeslice_args *)buf);
656 break;
657 }
658 case NVGPU_IOCTL_TSG_GET_TIMESLICE:
659 {
660 err = gk20a_tsg_ioctl_get_timeslice(g, tsg,
661 (struct nvgpu_timeslice_args *)buf);
662 break;
663 }
664
665 default:
666 nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
667 cmd);
668 err = -ENOTTY;
669 break;
670 }
671
672 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
673 err = copy_to_user((void __user *)arg,
674 buf, _IOC_SIZE(cmd));
675
676 return err;
677}
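
For NVGPU_TSG_IOCTL_BIND_CHANNEL the argument is simply the channel fd, as the *(int *)buf dereference above shows. A hedged userspace sketch follows; the device node path is an assumption, while the ioctl names come from the switch above.

	/* Hypothetical client: bind an existing channel fd into a TSG and
	 * enable the TSG.  Node path is platform specific and assumed. */
	int tsg_fd = open("/dev/nvhost-tsg-gpu", O_RDWR);	/* path: assumption */
	int err = ioctl(tsg_fd, NVGPU_TSG_IOCTL_BIND_CHANNEL, &ch_fd);

	if (!err)
		err = ioctl(tsg_fd, NVGPU_IOCTL_TSG_ENABLE);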
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h b/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h
deleted file mode 100644
index 67399fd4..00000000
--- a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef NVGPU_IOCTL_TSG_H
14#define NVGPU_IOCTL_TSG_H
15
16struct inode;
17struct file;
18struct gk20a;
19struct nvgpu_ref;
20
21int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp);
22int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp);
23int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp);
24long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp,
25 unsigned int cmd, unsigned long arg);
26void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref);
27
28#endif
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
deleted file mode 100644
index 10946a08..00000000
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ /dev/null
@@ -1,654 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/mm.h>
18#include <linux/slab.h>
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21#include <linux/vmalloc.h>
22#include <linux/stacktrace.h>
23
24#include <nvgpu/lock.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/atomic.h>
27#include <nvgpu/bug.h>
28
29#include "gk20a/gk20a.h"
30
31#include "kmem_priv.h"
32
33/*
34 * Statically declared because this needs to be shared across all nvgpu driver
35 * instances. This makes sure that all kmem caches are _definitely_ uniquely
36 * named.
37 */
38static atomic_t kmem_cache_id;
39
40void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
41{
42 void *p;
43
44 if (size > PAGE_SIZE) {
45 if (clear)
46 p = nvgpu_vzalloc(g, size);
47 else
48 p = nvgpu_vmalloc(g, size);
49 } else {
50 if (clear)
51 p = nvgpu_kzalloc(g, size);
52 else
53 p = nvgpu_kmalloc(g, size);
54 }
55
56 return p;
57}
58
59void nvgpu_big_free(struct gk20a *g, void *p)
60{
61 /*
62 * This will have to be fixed eventually. Allocs that use
63 * nvgpu_big_[mz]alloc() will need to remember the size of the alloc
64 * when freeing.
65 */
66 if (is_vmalloc_addr(p))
67 nvgpu_vfree(g, p);
68 else
69 nvgpu_kfree(g, p);
70}
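
A short usage sketch: callers pair nvgpu_big_zalloc()/nvgpu_big_free() (as the FB-memory ioctl path does) and never need to know whether the request was served by kmalloc or vmalloc, since nvgpu_big_free() re-derives that with is_vmalloc_addr(). SZ_64K below is only an example size.

	/* Illustrative only: a request larger than PAGE_SIZE takes the
	 * vzalloc path, a small one takes kzalloc; the free is identical. */
	void *buf = nvgpu_big_zalloc(g, SZ_64K);
	if (!buf)
		return -ENOMEM;
	/* ... use buf ... */
	nvgpu_big_free(g, buf);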
71
72void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
73{
74 void *alloc;
75
76#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
77 alloc = __nvgpu_track_kmalloc(g, size, ip);
78#else
79 alloc = kmalloc(size, GFP_KERNEL);
80#endif
81
82 kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
83 size, alloc, GFP_KERNEL);
84
85 return alloc;
86}
87
88void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
89{
90 void *alloc;
91
92#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
93 alloc = __nvgpu_track_kzalloc(g, size, ip);
94#else
95 alloc = kzalloc(size, GFP_KERNEL);
96#endif
97
98 kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
99 size, alloc, GFP_KERNEL);
100
101 return alloc;
102}
103
104void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip)
105{
106 void *alloc;
107
108#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
109 alloc = __nvgpu_track_kcalloc(g, n, size, ip);
110#else
111 alloc = kcalloc(n, size, GFP_KERNEL);
112#endif
113
114 kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
115 n * size, alloc, GFP_KERNEL);
116
117 return alloc;
118}
119
120void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip)
121{
122 void *alloc;
123
124#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
125 alloc = __nvgpu_track_vmalloc(g, size, ip);
126#else
127 alloc = vmalloc(size);
128#endif
129
130 kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc);
131
132 return alloc;
133}
134
135void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip)
136{
137 void *alloc;
138
139#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
140 alloc = __nvgpu_track_vzalloc(g, size, ip);
141#else
142 alloc = vzalloc(size);
143#endif
144
145 kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc);
146
147 return alloc;
148}
149
150void __nvgpu_kfree(struct gk20a *g, void *addr)
151{
152 kmem_dbg(g, "kfree: addr=0x%p", addr);
153#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
154 __nvgpu_track_kfree(g, addr);
155#else
156 kfree(addr);
157#endif
158}
159
160void __nvgpu_vfree(struct gk20a *g, void *addr)
161{
162 kmem_dbg(g, "vfree: addr=0x%p", addr);
163#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
164 __nvgpu_track_vfree(g, addr);
165#else
166 vfree(addr);
167#endif
168}
169
170#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
171
172void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
173{
174 nvgpu_mutex_acquire(&tracker->lock);
175}
176
177void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
178{
179 nvgpu_mutex_release(&tracker->lock);
180}
181
182void kmem_print_mem_alloc(struct gk20a *g,
183 struct nvgpu_mem_alloc *alloc,
184 struct seq_file *s)
185{
186#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
187 int i;
188
189 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
190 alloc->addr, alloc->size);
191 for (i = 0; i < alloc->stack_length; i++)
192 __pstat(s, " %3d [<%p>] %pS\n", i,
193 (void *)alloc->stack[i],
194 (void *)alloc->stack[i]);
195 __pstat(s, "\n");
196#else
197 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
198 alloc->addr, alloc->size, alloc->ip);
199#endif
200}
201
202static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
203 struct nvgpu_mem_alloc *alloc)
204{
205 alloc->allocs_entry.key_start = alloc->addr;
206 alloc->allocs_entry.key_end = alloc->addr + alloc->size;
207
208 nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
209 return 0;
210}
211
212static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
213 struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
214{
215 struct nvgpu_mem_alloc *alloc;
216 struct nvgpu_rbtree_node *node = NULL;
217
218 nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
219 if (!node)
220 return NULL;
221
222 alloc = nvgpu_mem_alloc_from_rbtree_node(node);
223
224 nvgpu_rbtree_unlink(node, &tracker->allocs);
225
226 return alloc;
227}
228
229static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
230 unsigned long size, unsigned long real_size,
231 u64 addr, unsigned long ip)
232{
233 int ret;
234 struct nvgpu_mem_alloc *alloc;
235#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
236 struct stack_trace stack_trace;
237#endif
238
239 alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
240 if (!alloc)
241 return -ENOMEM;
242
243 alloc->owner = tracker;
244 alloc->size = size;
245 alloc->real_size = real_size;
246 alloc->addr = addr;
247 alloc->ip = (void *)(uintptr_t)ip;
248
249#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
250 stack_trace.max_entries = MAX_STACK_TRACE;
251 stack_trace.nr_entries = 0;
252 stack_trace.entries = alloc->stack;
253 /*
254 * This 4 here skips the 2 function calls that happen for all traced
255 * allocs due to nvgpu:
256 *
257 * __nvgpu_save_kmem_alloc+0x7c/0x128
258 * __nvgpu_track_kzalloc+0xcc/0xf8
259 *
260 * And the function calls that get made by the stack trace code itself.
261 * If the trace savings code changes this will likely have to change
262 * as well.
263 */
264 stack_trace.skip = 4;
265 save_stack_trace(&stack_trace);
266 alloc->stack_length = stack_trace.nr_entries;
267#endif
268
269 nvgpu_lock_tracker(tracker);
270 tracker->bytes_alloced += size;
271 tracker->bytes_alloced_real += real_size;
272 tracker->nr_allocs++;
273
274 /* Keep track of this for building a histogram later on. */
275 if (tracker->max_alloc < size)
276 tracker->max_alloc = size;
277 if (tracker->min_alloc > size)
278 tracker->min_alloc = size;
279
280 ret = nvgpu_add_alloc(tracker, alloc);
281 if (ret) {
282 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
283 kfree(alloc);
284 nvgpu_unlock_tracker(tracker);
285 return ret;
286 }
287 nvgpu_unlock_tracker(tracker);
288
289 return 0;
290}
291
292static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
293 u64 addr)
294{
295 struct nvgpu_mem_alloc *alloc;
296
297 nvgpu_lock_tracker(tracker);
298 alloc = nvgpu_rem_alloc(tracker, addr);
299 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
300 nvgpu_unlock_tracker(tracker);
301 return -EINVAL;
302 }
303
304 memset((void *)alloc->addr, 0, alloc->size);
305
306 tracker->nr_frees++;
307 tracker->bytes_freed += alloc->size;
308 tracker->bytes_freed_real += alloc->real_size;
309 nvgpu_unlock_tracker(tracker);
310
311 return 0;
312}
313
314static void __nvgpu_check_valloc_size(unsigned long size)
315{
316 WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
317}
318
319static void __nvgpu_check_kalloc_size(size_t size)
320{
321 WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
322}
323
324void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
325 unsigned long ip)
326{
327 void *alloc = vmalloc(size);
328
329 if (!alloc)
330 return NULL;
331
332 __nvgpu_check_valloc_size(size);
333
334 /*
335 * Ignore the return message. If this fails let's not cause any issues
336 * for the rest of the driver.
337 */
338 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
339 (u64)(uintptr_t)alloc, ip);
340
341 return alloc;
342}
343
344void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
345 unsigned long ip)
346{
347 void *alloc = vzalloc(size);
348
349 if (!alloc)
350 return NULL;
351
352 __nvgpu_check_valloc_size(size);
353
354 /*
355 * Ignore the return message. If this fails let's not cause any issues
356 * for the rest of the driver.
357 */
358 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
359 (u64)(uintptr_t)alloc, ip);
360
361 return alloc;
362}
363
364void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
365{
366 void *alloc = kmalloc(size, GFP_KERNEL);
367
368 if (!alloc)
369 return NULL;
370
371 __nvgpu_check_kalloc_size(size);
372
373 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
374 (u64)(uintptr_t)alloc, ip);
375
376 return alloc;
377}
378
379void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
380{
381 void *alloc = kzalloc(size, GFP_KERNEL);
382
383 if (!alloc)
384 return NULL;
385
386 __nvgpu_check_kalloc_size(size);
387
388 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
389 (u64)(uintptr_t)alloc, ip);
390
391 return alloc;
392}
393
394void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
395 unsigned long ip)
396{
397 void *alloc = kcalloc(n, size, GFP_KERNEL);
398
399 if (!alloc)
400 return NULL;
401
402 __nvgpu_check_kalloc_size(n * size);
403
404 __nvgpu_save_kmem_alloc(g->kmallocs, n * size,
405 roundup_pow_of_two(n * size),
406 (u64)(uintptr_t)alloc, ip);
407
408 return alloc;
409}
410
411void __nvgpu_track_vfree(struct gk20a *g, void *addr)
412{
413 /*
414 * Often it is accepted practice to pass NULL pointers into free
415 * functions to save code.
416 */
417 if (!addr)
418 return;
419
420 __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);
421
422 vfree(addr);
423}
424
425void __nvgpu_track_kfree(struct gk20a *g, void *addr)
426{
427 if (!addr)
428 return;
429
430 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
431
432 kfree(addr);
433}
434
435static int __do_check_for_outstanding_allocs(
436 struct gk20a *g,
437 struct nvgpu_mem_alloc_tracker *tracker,
438 const char *type, bool silent)
439{
440 struct nvgpu_rbtree_node *node;
441 int count = 0;
442
443 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
444 while (node) {
445 struct nvgpu_mem_alloc *alloc =
446 nvgpu_mem_alloc_from_rbtree_node(node);
447
448 if (!silent)
449 kmem_print_mem_alloc(g, alloc, NULL);
450
451 count++;
452 nvgpu_rbtree_enum_next(&node, node);
453 }
454
455 return count;
456}
457
458/**
459 * check_for_outstanding_allocs - Count and display outstanding allocs
460 *
461 * @g - The GPU.
462 * @silent - If set don't print anything about the allocs.
463 *
464 * Dump (or just count) the number of allocations left outstanding.
465 */
466static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
467{
468 int count = 0;
469
470 count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
471 silent);
472 count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
473 silent);
474
475 return count;
476}
477
478static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
479 void (*force_free_func)(const void *))
480{
481 struct nvgpu_rbtree_node *node;
482
483 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
484 while (node) {
485 struct nvgpu_mem_alloc *alloc =
486 nvgpu_mem_alloc_from_rbtree_node(node);
487
488 if (force_free_func)
489 force_free_func((void *)alloc->addr);
490
491 nvgpu_rbtree_unlink(node, &tracker->allocs);
492 kfree(alloc);
493
494 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
495 }
496}
497
498/**
499 * nvgpu_kmem_cleanup - Cleanup the kmem tracking
500 *
501 * @g - The GPU.
502 * @force_free - If set will also free leaked objects if possible.
503 *
504 * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free
505 * is non-zero then the allocation made by nvgpu is also freed. This is risky,
506 * though, as it is possible that the memory is still in use by other parts of
507 * the GPU driver not aware that this has happened.
508 *
509 * In theory it should be fine if the GPU driver has been deinitialized and
510 * there are no bugs in that code. However, if there are any bugs in that code
511 * then they could likely manifest as odd crashes indeterminate amounts of time
512 * in the future. So use @force_free at your own risk.
513 */
514static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
515{
516 do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
517 do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
518}
519
520void nvgpu_kmem_fini(struct gk20a *g, int flags)
521{
522 int count;
523 bool silent, force_free;
524
525 if (!flags)
526 return;
527
528 silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
529 force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
530
531 count = check_for_outstanding_allocs(g, silent);
532 nvgpu_kmem_cleanup(g, force_free);
533
534 /*
535 * If we leak objects we can either BUG() out or just WARN(). In general
536	 * it doesn't make sense to BUG() here since leaking a few objects
537 * won't crash the kernel but it can be helpful for development.
538 *
539 * If neither flag is set then we just silently do nothing.
540 */
541 if (count > 0) {
542 if (flags & NVGPU_KMEM_FINI_WARN) {
543 WARN(1, "Letting %d allocs leak!!\n", count);
544 } else if (flags & NVGPU_KMEM_FINI_BUG) {
545 nvgpu_err(g, "Letting %d allocs leak!!", count);
546 BUG();
547 }
548 }
549}
550
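For reference, a hedged sketch of how the flags handled above could be combined by a caller at teardown; the wrapper function and its call site are illustrative assumptions, only nvgpu_kmem_fini() and the NVGPU_KMEM_FINI_* flags come from the code above.

/* Illustrative sketch only: dump any outstanding allocations, warn if
 * something leaked, and force-free the remaining tracking state. */
static void example_kmem_teardown(struct gk20a *g)
{
	nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS |
			   NVGPU_KMEM_FINI_WARN |
			   NVGPU_KMEM_FINI_FORCE_CLEANUP);
}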
551int nvgpu_kmem_init(struct gk20a *g)
552{
553 int err;
554
555 g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
556 g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
557
558 if (!g->vmallocs || !g->kmallocs) {
559 err = -ENOMEM;
560 goto fail;
561 }
562
563 g->vmallocs->name = "vmalloc";
564 g->kmallocs->name = "kmalloc";
565
566 g->vmallocs->allocs = NULL;
567 g->kmallocs->allocs = NULL;
568
569 nvgpu_mutex_init(&g->vmallocs->lock);
570 nvgpu_mutex_init(&g->kmallocs->lock);
571
572 g->vmallocs->min_alloc = PAGE_SIZE;
573 g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
574
575 /*
576 * This needs to go after all the other initialization since they use
577 * the nvgpu_kzalloc() API.
578 */
579 g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
580 sizeof(struct nvgpu_mem_alloc));
581 g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
582 sizeof(struct nvgpu_mem_alloc));
583
584 if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
585 err = -ENOMEM;
586 if (g->vmallocs->allocs_cache)
587 nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
588 if (g->kmallocs->allocs_cache)
589 nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
590 goto fail;
591 }
592
593 return 0;
594
595fail:
596 if (g->vmallocs)
597 kfree(g->vmallocs);
598 if (g->kmallocs)
599 kfree(g->kmallocs);
600 return err;
601}
602
603#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */
604
605int nvgpu_kmem_init(struct gk20a *g)
606{
607 return 0;
608}
609
610void nvgpu_kmem_fini(struct gk20a *g, int flags)
611{
612}
613#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
614
615struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
616{
617 struct nvgpu_kmem_cache *cache =
618 nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));
619
620 if (!cache)
621 return NULL;
622
623 cache->g = g;
624
625 snprintf(cache->name, sizeof(cache->name),
626 "nvgpu-cache-0x%p-%d-%d", g, (int)size,
627 atomic_inc_return(&kmem_cache_id));
628 cache->cache = kmem_cache_create(cache->name,
629 size, size, 0, NULL);
630 if (!cache->cache) {
631 nvgpu_kfree(g, cache);
632 return NULL;
633 }
634
635 return cache;
636}
637
638void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
639{
640 struct gk20a *g = cache->g;
641
642 kmem_cache_destroy(cache->cache);
643 nvgpu_kfree(g, cache);
644}
645
646void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
647{
648 return kmem_cache_alloc(cache->cache, GFP_KERNEL);
649}
650
651void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
652{
653 kmem_cache_free(cache->cache, ptr);
654}
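The four functions above form the whole cache API visible in this file; a minimal usage sketch, assuming a hypothetical object type (struct example_obj) and call site:

struct example_obj { u32 id; };

/* Illustrative sketch only: create a cache, allocate and free one
 * object from it, then destroy the cache. */
static int example_cache_usage(struct gk20a *g)
{
	struct nvgpu_kmem_cache *cache;
	struct example_obj *obj;

	cache = nvgpu_kmem_cache_create(g, sizeof(struct example_obj));
	if (!cache)
		return -ENOMEM;

	obj = nvgpu_kmem_cache_alloc(cache);
	if (obj)
		nvgpu_kmem_cache_free(cache, obj);

	nvgpu_kmem_cache_destroy(cache);
	return 0;
}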
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
deleted file mode 100644
index a41762af..00000000
--- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __KMEM_PRIV_H__
18#define __KMEM_PRIV_H__
19
20#include <nvgpu/rbtree.h>
21#include <nvgpu/lock.h>
22
23struct seq_file;
24
25#define __pstat(s, fmt, msg...) \
26 do { \
27 if (s) \
28 seq_printf(s, fmt, ##msg); \
29 else \
30 pr_info(fmt, ##msg); \
31 } while (0)
32
33#define MAX_STACK_TRACE 20
34
35/*
36 * Linux specific version of the nvgpu_kmem_cache struct. This type is
37 * completely opaque to the rest of the driver.
38 */
39struct nvgpu_kmem_cache {
40 struct gk20a *g;
41 struct kmem_cache *cache;
42
43 /*
44 * Memory to hold the kmem_cache unique name. Only necessary on our
45 * k3.10 kernel when not using the SLUB allocator but it's easier to
46 * just carry this on to newer kernels.
47 */
48 char name[128];
49};
50
51#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
52
53struct nvgpu_mem_alloc {
54 struct nvgpu_mem_alloc_tracker *owner;
55
56 void *ip;
57#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
58 unsigned long stack[MAX_STACK_TRACE];
59 int stack_length;
60#endif
61
62 u64 addr;
63
64 unsigned long size;
65 unsigned long real_size;
66
67 struct nvgpu_rbtree_node allocs_entry;
68};
69
70static inline struct nvgpu_mem_alloc *
71nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
72{
73 return (struct nvgpu_mem_alloc *)
74 ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry));
75};
76
77/*
78 * Linux specific tracking of vmalloc, kmalloc, etc.
79 */
80struct nvgpu_mem_alloc_tracker {
81 const char *name;
82 struct nvgpu_kmem_cache *allocs_cache;
83 struct nvgpu_rbtree_node *allocs;
84 struct nvgpu_mutex lock;
85
86 u64 bytes_alloced;
87 u64 bytes_freed;
88 u64 bytes_alloced_real;
89 u64 bytes_freed_real;
90 u64 nr_allocs;
91 u64 nr_frees;
92
93 unsigned long min_alloc;
94 unsigned long max_alloc;
95};
96
97void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
98void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
99
100void kmem_print_mem_alloc(struct gk20a *g,
101 struct nvgpu_mem_alloc *alloc,
102 struct seq_file *s);
103#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
104
105#endif /* __KMEM_PRIV_H__ */
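The offsetof() arithmetic in nvgpu_mem_alloc_from_rbtree_node() above is the usual intrusive-node recovery pattern; a hedged equivalent using the kernel's container_of() macro (illustrative only, not part of the original header):

/* Illustrative sketch only: computes the same address as the
 * open-coded offsetof() version above. */
static inline struct nvgpu_mem_alloc *
example_alloc_from_node(struct nvgpu_rbtree_node *node)
{
	return container_of(node, struct nvgpu_mem_alloc, allocs_entry);
}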
diff --git a/drivers/gpu/nvgpu/common/linux/log.c b/drivers/gpu/nvgpu/common/linux/log.c
deleted file mode 100644
index ca29e0f3..00000000
--- a/drivers/gpu/nvgpu/common/linux/log.c
+++ /dev/null
@@ -1,132 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/device.h>
19
20#include <nvgpu/log.h>
21
22#include "gk20a/gk20a.h"
23#include "platform_gk20a.h"
24#include "os_linux.h"
25
26/*
27 * Define a length for log buffers. This is the buffer that the 'fmt, ...' part
28 * of __nvgpu_do_log_print() prints into. This buffer lives on the stack so it
29 * needs to not be overly sized since we have limited kernel stack space. But at
30 * the same time we don't want it to be restrictive either.
31 */
32#define LOG_BUFFER_LENGTH 160
33
34/*
35 * Annoying quirk of Linux: this has to be a string literal since the printk()
36 * function and friends use the preprocessor to concatenate stuff to the start
37 * of this string when printing.
38 */
39#define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n"
40
41static const char *log_types[] = {
42 "ERR",
43 "WRN",
44 "DBG",
45 "INFO",
46};
47
48int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask)
49{
50 return !!(g->log_mask & log_mask);
51}
52
53static inline const char *nvgpu_log_name(struct gk20a *g)
54{
55 return dev_name(dev_from_gk20a(g));
56}
57
58#ifdef CONFIG_GK20A_TRACE_PRINTK
59static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name,
60 const char *func_name, int line,
61 const char *log_type, const char *log)
62{
63 trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log);
64}
65#endif
66
67static void __nvgpu_really_print_log(u32 trace, const char *gpu_name,
68 const char *func_name, int line,
69 enum nvgpu_log_type type, const char *log)
70{
71 const char *name = gpu_name ? gpu_name : "";
72 const char *log_type = log_types[type];
73
74#ifdef CONFIG_GK20A_TRACE_PRINTK
75 if (trace)
76 return __nvgpu_trace_printk_log(trace, name, func_name,
77 line, log_type, log);
78#endif
79 switch (type) {
80 case NVGPU_DEBUG:
81 /*
82 * We could use pr_debug() here but we control debug enablement
83 * separately from the Linux kernel. Perhaps this is a bug in
84 * nvgpu.
85 */
86 pr_info(LOG_FMT, name, func_name, line, log_type, log);
87 break;
88 case NVGPU_INFO:
89 pr_info(LOG_FMT, name, func_name, line, log_type, log);
90 break;
91 case NVGPU_WARNING:
92 pr_warn(LOG_FMT, name, func_name, line, log_type, log);
93 break;
94 case NVGPU_ERROR:
95 pr_err(LOG_FMT, name, func_name, line, log_type, log);
96 break;
97 }
98}
99
100__attribute__((format (printf, 5, 6)))
101void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line,
102 enum nvgpu_log_type type, const char *fmt, ...)
103{
104 char log[LOG_BUFFER_LENGTH];
105 va_list args;
106
107 va_start(args, fmt);
108 vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
109 va_end(args);
110
111 __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "",
112 func_name, line, type, log);
113}
114
115__attribute__((format (printf, 5, 6)))
116void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask,
117 const char *func_name, int line,
118 const char *fmt, ...)
119{
120 char log[LOG_BUFFER_LENGTH];
121 va_list args;
122
123 if ((log_mask & g->log_mask) == 0)
124 return;
125
126 va_start(args, fmt);
127 vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
128 va_end(args);
129
130 __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g),
131 func_name, line, NVGPU_DEBUG, log);
132}
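A hedged sketch of the mask gating performed by __nvgpu_log_dbg() above: the message is only formatted and printed when the caller's mask bit is also set in g->log_mask. Callers normally go through the nvgpu_log() macro from nvgpu/log.h (not shown in this diff); calling the function directly here is purely illustrative, and the mask parameter is an assumption.

/* Illustrative sketch only: dropped unless (mask & g->log_mask) != 0. */
static void example_dbg_print(struct gk20a *g, u64 mask)
{
	__nvgpu_log_dbg(g, mask, __func__, __LINE__,
			"mapped %d bytes", 4096);
}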
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
deleted file mode 100644
index af71cc81..00000000
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ /dev/null
@@ -1,1365 +0,0 @@
1/*
2 * GK20A Graphics
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/module.h>
20#include <linux/of.h>
21#include <linux/of_device.h>
22#include <linux/of_platform.h>
23#include <linux/of_address.h>
24#include <linux/interrupt.h>
25#include <linux/pm_runtime.h>
26#include <linux/reset.h>
27#include <linux/platform/tegra/common.h>
28#include <linux/pci.h>
29
30#include <uapi/linux/nvgpu.h>
31#include <dt-bindings/soc/gm20b-fuse.h>
32#include <dt-bindings/soc/gp10b-fuse.h>
33
34#include <soc/tegra/fuse.h>
35
36#include <nvgpu/dma.h>
37#include <nvgpu/kmem.h>
38#include <nvgpu/nvgpu_common.h>
39#include <nvgpu/soc.h>
40#include <nvgpu/enabled.h>
41#include <nvgpu/debug.h>
42#include <nvgpu/ctxsw_trace.h>
43#include <nvgpu/vidmem.h>
44#include <nvgpu/sim.h>
45
46#include "platform_gk20a.h"
47#include "sysfs.h"
48#include "vgpu/vgpu_linux.h"
49#include "scale.h"
50#include "pci.h"
51#include "module.h"
52#include "module_usermode.h"
53#include "intr.h"
54#include "ioctl.h"
55
56#include "os_linux.h"
57#include "ctxsw_trace.h"
58#include "driver_common.h"
59#include "channel.h"
60
61#ifdef CONFIG_NVGPU_SUPPORT_CDE
62#include "cde.h"
63#endif
64
65#define CLASS_NAME "nvidia-gpu"
66/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
67
68#define GK20A_WAIT_FOR_IDLE_MS 2000
69
70#define CREATE_TRACE_POINTS
71#include <trace/events/gk20a.h>
72
73
74struct device_node *nvgpu_get_node(struct gk20a *g)
75{
76 struct device *dev = dev_from_gk20a(g);
77
78 if (dev_is_pci(dev)) {
79 struct pci_bus *bus = to_pci_dev(dev)->bus;
80
81 while (!pci_is_root_bus(bus))
82 bus = bus->parent;
83
84 return bus->bridge->parent->of_node;
85 }
86
87 return dev->of_node;
88}
89
90void gk20a_busy_noresume(struct gk20a *g)
91{
92 pm_runtime_get_noresume(dev_from_gk20a(g));
93}
94
95int gk20a_busy(struct gk20a *g)
96{
97 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
98 int ret = 0;
99 struct device *dev;
100
101 if (!g)
102 return -ENODEV;
103
104 atomic_inc(&g->usage_count.atomic_var);
105
106 down_read(&l->busy_lock);
107
108 if (!gk20a_can_busy(g)) {
109 ret = -ENODEV;
110 atomic_dec(&g->usage_count.atomic_var);
111 goto fail;
112 }
113
114 dev = dev_from_gk20a(g);
115
116 if (pm_runtime_enabled(dev)) {
117 /* Increment usage count and attempt to resume device */
118 ret = pm_runtime_get_sync(dev);
119 if (ret < 0) {
120 /* Mark suspended so runtime pm will retry later */
121 pm_runtime_set_suspended(dev);
122 pm_runtime_put_noidle(dev);
123 atomic_dec(&g->usage_count.atomic_var);
124 goto fail;
125 }
126 } else {
127 nvgpu_mutex_acquire(&g->poweron_lock);
128 if (!g->power_on) {
129 ret = gk20a_gpu_is_virtual(dev) ?
130 vgpu_pm_finalize_poweron(dev)
131 : gk20a_pm_finalize_poweron(dev);
132 if (ret) {
133 atomic_dec(&g->usage_count.atomic_var);
134 nvgpu_mutex_release(&g->poweron_lock);
135 goto fail;
136 }
137 }
138 nvgpu_mutex_release(&g->poweron_lock);
139 }
140
141fail:
142 up_read(&l->busy_lock);
143
144 return ret < 0 ? ret : 0;
145}
146
147void gk20a_idle_nosuspend(struct gk20a *g)
148{
149 pm_runtime_put_noidle(dev_from_gk20a(g));
150}
151
152void gk20a_idle(struct gk20a *g)
153{
154 struct device *dev;
155
156 atomic_dec(&g->usage_count.atomic_var);
157
158 dev = dev_from_gk20a(g);
159
160 if (!(dev && gk20a_can_busy(g)))
161 return;
162
163 if (pm_runtime_enabled(dev)) {
164 pm_runtime_mark_last_busy(dev);
165 pm_runtime_put_sync_autosuspend(dev);
166 }
167}
168
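gk20a_busy() and gk20a_idle() above are a reference-counted pair; a hedged sketch of the expected bracketing around hardware access (the body of the access is an assumption):

/* Illustrative sketch only: every successful gk20a_busy() must be
 * balanced by gk20a_idle() once the hardware access is finished. */
static int example_hw_access(struct gk20a *g)
{
	int err;

	err = gk20a_busy(g);
	if (err)
		return err;	/* driver dying or power-on failed */

	/* ... access registers or submit work here ... */

	gk20a_idle(g);
	return 0;
}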
169/*
170 * Undoes gk20a_lockout_registers().
171 */
172static int gk20a_restore_registers(struct gk20a *g)
173{
174 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
175
176 l->regs = l->regs_saved;
177 l->bar1 = l->bar1_saved;
178
179 nvgpu_restore_usermode_registers(g);
180
181 return 0;
182}
183
184static int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l)
185{
186 int err = 0;
187
188#ifdef CONFIG_NVGPU_SUPPORT_CDE
189 err = nvgpu_cde_init_ops(l);
190#endif
191
192 return err;
193}
194
195int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l)
196{
197 struct gk20a *g = &l->g;
198 int err;
199
200 if (l->init_done)
201 return 0;
202
203 err = nvgpu_init_channel_support_linux(l);
204 if (err) {
205 nvgpu_err(g, "failed to init linux channel support");
206 return err;
207 }
208
209 l->init_done = true;
210
211 return 0;
212}
213
214int gk20a_pm_finalize_poweron(struct device *dev)
215{
216 struct gk20a *g = get_gk20a(dev);
217 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
218 struct gk20a_platform *platform = gk20a_get_platform(dev);
219 int err;
220
221 nvgpu_log_fn(g, " ");
222
223 if (g->power_on)
224 return 0;
225
226 trace_gk20a_finalize_poweron(dev_name(dev));
227
228 /* Increment platform power refcount */
229 if (platform->busy) {
230 err = platform->busy(dev);
231 if (err < 0) {
232 nvgpu_err(g, "failed to poweron platform dependency");
233 return err;
234 }
235 }
236
237 err = gk20a_restore_registers(g);
238 if (err)
239 return err;
240
241 /* Enable interrupt workqueue */
242 if (!l->nonstall_work_queue) {
243 l->nonstall_work_queue = alloc_workqueue("%s",
244 WQ_HIGHPRI, 1, "mc_nonstall");
245 INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb);
246 }
247
248 err = gk20a_detect_chip(g);
249 if (err)
250 return err;
251
252 if (g->sim) {
253 if (g->sim->sim_init_late)
254 g->sim->sim_init_late(g);
255 }
256
257 err = gk20a_finalize_poweron(g);
258 if (err)
259 goto done;
260
261 err = nvgpu_finalize_poweron_linux(l);
262 if (err)
263 goto done;
264
265 nvgpu_init_mm_ce_context(g);
266
267 nvgpu_vidmem_thread_unpause(&g->mm);
268
269	/* Initialise scaling: it will initialize the scaling driver only once */
270 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) &&
271 nvgpu_platform_is_silicon(g)) {
272 gk20a_scale_init(dev);
273 if (platform->initscale)
274 platform->initscale(dev);
275 }
276
277 trace_gk20a_finalize_poweron_done(dev_name(dev));
278
279 err = nvgpu_init_os_linux_ops(l);
280 if (err)
281 goto done;
282
283 enable_irq(g->irq_stall);
284 if (g->irq_stall != g->irq_nonstall)
285 enable_irq(g->irq_nonstall);
286 g->irqs_enabled = 1;
287
288 gk20a_scale_resume(dev_from_gk20a(g));
289
290#ifdef CONFIG_NVGPU_SUPPORT_CDE
291 if (platform->has_cde)
292 gk20a_init_cde_support(l);
293#endif
294
295 err = gk20a_sched_ctrl_init(g);
296 if (err) {
297 nvgpu_err(g, "failed to init sched control");
298 return err;
299 }
300
301 g->sw_ready = true;
302
303done:
304 if (err)
305 g->power_on = false;
306
307 return err;
308}
309
310/*
311 * Locks out the driver from accessing GPU registers. This prevents access to
312 * these registers after the GPU has been clock or power gated. This should help
313 * find annoying bugs where register reads and writes are silently dropped
314 * after the GPU has been turned off. On older chips these reads and writes can
315 * also lock the entire CPU up.
316 */
317static int gk20a_lockout_registers(struct gk20a *g)
318{
319 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
320
321 l->regs = NULL;
322 l->bar1 = NULL;
323
324 nvgpu_lockout_usermode_registers(g);
325
326 return 0;
327}
328
329static int gk20a_pm_prepare_poweroff(struct device *dev)
330{
331 struct gk20a *g = get_gk20a(dev);
332#ifdef CONFIG_NVGPU_SUPPORT_CDE
333 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
334#endif
335 int ret = 0;
336 struct gk20a_platform *platform = gk20a_get_platform(dev);
337 bool irqs_enabled;
338
339 nvgpu_log_fn(g, " ");
340
341 nvgpu_mutex_acquire(&g->poweroff_lock);
342
343 if (!g->power_on)
344 goto done;
345
346 /* disable IRQs and wait for completion */
347 irqs_enabled = g->irqs_enabled;
348 if (irqs_enabled) {
349 disable_irq(g->irq_stall);
350 if (g->irq_stall != g->irq_nonstall)
351 disable_irq(g->irq_nonstall);
352 g->irqs_enabled = 0;
353 }
354
355 gk20a_scale_suspend(dev);
356
357#ifdef CONFIG_NVGPU_SUPPORT_CDE
358 gk20a_cde_suspend(l);
359#endif
360
361 ret = gk20a_prepare_poweroff(g);
362 if (ret)
363 goto error;
364
365 /* Decrement platform power refcount */
366 if (platform->idle)
367 platform->idle(dev);
368
369 /* Stop CPU from accessing the GPU registers. */
370 gk20a_lockout_registers(g);
371
372 nvgpu_mutex_release(&g->poweroff_lock);
373 return 0;
374
375error:
376	/* re-enable IRQs if previously enabled */
377 if (irqs_enabled) {
378 enable_irq(g->irq_stall);
379 if (g->irq_stall != g->irq_nonstall)
380 enable_irq(g->irq_nonstall);
381 g->irqs_enabled = 1;
382 }
383
384 gk20a_scale_resume(dev);
385done:
386 nvgpu_mutex_release(&g->poweroff_lock);
387
388 return ret;
389}
390
391static struct of_device_id tegra_gk20a_of_match[] = {
392#ifdef CONFIG_TEGRA_GK20A
393 { .compatible = "nvidia,tegra210-gm20b",
394 .data = &gm20b_tegra_platform },
395 { .compatible = "nvidia,tegra186-gp10b",
396 .data = &gp10b_tegra_platform },
397 { .compatible = "nvidia,gv11b",
398 .data = &gv11b_tegra_platform },
399#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
400 { .compatible = "nvidia,gv11b-vgpu",
401 .data = &gv11b_vgpu_tegra_platform},
402#endif
403#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
404 { .compatible = "nvidia,tegra124-gk20a-vgpu",
405 .data = &vgpu_tegra_platform },
406#endif
407#endif
408
409 { },
410};
411
412#ifdef CONFIG_PM
413/**
414 * __gk20a_do_idle() - force the GPU to idle and railgate
415 *
416 * On success, this call MUST be balanced by the caller with __gk20a_do_unidle()
417 *
418 * Acquires two locks: &l->busy_lock and &platform->railgate_lock
419 * On success, we hold these locks and return
420 * On failure, we release these locks and return
421 */
422int __gk20a_do_idle(struct gk20a *g, bool force_reset)
423{
424 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
425 struct device *dev = dev_from_gk20a(g);
426 struct gk20a_platform *platform = dev_get_drvdata(dev);
427 struct nvgpu_timeout timeout;
428 int ref_cnt;
429 int target_ref_cnt = 0;
430 bool is_railgated;
431 int err = 0;
432
433 /*
434 * Hold back deterministic submits and changes to deterministic
435 * channels - this must be outside the power busy locks.
436 */
437 gk20a_channel_deterministic_idle(g);
438
439 /* acquire busy lock to block other busy() calls */
440 down_write(&l->busy_lock);
441
442 /* acquire railgate lock to prevent unrailgate in midst of do_idle() */
443 nvgpu_mutex_acquire(&platform->railgate_lock);
444
445	/* check if it is already railgated */
446 if (platform->is_railgated(dev))
447 return 0;
448
449 /*
450 * release railgate_lock, prevent suspend by incrementing usage counter,
451 * re-acquire railgate_lock
452 */
453 nvgpu_mutex_release(&platform->railgate_lock);
454 pm_runtime_get_sync(dev);
455
456 /*
457 * One refcount taken in this API
458	 * If the user disables rail gating, we take one
459	 * extra refcount
460 */
461 if (g->can_railgate)
462 target_ref_cnt = 1;
463 else
464 target_ref_cnt = 2;
465 nvgpu_mutex_acquire(&platform->railgate_lock);
466
467 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
468 NVGPU_TIMER_CPU_TIMER);
469
470 /* check and wait until GPU is idle (with a timeout) */
471 do {
472 nvgpu_usleep_range(1000, 1100);
473 ref_cnt = atomic_read(&dev->power.usage_count);
474 } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout));
475
476 if (ref_cnt != target_ref_cnt) {
477 nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt",
478 ref_cnt);
479 goto fail_drop_usage_count;
480 }
481
482 /* check if global force_reset flag is set */
483 force_reset |= platform->force_reset_in_do_idle;
484
485 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
486 NVGPU_TIMER_CPU_TIMER);
487
488 if (g->can_railgate && !force_reset) {
489 /*
490 * Case 1 : GPU railgate is supported
491 *
492 * if GPU is now idle, we will have only one ref count,
493 * drop this ref which will rail gate the GPU
494 */
495 pm_runtime_put_sync(dev);
496
497 /* add sufficient delay to allow GPU to rail gate */
498 nvgpu_msleep(g->railgate_delay);
499
500 /* check in loop if GPU is railgated or not */
501 do {
502 nvgpu_usleep_range(1000, 1100);
503 is_railgated = platform->is_railgated(dev);
504 } while (!is_railgated && !nvgpu_timeout_expired(&timeout));
505
506 if (is_railgated) {
507 return 0;
508 } else {
509 nvgpu_err(g, "failed to idle in timeout");
510 goto fail_timeout;
511 }
512 } else {
513 /*
514 * Case 2 : GPU railgate is not supported or we explicitly
515 * do not want to depend on runtime PM
516 *
517 * if GPU is now idle, call prepare_poweroff() to save the
518 * state and then do explicit railgate
519 *
520 * __gk20a_do_unidle() needs to unrailgate, call
521 * finalize_poweron(), and then call pm_runtime_put_sync()
522 * to balance the GPU usage counter
523 */
524
525 /* Save the GPU state */
526 err = gk20a_pm_prepare_poweroff(dev);
527 if (err)
528 goto fail_drop_usage_count;
529
530 /* railgate GPU */
531 platform->railgate(dev);
532
533 nvgpu_udelay(10);
534
535 g->forced_reset = true;
536 return 0;
537 }
538
539fail_drop_usage_count:
540 pm_runtime_put_noidle(dev);
541fail_timeout:
542 nvgpu_mutex_release(&platform->railgate_lock);
543 up_write(&l->busy_lock);
544 gk20a_channel_deterministic_unidle(g);
545 return -EBUSY;
546}
547
548/**
549 * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called
550 * from outside of GPU driver
551 *
552 * On success, this call MUST be balanced by the caller with gk20a_do_unidle()
553 */
554static int gk20a_do_idle(void *_g)
555{
556 struct gk20a *g = (struct gk20a *)_g;
557
558 return __gk20a_do_idle(g, true);
559}
560
561/**
562 * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle()
563 */
564int __gk20a_do_unidle(struct gk20a *g)
565{
566 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
567 struct device *dev = dev_from_gk20a(g);
568 struct gk20a_platform *platform = dev_get_drvdata(dev);
569 int err;
570
571 if (g->forced_reset) {
572 /*
573 * If we did a forced-reset/railgate
574 * then unrailgate the GPU here first
575 */
576 platform->unrailgate(dev);
577
578 /* restore the GPU state */
579 err = gk20a_pm_finalize_poweron(dev);
580 if (err)
581 return err;
582
583 /* balance GPU usage counter */
584 pm_runtime_put_sync(dev);
585
586 g->forced_reset = false;
587 }
588
589 /* release the lock and open up all other busy() calls */
590 nvgpu_mutex_release(&platform->railgate_lock);
591 up_write(&l->busy_lock);
592
593 gk20a_channel_deterministic_unidle(g);
594
595 return 0;
596}
597
598/**
599 * gk20a_do_unidle() - wrap up for __gk20a_do_unidle()
600 */
601static int gk20a_do_unidle(void *_g)
602{
603 struct gk20a *g = (struct gk20a *)_g;
604
605 return __gk20a_do_unidle(g);
606}
607#endif
608
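As the kernel-doc above stresses, a successful __gk20a_do_idle() returns with busy_lock and railgate_lock held and must be balanced by __gk20a_do_unidle(); a hedged sketch of that pairing (the work in the middle is an assumption):

/* Illustrative sketch only: on failure __gk20a_do_idle() has already
 * dropped its locks, so the caller simply returns the error. */
static int example_forced_idle(struct gk20a *g)
{
	int err;

	err = __gk20a_do_idle(g, true);
	if (err)
		return err;

	/* ... operate on the quiesced GPU here ... */

	return __gk20a_do_unidle(g);
}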
609void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i,
610 struct resource **out)
611{
612 struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
613
614 if (!r)
615 return NULL;
616 if (out)
617 *out = r;
618 return devm_ioremap_resource(&dev->dev, r);
619}
620
621static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
622{
623 struct gk20a *g = dev_id;
624
625 return nvgpu_intr_stall(g);
626}
627
628static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
629{
630 struct gk20a *g = dev_id;
631
632 return nvgpu_intr_nonstall(g);
633}
634
635static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
636{
637 struct gk20a *g = dev_id;
638
639 return nvgpu_intr_thread_stall(g);
640}
641
642void gk20a_remove_support(struct gk20a *g)
643{
644 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
645 struct sim_nvgpu_linux *sim_linux;
646
647 tegra_unregister_idle_unidle(gk20a_do_idle);
648
649 nvgpu_kfree(g, g->dbg_regops_tmp_buf);
650
651 nvgpu_remove_channel_support_linux(l);
652
653 if (g->pmu.remove_support)
654 g->pmu.remove_support(&g->pmu);
655
656 if (g->gr.remove_support)
657 g->gr.remove_support(&g->gr);
658
659 if (g->mm.remove_ce_support)
660 g->mm.remove_ce_support(&g->mm);
661
662 if (g->fifo.remove_support)
663 g->fifo.remove_support(&g->fifo);
664
665 if (g->mm.remove_support)
666 g->mm.remove_support(&g->mm);
667
668 if (g->sim) {
669 sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
670 if (g->sim->remove_support)
671 g->sim->remove_support(g);
672 if (sim_linux->remove_support_linux)
673 sim_linux->remove_support_linux(g);
674 }
675
676 /* free mappings to registers, etc */
677 if (l->regs) {
678 iounmap(l->regs);
679 l->regs = NULL;
680 }
681 if (l->bar1) {
682 iounmap(l->bar1);
683 l->bar1 = NULL;
684 }
685
686 nvgpu_remove_usermode_support(g);
687
688 nvgpu_free_enabled_flags(g);
689}
690
691static int gk20a_init_support(struct platform_device *dev)
692{
693 int err = -ENOMEM;
694 struct gk20a *g = get_gk20a(&dev->dev);
695 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
696
697 tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g);
698
699 l->regs = nvgpu_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
700 &l->reg_mem);
701 if (IS_ERR(l->regs)) {
702 nvgpu_err(g, "failed to remap gk20a registers");
703 err = PTR_ERR(l->regs);
704 goto fail;
705 }
706
707 l->bar1 = nvgpu_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
708 &l->bar1_mem);
709 if (IS_ERR(l->bar1)) {
710 nvgpu_err(g, "failed to remap gk20a bar1");
711 err = PTR_ERR(l->bar1);
712 goto fail;
713 }
714
715 err = nvgpu_init_sim_support_linux(g, dev);
716 if (err)
717 goto fail;
718 err = nvgpu_init_sim_support(g);
719 if (err)
720 goto fail_sim;
721
722 nvgpu_init_usermode_support(g);
723 return 0;
724
725fail_sim:
726 nvgpu_remove_sim_support_linux(g);
727fail:
728 if (l->regs) {
729 iounmap(l->regs);
730 l->regs = NULL;
731 }
732 if (l->bar1) {
733 iounmap(l->bar1);
734 l->bar1 = NULL;
735 }
736
737 return err;
738}
739
740static int gk20a_pm_railgate(struct device *dev)
741{
742 struct gk20a_platform *platform = dev_get_drvdata(dev);
743 int ret = 0;
744 struct gk20a *g = get_gk20a(dev);
745
746 /* if platform is already railgated, then just return */
747 if (platform->is_railgated && platform->is_railgated(dev))
748 return ret;
749
750#ifdef CONFIG_DEBUG_FS
751 g->pstats.last_rail_gate_start = jiffies;
752
753 if (g->pstats.railgating_cycle_count >= 1)
754 g->pstats.total_rail_ungate_time_ms =
755 g->pstats.total_rail_ungate_time_ms +
756 jiffies_to_msecs(g->pstats.last_rail_gate_start -
757 g->pstats.last_rail_ungate_complete);
758#endif
759
760 if (platform->railgate)
761 ret = platform->railgate(dev);
762 if (ret) {
763 nvgpu_err(g, "failed to railgate platform, err=%d", ret);
764 return ret;
765 }
766
767#ifdef CONFIG_DEBUG_FS
768 g->pstats.last_rail_gate_complete = jiffies;
769#endif
770 ret = tegra_fuse_clock_disable();
771 if (ret)
772 nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret);
773
774 return ret;
775}
776
777static int gk20a_pm_unrailgate(struct device *dev)
778{
779 struct gk20a_platform *platform = dev_get_drvdata(dev);
780 int ret = 0;
781 struct gk20a *g = get_gk20a(dev);
782
783 ret = tegra_fuse_clock_enable();
784 if (ret) {
785 nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret);
786 return ret;
787 }
788#ifdef CONFIG_DEBUG_FS
789 g->pstats.last_rail_ungate_start = jiffies;
790 if (g->pstats.railgating_cycle_count >= 1)
791 g->pstats.total_rail_gate_time_ms =
792 g->pstats.total_rail_gate_time_ms +
793 jiffies_to_msecs(g->pstats.last_rail_ungate_start -
794 g->pstats.last_rail_gate_complete);
795
796 g->pstats.railgating_cycle_count++;
797#endif
798
799 trace_gk20a_pm_unrailgate(dev_name(dev));
800
801 if (platform->unrailgate) {
802 nvgpu_mutex_acquire(&platform->railgate_lock);
803 ret = platform->unrailgate(dev);
804 nvgpu_mutex_release(&platform->railgate_lock);
805 }
806
807#ifdef CONFIG_DEBUG_FS
808 g->pstats.last_rail_ungate_complete = jiffies;
809#endif
810
811 return ret;
812}
813
814/*
815 * Remove association of the driver with OS interrupt handler
816 */
817void nvgpu_free_irq(struct gk20a *g)
818{
819 struct device *dev = dev_from_gk20a(g);
820
821 devm_free_irq(dev, g->irq_stall, g);
822 if (g->irq_stall != g->irq_nonstall)
823 devm_free_irq(dev, g->irq_nonstall, g);
824}
825
826/*
827 * Idle the GPU in preparation for shutdown/remove.
828 * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW
829 * state to prevent further activity on the driver SW side.
830 * On driver removal, quiesce() should be called after start_unload()
831 */
832int nvgpu_quiesce(struct gk20a *g)
833{
834 int err;
835 struct device *dev = dev_from_gk20a(g);
836
837 if (g->power_on) {
838 err = gk20a_wait_for_idle(g);
839 if (err) {
840 nvgpu_err(g, "failed to idle GPU, err=%d", err);
841 return err;
842 }
843
844 err = gk20a_fifo_disable_all_engine_activity(g, true);
845 if (err) {
846 nvgpu_err(g,
847 "failed to disable engine activity, err=%d",
848 err);
849 return err;
850 }
851
852 err = gk20a_fifo_wait_engine_idle(g);
853 if (err) {
854 nvgpu_err(g, "failed to idle engines, err=%d",
855 err);
856 return err;
857 }
858 }
859
860 if (gk20a_gpu_is_virtual(dev))
861 err = vgpu_pm_prepare_poweroff(dev);
862 else
863 err = gk20a_pm_prepare_poweroff(dev);
864
865 if (err)
866 nvgpu_err(g, "failed to prepare for poweroff, err=%d",
867 err);
868
869 return err;
870}
871
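The comment above nvgpu_quiesce() spells out the expected removal ordering; a hedged sketch mirroring what gk20a_pm_shutdown() below does (the wrapper itself is illustrative):

/* Illustrative sketch only: stop new driver activity first, then idle
 * and power off the GPU. */
static int example_driver_teardown(struct gk20a *g)
{
	gk20a_driver_start_unload(g);
	return nvgpu_quiesce(g);
}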
872static void gk20a_pm_shutdown(struct platform_device *pdev)
873{
874 struct gk20a_platform *platform = platform_get_drvdata(pdev);
875 struct gk20a *g = platform->g;
876 int err;
877
878 nvgpu_info(g, "shutting down");
879
880 /* vgpu has nothing to clean up currently */
881 if (gk20a_gpu_is_virtual(&pdev->dev))
882 return;
883
884 if (!g->power_on)
885 goto finish;
886
887 gk20a_driver_start_unload(g);
888
889 /* If GPU is already railgated,
890 * just prevent more requests, and return */
891 if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
892 __pm_runtime_disable(&pdev->dev, false);
893 nvgpu_info(g, "already railgated, shut down complete");
894 return;
895 }
896
897 /* Prevent more requests by disabling Runtime PM */
898 __pm_runtime_disable(&pdev->dev, false);
899
900 err = nvgpu_quiesce(g);
901 if (err)
902 goto finish;
903
904 err = gk20a_pm_railgate(&pdev->dev);
905 if (err)
906 nvgpu_err(g, "failed to railgate, err=%d", err);
907
908finish:
909 nvgpu_info(g, "shut down complete");
910}
911
912#ifdef CONFIG_PM
913static int gk20a_pm_runtime_resume(struct device *dev)
914{
915 int err = 0;
916
917 err = gk20a_pm_unrailgate(dev);
918 if (err)
919 goto fail;
920
921 if (gk20a_gpu_is_virtual(dev))
922 err = vgpu_pm_finalize_poweron(dev);
923 else
924 err = gk20a_pm_finalize_poweron(dev);
925 if (err)
926 goto fail_poweron;
927
928 return 0;
929
930fail_poweron:
931 gk20a_pm_railgate(dev);
932fail:
933 return err;
934}
935
936static int gk20a_pm_runtime_suspend(struct device *dev)
937{
938 int err = 0;
939 struct gk20a *g = get_gk20a(dev);
940
941 if (gk20a_gpu_is_virtual(dev))
942 err = vgpu_pm_prepare_poweroff(dev);
943 else
944 err = gk20a_pm_prepare_poweroff(dev);
945 if (err) {
946 nvgpu_err(g, "failed to power off, err=%d", err);
947 goto fail;
948 }
949
950 err = gk20a_pm_railgate(dev);
951 if (err)
952 goto fail;
953
954 return 0;
955
956fail:
957 gk20a_pm_finalize_poweron(dev);
958 pm_runtime_mark_last_busy(dev);
959 return err;
960}
961
962static int gk20a_pm_suspend(struct device *dev)
963{
964 struct gk20a_platform *platform = dev_get_drvdata(dev);
965 struct gk20a *g = get_gk20a(dev);
966 int ret = 0;
967 int idle_usage_count = 0;
968
969 if (!g->power_on) {
970 if (!pm_runtime_enabled(dev))
971 gk20a_pm_railgate(dev);
972 return 0;
973 }
974
975 if (nvgpu_atomic_read(&g->usage_count) > idle_usage_count)
976 return -EBUSY;
977
978 ret = gk20a_pm_runtime_suspend(dev);
979 if (ret)
980 return ret;
981
982 if (platform->suspend)
983 platform->suspend(dev);
984
985 g->suspended = true;
986
987 return 0;
988}
989
990static int gk20a_pm_resume(struct device *dev)
991{
992 struct gk20a *g = get_gk20a(dev);
993 int ret = 0;
994
995 if (!g->suspended) {
996 if (!pm_runtime_enabled(dev))
997 gk20a_pm_unrailgate(dev);
998 return 0;
999 }
1000
1001 ret = gk20a_pm_runtime_resume(dev);
1002
1003 g->suspended = false;
1004
1005 return ret;
1006}
1007
1008static const struct dev_pm_ops gk20a_pm_ops = {
1009 .runtime_resume = gk20a_pm_runtime_resume,
1010 .runtime_suspend = gk20a_pm_runtime_suspend,
1011 .resume = gk20a_pm_resume,
1012 .suspend = gk20a_pm_suspend,
1013};
1014#endif
1015
1016static int gk20a_pm_init(struct device *dev)
1017{
1018 struct gk20a *g = get_gk20a(dev);
1019 int err = 0;
1020
1021 nvgpu_log_fn(g, " ");
1022
1023 /*
1024	 * Initialise runtime PM. When railgating is disabled,
1025	 * set the autosuspend delay to a negative value, which
1026	 * prevents runtime suspend.
1027 */
1028 if (g->railgate_delay && g->can_railgate)
1029 pm_runtime_set_autosuspend_delay(dev,
1030 g->railgate_delay);
1031 else
1032 pm_runtime_set_autosuspend_delay(dev, -1);
1033
1034 pm_runtime_use_autosuspend(dev);
1035 pm_runtime_enable(dev);
1036
1037 return err;
1038}
1039
1040/*
1041 * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING.
1042 */
1043void gk20a_driver_start_unload(struct gk20a *g)
1044{
1045 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
1046
1047 nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n");
1048
1049 down_write(&l->busy_lock);
1050 __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
1051 /* GR SW ready needs to be invalidated at this time with the busy lock
1052	 * held to prevent a race condition in the gr/mm code */
1053 g->gr.sw_ready = false;
1054 g->sw_ready = false;
1055 up_write(&l->busy_lock);
1056
1057 if (g->is_virtual)
1058 return;
1059
1060 gk20a_wait_for_idle(g);
1061
1062 nvgpu_wait_for_deferred_interrupts(g);
1063
1064 if (l->nonstall_work_queue) {
1065 cancel_work_sync(&l->nonstall_fn_work);
1066 destroy_workqueue(l->nonstall_work_queue);
1067 l->nonstall_work_queue = NULL;
1068 }
1069}
1070
1071static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
1072{
1073 gk20a_get_platform(&pdev->dev)->g = gk20a;
1074}
1075
1076static int nvgpu_read_fuse_overrides(struct gk20a *g)
1077{
1078 struct device_node *np = nvgpu_get_node(g);
1079 u32 *fuses;
1080 int count, i;
1081
1082 if (!np) /* may be pcie device */
1083 return 0;
1084
1085 count = of_property_count_elems_of_size(np, "fuse-overrides", 8);
1086 if (count <= 0)
1087 return count;
1088
1089 fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2);
1090 if (!fuses)
1091 return -ENOMEM;
1092 of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2);
1093 for (i = 0; i < count; i++) {
1094 u32 fuse, value;
1095
1096 fuse = fuses[2 * i];
1097 value = fuses[2 * i + 1];
1098 switch (fuse) {
1099 case GM20B_FUSE_OPT_TPC_DISABLE:
1100 g->tpc_fs_mask_user = ~value;
1101 break;
1102 case GP10B_FUSE_OPT_ECC_EN:
1103 g->gr.fecs_feature_override_ecc_val = value;
1104 break;
1105 default:
1106 nvgpu_err(g, "ignore unknown fuse override %08x", fuse);
1107 break;
1108 }
1109 }
1110
1111 nvgpu_kfree(g, fuses);
1112
1113 return 0;
1114}
1115
1116static int gk20a_probe(struct platform_device *dev)
1117{
1118 struct nvgpu_os_linux *l = NULL;
1119 struct gk20a *gk20a;
1120 int err;
1121 struct gk20a_platform *platform = NULL;
1122 struct device_node *np;
1123
1124 if (dev->dev.of_node) {
1125 const struct of_device_id *match;
1126
1127 match = of_match_device(tegra_gk20a_of_match, &dev->dev);
1128 if (match)
1129 platform = (struct gk20a_platform *)match->data;
1130 } else
1131 platform = (struct gk20a_platform *)dev->dev.platform_data;
1132
1133 if (!platform) {
1134 dev_err(&dev->dev, "no platform data\n");
1135 return -ENODATA;
1136 }
1137
1138 platform_set_drvdata(dev, platform);
1139
1140 if (gk20a_gpu_is_virtual(&dev->dev))
1141 return vgpu_probe(dev);
1142
1143 l = kzalloc(sizeof(*l), GFP_KERNEL);
1144 if (!l) {
1145 dev_err(&dev->dev, "couldn't allocate gk20a support");
1146 return -ENOMEM;
1147 }
1148
1149 hash_init(l->ecc_sysfs_stats_htable);
1150
1151 gk20a = &l->g;
1152
1153 nvgpu_log_fn(gk20a, " ");
1154
1155 nvgpu_init_gk20a(gk20a);
1156 set_gk20a(dev, gk20a);
1157 l->dev = &dev->dev;
1158 gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK;
1159
1160 nvgpu_kmem_init(gk20a);
1161
1162 err = nvgpu_init_enabled_flags(gk20a);
1163 if (err)
1164 goto return_err;
1165
1166 np = nvgpu_get_node(gk20a);
1167 if (of_dma_is_coherent(np)) {
1168 __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
1169 __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
1170 }
1171
1172 if (nvgpu_platform_is_simulation(gk20a))
1173 __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
1174
1175 gk20a->irq_stall = platform_get_irq(dev, 0);
1176 gk20a->irq_nonstall = platform_get_irq(dev, 1);
1177 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) {
1178 err = -ENXIO;
1179 goto return_err;
1180 }
1181
1182 err = devm_request_threaded_irq(&dev->dev,
1183 gk20a->irq_stall,
1184 gk20a_intr_isr_stall,
1185 gk20a_intr_thread_stall,
1186 0, "gk20a_stall", gk20a);
1187 if (err) {
1188 dev_err(&dev->dev,
1189 "failed to request stall intr irq @ %d\n",
1190 gk20a->irq_stall);
1191 goto return_err;
1192 }
1193 err = devm_request_irq(&dev->dev,
1194 gk20a->irq_nonstall,
1195 gk20a_intr_isr_nonstall,
1196 0, "gk20a_nonstall", gk20a);
1197 if (err) {
1198 dev_err(&dev->dev,
1199 "failed to request non-stall intr irq @ %d\n",
1200 gk20a->irq_nonstall);
1201 goto return_err;
1202 }
1203 disable_irq(gk20a->irq_stall);
1204 if (gk20a->irq_stall != gk20a->irq_nonstall)
1205 disable_irq(gk20a->irq_nonstall);
1206
1207 err = gk20a_init_support(dev);
1208 if (err)
1209 goto return_err;
1210
1211 err = nvgpu_read_fuse_overrides(gk20a);
1212
1213#ifdef CONFIG_RESET_CONTROLLER
1214 platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
1215 if (IS_ERR(platform->reset_control))
1216 platform->reset_control = NULL;
1217#endif
1218
1219 err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class);
1220 if (err)
1221 goto return_err;
1222
1223 err = gk20a_pm_init(&dev->dev);
1224 if (err) {
1225 dev_err(&dev->dev, "pm init failed");
1226 goto return_err;
1227 }
1228
1229 gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a);
1230
1231 return 0;
1232
1233return_err:
1234 nvgpu_free_enabled_flags(gk20a);
1235
1236 /*
1237 * Last since the above allocs may use data structures in here.
1238 */
1239 nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP);
1240
1241 kfree(l);
1242
1243 return err;
1244}
1245
1246int nvgpu_remove(struct device *dev, struct class *class)
1247{
1248 struct gk20a *g = get_gk20a(dev);
1249#ifdef CONFIG_NVGPU_SUPPORT_CDE
1250 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
1251#endif
1252 struct gk20a_platform *platform = gk20a_get_platform(dev);
1253 int err;
1254
1255 nvgpu_log_fn(g, " ");
1256
1257 err = nvgpu_quiesce(g);
1258 WARN(err, "gpu failed to idle during driver removal");
1259
1260 if (nvgpu_mem_is_valid(&g->syncpt_mem))
1261 nvgpu_dma_free(g, &g->syncpt_mem);
1262
1263#ifdef CONFIG_NVGPU_SUPPORT_CDE
1264 if (platform->has_cde)
1265 gk20a_cde_destroy(l);
1266#endif
1267
1268#ifdef CONFIG_GK20A_CTXSW_TRACE
1269 gk20a_ctxsw_trace_cleanup(g);
1270#endif
1271
1272 gk20a_sched_ctrl_cleanup(g);
1273
1274 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
1275 gk20a_scale_exit(dev);
1276
1277 nvgpu_clk_arb_cleanup_arbiter(g);
1278
1279 gk20a_user_deinit(dev, class);
1280
1281 gk20a_debug_deinit(g);
1282
1283 nvgpu_remove_sysfs(dev);
1284
1285 if (platform->secure_buffer.destroy)
1286 platform->secure_buffer.destroy(g,
1287 &platform->secure_buffer);
1288
1289 if (pm_runtime_enabled(dev))
1290 pm_runtime_disable(dev);
1291
1292 if (platform->remove)
1293 platform->remove(dev);
1294
1295 nvgpu_log_fn(g, "removed");
1296
1297 return err;
1298}
1299
1300static int __exit gk20a_remove(struct platform_device *pdev)
1301{
1302 int err;
1303 struct device *dev = &pdev->dev;
1304 struct gk20a *g = get_gk20a(dev);
1305
1306 if (gk20a_gpu_is_virtual(dev))
1307 return vgpu_remove(pdev);
1308
1309 err = nvgpu_remove(dev, &nvgpu_class);
1310
1311 set_gk20a(pdev, NULL);
1312 gk20a_put(g);
1313
1314 return err;
1315}
1316
1317static struct platform_driver gk20a_driver = {
1318 .probe = gk20a_probe,
1319 .remove = __exit_p(gk20a_remove),
1320 .shutdown = gk20a_pm_shutdown,
1321 .driver = {
1322 .owner = THIS_MODULE,
1323 .name = "gk20a",
1324 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
1325#ifdef CONFIG_OF
1326 .of_match_table = tegra_gk20a_of_match,
1327#endif
1328#ifdef CONFIG_PM
1329 .pm = &gk20a_pm_ops,
1330#endif
1331 .suppress_bind_attrs = true,
1332 }
1333};
1334
1335struct class nvgpu_class = {
1336 .owner = THIS_MODULE,
1337 .name = CLASS_NAME,
1338};
1339
1340static int __init gk20a_init(void)
1341{
1342
1343 int ret;
1344
1345 ret = class_register(&nvgpu_class);
1346 if (ret)
1347 return ret;
1348
1349 ret = nvgpu_pci_init();
1350 if (ret)
1351 return ret;
1352
1353 return platform_driver_register(&gk20a_driver);
1354}
1355
1356static void __exit gk20a_exit(void)
1357{
1358 nvgpu_pci_exit();
1359 platform_driver_unregister(&gk20a_driver);
1360 class_unregister(&nvgpu_class);
1361}
1362
1363MODULE_LICENSE("GPL v2");
1364module_init(gk20a_init);
1365module_exit(gk20a_exit);
diff --git a/drivers/gpu/nvgpu/common/linux/module.h b/drivers/gpu/nvgpu/common/linux/module.h
deleted file mode 100644
index ab4bca03..00000000
--- a/drivers/gpu/nvgpu/common/linux/module.h
+++ /dev/null
@@ -1,32 +0,0 @@
1/*
2 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
14#define __NVGPU_COMMON_LINUX_MODULE_H__
15
16struct gk20a;
17struct device;
18struct nvgpu_os_linux;
19
20int gk20a_pm_finalize_poweron(struct device *dev);
21int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
22void gk20a_remove_support(struct gk20a *g);
23void gk20a_driver_start_unload(struct gk20a *g);
24int nvgpu_quiesce(struct gk20a *g);
25int nvgpu_remove(struct device *dev, struct class *class);
26void nvgpu_free_irq(struct gk20a *g);
27struct device_node *nvgpu_get_node(struct gk20a *g);
28void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i,
29 struct resource **out);
30extern struct class nvgpu_class;
31
32#endif
diff --git a/drivers/gpu/nvgpu/common/linux/module_usermode.c b/drivers/gpu/nvgpu/common/linux/module_usermode.c
deleted file mode 100644
index 61cb4e87..00000000
--- a/drivers/gpu/nvgpu/common/linux/module_usermode.c
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/types.h>
18
19#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
20
21#include "common/linux/os_linux.h"
22
23/*
24 * Locks out the driver from accessing GPU registers. This prevents access to
25 * these registers after the GPU has been clock or power gated. This should help
26 * find annoying bugs where register reads and writes are silently dropped
27 * after the GPU has been turned off. On older chips these reads and writes can
28 * also lock the entire CPU up.
29 */
30void nvgpu_lockout_usermode_registers(struct gk20a *g)
31{
32 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
33
34 l->usermode_regs = NULL;
35}
36
37/*
38 * Undoes nvgpu_lockout_usermode_registers().
39 */
40void nvgpu_restore_usermode_registers(struct gk20a *g)
41{
42 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
43
44 l->usermode_regs = l->usermode_regs_saved;
45}
46
47void nvgpu_remove_usermode_support(struct gk20a *g)
48{
49 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
50
51 if (l->usermode_regs) {
52 l->usermode_regs = NULL;
53 }
54}
55
56void nvgpu_init_usermode_support(struct gk20a *g)
57{
58 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
59
60 l->usermode_regs = l->regs + usermode_cfg0_r();
61 l->usermode_regs_saved = l->usermode_regs;
62}
diff --git a/drivers/gpu/nvgpu/common/linux/module_usermode.h b/drivers/gpu/nvgpu/common/linux/module_usermode.h
deleted file mode 100644
index b17053ca..00000000
--- a/drivers/gpu/nvgpu/common/linux/module_usermode.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_MODULE_T19X_H__
18#define __NVGPU_MODULE_T19X_H__
19
20struct gk20a;
21
22void nvgpu_init_usermode_support(struct gk20a *g);
23void nvgpu_remove_usermode_support(struct gk20a *g);
24void nvgpu_lockout_usermode_registers(struct gk20a *g);
25void nvgpu_restore_usermode_registers(struct gk20a *g);
26
27#endif
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
deleted file mode 100644
index 93925803..00000000
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ /dev/null
@@ -1,613 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/dma.h>
18#include <nvgpu/gmmu.h>
19#include <nvgpu/nvgpu_mem.h>
20#include <nvgpu/page_allocator.h>
21#include <nvgpu/log.h>
22#include <nvgpu/bug.h>
23#include <nvgpu/enabled.h>
24#include <nvgpu/kmem.h>
25#include <nvgpu/vidmem.h>
26
27#include <nvgpu/linux/dma.h>
28#include <nvgpu/linux/vidmem.h>
29
30#include <linux/vmalloc.h>
31
32#include "os_linux.h"
33
34#include "gk20a/gk20a.h"
35#include "gk20a/mm_gk20a.h"
36#include "platform_gk20a.h"
37
38static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
39{
40 struct device *dev = dev_from_gk20a(g);
41 struct gk20a_platform *platform = gk20a_get_platform(dev);
42 u64 ipa = sg_phys((struct scatterlist *)sgl);
43
44 if (platform->phys_addr)
45 return platform->phys_addr(g, ipa);
46
47 return ipa;
48}
49
50int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
51{
52 void *cpu_va;
53 pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
54 PAGE_KERNEL :
55 pgprot_writecombine(PAGE_KERNEL);
56
57 if (mem->aperture != APERTURE_SYSMEM)
58 return 0;
59
60 /*
61	 * WAR for bug 2040115: we already have a coherent vmap()
62 * for all sysmem buffers. The prot settings are left alone since
63 * eventually this should be deleted.
64 */
65 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
66 return 0;
67
68 /*
69 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
70 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
71 * another CPU mapping.
72 */
73 if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
74 return 0;
75
76 if (WARN_ON(mem->cpu_va)) {
77 nvgpu_warn(g, "nested");
78 return -EBUSY;
79 }
80
81 cpu_va = vmap(mem->priv.pages,
82 PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
83 0, prot);
84
85 if (WARN_ON(!cpu_va))
86 return -ENOMEM;
87
88 mem->cpu_va = cpu_va;
89 return 0;
90}
91
92void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
93{
94 if (mem->aperture != APERTURE_SYSMEM)
95 return;
96
97 /*
98 * WAR for bug 2040115: skip this since the map will be taken care of
99 * during the free in the DMA API.
100 */
101 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
102 return;
103
104 /*
105 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
106 * already made by the DMA API.
107 */
108 if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
109 return;
110
111 vunmap(mem->cpu_va);
112 mem->cpu_va = NULL;
113}
114
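A hedged sketch of the begin/end bracket for a SYSMEM buffer allocated with NVGPU_DMA_NO_KERNEL_MAPPING, which is the only case where nvgpu_mem_begin() above creates a mapping; nvgpu_mem_wr32() used in the middle is defined later in this same file, and the write itself is an assumption:

/* Illustrative sketch only: map, write one word, unmap. */
static int example_mem_poke(struct gk20a *g, struct nvgpu_mem *mem)
{
	int err;

	err = nvgpu_mem_begin(g, mem);
	if (err)
		return err;

	nvgpu_mem_wr32(g, mem, 0, 0xcafe);	/* word index 0 */

	nvgpu_mem_end(g, mem);
	return 0;
}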
115static void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg)
116{
117 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
118 u32 r = start, *dest_u32 = *arg;
119
120 if (!l->regs) {
121 __gk20a_warn_on_no_regs();
122 return;
123 }
124
125 while (words--) {
126 *dest_u32++ = gk20a_readl(g, r);
127 r += sizeof(u32);
128 }
129
130 *arg = dest_u32;
131}
132
133u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
134{
135 u32 data = 0;
136
137 if (mem->aperture == APERTURE_SYSMEM) {
138 u32 *ptr = mem->cpu_va;
139
140 WARN_ON(!ptr);
141 data = ptr[w];
142#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
143 nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
144#endif
145 } else if (mem->aperture == APERTURE_VIDMEM) {
146 u32 value;
147 u32 *p = &value;
148
149 nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
150 sizeof(u32), pramin_access_batch_rd_n, &p);
151
152 data = value;
153
154 } else {
155 WARN_ON("Accessing unallocated nvgpu_mem");
156 }
157
158 return data;
159}
160
161u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
162{
163 WARN_ON(offset & 3);
164 return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
165}
166
167void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
168 u32 offset, void *dest, u32 size)
169{
170 WARN_ON(offset & 3);
171 WARN_ON(size & 3);
172
173 if (mem->aperture == APERTURE_SYSMEM) {
174 u8 *src = (u8 *)mem->cpu_va + offset;
175
176 WARN_ON(!mem->cpu_va);
177 memcpy(dest, src, size);
178#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
179 if (size)
180 nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
181 src, *dest, size);
182#endif
183 } else if (mem->aperture == APERTURE_VIDMEM) {
184 u32 *dest_u32 = dest;
185
186 nvgpu_pramin_access_batched(g, mem, offset, size,
187 pramin_access_batch_rd_n, &dest_u32);
188 } else {
189 WARN_ON("Accessing unallocated nvgpu_mem");
190 }
191}
192
193static void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg)
194{
195 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
196 u32 r = start, *src_u32 = *arg;
197
198 if (!l->regs) {
199 __gk20a_warn_on_no_regs();
200 return;
201 }
202
203 while (words--) {
204 writel_relaxed(*src_u32++, l->regs + r);
205 r += sizeof(u32);
206 }
207
208 *arg = src_u32;
209}
210
211void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
212{
213 if (mem->aperture == APERTURE_SYSMEM) {
214 u32 *ptr = mem->cpu_va;
215
216 WARN_ON(!ptr);
217#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
218 nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
219#endif
220 ptr[w] = data;
221 } else if (mem->aperture == APERTURE_VIDMEM) {
222 u32 value = data;
223 u32 *p = &value;
224
225 nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
226 sizeof(u32), pramin_access_batch_wr_n, &p);
227 if (!mem->skip_wmb)
228 wmb();
229 } else {
230 WARN_ON("Accessing unallocated nvgpu_mem");
231 }
232}
233
234void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
235{
236 WARN_ON(offset & 3);
237 nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
238}
239
240void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
241 void *src, u32 size)
242{
243 WARN_ON(offset & 3);
244 WARN_ON(size & 3);
245
246 if (mem->aperture == APERTURE_SYSMEM) {
247 u8 *dest = (u8 *)mem->cpu_va + offset;
248
249 WARN_ON(!mem->cpu_va);
250#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
251 if (size)
252 nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
253 dest, *src, size);
254#endif
255 memcpy(dest, src, size);
256 } else if (mem->aperture == APERTURE_VIDMEM) {
257 u32 *src_u32 = src;
258
259 nvgpu_pramin_access_batched(g, mem, offset, size,
260 pramin_access_batch_wr_n, &src_u32);
261 if (!mem->skip_wmb)
262 wmb();
263 } else {
264 WARN_ON("Accessing unallocated nvgpu_mem");
265 }
266}
267
268static void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg)
269{
270 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
271 u32 r = start, repeat = **arg;
272
273 if (!l->regs) {
274 __gk20a_warn_on_no_regs();
275 return;
276 }
277
278 while (words--) {
279 writel_relaxed(repeat, l->regs + r);
280 r += sizeof(u32);
281 }
282}
283
284void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
285 u32 c, u32 size)
286{
287 WARN_ON(offset & 3);
288 WARN_ON(size & 3);
289 WARN_ON(c & ~0xff);
290
291 c &= 0xff;
292
293 if (mem->aperture == APERTURE_SYSMEM) {
294 u8 *dest = (u8 *)mem->cpu_va + offset;
295
296 WARN_ON(!mem->cpu_va);
297#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
298 if (size)
299 nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]",
300 dest, c, size);
301#endif
302 memset(dest, c, size);
303 } else if (mem->aperture == APERTURE_VIDMEM) {
304 u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
305 u32 *p = &repeat_value;
306
307 nvgpu_pramin_access_batched(g, mem, offset, size,
308 pramin_access_batch_set, &p);
309 if (!mem->skip_wmb)
310 wmb();
311 } else {
312 WARN_ON("Accessing unallocated nvgpu_mem");
313 }
314}
315
316/*
317 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
318 * and/or become private to this file once all bad usages of Linux SGLs are
319 * cleaned up in the driver.
320 */
321u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
322{
323 if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
324 !nvgpu_iommuable(g))
325 return g->ops.mm.gpu_phys_addr(g, NULL,
326 __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));
327
328 if (sg_dma_address(sgl) == 0)
329 return g->ops.mm.gpu_phys_addr(g, NULL,
330 __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));
331
332 if (sg_dma_address(sgl) == DMA_ERROR_CODE)
333 return 0;
334
335 return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
336}
337
338/*
339 * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM
340 * allocation.
341 */
342static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
343{
344 return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
345}
346
347/*
348 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
349 * allocation.
350 *
351 * Note: this API does not make sense to use for _VIDMEM_ buffers with greater
352 * than one scatterlist chunk. If there's more than one scatterlist chunk then
353 * the buffer will not be contiguous. As such the base address probably isn't
354 * very useful. This is true for SYSMEM as well, if there's no IOMMU.
355 *
356 * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's
357 * an IOMMU present and enabled for the GPU.
361 */
362u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
363{
364 struct nvgpu_page_alloc *alloc;
365
366 if (mem->aperture == APERTURE_SYSMEM)
367 return nvgpu_mem_get_addr_sysmem(g, mem);
368
369 /*
370 * Otherwise get the vidmem address.
371 */
372 alloc = mem->vidmem_alloc;
373
374 /* This API should not be used with > 1 chunks */
375 WARN_ON(alloc->nr_chunks != 1);
376
377 return alloc->base;
378}
379
380/*
381 * This should only be used on contiguous buffers regardless of whether
382 * there's an IOMMU present/enabled. This applies to both SYSMEM and
383 * VIDMEM.
384 */
385u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
386{
387 /*
388 * For a VIDMEM buf, this is identical to simply get_addr() so just fall
389 * back to that.
390 */
391 if (mem->aperture == APERTURE_VIDMEM)
392 return nvgpu_mem_get_addr(g, mem);
393
394 return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl);
395}
396
397/*
398 * Be careful how you use this! You are responsible for correctly freeing this
399 * memory.
400 */
401int nvgpu_mem_create_from_mem(struct gk20a *g,
402 struct nvgpu_mem *dest, struct nvgpu_mem *src,
403 int start_page, int nr_pages)
404{
405 int ret;
406 u64 start = start_page * PAGE_SIZE;
407 u64 size = nr_pages * PAGE_SIZE;
408 dma_addr_t new_iova;
409
410 if (src->aperture != APERTURE_SYSMEM)
411 return -EINVAL;
412
413 /* Some silly things a caller might do... */
414 if (size > src->size)
415 return -EINVAL;
416 if ((start + size) > src->size)
417 return -EINVAL;
418
419 dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
420 dest->aperture = src->aperture;
421 dest->skip_wmb = src->skip_wmb;
422 dest->size = size;
423
424 /*
425 * Re-use the CPU mapping only if the mapping was made by the DMA API.
426 *
427 * Bug 2040115: the DMA API wrapper makes the mapping that we should
428 * re-use.
429 */
430 if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
431 nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
432 dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
433
434 dest->priv.pages = src->priv.pages + start_page;
435 dest->priv.flags = src->priv.flags;
436
437 new_iova = sg_dma_address(src->priv.sgt->sgl) ?
438 sg_dma_address(src->priv.sgt->sgl) + start : 0;
439
440 /*
441 * Make a new SG table that is based only on the subset of pages that
442 * is passed to us. This table gets freed by the dma free routines.
443 */
444 if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
445 ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
446 src->priv.pages + start_page,
447 new_iova, size);
448 else
449 ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
450 new_iova, size);
451
452 return ret;
453}
454
455int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
456 struct page **pages, int nr_pages)
457{
458 struct sg_table *sgt;
459 struct page **our_pages =
460 nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
461
462 if (!our_pages)
463 return -ENOMEM;
464
465 memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);
466
467 if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
468 nr_pages * PAGE_SIZE)) {
469 nvgpu_kfree(g, our_pages);
470 return -ENOMEM;
471 }
472
473 /*
474 * If we are making an SGT from physical pages we can be reasonably
475 * certain that this should bypass the SMMU - thus we set the DMA (aka
476 * IOVA) address to 0. This tells the GMMU mapping code to not make a
477 * mapping directed to the SMMU.
478 */
479 sg_dma_address(sgt->sgl) = 0;
480
481 dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
482 dest->aperture = APERTURE_SYSMEM;
483 dest->skip_wmb = 0;
484 dest->size = PAGE_SIZE * nr_pages;
485
486 dest->priv.flags = 0;
487 dest->priv.pages = our_pages;
488 dest->priv.sgt = sgt;
489
490 return 0;
491}
492
493#ifdef CONFIG_TEGRA_GK20A_NVHOST
494int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
495 u64 src_phys, int nr_pages)
496{
497 struct page **pages =
498 nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
499 int i, ret = 0;
500
501 if (!pages)
502 return -ENOMEM;
503
504 for (i = 0; i < nr_pages; i++)
505 pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);
506
507 ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
508 nvgpu_kfree(g, pages);
509
510 return ret;
511}
512#endif
513
514static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
515{
516 return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
517}
518
519static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
520{
521 return (u64)__nvgpu_sgl_phys(g, sgl);
522}
523
524static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl)
525{
526 return (u64)sg_dma_address((struct scatterlist *)sgl);
527}
528
529static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl)
530{
531 return (u64)((struct scatterlist *)sgl)->length;
532}
533
534static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
535 struct nvgpu_sgl *sgl,
536 struct nvgpu_gmmu_attrs *attrs)
537{
538 if (sg_dma_address((struct scatterlist *)sgl) == 0)
539 return g->ops.mm.gpu_phys_addr(g, attrs,
540 __nvgpu_sgl_phys(g, sgl));
541
542 if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
543 return 0;
544
545 return nvgpu_mem_iommu_translate(g,
546 sg_dma_address((struct scatterlist *)sgl));
547}
548
549static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
550 struct nvgpu_sgt *sgt)
551{
552 if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
553 return false;
554 return true;
555}
556
557static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
558{
559 /*
560	 * Free only the nvgpu_sgt wrapper here; the underlying Linux SGT/SGL
561	 * must be freed separately.
562 */
563 nvgpu_kfree(g, sgt);
564}
565
566static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
567 .sgl_next = nvgpu_mem_linux_sgl_next,
568 .sgl_phys = nvgpu_mem_linux_sgl_phys,
569 .sgl_dma = nvgpu_mem_linux_sgl_dma,
570 .sgl_length = nvgpu_mem_linux_sgl_length,
571 .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
572 .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
573 .sgt_free = nvgpu_mem_linux_sgl_free,
574};
575
576static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
577 struct gk20a *g,
578 struct scatterlist *linux_sgl)
579{
580 struct nvgpu_page_alloc *vidmem_alloc;
581
582 vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
583 if (!vidmem_alloc)
584 return NULL;
585
586 return &vidmem_alloc->sgt;
587}
588
589struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
590{
591 struct nvgpu_sgt *nvgpu_sgt;
592 struct scatterlist *linux_sgl = sgt->sgl;
593
594 if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
595 return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
596
597 nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
598 if (!nvgpu_sgt)
599 return NULL;
600
601 nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
602
603 nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl;
604 nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
605
606 return nvgpu_sgt;
607}
608
609struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
610 struct nvgpu_mem *mem)
611{
612 return nvgpu_linux_sgt_create(g, mem->priv.sgt);
613}
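
For illustration: once nvgpu_linux_sgt_create() has wrapped a Linux sg_table, callers are expected to walk the buffer through the nvgpu_sgt_ops table above rather than touching the scatterlist directly. A minimal sketch using only the ops and fields shown above; the helper name is hypothetical:

static u64 nvgpu_sgt_total_length_sketch(struct nvgpu_sgt *sgt)
{
	struct nvgpu_sgl *sgl;
	u64 total = 0;

	/* sgl_next() returns NULL after the last chunk of the scatterlist. */
	for (sgl = sgt->sgl; sgl; sgl = sgt->ops->sgl_next(sgl))
		total += sgt->ops->sgl_length(sgl);

	return total;
}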
diff --git a/drivers/gpu/nvgpu/common/linux/nvhost.c b/drivers/gpu/nvgpu/common/linux/nvhost.c
deleted file mode 100644
index 6ab60248..00000000
--- a/drivers/gpu/nvgpu/common/linux/nvhost.c
+++ /dev/null
@@ -1,294 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/nvhost.h>
18#include <linux/nvhost_t194.h>
19#include <linux/nvhost_ioctl.h>
20#include <linux/of_platform.h>
21
22#include <nvgpu/nvhost.h>
23
24#include "nvhost_priv.h"
25
26#include "gk20a/gk20a.h"
27#include "os_linux.h"
28#include "module.h"
29
30int nvgpu_get_nvhost_dev(struct gk20a *g)
31{
32 struct device_node *np = nvgpu_get_node(g);
33 struct platform_device *host1x_pdev = NULL;
34 const __be32 *host1x_ptr;
35
36 host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
37 if (host1x_ptr) {
38 struct device_node *host1x_node =
39 of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
40
41 host1x_pdev = of_find_device_by_node(host1x_node);
42 if (!host1x_pdev) {
43 nvgpu_warn(g, "host1x device not available");
44 return -EPROBE_DEFER;
45 }
46
47 } else {
48 if (g->has_syncpoints) {
49 nvgpu_warn(g, "host1x reference not found. assuming no syncpoints support");
50 g->has_syncpoints = false;
51 }
52 return 0;
53 }
54
55 g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev));
56 if (!g->nvhost_dev)
57 return -ENOMEM;
58
59 g->nvhost_dev->host1x_pdev = host1x_pdev;
60
61 return 0;
62}
63
64void nvgpu_free_nvhost_dev(struct gk20a *g)
65{
66 nvgpu_kfree(g, g->nvhost_dev);
67}
68
69int nvgpu_nvhost_module_busy_ext(
70 struct nvgpu_nvhost_dev *nvhost_dev)
71{
72 return nvhost_module_busy_ext(nvhost_dev->host1x_pdev);
73}
74
75void nvgpu_nvhost_module_idle_ext(
76 struct nvgpu_nvhost_dev *nvhost_dev)
77{
78 nvhost_module_idle_ext(nvhost_dev->host1x_pdev);
79}
80
81void nvgpu_nvhost_debug_dump_device(
82 struct nvgpu_nvhost_dev *nvhost_dev)
83{
84 nvhost_debug_dump_device(nvhost_dev->host1x_pdev);
85}
86
87const char *nvgpu_nvhost_syncpt_get_name(
88 struct nvgpu_nvhost_dev *nvhost_dev, int id)
89{
90 return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id);
91}
92
93bool nvgpu_nvhost_syncpt_is_valid_pt_ext(
94 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
95{
96 return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id);
97}
98
99int nvgpu_nvhost_syncpt_is_expired_ext(
100 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
101{
102 return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev,
103 id, thresh);
104}
105
106u32 nvgpu_nvhost_syncpt_incr_max_ext(
107 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs)
108{
109 return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs);
110}
111
112int nvgpu_nvhost_intr_register_notifier(
113 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh,
114 void (*callback)(void *, int), void *private_data)
115{
116 return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev,
117 id, thresh,
118 callback, private_data);
119}
120
121void nvgpu_nvhost_syncpt_set_min_eq_max_ext(
122 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
123{
124 nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id);
125}
126
127void nvgpu_nvhost_syncpt_put_ref_ext(
128 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
129{
130 nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id);
131}
132
133u32 nvgpu_nvhost_get_syncpt_host_managed(
134 struct nvgpu_nvhost_dev *nvhost_dev,
135 u32 param, const char *syncpt_name)
136{
137 return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev,
138 param, syncpt_name);
139}
140
141u32 nvgpu_nvhost_get_syncpt_client_managed(
142 struct nvgpu_nvhost_dev *nvhost_dev,
143 const char *syncpt_name)
144{
145 return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev,
146 syncpt_name);
147}
148
149int nvgpu_nvhost_syncpt_wait_timeout_ext(
150 struct nvgpu_nvhost_dev *nvhost_dev, u32 id,
151 u32 thresh, u32 timeout, u32 *value, struct timespec *ts)
152{
153 return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev,
154 id, thresh, timeout, value, ts);
155}
156
157int nvgpu_nvhost_syncpt_read_ext_check(
158 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val)
159{
160 return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val);
161}
162
163u32 nvgpu_nvhost_syncpt_read_maxval(
164 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
165{
166 return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id);
167}
168
169void nvgpu_nvhost_syncpt_set_safe_state(
170 struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
171{
172 u32 val;
173
174 /*
175	 * Add a large number of increments to the current value so that
176	 * all waiters on this syncpoint are released.
177	 *
178	 * We don't expect any case where more than 0x10000 increments
179	 * are pending.
180 */
181 val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id);
182 val += 0x10000;
183
184 nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
185 nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val);
186}
187
188int nvgpu_nvhost_create_symlink(struct gk20a *g)
189{
190 struct device *dev = dev_from_gk20a(g);
191 int err = 0;
192
193 if (g->nvhost_dev &&
194 (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
195 err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
196 &dev->kobj,
197 dev_name(dev));
198 }
199
200 return err;
201}
202
203void nvgpu_nvhost_remove_symlink(struct gk20a *g)
204{
205 struct device *dev = dev_from_gk20a(g);
206
207 if (g->nvhost_dev &&
208 (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
209 sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
210 dev_name(dev));
211 }
212}
213
214#ifdef CONFIG_SYNC
215u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt)
216{
217 return nvhost_sync_pt_id(pt);
218}
219
220u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt)
221{
222 return nvhost_sync_pt_thresh(pt);
223}
224
225struct sync_fence *nvgpu_nvhost_sync_fdget(int fd)
226{
227 return nvhost_sync_fdget(fd);
228}
229
230int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence)
231{
232 return nvhost_sync_num_pts(fence);
233}
234
235struct sync_fence *nvgpu_nvhost_sync_create_fence(
236 struct nvgpu_nvhost_dev *nvhost_dev,
237 u32 id, u32 thresh, const char *name)
238{
239 struct nvhost_ctrl_sync_fence_info pt = {
240 .id = id,
241 .thresh = thresh,
242 };
243
244 return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name);
245}
246#endif /* CONFIG_SYNC */
247
248#ifdef CONFIG_TEGRA_T19X_GRHOST
249int nvgpu_nvhost_syncpt_unit_interface_get_aperture(
250 struct nvgpu_nvhost_dev *nvhost_dev,
251 u64 *base, size_t *size)
252{
253 return nvhost_syncpt_unit_interface_get_aperture(
254 nvhost_dev->host1x_pdev, (phys_addr_t *)base, size);
255}
256
257u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id)
258{
259 return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
260}
261
262int nvgpu_nvhost_syncpt_init(struct gk20a *g)
263{
264 int err = 0;
265
266 if (!g->has_syncpoints)
267 return -ENOSYS;
268
269 err = nvgpu_get_nvhost_dev(g);
270 if (err) {
271 nvgpu_err(g, "host1x device not available");
272 g->has_syncpoints = false;
273 return -ENOSYS;
274 }
275
276 err = nvgpu_nvhost_syncpt_unit_interface_get_aperture(
277 g->nvhost_dev,
278 &g->syncpt_unit_base,
279 &g->syncpt_unit_size);
280 if (err) {
281 nvgpu_err(g, "Failed to get syncpt interface");
282 g->has_syncpoints = false;
283 return -ENOSYS;
284 }
285
286 g->syncpt_size =
287 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1);
288 nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n",
289 g->syncpt_unit_base, g->syncpt_unit_size,
290 g->syncpt_size);
291
292 return 0;
293}
294#endif
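
For illustration: the aperture base and per-syncpoint byte offset queried in nvgpu_nvhost_syncpt_init() are combined when a syncpoint has to be addressed through the syncpoint unit aperture. A minimal sketch under the same config option, assuming only the fields set above; the helper name is hypothetical:

#ifdef CONFIG_TEGRA_T19X_GRHOST
static u64 nvgpu_syncpt_unit_addr_sketch(struct gk20a *g, u32 syncpt_id)
{
	/* Base of the syncpoint aperture plus this syncpoint's byte offset. */
	return g->syncpt_unit_base +
		nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
}
#endif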
diff --git a/drivers/gpu/nvgpu/common/linux/nvhost_priv.h b/drivers/gpu/nvgpu/common/linux/nvhost_priv.h
deleted file mode 100644
index c03390a7..00000000
--- a/drivers/gpu/nvgpu/common/linux/nvhost_priv.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_NVHOST_PRIV_H__
18#define __NVGPU_NVHOST_PRIV_H__
19
20struct nvgpu_nvhost_dev {
21 struct platform_device *host1x_pdev;
22};
23
24#endif /* __NVGPU_NVHOST_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/nvlink.c b/drivers/gpu/nvgpu/common/linux/nvlink.c
deleted file mode 100644
index c93514c0..00000000
--- a/drivers/gpu/nvgpu/common/linux/nvlink.c
+++ /dev/null
@@ -1,106 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <gk20a/gk20a.h>
18#include <nvgpu/nvlink.h>
19#include <nvgpu/enabled.h>
20#include "module.h"
21
22#ifdef CONFIG_TEGRA_NVLINK
23int nvgpu_nvlink_read_dt_props(struct gk20a *g)
24{
25 struct device_node *np;
26 struct nvlink_device *ndev = g->nvlink.priv;
27 u32 local_dev_id;
28 u32 local_link_id;
29 u32 remote_dev_id;
30 u32 remote_link_id;
31 bool is_master;
32
33 /* Parse DT */
34 np = nvgpu_get_node(g);
35 if (!np)
36 goto fail;
37
38 np = of_get_child_by_name(np, "nvidia,nvlink");
39 if (!np)
40 goto fail;
41
42 np = of_get_child_by_name(np, "endpoint");
43 if (!np)
44 goto fail;
45
46 /* Parse DT structure to detect endpoint topology */
47 of_property_read_u32(np, "local_dev_id", &local_dev_id);
48 of_property_read_u32(np, "local_link_id", &local_link_id);
49 of_property_read_u32(np, "remote_dev_id", &remote_dev_id);
50 of_property_read_u32(np, "remote_link_id", &remote_link_id);
51 is_master = of_property_read_bool(np, "is_master");
52
53 /* Check that we are in dGPU mode */
54 if (local_dev_id != NVLINK_ENDPT_GV100) {
55 nvgpu_err(g, "Local nvlink device is not dGPU");
56 return -EINVAL;
57 }
58
59 ndev->is_master = is_master;
60 ndev->device_id = local_dev_id;
61 ndev->link.link_id = local_link_id;
62 ndev->link.remote_dev_info.device_id = remote_dev_id;
63 ndev->link.remote_dev_info.link_id = remote_link_id;
64
65 return 0;
66
67fail:
68	nvgpu_info(g, "nvlink endpoint not found or invalid in DT");
69 return -ENODEV;
70}
71#endif /* CONFIG_TEGRA_NVLINK */
72
73void nvgpu_mss_nvlink_init_credits(struct gk20a *g)
74{
75 /* MSS_NVLINK_1_BASE */
76 void __iomem *soc1 = ioremap(0x01f20010, 4096);
77 /* MSS_NVLINK_2_BASE */
78 void __iomem *soc2 = ioremap(0x01f40010, 4096);
79 /* MSS_NVLINK_3_BASE */
80 void __iomem *soc3 = ioremap(0x01f60010, 4096);
81 /* MSS_NVLINK_4_BASE */
82 void __iomem *soc4 = ioremap(0x01f80010, 4096);
83 u32 val;
84
85 nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits");
86
87 val = readl_relaxed(soc1);
88 writel_relaxed(val, soc1);
89 val = readl_relaxed(soc1 + 4);
90 writel_relaxed(val, soc1 + 4);
91
92 val = readl_relaxed(soc2);
93 writel_relaxed(val, soc2);
94 val = readl_relaxed(soc2 + 4);
95 writel_relaxed(val, soc2 + 4);
96
97 val = readl_relaxed(soc3);
98 writel_relaxed(val, soc3);
99 val = readl_relaxed(soc3 + 4);
100 writel_relaxed(val, soc3 + 4);
101
102 val = readl_relaxed(soc4);
103 writel_relaxed(val, soc4);
104 val = readl_relaxed(soc4 + 4);
105 writel_relaxed(val, soc4 + 4);
106}
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android.c b/drivers/gpu/nvgpu/common/linux/os_fence_android.c
deleted file mode 100644
index 9be8c6c0..00000000
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android.c
+++ /dev/null
@@ -1,79 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#include <nvgpu/types.h>
17#include <nvgpu/os_fence.h>
18#include <nvgpu/linux/os_fence_android.h>
19
20#include "gk20a/gk20a.h"
21
22#include "../drivers/staging/android/sync.h"
23
24inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s)
25{
26 struct sync_fence *fence = (struct sync_fence *)s->priv;
27 return fence;
28}
29
30static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out)
31{
32 fence_out->priv = NULL;
33 fence_out->g = NULL;
34 fence_out->ops = NULL;
35}
36
37void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
38 struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
39 struct sync_fence *fence)
40{
41 fence_out->g = g;
42 fence_out->ops = fops;
43 fence_out->priv = (void *)fence;
44}
45
46void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
47{
48 struct sync_fence *fence = nvgpu_get_sync_fence(s);
49
50 sync_fence_put(fence);
51
52 nvgpu_os_fence_clear(s);
53}
54
55void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd)
56{
57 struct sync_fence *fence = nvgpu_get_sync_fence(s);
58
59 sync_fence_get(fence);
60 sync_fence_install(fence, fd);
61}
62
63int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
64 struct channel_gk20a *c, int fd)
65{
66 int err = -ENOSYS;
67
68#ifdef CONFIG_TEGRA_GK20A_NVHOST
69 err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
70#endif
71
72 if (err)
73 err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
74
75 if (err)
76 nvgpu_err(c->g, "error obtaining fence from fd %d", fd);
77
78 return err;
79}
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
deleted file mode 100644
index 25832417..00000000
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
+++ /dev/null
@@ -1,111 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/errno.h>
18
19#include <nvgpu/types.h>
20#include <nvgpu/os_fence.h>
21#include <nvgpu/linux/os_fence_android.h>
22#include <nvgpu/semaphore.h>
23
24#include "gk20a/channel_sync_gk20a.h"
25#include "gk20a/mm_gk20a.h"
26
27#include "sync_sema_android.h"
28
29#include "../drivers/staging/android/sync.h"
30
31int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
32 struct priv_cmd_entry *wait_cmd,
33 struct channel_gk20a *c,
34 int max_wait_cmds)
35{
36 int err;
37 int wait_cmd_size;
38 int num_wait_cmds;
39 int i;
40 struct nvgpu_semaphore *sema;
41 struct sync_fence *sync_fence = nvgpu_get_sync_fence(s);
42
43 wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size();
44
45 num_wait_cmds = sync_fence->num_fences;
46 if (num_wait_cmds == 0)
47 return 0;
48
49 if (max_wait_cmds && num_wait_cmds > max_wait_cmds)
50 return -EINVAL;
51
52 err = gk20a_channel_alloc_priv_cmdbuf(c,
53 wait_cmd_size * num_wait_cmds,
54 wait_cmd);
55 if (err) {
56 nvgpu_err(c->g, "not enough priv cmd buffer space");
57 return err;
58 }
59
60 for (i = 0; i < num_wait_cmds; i++) {
61 struct fence *f = sync_fence->cbs[i].sync_pt;
62 struct sync_pt *pt = sync_pt_from_fence(f);
63
64 sema = gk20a_sync_pt_sema(pt);
65 gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd,
66 wait_cmd_size, i);
67 }
68
69 return 0;
70}
71
72static const struct nvgpu_os_fence_ops sema_ops = {
73 .program_waits = nvgpu_os_fence_sema_wait_gen_cmd,
74 .drop_ref = nvgpu_os_fence_android_drop_ref,
75 .install_fence = nvgpu_os_fence_android_install_fd,
76};
77
78int nvgpu_os_fence_sema_create(
79 struct nvgpu_os_fence *fence_out,
80 struct channel_gk20a *c,
81 struct nvgpu_semaphore *sema)
82{
83 struct sync_fence *fence;
84
85 fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x",
86 nvgpu_semaphore_gpu_ro_va(sema));
87
88 if (!fence) {
89 nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x",
90 (u32)nvgpu_semaphore_gpu_ro_va(sema));
91
92 return -ENOMEM;
93 }
94
95 nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);
96
97 return 0;
98}
99
100int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out,
101 struct channel_gk20a *c, int fd)
102{
103 struct sync_fence *fence = gk20a_sync_fence_fdget(fd);
104
105 if (!fence)
106 return -EINVAL;
107
108 nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);
109
110 return 0;
111}
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
deleted file mode 100644
index d7a72fcd..00000000
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
+++ /dev/null
@@ -1,121 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/errno.h>
18
19#include <nvgpu/types.h>
20#include <nvgpu/os_fence.h>
21#include <nvgpu/linux/os_fence_android.h>
22#include <nvgpu/nvhost.h>
23#include <nvgpu/atomic.h>
24
25#include "gk20a/gk20a.h"
26#include "gk20a/channel_gk20a.h"
27#include "gk20a/channel_sync_gk20a.h"
28#include "gk20a/mm_gk20a.h"
29
30#include "../drivers/staging/android/sync.h"
31
32int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s,
33 struct priv_cmd_entry *wait_cmd,
34 struct channel_gk20a *c,
35 int max_wait_cmds)
36{
37 int err;
38 int wait_cmd_size;
39 int num_wait_cmds;
40 int i;
41 u32 wait_id;
42 struct sync_pt *pt;
43
44 struct sync_fence *sync_fence = (struct sync_fence *)s->priv;
45
46 if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds)
47 return -EINVAL;
48
49 /* validate syncpt ids */
50 for (i = 0; i < sync_fence->num_fences; i++) {
51 pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
52 wait_id = nvgpu_nvhost_sync_pt_id(pt);
53 if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
54 c->g->nvhost_dev, wait_id)) {
55 return -EINVAL;
56 }
57 }
58
59 num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
60 if (num_wait_cmds == 0)
61 return 0;
62
63 wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
64 err = gk20a_channel_alloc_priv_cmdbuf(c,
65 wait_cmd_size * num_wait_cmds, wait_cmd);
66 if (err) {
67 nvgpu_err(c->g,
68 "not enough priv cmd buffer space");
69 return err;
70 }
71
72 for (i = 0; i < sync_fence->num_fences; i++) {
73 struct fence *f = sync_fence->cbs[i].sync_pt;
74 struct sync_pt *pt = sync_pt_from_fence(f);
75 u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
76 u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);
77
78 err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value,
79 wait_cmd, wait_cmd_size, i, true);
80 }
81
82 WARN_ON(i != num_wait_cmds);
83
84 return 0;
85}
86
87static const struct nvgpu_os_fence_ops syncpt_ops = {
88 .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd,
89 .drop_ref = nvgpu_os_fence_android_drop_ref,
90 .install_fence = nvgpu_os_fence_android_install_fd,
91};
92
93int nvgpu_os_fence_syncpt_create(
94 struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
95 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
96{
97 struct sync_fence *fence = nvgpu_nvhost_sync_create_fence(
98 nvhost_dev, id, thresh, "fence");
99
100 if (!fence) {
101 nvgpu_err(c->g, "error constructing fence %s", "fence");
102 return -ENOMEM;
103 }
104
105 nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
106
107 return 0;
108}
109
110int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out,
111 struct channel_gk20a *c, int fd)
112{
113 struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd);
114
115 if (!fence)
116 return -ENOMEM;
117
118 nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
119
120 return 0;
121} \ No newline at end of file
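
For illustration: sema_ops and syncpt_ops above implement the same nvgpu_os_fence_ops interface, so callers can stay backend-agnostic. A minimal sketch of such a caller, assuming only nvgpu_os_fence_fdget() and the ops fields initialized above; the function name is hypothetical:

static int nvgpu_os_fence_wait_fd_sketch(struct channel_gk20a *c, int fd,
					 struct priv_cmd_entry *wait_cmd,
					 int max_wait_cmds)
{
	struct nvgpu_os_fence fence;
	int err;

	/* Resolves to either the syncpt or the sema backend shown above. */
	err = nvgpu_os_fence_fdget(&fence, c, fd);
	if (err)
		return err;

	/* Emit wait commands for every point backing the fence. */
	err = fence.ops->program_waits(&fence, wait_cmd, c, max_wait_cmds);

	/* Drop the reference taken by fdget. */
	fence.ops->drop_ref(&fence);

	return err;
}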
diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h
deleted file mode 100644
index 4dcce322..00000000
--- a/drivers/gpu/nvgpu/common/linux/os_linux.h
+++ /dev/null
@@ -1,166 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef NVGPU_OS_LINUX_H
18#define NVGPU_OS_LINUX_H
19
20#include <linux/cdev.h>
21#include <linux/iommu.h>
22#include <linux/hashtable.h>
23
24#include "gk20a/gk20a.h"
25#include "cde.h"
26#include "sched.h"
27
28struct nvgpu_os_linux_ops {
29 struct {
30 void (*get_program_numbers)(struct gk20a *g,
31 u32 block_height_log2,
32 u32 shader_parameter,
33 int *hprog, int *vprog);
34 bool (*need_scatter_buffer)(struct gk20a *g);
35 int (*populate_scatter_buffer)(struct gk20a *g,
36 struct sg_table *sgt,
37 size_t surface_size,
38 void *scatter_buffer_ptr,
39 size_t scatter_buffer_size);
40 } cde;
41};
42
43struct nvgpu_os_linux {
44 struct gk20a g;
45 struct device *dev;
46
47 struct {
48 struct cdev cdev;
49 struct device *node;
50 } channel;
51
52 struct {
53 struct cdev cdev;
54 struct device *node;
55 } ctrl;
56
57 struct {
58 struct cdev cdev;
59 struct device *node;
60 } as_dev;
61
62 struct {
63 struct cdev cdev;
64 struct device *node;
65 } dbg;
66
67 struct {
68 struct cdev cdev;
69 struct device *node;
70 } prof;
71
72 struct {
73 struct cdev cdev;
74 struct device *node;
75 } tsg;
76
77 struct {
78 struct cdev cdev;
79 struct device *node;
80 } ctxsw;
81
82 struct {
83 struct cdev cdev;
84 struct device *node;
85 } sched;
86
87 dev_t cdev_region;
88
89 struct devfreq *devfreq;
90
91 struct device_dma_parameters dma_parms;
92
93 atomic_t hw_irq_stall_count;
94 atomic_t hw_irq_nonstall_count;
95
96 struct nvgpu_cond sw_irq_stall_last_handled_wq;
97 atomic_t sw_irq_stall_last_handled;
98
99 atomic_t nonstall_ops;
100
101 struct nvgpu_cond sw_irq_nonstall_last_handled_wq;
102 atomic_t sw_irq_nonstall_last_handled;
103
104 struct work_struct nonstall_fn_work;
105 struct workqueue_struct *nonstall_work_queue;
106
107 struct resource *reg_mem;
108 void __iomem *regs;
109 void __iomem *regs_saved;
110
111 struct resource *bar1_mem;
112 void __iomem *bar1;
113 void __iomem *bar1_saved;
114
115 void __iomem *usermode_regs;
116 void __iomem *usermode_regs_saved;
117
118 struct nvgpu_os_linux_ops ops;
119
120#ifdef CONFIG_DEBUG_FS
121 struct dentry *debugfs;
122 struct dentry *debugfs_alias;
123
124 struct dentry *debugfs_ltc_enabled;
125 struct dentry *debugfs_timeouts_enabled;
126 struct dentry *debugfs_gr_idle_timeout_default;
127 struct dentry *debugfs_disable_bigpage;
128 struct dentry *debugfs_gr_default_attrib_cb_size;
129
130 struct dentry *debugfs_timeslice_low_priority_us;
131 struct dentry *debugfs_timeslice_medium_priority_us;
132 struct dentry *debugfs_timeslice_high_priority_us;
133 struct dentry *debugfs_runlist_interleave;
134 struct dentry *debugfs_allocators;
135 struct dentry *debugfs_xve;
136 struct dentry *debugfs_kmem;
137 struct dentry *debugfs_hal;
138
139 struct dentry *debugfs_force_preemption_cilp;
140 struct dentry *debugfs_force_preemption_gfxp;
141 struct dentry *debugfs_dump_ctxsw_stats;
142#endif
143 DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
144
145 struct gk20a_cde_app cde_app;
146
147 struct rw_semaphore busy_lock;
148
149 struct gk20a_sched_ctrl sched_ctrl;
150
151 bool init_done;
152};
153
154static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)
155{
156 return container_of(g, struct nvgpu_os_linux, g);
157}
158
159static inline struct device *dev_from_gk20a(struct gk20a *g)
160{
161 return nvgpu_os_linux_from_gk20a(g)->dev;
162}
163
164#define INTERFACE_NAME "nvhost%s-gpu"
165
166#endif
diff --git a/drivers/gpu/nvgpu/common/linux/os_sched.c b/drivers/gpu/nvgpu/common/linux/os_sched.c
deleted file mode 100644
index 586b35eb..00000000
--- a/drivers/gpu/nvgpu/common/linux/os_sched.c
+++ /dev/null
@@ -1,26 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/os_sched.h>
15
16#include <linux/sched.h>
17
18int nvgpu_current_tid(struct gk20a *g)
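/*
 * Note: in kernel terms, current->pid is the calling thread's id and
 * current->tgid is its process (thread group) id, hence the mapping below.
 */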
19{
20 return current->pid;
21}
22
23int nvgpu_current_pid(struct gk20a *g)
24{
25 return current->tgid;
26}
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
deleted file mode 100644
index 1011b441..00000000
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ /dev/null
@@ -1,861 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/pci.h>
18#include <linux/interrupt.h>
19#include <linux/pm_runtime.h>
20#include <linux/of_platform.h>
21#include <linux/of_address.h>
22
23#include <nvgpu/nvhost.h>
24#include <nvgpu/nvgpu_common.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/enabled.h>
27#include <nvgpu/nvlink.h>
28#include <nvgpu/soc.h>
29
30#include "gk20a/gk20a.h"
31#include "clk/clk.h"
32#include "clk/clk_mclk.h"
33#include "module.h"
34#include "intr.h"
35#include "sysfs.h"
36#include "os_linux.h"
37#include "platform_gk20a.h"
38#include <nvgpu/sim.h>
39
40#include "pci.h"
41#include "pci_usermode.h"
42
44#include "driver_common.h"
45
46#define PCI_INTERFACE_NAME "card-%s%%s"
47
48static int nvgpu_pci_tegra_probe(struct device *dev)
49{
50 return 0;
51}
52
53static int nvgpu_pci_tegra_remove(struct device *dev)
54{
55 struct gk20a *g = get_gk20a(dev);
56
57 if (g->ops.gr.remove_gr_sysfs)
58 g->ops.gr.remove_gr_sysfs(g);
59
60 return 0;
61}
62
63static bool nvgpu_pci_tegra_is_railgated(struct device *pdev)
64{
65 return false;
66}
67
68static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate)
69{
70 long ret = (long)rate;
71
72 if (rate == UINT_MAX)
73 ret = BOOT_GPC2CLK_MHZ * 1000000UL;
74
75 return ret;
76}
77
78static struct gk20a_platform nvgpu_pci_device[] = {
79 { /* DEVICE=0x1c35 */
80 /* ptimer src frequency in hz */
81 .ptimer_src_freq = 31250000,
82
83 .probe = nvgpu_pci_tegra_probe,
84 .remove = nvgpu_pci_tegra_remove,
85
86 /* power management configuration */
87 .railgate_delay_init = 500,
88 .can_railgate_init = false,
89 .can_elpg_init = true,
90 .enable_elpg = true,
91 .enable_elcg = false,
92 .enable_slcg = true,
93 .enable_blcg = true,
94 .enable_mscg = true,
95 .can_slcg = true,
96 .can_blcg = true,
97 .can_elcg = true,
98
99 .disable_aspm = true,
100
101 /* power management callbacks */
102 .is_railgated = nvgpu_pci_tegra_is_railgated,
103 .clk_round_rate = nvgpu_pci_clk_round_rate,
104
105 .ch_wdt_timeout_ms = 7000,
106
107 .honors_aperture = true,
108 .dma_mask = DMA_BIT_MASK(40),
109 .vbios_min_version = 0x86063000,
110 .hardcode_sw_threshold = true,
111 .ina3221_dcb_index = 0,
112 .ina3221_i2c_address = 0x84,
113 .ina3221_i2c_port = 0x2,
114 },
115 { /* DEVICE=0x1c36 */
116 /* ptimer src frequency in hz */
117 .ptimer_src_freq = 31250000,
118
119 .probe = nvgpu_pci_tegra_probe,
120 .remove = nvgpu_pci_tegra_remove,
121
122 /* power management configuration */
123 .railgate_delay_init = 500,
124 .can_railgate_init = false,
125 .can_elpg_init = true,
126 .enable_elpg = true,
127 .enable_elcg = false,
128 .enable_slcg = true,
129 .enable_blcg = true,
130 .enable_mscg = true,
131 .can_slcg = true,
132 .can_blcg = true,
133 .can_elcg = true,
134
135 .disable_aspm = true,
136
137 /* power management callbacks */
138 .is_railgated = nvgpu_pci_tegra_is_railgated,
139 .clk_round_rate = nvgpu_pci_clk_round_rate,
140
141 .ch_wdt_timeout_ms = 7000,
142
143 .honors_aperture = true,
144 .dma_mask = DMA_BIT_MASK(40),
145 .vbios_min_version = 0x86062d00,
146 .hardcode_sw_threshold = true,
147 .ina3221_dcb_index = 0,
148 .ina3221_i2c_address = 0x84,
149 .ina3221_i2c_port = 0x2,
150 },
151 { /* DEVICE=0x1c37 */
152 /* ptimer src frequency in hz */
153 .ptimer_src_freq = 31250000,
154
155 .probe = nvgpu_pci_tegra_probe,
156 .remove = nvgpu_pci_tegra_remove,
157
158 /* power management configuration */
159 .railgate_delay_init = 500,
160 .can_railgate_init = false,
161 .can_elpg_init = true,
162 .enable_elpg = true,
163 .enable_elcg = false,
164 .enable_slcg = true,
165 .enable_blcg = true,
166 .enable_mscg = true,
167 .can_slcg = true,
168 .can_blcg = true,
169 .can_elcg = true,
170
171 .disable_aspm = true,
172
173 /* power management callbacks */
174 .is_railgated = nvgpu_pci_tegra_is_railgated,
175 .clk_round_rate = nvgpu_pci_clk_round_rate,
176
177 .ch_wdt_timeout_ms = 7000,
178
179 .honors_aperture = true,
180 .dma_mask = DMA_BIT_MASK(40),
181 .vbios_min_version = 0x86063000,
182 .hardcode_sw_threshold = true,
183 .ina3221_dcb_index = 0,
184 .ina3221_i2c_address = 0x84,
185 .ina3221_i2c_port = 0x2,
186 },
187 { /* DEVICE=0x1c75 */
188 /* ptimer src frequency in hz */
189 .ptimer_src_freq = 31250000,
190
191 .probe = nvgpu_pci_tegra_probe,
192 .remove = nvgpu_pci_tegra_remove,
193
194 /* power management configuration */
195 .railgate_delay_init = 500,
196 .can_railgate_init = false,
197 .can_elpg_init = true,
198 .enable_elpg = true,
199 .enable_elcg = false,
200 .enable_slcg = true,
201 .enable_blcg = true,
202 .enable_mscg = true,
203 .can_slcg = true,
204 .can_blcg = true,
205 .can_elcg = true,
206
207 .disable_aspm = true,
208
209 /* power management callbacks */
210 .is_railgated = nvgpu_pci_tegra_is_railgated,
211 .clk_round_rate = nvgpu_pci_clk_round_rate,
212
213 .ch_wdt_timeout_ms = 7000,
214
215 .honors_aperture = true,
216 .dma_mask = DMA_BIT_MASK(40),
217 .vbios_min_version = 0x86065300,
218 .hardcode_sw_threshold = false,
219 .ina3221_dcb_index = 1,
220 .ina3221_i2c_address = 0x80,
221 .ina3221_i2c_port = 0x1,
222 },
223 { /* DEVICE=PG503 SKU 201 */
224 /* ptimer src frequency in hz */
225 .ptimer_src_freq = 31250000,
226
227 .probe = nvgpu_pci_tegra_probe,
228 .remove = nvgpu_pci_tegra_remove,
229
230 /* power management configuration */
231 .railgate_delay_init = 500,
232 .can_railgate_init = false,
233 .can_elpg_init = false,
234 .enable_elpg = false,
235 .enable_elcg = false,
236 .enable_slcg = false,
237 .enable_blcg = false,
238 .enable_mscg = false,
239 .can_slcg = false,
240 .can_blcg = false,
241 .can_elcg = false,
242
243 .disable_aspm = true,
244
245 /* power management callbacks */
246 .is_railgated = nvgpu_pci_tegra_is_railgated,
247 .clk_round_rate = nvgpu_pci_clk_round_rate,
248
249 .ch_wdt_timeout_ms = 7000,
250
251 .honors_aperture = true,
252 .dma_mask = DMA_BIT_MASK(40),
253 .vbios_min_version = 0x88001e00,
254 .hardcode_sw_threshold = false,
255 .run_preos = true,
256 },
257 { /* DEVICE=PG503 SKU 200 ES */
258 /* ptimer src frequency in hz */
259 .ptimer_src_freq = 31250000,
260
261 .probe = nvgpu_pci_tegra_probe,
262 .remove = nvgpu_pci_tegra_remove,
263
264 /* power management configuration */
265 .railgate_delay_init = 500,
266 .can_railgate_init = false,
267 .can_elpg_init = false,
268 .enable_elpg = false,
269 .enable_elcg = false,
270 .enable_slcg = false,
271 .enable_blcg = false,
272 .enable_mscg = false,
273 .can_slcg = false,
274 .can_blcg = false,
275 .can_elcg = false,
276
277 .disable_aspm = true,
278
279 /* power management callbacks */
280 .is_railgated = nvgpu_pci_tegra_is_railgated,
281 .clk_round_rate = nvgpu_pci_clk_round_rate,
282
283 .ch_wdt_timeout_ms = 7000,
284
285 .honors_aperture = true,
286 .dma_mask = DMA_BIT_MASK(40),
287 .vbios_min_version = 0x88001e00,
288 .hardcode_sw_threshold = false,
289 .run_preos = true,
290 },
291 {
292 /* ptimer src frequency in hz */
293 .ptimer_src_freq = 31250000,
294
295 .probe = nvgpu_pci_tegra_probe,
296 .remove = nvgpu_pci_tegra_remove,
297
298 /* power management configuration */
299 .railgate_delay_init = 500,
300 .can_railgate_init = false,
301 .can_elpg_init = false,
302 .enable_elpg = false,
303 .enable_elcg = false,
304 .enable_slcg = false,
305 .enable_blcg = false,
306 .enable_mscg = false,
307 .can_slcg = false,
308 .can_blcg = false,
309 .can_elcg = false,
310
311 .disable_aspm = true,
312
313 /* power management callbacks */
314 .is_railgated = nvgpu_pci_tegra_is_railgated,
315 .clk_round_rate = nvgpu_pci_clk_round_rate,
316
317 .ch_wdt_timeout_ms = 7000,
318
319 .honors_aperture = true,
320 .dma_mask = DMA_BIT_MASK(40),
321 .vbios_min_version = 0x88000126,
322 .hardcode_sw_threshold = false,
323 .run_preos = true,
324 .has_syncpoints = true,
325 },
326 { /* SKU250 */
327 /* ptimer src frequency in hz */
328 .ptimer_src_freq = 31250000,
329
330 .probe = nvgpu_pci_tegra_probe,
331 .remove = nvgpu_pci_tegra_remove,
332
333 /* power management configuration */
334 .railgate_delay_init = 500,
335 .can_railgate_init = false,
336 .can_elpg_init = false,
337 .enable_elpg = false,
338 .enable_elcg = true,
339 .enable_slcg = true,
340 .enable_blcg = true,
341 .enable_mscg = false,
342 .can_slcg = true,
343 .can_blcg = true,
344 .can_elcg = true,
345
346 .disable_aspm = true,
347
348 /* power management callbacks */
349 .is_railgated = nvgpu_pci_tegra_is_railgated,
350 .clk_round_rate = nvgpu_pci_clk_round_rate,
351
352 .ch_wdt_timeout_ms = 7000,
353
354 .honors_aperture = true,
355 .dma_mask = DMA_BIT_MASK(40),
356 .vbios_min_version = 0x1,
357 .hardcode_sw_threshold = false,
358 .run_preos = true,
359 .has_syncpoints = true,
360 },
361 { /* SKU 0x1e3f */
362 /* ptimer src frequency in hz */
363 .ptimer_src_freq = 31250000,
364
365 .probe = nvgpu_pci_tegra_probe,
366 .remove = nvgpu_pci_tegra_remove,
367
368 /* power management configuration */
369 .railgate_delay_init = 500,
370 .can_railgate_init = false,
371 .can_elpg_init = false,
372 .enable_elpg = false,
373 .enable_elcg = false,
374 .enable_slcg = false,
375 .enable_blcg = false,
376 .enable_mscg = false,
377 .can_slcg = false,
378 .can_blcg = false,
379 .can_elcg = false,
380
381 .disable_aspm = true,
382
383 /* power management callbacks */
384 .is_railgated = nvgpu_pci_tegra_is_railgated,
385 .clk_round_rate = nvgpu_pci_clk_round_rate,
386
387 /*
388		 * WAR: PCIe x1 is very slow; set a very high value till nvlink is up
389 */
390 .ch_wdt_timeout_ms = 30000,
391
392 .honors_aperture = true,
393 .dma_mask = DMA_BIT_MASK(40),
394 .vbios_min_version = 0x1,
395 .hardcode_sw_threshold = false,
396 .unified_memory = false,
397 },
398 { /* 0x1eba */
399 /* ptimer src frequency in hz */
400 .ptimer_src_freq = 31250000,
401
402 .probe = nvgpu_pci_tegra_probe,
403 .remove = nvgpu_pci_tegra_remove,
404
405 /* power management configuration */
406 .railgate_delay_init = 500,
407 .can_railgate_init = false,
408 .can_elpg_init = false,
409 .enable_elpg = false,
410 .enable_elcg = false,
411 .enable_slcg = false,
412 .enable_blcg = false,
413 .enable_mscg = false,
414 .can_slcg = false,
415 .can_blcg = false,
416 .can_elcg = false,
417
418 .disable_aspm = true,
419
420 /* power management callbacks */
421 .is_railgated = nvgpu_pci_tegra_is_railgated,
422 .clk_round_rate = nvgpu_pci_clk_round_rate,
423
424 .ch_wdt_timeout_ms = 7000,
425
426 .honors_aperture = true,
427 .dma_mask = DMA_BIT_MASK(40),
428 .vbios_min_version = 0x90040109,
429 .hardcode_sw_threshold = false,
430 .has_syncpoints = true,
431 },
432};
433
434static struct pci_device_id nvgpu_pci_table[] = {
435 {
436 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35),
437 .class = PCI_BASE_CLASS_DISPLAY << 16,
438 .class_mask = 0xff << 16,
439 .driver_data = 0,
440 },
441 {
442 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36),
443 .class = PCI_BASE_CLASS_DISPLAY << 16,
444 .class_mask = 0xff << 16,
445 .driver_data = 1,
446 },
447 {
448 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37),
449 .class = PCI_BASE_CLASS_DISPLAY << 16,
450 .class_mask = 0xff << 16,
451 .driver_data = 2,
452 },
453 {
454 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75),
455 .class = PCI_BASE_CLASS_DISPLAY << 16,
456 .class_mask = 0xff << 16,
457 .driver_data = 3,
458 },
459 {
460 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1),
461 .class = PCI_BASE_CLASS_DISPLAY << 16,
462 .class_mask = 0xff << 16,
463 .driver_data = 4,
464 },
465 {
466 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0),
467 .class = PCI_BASE_CLASS_DISPLAY << 16,
468 .class_mask = 0xff << 16,
469 .driver_data = 5,
470 },
471 {
472 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe),
473 .class = PCI_BASE_CLASS_DISPLAY << 16,
474 .class_mask = 0xff << 16,
475 .driver_data = 6,
476 },
477 {
478 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1),
479 .class = PCI_BASE_CLASS_DISPLAY << 16,
480 .class_mask = 0xff << 16,
481 .driver_data = 7,
482 },
483 {
484 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f),
485 .class = PCI_BASE_CLASS_DISPLAY << 16,
486 .class_mask = 0xff << 16,
487 .driver_data = 8,
488 },
489 {
490 PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1eba),
491 .class = PCI_BASE_CLASS_DISPLAY << 16,
492 .class_mask = 0xff << 16,
493 .driver_data = 9,
494 },
495 {}
496};
497
498static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id)
499{
500 struct gk20a *g = dev_id;
501 irqreturn_t ret_stall;
502 irqreturn_t ret_nonstall;
503
504 ret_stall = nvgpu_intr_stall(g);
505 ret_nonstall = nvgpu_intr_nonstall(g);
506
507#if defined(CONFIG_PCI_MSI)
508 /* Send MSI EOI */
509 if (g->ops.xve.rearm_msi && g->msi_enabled)
510 g->ops.xve.rearm_msi(g);
511#endif
512
513 return (ret_stall == IRQ_NONE) ? ret_nonstall : IRQ_WAKE_THREAD;
514}
515
516static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id)
517{
518 struct gk20a *g = dev_id;
519
520 return nvgpu_intr_thread_stall(g);
521}
522
523static int nvgpu_pci_init_support(struct pci_dev *pdev)
524{
525 int err = 0;
526 struct gk20a *g = get_gk20a(&pdev->dev);
527 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
528
529 l->regs = ioremap(pci_resource_start(pdev, 0),
530 pci_resource_len(pdev, 0));
531 if (IS_ERR(l->regs)) {
532 nvgpu_err(g, "failed to remap gk20a registers");
533 err = PTR_ERR(l->regs);
534 goto fail;
535 }
536
537 l->bar1 = ioremap(pci_resource_start(pdev, 1),
538 pci_resource_len(pdev, 1));
539 if (IS_ERR(l->bar1)) {
540 nvgpu_err(g, "failed to remap gk20a bar1");
541 err = PTR_ERR(l->bar1);
542 goto fail;
543 }
544
545 err = nvgpu_init_sim_support_linux_pci(g);
546 if (err)
547 goto fail;
548 err = nvgpu_init_sim_support_pci(g);
549 if (err)
550 goto fail_sim;
551
552 nvgpu_pci_init_usermode_support(l);
553
554 return 0;
555
556 fail_sim:
557 nvgpu_remove_sim_support_linux_pci(g);
558 fail:
559 if (l->regs) {
560 iounmap(l->regs);
561 l->regs = NULL;
562 }
563 if (l->bar1) {
564 iounmap(l->bar1);
565 l->bar1 = NULL;
566 }
567
568 return err;
569}
570
571static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode)
572{
573 if (mode)
574 *mode = S_IRUGO | S_IWUGO;
575 return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev));
576}
577
578static struct class nvgpu_pci_class = {
579 .owner = THIS_MODULE,
580 .name = "nvidia-pci-gpu",
581 .devnode = nvgpu_pci_devnode,
582};
583
584#ifdef CONFIG_PM
585static int nvgpu_pci_pm_runtime_resume(struct device *dev)
586{
587 return gk20a_pm_finalize_poweron(dev);
588}
589
590static int nvgpu_pci_pm_runtime_suspend(struct device *dev)
591{
592 return 0;
593}
594
595static const struct dev_pm_ops nvgpu_pci_pm_ops = {
596 .runtime_resume = nvgpu_pci_pm_runtime_resume,
597 .runtime_suspend = nvgpu_pci_pm_runtime_suspend,
598 .resume = nvgpu_pci_pm_runtime_resume,
599 .suspend = nvgpu_pci_pm_runtime_suspend,
600};
601#endif
602
603static int nvgpu_pci_pm_init(struct device *dev)
604{
605#ifdef CONFIG_PM
606 struct gk20a *g = get_gk20a(dev);
607
608 if (!g->can_railgate) {
609 pm_runtime_disable(dev);
610 } else {
611 if (g->railgate_delay)
612 pm_runtime_set_autosuspend_delay(dev,
613 g->railgate_delay);
614
615 /*
616 * Runtime PM for PCI devices is disabled by default,
617 * so we need to enable it first
618 */
619 pm_runtime_use_autosuspend(dev);
620 pm_runtime_put_noidle(dev);
621 pm_runtime_allow(dev);
622 }
623#endif
624 return 0;
625}
626
627static int nvgpu_pci_probe(struct pci_dev *pdev,
628 const struct pci_device_id *pent)
629{
630 struct gk20a_platform *platform = NULL;
631 struct nvgpu_os_linux *l;
632 struct gk20a *g;
633 int err;
634 char nodefmt[64];
635 struct device_node *np;
636
637 /* make sure driver_data is a sane index */
638 if (pent->driver_data >= sizeof(nvgpu_pci_device) /
639 sizeof(nvgpu_pci_device[0])) {
640 return -EINVAL;
641 }
642
643 l = kzalloc(sizeof(*l), GFP_KERNEL);
644 if (!l) {
645 dev_err(&pdev->dev, "couldn't allocate gk20a support");
646 return -ENOMEM;
647 }
648
649 hash_init(l->ecc_sysfs_stats_htable);
650
651 g = &l->g;
652 nvgpu_init_gk20a(g);
653
654 nvgpu_kmem_init(g);
655
656	/* Allocate memory to hold platform data */
657	platform = (struct gk20a_platform *)nvgpu_kzalloc(g,
658 sizeof(struct gk20a_platform));
659 if (!platform) {
660 dev_err(&pdev->dev, "couldn't allocate platform data");
661 err = -ENOMEM;
662 goto err_free_l;
663 }
664
665	/* copy detected device data to allocated platform space */
666 memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data],
667 sizeof(struct gk20a_platform));
668
669 pci_set_drvdata(pdev, platform);
670
671 err = nvgpu_init_enabled_flags(g);
672 if (err)
673 goto err_free_platform;
674
675 platform->g = g;
676 l->dev = &pdev->dev;
677
678 np = nvgpu_get_node(g);
679 if (of_dma_is_coherent(np)) {
680 __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
681 __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
682 }
683
684 err = pci_enable_device(pdev);
685 if (err)
686 goto err_free_platform;
687 pci_set_master(pdev);
688
689 g->pci_vendor_id = pdev->vendor;
690 g->pci_device_id = pdev->device;
691 g->pci_subsystem_vendor_id = pdev->subsystem_vendor;
692 g->pci_subsystem_device_id = pdev->subsystem_device;
693 g->pci_class = (pdev->class >> 8) & 0xFFFFU; // we only want base/sub
694 g->pci_revision = pdev->revision;
695
696 g->ina3221_dcb_index = platform->ina3221_dcb_index;
697 g->ina3221_i2c_address = platform->ina3221_i2c_address;
698 g->ina3221_i2c_port = platform->ina3221_i2c_port;
699 g->hardcode_sw_threshold = platform->hardcode_sw_threshold;
700
701#if defined(CONFIG_PCI_MSI)
702 err = pci_enable_msi(pdev);
703 if (err) {
704 nvgpu_err(g,
705 "MSI could not be enabled, falling back to legacy");
706 g->msi_enabled = false;
707 } else
708 g->msi_enabled = true;
709#endif
710
711 g->irq_stall = pdev->irq;
712 g->irq_nonstall = pdev->irq;
713 if (g->irq_stall < 0) {
714 err = -ENXIO;
715 goto err_disable_msi;
716 }
717
718 err = devm_request_threaded_irq(&pdev->dev,
719 g->irq_stall,
720 nvgpu_pci_isr,
721 nvgpu_pci_intr_thread,
722#if defined(CONFIG_PCI_MSI)
723 g->msi_enabled ? 0 :
724#endif
725 IRQF_SHARED, "nvgpu", g);
726 if (err) {
727 nvgpu_err(g,
728 "failed to request irq @ %d", g->irq_stall);
729 goto err_disable_msi;
730 }
731 disable_irq(g->irq_stall);
732
733 err = nvgpu_pci_init_support(pdev);
734 if (err)
735 goto err_free_irq;
736
737 if (strchr(dev_name(&pdev->dev), '%')) {
738 nvgpu_err(g, "illegal character in device name");
739 err = -EINVAL;
740 goto err_free_irq;
741 }
742
743 snprintf(nodefmt, sizeof(nodefmt),
744 PCI_INTERFACE_NAME, dev_name(&pdev->dev));
745
746 err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class);
747 if (err)
748 goto err_free_irq;
749
750 err = nvgpu_pci_pm_init(&pdev->dev);
751 if (err) {
752 nvgpu_err(g, "pm init failed");
753 goto err_free_irq;
754 }
755
756 err = nvgpu_nvlink_probe(g);
757 /*
758	 * ENODEV is a legal error which means there is no NVLINK;
759	 * any other error is fatal.
760 */
761 if (err) {
762 if (err != -ENODEV) {
763 nvgpu_err(g, "fatal error probing nvlink, bailing out");
764 goto err_free_irq;
765 }
766 /* Enable Semaphore SHIM on nvlink only for now. */
767 __nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false);
768 g->has_syncpoints = false;
769 } else {
770 err = nvgpu_nvhost_syncpt_init(g);
771 if (err) {
772 if (err != -ENOSYS) {
773 nvgpu_err(g, "syncpt init failed");
774 goto err_free_irq;
775 }
776 }
777 }
778
779 g->mm.has_physical_mode = false;
780
781 return 0;
782
783err_free_irq:
784 nvgpu_free_irq(g);
785err_disable_msi:
786#if defined(CONFIG_PCI_MSI)
787 if (g->msi_enabled)
788 pci_disable_msi(pdev);
789#endif
790err_free_platform:
791 nvgpu_kfree(g, platform);
792err_free_l:
793 kfree(l);
794 return err;
795}
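nvgpu_pci_probe() above stores only the base-class/sub-class bytes of the PCI class word: (pdev->class >> 8) & 0xFFFF drops the programming-interface byte. A small standalone example of that masking, using a made-up class value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical 24-bit class word: base 0x03 (display controller),
	 * sub-class 0x02 (3D controller), prog-if 0x00. */
	uint32_t class_word = 0x030200;
	uint16_t base_sub = (class_word >> 8) & 0xFFFFU;

	printf("base/sub-class = 0x%04x\n", (unsigned int)base_sub);
	/* prints: base/sub-class = 0x0302 */
	return 0;
}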
796
797static void nvgpu_pci_remove(struct pci_dev *pdev)
798{
799 struct gk20a *g = get_gk20a(&pdev->dev);
800 struct device *dev = dev_from_gk20a(g);
801 int err;
802
803 /* no support yet for unbind if DGPU is in VGPU mode */
804 if (gk20a_gpu_is_virtual(dev))
805 return;
806
807 nvgpu_nvlink_remove(g);
808
809 gk20a_driver_start_unload(g);
810 err = nvgpu_quiesce(g);
811 /* TODO: handle failure to idle */
812 WARN(err, "gpu failed to idle during driver removal");
813
814 nvgpu_free_irq(g);
815
816 nvgpu_remove(dev, &nvgpu_pci_class);
817
818#if defined(CONFIG_PCI_MSI)
819 if (g->msi_enabled)
820 pci_disable_msi(pdev);
821 else {
822		/* The IRQ does not need to be re-enabled in the MSI case,
823		 * as the line is not shared
824 */
825 enable_irq(g->irq_stall);
826 }
827#endif
828
829 /* free allocated platform data space */
830 nvgpu_kfree(g, gk20a_get_platform(&pdev->dev));
831
832 gk20a_get_platform(&pdev->dev)->g = NULL;
833 gk20a_put(g);
834}
835
836static struct pci_driver nvgpu_pci_driver = {
837 .name = "nvgpu",
838 .id_table = nvgpu_pci_table,
839 .probe = nvgpu_pci_probe,
840 .remove = nvgpu_pci_remove,
841#ifdef CONFIG_PM
842 .driver.pm = &nvgpu_pci_pm_ops,
843#endif
844};
845
846int __init nvgpu_pci_init(void)
847{
848 int ret;
849
850 ret = class_register(&nvgpu_pci_class);
851 if (ret)
852 return ret;
853
854 return pci_register_driver(&nvgpu_pci_driver);
855}
856
857void __exit nvgpu_pci_exit(void)
858{
859 pci_unregister_driver(&nvgpu_pci_driver);
860 class_unregister(&nvgpu_pci_class);
861}
diff --git a/drivers/gpu/nvgpu/common/linux/pci.h b/drivers/gpu/nvgpu/common/linux/pci.h
deleted file mode 100644
index cc6b77b1..00000000
--- a/drivers/gpu/nvgpu/common/linux/pci.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef NVGPU_PCI_H
17#define NVGPU_PCI_H
18
19#ifdef CONFIG_GK20A_PCI
20int nvgpu_pci_init(void);
21void nvgpu_pci_exit(void);
22#else
23static inline int nvgpu_pci_init(void) { return 0; }
24static inline void nvgpu_pci_exit(void) {}
25#endif
26
27#endif
diff --git a/drivers/gpu/nvgpu/common/linux/pci_usermode.c b/drivers/gpu/nvgpu/common/linux/pci_usermode.c
deleted file mode 100644
index f474bd10..00000000
--- a/drivers/gpu/nvgpu/common/linux/pci_usermode.c
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/types.h>
15
16#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
17
18#include "common/linux/os_linux.h"
19
20void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l)
21{
22 l->usermode_regs = l->regs + usermode_cfg0_r();
23 l->usermode_regs_saved = l->usermode_regs;
24}
diff --git a/drivers/gpu/nvgpu/common/linux/pci_usermode.h b/drivers/gpu/nvgpu/common/linux/pci_usermode.h
deleted file mode 100644
index 25a08d28..00000000
--- a/drivers/gpu/nvgpu/common/linux/pci_usermode.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __NVGPU_PCI_USERMODE_H__
17#define __NVGPU_PCI_USERMODE_H__
18
19struct nvgpu_os_linux;
20
21void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l);
22
23#endif
diff --git a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c
deleted file mode 100644
index 0fe1c8d2..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c
+++ /dev/null
@@ -1,269 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/hashtable.h>
18
19#include <nvgpu/kmem.h>
20#include <nvgpu/bug.h>
21#include <nvgpu/hashtable.h>
22
23#include "os_linux.h"
24
25#include "gk20a/gk20a.h"
26
27#include "platform_gk20a.h"
28#include "platform_gk20a_tegra.h"
29#include "gp10b/platform_gp10b.h"
30#include "platform_gp10b_tegra.h"
31#include "platform_ecc_sysfs.h"
32
33static u32 gen_ecc_hash_key(char *str)
34{
35 int i = 0;
36 u32 hash_key = 0x811c9dc5;
37
38 while (str[i]) {
39 hash_key *= 0x1000193;
40 hash_key ^= (u32)(str[i]);
41 i++;
42	}
43
44 return hash_key;
45}
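gen_ecc_hash_key() above is an FNV-1-style string hash (offset basis 0x811c9dc5, prime 0x1000193); the sysfs code hashes only a stat's base name, so every per-unit attribute of the same stat lands in the same hash bucket. A standalone sketch of the same loop; the stat name is only an example:

#include <stdio.h>
#include <stdint.h>

/* Same multiply-then-xor loop as gen_ecc_hash_key() above. */
static uint32_t ecc_hash(const char *str)
{
	uint32_t h = 0x811c9dc5;

	while (*str) {
		h *= 0x1000193;
		h ^= (uint32_t)*str++;
	}
	return h;
}

int main(void)
{
	printf("hash = 0x%08x\n", (unsigned int)ecc_hash("ecc_sec_count"));
	return 0;
}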
46
47static ssize_t ecc_stat_show(struct device *dev,
48 struct device_attribute *attr,
49 char *buf)
50{
51 const char *ecc_stat_full_name = attr->attr.name;
52 const char *ecc_stat_base_name;
53 unsigned int hw_unit;
54 unsigned int subunit;
55 struct gk20a_ecc_stat *ecc_stat;
56 u32 hash_key;
57 struct gk20a *g = get_gk20a(dev);
58 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
59
60 if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
61 &subunit) == 2) {
62 ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
63 hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
64 } else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
65 ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
66 } else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
67 ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
68 } else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
69 ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
70 } else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
71 ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
72 } else {
73 return snprintf(buf,
74 PAGE_SIZE,
75 "Error: Invalid ECC stat name!\n");
76 }
77
78 hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
79
80 hash_for_each_possible(l->ecc_sysfs_stats_htable,
81 ecc_stat,
82 hash_node,
83 hash_key) {
84 if (hw_unit >= ecc_stat->count)
85 continue;
86 if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
87 return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
88 }
89
90 return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
91}
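ecc_stat_show() above recovers both the hardware unit index and the stat's base name from the sysfs attribute name itself: ltc<N>_lts<M>_... maps to unit index slices_per_ltc * N + M, and the base name starts after the fixed-length "ltc0_lts0_" prefix. A small standalone sketch of that decomposition; the name and slice count are made up:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *name = "ltc1_lts1_ecc_sec_count";	/* example only */
	unsigned int slices_per_ltc = 2;		/* hypothetical */
	unsigned int ltc, lts;

	if (sscanf(name, "ltc%u_lts%u", &ltc, &lts) == 2) {
		unsigned int hw_unit = slices_per_ltc * ltc + lts;
		const char *base = name + strlen("ltc0_lts0_");

		printf("hw_unit=%u base=%s\n", hw_unit, base);
		/* prints: hw_unit=3 base=ecc_sec_count */
	}
	return 0;
}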
92
93int nvgpu_gr_ecc_stat_create(struct device *dev,
94 int is_l2, char *ecc_stat_name,
95 struct gk20a_ecc_stat *ecc_stat)
96{
97 struct gk20a *g = get_gk20a(dev);
98 char *ltc_unit_name = "ltc";
99 char *gr_unit_name = "gpc0_tpc";
100 char *lts_unit_name = "lts";
101 int num_hw_units = 0;
102 int num_subunits = 0;
103
104 if (is_l2 == 1)
105 num_hw_units = g->ltc_count;
106 else if (is_l2 == 2) {
107 num_hw_units = g->ltc_count;
108 num_subunits = g->gr.slices_per_ltc;
109 } else
110 num_hw_units = g->gr.tpc_count;
111
112
113 return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
114 is_l2 ? ltc_unit_name : gr_unit_name,
115				num_subunits ? lts_unit_name : NULL,
116 ecc_stat_name,
117 ecc_stat);
118}
119
120int nvgpu_ecc_stat_create(struct device *dev,
121 int num_hw_units, int num_subunits,
122 char *ecc_unit_name, char *ecc_subunit_name,
123 char *ecc_stat_name,
124 struct gk20a_ecc_stat *ecc_stat)
125{
126 int error = 0;
127 struct gk20a *g = get_gk20a(dev);
128 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
129 int hw_unit = 0;
130 int subunit = 0;
131 int element = 0;
132 u32 hash_key = 0;
133 struct device_attribute *dev_attr_array;
134
135 int num_elements = num_subunits ? num_subunits * num_hw_units :
136 num_hw_units;
137
138 /* Allocate arrays */
139 dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
140 num_elements);
141 ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
142 ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
143
144 for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
145 ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
146 ECC_STAT_NAME_MAX_SIZE);
147 }
148 ecc_stat->count = num_elements;
149 if (num_subunits) {
150 for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
151 for (subunit = 0; subunit < num_subunits; subunit++) {
152 element = hw_unit*num_subunits + subunit;
153
154 snprintf(ecc_stat->names[element],
155 ECC_STAT_NAME_MAX_SIZE,
156 "%s%d_%s%d_%s",
157 ecc_unit_name,
158 hw_unit,
159 ecc_subunit_name,
160 subunit,
161 ecc_stat_name);
162
163 sysfs_attr_init(&dev_attr_array[element].attr);
164 dev_attr_array[element].attr.name =
165 ecc_stat->names[element];
166 dev_attr_array[element].attr.mode =
167 VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
168 dev_attr_array[element].show = ecc_stat_show;
169 dev_attr_array[element].store = NULL;
170
171 /* Create sysfs file */
172 error |= device_create_file(dev,
173 &dev_attr_array[element]);
174
175 }
176 }
177 } else {
178 for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
179
180 /* Fill in struct device_attribute members */
181 snprintf(ecc_stat->names[hw_unit],
182 ECC_STAT_NAME_MAX_SIZE,
183 "%s%d_%s",
184 ecc_unit_name,
185 hw_unit,
186 ecc_stat_name);
187
188 sysfs_attr_init(&dev_attr_array[hw_unit].attr);
189 dev_attr_array[hw_unit].attr.name =
190 ecc_stat->names[hw_unit];
191 dev_attr_array[hw_unit].attr.mode =
192 VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
193 dev_attr_array[hw_unit].show = ecc_stat_show;
194 dev_attr_array[hw_unit].store = NULL;
195
196 /* Create sysfs file */
197 error |= device_create_file(dev,
198 &dev_attr_array[hw_unit]);
199 }
200 }
201
202 /* Add hash table entry */
203 hash_key = gen_ecc_hash_key(ecc_stat_name);
204 hash_add(l->ecc_sysfs_stats_htable,
205 &ecc_stat->hash_node,
206 hash_key);
207
208 ecc_stat->attr_array = dev_attr_array;
209
210 return error;
211}
212
213void nvgpu_gr_ecc_stat_remove(struct device *dev,
214 int is_l2, struct gk20a_ecc_stat *ecc_stat)
215{
216 struct gk20a *g = get_gk20a(dev);
217 int num_hw_units = 0;
218 int num_subunits = 0;
219
220 if (is_l2 == 1)
221 num_hw_units = g->ltc_count;
222 else if (is_l2 == 2) {
223 num_hw_units = g->ltc_count;
224 num_subunits = g->gr.slices_per_ltc;
225 } else
226 num_hw_units = g->gr.tpc_count;
227
228 nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
229}
230
231void nvgpu_ecc_stat_remove(struct device *dev,
232 int num_hw_units, int num_subunits,
233 struct gk20a_ecc_stat *ecc_stat)
234{
235 struct gk20a *g = get_gk20a(dev);
236 struct device_attribute *dev_attr_array = ecc_stat->attr_array;
237 int hw_unit = 0;
238 int subunit = 0;
239 int element = 0;
240 int num_elements = num_subunits ? num_subunits * num_hw_units :
241 num_hw_units;
242
243 /* Remove sysfs files */
244 if (num_subunits) {
245 for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
246 for (subunit = 0; subunit < num_subunits; subunit++) {
247 element = hw_unit * num_subunits + subunit;
248
249 device_remove_file(dev,
250 &dev_attr_array[element]);
251 }
252 }
253 } else {
254 for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
255 device_remove_file(dev, &dev_attr_array[hw_unit]);
256 }
257
258 /* Remove hash table entry */
259 hash_del(&ecc_stat->hash_node);
260
261 /* Free arrays */
262 nvgpu_kfree(g, ecc_stat->counters);
263
264 for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
265 nvgpu_kfree(g, ecc_stat->names[hw_unit]);
266
267 nvgpu_kfree(g, ecc_stat->names);
268 nvgpu_kfree(g, dev_attr_array);
269}
diff --git a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h
deleted file mode 100644
index d29f7bd3..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h
+++ /dev/null
@@ -1,37 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef _NVGPU_PLATFORM_SYSFS_H_
18#define _NVGPU_PLATFORM_SYSFS_H_
19
20#include "gp10b/gr_gp10b.h"
21
22#define ECC_STAT_NAME_MAX_SIZE 100
23
24int nvgpu_gr_ecc_stat_create(struct device *dev,
25 int is_l2, char *ecc_stat_name,
26 struct gk20a_ecc_stat *ecc_stat);
27int nvgpu_ecc_stat_create(struct device *dev,
28 int num_hw_units, int num_subunits,
29 char *ecc_unit_name, char *ecc_subunit_name,
30 char *ecc_stat_name,
31 struct gk20a_ecc_stat *ecc_stat);
32void nvgpu_gr_ecc_stat_remove(struct device *dev,
33 int is_l2, struct gk20a_ecc_stat *ecc_stat);
34void nvgpu_ecc_stat_remove(struct device *dev,
35 int num_hw_units, int num_subunits,
36 struct gk20a_ecc_stat *ecc_stat);
37#endif
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a.h b/drivers/gpu/nvgpu/common/linux/platform_gk20a.h
deleted file mode 100644
index 9a99b7fe..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a.h
+++ /dev/null
@@ -1,317 +0,0 @@
1/*
2 * GK20A Platform (SoC) Interface
3 *
4 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _GK20A_PLATFORM_H_
17#define _GK20A_PLATFORM_H_
18
19#include <linux/device.h>
20
21#include <nvgpu/lock.h>
22
23#include "gk20a/gk20a.h"
24
25#define GK20A_CLKS_MAX 4
26
27struct gk20a;
28struct channel_gk20a;
29struct gr_ctx_buffer_desc;
30struct gk20a_scale_profile;
31
32struct secure_page_buffer {
33 void (*destroy)(struct gk20a *, struct secure_page_buffer *);
34 size_t size;
35 dma_addr_t phys;
36 size_t used;
37};
38
39struct gk20a_platform {
40 /* Populated by the gk20a driver before probing the platform. */
41 struct gk20a *g;
42
43 /* Should be populated at probe. */
44 bool can_railgate_init;
45
46 /* Should be populated at probe. */
47 bool can_elpg_init;
48
49 /* Should be populated at probe. */
50 bool has_syncpoints;
51
52 /* channel limit after which to start aggressive sync destroy */
53 unsigned int aggressive_sync_destroy_thresh;
54
55 /* flag to set sync destroy aggressiveness */
56 bool aggressive_sync_destroy;
57
58 /* set if ASPM should be disabled on boot; only makes sense for PCI */
59 bool disable_aspm;
60
61 /* Set if the platform can unify the small/large address spaces. */
62 bool unify_address_spaces;
63
64 /* Clock configuration is stored here. Platform probe is responsible
65 * for filling this data. */
66 struct clk *clk[GK20A_CLKS_MAX];
67 int num_clks;
68 int maxmin_clk_id;
69
70#ifdef CONFIG_RESET_CONTROLLER
71 /* Reset control for device */
72 struct reset_control *reset_control;
73#endif
74
75 /* Delay before rail gated */
76 int railgate_delay_init;
77
78 /* init value for slowdown factor */
79 u8 ldiv_slowdown_factor_init;
80
81 /* Second Level Clock Gating: true = enable false = disable */
82 bool enable_slcg;
83
84	/* Block Level Clock Gating: true = enable false = disable */
85 bool enable_blcg;
86
87	/* Engine Level Clock Gating: true = enable false = disable */
88 bool enable_elcg;
89
90 /* Should be populated at probe. */
91 bool can_slcg;
92
93 /* Should be populated at probe. */
94 bool can_blcg;
95
96 /* Should be populated at probe. */
97 bool can_elcg;
98
99	/* Engine Level Power Gating: true = enable false = disable */
100 bool enable_elpg;
101
102	/* Adaptive ELPG: true = enable false = disable */
103 bool enable_aelpg;
104
105 /* PMU Perfmon: true = enable false = disable */
106 bool enable_perfmon;
107
108	/* Memory System Clock Gating: true = enable false = disable */
109 bool enable_mscg;
110
111	/* Timeout for per-channel watchdog (in ms) */
112 u32 ch_wdt_timeout_ms;
113
114 /* Disable big page support */
115 bool disable_bigpage;
116
117 /*
118 * gk20a_do_idle() API can take GPU either into rail gate or CAR reset
119 * This flag can be used to force CAR reset case instead of rail gate
120 */
121 bool force_reset_in_do_idle;
122
123	/* guest/vm id, needed for IPA to PA translation */
124 int vmid;
125
126 /* Initialize the platform interface of the gk20a driver.
127 *
128 * The platform implementation of this function must
129 * - set the power and clocks of the gk20a device to a known
130 * state, and
131 * - populate the gk20a_platform structure (a pointer to the
132 * structure can be obtained by calling gk20a_get_platform).
133 *
134 * After this function is finished, the driver will initialise
135 * pm runtime and genpd based on the platform configuration.
136 */
137 int (*probe)(struct device *dev);
138
139 /* Second stage initialisation - called once all power management
140 * initialisations are done.
141 */
142 int (*late_probe)(struct device *dev);
143
144 /* Remove device after power management has been done
145 */
146 int (*remove)(struct device *dev);
147
148 /* Poweron platform dependencies */
149 int (*busy)(struct device *dev);
150
151 /* Powerdown platform dependencies */
152 void (*idle)(struct device *dev);
153
154 /* Preallocated VPR buffer for kernel */
155 size_t secure_buffer_size;
156 struct secure_page_buffer secure_buffer;
157
158 /* Device is going to be suspended */
159 int (*suspend)(struct device *);
160
161 /* Called to turn off the device */
162 int (*railgate)(struct device *dev);
163
164 /* Called to turn on the device */
165 int (*unrailgate)(struct device *dev);
166 struct nvgpu_mutex railgate_lock;
167
168 /* Called to check state of device */
169 bool (*is_railgated)(struct device *dev);
170
171 /* get supported frequency list */
172 int (*get_clk_freqs)(struct device *pdev,
173 unsigned long **freqs, int *num_freqs);
174
175 /* clk related supported functions */
176 long (*clk_round_rate)(struct device *dev,
177 unsigned long rate);
178
179 /* Called to register GPCPLL with common clk framework */
180 int (*clk_register)(struct gk20a *g);
181
182 /* platform specific scale init quirks */
183 void (*initscale)(struct device *dev);
184
185 /* Postscale callback is called after frequency change */
186 void (*postscale)(struct device *dev,
187 unsigned long freq);
188
189 /* Pre callback is called before frequency change */
190 void (*prescale)(struct device *dev);
191
192 /* Devfreq governor name. If scaling is enabled, we request
193 * this governor to be used in scaling */
194 const char *devfreq_governor;
195
196 /* Quality of service notifier callback. If this is set, the scaling
197	 * routines will register a callback with QoS. Each time we receive
198 * a new value, this callback gets called. */
199 int (*qos_notify)(struct notifier_block *nb,
200 unsigned long n, void *p);
201
202 /* Called as part of debug dump. If the gpu gets hung, this function
203 * is responsible for delivering all necessary debug data of other
204 * hw units which may interact with the gpu without direct supervision
205 * of the CPU.
206 */
207 void (*dump_platform_dependencies)(struct device *dev);
208
209 /* Defined when SMMU stage-2 is enabled, and we need to use physical
210 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
211 * configuration, when dGPU is in pass-through mode.
212 */
213 u64 (*phys_addr)(struct gk20a *g, u64 ipa);
214
215 /* Callbacks to assert/deassert GPU reset */
216 int (*reset_assert)(struct device *dev);
217 int (*reset_deassert)(struct device *dev);
218 struct clk *clk_reset;
219 struct dvfs_rail *gpu_rail;
220
221 bool virtual_dev;
222#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
223 void *vgpu_priv;
224#endif
225 /* source frequency for ptimer in hz */
226 u32 ptimer_src_freq;
227
228#ifdef CONFIG_NVGPU_SUPPORT_CDE
229 bool has_cde;
230#endif
231
232 /* soc name for finding firmware files */
233 const char *soc_name;
234
235 /* false if vidmem aperture actually points to sysmem */
236 bool honors_aperture;
237 /* unified or split memory with separate vidmem? */
238 bool unified_memory;
239
240 /*
241 * DMA mask for Linux (both coh and non-coh). If not set defaults to
242	 * 0x3ffffffff (i.e. a 34-bit mask).
243 */
244 u64 dma_mask;
245
246 /* minimum supported VBIOS version */
247 u32 vbios_min_version;
248
249 /* true if we run preos microcode on this board */
250 bool run_preos;
251
252 /* true if we need to program sw threshold for
253 * power limits
254 */
255 bool hardcode_sw_threshold;
256
257 /* i2c device index, port and address for INA3221 */
258 u32 ina3221_dcb_index;
259 u32 ina3221_i2c_address;
260 u32 ina3221_i2c_port;
261
262 /* stream id to use */
263 u32 ltc_streamid;
264
265 /* scaling rate */
266 unsigned long cached_rate;
267};
268
269static inline struct gk20a_platform *gk20a_get_platform(
270 struct device *dev)
271{
272 return (struct gk20a_platform *)dev_get_drvdata(dev);
273}
274
275#ifdef CONFIG_TEGRA_GK20A
276extern struct gk20a_platform gm20b_tegra_platform;
277extern struct gk20a_platform gp10b_tegra_platform;
278extern struct gk20a_platform gv11b_tegra_platform;
279#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
280extern struct gk20a_platform vgpu_tegra_platform;
281extern struct gk20a_platform gv11b_vgpu_tegra_platform;
282#endif
283#endif
284
285int gk20a_tegra_busy(struct device *dev);
286void gk20a_tegra_idle(struct device *dev);
287void gk20a_tegra_debug_dump(struct device *pdev);
288
289static inline struct gk20a *get_gk20a(struct device *dev)
290{
291 return gk20a_get_platform(dev)->g;
292}
293static inline struct gk20a *gk20a_from_dev(struct device *dev)
294{
295 if (!dev)
296 return NULL;
297
298 return ((struct gk20a_platform *)dev_get_drvdata(dev))->g;
299}
300static inline bool gk20a_gpu_is_virtual(struct device *dev)
301{
302 struct gk20a_platform *platform = dev_get_drvdata(dev);
303
304 return platform->virtual_dev;
305}
306
307static inline int support_gk20a_pmu(struct device *dev)
308{
309 if (IS_ENABLED(CONFIG_GK20A_PMU)) {
310 /* gPMU is not supported for vgpu */
311 return !gk20a_gpu_is_virtual(dev);
312 }
313
314 return 0;
315}
316
317#endif
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
deleted file mode 100644
index af55e5b6..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
+++ /dev/null
@@ -1,957 +0,0 @@
1/*
2 * GK20A Tegra Platform Interface
3 *
4 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/clkdev.h>
17#include <linux/of_platform.h>
18#include <linux/debugfs.h>
19#include <linux/platform_data/tegra_edp.h>
20#include <linux/delay.h>
21#include <uapi/linux/nvgpu.h>
22#include <linux/dma-buf.h>
23#include <linux/dma-attrs.h>
24#include <linux/nvmap.h>
25#include <linux/reset.h>
26#if defined(CONFIG_TEGRA_DVFS)
27#include <linux/tegra_soctherm.h>
28#endif
29#include <linux/platform/tegra/common.h>
30#include <linux/platform/tegra/mc.h>
31#include <linux/clk/tegra.h>
32#if defined(CONFIG_COMMON_CLK)
33#include <soc/tegra/tegra-dvfs.h>
34#endif
35#ifdef CONFIG_TEGRA_BWMGR
36#include <linux/platform/tegra/emc_bwmgr.h>
37#endif
38
39#include <linux/platform/tegra/tegra_emc.h>
40#include <soc/tegra/chip-id.h>
41
42#include <nvgpu/kmem.h>
43#include <nvgpu/bug.h>
44#include <nvgpu/enabled.h>
45#include <nvgpu/nvhost.h>
46
47#include <nvgpu/linux/dma.h>
48
49#include "gk20a/gk20a.h"
50#include "gm20b/clk_gm20b.h"
51
52#include "scale.h"
53#include "platform_gk20a.h"
54#include "clk.h"
55#include "os_linux.h"
56
57#include "../../../arch/arm/mach-tegra/iomap.h"
58#include <soc/tegra/pmc.h>
59
60#define TEGRA_GK20A_BW_PER_FREQ 32
61#define TEGRA_GM20B_BW_PER_FREQ 64
62#define TEGRA_DDR3_BW_PER_FREQ 16
63#define TEGRA_DDR4_BW_PER_FREQ 16
64#define MC_CLIENT_GPU 34
65#define PMC_GPU_RG_CNTRL_0 0x2d4
66
67#ifdef CONFIG_COMMON_CLK
68#define GPU_RAIL_NAME "vdd-gpu"
69#else
70#define GPU_RAIL_NAME "vdd_gpu"
71#endif
72
73extern struct device tegra_vpr_dev;
74
75#ifdef CONFIG_TEGRA_BWMGR
76struct gk20a_emc_params {
77 unsigned long bw_ratio;
78 unsigned long freq_last_set;
79 struct tegra_bwmgr_client *bwmgr_cl;
80};
81#else
82struct gk20a_emc_params {
83 unsigned long bw_ratio;
84 unsigned long freq_last_set;
85};
86#endif
87
88#define MHZ_TO_HZ(x) ((x) * 1000000)
89#define HZ_TO_MHZ(x) ((x) / 1000000)
90
91static void gk20a_tegra_secure_page_destroy(struct gk20a *g,
92 struct secure_page_buffer *secure_buffer)
93{
94 DEFINE_DMA_ATTRS(attrs);
95 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
96 dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
97 (void *)(uintptr_t)secure_buffer->phys,
98 secure_buffer->phys, __DMA_ATTR(attrs));
99
100 secure_buffer->destroy = NULL;
101}
102
103static int gk20a_tegra_secure_alloc(struct gk20a *g,
104 struct gr_ctx_buffer_desc *desc,
105 size_t size)
106{
107 struct device *dev = dev_from_gk20a(g);
108 struct gk20a_platform *platform = dev_get_drvdata(dev);
109 struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
110 dma_addr_t phys;
111 struct sg_table *sgt;
112 struct page *page;
113 int err = 0;
114 size_t aligned_size = PAGE_ALIGN(size);
115
116 if (nvgpu_mem_is_valid(&desc->mem))
117 return 0;
118
119 /* We ran out of preallocated memory */
120 if (secure_buffer->used + aligned_size > secure_buffer->size) {
121 nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used",
122 size, secure_buffer->used, secure_buffer->size);
123 return -ENOMEM;
124 }
125
126 phys = secure_buffer->phys + secure_buffer->used;
127
128 sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt));
129 if (!sgt) {
130 nvgpu_err(platform->g, "failed to allocate memory");
131 return -ENOMEM;
132 }
133 err = sg_alloc_table(sgt, 1, GFP_KERNEL);
134 if (err) {
135 nvgpu_err(platform->g, "failed to allocate sg_table");
136 goto fail_sgt;
137 }
138 page = phys_to_page(phys);
139 sg_set_page(sgt->sgl, page, size, 0);
140 /* This bypasses SMMU for VPR during gmmu_map. */
141 sg_dma_address(sgt->sgl) = 0;
142
143 desc->destroy = NULL;
144
145 desc->mem.priv.sgt = sgt;
146 desc->mem.size = size;
147 desc->mem.aperture = APERTURE_SYSMEM;
148
149 secure_buffer->used += aligned_size;
150
151 return err;
152
153fail_sgt:
154 nvgpu_kfree(platform->g, sgt);
155 return err;
156}
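gk20a_tegra_secure_alloc() above hands out context buffers from the preallocated VPR region with simple bump allocation: the next physical address is base + used, and used advances by the page-aligned request size until the region is exhausted. A minimal standalone sketch of that bookkeeping; the address and sizes are illustrative:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

struct secure_buf {
	uint64_t phys;	/* base physical address of the carve-out */
	size_t size;	/* total carve-out size */
	size_t used;	/* bytes handed out so far */
};

/* Returns the carved-out physical address, or 0 when the buffer is full. */
static uint64_t secure_bump_alloc(struct secure_buf *b, size_t size)
{
	size_t aligned = PAGE_ALIGN(size);
	uint64_t phys;

	if (b->used + aligned > b->size)
		return 0;

	phys = b->phys + b->used;
	b->used += aligned;
	return phys;
}

int main(void)
{
	/* Hypothetical 335872-byte VPR carve-out at 0x80000000. */
	struct secure_buf vpr = { 0x80000000ULL, 335872, 0 };

	printf("ctx0 at 0x%llx\n",
	       (unsigned long long)secure_bump_alloc(&vpr, 12345));
	printf("ctx1 at 0x%llx\n",
	       (unsigned long long)secure_bump_alloc(&vpr, 12345));
	return 0;
}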
157
158/*
159 * gk20a_tegra_get_emc_rate()
160 *
161 * This function returns the minimum emc clock based on gpu frequency
162 */
163
164static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g,
165 struct gk20a_emc_params *emc_params)
166{
167 unsigned long gpu_freq, gpu_fmax_at_vmin;
168 unsigned long emc_rate, emc_scale;
169
170 gpu_freq = clk_get_rate(g->clk.tegra_clk);
171 gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t(
172 clk_get_parent(g->clk.tegra_clk));
173
174 /* When scaling emc, account for the gpu load when the
175 * gpu frequency is less than or equal to fmax@vmin. */
176 if (gpu_freq <= gpu_fmax_at_vmin)
177 emc_scale = min(g->pmu.load_avg, g->emc3d_ratio);
178 else
179 emc_scale = g->emc3d_ratio;
180
181 emc_rate =
182 (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000;
183
184 return MHZ_TO_HZ(emc_rate);
185}
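gk20a_tegra_get_emc_rate() above converts the GPU rate to MHz, multiplies by the per-SoC bandwidth ratio and by a scale factor (the smaller of the PMU load average and emc3d_ratio when running at or below fmax@vmin, otherwise emc3d_ratio), then divides by 1000. The example below assumes the scale factor is expressed in parts per thousand, which is set elsewhere in the driver; the numbers are made up:

#include <stdio.h>

#define MHZ_TO_HZ(x) ((x) * 1000000UL)
#define HZ_TO_MHZ(x) ((x) / 1000000UL)

int main(void)
{
	/* Illustrative values: 614.4 MHz GPU clock, bw_ratio 4
	 * (64/16 as for GM20B), emc_scale 500 i.e. ~50% load. */
	unsigned long gpu_freq = 614400000UL;
	unsigned long bw_ratio = 4;
	unsigned long emc_scale = 500;
	unsigned long emc_mhz;

	emc_mhz = (HZ_TO_MHZ(gpu_freq) * bw_ratio * emc_scale) / 1000;
	printf("emc floor = %lu Hz\n", MHZ_TO_HZ(emc_mhz));
	/* prints: emc floor = 1228000000 Hz */
	return 0;
}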
186
187/*
188 * gk20a_tegra_prescale(dev)
189 *
190 * This function informs EDP about changed constraints.
191 */
192
193static void gk20a_tegra_prescale(struct device *dev)
194{
195 struct gk20a *g = get_gk20a(dev);
196 u32 avg = 0;
197
198 nvgpu_pmu_load_norm(g, &avg);
199 tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk));
200}
201
202/*
203 * gk20a_tegra_calibrate_emc()
204 *
205 */
206
207static void gk20a_tegra_calibrate_emc(struct device *dev,
208 struct gk20a_emc_params *emc_params)
209{
210 enum tegra_chipid cid = tegra_get_chip_id();
211 long gpu_bw, emc_bw;
212
213 /* store gpu bw based on soc */
214 switch (cid) {
215 case TEGRA210:
216 gpu_bw = TEGRA_GM20B_BW_PER_FREQ;
217 break;
218 case TEGRA124:
219 case TEGRA132:
220 gpu_bw = TEGRA_GK20A_BW_PER_FREQ;
221 break;
222 default:
223 gpu_bw = 0;
224 break;
225 }
226
227 /* TODO detect DDR type.
228 * Okay for now since DDR3 and DDR4 have the same BW ratio */
229 emc_bw = TEGRA_DDR3_BW_PER_FREQ;
230
231 /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq
232 * NOTE the ratio must come out as an integer */
233 emc_params->bw_ratio = (gpu_bw / emc_bw);
234}
235
236#ifdef CONFIG_TEGRA_BWMGR
237#ifdef CONFIG_TEGRA_DVFS
238static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb)
239{
240 struct gk20a_scale_profile *profile = platform->g->scale_profile;
241 struct gk20a_emc_params *params;
242 unsigned long rate;
243
244 if (!profile || !profile->private_data)
245 return;
246
247 params = (struct gk20a_emc_params *)profile->private_data;
248 rate = (enb) ? params->freq_last_set : 0;
249 tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR);
250}
251#endif
252
253static void gm20b_tegra_postscale(struct device *dev, unsigned long freq)
254{
255 struct gk20a_platform *platform = dev_get_drvdata(dev);
256 struct gk20a_scale_profile *profile = platform->g->scale_profile;
257 struct gk20a_emc_params *emc_params;
258 unsigned long emc_rate;
259
260 if (!profile || !profile->private_data)
261 return;
262
263 emc_params = profile->private_data;
264 emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params);
265
266 if (emc_rate > tegra_bwmgr_get_max_emc_rate())
267 emc_rate = tegra_bwmgr_get_max_emc_rate();
268
269 emc_params->freq_last_set = emc_rate;
270 if (platform->is_railgated && platform->is_railgated(dev))
271 return;
272
273 tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate,
274 TEGRA_BWMGR_SET_EMC_FLOOR);
275
276}
277
278#endif
279
280#if defined(CONFIG_TEGRA_DVFS)
281/*
282 * gk20a_tegra_is_railgated()
283 *
284 * Check status of gk20a power rail
285 */
286
287static bool gk20a_tegra_is_railgated(struct device *dev)
288{
289 struct gk20a *g = get_gk20a(dev);
290 struct gk20a_platform *platform = dev_get_drvdata(dev);
291 bool ret = false;
292
293 if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
294 ret = !tegra_dvfs_is_rail_up(platform->gpu_rail);
295
296 return ret;
297}
298
299/*
300 * gm20b_tegra_railgate()
301 *
302 * Gate (disable) gm20b power rail
303 */
304
305static int gm20b_tegra_railgate(struct device *dev)
306{
307 struct gk20a *g = get_gk20a(dev);
308 struct gk20a_platform *platform = dev_get_drvdata(dev);
309 int ret = 0;
310
311 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) ||
312 !tegra_dvfs_is_rail_up(platform->gpu_rail))
313 return 0;
314
315 tegra_mc_flush(MC_CLIENT_GPU);
316
317 udelay(10);
318
319 /* enable clamp */
320 tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0);
321 tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);
322
323 udelay(10);
324
325 platform->reset_assert(dev);
326
327 udelay(10);
328
329 /*
330 * GPCPLL is already disabled before entering this function; reference
331 * clocks are enabled until now - disable them just before rail gating
332 */
333 clk_disable_unprepare(platform->clk_reset);
334 clk_disable_unprepare(platform->clk[0]);
335 clk_disable_unprepare(platform->clk[1]);
336 if (platform->clk[3])
337 clk_disable_unprepare(platform->clk[3]);
338
339 udelay(10);
340
341 tegra_soctherm_gpu_tsens_invalidate(1);
342
343 if (tegra_dvfs_is_rail_up(platform->gpu_rail)) {
344 ret = tegra_dvfs_rail_power_down(platform->gpu_rail);
345 if (ret)
346 goto err_power_off;
347 } else
348 pr_info("No GPU regulator?\n");
349
350#ifdef CONFIG_TEGRA_BWMGR
351 gm20b_bwmgr_set_rate(platform, false);
352#endif
353
354 return 0;
355
356err_power_off:
357 nvgpu_err(platform->g, "Could not railgate GPU");
358 return ret;
359}
360
361
362/*
363 * gm20b_tegra_unrailgate()
364 *
365 * Ungate (enable) gm20b power rail
366 */
367
368static int gm20b_tegra_unrailgate(struct device *dev)
369{
370 struct gk20a_platform *platform = dev_get_drvdata(dev);
371 struct gk20a *g = platform->g;
372 int ret = 0;
373 bool first = false;
374
375 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
376 return 0;
377
378 ret = tegra_dvfs_rail_power_up(platform->gpu_rail);
379 if (ret)
380 return ret;
381
382#ifdef CONFIG_TEGRA_BWMGR
383 gm20b_bwmgr_set_rate(platform, true);
384#endif
385
386 tegra_soctherm_gpu_tsens_invalidate(0);
387
388 if (!platform->clk_reset) {
389 platform->clk_reset = clk_get(dev, "gpu_gate");
390 if (IS_ERR(platform->clk_reset)) {
391 nvgpu_err(g, "fail to get gpu reset clk");
392 goto err_clk_on;
393 }
394 }
395
396 if (!first) {
397 ret = clk_prepare_enable(platform->clk_reset);
398 if (ret) {
399 nvgpu_err(g, "could not turn on gpu_gate");
400 goto err_clk_on;
401 }
402
403 ret = clk_prepare_enable(platform->clk[0]);
404 if (ret) {
405 nvgpu_err(g, "could not turn on gpu pll");
406 goto err_clk_on;
407 }
408 ret = clk_prepare_enable(platform->clk[1]);
409 if (ret) {
410 nvgpu_err(g, "could not turn on pwr clock");
411 goto err_clk_on;
412 }
413
414 if (platform->clk[3]) {
415 ret = clk_prepare_enable(platform->clk[3]);
416 if (ret) {
417 nvgpu_err(g, "could not turn on fuse clock");
418 goto err_clk_on;
419 }
420 }
421 }
422
423 udelay(10);
424
425 platform->reset_assert(dev);
426
427 udelay(10);
428
429 tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0);
430 tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);
431
432 udelay(10);
433
434 clk_disable(platform->clk_reset);
435 platform->reset_deassert(dev);
436 clk_enable(platform->clk_reset);
437
438 /* Flush MC after boot/railgate/SC7 */
439 tegra_mc_flush(MC_CLIENT_GPU);
440
441 udelay(10);
442
443 tegra_mc_flush_done(MC_CLIENT_GPU);
444
445 udelay(10);
446
447 return 0;
448
449err_clk_on:
450 tegra_dvfs_rail_power_down(platform->gpu_rail);
451
452 return ret;
453}
454#endif
455
456
457static struct {
458 char *name;
459 unsigned long default_rate;
460} tegra_gk20a_clocks[] = {
461 {"gpu_ref", UINT_MAX},
462 {"pll_p_out5", 204000000},
463 {"emc", UINT_MAX},
464 {"fuse", UINT_MAX},
465};
466
467
468
469/*
470 * gk20a_tegra_get_clocks()
471 *
472 * This function finds clocks in tegra platform and populates
473 * the clock information to gk20a platform data.
474 */
475
476static int gk20a_tegra_get_clocks(struct device *dev)
477{
478 struct gk20a_platform *platform = dev_get_drvdata(dev);
479 char devname[16];
480 unsigned int i;
481 int ret = 0;
482
483 BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks));
484
485 snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev));
486
487 platform->num_clks = 0;
488 for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
489 long rate = tegra_gk20a_clocks[i].default_rate;
490 struct clk *c;
491
492 c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
493 if (IS_ERR(c)) {
494 ret = PTR_ERR(c);
495 goto err_get_clock;
496 }
497 rate = clk_round_rate(c, rate);
498 clk_set_rate(c, rate);
499 platform->clk[i] = c;
500 if (i == 0)
501 platform->cached_rate = rate;
502 }
503 platform->num_clks = i;
504
505 return 0;
506
507err_get_clock:
508
509 while (i--)
510 clk_put(platform->clk[i]);
511 return ret;
512}
513
514#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
515static int gm20b_tegra_reset_assert(struct device *dev)
516{
517 struct gk20a_platform *platform = gk20a_get_platform(dev);
518
519 if (!platform->reset_control) {
520 WARN(1, "Reset control not initialized\n");
521 return -ENOSYS;
522 }
523
524 return reset_control_assert(platform->reset_control);
525}
526
527static int gm20b_tegra_reset_deassert(struct device *dev)
528{
529 struct gk20a_platform *platform = gk20a_get_platform(dev);
530
531 if (!platform->reset_control) {
532 WARN(1, "Reset control not initialized\n");
533 return -ENOSYS;
534 }
535
536 return reset_control_deassert(platform->reset_control);
537}
538#endif
539
540static void gk20a_tegra_scale_init(struct device *dev)
541{
542 struct gk20a_platform *platform = gk20a_get_platform(dev);
543 struct gk20a_scale_profile *profile = platform->g->scale_profile;
544 struct gk20a_emc_params *emc_params;
545 struct gk20a *g = platform->g;
546
547 if (!profile)
548 return;
549
550 if (profile->private_data)
551 return;
552
553 emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params));
554 if (!emc_params)
555 return;
556
557 emc_params->freq_last_set = -1;
558 gk20a_tegra_calibrate_emc(dev, emc_params);
559
560#ifdef CONFIG_TEGRA_BWMGR
561 emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
562 if (!emc_params->bwmgr_cl) {
563 nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__);
564 return;
565 }
566#endif
567
568 profile->private_data = emc_params;
569}
570
571static void gk20a_tegra_scale_exit(struct device *dev)
572{
573 struct gk20a_platform *platform = dev_get_drvdata(dev);
574 struct gk20a_scale_profile *profile = platform->g->scale_profile;
575 struct gk20a_emc_params *emc_params;
576
577 if (!profile)
578 return;
579
580 emc_params = profile->private_data;
581#ifdef CONFIG_TEGRA_BWMGR
582 tegra_bwmgr_unregister(emc_params->bwmgr_cl);
583#endif
584
585 nvgpu_kfree(platform->g, profile->private_data);
586}
587
588void gk20a_tegra_debug_dump(struct device *dev)
589{
590#ifdef CONFIG_TEGRA_GK20A_NVHOST
591 struct gk20a_platform *platform = gk20a_get_platform(dev);
592 struct gk20a *g = platform->g;
593
594 if (g->nvhost_dev)
595 nvgpu_nvhost_debug_dump_device(g->nvhost_dev);
596#endif
597}
598
599int gk20a_tegra_busy(struct device *dev)
600{
601#ifdef CONFIG_TEGRA_GK20A_NVHOST
602 struct gk20a_platform *platform = gk20a_get_platform(dev);
603 struct gk20a *g = platform->g;
604
605 if (g->nvhost_dev)
606 return nvgpu_nvhost_module_busy_ext(g->nvhost_dev);
607#endif
608 return 0;
609}
610
611void gk20a_tegra_idle(struct device *dev)
612{
613#ifdef CONFIG_TEGRA_GK20A_NVHOST
614 struct gk20a_platform *platform = gk20a_get_platform(dev);
615 struct gk20a *g = platform->g;
616
617 if (g->nvhost_dev)
618 nvgpu_nvhost_module_idle_ext(g->nvhost_dev);
619#endif
620}
621
622int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform)
623{
624 struct gk20a *g = platform->g;
625 struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
626 DEFINE_DMA_ATTRS(attrs);
627 dma_addr_t iova;
628
629 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
630 return 0;
631
632 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
633 (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova,
634 GFP_KERNEL, __DMA_ATTR(attrs));
635 /* Some platforms disable VPR. In that case VPR allocations always
636 * fail. Just disable VPR usage in nvgpu in that case. */
637 if (dma_mapping_error(&tegra_vpr_dev, iova))
638 return 0;
639
640 secure_buffer->size = platform->secure_buffer_size;
641 secure_buffer->phys = iova;
642 secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
643
644 g->ops.secure_alloc = gk20a_tegra_secure_alloc;
645 __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true);
646
647 return 0;
648}
649
650#ifdef CONFIG_COMMON_CLK
651static struct clk *gk20a_clk_get(struct gk20a *g)
652{
653 if (!g->clk.tegra_clk) {
654 struct clk *clk;
655 char clk_dev_id[32];
656 struct device *dev = dev_from_gk20a(g);
657
658 snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev));
659
660 clk = clk_get_sys(clk_dev_id, "gpu");
661 if (IS_ERR(clk)) {
662 nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n",
663 clk_dev_id);
664 return NULL;
665 }
666 g->clk.tegra_clk = clk;
667 }
668
669 return g->clk.tegra_clk;
670}
671
672static int gm20b_clk_prepare_ops(struct clk_hw *hw)
673{
674 struct clk_gk20a *clk = to_clk_gk20a(hw);
675 return gm20b_clk_prepare(clk);
676}
677
678static void gm20b_clk_unprepare_ops(struct clk_hw *hw)
679{
680 struct clk_gk20a *clk = to_clk_gk20a(hw);
681 gm20b_clk_unprepare(clk);
682}
683
684static int gm20b_clk_is_prepared_ops(struct clk_hw *hw)
685{
686 struct clk_gk20a *clk = to_clk_gk20a(hw);
687 return gm20b_clk_is_prepared(clk);
688}
689
690static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate)
691{
692 struct clk_gk20a *clk = to_clk_gk20a(hw);
693 return gm20b_recalc_rate(clk, parent_rate);
694}
695
696static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate,
697 unsigned long parent_rate)
698{
699 struct clk_gk20a *clk = to_clk_gk20a(hw);
700 return gm20b_gpcclk_set_rate(clk, rate, parent_rate);
701}
702
703static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate,
704 unsigned long *parent_rate)
705{
706 struct clk_gk20a *clk = to_clk_gk20a(hw);
707 return gm20b_round_rate(clk, rate, parent_rate);
708}
709
710static const struct clk_ops gm20b_clk_ops = {
711 .prepare = gm20b_clk_prepare_ops,
712 .unprepare = gm20b_clk_unprepare_ops,
713 .is_prepared = gm20b_clk_is_prepared_ops,
714 .recalc_rate = gm20b_recalc_rate_ops,
715 .set_rate = gm20b_gpcclk_set_rate_ops,
716 .round_rate = gm20b_round_rate_ops,
717};
718
719static int gm20b_register_gpcclk(struct gk20a *g)
720{
721 const char *parent_name = "pllg_ref";
722 struct clk_gk20a *clk = &g->clk;
723 struct clk_init_data init;
724 struct clk *c;
725 int err = 0;
726
727 /* make sure the clock is available */
728 if (!gk20a_clk_get(g))
729 return -ENOSYS;
730
731 err = gm20b_init_clk_setup_sw(g);
732 if (err)
733 return err;
734
735 init.name = "gpcclk";
736 init.ops = &gm20b_clk_ops;
737 init.parent_names = &parent_name;
738 init.num_parents = 1;
739 init.flags = 0;
740
741 /* Data in .init is copied by clk_register(), so stack variable OK */
742 clk->hw.init = &init;
743 c = clk_register(dev_from_gk20a(g), &clk->hw);
744 if (IS_ERR(c)) {
745 nvgpu_err(g, "Failed to register GPCPLL clock");
746 return -EINVAL;
747 }
748
749 clk->g = g;
750 clk_register_clkdev(c, "gpcclk", "gpcclk");
751
752 return err;
753}
754#endif /* CONFIG_COMMON_CLK */
755
756static int gk20a_tegra_probe(struct device *dev)
757{
758 struct gk20a_platform *platform = dev_get_drvdata(dev);
759 struct device_node *np = dev->of_node;
760 bool joint_xpu_rail = false;
761 int ret;
762 struct gk20a *g = platform->g;
763
764#ifdef CONFIG_COMMON_CLK
765 /* DVFS is not guaranteed to be initialized at the time of probe on
766 * kernels with Common Clock Framework enabled.
767 */
768 if (!platform->gpu_rail) {
769 platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME);
770 if (!platform->gpu_rail) {
771 nvgpu_log_info(g, "deferring probe no gpu_rail");
772 return -EPROBE_DEFER;
773 }
774 }
775
776 if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) {
777 nvgpu_log_info(g, "deferring probe gpu_rail not ready");
778 return -EPROBE_DEFER;
779 }
780#endif
781
782#ifdef CONFIG_TEGRA_GK20A_NVHOST
783 ret = nvgpu_get_nvhost_dev(platform->g);
784 if (ret)
785 return ret;
786#endif
787
788#ifdef CONFIG_OF
789 joint_xpu_rail = of_property_read_bool(of_chosen,
790 "nvidia,tegra-joint_xpu_rail");
791#endif
792
793 if (joint_xpu_rail) {
794 nvgpu_log_info(g, "XPU rails are joint\n");
795 platform->g->can_railgate = false;
796 }
797
798 platform->g->clk.gpc_pll.id = GK20A_GPC_PLL;
799 if (tegra_get_chip_id() == TEGRA210) {
800 /* WAR for bug 1547668: Disable railgating and scaling
801 irrespective of platform data if the rework was not made. */
802 np = of_find_node_by_path("/gpu-dvfs-rework");
803 if (!(np && of_device_is_available(np))) {
804 platform->devfreq_governor = "";
805 dev_warn(dev, "board does not support scaling");
806 }
807 platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1;
808 if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p)
809 platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1;
810 }
811
812 if (tegra_get_chip_id() == TEGRA132)
813 platform->soc_name = "tegra13x";
814
815 gk20a_tegra_get_clocks(dev);
816 nvgpu_linux_init_clk_support(platform->g);
817 ret = gk20a_tegra_init_secure_alloc(platform);
818 if (ret)
819 return ret;
820
821 if (platform->clk_register) {
822 ret = platform->clk_register(platform->g);
823 if (ret)
824 return ret;
825 }
826
827 return 0;
828}
829
830static int gk20a_tegra_late_probe(struct device *dev)
831{
832 return 0;
833}
834
835static int gk20a_tegra_remove(struct device *dev)
836{
837 /* deinitialise tegra specific scaling quirks */
838 gk20a_tegra_scale_exit(dev);
839
840#ifdef CONFIG_TEGRA_GK20A_NVHOST
841 nvgpu_free_nvhost_dev(get_gk20a(dev));
842#endif
843
844 return 0;
845}
846
847static int gk20a_tegra_suspend(struct device *dev)
848{
849 tegra_edp_notify_gpu_load(0, 0);
850 return 0;
851}
852
853#if defined(CONFIG_COMMON_CLK)
854static long gk20a_round_clk_rate(struct device *dev, unsigned long rate)
855{
856 struct gk20a_platform *platform = gk20a_get_platform(dev);
857 struct gk20a *g = platform->g;
858
859 /* make sure the clock is available */
860 if (!gk20a_clk_get(g))
861 return rate;
862
863 return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate);
864}
865
866static int gk20a_clk_get_freqs(struct device *dev,
867 unsigned long **freqs, int *num_freqs)
868{
869 struct gk20a_platform *platform = gk20a_get_platform(dev);
870 struct gk20a *g = platform->g;
871
872 /* make sure the clock is available */
873 if (!gk20a_clk_get(g))
874 return -ENOSYS;
875
876 return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk),
877 freqs, num_freqs);
878}
879#endif
880
881struct gk20a_platform gm20b_tegra_platform = {
882 .has_syncpoints = true,
883 .aggressive_sync_destroy_thresh = 64,
884
885 /* power management configuration */
886 .railgate_delay_init = 500,
887 .can_railgate_init = true,
888 .can_elpg_init = true,
889 .enable_slcg = true,
890 .enable_blcg = true,
891 .enable_elcg = true,
892 .can_slcg = true,
893 .can_blcg = true,
894 .can_elcg = true,
895 .enable_elpg = true,
896 .enable_aelpg = true,
897 .enable_perfmon = true,
898 .ptimer_src_freq = 19200000,
899
900 .force_reset_in_do_idle = false,
901
902 .ch_wdt_timeout_ms = 5000,
903
904 .probe = gk20a_tegra_probe,
905 .late_probe = gk20a_tegra_late_probe,
906 .remove = gk20a_tegra_remove,
907 /* power management callbacks */
908 .suspend = gk20a_tegra_suspend,
909
910#if defined(CONFIG_TEGRA_DVFS)
911 .railgate = gm20b_tegra_railgate,
912 .unrailgate = gm20b_tegra_unrailgate,
913 .is_railgated = gk20a_tegra_is_railgated,
914#endif
915
916 .busy = gk20a_tegra_busy,
917 .idle = gk20a_tegra_idle,
918
919#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
920 .reset_assert = gm20b_tegra_reset_assert,
921 .reset_deassert = gm20b_tegra_reset_deassert,
922#else
923 .reset_assert = gk20a_tegra_reset_assert,
924 .reset_deassert = gk20a_tegra_reset_deassert,
925#endif
926
927#if defined(CONFIG_COMMON_CLK)
928 .clk_round_rate = gk20a_round_clk_rate,
929 .get_clk_freqs = gk20a_clk_get_freqs,
930#endif
931
932#ifdef CONFIG_COMMON_CLK
933 .clk_register = gm20b_register_gpcclk,
934#endif
935
936 /* frequency scaling configuration */
937 .initscale = gk20a_tegra_scale_init,
938 .prescale = gk20a_tegra_prescale,
939#ifdef CONFIG_TEGRA_BWMGR
940 .postscale = gm20b_tegra_postscale,
941#endif
942 .devfreq_governor = "nvhost_podgov",
943 .qos_notify = gk20a_scale_qos_notify,
944
945 .dump_platform_dependencies = gk20a_tegra_debug_dump,
946
947#ifdef CONFIG_NVGPU_SUPPORT_CDE
948 .has_cde = true,
949#endif
950
951 .soc_name = "tegra21x",
952
953 .unified_memory = true,
954 .dma_mask = DMA_BIT_MASK(34),
955
956 .secure_buffer_size = 335872,
957};
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h
deleted file mode 100644
index f7d50406..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * GK20A Platform (SoC) Interface
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_
17#define _NVGPU_PLATFORM_GK20A_TEGRA_H_
18
19struct gk20a_platform;
20
21int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform);
22
23#endif
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
deleted file mode 100644
index fce16653..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
+++ /dev/null
@@ -1,607 +0,0 @@
1/*
2 * GP10B Tegra Platform Interface
3 *
4 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/of_platform.h>
17#include <linux/debugfs.h>
18#include <linux/dma-buf.h>
19#include <linux/nvmap.h>
20#include <linux/reset.h>
21#include <linux/platform/tegra/emc_bwmgr.h>
22
23#include <uapi/linux/nvgpu.h>
24
25#include <soc/tegra/tegra_bpmp.h>
26#include <soc/tegra/tegra_powergate.h>
27#include <soc/tegra/tegra-bpmp-dvfs.h>
28
29#include <dt-bindings/memory/tegra-swgroup.h>
30
31#include <nvgpu/kmem.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/enabled.h>
34#include <nvgpu/hashtable.h>
35#include <nvgpu/nvhost.h>
36
37#include "os_linux.h"
38
39#include "clk.h"
40
41#include "gk20a/gk20a.h"
42
43#include "platform_gk20a.h"
44#include "platform_ecc_sysfs.h"
45#include "platform_gk20a_tegra.h"
46#include "gp10b/platform_gp10b.h"
47#include "platform_gp10b_tegra.h"
48#include "scale.h"
49
50/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
51#define GP10B_FREQ_SELECT_STEP 8
52/* Max number of freq supported in h/w */
53#define GP10B_MAX_SUPPORTED_FREQS 120
54static unsigned long
55gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP];
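The comments above state that only every GP10B_FREQ_SELECT_STEP'th entry of the hardware frequency table is kept, which is why the static table needs just 120 / 8 = 15 slots. A hedged sketch of such a stride-based selection follows; the driver's actual loop is outside this excerpt and the rates are made up:

#include <stdio.h>

#define GP10B_FREQ_SELECT_STEP		8
#define GP10B_MAX_SUPPORTED_FREQS	120

int main(void)
{
	unsigned long hw_freqs[GP10B_MAX_SUPPORTED_FREQS];
	unsigned long selected[GP10B_MAX_SUPPORTED_FREQS /
			       GP10B_FREQ_SELECT_STEP];
	int i, num = 0;

	/* Fill the "hardware" table with made-up, increasing rates. */
	for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; i++)
		hw_freqs[i] = 100000000UL + (unsigned long)i * 10000000UL;

	/* Keep every GP10B_FREQ_SELECT_STEP'th frequency. */
	for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; i += GP10B_FREQ_SELECT_STEP)
		selected[num++] = hw_freqs[i];

	printf("%d of %d frequencies kept, last = %lu Hz\n",
	       num, GP10B_MAX_SUPPORTED_FREQS, selected[num - 1]);
	return 0;
}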
56
57#define TEGRA_GP10B_BW_PER_FREQ 64
58#define TEGRA_DDR4_BW_PER_FREQ 16
59
60#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)
61
62#define GPCCLK_INIT_RATE 1000000000
63
64static struct {
65 char *name;
66 unsigned long default_rate;
67} tegra_gp10b_clocks[] = {
68 {"gpu", GPCCLK_INIT_RATE},
69 {"gpu_sys", 204000000} };
70
71/*
72 * gp10b_tegra_get_clocks()
73 *
74 * This function finds clocks in tegra platform and populates
75 * the clock information to gp10b platform data.
76 */
77
78int gp10b_tegra_get_clocks(struct device *dev)
79{
80 struct gk20a_platform *platform = dev_get_drvdata(dev);
81 unsigned int i;
82
83 platform->num_clks = 0;
84 for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) {
85 long rate = tegra_gp10b_clocks[i].default_rate;
86 struct clk *c;
87
88 c = clk_get(dev, tegra_gp10b_clocks[i].name);
89 if (IS_ERR(c)) {
90 nvgpu_err(platform->g, "cannot get clock %s",
91 tegra_gp10b_clocks[i].name);
92 } else {
93 clk_set_rate(c, rate);
94 platform->clk[i] = c;
95 if (i == 0)
96 platform->cached_rate = rate;
97 }
98 }
99 platform->num_clks = i;
100
101 if (platform->clk[0]) {
102 i = tegra_bpmp_dvfs_get_clk_id(dev->of_node,
103 tegra_gp10b_clocks[0].name);
104 if (i > 0)
105 platform->maxmin_clk_id = i;
106 }
107
108 return 0;
109}
110
111void gp10b_tegra_scale_init(struct device *dev)
112{
113 struct gk20a_platform *platform = gk20a_get_platform(dev);
114 struct gk20a_scale_profile *profile = platform->g->scale_profile;
115 struct tegra_bwmgr_client *bwmgr_handle;
116
117 if (!profile)
118 return;
119
120 if ((struct tegra_bwmgr_client *)profile->private_data)
121 return;
122
123 bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
124 if (!bwmgr_handle)
125 return;
126
127 profile->private_data = (void *)bwmgr_handle;
128}
129
130static void gp10b_tegra_scale_exit(struct device *dev)
131{
132 struct gk20a_platform *platform = gk20a_get_platform(dev);
133 struct gk20a_scale_profile *profile = platform->g->scale_profile;
134
135 if (profile)
136 tegra_bwmgr_unregister(
137 (struct tegra_bwmgr_client *)profile->private_data);
138}
139
140static int gp10b_tegra_probe(struct device *dev)
141{
142 struct gk20a_platform *platform = dev_get_drvdata(dev);
143	int ret;
144#ifdef CONFIG_TEGRA_GK20A_NVHOST
145
146 ret = nvgpu_get_nvhost_dev(platform->g);
147 if (ret)
148 return ret;
149#endif
150
151 ret = gk20a_tegra_init_secure_alloc(platform);
152 if (ret)
153 return ret;
154
155 platform->disable_bigpage = !device_is_iommuable(dev);
156
157 platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
158 = false;
159 platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
160 = false;
161
162 platform->g->gr.ctx_vars.force_preemption_gfxp = false;
163 platform->g->gr.ctx_vars.force_preemption_cilp = false;
164
165 gp10b_tegra_get_clocks(dev);
166 nvgpu_linux_init_clk_support(platform->g);
167
168 return 0;
169}
170
171static int gp10b_tegra_late_probe(struct device *dev)
172{
173 return 0;
174}
175
176static int gp10b_tegra_remove(struct device *dev)
177{
178 struct gk20a *g = get_gk20a(dev);
179
180 if (g->ops.gr.remove_gr_sysfs)
181 g->ops.gr.remove_gr_sysfs(g);
182
183 /* deinitialise tegra specific scaling quirks */
184 gp10b_tegra_scale_exit(dev);
185
186#ifdef CONFIG_TEGRA_GK20A_NVHOST
187 nvgpu_free_nvhost_dev(get_gk20a(dev));
188#endif
189
190 return 0;
191}
192
193static bool gp10b_tegra_is_railgated(struct device *dev)
194{
195 bool ret = false;
196
197 if (tegra_bpmp_running())
198 ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU);
199
200 return ret;
201}
202
203static int gp10b_tegra_railgate(struct device *dev)
204{
205 struct gk20a_platform *platform = gk20a_get_platform(dev);
206 struct gk20a_scale_profile *profile = platform->g->scale_profile;
207
208 /* remove emc frequency floor */
209 if (profile)
210 tegra_bwmgr_set_emc(
211 (struct tegra_bwmgr_client *)profile->private_data,
212 0, TEGRA_BWMGR_SET_EMC_FLOOR);
213
214 if (tegra_bpmp_running() &&
215 tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) {
216 int i;
217 for (i = 0; i < platform->num_clks; i++) {
218 if (platform->clk[i])
219 clk_disable_unprepare(platform->clk[i]);
220 }
221 tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU);
222 }
223 return 0;
224}
225
226static int gp10b_tegra_unrailgate(struct device *dev)
227{
228 int ret = 0;
229 struct gk20a_platform *platform = gk20a_get_platform(dev);
230 struct gk20a_scale_profile *profile = platform->g->scale_profile;
231
232 if (tegra_bpmp_running()) {
233 int i;
234 ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU);
235 for (i = 0; i < platform->num_clks; i++) {
236 if (platform->clk[i])
237 clk_prepare_enable(platform->clk[i]);
238 }
239 }
240
241	/* To start with, set the EMC frequency floor to the max rate */
242 if (profile)
243 tegra_bwmgr_set_emc(
244 (struct tegra_bwmgr_client *)profile->private_data,
245 tegra_bwmgr_get_max_emc_rate(),
246 TEGRA_BWMGR_SET_EMC_FLOOR);
247 return ret;
248}
249
250static int gp10b_tegra_suspend(struct device *dev)
251{
252 return 0;
253}
254
255int gp10b_tegra_reset_assert(struct device *dev)
256{
257 struct gk20a_platform *platform = gk20a_get_platform(dev);
258 int ret = 0;
259
260 if (!platform->reset_control)
261 return -EINVAL;
262
263 ret = reset_control_assert(platform->reset_control);
264
265 return ret;
266}
267
268int gp10b_tegra_reset_deassert(struct device *dev)
269{
270 struct gk20a_platform *platform = gk20a_get_platform(dev);
271 int ret = 0;
272
273 if (!platform->reset_control)
274 return -EINVAL;
275
276 ret = reset_control_deassert(platform->reset_control);
277
278 return ret;
279}
280
281void gp10b_tegra_prescale(struct device *dev)
282{
283 struct gk20a *g = get_gk20a(dev);
284 u32 avg = 0;
285
286 nvgpu_log_fn(g, " ");
287
288 nvgpu_pmu_load_norm(g, &avg);
289
290 nvgpu_log_fn(g, "done");
291}
292
293void gp10b_tegra_postscale(struct device *pdev,
294 unsigned long freq)
295{
296 struct gk20a_platform *platform = gk20a_get_platform(pdev);
297 struct gk20a_scale_profile *profile = platform->g->scale_profile;
298 struct gk20a *g = get_gk20a(pdev);
299 unsigned long emc_rate;
300
301 nvgpu_log_fn(g, " ");
302 if (profile && !platform->is_railgated(pdev)) {
303 unsigned long emc_scale;
304
305 if (freq <= gp10b_freq_table[0])
306 emc_scale = 0;
307 else
308 emc_scale = g->emc3d_ratio;
309
310 emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000;
311
312 if (emc_rate > tegra_bwmgr_get_max_emc_rate())
313 emc_rate = tegra_bwmgr_get_max_emc_rate();
314
315 tegra_bwmgr_set_emc(
316 (struct tegra_bwmgr_client *)profile->private_data,
317 emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR);
318 }
319 nvgpu_log_fn(g, "done");
320}
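/*
 * A rough worked example of the EMC floor computed in
 * gp10b_tegra_postscale() above, assuming emc3d_ratio is expressed in
 * units of 1/1000 (the trailing "/ 1000" in the calculation suggests
 * this; the figures are illustrative only):
 *
 *	freq         = 1000000000 Hz (1 GHz GPC clock)
 *	EMC_BW_RATIO = TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ = 4
 *	emc_scale    = emc3d_ratio, e.g. 750
 *	emc_rate     = (1e9 * 4 * 750) / 1000 = 3e9 Hz,
 *	               then clamped to tegra_bwmgr_get_max_emc_rate()
 */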
321
322long gp10b_round_clk_rate(struct device *dev, unsigned long rate)
323{
324 struct gk20a *g = get_gk20a(dev);
325 struct gk20a_scale_profile *profile = g->scale_profile;
326 unsigned long *freq_table = profile->devfreq_profile.freq_table;
327 int max_states = profile->devfreq_profile.max_state;
328 int i;
329
330 for (i = 0; i < max_states; ++i)
331 if (freq_table[i] >= rate)
332 return freq_table[i];
333
334 return freq_table[max_states - 1];
335}
336
337int gp10b_clk_get_freqs(struct device *dev,
338 unsigned long **freqs, int *num_freqs)
339{
340 struct gk20a_platform *platform = gk20a_get_platform(dev);
341 struct gk20a *g = platform->g;
342 unsigned long max_rate;
343 unsigned long new_rate = 0, prev_rate = 0;
344 int i = 0, freq_counter = 0;
345
346 max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1));
347
348 /*
349	 * Walk the h/w frequency table, select only every
350	 * GP10B_FREQ_SELECT_STEP'th frequency, and append the
351	 * MAX frequency as the last entry
352 */
353 for (; i < GP10B_MAX_SUPPORTED_FREQS; ++i) {
354 prev_rate = new_rate;
355 new_rate = clk_round_rate(platform->clk[0], prev_rate + 1);
356
357 if (i % GP10B_FREQ_SELECT_STEP == 0 ||
358 new_rate == max_rate) {
359 gp10b_freq_table[freq_counter++] = new_rate;
360
361 if (new_rate == max_rate)
362 break;
363 }
364 }
365
366 WARN_ON(i == GP10B_MAX_SUPPORTED_FREQS);
367
368 /* Fill freq table */
369 *freqs = gp10b_freq_table;
370 *num_freqs = freq_counter;
371
372 nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n",
373 gp10b_freq_table[0], max_rate, *num_freqs);
374
375 return 0;
376}
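/*
 * A short illustration of the selection in gp10b_clk_get_freqs() above:
 * with GP10B_FREQ_SELECT_STEP = 8 the loop keeps the rates that
 * clk_round_rate() reports at h/w table indices 0, 8, 16, ... and then
 * appends the maximum rate as the final entry; the WARN_ON covers the
 * case where the maximum rate is never reached within
 * GP10B_MAX_SUPPORTED_FREQS steps.
 */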
377
378struct gk20a_platform gp10b_tegra_platform = {
379 .has_syncpoints = true,
380
381 /* power management configuration */
382 .railgate_delay_init = 500,
383
384 /* ldiv slowdown factor */
385 .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16,
386
387 /* power management configuration */
388 .can_railgate_init = true,
389 .enable_elpg = true,
390 .can_elpg_init = true,
391 .enable_blcg = true,
392 .enable_slcg = true,
393 .enable_elcg = true,
394 .can_slcg = true,
395 .can_blcg = true,
396 .can_elcg = true,
397 .enable_aelpg = true,
398 .enable_perfmon = true,
399
400	/* ptimer src frequency in Hz */
401 .ptimer_src_freq = 31250000,
402
403 .ch_wdt_timeout_ms = 5000,
404
405 .probe = gp10b_tegra_probe,
406 .late_probe = gp10b_tegra_late_probe,
407 .remove = gp10b_tegra_remove,
408
409 /* power management callbacks */
410 .suspend = gp10b_tegra_suspend,
411 .railgate = gp10b_tegra_railgate,
412 .unrailgate = gp10b_tegra_unrailgate,
413 .is_railgated = gp10b_tegra_is_railgated,
414
415 .busy = gk20a_tegra_busy,
416 .idle = gk20a_tegra_idle,
417
418 .dump_platform_dependencies = gk20a_tegra_debug_dump,
419
420#ifdef CONFIG_NVGPU_SUPPORT_CDE
421 .has_cde = true,
422#endif
423
424 .clk_round_rate = gp10b_round_clk_rate,
425 .get_clk_freqs = gp10b_clk_get_freqs,
426
427 /* frequency scaling configuration */
428 .initscale = gp10b_tegra_scale_init,
429 .prescale = gp10b_tegra_prescale,
430 .postscale = gp10b_tegra_postscale,
431 .devfreq_governor = "nvhost_podgov",
432
433 .qos_notify = gk20a_scale_qos_notify,
434
435 .reset_assert = gp10b_tegra_reset_assert,
436 .reset_deassert = gp10b_tegra_reset_deassert,
437
438 .force_reset_in_do_idle = false,
439
440 .soc_name = "tegra18x",
441
442 .unified_memory = true,
443 .dma_mask = DMA_BIT_MASK(36),
444
445 .ltc_streamid = TEGRA_SID_GPUB,
446
447 .secure_buffer_size = 401408,
448};
449
450void gr_gp10b_create_sysfs(struct gk20a *g)
451{
452 int error = 0;
453 struct device *dev = dev_from_gk20a(g);
454
455 /* This stat creation function is called on GR init. GR can get
456 initialized multiple times but we only need to create the ECC
457 stats once. Therefore, add the following check to avoid
458 creating duplicate stat sysfs nodes. */
459 if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
460 return;
461
462 error |= nvgpu_gr_ecc_stat_create(dev,
463 0,
464 "sm_lrf_ecc_single_err_count",
465 &g->ecc.gr.sm_lrf_single_err_count);
466
467 error |= nvgpu_gr_ecc_stat_create(dev,
468 0,
469 "sm_lrf_ecc_double_err_count",
470 &g->ecc.gr.sm_lrf_double_err_count);
471
472 error |= nvgpu_gr_ecc_stat_create(dev,
473 0,
474 "sm_shm_ecc_sec_count",
475 &g->ecc.gr.sm_shm_sec_count);
476
477 error |= nvgpu_gr_ecc_stat_create(dev,
478 0,
479 "sm_shm_ecc_sed_count",
480 &g->ecc.gr.sm_shm_sed_count);
481
482 error |= nvgpu_gr_ecc_stat_create(dev,
483 0,
484 "sm_shm_ecc_ded_count",
485 &g->ecc.gr.sm_shm_ded_count);
486
487 error |= nvgpu_gr_ecc_stat_create(dev,
488 0,
489 "tex_ecc_total_sec_pipe0_count",
490 &g->ecc.gr.tex_total_sec_pipe0_count);
491
492 error |= nvgpu_gr_ecc_stat_create(dev,
493 0,
494 "tex_ecc_total_ded_pipe0_count",
495 &g->ecc.gr.tex_total_ded_pipe0_count);
496
497 error |= nvgpu_gr_ecc_stat_create(dev,
498 0,
499 "tex_ecc_unique_sec_pipe0_count",
500 &g->ecc.gr.tex_unique_sec_pipe0_count);
501
502 error |= nvgpu_gr_ecc_stat_create(dev,
503 0,
504 "tex_ecc_unique_ded_pipe0_count",
505 &g->ecc.gr.tex_unique_ded_pipe0_count);
506
507 error |= nvgpu_gr_ecc_stat_create(dev,
508 0,
509 "tex_ecc_total_sec_pipe1_count",
510 &g->ecc.gr.tex_total_sec_pipe1_count);
511
512 error |= nvgpu_gr_ecc_stat_create(dev,
513 0,
514 "tex_ecc_total_ded_pipe1_count",
515 &g->ecc.gr.tex_total_ded_pipe1_count);
516
517 error |= nvgpu_gr_ecc_stat_create(dev,
518 0,
519 "tex_ecc_unique_sec_pipe1_count",
520 &g->ecc.gr.tex_unique_sec_pipe1_count);
521
522 error |= nvgpu_gr_ecc_stat_create(dev,
523 0,
524 "tex_ecc_unique_ded_pipe1_count",
525 &g->ecc.gr.tex_unique_ded_pipe1_count);
526
527 error |= nvgpu_gr_ecc_stat_create(dev,
528 2,
529 "ecc_sec_count",
530 &g->ecc.ltc.l2_sec_count);
531
532 error |= nvgpu_gr_ecc_stat_create(dev,
533 2,
534 "ecc_ded_count",
535 &g->ecc.ltc.l2_ded_count);
536
537 if (error)
538 dev_err(dev, "Failed to create sysfs attributes!\n");
539}
540
541void gr_gp10b_remove_sysfs(struct gk20a *g)
542{
543 struct device *dev = dev_from_gk20a(g);
544
545 if (!g->ecc.gr.sm_lrf_single_err_count.counters)
546 return;
547
548 nvgpu_gr_ecc_stat_remove(dev,
549 0,
550 &g->ecc.gr.sm_lrf_single_err_count);
551
552 nvgpu_gr_ecc_stat_remove(dev,
553 0,
554 &g->ecc.gr.sm_lrf_double_err_count);
555
556 nvgpu_gr_ecc_stat_remove(dev,
557 0,
558 &g->ecc.gr.sm_shm_sec_count);
559
560 nvgpu_gr_ecc_stat_remove(dev,
561 0,
562 &g->ecc.gr.sm_shm_sed_count);
563
564 nvgpu_gr_ecc_stat_remove(dev,
565 0,
566 &g->ecc.gr.sm_shm_ded_count);
567
568 nvgpu_gr_ecc_stat_remove(dev,
569 0,
570 &g->ecc.gr.tex_total_sec_pipe0_count);
571
572 nvgpu_gr_ecc_stat_remove(dev,
573 0,
574 &g->ecc.gr.tex_total_ded_pipe0_count);
575
576 nvgpu_gr_ecc_stat_remove(dev,
577 0,
578 &g->ecc.gr.tex_unique_sec_pipe0_count);
579
580 nvgpu_gr_ecc_stat_remove(dev,
581 0,
582 &g->ecc.gr.tex_unique_ded_pipe0_count);
583
584 nvgpu_gr_ecc_stat_remove(dev,
585 0,
586 &g->ecc.gr.tex_total_sec_pipe1_count);
587
588 nvgpu_gr_ecc_stat_remove(dev,
589 0,
590 &g->ecc.gr.tex_total_ded_pipe1_count);
591
592 nvgpu_gr_ecc_stat_remove(dev,
593 0,
594 &g->ecc.gr.tex_unique_sec_pipe1_count);
595
596 nvgpu_gr_ecc_stat_remove(dev,
597 0,
598 &g->ecc.gr.tex_unique_ded_pipe1_count);
599
600 nvgpu_gr_ecc_stat_remove(dev,
601 2,
602 &g->ecc.ltc.l2_sec_count);
603
604 nvgpu_gr_ecc_stat_remove(dev,
605 2,
606 &g->ecc.ltc.l2_ded_count);
607}
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h
deleted file mode 100644
index 6de90275..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef _PLATFORM_GP10B_TEGRA_H_
18#define _PLATFORM_GP10B_TEGRA_H_
19
20#include "gp10b/gr_gp10b.h"
21#include "platform_ecc_sysfs.h"
22
23#endif
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c
deleted file mode 100644
index bf66762b..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c
+++ /dev/null
@@ -1,588 +0,0 @@
1/*
2 * GV11B Tegra Platform Interface
3 *
4 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/of_platform.h>
20#include <linux/debugfs.h>
21#include <linux/dma-buf.h>
22#include <linux/nvmap.h>
23#include <linux/reset.h>
24#include <linux/hashtable.h>
25#include <linux/clk.h>
26#include <linux/platform/tegra/emc_bwmgr.h>
27
28#include <nvgpu/nvhost.h>
29
30#include <uapi/linux/nvgpu.h>
31
32#include <soc/tegra/tegra_bpmp.h>
33#include <soc/tegra/tegra_powergate.h>
34
35#include "gk20a/gk20a.h"
36#include "platform_gk20a.h"
37#include "clk.h"
38#include "scale.h"
39
40#include "gp10b/platform_gp10b.h"
41#include "platform_gp10b_tegra.h"
42#include "platform_ecc_sysfs.h"
43
44#include "os_linux.h"
45#include "platform_gk20a_tegra.h"
46#include "gv11b/gr_gv11b.h"
47
48static void gv11b_tegra_scale_exit(struct device *dev)
49{
50 struct gk20a_platform *platform = gk20a_get_platform(dev);
51 struct gk20a_scale_profile *profile = platform->g->scale_profile;
52
53 if (profile)
54 tegra_bwmgr_unregister(
55 (struct tegra_bwmgr_client *)profile->private_data);
56}
57
58static int gv11b_tegra_probe(struct device *dev)
59{
60 struct gk20a_platform *platform = dev_get_drvdata(dev);
61 int err;
62
63 err = nvgpu_nvhost_syncpt_init(platform->g);
64 if (err) {
65 if (err != -ENOSYS)
66 return err;
67 }
68
69 err = gk20a_tegra_init_secure_alloc(platform);
70 if (err)
71 return err;
72
73 platform->disable_bigpage = !device_is_iommuable(dev);
74
75 platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
76 = false;
77 platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
78 = false;
79
80 platform->g->gr.ctx_vars.force_preemption_gfxp = false;
81 platform->g->gr.ctx_vars.force_preemption_cilp = false;
82
83 gp10b_tegra_get_clocks(dev);
84 nvgpu_linux_init_clk_support(platform->g);
85
86 return 0;
87}
88
89static int gv11b_tegra_late_probe(struct device *dev)
90{
91 return 0;
92}
93
94
95static int gv11b_tegra_remove(struct device *dev)
96{
97 struct gk20a *g = get_gk20a(dev);
98
99 if (g->ops.gr.remove_gr_sysfs)
100 g->ops.gr.remove_gr_sysfs(g);
101
102 gv11b_tegra_scale_exit(dev);
103
104#ifdef CONFIG_TEGRA_GK20A_NVHOST
105 nvgpu_free_nvhost_dev(get_gk20a(dev));
106#endif
107
108 return 0;
109}
110
111static bool gv11b_tegra_is_railgated(struct device *dev)
112{
113 bool ret = false;
114#ifdef TEGRA194_POWER_DOMAIN_GPU
115 struct gk20a *g = get_gk20a(dev);
116
117 if (tegra_bpmp_running()) {
118 nvgpu_log(g, gpu_dbg_info, "bpmp running");
119 ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU);
120
121 nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no");
122 } else {
123 nvgpu_log(g, gpu_dbg_info, "bpmp not running");
124 }
125#endif
126 return ret;
127}
128
129static int gv11b_tegra_railgate(struct device *dev)
130{
131#ifdef TEGRA194_POWER_DOMAIN_GPU
132 struct gk20a_platform *platform = gk20a_get_platform(dev);
133 struct gk20a_scale_profile *profile = platform->g->scale_profile;
134 struct gk20a *g = get_gk20a(dev);
135 int i;
136
137 /* remove emc frequency floor */
138 if (profile)
139 tegra_bwmgr_set_emc(
140 (struct tegra_bwmgr_client *)profile->private_data,
141 0, TEGRA_BWMGR_SET_EMC_FLOOR);
142
143 if (tegra_bpmp_running()) {
144 nvgpu_log(g, gpu_dbg_info, "bpmp running");
145 if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) {
146 nvgpu_log(g, gpu_dbg_info, "powergate is not powered");
147 return 0;
148 }
149 nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare");
150 for (i = 0; i < platform->num_clks; i++) {
151 if (platform->clk[i])
152 clk_disable_unprepare(platform->clk[i]);
153 }
154 nvgpu_log(g, gpu_dbg_info, "powergate_partition");
155 tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU);
156 } else {
157 nvgpu_log(g, gpu_dbg_info, "bpmp not running");
158 }
159#endif
160 return 0;
161}
162
163static int gv11b_tegra_unrailgate(struct device *dev)
164{
165 int ret = 0;
166#ifdef TEGRA194_POWER_DOMAIN_GPU
167 struct gk20a_platform *platform = gk20a_get_platform(dev);
168 struct gk20a *g = get_gk20a(dev);
169 struct gk20a_scale_profile *profile = platform->g->scale_profile;
170 int i;
171
172 if (tegra_bpmp_running()) {
173 nvgpu_log(g, gpu_dbg_info, "bpmp running");
174 ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU);
175 if (ret) {
176 nvgpu_log(g, gpu_dbg_info,
177 "unpowergate partition failed");
178 return ret;
179 }
180 nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable");
181 for (i = 0; i < platform->num_clks; i++) {
182 if (platform->clk[i])
183 clk_prepare_enable(platform->clk[i]);
184 }
185 } else {
186 nvgpu_log(g, gpu_dbg_info, "bpmp not running");
187 }
188
189	/* To start with, set the EMC frequency floor to the max rate */
190 if (profile)
191 tegra_bwmgr_set_emc(
192 (struct tegra_bwmgr_client *)profile->private_data,
193 tegra_bwmgr_get_max_emc_rate(),
194 TEGRA_BWMGR_SET_EMC_FLOOR);
195#endif
196 return ret;
197}
198
199static int gv11b_tegra_suspend(struct device *dev)
200{
201 return 0;
202}
203
204struct gk20a_platform gv11b_tegra_platform = {
205 .has_syncpoints = true,
206
207	/* ptimer src frequency in Hz */
208 .ptimer_src_freq = 31250000,
209
210 .ch_wdt_timeout_ms = 5000,
211
212 .probe = gv11b_tegra_probe,
213 .late_probe = gv11b_tegra_late_probe,
214 .remove = gv11b_tegra_remove,
215 .railgate_delay_init = 500,
216 .can_railgate_init = true,
217
218 .can_slcg = true,
219 .can_blcg = true,
220 .can_elcg = true,
221 .enable_slcg = true,
222 .enable_blcg = true,
223 .enable_elcg = true,
224 .enable_perfmon = true,
225
226 /* power management configuration */
227 .enable_elpg = true,
228 .can_elpg_init = true,
229 .enable_aelpg = true,
230
231 /* power management callbacks */
232 .suspend = gv11b_tegra_suspend,
233 .railgate = gv11b_tegra_railgate,
234 .unrailgate = gv11b_tegra_unrailgate,
235 .is_railgated = gv11b_tegra_is_railgated,
236
237 .busy = gk20a_tegra_busy,
238 .idle = gk20a_tegra_idle,
239
240 .clk_round_rate = gp10b_round_clk_rate,
241 .get_clk_freqs = gp10b_clk_get_freqs,
242
243 /* frequency scaling configuration */
244 .initscale = gp10b_tegra_scale_init,
245 .prescale = gp10b_tegra_prescale,
246 .postscale = gp10b_tegra_postscale,
247 .devfreq_governor = "nvhost_podgov",
248
249 .qos_notify = gk20a_scale_qos_notify,
250
251 .dump_platform_dependencies = gk20a_tegra_debug_dump,
252
253 .soc_name = "tegra19x",
254
255 .honors_aperture = true,
256 .unified_memory = true,
257 .dma_mask = DMA_BIT_MASK(36),
258
259 .reset_assert = gp10b_tegra_reset_assert,
260 .reset_deassert = gp10b_tegra_reset_deassert,
261
262 .secure_buffer_size = 667648,
263};
264
265void gr_gv11b_create_sysfs(struct gk20a *g)
266{
267 struct device *dev = dev_from_gk20a(g);
268 int error = 0;
269
270 /* This stat creation function is called on GR init. GR can get
271 initialized multiple times but we only need to create the ECC
272 stats once. Therefore, add the following check to avoid
273 creating duplicate stat sysfs nodes. */
274 if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL)
275 return;
276
277 gr_gp10b_create_sysfs(g);
278
279 error |= nvgpu_gr_ecc_stat_create(dev,
280 0,
281 "sm_l1_tag_ecc_corrected_err_count",
282 &g->ecc.gr.sm_l1_tag_corrected_err_count);
283
284 error |= nvgpu_gr_ecc_stat_create(dev,
285 0,
286 "sm_l1_tag_ecc_uncorrected_err_count",
287 &g->ecc.gr.sm_l1_tag_uncorrected_err_count);
288
289 error |= nvgpu_gr_ecc_stat_create(dev,
290 0,
291 "sm_cbu_ecc_corrected_err_count",
292 &g->ecc.gr.sm_cbu_corrected_err_count);
293
294 error |= nvgpu_gr_ecc_stat_create(dev,
295 0,
296 "sm_cbu_ecc_uncorrected_err_count",
297 &g->ecc.gr.sm_cbu_uncorrected_err_count);
298
299 error |= nvgpu_gr_ecc_stat_create(dev,
300 0,
301 "sm_l1_data_ecc_corrected_err_count",
302 &g->ecc.gr.sm_l1_data_corrected_err_count);
303
304 error |= nvgpu_gr_ecc_stat_create(dev,
305 0,
306 "sm_l1_data_ecc_uncorrected_err_count",
307 &g->ecc.gr.sm_l1_data_uncorrected_err_count);
308
309 error |= nvgpu_gr_ecc_stat_create(dev,
310 0,
311 "sm_icache_ecc_corrected_err_count",
312 &g->ecc.gr.sm_icache_corrected_err_count);
313
314 error |= nvgpu_gr_ecc_stat_create(dev,
315 0,
316 "sm_icache_ecc_uncorrected_err_count",
317 &g->ecc.gr.sm_icache_uncorrected_err_count);
318
319 error |= nvgpu_gr_ecc_stat_create(dev,
320 0,
321 "gcc_l15_ecc_corrected_err_count",
322 &g->ecc.gr.gcc_l15_corrected_err_count);
323
324 error |= nvgpu_gr_ecc_stat_create(dev,
325 0,
326 "gcc_l15_ecc_uncorrected_err_count",
327 &g->ecc.gr.gcc_l15_uncorrected_err_count);
328
329 error |= nvgpu_ecc_stat_create(dev,
330 g->ltc_count,
331 0,
332 "ltc",
333 NULL,
334 "l2_cache_uncorrected_err_count",
335 &g->ecc.ltc.l2_cache_uncorrected_err_count);
336
337 error |= nvgpu_ecc_stat_create(dev,
338 g->ltc_count,
339 0,
340 "ltc",
341 NULL,
342 "l2_cache_corrected_err_count",
343 &g->ecc.ltc.l2_cache_corrected_err_count);
344
345 error |= nvgpu_ecc_stat_create(dev,
346 1,
347 0,
348 "gpc",
349 NULL,
350 "fecs_ecc_uncorrected_err_count",
351 &g->ecc.gr.fecs_uncorrected_err_count);
352
353 error |= nvgpu_ecc_stat_create(dev,
354 1,
355 0,
356 "gpc",
357 NULL,
358 "fecs_ecc_corrected_err_count",
359 &g->ecc.gr.fecs_corrected_err_count);
360
361 error |= nvgpu_ecc_stat_create(dev,
362 g->gr.gpc_count,
363 0,
364 "gpc",
365 NULL,
366 "gpccs_ecc_uncorrected_err_count",
367 &g->ecc.gr.gpccs_uncorrected_err_count);
368
369 error |= nvgpu_ecc_stat_create(dev,
370 g->gr.gpc_count,
371 0,
372 "gpc",
373 NULL,
374 "gpccs_ecc_corrected_err_count",
375 &g->ecc.gr.gpccs_corrected_err_count);
376
377 error |= nvgpu_ecc_stat_create(dev,
378 g->gr.gpc_count,
379 0,
380 "gpc",
381 NULL,
382 "mmu_l1tlb_ecc_uncorrected_err_count",
383 &g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
384
385 error |= nvgpu_ecc_stat_create(dev,
386 g->gr.gpc_count,
387 0,
388 "gpc",
389 NULL,
390 "mmu_l1tlb_ecc_corrected_err_count",
391 &g->ecc.gr.mmu_l1tlb_corrected_err_count);
392
393 error |= nvgpu_ecc_stat_create(dev,
394 1,
395 0,
396 "eng",
397 NULL,
398 "mmu_l2tlb_ecc_uncorrected_err_count",
399 &g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
400
401 error |= nvgpu_ecc_stat_create(dev,
402 1,
403 0,
404 "eng",
405 NULL,
406 "mmu_l2tlb_ecc_corrected_err_count",
407 &g->ecc.fb.mmu_l2tlb_corrected_err_count);
408
409 error |= nvgpu_ecc_stat_create(dev,
410 1,
411 0,
412 "eng",
413 NULL,
414 "mmu_hubtlb_ecc_uncorrected_err_count",
415 &g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
416
417 error |= nvgpu_ecc_stat_create(dev,
418 1,
419 0,
420 "eng",
421 NULL,
422 "mmu_hubtlb_ecc_corrected_err_count",
423 &g->ecc.fb.mmu_hubtlb_corrected_err_count);
424
425 error |= nvgpu_ecc_stat_create(dev,
426 1,
427 0,
428 "eng",
429 NULL,
430 "mmu_fillunit_ecc_uncorrected_err_count",
431 &g->ecc.fb.mmu_fillunit_uncorrected_err_count);
432
433 error |= nvgpu_ecc_stat_create(dev,
434 1,
435 0,
436 "eng",
437 NULL,
438 "mmu_fillunit_ecc_corrected_err_count",
439 &g->ecc.fb.mmu_fillunit_corrected_err_count);
440
441 error |= nvgpu_ecc_stat_create(dev,
442 1,
443 0,
444 "eng",
445 NULL,
446 "pmu_ecc_uncorrected_err_count",
447 &g->ecc.pmu.pmu_uncorrected_err_count);
448
449 error |= nvgpu_ecc_stat_create(dev,
450 1,
451 0,
452 "eng",
453 NULL,
454 "pmu_ecc_corrected_err_count",
455 &g->ecc.pmu.pmu_corrected_err_count);
456
457 if (error)
458 dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
459}
460
461void gr_gv11b_remove_sysfs(struct gk20a *g)
462{
463 struct device *dev = dev_from_gk20a(g);
464
465 if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters)
466 return;
467 gr_gp10b_remove_sysfs(g);
468
469 nvgpu_gr_ecc_stat_remove(dev,
470 0,
471 &g->ecc.gr.sm_l1_tag_corrected_err_count);
472
473 nvgpu_gr_ecc_stat_remove(dev,
474 0,
475 &g->ecc.gr.sm_l1_tag_uncorrected_err_count);
476
477 nvgpu_gr_ecc_stat_remove(dev,
478 0,
479 &g->ecc.gr.sm_cbu_corrected_err_count);
480
481 nvgpu_gr_ecc_stat_remove(dev,
482 0,
483 &g->ecc.gr.sm_cbu_uncorrected_err_count);
484
485 nvgpu_gr_ecc_stat_remove(dev,
486 0,
487 &g->ecc.gr.sm_l1_data_corrected_err_count);
488
489 nvgpu_gr_ecc_stat_remove(dev,
490 0,
491 &g->ecc.gr.sm_l1_data_uncorrected_err_count);
492
493 nvgpu_gr_ecc_stat_remove(dev,
494 0,
495 &g->ecc.gr.sm_icache_corrected_err_count);
496
497 nvgpu_gr_ecc_stat_remove(dev,
498 0,
499 &g->ecc.gr.sm_icache_uncorrected_err_count);
500
501 nvgpu_gr_ecc_stat_remove(dev,
502 0,
503 &g->ecc.gr.gcc_l15_corrected_err_count);
504
505 nvgpu_gr_ecc_stat_remove(dev,
506 0,
507 &g->ecc.gr.gcc_l15_uncorrected_err_count);
508
509 nvgpu_ecc_stat_remove(dev,
510 g->ltc_count,
511 0,
512 &g->ecc.ltc.l2_cache_uncorrected_err_count);
513
514 nvgpu_ecc_stat_remove(dev,
515 g->ltc_count,
516 0,
517 &g->ecc.ltc.l2_cache_corrected_err_count);
518
519 nvgpu_ecc_stat_remove(dev,
520 1,
521 0,
522 &g->ecc.gr.fecs_uncorrected_err_count);
523
524 nvgpu_ecc_stat_remove(dev,
525 1,
526 0,
527 &g->ecc.gr.fecs_corrected_err_count);
528
529 nvgpu_ecc_stat_remove(dev,
530 g->gr.gpc_count,
531 0,
532 &g->ecc.gr.gpccs_uncorrected_err_count);
533
534 nvgpu_ecc_stat_remove(dev,
535 g->gr.gpc_count,
536 0,
537 &g->ecc.gr.gpccs_corrected_err_count);
538
539 nvgpu_ecc_stat_remove(dev,
540 g->gr.gpc_count,
541 0,
542 &g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
543
544 nvgpu_ecc_stat_remove(dev,
545 g->gr.gpc_count,
546 0,
547 &g->ecc.gr.mmu_l1tlb_corrected_err_count);
548
549 nvgpu_ecc_stat_remove(dev,
550 1,
551 0,
552 &g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
553
554 nvgpu_ecc_stat_remove(dev,
555 1,
556 0,
557 &g->ecc.fb.mmu_l2tlb_corrected_err_count);
558
559 nvgpu_ecc_stat_remove(dev,
560 1,
561 0,
562 &g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
563
564 nvgpu_ecc_stat_remove(dev,
565 1,
566 0,
567 &g->ecc.fb.mmu_hubtlb_corrected_err_count);
568
569 nvgpu_ecc_stat_remove(dev,
570 1,
571 0,
572 &g->ecc.fb.mmu_fillunit_uncorrected_err_count);
573
574 nvgpu_ecc_stat_remove(dev,
575 1,
576 0,
577 &g->ecc.fb.mmu_fillunit_corrected_err_count);
578
579 nvgpu_ecc_stat_remove(dev,
580 1,
581 0,
582 &g->ecc.pmu.pmu_uncorrected_err_count);
583
584 nvgpu_ecc_stat_remove(dev,
585 1,
586 0,
587 &g->ecc.pmu.pmu_corrected_err_count);
588}
diff --git a/drivers/gpu/nvgpu/common/linux/rwsem.c b/drivers/gpu/nvgpu/common/linux/rwsem.c
deleted file mode 100644
index 297ddf11..00000000
--- a/drivers/gpu/nvgpu/common/linux/rwsem.c
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <nvgpu/rwsem.h>
15
16void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem)
17{
18 init_rwsem(&rwsem->rwsem);
19}
20
21void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem)
22{
23 up_read(&rwsem->rwsem);
24}
25
26void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem)
27{
28 down_read(&rwsem->rwsem);
29}
30
31void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem)
32{
33 up_write(&rwsem->rwsem);
34}
35
36void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem)
37{
38 down_write(&rwsem->rwsem);
39}
diff --git a/drivers/gpu/nvgpu/common/linux/scale.c b/drivers/gpu/nvgpu/common/linux/scale.c
deleted file mode 100644
index 84ac1cfd..00000000
--- a/drivers/gpu/nvgpu/common/linux/scale.c
+++ /dev/null
@@ -1,428 +0,0 @@
1/*
2 * gk20a clock scaling profile
3 *
4 * Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/devfreq.h>
20#include <linux/export.h>
21#include <soc/tegra/chip-id.h>
22#include <linux/pm_qos.h>
23
24#include <governor.h>
25
26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h>
28
29#include "gk20a/gk20a.h"
30#include "platform_gk20a.h"
31#include "scale.h"
32#include "os_linux.h"
33
34/*
35 * gk20a_scale_qos_notify()
36 *
37 * This function is called when the minimum QoS requirement for the device
38 * has changed. The function calls the postscaling callback if one is defined.
39 */
40
41#if defined(CONFIG_COMMON_CLK)
42int gk20a_scale_qos_notify(struct notifier_block *nb,
43 unsigned long n, void *p)
44{
45 struct gk20a_scale_profile *profile =
46 container_of(nb, struct gk20a_scale_profile,
47 qos_notify_block);
48 struct gk20a *g = get_gk20a(profile->dev);
49 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
50 struct devfreq *devfreq = l->devfreq;
51
52 if (!devfreq)
53 return NOTIFY_OK;
54
55 mutex_lock(&devfreq->lock);
56 /* check for pm_qos min and max frequency requirement */
57 profile->qos_min_freq =
58 (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
59 profile->qos_max_freq =
60 (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
61
62 if (profile->qos_min_freq > profile->qos_max_freq) {
63 nvgpu_err(g,
64 "QoS: setting invalid limit, min_freq=%lu max_freq=%lu",
65 profile->qos_min_freq, profile->qos_max_freq);
66 profile->qos_min_freq = profile->qos_max_freq;
67 }
68
69 update_devfreq(devfreq);
70 mutex_unlock(&devfreq->lock);
71
72 return NOTIFY_OK;
73}
74#else
75int gk20a_scale_qos_notify(struct notifier_block *nb,
76 unsigned long n, void *p)
77{
78 struct gk20a_scale_profile *profile =
79 container_of(nb, struct gk20a_scale_profile,
80 qos_notify_block);
81 struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
82 struct gk20a *g = get_gk20a(profile->dev);
83 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
84 unsigned long freq;
85
86 if (!platform->postscale)
87 return NOTIFY_OK;
88
89 /* get the frequency requirement. if devfreq is enabled, check if it
90 * has higher demand than qos */
91 freq = platform->clk_round_rate(profile->dev,
92 (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS));
93 if (l->devfreq)
94 freq = max(l->devfreq->previous_freq, freq);
95
96 /* Update gpu load because we may scale the emc target
97 * if the gpu load changed. */
98 nvgpu_pmu_load_update(g);
99 platform->postscale(profile->dev, freq);
100
101 return NOTIFY_OK;
102}
103#endif
104
105/*
106 * gk20a_scale_make_freq_table(profile)
107 *
108 * This function initialises the frequency table for the given device profile
109 */
110
111static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
112{
113 struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
114 int num_freqs, err;
115 unsigned long *freqs;
116
117 if (platform->get_clk_freqs) {
118 /* get gpu frequency table */
119 err = platform->get_clk_freqs(profile->dev, &freqs,
120 &num_freqs);
121 if (err)
122 return -ENOSYS;
123 } else
124 return -ENOSYS;
125
126 profile->devfreq_profile.freq_table = (unsigned long *)freqs;
127 profile->devfreq_profile.max_state = num_freqs;
128
129 return 0;
130}
131
132/*
133 * gk20a_scale_target(dev, *freq, flags)
134 *
135 * This function scales the clock
136 */
137
138static int gk20a_scale_target(struct device *dev, unsigned long *freq,
139 u32 flags)
140{
141 struct gk20a_platform *platform = dev_get_drvdata(dev);
142 struct gk20a *g = platform->g;
143 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
144 struct gk20a_scale_profile *profile = g->scale_profile;
145 struct devfreq *devfreq = l->devfreq;
146 unsigned long local_freq = *freq;
147 unsigned long rounded_rate;
148 unsigned long min_freq = 0, max_freq = 0;
149
150 /*
151 * Calculate floor and cap frequency values
152 *
153 * Policy :
154 * We have two APIs to clip the frequency
155 * 1. devfreq
156 * 2. pm_qos
157 *
158 * To calculate floor (min) freq, we select MAX of floor frequencies
159 * requested from both APIs
160 * To get cap (max) freq, we select MIN of max frequencies
161 *
162 * In case we have conflict (min_freq > max_freq) after above
163 * steps, we ensure that max_freq wins over min_freq
164 */
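	/*
	 * A minimal illustration of the policy above (numbers are only an
	 * assumption for the example): devfreq requests min 200 MHz and
	 * max 1000 MHz while pm_qos requests min 300 MHz and max 800 MHz.
	 * The floor becomes max(200, 300) = 300 MHz and the cap becomes
	 * min(1000, 800) = 800 MHz; if the floor had ended up above the
	 * cap, it would be lowered to the cap.
	 */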
165 min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq);
166 max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq);
167
168 if (min_freq > max_freq)
169 min_freq = max_freq;
170
171 /* Clip requested frequency */
172 if (local_freq < min_freq)
173 local_freq = min_freq;
174
175 if (local_freq > max_freq)
176 local_freq = max_freq;
177
178 /* set the final frequency */
179 rounded_rate = platform->clk_round_rate(dev, local_freq);
180
181 /* Check for duplicate request */
182 if (rounded_rate == g->last_freq)
183 return 0;
184
185 if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate)
186 *freq = rounded_rate;
187 else {
188 g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
189 *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
190 }
191
192 g->last_freq = *freq;
193
194 /* postscale will only scale emc (dram clock) if evaluating
195 * gk20a_tegra_get_emc_rate() produces a new or different emc
196	 * target because the load and/or gpufreq has changed */
197 if (platform->postscale)
198 platform->postscale(dev, rounded_rate);
199
200 return 0;
201}
202
203/*
204 * update_load_estimate_gpmu(profile)
205 *
206 * Update the load estimate using the gpmu. The gpmu value is normalised
207 * based on the time elapsed since it was last queried.
208 */
209
210static void update_load_estimate_gpmu(struct device *dev)
211{
212 struct gk20a *g = get_gk20a(dev);
213 struct gk20a_scale_profile *profile = g->scale_profile;
214 unsigned long dt;
215 u32 busy_time;
216 ktime_t t;
217
218 t = ktime_get();
219 dt = ktime_us_delta(t, profile->last_event_time);
220
221 profile->dev_stat.total_time = dt;
222 profile->last_event_time = t;
223 nvgpu_pmu_load_norm(g, &busy_time);
224 profile->dev_stat.busy_time = (busy_time * dt) / 1000;
225}
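/*
 * Illustrative numbers for the scaling in update_load_estimate_gpmu()
 * above, assuming nvgpu_pmu_load_norm() reports load normalised to 1000
 * (the "/ 1000" in the busy_time calculation suggests this): a reported
 * load of 500 over dt = 10000 us gives busy_time = (500 * 10000) / 1000
 * = 5000 us, i.e. the device is treated as 50% busy for that window.
 */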
226
227/*
228 * gk20a_scale_suspend(dev)
229 *
230 * This function informs devfreq of suspend
231 */
232
233void gk20a_scale_suspend(struct device *dev)
234{
235 struct gk20a *g = get_gk20a(dev);
236 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
237 struct devfreq *devfreq = l->devfreq;
238
239 if (!devfreq)
240 return;
241
242 devfreq_suspend_device(devfreq);
243}
244
245/*
246 * gk20a_scale_resume(dev)
247 *
248 * This function informs devfreq of resume
249 */
250
251void gk20a_scale_resume(struct device *dev)
252{
253 struct gk20a *g = get_gk20a(dev);
254 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
255 struct devfreq *devfreq = l->devfreq;
256
257 if (!devfreq)
258 return;
259
260 g->last_freq = 0;
261 devfreq_resume_device(devfreq);
262}
263
264/*
265 * gk20a_scale_get_dev_status(dev, *stat)
266 *
267 * This function queries the current device status.
268 */
269
270static int gk20a_scale_get_dev_status(struct device *dev,
271 struct devfreq_dev_status *stat)
272{
273 struct gk20a *g = get_gk20a(dev);
274 struct gk20a_scale_profile *profile = g->scale_profile;
275 struct gk20a_platform *platform = dev_get_drvdata(dev);
276
277 /* update the software shadow */
278 nvgpu_pmu_load_update(g);
279
280 /* inform edp about new constraint */
281 if (platform->prescale)
282 platform->prescale(dev);
283
284 /* Make sure there are correct values for the current frequency */
285 profile->dev_stat.current_frequency =
286 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
287
288 /* Update load estimate */
289 update_load_estimate_gpmu(dev);
290
291 /* Copy the contents of the current device status */
292 *stat = profile->dev_stat;
293
294 /* Finally, clear out the local values */
295 profile->dev_stat.total_time = 0;
296 profile->dev_stat.busy_time = 0;
297
298 return 0;
299}
300
301/*
302 * get_cur_freq(struct device *dev, unsigned long *freq)
303 *
304 * This function gets the current GPU clock rate.
305 */
306
307static int get_cur_freq(struct device *dev, unsigned long *freq)
308{
309 struct gk20a *g = get_gk20a(dev);
310 *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
311 return 0;
312}
313
314
315/*
316 * gk20a_scale_init(dev)
317 */
318
319void gk20a_scale_init(struct device *dev)
320{
321 struct gk20a_platform *platform = dev_get_drvdata(dev);
322 struct gk20a *g = platform->g;
323 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
324 struct gk20a_scale_profile *profile;
325 int err;
326
327 if (g->scale_profile)
328 return;
329
330 if (!platform->devfreq_governor && !platform->qos_notify)
331 return;
332
333 profile = nvgpu_kzalloc(g, sizeof(*profile));
334
335 profile->dev = dev;
336 profile->dev_stat.busy = false;
337
338 /* Create frequency table */
339 err = gk20a_scale_make_freq_table(profile);
340 if (err || !profile->devfreq_profile.max_state)
341 goto err_get_freqs;
342
343 profile->qos_min_freq = 0;
344 profile->qos_max_freq = UINT_MAX;
345
346 /* Store device profile so we can access it if devfreq governor
347	 * init needs it */
348 g->scale_profile = profile;
349
350 if (platform->devfreq_governor) {
351 struct devfreq *devfreq;
352
353 profile->devfreq_profile.initial_freq =
354 profile->devfreq_profile.freq_table[0];
355 profile->devfreq_profile.target = gk20a_scale_target;
356 profile->devfreq_profile.get_dev_status =
357 gk20a_scale_get_dev_status;
358 profile->devfreq_profile.get_cur_freq = get_cur_freq;
359 profile->devfreq_profile.polling_ms = 25;
360
361 devfreq = devfreq_add_device(dev,
362 &profile->devfreq_profile,
363 platform->devfreq_governor, NULL);
364
365 if (IS_ERR(devfreq))
366 devfreq = NULL;
367
368 l->devfreq = devfreq;
369 }
370
371 /* Should we register QoS callback for this device? */
372 if (platform->qos_notify) {
373 profile->qos_notify_block.notifier_call =
374 platform->qos_notify;
375
376 pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
377 &profile->qos_notify_block);
378 pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
379 &profile->qos_notify_block);
380 }
381
382 return;
383
384err_get_freqs:
385 nvgpu_kfree(g, profile);
386}
387
388void gk20a_scale_exit(struct device *dev)
389{
390 struct gk20a_platform *platform = dev_get_drvdata(dev);
391 struct gk20a *g = platform->g;
392 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
393 int err;
394
395 if (platform->qos_notify) {
396 pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
397 &g->scale_profile->qos_notify_block);
398 pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
399 &g->scale_profile->qos_notify_block);
400 }
401
402 if (platform->devfreq_governor) {
403 err = devfreq_remove_device(l->devfreq);
404 l->devfreq = NULL;
405 }
406
407 nvgpu_kfree(g, g->scale_profile);
408 g->scale_profile = NULL;
409}
410
411/*
412 * gk20a_scale_hw_init(dev)
413 *
414 * Initialize hardware portion of the device
415 */
416
417void gk20a_scale_hw_init(struct device *dev)
418{
419 struct gk20a_platform *platform = dev_get_drvdata(dev);
420 struct gk20a_scale_profile *profile = platform->g->scale_profile;
421
422	/* make sure that scaling has been initialised */
423 if (!profile)
424 return;
425
426 profile->dev_stat.total_time = 0;
427 profile->last_event_time = ktime_get();
428}
diff --git a/drivers/gpu/nvgpu/common/linux/scale.h b/drivers/gpu/nvgpu/common/linux/scale.h
deleted file mode 100644
index c1e6fe86..00000000
--- a/drivers/gpu/nvgpu/common/linux/scale.h
+++ /dev/null
@@ -1,66 +0,0 @@
1/*
2 * gk20a clock scaling profile
3 *
4 * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef GK20A_SCALE_H
20#define GK20A_SCALE_H
21
22#include <linux/devfreq.h>
23
24struct clk;
25
26struct gk20a_scale_profile {
27 struct device *dev;
28 ktime_t last_event_time;
29 struct devfreq_dev_profile devfreq_profile;
30 struct devfreq_dev_status dev_stat;
31 struct notifier_block qos_notify_block;
32 unsigned long qos_min_freq;
33 unsigned long qos_max_freq;
34 void *private_data;
35};
36
37/* Initialization and de-initialization for module */
38void gk20a_scale_init(struct device *);
39void gk20a_scale_exit(struct device *);
40void gk20a_scale_hw_init(struct device *dev);
41
42#if defined(CONFIG_GK20A_DEVFREQ)
43/*
44 * Call when performing a submit to notify the scaling mechanism that the
45 * module is in use
46 */
47void gk20a_scale_notify_busy(struct device *);
48void gk20a_scale_notify_idle(struct device *);
49
50void gk20a_scale_suspend(struct device *);
51void gk20a_scale_resume(struct device *);
52int gk20a_scale_qos_notify(struct notifier_block *nb,
53 unsigned long n, void *p);
54#else
55static inline void gk20a_scale_notify_busy(struct device *dev) {}
56static inline void gk20a_scale_notify_idle(struct device *dev) {}
57static inline void gk20a_scale_suspend(struct device *dev) {}
58static inline void gk20a_scale_resume(struct device *dev) {}
59static inline int gk20a_scale_qos_notify(struct notifier_block *nb,
60 unsigned long n, void *p)
61{
62 return -ENOSYS;
63}
64#endif
65
66#endif
diff --git a/drivers/gpu/nvgpu/common/linux/sched.c b/drivers/gpu/nvgpu/common/linux/sched.c
deleted file mode 100644
index 2ad5aabf..00000000
--- a/drivers/gpu/nvgpu/common/linux/sched.c
+++ /dev/null
@@ -1,676 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#include <asm/barrier.h>
17#include <linux/wait.h>
18#include <linux/uaccess.h>
19#include <linux/poll.h>
20#include <uapi/linux/nvgpu.h>
21
22#include <nvgpu/kmem.h>
23#include <nvgpu/log.h>
24#include <nvgpu/bug.h>
25#include <nvgpu/barrier.h>
26
27#include "gk20a/gk20a.h"
28#include "gk20a/gr_gk20a.h"
29#include "sched.h"
30#include "os_linux.h"
31#include "ioctl_tsg.h"
32
33#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
34#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
35
36ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
37 size_t size, loff_t *off)
38{
39 struct gk20a_sched_ctrl *sched = filp->private_data;
40 struct gk20a *g = sched->g;
41 struct nvgpu_sched_event_arg event = { 0 };
42 int err;
43
44 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched,
45 "filp=%p buf=%p size=%zu", filp, buf, size);
46
47 if (size < sizeof(event))
48 return -EINVAL;
49 size = sizeof(event);
50
51 nvgpu_mutex_acquire(&sched->status_lock);
52 while (!sched->status) {
53 nvgpu_mutex_release(&sched->status_lock);
54 if (filp->f_flags & O_NONBLOCK)
55 return -EAGAIN;
56 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq,
57 sched->status, 0);
58 if (err)
59 return err;
60 nvgpu_mutex_acquire(&sched->status_lock);
61 }
62
63 event.reserved = 0;
64 event.status = sched->status;
65
66 if (copy_to_user(buf, &event, size)) {
67 nvgpu_mutex_release(&sched->status_lock);
68 return -EFAULT;
69 }
70
71 sched->status = 0;
72
73 nvgpu_mutex_release(&sched->status_lock);
74
75 return size;
76}
77
78unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait)
79{
80 struct gk20a_sched_ctrl *sched = filp->private_data;
81 struct gk20a *g = sched->g;
82 unsigned int mask = 0;
83
84 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
85
86 nvgpu_mutex_acquire(&sched->status_lock);
87 poll_wait(filp, &sched->readout_wq.wq, wait);
88 if (sched->status)
89 mask |= POLLIN | POLLRDNORM;
90 nvgpu_mutex_release(&sched->status_lock);
91
92 return mask;
93}
94
95static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched,
96 struct nvgpu_sched_get_tsgs_args *arg)
97{
98 struct gk20a *g = sched->g;
99
100 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
101 arg->size, arg->buffer);
102
103 if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
104 arg->size = sched->bitmap_size;
105 return -ENOSPC;
106 }
107
108 nvgpu_mutex_acquire(&sched->status_lock);
109 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
110 sched->active_tsg_bitmap, sched->bitmap_size)) {
111 nvgpu_mutex_release(&sched->status_lock);
112 return -EFAULT;
113 }
114 nvgpu_mutex_release(&sched->status_lock);
115
116 return 0;
117}
118
119static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched,
120 struct nvgpu_sched_get_tsgs_args *arg)
121{
122 struct gk20a *g = sched->g;
123
124 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
125 arg->size, arg->buffer);
126
127 if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
128 arg->size = sched->bitmap_size;
129 return -ENOSPC;
130 }
131
132 nvgpu_mutex_acquire(&sched->status_lock);
133 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
134 sched->recent_tsg_bitmap, sched->bitmap_size)) {
135 nvgpu_mutex_release(&sched->status_lock);
136 return -EFAULT;
137 }
138
139 memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
140 nvgpu_mutex_release(&sched->status_lock);
141
142 return 0;
143}
144
145static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched,
146 struct nvgpu_sched_get_tsgs_by_pid_args *arg)
147{
148 struct gk20a *g = sched->g;
149 struct fifo_gk20a *f = &g->fifo;
150 struct tsg_gk20a *tsg;
151 u64 *bitmap;
152 unsigned int tsgid;
153 /* pid at user level corresponds to kernel tgid */
154 pid_t tgid = (pid_t)arg->pid;
155 int err = 0;
156
157 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx",
158 (pid_t)arg->pid, arg->size, arg->buffer);
159
160 if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
161 arg->size = sched->bitmap_size;
162 return -ENOSPC;
163 }
164
165 bitmap = nvgpu_kzalloc(sched->g, sched->bitmap_size);
166 if (!bitmap)
167 return -ENOMEM;
168
169 nvgpu_mutex_acquire(&sched->status_lock);
170 for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
171 if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) {
172 tsg = &f->tsg[tsgid];
173 if (tsg->tgid == tgid)
174 NVGPU_SCHED_SET(tsgid, bitmap);
175 }
176 }
177 nvgpu_mutex_release(&sched->status_lock);
178
179 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
180 bitmap, sched->bitmap_size))
181 err = -EFAULT;
182
183 nvgpu_kfree(sched->g, bitmap);
184
185 return err;
186}
187
188static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
189 struct nvgpu_sched_tsg_get_params_args *arg)
190{
191 struct gk20a *g = sched->g;
192 struct fifo_gk20a *f = &g->fifo;
193 struct tsg_gk20a *tsg;
194 u32 tsgid = arg->tsgid;
195
196 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
197
198 if (tsgid >= f->num_channels)
199 return -EINVAL;
200
201 nvgpu_speculation_barrier();
202
203 tsg = &f->tsg[tsgid];
204 if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
205 return -ENXIO;
206
207 arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */
208 arg->runlist_interleave = tsg->interleave_level;
209 arg->timeslice = tsg->timeslice_us;
210
211 arg->graphics_preempt_mode =
212 tsg->gr_ctx.graphics_preempt_mode;
213 arg->compute_preempt_mode =
214 tsg->gr_ctx.compute_preempt_mode;
215
216 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
217
218 return 0;
219}
220
221static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
222 struct gk20a_sched_ctrl *sched,
223 struct nvgpu_sched_tsg_timeslice_args *arg)
224{
225 struct gk20a *g = sched->g;
226 struct fifo_gk20a *f = &g->fifo;
227 struct tsg_gk20a *tsg;
228 u32 tsgid = arg->tsgid;
229 int err;
230
231 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
232
233 if (tsgid >= f->num_channels)
234 return -EINVAL;
235
236 nvgpu_speculation_barrier();
237
238 tsg = &f->tsg[tsgid];
239 if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
240 return -ENXIO;
241
242 err = gk20a_busy(g);
243 if (err)
244 goto done;
245
246 err = gk20a_tsg_set_timeslice(tsg, arg->timeslice);
247
248 gk20a_idle(g);
249
250done:
251 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
252
253 return err;
254}
255
256static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
257 struct gk20a_sched_ctrl *sched,
258 struct nvgpu_sched_tsg_runlist_interleave_args *arg)
259{
260 struct gk20a *g = sched->g;
261 struct fifo_gk20a *f = &g->fifo;
262 struct tsg_gk20a *tsg;
263 u32 tsgid = arg->tsgid;
264 int err;
265
266 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
267
268 if (tsgid >= f->num_channels)
269 return -EINVAL;
270
271 nvgpu_speculation_barrier();
272
273 tsg = &f->tsg[tsgid];
274 if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
275 return -ENXIO;
276
277 err = gk20a_busy(g);
278 if (err)
279 goto done;
280
281 err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave);
282
283 gk20a_idle(g);
284
285done:
286 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
287
288 return err;
289}
290
291static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched)
292{
293 struct gk20a *g = sched->g;
294
295 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
296
297 nvgpu_mutex_acquire(&sched->control_lock);
298 sched->control_locked = true;
299 nvgpu_mutex_release(&sched->control_lock);
300 return 0;
301}
302
303static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched)
304{
305 struct gk20a *g = sched->g;
306
307 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
308
309 nvgpu_mutex_acquire(&sched->control_lock);
310 sched->control_locked = false;
311 nvgpu_mutex_release(&sched->control_lock);
312 return 0;
313}
314
315static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched,
316 struct nvgpu_sched_api_version_args *args)
317{
318 struct gk20a *g = sched->g;
319
320 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
321
322 args->version = NVGPU_SCHED_API_VERSION;
323 return 0;
324}
325
326static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
327 struct nvgpu_sched_tsg_refcount_args *arg)
328{
329 struct gk20a *g = sched->g;
330 struct fifo_gk20a *f = &g->fifo;
331 struct tsg_gk20a *tsg;
332 u32 tsgid = arg->tsgid;
333
334 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
335
336 if (tsgid >= f->num_channels)
337 return -EINVAL;
338
339 nvgpu_speculation_barrier();
340
341 tsg = &f->tsg[tsgid];
342 if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
343 return -ENXIO;
344
345 nvgpu_mutex_acquire(&sched->status_lock);
346 if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
347 nvgpu_warn(g, "tsgid=%d already referenced", tsgid);
348 /* unlock status_lock as nvgpu_ioctl_tsg_release locks it */
349 nvgpu_mutex_release(&sched->status_lock);
350 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
351 return -ENXIO;
352 }
353
354 /* keep reference on TSG, will be released on
355 * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close
356 */
357 NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap);
358 nvgpu_mutex_release(&sched->status_lock);
359
360 return 0;
361}
362
363static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched,
364 struct nvgpu_sched_tsg_refcount_args *arg)
365{
366 struct gk20a *g = sched->g;
367 struct fifo_gk20a *f = &g->fifo;
368 struct tsg_gk20a *tsg;
369 u32 tsgid = arg->tsgid;
370
371 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
372
373 if (tsgid >= f->num_channels)
374 return -EINVAL;
375
376 nvgpu_speculation_barrier();
377
378 nvgpu_mutex_acquire(&sched->status_lock);
379 if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
380 nvgpu_mutex_release(&sched->status_lock);
381 nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid);
382 return -ENXIO;
383 }
384 NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap);
385 nvgpu_mutex_release(&sched->status_lock);
386
387 tsg = &f->tsg[tsgid];
388 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
389
390 return 0;
391}
392
393int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
394{
395 struct nvgpu_os_linux *l = container_of(inode->i_cdev,
396 struct nvgpu_os_linux, sched.cdev);
397 struct gk20a *g;
398 struct gk20a_sched_ctrl *sched;
399 int err = 0;
400
401 g = gk20a_get(&l->g);
402 if (!g)
403 return -ENODEV;
404 sched = &l->sched_ctrl;
405
406 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g);
407
408 if (!sched->sw_ready) {
409 err = gk20a_busy(g);
410 if (err)
411 goto free_ref;
412
413 gk20a_idle(g);
414 }
415
416 if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) {
417 err = -EBUSY;
418 goto free_ref;
419 }
420
421 memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
422 sched->bitmap_size);
423 memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size);
424
425 filp->private_data = sched;
426 nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched);
427
428free_ref:
429 if (err)
430 gk20a_put(g);
431 return err;
432}
433
434long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
435 unsigned long arg)
436{
437 struct gk20a_sched_ctrl *sched = filp->private_data;
438 struct gk20a *g = sched->g;
439 u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
440 int err = 0;
441
442 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd));
443
444 if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) ||
445 (_IOC_NR(cmd) == 0) ||
446 (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) ||
447 (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE))
448 return -EINVAL;
449
450 memset(buf, 0, sizeof(buf));
451 if (_IOC_DIR(cmd) & _IOC_WRITE) {
452 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
453 return -EFAULT;
454 }
455
456 switch (cmd) {
457 case NVGPU_SCHED_IOCTL_GET_TSGS:
458 err = gk20a_sched_dev_ioctl_get_tsgs(sched,
459 (struct nvgpu_sched_get_tsgs_args *)buf);
460 break;
461 case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS:
462 err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched,
463 (struct nvgpu_sched_get_tsgs_args *)buf);
464 break;
465 case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID:
466 err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched,
467 (struct nvgpu_sched_get_tsgs_by_pid_args *)buf);
468 break;
469 case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS:
470 err = gk20a_sched_dev_ioctl_get_params(sched,
471 (struct nvgpu_sched_tsg_get_params_args *)buf);
472 break;
473 case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE:
474 err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched,
475 (struct nvgpu_sched_tsg_timeslice_args *)buf);
476 break;
477 case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
478 err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched,
479 (struct nvgpu_sched_tsg_runlist_interleave_args *)buf);
480 break;
481 case NVGPU_SCHED_IOCTL_LOCK_CONTROL:
482 err = gk20a_sched_dev_ioctl_lock_control(sched);
483 break;
484 case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL:
485 err = gk20a_sched_dev_ioctl_unlock_control(sched);
486 break;
487 case NVGPU_SCHED_IOCTL_GET_API_VERSION:
488 err = gk20a_sched_dev_ioctl_get_api_version(sched,
489 (struct nvgpu_sched_api_version_args *)buf);
490 break;
491 case NVGPU_SCHED_IOCTL_GET_TSG:
492 err = gk20a_sched_dev_ioctl_get_tsg(sched,
493 (struct nvgpu_sched_tsg_refcount_args *)buf);
494 break;
495 case NVGPU_SCHED_IOCTL_PUT_TSG:
496 err = gk20a_sched_dev_ioctl_put_tsg(sched,
497 (struct nvgpu_sched_tsg_refcount_args *)buf);
498 break;
499 default:
500 nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
501 err = -ENOTTY;
502 }
503
504 /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on
505 * purpose with NULL buffer and/or zero size to discover TSG bitmap
506 * size. We need to update user arguments in this case too, even
507 * if we return an error.
508 */
509 if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) {
510 if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
511 err = -EFAULT;
512 }
513
514 return err;
515}
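/*
 * Minimal user-space sketch of the size-discovery pattern noted above:
 * a first GET_TSGS call with a zero-sized buffer fails with ENOSPC but
 * still reports the required bitmap size, which a second call then uses.
 * The field names of struct nvgpu_sched_get_tsgs_args (size, buffer) and
 * the uapi header are assumptions made for this example.
 */
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header */

static void *sched_read_tsg_bitmap(int sched_fd, __u32 *size_out)
{
	struct nvgpu_sched_get_tsgs_args args = { 0 };
	void *bitmap;

	/* First call: zero size, only used to learn the bitmap size. */
	if (ioctl(sched_fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) != 0 &&
	    errno != ENOSPC)
		return NULL;

	bitmap = calloc(1, args.size);
	if (bitmap == NULL)
		return NULL;

	/* Second call: fetch the active TSG bitmap into the buffer. */
	args.buffer = (__u64)(uintptr_t)bitmap;
	if (ioctl(sched_fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) != 0) {
		free(bitmap);
		return NULL;
	}

	*size_out = args.size;
	return bitmap;
}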
516
517int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
518{
519 struct gk20a_sched_ctrl *sched = filp->private_data;
520 struct gk20a *g = sched->g;
521 struct fifo_gk20a *f = &g->fifo;
522 struct tsg_gk20a *tsg;
523 unsigned int tsgid;
524
525 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched);
526
527 /* release any reference to TSGs */
528 for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
529 if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
530 tsg = &f->tsg[tsgid];
531 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
532 }
533 }
534
535 /* unlock control */
536 nvgpu_mutex_acquire(&sched->control_lock);
537 sched->control_locked = false;
538 nvgpu_mutex_release(&sched->control_lock);
539
540 nvgpu_mutex_release(&sched->busy_lock);
541 gk20a_put(g);
542 return 0;
543}
544
545void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
546{
547 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
548 struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
549 int err;
550
551 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
552
553 if (!sched->sw_ready) {
554 err = gk20a_busy(g);
555 if (err) {
556 WARN_ON(err);
557 return;
558 }
559
560 gk20a_idle(g);
561 }
562
563 nvgpu_mutex_acquire(&sched->status_lock);
564 NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap);
565 NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap);
566 sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN;
567 nvgpu_mutex_release(&sched->status_lock);
568 nvgpu_cond_signal_interruptible(&sched->readout_wq);
569}
570
571void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
572{
573 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
574 struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
575
576 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
577
578 nvgpu_mutex_acquire(&sched->status_lock);
579 NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap);
580
581 /* clear recent_tsg_bitmap as well: if app manager did not
582 * notice that TSG was previously added, no need to notify it
583 * if the TSG has been released in the meantime. If the
584 * TSG gets reallocated, app manager will be notified as usual.
585 */
586 NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap);
587
588 /* do not set event_pending, we only want to notify app manager
589 * when TSGs are added, so that it can apply sched params
590 */
591 nvgpu_mutex_release(&sched->status_lock);
592}
593
594int gk20a_sched_ctrl_init(struct gk20a *g)
595{
596 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
597 struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
598 struct fifo_gk20a *f = &g->fifo;
599 int err;
600
601 if (sched->sw_ready)
602 return 0;
603
604 sched->g = g;
605 sched->bitmap_size = roundup(f->num_channels, 64) / 8;
606 sched->status = 0;
607
608 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu",
609 g, sched, sched->bitmap_size);
610
611 sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
612 if (!sched->active_tsg_bitmap)
613 return -ENOMEM;
614
615 sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
616 if (!sched->recent_tsg_bitmap) {
617 err = -ENOMEM;
618 goto free_active;
619 }
620
621 sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
622 if (!sched->ref_tsg_bitmap) {
623 err = -ENOMEM;
624 goto free_recent;
625 }
626
627 nvgpu_cond_init(&sched->readout_wq);
628
629 err = nvgpu_mutex_init(&sched->status_lock);
630 if (err)
631 goto free_ref;
632
633 err = nvgpu_mutex_init(&sched->control_lock);
634 if (err)
635 goto free_status_lock;
636
637 err = nvgpu_mutex_init(&sched->busy_lock);
638 if (err)
639 goto free_control_lock;
640
641 sched->sw_ready = true;
642
643 return 0;
644
645free_control_lock:
646 nvgpu_mutex_destroy(&sched->control_lock);
647free_status_lock:
648 nvgpu_mutex_destroy(&sched->status_lock);
649free_ref:
650 nvgpu_kfree(g, sched->ref_tsg_bitmap);
651free_recent:
652 nvgpu_kfree(g, sched->recent_tsg_bitmap);
653free_active:
654 nvgpu_kfree(g, sched->active_tsg_bitmap);
655
656 return err;
657}
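/*
 * Worked example of the sizing above: the bitmaps are arrays of u64
 * words, so the allocation is rounded up to a multiple of 64 TSG ids and
 * converted to bytes. With f->num_channels = 512 this gives
 * roundup(512, 64) / 8 = 64 bytes (eight u64 words); with 100 channels it
 * gives roundup(100, 64) / 8 = 128 / 8 = 16 bytes (two u64 words).
 */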
658
659void gk20a_sched_ctrl_cleanup(struct gk20a *g)
660{
661 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
662 struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
663
664 nvgpu_kfree(g, sched->active_tsg_bitmap);
665 nvgpu_kfree(g, sched->recent_tsg_bitmap);
666 nvgpu_kfree(g, sched->ref_tsg_bitmap);
667 sched->active_tsg_bitmap = NULL;
668 sched->recent_tsg_bitmap = NULL;
669 sched->ref_tsg_bitmap = NULL;
670
671 nvgpu_mutex_destroy(&sched->status_lock);
672 nvgpu_mutex_destroy(&sched->control_lock);
673 nvgpu_mutex_destroy(&sched->busy_lock);
674
675 sched->sw_ready = false;
676}
diff --git a/drivers/gpu/nvgpu/common/linux/sched.h b/drivers/gpu/nvgpu/common/linux/sched.h
deleted file mode 100644
index a699bbea..00000000
--- a/drivers/gpu/nvgpu/common/linux/sched.h
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __NVGPU_SCHED_H
17#define __NVGPU_SCHED_H
18
19struct gk20a;
20struct gpu_ops;
21struct tsg_gk20a;
22struct poll_table_struct;
23
24struct gk20a_sched_ctrl {
25 struct gk20a *g;
26
27 struct nvgpu_mutex control_lock;
28 bool control_locked;
29 bool sw_ready;
30 struct nvgpu_mutex status_lock;
31 struct nvgpu_mutex busy_lock;
32
33 u64 status;
34
35 size_t bitmap_size;
36 u64 *active_tsg_bitmap;
37 u64 *recent_tsg_bitmap;
38 u64 *ref_tsg_bitmap;
39
40 struct nvgpu_cond readout_wq;
41};
42
43int gk20a_sched_dev_release(struct inode *inode, struct file *filp);
44int gk20a_sched_dev_open(struct inode *inode, struct file *filp);
45long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long);
46ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *);
47unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *);
48
49void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
50void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
51int gk20a_sched_ctrl_init(struct gk20a *);
52
53void gk20a_sched_ctrl_cleanup(struct gk20a *g);
54
55#endif /* __NVGPU_SCHED_H */
diff --git a/drivers/gpu/nvgpu/common/linux/sim.c b/drivers/gpu/nvgpu/common/linux/sim.c
deleted file mode 100644
index 8e964f39..00000000
--- a/drivers/gpu/nvgpu/common/linux/sim.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/io.h>
18#include <linux/highmem.h>
19#include <linux/platform_device.h>
20
21#include <nvgpu/log.h>
22#include <nvgpu/linux/vm.h>
23#include <nvgpu/bitops.h>
24#include <nvgpu/nvgpu_mem.h>
25#include <nvgpu/dma.h>
26#include <nvgpu/soc.h>
27#include <nvgpu/hw_sim.h>
28#include <nvgpu/sim.h>
29#include "gk20a/gk20a.h"
30#include "platform_gk20a.h"
31#include "os_linux.h"
32#include "module.h"
33
34void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v)
35{
36 struct sim_nvgpu_linux *sim_linux =
37 container_of(sim, struct sim_nvgpu_linux, sim);
38
39 writel(v, sim_linux->regs + r);
40}
41
42u32 sim_readl(struct sim_nvgpu *sim, u32 r)
43{
44 struct sim_nvgpu_linux *sim_linux =
45 container_of(sim, struct sim_nvgpu_linux, sim);
46
47 return readl(sim_linux->regs + r);
48}
49
50void nvgpu_remove_sim_support_linux(struct gk20a *g)
51{
52 struct sim_nvgpu_linux *sim_linux;
53
54 if (!g->sim)
55 return;
56
57 sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
58 if (sim_linux->regs) {
59 sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
60 iounmap(sim_linux->regs);
61 sim_linux->regs = NULL;
62 }
63 nvgpu_kfree(g, sim_linux);
64 g->sim = NULL;
65}
66
67int nvgpu_init_sim_support_linux(struct gk20a *g,
68 struct platform_device *dev)
69{
70 struct sim_nvgpu_linux *sim_linux;
71 int err = -ENOMEM;
72
73 if (!nvgpu_platform_is_simulation(g))
74 return 0;
75
76 sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
77 if (!sim_linux)
78 return err;
79 g->sim = &sim_linux->sim;
80 g->sim->g = g;
81 sim_linux->regs = nvgpu_ioremap_resource(dev,
82 GK20A_SIM_IORESOURCE_MEM,
83 &sim_linux->reg_mem);
84 if (IS_ERR(sim_linux->regs)) {
85 nvgpu_err(g, "failed to remap gk20a sim regs");
86 err = PTR_ERR(sim_linux->regs);
87 goto fail;
88 }
89 sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux;
90 return 0;
91
92fail:
93 nvgpu_remove_sim_support_linux(g);
94 return err;
95}
diff --git a/drivers/gpu/nvgpu/common/linux/sim_pci.c b/drivers/gpu/nvgpu/common/linux/sim_pci.c
deleted file mode 100644
index d37767b7..00000000
--- a/drivers/gpu/nvgpu/common/linux/sim_pci.c
+++ /dev/null
@@ -1,91 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/io.h>
18#include <linux/highmem.h>
19#include <linux/platform_device.h>
20
21#include <nvgpu/log.h>
22#include <nvgpu/linux/vm.h>
23#include <nvgpu/bitops.h>
24#include <nvgpu/nvgpu_mem.h>
25#include <nvgpu/dma.h>
26#include <nvgpu/hw_sim_pci.h>
27#include <nvgpu/sim.h>
28#include "gk20a/gk20a.h"
29#include "os_linux.h"
30#include "module.h"
31
32static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base)
33{
34 u32 cfg;
35 bool is_simulation = false;
36
37 cfg = nvgpu_readl(g, sim_base + sim_config_r());
38 if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v())
39 is_simulation = true;
40
41 return is_simulation;
42}
43
44void nvgpu_remove_sim_support_linux_pci(struct gk20a *g)
45{
46 struct sim_nvgpu_linux *sim_linux;
47 bool is_simulation;
48
49 is_simulation = _nvgpu_pci_is_simulation(g, sim_r());
50
51 if (!is_simulation) {
52 return;
53 }
54
55 if (!g->sim) {
56 nvgpu_warn(g, "sim_gk20a not allocated");
57 return;
58 }
59 sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
60
61 if (sim_linux->regs) {
62 sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
63 sim_linux->regs = NULL;
64 }
65 nvgpu_kfree(g, sim_linux);
66 g->sim = NULL;
67}
68
69int nvgpu_init_sim_support_linux_pci(struct gk20a *g)
70{
71 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
72 struct sim_nvgpu_linux *sim_linux;
73 int err = -ENOMEM;
74 bool is_simulation;
75
76 is_simulation = _nvgpu_pci_is_simulation(g, sim_r());
77 __nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation);
78
79 if (!is_simulation)
80 return 0;
81
82 sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
83 if (!sim_linux)
84 return err;
85 g->sim = &sim_linux->sim;
86 g->sim->g = g;
87 sim_linux->regs = l->regs + sim_r();
88 sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci;
89
90 return 0;
91}
diff --git a/drivers/gpu/nvgpu/common/linux/soc.c b/drivers/gpu/nvgpu/common/linux/soc.c
deleted file mode 100644
index 1b27d6f1..00000000
--- a/drivers/gpu/nvgpu/common/linux/soc.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <soc/tegra/chip-id.h>
15#include <soc/tegra/fuse.h>
16#include <soc/tegra/tegra_bpmp.h>
17#ifdef CONFIG_TEGRA_HV_MANAGER
18#include <soc/tegra/virt/syscalls.h>
19#endif
20
21#include <nvgpu/soc.h>
22#include "os_linux.h"
23#include "platform_gk20a.h"
24
25bool nvgpu_platform_is_silicon(struct gk20a *g)
26{
27 return tegra_platform_is_silicon();
28}
29
30bool nvgpu_platform_is_simulation(struct gk20a *g)
31{
32 return tegra_platform_is_vdk();
33}
34
35bool nvgpu_platform_is_fpga(struct gk20a *g)
36{
37 return tegra_platform_is_fpga();
38}
39
40bool nvgpu_is_hypervisor_mode(struct gk20a *g)
41{
42 return is_tegra_hypervisor_mode();
43}
44
45bool nvgpu_is_bpmp_running(struct gk20a *g)
46{
47 return tegra_bpmp_running();
48}
49
50bool nvgpu_is_soc_t194_a01(struct gk20a *g)
51{
52 return ((tegra_get_chip_id() == TEGRA194 &&
53 tegra_chip_get_revision() == TEGRA194_REVISION_A01) ?
54 true : false);
55}
56
57#ifdef CONFIG_TEGRA_HV_MANAGER
58/* When nvlink is enabled on dGPU, we need to use physical memory addresses.
59 * There is no SMMU translation. However, the device initially enumerates as a
 60 * PCIe device. As such, when allocating memory for this PCIe device, the DMA
61 * framework ends up allocating memory using SMMU (if enabled in device tree).
62 * As a result, when we switch to nvlink, we need to use underlying physical
63 * addresses, even if memory mappings exist in SMMU.
64 * In addition, when stage-2 SMMU translation is enabled (for instance when HV
65 * is enabled), the addresses we get from dma_alloc are IPAs. We need to
66 * convert them to PA.
67 */
68static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa)
69{
70 struct device *dev = dev_from_gk20a(g);
71 struct gk20a_platform *platform = gk20a_get_platform(dev);
72 struct hyp_ipa_pa_info info;
73 int err;
74 u64 pa = 0ULL;
75
76 err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa);
77 if (err < 0) {
78 /* WAR for bug 2096877
79 * hyp_read_ipa_pa_info only looks up RAM mappings.
80 * assume one to one IPA:PA mapping for syncpt aperture
81 */
82 u64 start = g->syncpt_unit_base;
83 u64 end = g->syncpt_unit_base + g->syncpt_unit_size;
84 if ((ipa >= start) && (ipa < end)) {
85 pa = ipa;
86 nvgpu_log(g, gpu_dbg_map_v,
87 "ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n",
88 ipa, platform->vmid, pa);
89 } else {
90 nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d",
91 ipa, platform->vmid, err);
92 }
93 } else {
94 pa = info.base + info.offset;
95 nvgpu_log(g, gpu_dbg_map_v,
96 "ipa=%llx vmid=%d -> pa=%llx "
97 "base=%llx offset=%llx size=%llx\n",
98 ipa, platform->vmid, pa, info.base,
99 info.offset, info.size);
100 }
101 return pa;
102}
103#endif
104
105int nvgpu_init_soc_vars(struct gk20a *g)
106{
107#ifdef CONFIG_TEGRA_HV_MANAGER
108 struct device *dev = dev_from_gk20a(g);
109 struct gk20a_platform *platform = gk20a_get_platform(dev);
110 int err;
111
112 if (nvgpu_is_hypervisor_mode(g)) {
113 err = hyp_read_gid(&platform->vmid);
114 if (err) {
115 nvgpu_err(g, "failed to read vmid");
116 return err;
117 }
118 platform->phys_addr = nvgpu_tegra_hv_ipa_pa;
119 }
120#endif
121 return 0;
122}
diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c
deleted file mode 100644
index fad21351..00000000
--- a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c
+++ /dev/null
@@ -1,419 +0,0 @@
1/*
2 * Semaphore Sync Framework Integration
3 *
4 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/hrtimer.h>
22#include <linux/module.h>
23#include <nvgpu/lock.h>
24
25#include <nvgpu/kmem.h>
26#include <nvgpu/semaphore.h>
27#include <nvgpu/bug.h>
28#include <nvgpu/kref.h>
29#include "../common/linux/channel.h"
30
31#include "../drivers/staging/android/sync.h"
32
33#include "sync_sema_android.h"
34
35static const struct sync_timeline_ops gk20a_sync_timeline_ops;
36
37struct gk20a_sync_timeline {
38 struct sync_timeline obj;
39 u32 max;
40 u32 min;
41};
42
43/**
44 * The sync framework dups pts when merging fences. We share a single
45 * refcounted gk20a_sync_pt for each duped pt.
46 */
47struct gk20a_sync_pt {
48 struct gk20a *g;
49 struct nvgpu_ref refcount;
50 u32 thresh;
51 struct nvgpu_semaphore *sema;
52 struct gk20a_sync_timeline *obj;
53
54 /*
55 * Use a spin lock here since it will have better performance
56 * than a mutex - there should be very little contention on this
57 * lock.
58 */
59 struct nvgpu_spinlock lock;
60};
61
62struct gk20a_sync_pt_inst {
63 struct sync_pt pt;
64 struct gk20a_sync_pt *shared;
65};
66
67/**
68 * Compares sync pt values a and b, both of which will trigger either before
69 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
70 * ref). Supplying ref allows us to handle wrapping correctly.
71 *
72 * Returns -1 if a < b (a triggers before b)
73 * 0 if a = b (a and b trigger at the same time)
74 * 1 if a > b (b triggers before a)
75 */
76static int __gk20a_sync_pt_compare_ref(
77 u32 ref,
78 u32 a,
79 u32 b)
80{
81 /*
82 * We normalize both a and b by subtracting ref from them.
83 * Denote the normalized values by a_n and b_n. Note that because
84 * of wrapping, a_n and/or b_n may be negative.
85 *
86 * The normalized values a_n and b_n satisfy:
87 * - a positive value triggers before a negative value
88 * - a smaller positive value triggers before a greater positive value
89 * - a smaller negative value (greater in absolute value) triggers
90 * before a greater negative value (smaller in absolute value).
91 *
92 * Thus we can just stick to unsigned arithmetic and compare
93 * (u32)a_n to (u32)b_n.
94 *
95 * Just to reiterate the possible cases:
96 *
97 * 1A) ...ref..a....b....
98 * 1B) ...ref..b....a....
99 * 2A) ...b....ref..a.... b_n < 0
100 * 2B) ...a....ref..b.... a_n > 0
101 * 3A) ...a....b....ref.. a_n < 0, b_n < 0
 102 * 3B) ...b....a....ref.. a_n < 0, b_n < 0
103 */
104 u32 a_n = a - ref;
105 u32 b_n = b - ref;
106 if (a_n < b_n)
107 return -1;
108 else if (a_n > b_n)
109 return 1;
110 else
111 return 0;
112}
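/*
 * Worked example of the wrap handling above (values chosen purely for
 * illustration): with ref = 0xfffffff0, a = 0xfffffffe and b = 0x00000005,
 * the normalized values are a_n = a - ref = 0x0e and b_n = b - ref = 0x15,
 * so a_n < b_n and the function returns -1: a triggers before b even
 * though a > b in a plain unsigned comparison.
 */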
113
114static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
115{
116 struct gk20a_sync_pt_inst *pti =
117 container_of(pt, struct gk20a_sync_pt_inst, pt);
118 return pti->shared;
119}
120static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
121{
122 if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
123 return NULL;
124 return (struct gk20a_sync_timeline *)obj;
125}
126
127static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
128{
129 struct gk20a_sync_pt *pt =
130 container_of(ref, struct gk20a_sync_pt, refcount);
131 struct gk20a *g = pt->g;
132
133 if (pt->sema)
134 nvgpu_semaphore_put(pt->sema);
135 nvgpu_kfree(g, pt);
136}
137
138static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
139 struct gk20a *g,
140 struct gk20a_sync_timeline *obj,
141 struct nvgpu_semaphore *sema)
142{
143 struct gk20a_sync_pt *shared;
144
145 shared = nvgpu_kzalloc(g, sizeof(*shared));
146 if (!shared)
147 return NULL;
148
149 nvgpu_ref_init(&shared->refcount);
150 shared->g = g;
151 shared->obj = obj;
152 shared->sema = sema;
153 shared->thresh = ++obj->max; /* sync framework has a lock */
154
155 nvgpu_spinlock_init(&shared->lock);
156
157 nvgpu_semaphore_get(sema);
158
159 return shared;
160}
161
162static struct sync_pt *gk20a_sync_pt_create_inst(
163 struct gk20a *g,
164 struct gk20a_sync_timeline *obj,
165 struct nvgpu_semaphore *sema)
166{
167 struct gk20a_sync_pt_inst *pti;
168
169 pti = (struct gk20a_sync_pt_inst *)
170 sync_pt_create(&obj->obj, sizeof(*pti));
171 if (!pti)
172 return NULL;
173
174 pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
175 if (!pti->shared) {
176 sync_pt_free(&pti->pt);
177 return NULL;
178 }
179 return &pti->pt;
180}
181
182static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
183{
184 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
185 if (pt)
186 nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
187}
188
189static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
190{
191 struct gk20a_sync_pt_inst *pti;
192 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
193
194 pti = (struct gk20a_sync_pt_inst *)
195 sync_pt_create(&pt->obj->obj, sizeof(*pti));
196 if (!pti)
197 return NULL;
198 pti->shared = pt;
199 nvgpu_ref_get(&pt->refcount);
200 return &pti->pt;
201}
202
203/*
204 * This function must be able to run on the same sync_pt concurrently. This
205 * requires a lock to protect access to the sync_pt's internal data structures
206 * which are modified as a side effect of calling this function.
207 */
208static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
209{
210 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
211 struct gk20a_sync_timeline *obj = pt->obj;
212 bool signaled = true;
213
214 nvgpu_spinlock_acquire(&pt->lock);
215 if (!pt->sema)
216 goto done;
217
 218 /* Acquired == not released yet == active == not signaled. */
219 signaled = !nvgpu_semaphore_is_acquired(pt->sema);
220
221 if (signaled) {
222 /* Update min if necessary. */
223 if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
224 obj->min) == 1)
225 obj->min = pt->thresh;
226
227 /* Release the semaphore to the pool. */
228 nvgpu_semaphore_put(pt->sema);
229 pt->sema = NULL;
230 }
231done:
232 nvgpu_spinlock_release(&pt->lock);
233
234 return signaled;
235}
236
237static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
238{
239 bool a_expired;
240 bool b_expired;
241 struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
242 struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);
243
244 if (WARN_ON(pt_a->obj != pt_b->obj))
245 return 0;
246
247 /* Early out */
248 if (a == b)
249 return 0;
250
251 a_expired = gk20a_sync_pt_has_signaled(a);
252 b_expired = gk20a_sync_pt_has_signaled(b);
253 if (a_expired && !b_expired) {
254 /* Easy, a was earlier */
255 return -1;
256 } else if (!a_expired && b_expired) {
257 /* Easy, b was earlier */
258 return 1;
259 }
260
261 /* Both a and b are expired (trigger before min) or not
262 * expired (trigger after min), so we can use min
263 * as a reference value for __gk20a_sync_pt_compare_ref.
264 */
265 return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
266 pt_a->thresh, pt_b->thresh);
267}
268
269static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
270{
271 return obj->min;
272}
273
274static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
275 char *str, int size)
276{
277 struct gk20a_sync_timeline *obj =
278 (struct gk20a_sync_timeline *)timeline;
279 snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
280}
281
282static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
283 char *str, int size)
284{
285 struct nvgpu_semaphore *s = pt->sema;
286
287 snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
288 s->location.pool->page_idx,
289 nvgpu_semaphore_get_value(s),
290 nvgpu_semaphore_read(s));
291}
292
293static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
294 int size)
295{
296 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
297
298 if (pt->sema) {
299 gk20a_sync_pt_value_str_for_sema(pt, str, size);
300 return;
301 }
302
303 snprintf(str, size, "%d", pt->thresh);
304}
305
306static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
307 .driver_name = "nvgpu_semaphore",
308 .dup = gk20a_sync_pt_dup_inst,
309 .has_signaled = gk20a_sync_pt_has_signaled,
310 .compare = gk20a_sync_pt_compare,
311 .free_pt = gk20a_sync_pt_free_inst,
312 .timeline_value_str = gk20a_sync_timeline_value_str,
313 .pt_value_str = gk20a_sync_pt_value_str,
314};
315
316/* Public API */
317
318struct sync_fence *gk20a_sync_fence_fdget(int fd)
319{
320 struct sync_fence *fence = sync_fence_fdget(fd);
321 int i;
322
323 if (!fence)
324 return NULL;
325
326 for (i = 0; i < fence->num_fences; i++) {
327 struct fence *pt = fence->cbs[i].sync_pt;
328 struct sync_pt *spt = sync_pt_from_fence(pt);
329 struct sync_timeline *t;
330
331 if (spt == NULL) {
332 sync_fence_put(fence);
333 return NULL;
334 }
335
336 t = sync_pt_parent(spt);
337 if (t->ops != &gk20a_sync_timeline_ops) {
338 sync_fence_put(fence);
339 return NULL;
340 }
341 }
342
343 return fence;
344}
345
346struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
347{
348 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
349 struct nvgpu_semaphore *sema;
350
351 nvgpu_spinlock_acquire(&pt->lock);
352 sema = pt->sema;
353 if (sema)
354 nvgpu_semaphore_get(sema);
355 nvgpu_spinlock_release(&pt->lock);
356
357 return sema;
358}
359
360void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
361{
362 sync_timeline_signal(timeline, 0);
363}
364
365void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
366{
367 sync_timeline_destroy(timeline);
368}
369
370struct sync_timeline *gk20a_sync_timeline_create(
371 const char *name)
372{
373 struct gk20a_sync_timeline *obj;
374
375 obj = (struct gk20a_sync_timeline *)
376 sync_timeline_create(&gk20a_sync_timeline_ops,
377 sizeof(struct gk20a_sync_timeline),
378 name);
379 if (!obj)
380 return NULL;
381 obj->max = 0;
382 obj->min = 0;
383 return &obj->obj;
384}
385
386struct sync_fence *gk20a_sync_fence_create(
387 struct channel_gk20a *c,
388 struct nvgpu_semaphore *sema,
389 const char *fmt, ...)
390{
391 char name[30];
392 va_list args;
393 struct sync_pt *pt;
394 struct sync_fence *fence;
395 struct gk20a *g = c->g;
396
397 struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
398 struct nvgpu_os_fence_framework *fence_framework = NULL;
399 struct gk20a_sync_timeline *timeline = NULL;
400
401 fence_framework = &os_channel_priv->fence_framework;
402
403 timeline = to_gk20a_timeline(fence_framework->timeline);
404
405 pt = gk20a_sync_pt_create_inst(g, timeline, sema);
406 if (pt == NULL)
407 return NULL;
408
409 va_start(args, fmt);
410 vsnprintf(name, sizeof(name), fmt, args);
411 va_end(args);
412
413 fence = sync_fence_create(name, pt);
414 if (fence == NULL) {
415 sync_pt_free(pt);
416 return NULL;
417 }
418 return fence;
419}
diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h
deleted file mode 100644
index 4fca7bed..00000000
--- a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h
+++ /dev/null
@@ -1,51 +0,0 @@
1/*
2 * Semaphore Sync Framework Integration
3 *
4 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _GK20A_SYNC_H_
20#define _GK20A_SYNC_H_
21
22struct sync_timeline;
23struct sync_fence;
24struct sync_pt;
25struct nvgpu_semaphore;
26struct fence;
27
28#ifdef CONFIG_SYNC
29struct sync_timeline *gk20a_sync_timeline_create(const char *name);
30void gk20a_sync_timeline_destroy(struct sync_timeline *);
31void gk20a_sync_timeline_signal(struct sync_timeline *);
32struct sync_fence *gk20a_sync_fence_create(
33 struct channel_gk20a *c,
34 struct nvgpu_semaphore *,
35 const char *fmt, ...);
36struct sync_fence *gk20a_sync_fence_fdget(int fd);
37struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt);
38#else
39static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {}
40static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {}
41static inline struct sync_fence *gk20a_sync_fence_fdget(int fd)
42{
43 return NULL;
44}
45static inline struct sync_timeline *gk20a_sync_timeline_create(
46 const char *name) {
47 return NULL;
48}
49#endif
50
51#endif
diff --git a/drivers/gpu/nvgpu/common/linux/sysfs.c b/drivers/gpu/nvgpu/common/linux/sysfs.c
deleted file mode 100644
index e5995bb8..00000000
--- a/drivers/gpu/nvgpu/common/linux/sysfs.c
+++ /dev/null
@@ -1,1205 +0,0 @@
1/*
2 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/device.h>
18#include <linux/pm_runtime.h>
19#include <linux/fb.h>
20
21#include <nvgpu/kmem.h>
22#include <nvgpu/nvhost.h>
23
24#include "sysfs.h"
25#include "platform_gk20a.h"
26#include "gk20a/pmu_gk20a.h"
27#include "gk20a/gr_gk20a.h"
28#include "gv11b/gr_gv11b.h"
29
30#define PTIMER_FP_FACTOR 1000000
31
32#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
33
34static ssize_t elcg_enable_store(struct device *dev,
35 struct device_attribute *attr, const char *buf, size_t count)
36{
37 struct gk20a *g = get_gk20a(dev);
38 unsigned long val = 0;
39 int err;
40
41 if (kstrtoul(buf, 10, &val) < 0)
42 return -EINVAL;
43
44 err = gk20a_busy(g);
45 if (err)
46 return err;
47
48 if (val) {
49 g->elcg_enabled = true;
50 gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO);
51 } else {
52 g->elcg_enabled = false;
53 gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN);
54 }
55
56 gk20a_idle(g);
57
58 nvgpu_info(g, "ELCG is %s.", g->elcg_enabled ? "enabled" :
59 "disabled");
60
61 return count;
62}
63
64static ssize_t elcg_enable_read(struct device *dev,
65 struct device_attribute *attr, char *buf)
66{
67 struct gk20a *g = get_gk20a(dev);
68
69 return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0);
70}
71
72static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store);
73
74static ssize_t blcg_enable_store(struct device *dev,
75 struct device_attribute *attr, const char *buf, size_t count)
76{
77 struct gk20a *g = get_gk20a(dev);
78 unsigned long val = 0;
79 int err;
80
81 if (kstrtoul(buf, 10, &val) < 0)
82 return -EINVAL;
83
84 if (val)
85 g->blcg_enabled = true;
86 else
87 g->blcg_enabled = false;
88
89 err = gk20a_busy(g);
90 if (err)
91 return err;
92
93 if (g->ops.clock_gating.blcg_bus_load_gating_prod)
94 g->ops.clock_gating.blcg_bus_load_gating_prod(g,
95 g->blcg_enabled);
96 if (g->ops.clock_gating.blcg_ce_load_gating_prod)
97 g->ops.clock_gating.blcg_ce_load_gating_prod(g,
98 g->blcg_enabled);
99 if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod)
100 g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g,
101 g->blcg_enabled);
102 if (g->ops.clock_gating.blcg_fb_load_gating_prod)
103 g->ops.clock_gating.blcg_fb_load_gating_prod(g,
104 g->blcg_enabled);
105 if (g->ops.clock_gating.blcg_fifo_load_gating_prod)
106 g->ops.clock_gating.blcg_fifo_load_gating_prod(g,
107 g->blcg_enabled);
108 if (g->ops.clock_gating.blcg_gr_load_gating_prod)
109 g->ops.clock_gating.blcg_gr_load_gating_prod(g,
110 g->blcg_enabled);
111 if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
112 g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
113 g->blcg_enabled);
114 if (g->ops.clock_gating.blcg_pmu_load_gating_prod)
115 g->ops.clock_gating.blcg_pmu_load_gating_prod(g,
116 g->blcg_enabled);
117 if (g->ops.clock_gating.blcg_xbar_load_gating_prod)
118 g->ops.clock_gating.blcg_xbar_load_gating_prod(g,
119 g->blcg_enabled);
120 gk20a_idle(g);
121
122 nvgpu_info(g, "BLCG is %s.", g->blcg_enabled ? "enabled" :
123 "disabled");
124
125 return count;
126}
127
128static ssize_t blcg_enable_read(struct device *dev,
129 struct device_attribute *attr, char *buf)
130{
131 struct gk20a *g = get_gk20a(dev);
132
133 return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0);
134}
135
136
137static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store);
138
139static ssize_t slcg_enable_store(struct device *dev,
140 struct device_attribute *attr, const char *buf, size_t count)
141{
142 struct gk20a *g = get_gk20a(dev);
143 unsigned long val = 0;
144 int err;
145
146 if (kstrtoul(buf, 10, &val) < 0)
147 return -EINVAL;
148
149 if (val)
150 g->slcg_enabled = true;
151 else
152 g->slcg_enabled = false;
153
154 /*
155 * TODO: slcg_therm_load_gating is not enabled anywhere during
156 * init. Therefore, it would be incongruous to add it here. Once
157 * it is added to init, we should add it here too.
158 */
159 err = gk20a_busy(g);
160 if (err)
161 return err;
162
163 if (g->ops.clock_gating.slcg_bus_load_gating_prod)
164 g->ops.clock_gating.slcg_bus_load_gating_prod(g,
165 g->slcg_enabled);
166 if (g->ops.clock_gating.slcg_ce2_load_gating_prod)
167 g->ops.clock_gating.slcg_ce2_load_gating_prod(g,
168 g->slcg_enabled);
169 if (g->ops.clock_gating.slcg_chiplet_load_gating_prod)
170 g->ops.clock_gating.slcg_chiplet_load_gating_prod(g,
171 g->slcg_enabled);
172 if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod)
173 g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g,
174 g->slcg_enabled);
175 if (g->ops.clock_gating.slcg_fb_load_gating_prod)
176 g->ops.clock_gating.slcg_fb_load_gating_prod(g,
177 g->slcg_enabled);
178 if (g->ops.clock_gating.slcg_fifo_load_gating_prod)
179 g->ops.clock_gating.slcg_fifo_load_gating_prod(g,
180 g->slcg_enabled);
181 if (g->ops.clock_gating.slcg_gr_load_gating_prod)
182 g->ops.clock_gating.slcg_gr_load_gating_prod(g,
183 g->slcg_enabled);
184 if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
185 g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
186 g->slcg_enabled);
187 if (g->ops.clock_gating.slcg_perf_load_gating_prod)
188 g->ops.clock_gating.slcg_perf_load_gating_prod(g,
189 g->slcg_enabled);
190 if (g->ops.clock_gating.slcg_priring_load_gating_prod)
191 g->ops.clock_gating.slcg_priring_load_gating_prod(g,
192 g->slcg_enabled);
193 if (g->ops.clock_gating.slcg_pmu_load_gating_prod)
194 g->ops.clock_gating.slcg_pmu_load_gating_prod(g,
195 g->slcg_enabled);
196 if (g->ops.clock_gating.slcg_xbar_load_gating_prod)
197 g->ops.clock_gating.slcg_xbar_load_gating_prod(g,
198 g->slcg_enabled);
199 gk20a_idle(g);
200
201 nvgpu_info(g, "SLCG is %s.", g->slcg_enabled ? "enabled" :
202 "disabled");
203
204 return count;
205}
206
207static ssize_t slcg_enable_read(struct device *dev,
208 struct device_attribute *attr, char *buf)
209{
210 struct gk20a *g = get_gk20a(dev);
211
212 return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 1 : 0);
213}
214
215static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store);
216
217static ssize_t ptimer_scale_factor_show(struct device *dev,
218 struct device_attribute *attr,
219 char *buf)
220{
221 struct gk20a *g = get_gk20a(dev);
222 struct gk20a_platform *platform = dev_get_drvdata(dev);
223 u32 src_freq_hz = platform->ptimer_src_freq;
224 u32 scaling_factor_fp;
225 ssize_t res;
226
227 if (!src_freq_hz) {
228 nvgpu_err(g, "reference clk_m rate is not set correctly");
229 return -EINVAL;
230 }
231
232 scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) /
233 ((u32)(src_freq_hz) /
234 (u32)(PTIMER_FP_FACTOR));
235 res = snprintf(buf,
236 PAGE_SIZE,
237 "%u.%u\n",
238 scaling_factor_fp / PTIMER_FP_FACTOR,
239 scaling_factor_fp % PTIMER_FP_FACTOR);
240
241 return res;
242
243}
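/*
 * Worked example of the fixed-point math above, using illustrative
 * frequencies that are assumptions for this example only: with a ptimer
 * reference of 31250000 Hz and a source clock of 19200000 Hz,
 * scaling_factor_fp = 31250000 / (19200000 / 1000000) = 31250000 / 19
 * = 1644736, printed as "1.644736". Note that the integer division by
 * PTIMER_FP_FACTOR truncates the source frequency to whole MHz.
 */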
244
245static DEVICE_ATTR(ptimer_scale_factor,
246 S_IRUGO,
247 ptimer_scale_factor_show,
248 NULL);
249
250static ssize_t ptimer_ref_freq_show(struct device *dev,
251 struct device_attribute *attr,
252 char *buf)
253{
254 struct gk20a *g = get_gk20a(dev);
255 struct gk20a_platform *platform = dev_get_drvdata(dev);
256 u32 src_freq_hz = platform->ptimer_src_freq;
257 ssize_t res;
258
259 if (!src_freq_hz) {
260 nvgpu_err(g, "reference clk_m rate is not set correctly");
261 return -EINVAL;
262 }
263
264 res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ);
265
266 return res;
267
268}
269
270static DEVICE_ATTR(ptimer_ref_freq,
271 S_IRUGO,
272 ptimer_ref_freq_show,
273 NULL);
274
275static ssize_t ptimer_src_freq_show(struct device *dev,
276 struct device_attribute *attr,
277 char *buf)
278{
279 struct gk20a *g = get_gk20a(dev);
280 struct gk20a_platform *platform = dev_get_drvdata(dev);
281 u32 src_freq_hz = platform->ptimer_src_freq;
282 ssize_t res;
283
284 if (!src_freq_hz) {
285 nvgpu_err(g, "reference clk_m rate is not set correctly");
286 return -EINVAL;
287 }
288
289 res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz);
290
291 return res;
292
293}
294
295static DEVICE_ATTR(ptimer_src_freq,
296 S_IRUGO,
297 ptimer_src_freq_show,
298 NULL);
299
300
301#if defined(CONFIG_PM)
302static ssize_t railgate_enable_store(struct device *dev,
303 struct device_attribute *attr, const char *buf, size_t count)
304{
305 unsigned long railgate_enable = 0;
306 /* dev is guaranteed to be valid here. Ok to de-reference */
307 struct gk20a *g = get_gk20a(dev);
308 int err;
309
310 if (kstrtoul(buf, 10, &railgate_enable) < 0)
311 return -EINVAL;
312
313 if (railgate_enable && !g->can_railgate) {
314 g->can_railgate = true;
315 pm_runtime_set_autosuspend_delay(dev, g->railgate_delay);
316 } else if (railgate_enable == 0 && g->can_railgate) {
317 g->can_railgate = false;
318 pm_runtime_set_autosuspend_delay(dev, -1);
319 }
320 /* wake-up system to make rail-gating setting effective */
321 err = gk20a_busy(g);
322 if (err)
323 return err;
324 gk20a_idle(g);
325
326 nvgpu_info(g, "railgate is %s.", g->can_railgate ?
327 "enabled" : "disabled");
328
329 return count;
330}
331
332static ssize_t railgate_enable_read(struct device *dev,
333 struct device_attribute *attr, char *buf)
334{
335 struct gk20a *g = get_gk20a(dev);
336
337 return snprintf(buf, PAGE_SIZE, "%d\n", g->can_railgate ? 1 : 0);
338}
339
340static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read,
341 railgate_enable_store);
342#endif
343
344static ssize_t railgate_delay_store(struct device *dev,
345 struct device_attribute *attr,
346 const char *buf, size_t count)
347{
348 int railgate_delay = 0, ret = 0;
349 struct gk20a *g = get_gk20a(dev);
350 int err;
351
352 if (!g->can_railgate) {
353 nvgpu_info(g, "does not support power-gating");
354 return count;
355 }
356
357 ret = sscanf(buf, "%d", &railgate_delay);
358 if (ret == 1 && railgate_delay >= 0) {
359 g->railgate_delay = railgate_delay;
360 pm_runtime_set_autosuspend_delay(dev, g->railgate_delay);
361 } else
362 nvgpu_err(g, "Invalid powergate delay");
363
364 /* wake-up system to make rail-gating delay effective immediately */
365 err = gk20a_busy(g);
366 if (err)
367 return err;
368 gk20a_idle(g);
369
370 return count;
371}
372static ssize_t railgate_delay_show(struct device *dev,
373 struct device_attribute *attr, char *buf)
374{
375 struct gk20a *g = get_gk20a(dev);
376
377 return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay);
378}
379static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show,
380 railgate_delay_store);
381
382static ssize_t is_railgated_show(struct device *dev,
383 struct device_attribute *attr, char *buf)
384{
385 struct gk20a_platform *platform = dev_get_drvdata(dev);
 386 bool is_railgated = false;
387
388 if (platform->is_railgated)
389 is_railgated = platform->is_railgated(dev);
390
391 return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no");
392}
393static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL);
394
395static ssize_t counters_show(struct device *dev,
396 struct device_attribute *attr, char *buf)
397{
398 struct gk20a *g = get_gk20a(dev);
399 u32 busy_cycles, total_cycles;
400 ssize_t res;
401
402 nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
403
404 res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles);
405
406 return res;
407}
408static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL);
409
410static ssize_t counters_show_reset(struct device *dev,
411 struct device_attribute *attr, char *buf)
412{
413 ssize_t res = counters_show(dev, attr, buf);
414 struct gk20a *g = get_gk20a(dev);
415
416 nvgpu_pmu_reset_load_counters(g);
417
418 return res;
419}
420static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL);
421
422static ssize_t gk20a_load_show(struct device *dev,
423 struct device_attribute *attr,
424 char *buf)
425{
426 struct gk20a *g = get_gk20a(dev);
427 u32 busy_time;
428 ssize_t res;
429 int err;
430
431 if (!g->power_on) {
432 busy_time = 0;
433 } else {
434 err = gk20a_busy(g);
435 if (err)
436 return err;
437
438 nvgpu_pmu_load_update(g);
439 nvgpu_pmu_load_norm(g, &busy_time);
440 gk20a_idle(g);
441 }
442
443 res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time);
444
445 return res;
446}
447static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL);
448
449static ssize_t elpg_enable_store(struct device *dev,
450 struct device_attribute *attr, const char *buf, size_t count)
451{
452 struct gk20a *g = get_gk20a(dev);
453 unsigned long val = 0;
454 int err;
455
456 if (kstrtoul(buf, 10, &val) < 0)
457 return -EINVAL;
458
459 if (!g->power_on) {
460 g->elpg_enabled = val ? true : false;
461 } else {
462 err = gk20a_busy(g);
463 if (err)
464 return -EAGAIN;
465 /*
466 * Since elpg is refcounted, we should not unnecessarily call
 467 * enable/disable when it is already in the requested state.
468 */
469 if (val && !g->elpg_enabled) {
470 g->elpg_enabled = true;
471 nvgpu_pmu_pg_global_enable(g, true);
472
473 } else if (!val && g->elpg_enabled) {
474 if (g->ops.pmu.pmu_pg_engines_feature_list &&
475 g->ops.pmu.pmu_pg_engines_feature_list(g,
476 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
477 NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) {
478 nvgpu_pmu_pg_global_enable(g, false);
479 g->elpg_enabled = false;
480 } else {
481 g->elpg_enabled = false;
482 nvgpu_pmu_pg_global_enable(g, false);
483 }
484 }
485 gk20a_idle(g);
486 }
487 nvgpu_info(g, "ELPG is %s.", g->elpg_enabled ? "enabled" :
488 "disabled");
489
490 return count;
491}
492
493static ssize_t elpg_enable_read(struct device *dev,
494 struct device_attribute *attr, char *buf)
495{
496 struct gk20a *g = get_gk20a(dev);
497
498 return snprintf(buf, PAGE_SIZE, "%d\n", g->elpg_enabled ? 1 : 0);
499}
500
501static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store);
502
503static ssize_t ldiv_slowdown_factor_store(struct device *dev,
504 struct device_attribute *attr, const char *buf, size_t count)
505{
506 struct gk20a *g = get_gk20a(dev);
507 unsigned long val = 0;
508 int err;
509
510 if (kstrtoul(buf, 10, &val) < 0) {
511 nvgpu_err(g, "parse error for input SLOWDOWN factor\n");
512 return -EINVAL;
513 }
514
515 if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) {
516 nvgpu_err(g, "Invalid SLOWDOWN factor\n");
517 return -EINVAL;
518 }
519
520 if (val == g->ldiv_slowdown_factor)
521 return count;
522
523 if (!g->power_on) {
524 g->ldiv_slowdown_factor = val;
525 } else {
526 err = gk20a_busy(g);
527 if (err)
528 return -EAGAIN;
529
530 g->ldiv_slowdown_factor = val;
531
532 if (g->ops.pmu.pmu_pg_init_param)
533 g->ops.pmu.pmu_pg_init_param(g,
534 PMU_PG_ELPG_ENGINE_ID_GRAPHICS);
535
536 gk20a_idle(g);
537 }
538
539 nvgpu_info(g, "ldiv_slowdown_factor is %x\n", g->ldiv_slowdown_factor);
540
541 return count;
542}
543
544static ssize_t ldiv_slowdown_factor_read(struct device *dev,
545 struct device_attribute *attr, char *buf)
546{
547 struct gk20a *g = get_gk20a(dev);
548
549 return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor);
550}
551
552static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW,
553 ldiv_slowdown_factor_read, ldiv_slowdown_factor_store);
554
555static ssize_t mscg_enable_store(struct device *dev,
556 struct device_attribute *attr, const char *buf, size_t count)
557{
558 struct gk20a *g = get_gk20a(dev);
559 struct nvgpu_pmu *pmu = &g->pmu;
560 unsigned long val = 0;
561 int err;
562
563 if (kstrtoul(buf, 10, &val) < 0)
564 return -EINVAL;
565
566 if (!g->power_on) {
567 g->mscg_enabled = val ? true : false;
568 } else {
569 err = gk20a_busy(g);
570 if (err)
571 return -EAGAIN;
572 /*
573 * Since elpg is refcounted, we should not unnecessarily call
 574 * enable/disable when it is already in the requested state.
575 */
576 if (val && !g->mscg_enabled) {
577 g->mscg_enabled = true;
578 if (g->ops.pmu.pmu_is_lpwr_feature_supported(g,
579 PMU_PG_LPWR_FEATURE_MSCG)) {
580 if (!ACCESS_ONCE(pmu->mscg_stat)) {
581 WRITE_ONCE(pmu->mscg_stat,
582 PMU_MSCG_ENABLED);
583 /* make status visible */
584 smp_mb();
585 }
586 }
587
588 } else if (!val && g->mscg_enabled) {
589 if (g->ops.pmu.pmu_is_lpwr_feature_supported(g,
590 PMU_PG_LPWR_FEATURE_MSCG)) {
591 nvgpu_pmu_pg_global_enable(g, false);
592 WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED);
593 /* make status visible */
594 smp_mb();
595 g->mscg_enabled = false;
596 if (g->elpg_enabled)
597 nvgpu_pmu_pg_global_enable(g, true);
598 }
599 g->mscg_enabled = false;
600 }
601 gk20a_idle(g);
602 }
603 nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? "enabled" :
604 "disabled");
605
606 return count;
607}
608
609static ssize_t mscg_enable_read(struct device *dev,
610 struct device_attribute *attr, char *buf)
611{
612 struct gk20a *g = get_gk20a(dev);
613
614 return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0);
615}
616
617static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store);
618
619static ssize_t aelpg_param_store(struct device *dev,
620 struct device_attribute *attr, const char *buf, size_t count)
621{
622 struct gk20a *g = get_gk20a(dev);
623 int status = 0;
624 union pmu_ap_cmd ap_cmd;
625 int *paramlist = (int *)g->pmu.aelpg_param;
626 u32 defaultparam[5] = {
627 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US,
628 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US,
629 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US,
630 APCTRL_POWER_BREAKEVEN_DEFAULT_US,
631 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT
632 };
633
 634 /* Get each parameter value from input string */
635 sscanf(buf, "%d %d %d %d %d", &paramlist[0], &paramlist[1],
636 &paramlist[2], &paramlist[3], &paramlist[4]);
637
 638 /* If parameter value is 0 then reset to SW default values */
639 if ((paramlist[0] | paramlist[1] | paramlist[2]
640 | paramlist[3] | paramlist[4]) == 0x00) {
641 memcpy(paramlist, defaultparam, sizeof(defaultparam));
642 }
643
644 /* If aelpg is enabled & pmu is ready then post values to
 645 * PMU, else store them and post later
646 */
647 if (g->aelpg_enabled && g->pmu.pmu_ready) {
648 /* Disable AELPG */
649 ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL;
650 ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
651 status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
652
653 /* Enable AELPG */
654 nvgpu_aelpg_init(g);
655 nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
656 }
657
658 return count;
659}
660
661static ssize_t aelpg_param_read(struct device *dev,
662 struct device_attribute *attr, char *buf)
663{
664 struct gk20a *g = get_gk20a(dev);
665
666 return snprintf(buf, PAGE_SIZE,
667 "%d %d %d %d %d\n", g->pmu.aelpg_param[0],
668 g->pmu.aelpg_param[1], g->pmu.aelpg_param[2],
669 g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]);
670}
671
672static DEVICE_ATTR(aelpg_param, ROOTRW,
673 aelpg_param_read, aelpg_param_store);
674
675static ssize_t aelpg_enable_store(struct device *dev,
676 struct device_attribute *attr, const char *buf, size_t count)
677{
678 struct gk20a *g = get_gk20a(dev);
679 unsigned long val = 0;
680 int status = 0;
681 union pmu_ap_cmd ap_cmd;
682 int err;
683
684 if (kstrtoul(buf, 10, &val) < 0)
685 return -EINVAL;
686
687 err = gk20a_busy(g);
688 if (err)
689 return err;
690
691 if (g->pmu.pmu_ready) {
692 if (val && !g->aelpg_enabled) {
693 g->aelpg_enabled = true;
694 /* Enable AELPG */
695 ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL;
696 ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
697 status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
698 } else if (!val && g->aelpg_enabled) {
699 g->aelpg_enabled = false;
700 /* Disable AELPG */
701 ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL;
702 ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
703 status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
704 }
705 } else {
706 nvgpu_info(g, "PMU is not ready, AELPG request failed");
707 }
708 gk20a_idle(g);
709
710 nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" :
711 "disabled");
712
713 return count;
714}
715
716static ssize_t aelpg_enable_read(struct device *dev,
717 struct device_attribute *attr, char *buf)
718{
719 struct gk20a *g = get_gk20a(dev);
720
721 return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 1 : 0);
722}
723
724static DEVICE_ATTR(aelpg_enable, ROOTRW,
725 aelpg_enable_read, aelpg_enable_store);
726
727
728static ssize_t allow_all_enable_read(struct device *dev,
729 struct device_attribute *attr, char *buf)
730{
731 struct gk20a *g = get_gk20a(dev);
732
733 return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0);
734}
735
736static ssize_t allow_all_enable_store(struct device *dev,
737 struct device_attribute *attr, const char *buf, size_t count)
738{
739 struct gk20a *g = get_gk20a(dev);
740 unsigned long val = 0;
741 int err;
742
743 if (kstrtoul(buf, 10, &val) < 0)
744 return -EINVAL;
745
746 err = gk20a_busy(g);
747 g->allow_all = (val ? true : false);
748 gk20a_idle(g);
749
750 return count;
751}
752
753static DEVICE_ATTR(allow_all, ROOTRW,
754 allow_all_enable_read, allow_all_enable_store);
755
756static ssize_t emc3d_ratio_store(struct device *dev,
757 struct device_attribute *attr, const char *buf, size_t count)
758{
759 struct gk20a *g = get_gk20a(dev);
760 unsigned long val = 0;
761
762 if (kstrtoul(buf, 10, &val) < 0)
763 return -EINVAL;
764
765 g->emc3d_ratio = val;
766
767 return count;
768}
769
770static ssize_t emc3d_ratio_read(struct device *dev,
771 struct device_attribute *attr, char *buf)
772{
773 struct gk20a *g = get_gk20a(dev);
774
775 return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio);
776}
777
778static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store);
779
780static ssize_t fmax_at_vmin_safe_read(struct device *dev,
781 struct device_attribute *attr, char *buf)
782{
783 struct gk20a *g = get_gk20a(dev);
784 unsigned long gpu_fmax_at_vmin_hz = 0;
785
786 if (g->ops.clk.get_fmax_at_vmin_safe)
787 gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g);
788
789 return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz));
790}
791
792static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL);
793
794#ifdef CONFIG_PM
795static ssize_t force_idle_store(struct device *dev,
796 struct device_attribute *attr, const char *buf, size_t count)
797{
798 struct gk20a *g = get_gk20a(dev);
799 unsigned long val = 0;
800 int err = 0;
801
802 if (kstrtoul(buf, 10, &val) < 0)
803 return -EINVAL;
804
805 if (val) {
806 if (g->forced_idle)
807 return count; /* do nothing */
808 else {
809 err = __gk20a_do_idle(g, false);
810 if (!err) {
811 g->forced_idle = 1;
812 nvgpu_info(g, "gpu is idle : %d",
813 g->forced_idle);
814 }
815 }
816 } else {
817 if (!g->forced_idle)
818 return count; /* do nothing */
819 else {
820 err = __gk20a_do_unidle(g);
821 if (!err) {
822 g->forced_idle = 0;
823 nvgpu_info(g, "gpu is idle : %d",
824 g->forced_idle);
825 }
826 }
827 }
828
829 return count;
830}
831
832static ssize_t force_idle_read(struct device *dev,
833 struct device_attribute *attr, char *buf)
834{
835 struct gk20a *g = get_gk20a(dev);
836
837 return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 1 : 0);
838}
839
840static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
841#endif
842
843static ssize_t tpc_fs_mask_store(struct device *dev,
844 struct device_attribute *attr, const char *buf, size_t count)
845{
846 struct gk20a *g = get_gk20a(dev);
847 unsigned long val = 0;
848
849 if (kstrtoul(buf, 10, &val) < 0)
850 return -EINVAL;
851
852 if (!g->gr.gpc_tpc_mask)
853 return -ENODEV;
854
855 if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) {
856 g->gr.gpc_tpc_mask[0] = val;
857 g->tpc_fs_mask_user = val;
858
859 g->ops.gr.set_gpc_tpc_mask(g, 0);
860
861 nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image);
862 g->gr.ctx_vars.local_golden_image = NULL;
863 g->gr.ctx_vars.golden_image_initialized = false;
864 g->gr.ctx_vars.golden_image_size = 0;
865 /* Cause next poweron to reinit just gr */
866 g->gr.sw_ready = false;
867 }
868
869 return count;
870}
871
872static ssize_t tpc_fs_mask_read(struct device *dev,
873 struct device_attribute *attr, char *buf)
874{
875 struct gk20a *g = get_gk20a(dev);
876 struct gr_gk20a *gr = &g->gr;
877 u32 gpc_index;
878 u32 tpc_fs_mask = 0;
879 int err = 0;
880
881 err = gk20a_busy(g);
882 if (err)
883 return err;
884
885 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
886 if (g->ops.gr.get_gpc_tpc_mask)
887 tpc_fs_mask |=
888 g->ops.gr.get_gpc_tpc_mask(g, gpc_index) <<
889 (gr->max_tpc_per_gpc_count * gpc_index);
890 }
891
892 gk20a_idle(g);
893
894 return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask);
895}
896
897static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store);
898
899static ssize_t min_timeslice_us_read(struct device *dev,
900 struct device_attribute *attr, char *buf)
901{
902 struct gk20a *g = get_gk20a(dev);
903
904 return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us);
905}
906
907static ssize_t min_timeslice_us_store(struct device *dev,
908 struct device_attribute *attr, const char *buf, size_t count)
909{
910 struct gk20a *g = get_gk20a(dev);
911 unsigned long val;
912
913 if (kstrtoul(buf, 10, &val) < 0)
914 return -EINVAL;
915
916 if (val > g->max_timeslice_us)
917 return -EINVAL;
918
919 g->min_timeslice_us = val;
920
921 return count;
922}
923
924static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read,
925 min_timeslice_us_store);
926
927static ssize_t max_timeslice_us_read(struct device *dev,
928 struct device_attribute *attr, char *buf)
929{
930 struct gk20a *g = get_gk20a(dev);
931
932 return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us);
933}
934
935static ssize_t max_timeslice_us_store(struct device *dev,
936 struct device_attribute *attr, const char *buf, size_t count)
937{
938 struct gk20a *g = get_gk20a(dev);
939 unsigned long val;
940
941 if (kstrtoul(buf, 10, &val) < 0)
942 return -EINVAL;
943
944 if (val < g->min_timeslice_us)
945 return -EINVAL;
946
947 g->max_timeslice_us = val;
948
949 return count;
950}
951
952static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read,
953 max_timeslice_us_store);
954
955static ssize_t czf_bypass_store(struct device *dev,
956 struct device_attribute *attr, const char *buf, size_t count)
957{
958 struct gk20a *g = get_gk20a(dev);
959 unsigned long val;
960
961 if (kstrtoul(buf, 10, &val) < 0)
962 return -EINVAL;
963
964 if (val >= 4)
965 return -EINVAL;
966
967 g->gr.czf_bypass = val;
968
969 return count;
970}
971
972static ssize_t czf_bypass_read(struct device *dev,
973 struct device_attribute *attr, char *buf)
974{
975 struct gk20a *g = get_gk20a(dev);
976
977 return sprintf(buf, "%d\n", g->gr.czf_bypass);
978}
979
980static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store);
981
982static ssize_t pd_max_batches_store(struct device *dev,
983 struct device_attribute *attr, const char *buf, size_t count)
984{
985 struct gk20a *g = get_gk20a(dev);
986 unsigned long val;
987
988 if (kstrtoul(buf, 10, &val) < 0)
989 return -EINVAL;
990
991 if (val > 64)
992 return -EINVAL;
993
994 g->gr.pd_max_batches = val;
995
996 return count;
997}
998
999static ssize_t pd_max_batches_read(struct device *dev,
1000 struct device_attribute *attr, char *buf)
1001{
1002 struct gk20a *g = get_gk20a(dev);
1003
1004 return sprintf(buf, "%d\n", g->gr.pd_max_batches);
1005}
1006
1007static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store);
1008
1009static ssize_t gfxp_wfi_timeout_count_store(struct device *dev,
1010 struct device_attribute *attr, const char *buf, size_t count)
1011{
1012 struct gk20a *g = get_gk20a(dev);
1013 struct gr_gk20a *gr = &g->gr;
1014 unsigned long val = 0;
1015 int err = -1;
1016
1017 if (kstrtoul(buf, 10, &val) < 0)
1018 return -EINVAL;
1019
1020 if (g->ops.gr.get_max_gfxp_wfi_timeout_count) {
1021 if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g))
1022 return -EINVAL;
1023 }
1024
1025 gr->gfxp_wfi_timeout_count = val;
1026
1027 if (g->ops.gr.init_preemption_state && g->power_on) {
1028 err = gk20a_busy(g);
1029 if (err)
1030 return err;
1031
1032 err = gr_gk20a_elpg_protected_call(g,
1033 g->ops.gr.init_preemption_state(g));
1034
1035 gk20a_idle(g);
1036
1037 if (err)
1038 return err;
1039 }
1040 return count;
1041}
1042
1043static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev,
1044 struct device_attribute *attr, const char *buf, size_t count)
1045{
1046 struct gk20a *g = get_gk20a(dev);
1047 struct gr_gk20a *gr = &g->gr;
1048 int err = -1;
1049
1050 if (count > 0 && buf[0] == 's')
1051 /* sysclk */
1052 gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK;
1053 else
1054 /* usec */
1055 gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC;
1056
1057 if (g->ops.gr.init_preemption_state && g->power_on) {
1058 err = gk20a_busy(g);
1059 if (err)
1060 return err;
1061
1062 err = gr_gk20a_elpg_protected_call(g,
1063 g->ops.gr.init_preemption_state(g));
1064
1065 gk20a_idle(g);
1066
1067 if (err)
1068 return err;
1069 }
1070
1071 return count;
1072}
1073
1074static ssize_t gfxp_wfi_timeout_count_read(struct device *dev,
1075 struct device_attribute *attr, char *buf)
1076{
1077 struct gk20a *g = get_gk20a(dev);
1078 struct gr_gk20a *gr = &g->gr;
1079 u32 val = gr->gfxp_wfi_timeout_count;
1080
1081 return snprintf(buf, PAGE_SIZE, "%d\n", val);
1082}
1083
1084static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev,
1085 struct device_attribute *attr, char *buf)
1086{
1087 struct gk20a *g = get_gk20a(dev);
1088 struct gr_gk20a *gr = &g->gr;
1089
1090 if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC)
1091 return snprintf(buf, PAGE_SIZE, "usec\n");
1092 else
1093 return snprintf(buf, PAGE_SIZE, "sysclk\n");
1094}
1095
1096static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH),
1097 gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store);
1098
1099static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH),
1100 gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store);
1101
1102void nvgpu_remove_sysfs(struct device *dev)
1103{
1104 device_remove_file(dev, &dev_attr_elcg_enable);
1105 device_remove_file(dev, &dev_attr_blcg_enable);
1106 device_remove_file(dev, &dev_attr_slcg_enable);
1107 device_remove_file(dev, &dev_attr_ptimer_scale_factor);
1108 device_remove_file(dev, &dev_attr_ptimer_ref_freq);
1109 device_remove_file(dev, &dev_attr_ptimer_src_freq);
1110 device_remove_file(dev, &dev_attr_elpg_enable);
1111 device_remove_file(dev, &dev_attr_mscg_enable);
1112 device_remove_file(dev, &dev_attr_emc3d_ratio);
1113 device_remove_file(dev, &dev_attr_ldiv_slowdown_factor);
1114
1115 device_remove_file(dev, &dev_attr_fmax_at_vmin_safe);
1116
1117 device_remove_file(dev, &dev_attr_counters);
1118 device_remove_file(dev, &dev_attr_counters_reset);
1119 device_remove_file(dev, &dev_attr_load);
1120 device_remove_file(dev, &dev_attr_railgate_delay);
1121 device_remove_file(dev, &dev_attr_is_railgated);
1122#ifdef CONFIG_PM
1123 device_remove_file(dev, &dev_attr_force_idle);
1124 device_remove_file(dev, &dev_attr_railgate_enable);
1125#endif
1126 device_remove_file(dev, &dev_attr_aelpg_param);
1127 device_remove_file(dev, &dev_attr_aelpg_enable);
1128 device_remove_file(dev, &dev_attr_allow_all);
1129 device_remove_file(dev, &dev_attr_tpc_fs_mask);
1130 device_remove_file(dev, &dev_attr_min_timeslice_us);
1131 device_remove_file(dev, &dev_attr_max_timeslice_us);
1132
1133#ifdef CONFIG_TEGRA_GK20A_NVHOST
1134 nvgpu_nvhost_remove_symlink(get_gk20a(dev));
1135#endif
1136
1137 device_remove_file(dev, &dev_attr_czf_bypass);
1138 device_remove_file(dev, &dev_attr_pd_max_batches);
1139 device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count);
1140 device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit);
1141
1142 if (strcmp(dev_name(dev), "gpu.0")) {
1143 struct kobject *kobj = &dev->kobj;
1144 struct device *parent = container_of((kobj->parent),
1145 struct device, kobj);
1146 sysfs_remove_link(&parent->kobj, "gpu.0");
1147 }
1148}
1149
1150int nvgpu_create_sysfs(struct device *dev)
1151{
1152 struct gk20a *g = get_gk20a(dev);
1153 int error = 0;
1154
1155 error |= device_create_file(dev, &dev_attr_elcg_enable);
1156 error |= device_create_file(dev, &dev_attr_blcg_enable);
1157 error |= device_create_file(dev, &dev_attr_slcg_enable);
1158 error |= device_create_file(dev, &dev_attr_ptimer_scale_factor);
1159 error |= device_create_file(dev, &dev_attr_ptimer_ref_freq);
1160 error |= device_create_file(dev, &dev_attr_ptimer_src_freq);
1161 error |= device_create_file(dev, &dev_attr_elpg_enable);
1162 error |= device_create_file(dev, &dev_attr_mscg_enable);
1163 error |= device_create_file(dev, &dev_attr_emc3d_ratio);
1164 error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor);
1165
1166 error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe);
1167
1168 error |= device_create_file(dev, &dev_attr_counters);
1169 error |= device_create_file(dev, &dev_attr_counters_reset);
1170 error |= device_create_file(dev, &dev_attr_load);
1171 error |= device_create_file(dev, &dev_attr_railgate_delay);
1172 error |= device_create_file(dev, &dev_attr_is_railgated);
1173#ifdef CONFIG_PM
1174 error |= device_create_file(dev, &dev_attr_force_idle);
1175 error |= device_create_file(dev, &dev_attr_railgate_enable);
1176#endif
1177 error |= device_create_file(dev, &dev_attr_aelpg_param);
1178 error |= device_create_file(dev, &dev_attr_aelpg_enable);
1179 error |= device_create_file(dev, &dev_attr_allow_all);
1180 error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
1181 error |= device_create_file(dev, &dev_attr_min_timeslice_us);
1182 error |= device_create_file(dev, &dev_attr_max_timeslice_us);
1183
1184#ifdef CONFIG_TEGRA_GK20A_NVHOST
1185 error |= nvgpu_nvhost_create_symlink(g);
1186#endif
1187
1188 error |= device_create_file(dev, &dev_attr_czf_bypass);
1189 error |= device_create_file(dev, &dev_attr_pd_max_batches);
1190 error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count);
1191 error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit);
1192
1193 if (strcmp(dev_name(dev), "gpu.0")) {
1194 struct kobject *kobj = &dev->kobj;
1195 struct device *parent = container_of((kobj->parent),
1196 struct device, kobj);
1197 error |= sysfs_create_link(&parent->kobj,
1198 &dev->kobj, "gpu.0");
1199 }
1200
1201 if (error)
1202 nvgpu_err(g, "Failed to create sysfs attributes!\n");
1203
1204 return error;
1205}
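
Every attribute handled above follows the same sysfs device-attribute pattern: a read handler that formats the current value into the provided buffer, a store handler that parses the written string with kstrtoul() and applies it, a DEVICE_ATTR() definition tying the two together, and matching device_create_file()/device_remove_file() calls in nvgpu_create_sysfs()/nvgpu_remove_sysfs(). The sketch below shows that pattern in isolation; the attribute name "example_value" and the static variable backing it are hypothetical and not part of nvgpu, while ROOTRW is the root-read/write mode macro already used by the other attributes in this file.

#include <linux/device.h>
#include <linux/kernel.h>

/* Hedged sketch only: "example_value" stands in for a real gk20a field. */
static unsigned int example_value;

static ssize_t example_value_read(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", example_value);
}

static ssize_t example_value_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long val;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	example_value = val;

	return count;
}

static DEVICE_ATTR(example_value, ROOTRW,
		example_value_read, example_value_store);

/*
 * Registration mirrors the existing attributes:
 *   error |= device_create_file(dev, &dev_attr_example_value);
 * in nvgpu_create_sysfs(), and
 *   device_remove_file(dev, &dev_attr_example_value);
 * in nvgpu_remove_sysfs().
 */
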
diff --git a/drivers/gpu/nvgpu/common/linux/sysfs.h b/drivers/gpu/nvgpu/common/linux/sysfs.h
deleted file mode 100644
index 80925844..00000000
--- a/drivers/gpu/nvgpu/common/linux/sysfs.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef NVGPU_SYSFS_H
17#define NVGPU_SYSFS_H
18
19struct device;
20
21int nvgpu_create_sysfs(struct device *dev);
22void nvgpu_remove_sysfs(struct device *dev);
23
24#endif
diff --git a/drivers/gpu/nvgpu/common/linux/thread.c b/drivers/gpu/nvgpu/common/linux/thread.c
deleted file mode 100644
index 92c556f2..00000000
--- a/drivers/gpu/nvgpu/common/linux/thread.c
+++ /dev/null
@@ -1,63 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kthread.h>
18
19#include <nvgpu/thread.h>
20
21int nvgpu_thread_proxy(void *threaddata)
22{
23 struct nvgpu_thread *thread = threaddata;
24 int ret = thread->fn(thread->data);
25
26 thread->running = false;
27 return ret;
28}
29
30int nvgpu_thread_create(struct nvgpu_thread *thread,
31 void *data,
32 int (*threadfn)(void *data), const char *name)
33{
34 struct task_struct *task = kthread_create(nvgpu_thread_proxy,
35 thread, name);
36 if (IS_ERR(task))
37 return PTR_ERR(task);
38
39 thread->task = task;
40 thread->fn = threadfn;
41 thread->data = data;
42 thread->running = true;
43 wake_up_process(task);
44 return 0;
45};
46
47void nvgpu_thread_stop(struct nvgpu_thread *thread)
48{
49 if (thread->task) {
50 kthread_stop(thread->task);
51 thread->task = NULL;
52 }
53};
54
55bool nvgpu_thread_should_stop(struct nvgpu_thread *thread)
56{
57 return kthread_should_stop();
58};
59
60bool nvgpu_thread_is_running(struct nvgpu_thread *thread)
61{
62 return ACCESS_ONCE(thread->running);
63};
diff --git a/drivers/gpu/nvgpu/common/linux/timers.c b/drivers/gpu/nvgpu/common/linux/timers.c
deleted file mode 100644
index d1aa641f..00000000
--- a/drivers/gpu/nvgpu/common/linux/timers.c
+++ /dev/null
@@ -1,270 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/ktime.h>
18#include <linux/delay.h>
19
20#include <nvgpu/timers.h>
21#include <nvgpu/soc.h>
22
23#include "gk20a/gk20a.h"
24
25#include "platform_gk20a.h"
26
27/*
28 * Returns 1 if the platform is pre-Si and should ignore the timeout checking.
29 * Setting %NVGPU_TIMER_NO_PRE_SI will make this always return 0 (i.e. do the
30 * timeout check regardless of platform).
31 */
32static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout)
33{
34 if (timeout->flags & NVGPU_TIMER_NO_PRE_SI)
35 return 0;
36
37 return !nvgpu_platform_is_silicon(timeout->g);
38}
39
40/**
41 * nvgpu_timeout_init - Init timer.
42 *
43 * @g - nvgpu device.
44 * @timeout - The timer.
45 * @duration - Timeout in milliseconds or number of retries.
46 * @flags - Flags for timer.
47 *
48 * This configures the timeout to start counting now, i.e., when this
49 * function is called. Available flags to pass to @flags:
50 *
51 * %NVGPU_TIMER_CPU_TIMER
52 * %NVGPU_TIMER_RETRY_TIMER
53 * %NVGPU_TIMER_NO_PRE_SI
54 * %NVGPU_TIMER_SILENT_TIMEOUT
55 *
56 * If neither %NVGPU_TIMER_CPU_TIMER nor %NVGPU_TIMER_RETRY_TIMER is passed then
57 * a CPU timer is used by default.
58 */
59int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout,
60 u32 duration, unsigned long flags)
61{
62 if (flags & ~NVGPU_TIMER_FLAG_MASK)
63 return -EINVAL;
64
65 memset(timeout, 0, sizeof(*timeout));
66
67 timeout->g = g;
68 timeout->flags = flags;
69
70 if (flags & NVGPU_TIMER_RETRY_TIMER)
71 timeout->retries.max = duration;
72 else
73 timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(),
74 (s64)NSEC_PER_MSEC * duration));
75
76 return 0;
77}
78
79static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout,
80 void *caller,
81 const char *fmt, va_list args)
82{
83 struct gk20a *g = timeout->g;
84 ktime_t now = ktime_get();
85
86 if (nvgpu_timeout_is_pre_silicon(timeout))
87 return 0;
88
89 if (ktime_after(now, ns_to_ktime(timeout->time))) {
90 if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) {
91 char buf[128];
92
93 vsnprintf(buf, sizeof(buf), fmt, args);
94
95 nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf);
96 }
97
98 return -ETIMEDOUT;
99 }
100
101 return 0;
102}
103
104static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout,
105 void *caller,
106 const char *fmt, va_list args)
107{
108 struct gk20a *g = timeout->g;
109
110 if (nvgpu_timeout_is_pre_silicon(timeout))
111 return 0;
112
113 if (timeout->retries.attempted >= timeout->retries.max) {
114 if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) {
115 char buf[128];
116
117 vsnprintf(buf, sizeof(buf), fmt, args);
118
119 nvgpu_err(g, "No more retries @ %pF %s", caller, buf);
120 }
121
122 return -ETIMEDOUT;
123 }
124
125 timeout->retries.attempted++;
126
127 return 0;
128}
129
130/**
131 * __nvgpu_timeout_expired_msg - Check if a timeout has expired.
132 *
133 * @timeout - The timeout to check.
134 * @caller - Address of the caller of this function.
135 * @fmt - The fmt string.
136 *
137 * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise.
138 *
139 * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout
140 * then a message is printed based on %fmt.
141 */
142int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout,
143 void *caller, const char *fmt, ...)
144{
145 int ret;
146 va_list args;
147
148 va_start(args, fmt);
149 if (timeout->flags & NVGPU_TIMER_RETRY_TIMER)
150 ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt,
151 args);
152 else
153 ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt,
154 args);
155 va_end(args);
156
157 return ret;
158}
159
160/**
161 * nvgpu_timeout_peek_expired - Check the status of a timeout.
162 *
163 * @timeout - The timeout to check.
164 *
165 * Returns non-zero if the timeout is expired, zero otherwise. In the case of
166 * retry timers this will not increment the underlying retry count. Also if the
167 * timer has expired no messages will be printed.
168 *
169 * This function honors the pre-Si check as well.
170 */
171int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout)
172{
173 if (nvgpu_timeout_is_pre_silicon(timeout))
174 return 0;
175
176 if (timeout->flags & NVGPU_TIMER_RETRY_TIMER)
177 return timeout->retries.attempted >= timeout->retries.max;
178 else
179 return ktime_after(ktime_get(), ns_to_ktime(timeout->time));
180}
181
182/**
183 * nvgpu_udelay - Delay for some number of microseconds.
184 *
185 * @usecs - Microseconds to wait for.
186 *
187 * Wait for at least @usecs microseconds. This is not guaranteed to be perfectly
188 * accurate. This is normally backed by a busy-loop, so waits should be
189 * kept short, below 100us. If longer delays are necessary then
190 * nvgpu_msleep() should be preferred.
191 *
192 * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This
193 * function will attempt to not use a busy-loop.
194 */
195void nvgpu_udelay(unsigned int usecs)
196{
197 udelay(usecs);
198}
199
200/**
201 * nvgpu_usleep_range - Sleep for a range of microseconds.
202 *
203 * @min_us - Minimum wait time.
204 * @max_us - Maximum wait time.
205 *
206 * Wait for some number of microseconds between @min_us and @max_us. This,
207 * unlike nvgpu_udelay(), will attempt to sleep for the passed number of
208 * microseconds instead of busy looping. Not all platforms support this,
209 * and in that case this reduces to nvgpu_udelay(min_us).
210 *
211 * Linux note: this is not safe to use in atomic context. If you are in
212 * atomic context you must use nvgpu_udelay().
213 */
214void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us)
215{
216 usleep_range(min_us, max_us);
217}
218
219/**
220 * nvgpu_msleep - Sleep for some milliseconds.
221 *
222 * @msecs - Sleep for at least this many milliseconds.
223 *
224 * Sleep for at least @msecs of milliseconds. For small @msecs (less than 20 ms
225 * or so) the sleep will be significantly longer due to scheduling overhead and
226 * mechanics.
227 */
228void nvgpu_msleep(unsigned int msecs)
229{
230 msleep(msecs);
231}
232
233/**
234 * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock.
235 *
236 * Return a clock in millisecond units. The start time of the clock is
237 * unspecified; the time returned can be compared with older ones to measure
238 * durations. The source clock does not jump when the system clock is adjusted.
239 */
240s64 nvgpu_current_time_ms(void)
241{
242 return ktime_to_ms(ktime_get());
243}
244
245/**
246 * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock.
247 *
248 * Return a clock in nanosecond units. The start time of the clock is
249 * unspecified; the time returned can be compared with older ones to measure
250 * durations. The source clock does not jump when the system clock is adjusted.
251 */
252s64 nvgpu_current_time_ns(void)
253{
254 return ktime_to_ns(ktime_get());
255}
256
257/**
258 * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp.
259 *
260 * Return a "high resolution" time stamp. It does not really matter exactly what
261 * it is, so long as it generally returns unique values and monotonically
262 * increases; wrap-around _is_ possible, though, in a system running for long
263 * enough.
264 *
265 * Note: what high resolution means is system dependent.
266 */
267u64 nvgpu_hr_timestamp(void)
268{
269 return get_cycles();
270}
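
A minimal sketch of how the helpers documented above are typically combined: initialize a CPU-based timeout, poll a condition, sleep between attempts, and let __nvgpu_timeout_expired_msg() report the expiry. The wait_for_condition() wrapper, its 2000 ms budget, and the polled callback are illustrative assumptions, not nvgpu APIs; only the timeout and sleep calls come from this file.

#include <nvgpu/timers.h>

#include "gk20a/gk20a.h"

/*
 * Hypothetical helper: wait up to 2000 ms for a caller-supplied condition,
 * polling once per millisecond. Pre-silicon platforms skip the expiry check
 * automatically (see nvgpu_timeout_is_pre_silicon() above).
 */
static int wait_for_condition(struct gk20a *g,
			      bool (*condition)(struct gk20a *g))
{
	struct nvgpu_timeout timeout;
	int err;

	err = nvgpu_timeout_init(g, &timeout, 2000, NVGPU_TIMER_CPU_TIMER);
	if (err)
		return err;

	do {
		if (condition(g))
			return 0;

		nvgpu_msleep(1);
	} while (!__nvgpu_timeout_expired_msg(&timeout,
			__builtin_return_address(0),
			"condition not met"));

	return -ETIMEDOUT;
}
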
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c
deleted file mode 100644
index 0858e6b1..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c
+++ /dev/null
@@ -1,168 +0,0 @@
1/*
2 * Virtualized GPU Clock Interface
3 *
4 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <nvgpu/vgpu/vgpu.h>
20
21#include "gk20a/gk20a.h"
22#include "clk_vgpu.h"
23#include "ctrl/ctrlclk.h"
24#include "common/linux/platform_gk20a.h"
25
26static unsigned long
27vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE];
28
29static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain)
30{
31 struct tegra_vgpu_cmd_msg msg = {};
32 struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate;
33 int err;
34 unsigned long ret = 0;
35
36 nvgpu_log_fn(g, " ");
37
38 switch (api_domain) {
39 case CTRL_CLK_DOMAIN_GPCCLK:
40 msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE;
41 msg.handle = vgpu_get_handle(g);
42 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
43 err = err ? err : msg.ret;
44 if (err)
45 nvgpu_err(g, "%s failed - %d", __func__, err);
46 else
47 /* return frequency in Hz */
48 ret = p->rate * 1000;
49 break;
50 case CTRL_CLK_DOMAIN_PWRCLK:
51 nvgpu_err(g, "unsupported clock: %u", api_domain);
52 break;
53 default:
54 nvgpu_err(g, "unknown clock: %u", api_domain);
55 break;
56 }
57
58 return ret;
59}
60
61static int vgpu_clk_set_rate(struct gk20a *g,
62 u32 api_domain, unsigned long rate)
63{
64 struct tegra_vgpu_cmd_msg msg = {};
65 struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate;
66 int err = -EINVAL;
67
68 nvgpu_log_fn(g, " ");
69
70 switch (api_domain) {
71 case CTRL_CLK_DOMAIN_GPCCLK:
72 msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE;
73 msg.handle = vgpu_get_handle(g);
74
75 /* server dvfs framework requires frequency in kHz */
76 p->rate = (u32)(rate / 1000);
77 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
78 err = err ? err : msg.ret;
79 if (err)
80 nvgpu_err(g, "%s failed - %d", __func__, err);
81 break;
82 case CTRL_CLK_DOMAIN_PWRCLK:
83 nvgpu_err(g, "unsupported clock: %u", api_domain);
84 break;
85 default:
86 nvgpu_err(g, "unknown clock: %u", api_domain);
87 break;
88 }
89
90 return err;
91}
92
93static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain)
94{
95 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
96
97 return priv->constants.max_freq;
98}
99
100void vgpu_init_clk_support(struct gk20a *g)
101{
102 g->ops.clk.get_rate = vgpu_clk_get_rate;
103 g->ops.clk.set_rate = vgpu_clk_set_rate;
104 g->ops.clk.get_maxrate = vgpu_clk_get_maxrate;
105}
106
107long vgpu_clk_round_rate(struct device *dev, unsigned long rate)
108{
109 /* server will handle frequency rounding */
110 return rate;
111}
112
113int vgpu_clk_get_freqs(struct device *dev,
114 unsigned long **freqs, int *num_freqs)
115{
116 struct gk20a_platform *platform = gk20a_get_platform(dev);
117 struct gk20a *g = platform->g;
118 struct tegra_vgpu_cmd_msg msg = {};
119 struct tegra_vgpu_get_gpu_freq_table_params *p =
120 &msg.params.get_gpu_freq_table;
121 unsigned int i;
122 int err;
123
124 nvgpu_log_fn(g, " ");
125
126 msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE;
127 msg.handle = vgpu_get_handle(g);
128
129 p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE;
130 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
131 err = err ? err : msg.ret;
132 if (err) {
133 nvgpu_err(g, "%s failed - %d", __func__, err);
134 return err;
135 }
136
137 /* return frequency in Hz */
138 for (i = 0; i < p->num_freqs; i++)
139 vgpu_freq_table[i] = p->freqs[i] * 1000;
140
141 *freqs = vgpu_freq_table;
142 *num_freqs = p->num_freqs;
143
144 return 0;
145}
146
147int vgpu_clk_cap_rate(struct device *dev, unsigned long rate)
148{
149 struct gk20a_platform *platform = gk20a_get_platform(dev);
150 struct gk20a *g = platform->g;
151 struct tegra_vgpu_cmd_msg msg = {};
152 struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate;
153 int err = 0;
154
155 nvgpu_log_fn(g, " ");
156
157 msg.cmd = TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE;
158 msg.handle = vgpu_get_handle(g);
159 p->rate = (u32)rate;
160 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
161 err = err ? err : msg.ret;
162 if (err) {
163 nvgpu_err(g, "%s failed - %d", __func__, err);
164 return err;
165 }
166
167 return 0;
168}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h
deleted file mode 100644
index 8d477643..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Virtualized GPU Clock Interface
3 *
4 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _CLK_VIRT_H_
20#define _CLK_VIRT_H_
21
22void vgpu_init_clk_support(struct gk20a *g);
23long vgpu_clk_round_rate(struct device *dev, unsigned long rate);
24int vgpu_clk_get_freqs(struct device *dev,
25 unsigned long **freqs, int *num_freqs);
26int vgpu_clk_cap_rate(struct device *dev, unsigned long rate);
27#endif
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c
deleted file mode 100644
index 499a8eb4..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c
+++ /dev/null
@@ -1,224 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <uapi/linux/nvgpu.h>
18
19#include <nvgpu/kmem.h>
20#include <nvgpu/bug.h>
21#include <nvgpu/enabled.h>
22#include <nvgpu/ctxsw_trace.h>
23#include <nvgpu/vgpu/vgpu_ivm.h>
24#include <nvgpu/vgpu/tegra_vgpu.h>
25#include <nvgpu/vgpu/vgpu.h>
26
27#include "gk20a/gk20a.h"
28#include "common/linux/os_linux.h"
29#include "vgpu/fecs_trace_vgpu.h"
30
31struct vgpu_fecs_trace {
32 struct tegra_hv_ivm_cookie *cookie;
33 struct nvgpu_ctxsw_ring_header *header;
34 struct nvgpu_ctxsw_trace_entry *entries;
35 int num_entries;
36 bool enabled;
37 void *buf;
38};
39
40int vgpu_fecs_trace_init(struct gk20a *g)
41{
42 struct device *dev = dev_from_gk20a(g);
43 struct device_node *np = dev->of_node;
44 struct of_phandle_args args;
45 struct vgpu_fecs_trace *vcst;
46 u32 mempool;
47 int err;
48
49 nvgpu_log_fn(g, " ");
50
51 vcst = nvgpu_kzalloc(g, sizeof(*vcst));
52 if (!vcst)
53 return -ENOMEM;
54
55 err = of_parse_phandle_with_fixed_args(np,
56 "mempool-fecs-trace", 1, 0, &args);
57 if (err) {
58 nvgpu_info(g, "does not support fecs trace");
59 goto fail;
60 }
61 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
62
63 mempool = args.args[0];
64 vcst->cookie = vgpu_ivm_mempool_reserve(mempool);
65 if (IS_ERR(vcst->cookie)) {
66 nvgpu_info(g,
67 "mempool %u reserve failed", mempool);
68 vcst->cookie = NULL;
69 err = -EINVAL;
70 goto fail;
71 }
72
73 vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie),
74 vgpu_ivm_get_size(vcst->cookie));
75 if (!vcst->buf) {
76 nvgpu_info(g, "ioremap_cache failed");
77 err = -EINVAL;
78 goto fail;
79 }
80 vcst->header = vcst->buf;
81 vcst->num_entries = vcst->header->num_ents;
82 if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) {
83 nvgpu_err(g, "entry size mismatch");
84 goto fail;
85 }
86 vcst->entries = vcst->buf + sizeof(*vcst->header);
87 g->fecs_trace = (struct gk20a_fecs_trace *)vcst;
88
89 return 0;
90fail:
91 iounmap(vcst->buf);
92 if (vcst->cookie)
93 vgpu_ivm_mempool_unreserve(vcst->cookie);
94 nvgpu_kfree(g, vcst);
95 return err;
96}
97
98int vgpu_fecs_trace_deinit(struct gk20a *g)
99{
100 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
101
102 iounmap(vcst->buf);
103 vgpu_ivm_mempool_unreserve(vcst->cookie);
104 nvgpu_kfree(g, vcst);
105 return 0;
106}
107
108int vgpu_fecs_trace_enable(struct gk20a *g)
109{
110 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
111 struct tegra_vgpu_cmd_msg msg = {
112 .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE,
113 .handle = vgpu_get_handle(g),
114 };
115 int err;
116
117 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
118 err = err ? err : msg.ret;
119 WARN_ON(err);
120 vcst->enabled = !err;
121 return err;
122}
123
124int vgpu_fecs_trace_disable(struct gk20a *g)
125{
126 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
127 struct tegra_vgpu_cmd_msg msg = {
128 .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE,
129 .handle = vgpu_get_handle(g),
130 };
131 int err;
132
133 vcst->enabled = false;
134 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
135 err = err ? err : msg.ret;
136 WARN_ON(err);
137 return err;
138}
139
140bool vgpu_fecs_trace_is_enabled(struct gk20a *g)
141{
142 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
143
144 return (vcst && vcst->enabled);
145}
146
147int vgpu_fecs_trace_poll(struct gk20a *g)
148{
149 struct tegra_vgpu_cmd_msg msg = {
150 .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL,
151 .handle = vgpu_get_handle(g),
152 };
153 int err;
154
155 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
156 err = err ? err : msg.ret;
157 WARN_ON(err);
158 return err;
159}
160
161int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size)
162{
163 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
164
165 *buf = vcst->buf;
166 *size = vgpu_ivm_get_size(vcst->cookie);
167 return 0;
168}
169
170int vgpu_free_user_buffer(struct gk20a *g)
171{
172 return 0;
173}
174
175int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma)
176{
177 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
178 unsigned long size = vgpu_ivm_get_size(vcst->cookie);
179 unsigned long vsize = vma->vm_end - vma->vm_start;
180
181 size = min(size, vsize);
182 size = round_up(size, PAGE_SIZE);
183
184 return remap_pfn_range(vma, vma->vm_start,
185 vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT,
186 size,
187 vma->vm_page_prot);
188}
189
190#ifdef CONFIG_GK20A_CTXSW_TRACE
191int vgpu_fecs_trace_max_entries(struct gk20a *g,
192 struct nvgpu_ctxsw_trace_filter *filter)
193{
194 struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
195
196 return vcst->header->num_ents;
197}
198
199#if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE
200#error "FECS trace filter size mismatch!"
201#endif
202
203int vgpu_fecs_trace_set_filter(struct gk20a *g,
204 struct nvgpu_ctxsw_trace_filter *filter)
205{
206 struct tegra_vgpu_cmd_msg msg = {
207 .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER,
208 .handle = vgpu_get_handle(g),
209 };
210 struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter;
211 int err;
212
213 memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits));
214 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
215 err = err ? err : msg.ret;
216 WARN_ON(err);
217 return err;
218}
219
220void vgpu_fecs_trace_data_update(struct gk20a *g)
221{
222 gk20a_ctxsw_trace_wake_up(g, 0);
223}
224#endif /* CONFIG_GK20A_CTXSW_TRACE */
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c
deleted file mode 100644
index 054b019b..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c
+++ /dev/null
@@ -1,97 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include "gk20a/gk20a.h"
18#include "common/linux/vgpu/clk_vgpu.h"
19#include "common/linux/platform_gk20a.h"
20#include "common/linux/os_linux.h"
21
22#include <nvgpu/nvhost.h>
23
24#include <linux/platform_device.h>
25
26static int gv11b_vgpu_probe(struct device *dev)
27{
28 struct platform_device *pdev = to_platform_device(dev);
29 struct gk20a_platform *platform = dev_get_drvdata(dev);
30 struct resource *r;
31 void __iomem *regs;
32 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g);
33 struct gk20a *g = platform->g;
34 int ret;
35
36 r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode");
37 if (!r) {
38 nvgpu_err(g, "failed to get usermode regs");
39 return -ENXIO;
40 }
41 regs = devm_ioremap_resource(dev, r);
42 if (IS_ERR(regs)) {
43 nvgpu_err(g, "failed to map usermode regs");
44 return PTR_ERR(regs);
45 }
46 l->usermode_regs = regs;
47
48#ifdef CONFIG_TEGRA_GK20A_NVHOST
49 ret = nvgpu_get_nvhost_dev(g);
50 if (ret) {
51 l->usermode_regs = NULL;
52 return ret;
53 }
54
55 ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev,
56 &g->syncpt_unit_base,
57 &g->syncpt_unit_size);
58 if (ret) {
59 nvgpu_err(g, "Failed to get syncpt interface");
60 return -ENOSYS;
61 }
62 g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1);
63 nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n",
64 g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size);
65#endif
66 vgpu_init_clk_support(platform->g);
67
68 return 0;
69}
70
71struct gk20a_platform gv11b_vgpu_tegra_platform = {
72 .has_syncpoints = true,
73
74 /* power management configuration */
75 .can_railgate_init = false,
76 .can_elpg_init = false,
77 .enable_slcg = false,
78 .enable_blcg = false,
79 .enable_elcg = false,
80 .enable_elpg = false,
81 .enable_aelpg = false,
82 .can_slcg = false,
83 .can_blcg = false,
84 .can_elcg = false,
85
86 .ch_wdt_timeout_ms = 5000,
87
88 .probe = gv11b_vgpu_probe,
89
90 .clk_round_rate = vgpu_clk_round_rate,
91 .get_clk_freqs = vgpu_clk_get_freqs,
92
93 /* frequency scaling configuration */
94 .devfreq_governor = "userspace",
95
96 .virtual_dev = true,
97};
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c
deleted file mode 100644
index 830b04ac..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/*
2 * Tegra Virtualized GPU Platform Interface
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "gk20a/gk20a.h"
20#include "common/linux/platform_gk20a.h"
21#include "clk_vgpu.h"
22
23#include <nvgpu/nvhost.h>
24
25static int gk20a_tegra_probe(struct device *dev)
26{
27#ifdef CONFIG_TEGRA_GK20A_NVHOST
28 struct gk20a_platform *platform = dev_get_drvdata(dev);
29 int ret;
30
31 ret = nvgpu_get_nvhost_dev(platform->g);
32 if (ret)
33 return ret;
34
35 vgpu_init_clk_support(platform->g);
36 return 0;
37#else
38 return 0;
39#endif
40}
41
42struct gk20a_platform vgpu_tegra_platform = {
43 .has_syncpoints = true,
44 .aggressive_sync_destroy_thresh = 64,
45
46 /* power management configuration */
47 .can_railgate_init = false,
48 .can_elpg_init = false,
49 .enable_slcg = false,
50 .enable_blcg = false,
51 .enable_elcg = false,
52 .enable_elpg = false,
53 .enable_aelpg = false,
54 .can_slcg = false,
55 .can_blcg = false,
56 .can_elcg = false,
57
58 .ch_wdt_timeout_ms = 5000,
59
60 .probe = gk20a_tegra_probe,
61
62 .clk_round_rate = vgpu_clk_round_rate,
63 .get_clk_freqs = vgpu_clk_get_freqs,
64
65 /* frequency scaling configuration */
66 .devfreq_governor = "userspace",
67
68 .virtual_dev = true,
69};
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c
deleted file mode 100644
index 5a8ed9fd..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c
+++ /dev/null
@@ -1,50 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/device.h>
18#include <nvgpu/vgpu/vgpu.h>
19
20#include "common/linux/platform_gk20a.h"
21
22static ssize_t vgpu_load_show(struct device *dev,
23 struct device_attribute *attr,
24 char *buf)
25{
26 struct gk20a *g = get_gk20a(dev);
27 struct tegra_vgpu_cmd_msg msg = {0};
28 struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load;
29 int err;
30
31 msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD;
32 msg.handle = vgpu_get_handle(g);
33 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
34 if (err)
35 return err;
36
37 return snprintf(buf, PAGE_SIZE, "%u\n", p->load);
38}
39static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL);
40
41void vgpu_create_sysfs(struct device *dev)
42{
43 if (device_create_file(dev, &dev_attr_load))
44 dev_err(dev, "Failed to create vgpu sysfs attributes!\n");
45}
46
47void vgpu_remove_sysfs(struct device *dev)
48{
49 device_remove_file(dev, &dev_attr_load);
50}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c
deleted file mode 100644
index b28b5013..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/types.h>
18#include <linux/tegra_gr_comm.h>
19
20#include "common/linux/os_linux.h"
21
22int vgpu_ivc_init(struct gk20a *g, u32 elems,
23 const size_t *queue_sizes, u32 queue_start, u32 num_queues)
24{
25 struct platform_device *pdev = to_platform_device(dev_from_gk20a(g));
26
27 return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start,
28 num_queues);
29}
30
31void vgpu_ivc_deinit(u32 queue_start, u32 num_queues)
32{
33 tegra_gr_comm_deinit(queue_start, num_queues);
34}
35
36void vgpu_ivc_release(void *handle)
37{
38 tegra_gr_comm_release(handle);
39}
40
41u32 vgpu_ivc_get_server_vmid(void)
42{
43 return tegra_gr_comm_get_server_vmid();
44}
45
46int vgpu_ivc_recv(u32 index, void **handle, void **data,
47 size_t *size, u32 *sender)
48{
49 return tegra_gr_comm_recv(index, handle, data, size, sender);
50}
51
52int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size)
53{
54 return tegra_gr_comm_send(peer, index, data, size);
55}
56
57int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle,
58 void **data, size_t *size)
59{
60 return tegra_gr_comm_sendrecv(peer, index, handle, data, size);
61}
62
63u32 vgpu_ivc_get_peer_self(void)
64{
65 return TEGRA_GR_COMM_ID_SELF;
66}
67
68void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr,
69 size_t *size)
70{
71 return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size);
72}
73
74void vgpu_ivc_oob_put_ptr(void *handle)
75{
76 tegra_gr_comm_oob_put_ptr(handle);
77}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c
deleted file mode 100644
index 90089de8..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c
+++ /dev/null
@@ -1,53 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/vgpu/vgpu_ivm.h>
18
19#include <linux/tegra-ivc.h>
20
21#include "common/linux/os_linux.h"
22
23struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id)
24{
25 return tegra_hv_mempool_reserve(id);
26}
27
28int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie)
29{
30 return tegra_hv_mempool_unreserve(cookie);
31}
32
33u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie)
34{
35 return cookie->ipa;
36}
37
38u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie)
39{
40 return cookie->size;
41}
42
43void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie)
44{
45 return ioremap_cache(vgpu_ivm_get_ipa(cookie),
46 vgpu_ivm_get_size(cookie));
47}
48
49void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie,
50 void *addr)
51{
52 iounmap(addr);
53}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c
deleted file mode 100644
index 0d224eb9..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c
+++ /dev/null
@@ -1,475 +0,0 @@
1/*
2 * Virtualized GPU for Linux
3 *
4 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/mm.h>
20#include <linux/dma-mapping.h>
21#include <linux/pm_runtime.h>
22#include <linux/pm_qos.h>
23#include <linux/platform_device.h>
24#include <soc/tegra/chip-id.h>
25
26#include <nvgpu/kmem.h>
27#include <nvgpu/bug.h>
28#include <nvgpu/enabled.h>
29#include <nvgpu/debug.h>
30#include <nvgpu/soc.h>
31#include <nvgpu/ctxsw_trace.h>
32#include <nvgpu/defaults.h>
33#include <nvgpu/ltc.h>
34
35#include "vgpu_linux.h"
36#include "vgpu/fecs_trace_vgpu.h"
37#include "clk_vgpu.h"
38#include "gk20a/tsg_gk20a.h"
39#include "gk20a/channel_gk20a.h"
40#include "gk20a/regops_gk20a.h"
41#include "gm20b/hal_gm20b.h"
42
43#include "common/linux/module.h"
44#include "common/linux/os_linux.h"
45#include "common/linux/ioctl.h"
46#include "common/linux/scale.h"
47#include "common/linux/driver_common.h"
48#include "common/linux/platform_gk20a.h"
49
50#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
51
52struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g)
53{
54 struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g));
55
56 return (struct vgpu_priv_data *)plat->vgpu_priv;
57}
58
59static void vgpu_remove_support(struct gk20a *g)
60{
61 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
62
63 vgpu_remove_support_common(g);
64
65	/* free mappings to registers, etc. */
66
67 if (l->bar1) {
68 iounmap(l->bar1);
69 l->bar1 = NULL;
70 }
71}
72
73static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
74{
75 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
76
77 nvgpu_mutex_init(&g->poweron_lock);
78 nvgpu_mutex_init(&g->poweroff_lock);
79 nvgpu_mutex_init(&g->ctxsw_disable_lock);
80 l->regs_saved = l->regs;
81 l->bar1_saved = l->bar1;
82
83 g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
84 g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
85 g->has_syncpoints = platform->has_syncpoints;
86 g->ptimer_src_freq = platform->ptimer_src_freq;
87 g->can_railgate = platform->can_railgate_init;
88 g->railgate_delay = platform->railgate_delay_init;
89
90 __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
91 platform->unify_address_spaces);
92}
93
94static int vgpu_init_support(struct platform_device *pdev)
95{
96 struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
97 struct gk20a *g = get_gk20a(&pdev->dev);
98 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
99 void __iomem *regs;
100 int err = 0;
101
102 if (!r) {
103 nvgpu_err(g, "failed to get gk20a bar1");
104 err = -ENXIO;
105 goto fail;
106 }
107
108 if (r->name && !strcmp(r->name, "/vgpu")) {
109 regs = devm_ioremap_resource(&pdev->dev, r);
110 if (IS_ERR(regs)) {
111 nvgpu_err(g, "failed to remap gk20a bar1");
112 err = PTR_ERR(regs);
113 goto fail;
114 }
115 l->bar1 = regs;
116 l->bar1_mem = r;
117 }
118
119 nvgpu_mutex_init(&g->dbg_sessions_lock);
120 nvgpu_mutex_init(&g->client_lock);
121
122 nvgpu_init_list_node(&g->profiler_objects);
123
124 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
125 if (!g->dbg_regops_tmp_buf) {
126 nvgpu_err(g, "couldn't allocate regops tmp buf");
127 return -ENOMEM;
128 }
129 g->dbg_regops_tmp_buf_ops =
130 SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
131
132 g->remove_support = vgpu_remove_support;
133 return 0;
134
135 fail:
136 vgpu_remove_support(g);
137 return err;
138}
139
140int vgpu_pm_prepare_poweroff(struct device *dev)
141{
142 struct gk20a *g = get_gk20a(dev);
143 int ret = 0;
144
145 nvgpu_log_fn(g, " ");
146
147 if (!g->power_on)
148 return 0;
149
150 ret = gk20a_channel_suspend(g);
151 if (ret)
152 return ret;
153
154 g->power_on = false;
155
156 return ret;
157}
158
159int vgpu_pm_finalize_poweron(struct device *dev)
160{
161 struct gk20a *g = get_gk20a(dev);
162 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
163 int err;
164
165 nvgpu_log_fn(g, " ");
166
167 if (g->power_on)
168 return 0;
169
170 g->power_on = true;
171
172 vgpu_detect_chip(g);
173 err = vgpu_init_hal(g);
174 if (err)
175 goto done;
176
177 if (g->ops.ltc.init_fs_state)
178 g->ops.ltc.init_fs_state(g);
179
180 err = nvgpu_init_ltc_support(g);
181 if (err) {
182 nvgpu_err(g, "failed to init ltc");
183 goto done;
184 }
185
186 err = vgpu_init_mm_support(g);
187 if (err) {
188 nvgpu_err(g, "failed to init gk20a mm");
189 goto done;
190 }
191
192 err = vgpu_init_fifo_support(g);
193 if (err) {
194 nvgpu_err(g, "failed to init gk20a fifo");
195 goto done;
196 }
197
198 err = vgpu_init_gr_support(g);
199 if (err) {
200 nvgpu_err(g, "failed to init gk20a gr");
201 goto done;
202 }
203
204 err = g->ops.chip_init_gpu_characteristics(g);
205 if (err) {
206 nvgpu_err(g, "failed to init gk20a gpu characteristics");
207 goto done;
208 }
209
210 err = nvgpu_finalize_poweron_linux(l);
211 if (err)
212 goto done;
213
214#ifdef CONFIG_GK20A_CTXSW_TRACE
215 gk20a_ctxsw_trace_init(g);
216#endif
217 gk20a_sched_ctrl_init(g);
218 gk20a_channel_resume(g);
219
220 g->sw_ready = true;
221
222done:
223 return err;
224}
225
226static int vgpu_qos_notify(struct notifier_block *nb,
227 unsigned long n, void *data)
228{
229 struct gk20a_scale_profile *profile =
230 container_of(nb, struct gk20a_scale_profile,
231 qos_notify_block);
232 struct gk20a *g = get_gk20a(profile->dev);
233 u32 max_freq;
234 int err;
235
236 nvgpu_log_fn(g, " ");
237
238 max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS);
239 err = vgpu_clk_cap_rate(profile->dev, max_freq);
240 if (err)
241 nvgpu_err(g, "%s failed, err=%d", __func__, err);
242
243	return NOTIFY_OK; /* let further notifier callbacks run */
244}
245
246static int vgpu_pm_qos_init(struct device *dev)
247{
248 struct gk20a *g = get_gk20a(dev);
249 struct gk20a_scale_profile *profile = g->scale_profile;
250
251 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) {
252 if (!profile)
253 return -EINVAL;
254 } else {
255 profile = nvgpu_kzalloc(g, sizeof(*profile));
256 if (!profile)
257 return -ENOMEM;
258 g->scale_profile = profile;
259 }
260
261 profile->dev = dev;
262 profile->qos_notify_block.notifier_call = vgpu_qos_notify;
263 pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
264 &profile->qos_notify_block);
265 return 0;
266}
267
268static void vgpu_pm_qos_remove(struct device *dev)
269{
270 struct gk20a *g = get_gk20a(dev);
271
272 pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
273 &g->scale_profile->qos_notify_block);
274 nvgpu_kfree(g, g->scale_profile);
275 g->scale_profile = NULL;
276}
277
278static int vgpu_pm_init(struct device *dev)
279{
280 struct gk20a *g = get_gk20a(dev);
281 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
282 unsigned long *freqs;
283 int num_freqs;
284 int err = 0;
285
286 nvgpu_log_fn(g, " ");
287
288 if (nvgpu_platform_is_simulation(g))
289 return 0;
290
291 __pm_runtime_disable(dev, false);
292
293 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
294 gk20a_scale_init(dev);
295
296 if (l->devfreq) {
297 /* set min/max frequency based on frequency table */
298 err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs);
299 if (err)
300 return err;
301
302 if (num_freqs < 1)
303 return -EINVAL;
304
305 l->devfreq->min_freq = freqs[0];
306 l->devfreq->max_freq = freqs[num_freqs - 1];
307 }
308
309 err = vgpu_pm_qos_init(dev);
310 if (err)
311 return err;
312
313 return err;
314}
315
316int vgpu_probe(struct platform_device *pdev)
317{
318 struct nvgpu_os_linux *l;
319 struct gk20a *gk20a;
320 int err;
321 struct device *dev = &pdev->dev;
322 struct gk20a_platform *platform = gk20a_get_platform(dev);
323 struct vgpu_priv_data *priv;
324
325 if (!platform) {
326 dev_err(dev, "no platform data\n");
327 return -ENODATA;
328 }
329
330 l = kzalloc(sizeof(*l), GFP_KERNEL);
331 if (!l) {
332 dev_err(dev, "couldn't allocate gk20a support");
333 return -ENOMEM;
334 }
335 gk20a = &l->g;
336
337 nvgpu_log_fn(gk20a, " ");
338
339 nvgpu_init_gk20a(gk20a);
340
341 nvgpu_kmem_init(gk20a);
342
343 err = nvgpu_init_enabled_flags(gk20a);
344 if (err) {
345 kfree(gk20a);
346 return err;
347 }
348
349 l->dev = dev;
350 if (tegra_platform_is_vdk())
351 __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
352
353 gk20a->is_virtual = true;
354
355 priv = nvgpu_kzalloc(gk20a, sizeof(*priv));
356 if (!priv) {
357 kfree(gk20a);
358 return -ENOMEM;
359 }
360
361 platform->g = gk20a;
362 platform->vgpu_priv = priv;
363
364 err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class);
365 if (err)
366 return err;
367
368 vgpu_init_support(pdev);
369
370 vgpu_init_vars(gk20a, platform);
371
372 init_rwsem(&l->busy_lock);
373
374 nvgpu_spinlock_init(&gk20a->mc_enable_lock);
375
376 gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
377
378 /* Initialize the platform interface. */
379 err = platform->probe(dev);
380 if (err) {
381 if (err == -EPROBE_DEFER)
382 nvgpu_info(gk20a, "platform probe failed");
383 else
384 nvgpu_err(gk20a, "platform probe failed");
385 return err;
386 }
387
388 if (platform->late_probe) {
389 err = platform->late_probe(dev);
390 if (err) {
391 nvgpu_err(gk20a, "late probe failed");
392 return err;
393 }
394 }
395
396 err = vgpu_comm_init(gk20a);
397 if (err) {
398 nvgpu_err(gk20a, "failed to init comm interface");
399 return -ENOSYS;
400 }
401
402 priv->virt_handle = vgpu_connect();
403 if (!priv->virt_handle) {
404 nvgpu_err(gk20a, "failed to connect to server node");
405 vgpu_comm_deinit();
406 return -ENOSYS;
407 }
408
409 err = vgpu_get_constants(gk20a);
410 if (err) {
411 vgpu_comm_deinit();
412 return err;
413 }
414
415 err = vgpu_pm_init(dev);
416 if (err) {
417 nvgpu_err(gk20a, "pm init failed");
418 return err;
419 }
420
421 err = nvgpu_thread_create(&priv->intr_handler, gk20a,
422 vgpu_intr_thread, "gk20a");
423 if (err)
424 return err;
425
426 gk20a_debug_init(gk20a, "gpu.0");
427
428 /* Set DMA parameters to allow larger sgt lists */
429 dev->dma_parms = &l->dma_parms;
430 dma_set_max_seg_size(dev, UINT_MAX);
431
432 gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT;
433 gk20a->timeouts_disabled_by_user = false;
434 nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0);
435
436 vgpu_create_sysfs(dev);
437 gk20a_init_gr(gk20a);
438
439 nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages);
440 gk20a->gr.max_comptag_mem = totalram_pages
441 >> (10 - (PAGE_SHIFT - 10));
442
443 nvgpu_ref_init(&gk20a->refcount);
444
445 return 0;
446}
447
448int vgpu_remove(struct platform_device *pdev)
449{
450 struct device *dev = &pdev->dev;
451 struct gk20a *g = get_gk20a(dev);
452
453 nvgpu_log_fn(g, " ");
454
455 vgpu_pm_qos_remove(dev);
456 if (g->remove_support)
457 g->remove_support(g);
458
459 vgpu_comm_deinit();
460 gk20a_sched_ctrl_cleanup(g);
461 gk20a_user_deinit(dev, &nvgpu_class);
462 vgpu_remove_sysfs(dev);
463 gk20a_get_platform(dev)->g = NULL;
464 gk20a_put(g);
465
466 return 0;
467}
468
469bool vgpu_is_reduced_bar1(struct gk20a *g)
470{
471 struct fifo_gk20a *f = &g->fifo;
472 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
473
474 return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size;
475}
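
The comptag sizing line near the end of vgpu_probe() above packs a unit conversion into one expression: totalram_pages >> (10 - (PAGE_SHIFT - 10)) is simply total system RAM expressed in MB, since pages * PAGE_SIZE / 1 MiB equals pages >> (20 - PAGE_SHIFT). A standalone sketch of that arithmetic, with an illustrative page count and 4 KiB pages assumed (not nvgpu code):

#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages, the common case */

int main(void)
{
	unsigned long long totalram_pages = 1048576ULL;	/* 4 GiB worth of 4 KiB pages */
	unsigned long long mb_shift = totalram_pages >> (10 - (PAGE_SHIFT - 10));
	unsigned long long mb_direct = (totalram_pages << PAGE_SHIFT) >> 20;

	/* Both print 4096: the nested-shift form is just "RAM size in MB". */
	printf("%llu MB vs %llu MB\n", mb_shift, mb_direct);
	return 0;
}
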
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h
deleted file mode 100644
index 38379cf2..00000000
--- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h
+++ /dev/null
@@ -1,57 +0,0 @@
1/*
2 * Virtualized GPU Linux Interfaces
3 *
4 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef __VGPU_LINUX_H__
20#define __VGPU_LINUX_H__
21
22struct device;
23struct platform_device;
24
25#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
26
27#include <nvgpu/vgpu/vgpu.h>
28
29int vgpu_pm_prepare_poweroff(struct device *dev);
30int vgpu_pm_finalize_poweron(struct device *dev);
31int vgpu_probe(struct platform_device *dev);
32int vgpu_remove(struct platform_device *dev);
33
34void vgpu_create_sysfs(struct device *dev);
35void vgpu_remove_sysfs(struct device *dev);
36#else
37/* define placeholders for functions used outside of vgpu */
38
39static inline int vgpu_pm_prepare_poweroff(struct device *dev)
40{
41 return -ENOSYS;
42}
43static inline int vgpu_pm_finalize_poweron(struct device *dev)
44{
45 return -ENOSYS;
46}
47static inline int vgpu_probe(struct platform_device *dev)
48{
49 return -ENOSYS;
50}
51static inline int vgpu_remove(struct platform_device *dev)
52{
53 return -ENOSYS;
54}
55#endif
56
57#endif
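
The header above uses a standard kernel pattern: when CONFIG_TEGRA_GR_VIRTUALIZATION is not set, static inline stubs returning -ENOSYS stand in for the real entry points so that callers outside vgpu still compile. A minimal, self-contained sketch of the same pattern follows; CONFIG_DEMO_FEATURE and demo_probe() are hypothetical names, not part of nvgpu:

#include <errno.h>
#include <stdio.h>

#ifdef CONFIG_DEMO_FEATURE
int demo_probe(void)
{
	return 0;	/* real implementation would live here */
}
#else
/* placeholder so callers compile and link when the feature is off */
static inline int demo_probe(void)
{
	return -ENOSYS;
}
#endif

int main(void)
{
	if (demo_probe() == -ENOSYS)
		printf("feature not built in\n");
	return 0;
}
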
diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c
deleted file mode 100644
index 136d4a10..00000000
--- a/drivers/gpu/nvgpu/common/linux/vidmem.c
+++ /dev/null
@@ -1,262 +0,0 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-buf.h>
18#include <uapi/linux/nvgpu.h>
19
20#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD
21#include <linux/platform/tegra/tegra_fd.h>
22#endif
23
24#include <nvgpu/dma.h>
25#include <nvgpu/enabled.h>
26#include <nvgpu/vidmem.h>
27#include <nvgpu/nvgpu_mem.h>
28#include <nvgpu/page_allocator.h>
29
30#include <nvgpu/linux/vm.h>
31#include <nvgpu/linux/dma.h>
32#include <nvgpu/linux/vidmem.h>
33
34#include "gk20a/gk20a.h"
35#include "gk20a/mm_gk20a.h"
36
37bool nvgpu_addr_is_vidmem_page_alloc(u64 addr)
38{
39 return !!(addr & 1ULL);
40}
41
42void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr)
43{
44 /* set bit 0 to indicate vidmem allocation */
45 sg_dma_address(sgl) = (addr | 1ULL);
46}
47
48struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl)
49{
50 u64 addr;
51
52 addr = sg_dma_address(sgl);
53
54 if (nvgpu_addr_is_vidmem_page_alloc(addr))
55 addr = addr & ~1ULL;
56 else
57 WARN_ON(1);
58
59 return (struct nvgpu_page_alloc *)(uintptr_t)addr;
60}
61
62static struct sg_table *gk20a_vidbuf_map_dma_buf(
63 struct dma_buf_attachment *attach, enum dma_data_direction dir)
64{
65 struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv;
66
67 return buf->mem->priv.sgt;
68}
69
70static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
71 struct sg_table *sgt,
72 enum dma_data_direction dir)
73{
74}
75
76static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
77{
78 struct nvgpu_vidmem_buf *buf = dmabuf->priv;
79 struct nvgpu_vidmem_linux *linux_buf = buf->priv;
80 struct gk20a *g = buf->g;
81
82 vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB",
83 dmabuf, buf->mem->size >> 10);
84
85 if (linux_buf && linux_buf->dmabuf_priv_delete)
86 linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv);
87
88 nvgpu_kfree(g, linux_buf);
89 nvgpu_vidmem_buf_free(g, buf);
90
91 gk20a_put(g);
92}
93
94static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
95{
96 WARN_ON("Not supported");
97 return NULL;
98}
99
100static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf,
101 unsigned long page_num)
102{
103 WARN_ON("Not supported");
104 return NULL;
105}
106
107static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
108{
109 return -EINVAL;
110}
111
112static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf,
113 struct device *dev, void *priv, void (*delete)(void *priv))
114{
115 struct nvgpu_vidmem_buf *buf = dmabuf->priv;
116 struct nvgpu_vidmem_linux *linux_buf = buf->priv;
117
118 linux_buf->dmabuf_priv = priv;
119 linux_buf->dmabuf_priv_delete = delete;
120
121 return 0;
122}
123
124static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf,
125 struct device *dev)
126{
127 struct nvgpu_vidmem_buf *buf = dmabuf->priv;
128 struct nvgpu_vidmem_linux *linux_buf = buf->priv;
129
130 return linux_buf->dmabuf_priv;
131}
132
133static const struct dma_buf_ops gk20a_vidbuf_ops = {
134 .map_dma_buf = gk20a_vidbuf_map_dma_buf,
135 .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf,
136 .release = gk20a_vidbuf_release,
137 .kmap_atomic = gk20a_vidbuf_kmap_atomic,
138 .kmap = gk20a_vidbuf_kmap,
139 .mmap = gk20a_vidbuf_mmap,
140 .set_drvdata = gk20a_vidbuf_set_private,
141 .get_drvdata = gk20a_vidbuf_get_private,
142};
143
144static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf)
145{
146 DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
147
148 exp_info.priv = buf;
149 exp_info.ops = &gk20a_vidbuf_ops;
150 exp_info.size = buf->mem->size;
151 exp_info.flags = O_RDWR;
152
153 return dma_buf_export(&exp_info);
154}
155
156struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf)
157{
158 struct nvgpu_vidmem_buf *buf = dmabuf->priv;
159
160 if (dmabuf->ops != &gk20a_vidbuf_ops)
161 return NULL;
162
163 return buf->g;
164}
165
166int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
167{
168 struct nvgpu_vidmem_buf *buf = NULL;
169 struct nvgpu_vidmem_linux *priv;
170 int err, fd;
171
172 /*
173 * This ref is released when the dma_buf is closed.
174 */
175 if (!gk20a_get(g))
176 return -ENODEV;
177
178 vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes);
179
180 priv = nvgpu_kzalloc(g, sizeof(*priv));
181 if (!priv) {
182 err = -ENOMEM;
183 goto fail;
184 }
185
186 buf = nvgpu_vidmem_user_alloc(g, bytes);
187 if (IS_ERR(buf)) {
188 err = PTR_ERR(buf);
189 goto fail;
190 }
191
192 priv->dmabuf = gk20a_vidbuf_export(buf);
193 if (IS_ERR(priv->dmabuf)) {
194 err = PTR_ERR(priv->dmabuf);
195 goto fail;
196 }
197
198 buf->priv = priv;
199
200#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD
201 fd = tegra_alloc_fd(current->files, 1024, O_RDWR);
202#else
203 fd = get_unused_fd_flags(O_RDWR);
204#endif
205 if (fd < 0) {
206 /* ->release frees what we have done */
207 dma_buf_put(priv->dmabuf);
208 return fd;
209 }
210
211 /* fclose() on this drops one ref, freeing the dma buf */
212 fd_install(fd, priv->dmabuf->file);
213
214 vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB",
215 priv->dmabuf, buf->mem->size >> 10);
216
217 return fd;
218
219fail:
220 nvgpu_vidmem_buf_free(g, buf);
221 nvgpu_kfree(g, priv);
222 gk20a_put(g);
223
224 vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err);
225 return err;
226}
227
228int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
229 void *buffer, u64 offset, u64 size, u32 cmd)
230{
231 struct nvgpu_vidmem_buf *vidmem_buf;
232 struct nvgpu_mem *mem;
233 int err = 0;
234
235 if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM)
236 return -EINVAL;
237
238 vidmem_buf = dmabuf->priv;
239 mem = vidmem_buf->mem;
240
241 switch (cmd) {
242 case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ:
243 nvgpu_mem_rd_n(g, mem, offset, buffer, size);
244 break;
245
246 case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE:
247 nvgpu_mem_wr_n(g, mem, offset, buffer, size);
248 break;
249
250 default:
251 err = -EINVAL;
252 }
253
254 return err;
255}
256
257void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem)
258{
259 nvgpu_free(vidmem->allocator,
260 (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl));
261 nvgpu_free_sgtable(g, &vidmem->priv.sgt);
262}
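
The vidmem code above tags vidmem allocations by storing a page-allocator handle in the scatterlist DMA address field with bit 0 set, which relies on the handle being at least 2-byte aligned so bit 0 is always free to act as the marker. A simplified, self-contained sketch of that tag/untag round trip, using stand-in types rather than nvgpu ones:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct fake_page_alloc { int dummy; };	/* stand-in for the allocator handle */

static uint64_t tag_vidmem(struct fake_page_alloc *alloc)
{
	return (uint64_t)(uintptr_t)alloc | 1ULL;	/* set the marker bit */
}

static int is_vidmem(uint64_t addr)
{
	return (addr & 1ULL) != 0;
}

static struct fake_page_alloc *untag_vidmem(uint64_t addr)
{
	return (struct fake_page_alloc *)(uintptr_t)(addr & ~1ULL);
}

int main(void)
{
	struct fake_page_alloc alloc;
	uint64_t tagged = tag_vidmem(&alloc);

	assert(is_vidmem(tagged));
	assert(untag_vidmem(tagged) == &alloc);
	printf("round-trip ok\n");
	return 0;
}
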
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
deleted file mode 100644
index baa77515..00000000
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ /dev/null
@@ -1,332 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-buf.h>
18#include <linux/scatterlist.h>
19#include <uapi/linux/nvgpu.h>
20
21#include <nvgpu/log.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/rbtree.h>
24#include <nvgpu/vm_area.h>
25#include <nvgpu/nvgpu_mem.h>
26#include <nvgpu/page_allocator.h>
27#include <nvgpu/vidmem.h>
28
29#include <nvgpu/linux/vm.h>
30#include <nvgpu/linux/vidmem.h>
31#include <nvgpu/linux/nvgpu_mem.h>
32
33#include "gk20a/gk20a.h"
34#include "gk20a/mm_gk20a.h"
35
36#include "platform_gk20a.h"
37#include "os_linux.h"
38#include "dmabuf.h"
39
40static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags)
41{
42 u32 core_flags = 0;
43
44 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
45 core_flags |= NVGPU_VM_MAP_FIXED_OFFSET;
46 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE)
47 core_flags |= NVGPU_VM_MAP_CACHEABLE;
48 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT)
49 core_flags |= NVGPU_VM_MAP_IO_COHERENT;
50 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE)
51 core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE;
52 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC)
53 core_flags |= NVGPU_VM_MAP_L3_ALLOC;
54 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)
55 core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL;
56
57 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS)
58 nvgpu_warn(g, "Ignoring deprecated flag: "
59 "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS");
60
61 return core_flags;
62}
63
64static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
65 struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
66{
67 struct nvgpu_rbtree_node *node = NULL;
68 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
69
70 nvgpu_rbtree_enum_start(0, &node, root);
71
72 while (node) {
73 struct nvgpu_mapped_buf *mapped_buffer =
74 mapped_buffer_from_rbtree_node(node);
75
76 if (mapped_buffer->os_priv.dmabuf == dmabuf &&
77 mapped_buffer->kind == kind)
78 return mapped_buffer;
79
80 nvgpu_rbtree_enum_next(&node, node);
81 }
82
83 return NULL;
84}
85
86int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
87 struct dma_buf **dmabuf,
88 u64 *offset)
89{
90 struct nvgpu_mapped_buf *mapped_buffer;
91 struct gk20a *g = gk20a_from_vm(vm);
92
93 nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va);
94
95 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
96
97 mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
98 if (!mapped_buffer) {
99 nvgpu_mutex_release(&vm->update_gmmu_lock);
100 return -EINVAL;
101 }
102
103 *dmabuf = mapped_buffer->os_priv.dmabuf;
104 *offset = gpu_va - mapped_buffer->addr;
105
106 nvgpu_mutex_release(&vm->update_gmmu_lock);
107
108 return 0;
109}
110
111u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf)
112{
113 return os_buf->dmabuf->size;
114}
115
116/*
117 * vm->update_gmmu_lock must be held. This checks to see if we already have
118 * mapped the passed buffer into this VM. If so, just return the existing
119 * mapping address.
120 */
121struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
122 struct nvgpu_os_buffer *os_buf,
123 u64 map_addr,
124 u32 flags,
125 int kind)
126{
127 struct gk20a *g = gk20a_from_vm(vm);
128 struct nvgpu_mapped_buf *mapped_buffer = NULL;
129
130 if (flags & NVGPU_VM_MAP_FIXED_OFFSET) {
131 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr);
132 if (!mapped_buffer)
133 return NULL;
134
135 if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf ||
136 mapped_buffer->kind != (u32)kind)
137 return NULL;
138 } else {
139 mapped_buffer =
140 __nvgpu_vm_find_mapped_buf_reverse(vm,
141 os_buf->dmabuf,
142 kind);
143 if (!mapped_buffer)
144 return NULL;
145 }
146
147 if (mapped_buffer->flags != flags)
148 return NULL;
149
150 /*
151 * If we find the mapping here then that means we have mapped it already
152 * and the prior pin and get must be undone.
153 */
154 gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment,
155 mapped_buffer->os_priv.sgt);
156 dma_buf_put(os_buf->dmabuf);
157
158 nvgpu_log(g, gpu_dbg_map,
159 "gv: 0x%04x_%08x + 0x%-7zu "
160 "[dma: 0x%010llx, pa: 0x%010llx] "
161 "pgsz=%-3dKb as=%-2d "
162 "flags=0x%x apt=%s (reused)",
163 u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
164 os_buf->dmabuf->size,
165 (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl),
166 (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
167 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
168 vm_aspace_id(vm),
169 mapped_buffer->flags,
170 nvgpu_aperture_str(g,
171 gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
172
173 return mapped_buffer;
174}
175
176int nvgpu_vm_map_linux(struct vm_gk20a *vm,
177 struct dma_buf *dmabuf,
178 u64 offset_align,
179 u32 flags,
180 s16 compr_kind,
181 s16 incompr_kind,
182 int rw_flag,
183 u64 buffer_offset,
184 u64 mapping_size,
185 struct vm_gk20a_mapping_batch *batch,
186 u64 *gpu_va)
187{
188 struct gk20a *g = gk20a_from_vm(vm);
189 struct device *dev = dev_from_gk20a(g);
190 struct nvgpu_os_buffer os_buf;
191 struct sg_table *sgt;
192 struct nvgpu_sgt *nvgpu_sgt = NULL;
193 struct nvgpu_mapped_buf *mapped_buffer = NULL;
194 struct dma_buf_attachment *attachment;
195 u64 map_addr = 0ULL;
196 int err = 0;
197
198 if (flags & NVGPU_VM_MAP_FIXED_OFFSET)
199 map_addr = offset_align;
200
201 sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
202 if (IS_ERR(sgt)) {
203 nvgpu_warn(g, "Failed to pin dma_buf!");
204 return PTR_ERR(sgt);
205 }
206 os_buf.dmabuf = dmabuf;
207 os_buf.attachment = attachment;
208 os_buf.dev = dev;
209
210 if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
211 err = -EINVAL;
212 goto clean_up;
213 }
214
215 nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
216 if (!nvgpu_sgt) {
217 err = -ENOMEM;
218 goto clean_up;
219 }
220
221 mapped_buffer = nvgpu_vm_map(vm,
222 &os_buf,
223 nvgpu_sgt,
224 map_addr,
225 mapping_size,
226 buffer_offset,
227 rw_flag,
228 flags,
229 compr_kind,
230 incompr_kind,
231 batch,
232 gk20a_dmabuf_aperture(g, dmabuf));
233
234 nvgpu_sgt_free(g, nvgpu_sgt);
235
236 if (IS_ERR(mapped_buffer)) {
237 err = PTR_ERR(mapped_buffer);
238 goto clean_up;
239 }
240
241 mapped_buffer->os_priv.dmabuf = dmabuf;
242 mapped_buffer->os_priv.attachment = attachment;
243 mapped_buffer->os_priv.sgt = sgt;
244
245 *gpu_va = mapped_buffer->addr;
246 return 0;
247
248clean_up:
249 gk20a_mm_unpin(dev, dmabuf, attachment, sgt);
250
251 return err;
252}
253
254int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
255 int dmabuf_fd,
256 u64 *offset_align,
257 u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
258 s16 compr_kind,
259 s16 incompr_kind,
260 u64 buffer_offset,
261 u64 mapping_size,
262 struct vm_gk20a_mapping_batch *batch)
263{
264 struct gk20a *g = gk20a_from_vm(vm);
265 struct dma_buf *dmabuf;
266 u64 ret_va;
267 int err = 0;
268
269 /* get ref to the mem handle (released on unmap_locked) */
270 dmabuf = dma_buf_get(dmabuf_fd);
271 if (IS_ERR(dmabuf)) {
272 nvgpu_warn(g, "%s: fd %d is not a dmabuf",
273 __func__, dmabuf_fd);
274 return PTR_ERR(dmabuf);
275 }
276
277 /* verify that we're not overflowing the buffer, i.e.
278	 * (buffer_offset + mapping_size) > dmabuf->size.
279 *
280 * Since buffer_offset + mapping_size could overflow, first check
281 * that mapping size < dmabuf_size, at which point we can subtract
282 * mapping_size from both sides for the final comparison.
283 */
284 if ((mapping_size > dmabuf->size) ||
285 (buffer_offset > (dmabuf->size - mapping_size))) {
286 nvgpu_err(g,
287 "buf size %llx < (offset(%llx) + map_size(%llx))\n",
288 (u64)dmabuf->size, buffer_offset, mapping_size);
289 dma_buf_put(dmabuf);
290 return -EINVAL;
291 }
292
293 err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
294 if (err) {
295 dma_buf_put(dmabuf);
296 return err;
297 }
298
299 err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align,
300 nvgpu_vm_translate_linux_flags(g, flags),
301 compr_kind, incompr_kind,
302 gk20a_mem_flag_none,
303 buffer_offset,
304 mapping_size,
305 batch,
306 &ret_va);
307
308 if (!err)
309 *offset_align = ret_va;
310 else
311 dma_buf_put(dmabuf);
312
313 return err;
314}
315
316/*
317 * This is the function call-back for freeing OS specific components of an
318 * nvgpu_mapped_buf. This should most likely never be called outside of the
319 * core MM framework!
320 *
321 * Note: the VM lock will be held.
322 */
323void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
324{
325 struct vm_gk20a *vm = mapped_buffer->vm;
326
327 gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
328 mapped_buffer->os_priv.attachment,
329 mapped_buffer->os_priv.sgt);
330
331 dma_buf_put(mapped_buffer->os_priv.dmabuf);
332}
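
The range check in nvgpu_vm_map_buffer() above avoids u64 wraparound by first rejecting mapping_size > dmabuf->size and then comparing buffer_offset against the remaining room, instead of adding offset and size directly. A standalone sketch of that overflow-safe check with illustrative values (not nvgpu code):

#include <stdint.h>
#include <stdio.h>

static int range_ok(uint64_t size, uint64_t offset, uint64_t len)
{
	if (len > size)
		return 0;
	return offset <= size - len;	/* no wrap possible after the guard */
}

int main(void)
{
	uint64_t size = 1 << 20;	/* 1 MiB buffer */

	printf("%d\n", range_ok(size, 0, size));		/* 1: whole buffer */
	printf("%d\n", range_ok(size, 4096, size));		/* 0: runs past the end */
	printf("%d\n", range_ok(size, UINT64_MAX, 4096));	/* 0: a naive add would wrap and pass */
	return 0;
}
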