Diffstat (limited to 'include/os/linux')
146 files changed, 31469 insertions, 0 deletions
diff --git a/include/os/linux/cde.c b/include/os/linux/cde.c
new file mode 100644
index 0000000..715513c
--- /dev/null
+++ b/include/os/linux/cde.c
@@ -0,0 +1,1794 @@
/*
 * Color decompression engine support
 *
 * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/dma-buf.h>
#include <uapi/linux/nvgpu.h>

#include <trace/events/gk20a.h>

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/timers.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/firmware.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/channel.h>
#include <nvgpu/utils.h>
#include <nvgpu/gk20a.h>

#include <nvgpu/linux/vm.h>

#include "gk20a/mm_gk20a.h"
#include "gk20a/fence_gk20a.h"
#include "gk20a/gr_gk20a.h"

#include "cde.h"
#include "os_linux.h"
#include "dmabuf.h"
#include "channel.h"
#include "cde_gm20b.h"
#include "cde_gp10b.h"

#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);

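/*
 * Tunables for context lifetime management. CTX_DELETE_TIME is the delay, in
 * milliseconds, before an idle temporary context is freed by its deleter work
 * (it is passed through msecs_to_jiffies() below). MAX_CTX_USE_COUNT caps how
 * many contexts may be in use at once before gk20a_cde_get_context() starts
 * retrying, and MAX_CTX_RETRY_TIME bounds that retry loop (a CPU-side nvgpu
 * timeout, in milliseconds).
 */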
#define CTX_DELETE_TIME 1000

#define MAX_CTX_USE_COUNT 42
#define MAX_CTX_RETRY_TIME 2000

static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
{
	struct nvgpu_mapped_buf *buffer;
	dma_addr_t addr = 0;
	struct gk20a *g = gk20a_from_vm(vm);

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
	if (buffer)
		addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return addr;
}

static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
{
	unsigned int i;

	for (i = 0; i < cde_ctx->num_bufs; i++) {
		struct nvgpu_mem *mem = cde_ctx->mem + i;
		nvgpu_dma_unmap_free(cde_ctx->vm, mem);
	}

	nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);

	cde_ctx->convert_cmd = NULL;
	cde_ctx->init_convert_cmd = NULL;
	cde_ctx->num_bufs = 0;
	cde_ctx->num_params = 0;
	cde_ctx->init_cmd_num_entries = 0;
	cde_ctx->convert_cmd_num_entries = 0;
	cde_ctx->init_cmd_executed = false;
}

static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
__must_hold(&cde_app->mutex)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct channel_gk20a *ch = cde_ctx->ch;
	struct vm_gk20a *vm = ch->vm;

	trace_gk20a_cde_remove_ctx(cde_ctx);

	/* release mapped memory */
	gk20a_deinit_cde_img(cde_ctx);
	nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
			 cde_ctx->backing_store_vaddr);

	/*
	 * free the channel
	 * gk20a_channel_close() will also unbind the channel from TSG
	 */
	gk20a_channel_close(ch);
	nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);

	/* housekeeping on app */
	nvgpu_list_del(&cde_ctx->list);
	l->cde_app.ctx_count--;
	nvgpu_kfree(g, cde_ctx);
}

static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
		bool wait_finish)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;

	/* permanent contexts do not have deleter works */
	if (!cde_ctx->is_temporary)
		return;

	if (wait_finish) {
		nvgpu_mutex_release(&cde_app->mutex);
		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
		nvgpu_mutex_acquire(&cde_app->mutex);
	} else {
		cancel_delayed_work(&cde_ctx->ctx_deleter_work);
	}
}

static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	/* it is safe to go off the mutex in cancel_deleter since the app is
	 * deinitialised and no new jobs are started; any deleter work still
	 * running is at most waiting for the mutex and will abort once it
	 * sees the app is no longer initialised */

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}
}

static void gk20a_cde_stop(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;

	/* prevent further conversions and delayed works from working */
	cde_app->initialised = false;
	/* free all data, empty the list */
	gk20a_cde_remove_contexts(l);
}

void gk20a_cde_destroy(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;

	if (!cde_app->initialised)
		return;

	nvgpu_mutex_acquire(&cde_app->mutex);
	gk20a_cde_stop(l);
	nvgpu_mutex_release(&cde_app->mutex);

	nvgpu_mutex_destroy(&cde_app->mutex);
}

void gk20a_cde_suspend(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	if (!cde_app->initialised)
		return;

	nvgpu_mutex_acquire(&cde_app->mutex);

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	nvgpu_mutex_release(&cde_app->mutex);
}

static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	cde_ctx = gk20a_cde_allocate_context(l);
	if (IS_ERR(cde_ctx))
		return PTR_ERR(cde_ctx);

	nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
	cde_app->ctx_count++;
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;

	return 0;
}

static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	int err;
	int i;

	for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
		err = gk20a_cde_create_context(l);
		if (err)
			goto out;
	}

	return 0;
out:
	gk20a_cde_remove_contexts(l);
	return err;
}

static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
		struct nvgpu_firmware *img,
		struct gk20a_cde_hdr_buf *buf)
{
	struct nvgpu_mem *mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* check that the file can hold the buf */
	if (buf->data_byte_offset != 0 &&
	    buf->data_byte_offset + buf->num_bytes > img->size) {
		nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	/* check that we have enough buf elems available */
	if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
		nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -ENOMEM;
	}

	/* allocate buf */
	mem = cde_ctx->mem + cde_ctx->num_bufs;
	err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
	if (err) {
		nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -ENOMEM;
	}

	/* copy the content */
	if (buf->data_byte_offset != 0)
		memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
		       buf->num_bytes);

	cde_ctx->num_bufs++;

	return 0;
}

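/*
 * Patch a value into a loaded buffer in place. The value is shifted and
 * masked first, and only the bits under the mask are replaced; the read of
 * the current contents is skipped when the mask covers the whole word, since
 * no old bits would survive. The U64_BIG variant swaps the two 32-bit halves
 * before and after the merge so the patched field lands in big-endian word
 * order in memory.
 */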
static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
		int type, s32 shift, u64 mask, u64 value)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr = target;
	u64 *target_mem_ptr_u64 = target;
	u64 current_value, new_value;

	value = (shift >= 0) ? value << shift : value >> -shift;
	value &= mask;

	/* read current data from the location */
	current_value = 0;
	if (type == TYPE_PARAM_TYPE_U32) {
		if (mask != 0xfffffffful)
			current_value = *target_mem_ptr;
	} else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
		if (mask != ~0ul)
			current_value = *target_mem_ptr_u64;
	} else if (type == TYPE_PARAM_TYPE_U64_BIG) {
		current_value = *target_mem_ptr_u64;
		current_value = (u64)(current_value >> 32) |
				(u64)(current_value << 32);
	} else {
		nvgpu_warn(g, "cde: unknown type. type=%d", type);
		return -EINVAL;
	}

	current_value &= ~mask;
	new_value = current_value | value;

	/* store the element data back */
	if (type == TYPE_PARAM_TYPE_U32)
		*target_mem_ptr = (u32)new_value;
	else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
		*target_mem_ptr_u64 = new_value;
	else {
		new_value = (u64)(new_value >> 32) |
				(u64)(new_value << 32);
		*target_mem_ptr_u64 = new_value;
	}

	return 0;
}

static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
		struct nvgpu_firmware *img,
		struct gk20a_cde_hdr_replace *replace)
{
	struct nvgpu_mem *source_mem;
	struct nvgpu_mem *target_mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr;
	u64 vaddr;
	int err;

	if (replace->target_buf >= cde_ctx->num_bufs ||
	    replace->source_buf >= cde_ctx->num_bufs) {
		nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
			   replace->target_buf, replace->source_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	source_mem = cde_ctx->mem + replace->source_buf;
	target_mem = cde_ctx->mem + replace->target_buf;
	target_mem_ptr = target_mem->cpu_va;

	if (source_mem->size < (replace->source_byte_offset + 3) ||
	    target_mem->size < (replace->target_byte_offset + 3)) {
		nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
			   replace->target_byte_offset,
			   replace->source_byte_offset,
			   source_mem->size,
			   target_mem->size);
		return -EINVAL;
	}

	/* calculate the target pointer */
	target_mem_ptr += (replace->target_byte_offset / sizeof(u32));

	/* determine patch value */
	vaddr = source_mem->gpu_va + replace->source_byte_offset;
	err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
				 replace->shift, replace->mask,
				 vaddr);
	if (err) {
		nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
			   err, replace->target_buf,
			   replace->target_byte_offset,
			   replace->source_buf,
			   replace->source_byte_offset);
	}

	return err;
}

static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_mem *target_mem;
	u32 *target_mem_ptr;
	u64 new_data;
	int user_id = 0, err;
	unsigned int i;

	for (i = 0; i < cde_ctx->num_params; i++) {
		struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
		target_mem = cde_ctx->mem + param->target_buf;
		target_mem_ptr = target_mem->cpu_va;
		target_mem_ptr += (param->target_byte_offset / sizeof(u32));

		switch (param->id) {
		case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
			new_data = g->gr.comptags_per_cacheline;
			break;
		case TYPE_PARAM_GPU_CONFIGURATION:
			new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
				g->gr.cacheline_size;
			break;
		case TYPE_PARAM_FIRSTPAGEOFFSET:
			new_data = cde_ctx->surf_param_offset;
			break;
		case TYPE_PARAM_NUMPAGES:
			new_data = cde_ctx->surf_param_lines;
			break;
		case TYPE_PARAM_BACKINGSTORE:
			new_data = cde_ctx->backing_store_vaddr;
			break;
		case TYPE_PARAM_DESTINATION:
			new_data = cde_ctx->compbit_vaddr;
			break;
		case TYPE_PARAM_DESTINATION_SIZE:
			new_data = cde_ctx->compbit_size;
			break;
		case TYPE_PARAM_BACKINGSTORE_SIZE:
			new_data = g->gr.compbit_store.mem.size;
			break;
		case TYPE_PARAM_SOURCE_SMMU_ADDR:
			new_data = gpuva_to_iova_base(cde_ctx->vm,
						      cde_ctx->surf_vaddr);
			if (new_data == 0) {
				nvgpu_warn(g, "cde: failed to find 0x%llx",
					   cde_ctx->surf_vaddr);
				return -EINVAL;
			}
			break;
		case TYPE_PARAM_BACKINGSTORE_BASE_HW:
			new_data = g->gr.compbit_store.base_hw;
			break;
		case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
			new_data = g->gr.gobs_per_comptagline_per_slice;
			break;
		case TYPE_PARAM_SCATTERBUFFER:
			new_data = cde_ctx->scatterbuffer_vaddr;
			break;
		case TYPE_PARAM_SCATTERBUFFER_SIZE:
			new_data = cde_ctx->scatterbuffer_size;
			break;
		default:
			user_id = param->id - NUM_RESERVED_PARAMS;
			if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
				continue;
			new_data = cde_ctx->user_param_values[user_id];
		}

		nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
			  i, param->id, param->target_buf,
			  param->target_byte_offset, new_data,
			  param->data_offset, param->type, param->shift,
			  param->mask);

		new_data += param->data_offset;

		err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
					 param->shift, param->mask, new_data);

		if (err) {
			nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
				   err, i, param->id, param->target_buf,
				   param->target_byte_offset, new_data);
			return err;
		}
	}

	return 0;
}

static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
		struct nvgpu_firmware *img,
		struct gk20a_cde_hdr_param *param)
{
	struct nvgpu_mem *target_mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;

	if (param->target_buf >= cde_ctx->num_bufs) {
		nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
			   cde_ctx->num_params, param->target_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	target_mem = cde_ctx->mem + param->target_buf;
	if (target_mem->size < (param->target_byte_offset + 3)) {
		nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
			   cde_ctx->num_params, param->target_byte_offset,
			   target_mem->size);
		return -EINVAL;
	}

	/* does this parameter fit into our parameter structure */
	if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
		nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
			   cde_ctx->num_params);
		return -ENOMEM;
	}

	/* is the given id valid? */
	if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
		nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
			   cde_ctx->num_params, param->id,
			   NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
		return -EINVAL;
	}

	cde_ctx->params[cde_ctx->num_params] = *param;
	cde_ctx->num_params++;

	return 0;
}

static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
		struct nvgpu_firmware *img,
		u32 required_class)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* CDE enabled */
	cde_ctx->ch->cde = true;

	err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
	if (err) {
		nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", err);
		return err;
	}

	return 0;
}

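/*
 * Translate a firmware command element list into gpfifo entries. Each entry
 * points at a segment of a previously loaded buffer: entry0 carries the low
 * 32 bits of the GPU VA, entry1 the high bits plus the segment length in
 * 32-bit words.
 */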
static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
		struct nvgpu_firmware *img,
		u32 op,
		struct gk20a_cde_cmd_elem *cmd_elem,
		u32 num_elems)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem;
	u32 *num_entries;
	unsigned int i;

	/* check command type */
	if (op == TYPE_BUF_COMMAND_INIT) {
		gpfifo = &cde_ctx->init_convert_cmd;
		num_entries = &cde_ctx->init_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
		gpfifo = &cde_ctx->convert_cmd;
		num_entries = &cde_ctx->convert_cmd_num_entries;
	} else {
		nvgpu_warn(g, "cde: unknown command. op=%u", op);
		return -EINVAL;
	}

	/* allocate gpfifo entries to be pushed */
	*gpfifo = nvgpu_kzalloc(g,
			sizeof(struct nvgpu_gpfifo_entry) * num_elems);
	if (!*gpfifo) {
		nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
		return -ENOMEM;
	}

	gpfifo_elem = *gpfifo;
	for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
		struct nvgpu_mem *target_mem;

		/* validate the current entry */
		if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
			nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
				   cmd_elem->target_buf, cde_ctx->num_bufs);
			return -EINVAL;
		}

		target_mem = cde_ctx->mem + cmd_elem->target_buf;
		if (target_mem->size <
		    cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
			nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
				   target_mem->size,
				   cmd_elem->target_byte_offset,
				   cmd_elem->num_bytes);
			return -EINVAL;
		}

		/* store the element into gpfifo */
		gpfifo_elem->entry0 =
			u64_lo32(target_mem->gpu_va +
				 cmd_elem->target_byte_offset);
		gpfifo_elem->entry1 =
			u64_hi32(target_mem->gpu_va +
				 cmd_elem->target_byte_offset) |
			pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
						 sizeof(u32));
	}

	*num_entries = num_elems;
	return 0;
}

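/*
 * Merge the init and convert gpfifo lists into one contiguous allocation so
 * that a single submit can cover both: init_convert_cmd points at the start
 * (init entries followed by convert entries) and convert_cmd points into the
 * middle, at the first convert entry.
 */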
static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
		sizeof(struct nvgpu_gpfifo_entry);
	unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
		sizeof(struct nvgpu_gpfifo_entry);
	unsigned long total_bytes = init_bytes + conv_bytes;
	struct nvgpu_gpfifo_entry *combined_cmd;

	/* allocate buffer that has space for both */
	combined_cmd = nvgpu_kzalloc(g, total_bytes);
	if (!combined_cmd) {
		nvgpu_warn(g,
			   "cde: could not allocate memory for gpfifo entries");
		return -ENOMEM;
	}

	/* move the original init here and append convert */
	memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
	memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
	       cde_ctx->convert_cmd, conv_bytes);

	nvgpu_kfree(g, cde_ctx->init_convert_cmd);
	nvgpu_kfree(g, cde_ctx->convert_cmd);

	cde_ctx->init_convert_cmd = combined_cmd;
	cde_ctx->convert_cmd = combined_cmd
		+ cde_ctx->init_cmd_num_entries;

	return 0;
}

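/*
 * Parse the firmware image. The layout, as consumed below, is two u32 words
 * (firmware version, element count) followed by an array of header elements;
 * each element describes a buffer to load, a pointer to patch in, a launch
 * parameter, a required engine class, a command list, or a data array.
 */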
static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
		struct nvgpu_firmware *img)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	u32 *data = (u32 *)img->data;
	u32 num_of_elems;
	struct gk20a_cde_hdr_elem *elem;
	u32 min_size = 0;
	int err = 0;
	unsigned int i;

	min_size += 2 * sizeof(u32);
	if (img->size < min_size) {
		nvgpu_warn(g, "cde: invalid image header");
		return -EINVAL;
	}

	cde_app->firmware_version = data[0];
	num_of_elems = data[1];

	min_size += num_of_elems * sizeof(*elem);
	if (img->size < min_size) {
		nvgpu_warn(g, "cde: bad image");
		return -EINVAL;
	}

	elem = (struct gk20a_cde_hdr_elem *)&data[2];
	for (i = 0; i < num_of_elems; i++) {
		int err = 0;
		switch (elem->type) {
		case TYPE_BUF:
			err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
			break;
		case TYPE_REPLACE:
			err = gk20a_init_cde_replace(cde_ctx, img,
						     &elem->replace);
			break;
		case TYPE_PARAM:
			err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
			break;
		case TYPE_REQUIRED_CLASS:
			err = gk20a_init_cde_required_class(cde_ctx, img,
							    elem->required_class);
			break;
		case TYPE_COMMAND:
		{
			struct gk20a_cde_cmd_elem *cmd = (void *)
				&img->data[elem->command.data_byte_offset];
			err = gk20a_init_cde_command(cde_ctx, img,
						     elem->command.op, cmd,
						     elem->command.num_entries);
			break;
		}
		case TYPE_ARRAY:
			memcpy(&cde_app->arrays[elem->array.id][0],
			       elem->array.data,
			       MAX_CDE_ARRAY_ENTRIES * sizeof(u32));
			break;
		default:
			nvgpu_warn(g, "cde: unknown header element");
			err = -EINVAL;
		}

		if (err)
			goto deinit_image;

		elem++;
	}

	if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
		nvgpu_warn(g, "cde: init command not defined");
		err = -EINVAL;
		goto deinit_image;
	}

	if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
		nvgpu_warn(g, "cde: convert command not defined");
		err = -EINVAL;
		goto deinit_image;
	}

	err = gk20a_cde_pack_cmdbufs(cde_ctx);
	if (err)
		goto deinit_image;

	return 0;

deinit_image:
	gk20a_deinit_cde_img(cde_ctx);
	return err;
}

static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
		u32 op, struct nvgpu_channel_fence *fence,
		u32 flags, struct gk20a_fence **fence_out)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_gpfifo_entry *gpfifo = NULL;
	int num_entries = 0;

	/* check command type */
	if (op == TYPE_BUF_COMMAND_INIT) {
		/* both init and convert combined */
		gpfifo = cde_ctx->init_convert_cmd;
		num_entries = cde_ctx->init_cmd_num_entries
			+ cde_ctx->convert_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
		gpfifo = cde_ctx->convert_cmd;
		num_entries = cde_ctx->convert_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_NOOP) {
		/* Any non-null gpfifo will suffice with 0 num_entries */
		gpfifo = cde_ctx->init_convert_cmd;
		num_entries = 0;
	} else {
		nvgpu_warn(g, "cde: unknown buffer");
		return -EINVAL;
	}

	if (gpfifo == NULL) {
		nvgpu_warn(g, "cde: buffer not available");
		return -ENOSYS;
	}

	return nvgpu_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo,
			num_entries, flags, fence, fence_out);
}

static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
	struct gk20a *g = &cde_ctx->l->g;

	nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
	trace_gk20a_cde_release(cde_ctx);

	nvgpu_mutex_acquire(&cde_app->mutex);

	if (cde_ctx->in_use) {
		cde_ctx->in_use = false;
		nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
		cde_app->ctx_usecount--;
	} else {
		nvgpu_log_info(g, "double release cde context %p", cde_ctx);
	}

	nvgpu_mutex_release(&cde_app->mutex);
}

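/*
 * Delayed work that frees an idle temporary context. It is scheduled from
 * the channel-idle callback and cancelled whenever the context is taken back
 * into use; the in_use flag and the app mutex guard against racing with a
 * new conversion.
 */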
static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct delayed_work *delay_work = to_delayed_work(work);
	struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
			struct gk20a_cde_ctx, ctx_deleter_work);
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* someone has just taken it? engine deletion started? */
	if (cde_ctx->in_use || !cde_app->initialised)
		return;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: attempting to delete temporary %p", cde_ctx);

	err = gk20a_busy(g);
	if (err) {
		/* this context would find new use anyway later, so not
		 * freeing it here does not leak anything */
		nvgpu_warn(g, "cde: cannot power on gk20a, postponing"
			   " temp ctx deletion");
		return;
	}

	nvgpu_mutex_acquire(&cde_app->mutex);
	if (cde_ctx->in_use || !cde_app->initialised) {
		nvgpu_log(g, gpu_dbg_cde_ctx,
			  "cde: context use raced, not deleting %p",
			  cde_ctx);
		goto out;
	}

	WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
	     "double pending %p", cde_ctx);

	gk20a_cde_remove_ctx(cde_ctx);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: destroyed %p count=%d use=%d max=%d",
		  cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
		  cde_app->ctx_count_top);

out:
	nvgpu_mutex_release(&cde_app->mutex);
	gk20a_idle(g);
}

static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
__must_hold(&cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	/* exhausted? */

	if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
		return ERR_PTR(-EAGAIN);

	/* idle context available? */

	if (!nvgpu_list_empty(&cde_app->free_contexts)) {
		cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
				gk20a_cde_ctx, list);
		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
			  "cde: got free %p count=%d use=%d max=%d",
			  cde_ctx, cde_app->ctx_count,
			  cde_app->ctx_usecount,
			  cde_app->ctx_count_top);
		trace_gk20a_cde_get_context(cde_ctx);

		/* deleter work may be scheduled, but in_use prevents it */
		cde_ctx->in_use = true;
		nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
		cde_app->ctx_usecount++;

		/* cancel any deletions now that ctx is in use */
		gk20a_cde_cancel_deleter(cde_ctx, true);
		return cde_ctx;
	}

	/* no free contexts, get a temporary one */

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: no free contexts, count=%d",
		  cde_app->ctx_count);

	cde_ctx = gk20a_cde_allocate_context(l);
	if (IS_ERR(cde_ctx)) {
		nvgpu_warn(g, "cde: cannot allocate context: %ld",
			   PTR_ERR(cde_ctx));
		return cde_ctx;
	}

	trace_gk20a_cde_get_context(cde_ctx);
	cde_ctx->in_use = true;
	cde_ctx->is_temporary = true;
	cde_app->ctx_usecount++;
	cde_app->ctx_count++;
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;
	nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);

	return cde_ctx;
}

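/*
 * Wrapper around gk20a_cde_do_get_context() that retries on -EAGAIN (too
 * many contexts in use) until MAX_CTX_RETRY_TIME expires, dropping the app
 * mutex between attempts so that in-flight conversions can release their
 * contexts.
 */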
static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx = NULL;
	struct nvgpu_timeout timeout;

	nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
			   NVGPU_TIMER_CPU_TIMER);

	do {
		cde_ctx = gk20a_cde_do_get_context(l);
		if (PTR_ERR(cde_ctx) != -EAGAIN)
			break;

		/* exhausted, retry */
		nvgpu_mutex_release(&cde_app->mutex);
		cond_resched();
		nvgpu_mutex_acquire(&cde_app->mutex);
	} while (!nvgpu_timeout_expired(&timeout));

	return cde_ctx;
}

static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_ctx *cde_ctx;
	int ret;

	cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
	if (!cde_ctx)
		return ERR_PTR(-ENOMEM);

	cde_ctx->l = l;
	cde_ctx->dev = dev_from_gk20a(g);

	ret = gk20a_cde_load(cde_ctx);
	if (ret) {
		nvgpu_kfree(g, cde_ctx);
		return ERR_PTR(ret);
	}

	nvgpu_init_list_node(&cde_ctx->list);
	cde_ctx->is_temporary = false;
	cde_ctx->in_use = false;
	INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
			  gk20a_cde_ctx_deleter_fn);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
	trace_gk20a_cde_allocate_context(cde_ctx);
	return cde_ctx;
}

static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm,
		u32 map_offset, u32 map_size)
{
	struct gk20a *g = gk20a_from_vm(vm);

	/*
	 * To be simple we will just make the map size depend on the
	 * iommu'ability of the driver. If there's an IOMMU we can rely on
	 * buffers being contiguous. If not, then we'll use 4k pages since we
	 * know that will work for any buffer.
	 */
	if (!nvgpu_iommuable(g))
		return SZ_4K;

	/*
	 * If map size or offset is not 64K aligned then use small pages.
	 */
	if (map_size & (vm->big_page_size - 1) ||
	    map_offset & (vm->big_page_size - 1))
		return SZ_4K;

	return vm->big_page_size;
}

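/*
 * Entry point for a conversion: maps the destination dma-buf into the CDE
 * VM, optionally fills in the scatter buffer describing its physical pages,
 * patches the launch parameters into the command buffers, and submits the
 * init/convert/noop pushbuffer on the context's channel. The post fence
 * returned through fence_out tracks completion.
 */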
int gk20a_cde_convert(struct nvgpu_os_linux *l,
		      struct dma_buf *compbits_scatter_buf,
		      u64 compbits_byte_offset,
		      u64 scatterbuffer_byte_offset,
		      struct nvgpu_channel_fence *fence,
		      u32 __flags, struct gk20a_cde_param *params,
		      int num_params, struct gk20a_fence **fence_out)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_ctx *cde_ctx = NULL;
	struct gk20a_comptags comptags;
	struct nvgpu_os_buffer os_buf = {
		compbits_scatter_buf,
		NULL,
		dev_from_gk20a(g)
	};
	u64 mapped_compbits_offset = 0;
	u64 compbits_size = 0;
	u64 mapped_scatterbuffer_offset = 0;
	u64 scatterbuffer_size = 0;
	u64 map_vaddr = 0;
	u64 map_offset = 0;
	u64 map_size = 0;
	u8 *surface = NULL;
	u64 big_page_mask = 0;
	u32 flags;
	int err, i;
	const s16 compbits_kind = 0;
	u32 submit_op;
	struct dma_buf_attachment *attachment;

	nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
		  compbits_byte_offset, scatterbuffer_byte_offset);

	/* scatter buffer must be after compbits buffer */
	if (scatterbuffer_byte_offset &&
	    scatterbuffer_byte_offset < compbits_byte_offset)
		return -EINVAL;

	err = gk20a_busy(g);
	if (err)
		return err;

	nvgpu_mutex_acquire(&l->cde_app.mutex);
	cde_ctx = gk20a_cde_get_context(l);
	nvgpu_mutex_release(&l->cde_app.mutex);
	if (IS_ERR(cde_ctx)) {
		err = PTR_ERR(cde_ctx);
		goto exit_idle;
	}

	/* First, map the buffer to local va */

	/* ensure that the compbits buffer has drvdata */
	err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
					 dev_from_gk20a(g));
	if (err)
		goto exit_idle;

	/* compbits don't start at a page aligned offset, so we need to align
	   the region to be mapped */
	big_page_mask = cde_ctx->vm->big_page_size - 1;
	map_offset = compbits_byte_offset & ~big_page_mask;
	map_size = compbits_scatter_buf->size - map_offset;

	/* compute compbit start offset from the beginning of the mapped
	   area */
	mapped_compbits_offset = compbits_byte_offset - map_offset;
	if (scatterbuffer_byte_offset) {
		compbits_size = scatterbuffer_byte_offset -
				compbits_byte_offset;
		mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
					      map_offset;
		scatterbuffer_size = compbits_scatter_buf->size -
				     scatterbuffer_byte_offset;
	} else {
		compbits_size = compbits_scatter_buf->size -
				compbits_byte_offset;
	}

	nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu",
		  map_offset, map_size);
	nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
		  mapped_compbits_offset, compbits_size);
	nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
		  mapped_scatterbuffer_offset, scatterbuffer_size);

	/* map the destination buffer */
	get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */
	err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
				 NVGPU_VM_MAP_CACHEABLE |
				 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
				 gk20a_cde_mapping_page_size(cde_ctx->vm,
							     map_offset,
							     map_size),
				 NV_KIND_INVALID,
				 compbits_kind, /* incompressible kind */
				 gk20a_mem_flag_none,
				 map_offset, map_size,
				 NULL,
				 &map_vaddr);
	if (err) {
		nvgpu_warn(g, "cde: failed to map compbits scatter buf at %lld size %lld",
			   map_offset, map_size);
		dma_buf_put(compbits_scatter_buf);
		err = -EINVAL;
		goto exit_idle;
	}

	if (scatterbuffer_byte_offset &&
	    l->ops.cde.need_scatter_buffer &&
	    l->ops.cde.need_scatter_buffer(g)) {
		struct sg_table *sgt;
		void *scatter_buffer;

		surface = dma_buf_vmap(compbits_scatter_buf);
		if (!surface) {
			/* dma_buf_vmap() returns NULL on failure, so check
			 * for NULL rather than IS_ERR() */
			nvgpu_warn(g, "dma_buf_vmap failed");
			err = -EINVAL;
			goto exit_unmap_vaddr;
		}

		scatter_buffer = surface + scatterbuffer_byte_offset;

		nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
			  surface, scatter_buffer);
		sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf,
				   &attachment);
		if (IS_ERR(sgt)) {
			nvgpu_warn(g, "mm_pin failed");
			err = -EINVAL;
			goto exit_unmap_surface;
		} else {
			err = l->ops.cde.populate_scatter_buffer(g, sgt,
					compbits_byte_offset, scatter_buffer,
					scatterbuffer_size);
			WARN_ON(err);

			gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
				       attachment, sgt);
			if (err)
				goto exit_unmap_surface;
		}

		__cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
		dma_buf_vunmap(compbits_scatter_buf, surface);
		surface = NULL;
	}

	/* store source buffer compression tags */
	gk20a_get_comptags(&os_buf, &comptags);
	cde_ctx->surf_param_offset = comptags.offset;
	cde_ctx->surf_param_lines = comptags.lines;

	/* store surface vaddr. This is actually compbit vaddr, but since
	   compbits live in the same surface, and we can get the alloc base
	   address by using gpuva_to_iova_base, this will do */
	cde_ctx->surf_vaddr = map_vaddr;

	/* store information about destination */
	cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
	cde_ctx->compbit_size = compbits_size;

	cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
	cde_ctx->scatterbuffer_size = scatterbuffer_size;

	/* remove existing argument data */
	memset(cde_ctx->user_param_values, 0,
	       sizeof(cde_ctx->user_param_values));

	/* read user space arguments for the conversion */
	for (i = 0; i < num_params; i++) {
		struct gk20a_cde_param *param = params + i;
		int id = param->id - NUM_RESERVED_PARAMS;

		if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
			nvgpu_warn(g, "cde: unknown user parameter");
			err = -EINVAL;
			goto exit_unmap_surface;
		}
		cde_ctx->user_param_values[id] = param->value;
	}

	/* patch data */
	err = gk20a_cde_patch_params(cde_ctx);
	if (err) {
		nvgpu_warn(g, "cde: failed to patch parameters");
		goto exit_unmap_surface;
	}

	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
		  g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
		  cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
		  cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);

	/* always take the postfence since it is needed for protecting the
	 * cde context */
	flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;

	/* gk20a_cde_execute_buffer() will grab a power reference of its own */
	gk20a_idle(g);

	if (comptags.lines == 0) {
		/*
		 * Nothing to do on the buffer, but do a null kickoff for
		 * managing the pre and post fences.
		 */
		submit_op = TYPE_BUF_COMMAND_NOOP;
	} else if (!cde_ctx->init_cmd_executed) {
		/*
		 * First time, so include the init pushbuf too in addition to
		 * the conversion code.
		 */
		submit_op = TYPE_BUF_COMMAND_INIT;
	} else {
		/*
		 * The usual condition: execute just the conversion.
		 */
		submit_op = TYPE_BUF_COMMAND_CONVERT;
	}
	err = gk20a_cde_execute_buffer(cde_ctx, submit_op,
				       fence, flags, fence_out);

	if (comptags.lines != 0 && !err)
		cde_ctx->init_cmd_executed = true;

	/* unmap the buffers - channel holds references to them now */
	nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);

	return err;

exit_unmap_surface:
	if (surface)
		dma_buf_vunmap(compbits_scatter_buf, surface);
exit_unmap_vaddr:
	nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
exit_idle:
	gk20a_idle(g);
	return err;
}

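/*
 * Channel update callback, run when the CDE channel's job list drains. It
 * releases the context back to the free list, schedules deletion for
 * temporary contexts, and replaces the context with a fresh one if the
 * channel has timed out.
 */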
static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_ctx *cde_ctx = data;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	bool channel_idle;

	channel_gk20a_joblist_lock(ch);
	channel_idle = channel_gk20a_joblist_is_empty(ch);
	channel_gk20a_joblist_unlock(ch);

	if (!channel_idle)
		return;

	trace_gk20a_cde_finished_ctx_cb(cde_ctx);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
	if (!cde_ctx->in_use)
		nvgpu_log_info(g, "double finish cde context %p on channel %p",
			       cde_ctx, ch);

	if (gk20a_channel_check_timedout(ch)) {
		if (cde_ctx->is_temporary) {
			nvgpu_warn(g,
				   "cde: channel had timed out"
				   " (temporary channel)");
			/* going to be deleted anyway */
		} else {
			nvgpu_warn(g,
				   "cde: channel had timed out"
				   ", reloading");
			/* mark it to be deleted, replace with a new one */
			nvgpu_mutex_acquire(&cde_app->mutex);
			cde_ctx->is_temporary = true;
			if (gk20a_cde_create_context(l)) {
				nvgpu_err(g, "cde: can't replace context");
			}
			nvgpu_mutex_release(&cde_app->mutex);
		}
	}

	/* delete temporary contexts later (watch for doubles) */
	if (cde_ctx->is_temporary && cde_ctx->in_use) {
		WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
		schedule_delayed_work(&cde_ctx->ctx_deleter_work,
				      msecs_to_jiffies(CTX_DELETE_TIME));
	}

	if (!gk20a_channel_check_timedout(ch)) {
		gk20a_cde_ctx_release(cde_ctx);
	}
}

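/*
 * One-time setup for a context: fetch the gpu2cde firmware, open a TSG and
 * a kernel channel bound to the dedicated CDE VM, map the compbit backing
 * store read-only into that VM, and parse the firmware image into buffers
 * and command lists.
 */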
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_firmware *img;
	struct channel_gk20a *ch;
	struct tsg_gk20a *tsg;
	struct gr_gk20a *gr = &g->gr;
	struct nvgpu_setup_bind_args setup_bind_args;
	int err = 0;
	u64 vaddr;

	img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
	if (!img) {
		nvgpu_err(g, "cde: could not fetch the firmware");
		return -ENOSYS;
	}

	tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
	if (!tsg) {
		nvgpu_err(g, "cde: could not create TSG");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
			cde_ctx,
			-1,
			false);
	if (!ch) {
		nvgpu_warn(g, "cde: gk20a channel not available");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	ch->timeout.enabled = false;

	/* bind the channel to the vm */
	err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
	if (err) {
		nvgpu_warn(g, "cde: could not bind vm");
		goto err_commit_va;
	}

	err = gk20a_tsg_bind_channel(tsg, ch);
	if (err) {
		nvgpu_err(g, "cde: unable to bind to tsg");
		goto err_setup_bind;
	}

	setup_bind_args.num_gpfifo_entries = 1024;
	setup_bind_args.num_inflight_jobs = 0;
	setup_bind_args.flags = 0;
	err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
	if (err) {
		nvgpu_warn(g, "cde: unable to setup channel");
		goto err_setup_bind;
	}

	/* map backing store to gpu virtual space */
	vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
			       g->gr.compbit_store.mem.size,
			       NVGPU_VM_MAP_CACHEABLE,
			       gk20a_mem_flag_read_only,
			       false,
			       gr->compbit_store.mem.aperture);

	if (!vaddr) {
		nvgpu_warn(g, "cde: cannot map compression bit backing store");
		err = -ENOMEM;
		goto err_map_backingstore;
	}

	/* store initialisation data */
	cde_ctx->ch = ch;
	cde_ctx->tsg = tsg;
	cde_ctx->vm = ch->vm;
	cde_ctx->backing_store_vaddr = vaddr;

	/* initialise the firmware */
	err = gk20a_init_cde_img(cde_ctx, img);
	if (err) {
		nvgpu_warn(g, "cde: image initialisation failed");
		goto err_init_cde_img;
	}

	/* initialisation done */
	nvgpu_release_firmware(g, img);

	return 0;

err_init_cde_img:
	nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
err_map_backingstore:
err_setup_bind:
	nvgpu_vm_put(ch->vm);
err_commit_va:
err_get_gk20a_channel:
	nvgpu_release_firmware(g, img);
	nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
	return err;
}

int gk20a_cde_reload(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	int err;

	if (!cde_app->initialised)
		return -ENOSYS;

	err = gk20a_busy(g);
	if (err)
		return err;

	nvgpu_mutex_acquire(&cde_app->mutex);

	gk20a_cde_stop(l);

	err = gk20a_cde_create_contexts(l);
	if (!err)
		cde_app->initialised = true;

	nvgpu_mutex_release(&cde_app->mutex);

	gk20a_idle(g);
	return err;
}

int gk20a_init_cde_support(struct nvgpu_os_linux *l)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a *g = &l->g;
	int err;

	if (cde_app->initialised)
		return 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");

	err = nvgpu_mutex_init(&cde_app->mutex);
	if (err)
		return err;

	nvgpu_mutex_acquire(&cde_app->mutex);

	nvgpu_init_list_node(&cde_app->free_contexts);
	nvgpu_init_list_node(&cde_app->used_contexts);
	cde_app->ctx_count = 0;
	cde_app->ctx_count_top = 0;
	cde_app->ctx_usecount = 0;

	err = gk20a_cde_create_contexts(l);
	if (!err)
		cde_app->initialised = true;

	nvgpu_mutex_release(&cde_app->mutex);
	nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err);

	if (err)
		nvgpu_mutex_destroy(&cde_app->mutex);

	return err;
}

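/*
 * Launch patch IDs written by gk20a_buffer_convert_gpu_to_cde_v1() below.
 * They fall in the user parameter range of gk20a_cde_patch_params(), i.e.
 * id - NUM_RESERVED_PARAMS indexes user_param_values[]; the numbering
 * starting at 1024 suggests NUM_RESERVED_PARAMS is 1024.
 */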
enum cde_launch_patch_id {
	PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
	PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
	PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
	PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
	PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
	PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
	PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
	PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
	PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
	PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
	PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
	PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
	PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
	PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
	PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
	PATCH_USER_CONST_XBLOCKS_ID = 1041,
	PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
	PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
	PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
	PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
	PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
	PATCH_H_LAUNCH_WORD1_ID = 1049,
	PATCH_H_LAUNCH_WORD2_ID = 1050,
	PATCH_V_LAUNCH_WORD1_ID = 1051,
	PATCH_V_LAUNCH_WORD2_ID = 1052,
	PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
	PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
	PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
	PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
};

/* maximum number of WRITE_PATCHes in the below function */
#define MAX_CDE_LAUNCH_PATCHES 32

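/*
 * Build the launch parameter list for a v1 firmware conversion. Tiles are
 * 8x8 pixels (xtiles/ytiles round the surface dimensions up), workgroups are
 * 8x8 threads, and one byte of output holds four compbit pairs; together
 * these give the grid alignments below. Separate horizontal and vertical
 * passes are launched with their own grid sizes and shader programs.
 */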
1516 | static int gk20a_buffer_convert_gpu_to_cde_v1( | ||
1517 | struct nvgpu_os_linux *l, | ||
1518 | struct dma_buf *dmabuf, u32 consumer, | ||
1519 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1520 | u64 scatterbuffer_offset, | ||
1521 | u32 width, u32 height, u32 block_height_log2, | ||
1522 | u32 submit_flags, struct nvgpu_channel_fence *fence_in, | ||
1523 | struct gk20a_buffer_state *state) | ||
1524 | { | ||
1525 | struct gk20a *g = &l->g; | ||
1526 | struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; | ||
1527 | int param = 0; | ||
1528 | int err = 0; | ||
1529 | struct gk20a_fence *new_fence = NULL; | ||
1530 | const int wgx = 8; | ||
1531 | const int wgy = 8; | ||
1532 | const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ | ||
1533 | const int xalign = compbits_per_byte * wgx; | ||
1534 | const int yalign = wgy; | ||
1535 | |||
1536 | /* Compute per launch parameters */ | ||
1537 | const int xtiles = (width + 7) >> 3; | ||
1538 | const int ytiles = (height + 7) >> 3; | ||
1539 | const int gridw_h = roundup(xtiles, xalign) / xalign; | ||
1540 | const int gridh_h = roundup(ytiles, yalign) / yalign; | ||
1541 | const int gridw_v = roundup(ytiles, xalign) / xalign; | ||
1542 | const int gridh_v = roundup(xtiles, yalign) / yalign; | ||
1543 | const int xblocks = (xtiles + 1) >> 1; | ||
1544 | const int voffset = compbits_voffset - compbits_hoffset; | ||
1545 | |||
1546 | int hprog = -1; | ||
1547 | int vprog = -1; | ||
1548 | |||
1549 | if (l->ops.cde.get_program_numbers) { | ||
1550 | l->ops.cde.get_program_numbers(g, block_height_log2, | ||
1551 | l->cde_app.shader_parameter, | ||
1552 | &hprog, &vprog); | ||
1553 | } else { | ||
1554 | nvgpu_warn(g, "cde: chip not supported"); | ||
1555 | return -ENOSYS; | ||
1556 | } | ||
1557 | |||
1558 | if (hprog < 0 || vprog < 0) { | ||
1559 | nvgpu_warn(g, "cde: could not determine programs"); | ||
1560 | return -ENOSYS; | ||
1561 | } | ||
1562 | |||
1563 | if (xtiles > 8192 / 8 || ytiles > 8192 / 8) | ||
1564 | nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", | ||
1565 | xtiles, ytiles); | ||
1566 | |||
1567 | nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", | ||
1568 | width, height, block_height_log2, | ||
1569 | compbits_hoffset, compbits_voffset, scatterbuffer_offset); | ||
1570 | nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", | ||
1571 | width, height, xtiles, ytiles); | ||
1572 | nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", | ||
1573 | wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); | ||
1574 | nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", | ||
1575 | hprog, | ||
1576 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], | ||
1577 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], | ||
1578 | vprog, | ||
1579 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], | ||
1580 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); | ||
1581 | |||
1582 | /* Write parameters */ | ||
1583 | #define WRITE_PATCH(NAME, VALUE) \ | ||
1584 | params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} | ||
1585 | WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks); | ||
1586 | WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, | ||
1587 | block_height_log2); | ||
1588 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); | ||
1589 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); | ||
1590 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); | ||
1591 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); | ||
1592 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); | ||
1593 | |||
1594 | WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h); | ||
1595 | WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h); | ||
1596 | WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0); | ||
1597 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h); | ||
1598 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h); | ||
1599 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1); | ||
1600 | |||
1601 | WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v); | ||
1602 | WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v); | ||
1603 | WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset); | ||
1604 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v); | ||
1605 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v); | ||
1606 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); | ||
1607 | |||
1608 | WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, | ||
1609 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); | ||
1610 | WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, | ||
1611 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); | ||
1612 | WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, | ||
1613 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); | ||
1614 | WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, | ||
1615 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); | ||
1616 | |||
1617 | if (consumer & NVGPU_GPU_COMPBITS_CDEH) { | ||
1618 | WRITE_PATCH(PATCH_H_LAUNCH_WORD1, | ||
1619 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); | ||
1620 | WRITE_PATCH(PATCH_H_LAUNCH_WORD2, | ||
1621 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); | ||
1622 | } else { | ||
1623 | WRITE_PATCH(PATCH_H_LAUNCH_WORD1, | ||
1624 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); | ||
1625 | WRITE_PATCH(PATCH_H_LAUNCH_WORD2, | ||
1626 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); | ||
1627 | } | ||
1628 | |||
1629 | if (consumer & NVGPU_GPU_COMPBITS_CDEV) { | ||
1630 | WRITE_PATCH(PATCH_V_LAUNCH_WORD1, | ||
1631 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); | ||
1632 | WRITE_PATCH(PATCH_V_LAUNCH_WORD2, | ||
1633 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); | ||
1634 | } else { | ||
1635 | WRITE_PATCH(PATCH_V_LAUNCH_WORD1, | ||
1636 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); | ||
1637 | WRITE_PATCH(PATCH_V_LAUNCH_WORD2, | ||
1638 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); | ||
1639 | } | ||
1640 | #undef WRITE_PATCH | ||
1641 | |||
1642 | err = gk20a_cde_convert(l, dmabuf, | ||
1643 | compbits_hoffset, | ||
1644 | scatterbuffer_offset, | ||
1645 | fence_in, submit_flags, | ||
1646 | params, param, &new_fence); | ||
1647 | if (err) | ||
1648 | goto out; | ||
1649 | |||
1650 | /* compbits generated, update state & fence */ | ||
1651 | gk20a_fence_put(state->fence); | ||
1652 | state->fence = new_fence; | ||
1653 | state->valid_compbits |= consumer & | ||
1654 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
1655 | out: | ||
1656 | return err; | ||
1657 | } | ||
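
To make the launch geometry concrete, here is a worked example with a hypothetical 1920x1080 surface; the formulas mirror the constants computed at the top of gk20a_buffer_convert_gpu_to_cde_v1() (roundup() is the kernel macro from linux/kernel.h, and the function exists only to hold the arithmetic):

    static void cde_geometry_example(void)
    {
    	const int width = 1920, height = 1080;
    	const int wgx = 8, wgy = 8, compbits_per_byte = 4;
    	const int xalign = compbits_per_byte * wgx;		/* 32 */
    	const int yalign = wgy;					/* 8 */
    	const int xtiles = (width + 7) >> 3;			/* 240 */
    	const int ytiles = (height + 7) >> 3;			/* 135 */
    	const int gridw_h = roundup(xtiles, xalign) / xalign;	/* 256 / 32 = 8 */
    	const int gridh_h = roundup(ytiles, yalign) / yalign;	/* 136 / 8 = 17 */
    	const int gridw_v = roundup(ytiles, xalign) / xalign;	/* 160 / 32 = 5 */
    	const int gridh_v = roundup(xtiles, yalign) / yalign;	/* 240 / 8 = 30 */
    	const int xblocks = (xtiles + 1) >> 1;			/* 120 */
    }
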
1658 | |||
1659 | static int gk20a_buffer_convert_gpu_to_cde( | ||
1660 | struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, | ||
1661 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1662 | u64 scatterbuffer_offset, | ||
1663 | u32 width, u32 height, u32 block_height_log2, | ||
1664 | u32 submit_flags, struct nvgpu_channel_fence *fence_in, | ||
1665 | struct gk20a_buffer_state *state) | ||
1666 | { | ||
1667 | struct gk20a *g = &l->g; | ||
1668 | int err = 0; | ||
1669 | |||
1670 | if (!l->cde_app.initialised) | ||
1671 | return -ENOSYS; | ||
1672 | |||
1673 | nvgpu_log(g, gpu_dbg_cde, "firmware version = %d", | ||
1674 | l->cde_app.firmware_version); | ||
1675 | |||
1676 | if (l->cde_app.firmware_version == 1) { | ||
1677 | err = gk20a_buffer_convert_gpu_to_cde_v1( | ||
1678 | l, dmabuf, consumer, offset, compbits_hoffset, | ||
1679 | compbits_voffset, scatterbuffer_offset, | ||
1680 | width, height, block_height_log2, | ||
1681 | submit_flags, fence_in, state); | ||
1682 | } else { | ||
1683 | nvgpu_err(g, "unsupported CDE firmware version %d", | ||
1684 | l->cde_app.firmware_version); | ||
1685 | err = -EINVAL; | ||
1686 | } | ||
1687 | |||
1688 | return err; | ||
1689 | } | ||
1690 | |||
1691 | int gk20a_prepare_compressible_read( | ||
1692 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
1693 | u64 compbits_hoffset, u64 compbits_voffset, | ||
1694 | u64 scatterbuffer_offset, | ||
1695 | u32 width, u32 height, u32 block_height_log2, | ||
1696 | u32 submit_flags, struct nvgpu_channel_fence *fence, | ||
1697 | u32 *valid_compbits, u32 *zbc_color, | ||
1698 | struct gk20a_fence **fence_out) | ||
1699 | { | ||
1700 | struct gk20a *g = &l->g; | ||
1701 | int err = 0; | ||
1702 | struct gk20a_buffer_state *state; | ||
1703 | struct dma_buf *dmabuf; | ||
1704 | u32 missing_bits; | ||
1705 | |||
1706 | dmabuf = dma_buf_get(buffer_fd); | ||
1707 | if (IS_ERR(dmabuf)) | ||
1708 | return -EINVAL; | ||
1709 | |||
1710 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1711 | if (err) { | ||
1712 | dma_buf_put(dmabuf); | ||
1713 | return err; | ||
1714 | } | ||
1715 | |||
1716 | nvgpu_mutex_acquire(&state->lock); | ||
1717 | |||
1718 | missing_bits = (state->valid_compbits ^ request) & request; | ||
1719 | |||
1720 | if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { | ||
1721 | |||
1722 | gk20a_fence_put(state->fence); | ||
1723 | state->fence = NULL; | ||
1724 | /* state->fence = decompress(); | ||
1725 | state->valid_compbits = 0; */ | ||
1726 | err = -EINVAL; | ||
1727 | goto out; | ||
1728 | } else if (missing_bits) { | ||
1729 | u32 missing_cde_bits = missing_bits & | ||
1730 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
1731 | if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && | ||
1732 | missing_cde_bits) { | ||
1733 | err = gk20a_buffer_convert_gpu_to_cde( | ||
1734 | l, dmabuf, | ||
1735 | missing_cde_bits, | ||
1736 | offset, compbits_hoffset, | ||
1737 | compbits_voffset, scatterbuffer_offset, | ||
1738 | width, height, block_height_log2, | ||
1739 | submit_flags, fence, | ||
1740 | state); | ||
1741 | if (err) | ||
1742 | goto out; | ||
1743 | } | ||
1744 | } | ||
1745 | |||
1746 | if (state->fence && fence_out) | ||
1747 | *fence_out = gk20a_fence_get(state->fence); | ||
1748 | |||
1749 | if (valid_compbits) | ||
1750 | *valid_compbits = state->valid_compbits; | ||
1751 | |||
1752 | if (zbc_color) | ||
1753 | *zbc_color = state->zbc_color; | ||
1754 | |||
1755 | out: | ||
1756 | nvgpu_mutex_release(&state->lock); | ||
1757 | dma_buf_put(dmabuf); | ||
1758 | return err; | ||
1759 | } | ||
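
A worked example of the state transitions above, with hypothetical flag values: suppose a buffer was last written by the GPU, so state->valid_compbits == NVGPU_GPU_COMPBITS_GPU, and a consumer now requests NVGPU_GPU_COMPBITS_CDEH. Then

    missing_bits = (GPU ^ CDEH) & CDEH = CDEH

so the horizontal CDE conversion runs, and on success the convert path ORs the consumed bits in, leaving valid_compbits == GPU | CDEH; a second read requesting CDEH then finds missing_bits == 0 and returns without converting.
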
1760 | |||
1761 | int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, | ||
1762 | u32 valid_compbits, u64 offset, u32 zbc_color) | ||
1763 | { | ||
1764 | int err; | ||
1765 | struct gk20a_buffer_state *state; | ||
1766 | struct dma_buf *dmabuf; | ||
1767 | |||
1768 | dmabuf = dma_buf_get(buffer_fd); | ||
1769 | if (IS_ERR(dmabuf)) { | ||
1770 | nvgpu_err(g, "invalid dmabuf"); | ||
1771 | return -EINVAL; | ||
1772 | } | ||
1773 | |||
1774 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1775 | if (err) { | ||
1776 | nvgpu_err(g, "could not get state from dmabuf"); | ||
1777 | dma_buf_put(dmabuf); | ||
1778 | return err; | ||
1779 | } | ||
1780 | |||
1781 | nvgpu_mutex_acquire(&state->lock); | ||
1782 | |||
1783 | /* Update the compbits state. */ | ||
1784 | state->valid_compbits = valid_compbits; | ||
1785 | state->zbc_color = zbc_color; | ||
1786 | |||
1787 | /* Discard previous compbit job fence. */ | ||
1788 | gk20a_fence_put(state->fence); | ||
1789 | state->fence = NULL; | ||
1790 | |||
1791 | nvgpu_mutex_release(&state->lock); | ||
1792 | dma_buf_put(dmabuf); | ||
1793 | return 0; | ||
1794 | } | ||
diff --git a/include/os/linux/cde.h b/include/os/linux/cde.h new file mode 100644 index 0000000..5928b62 --- /dev/null +++ b/include/os/linux/cde.h | |||
@@ -0,0 +1,326 @@ | |||
1 | /* | ||
2 | * GK20A color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _CDE_GK20A_H_ | ||
20 | #define _CDE_GK20A_H_ | ||
21 | |||
22 | #include <nvgpu/nvgpu_mem.h> | ||
23 | #include <nvgpu/list.h> | ||
24 | #include <nvgpu/lock.h> | ||
25 | |||
26 | #include <linux/kobject.h> | ||
27 | #include <linux/workqueue.h> | ||
28 | |||
29 | #define MAX_CDE_BUFS 10 | ||
30 | #define MAX_CDE_PARAMS 64 | ||
31 | #define MAX_CDE_USER_PARAMS 40 | ||
32 | #define MAX_CDE_ARRAY_ENTRIES 9 | ||
33 | |||
34 | /* | ||
35 | * The size of the context ring buffer that is dedicated for handling cde | ||
36 | * jobs. Re-using a context (=channel) for a different cde job forces a cpu | ||
37 | * wait on the previous job submitted to that channel, so increasing this value | ||
38 | * reduces the likelihood of stalls. | ||
39 | */ | ||
40 | #define NUM_CDE_CONTEXTS 4 | ||
41 | |||
42 | struct dma_buf; | ||
43 | struct device; | ||
44 | struct nvgpu_os_linux; | ||
45 | struct gk20a; | ||
46 | struct gk20a_fence; | ||
47 | struct nvgpu_channel_fence; | ||
48 | struct channel_gk20a; | ||
49 | struct vm_gk20a; | ||
50 | struct nvgpu_gpfifo_entry; | ||
51 | |||
52 | /* | ||
53 | * this element defines a buffer that is allocated and mapped into gpu address | ||
54 | * space. data_byte_offset defines the beginning of the buffer data inside the | ||
55 | * firmware image. num_bytes defines how many bytes the buffer contains. | ||
56 | * | ||
57 | * If data_byte_offset is zero, we allocate an empty buffer. | ||
58 | */ | ||
59 | |||
60 | struct gk20a_cde_hdr_buf { | ||
61 | u64 data_byte_offset; | ||
62 | u64 num_bytes; | ||
63 | }; | ||
64 | |||
65 | /* | ||
66 | * this element defines a constant patch in a buffer. It computes the | ||
67 | * physical address of <source_buf> + source_byte_offset. The | ||
68 | * address is then folded into the patch value as per: | ||
69 | * value = (current_value & ~mask) | ((address << shift) & mask). | ||
70 | * | ||
71 | * The type field defines the register size as: | ||
72 | * 0=u32, | ||
73 | * 1=u64 (little endian), | ||
74 | * 2=u64 (big endian) | ||
75 | */ | ||
76 | |||
77 | struct gk20a_cde_hdr_replace { | ||
78 | u32 target_buf; | ||
79 | u32 source_buf; | ||
80 | s32 shift; | ||
81 | u32 type; | ||
82 | u64 target_byte_offset; | ||
83 | u64 source_byte_offset; | ||
84 | u64 mask; | ||
85 | }; | ||
86 | |||
87 | enum { | ||
88 | TYPE_PARAM_TYPE_U32 = 0, | ||
89 | TYPE_PARAM_TYPE_U64_LITTLE, | ||
90 | TYPE_PARAM_TYPE_U64_BIG | ||
91 | }; | ||
92 | |||
93 | /* | ||
94 | * this element defines a runtime patch in a buffer. Parameter ids from | ||
95 | * 0 to 1023 are reserved for special usage as follows: | ||
96 | * 0 = comptags_per_cacheline, | ||
97 | * 1 = slices_per_fbp, | ||
98 | * 2 = num_fbps, | ||
99 | * 3 = source buffer first page offset, | ||
100 | * 4 = source buffer block height log2, | ||
101 | * 5 = backing store memory address, | ||
102 | * 6 = destination memory address, | ||
103 | * 7 = destination size (bytes), | ||
104 | * 8 = backing store size (bytes), | ||
105 | * 9 = cache line size | ||
106 | * | ||
107 | * Parameter ids 1024 and above are user-specified, i.e. they determine where | ||
108 | * parameters from user space should be placed in buffers, what their | ||
109 | * type is, etc. | ||
110 | * | ||
111 | * Once the value is available, we add data_offset to the value. | ||
112 | * | ||
113 | * The resulting value is then folded into the patch location as per: | ||
114 | * value = (current_value & ~mask) | ((value << shift) & mask). | ||
115 | * | ||
116 | * The type field defines the register size as: | ||
117 | * 0=u32, | ||
118 | * 1=u64 (little endian), | ||
119 | * 2=u64 (big endian) | ||
120 | */ | ||
121 | |||
122 | struct gk20a_cde_hdr_param { | ||
123 | u32 id; | ||
124 | u32 target_buf; | ||
125 | s32 shift; | ||
126 | u32 type; | ||
127 | s64 data_offset; | ||
128 | u64 target_byte_offset; | ||
129 | u64 mask; | ||
130 | }; | ||
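
A minimal sketch of the patching arithmetic described in the comment above. The helper name is illustrative, and treating a negative shift as a right shift is an assumption suggested by the signed shift field:

    /* Illustrative only: apply one patch by folding the (shifted) value
     * into the masked bits of the current word. Negative shift is
     * assumed to mean a right shift. */
    static u64 cde_apply_patch_sketch(u64 cur, u64 v, s32 shift, u64 mask)
    {
    	u64 shifted = (shift >= 0) ? (v << shift) : (v >> -shift);

    	return (cur & ~mask) | (shifted & mask);
    }
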
131 | |||
132 | enum { | ||
133 | TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0, | ||
134 | TYPE_PARAM_GPU_CONFIGURATION, | ||
135 | TYPE_PARAM_FIRSTPAGEOFFSET, | ||
136 | TYPE_PARAM_NUMPAGES, | ||
137 | TYPE_PARAM_BACKINGSTORE, | ||
138 | TYPE_PARAM_DESTINATION, | ||
139 | TYPE_PARAM_DESTINATION_SIZE, | ||
140 | TYPE_PARAM_BACKINGSTORE_SIZE, | ||
141 | TYPE_PARAM_SOURCE_SMMU_ADDR, | ||
142 | TYPE_PARAM_BACKINGSTORE_BASE_HW, | ||
143 | TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, | ||
144 | TYPE_PARAM_SCATTERBUFFER, | ||
145 | TYPE_PARAM_SCATTERBUFFER_SIZE, | ||
146 | NUM_RESERVED_PARAMS = 1024, | ||
147 | }; | ||
148 | |||
149 | /* | ||
150 | * This header element defines a command. The op field determines whether the | ||
151 | * element defines an init command (0) or a convert command (1). data_byte_offset | ||
152 | * denotes the beginning address of command elements in the file. | ||
153 | */ | ||
154 | |||
155 | struct gk20a_cde_hdr_command { | ||
156 | u32 op; | ||
157 | u32 num_entries; | ||
158 | u64 data_byte_offset; | ||
159 | }; | ||
160 | |||
161 | enum { | ||
162 | TYPE_BUF_COMMAND_INIT = 0, | ||
163 | TYPE_BUF_COMMAND_CONVERT, | ||
164 | TYPE_BUF_COMMAND_NOOP | ||
165 | }; | ||
166 | |||
167 | /* | ||
168 | * This command element defines one entry inside the push buffer. target_buf | ||
169 | * defines the buffer containing the pushbuffer entries, target_byte_offset the | ||
170 | * offset inside that buffer, and num_bytes the size of the entry in bytes. | ||
171 | */ | ||
172 | |||
173 | struct gk20a_cde_cmd_elem { | ||
174 | u32 target_buf; | ||
175 | u32 padding; | ||
176 | u64 target_byte_offset; | ||
177 | u64 num_bytes; | ||
178 | }; | ||
179 | |||
180 | /* | ||
181 | * This element is used for storing a small array of data. | ||
182 | */ | ||
183 | |||
184 | enum { | ||
185 | ARRAY_PROGRAM_OFFSET = 0, | ||
186 | ARRAY_REGISTER_COUNT, | ||
187 | ARRAY_LAUNCH_COMMAND, | ||
188 | NUM_CDE_ARRAYS | ||
189 | }; | ||
190 | |||
191 | struct gk20a_cde_hdr_array { | ||
192 | u32 id; | ||
193 | u32 data[MAX_CDE_ARRAY_ENTRIES]; | ||
194 | }; | ||
195 | |||
196 | /* | ||
197 | * The following defines a single header element. Each element has a type and | ||
198 | * one of the data structures below. | ||
199 | */ | ||
200 | |||
201 | struct gk20a_cde_hdr_elem { | ||
202 | u32 type; | ||
203 | u32 padding; | ||
204 | union { | ||
205 | struct gk20a_cde_hdr_buf buf; | ||
206 | struct gk20a_cde_hdr_replace replace; | ||
207 | struct gk20a_cde_hdr_param param; | ||
208 | u32 required_class; | ||
209 | struct gk20a_cde_hdr_command command; | ||
210 | struct gk20a_cde_hdr_array array; | ||
211 | }; | ||
212 | }; | ||
213 | |||
214 | enum { | ||
215 | TYPE_BUF = 0, | ||
216 | TYPE_REPLACE, | ||
217 | TYPE_PARAM, | ||
218 | TYPE_REQUIRED_CLASS, | ||
219 | TYPE_COMMAND, | ||
220 | TYPE_ARRAY | ||
221 | }; | ||
222 | |||
223 | struct gk20a_cde_param { | ||
224 | u32 id; | ||
225 | u32 padding; | ||
226 | u64 value; | ||
227 | }; | ||
228 | |||
229 | struct gk20a_cde_ctx { | ||
230 | struct nvgpu_os_linux *l; | ||
231 | struct device *dev; | ||
232 | |||
233 | /* channel related data */ | ||
234 | struct channel_gk20a *ch; | ||
235 | struct tsg_gk20a *tsg; | ||
236 | struct vm_gk20a *vm; | ||
237 | |||
238 | /* buf converter configuration */ | ||
239 | struct nvgpu_mem mem[MAX_CDE_BUFS]; | ||
240 | unsigned int num_bufs; | ||
241 | |||
242 | /* buffer patching params (where should patching be done) */ | ||
243 | struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS]; | ||
244 | unsigned int num_params; | ||
245 | |||
246 | /* storage for user space parameter values */ | ||
247 | u32 user_param_values[MAX_CDE_USER_PARAMS]; | ||
248 | |||
249 | u32 surf_param_offset; | ||
250 | u32 surf_param_lines; | ||
251 | u64 surf_vaddr; | ||
252 | |||
253 | u64 compbit_vaddr; | ||
254 | u64 compbit_size; | ||
255 | |||
256 | u64 scatterbuffer_vaddr; | ||
257 | u64 scatterbuffer_size; | ||
258 | |||
259 | u64 backing_store_vaddr; | ||
260 | |||
261 | struct nvgpu_gpfifo_entry *init_convert_cmd; | ||
262 | int init_cmd_num_entries; | ||
263 | |||
264 | struct nvgpu_gpfifo_entry *convert_cmd; | ||
265 | int convert_cmd_num_entries; | ||
266 | |||
267 | struct kobj_attribute attr; | ||
268 | |||
269 | bool init_cmd_executed; | ||
270 | |||
271 | struct nvgpu_list_node list; | ||
272 | bool is_temporary; | ||
273 | bool in_use; | ||
274 | struct delayed_work ctx_deleter_work; | ||
275 | }; | ||
276 | |||
277 | static inline struct gk20a_cde_ctx * | ||
278 | gk20a_cde_ctx_from_list(struct nvgpu_list_node *node) | ||
279 | { | ||
280 | return (struct gk20a_cde_ctx *) | ||
281 | ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list)); | ||
282 | } | ||
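
The helper above open-codes the usual intrusive-list pattern; an equivalent formulation with the kernel's container_of() macro (the alternate name is illustrative only):

    static inline struct gk20a_cde_ctx *
    gk20a_cde_ctx_from_list_alt(struct nvgpu_list_node *node)
    {
    	/* identical offsetof arithmetic, expressed via container_of() */
    	return container_of(node, struct gk20a_cde_ctx, list);
    }
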
283 | |||
284 | struct gk20a_cde_app { | ||
285 | bool initialised; | ||
286 | struct nvgpu_mutex mutex; | ||
287 | |||
288 | struct nvgpu_list_node free_contexts; | ||
289 | struct nvgpu_list_node used_contexts; | ||
290 | unsigned int ctx_count; | ||
291 | unsigned int ctx_usecount; | ||
292 | unsigned int ctx_count_top; | ||
293 | |||
294 | u32 firmware_version; | ||
295 | |||
296 | u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES]; | ||
297 | |||
298 | u32 shader_parameter; | ||
299 | }; | ||
300 | |||
301 | void gk20a_cde_destroy(struct nvgpu_os_linux *l); | ||
302 | void gk20a_cde_suspend(struct nvgpu_os_linux *l); | ||
303 | int gk20a_init_cde_support(struct nvgpu_os_linux *l); | ||
304 | int gk20a_cde_reload(struct nvgpu_os_linux *l); | ||
305 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | ||
306 | struct dma_buf *compbits_buf, | ||
307 | u64 compbits_byte_offset, | ||
308 | u64 scatterbuffer_byte_offset, | ||
309 | struct nvgpu_channel_fence *fence, | ||
310 | u32 __flags, struct gk20a_cde_param *params, | ||
311 | int num_params, struct gk20a_fence **fence_out); | ||
312 | |||
313 | int gk20a_prepare_compressible_read( | ||
314 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
315 | u64 compbits_hoffset, u64 compbits_voffset, | ||
316 | u64 scatterbuffer_offset, | ||
317 | u32 width, u32 height, u32 block_height_log2, | ||
318 | u32 submit_flags, struct nvgpu_channel_fence *fence, | ||
319 | u32 *valid_compbits, u32 *zbc_color, | ||
320 | struct gk20a_fence **fence_out); | ||
321 | int gk20a_mark_compressible_write( | ||
322 | struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, | ||
323 | u32 zbc_color); | ||
324 | int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); | ||
325 | |||
326 | #endif | ||
diff --git a/include/os/linux/cde_gm20b.c b/include/os/linux/cde_gm20b.c new file mode 100644 index 0000000..a9a4754 --- /dev/null +++ b/include/os/linux/cde_gm20b.c | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * GM20B CDE | ||
3 | * | ||
4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <nvgpu/gk20a.h> | ||
26 | |||
27 | #include "cde_gm20b.h" | ||
28 | |||
29 | enum programs { | ||
30 | PROG_HPASS = 0, | ||
31 | PROG_VPASS_LARGE = 1, | ||
32 | PROG_VPASS_SMALL = 2, | ||
33 | PROG_HPASS_DEBUG = 3, | ||
34 | PROG_VPASS_LARGE_DEBUG = 4, | ||
35 | PROG_VPASS_SMALL_DEBUG = 5, | ||
36 | PROG_PASSTHROUGH = 6, | ||
37 | }; | ||
38 | |||
39 | void gm20b_cde_get_program_numbers(struct gk20a *g, | ||
40 | u32 block_height_log2, | ||
41 | u32 shader_parameter, | ||
42 | int *hprog_out, int *vprog_out) | ||
43 | { | ||
44 | int hprog = PROG_HPASS; | ||
45 | int vprog = (block_height_log2 >= 2) ? | ||
46 | PROG_VPASS_LARGE : PROG_VPASS_SMALL; | ||
47 | if (shader_parameter == 1) { | ||
48 | hprog = PROG_PASSTHROUGH; | ||
49 | vprog = PROG_PASSTHROUGH; | ||
50 | } else if (shader_parameter == 2) { | ||
51 | hprog = PROG_HPASS_DEBUG; | ||
52 | vprog = (block_height_log2 >= 2) ? | ||
53 | PROG_VPASS_LARGE_DEBUG : | ||
54 | PROG_VPASS_SMALL_DEBUG; | ||
55 | } | ||
56 | |||
57 | *hprog_out = hprog; | ||
58 | *vprog_out = vprog; | ||
59 | } | ||
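
For illustration, the selection logic above yields the following (inputs hypothetical):

    /*
     * shader_parameter = 0, block_height_log2 = 1 -> PROG_HPASS, PROG_VPASS_SMALL
     * shader_parameter = 0, block_height_log2 = 4 -> PROG_HPASS, PROG_VPASS_LARGE
     * shader_parameter = 1                        -> PROG_PASSTHROUGH for both passes
     * shader_parameter = 2, block_height_log2 = 4 -> PROG_HPASS_DEBUG, PROG_VPASS_LARGE_DEBUG
     */
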
diff --git a/include/os/linux/cde_gm20b.h b/include/os/linux/cde_gm20b.h new file mode 100644 index 0000000..fac8aaf --- /dev/null +++ b/include/os/linux/cde_gm20b.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * GM20B CDE | ||
3 | * | ||
4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GM20B_CDE | ||
26 | #define _NVHOST_GM20B_CDE | ||
27 | |||
28 | void gm20b_cde_get_program_numbers(struct gk20a *g, | ||
29 | u32 block_height_log2, | ||
30 | u32 shader_parameter, | ||
31 | int *hprog_out, int *vprog_out); | ||
32 | |||
33 | #endif | ||
diff --git a/include/os/linux/cde_gp10b.c b/include/os/linux/cde_gp10b.c new file mode 100644 index 0000000..6356d33 --- /dev/null +++ b/include/os/linux/cde_gp10b.c | |||
@@ -0,0 +1,153 @@ | |||
1 | /* | ||
2 | * GP10B CDE | ||
3 | * | ||
4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <nvgpu/log.h> | ||
26 | #include <nvgpu/dma.h> | ||
27 | #include <nvgpu/gk20a.h> | ||
28 | |||
29 | #include "cde_gp10b.h" | ||
30 | |||
31 | enum gp10b_programs { | ||
32 | GP10B_PROG_HPASS = 0, | ||
33 | GP10B_PROG_HPASS_4K = 1, | ||
34 | GP10B_PROG_VPASS = 2, | ||
35 | GP10B_PROG_VPASS_4K = 3, | ||
36 | GP10B_PROG_HPASS_DEBUG = 4, | ||
37 | GP10B_PROG_HPASS_4K_DEBUG = 5, | ||
38 | GP10B_PROG_VPASS_DEBUG = 6, | ||
39 | GP10B_PROG_VPASS_4K_DEBUG = 7, | ||
40 | GP10B_PROG_PASSTHROUGH = 8, | ||
41 | }; | ||
42 | |||
43 | void gp10b_cde_get_program_numbers(struct gk20a *g, | ||
44 | u32 block_height_log2, | ||
45 | u32 shader_parameter, | ||
46 | int *hprog_out, int *vprog_out) | ||
47 | { | ||
48 | int hprog, vprog; | ||
49 | |||
50 | if (shader_parameter == 1) { | ||
51 | hprog = GP10B_PROG_PASSTHROUGH; | ||
52 | vprog = GP10B_PROG_PASSTHROUGH; | ||
53 | } else { | ||
54 | hprog = GP10B_PROG_HPASS; | ||
55 | vprog = GP10B_PROG_VPASS; | ||
56 | if (shader_parameter == 2) { | ||
57 | hprog = GP10B_PROG_HPASS_DEBUG; | ||
58 | vprog = GP10B_PROG_VPASS_DEBUG; | ||
59 | } | ||
60 | if (!nvgpu_iommuable(g)) { | ||
61 | if (!g->mm.disable_bigpage) { | ||
62 | nvgpu_warn(g, | ||
63 | "When no IOMMU big pages cannot be used"); | ||
64 | } | ||
65 | hprog |= 1; | ||
66 | vprog |= 1; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | *hprog_out = hprog; | ||
71 | *vprog_out = vprog; | ||
72 | } | ||
73 | |||
74 | bool gp10b_need_scatter_buffer(struct gk20a *g) | ||
75 | { | ||
76 | return !nvgpu_iommuable(g); | ||
77 | } | ||
78 | |||
79 | static u8 parity(u32 a) | ||
80 | { | ||
81 | a ^= a>>16u; | ||
82 | a ^= a>>8u; | ||
83 | a ^= a>>4u; | ||
84 | a &= 0xfu; | ||
85 | return (0x6996u >> a) & 1u; | ||
86 | } | ||
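
parity() works because 0x6996 (binary 0110 1001 1001 0110) is a 16-entry truth table: bit i of the constant is the parity of the 4-bit index i, and the folding XORs reduce the 32-bit input to such an index. A naive reference with the same contract, shown for comparison only and not part of the driver:

    static u8 parity_ref(u32 a)
    {
    	u8 p = 0;

    	/* XOR of all bits equals popcount modulo 2 */
    	while (a) {
    		p ^= a & 1u;
    		a >>= 1;
    	}
    	return p;
    }
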
87 | |||
88 | int gp10b_populate_scatter_buffer(struct gk20a *g, | ||
89 | struct sg_table *sgt, | ||
90 | size_t surface_size, | ||
91 | void *scatter_buffer_ptr, | ||
92 | size_t scatter_buffer_size) | ||
93 | { | ||
94 | /* map scatter buffer to CPU VA and fill it */ | ||
95 | const u32 page_size_log2 = 12; | ||
96 | const u32 page_size = 1 << page_size_log2; | ||
97 | const u32 page_size_shift = page_size_log2 - 7u; | ||
98 | |||
99 | /* 0011 1111 1111 1111 1111 1110 0100 1000 */ | ||
100 | const u32 getSliceMaskGP10B = 0x3ffffe48; | ||
101 | u8 *scatter_buffer = scatter_buffer_ptr; | ||
102 | |||
103 | size_t i; | ||
104 | struct scatterlist *sg = NULL; | ||
105 | u8 d = 0; | ||
106 | size_t page = 0; | ||
107 | size_t pages_left; | ||
108 | |||
109 | surface_size = round_up(surface_size, page_size); | ||
110 | |||
111 | pages_left = surface_size >> page_size_log2; | ||
112 | if (((pages_left + 7) >> 3) > scatter_buffer_size) | ||
113 | return -ENOMEM; | ||
114 | |||
115 | for_each_sg(sgt->sgl, sg, sgt->nents, i) { | ||
116 | unsigned int j; | ||
117 | u64 surf_pa = sg_phys(sg); | ||
118 | unsigned int n = sg->length >> page_size_log2; | ||
119 | |||
120 | nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n); | ||
121 | |||
122 | for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) { | ||
123 | u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift; | ||
124 | u8 scatter_bit = parity(addr); | ||
125 | u8 bit = page & 7; | ||
126 | |||
127 | d |= scatter_bit << bit; | ||
128 | if (bit == 7) { | ||
129 | scatter_buffer[page >> 3] = d; | ||
130 | d = 0; | ||
131 | } | ||
132 | |||
133 | ++page; | ||
134 | --pages_left; | ||
135 | } | ||
136 | |||
137 | if (pages_left == 0) | ||
138 | break; | ||
139 | } | ||
140 | |||
141 | /* write the last byte in case the number of pages is not divisible by 8 */ | ||
142 | if ((page & 7) != 0) | ||
143 | scatter_buffer[page >> 3] = d; | ||
144 | |||
145 | if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) { | ||
146 | nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:"); | ||
147 | for (i = 0; i < page >> 3; i++) { | ||
148 | nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]); | ||
149 | } | ||
150 | } | ||
151 | |||
152 | return 0; | ||
153 | } | ||
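
The loop above emits one bit per 4 KiB page and packs them LSB-first, eight pages per scatter buffer byte. A standalone sketch of just the packing discipline (hypothetical helper, hardware address math omitted):

    static void pack_bits_sketch(const u8 *bits, size_t nbits, u8 *out)
    {
    	size_t page;
    	u8 d = 0;

    	for (page = 0; page < nbits; page++) {
    		d |= (bits[page] & 1u) << (page & 7);
    		if ((page & 7) == 7) {
    			out[page >> 3] = d;	/* byte complete */
    			d = 0;
    		}
    	}
    	if ((page & 7) != 0)
    		out[page >> 3] = d;	/* flush the partial final byte */
    }
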
diff --git a/include/os/linux/cde_gp10b.h b/include/os/linux/cde_gp10b.h new file mode 100644 index 0000000..3ecca2a --- /dev/null +++ b/include/os/linux/cde_gp10b.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | * GP10B CDE | ||
3 | * | ||
4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GP10B_CDE | ||
26 | #define _NVHOST_GP10B_CDE | ||
27 | |||
28 | #include "os_linux.h" | ||
29 | |||
30 | void gp10b_cde_get_program_numbers(struct gk20a *g, | ||
31 | u32 block_height_log2, | ||
32 | u32 shader_parameter, | ||
33 | int *hprog_out, int *vprog_out); | ||
34 | bool gp10b_need_scatter_buffer(struct gk20a *g); | ||
35 | int gp10b_populate_scatter_buffer(struct gk20a *g, | ||
36 | struct sg_table *sgt, | ||
37 | size_t surface_size, | ||
38 | void *scatter_buffer_ptr, | ||
39 | size_t scatter_buffer_size); | ||
40 | #endif | ||
diff --git a/include/os/linux/channel.h b/include/os/linux/channel.h new file mode 100644 index 0000000..e6326fa --- /dev/null +++ b/include/os/linux/channel.h | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef NVGPU_LINUX_CHANNEL_H | ||
17 | #define NVGPU_LINUX_CHANNEL_H | ||
18 | |||
19 | #include <linux/workqueue.h> | ||
20 | #include <linux/dma-buf.h> | ||
21 | |||
22 | #include <nvgpu/types.h> | ||
23 | |||
24 | struct channel_gk20a; | ||
25 | struct nvgpu_gpfifo; | ||
26 | struct nvgpu_submit_gpfifo_args; | ||
27 | struct nvgpu_channel_fence; | ||
28 | struct gk20a_fence; | ||
29 | struct fifo_profile_gk20a; | ||
30 | struct nvgpu_os_linux; | ||
31 | |||
32 | struct sync_fence; | ||
33 | struct sync_timeline; | ||
34 | |||
35 | struct nvgpu_channel_completion_cb { | ||
36 | /* | ||
37 | * Signal the channel owner via a callback, if set, in job cleanup with | ||
38 | * schedule_work. This means that something finished on the channel (perhaps | ||
39 | * more than one job). | ||
40 | */ | ||
41 | void (*fn)(struct channel_gk20a *, void *); | ||
42 | void *user_data; | ||
43 | /* Make access to the two above atomic */ | ||
44 | struct nvgpu_spinlock lock; | ||
45 | /* Per-channel async work task, cannot reschedule itself */ | ||
46 | struct work_struct work; | ||
47 | }; | ||
48 | |||
49 | struct nvgpu_error_notifier { | ||
50 | struct dma_buf *dmabuf; | ||
51 | void *vaddr; | ||
52 | |||
53 | struct nvgpu_notification *notification; | ||
54 | |||
55 | struct nvgpu_mutex mutex; | ||
56 | }; | ||
57 | |||
58 | /* | ||
59 | * This struct contains fence-related data, | ||
60 | * e.g. the sync_timeline for sync_fences. | ||
61 | */ | ||
62 | struct nvgpu_os_fence_framework { | ||
63 | struct sync_timeline *timeline; | ||
64 | }; | ||
65 | |||
66 | struct nvgpu_usermode_bufs_linux { | ||
67 | /* | ||
68 | * Common low level info of these is stored in nvgpu_mems in | ||
69 | * channel_gk20a; these hold lifetimes for the actual dmabuf and its | ||
70 | * dma mapping. | ||
71 | */ | ||
72 | struct nvgpu_usermode_buf_linux { | ||
73 | struct dma_buf *dmabuf; | ||
74 | struct dma_buf_attachment *attachment; | ||
75 | struct sg_table *sgt; | ||
76 | } gpfifo, userd; | ||
77 | }; | ||
78 | |||
79 | struct nvgpu_channel_linux { | ||
80 | struct channel_gk20a *ch; | ||
81 | |||
82 | struct nvgpu_os_fence_framework fence_framework; | ||
83 | |||
84 | struct nvgpu_channel_completion_cb completion_cb; | ||
85 | struct nvgpu_error_notifier error_notifier; | ||
86 | |||
87 | struct dma_buf *cyclestate_buffer_handler; | ||
88 | |||
89 | struct nvgpu_usermode_bufs_linux usermode; | ||
90 | }; | ||
91 | |||
92 | u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags); | ||
93 | int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); | ||
94 | void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l); | ||
95 | |||
96 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | ||
97 | void (*update_fn)(struct channel_gk20a *, void *), | ||
98 | void *update_fn_data, | ||
99 | int runlist_id, | ||
100 | bool is_privileged_channel); | ||
101 | |||
102 | #endif | ||
diff --git a/include/os/linux/clk.c b/include/os/linux/clk.c new file mode 100644 index 0000000..e9796ea --- /dev/null +++ b/include/os/linux/clk.c | |||
@@ -0,0 +1,286 @@ | |||
1 | /* | ||
2 | * Linux clock support | ||
3 | * | ||
4 | * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/clk.h> | ||
20 | |||
21 | #include <soc/tegra/tegra-dvfs.h> | ||
22 | #include <soc/tegra/tegra-bpmp-dvfs.h> | ||
23 | |||
24 | #include "clk.h" | ||
25 | #include "os_linux.h" | ||
26 | #include "platform_gk20a.h" | ||
27 | |||
28 | #include <nvgpu/gk20a.h> | ||
29 | #include <nvgpu/clk_arb.h> | ||
30 | |||
31 | #define HZ_TO_MHZ(x) ((x) / 1000000) | ||
32 | |||
33 | static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) | ||
34 | { | ||
35 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
36 | unsigned long ret; | ||
37 | |||
38 | switch (api_domain) { | ||
39 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
40 | if (g->clk.tegra_clk) | ||
41 | ret = clk_get_rate(g->clk.tegra_clk); | ||
42 | else | ||
43 | ret = clk_get_rate(platform->clk[0]); | ||
44 | break; | ||
45 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
46 | ret = clk_get_rate(platform->clk[1]); | ||
47 | break; | ||
48 | default: | ||
49 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
50 | ret = 0; | ||
51 | break; | ||
52 | } | ||
53 | |||
54 | return ret; | ||
55 | } | ||
56 | |||
57 | static int nvgpu_linux_clk_set_rate(struct gk20a *g, | ||
58 | u32 api_domain, unsigned long rate) | ||
59 | { | ||
60 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
61 | int ret; | ||
62 | |||
63 | switch (api_domain) { | ||
64 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
65 | if (g->clk.tegra_clk) | ||
66 | ret = clk_set_rate(g->clk.tegra_clk, rate); | ||
67 | else | ||
68 | ret = clk_set_rate(platform->clk[0], rate); | ||
69 | break; | ||
70 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
71 | ret = clk_set_rate(platform->clk[1], rate); | ||
72 | break; | ||
73 | default: | ||
74 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
75 | ret = -EINVAL; | ||
76 | break; | ||
77 | } | ||
78 | |||
79 | return ret; | ||
80 | } | ||
81 | |||
82 | static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g) | ||
83 | { | ||
84 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
85 | |||
86 | /* | ||
87 | * On Tegra platforms with a GPCPLL bus (gbus), the GPU tegra_clk clock | ||
88 | * exposed to the frequency governor is a shared user on the gbus. The gbus | ||
89 | * itself is accessible as the GPU clock parent and carries the DVFS data. | ||
90 | */ | ||
91 | if (g->clk.tegra_clk) | ||
92 | return tegra_dvfs_get_fmax_at_vmin_safe_t( | ||
93 | g->clk.tegra_clk_parent); | ||
94 | |||
95 | if (platform->maxmin_clk_id) | ||
96 | return tegra_bpmp_dvfs_get_fmax_at_vmin( | ||
97 | platform->maxmin_clk_id); | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g) | ||
103 | { | ||
104 | struct clk *c; | ||
105 | |||
106 | c = clk_get_sys("gpu_ref", "gpu_ref"); | ||
107 | if (IS_ERR(c)) { | ||
108 | nvgpu_err(g, "failed to get GPCPLL reference clock"); | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | return clk_get_rate(c); | ||
113 | } | ||
114 | |||
115 | static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk, | ||
116 | unsigned long rate) | ||
117 | { | ||
118 | return tegra_dvfs_predict_mv_at_hz_cur_tfloor( | ||
119 | clk->tegra_clk_parent, rate); | ||
120 | } | ||
121 | |||
122 | static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain) | ||
123 | { | ||
124 | int ret; | ||
125 | u16 min_mhz, max_mhz; | ||
126 | |||
127 | switch (api_domain) { | ||
128 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
129 | ret = tegra_dvfs_get_maxrate(g->clk.tegra_clk_parent); | ||
130 | /* If dvfs not supported */ | ||
131 | if (ret == 0) { | ||
132 | int err = nvgpu_clk_arb_get_arbiter_clk_range(g, | ||
133 | NVGPU_CLK_DOMAIN_GPCCLK, | ||
134 | &min_mhz, &max_mhz); | ||
135 | if (err == 0) { | ||
136 | ret = max_mhz * 1000000L; | ||
137 | } | ||
138 | } | ||
139 | break; | ||
140 | default: | ||
141 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
142 | ret = 0; | ||
143 | break; | ||
144 | } | ||
145 | |||
146 | return ret; | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * This API returns the list of frequencies supported by the iGPU. | ||
151 | * Set *num_points to 0 to query the size of the freqs list; the size is | ||
152 | * returned through *num_points itself. The freqs array must be provided | ||
153 | * by the caller. If *num_points is non-zero, the freqs array must hold | ||
154 | * at least *num_points entries. | ||
155 | */ | ||
156 | static int nvgpu_linux_clk_get_f_points(struct gk20a *g, | ||
157 | u32 api_domain, u32 *num_points, u16 *freqs) | ||
158 | { | ||
159 | struct device *dev = dev_from_gk20a(g); | ||
160 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
161 | unsigned long *gpu_freq_table; | ||
162 | int ret = 0; | ||
163 | int num_supported_freq = 0; | ||
164 | u32 i; | ||
165 | |||
166 | switch (api_domain) { | ||
167 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
168 | ret = platform->get_clk_freqs(dev, &gpu_freq_table, | ||
169 | &num_supported_freq); | ||
170 | |||
171 | if (ret) { | ||
172 | return ret; | ||
173 | } | ||
174 | |||
175 | if (num_points == NULL) { | ||
176 | return -EINVAL; | ||
177 | } | ||
178 | |||
179 | if (*num_points != 0U) { | ||
180 | if (freqs == NULL || (*num_points > (u32)num_supported_freq)) { | ||
181 | return -EINVAL; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | if (*num_points == 0) { | ||
186 | *num_points = num_supported_freq; | ||
187 | } else { | ||
188 | for (i = 0; i < *num_points; i++) { | ||
189 | freqs[i] = HZ_TO_MHZ(gpu_freq_table[i]); | ||
190 | } | ||
191 | } | ||
192 | break; | ||
193 | default: | ||
194 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
195 | ret = -EINVAL; | ||
196 | break; | ||
197 | } | ||
198 | |||
199 | return ret; | ||
200 | } | ||
201 | |||
202 | static int nvgpu_clk_get_range(struct gk20a *g, u32 api_domain, | ||
203 | u16 *min_mhz, u16 *max_mhz) | ||
204 | { | ||
205 | struct device *dev = dev_from_gk20a(g); | ||
206 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
207 | unsigned long *freqs; | ||
208 | int num_freqs; | ||
209 | int ret; | ||
210 | |||
211 | switch (api_domain) { | ||
212 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
213 | ret = platform->get_clk_freqs(dev, &freqs, &num_freqs); | ||
214 | |||
215 | if (!ret) { | ||
216 | *min_mhz = HZ_TO_MHZ(freqs[0]); | ||
217 | *max_mhz = HZ_TO_MHZ(freqs[num_freqs - 1]); | ||
218 | } | ||
219 | break; | ||
220 | default: | ||
221 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
222 | ret = -EINVAL; | ||
223 | break; | ||
224 | } | ||
225 | |||
226 | return ret; | ||
227 | } | ||
228 | |||
229 | /* rate_target should be passed in as Hz; | ||
230 | rounded_rate is returned in Hz */ | ||
231 | static int nvgpu_clk_get_round_rate(struct gk20a *g, | ||
232 | u32 api_domain, unsigned long rate_target, | ||
233 | unsigned long *rounded_rate) | ||
234 | { | ||
235 | struct device *dev = dev_from_gk20a(g); | ||
236 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
237 | unsigned long *freqs; | ||
238 | int num_freqs; | ||
239 | int i, ret = 0; | ||
240 | |||
241 | switch (api_domain) { | ||
242 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
243 | ret = platform->get_clk_freqs(dev, &freqs, &num_freqs); | ||
244 | if (ret) break; /* freqs is not valid on failure */ | ||
245 | for (i = 0; i < num_freqs; ++i) { | ||
246 | if (freqs[i] >= rate_target) { | ||
247 | *rounded_rate = freqs[i]; | ||
248 | return 0; | ||
249 | } | ||
250 | } | ||
251 | *rounded_rate = freqs[num_freqs - 1]; | ||
252 | break; | ||
253 | default: | ||
254 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
255 | ret = -EINVAL; | ||
256 | break; | ||
257 | } | ||
258 | |||
259 | return ret; | ||
260 | } | ||
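
For example, with a hypothetical frequency table the rounding above behaves as follows:

    /*
     * freqs = { 76800000, 153600000, 230400000, 307200000 } Hz
     * rate_target = 200000000 -> *rounded_rate = 230400000 (first freq >= target)
     * rate_target = 400000000 -> *rounded_rate = 307200000 (clamped to the maximum)
     */
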
261 | |||
262 | static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) | ||
263 | { | ||
264 | return clk_prepare_enable(clk->tegra_clk); | ||
265 | } | ||
266 | |||
267 | static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk) | ||
268 | { | ||
269 | clk_disable_unprepare(clk->tegra_clk); | ||
270 | } | ||
271 | |||
272 | void nvgpu_linux_init_clk_support(struct gk20a *g) | ||
273 | { | ||
274 | g->ops.clk.get_rate = nvgpu_linux_clk_get_rate; | ||
275 | g->ops.clk.set_rate = nvgpu_linux_clk_set_rate; | ||
276 | g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe; | ||
277 | g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate; | ||
278 | g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor; | ||
279 | g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; | ||
280 | g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; | ||
281 | g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; | ||
282 | g->ops.clk.clk_domain_get_f_points = nvgpu_linux_clk_get_f_points; | ||
283 | g->ops.clk.get_clk_range = nvgpu_clk_get_range; | ||
284 | g->ops.clk.clk_get_round_rate = nvgpu_clk_get_round_rate; | ||
285 | g->ops.clk.measure_freq = nvgpu_clk_measure_freq; | ||
286 | } | ||
diff --git a/include/os/linux/clk.h b/include/os/linux/clk.h new file mode 100644 index 0000000..614a7fd --- /dev/null +++ b/include/os/linux/clk.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef NVGPU_COMMON_LINUX_CLK_H | ||
18 | |||
19 | struct gk20a; | ||
20 | void nvgpu_linux_init_clk_support(struct gk20a *g); | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/comptags.c b/include/os/linux/comptags.c new file mode 100644 index 0000000..ab37197 --- /dev/null +++ b/include/os/linux/comptags.c | |||
@@ -0,0 +1,140 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/dma-buf.h> | ||
18 | |||
19 | #include <nvgpu/comptags.h> | ||
20 | #include <nvgpu/gk20a.h> | ||
21 | |||
22 | #include <nvgpu/linux/vm.h> | ||
23 | |||
24 | #include "dmabuf.h" | ||
25 | |||
26 | void gk20a_get_comptags(struct nvgpu_os_buffer *buf, | ||
27 | struct gk20a_comptags *comptags) | ||
28 | { | ||
29 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
30 | buf->dev); | ||
31 | |||
32 | if (!comptags) | ||
33 | return; | ||
34 | |||
35 | if (!priv) { | ||
36 | memset(comptags, 0, sizeof(*comptags)); | ||
37 | return; | ||
38 | } | ||
39 | |||
40 | nvgpu_mutex_acquire(&priv->lock); | ||
41 | *comptags = priv->comptags; | ||
42 | nvgpu_mutex_release(&priv->lock); | ||
43 | } | ||
44 | |||
45 | int gk20a_alloc_or_get_comptags(struct gk20a *g, | ||
46 | struct nvgpu_os_buffer *buf, | ||
47 | struct gk20a_comptag_allocator *allocator, | ||
48 | struct gk20a_comptags *comptags) | ||
49 | { | ||
50 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
51 | buf->dev); | ||
52 | u32 offset; | ||
53 | int err; | ||
54 | unsigned int ctag_granularity; | ||
55 | u32 lines; | ||
56 | |||
57 | if (!priv) | ||
58 | return -ENOSYS; | ||
59 | |||
60 | nvgpu_mutex_acquire(&priv->lock); | ||
61 | |||
62 | if (priv->comptags.allocated) { | ||
63 | /* | ||
64 | * already allocated | ||
65 | */ | ||
66 | *comptags = priv->comptags; | ||
67 | |||
68 | err = 0; | ||
69 | goto exit_locked; | ||
70 | } | ||
71 | |||
72 | ctag_granularity = g->ops.fb.compression_page_size(g); | ||
73 | lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity); | ||
74 | |||
75 | /* 0-sized buffer? Shouldn't occur, but let's check anyway. */ | ||
76 | if (lines < 1) { | ||
77 | err = -EINVAL; | ||
78 | goto exit_locked; | ||
79 | } | ||
80 | |||
81 | /* store the allocator so we can use it when we free the ctags */ | ||
82 | priv->comptag_allocator = allocator; | ||
83 | err = gk20a_comptaglines_alloc(allocator, &offset, lines); | ||
84 | if (!err) { | ||
85 | priv->comptags.offset = offset; | ||
86 | priv->comptags.lines = lines; | ||
87 | priv->comptags.needs_clear = true; | ||
88 | } else { | ||
89 | priv->comptags.offset = 0; | ||
90 | priv->comptags.lines = 0; | ||
91 | priv->comptags.needs_clear = false; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * We don't report an error here if comptag alloc failed. The | ||
96 | * caller will simply fall back to incompressible kinds. It | ||
97 | * would not be safe to re-allocate comptags anyway on | ||
98 | * successive calls, as that would break map aliasing. | ||
99 | */ | ||
100 | err = 0; | ||
101 | priv->comptags.allocated = true; | ||
102 | |||
103 | *comptags = priv->comptags; | ||
104 | |||
105 | exit_locked: | ||
106 | nvgpu_mutex_release(&priv->lock); | ||
107 | |||
108 | return err; | ||
109 | } | ||
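
A worked example of the comptag line math above, with hypothetical sizes and assuming a 128 KiB compression page:

    /*
     * dmabuf size = 3 MiB, ctag_granularity = 128 KiB:
     *   lines = DIV_ROUND_UP_ULL(3 << 20, 128 << 10) = 24
     * i.e. one comptag line per compression page; a buffer of
     * 3 MiB + 1 byte would round up to 25 lines.
     */
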
110 | |||
111 | bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf) | ||
112 | { | ||
113 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
114 | buf->dev); | ||
115 | bool clear_started = false; | ||
116 | |||
117 | if (priv) { | ||
118 | nvgpu_mutex_acquire(&priv->lock); | ||
119 | |||
120 | clear_started = priv->comptags.needs_clear; | ||
121 | |||
122 | if (!clear_started) | ||
123 | nvgpu_mutex_release(&priv->lock); | ||
124 | } | ||
125 | |||
126 | return clear_started; | ||
127 | } | ||
128 | |||
129 | void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf, | ||
130 | bool clear_successful) | ||
131 | { | ||
132 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
133 | buf->dev); | ||
134 | if (priv) { | ||
135 | if (clear_successful) | ||
136 | priv->comptags.needs_clear = false; | ||
137 | |||
138 | nvgpu_mutex_release(&priv->lock); | ||
139 | } | ||
140 | } | ||
diff --git a/include/os/linux/cond.c b/include/os/linux/cond.c new file mode 100644 index 0000000..633c34f --- /dev/null +++ b/include/os/linux/cond.c | |||
@@ -0,0 +1,73 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/wait.h> | ||
18 | #include <linux/sched.h> | ||
19 | |||
20 | #include <nvgpu/cond.h> | ||
21 | |||
22 | int nvgpu_cond_init(struct nvgpu_cond *cond) | ||
23 | { | ||
24 | init_waitqueue_head(&cond->wq); | ||
25 | cond->initialized = true; | ||
26 | |||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | void nvgpu_cond_destroy(struct nvgpu_cond *cond) | ||
31 | { | ||
32 | cond->initialized = false; | ||
33 | } | ||
34 | |||
35 | int nvgpu_cond_signal(struct nvgpu_cond *cond) | ||
36 | { | ||
37 | if (!cond->initialized) | ||
38 | return -EINVAL; | ||
39 | |||
40 | wake_up(&cond->wq); | ||
41 | |||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond) | ||
46 | { | ||
47 | if (!cond->initialized) | ||
48 | return -EINVAL; | ||
49 | |||
50 | wake_up_interruptible(&cond->wq); | ||
51 | |||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | int nvgpu_cond_broadcast(struct nvgpu_cond *cond) | ||
56 | { | ||
57 | if (!cond->initialized) | ||
58 | return -EINVAL; | ||
59 | |||
60 | wake_up_all(&cond->wq); | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond) | ||
66 | { | ||
67 | if (!cond->initialized) | ||
68 | return -EINVAL; | ||
69 | |||
70 | wake_up_interruptible_all(&cond->wq); | ||
71 | |||
72 | return 0; | ||
73 | } | ||
diff --git a/include/os/linux/ctxsw_trace.c b/include/os/linux/ctxsw_trace.c new file mode 100644 index 0000000..2d36d9c --- /dev/null +++ b/include/os/linux/ctxsw_trace.c | |||
@@ -0,0 +1,792 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/wait.h> | ||
18 | #include <linux/ktime.h> | ||
19 | #include <linux/uaccess.h> | ||
20 | #include <linux/poll.h> | ||
21 | #include <trace/events/gk20a.h> | ||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | #include <nvgpu/ctxsw_trace.h> | ||
24 | #include <nvgpu/kmem.h> | ||
25 | #include <nvgpu/log.h> | ||
26 | #include <nvgpu/atomic.h> | ||
27 | #include <nvgpu/barrier.h> | ||
28 | #include <nvgpu/gk20a.h> | ||
29 | #include <nvgpu/channel.h> | ||
30 | |||
31 | #include "gk20a/gr_gk20a.h" | ||
32 | #include "gk20a/fecs_trace_gk20a.h" | ||
33 | |||
34 | #include "platform_gk20a.h" | ||
35 | #include "os_linux.h" | ||
36 | #include "ctxsw_trace.h" | ||
37 | |||
38 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
39 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
40 | |||
41 | #define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) | ||
42 | |||
43 | /* Userland-facing FIFO (one global + eventually one per VM) */ | ||
44 | struct gk20a_ctxsw_dev { | ||
45 | struct gk20a *g; | ||
46 | |||
47 | struct nvgpu_ctxsw_ring_header *hdr; | ||
48 | struct nvgpu_gpu_ctxsw_trace_entry *ents; | ||
49 | struct nvgpu_gpu_ctxsw_trace_filter filter; | ||
50 | bool write_enabled; | ||
51 | struct nvgpu_cond readout_wq; | ||
52 | size_t size; | ||
53 | u32 num_ents; | ||
54 | |||
55 | nvgpu_atomic_t vma_ref; | ||
56 | |||
57 | struct nvgpu_mutex write_lock; | ||
58 | }; | ||
59 | |||
60 | |||
61 | struct gk20a_ctxsw_trace { | ||
62 | struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; | ||
63 | }; | ||
64 | |||
65 | static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) | ||
66 | { | ||
67 | return (hdr->write_idx == hdr->read_idx); | ||
68 | } | ||
69 | |||
70 | static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) | ||
71 | { | ||
72 | return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; | ||
73 | } | ||
74 | |||
75 | static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) | ||
76 | { | ||
77 | return (hdr->write_idx + hdr->num_ents - hdr->read_idx) % hdr->num_ents; | ||
78 | } | ||
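These helpers implement the classic one-slot-reserved ring: both indices stay in [0, num_ents), "empty" is index equality, and "full" deliberately leaves one slot unused so the two states remain distinguishable, giving a usable capacity of num_ents - 1. A standalone demonstration of the same arithmetic (plain C, runnable in userspace):

    #include <assert.h>
    #include <stdint.h>

    /* Same length formula as ring_len() above, written wrap-safely. */
    static uint32_t ring_len_demo(uint32_t w, uint32_t r, uint32_t n)
    {
            return (w + n - r) % n;
    }

    int main(void)
    {
            uint32_t n = 5;                      /* num_ents */

            assert(ring_len_demo(2, 2, n) == 0); /* w == r: empty          */
            assert(ring_len_demo(1, 2, n) == 4); /* (w+1) % n == r: full,  */
                                                 /* only n - 1 slots usable */
            assert(ring_len_demo(4, 2, n) == 2); /* two entries pending    */
            return 0;
    }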
79 | |||
80 | static void nvgpu_set_ctxsw_trace_entry(struct nvgpu_ctxsw_trace_entry *entry_dst, | ||
81 | struct nvgpu_gpu_ctxsw_trace_entry *entry_src) | ||
82 | { | ||
83 | entry_dst->tag = entry_src->tag; | ||
84 | entry_dst->vmid = entry_src->vmid; | ||
85 | entry_dst->seqno = entry_src->seqno; | ||
86 | entry_dst->context_id = entry_src->context_id; | ||
87 | entry_dst->pid = entry_src->pid; | ||
88 | entry_dst->timestamp = entry_src->timestamp; | ||
89 | } | ||
90 | |||
91 | ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, | ||
92 | loff_t *off) | ||
93 | { | ||
94 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
95 | struct gk20a *g = dev->g; | ||
96 | struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; | ||
97 | struct nvgpu_ctxsw_trace_entry __user *entry = | ||
98 | (struct nvgpu_ctxsw_trace_entry __user *) buf; | ||
99 | struct nvgpu_ctxsw_trace_entry user_entry; | ||
100 | size_t copied = 0; | ||
101 | int err; | ||
102 | |||
103 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, | ||
104 | "filp=%p buf=%p size=%zu", filp, buf, size); | ||
105 | |||
106 | nvgpu_mutex_acquire(&dev->write_lock); | ||
107 | while (ring_is_empty(hdr)) { | ||
108 | nvgpu_mutex_release(&dev->write_lock); | ||
109 | if (filp->f_flags & O_NONBLOCK) | ||
110 | return -EAGAIN; | ||
111 | err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, | ||
112 | !ring_is_empty(hdr), 0); | ||
113 | if (err) | ||
114 | return err; | ||
115 | nvgpu_mutex_acquire(&dev->write_lock); | ||
116 | } | ||
117 | |||
118 | while (size >= sizeof(*entry)) { | ||
119 | if (ring_is_empty(hdr)) | ||
120 | break; | ||
121 | |||
122 | nvgpu_set_ctxsw_trace_entry(&user_entry, &dev->ents[hdr->read_idx]); | ||
123 | if (copy_to_user(entry, &user_entry, | ||
124 | sizeof(*entry))) { | ||
125 | nvgpu_mutex_release(&dev->write_lock); | ||
126 | return -EFAULT; | ||
127 | } | ||
128 | |||
129 | hdr->read_idx++; | ||
130 | if (hdr->read_idx >= hdr->num_ents) | ||
131 | hdr->read_idx = 0; | ||
132 | |||
133 | entry++; | ||
134 | copied += sizeof(*entry); | ||
135 | size -= sizeof(*entry); | ||
136 | } | ||
137 | |||
138 | nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied, | ||
139 | hdr->read_idx); | ||
140 | |||
141 | *off = hdr->read_idx; | ||
142 | nvgpu_mutex_release(&dev->write_lock); | ||
143 | |||
144 | return copied; | ||
145 | } | ||
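A hedged userspace sketch of the consumer side of this read handler. The device node path and the uapi header location are assumptions; the entry field names follow nvgpu_set_ctxsw_trace_entry() above. A blocking descriptor sleeps in the NVGPU_COND_WAIT_INTERRUPTIBLE() above until the kernel-side writer publishes entries; with O_NONBLOCK an empty ring yields -EAGAIN instead:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <linux/nvgpu.h>        /* assumed uapi header location */

    static int consume_trace_sketch(const char *path)
    {
            struct nvgpu_ctxsw_trace_entry ents[32];
            ssize_t n, i;
            int fd = open(path, O_RDONLY);

            if (fd < 0)
                    return -1;

            n = read(fd, ents, sizeof(ents)); /* blocks until data arrives */
            for (i = 0; i < n / (ssize_t)sizeof(ents[0]); i++)
                    printf("seqno=%u tag=0x%x pid=%lld\n",
                           ents[i].seqno, ents[i].tag,
                           (long long)ents[i].pid);

            close(fd);
            return n < 0 ? -1 : 0;
    }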
146 | |||
147 | static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) | ||
148 | { | ||
149 | struct gk20a *g = dev->g; | ||
150 | |||
151 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); | ||
152 | nvgpu_mutex_acquire(&dev->write_lock); | ||
153 | dev->write_enabled = true; | ||
154 | nvgpu_mutex_release(&dev->write_lock); | ||
155 | dev->g->ops.fecs_trace.enable(dev->g); | ||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) | ||
160 | { | ||
161 | struct gk20a *g = dev->g; | ||
162 | |||
163 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); | ||
164 | dev->g->ops.fecs_trace.disable(dev->g); | ||
165 | nvgpu_mutex_acquire(&dev->write_lock); | ||
166 | dev->write_enabled = false; | ||
167 | nvgpu_mutex_release(&dev->write_lock); | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, | ||
172 | size_t size) | ||
173 | { | ||
174 | struct gk20a *g = dev->g; | ||
175 | void *buf; | ||
176 | int err; | ||
177 | |||
178 | if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref))) | ||
179 | return -EBUSY; | ||
180 | |||
181 | err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); | ||
182 | if (err) | ||
183 | return err; | ||
184 | |||
185 | |||
186 | dev->hdr = buf; | ||
187 | dev->ents = (struct nvgpu_gpu_ctxsw_trace_entry *) (dev->hdr + 1); | ||
188 | dev->size = size; | ||
189 | dev->num_ents = dev->hdr->num_ents; | ||
190 | |||
191 | nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", | ||
192 | dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, | ||
197 | void **buf, size_t *size) | ||
198 | { | ||
199 | struct nvgpu_ctxsw_ring_header *hdr; | ||
200 | |||
201 | *size = roundup(*size, PAGE_SIZE); | ||
202 | hdr = vmalloc_user(*size); | ||
203 | if (!hdr) | ||
204 | return -ENOMEM; | ||
205 | |||
206 | hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; | ||
207 | hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; | ||
208 | hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) | ||
209 | / sizeof(struct nvgpu_gpu_ctxsw_trace_entry); | ||
210 | hdr->ent_size = sizeof(struct nvgpu_gpu_ctxsw_trace_entry); | ||
211 | hdr->drop_count = 0; | ||
212 | hdr->read_idx = 0; | ||
213 | hdr->write_idx = 0; | ||
214 | hdr->write_seqno = 0; | ||
215 | |||
216 | *buf = hdr; | ||
217 | return 0; | ||
218 | } | ||
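The entry count is simply whatever fits after the header in the page-rounded allocation. A worked example with illustrative sizes (the real sizeof values are ABI-defined and may differ):

    /*
     * Hypothetical numbers, for the arithmetic only:
     *   *size rounded up to one 4 KiB page          = 4096 bytes
     *   sizeof(struct nvgpu_ctxsw_ring_header)      = 32 bytes (assumed)
     *   sizeof(struct nvgpu_gpu_ctxsw_trace_entry)  = 32 bytes (assumed)
     *   num_ents = (4096 - 32) / 32                 = 127 entries
     * vmalloc_user() returns zeroed memory suitable for mapping to
     * userspace, so the ring starts consistent even before the explicit
     * header assignments above.
     */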
219 | |||
220 | int gk20a_ctxsw_dev_ring_free(struct gk20a *g) | ||
221 | { | ||
222 | struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; | ||
223 | |||
224 | nvgpu_vfree(g, dev->hdr); | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, | ||
229 | struct nvgpu_ctxsw_ring_setup_args *args) | ||
230 | { | ||
231 | struct gk20a *g = dev->g; | ||
232 | size_t size = args->size; | ||
233 | int ret; | ||
234 | |||
235 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); | ||
236 | |||
237 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) | ||
238 | return -EINVAL; | ||
239 | |||
240 | nvgpu_mutex_acquire(&dev->write_lock); | ||
241 | ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); | ||
242 | nvgpu_mutex_release(&dev->write_lock); | ||
243 | |||
244 | return ret; | ||
245 | } | ||
246 | |||
247 | static void nvgpu_set_ctxsw_trace_filter_args(struct nvgpu_gpu_ctxsw_trace_filter *filter_dst, | ||
248 | struct nvgpu_ctxsw_trace_filter *filter_src) | ||
249 | { | ||
250 | memcpy(filter_dst->tag_bits, filter_src->tag_bits, sizeof(filter_dst->tag_bits)); | ||
251 | } | ||
252 | |||
253 | static void nvgpu_get_ctxsw_trace_filter_args(struct nvgpu_ctxsw_trace_filter *filter_dst, | ||
254 | struct nvgpu_gpu_ctxsw_trace_filter *filter_src) | ||
255 | { | ||
256 | memcpy(filter_dst->tag_bits, filter_src->tag_bits, sizeof(filter_dst->tag_bits)); | ||
257 | } | ||
258 | |||
259 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | ||
260 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
261 | { | ||
262 | struct gk20a *g = dev->g; | ||
263 | |||
264 | nvgpu_mutex_acquire(&dev->write_lock); | ||
265 | nvgpu_set_ctxsw_trace_filter_args(&dev->filter, &args->filter); | ||
266 | nvgpu_mutex_release(&dev->write_lock); | ||
267 | |||
268 | if (g->ops.fecs_trace.set_filter) | ||
269 | g->ops.fecs_trace.set_filter(g, &dev->filter); | ||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, | ||
274 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
275 | { | ||
276 | nvgpu_mutex_acquire(&dev->write_lock); | ||
277 | nvgpu_get_ctxsw_trace_filter_args(&args->filter, &dev->filter); | ||
278 | nvgpu_mutex_release(&dev->write_lock); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
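A hedged userspace sketch of driving these two ioctls. The per-bit NVGPU_CTXSW_FILTER_SET() helper is assumed to come from <linux/nvgpu.h> alongside the SET_ALL and ISSET helpers that appear in this file; here only engine-reset events are kept:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>        /* assumed uapi header location */

    static int keep_only_resets_sketch(int fd)
    {
            struct nvgpu_ctxsw_trace_filter_args args;

            memset(&args, 0, sizeof(args));   /* all tags filtered out */
            NVGPU_CTXSW_FILTER_SET(&args.filter,
                                   NVGPU_CTXSW_TAG_ENGINE_RESET);

            return ioctl(fd, NVGPU_CTXSW_IOCTL_SET_FILTER, &args);
    }

A GET_FILTER round trip works the same way in reverse: the kernel copies dev->filter back into args.filter under write_lock, so the result is always a consistent snapshot.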
282 | |||
283 | static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) | ||
284 | { | ||
285 | struct gk20a *g = dev->g; | ||
286 | int err; | ||
287 | |||
288 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); | ||
289 | |||
290 | err = gk20a_busy(g); | ||
291 | if (err) | ||
292 | return err; | ||
293 | |||
294 | if (g->ops.fecs_trace.flush) | ||
295 | err = g->ops.fecs_trace.flush(g); | ||
296 | |||
297 | if (likely(!err)) | ||
298 | err = g->ops.fecs_trace.poll(g); | ||
299 | |||
300 | gk20a_idle(g); | ||
301 | return err; | ||
302 | } | ||
303 | |||
304 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) | ||
305 | { | ||
306 | struct nvgpu_os_linux *l; | ||
307 | struct gk20a *g; | ||
308 | struct gk20a_ctxsw_trace *trace; | ||
309 | struct gk20a_ctxsw_dev *dev; | ||
310 | int err; | ||
311 | size_t size; | ||
312 | u32 n; | ||
313 | |||
314 | /* only one VM for now */ | ||
315 | const int vmid = 0; | ||
316 | |||
317 | l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev); | ||
318 | g = gk20a_get(&l->g); | ||
319 | if (!g) | ||
320 | return -ENODEV; | ||
321 | |||
322 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); | ||
323 | |||
324 | err = gk20a_busy(g); | ||
325 | if (err) | ||
326 | goto free_ref; | ||
327 | |||
328 | trace = g->ctxsw_trace; | ||
329 | if (!trace) { | ||
330 | err = -ENODEV; | ||
331 | goto idle; | ||
332 | } | ||
333 | |||
334 | /* Allow only one user for this device */ | ||
335 | dev = &trace->devs[vmid]; | ||
336 | nvgpu_mutex_acquire(&dev->write_lock); | ||
337 | if (dev->hdr) { | ||
338 | err = -EBUSY; | ||
339 | goto done; | ||
340 | } | ||
341 | |||
342 | /* By default, allocate a ring buffer big enough to accommodate | ||
343 | * FECS records with the default event filter */ | ||
344 | |||
345 | /* enable all traces by default */ | ||
346 | NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); | ||
347 | |||
348 | /* compute max number of entries generated with this filter */ | ||
349 | n = g->ops.fecs_trace.max_entries(g, &dev->filter); | ||
350 | |||
351 | size = sizeof(struct nvgpu_ctxsw_ring_header) + | ||
352 | n * sizeof(struct nvgpu_gpu_ctxsw_trace_entry); | ||
353 | nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", | ||
354 | size, n, sizeof(struct nvgpu_gpu_ctxsw_trace_entry)); | ||
355 | |||
356 | err = gk20a_ctxsw_dev_alloc_buffer(dev, size); | ||
357 | if (!err) { | ||
358 | filp->private_data = dev; | ||
359 | nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", | ||
360 | filp, dev, size); | ||
361 | } | ||
362 | |||
363 | done: | ||
364 | nvgpu_mutex_release(&dev->write_lock); | ||
365 | |||
366 | idle: | ||
367 | gk20a_idle(g); | ||
368 | free_ref: | ||
369 | if (err) | ||
370 | gk20a_put(g); | ||
371 | return err; | ||
372 | } | ||
373 | |||
374 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) | ||
375 | { | ||
376 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
377 | struct gk20a *g = dev->g; | ||
378 | |||
379 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); | ||
380 | |||
381 | g->ops.fecs_trace.disable(g); | ||
382 | |||
383 | nvgpu_mutex_acquire(&dev->write_lock); | ||
384 | dev->write_enabled = false; | ||
385 | nvgpu_mutex_release(&dev->write_lock); | ||
386 | |||
387 | if (dev->hdr) { | ||
388 | dev->g->ops.fecs_trace.free_user_buffer(dev->g); | ||
389 | dev->hdr = NULL; | ||
390 | } | ||
391 | gk20a_put(g); | ||
392 | return 0; | ||
393 | } | ||
394 | |||
395 | long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, | ||
396 | unsigned long arg) | ||
397 | { | ||
398 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
399 | struct gk20a *g = dev->g; | ||
400 | u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; | ||
401 | int err = 0; | ||
402 | |||
403 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); | ||
404 | |||
405 | if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || | ||
406 | (_IOC_NR(cmd) == 0) || | ||
407 | (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) || | ||
408 | (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE)) | ||
409 | return -EINVAL; | ||
410 | |||
411 | memset(buf, 0, sizeof(buf)); | ||
412 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
413 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
414 | return -EFAULT; | ||
415 | } | ||
416 | |||
417 | switch (cmd) { | ||
418 | case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: | ||
419 | err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); | ||
420 | break; | ||
421 | case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: | ||
422 | err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); | ||
423 | break; | ||
424 | case NVGPU_CTXSW_IOCTL_RING_SETUP: | ||
425 | err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, | ||
426 | (struct nvgpu_ctxsw_ring_setup_args *) buf); | ||
427 | break; | ||
428 | case NVGPU_CTXSW_IOCTL_SET_FILTER: | ||
429 | err = gk20a_ctxsw_dev_ioctl_set_filter(dev, | ||
430 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
431 | break; | ||
432 | case NVGPU_CTXSW_IOCTL_GET_FILTER: | ||
433 | err = gk20a_ctxsw_dev_ioctl_get_filter(dev, | ||
434 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
435 | break; | ||
436 | case NVGPU_CTXSW_IOCTL_POLL: | ||
437 | err = gk20a_ctxsw_dev_ioctl_poll(dev); | ||
438 | break; | ||
439 | default: | ||
440 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", | ||
441 | cmd); | ||
442 | err = -ENOTTY; | ||
443 | } | ||
444 | |||
445 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) && | ||
446 | copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd))) | ||
447 | err = -EFAULT; | ||
448 | return err; | ||
449 | } | ||
450 | |||
451 | unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) | ||
452 | { | ||
453 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
454 | struct gk20a *g = dev->g; | ||
455 | struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; | ||
456 | unsigned int mask = 0; | ||
457 | |||
458 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); | ||
459 | |||
460 | nvgpu_mutex_acquire(&dev->write_lock); | ||
461 | poll_wait(filp, &dev->readout_wq.wq, wait); | ||
462 | if (!ring_is_empty(hdr)) | ||
463 | mask |= POLLIN | POLLRDNORM; | ||
464 | nvgpu_mutex_release(&dev->write_lock); | ||
465 | |||
466 | return mask; | ||
467 | } | ||
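For non-blocking consumers the intended pattern is poll(2) followed by read(2): POLLIN is raised exactly when the ring is non-empty, so a sketch like the following (same descriptor assumptions as the read example above) never sleeps inside read():

    #include <poll.h>

    static int wait_for_entries_sketch(int fd)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLIN };

            /* Blocks here, not in read(), until the wake-up in
             * gk20a_ctxsw_trace_wake_up() below fires. */
            if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
                    return 0;   /* ring non-empty: read() won't block */
            return -1;
    }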
468 | |||
469 | static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) | ||
470 | { | ||
471 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
472 | struct gk20a *g = dev->g; | ||
473 | |||
474 | nvgpu_atomic_inc(&dev->vma_ref); | ||
475 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
476 | nvgpu_atomic_read(&dev->vma_ref)); | ||
477 | } | ||
478 | |||
479 | static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) | ||
480 | { | ||
481 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
482 | struct gk20a *g = dev->g; | ||
483 | |||
484 | nvgpu_atomic_dec(&dev->vma_ref); | ||
485 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
486 | nvgpu_atomic_read(&dev->vma_ref)); | ||
487 | } | ||
488 | |||
489 | static const struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { | ||
490 | .open = gk20a_ctxsw_dev_vma_open, | ||
491 | .close = gk20a_ctxsw_dev_vma_close, | ||
492 | }; | ||
493 | |||
494 | int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, | ||
495 | struct vm_area_struct *vma) | ||
496 | { | ||
497 | return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); | ||
498 | } | ||
499 | |||
500 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||
501 | { | ||
502 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
503 | struct gk20a *g = dev->g; | ||
504 | int ret; | ||
505 | |||
506 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", | ||
507 | vma->vm_start, vma->vm_end); | ||
508 | |||
509 | ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); | ||
510 | if (likely(!ret)) { | ||
511 | vma->vm_private_data = dev; | ||
512 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; | ||
513 | vma->vm_ops->open(vma); | ||
514 | } | ||
515 | |||
516 | return ret; | ||
517 | } | ||
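Mapping the ring gives a consumer zero-copy access: the header and entry array share one vmalloc region laid out exactly as gk20a_ctxsw_dev_ring_alloc() built it, and an mmap-based reader is expected to advance hdr->read_idx itself instead of calling read(2). A hedged sketch, where size must match what was passed to RING_SETUP:

    #include <sys/mman.h>

    static struct nvgpu_ctxsw_ring_header *map_ring_sketch(int fd,
                                                           size_t size)
    {
            void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                           MAP_SHARED, fd, 0);

            return (p == MAP_FAILED) ? NULL : p;
    }

The vma open/close hooks above keep vma_ref accurate across forks and partial unmaps, which is what lets gk20a_ctxsw_dev_alloc_buffer() refuse to reallocate while any mapping is live.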
518 | |||
519 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
520 | static int gk20a_ctxsw_init_devs(struct gk20a *g) | ||
521 | { | ||
522 | struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; | ||
523 | struct gk20a_ctxsw_dev *dev = trace->devs; | ||
524 | int err; | ||
525 | int i; | ||
526 | |||
527 | for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { | ||
528 | dev->g = g; | ||
529 | dev->hdr = NULL; | ||
530 | dev->write_enabled = false; | ||
531 | nvgpu_cond_init(&dev->readout_wq); | ||
532 | err = nvgpu_mutex_init(&dev->write_lock); | ||
533 | if (err) | ||
534 | return err; | ||
535 | nvgpu_atomic_set(&dev->vma_ref, 0); | ||
536 | dev++; | ||
537 | } | ||
538 | return 0; | ||
539 | } | ||
540 | #endif | ||
541 | |||
542 | int gk20a_ctxsw_trace_init(struct gk20a *g) | ||
543 | { | ||
544 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
545 | struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; | ||
546 | int err; | ||
547 | |||
548 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace); | ||
549 | |||
550 | /* if tracing is not supported, skip this */ | ||
551 | if (!g->ops.fecs_trace.init) | ||
552 | return 0; | ||
553 | |||
554 | if (likely(trace)) { | ||
555 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | ||
556 | return 0; | ||
557 | } | ||
558 | |||
559 | trace = nvgpu_kzalloc(g, sizeof(*trace)); | ||
560 | if (unlikely(!trace)) | ||
561 | return -ENOMEM; | ||
562 | g->ctxsw_trace = trace; | ||
563 | |||
564 | err = gk20a_ctxsw_init_devs(g); | ||
565 | if (err) | ||
566 | goto fail; | ||
567 | |||
568 | err = g->ops.fecs_trace.init(g); | ||
569 | if (unlikely(err)) | ||
570 | goto fail; | ||
571 | |||
572 | return 0; | ||
573 | |||
574 | fail: | ||
575 | memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); | ||
576 | nvgpu_kfree(g, trace); | ||
577 | g->ctxsw_trace = NULL; | ||
578 | return err; | ||
579 | #else | ||
580 | return 0; | ||
581 | #endif | ||
582 | } | ||
583 | |||
584 | void gk20a_ctxsw_trace_cleanup(struct gk20a *g) | ||
585 | { | ||
586 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
587 | struct gk20a_ctxsw_trace *trace; | ||
588 | struct gk20a_ctxsw_dev *dev; | ||
589 | int i; | ||
590 | |||
591 | if (!g->ctxsw_trace) | ||
592 | return; | ||
593 | |||
594 | trace = g->ctxsw_trace; | ||
595 | dev = trace->devs; | ||
596 | |||
597 | for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { | ||
598 | nvgpu_mutex_destroy(&dev->write_lock); | ||
599 | dev++; | ||
600 | } | ||
601 | |||
602 | nvgpu_kfree(g, g->ctxsw_trace); | ||
603 | g->ctxsw_trace = NULL; | ||
604 | |||
605 | g->ops.fecs_trace.deinit(g); | ||
606 | #endif | ||
607 | } | ||
608 | |||
609 | int gk20a_ctxsw_trace_write(struct gk20a *g, | ||
610 | struct nvgpu_gpu_ctxsw_trace_entry *entry) | ||
611 | { | ||
612 | struct nvgpu_ctxsw_ring_header *hdr; | ||
613 | struct gk20a_ctxsw_dev *dev; | ||
614 | int ret = 0; | ||
615 | const char *reason; | ||
616 | u32 write_idx; | ||
617 | |||
618 | if (!g->ctxsw_trace) | ||
619 | return 0; | ||
620 | |||
621 | if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS)) | ||
622 | return -ENODEV; | ||
623 | |||
624 | dev = &g->ctxsw_trace->devs[entry->vmid]; | ||
625 | hdr = dev->hdr; | ||
626 | |||
627 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, | ||
628 | "dev=%p hdr=%p", dev, hdr); | ||
629 | |||
630 | nvgpu_mutex_acquire(&dev->write_lock); | ||
631 | |||
632 | if (unlikely(!hdr)) { | ||
633 | /* device has been released */ | ||
634 | ret = -ENODEV; | ||
635 | goto done; | ||
636 | } | ||
637 | |||
638 | write_idx = hdr->write_idx; | ||
639 | if (write_idx >= dev->num_ents) { | ||
640 | nvgpu_err(dev->g, | ||
641 | "write_idx=%u out of range [0..%u]", | ||
642 | write_idx, dev->num_ents); | ||
643 | ret = -ENOSPC; | ||
644 | reason = "write_idx out of range"; | ||
645 | goto disable; | ||
646 | } | ||
647 | |||
648 | entry->seqno = hdr->write_seqno++; | ||
649 | |||
650 | if (!dev->write_enabled) { | ||
651 | ret = -EBUSY; | ||
652 | reason = "write disabled"; | ||
653 | goto drop; | ||
654 | } | ||
655 | |||
656 | if (unlikely(ring_is_full(hdr))) { | ||
657 | ret = -ENOSPC; | ||
658 | reason = "user fifo full"; | ||
659 | goto drop; | ||
660 | } | ||
661 | |||
662 | if (!NVGPU_GPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) { | ||
663 | reason = "filtered out"; | ||
664 | goto filter; | ||
665 | } | ||
666 | |||
667 | nvgpu_log(g, gpu_dbg_ctxsw, | ||
668 | "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx", | ||
669 | entry->seqno, entry->context_id, entry->pid, | ||
670 | entry->tag, entry->timestamp); | ||
671 | |||
672 | dev->ents[write_idx] = *entry; | ||
673 | |||
674 | /* ensure record is written before updating write index */ | ||
675 | nvgpu_smp_wmb(); | ||
676 | |||
677 | write_idx++; | ||
678 | if (unlikely(write_idx >= hdr->num_ents)) | ||
679 | write_idx = 0; | ||
680 | hdr->write_idx = write_idx; | ||
681 | nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", | ||
682 | hdr->read_idx, hdr->write_idx, ring_len(hdr)); | ||
683 | |||
684 | nvgpu_mutex_release(&dev->write_lock); | ||
685 | return ret; | ||
686 | |||
687 | disable: | ||
688 | g->ops.fecs_trace.disable(g); | ||
689 | |||
690 | drop: | ||
691 | hdr->drop_count++; | ||
692 | |||
693 | filter: | ||
694 | nvgpu_log(g, gpu_dbg_ctxsw, | ||
695 | "dropping seqno=%d context_id=%08x pid=%lld " | ||
696 | "tag=%x time=%llx (%s)", | ||
697 | entry->seqno, entry->context_id, entry->pid, | ||
698 | entry->tag, entry->timestamp, reason); | ||
699 | |||
700 | done: | ||
701 | nvgpu_mutex_release(&dev->write_lock); | ||
702 | return ret; | ||
703 | } | ||
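The exit labels in this writer fall through deliberately: an out-of-range write_idx disables the FECS tracer and is still counted as a drop; ordinary drops bump hdr->drop_count so a consumer can detect loss; filtered-out entries are only logged. Note also that write_seqno advances even for dropped or filtered entries (it is assigned before the write_enabled check), so gaps in the seqno stream are what let a reader spot missing records.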
704 | |||
705 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) | ||
706 | { | ||
707 | struct gk20a_ctxsw_dev *dev; | ||
708 | |||
709 | if (!g->ctxsw_trace) | ||
710 | return; | ||
711 | |||
712 | dev = &g->ctxsw_trace->devs[vmid]; | ||
713 | nvgpu_cond_signal_interruptible(&dev->readout_wq); | ||
714 | } | ||
715 | |||
716 | void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) | ||
717 | { | ||
718 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
719 | struct nvgpu_gpu_ctxsw_trace_entry entry = { | ||
720 | .vmid = 0, | ||
721 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, | ||
722 | .context_id = 0, | ||
723 | .pid = ch->tgid, | ||
724 | }; | ||
725 | |||
726 | if (!g->ctxsw_trace) | ||
727 | return; | ||
728 | |||
729 | g->ops.ptimer.read_ptimer(g, &entry.timestamp); | ||
730 | gk20a_ctxsw_trace_write(g, &entry); | ||
731 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
732 | #endif | ||
733 | trace_gk20a_channel_reset(ch->chid, ch->tsgid); | ||
734 | } | ||
735 | |||
736 | void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) | ||
737 | { | ||
738 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
739 | struct nvgpu_gpu_ctxsw_trace_entry entry = { | ||
740 | .vmid = 0, | ||
741 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, | ||
742 | .context_id = 0, | ||
743 | .pid = tsg->tgid, | ||
744 | }; | ||
745 | |||
746 | if (!g->ctxsw_trace) | ||
747 | return; | ||
748 | |||
749 | g->ops.ptimer.read_ptimer(g, &entry.timestamp); | ||
750 | gk20a_ctxsw_trace_write(g, &entry); | ||
751 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
752 | #endif | ||
753 | trace_gk20a_channel_reset(~0, tsg->tsgid); | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * Convert Linux nvgpu ctxsw tags of the form NVGPU_CTXSW_TAG_* | ||
758 | * into common nvgpu ctxsw tags of the form NVGPU_GPU_CTXSW_TAG_* | ||
759 | */ | ||
760 | |||
761 | u8 nvgpu_gpu_ctxsw_tags_to_common_tags(u8 tags) | ||
762 | { | ||
763 | switch (tags) { | ||
764 | case NVGPU_CTXSW_TAG_SOF: | ||
765 | return NVGPU_GPU_CTXSW_TAG_SOF; | ||
766 | case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST: | ||
767 | return NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST; | ||
768 | case NVGPU_CTXSW_TAG_FE_ACK: | ||
769 | return NVGPU_GPU_CTXSW_TAG_FE_ACK; | ||
770 | case NVGPU_CTXSW_TAG_FE_ACK_WFI: | ||
771 | return NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI; | ||
772 | case NVGPU_CTXSW_TAG_FE_ACK_GFXP: | ||
773 | return NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP; | ||
774 | case NVGPU_CTXSW_TAG_FE_ACK_CTAP: | ||
775 | return NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP; | ||
776 | case NVGPU_CTXSW_TAG_FE_ACK_CILP: | ||
777 | return NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP; | ||
778 | case NVGPU_CTXSW_TAG_SAVE_END: | ||
779 | return NVGPU_GPU_CTXSW_TAG_SAVE_END; | ||
780 | case NVGPU_CTXSW_TAG_RESTORE_START: | ||
781 | return NVGPU_GPU_CTXSW_TAG_RESTORE_START; | ||
782 | case NVGPU_CTXSW_TAG_CONTEXT_START: | ||
783 | return NVGPU_GPU_CTXSW_TAG_CONTEXT_START; | ||
784 | case NVGPU_CTXSW_TAG_ENGINE_RESET: | ||
785 | return NVGPU_GPU_CTXSW_TAG_ENGINE_RESET; | ||
786 | case NVGPU_CTXSW_TAG_INVALID_TIMESTAMP: | ||
787 | return NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP; | ||
788 | } | ||
789 | |||
790 | WARN_ON(1); | ||
791 | return tags; | ||
792 | } | ||
diff --git a/include/os/linux/ctxsw_trace.h b/include/os/linux/ctxsw_trace.h new file mode 100644 index 0000000..88ca7f2 --- /dev/null +++ b/include/os/linux/ctxsw_trace.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __CTXSW_TRACE_H__ | ||
18 | #define __CTXSW_TRACE_H__ | ||
19 | |||
20 | #include <nvgpu/types.h> | ||
21 | |||
22 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 | ||
23 | |||
24 | struct file; | ||
25 | struct inode; | ||
26 | struct poll_table_struct; | ||
27 | |||
28 | struct gk20a; | ||
29 | |||
30 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); | ||
31 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); | ||
32 | long gk20a_ctxsw_dev_ioctl(struct file *filp, | ||
33 | unsigned int cmd, unsigned long arg); | ||
34 | ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, | ||
35 | size_t size, loff_t *offs); | ||
36 | unsigned int gk20a_ctxsw_dev_poll(struct file *filp, | ||
37 | struct poll_table_struct *pts); | ||
38 | |||
39 | #endif /* __CTXSW_TRACE_H__ */ | ||
diff --git a/include/os/linux/debug.c b/include/os/linux/debug.c new file mode 100644 index 0000000..5f0703c --- /dev/null +++ b/include/os/linux/debug.c | |||
@@ -0,0 +1,457 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_cde.h" | ||
16 | #include "debug_ce.h" | ||
17 | #include "debug_fifo.h" | ||
18 | #include "debug_gr.h" | ||
19 | #include "debug_allocator.h" | ||
20 | #include "debug_kmem.h" | ||
21 | #include "debug_pmu.h" | ||
22 | #include "debug_sched.h" | ||
23 | #include "debug_hal.h" | ||
24 | #include "debug_xve.h" | ||
25 | #include "debug_ltc.h" | ||
26 | #include "debug_bios.h" | ||
27 | #include "os_linux.h" | ||
28 | #include "platform_gk20a.h" | ||
29 | |||
30 | #include <nvgpu/gk20a.h> | ||
31 | |||
32 | #include <linux/debugfs.h> | ||
33 | #include <linux/seq_file.h> | ||
34 | #include <linux/uaccess.h> | ||
35 | |||
36 | #include <nvgpu/debug.h> | ||
37 | |||
38 | unsigned int gk20a_debug_trace_cmdbuf; | ||
39 | |||
40 | static inline void gk20a_debug_write_printk(void *ctx, const char *str, | ||
41 | size_t len) | ||
42 | { | ||
43 | pr_info("%s", str); | ||
44 | } | ||
45 | |||
46 | static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, | ||
47 | size_t len) | ||
48 | { | ||
49 | seq_write((struct seq_file *)ctx, str, len); | ||
50 | } | ||
51 | |||
52 | void gk20a_debug_output(struct gk20a_debug_output *o, | ||
53 | const char *fmt, ...) | ||
54 | { | ||
55 | va_list args; | ||
56 | int len; | ||
57 | |||
58 | va_start(args, fmt); | ||
59 | len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); | ||
60 | va_end(args); | ||
61 | o->fn(o->ctx, o->buf, len); | ||
62 | } | ||
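The .fn/.ctx indirection is what lets the same register-dump code target the kernel log or a seq_file without knowing which; adding a third sink is just another callback. A minimal sketch that accumulates output into a caller-supplied buffer (the strbuf_ctx type is hypothetical, not part of this driver):

    #include <linux/string.h>

    struct strbuf_ctx {             /* hypothetical accumulator */
            char *dst;
            size_t len;
            size_t cap;
    };

    static void gk20a_debug_write_to_strbuf(void *ctx, const char *str,
                                            size_t len)
    {
            struct strbuf_ctx *sb = ctx;

            if (sb->len + len < sb->cap) {
                    memcpy(sb->dst + sb->len, str, len);
                    sb->len += len;
            }
    }

Wiring it up mirrors the printk case above: fill a struct gk20a_debug_output with .fn = gk20a_debug_write_to_strbuf and .ctx pointing at the accumulator, then pass it to the dump routines.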
63 | |||
64 | static int gk20a_gr_dump_regs(struct gk20a *g, | ||
65 | struct gk20a_debug_output *o) | ||
66 | { | ||
67 | if (g->ops.gr.dump_gr_regs) | ||
68 | gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); | ||
69 | |||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | int gk20a_gr_debug_dump(struct gk20a *g) | ||
74 | { | ||
75 | struct gk20a_debug_output o = { | ||
76 | .fn = gk20a_debug_write_printk | ||
77 | }; | ||
78 | |||
79 | gk20a_gr_dump_regs(g, &o); | ||
80 | |||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | static int gk20a_gr_debug_show(struct seq_file *s, void *unused) | ||
85 | { | ||
86 | struct device *dev = s->private; | ||
87 | struct gk20a *g = gk20a_get_platform(dev)->g; | ||
88 | struct gk20a_debug_output o = { | ||
89 | .fn = gk20a_debug_write_to_seqfile, | ||
90 | .ctx = s, | ||
91 | }; | ||
92 | int err; | ||
93 | |||
94 | err = gk20a_busy(g); | ||
95 | if (err) { | ||
96 | nvgpu_err(g, "failed to power on gpu: %d", err); | ||
97 | return err; | ||
98 | } | ||
99 | |||
100 | gk20a_gr_dump_regs(g, &o); | ||
101 | |||
102 | gk20a_idle(g); | ||
103 | |||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | void gk20a_debug_dump(struct gk20a *g) | ||
108 | { | ||
109 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
110 | struct gk20a_debug_output o = { | ||
111 | .fn = gk20a_debug_write_printk | ||
112 | }; | ||
113 | |||
114 | if (platform->dump_platform_dependencies) | ||
115 | platform->dump_platform_dependencies(dev_from_gk20a(g)); | ||
116 | |||
117 | /* HAL only initialized after 1st power-on */ | ||
118 | if (g->ops.debug.show_dump) | ||
119 | g->ops.debug.show_dump(g, &o); | ||
120 | } | ||
121 | |||
122 | static int gk20a_debug_show(struct seq_file *s, void *unused) | ||
123 | { | ||
124 | struct device *dev = s->private; | ||
125 | struct gk20a_debug_output o = { | ||
126 | .fn = gk20a_debug_write_to_seqfile, | ||
127 | .ctx = s, | ||
128 | }; | ||
129 | struct gk20a *g; | ||
130 | int err; | ||
131 | |||
132 | g = gk20a_get_platform(dev)->g; | ||
133 | |||
134 | err = gk20a_busy(g); | ||
135 | if (err) { | ||
136 | nvgpu_err(g, "failed to power on gpu: %d", err); | ||
137 | return err; | ||
138 | } | ||
139 | |||
140 | /* HAL only initialized after 1st power-on */ | ||
141 | if (g->ops.debug.show_dump) | ||
142 | g->ops.debug.show_dump(g, &o); | ||
143 | |||
144 | gk20a_idle(g); | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | static int gk20a_gr_debug_open(struct inode *inode, struct file *file) | ||
149 | { | ||
150 | return single_open(file, gk20a_gr_debug_show, inode->i_private); | ||
151 | } | ||
152 | |||
153 | static int gk20a_debug_open(struct inode *inode, struct file *file) | ||
154 | { | ||
155 | return single_open(file, gk20a_debug_show, inode->i_private); | ||
156 | } | ||
157 | |||
158 | static const struct file_operations gk20a_gr_debug_fops = { | ||
159 | .open = gk20a_gr_debug_open, | ||
160 | .read = seq_read, | ||
161 | .llseek = seq_lseek, | ||
162 | .release = single_release, | ||
163 | }; | ||
164 | |||
165 | static const struct file_operations gk20a_debug_fops = { | ||
166 | .open = gk20a_debug_open, | ||
167 | .read = seq_read, | ||
168 | .llseek = seq_lseek, | ||
169 | .release = single_release, | ||
170 | }; | ||
171 | |||
172 | void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) | ||
173 | { | ||
174 | g->ops.fifo.dump_pbdma_status(g, o); | ||
175 | g->ops.fifo.dump_eng_status(g, o); | ||
176 | |||
177 | gk20a_debug_dump_all_channel_status_ramfc(g, o); | ||
178 | } | ||
179 | |||
180 | static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) | ||
181 | { | ||
182 | char buf[3]; | ||
183 | struct gk20a *g = file->private_data; | ||
184 | |||
185 | if (g->mm.disable_bigpage) | ||
186 | buf[0] = 'Y'; | ||
187 | else | ||
188 | buf[0] = 'N'; | ||
189 | buf[1] = '\n'; | ||
190 | buf[2] = 0x00; | ||
191 | return simple_read_from_buffer(user_buf, count, ppos, buf, 2); | ||
192 | } | ||
193 | |||
194 | static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) | ||
195 | { | ||
196 | char buf[32]; | ||
197 | int buf_size; | ||
198 | bool bv; | ||
199 | struct gk20a *g = file->private_data; | ||
200 | |||
201 | buf_size = min(count, (sizeof(buf)-1)); | ||
202 | if (copy_from_user(buf, user_buf, buf_size)) | ||
203 | return -EFAULT; | ||
204 | buf[buf_size] = '\0'; | ||
205 | if (strtobool(buf, &bv) == 0) { | ||
206 | g->mm.disable_bigpage = bv; | ||
207 | gk20a_init_gpu_characteristics(g); | ||
208 | } | ||
209 | |||
210 | return count; | ||
211 | } | ||
212 | |||
213 | static const struct file_operations disable_bigpage_fops = { | ||
214 | .open = simple_open, | ||
215 | .read = disable_bigpage_read, | ||
216 | .write = disable_bigpage_write, | ||
217 | }; | ||
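In practice this node is driven from the shell: writing any string strtobool() accepts ('y', 'Y', '1' to set; 'n', 'N', '0' to clear) into <debugfs>/<gpu>/disable_bigpage flips the flag, and reading it back returns the single-character Y/N form produced above. Note that gk20a_init_gpu_characteristics() is re-run on every successful write, so the userspace-visible big-page capability stays in sync with the flag.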
218 | |||
219 | static int railgate_residency_show(struct seq_file *s, void *data) | ||
220 | { | ||
221 | struct gk20a *g = s->private; | ||
222 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
223 | unsigned long time_since_last_state_transition_ms; | ||
224 | unsigned long total_rail_gate_time_ms; | ||
225 | unsigned long total_rail_ungate_time_ms; | ||
226 | |||
227 | if (platform->is_railgated(dev_from_gk20a(g))) { | ||
228 | time_since_last_state_transition_ms = | ||
229 | jiffies_to_msecs(jiffies - | ||
230 | g->pstats.last_rail_gate_complete); | ||
231 | total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms; | ||
232 | total_rail_gate_time_ms = | ||
233 | g->pstats.total_rail_gate_time_ms + | ||
234 | time_since_last_state_transition_ms; | ||
235 | } else { | ||
236 | time_since_last_state_transition_ms = | ||
237 | jiffies_to_msecs(jiffies - | ||
238 | g->pstats.last_rail_ungate_complete); | ||
239 | total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms; | ||
240 | total_rail_ungate_time_ms = | ||
241 | g->pstats.total_rail_ungate_time_ms + | ||
242 | time_since_last_state_transition_ms; | ||
243 | } | ||
244 | |||
245 | seq_printf(s, "Time with Rails Gated: %lu ms\n" | ||
246 | "Time with Rails UnGated: %lu ms\n" | ||
247 | "Total railgating cycles: %lu\n", | ||
248 | total_rail_gate_time_ms, | ||
249 | total_rail_ungate_time_ms, | ||
250 | g->pstats.railgating_cycle_count - 1); | ||
251 | return 0; | ||
252 | |||
253 | } | ||
254 | |||
255 | static int railgate_residency_open(struct inode *inode, struct file *file) | ||
256 | { | ||
257 | return single_open(file, railgate_residency_show, inode->i_private); | ||
258 | } | ||
259 | |||
260 | static const struct file_operations railgate_residency_fops = { | ||
261 | .open = railgate_residency_open, | ||
262 | .read = seq_read, | ||
263 | .llseek = seq_lseek, | ||
264 | .release = single_release, | ||
265 | }; | ||
266 | |||
267 | static int gk20a_railgating_debugfs_init(struct gk20a *g) | ||
268 | { | ||
269 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
270 | struct dentry *d; | ||
271 | |||
272 | d = debugfs_create_file( | ||
273 | "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
274 | &railgate_residency_fops); | ||
275 | if (!d) | ||
276 | return -ENOMEM; | ||
277 | |||
278 | return 0; | ||
279 | } | ||
280 | static ssize_t timeouts_enabled_read(struct file *file, | ||
281 | char __user *user_buf, size_t count, loff_t *ppos) | ||
282 | { | ||
283 | char buf[3]; | ||
284 | struct gk20a *g = file->private_data; | ||
285 | |||
286 | if (nvgpu_is_timeouts_enabled(g)) | ||
287 | buf[0] = 'Y'; | ||
288 | else | ||
289 | buf[0] = 'N'; | ||
290 | buf[1] = '\n'; | ||
291 | buf[2] = 0x00; | ||
292 | return simple_read_from_buffer(user_buf, count, ppos, buf, 2); | ||
293 | } | ||
294 | |||
295 | static ssize_t timeouts_enabled_write(struct file *file, | ||
296 | const char __user *user_buf, size_t count, loff_t *ppos) | ||
297 | { | ||
298 | char buf[3]; | ||
299 | int buf_size; | ||
300 | bool timeouts_enabled; | ||
301 | struct gk20a *g = file->private_data; | ||
302 | |||
303 | buf_size = min(count, (sizeof(buf)-1)); | ||
304 | if (copy_from_user(buf, user_buf, buf_size)) | ||
305 | return -EFAULT; | ||
306 | buf[buf_size] = '\0'; | ||
307 | if (strtobool(buf, &timeouts_enabled) == 0) { | ||
308 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
309 | if (timeouts_enabled == false) { | ||
310 | /* requesting to disable timeouts */ | ||
311 | if (g->timeouts_disabled_by_user == false) { | ||
312 | nvgpu_atomic_inc(&g->timeouts_disabled_refcount); | ||
313 | g->timeouts_disabled_by_user = true; | ||
314 | } | ||
315 | } else { | ||
316 | /* requesting to enable timeouts */ | ||
317 | if (g->timeouts_disabled_by_user == true) { | ||
318 | nvgpu_atomic_dec(&g->timeouts_disabled_refcount); | ||
319 | g->timeouts_disabled_by_user = false; | ||
320 | } | ||
321 | } | ||
322 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
323 | } | ||
324 | |||
325 | return count; | ||
326 | } | ||
327 | |||
328 | static const struct file_operations timeouts_enabled_fops = { | ||
329 | .open = simple_open, | ||
330 | .read = timeouts_enabled_read, | ||
331 | .write = timeouts_enabled_write, | ||
332 | }; | ||
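A reference count is used here rather than a plain flag because the user toggle is presumably only one of several holders of "timeouts disabled" (the dbg_sessions_lock suggests debugger sessions take references through the same counter). The timeouts_disabled_by_user flag makes the debugfs writer idempotent: repeated writes of the same value contribute at most one reference, so the other holders are never over- or under-counted.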
333 | |||
334 | void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) | ||
335 | { | ||
336 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
337 | struct device *dev = dev_from_gk20a(g); | ||
338 | |||
339 | l->debugfs = debugfs_create_dir(dev_name(dev), NULL); | ||
340 | if (!l->debugfs) | ||
341 | return; | ||
342 | |||
343 | if (debugfs_symlink) | ||
344 | l->debugfs_alias = | ||
345 | debugfs_create_symlink(debugfs_symlink, | ||
346 | NULL, dev_name(dev)); | ||
347 | |||
348 | debugfs_create_file("status", S_IRUGO, l->debugfs, | ||
349 | dev, &gk20a_debug_fops); | ||
350 | debugfs_create_file("gr_status", S_IRUGO, l->debugfs, | ||
351 | dev, &gk20a_gr_debug_fops); | ||
352 | debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, | ||
353 | l->debugfs, &gk20a_debug_trace_cmdbuf); | ||
354 | |||
355 | debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR, | ||
356 | l->debugfs, &g->ch_wdt_timeout_ms); | ||
357 | |||
358 | debugfs_create_u32("disable_syncpoints", S_IRUGO, | ||
359 | l->debugfs, &g->disable_syncpoints); | ||
360 | |||
361 | /* New debug logging API. */ | ||
362 | debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR, | ||
363 | l->debugfs, &g->log_mask); | ||
364 | debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR, | ||
365 | l->debugfs, &g->log_trace); | ||
366 | |||
367 | l->debugfs_ltc_enabled = | ||
368 | debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, | ||
369 | l->debugfs, | ||
370 | &g->mm.ltc_enabled_target); | ||
371 | |||
372 | l->debugfs_gr_idle_timeout_default = | ||
373 | debugfs_create_u32("gr_idle_timeout_default_us", | ||
374 | S_IRUGO|S_IWUSR, l->debugfs, | ||
375 | &g->gr_idle_timeout_default); | ||
376 | l->debugfs_timeouts_enabled = | ||
377 | debugfs_create_file("timeouts_enabled", | ||
378 | S_IRUGO|S_IWUSR, | ||
379 | l->debugfs, | ||
380 | g, | ||
381 | &timeouts_enabled_fops); | ||
382 | |||
383 | l->debugfs_disable_bigpage = | ||
384 | debugfs_create_file("disable_bigpage", | ||
385 | S_IRUGO|S_IWUSR, | ||
386 | l->debugfs, | ||
387 | g, | ||
388 | &disable_bigpage_fops); | ||
389 | |||
390 | l->debugfs_timeslice_low_priority_us = | ||
391 | debugfs_create_u32("timeslice_low_priority_us", | ||
392 | S_IRUGO|S_IWUSR, | ||
393 | l->debugfs, | ||
394 | &g->timeslice_low_priority_us); | ||
395 | l->debugfs_timeslice_medium_priority_us = | ||
396 | debugfs_create_u32("timeslice_medium_priority_us", | ||
397 | S_IRUGO|S_IWUSR, | ||
398 | l->debugfs, | ||
399 | &g->timeslice_medium_priority_us); | ||
400 | l->debugfs_timeslice_high_priority_us = | ||
401 | debugfs_create_u32("timeslice_high_priority_us", | ||
402 | S_IRUGO|S_IWUSR, | ||
403 | l->debugfs, | ||
404 | &g->timeslice_high_priority_us); | ||
405 | l->debugfs_runlist_interleave = | ||
406 | debugfs_create_bool("runlist_interleave", | ||
407 | S_IRUGO|S_IWUSR, | ||
408 | l->debugfs, | ||
409 | &g->runlist_interleave); | ||
410 | l->debugfs_force_preemption_gfxp = | ||
411 | debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR, | ||
412 | l->debugfs, | ||
413 | &g->gr.ctx_vars.force_preemption_gfxp); | ||
414 | |||
415 | l->debugfs_force_preemption_cilp = | ||
416 | debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR, | ||
417 | l->debugfs, | ||
418 | &g->gr.ctx_vars.force_preemption_cilp); | ||
419 | |||
420 | l->debugfs_dump_ctxsw_stats = | ||
421 | debugfs_create_bool("dump_ctxsw_stats_on_channel_close", | ||
422 | S_IRUGO|S_IWUSR, l->debugfs, | ||
423 | &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close); | ||
424 | |||
425 | gr_gk20a_debugfs_init(g); | ||
426 | gk20a_pmu_debugfs_init(g); | ||
427 | gk20a_railgating_debugfs_init(g); | ||
428 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
429 | gk20a_cde_debugfs_init(g); | ||
430 | #endif | ||
431 | gk20a_ce_debugfs_init(g); | ||
432 | nvgpu_alloc_debugfs_init(g); | ||
433 | nvgpu_hal_debugfs_init(g); | ||
434 | gk20a_fifo_debugfs_init(g); | ||
435 | gk20a_sched_debugfs_init(g); | ||
436 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
437 | nvgpu_kmem_debugfs_init(g); | ||
438 | #endif | ||
439 | nvgpu_ltc_debugfs_init(g); | ||
440 | if (g->pci_vendor_id) { | ||
441 | nvgpu_xve_debugfs_init(g); | ||
442 | nvgpu_bios_debugfs_init(g); | ||
443 | } | ||
444 | } | ||
445 | |||
446 | void gk20a_debug_deinit(struct gk20a *g) | ||
447 | { | ||
448 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
449 | |||
450 | if (!l->debugfs) | ||
451 | return; | ||
452 | |||
453 | gk20a_fifo_debugfs_deinit(g); | ||
454 | |||
455 | debugfs_remove_recursive(l->debugfs); | ||
456 | debugfs_remove(l->debugfs_alias); | ||
457 | } | ||
diff --git a/include/os/linux/debug_allocator.c b/include/os/linux/debug_allocator.c new file mode 100644 index 0000000..d63a903 --- /dev/null +++ b/include/os/linux/debug_allocator.c | |||
@@ -0,0 +1,69 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_allocator.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | #include <nvgpu/allocator.h> | ||
22 | |||
23 | static int __alloc_show(struct seq_file *s, void *unused) | ||
24 | { | ||
25 | struct nvgpu_allocator *a = s->private; | ||
26 | |||
27 | nvgpu_alloc_print_stats(a, s, 1); | ||
28 | |||
29 | return 0; | ||
30 | } | ||
31 | |||
32 | static int __alloc_open(struct inode *inode, struct file *file) | ||
33 | { | ||
34 | return single_open(file, __alloc_show, inode->i_private); | ||
35 | } | ||
36 | |||
37 | static const struct file_operations __alloc_fops = { | ||
38 | .open = __alloc_open, | ||
39 | .read = seq_read, | ||
40 | .llseek = seq_lseek, | ||
41 | .release = single_release, | ||
42 | }; | ||
43 | |||
44 | void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) | ||
45 | { | ||
46 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
47 | |||
48 | if (!l->debugfs_allocators) | ||
49 | return; | ||
50 | |||
51 | a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, | ||
52 | l->debugfs_allocators, | ||
53 | a, &__alloc_fops); | ||
54 | } | ||
55 | |||
56 | void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) | ||
57 | { | ||
58 | } | ||
59 | |||
60 | void nvgpu_alloc_debugfs_init(struct gk20a *g) | ||
61 | { | ||
62 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
63 | |||
64 | l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs); | ||
65 | if (IS_ERR_OR_NULL(l->debugfs_allocators)) { | ||
66 | l->debugfs_allocators = NULL; | ||
67 | return; | ||
68 | } | ||
69 | } | ||
diff --git a/include/os/linux/debug_allocator.h b/include/os/linux/debug_allocator.h new file mode 100644 index 0000000..1b21cfc --- /dev/null +++ b/include/os/linux/debug_allocator.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_ALLOCATOR_H__ | ||
16 | #define __NVGPU_DEBUG_ALLOCATOR_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | void nvgpu_alloc_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */ | ||
diff --git a/include/os/linux/debug_bios.c b/include/os/linux/debug_bios.c new file mode 100644 index 0000000..f69ccf3 --- /dev/null +++ b/include/os/linux/debug_bios.c | |||
@@ -0,0 +1,60 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <nvgpu/types.h> | ||
16 | |||
17 | #include "debug_bios.h" | ||
18 | #include "os_linux.h" | ||
19 | |||
20 | #include <linux/debugfs.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | |||
23 | static int bios_version_show(struct seq_file *s, void *unused) | ||
24 | { | ||
25 | struct gk20a *g = s->private; | ||
26 | |||
27 | seq_printf(s, "Version %02x.%02x.%02x.%02x.%02x\n", | ||
28 | (g->bios.vbios_version >> 24) & 0xFF, | ||
29 | (g->bios.vbios_version >> 16) & 0xFF, | ||
30 | (g->bios.vbios_version >> 8) & 0xFF, | ||
31 | (g->bios.vbios_version >> 0) & 0xFF, | ||
32 | (g->bios.vbios_oem_version) & 0xFF); | ||
33 | |||
34 | return 0; | ||
35 | } | ||
36 | |||
37 | static int bios_version_open(struct inode *inode, struct file *file) | ||
38 | { | ||
39 | return single_open(file, bios_version_show, inode->i_private); | ||
40 | } | ||
41 | |||
42 | static const struct file_operations bios_version_fops = { | ||
43 | .open = bios_version_open, | ||
44 | .read = seq_read, | ||
45 | .llseek = seq_lseek, | ||
46 | .release = single_release, | ||
47 | }; | ||
48 | |||
49 | |||
50 | int nvgpu_bios_debugfs_init(struct gk20a *g) | ||
51 | { | ||
52 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
53 | struct dentry *gpu_root = l->debugfs; | ||
54 | |||
55 | debugfs_create_file("bios", S_IRUGO, | ||
56 | gpu_root, g, | ||
57 | &bios_version_fops); | ||
58 | |||
59 | return 0; | ||
60 | } | ||
diff --git a/include/os/linux/debug_bios.h b/include/os/linux/debug_bios.h new file mode 100644 index 0000000..f8e7783 --- /dev/null +++ b/include/os/linux/debug_bios.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_BIOS_H__ | ||
16 | #define __NVGPU_DEBUG_BIOS_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | int nvgpu_bios_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_BIOS_H__ */ | ||
diff --git a/include/os/linux/debug_cde.c b/include/os/linux/debug_cde.c new file mode 100644 index 0000000..f0afa6e --- /dev/null +++ b/include/os/linux/debug_cde.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_cde.h" | ||
16 | #include "platform_gk20a.h" | ||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include <linux/debugfs.h> | ||
20 | |||
21 | |||
22 | static ssize_t gk20a_cde_reload_write(struct file *file, | ||
23 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
24 | { | ||
25 | struct nvgpu_os_linux *l = file->private_data; | ||
26 | gk20a_cde_reload(l); | ||
27 | return count; | ||
28 | } | ||
29 | |||
30 | static const struct file_operations gk20a_cde_reload_fops = { | ||
31 | .open = simple_open, | ||
32 | .write = gk20a_cde_reload_write, | ||
33 | }; | ||
34 | |||
35 | void gk20a_cde_debugfs_init(struct gk20a *g) | ||
36 | { | ||
37 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
38 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
39 | |||
40 | if (!platform->has_cde) | ||
41 | return; | ||
42 | |||
43 | debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, | ||
44 | l->debugfs, &l->cde_app.shader_parameter); | ||
45 | debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, | ||
46 | l->debugfs, &l->cde_app.ctx_count); | ||
47 | debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, | ||
48 | l->debugfs, &l->cde_app.ctx_usecount); | ||
49 | debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, | ||
50 | l->debugfs, &l->cde_app.ctx_count_top); | ||
51 | debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, | ||
52 | l, &gk20a_cde_reload_fops); | ||
53 | } | ||
diff --git a/include/os/linux/debug_cde.h b/include/os/linux/debug_cde.h new file mode 100644 index 0000000..4895edd --- /dev/null +++ b/include/os/linux/debug_cde.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_CDE_H__ | ||
16 | #define __NVGPU_DEBUG_CDE_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | void gk20a_cde_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_CDE_H__ */ | ||
diff --git a/include/os/linux/debug_ce.c b/include/os/linux/debug_ce.c new file mode 100644 index 0000000..cea0bb4 --- /dev/null +++ b/include/os/linux/debug_ce.c | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_ce.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | |||
20 | void gk20a_ce_debugfs_init(struct gk20a *g) | ||
21 | { | ||
22 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
23 | |||
24 | debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, | ||
25 | l->debugfs, &g->ce_app.ctx_count); | ||
26 | debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, | ||
27 | l->debugfs, &g->ce_app.app_state); | ||
28 | debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, | ||
29 | l->debugfs, &g->ce_app.next_ctx_id); | ||
30 | } | ||
diff --git a/include/os/linux/debug_ce.h b/include/os/linux/debug_ce.h new file mode 100644 index 0000000..2a8750c --- /dev/null +++ b/include/os/linux/debug_ce.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_CE_H__ | ||
16 | #define __NVGPU_DEBUG_CE_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | void gk20a_ce_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_CE_H__ */ | ||
diff --git a/include/os/linux/debug_clk_gm20b.c b/include/os/linux/debug_clk_gm20b.c new file mode 100644 index 0000000..b8b95fd --- /dev/null +++ b/include/os/linux/debug_clk_gm20b.c | |||
@@ -0,0 +1,280 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <linux/uaccess.h> | ||
16 | #include <linux/debugfs.h> | ||
17 | #include <linux/seq_file.h> | ||
18 | |||
19 | #include <nvgpu/io.h> | ||
20 | #include <nvgpu/clk_arb.h> | ||
21 | |||
22 | #include "gm20b/clk_gm20b.h" | ||
23 | #include "os_linux.h" | ||
24 | #include "platform_gk20a.h" | ||
25 | |||
26 | static int rate_get(void *data, u64 *val) | ||
27 | { | ||
28 | struct gk20a *g = (struct gk20a *)data; | ||
29 | struct clk_gk20a *clk = &g->clk; | ||
30 | |||
31 | *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
32 | return 0; | ||
33 | } | ||
34 | static int rate_set(void *data, u64 val) | ||
35 | { | ||
36 | struct gk20a *g = (struct gk20a *)data; | ||
37 | if (nvgpu_clk_arb_has_active_req(g)) | ||
38 | return 0; | ||
39 | return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val); | ||
40 | } | ||
41 | DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); | ||
42 | |||
43 | static int pll_reg_show(struct seq_file *s, void *data) | ||
44 | { | ||
45 | struct gk20a *g = s->private; | ||
46 | struct nvgpu_clk_pll_debug_data d; | ||
47 | u32 reg, m, n, pl, f; | ||
48 | int err = 0; | ||
49 | |||
50 | if (g->ops.clk.get_pll_debug_data) { | ||
51 | err = g->ops.clk.get_pll_debug_data(g, &d); | ||
52 | if (err) | ||
53 | return err; | ||
54 | } else { | ||
55 | return -EINVAL; | ||
56 | } | ||
57 | |||
58 | seq_printf(s, "bypassctrl = %s, ", | ||
59 | d.trim_sys_bypassctrl_val ? "bypass" : "vco"); | ||
60 | seq_printf(s, "sel_vco = %s, ", | ||
61 | d.trim_sys_sel_vco_val ? "vco" : "bypass"); | ||
62 | |||
63 | seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val, | ||
64 | d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled", | ||
65 | d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked", | ||
66 | d.trim_sys_gpcpll_cfg_sync_on ? "sync_on" : "sync_off"); | ||
67 | |||
68 | reg = d.trim_sys_gpcpll_coeff_val; | ||
69 | m = d.trim_sys_gpcpll_coeff_mdiv; | ||
70 | n = d.trim_sys_gpcpll_coeff_ndiv; | ||
71 | pl = d.trim_sys_gpcpll_coeff_pldiv; | ||
72 | f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl)); | ||
73 | seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); | ||
74 | seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); | ||
75 | |||
76 | seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n", | ||
77 | d.trim_sys_gpcpll_dvfs0_val, | ||
78 | d.trim_sys_gpcpll_dvfs0_dfs_coeff, | ||
79 | d.trim_sys_gpcpll_dvfs0_dfs_det_max, | ||
80 | d.trim_sys_gpcpll_dvfs0_dfs_dc_offset); | ||
81 | |||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | static int pll_reg_open(struct inode *inode, struct file *file) | ||
86 | { | ||
87 | return single_open(file, pll_reg_show, inode->i_private); | ||
88 | } | ||
89 | |||
90 | static const struct file_operations pll_reg_fops = { | ||
91 | .open = pll_reg_open, | ||
92 | .read = seq_read, | ||
93 | .llseek = seq_lseek, | ||
94 | .release = single_release, | ||
95 | }; | ||
96 | |||
97 | static int pll_reg_raw_show(struct seq_file *s, void *data) | ||
98 | { | ||
99 | struct gk20a *g = s->private; | ||
100 | struct nvgpu_clk_pll_debug_data d; | ||
101 | u32 reg; | ||
102 | int err = 0; | ||
103 | |||
104 | if (g->ops.clk.get_pll_debug_data) { | ||
105 | err = g->ops.clk.get_pll_debug_data(g, &d); | ||
106 | if (err) | ||
107 | return err; | ||
108 | } else { | ||
109 | return -EINVAL; | ||
110 | } | ||
111 | |||
112 | seq_puts(s, "GPCPLL REGISTERS:\n"); | ||
113 | for (reg = d.trim_sys_gpcpll_cfg_reg; | ||
114 | reg < d.trim_sys_gpcpll_dvfs2_reg; | ||
115 | reg += sizeof(u32)) | ||
116 | seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); | ||
117 | |||
118 | reg = d.trim_bcast_gpcpll_dvfs2_reg; | ||
119 | if (reg) | ||
120 | seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); | ||
121 | |||
122 | seq_puts(s, "\nGPC CLK OUT REGISTERS:\n"); | ||
123 | |||
124 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg, | ||
125 | d.trim_sys_sel_vco_val); | ||
126 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg, | ||
127 | d.trim_sys_gpc2clk_out_val); | ||
128 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg, | ||
129 | d.trim_sys_bypassctrl_val); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int pll_reg_raw_open(struct inode *inode, struct file *file) | ||
135 | { | ||
136 | return single_open(file, pll_reg_raw_show, inode->i_private); | ||
137 | } | ||
138 | |||
139 | static ssize_t pll_reg_raw_write(struct file *file, | ||
140 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
141 | { | ||
142 | struct gk20a *g = file->f_path.dentry->d_inode->i_private; | ||
143 | char buf[80]; | ||
144 | u32 reg, val; | ||
145 | int err = 0; | ||
146 | |||
147 | if (sizeof(buf) <= count) | ||
148 | return -EINVAL; | ||
149 | |||
150 | if (copy_from_user(buf, userbuf, count)) | ||
151 | return -EFAULT; | ||
152 | |||
153 | /* terminate buffer and trim - white spaces may be appended | ||
154 | * at the end when invoked from shell command line */ | ||
155 | buf[count] = '\0'; | ||
156 | strim(buf); | ||
157 | |||
158 | if (sscanf(buf, "[0x%x] = 0x%x", ®, &val) != 2) | ||
159 | return -EINVAL; | ||
160 | |||
161 | if (g->ops.clk.pll_reg_write) | ||
162 | 	err = g->ops.clk.pll_reg_write(g, reg, val); | ||
163 | else | ||
164 | 	err = -EINVAL; | ||
165 | |||
166 | return err < 0 ? err : (ssize_t)count; | ||
167 | } | ||
168 | |||
169 | static const struct file_operations pll_reg_raw_fops = { | ||
170 | .open = pll_reg_raw_open, | ||
171 | .read = seq_read, | ||
172 | .write = pll_reg_raw_write, | ||
173 | .llseek = seq_lseek, | ||
174 | .release = single_release, | ||
175 | }; | ||
176 | |||
177 | static int monitor_get(void *data, u64 *val) | ||
178 | { | ||
179 | struct gk20a *g = (struct gk20a *)data; | ||
180 | int err = 0; | ||
181 | |||
182 | if (g->ops.clk.get_gpcclk_clock_counter) | ||
183 | err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val); | ||
184 | else | ||
185 | err = -EINVAL; | ||
186 | |||
187 | return err; | ||
188 | } | ||
189 | DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); | ||
190 | |||
191 | static int voltage_get(void *data, u64 *val) | ||
192 | { | ||
193 | struct gk20a *g = (struct gk20a *)data; | ||
194 | int err = 0; | ||
195 | |||
196 | if (g->ops.clk.get_voltage) | ||
197 | err = g->ops.clk.get_voltage(&g->clk, val); | ||
198 | else | ||
199 | err = -EINVAL; | ||
200 | |||
201 | return err; | ||
202 | } | ||
203 | DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n"); | ||
204 | |||
205 | static int pll_param_show(struct seq_file *s, void *data) | ||
206 | { | ||
207 | struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms(); | ||
208 | |||
209 | seq_printf(s, "ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n", | ||
210 | gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope, | ||
211 | gpc_pll_params->vco_ctrl); | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static int pll_param_open(struct inode *inode, struct file *file) | ||
216 | { | ||
217 | return single_open(file, pll_param_show, inode->i_private); | ||
218 | } | ||
219 | |||
220 | static const struct file_operations pll_param_fops = { | ||
221 | .open = pll_param_open, | ||
222 | .read = seq_read, | ||
223 | .llseek = seq_lseek, | ||
224 | .release = single_release, | ||
225 | }; | ||
226 | |||
227 | int gm20b_clk_init_debugfs(struct gk20a *g) | ||
228 | { | ||
229 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
230 | struct dentry *d; | ||
231 | |||
232 | if (!l->debugfs) | ||
233 | return -EINVAL; | ||
234 | |||
235 | d = debugfs_create_file( | ||
236 | "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops); | ||
237 | if (!d) | ||
238 | goto err_out; | ||
239 | |||
240 | d = debugfs_create_file( | ||
241 | "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops); | ||
242 | if (!d) | ||
243 | goto err_out; | ||
244 | |||
245 | d = debugfs_create_file("pll_reg_raw", | ||
246 | S_IRUGO, l->debugfs, g, &pll_reg_raw_fops); | ||
247 | if (!d) | ||
248 | goto err_out; | ||
249 | |||
250 | d = debugfs_create_file( | ||
251 | "monitor", S_IRUGO, l->debugfs, g, &monitor_fops); | ||
252 | if (!d) | ||
253 | goto err_out; | ||
254 | |||
255 | d = debugfs_create_file( | ||
256 | "voltage", S_IRUGO, l->debugfs, g, &voltage_fops); | ||
257 | if (!d) | ||
258 | goto err_out; | ||
259 | |||
260 | d = debugfs_create_file( | ||
261 | "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops); | ||
262 | if (!d) | ||
263 | goto err_out; | ||
264 | |||
265 | d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs, | ||
266 | (u32 *)&g->clk.gpc_pll.mode); | ||
267 | if (!d) | ||
268 | goto err_out; | ||
269 | |||
270 | d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO, | ||
271 | l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq); | ||
272 | if (!d) | ||
273 | goto err_out; | ||
274 | |||
275 | return 0; | ||
276 | |||
277 | err_out: | ||
278 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
279 | return -ENOMEM; | ||
280 | } | ||
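Note: the "rate" node above follows the standard DEFINE_SIMPLE_ATTRIBUTE pattern: a get callback formats a u64 on read, a set callback parses one on write, and the macro generates the file_operations. A minimal sketch of that pattern (the demo_* names and the module are hypothetical, not nvgpu code):

    #include <linux/module.h>
    #include <linux/fs.h>
    #include <linux/debugfs.h>

    static u64 demo_rate;            /* stands in for the GPC clock rate */
    static struct dentry *demo_root;

    static int demo_rate_get(void *data, u64 *val)
    {
            *val = *(u64 *)data;     /* called on read, fills *val */
            return 0;
    }

    static int demo_rate_set(void *data, u64 val)
    {
            *(u64 *)data = val;      /* called on write with the parsed value */
            return 0;
    }
    DEFINE_SIMPLE_ATTRIBUTE(demo_rate_fops, demo_rate_get, demo_rate_set,
                            "%llu\n");

    static int __init demo_init(void)
    {
            demo_root = debugfs_create_dir("rate_demo", NULL);
            debugfs_create_file("rate", S_IRUGO | S_IWUSR, demo_root,
                                &demo_rate, &demo_rate_fops);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            debugfs_remove_recursive(demo_root);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL v2");

In the real driver, echo 800000 > /sys/kernel/debug/<gpu>/rate routes through rate_set() into g->ops.clk.set_rate(), unless the clock arbiter holds an active request, in which case the write is silently ignored.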
diff --git a/include/os/linux/debug_clk_gm20b.h b/include/os/linux/debug_clk_gm20b.h new file mode 100644 index 0000000..850ad89 --- /dev/null +++ b/include/os/linux/debug_clk_gm20b.h | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __DEBUG_CLK_GM20B_H | ||
18 | #define __DEBUG_CLK_GM20B_H | ||
19 | |||
20 | #ifdef CONFIG_DEBUG_FS | ||
21 | int gm20b_clk_init_debugfs(struct gk20a *g); | ||
22 | #else | ||
23 | static inline int gm20b_clk_init_debugfs(struct gk20a *g) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | #endif | ||
28 | |||
29 | #endif | ||
diff --git a/include/os/linux/debug_clk_gp106.c b/include/os/linux/debug_clk_gp106.c new file mode 100644 index 0000000..4900c00 --- /dev/null +++ b/include/os/linux/debug_clk_gp106.c | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/debugfs.h> | ||
18 | |||
19 | #include <nvgpu/clk.h> | ||
20 | |||
21 | #include "os_linux.h" | ||
22 | |||
23 | void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock); | ||
24 | |||
25 | static int gp106_get_rate_show(void *data, u64 *val) | ||
26 | { | ||
27 | struct namemap_cfg *c = (struct namemap_cfg *)data; | ||
28 | struct gk20a *g = c->g; | ||
29 | |||
30 | if (!g->ops.clk.get_rate_cntr) | ||
31 | return -EINVAL; | ||
32 | |||
33 | *val = c->is_counter ? (u64)c->scale * g->ops.clk.get_rate_cntr(g, c) : | ||
34 | 0 /* TODO PLL read */; | ||
35 | |||
36 | return 0; | ||
37 | } | ||
38 | DEFINE_SIMPLE_ATTRIBUTE(get_rate_fops, gp106_get_rate_show, NULL, "%llu\n"); | ||
39 | |||
40 | static int sys_cfc_read(void *data, u64 *val) | ||
41 | { | ||
42 | struct gk20a *g = (struct gk20a *)data; | ||
43 | bool bload = boardobjgrpmask_bitget( | ||
44 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
45 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS); | ||
46 | |||
47 | /* val = 1 implies CLFC is loaded or enabled */ | ||
48 | *val = bload ? 1 : 0; | ||
49 | return 0; | ||
50 | } | ||
51 | static int sys_cfc_write(void *data, u64 val) | ||
52 | { | ||
53 | struct gk20a *g = (struct gk20a *)data; | ||
54 | int status; | ||
55 | /* val = 1 implies load or enable the CLFC */ | ||
56 | bool bload = val ? true : false; | ||
57 | |||
58 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
59 | status = clk_pmu_freq_controller_load(g, bload, | ||
60 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS); | ||
61 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
62 | |||
63 | return status; | ||
64 | } | ||
65 | DEFINE_SIMPLE_ATTRIBUTE(sys_cfc_fops, sys_cfc_read, sys_cfc_write, "%llu\n"); | ||
66 | |||
67 | static int ltc_cfc_read(void *data, u64 *val) | ||
68 | { | ||
69 | struct gk20a *g = (struct gk20a *)data; | ||
70 | bool bload = boardobjgrpmask_bitget( | ||
71 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
72 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC); | ||
73 | |||
74 | /* val = 1 implies CLFC is loaded or enabled */ | ||
75 | *val = bload ? 1 : 0; | ||
76 | return 0; | ||
77 | } | ||
78 | static int ltc_cfc_write(void *data, u64 val) | ||
79 | { | ||
80 | struct gk20a *g = (struct gk20a *)data; | ||
81 | int status; | ||
82 | /* val = 1 implies load or enable the CLFC */ | ||
83 | bool bload = val ? true : false; | ||
84 | |||
85 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
86 | status = clk_pmu_freq_controller_load(g, bload, | ||
87 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC); | ||
88 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
89 | |||
90 | return status; | ||
91 | } | ||
92 | DEFINE_SIMPLE_ATTRIBUTE(ltc_cfc_fops, ltc_cfc_read, ltc_cfc_write, "%llu\n"); | ||
93 | |||
94 | static int xbar_cfc_read(void *data, u64 *val) | ||
95 | { | ||
96 | struct gk20a *g = (struct gk20a *)data; | ||
97 | bool bload = boardobjgrpmask_bitget( | ||
98 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
99 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR); | ||
100 | |||
101 | /* val = 1 implies CLFC is loaded or enabled */ | ||
102 | *val = bload ? 1 : 0; | ||
103 | return 0; | ||
104 | } | ||
105 | static int xbar_cfc_write(void *data, u64 val) | ||
106 | { | ||
107 | struct gk20a *g = (struct gk20a *)data; | ||
108 | int status; | ||
109 | /* val = 1 implies load or enable the CLFC */ | ||
110 | bool bload = val ? true : false; | ||
111 | |||
112 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
113 | status = clk_pmu_freq_controller_load(g, bload, | ||
114 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR); | ||
115 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
116 | |||
117 | return status; | ||
118 | } | ||
119 | DEFINE_SIMPLE_ATTRIBUTE(xbar_cfc_fops, xbar_cfc_read, | ||
120 | xbar_cfc_write, "%llu\n"); | ||
121 | |||
122 | static int gpc_cfc_read(void *data, u64 *val) | ||
123 | { | ||
124 | struct gk20a *g = (struct gk20a *)data; | ||
125 | bool bload = boardobjgrpmask_bitget( | ||
126 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
127 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0); | ||
128 | |||
129 | /* val = 1 implies CLFC is loaded or enabled */ | ||
130 | *val = bload ? 1 : 0; | ||
131 | return 0; | ||
132 | } | ||
133 | static int gpc_cfc_write(void *data, u64 val) | ||
134 | { | ||
135 | struct gk20a *g = (struct gk20a *)data; | ||
136 | int status; | ||
137 | /* val = 1 implies load or enable the CLFC */ | ||
138 | bool bload = val ? true : false; | ||
139 | |||
140 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
141 | status = clk_pmu_freq_controller_load(g, bload, | ||
142 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0); | ||
143 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
144 | |||
145 | return status; | ||
146 | } | ||
147 | DEFINE_SIMPLE_ATTRIBUTE(gpc_cfc_fops, gpc_cfc_read, gpc_cfc_write, "%llu\n"); | ||
148 | |||
149 | int gp106_clk_init_debugfs(struct gk20a *g) | ||
150 | { | ||
151 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
152 | struct dentry *gpu_root = l->debugfs; | ||
153 | struct dentry *clocks_root, *clk_freq_ctlr_root; | ||
154 | struct dentry *d; | ||
155 | unsigned int i; | ||
156 | |||
157 | if ((clocks_root = debugfs_create_dir("clocks", gpu_root)) == NULL) | ||
158 | return -ENOMEM; | ||
159 | |||
160 | clk_freq_ctlr_root = debugfs_create_dir("clk_freq_ctlr", gpu_root); | ||
161 | if (clk_freq_ctlr_root == NULL) | ||
162 | return -ENOMEM; | ||
163 | |||
164 | d = debugfs_create_file("sys", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
165 | g, &sys_cfc_fops); | ||
166 | d = debugfs_create_file("ltc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
167 | g, <c_cfc_fops); | ||
168 | d = debugfs_create_file("xbar", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
169 | g, &xbar_cfc_fops); | ||
170 | d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
171 | g, &gpc_cfc_fops); | ||
172 | |||
173 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
174 | |||
175 | for (i = 0; i < g->clk.namemap_num; i++) { | ||
176 | if (g->clk.clk_namemap[i].is_enable) { | ||
177 | d = debugfs_create_file( | ||
178 | g->clk.clk_namemap[i].name, | ||
179 | S_IRUGO, | ||
180 | clocks_root, | ||
181 | &g->clk.clk_namemap[i], | ||
182 | &get_rate_fops); | ||
183 | if (!d) | ||
184 | goto err_out; | ||
185 | } | ||
186 | } | ||
187 | return 0; | ||
188 | |||
189 | err_out: | ||
190 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
191 | debugfs_remove_recursive(clocks_root); | ||
192 | return -ENOMEM; | ||
193 | } | ||
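Note: the four sys/ltc/xbar/gpc handler pairs above differ only in the CTRL_CLK_CLK_FREQ_CONTROLLER_ID_* constant they pass. A possible table-driven form, sketched here with illustrative cfc_* names (not nvgpu API), passes a per-controller descriptor as the debugfs private data; the descriptors must live as long as the files do:

    /* hypothetical descriptor; assumes the same includes as the file above */
    struct cfc_node {
            struct gk20a *g;
            u32 ctrl_id;             /* CTRL_CLK_CLK_FREQ_CONTROLLER_ID_* */
    };

    static int cfc_read(void *data, u64 *val)
    {
            struct cfc_node *n = data;

            /* val = 1 implies the CLFC is loaded or enabled */
            *val = boardobjgrpmask_bitget(
                    &n->g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super,
                    n->ctrl_id) ? 1 : 0;
            return 0;
    }

    static int cfc_write(void *data, u64 val)
    {
            struct cfc_node *n = data;
            int status;

            nvgpu_clk_arb_pstate_change_lock(n->g, true);
            status = clk_pmu_freq_controller_load(n->g, val != 0, n->ctrl_id);
            nvgpu_clk_arb_pstate_change_lock(n->g, false);

            return status;
    }
    DEFINE_SIMPLE_ATTRIBUTE(cfc_fops, cfc_read, cfc_write, "%llu\n");

One debugfs_create_file() call per controller, each with its own struct cfc_node, would then replace the eight near-identical functions in this file and its gv100 twin below.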
diff --git a/include/os/linux/debug_clk_gp106.h b/include/os/linux/debug_clk_gp106.h new file mode 100644 index 0000000..b1d031d --- /dev/null +++ b/include/os/linux/debug_clk_gp106.h | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __DEBUG_CLK_GP106_H | ||
18 | #define __DEBUG_CLK_GP106_H | ||
19 | |||
20 | #ifdef CONFIG_DEBUG_FS | ||
21 | int gp106_clk_init_debugfs(struct gk20a *g); | ||
22 | #else | ||
23 | static inline int gp106_clk_init_debugfs(struct gk20a *g) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | #endif | ||
28 | |||
29 | #endif | ||
diff --git a/include/os/linux/debug_clk_gv100.c b/include/os/linux/debug_clk_gv100.c new file mode 100644 index 0000000..623f2b6 --- /dev/null +++ b/include/os/linux/debug_clk_gv100.c | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/debugfs.h> | ||
18 | |||
19 | #include "gv100/clk_gv100.h" | ||
20 | |||
21 | #include "os_linux.h" | ||
22 | |||
23 | void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock); | ||
24 | |||
25 | static int gv100_get_rate_show(void *data, u64 *val) | ||
26 | { | ||
27 | struct namemap_cfg *c = (struct namemap_cfg *)data; | ||
28 | struct gk20a *g = c->g; | ||
29 | |||
30 | if (!g->ops.clk.get_rate_cntr) | ||
31 | return -EINVAL; | ||
32 | |||
33 | *val = c->is_counter ? (u64)c->scale * g->ops.clk.get_rate_cntr(g, c) : | ||
34 | 0 /* TODO PLL read */; | ||
35 | |||
36 | return 0; | ||
37 | } | ||
38 | DEFINE_SIMPLE_ATTRIBUTE(get_rate_fops, gv100_get_rate_show, NULL, "%llu\n"); | ||
39 | |||
40 | static int sys_cfc_read(void *data, u64 *val) | ||
41 | { | ||
42 | struct gk20a *g = (struct gk20a *)data; | ||
43 | bool bload = boardobjgrpmask_bitget( | ||
44 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
45 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS); | ||
46 | |||
47 | /* val = 1 implies CLFC is loaded or enabled */ | ||
48 | *val = bload ? 1 : 0; | ||
49 | return 0; | ||
50 | } | ||
51 | static int sys_cfc_write(void *data, u64 val) | ||
52 | { | ||
53 | struct gk20a *g = (struct gk20a *)data; | ||
54 | int status; | ||
55 | /* val = 1 implies load or enable the CLFC */ | ||
56 | bool bload = val ? true : false; | ||
57 | |||
58 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
59 | status = clk_pmu_freq_controller_load(g, bload, | ||
60 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS); | ||
61 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
62 | |||
63 | return status; | ||
64 | } | ||
65 | DEFINE_SIMPLE_ATTRIBUTE(sys_cfc_fops, sys_cfc_read, sys_cfc_write, "%llu\n"); | ||
66 | |||
67 | static int ltc_cfc_read(void *data, u64 *val) | ||
68 | { | ||
69 | struct gk20a *g = (struct gk20a *)data; | ||
70 | bool bload = boardobjgrpmask_bitget( | ||
71 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
72 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC); | ||
73 | |||
74 | /* val = 1 implies CLFC is loaded or enabled */ | ||
75 | *val = bload ? 1 : 0; | ||
76 | return 0; | ||
77 | } | ||
78 | static int ltc_cfc_write(void *data, u64 val) | ||
79 | { | ||
80 | struct gk20a *g = (struct gk20a *)data; | ||
81 | int status; | ||
82 | /* val = 1 implies load or enable the CLFC */ | ||
83 | bool bload = val ? true : false; | ||
84 | |||
85 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
86 | status = clk_pmu_freq_controller_load(g, bload, | ||
87 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC); | ||
88 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
89 | |||
90 | return status; | ||
91 | } | ||
92 | DEFINE_SIMPLE_ATTRIBUTE(ltc_cfc_fops, ltc_cfc_read, ltc_cfc_write, "%llu\n"); | ||
93 | |||
94 | static int xbar_cfc_read(void *data, u64 *val) | ||
95 | { | ||
96 | struct gk20a *g = (struct gk20a *)data; | ||
97 | bool bload = boardobjgrpmask_bitget( | ||
98 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
99 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR); | ||
100 | |||
101 | /* val = 1 implies CLFC is loaded or enabled */ | ||
102 | *val = bload ? 1 : 0; | ||
103 | return 0; | ||
104 | } | ||
105 | static int xbar_cfc_write(void *data, u64 val) | ||
106 | { | ||
107 | struct gk20a *g = (struct gk20a *)data; | ||
108 | int status; | ||
109 | /* val = 1 implies load or enable the CLFC */ | ||
110 | bool bload = val ? true : false; | ||
111 | |||
112 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
113 | status = clk_pmu_freq_controller_load(g, bload, | ||
114 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR); | ||
115 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
116 | |||
117 | return status; | ||
118 | } | ||
119 | DEFINE_SIMPLE_ATTRIBUTE(xbar_cfc_fops, xbar_cfc_read, | ||
120 | xbar_cfc_write, "%llu\n"); | ||
121 | |||
122 | static int gpc_cfc_read(void *data, u64 *val) | ||
123 | { | ||
124 | struct gk20a *g = (struct gk20a *)data; | ||
125 | bool bload = boardobjgrpmask_bitget( | ||
126 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
127 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0); | ||
128 | |||
129 | /* val = 1 implies CLFC is loaded or enabled */ | ||
130 | *val = bload ? 1 : 0; | ||
131 | return 0; | ||
132 | } | ||
133 | static int gpc_cfc_write(void *data, u64 val) | ||
134 | { | ||
135 | struct gk20a *g = (struct gk20a *)data; | ||
136 | int status; | ||
137 | /* val = 1 implies load or enable the CLFC */ | ||
138 | bool bload = val ? true : false; | ||
139 | |||
140 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
141 | status = clk_pmu_freq_controller_load(g, bload, | ||
142 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0); | ||
143 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
144 | |||
145 | return status; | ||
146 | } | ||
147 | DEFINE_SIMPLE_ATTRIBUTE(gpc_cfc_fops, gpc_cfc_read, gpc_cfc_write, "%llu\n"); | ||
148 | |||
149 | int gv100_clk_init_debugfs(struct gk20a *g) | ||
150 | { | ||
151 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
152 | struct dentry *gpu_root = l->debugfs; | ||
153 | struct dentry *clocks_root, *clk_freq_ctlr_root; | ||
154 | struct dentry *d; | ||
155 | unsigned int i; | ||
156 | |||
157 | if ((clocks_root = debugfs_create_dir("clocks", gpu_root)) == NULL) | ||
158 | return -ENOMEM; | ||
159 | |||
160 | clk_freq_ctlr_root = debugfs_create_dir("clk_freq_ctlr", gpu_root); | ||
161 | if (clk_freq_ctlr_root == NULL) | ||
162 | return -ENOMEM; | ||
163 | |||
164 | d = debugfs_create_file("sys", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
165 | g, &sys_cfc_fops); | ||
166 | d = debugfs_create_file("ltc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
167 | g, <c_cfc_fops); | ||
168 | d = debugfs_create_file("xbar", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
169 | g, &xbar_cfc_fops); | ||
170 | d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
171 | g, &gpc_cfc_fops); | ||
172 | |||
173 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
174 | |||
175 | for (i = 0; i < g->clk.namemap_num; i++) { | ||
176 | if (g->clk.clk_namemap[i].is_enable) { | ||
177 | d = debugfs_create_file( | ||
178 | g->clk.clk_namemap[i].name, | ||
179 | S_IRUGO, | ||
180 | clocks_root, | ||
181 | &g->clk.clk_namemap[i], | ||
182 | &get_rate_fops); | ||
183 | if (!d) | ||
184 | goto err_out; | ||
185 | } | ||
186 | } | ||
187 | return 0; | ||
188 | |||
189 | err_out: | ||
190 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
191 | debugfs_remove_recursive(clocks_root); | ||
192 | return -ENOMEM; | ||
193 | } | ||
diff --git a/include/os/linux/debug_clk_gv100.h b/include/os/linux/debug_clk_gv100.h new file mode 100644 index 0000000..419b4ab --- /dev/null +++ b/include/os/linux/debug_clk_gv100.h | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __DEBUG_CLK_GV100_H | ||
18 | #define __DEBUG_CLK_GV100_H | ||
19 | |||
20 | #ifdef CONFIG_DEBUG_FS | ||
21 | int gv100_clk_init_debugfs(struct gk20a *g); | ||
22 | #else | ||
23 | static inline int gv100_clk_init_debugfs(struct gk20a *g) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | #endif | ||
28 | |||
29 | #endif | ||
diff --git a/include/os/linux/debug_fecs_trace.c b/include/os/linux/debug_fecs_trace.c new file mode 100644 index 0000000..7786053 --- /dev/null +++ b/include/os/linux/debug_fecs_trace.c | |||
@@ -0,0 +1,151 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/debugfs.h> | ||
18 | |||
19 | #include <nvgpu/fecs_trace.h> | ||
20 | |||
21 | #include "os_linux.h" | ||
22 | |||
23 | /* | ||
24 | * The sequence iterator functions. We simply use the count of the | ||
25 | * next line as our internal position. | ||
26 | */ | ||
27 | static void *gk20a_fecs_trace_debugfs_ring_seq_start( | ||
28 | struct seq_file *s, loff_t *pos) | ||
29 | { | ||
30 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
31 | return NULL; | ||
32 | |||
33 | return pos; | ||
34 | } | ||
35 | |||
36 | static void *gk20a_fecs_trace_debugfs_ring_seq_next( | ||
37 | struct seq_file *s, void *v, loff_t *pos) | ||
38 | { | ||
39 | ++(*pos); | ||
40 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
41 | return NULL; | ||
42 | return pos; | ||
43 | } | ||
44 | |||
45 | static void gk20a_fecs_trace_debugfs_ring_seq_stop( | ||
46 | struct seq_file *s, void *v) | ||
47 | { | ||
48 | } | ||
49 | |||
50 | static int gk20a_fecs_trace_debugfs_ring_seq_show( | ||
51 | struct seq_file *s, void *v) | ||
52 | { | ||
53 | loff_t *pos = (loff_t *) v; | ||
54 | struct gk20a *g = *(struct gk20a **)s->private; | ||
55 | struct gk20a_fecs_trace_record *r = | ||
56 | gk20a_fecs_trace_get_record(g, *pos); | ||
57 | int i; | ||
58 | const u32 invalid_tag = gk20a_fecs_trace_record_ts_tag_invalid_ts_v(); | ||
59 | u32 tag; | ||
60 | u64 timestamp; | ||
61 | |||
62 | seq_printf(s, "record #%lld (%p)\n", *pos, r); | ||
63 | seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo); | ||
64 | seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi); | ||
65 | if (gk20a_fecs_trace_is_valid_record(r)) { | ||
66 | seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr); | ||
67 | seq_printf(s, "\tcontext_id=%08x\n", r->context_id); | ||
68 | seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr); | ||
69 | seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id); | ||
70 | for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { | ||
71 | tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); | ||
72 | if (tag == invalid_tag) | ||
73 | continue; | ||
74 | timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); | ||
75 | timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; | ||
76 | seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp); | ||
77 | } | ||
78 | } | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Tie them all together into a set of seq_operations. | ||
84 | */ | ||
85 | static const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = { | ||
86 | .start = gk20a_fecs_trace_debugfs_ring_seq_start, | ||
87 | .next = gk20a_fecs_trace_debugfs_ring_seq_next, | ||
88 | .stop = gk20a_fecs_trace_debugfs_ring_seq_stop, | ||
89 | .show = gk20a_fecs_trace_debugfs_ring_seq_show | ||
90 | }; | ||
91 | |||
92 | /* | ||
93 | * Time to set up the file operations for our debugfs file. In this case, | ||
94 | * all we need is an open function which sets up the sequence ops. | ||
95 | */ | ||
96 | |||
97 | static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode, | ||
98 | struct file *file) | ||
99 | { | ||
100 | struct gk20a **p; | ||
101 | |||
102 | p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops, | ||
103 | sizeof(struct gk20a *)); | ||
104 | if (!p) | ||
105 | return -ENOMEM; | ||
106 | |||
107 | *p = (struct gk20a *)inode->i_private; | ||
108 | return 0; | ||
109 | }; | ||
110 | |||
111 | /* | ||
112 | * The file operations structure contains our open function along with | ||
113 | * the set of canned seq_ ops. | ||
114 | */ | ||
115 | static const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = { | ||
116 | .owner = THIS_MODULE, | ||
117 | .open = gk20a_ctxsw_debugfs_ring_open, | ||
118 | .read = seq_read, | ||
119 | .llseek = seq_lseek, | ||
120 | .release = seq_release_private | ||
121 | }; | ||
122 | |||
123 | static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val) | ||
124 | { | ||
125 | *val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg); | ||
126 | return 0; | ||
127 | } | ||
128 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops, | ||
129 | gk20a_fecs_trace_debugfs_read, NULL, "%llu\n"); | ||
130 | |||
131 | static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val) | ||
132 | { | ||
133 | *val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg); | ||
134 | return 0; | ||
135 | } | ||
136 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops, | ||
137 | gk20a_fecs_trace_debugfs_write, NULL, "%llu\n"); | ||
138 | |||
139 | int nvgpu_fecs_trace_init_debugfs(struct gk20a *g) | ||
140 | { | ||
141 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
142 | |||
143 | debugfs_create_file("ctxsw_trace_read", 0600, l->debugfs, g, | ||
144 | &gk20a_fecs_trace_debugfs_read_fops); | ||
145 | debugfs_create_file("ctxsw_trace_write", 0600, l->debugfs, g, | ||
146 | &gk20a_fecs_trace_debugfs_write_fops); | ||
147 | debugfs_create_file("ctxsw_trace_ring", 0600, l->debugfs, g, | ||
148 | &gk20a_fecs_trace_debugfs_ring_fops); | ||
149 | |||
150 | return 0; | ||
151 | } | ||
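Note: the ring walk above is the canonical seq_file idiom: start() and next() hand back the position itself as the iterator token, and show() renders one record per position, so output of any length is produced without a large kernel-side buffer. A minimal standalone sketch of the same shape (the demo_* names and the module are hypothetical, not nvgpu code):

    #include <linux/module.h>
    #include <linux/debugfs.h>
    #include <linux/seq_file.h>

    #define DEMO_NUM_RECORDS 8

    static u32 demo_ring[DEMO_NUM_RECORDS];  /* stands in for the FECS records */
    static struct dentry *demo_root;

    static void *demo_seq_start(struct seq_file *s, loff_t *pos)
    {
            return *pos < DEMO_NUM_RECORDS ? pos : NULL;
    }

    static void *demo_seq_next(struct seq_file *s, void *v, loff_t *pos)
    {
            ++(*pos);
            return *pos < DEMO_NUM_RECORDS ? pos : NULL;
    }

    static void demo_seq_stop(struct seq_file *s, void *v)
    {
    }

    static int demo_seq_show(struct seq_file *s, void *v)
    {
            loff_t *pos = v;

            seq_printf(s, "record #%lld = %08x\n", *pos, demo_ring[*pos]);
            return 0;
    }

    static const struct seq_operations demo_seq_ops = {
            .start = demo_seq_start,
            .next  = demo_seq_next,
            .stop  = demo_seq_stop,
            .show  = demo_seq_show,
    };

    static int demo_open(struct inode *inode, struct file *file)
    {
            return seq_open(file, &demo_seq_ops);
    }

    static const struct file_operations demo_fops = {
            .owner   = THIS_MODULE,
            .open    = demo_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = seq_release,
    };

    static int __init demo_init(void)
    {
            demo_root = debugfs_create_dir("ring_demo", NULL);
            debugfs_create_file("ring", 0400, demo_root, NULL, &demo_fops);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            debugfs_remove_recursive(demo_root);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL v2");

The nvgpu version differs only in passing the gk20a pointer through seq private data (via __seq_open_private) and decoding each record's tag/timestamp fields in show().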
diff --git a/include/os/linux/debug_fecs_trace.h b/include/os/linux/debug_fecs_trace.h new file mode 100644 index 0000000..54ebaaf --- /dev/null +++ b/include/os/linux/debug_fecs_trace.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef LINUX_DEBUG_FECS_TRACE_H | ||
18 | #define LINUX_DEBUG_FECS_TRACE_H | ||
19 | |||
20 | struct gk20a; | ||
21 | |||
22 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_GK20A_CTXSW_TRACE) | ||
23 | int nvgpu_fecs_trace_init_debugfs(struct gk20a *g); | ||
24 | #else | ||
25 | static inline int nvgpu_fecs_trace_init_debugfs(struct gk20a *g) | ||
26 | { | ||
27 | return 0; | ||
28 | } | ||
29 | #endif | ||
30 | #endif | ||
diff --git a/include/os/linux/debug_fifo.c b/include/os/linux/debug_fifo.c new file mode 100644 index 0000000..98da8bc --- /dev/null +++ b/include/os/linux/debug_fifo.c | |||
@@ -0,0 +1,376 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_fifo.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | #include <nvgpu/sort.h> | ||
22 | #include <nvgpu/timers.h> | ||
23 | #include <nvgpu/channel.h> | ||
24 | |||
25 | void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); | ||
26 | |||
27 | static void *gk20a_fifo_sched_debugfs_seq_start( | ||
28 | struct seq_file *s, loff_t *pos) | ||
29 | { | ||
30 | struct gk20a *g = s->private; | ||
31 | struct fifo_gk20a *f = &g->fifo; | ||
32 | |||
33 | if (*pos >= f->num_channels) | ||
34 | return NULL; | ||
35 | |||
36 | return &f->channel[*pos]; | ||
37 | } | ||
38 | |||
39 | static void *gk20a_fifo_sched_debugfs_seq_next( | ||
40 | struct seq_file *s, void *v, loff_t *pos) | ||
41 | { | ||
42 | struct gk20a *g = s->private; | ||
43 | struct fifo_gk20a *f = &g->fifo; | ||
44 | |||
45 | ++(*pos); | ||
46 | if (*pos >= f->num_channels) | ||
47 | return NULL; | ||
48 | |||
49 | return &f->channel[*pos]; | ||
50 | } | ||
51 | |||
52 | static void gk20a_fifo_sched_debugfs_seq_stop( | ||
53 | struct seq_file *s, void *v) | ||
54 | { | ||
55 | } | ||
56 | |||
57 | static int gk20a_fifo_sched_debugfs_seq_show( | ||
58 | struct seq_file *s, void *v) | ||
59 | { | ||
60 | struct gk20a *g = s->private; | ||
61 | struct fifo_gk20a *f = &g->fifo; | ||
62 | struct channel_gk20a *ch = v; | ||
63 | struct tsg_gk20a *tsg = NULL; | ||
64 | |||
65 | struct fifo_engine_info_gk20a *engine_info; | ||
66 | struct fifo_runlist_info_gk20a *runlist; | ||
67 | u32 runlist_id; | ||
68 | int ret = SEQ_SKIP; | ||
69 | u32 engine_id; | ||
70 | |||
71 | engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
72 | engine_info = (f->engine_info + engine_id); | ||
73 | runlist_id = engine_info->runlist_id; | ||
74 | runlist = &f->runlist_info[runlist_id]; | ||
75 | |||
76 | if (ch == f->channel) { | ||
77 | seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n"); | ||
78 | seq_puts(s, " (usecs) (msecs)\n"); | ||
79 | ret = 0; | ||
80 | } | ||
81 | |||
82 | if (!test_bit(ch->chid, runlist->active_channels)) | ||
83 | return ret; | ||
84 | |||
85 | if (gk20a_channel_get(ch)) { | ||
86 | tsg = tsg_gk20a_from_ch(ch); | ||
87 | |||
88 | if (tsg) | ||
89 | seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n", | ||
90 | ch->chid, | ||
91 | ch->tsgid, | ||
92 | ch->tgid, | ||
93 | tsg->timeslice_us, | ||
94 | ch->timeout_ms_max, | ||
95 | tsg->interleave_level, | ||
96 | tsg->gr_ctx.graphics_preempt_mode, | ||
97 | tsg->gr_ctx.compute_preempt_mode); | ||
98 | gk20a_channel_put(ch); | ||
99 | } | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { | ||
104 | .start = gk20a_fifo_sched_debugfs_seq_start, | ||
105 | .next = gk20a_fifo_sched_debugfs_seq_next, | ||
106 | .stop = gk20a_fifo_sched_debugfs_seq_stop, | ||
107 | .show = gk20a_fifo_sched_debugfs_seq_show | ||
108 | }; | ||
109 | |||
110 | static int gk20a_fifo_sched_debugfs_open(struct inode *inode, | ||
111 | struct file *file) | ||
112 | { | ||
113 | struct gk20a *g = inode->i_private; | ||
114 | int err; | ||
115 | |||
116 | err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); | ||
117 | if (err) | ||
118 | return err; | ||
119 | |||
120 | nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private); | ||
121 | |||
122 | ((struct seq_file *)file->private_data)->private = inode->i_private; | ||
123 | return 0; | ||
124 | }; | ||
125 | |||
126 | /* | ||
127 | * The file operations structure contains our open function along with | ||
128 | * the set of canned seq_ ops. | ||
129 | */ | ||
130 | static const struct file_operations gk20a_fifo_sched_debugfs_fops = { | ||
131 | .owner = THIS_MODULE, | ||
132 | .open = gk20a_fifo_sched_debugfs_open, | ||
133 | .read = seq_read, | ||
134 | .llseek = seq_lseek, | ||
135 | .release = seq_release | ||
136 | }; | ||
137 | |||
138 | static int gk20a_fifo_profile_enable(void *data, u64 val) | ||
139 | { | ||
140 | struct gk20a *g = (struct gk20a *) data; | ||
141 | struct fifo_gk20a *f = &g->fifo; | ||
142 | |||
143 | |||
144 | nvgpu_mutex_acquire(&f->profile.lock); | ||
145 | if (val == 0) { | ||
146 | if (f->profile.enabled) { | ||
147 | f->profile.enabled = false; | ||
148 | nvgpu_ref_put(&f->profile.ref, | ||
149 | __gk20a_fifo_profile_free); | ||
150 | } | ||
151 | } else { | ||
152 | if (!f->profile.enabled) { | ||
153 | /* no kref init here, as that could create a race condition if | ||
154 | * we enable/disable/enable while kickoff is happening | ||
155 | */ | ||
156 | if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) { | ||
157 | f->profile.data = nvgpu_vzalloc(g, | ||
158 | FIFO_PROFILING_ENTRIES * | ||
159 | sizeof(struct fifo_profile_gk20a)); | ||
160 | f->profile.sorted = nvgpu_vzalloc(g, | ||
161 | FIFO_PROFILING_ENTRIES * | ||
162 | sizeof(u64)); | ||
163 | if (!(f->profile.data && f->profile.sorted)) { | ||
164 | nvgpu_vfree(g, f->profile.data); | ||
165 | nvgpu_vfree(g, f->profile.sorted); | ||
166 | nvgpu_mutex_release(&f->profile.lock); | ||
167 | return -ENOMEM; | ||
168 | } | ||
169 | nvgpu_ref_init(&f->profile.ref); | ||
170 | } | ||
171 | atomic_set(&f->profile.get.atomic_var, 0); | ||
172 | f->profile.enabled = true; | ||
173 | } | ||
174 | } | ||
175 | nvgpu_mutex_release(&f->profile.lock); | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | DEFINE_SIMPLE_ATTRIBUTE( | ||
181 | gk20a_fifo_profile_enable_debugfs_fops, | ||
182 | NULL, | ||
183 | gk20a_fifo_profile_enable, | ||
184 | "%llu\n" | ||
185 | ); | ||
186 | |||
187 | static int __profile_cmp(const void *a, const void *b) | ||
188 | { | ||
189 | return (*(const u64 *)a > *(const u64 *)b) - (*(const u64 *)a < *(const u64 *)b); | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * These arrays use about 800 bytes of stack, but the function using them | ||
194 | * is not part of a call stack where much memory is in use, so it is fine | ||
195 | */ | ||
196 | #define PERCENTILE_WIDTH 5 | ||
197 | #define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) | ||
198 | |||
199 | static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, | ||
200 | u64 *percentiles, u32 index_end, u32 index_start) | ||
201 | { | ||
202 | unsigned int nelem = 0; | ||
203 | unsigned int index; | ||
204 | struct fifo_profile_gk20a *profile; | ||
205 | |||
206 | for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { | ||
207 | profile = &g->fifo.profile.data[index]; | ||
208 | |||
209 | if (profile->timestamp[index_end] > | ||
210 | profile->timestamp[index_start]) { | ||
211 | /* This is a valid element */ | ||
212 | g->fifo.profile.sorted[nelem] = | ||
213 | profile->timestamp[index_end] - | ||
214 | profile->timestamp[index_start]; | ||
215 | nelem++; | ||
216 | } | ||
217 | } | ||
218 | |||
219 | /* sort it */ | ||
220 | sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), | ||
221 | __profile_cmp, NULL); | ||
222 | |||
223 | /* build ranges */ | ||
224 | for (index = 0; index < PERCENTILE_RANGES; index++) { | ||
225 | percentiles[index] = nelem < PERCENTILE_RANGES ? 0 : | ||
226 | g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * | ||
227 | nelem)/100 - 1]; | ||
228 | } | ||
229 | return nelem; | ||
230 | } | ||
231 | |||
232 | static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) | ||
233 | { | ||
234 | struct gk20a *g = s->private; | ||
235 | unsigned int get, nelem, index; | ||
236 | /* | ||
237 | * About 800 bytes on the stack, but the function is static and is only | ||
238 | * called from the debugfs handler | ||
239 | */ | ||
240 | u64 percentiles_ioctl[PERCENTILE_RANGES]; | ||
241 | u64 percentiles_kickoff[PERCENTILE_RANGES]; | ||
242 | u64 percentiles_jobtracking[PERCENTILE_RANGES]; | ||
243 | u64 percentiles_append[PERCENTILE_RANGES]; | ||
244 | u64 percentiles_userd[PERCENTILE_RANGES]; | ||
245 | |||
246 | if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) { | ||
247 | seq_printf(s, "Profiling disabled\n"); | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | get = atomic_read(&g->fifo.profile.get.atomic_var); | ||
252 | |||
253 | __gk20a_fifo_create_stats(g, percentiles_ioctl, | ||
254 | PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); | ||
255 | __gk20a_fifo_create_stats(g, percentiles_kickoff, | ||
256 | PROFILE_END, PROFILE_ENTRY); | ||
257 | __gk20a_fifo_create_stats(g, percentiles_jobtracking, | ||
258 | PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); | ||
259 | __gk20a_fifo_create_stats(g, percentiles_append, | ||
260 | PROFILE_APPEND, PROFILE_JOB_TRACKING); | ||
261 | nelem = __gk20a_fifo_create_stats(g, percentiles_userd, | ||
262 | PROFILE_END, PROFILE_APPEND); | ||
263 | |||
264 | seq_printf(s, "Number of kickoffs: %d\n", nelem); | ||
265 | seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); | ||
266 | |||
267 | for (index = 0; index < PERCENTILE_RANGES; index++) | ||
268 | seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", | ||
269 | PERCENTILE_WIDTH * (index+1), | ||
270 | percentiles_ioctl[index], | ||
271 | percentiles_kickoff[index], | ||
272 | percentiles_append[index], | ||
273 | percentiles_jobtracking[index], | ||
274 | percentiles_userd[index]); | ||
275 | |||
276 | nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
277 | |||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) | ||
282 | { | ||
283 | return single_open(file, gk20a_fifo_profile_stats, inode->i_private); | ||
284 | } | ||
285 | |||
286 | static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { | ||
287 | .open = gk20a_fifo_profile_stats_open, | ||
288 | .read = seq_read, | ||
289 | .llseek = seq_lseek, | ||
290 | .release = single_release, | ||
291 | }; | ||
292 | |||
293 | |||
294 | void gk20a_fifo_debugfs_init(struct gk20a *g) | ||
295 | { | ||
296 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
297 | struct dentry *gpu_root = l->debugfs; | ||
298 | struct dentry *fifo_root; | ||
299 | struct dentry *profile_root; | ||
300 | |||
301 | fifo_root = debugfs_create_dir("fifo", gpu_root); | ||
302 | if (IS_ERR_OR_NULL(fifo_root)) | ||
303 | return; | ||
304 | |||
305 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
306 | |||
307 | debugfs_create_file("sched", 0600, fifo_root, g, | ||
308 | &gk20a_fifo_sched_debugfs_fops); | ||
309 | |||
310 | profile_root = debugfs_create_dir("profile", fifo_root); | ||
311 | if (IS_ERR_OR_NULL(profile_root)) | ||
312 | return; | ||
313 | |||
314 | nvgpu_mutex_init(&g->fifo.profile.lock); | ||
315 | g->fifo.profile.enabled = false; | ||
316 | atomic_set(&g->fifo.profile.get.atomic_var, 0); | ||
317 | atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0); | ||
318 | |||
319 | debugfs_create_file("enable", 0600, profile_root, g, | ||
320 | &gk20a_fifo_profile_enable_debugfs_fops); | ||
321 | |||
322 | debugfs_create_file("stats", 0600, profile_root, g, | ||
323 | &gk20a_fifo_profile_stats_debugfs_fops); | ||
324 | |||
325 | } | ||
326 | |||
327 | void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx) | ||
328 | { | ||
329 | if (profile) | ||
330 | profile->timestamp[idx] = nvgpu_current_time_ns(); | ||
331 | } | ||
332 | |||
333 | void __gk20a_fifo_profile_free(struct nvgpu_ref *ref) | ||
334 | { | ||
335 | struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, | ||
336 | profile.ref); | ||
337 | nvgpu_vfree(f->g, f->profile.data); | ||
338 | nvgpu_vfree(f->g, f->profile.sorted); | ||
339 | } | ||
340 | |||
341 | /* Get the next element in the ring buffer of profile entries | ||
342 | * and grab a reference to the structure | ||
343 | */ | ||
344 | struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) | ||
345 | { | ||
346 | struct fifo_gk20a *f = &g->fifo; | ||
347 | struct fifo_profile_gk20a *profile; | ||
348 | unsigned int index; | ||
349 | |||
350 | /* If kref is zero, profiling is not enabled */ | ||
351 | if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) | ||
352 | return NULL; | ||
353 | index = atomic_inc_return(&f->profile.get.atomic_var); | ||
354 | profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; | ||
355 | |||
356 | return profile; | ||
357 | } | ||
358 | |||
359 | /* Free the reference to the structure. This allows deferred cleanups */ | ||
360 | void gk20a_fifo_profile_release(struct gk20a *g, | ||
361 | struct fifo_profile_gk20a *profile) | ||
362 | { | ||
363 | nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
364 | } | ||
365 | |||
366 | void gk20a_fifo_debugfs_deinit(struct gk20a *g) | ||
367 | { | ||
368 | struct fifo_gk20a *f = &g->fifo; | ||
369 | |||
370 | nvgpu_mutex_acquire(&f->profile.lock); | ||
371 | if (f->profile.enabled) { | ||
372 | f->profile.enabled = false; | ||
373 | nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); | ||
374 | } | ||
375 | nvgpu_mutex_release(&f->profile.lock); | ||
376 | } | ||
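Note: __gk20a_fifo_create_stats() builds its percentiles by sorting the valid samples and picking the element at each 5% boundary. The userspace sketch below (hypothetical, plain libc, with made-up sample durations) mirrors that index arithmetic; its comparator returns the sign of the comparison rather than a truncated u64 difference, matching the fix to __profile_cmp() above.

    #include <stdio.h>
    #include <stdlib.h>

    #define PERCENTILE_WIDTH  5
    #define PERCENTILE_RANGES (100 / PERCENTILE_WIDTH)

    /* sign of the comparison, not a truncated difference */
    static int cmp_u64(const void *a, const void *b)
    {
            unsigned long long x = *(const unsigned long long *)a;
            unsigned long long y = *(const unsigned long long *)b;

            return (x > y) - (x < y);
    }

    int main(void)
    {
            unsigned long long samples[] = {
                    900, 120, 450, 300, 7000, 640, 210, 330, 150, 480,
                    510, 760, 820, 95, 1100, 230, 610, 340, 270, 400,
            };
            unsigned int nelem = sizeof(samples) / sizeof(samples[0]);
            unsigned long long percentiles[PERCENTILE_RANGES];
            int i;

            qsort(samples, nelem, sizeof(samples[0]), cmp_u64);

            for (i = 0; i < PERCENTILE_RANGES; i++) {
                    /* same boundary index as __gk20a_fifo_create_stats() */
                    percentiles[i] = nelem < PERCENTILE_RANGES ? 0 :
                            samples[(PERCENTILE_WIDTH * (i + 1) * nelem) / 100 - 1];
                    printf("[%2dpc]\t%8llu ns\n", PERCENTILE_WIDTH * (i + 1),
                           percentiles[i]);
            }
            return 0;
    }

With fewer samples than percentile buckets, the driver (and this sketch) reports zeros rather than extrapolating, which keeps the stats node honest for short runs.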
diff --git a/include/os/linux/debug_fifo.h b/include/os/linux/debug_fifo.h new file mode 100644 index 0000000..46ac853 --- /dev/null +++ b/include/os/linux/debug_fifo.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_FIFO_H__ | ||
16 | #define __NVGPU_DEBUG_FIFO_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | void gk20a_fifo_debugfs_init(struct gk20a *g); | ||
20 | void gk20a_fifo_debugfs_deinit(struct gk20a *g); | ||
21 | |||
22 | #endif /* __NVGPU_DEBUG_FIFO_H__ */ | ||
diff --git a/include/os/linux/debug_gr.c b/include/os/linux/debug_gr.c new file mode 100644 index 0000000..d54c6d6 --- /dev/null +++ b/include/os/linux/debug_gr.c | |||
@@ -0,0 +1,31 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_gr.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | |||
20 | int gr_gk20a_debugfs_init(struct gk20a *g) | ||
21 | { | ||
22 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
23 | |||
24 | l->debugfs_gr_default_attrib_cb_size = | ||
25 | debugfs_create_u32("gr_default_attrib_cb_size", | ||
26 | S_IRUGO|S_IWUSR, l->debugfs, | ||
27 | &g->gr.attrib_cb_default_size); | ||
28 | |||
29 | return 0; | ||
30 | } | ||
31 | |||
diff --git a/include/os/linux/debug_gr.h b/include/os/linux/debug_gr.h new file mode 100644 index 0000000..4b46acb --- /dev/null +++ b/include/os/linux/debug_gr.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_GR_H__ | ||
16 | #define __NVGPU_DEBUG_GR_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | int gr_gk20a_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_GR_H__ */ | ||
diff --git a/include/os/linux/debug_hal.c b/include/os/linux/debug_hal.c new file mode 100644 index 0000000..031e335 --- /dev/null +++ b/include/os/linux/debug_hal.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_hal.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | /* Format and print a single function pointer to the specified seq_file. */ | ||
22 | static void __hal_print_op(struct seq_file *s, void *op_ptr) | ||
23 | { | ||
24 | seq_printf(s, "%pF\n", op_ptr); | ||
25 | } | ||
26 | |||
27 | /* | ||
28 | * Prints an array of function pointer addresses in op_ptrs to the | ||
29 | * specified seq_file | ||
30 | */ | ||
31 | static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops) | ||
32 | { | ||
33 | int i; | ||
34 | |||
35 | for (i = 0; i < num_ops; i++) | ||
36 | __hal_print_op(s, op_ptrs[i]); | ||
37 | } | ||
38 | |||
39 | /* | ||
40 | * Show file operation, which generates content of the file once. Prints a list | ||
41 | * of gpu operations as defined by gops and the corresponding function pointer | ||
42 | * destination addresses. Relies on no compiler reordering of struct fields and | ||
43 | * assumption that all members are function pointers. | ||
44 | */ | ||
45 | static int __hal_show(struct seq_file *s, void *unused) | ||
46 | { | ||
47 | struct gpu_ops *gops = s->private; | ||
48 | |||
49 | __hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *)); | ||
50 | |||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | static int __hal_open(struct inode *inode, struct file *file) | ||
55 | { | ||
56 | return single_open(file, __hal_show, inode->i_private); | ||
57 | } | ||
58 | |||
59 | static const struct file_operations __hal_fops = { | ||
60 | .open = __hal_open, | ||
61 | .read = seq_read, | ||
62 | .llseek = seq_lseek, | ||
63 | .release = single_release, | ||
64 | }; | ||
65 | |||
66 | void nvgpu_hal_debugfs_fini(struct gk20a *g) | ||
67 | { | ||
68 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
69 | |||
70 | if (l->debugfs_hal != NULL) | ||
71 | debugfs_remove_recursive(l->debugfs_hal); | ||
72 | } | ||
73 | |||
74 | void nvgpu_hal_debugfs_init(struct gk20a *g) | ||
75 | { | ||
76 | struct dentry *d; | ||
77 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
78 | |||
79 | if (!l->debugfs) | ||
80 | return; | ||
81 | l->debugfs_hal = debugfs_create_dir("hal", l->debugfs); | ||
82 | if (IS_ERR_OR_NULL(l->debugfs_hal)) { | ||
83 | l->debugfs_hal = NULL; | ||
84 | return; | ||
85 | } | ||
86 | |||
87 | /* Pass along reference to the gpu_ops struct as private data */ | ||
88 | d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal, | ||
89 | &g->ops, &__hal_fops); | ||
90 | if (!d) { | ||
91 | nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__); | ||
92 | debugfs_remove_recursive(l->debugfs_hal); | ||
93 | return; | ||
94 | } | ||
95 | } | ||
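The (void **) cast in __hal_show treats the entire gpu_ops struct as a flat array of function pointers, which only holds under the assumptions stated in the comment above (no field reordering, every member a function pointer). A minimal userspace sketch of the same technique; the demo_ops struct and its members are hypothetical, not part of the driver:

    #include <stdio.h>
    #include <stddef.h>

    /* Hypothetical ops table: the cast below is only sound because every
     * member is a function pointer and the struct has no padding. */
    struct demo_ops {
            int  (*init)(void);
            void (*fini)(void);
    };

    static int  demo_init(void) { return 0; }
    static void demo_fini(void) { }

    int main(void)
    {
            struct demo_ops ops = { demo_init, demo_fini };
            void **ptrs = (void **)&ops;
            size_t i, n = sizeof(ops) / sizeof(void *);

            /* Walk the struct as a flat pointer array, as __hal_show does. */
            for (i = 0; i < n; i++)
                    printf("%p\n", ptrs[i]);
            return 0;
    }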
diff --git a/include/os/linux/debug_hal.h b/include/os/linux/debug_hal.h new file mode 100644 index 0000000..eee6f23 --- /dev/null +++ b/include/os/linux/debug_hal.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_HAL_H__ | ||
16 | #define __NVGPU_DEBUG_HAL_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | void nvgpu_hal_debugfs_fini(struct gk20a *g); | ||
20 | void nvgpu_hal_debugfs_init(struct gk20a *g); | ||
21 | |||
22 | #endif /* __NVGPU_DEBUG_HAL_H__ */ | ||
diff --git a/include/os/linux/debug_kmem.c b/include/os/linux/debug_kmem.c new file mode 100644 index 0000000..a0c7d47 --- /dev/null +++ b/include/os/linux/debug_kmem.c | |||
@@ -0,0 +1,312 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/debugfs.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | #include "debug_kmem.h" | ||
19 | #include "kmem_priv.h" | ||
20 | |||
21 | /** | ||
22 | * to_human_readable_bytes - Determine suffix for passed size. | ||
23 | * | ||
24 | * @bytes - Number of bytes to generate a suffix for. | ||
25 | * @hr_bytes [out] - The human readable number of bytes. | ||
26 | * @hr_suffix [out] - The suffix for the HR number of bytes. | ||
27 | * | ||
28 | * Computes a human readable decomposition of the passed number of bytes. The | ||
29 | * suffix for the bytes is passed back through the @hr_suffix pointer. The right | ||
30 | * number of bytes is then passed back in @hr_bytes. This returns the following | ||
31 | * ranges: | ||
32 | * | ||
33 | * 0 - 1023 B | ||
34 | * 1 - 1023 KB | ||
35 | * 1 - 1023 MB | ||
36 | * 1 - 1023 GB | ||
37 | * 1 - 1023 TB | ||
38 | * 1 - ... PB | ||
39 | */ | ||
40 | static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, | ||
41 | const char **hr_suffix) | ||
42 | { | ||
43 | static const char *suffixes[] = | ||
44 | { "B", "KB", "MB", "GB", "TB", "PB" }; | ||
45 | |||
46 | u64 suffix_ind = 0; | ||
47 | |||
48 | while (suffix_ind < ARRAY_SIZE(suffixes) - 1 && bytes >= 1024) { | ||
49 | bytes >>= 10; | ||
50 | suffix_ind++; | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Defensive clamp; the loop bound above keeps suffix_ind in range. | ||
55 | */ | ||
56 | suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? | ||
57 | suffix_ind : ARRAY_SIZE(suffixes) - 1; | ||
58 | |||
59 | *hr_bytes = bytes; | ||
60 | *hr_suffix = suffixes[suffix_ind]; | ||
61 | } | ||
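As a quick illustration of the decomposition above, a standalone userspace sketch with the kernel-only helpers replaced by plain C (an illustration, not the driver code):

    #include <stdio.h>
    #include <stdint.h>

    static void to_hr_bytes(uint64_t bytes, uint64_t *hr, const char **sfx)
    {
            static const char *suffixes[] =
                    { "B", "KB", "MB", "GB", "TB", "PB" };
            size_t i = 0;

            while (i < sizeof(suffixes) / sizeof(suffixes[0]) - 1 &&
                   bytes >= 1024) {
                    bytes >>= 10;   /* divide by 1024, truncating */
                    i++;
            }
            *hr = bytes;
            *sfx = suffixes[i];
    }

    int main(void)
    {
            uint64_t sizes[] = { 512, 1536, 1048576, 3ULL << 30 };
            uint64_t hr;
            const char *sfx;
            size_t i;

            for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                    to_hr_bytes(sizes[i], &hr, &sfx);
                    /* Prints: 512 B, 1 KB (truncated), 1 MB, 3 GB */
                    printf("%llu %s\n", (unsigned long long)hr, sfx);
            }
            return 0;
    }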
62 | |||
63 | /** | ||
64 | * print_hr_bytes - Print human readable bytes | ||
65 | * | ||
66 | * @s - A seq_file to print to. May be NULL. | ||
67 | * @msg - A message to print before the bytes. | ||
68 | * @bytes - Number of bytes. | ||
69 | * | ||
70 | * Print @msg followed by the human readable decomposition of the passed number | ||
71 | * of bytes. | ||
72 | * | ||
73 | * If @s is NULL then the prints are made to the kernel log. | ||
74 | */ | ||
75 | static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) | ||
76 | { | ||
77 | u64 hr_bytes; | ||
78 | const char *hr_suffix; | ||
79 | |||
80 | __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); | ||
81 | __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); | ||
82 | } | ||
83 | |||
84 | /** | ||
85 | * print_histogram - Build a histogram of the memory usage. | ||
86 | * | ||
87 | * @tracker The tracking to pull data from. | ||
88 | * @s A seq_file to dump info into. | ||
89 | */ | ||
90 | static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, | ||
91 | struct seq_file *s) | ||
92 | { | ||
93 | int i; | ||
94 | u64 pot_min, pot_max; | ||
95 | u64 nr_buckets; | ||
96 | unsigned int *buckets; | ||
97 | unsigned int total_allocs; | ||
98 | struct nvgpu_rbtree_node *node; | ||
99 | static const char histogram_line[] = | ||
100 | "++++++++++++++++++++++++++++++++++++++++"; | ||
101 | |||
102 | /* | ||
103 | * pot_min is essentially a round down to the nearest power of 2. This | ||
104 | * is the start of the histogram. pot_max is just a round up to the | ||
105 | * nearest power of two. Each histogram bucket is one power of two so | ||
106 | * the histogram buckets are exponential. | ||
107 | */ | ||
108 | pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); | ||
109 | pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); | ||
110 | |||
111 | nr_buckets = __ffs(pot_max) - __ffs(pot_min); | ||
112 | |||
113 | buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); | ||
114 | if (!buckets) { | ||
115 | __pstat(s, "OOM: could not allocate bucket storage!?\n"); | ||
116 | return; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * Iterate across all of the allocs and determine what bucket they | ||
121 | * should go in. Round the size down to the nearest power of two to | ||
122 | * find the right bucket. | ||
123 | */ | ||
124 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
125 | while (node) { | ||
126 | int b; | ||
127 | u64 bucket_min; | ||
128 | struct nvgpu_mem_alloc *alloc = | ||
129 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
130 | |||
131 | bucket_min = (u64)rounddown_pow_of_two(alloc->size); | ||
132 | if (bucket_min < tracker->min_alloc) | ||
133 | bucket_min = tracker->min_alloc; | ||
134 | |||
135 | b = __ffs(bucket_min) - __ffs(pot_min); | ||
136 | |||
137 | /* | ||
138 | * Handle the one case where there's an alloc exactly as big as | ||
139 | * the maximum bucket size of the largest bucket. Most of the | ||
140 | * buckets have an inclusive minimum and exclusive maximum. But | ||
141 | * the largest bucket needs to have an _inclusive_ maximum as | ||
142 | * well. | ||
143 | */ | ||
144 | if (b == (int)nr_buckets) | ||
145 | b--; | ||
146 | |||
147 | buckets[b]++; | ||
148 | |||
149 | nvgpu_rbtree_enum_next(&node, node); | ||
150 | } | ||
151 | |||
152 | total_allocs = 0; | ||
153 | for (i = 0; i < (int)nr_buckets; i++) | ||
154 | total_allocs += buckets[i]; | ||
155 | |||
156 | __pstat(s, "Alloc histogram:\n"); | ||
157 | |||
158 | /* | ||
159 | * Actually compute the histogram lines. | ||
160 | */ | ||
161 | for (i = 0; i < (int)nr_buckets; i++) { | ||
162 | char this_line[sizeof(histogram_line) + 1]; | ||
163 | u64 line_length; | ||
164 | u64 hr_bytes; | ||
165 | const char *hr_suffix; | ||
166 | |||
167 | memset(this_line, 0, sizeof(this_line)); | ||
168 | |||
169 | /* | ||
170 | * Compute the normalized line length. Can't use floating point | ||
171 | * so we will just multiply everything by 1000 and use fixed | ||
172 | * point. | ||
173 | */ | ||
174 | line_length = total_allocs ? (1000 * buckets[i]) / total_allocs : 0; | ||
175 | line_length *= sizeof(histogram_line); | ||
176 | line_length /= 1000; | ||
177 | |||
178 | memset(this_line, '+', line_length); | ||
179 | |||
180 | __to_human_readable_bytes(1ULL << (__ffs(pot_min) + i), | ||
181 | &hr_bytes, &hr_suffix); | ||
182 | __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", | ||
183 | hr_bytes, hr_bytes << 1, | ||
184 | hr_suffix, buckets[i], this_line); | ||
185 | } | ||
186 | } | ||
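The fixed-point scaling in the loop above is easiest to see with concrete numbers; note that sizeof(histogram_line) is 41 because it counts the terminating NUL:

    /*
     * Worked example: 25 of 100 allocs land in a bucket, line buffer 41 bytes.
     *
     *   line_length = (1000 * 25) / 100 = 250     (ratio scaled by 1000)
     *   line_length = 250 * 41          = 10250   (scaled line width)
     *   line_length = 10250 / 1000      = 10      (ten '+' characters)
     */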
187 | |||
188 | /** | ||
189 | * nvgpu_kmem_print_stats - Print kmem tracking stats. | ||
190 | * | ||
191 | * @tracker The tracking to pull data from. | ||
192 | * @s A seq_file to dump info into. | ||
193 | * | ||
194 | * Print stats from a tracker. If @s is non-null then seq_printf() will be | ||
195 | * used with @s. Otherwise the stats are pr_info()ed. | ||
196 | */ | ||
197 | void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, | ||
198 | struct seq_file *s) | ||
199 | { | ||
200 | nvgpu_lock_tracker(tracker); | ||
201 | |||
202 | __pstat(s, "Mem tracker: %s\n\n", tracker->name); | ||
203 | |||
204 | __pstat(s, "Basic Stats:\n"); | ||
205 | __pstat(s, " Number of allocs %lld\n", | ||
206 | tracker->nr_allocs); | ||
207 | __pstat(s, " Number of frees %lld\n", | ||
208 | tracker->nr_frees); | ||
209 | print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); | ||
210 | print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); | ||
211 | print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); | ||
212 | print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); | ||
213 | print_hr_bytes(s, " Bytes allocated (real) ", | ||
214 | tracker->bytes_alloced_real); | ||
215 | print_hr_bytes(s, " Bytes freed (real) ", | ||
216 | tracker->bytes_freed_real); | ||
217 | __pstat(s, "\n"); | ||
218 | |||
219 | print_histogram(tracker, s); | ||
220 | |||
221 | nvgpu_unlock_tracker(tracker); | ||
222 | } | ||
223 | |||
224 | static int __kmem_tracking_show(struct seq_file *s, void *unused) | ||
225 | { | ||
226 | struct nvgpu_mem_alloc_tracker *tracker = s->private; | ||
227 | |||
228 | nvgpu_kmem_print_stats(tracker, s); | ||
229 | |||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | static int __kmem_tracking_open(struct inode *inode, struct file *file) | ||
234 | { | ||
235 | return single_open(file, __kmem_tracking_show, inode->i_private); | ||
236 | } | ||
237 | |||
238 | static const struct file_operations __kmem_tracking_fops = { | ||
239 | .open = __kmem_tracking_open, | ||
240 | .read = seq_read, | ||
241 | .llseek = seq_lseek, | ||
242 | .release = single_release, | ||
243 | }; | ||
244 | |||
245 | static int __kmem_traces_dump_tracker(struct gk20a *g, | ||
246 | struct nvgpu_mem_alloc_tracker *tracker, | ||
247 | struct seq_file *s) | ||
248 | { | ||
249 | struct nvgpu_rbtree_node *node; | ||
250 | |||
251 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
252 | while (node) { | ||
253 | struct nvgpu_mem_alloc *alloc = | ||
254 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
255 | |||
256 | kmem_print_mem_alloc(g, alloc, s); | ||
257 | |||
258 | nvgpu_rbtree_enum_next(&node, node); | ||
259 | } | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static int __kmem_traces_show(struct seq_file *s, void *unused) | ||
265 | { | ||
266 | struct gk20a *g = s->private; | ||
267 | |||
268 | nvgpu_lock_tracker(g->vmallocs); | ||
269 | seq_puts(s, "Oustanding vmallocs:\n"); | ||
270 | __kmem_traces_dump_tracker(g, g->vmallocs, s); | ||
271 | seq_puts(s, "\n"); | ||
272 | nvgpu_unlock_tracker(g->vmallocs); | ||
273 | |||
274 | nvgpu_lock_tracker(g->kmallocs); | ||
275 | seq_puts(s, "Oustanding kmallocs:\n"); | ||
276 | __kmem_traces_dump_tracker(g, g->kmallocs, s); | ||
277 | nvgpu_unlock_tracker(g->kmallocs); | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | static int __kmem_traces_open(struct inode *inode, struct file *file) | ||
283 | { | ||
284 | return single_open(file, __kmem_traces_show, inode->i_private); | ||
285 | } | ||
286 | |||
287 | static const struct file_operations __kmem_traces_fops = { | ||
288 | .open = __kmem_traces_open, | ||
289 | .read = seq_read, | ||
290 | .llseek = seq_lseek, | ||
291 | .release = single_release, | ||
292 | }; | ||
293 | |||
294 | void nvgpu_kmem_debugfs_init(struct gk20a *g) | ||
295 | { | ||
296 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
297 | struct dentry *node; | ||
298 | |||
299 | l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs); | ||
300 | if (IS_ERR_OR_NULL(l->debugfs_kmem)) | ||
301 | return; | ||
302 | |||
303 | node = debugfs_create_file(g->vmallocs->name, S_IRUGO, | ||
304 | l->debugfs_kmem, | ||
305 | g->vmallocs, &__kmem_tracking_fops); | ||
306 | node = debugfs_create_file(g->kmallocs->name, S_IRUGO, | ||
307 | l->debugfs_kmem, | ||
308 | g->kmallocs, &__kmem_tracking_fops); | ||
309 | node = debugfs_create_file("traces", S_IRUGO, | ||
310 | l->debugfs_kmem, | ||
311 | g, &__kmem_traces_fops); | ||
312 | } | ||
diff --git a/include/os/linux/debug_kmem.h b/include/os/linux/debug_kmem.h new file mode 100644 index 0000000..44322b5 --- /dev/null +++ b/include/os/linux/debug_kmem.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_KMEM_H__ | ||
16 | #define __NVGPU_DEBUG_KMEM_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
20 | void nvgpu_kmem_debugfs_init(struct gk20a *g); | ||
21 | #endif | ||
22 | |||
23 | #endif /* __NVGPU_DEBUG_KMEM_H__ */ | ||
diff --git a/include/os/linux/debug_ltc.c b/include/os/linux/debug_ltc.c new file mode 100644 index 0000000..1b4c221 --- /dev/null +++ b/include/os/linux/debug_ltc.c | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_ltc.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <nvgpu/gk20a.h> | ||
19 | |||
20 | #include <linux/debugfs.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | |||
23 | static ssize_t ltc_intr_illegal_compstat_read(struct file *file, | ||
24 | char __user *user_buf, size_t count, loff_t *ppos) | ||
25 | { | ||
26 | char buf[3]; | ||
27 | struct gk20a *g = file->private_data; | ||
28 | |||
29 | if (g->ltc_intr_en_illegal_compstat) | ||
30 | buf[0] = 'Y'; | ||
31 | else | ||
32 | buf[0] = 'N'; | ||
33 | buf[1] = '\n'; | ||
34 | buf[2] = 0x00; | ||
35 | |||
36 | return simple_read_from_buffer(user_buf, count, ppos, buf, 2); | ||
37 | } | ||
38 | |||
39 | static ssize_t ltc_intr_illegal_compstat_write(struct file *file, | ||
40 | const char __user *user_buf, size_t count, loff_t *ppos) | ||
41 | { | ||
42 | char buf[3] = { 0 }; | ||
43 | int buf_size; | ||
44 | bool intr_illegal_compstat_enabled; | ||
45 | struct gk20a *g = file->private_data; | ||
46 | int err; | ||
47 | |||
48 | if (!g->ops.ltc.intr_en_illegal_compstat) | ||
49 | return -EINVAL; | ||
50 | |||
51 | buf_size = min(count, (sizeof(buf)-1)); | ||
52 | if (copy_from_user(buf, user_buf, buf_size)) | ||
53 | return -EFAULT; | ||
54 | |||
55 | err = gk20a_busy(g); | ||
56 | if (err) | ||
57 | return err; | ||
58 | |||
59 | if (strtobool(buf, &intr_illegal_compstat_enabled) == 0) { | ||
60 | g->ops.ltc.intr_en_illegal_compstat(g, | ||
61 | intr_illegal_compstat_enabled); | ||
62 | g->ltc_intr_en_illegal_compstat = intr_illegal_compstat_enabled; | ||
63 | } | ||
64 | |||
65 | gk20a_idle(g); | ||
66 | |||
67 | return buf_size; | ||
68 | } | ||
69 | |||
70 | static const struct file_operations ltc_intr_illegal_compstat_fops = { | ||
71 | .open = simple_open, | ||
72 | .read = ltc_intr_illegal_compstat_read, | ||
73 | .write = ltc_intr_illegal_compstat_write, | ||
74 | }; | ||
75 | |||
76 | int nvgpu_ltc_debugfs_init(struct gk20a *g) | ||
77 | { | ||
78 | struct dentry *d; | ||
79 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
80 | struct dentry *gpu_root = l->debugfs; | ||
81 | |||
82 | l->debugfs_ltc = debugfs_create_dir("ltc", gpu_root); | ||
83 | if (IS_ERR_OR_NULL(l->debugfs_ltc)) | ||
84 | return -ENODEV; | ||
85 | |||
86 | /* Debug fs node to enable/disable illegal_compstat */ | ||
87 | d = debugfs_create_file("intr_illegal_compstat_enable", 0600, | ||
88 | l->debugfs_ltc, g, | ||
89 | &ltc_intr_illegal_compstat_fops); | ||
90 | if (!d) | ||
91 | return -ENOMEM; | ||
92 | |||
93 | return 0; | ||
94 | } | ||
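A userspace sketch of driving this node; the debugfs path here is illustrative (the per-GPU directory name varies by platform), and strtobool() accepts the usual Y/N/y/n/1/0 spellings:

    #include <stdio.h>

    int main(void)
    {
            /* Illustrative path; substitute the actual per-GPU directory. */
            const char *node = "/sys/kernel/debug/gpu.0/ltc/"
                               "intr_illegal_compstat_enable";
            FILE *f = fopen(node, "w");

            if (!f)
                    return 1;
            fputs("Y\n", f);        /* enable the illegal-compstat interrupt */
            fclose(f);
            return 0;
    }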
diff --git a/include/os/linux/debug_ltc.h b/include/os/linux/debug_ltc.h new file mode 100644 index 0000000..3ad734c --- /dev/null +++ b/include/os/linux/debug_ltc.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_LTC_H__ | ||
16 | #define __NVGPU_DEBUG_LTC_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | int nvgpu_ltc_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_LTC_H__ */ | ||
diff --git a/include/os/linux/debug_pmgr.c b/include/os/linux/debug_pmgr.c new file mode 100644 index 0000000..c264978 --- /dev/null +++ b/include/os/linux/debug_pmgr.c | |||
@@ -0,0 +1,104 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/debugfs.h> | ||
18 | |||
19 | #include "os_linux.h" | ||
20 | |||
21 | #include "pmgr/pmgr.h" | ||
22 | |||
23 | static int pmgr_pwr_devices_get_power_u64(void *data, u64 *p) | ||
24 | { | ||
25 | struct gk20a *g = (struct gk20a *)data; | ||
26 | int err; | ||
27 | u32 val; | ||
28 | |||
29 | err = pmgr_pwr_devices_get_power(g, &val); | ||
30 | *p = val; | ||
31 | |||
32 | return err; | ||
33 | } | ||
34 | |||
35 | static int pmgr_pwr_devices_get_current_u64(void *data, u64 *p) | ||
36 | { | ||
37 | struct gk20a *g = (struct gk20a *)data; | ||
38 | int err; | ||
39 | u32 val; | ||
40 | |||
41 | err = pmgr_pwr_devices_get_current(g, &val); | ||
42 | *p = val; | ||
43 | |||
44 | return err; | ||
45 | } | ||
46 | |||
47 | static int pmgr_pwr_devices_get_voltage_u64(void *data, u64 *p) | ||
48 | { | ||
49 | struct gk20a *g = (struct gk20a *)data; | ||
50 | int err; | ||
51 | u32 val; | ||
52 | |||
53 | err = pmgr_pwr_devices_get_voltage(g, &val); | ||
54 | *p = val; | ||
55 | |||
56 | return err; | ||
57 | } | ||
58 | |||
59 | DEFINE_SIMPLE_ATTRIBUTE( | ||
60 | pmgr_power_ctrl_fops, pmgr_pwr_devices_get_power_u64, NULL, "%llu\n"); | ||
61 | |||
62 | DEFINE_SIMPLE_ATTRIBUTE( | ||
63 | pmgr_current_ctrl_fops, pmgr_pwr_devices_get_current_u64, NULL, "%llu\n"); | ||
64 | |||
65 | DEFINE_SIMPLE_ATTRIBUTE( | ||
66 | pmgr_voltage_ctrl_fops, pmgr_pwr_devices_get_voltage_u64, NULL, "%llu\n"); | ||
67 | |||
68 | static void pmgr_debugfs_init(struct gk20a *g) | ||
69 | { | ||
70 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
71 | struct dentry *dbgentry; | ||
72 | |||
73 | dbgentry = debugfs_create_file( | ||
74 | "power", S_IRUGO, l->debugfs, g, &pmgr_power_ctrl_fops); | ||
75 | if (!dbgentry) | ||
76 | nvgpu_err(g, "debugfs entry create failed for power"); | ||
77 | |||
78 | dbgentry = debugfs_create_file( | ||
79 | "current", S_IRUGO, l->debugfs, g, &pmgr_current_ctrl_fops); | ||
80 | if (!dbgentry) | ||
81 | nvgpu_err(g, "debugfs entry create failed for current"); | ||
82 | |||
83 | dbgentry = debugfs_create_file( | ||
84 | "voltage", S_IRUGO, l->debugfs, g, &pmgr_voltage_ctrl_fops); | ||
85 | if (!dbgentry) | ||
86 | nvgpu_err(g, "debugfs entry create failed for voltage"); | ||
87 | } | ||
88 | |||
89 | int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l) | ||
90 | { | ||
91 | struct gk20a *g = &l->g; | ||
92 | int ret = 0; | ||
93 | |||
94 | if (!nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) | ||
95 | return ret; | ||
96 | |||
97 | if (!g->ops.clk.support_pmgr_domain) | ||
98 | return ret; | ||
99 | |||
100 | pmgr_debugfs_init(g); | ||
101 | |||
102 | return ret; | ||
103 | } | ||
104 | |||
diff --git a/include/os/linux/debug_pmgr.h b/include/os/linux/debug_pmgr.h new file mode 100644 index 0000000..bd6c556 --- /dev/null +++ b/include/os/linux/debug_pmgr.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __LINUX_DEBUG_PMGR_H | ||
18 | #define __LINUX_DEBUG_PMGR_H | ||
19 | struct nvgpu_os_linux; | ||
20 | #ifdef CONFIG_DEBUG_FS | ||
21 | int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l); | ||
22 | #else | ||
23 | static inline int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | #endif | ||
28 | #endif | ||
diff --git a/include/os/linux/debug_pmu.c b/include/os/linux/debug_pmu.c new file mode 100644 index 0000000..f3e36d0 --- /dev/null +++ b/include/os/linux/debug_pmu.c | |||
@@ -0,0 +1,484 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <nvgpu/enabled.h> | ||
16 | #include "debug_pmu.h" | ||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | |||
23 | static int lpwr_debug_show(struct seq_file *s, void *data) | ||
24 | { | ||
25 | struct gk20a *g = s->private; | ||
26 | |||
27 | if (g->ops.pmu.pmu_pg_engines_feature_list && | ||
28 | g->ops.pmu.pmu_pg_engines_feature_list(g, | ||
29 | PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != | ||
30 | NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { | ||
31 | seq_printf(s, "PSTATE: %u\n" | ||
32 | "RPPG Enabled: %u\n" | ||
33 | "RPPG ref count: %u\n" | ||
34 | "RPPG state: %u\n" | ||
35 | "MSCG Enabled: %u\n" | ||
36 | "MSCG pstate state: %u\n" | ||
37 | "MSCG transition state: %u\n", | ||
38 | g->ops.clk_arb.get_current_pstate(g), | ||
39 | g->elpg_enabled, g->pmu.elpg_refcnt, | ||
40 | g->pmu.elpg_stat, g->mscg_enabled, | ||
41 | g->pmu.mscg_stat, g->pmu.mscg_transition_state); | ||
42 | |||
43 | } else | ||
44 | seq_printf(s, "ELPG Enabled: %u\n" | ||
45 | "ELPG ref count: %u\n" | ||
46 | "ELPG state: %u\n", | ||
47 | g->elpg_enabled, g->pmu.elpg_refcnt, | ||
48 | g->pmu.elpg_stat); | ||
49 | |||
50 | return 0; | ||
51 | |||
52 | } | ||
53 | |||
54 | static int lpwr_debug_open(struct inode *inode, struct file *file) | ||
55 | { | ||
56 | return single_open(file, lpwr_debug_show, inode->i_private); | ||
57 | } | ||
58 | |||
59 | static const struct file_operations lpwr_debug_fops = { | ||
60 | .open = lpwr_debug_open, | ||
61 | .read = seq_read, | ||
62 | .llseek = seq_lseek, | ||
63 | .release = single_release, | ||
64 | }; | ||
65 | |||
66 | static int mscg_stat_show(struct seq_file *s, void *data) | ||
67 | { | ||
68 | struct gk20a *g = s->private; | ||
69 | u64 total_ingating, total_ungating, residency, divisor, dividend; | ||
70 | struct pmu_pg_stats_data pg_stat_data = { 0 }; | ||
71 | int err; | ||
72 | |||
73 | /* Don't unnecessarily power on the device */ | ||
74 | if (g->power_on) { | ||
75 | err = gk20a_busy(g); | ||
76 | if (err) | ||
77 | return err; | ||
78 | |||
79 | nvgpu_pmu_get_pg_stats(g, | ||
80 | PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); | ||
81 | gk20a_idle(g); | ||
82 | } | ||
83 | total_ingating = g->pg_ingating_time_us + | ||
84 | (u64)pg_stat_data.ingating_time; | ||
85 | total_ungating = g->pg_ungating_time_us + | ||
86 | (u64)pg_stat_data.ungating_time; | ||
87 | |||
88 | divisor = total_ingating + total_ungating; | ||
89 | |||
90 | /* We compute the residency on a scale of 1000 */ | ||
91 | dividend = total_ingating * 1000; | ||
92 | |||
93 | if (divisor) | ||
94 | residency = div64_u64(dividend, divisor); | ||
95 | else | ||
96 | residency = 0; | ||
97 | |||
98 | seq_printf(s, | ||
99 | "Time in MSCG: %llu us\n" | ||
100 | "Time out of MSCG: %llu us\n" | ||
101 | "MSCG residency ratio: %llu\n" | ||
102 | "MSCG Entry Count: %u\n" | ||
103 | "MSCG Avg Entry latency %u\n" | ||
104 | "MSCG Avg Exit latency %u\n", | ||
105 | total_ingating, total_ungating, | ||
106 | residency, pg_stat_data.gating_cnt, | ||
107 | pg_stat_data.avg_entry_latency_us, | ||
108 | pg_stat_data.avg_exit_latency_us); | ||
109 | return 0; | ||
110 | |||
111 | } | ||
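Concretely, the scale-of-1000 residency above works out as follows (a worked example, not driver output):

    /*
     * total_ingating = 750 us, total_ungating = 250 us
     *
     *   dividend  = 750 * 1000     = 750000
     *   divisor   = 750 + 250      = 1000
     *   residency = 750000 / 1000  = 750   -> 75.0% of time in MSCG
     */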
112 | |||
113 | static int mscg_stat_open(struct inode *inode, struct file *file) | ||
114 | { | ||
115 | return single_open(file, mscg_stat_show, inode->i_private); | ||
116 | } | ||
117 | |||
118 | static const struct file_operations mscg_stat_fops = { | ||
119 | .open = mscg_stat_open, | ||
120 | .read = seq_read, | ||
121 | .llseek = seq_lseek, | ||
122 | .release = single_release, | ||
123 | }; | ||
124 | |||
125 | static int mscg_transitions_show(struct seq_file *s, void *data) | ||
126 | { | ||
127 | struct gk20a *g = s->private; | ||
128 | struct pmu_pg_stats_data pg_stat_data = { 0 }; | ||
129 | u32 total_gating_cnt; | ||
130 | int err; | ||
131 | |||
132 | if (g->power_on) { | ||
133 | err = gk20a_busy(g); | ||
134 | if (err) | ||
135 | return err; | ||
136 | |||
137 | nvgpu_pmu_get_pg_stats(g, | ||
138 | PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); | ||
139 | gk20a_idle(g); | ||
140 | } | ||
141 | total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; | ||
142 | |||
143 | seq_printf(s, "%u\n", total_gating_cnt); | ||
144 | return 0; | ||
145 | |||
146 | } | ||
147 | |||
148 | static int mscg_transitions_open(struct inode *inode, struct file *file) | ||
149 | { | ||
150 | return single_open(file, mscg_transitions_show, inode->i_private); | ||
151 | } | ||
152 | |||
153 | static const struct file_operations mscg_transitions_fops = { | ||
154 | .open = mscg_transitions_open, | ||
155 | .read = seq_read, | ||
156 | .llseek = seq_lseek, | ||
157 | .release = single_release, | ||
158 | }; | ||
159 | |||
160 | static int elpg_stat_show(struct seq_file *s, void *data) | ||
161 | { | ||
162 | struct gk20a *g = s->private; | ||
163 | struct pmu_pg_stats_data pg_stat_data = { 0 }; | ||
164 | u64 total_ingating, total_ungating, residency, divisor, dividend; | ||
165 | int err; | ||
166 | |||
167 | /* Don't unnecessarily power on the device */ | ||
168 | if (g->power_on) { | ||
169 | err = gk20a_busy(g); | ||
170 | if (err) | ||
171 | return err; | ||
172 | |||
173 | nvgpu_pmu_get_pg_stats(g, | ||
174 | PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); | ||
175 | gk20a_idle(g); | ||
176 | } | ||
177 | total_ingating = g->pg_ingating_time_us + | ||
178 | (u64)pg_stat_data.ingating_time; | ||
179 | total_ungating = g->pg_ungating_time_us + | ||
180 | (u64)pg_stat_data.ungating_time; | ||
181 | divisor = total_ingating + total_ungating; | ||
182 | |||
183 | /* We compute the residency on a scale of 1000 */ | ||
184 | dividend = total_ingating * 1000; | ||
185 | |||
186 | if (divisor) | ||
187 | residency = div64_u64(dividend, divisor); | ||
188 | else | ||
189 | residency = 0; | ||
190 | |||
191 | seq_printf(s, | ||
192 | "Time in ELPG: %llu us\n" | ||
193 | "Time out of ELPG: %llu us\n" | ||
194 | "ELPG residency ratio: %llu\n" | ||
195 | "ELPG Entry Count: %u\n" | ||
196 | "ELPG Avg Entry latency %u us\n" | ||
197 | "ELPG Avg Exit latency %u us\n", | ||
198 | total_ingating, total_ungating, | ||
199 | residency, pg_stat_data.gating_cnt, | ||
200 | pg_stat_data.avg_entry_latency_us, | ||
201 | pg_stat_data.avg_exit_latency_us); | ||
202 | return 0; | ||
203 | |||
204 | } | ||
205 | |||
206 | static int elpg_stat_open(struct inode *inode, struct file *file) | ||
207 | { | ||
208 | return single_open(file, elpg_stat_show, inode->i_private); | ||
209 | } | ||
210 | |||
211 | static const struct file_operations elpg_stat_fops = { | ||
212 | .open = elpg_stat_open, | ||
213 | .read = seq_read, | ||
214 | .llseek = seq_lseek, | ||
215 | .release = single_release, | ||
216 | }; | ||
217 | |||
218 | static int elpg_transitions_show(struct seq_file *s, void *data) | ||
219 | { | ||
220 | struct gk20a *g = s->private; | ||
221 | struct pmu_pg_stats_data pg_stat_data = { 0 }; | ||
222 | u32 total_gating_cnt; | ||
223 | int err; | ||
224 | |||
225 | if (g->power_on) { | ||
226 | err = gk20a_busy(g); | ||
227 | if (err) | ||
228 | return err; | ||
229 | |||
230 | nvgpu_pmu_get_pg_stats(g, | ||
231 | PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); | ||
232 | gk20a_idle(g); | ||
233 | } | ||
234 | total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; | ||
235 | |||
236 | seq_printf(s, "%u\n", total_gating_cnt); | ||
237 | return 0; | ||
238 | |||
239 | } | ||
240 | |||
241 | static int elpg_transitions_open(struct inode *inode, struct file *file) | ||
242 | { | ||
243 | return single_open(file, elpg_transitions_show, inode->i_private); | ||
244 | } | ||
245 | |||
246 | static const struct file_operations elpg_transitions_fops = { | ||
247 | .open = elpg_transitions_open, | ||
248 | .read = seq_read, | ||
249 | .llseek = seq_lseek, | ||
250 | .release = single_release, | ||
251 | }; | ||
252 | |||
253 | static int falc_trace_show(struct seq_file *s, void *data) | ||
254 | { | ||
255 | struct gk20a *g = s->private; | ||
256 | struct nvgpu_pmu *pmu = &g->pmu; | ||
257 | u32 i = 0, j = 0, k, l, m; | ||
258 | char part_str[40]; | ||
259 | void *tracebuffer; | ||
260 | char *trace; | ||
261 | u32 *trace1; | ||
262 | |||
263 | /* allocate system memory to copy pmu trace buffer */ | ||
264 | tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE); | ||
265 | if (tracebuffer == NULL) | ||
266 | return -ENOMEM; | ||
267 | |||
268 | /* read pmu traces into system memory buffer */ | ||
269 | nvgpu_mem_rd_n(g, &pmu->trace_buf, | ||
270 | 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE); | ||
271 | |||
272 | trace = (char *)tracebuffer; | ||
273 | trace1 = (u32 *)tracebuffer; | ||
274 | |||
275 | for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { | ||
276 | for (j = 0; j < 0x40; j++) | ||
277 | if (trace1[(i / 4) + j]) | ||
278 | break; | ||
279 | if (j == 0x40) | ||
280 | break; | ||
281 | seq_printf(s, "Index %x: ", trace1[(i / 4)]); | ||
282 | l = 0; | ||
283 | m = 0; | ||
284 | while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { | ||
285 | if (k >= 40) | ||
286 | break; | ||
287 | strncpy(part_str, (trace+i+20+m), k); | ||
288 | part_str[k] = 0; | ||
289 | seq_printf(s, "%s0x%x", part_str, | ||
290 | trace1[(i / 4) + 1 + l]); | ||
291 | l++; | ||
292 | m += k + 2; | ||
293 | } | ||
294 | seq_printf(s, "%s", (trace+i+20+m)); | ||
295 | } | ||
296 | |||
297 | nvgpu_kfree(g, tracebuffer); | ||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | static int falc_trace_open(struct inode *inode, struct file *file) | ||
302 | { | ||
303 | return single_open(file, falc_trace_show, inode->i_private); | ||
304 | } | ||
305 | |||
306 | static const struct file_operations falc_trace_fops = { | ||
307 | .open = falc_trace_open, | ||
308 | .read = seq_read, | ||
309 | .llseek = seq_lseek, | ||
310 | .release = single_release, | ||
311 | }; | ||
312 | |||
313 | static int perfmon_events_enable_show(struct seq_file *s, void *data) | ||
314 | { | ||
315 | struct gk20a *g = s->private; | ||
316 | |||
317 | seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); | ||
318 | return 0; | ||
319 | |||
320 | } | ||
321 | |||
322 | static int perfmon_events_enable_open(struct inode *inode, struct file *file) | ||
323 | { | ||
324 | return single_open(file, perfmon_events_enable_show, inode->i_private); | ||
325 | } | ||
326 | |||
327 | static ssize_t perfmon_events_enable_write(struct file *file, | ||
328 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
329 | { | ||
330 | struct seq_file *s = file->private_data; | ||
331 | struct gk20a *g = s->private; | ||
332 | unsigned long val = 0; | ||
333 | char buf[40]; | ||
334 | int buf_size; | ||
335 | int err; | ||
336 | |||
337 | memset(buf, 0, sizeof(buf)); | ||
338 | buf_size = min(count, (sizeof(buf)-1)); | ||
339 | |||
340 | if (copy_from_user(buf, userbuf, buf_size)) | ||
341 | return -EFAULT; | ||
342 | |||
343 | if (kstrtoul(buf, 10, &val) < 0) | ||
344 | return -EINVAL; | ||
345 | |||
346 | /* Don't turn on gk20a unnecessarily */ | ||
347 | if (g->power_on) { | ||
348 | err = gk20a_busy(g); | ||
349 | if (err) | ||
350 | return err; | ||
351 | |||
352 | if (val && !g->pmu.perfmon_sampling_enabled && | ||
353 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { | ||
354 | g->pmu.perfmon_sampling_enabled = true; | ||
355 | g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); | ||
356 | } else if (!val && g->pmu.perfmon_sampling_enabled && | ||
357 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { | ||
358 | g->pmu.perfmon_sampling_enabled = false; | ||
359 | g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); | ||
360 | } | ||
361 | gk20a_idle(g); | ||
362 | } else { | ||
363 | g->pmu.perfmon_sampling_enabled = val ? true : false; | ||
364 | } | ||
365 | |||
366 | return count; | ||
367 | } | ||
368 | |||
369 | static const struct file_operations perfmon_events_enable_fops = { | ||
370 | .open = perfmon_events_enable_open, | ||
371 | .read = seq_read, | ||
372 | .write = perfmon_events_enable_write, | ||
373 | .llseek = seq_lseek, | ||
374 | .release = single_release, | ||
375 | }; | ||
376 | |||
377 | static int perfmon_events_count_show(struct seq_file *s, void *data) | ||
378 | { | ||
379 | struct gk20a *g = s->private; | ||
380 | |||
381 | seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); | ||
382 | return 0; | ||
383 | |||
384 | } | ||
385 | |||
386 | static int perfmon_events_count_open(struct inode *inode, struct file *file) | ||
387 | { | ||
388 | return single_open(file, perfmon_events_count_show, inode->i_private); | ||
389 | } | ||
390 | |||
391 | static const struct file_operations perfmon_events_count_fops = { | ||
392 | .open = perfmon_events_count_open, | ||
393 | .read = seq_read, | ||
394 | .llseek = seq_lseek, | ||
395 | .release = single_release, | ||
396 | }; | ||
397 | |||
398 | static int security_show(struct seq_file *s, void *data) | ||
399 | { | ||
400 | struct gk20a *g = s->private; | ||
401 | |||
402 | seq_printf(s, "%d\n", g->pmu.pmu_mode); | ||
403 | return 0; | ||
404 | |||
405 | } | ||
406 | |||
407 | static int security_open(struct inode *inode, struct file *file) | ||
408 | { | ||
409 | return single_open(file, security_show, inode->i_private); | ||
410 | } | ||
411 | |||
412 | static const struct file_operations security_fops = { | ||
413 | .open = security_open, | ||
414 | .read = seq_read, | ||
415 | .llseek = seq_lseek, | ||
416 | .release = single_release, | ||
417 | }; | ||
418 | |||
419 | int gk20a_pmu_debugfs_init(struct gk20a *g) | ||
420 | { | ||
421 | struct dentry *d; | ||
422 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
423 | |||
424 | d = debugfs_create_file( | ||
425 | "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
426 | &lpwr_debug_fops); | ||
427 | if (!d) | ||
428 | goto err_out; | ||
429 | |||
430 | d = debugfs_create_file( | ||
431 | "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
432 | &mscg_stat_fops); | ||
433 | if (!d) | ||
434 | goto err_out; | ||
435 | |||
436 | d = debugfs_create_file( | ||
437 | "mscg_transitions", S_IRUGO, l->debugfs, g, | ||
438 | &mscg_transitions_fops); | ||
439 | if (!d) | ||
440 | goto err_out; | ||
441 | |||
442 | d = debugfs_create_file( | ||
443 | "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
444 | &elpg_stat_fops); | ||
445 | if (!d) | ||
446 | goto err_out; | ||
447 | |||
448 | d = debugfs_create_file( | ||
449 | "elpg_transitions", S_IRUGO, l->debugfs, g, | ||
450 | &elpg_transitions_fops); | ||
451 | if (!d) | ||
452 | goto err_out; | ||
453 | |||
454 | d = debugfs_create_file( | ||
455 | "pmu_security", S_IRUGO, l->debugfs, g, | ||
456 | &security_fops); | ||
457 | if (!d) | ||
458 | goto err_out; | ||
459 | |||
460 | /* No access to PMU if virtual */ | ||
461 | if (!g->is_virtual) { | ||
462 | d = debugfs_create_file( | ||
463 | "falc_trace", S_IRUGO, l->debugfs, g, | ||
464 | &falc_trace_fops); | ||
465 | if (!d) | ||
466 | goto err_out; | ||
467 | |||
468 | d = debugfs_create_file( | ||
469 | "perfmon_events_enable", S_IRUGO, l->debugfs, g, | ||
470 | &perfmon_events_enable_fops); | ||
471 | if (!d) | ||
472 | goto err_out; | ||
473 | |||
474 | d = debugfs_create_file( | ||
475 | "perfmon_events_count", S_IRUGO, l->debugfs, g, | ||
476 | &perfmon_events_count_fops); | ||
477 | if (!d) | ||
478 | goto err_out; | ||
479 | } | ||
480 | return 0; | ||
481 | err_out: | ||
482 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
483 | return -ENOMEM; | ||
484 | } | ||
diff --git a/include/os/linux/debug_pmu.h b/include/os/linux/debug_pmu.h new file mode 100644 index 0000000..c4e3243 --- /dev/null +++ b/include/os/linux/debug_pmu.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_PMU_H__ | ||
16 | #define __NVGPU_DEBUG_PMU_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | int gk20a_pmu_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_PMU_H__ */ | ||
diff --git a/include/os/linux/debug_sched.c b/include/os/linux/debug_sched.c new file mode 100644 index 0000000..fa43dc4 --- /dev/null +++ b/include/os/linux/debug_sched.c | |||
@@ -0,0 +1,79 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_sched.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) | ||
22 | { | ||
23 | struct gk20a *g = s->private; | ||
24 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
25 | bool sched_busy = true; | ||
26 | |||
27 | int n = sched->bitmap_size / sizeof(u64); | ||
28 | int i; | ||
29 | int err; | ||
30 | |||
31 | err = gk20a_busy(g); | ||
32 | if (err) | ||
33 | return err; | ||
34 | |||
35 | if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { | ||
36 | sched_busy = false; | ||
37 | nvgpu_mutex_release(&sched->busy_lock); | ||
38 | } | ||
39 | |||
40 | seq_printf(s, "control_locked=%d\n", sched->control_locked); | ||
41 | seq_printf(s, "busy=%d\n", sched_busy); | ||
42 | seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); | ||
43 | |||
44 | nvgpu_mutex_acquire(&sched->status_lock); | ||
45 | |||
46 | seq_puts(s, "active_tsg_bitmap\n"); | ||
47 | for (i = 0; i < n; i++) | ||
48 | seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]); | ||
49 | |||
50 | seq_puts(s, "recent_tsg_bitmap\n"); | ||
51 | for (i = 0; i < n; i++) | ||
52 | seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); | ||
53 | |||
54 | nvgpu_mutex_release(&sched->status_lock); | ||
55 | |||
56 | gk20a_idle(g); | ||
57 | |||
58 | return 0; | ||
59 | } | ||
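The bitmaps are dumped as raw 64-bit words. Assuming the conventional kernel bitmap layout (bit n lives in word n/64), a TSG id would be decoded as sketched below; this is an illustration, not driver code:

    /* TSG id t is active iff:
     *   (active_tsg_bitmap[t / 64] >> (t % 64)) & 1
     * i.e. word index t / 64, bit index t % 64 within that word.
     */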
60 | |||
61 | static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file) | ||
62 | { | ||
63 | return single_open(file, gk20a_sched_debugfs_show, inode->i_private); | ||
64 | } | ||
65 | |||
66 | static const struct file_operations gk20a_sched_debugfs_fops = { | ||
67 | .open = gk20a_sched_debugfs_open, | ||
68 | .read = seq_read, | ||
69 | .llseek = seq_lseek, | ||
70 | .release = single_release, | ||
71 | }; | ||
72 | |||
73 | void gk20a_sched_debugfs_init(struct gk20a *g) | ||
74 | { | ||
75 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
76 | |||
77 | debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs, | ||
78 | g, &gk20a_sched_debugfs_fops); | ||
79 | } | ||
diff --git a/include/os/linux/debug_sched.h b/include/os/linux/debug_sched.h new file mode 100644 index 0000000..34a8f55 --- /dev/null +++ b/include/os/linux/debug_sched.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_SCHED_H__ | ||
16 | #define __NVGPU_DEBUG_SCHED_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | void gk20a_sched_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_SCHED_H__ */ | ||
diff --git a/include/os/linux/debug_therm_gp106.c b/include/os/linux/debug_therm_gp106.c new file mode 100644 index 0000000..dfe3946 --- /dev/null +++ b/include/os/linux/debug_therm_gp106.c | |||
@@ -0,0 +1,49 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/debugfs.h> | ||
18 | |||
19 | #include "os_linux.h" | ||
20 | |||
21 | static int therm_get_internal_sensor_curr_temp(void *data, u64 *val) | ||
22 | { | ||
23 | struct gk20a *g = (struct gk20a *)data; | ||
24 | u32 readval; | ||
25 | int err; | ||
26 | |||
27 | if (!g->ops.therm.get_internal_sensor_curr_temp) | ||
28 | return -EINVAL; | ||
29 | |||
30 | err = g->ops.therm.get_internal_sensor_curr_temp(g, &readval); | ||
31 | if (!err) | ||
32 | *val = readval; | ||
33 | |||
34 | return err; | ||
35 | } | ||
36 | DEFINE_SIMPLE_ATTRIBUTE(therm_ctrl_fops, therm_get_internal_sensor_curr_temp, NULL, "%llu\n"); | ||
37 | |||
38 | int gp106_therm_init_debugfs(struct gk20a *g) | ||
39 | { | ||
40 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
41 | struct dentry *dbgentry; | ||
42 | |||
43 | dbgentry = debugfs_create_file( | ||
44 | "temp", S_IRUGO, l->debugfs, g, &therm_ctrl_fops); | ||
45 | if (!dbgentry) | ||
46 | nvgpu_err(g, "debugfs entry create failed for therm_curr_temp"); | ||
47 | |||
48 | return 0; | ||
49 | } | ||
diff --git a/include/os/linux/debug_therm_gp106.h b/include/os/linux/debug_therm_gp106.h new file mode 100644 index 0000000..3e9380d --- /dev/null +++ b/include/os/linux/debug_therm_gp106.h | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __DEBUG_THERM_GP106_H | ||
18 | #define __DEBUG_THERM_GP106_H | ||
19 | |||
20 | #ifdef CONFIG_DEBUG_FS | ||
21 | int gp106_therm_init_debugfs(struct gk20a *g); | ||
22 | #else | ||
23 | static inline int gp106_therm_init_debugfs(struct gk20a *g) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | #endif | ||
28 | |||
29 | #endif | ||
diff --git a/include/os/linux/debug_xve.c b/include/os/linux/debug_xve.c new file mode 100644 index 0000000..128d316 --- /dev/null +++ b/include/os/linux/debug_xve.c | |||
@@ -0,0 +1,177 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <nvgpu/types.h> | ||
16 | #include <nvgpu/xve.h> | ||
17 | #include <nvgpu/timers.h> | ||
18 | |||
19 | #include "debug_xve.h" | ||
20 | #include "os_linux.h" | ||
21 | |||
22 | #include <linux/debugfs.h> | ||
23 | #include <linux/uaccess.h> | ||
24 | |||
25 | static ssize_t xve_link_speed_write(struct file *filp, | ||
26 | const char __user *buff, | ||
27 | size_t len, loff_t *off) | ||
28 | { | ||
29 | struct gk20a *g = ((struct seq_file *)filp->private_data)->private; | ||
30 | char kbuff[16]; | ||
31 | u32 buff_size, check_len; | ||
32 | u32 link_speed = 0; | ||
33 | int ret; | ||
34 | |||
35 | buff_size = min_t(size_t, sizeof(kbuff) - 1, len); | ||
36 | |||
37 | memset(kbuff, 0, 16); | ||
38 | if (copy_from_user(kbuff, buff, buff_size)) | ||
39 | return -EFAULT; | ||
40 | |||
41 | check_len = strlen("Gen1"); | ||
42 | if (strncmp(kbuff, "Gen1", check_len) == 0) | ||
43 | link_speed = GPU_XVE_SPEED_2P5; | ||
44 | else if (strncmp(kbuff, "Gen2", check_len) == 0) | ||
45 | link_speed = GPU_XVE_SPEED_5P0; | ||
46 | else if (strncmp(kbuff, "Gen3", check_len) == 0) | ||
47 | link_speed = GPU_XVE_SPEED_8P0; | ||
48 | else | ||
49 | nvgpu_err(g, "%s: Unknown PCIe speed: %s", | ||
50 | __func__, kbuff); | ||
51 | |||
52 | if (!link_speed) | ||
53 | return -EINVAL; | ||
54 | |||
55 | /* Brief pause... To help rate limit this. */ | ||
56 | nvgpu_msleep(250); | ||
57 | |||
58 | /* | ||
59 | * And actually set the speed. Yay. | ||
60 | */ | ||
61 | ret = g->ops.xve.set_speed(g, link_speed); | ||
62 | if (ret) | ||
63 | return ret; | ||
64 | |||
65 | return len; | ||
66 | } | ||
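For reference, the strings parsed above correspond to PCIe per-lane signalling rates:

    /*
     * "Gen1" -> GPU_XVE_SPEED_2P5  (2.5 GT/s)
     * "Gen2" -> GPU_XVE_SPEED_5P0  (5.0 GT/s)
     * "Gen3" -> GPU_XVE_SPEED_8P0  (8.0 GT/s)
     */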
67 | |||
68 | static int xve_link_speed_show(struct seq_file *s, void *unused) | ||
69 | { | ||
70 | struct gk20a *g = s->private; | ||
71 | u32 speed; | ||
72 | int err; | ||
73 | |||
74 | err = g->ops.xve.get_speed(g, &speed); | ||
75 | if (err) | ||
76 | return err; | ||
77 | |||
78 | seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed)); | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static int xve_link_speed_open(struct inode *inode, struct file *file) | ||
84 | { | ||
85 | return single_open(file, xve_link_speed_show, inode->i_private); | ||
86 | } | ||
87 | |||
88 | static const struct file_operations xve_link_speed_fops = { | ||
89 | .open = xve_link_speed_open, | ||
90 | .read = seq_read, | ||
91 | .write = xve_link_speed_write, | ||
92 | .llseek = seq_lseek, | ||
93 | .release = single_release, | ||
94 | }; | ||
95 | |||
96 | static int xve_available_speeds_show(struct seq_file *s, void *unused) | ||
97 | { | ||
98 | struct gk20a *g = s->private; | ||
99 | u32 available_speeds; | ||
100 | |||
101 | g->ops.xve.available_speeds(g, &available_speeds); | ||
102 | |||
103 | seq_puts(s, "Available PCIe bus speeds:\n"); | ||
104 | if (available_speeds & GPU_XVE_SPEED_2P5) | ||
105 | seq_puts(s, " Gen1\n"); | ||
106 | if (available_speeds & GPU_XVE_SPEED_5P0) | ||
107 | seq_puts(s, " Gen2\n"); | ||
108 | if (available_speeds & GPU_XVE_SPEED_8P0) | ||
109 | seq_puts(s, " Gen3\n"); | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | static int xve_available_speeds_open(struct inode *inode, struct file *file) | ||
115 | { | ||
116 | return single_open(file, xve_available_speeds_show, inode->i_private); | ||
117 | } | ||
118 | |||
119 | static const struct file_operations xve_available_speeds_fops = { | ||
120 | .open = xve_available_speeds_open, | ||
121 | .read = seq_read, | ||
122 | .llseek = seq_lseek, | ||
123 | .release = single_release, | ||
124 | }; | ||
125 | |||
126 | static int xve_link_control_status_show(struct seq_file *s, void *unused) | ||
127 | { | ||
128 | struct gk20a *g = s->private; | ||
129 | u32 link_status; | ||
130 | |||
131 | link_status = g->ops.xve.get_link_control_status(g); | ||
132 | seq_printf(s, "0x%08x\n", link_status); | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | static int xve_link_control_status_open(struct inode *inode, struct file *file) | ||
138 | { | ||
139 | return single_open(file, xve_link_control_status_show, inode->i_private); | ||
140 | } | ||
141 | |||
142 | static const struct file_operations xve_link_control_status_fops = { | ||
143 | .open = xve_link_control_status_open, | ||
144 | .read = seq_read, | ||
145 | .llseek = seq_lseek, | ||
146 | .release = single_release, | ||
147 | }; | ||
148 | |||
149 | int nvgpu_xve_debugfs_init(struct gk20a *g) | ||
150 | { | ||
151 | int err = -ENODEV; | ||
152 | |||
153 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
154 | struct dentry *gpu_root = l->debugfs; | ||
155 | |||
156 | l->debugfs_xve = debugfs_create_dir("xve", gpu_root); | ||
157 | if (IS_ERR_OR_NULL(l->debugfs_xve)) | ||
158 | goto fail; | ||
159 | |||
160 | /* | ||
161 | * These are just debug nodes. If they fail to get made it's not worth | ||
162 | * worrying the higher level SW. | ||
163 | */ | ||
164 | debugfs_create_file("link_speed", S_IRUGO, | ||
165 | l->debugfs_xve, g, | ||
166 | &xve_link_speed_fops); | ||
167 | debugfs_create_file("available_speeds", S_IRUGO, | ||
168 | l->debugfs_xve, g, | ||
169 | &xve_available_speeds_fops); | ||
170 | debugfs_create_file("link_control_status", S_IRUGO, | ||
171 | l->debugfs_xve, g, | ||
172 | &xve_link_control_status_fops); | ||
173 | |||
174 | err = 0; | ||
175 | fail: | ||
176 | return err; | ||
177 | } | ||
diff --git a/include/os/linux/debug_xve.h b/include/os/linux/debug_xve.h new file mode 100644 index 0000000..f3b1ac5 --- /dev/null +++ b/include/os/linux/debug_xve.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __NVGPU_DEBUG_XVE_H__ | ||
16 | #define __NVGPU_DEBUG_XVE_H__ | ||
17 | |||
18 | struct gk20a; | ||
19 | int nvgpu_xve_debugfs_init(struct gk20a *g); | ||
20 | |||
21 | #endif /* __NVGPU_DEBUG_XVE_H__ */ | ||
diff --git a/include/os/linux/dmabuf.c b/include/os/linux/dmabuf.c new file mode 100644 index 0000000..e8e3313 --- /dev/null +++ b/include/os/linux/dmabuf.c | |||
@@ -0,0 +1,219 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/device.h> | ||
18 | #include <linux/dma-buf.h> | ||
19 | #include <linux/scatterlist.h> | ||
20 | |||
21 | #include <nvgpu/comptags.h> | ||
22 | #include <nvgpu/enabled.h> | ||
23 | #include <nvgpu/gk20a.h> | ||
24 | |||
25 | #include <nvgpu/linux/vm.h> | ||
26 | |||
27 | #include "gk20a/fence_gk20a.h" | ||
28 | |||
29 | #include "platform_gk20a.h" | ||
30 | #include "dmabuf.h" | ||
31 | #include "os_linux.h" | ||
32 | #include "dmabuf_vidmem.h" | ||
33 | |||
34 | static void gk20a_mm_delete_priv(void *_priv) | ||
35 | { | ||
36 | struct gk20a_buffer_state *s, *s_tmp; | ||
37 | struct gk20a_dmabuf_priv *priv = _priv; | ||
38 | struct gk20a *g; | ||
39 | |||
40 | if (!priv) | ||
41 | return; | ||
42 | |||
43 | g = priv->g; | ||
44 | |||
45 | if (priv->comptags.allocated && priv->comptags.lines) { | ||
46 | BUG_ON(!priv->comptag_allocator); | ||
47 | gk20a_comptaglines_free(priv->comptag_allocator, | ||
48 | priv->comptags.offset, | ||
49 | priv->comptags.lines); | ||
50 | } | ||
51 | |||
52 | /* Free buffer states */ | ||
53 | nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states, | ||
54 | gk20a_buffer_state, list) { | ||
55 | gk20a_fence_put(s->fence); | ||
56 | nvgpu_list_del(&s->list); | ||
57 | nvgpu_kfree(g, s); | ||
58 | } | ||
59 | |||
60 | nvgpu_kfree(g, priv); | ||
61 | } | ||
62 | |||
63 | enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, | ||
64 | struct dma_buf *dmabuf) | ||
65 | { | ||
66 | struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf); | ||
67 | bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY); | ||
68 | |||
69 | if (buf_owner == NULL) { | ||
70 | /* Not nvgpu-allocated, assume system memory */ | ||
71 | return APERTURE_SYSMEM; | ||
72 | } else if (WARN_ON(buf_owner == g && unified_memory)) { | ||
73 | /* Looks like our video memory, but this gpu doesn't support | ||
74 | * it. Warn about a bug and bail out */ | ||
75 | nvgpu_warn(g, | ||
76 | "dmabuf is our vidmem but we don't have local vidmem"); | ||
77 | return APERTURE_INVALID; | ||
78 | } else if (buf_owner != g) { | ||
79 | /* Someone else's vidmem */ | ||
80 | return APERTURE_INVALID; | ||
81 | } else { | ||
82 | /* Yay, buf_owner == g */ | ||
83 | return APERTURE_VIDMEM; | ||
84 | } | ||
85 | } | ||
86 | |||
87 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, | ||
88 | struct dma_buf_attachment **attachment) | ||
89 | { | ||
90 | struct gk20a_dmabuf_priv *priv; | ||
91 | |||
92 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
93 | if (WARN_ON(!priv)) | ||
94 | return ERR_PTR(-EINVAL); | ||
95 | |||
96 | nvgpu_mutex_acquire(&priv->lock); | ||
97 | |||
98 | if (priv->pin_count == 0) { | ||
99 | priv->attach = dma_buf_attach(dmabuf, dev); | ||
100 | if (IS_ERR(priv->attach)) { | ||
101 | nvgpu_mutex_release(&priv->lock); | ||
102 | return ERR_CAST(priv->attach); | ||
103 | } | ||
104 | |||
105 | priv->sgt = dma_buf_map_attachment(priv->attach, | ||
106 | DMA_BIDIRECTIONAL); | ||
107 | if (IS_ERR(priv->sgt)) { | ||
108 | dma_buf_detach(dmabuf, priv->attach); | ||
109 | nvgpu_mutex_release(&priv->lock); | ||
110 | return priv->sgt; | ||
111 | } | ||
112 | } | ||
113 | |||
114 | priv->pin_count++; | ||
115 | nvgpu_mutex_release(&priv->lock); | ||
116 | *attachment = priv->attach; | ||
117 | return priv->sgt; | ||
118 | } | ||
119 | |||
120 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
121 | struct dma_buf_attachment *attachment, | ||
122 | struct sg_table *sgt) | ||
123 | { | ||
124 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
126 | |||
127 | if (IS_ERR(priv) || !priv) | ||
128 | return; | ||
129 | |||
130 | nvgpu_mutex_acquire(&priv->lock); | ||
131 | WARN_ON(priv->sgt != sgt); | ||
132 | WARN_ON(priv->attach != attachment); | ||
133 | priv->pin_count--; | ||
134 | WARN_ON(priv->pin_count < 0); | ||
136 | if (priv->pin_count == 0) { | ||
137 | dma_buf_unmap_attachment(priv->attach, priv->sgt, | ||
138 | DMA_BIDIRECTIONAL); | ||
139 | dma_buf_detach(dmabuf, priv->attach); | ||
140 | } | ||
141 | nvgpu_mutex_release(&priv->lock); | ||
142 | } | ||
143 | |||
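The pin/unpin pair above is refcounted per buffer: only the first gk20a_mm_pin() attaches and maps, later calls just bump pin_count, and only the final gk20a_mm_unpin() unmaps and detaches. A minimal caller-side sketch, assuming a valid dev/dmabuf pair (the function name is hypothetical):

	/* usage sketch; caller name is hypothetical */
	#include <linux/dma-buf.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	static int example_with_pinned_buf(struct device *dev,
					   struct dma_buf *dmabuf)
	{
		struct dma_buf_attachment *attach;
		struct sg_table *sgt;

		sgt = gk20a_mm_pin(dev, dmabuf, &attach); /* first pin maps */
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		/* ... use sg_dma_address(sgt->sgl) while pinned ... */

		gk20a_mm_unpin(dev, dmabuf, attach, sgt); /* balance the pin */
		return 0;
	}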
144 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) | ||
145 | { | ||
146 | struct gk20a *g = gk20a_get_platform(dev)->g; | ||
147 | struct gk20a_dmabuf_priv *priv; | ||
148 | |||
149 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
150 | if (likely(priv)) | ||
151 | return 0; | ||
152 | |||
153 | nvgpu_mutex_acquire(&g->mm.priv_lock); | ||
154 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
155 | if (priv) | ||
156 | goto priv_exist_or_err; | ||
157 | |||
158 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
159 | if (!priv) { | ||
160 | priv = ERR_PTR(-ENOMEM); | ||
161 | goto priv_exist_or_err; | ||
162 | } | ||
163 | |||
164 | nvgpu_mutex_init(&priv->lock); | ||
165 | nvgpu_init_list_node(&priv->states); | ||
166 | priv->g = g; | ||
167 | dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); | ||
168 | |||
169 | priv_exist_or_err: | ||
170 | nvgpu_mutex_release(&g->mm.priv_lock); | ||
171 | if (IS_ERR(priv)) | ||
172 | return -ENOMEM; | ||
173 | |||
174 | return 0; | ||
175 | } | ||
176 | |||
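gk20a_dmabuf_alloc_drvdata() above is a double-checked initialization: an unlocked lookup handles the common already-allocated case, and a second lookup under g->mm.priv_lock ensures two threads racing to map the same dma_buf allocate the priv exactly once. The bare pattern, as a sketch with hypothetical types:

	/* sketch of double-checked init; struct obj is hypothetical */
	#include <linux/mutex.h>
	#include <linux/slab.h>

	struct obj {
		struct mutex lock;
		void *priv;
	};

	static void *get_or_create_priv(struct obj *o)
	{
		void *p = o->priv;	/* fast path, no lock taken */

		if (p)
			return p;

		mutex_lock(&o->lock);
		p = o->priv;		/* re-check: we may have lost a race */
		if (!p) {
			p = kzalloc(64, GFP_KERNEL);
			if (p)
				o->priv = p;
		}
		mutex_unlock(&o->lock);
		return p;
	}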
177 | int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, | ||
178 | u64 offset, struct gk20a_buffer_state **state) | ||
179 | { | ||
180 | int err = 0; | ||
181 | struct gk20a_dmabuf_priv *priv; | ||
182 | struct gk20a_buffer_state *s; | ||
183 | struct device *dev = dev_from_gk20a(g); | ||
184 | |||
185 | if (WARN_ON(offset >= (u64)dmabuf->size)) | ||
186 | return -EINVAL; | ||
187 | |||
188 | err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev); | ||
189 | if (err) | ||
190 | return err; | ||
191 | |||
192 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
193 | if (WARN_ON(!priv)) | ||
194 | return -ENOSYS; | ||
195 | |||
196 | nvgpu_mutex_acquire(&priv->lock); | ||
197 | |||
198 | nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list) | ||
199 | if (s->offset == offset) | ||
200 | goto out; | ||
201 | |||
202 | /* State not found, create state. */ | ||
203 | s = nvgpu_kzalloc(g, sizeof(*s)); | ||
204 | if (!s) { | ||
205 | err = -ENOMEM; | ||
206 | goto out; | ||
207 | } | ||
208 | |||
209 | s->offset = offset; | ||
210 | nvgpu_init_list_node(&s->list); | ||
211 | nvgpu_mutex_init(&s->lock); | ||
212 | nvgpu_list_add_tail(&s->list, &priv->states); | ||
213 | |||
214 | out: | ||
215 | nvgpu_mutex_release(&priv->lock); | ||
216 | if (!err) | ||
217 | *state = s; | ||
218 | return err; | ||
219 | } | ||
diff --git a/include/os/linux/dmabuf.h b/include/os/linux/dmabuf.h new file mode 100644 index 0000000..8399eaa --- /dev/null +++ b/include/os/linux/dmabuf.h | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __COMMON_LINUX_DMABUF_H__ | ||
18 | #define __COMMON_LINUX_DMABUF_H__ | ||
19 | |||
20 | #include <nvgpu/comptags.h> | ||
21 | #include <nvgpu/list.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | #include <nvgpu/gmmu.h> | ||
24 | |||
25 | struct sg_table; | ||
26 | struct dma_buf; | ||
27 | struct dma_buf_attachment; | ||
28 | struct device; | ||
29 | |||
30 | struct gk20a; | ||
31 | struct gk20a_buffer_state; | ||
32 | |||
33 | struct gk20a_dmabuf_priv { | ||
34 | struct nvgpu_mutex lock; | ||
35 | |||
36 | struct gk20a *g; | ||
37 | |||
38 | struct gk20a_comptag_allocator *comptag_allocator; | ||
39 | struct gk20a_comptags comptags; | ||
40 | |||
41 | struct dma_buf_attachment *attach; | ||
42 | struct sg_table *sgt; | ||
43 | |||
44 | int pin_count; | ||
45 | |||
46 | struct nvgpu_list_node states; | ||
47 | |||
48 | u64 buffer_id; | ||
49 | }; | ||
50 | |||
51 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, | ||
52 | struct dma_buf_attachment **attachment); | ||
53 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
54 | struct dma_buf_attachment *attachment, | ||
55 | struct sg_table *sgt); | ||
56 | |||
57 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); | ||
58 | |||
59 | int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, | ||
60 | u64 offset, struct gk20a_buffer_state **state); | ||
61 | |||
62 | #endif | ||
diff --git a/include/os/linux/dmabuf_vidmem.c b/include/os/linux/dmabuf_vidmem.c new file mode 100644 index 0000000..bada5dc --- /dev/null +++ b/include/os/linux/dmabuf_vidmem.c | |||
@@ -0,0 +1,269 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/dma-buf.h> | ||
18 | #include <linux/version.h> | ||
19 | #include <uapi/linux/nvgpu.h> | ||
20 | |||
21 | #ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD | ||
22 | #include <linux/platform/tegra/tegra_fd.h> | ||
23 | #endif | ||
24 | |||
25 | #include <nvgpu/dma.h> | ||
26 | #include <nvgpu/enabled.h> | ||
27 | #include <nvgpu/vidmem.h> | ||
28 | #include <nvgpu/nvgpu_mem.h> | ||
29 | #include <nvgpu/page_allocator.h> | ||
30 | #include <nvgpu/gk20a.h> | ||
31 | |||
32 | #include <nvgpu/linux/vm.h> | ||
33 | #include <nvgpu/linux/dma.h> | ||
34 | |||
35 | #include "gk20a/mm_gk20a.h" | ||
36 | #include "dmabuf_vidmem.h" | ||
37 | |||
38 | bool nvgpu_addr_is_vidmem_page_alloc(u64 addr) | ||
39 | { | ||
40 | return !!(addr & 1ULL); | ||
41 | } | ||
42 | |||
43 | void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr) | ||
44 | { | ||
45 | /* set bit 0 to indicate vidmem allocation */ | ||
46 | sg_dma_address(sgl) = (addr | 1ULL); | ||
47 | } | ||
48 | |||
49 | struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl) | ||
50 | { | ||
51 | u64 addr; | ||
52 | |||
53 | addr = sg_dma_address(sgl); | ||
54 | |||
55 | if (nvgpu_addr_is_vidmem_page_alloc(addr)) | ||
56 | addr = addr & ~1ULL; | ||
57 | else | ||
58 | WARN_ON(1); | ||
59 | |||
60 | return (struct nvgpu_page_alloc *)(uintptr_t)addr; | ||
61 | } | ||
62 | |||
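The three helpers above overload sg_dma_address() for vidmem: the field carries a struct nvgpu_page_alloc pointer rather than an IOMMU address, with bit 0 set as a "this is vidmem" tag. That is safe because the pointer is at least word-aligned, so bit 0 is always clear in the real value. A round-trip sketch under that alignment assumption:

	/* sketch: the bit-0 vidmem tag survives an encode/decode round trip */
	#include <linux/types.h>

	struct nvgpu_page_alloc;

	static struct nvgpu_page_alloc *tag_roundtrip(struct nvgpu_page_alloc *a)
	{
		u64 encoded = (u64)(uintptr_t)a | 1ULL;	/* encode: tag bit 0 */

		if (!(encoded & 1ULL))	/* decode: check the tag first */
			return NULL;	/* would be a plain sysmem address */

		return (struct nvgpu_page_alloc *)(uintptr_t)(encoded & ~1ULL);
	}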
63 | static struct sg_table *gk20a_vidbuf_map_dma_buf( | ||
64 | struct dma_buf_attachment *attach, enum dma_data_direction dir) | ||
65 | { | ||
66 | struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv; | ||
67 | |||
68 | return buf->mem->priv.sgt; | ||
69 | } | ||
70 | |||
71 | static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, | ||
72 | struct sg_table *sgt, | ||
73 | enum dma_data_direction dir) | ||
74 | { | ||
75 | } | ||
76 | |||
77 | static void gk20a_vidbuf_release(struct dma_buf *dmabuf) | ||
78 | { | ||
79 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
80 | struct nvgpu_vidmem_linux *linux_buf = buf->priv; | ||
81 | struct gk20a *g = buf->g; | ||
82 | |||
83 | vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", | ||
84 | dmabuf, buf->mem->size >> 10); | ||
85 | |||
86 | if (linux_buf && linux_buf->dmabuf_priv_delete) | ||
87 | linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv); | ||
88 | |||
89 | nvgpu_kfree(g, linux_buf); | ||
90 | nvgpu_vidmem_buf_free(g, buf); | ||
91 | |||
92 | gk20a_put(g); | ||
93 | } | ||
94 | |||
95 | static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) | ||
96 | { | ||
97 | WARN_ON("Not supported"); | ||
98 | return NULL; | ||
99 | } | ||
100 | |||
101 | static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf, | ||
102 | unsigned long page_num) | ||
103 | { | ||
104 | WARN_ON("Not supported"); | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) | ||
109 | { | ||
110 | return -EINVAL; | ||
111 | } | ||
112 | |||
113 | static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf, | ||
114 | struct device *dev, void *priv, void (*delete)(void *priv)) | ||
115 | { | ||
116 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
117 | struct nvgpu_vidmem_linux *linux_buf = buf->priv; | ||
118 | |||
119 | linux_buf->dmabuf_priv = priv; | ||
120 | linux_buf->dmabuf_priv_delete = delete; | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf, | ||
126 | struct device *dev) | ||
127 | { | ||
128 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
129 | struct nvgpu_vidmem_linux *linux_buf = buf->priv; | ||
130 | |||
131 | return linux_buf->dmabuf_priv; | ||
132 | } | ||
133 | |||
134 | static const struct dma_buf_ops gk20a_vidbuf_ops = { | ||
135 | .map_dma_buf = gk20a_vidbuf_map_dma_buf, | ||
136 | .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf, | ||
137 | .release = gk20a_vidbuf_release, | ||
138 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) | ||
139 | .map_atomic = gk20a_vidbuf_kmap_atomic, | ||
140 | .map = gk20a_vidbuf_kmap, | ||
141 | #else | ||
142 | .kmap_atomic = gk20a_vidbuf_kmap_atomic, | ||
143 | .kmap = gk20a_vidbuf_kmap, | ||
144 | #endif | ||
145 | .mmap = gk20a_vidbuf_mmap, | ||
146 | .set_drvdata = gk20a_vidbuf_set_private, | ||
147 | .get_drvdata = gk20a_vidbuf_get_private, | ||
148 | }; | ||
149 | |||
150 | static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf) | ||
151 | { | ||
152 | DEFINE_DMA_BUF_EXPORT_INFO(exp_info); | ||
153 | |||
154 | exp_info.priv = buf; | ||
155 | exp_info.ops = &gk20a_vidbuf_ops; | ||
156 | exp_info.size = buf->mem->size; | ||
157 | exp_info.flags = O_RDWR; | ||
158 | |||
159 | return dma_buf_export(&exp_info); | ||
160 | } | ||
161 | |||
162 | struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) | ||
163 | { | ||
164 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
165 | |||
166 | if (dmabuf->ops != &gk20a_vidbuf_ops) | ||
167 | return NULL; | ||
168 | |||
169 | return buf->g; | ||
170 | } | ||
171 | |||
172 | int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) | ||
173 | { | ||
174 | struct nvgpu_vidmem_buf *buf = NULL; | ||
175 | struct nvgpu_vidmem_linux *priv; | ||
176 | int err, fd; | ||
177 | |||
178 | /* | ||
179 | * This ref is released when the dma_buf is closed. | ||
180 | */ | ||
181 | if (!gk20a_get(g)) | ||
182 | return -ENODEV; | ||
183 | |||
184 | vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes); | ||
185 | |||
186 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
187 | if (!priv) { | ||
188 | err = -ENOMEM; | ||
189 | goto fail; | ||
190 | } | ||
191 | |||
192 | buf = nvgpu_vidmem_user_alloc(g, bytes); | ||
193 | if (IS_ERR(buf)) { | ||
194 | err = PTR_ERR(buf); buf = NULL; /* don't free an ERR_PTR below */ | ||
195 | goto fail; | ||
196 | } | ||
197 | |||
198 | priv->dmabuf = gk20a_vidbuf_export(buf); | ||
199 | if (IS_ERR(priv->dmabuf)) { | ||
200 | err = PTR_ERR(priv->dmabuf); | ||
201 | goto fail; | ||
202 | } | ||
203 | |||
204 | buf->priv = priv; | ||
205 | |||
206 | #ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD | ||
207 | fd = tegra_alloc_fd(current->files, 1024, O_RDWR); | ||
208 | #else | ||
209 | fd = get_unused_fd_flags(O_RDWR); | ||
210 | #endif | ||
211 | if (fd < 0) { | ||
212 | /* ->release frees what we have done */ | ||
213 | dma_buf_put(priv->dmabuf); | ||
214 | return fd; | ||
215 | } | ||
216 | |||
217 | /* fclose() on this drops one ref, freeing the dma buf */ | ||
218 | fd_install(fd, priv->dmabuf->file); | ||
219 | |||
220 | vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", | ||
221 | priv->dmabuf, buf->mem->size >> 10); | ||
222 | |||
223 | return fd; | ||
224 | |||
225 | fail: | ||
226 | nvgpu_vidmem_buf_free(g, buf); | ||
227 | nvgpu_kfree(g, priv); | ||
228 | gk20a_put(g); | ||
229 | |||
230 | vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err); | ||
231 | return err; | ||
232 | } | ||
233 | |||
234 | int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, | ||
235 | void *buffer, u64 offset, u64 size, u32 cmd) | ||
236 | { | ||
237 | struct nvgpu_vidmem_buf *vidmem_buf; | ||
238 | struct nvgpu_mem *mem; | ||
239 | int err = 0; | ||
240 | |||
241 | if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM) | ||
242 | return -EINVAL; | ||
243 | |||
244 | vidmem_buf = dmabuf->priv; | ||
245 | mem = vidmem_buf->mem; | ||
246 | |||
247 | nvgpu_speculation_barrier(); | ||
248 | switch (cmd) { | ||
249 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ: | ||
250 | nvgpu_mem_rd_n(g, mem, offset, buffer, size); | ||
251 | break; | ||
252 | |||
253 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE: | ||
254 | nvgpu_mem_wr_n(g, mem, offset, buffer, size); | ||
255 | break; | ||
256 | |||
257 | default: | ||
258 | err = -EINVAL; | ||
259 | } | ||
260 | |||
261 | return err; | ||
262 | } | ||
263 | |||
264 | void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem) | ||
265 | { | ||
266 | nvgpu_free(vidmem->allocator, | ||
267 | (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl)); | ||
268 | nvgpu_free_sgtable(g, &vidmem->priv.sgt); | ||
269 | } | ||
diff --git a/include/os/linux/dmabuf_vidmem.h b/include/os/linux/dmabuf_vidmem.h new file mode 100644 index 0000000..977fd78 --- /dev/null +++ b/include/os/linux/dmabuf_vidmem.h | |||
@@ -0,0 +1,78 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __NVGPU_LINUX_DMABUF_VIDMEM_H__ | ||
18 | #define __NVGPU_LINUX_DMABUF_VIDMEM_H__ | ||
19 | |||
20 | #include <nvgpu/types.h> | ||
21 | |||
22 | struct dma_buf; | ||
23 | |||
24 | struct gk20a; | ||
25 | struct scatterlist; | ||
26 | |||
27 | #ifdef CONFIG_GK20A_VIDMEM | ||
28 | |||
29 | struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf); | ||
30 | int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes); | ||
31 | |||
32 | void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr); | ||
33 | struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl); | ||
34 | |||
35 | int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, | ||
36 | void *buffer, u64 offset, u64 size, u32 cmd); | ||
37 | |||
38 | #else /* !CONFIG_GK20A_VIDMEM */ | ||
39 | |||
40 | static inline struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) | ||
41 | { | ||
42 | return NULL; | ||
43 | } | ||
44 | |||
45 | static inline int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) | ||
46 | { | ||
47 | return -ENOSYS; | ||
48 | } | ||
49 | |||
50 | static inline void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, | ||
51 | u64 addr) | ||
52 | { | ||
53 | } | ||
54 | |||
55 | static inline struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc( | ||
56 | struct scatterlist *sgl) | ||
57 | { | ||
58 | return NULL; | ||
59 | } | ||
60 | |||
61 | static inline int nvgpu_vidmem_buf_access_memory(struct gk20a *g, | ||
62 | struct dma_buf *dmabuf, | ||
63 | void *buffer, u64 offset, | ||
64 | u64 size, u32 cmd) | ||
65 | { | ||
66 | return -ENOSYS; | ||
67 | } | ||
68 | |||
69 | #endif | ||
70 | |||
71 | |||
72 | struct nvgpu_vidmem_linux { | ||
73 | struct dma_buf *dmabuf; | ||
74 | void *dmabuf_priv; | ||
75 | void (*dmabuf_priv_delete)(void *); | ||
76 | }; | ||
77 | |||
78 | #endif | ||
diff --git a/include/os/linux/driver_common.c b/include/os/linux/driver_common.c new file mode 100644 index 0000000..c76dabe --- /dev/null +++ b/include/os/linux/driver_common.c | |||
@@ -0,0 +1,351 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/reboot.h> | ||
18 | #include <linux/dma-mapping.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <uapi/linux/nvgpu.h> | ||
22 | |||
23 | #include <nvgpu/defaults.h> | ||
24 | #include <nvgpu/kmem.h> | ||
25 | #include <nvgpu/nvgpu_common.h> | ||
26 | #include <nvgpu/soc.h> | ||
27 | #include <nvgpu/bug.h> | ||
28 | #include <nvgpu/enabled.h> | ||
29 | #include <nvgpu/debug.h> | ||
30 | #include <nvgpu/sizes.h> | ||
31 | #include <nvgpu/gk20a.h> | ||
32 | |||
33 | #include "platform_gk20a.h" | ||
34 | #include "module.h" | ||
35 | #include "os_linux.h" | ||
36 | #include "sysfs.h" | ||
37 | #include "ioctl.h" | ||
38 | #include "gk20a/regops_gk20a.h" | ||
39 | |||
40 | #define EMC3D_DEFAULT_RATIO 750 | ||
41 | |||
42 | void nvgpu_kernel_restart(void *cmd) | ||
43 | { | ||
44 | kernel_restart(cmd); | ||
45 | } | ||
46 | |||
47 | static void nvgpu_init_vars(struct gk20a *g) | ||
48 | { | ||
49 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
50 | struct device *dev = dev_from_gk20a(g); | ||
51 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
52 | |||
53 | nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq); | ||
54 | nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq); | ||
55 | |||
56 | init_rwsem(&l->busy_lock); | ||
57 | nvgpu_rwsem_init(&g->deterministic_busy); | ||
58 | |||
59 | nvgpu_spinlock_init(&g->mc_enable_lock); | ||
60 | |||
61 | nvgpu_mutex_init(&platform->railgate_lock); | ||
62 | nvgpu_mutex_init(&g->dbg_sessions_lock); | ||
63 | nvgpu_mutex_init(&g->client_lock); | ||
64 | nvgpu_mutex_init(&g->power_lock); | ||
65 | nvgpu_mutex_init(&g->ctxsw_disable_lock); | ||
66 | nvgpu_mutex_init(&g->tpc_pg_lock); | ||
67 | nvgpu_mutex_init(&g->clk_arb_enable_lock); | ||
68 | nvgpu_mutex_init(&g->cg_pg_lock); | ||
69 | |||
70 | /* Init the clock req count to 0 */ | ||
71 | nvgpu_atomic_set(&g->clk_arb_global_nr, 0); | ||
72 | |||
73 | nvgpu_mutex_init(&l->ctrl.privs_lock); | ||
74 | nvgpu_init_list_node(&l->ctrl.privs); | ||
75 | |||
76 | l->regs_saved = l->regs; | ||
77 | l->bar1_saved = l->bar1; | ||
78 | |||
79 | g->emc3d_ratio = EMC3D_DEFAULT_RATIO; | ||
80 | |||
81 | /* Set DMA parameters to allow larger sgt lists */ | ||
82 | dev->dma_parms = &l->dma_parms; | ||
83 | dma_set_max_seg_size(dev, UINT_MAX); | ||
84 | |||
85 | /* | ||
86 | * A default of 16GB is the largest supported DMA size that is | ||
87 | * acceptable to all currently supported Tegra SoCs. | ||
88 | */ | ||
89 | if (!platform->dma_mask) | ||
90 | platform->dma_mask = DMA_BIT_MASK(34); | ||
91 | |||
92 | dma_set_mask(dev, platform->dma_mask); | ||
93 | dma_set_coherent_mask(dev, platform->dma_mask); | ||
94 | |||
95 | nvgpu_init_list_node(&g->profiler_objects); | ||
96 | |||
97 | nvgpu_init_list_node(&g->boardobj_head); | ||
98 | nvgpu_init_list_node(&g->boardobjgrp_head); | ||
99 | |||
100 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints); | ||
101 | } | ||
102 | |||
103 | static void nvgpu_init_gr_vars(struct gk20a *g) | ||
104 | { | ||
105 | gk20a_init_gr(g); | ||
106 | |||
107 | nvgpu_log_info(g, "total ram pages : %lu", totalram_pages); | ||
108 | g->gr.max_comptag_mem = totalram_size_in_mb; | ||
109 | } | ||
110 | |||
111 | static void nvgpu_init_timeout(struct gk20a *g) | ||
112 | { | ||
113 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
114 | |||
115 | g->timeouts_disabled_by_user = false; | ||
116 | nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0); | ||
117 | |||
118 | if (nvgpu_platform_is_silicon(g)) { | ||
119 | g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; | ||
120 | } else if (nvgpu_platform_is_fpga(g)) { | ||
121 | g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA; | ||
122 | } else { | ||
123 | g->gr_idle_timeout_default = (u32)ULONG_MAX; | ||
124 | } | ||
125 | g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; | ||
126 | g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US; | ||
127 | } | ||
128 | |||
129 | static void nvgpu_init_timeslice(struct gk20a *g) | ||
130 | { | ||
131 | g->runlist_interleave = true; | ||
132 | |||
133 | g->timeslice_low_priority_us = 1300; | ||
134 | g->timeslice_medium_priority_us = 2600; | ||
135 | g->timeslice_high_priority_us = 5200; | ||
136 | |||
137 | g->min_timeslice_us = 1000; | ||
138 | g->max_timeslice_us = 50000; | ||
139 | } | ||
140 | |||
141 | static void nvgpu_init_pm_vars(struct gk20a *g) | ||
142 | { | ||
143 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
144 | u32 i = 0; | ||
145 | |||
146 | /* | ||
147 | * Set up initial power settings. For non-silicon platforms, disable | ||
148 | * power features; for silicon platforms, read them from platform data. | ||
149 | */ | ||
150 | g->slcg_enabled = | ||
151 | nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false; | ||
152 | g->blcg_enabled = | ||
153 | nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false; | ||
154 | g->elcg_enabled = | ||
155 | nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false; | ||
156 | g->elpg_enabled = | ||
157 | nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false; | ||
158 | g->aelpg_enabled = | ||
159 | nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false; | ||
160 | g->mscg_enabled = | ||
161 | nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false; | ||
162 | g->can_elpg = | ||
163 | nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false; | ||
164 | |||
165 | __nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG, | ||
166 | nvgpu_platform_is_silicon(g) ? platform->can_elcg : false); | ||
167 | __nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG, | ||
168 | nvgpu_platform_is_silicon(g) ? platform->can_slcg : false); | ||
169 | __nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG, | ||
170 | nvgpu_platform_is_silicon(g) ? platform->can_blcg : false); | ||
171 | |||
172 | g->aggressive_sync_destroy = platform->aggressive_sync_destroy; | ||
173 | g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; | ||
174 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
175 | g->has_cde = platform->has_cde; | ||
176 | #endif | ||
177 | g->ptimer_src_freq = platform->ptimer_src_freq; | ||
178 | g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); | ||
179 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); | ||
180 | g->can_tpc_powergate = platform->can_tpc_powergate; | ||
181 | |||
182 | for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) | ||
183 | g->valid_tpc_mask[i] = platform->valid_tpc_mask[i]; | ||
184 | |||
185 | g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; | ||
186 | /* if the platform does not set a delay, default to 500 msec */ | ||
187 | if (platform->railgate_delay_init) | ||
188 | g->railgate_delay = platform->railgate_delay_init; | ||
189 | else | ||
190 | g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT; | ||
191 | __nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon); | ||
192 | |||
193 | /* set default values to aelpg parameters */ | ||
194 | g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; | ||
195 | g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; | ||
196 | g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; | ||
197 | g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US; | ||
198 | g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; | ||
199 | |||
200 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm); | ||
201 | } | ||
202 | |||
203 | static void nvgpu_init_vbios_vars(struct gk20a *g) | ||
204 | { | ||
205 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
206 | |||
207 | __nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos); | ||
208 | g->vbios_min_version = platform->vbios_min_version; | ||
209 | } | ||
210 | |||
211 | static void nvgpu_init_ltc_vars(struct gk20a *g) | ||
212 | { | ||
213 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
214 | |||
215 | g->ltc_streamid = platform->ltc_streamid; | ||
216 | } | ||
217 | |||
218 | static void nvgpu_init_mm_vars(struct gk20a *g) | ||
219 | { | ||
220 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
221 | |||
222 | g->mm.disable_bigpage = platform->disable_bigpage; | ||
223 | __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE, | ||
224 | platform->honors_aperture); | ||
225 | __nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY, | ||
226 | platform->unified_memory); | ||
227 | __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, | ||
228 | platform->unify_address_spaces); | ||
229 | __nvgpu_set_enabled(g, NVGPU_MM_FORCE_128K_PMU_VM, | ||
230 | platform->force_128K_pmu_vm); | ||
231 | |||
232 | nvgpu_mutex_init(&g->mm.tlb_lock); | ||
233 | nvgpu_mutex_init(&g->mm.priv_lock); | ||
234 | } | ||
235 | |||
236 | int nvgpu_probe(struct gk20a *g, | ||
237 | const char *debugfs_symlink, | ||
238 | const char *interface_name, | ||
239 | struct class *class) | ||
240 | { | ||
241 | struct device *dev = dev_from_gk20a(g); | ||
242 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
243 | int err = 0; | ||
244 | |||
245 | nvgpu_init_vars(g); | ||
246 | nvgpu_init_gr_vars(g); | ||
247 | nvgpu_init_timeout(g); | ||
248 | nvgpu_init_timeslice(g); | ||
249 | nvgpu_init_pm_vars(g); | ||
250 | nvgpu_init_vbios_vars(g); | ||
251 | nvgpu_init_ltc_vars(g); | ||
252 | err = nvgpu_init_soc_vars(g); | ||
253 | if (err) { | ||
254 | nvgpu_err(g, "init soc vars failed"); | ||
255 | return err; | ||
256 | } | ||
257 | |||
258 | /* Initialize the platform interface. */ | ||
259 | err = platform->probe(dev); | ||
260 | if (err) { | ||
261 | if (err == -EPROBE_DEFER) | ||
262 | nvgpu_info(g, "platform probe failed"); | ||
263 | else | ||
264 | nvgpu_err(g, "platform probe failed"); | ||
265 | return err; | ||
266 | } | ||
267 | |||
268 | nvgpu_init_mm_vars(g); | ||
269 | |||
270 | /* platform probe can defer; do user init only if probe succeeds */ | ||
271 | err = gk20a_user_init(dev, interface_name, class); | ||
272 | if (err) | ||
273 | return err; | ||
274 | |||
275 | if (platform->late_probe) { | ||
276 | err = platform->late_probe(dev); | ||
277 | if (err) { | ||
278 | nvgpu_err(g, "late probe failed"); | ||
279 | return err; | ||
280 | } | ||
281 | } | ||
282 | |||
283 | nvgpu_create_sysfs(dev); | ||
284 | gk20a_debug_init(g, debugfs_symlink); | ||
285 | |||
286 | g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); | ||
287 | if (!g->dbg_regops_tmp_buf) { | ||
288 | nvgpu_err(g, "couldn't allocate regops tmp buf"); | ||
289 | return -ENOMEM; | ||
290 | } | ||
291 | g->dbg_regops_tmp_buf_ops = | ||
292 | SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); | ||
293 | |||
294 | g->remove_support = gk20a_remove_support; | ||
295 | |||
296 | nvgpu_ref_init(&g->refcount); | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | /** | ||
302 | * cyclic_delta - Returns delta of cyclic integers a and b. | ||
303 | * | ||
304 | * @a - First integer | ||
305 | * @b - Second integer | ||
306 | * | ||
307 | * Note: if a is ahead of b, delta is positive. | ||
308 | */ | ||
309 | static int cyclic_delta(int a, int b) | ||
310 | { | ||
311 | return a - b; | ||
312 | } | ||
313 | |||
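Plain signed subtraction stays correct across counter wraparound: the kernel builds with wrapping integer semantics, so two counters that are less than INT_MAX apart always yield their true distance. A worked example with hypothetical values:

	/* example: the delta is exact even across the INT_MAX -> INT_MIN wrap */
	int newer = INT_MIN + 5;	/* counter shortly after wrapping */
	int older = INT_MAX - 5;	/* snapshot taken 11 increments earlier */

	/* 0x80000005 - 0x7ffffffa == 0xb, so the delta is 11 as expected */
	WARN_ON(cyclic_delta(newer, older) != 11);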
314 | /** | ||
315 | * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete | ||
316 | * | ||
317 | * @g - The GPU to wait on. | ||
318 | * | ||
319 | * Waits until all interrupt handlers that have been scheduled to run have | ||
320 | * completed. | ||
321 | */ | ||
322 | void nvgpu_wait_for_deferred_interrupts(struct gk20a *g) | ||
323 | { | ||
324 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
325 | int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count); | ||
326 | int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count); | ||
327 | |||
328 | /* wait until all stalling irqs are handled */ | ||
329 | NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq, | ||
330 | cyclic_delta(stall_irq_threshold, | ||
331 | atomic_read(&l->sw_irq_stall_last_handled)) | ||
332 | <= 0, 0); | ||
333 | |||
334 | /* wait until all non-stalling irqs are handled */ | ||
335 | NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq, | ||
336 | cyclic_delta(nonstall_irq_threshold, | ||
337 | atomic_read(&l->sw_irq_nonstall_last_handled)) | ||
338 | <= 0, 0); | ||
339 | } | ||
340 | |||
341 | static void nvgpu_free_gk20a(struct gk20a *g) | ||
342 | { | ||
343 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
344 | |||
345 | kfree(l); | ||
346 | } | ||
347 | |||
348 | void nvgpu_init_gk20a(struct gk20a *g) | ||
349 | { | ||
350 | g->free = nvgpu_free_gk20a; | ||
351 | } | ||
diff --git a/include/os/linux/driver_common.h b/include/os/linux/driver_common.h new file mode 100644 index 0000000..6f42f77 --- /dev/null +++ b/include/os/linux/driver_common.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef NVGPU_LINUX_DRIVER_COMMON | ||
18 | #define NVGPU_LINUX_DRIVER_COMMON | ||
19 | |||
20 | void nvgpu_init_gk20a(struct gk20a *g); | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/dt.c b/include/os/linux/dt.c new file mode 100644 index 0000000..88e391e --- /dev/null +++ b/include/os/linux/dt.c | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/dt.h> | ||
18 | #include <linux/of.h> | ||
19 | |||
20 | #include "os_linux.h" | ||
21 | |||
22 | int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name, | ||
23 | u32 index, u32 *value) | ||
24 | { | ||
25 | struct device *dev = dev_from_gk20a(g); | ||
26 | struct device_node *np = dev->of_node; | ||
27 | |||
28 | return of_property_read_u32_index(np, name, index, value); | ||
29 | } | ||
diff --git a/include/os/linux/ecc_sysfs.c b/include/os/linux/ecc_sysfs.c new file mode 100644 index 0000000..73ae3dc --- /dev/null +++ b/include/os/linux/ecc_sysfs.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/ecc.h> | ||
18 | #include <nvgpu/gk20a.h> | ||
19 | |||
20 | #include "os_linux.h" | ||
21 | |||
22 | int nvgpu_ecc_sysfs_init(struct gk20a *g) | ||
23 | { | ||
24 | struct device *dev = dev_from_gk20a(g); | ||
25 | struct nvgpu_ecc *ecc = &g->ecc; | ||
26 | struct dev_ext_attribute *attr; | ||
27 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
28 | struct nvgpu_ecc_stat *stat; | ||
29 | int i = 0, err = 0; | ||
30 | |||
31 | attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count); | ||
32 | if (!attr) | ||
33 | return -ENOMEM; | ||
34 | |||
35 | nvgpu_list_for_each_entry(stat, | ||
36 | &ecc->stats_list, nvgpu_ecc_stat, node) { | ||
37 | if (i >= ecc->stats_count) { | ||
38 | err = -EINVAL; | ||
39 | nvgpu_err(g, "stats_list longer than stats_count %d", | ||
40 | ecc->stats_count); | ||
41 | break; | ||
42 | } | ||
43 | sysfs_attr_init(&attr[i].attr.attr); | ||
44 | attr[i].attr.attr.name = stat->name; | ||
45 | attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO); | ||
46 | attr[i].var = &stat->counter; | ||
47 | attr[i].attr.show = device_show_int; | ||
48 | err = device_create_file(dev, &attr[i].attr); | ||
49 | if (err) { | ||
50 | nvgpu_err(g, "sysfs node create failed for %s", | ||
51 | stat->name); | ||
52 | break; | ||
53 | } | ||
54 | i++; | ||
55 | } | ||
56 | |||
57 | if (err) { | ||
58 | while (i-- > 0) | ||
59 | device_remove_file(dev, &attr[i].attr); | ||
60 | nvgpu_kfree(g, attr); | ||
61 | return err; | ||
62 | } | ||
63 | |||
64 | l->ecc_attrs = attr; | ||
65 | |||
66 | return 0; | ||
67 | } | ||
68 | |||
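Each stat above becomes a read-only sysfs file through a dev_ext_attribute: attr.var points at the counter and the stock device_show_int() helper renders it on read. Exporting a single counter in isolation looks like this sketch (names hypothetical):

	/* sketch: export one int counter the way the loop above does per stat */
	#include <linux/device.h>
	#include <linux/sysfs.h>

	static int example_counter;
	static struct dev_ext_attribute example_attr;

	static int example_export(struct device *dev)
	{
		sysfs_attr_init(&example_attr.attr.attr);
		example_attr.attr.attr.name = "example_counter";
		example_attr.attr.attr.mode = 0444;
		example_attr.attr.show = device_show_int; /* prints *var */
		example_attr.var = &example_counter;

		return device_create_file(dev, &example_attr.attr);
	}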
69 | void nvgpu_ecc_sysfs_remove(struct gk20a *g) | ||
70 | { | ||
71 | struct device *dev = dev_from_gk20a(g); | ||
72 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
73 | struct nvgpu_ecc *ecc = &g->ecc; | ||
74 | int i; | ||
75 | |||
76 | for (i = 0; i < ecc->stats_count; i++) | ||
77 | device_remove_file(dev, &l->ecc_attrs[i].attr); | ||
78 | nvgpu_kfree(g, l->ecc_attrs); | ||
79 | l->ecc_attrs = NULL; | ||
80 | } | ||
diff --git a/include/os/linux/firmware.c b/include/os/linux/firmware.c new file mode 100644 index 0000000..8f0344b --- /dev/null +++ b/include/os/linux/firmware.c | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/firmware.h> | ||
18 | |||
19 | #include <nvgpu/kmem.h> | ||
20 | #include <nvgpu/bug.h> | ||
21 | #include <nvgpu/firmware.h> | ||
22 | #include <nvgpu/gk20a.h> | ||
23 | |||
24 | #include "platform_gk20a.h" | ||
25 | #include "os_linux.h" | ||
26 | |||
27 | static const struct firmware *do_request_firmware(struct device *dev, | ||
28 | const char *prefix, const char *fw_name, int flags) | ||
29 | { | ||
30 | const struct firmware *fw; | ||
31 | char *fw_path = NULL; | ||
32 | int path_len, err; | ||
33 | |||
34 | if (prefix) { | ||
35 | path_len = strlen(prefix) + strlen(fw_name); | ||
36 | path_len += 2; /* for the path separator and zero terminator */ | ||
37 | |||
38 | fw_path = nvgpu_kzalloc(get_gk20a(dev), | ||
39 | sizeof(*fw_path) * path_len); | ||
40 | if (!fw_path) | ||
41 | return NULL; | ||
42 | |||
43 | sprintf(fw_path, "%s/%s", prefix, fw_name); | ||
44 | fw_name = fw_path; | ||
45 | } | ||
46 | |||
47 | if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN) | ||
48 | err = request_firmware_direct(&fw, fw_name, dev); | ||
49 | else | ||
50 | err = request_firmware(&fw, fw_name, dev); | ||
51 | |||
52 | nvgpu_kfree(get_gk20a(dev), fw_path); | ||
53 | if (err) | ||
54 | return NULL; | ||
55 | return fw; | ||
56 | } | ||
57 | |||
58 | /* This is a simple wrapper around request_firmware that takes 'fw_name' and | ||
59 | * applies an IP specific relative path prefix to it. The caller is | ||
60 | * responsible for calling nvgpu_release_firmware later. */ | ||
61 | struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g, | ||
62 | const char *fw_name, | ||
63 | int flags) | ||
64 | { | ||
65 | struct device *dev = dev_from_gk20a(g); | ||
66 | struct nvgpu_firmware *fw; | ||
67 | const struct firmware *linux_fw; | ||
68 | |||
69 | /* current->fs is NULL when calling from SYS_EXIT. | ||
70 | * Add a check here to prevent a crash in request_firmware. */ | ||
71 | if (!current->fs || !fw_name) | ||
72 | return NULL; | ||
73 | |||
74 | fw = nvgpu_kzalloc(g, sizeof(*fw)); | ||
75 | if (!fw) | ||
76 | return NULL; | ||
77 | |||
78 | linux_fw = do_request_firmware(dev, g->name, fw_name, flags); | ||
79 | |||
80 | #ifdef CONFIG_TEGRA_GK20A | ||
81 | /* TO BE REMOVED - Support loading from legacy SOC specific path. */ | ||
82 | if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) { | ||
83 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
84 | linux_fw = do_request_firmware(dev, | ||
85 | platform->soc_name, fw_name, flags); | ||
86 | } | ||
87 | #endif | ||
88 | |||
89 | if (!linux_fw) | ||
90 | goto err; | ||
91 | |||
92 | fw->data = nvgpu_kmalloc(g, linux_fw->size); | ||
93 | if (!fw->data) | ||
94 | goto err_release; | ||
95 | |||
96 | memcpy(fw->data, linux_fw->data, linux_fw->size); | ||
97 | fw->size = linux_fw->size; | ||
98 | |||
99 | release_firmware(linux_fw); | ||
100 | |||
101 | return fw; | ||
102 | |||
103 | err_release: | ||
104 | release_firmware(linux_fw); | ||
105 | err: | ||
106 | nvgpu_kfree(g, fw); | ||
107 | return NULL; | ||
108 | } | ||
109 | |||
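The returned structure is a heap copy owned by the caller, so every successful nvgpu_request_firmware() must be paired with nvgpu_release_firmware(). A minimal caller sketch; the firmware file name is a placeholder:

	/* usage sketch; "fecs.bin" is a hypothetical name */
	static int example_load_ucode(struct gk20a *g)
	{
		struct nvgpu_firmware *fw =
			nvgpu_request_firmware(g, "fecs.bin", 0);

		if (!fw)
			return -ENOENT;

		/* ... consume fw->data / fw->size ... */

		nvgpu_release_firmware(g, fw);
		return 0;
	}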
110 | void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw) | ||
111 | { | ||
112 | if (!fw) | ||
113 | return; | ||
114 | |||
115 | nvgpu_kfree(g, fw->data); | ||
116 | nvgpu_kfree(g, fw); | ||
117 | } | ||
diff --git a/include/os/linux/fuse.c b/include/os/linux/fuse.c new file mode 100644 index 0000000..27851f9 --- /dev/null +++ b/include/os/linux/fuse.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <soc/tegra/fuse.h> | ||
15 | |||
16 | #include <nvgpu/fuse.h> | ||
17 | |||
18 | int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g) | ||
19 | { | ||
20 | return tegra_sku_info.gpu_speedo_id; | ||
21 | } | ||
22 | |||
23 | /* | ||
24 | * Use tegra_fuse_control_read/write() APIs for fuse offsets up to 0x100 | ||
25 | * Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100 | ||
26 | */ | ||
27 | void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val) | ||
28 | { | ||
29 | tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0); | ||
30 | } | ||
31 | |||
32 | void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val) | ||
33 | { | ||
34 | tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0); | ||
35 | } | ||
36 | |||
37 | void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val) | ||
38 | { | ||
39 | tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0); | ||
40 | } | ||
41 | |||
42 | void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val) | ||
43 | { | ||
44 | tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0); | ||
45 | } | ||
46 | |||
47 | int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val) | ||
48 | { | ||
49 | return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val); | ||
50 | } | ||
51 | |||
52 | int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val) | ||
53 | { | ||
54 | return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val); | ||
55 | } | ||
diff --git a/include/os/linux/intr.c b/include/os/linux/intr.c new file mode 100644 index 0000000..8838b72 --- /dev/null +++ b/include/os/linux/intr.c | |||
@@ -0,0 +1,136 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <trace/events/gk20a.h> | ||
15 | #include <linux/irqreturn.h> | ||
16 | |||
17 | #include <nvgpu/gk20a.h> | ||
18 | |||
19 | #include <nvgpu/atomic.h> | ||
20 | #include <nvgpu/unit.h> | ||
21 | #include "os_linux.h" | ||
22 | |||
23 | irqreturn_t nvgpu_intr_stall(struct gk20a *g) | ||
24 | { | ||
25 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
26 | u32 mc_intr_0; | ||
27 | |||
28 | trace_mc_gk20a_intr_stall(g->name); | ||
29 | |||
30 | if (!g->power_on) | ||
31 | return IRQ_NONE; | ||
32 | |||
33 | /* not from gpu when sharing irq with others */ | ||
34 | mc_intr_0 = g->ops.mc.intr_stall(g); | ||
35 | if (unlikely(!mc_intr_0)) | ||
36 | return IRQ_NONE; | ||
37 | |||
38 | g->ops.mc.intr_stall_pause(g); | ||
39 | |||
40 | atomic_inc(&l->hw_irq_stall_count); | ||
41 | |||
42 | trace_mc_gk20a_intr_stall_done(g->name); | ||
43 | |||
44 | return IRQ_WAKE_THREAD; | ||
45 | } | ||
46 | |||
47 | irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g) | ||
48 | { | ||
49 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
50 | int hw_irq_count; | ||
51 | |||
52 | nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched"); | ||
53 | |||
54 | trace_mc_gk20a_intr_thread_stall(g->name); | ||
55 | |||
56 | hw_irq_count = atomic_read(&l->hw_irq_stall_count); | ||
57 | g->ops.mc.isr_stall(g); | ||
58 | g->ops.mc.intr_stall_resume(g); | ||
59 | /* sync handled irq counter before re-enabling interrupts */ | ||
60 | atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count); | ||
61 | |||
62 | nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq); | ||
63 | |||
64 | trace_mc_gk20a_intr_thread_stall_done(g->name); | ||
65 | |||
66 | return IRQ_HANDLED; | ||
67 | } | ||
68 | |||
69 | irqreturn_t nvgpu_intr_nonstall(struct gk20a *g) | ||
70 | { | ||
71 | u32 non_stall_intr_val; | ||
72 | u32 hw_irq_count; | ||
73 | int ops_old, ops_new, ops = 0; | ||
74 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
75 | |||
76 | if (!g->power_on) | ||
77 | return IRQ_NONE; | ||
78 | |||
79 | /* not from gpu when sharing irq with others */ | ||
80 | non_stall_intr_val = g->ops.mc.intr_nonstall(g); | ||
81 | if (unlikely(!non_stall_intr_val)) | ||
82 | return IRQ_NONE; | ||
83 | |||
84 | g->ops.mc.intr_nonstall_pause(g); | ||
85 | |||
86 | ops = g->ops.mc.isr_nonstall(g); | ||
87 | if (ops) { | ||
88 | do { | ||
89 | ops_old = atomic_read(&l->nonstall_ops); | ||
90 | ops_new = ops_old | ops; | ||
91 | } while (ops_old != atomic_cmpxchg(&l->nonstall_ops, | ||
92 | ops_old, ops_new)); | ||
93 | |||
94 | queue_work(l->nonstall_work_queue, &l->nonstall_fn_work); | ||
95 | } | ||
96 | |||
97 | hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count); | ||
98 | |||
99 | /* sync handled irq counter before re-enabling interrupts */ | ||
100 | atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count); | ||
101 | |||
102 | g->ops.mc.intr_nonstall_resume(g); | ||
103 | |||
104 | nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq); | ||
105 | |||
106 | return IRQ_HANDLED; | ||
107 | } | ||
108 | |||
109 | static void mc_gk20a_handle_intr_nonstall(struct gk20a *g, u32 ops) | ||
110 | { | ||
111 | bool semaphore_wakeup, post_events; | ||
112 | |||
113 | semaphore_wakeup = | ||
114 | (((ops & GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE) != 0U) ? | ||
115 | true : false); | ||
116 | post_events = (((ops & GK20A_NONSTALL_OPS_POST_EVENTS) != 0U) ? | ||
117 | true : false); | ||
118 | |||
119 | if (semaphore_wakeup) { | ||
120 | g->ops.semaphore_wakeup(g, post_events); | ||
121 | } | ||
122 | } | ||
123 | |||
124 | void nvgpu_intr_nonstall_cb(struct work_struct *work) | ||
125 | { | ||
126 | struct nvgpu_os_linux *l = | ||
127 | container_of(work, struct nvgpu_os_linux, nonstall_fn_work); | ||
128 | struct gk20a *g = &l->g; | ||
129 | |||
130 | do { | ||
131 | u32 ops; | ||
132 | |||
133 | ops = atomic_xchg(&l->nonstall_ops, 0); | ||
134 | mc_gk20a_handle_intr_nonstall(g, ops); | ||
135 | } while (atomic_read(&l->nonstall_ops) != 0); | ||
136 | } | ||
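Producer and consumer share no lock here: nvgpu_intr_nonstall() ORs new work bits into l->nonstall_ops with a cmpxchg retry loop, and nvgpu_intr_nonstall_cb() drains everything with an atomic exchange, re-looping if more bits landed mid-drain, so no bit is ever lost. The pattern in isolation (condensed sketch, hypothetical names):

	#include <linux/atomic.h>

	static atomic_t pending_ops;	/* stands in for l->nonstall_ops */

	/* IRQ side: accumulate bits; retry if another CPU raced the update */
	static void ops_post(int bits)
	{
		int old, new;

		do {
			old = atomic_read(&pending_ops);
			new = old | bits;
		} while (atomic_cmpxchg(&pending_ops, old, new) != old);
	}

	/* worker side: take all bits at once; loop while new ones arrive */
	static void ops_drain(void (*handle)(int))
	{
		do {
			handle(atomic_xchg(&pending_ops, 0));
		} while (atomic_read(&pending_ops) != 0);
	}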
diff --git a/include/os/linux/intr.h b/include/os/linux/intr.h new file mode 100644 index 0000000..d43cdcc --- /dev/null +++ b/include/os/linux/intr.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #ifndef __NVGPU_LINUX_INTR_H__ | ||
15 | #define __NVGPU_LINUX_INTR_H__ | ||
16 | struct gk20a; | ||
17 | |||
18 | irqreturn_t nvgpu_intr_stall(struct gk20a *g); | ||
19 | irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g); | ||
20 | irqreturn_t nvgpu_intr_nonstall(struct gk20a *g); | ||
21 | void nvgpu_intr_nonstall_cb(struct work_struct *work); | ||
22 | #endif | ||
diff --git a/include/os/linux/io.c b/include/os/linux/io.c new file mode 100644 index 0000000..3e84e88 --- /dev/null +++ b/include/os/linux/io.c | |||
@@ -0,0 +1,130 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/io.h> | ||
15 | #include <nvgpu/types.h> | ||
16 | #include <nvgpu/gk20a.h> | ||
17 | |||
18 | #include "os_linux.h" | ||
19 | |||
20 | void nvgpu_writel(struct gk20a *g, u32 r, u32 v) | ||
21 | { | ||
22 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
23 | |||
24 | if (unlikely(!l->regs)) { | ||
25 | __gk20a_warn_on_no_regs(); | ||
26 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | ||
27 | } else { | ||
28 | writel_relaxed(v, l->regs + r); | ||
29 | nvgpu_wmb(); | ||
30 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); | ||
31 | } | ||
32 | } | ||
33 | |||
34 | void nvgpu_writel_relaxed(struct gk20a *g, u32 r, u32 v) | ||
35 | { | ||
36 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
37 | |||
38 | if (unlikely(!l->regs)) { | ||
39 | __gk20a_warn_on_no_regs(); | ||
40 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | ||
41 | } else { | ||
42 | writel_relaxed(v, l->regs + r); | ||
43 | } | ||
44 | } | ||
45 | |||
46 | u32 nvgpu_readl(struct gk20a *g, u32 r) | ||
47 | { | ||
48 | u32 v = __nvgpu_readl(g, r); | ||
49 | |||
50 | if (v == 0xffffffff) | ||
51 | __nvgpu_check_gpu_state(g); | ||
52 | |||
53 | return v; | ||
54 | } | ||
55 | |||
56 | u32 __nvgpu_readl(struct gk20a *g, u32 r) | ||
57 | { | ||
58 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
59 | u32 v = 0xffffffff; | ||
60 | |||
61 | if (unlikely(!l->regs)) { | ||
62 | __gk20a_warn_on_no_regs(); | ||
63 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | ||
64 | } else { | ||
65 | v = readl(l->regs + r); | ||
66 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); | ||
67 | } | ||
68 | |||
69 | return v; | ||
70 | } | ||
71 | |||
72 | void nvgpu_writel_loop(struct gk20a *g, u32 r, u32 v) | ||
73 | { | ||
74 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
75 | |||
76 | if (unlikely(!l->regs)) { | ||
77 | __gk20a_warn_on_no_regs(); | ||
78 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | ||
79 | } else { | ||
80 | nvgpu_wmb(); | ||
81 | do { | ||
82 | writel_relaxed(v, l->regs + r); | ||
83 | } while (readl(l->regs + r) != v); | ||
84 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); | ||
85 | } | ||
86 | } | ||
87 | |||
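nvgpu_writel_loop() above defends against a dropped posted write by re-issuing the relaxed write until a non-posted readl() observes the new value. The lighter, more common variant of the same idiom flushes a single posted write with one read-back, sketched here:

	/* sketch: read-back flush of a posted MMIO write */
	#include <linux/io.h>
	#include <linux/types.h>

	static void writel_flushed(void __iomem *base, u32 reg, u32 val)
	{
		writel_relaxed(val, base + reg);
		(void)readl(base + reg); /* read cannot complete until the
					  * preceding write reached the device */
	}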
88 | void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v) | ||
89 | { | ||
90 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
91 | |||
92 | if (unlikely(!l->bar1)) { | ||
93 | __gk20a_warn_on_no_regs(); | ||
94 | nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); | ||
95 | } else { | ||
96 | nvgpu_wmb(); | ||
97 | writel_relaxed(v, l->bar1 + b); | ||
98 | nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); | ||
99 | } | ||
100 | } | ||
101 | |||
102 | u32 nvgpu_bar1_readl(struct gk20a *g, u32 b) | ||
103 | { | ||
104 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
105 | u32 v = 0xffffffff; | ||
106 | |||
107 | if (unlikely(!l->bar1)) { | ||
108 | __gk20a_warn_on_no_regs(); | ||
109 | nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); | ||
110 | } else { | ||
111 | v = readl(l->bar1 + b); | ||
112 | nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); | ||
113 | } | ||
114 | |||
115 | return v; | ||
116 | } | ||
117 | |||
118 | bool nvgpu_io_exists(struct gk20a *g) | ||
119 | { | ||
120 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
121 | |||
122 | return l->regs != NULL; | ||
123 | } | ||
124 | |||
125 | bool nvgpu_io_valid_reg(struct gk20a *g, u32 r) | ||
126 | { | ||
127 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
128 | |||
129 | return r < resource_size(l->regs); | ||
130 | } | ||
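A short usage sketch of the accessors above. DEMO_CTRL_REG and DEMO_CTRL_EN are made-up names for illustration, not real registers; the point is the contract: nvgpu_readl() returns all-ones when the aperture is gone (and probes GPU state on such a read), while nvgpu_writel() orders the store with nvgpu_wmb().

#include <linux/bits.h>

#define DEMO_CTRL_REG	0x1234U		/* hypothetical register offset */
#define DEMO_CTRL_EN	BIT(0)		/* hypothetical enable bit */

static void demo_rmw_enable(struct gk20a *g)
{
	u32 v = nvgpu_readl(g, DEMO_CTRL_REG);

	/* an all-ones read means the aperture is gone or the GPU is dead */
	if (v == U32_MAX)
		return;

	nvgpu_writel(g, DEMO_CTRL_REG, v | DEMO_CTRL_EN);
}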
diff --git a/include/os/linux/io_usermode.c b/include/os/linux/io_usermode.c new file mode 100644 index 0000000..ffc532f --- /dev/null +++ b/include/os/linux/io_usermode.c | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/io.h> | ||
15 | #include <nvgpu/types.h> | ||
16 | #include <nvgpu/gk20a.h> | ||
17 | |||
18 | #include "os_linux.h" | ||
19 | |||
20 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
21 | |||
22 | void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v) | ||
23 | { | ||
24 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
25 | void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r()); | ||
26 | |||
27 | writel_relaxed(v, reg); | ||
28 | nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v); | ||
29 | } | ||
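nvgpu_usermode_writel() is consumed by the doorbell path: callers pass full register offsets, which is why the usermode region base usermode_cfg0_r() is subtracted above. A hedged sketch of a doorbell ring; usermode_notify_channel_pending_r() comes from the gv11b header included above, but treating the hardware channel id as the doorbell token directly is a simplification, not the driver's exact token encoding.

static void demo_ring_doorbell(struct gk20a *g, u32 hw_chid)
{
	/* token layout simplified for illustration */
	nvgpu_usermode_writel(g, usermode_notify_channel_pending_r(),
			      hw_chid);
}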
diff --git a/include/os/linux/ioctl.c b/include/os/linux/ioctl.c new file mode 100644 index 0000000..a40df2a --- /dev/null +++ b/include/os/linux/ioctl.c | |||
@@ -0,0 +1,297 @@ | |||
1 | /* | ||
2 | * NVGPU IOCTLs | ||
3 | * | ||
4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/file.h> | ||
20 | |||
21 | #include <nvgpu/nvgpu_common.h> | ||
22 | #include <nvgpu/ctxsw_trace.h> | ||
23 | #include <nvgpu/gk20a.h> | ||
24 | |||
25 | #include "gk20a/dbg_gpu_gk20a.h" | ||
26 | |||
27 | #include "ioctl_channel.h" | ||
28 | #include "ioctl_ctrl.h" | ||
29 | #include "ioctl_as.h" | ||
30 | #include "ioctl_tsg.h" | ||
31 | #include "ioctl_dbg.h" | ||
32 | #include "module.h" | ||
33 | #include "os_linux.h" | ||
34 | #include "ctxsw_trace.h" | ||
35 | #include "platform_gk20a.h" | ||
36 | |||
37 | #define GK20A_NUM_CDEVS 8 /* covers the optional -ctxsw node too */ | ||
38 | |||
39 | const struct file_operations gk20a_channel_ops = { | ||
40 | .owner = THIS_MODULE, | ||
41 | .release = gk20a_channel_release, | ||
42 | .open = gk20a_channel_open, | ||
43 | #ifdef CONFIG_COMPAT | ||
44 | .compat_ioctl = gk20a_channel_ioctl, | ||
45 | #endif | ||
46 | .unlocked_ioctl = gk20a_channel_ioctl, | ||
47 | }; | ||
48 | |||
49 | static const struct file_operations gk20a_ctrl_ops = { | ||
50 | .owner = THIS_MODULE, | ||
51 | .release = gk20a_ctrl_dev_release, | ||
52 | .open = gk20a_ctrl_dev_open, | ||
53 | .unlocked_ioctl = gk20a_ctrl_dev_ioctl, | ||
54 | #ifdef CONFIG_COMPAT | ||
55 | .compat_ioctl = gk20a_ctrl_dev_ioctl, | ||
56 | #endif | ||
57 | .mmap = gk20a_ctrl_dev_mmap, | ||
58 | }; | ||
59 | |||
60 | static const struct file_operations gk20a_dbg_ops = { | ||
61 | .owner = THIS_MODULE, | ||
62 | .release = gk20a_dbg_gpu_dev_release, | ||
63 | .open = gk20a_dbg_gpu_dev_open, | ||
64 | .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
65 | .poll = gk20a_dbg_gpu_dev_poll, | ||
66 | #ifdef CONFIG_COMPAT | ||
67 | .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
68 | #endif | ||
69 | }; | ||
70 | |||
71 | static const struct file_operations gk20a_as_ops = { | ||
72 | .owner = THIS_MODULE, | ||
73 | .release = gk20a_as_dev_release, | ||
74 | .open = gk20a_as_dev_open, | ||
75 | #ifdef CONFIG_COMPAT | ||
76 | .compat_ioctl = gk20a_as_dev_ioctl, | ||
77 | #endif | ||
78 | .unlocked_ioctl = gk20a_as_dev_ioctl, | ||
79 | }; | ||
80 | |||
81 | /* | ||
82 | * Note: the profiler node uses a different 'open' to trigger handling of | ||
83 | * the profiler session; most of the code is shared with the debugger path. | ||
84 | * If handling both in the same path ever gets too tangled, the two can be | ||
85 | * separated cleanly. | ||
86 | */ | ||
87 | static const struct file_operations gk20a_prof_ops = { | ||
88 | .owner = THIS_MODULE, | ||
89 | .release = gk20a_dbg_gpu_dev_release, | ||
90 | .open = gk20a_prof_gpu_dev_open, | ||
91 | .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
92 | #ifdef CONFIG_COMPAT | ||
93 | .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
94 | #endif | ||
95 | }; | ||
96 | |||
97 | static const struct file_operations gk20a_tsg_ops = { | ||
98 | .owner = THIS_MODULE, | ||
99 | .release = nvgpu_ioctl_tsg_dev_release, | ||
100 | .open = nvgpu_ioctl_tsg_dev_open, | ||
101 | #ifdef CONFIG_COMPAT | ||
102 | .compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl, | ||
103 | #endif | ||
104 | .unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl, | ||
105 | }; | ||
106 | |||
107 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
108 | static const struct file_operations gk20a_ctxsw_ops = { | ||
109 | .owner = THIS_MODULE, | ||
110 | .release = gk20a_ctxsw_dev_release, | ||
111 | .open = gk20a_ctxsw_dev_open, | ||
112 | #ifdef CONFIG_COMPAT | ||
113 | .compat_ioctl = gk20a_ctxsw_dev_ioctl, | ||
114 | #endif | ||
115 | .unlocked_ioctl = gk20a_ctxsw_dev_ioctl, | ||
116 | .poll = gk20a_ctxsw_dev_poll, | ||
117 | .read = gk20a_ctxsw_dev_read, | ||
118 | .mmap = gk20a_ctxsw_dev_mmap, | ||
119 | }; | ||
120 | #endif | ||
121 | |||
122 | static const struct file_operations gk20a_sched_ops = { | ||
123 | .owner = THIS_MODULE, | ||
124 | .release = gk20a_sched_dev_release, | ||
125 | .open = gk20a_sched_dev_open, | ||
126 | #ifdef CONFIG_COMPAT | ||
127 | .compat_ioctl = gk20a_sched_dev_ioctl, | ||
128 | #endif | ||
129 | .unlocked_ioctl = gk20a_sched_dev_ioctl, | ||
130 | .poll = gk20a_sched_dev_poll, | ||
131 | .read = gk20a_sched_dev_read, | ||
132 | }; | ||
133 | |||
134 | static int gk20a_create_device( | ||
135 | struct device *dev, int devno, | ||
136 | const char *interface_name, const char *cdev_name, | ||
137 | struct cdev *cdev, struct device **out, | ||
138 | const struct file_operations *ops, | ||
139 | struct class *class) | ||
140 | { | ||
141 | struct device *subdev; | ||
142 | int err; | ||
143 | struct gk20a *g = gk20a_from_dev(dev); | ||
144 | |||
145 | nvgpu_log_fn(g, " "); | ||
146 | |||
147 | cdev_init(cdev, ops); | ||
148 | cdev->owner = THIS_MODULE; | ||
149 | |||
150 | err = cdev_add(cdev, devno, 1); | ||
151 | if (err) { | ||
152 | dev_err(dev, "failed to add %s cdev\n", cdev_name); | ||
153 | return err; | ||
154 | } | ||
155 | |||
156 | subdev = device_create(class, NULL, devno, NULL, | ||
157 | interface_name, cdev_name); | ||
158 | |||
159 | if (IS_ERR(subdev)) { | ||
160 | 		err = PTR_ERR(subdev); | ||
161 | cdev_del(cdev); | ||
162 | dev_err(dev, "failed to create %s device for %s\n", | ||
163 | cdev_name, dev_name(dev)); | ||
164 | return err; | ||
165 | } | ||
166 | |||
167 | *out = subdev; | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | void gk20a_user_deinit(struct device *dev, struct class *class) | ||
172 | { | ||
173 | struct gk20a *g = gk20a_from_dev(dev); | ||
174 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
175 | |||
176 | if (l->channel.node) { | ||
177 | device_destroy(class, l->channel.cdev.dev); | ||
178 | cdev_del(&l->channel.cdev); | ||
179 | } | ||
180 | |||
181 | if (l->as_dev.node) { | ||
182 | device_destroy(class, l->as_dev.cdev.dev); | ||
183 | cdev_del(&l->as_dev.cdev); | ||
184 | } | ||
185 | |||
186 | if (l->ctrl.node) { | ||
187 | device_destroy(class, l->ctrl.cdev.dev); | ||
188 | cdev_del(&l->ctrl.cdev); | ||
189 | } | ||
190 | |||
191 | if (l->dbg.node) { | ||
192 | device_destroy(class, l->dbg.cdev.dev); | ||
193 | cdev_del(&l->dbg.cdev); | ||
194 | } | ||
195 | |||
196 | if (l->prof.node) { | ||
197 | device_destroy(class, l->prof.cdev.dev); | ||
198 | cdev_del(&l->prof.cdev); | ||
199 | } | ||
200 | |||
201 | if (l->tsg.node) { | ||
202 | device_destroy(class, l->tsg.cdev.dev); | ||
203 | cdev_del(&l->tsg.cdev); | ||
204 | } | ||
205 | |||
206 | if (l->ctxsw.node) { | ||
207 | device_destroy(class, l->ctxsw.cdev.dev); | ||
208 | cdev_del(&l->ctxsw.cdev); | ||
209 | } | ||
210 | |||
211 | if (l->sched.node) { | ||
212 | device_destroy(class, l->sched.cdev.dev); | ||
213 | cdev_del(&l->sched.cdev); | ||
214 | } | ||
215 | |||
216 | if (l->cdev_region) | ||
217 | unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS); | ||
218 | } | ||
219 | |||
220 | int gk20a_user_init(struct device *dev, const char *interface_name, | ||
221 | struct class *class) | ||
222 | { | ||
223 | int err; | ||
224 | dev_t devno; | ||
225 | struct gk20a *g = gk20a_from_dev(dev); | ||
226 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
227 | |||
228 | err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev)); | ||
229 | if (err) { | ||
230 | dev_err(dev, "failed to allocate devno\n"); | ||
231 | goto fail; | ||
232 | } | ||
233 | l->cdev_region = devno; | ||
234 | |||
235 | err = gk20a_create_device(dev, devno++, interface_name, "", | ||
236 | &l->channel.cdev, &l->channel.node, | ||
237 | &gk20a_channel_ops, | ||
238 | class); | ||
239 | if (err) | ||
240 | goto fail; | ||
241 | |||
242 | err = gk20a_create_device(dev, devno++, interface_name, "-as", | ||
243 | &l->as_dev.cdev, &l->as_dev.node, | ||
244 | &gk20a_as_ops, | ||
245 | class); | ||
246 | if (err) | ||
247 | goto fail; | ||
248 | |||
249 | err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", | ||
250 | &l->ctrl.cdev, &l->ctrl.node, | ||
251 | &gk20a_ctrl_ops, | ||
252 | class); | ||
253 | if (err) | ||
254 | goto fail; | ||
255 | |||
256 | err = gk20a_create_device(dev, devno++, interface_name, "-dbg", | ||
257 | &l->dbg.cdev, &l->dbg.node, | ||
258 | &gk20a_dbg_ops, | ||
259 | class); | ||
260 | if (err) | ||
261 | goto fail; | ||
262 | |||
263 | err = gk20a_create_device(dev, devno++, interface_name, "-prof", | ||
264 | &l->prof.cdev, &l->prof.node, | ||
265 | &gk20a_prof_ops, | ||
266 | class); | ||
267 | if (err) | ||
268 | goto fail; | ||
269 | |||
270 | err = gk20a_create_device(dev, devno++, interface_name, "-tsg", | ||
271 | &l->tsg.cdev, &l->tsg.node, | ||
272 | &gk20a_tsg_ops, | ||
273 | class); | ||
274 | if (err) | ||
275 | goto fail; | ||
276 | |||
277 | #if defined(CONFIG_GK20A_CTXSW_TRACE) | ||
278 | err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", | ||
279 | &l->ctxsw.cdev, &l->ctxsw.node, | ||
280 | &gk20a_ctxsw_ops, | ||
281 | class); | ||
282 | if (err) | ||
283 | goto fail; | ||
284 | #endif | ||
285 | |||
286 | err = gk20a_create_device(dev, devno++, interface_name, "-sched", | ||
287 | &l->sched.cdev, &l->sched.node, | ||
288 | &gk20a_sched_ops, | ||
289 | class); | ||
290 | if (err) | ||
291 | goto fail; | ||
292 | |||
293 | return 0; | ||
294 | fail: | ||
295 | 	gk20a_user_deinit(dev, class); | ||
296 | return err; | ||
297 | } | ||
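For reference, interface_name acts as a printf-style format and each cdev_name above fills its "%s". Assuming the usual Tegra convention interface_name = "nvhost%s-gpu" (chosen by the probe code, not this file), the sequence above yields /dev/nvhost-gpu, /dev/nvhost-as-gpu, /dev/nvhost-ctrl-gpu, /dev/nvhost-dbg-gpu, /dev/nvhost-prof-gpu, /dev/nvhost-tsg-gpu, optionally /dev/nvhost-ctxsw-gpu, and /dev/nvhost-sched-gpu. A minimal userspace open of the control node under that naming assumption:

#include <fcntl.h>
#include <stdio.h>

int demo_open_ctrl(void)
{
	/* path assumes the "nvhost%s-gpu" naming convention */
	int fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (fd < 0)
		perror("open /dev/nvhost-ctrl-gpu");
	return fd;
}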
diff --git a/include/os/linux/ioctl.h b/include/os/linux/ioctl.h new file mode 100644 index 0000000..7bf1671 --- /dev/null +++ b/include/os/linux/ioctl.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | #ifndef __NVGPU_IOCTL_H__ | ||
14 | #define __NVGPU_IOCTL_H__ | ||
15 | |||
16 | struct device; | ||
17 | struct class; | ||
18 | |||
19 | int gk20a_user_init(struct device *dev, const char *interface_name, | ||
20 | struct class *class); | ||
21 | void gk20a_user_deinit(struct device *dev, struct class *class); | ||
22 | |||
23 | #endif | ||
diff --git a/include/os/linux/ioctl_as.c b/include/os/linux/ioctl_as.c new file mode 100644 index 0000000..f0cec17 --- /dev/null +++ b/include/os/linux/ioctl_as.c | |||
@@ -0,0 +1,427 @@ | |||
1 | /* | ||
2 | * GK20A Address Spaces | ||
3 | * | ||
4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/cdev.h> | ||
17 | #include <linux/uaccess.h> | ||
18 | #include <linux/fs.h> | ||
19 | |||
20 | #include <trace/events/gk20a.h> | ||
21 | |||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | |||
24 | #include <nvgpu/gmmu.h> | ||
25 | #include <nvgpu/vm_area.h> | ||
26 | #include <nvgpu/log2.h> | ||
27 | #include <nvgpu/gk20a.h> | ||
28 | #include <nvgpu/channel.h> | ||
29 | |||
30 | #include <nvgpu/linux/vm.h> | ||
31 | |||
32 | #include "platform_gk20a.h" | ||
33 | #include "ioctl_as.h" | ||
34 | #include "os_linux.h" | ||
35 | |||
36 | static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags) | ||
37 | { | ||
38 | u32 core_flags = 0; | ||
39 | |||
40 | if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | ||
41 | core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET; | ||
42 | if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) | ||
43 | core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE; | ||
44 | |||
45 | return core_flags; | ||
46 | } | ||
47 | |||
48 | static int gk20a_as_ioctl_bind_channel( | ||
49 | struct gk20a_as_share *as_share, | ||
50 | struct nvgpu_as_bind_channel_args *args) | ||
51 | { | ||
52 | int err = 0; | ||
53 | struct channel_gk20a *ch; | ||
54 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
55 | |||
56 | nvgpu_log_fn(g, " "); | ||
57 | |||
58 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
59 | if (!ch) | ||
60 | return -EINVAL; | ||
61 | |||
62 | if (gk20a_channel_as_bound(ch)) { | ||
63 | err = -EINVAL; | ||
64 | goto out; | ||
65 | } | ||
66 | |||
67 | /* this will set channel_gk20a->vm */ | ||
68 | err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch); | ||
69 | |||
70 | out: | ||
71 | gk20a_channel_put(ch); | ||
72 | return err; | ||
73 | } | ||
74 | |||
75 | static int gk20a_as_ioctl_alloc_space( | ||
76 | struct gk20a_as_share *as_share, | ||
77 | struct nvgpu_as_alloc_space_args *args) | ||
78 | { | ||
79 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
80 | |||
81 | nvgpu_log_fn(g, " "); | ||
82 | return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size, | ||
83 | &args->o_a.offset, | ||
84 | gk20a_as_translate_as_alloc_space_flags(g, | ||
85 | args->flags)); | ||
86 | } | ||
87 | |||
88 | static int gk20a_as_ioctl_free_space( | ||
89 | struct gk20a_as_share *as_share, | ||
90 | struct nvgpu_as_free_space_args *args) | ||
91 | { | ||
92 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
93 | |||
94 | nvgpu_log_fn(g, " "); | ||
95 | return nvgpu_vm_area_free(as_share->vm, args->offset); | ||
96 | } | ||
97 | |||
98 | static int gk20a_as_ioctl_map_buffer_ex( | ||
99 | struct gk20a_as_share *as_share, | ||
100 | struct nvgpu_as_map_buffer_ex_args *args) | ||
101 | { | ||
102 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
103 | |||
104 | nvgpu_log_fn(g, " "); | ||
105 | |||
106 | 	/* only direct kind control is supported */ | ||
107 | 	if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) { | ||
108 | 		nvgpu_log_info(g, | ||
109 | 			"direct kind control must be requested"); | ||
110 | 		return -EINVAL; | ||
111 | 	} | ||
112 | |||
113 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, | ||
114 | &args->offset, args->flags, | ||
115 | args->page_size, | ||
116 | args->compr_kind, | ||
117 | args->incompr_kind, | ||
118 | args->buffer_offset, | ||
119 | args->mapping_size, | ||
120 | NULL); | ||
121 | } | ||
122 | |||
123 | static int gk20a_as_ioctl_unmap_buffer( | ||
124 | struct gk20a_as_share *as_share, | ||
125 | struct nvgpu_as_unmap_buffer_args *args) | ||
126 | { | ||
127 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
128 | |||
129 | nvgpu_log_fn(g, " "); | ||
130 | |||
131 | nvgpu_vm_unmap(as_share->vm, args->offset, NULL); | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static int gk20a_as_ioctl_map_buffer_batch( | ||
137 | struct gk20a_as_share *as_share, | ||
138 | struct nvgpu_as_map_buffer_batch_args *args) | ||
139 | { | ||
140 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
141 | u32 i; | ||
142 | int err = 0; | ||
143 | |||
144 | struct nvgpu_as_unmap_buffer_args __user *user_unmap_args = | ||
145 | (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t) | ||
146 | args->unmaps; | ||
147 | struct nvgpu_as_map_buffer_ex_args __user *user_map_args = | ||
148 | (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t) | ||
149 | args->maps; | ||
150 | |||
151 | struct vm_gk20a_mapping_batch batch; | ||
152 | |||
153 | nvgpu_log_fn(g, " "); | ||
154 | |||
155 | if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT || | ||
156 | args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT) | ||
157 | return -EINVAL; | ||
158 | |||
159 | nvgpu_vm_mapping_batch_start(&batch); | ||
160 | |||
161 | for (i = 0; i < args->num_unmaps; ++i) { | ||
162 | struct nvgpu_as_unmap_buffer_args unmap_args; | ||
163 | |||
164 | if (copy_from_user(&unmap_args, &user_unmap_args[i], | ||
165 | sizeof(unmap_args))) { | ||
166 | err = -EFAULT; | ||
167 | break; | ||
168 | } | ||
169 | |||
170 | nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch); | ||
171 | } | ||
172 | |||
173 | nvgpu_speculation_barrier(); | ||
174 | if (err) { | ||
175 | nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); | ||
176 | |||
177 | args->num_unmaps = i; | ||
178 | args->num_maps = 0; | ||
179 | return err; | ||
180 | } | ||
181 | |||
182 | for (i = 0; i < args->num_maps; ++i) { | ||
183 | s16 compressible_kind; | ||
184 | s16 incompressible_kind; | ||
185 | |||
186 | struct nvgpu_as_map_buffer_ex_args map_args; | ||
187 | memset(&map_args, 0, sizeof(map_args)); | ||
188 | |||
189 | if (copy_from_user(&map_args, &user_map_args[i], | ||
190 | sizeof(map_args))) { | ||
191 | err = -EFAULT; | ||
192 | break; | ||
193 | } | ||
194 | |||
195 | if (map_args.flags & | ||
196 | NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { | ||
197 | compressible_kind = map_args.compr_kind; | ||
198 | incompressible_kind = map_args.incompr_kind; | ||
199 | } else { | ||
200 | /* direct kind control must be used */ | ||
201 | err = -EINVAL; | ||
202 | break; | ||
203 | } | ||
204 | |||
205 | err = nvgpu_vm_map_buffer( | ||
206 | as_share->vm, map_args.dmabuf_fd, | ||
207 | &map_args.offset, map_args.flags, map_args.page_size, | ||
208 | compressible_kind, incompressible_kind, | ||
209 | map_args.buffer_offset, | ||
210 | map_args.mapping_size, | ||
211 | &batch); | ||
212 | if (err) | ||
213 | break; | ||
214 | } | ||
215 | |||
216 | nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); | ||
217 | |||
218 | if (err) | ||
219 | args->num_maps = i; | ||
220 | /* note: args->num_unmaps will be unmodified, which is ok | ||
221 | * since all unmaps are done */ | ||
222 | |||
223 | return err; | ||
224 | } | ||
225 | |||
226 | static int gk20a_as_ioctl_get_va_regions( | ||
227 | struct gk20a_as_share *as_share, | ||
228 | struct nvgpu_as_get_va_regions_args *args) | ||
229 | { | ||
230 | unsigned int i; | ||
231 | unsigned int write_entries; | ||
232 | struct nvgpu_as_va_region __user *user_region_ptr; | ||
233 | struct vm_gk20a *vm = as_share->vm; | ||
234 | struct gk20a *g = gk20a_from_vm(vm); | ||
235 | unsigned int page_sizes = GMMU_PAGE_SIZE_KERNEL; | ||
236 | |||
237 | nvgpu_log_fn(g, " "); | ||
238 | |||
239 | if (!vm->big_pages) | ||
240 | page_sizes--; | ||
241 | |||
242 | write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region); | ||
243 | if (write_entries > page_sizes) | ||
244 | write_entries = page_sizes; | ||
245 | |||
246 | user_region_ptr = | ||
247 | (struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr; | ||
248 | |||
249 | for (i = 0; i < write_entries; ++i) { | ||
250 | struct nvgpu_as_va_region region; | ||
251 | struct nvgpu_allocator *vma = vm->vma[i]; | ||
252 | |||
253 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); | ||
254 | |||
255 | region.page_size = vm->gmmu_page_sizes[i]; | ||
256 | region.offset = nvgpu_alloc_base(vma); | ||
257 | /* No __aeabi_uldivmod() on some platforms... */ | ||
258 | region.pages = (nvgpu_alloc_end(vma) - | ||
259 | nvgpu_alloc_base(vma)) >> ilog2(region.page_size); | ||
260 | |||
261 | if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) | ||
262 | return -EFAULT; | ||
263 | } | ||
264 | |||
265 | args->buf_size = | ||
266 | page_sizes * sizeof(struct nvgpu_as_va_region); | ||
267 | |||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | static int nvgpu_as_ioctl_get_sync_ro_map( | ||
272 | struct gk20a_as_share *as_share, | ||
273 | struct nvgpu_as_get_sync_ro_map_args *args) | ||
274 | { | ||
275 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
276 | struct vm_gk20a *vm = as_share->vm; | ||
277 | struct gk20a *g = gk20a_from_vm(vm); | ||
278 | u64 base_gpuva; | ||
279 | u32 sync_size; | ||
280 | int err = 0; | ||
281 | |||
282 | if (!g->ops.fifo.get_sync_ro_map) | ||
283 | return -EINVAL; | ||
284 | |||
285 | if (!nvgpu_has_syncpoints(g)) | ||
286 | return -EINVAL; | ||
287 | |||
288 | err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size); | ||
289 | if (err) | ||
290 | return err; | ||
291 | |||
292 | args->base_gpuva = base_gpuva; | ||
293 | args->sync_size = sync_size; | ||
294 | |||
295 | return err; | ||
296 | #else | ||
297 | return -EINVAL; | ||
298 | #endif | ||
299 | } | ||
300 | |||
301 | int gk20a_as_dev_open(struct inode *inode, struct file *filp) | ||
302 | { | ||
303 | struct nvgpu_os_linux *l; | ||
304 | struct gk20a_as_share *as_share; | ||
305 | struct gk20a *g; | ||
306 | int err; | ||
307 | |||
308 | l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev); | ||
309 | g = &l->g; | ||
310 | |||
311 | nvgpu_log_fn(g, " "); | ||
312 | |||
313 | err = gk20a_as_alloc_share(g, 0, 0, &as_share); | ||
314 | if (err) { | ||
315 | nvgpu_log_fn(g, "failed to alloc share"); | ||
316 | return err; | ||
317 | } | ||
318 | |||
319 | filp->private_data = as_share; | ||
320 | return 0; | ||
321 | } | ||
322 | |||
323 | int gk20a_as_dev_release(struct inode *inode, struct file *filp) | ||
324 | { | ||
325 | struct gk20a_as_share *as_share = filp->private_data; | ||
326 | |||
327 | if (!as_share) | ||
328 | return 0; | ||
329 | |||
330 | return gk20a_as_release_share(as_share); | ||
331 | } | ||
332 | |||
333 | long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
334 | { | ||
335 | int err = 0; | ||
336 | struct gk20a_as_share *as_share = filp->private_data; | ||
337 | struct gk20a *g = gk20a_from_as(as_share->as); | ||
338 | |||
339 | u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE]; | ||
340 | |||
341 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
342 | |||
343 | if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) || | ||
344 | (_IOC_NR(cmd) == 0) || | ||
345 | (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) || | ||
346 | (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE)) | ||
347 | return -EINVAL; | ||
348 | |||
349 | memset(buf, 0, sizeof(buf)); | ||
350 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
351 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
352 | return -EFAULT; | ||
353 | } | ||
354 | |||
355 | err = gk20a_busy(g); | ||
356 | if (err) | ||
357 | return err; | ||
358 | |||
359 | nvgpu_speculation_barrier(); | ||
360 | switch (cmd) { | ||
361 | case NVGPU_AS_IOCTL_BIND_CHANNEL: | ||
362 | trace_gk20a_as_ioctl_bind_channel(g->name); | ||
363 | err = gk20a_as_ioctl_bind_channel(as_share, | ||
364 | (struct nvgpu_as_bind_channel_args *)buf); | ||
365 | |||
366 | break; | ||
367 | case NVGPU32_AS_IOCTL_ALLOC_SPACE: | ||
368 | { | ||
369 | struct nvgpu32_as_alloc_space_args *args32 = | ||
370 | (struct nvgpu32_as_alloc_space_args *)buf; | ||
371 | struct nvgpu_as_alloc_space_args args; | ||
372 | |||
373 | args.pages = args32->pages; | ||
374 | args.page_size = args32->page_size; | ||
375 | args.flags = args32->flags; | ||
376 | args.o_a.offset = args32->o_a.offset; | ||
377 | trace_gk20a_as_ioctl_alloc_space(g->name); | ||
378 | err = gk20a_as_ioctl_alloc_space(as_share, &args); | ||
379 | args32->o_a.offset = args.o_a.offset; | ||
380 | break; | ||
381 | } | ||
382 | case NVGPU_AS_IOCTL_ALLOC_SPACE: | ||
383 | trace_gk20a_as_ioctl_alloc_space(g->name); | ||
384 | err = gk20a_as_ioctl_alloc_space(as_share, | ||
385 | (struct nvgpu_as_alloc_space_args *)buf); | ||
386 | break; | ||
387 | case NVGPU_AS_IOCTL_FREE_SPACE: | ||
388 | trace_gk20a_as_ioctl_free_space(g->name); | ||
389 | err = gk20a_as_ioctl_free_space(as_share, | ||
390 | (struct nvgpu_as_free_space_args *)buf); | ||
391 | break; | ||
392 | case NVGPU_AS_IOCTL_MAP_BUFFER_EX: | ||
393 | trace_gk20a_as_ioctl_map_buffer(g->name); | ||
394 | err = gk20a_as_ioctl_map_buffer_ex(as_share, | ||
395 | (struct nvgpu_as_map_buffer_ex_args *)buf); | ||
396 | break; | ||
397 | case NVGPU_AS_IOCTL_UNMAP_BUFFER: | ||
398 | trace_gk20a_as_ioctl_unmap_buffer(g->name); | ||
399 | err = gk20a_as_ioctl_unmap_buffer(as_share, | ||
400 | (struct nvgpu_as_unmap_buffer_args *)buf); | ||
401 | break; | ||
402 | case NVGPU_AS_IOCTL_GET_VA_REGIONS: | ||
403 | trace_gk20a_as_ioctl_get_va_regions(g->name); | ||
404 | err = gk20a_as_ioctl_get_va_regions(as_share, | ||
405 | (struct nvgpu_as_get_va_regions_args *)buf); | ||
406 | break; | ||
407 | case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH: | ||
408 | err = gk20a_as_ioctl_map_buffer_batch(as_share, | ||
409 | (struct nvgpu_as_map_buffer_batch_args *)buf); | ||
410 | break; | ||
411 | case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP: | ||
412 | err = nvgpu_as_ioctl_get_sync_ro_map(as_share, | ||
413 | (struct nvgpu_as_get_sync_ro_map_args *)buf); | ||
414 | break; | ||
415 | default: | ||
416 | err = -ENOTTY; | ||
417 | break; | ||
418 | } | ||
419 | |||
420 | gk20a_idle(g); | ||
421 | |||
422 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
423 | if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) | ||
424 | err = -EFAULT; | ||
425 | |||
426 | return err; | ||
427 | } | ||
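The dispatch loop above implements the usual _IOC round trip: arguments are copied in when _IOC_WRITE is set, the handler runs with the GPU held busy, and results are copied back when _IOC_READ is set. A hedged userspace sketch of one such call against an open AS node; field names match the nvgpu_as_alloc_space_args usage above, while the uapi header install path and the chosen page size are assumptions.

#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* uapi header; install path may vary */

static int demo_alloc_space(int as_fd, __u64 *offset_out)
{
	struct nvgpu_as_alloc_space_args args = {
		.pages = 16,
		.page_size = 65536,	/* device-dependent big page size */
		.flags = 0,		/* no fixed offset, not sparse */
	};
	int err = ioctl(as_fd, NVGPU_AS_IOCTL_ALLOC_SPACE, &args);

	if (err == 0)
		*offset_out = args.o_a.offset;	/* GPU VA picked by nvgpu */
	return err;
}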
diff --git a/include/os/linux/ioctl_as.h b/include/os/linux/ioctl_as.h new file mode 100644 index 0000000..b3de378 --- /dev/null +++ b/include/os/linux/ioctl_as.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * GK20A Address Spaces | ||
3 | * | ||
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | #ifndef __NVGPU_COMMON_LINUX_AS_H__ | ||
16 | #define __NVGPU_COMMON_LINUX_AS_H__ | ||
17 | |||
18 | struct inode; | ||
19 | struct file; | ||
20 | |||
21 | /* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and | ||
22 | * num_maps */ | ||
23 | #define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256 | ||
24 | |||
25 | /* struct file_operations driver interface */ | ||
26 | int gk20a_as_dev_open(struct inode *inode, struct file *filp); | ||
27 | int gk20a_as_dev_release(struct inode *inode, struct file *filp); | ||
28 | long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
29 | |||
30 | #endif | ||
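The 256-entry cap applies independently to num_unmaps and num_maps in gk20a_as_ioctl_map_buffer_batch(). A sketch of a conforming userspace batch request; the pointer fields carry user addresses as u64, matching the casts in that handler, and the uapi header path is an assumption.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* uapi header; install path may vary */

static int demo_batch(int as_fd,
		      struct nvgpu_as_unmap_buffer_args *unmaps, __u32 n_unmaps,
		      struct nvgpu_as_map_buffer_ex_args *maps, __u32 n_maps)
{
	struct nvgpu_as_map_buffer_batch_args args = {
		.unmaps = (__u64)(uintptr_t)unmaps,
		.maps = (__u64)(uintptr_t)maps,
		.num_unmaps = n_unmaps,	/* must be <= 256 */
		.num_maps = n_maps,	/* must be <= 256 */
	};

	return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_BATCH, &args);
}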
diff --git a/include/os/linux/ioctl_channel.c b/include/os/linux/ioctl_channel.c new file mode 100644 index 0000000..0f39cc7 --- /dev/null +++ b/include/os/linux/ioctl_channel.c | |||
@@ -0,0 +1,1388 @@ | |||
1 | /* | ||
2 | * GK20A Graphics channel | ||
3 | * | ||
4 | * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <trace/events/gk20a.h> | ||
20 | #include <linux/file.h> | ||
21 | #include <linux/anon_inodes.h> | ||
22 | #include <linux/dma-buf.h> | ||
23 | #include <linux/poll.h> | ||
24 | #include <uapi/linux/nvgpu.h> | ||
25 | |||
26 | #include <nvgpu/semaphore.h> | ||
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/kmem.h> | ||
29 | #include <nvgpu/log.h> | ||
30 | #include <nvgpu/list.h> | ||
31 | #include <nvgpu/debug.h> | ||
32 | #include <nvgpu/enabled.h> | ||
33 | #include <nvgpu/error_notifier.h> | ||
34 | #include <nvgpu/barrier.h> | ||
35 | #include <nvgpu/nvhost.h> | ||
36 | #include <nvgpu/os_sched.h> | ||
37 | #include <nvgpu/gk20a.h> | ||
38 | #include <nvgpu/channel.h> | ||
39 | #include <nvgpu/channel_sync.h> | ||
40 | |||
41 | #include "gk20a/dbg_gpu_gk20a.h" | ||
42 | #include "gk20a/fence_gk20a.h" | ||
43 | |||
44 | #include "platform_gk20a.h" | ||
45 | #include "ioctl_channel.h" | ||
46 | #include "channel.h" | ||
47 | #include "os_linux.h" | ||
48 | #include "ctxsw_trace.h" | ||
49 | |||
50 | /* the minimum size of the client snapshot buffer */ | ||
51 | #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ | ||
52 | (sizeof(struct gk20a_cs_snapshot_fifo) + \ | ||
53 | sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) | ||
54 | |||
55 | static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) | ||
56 | { | ||
57 | switch (graphics_preempt_mode) { | ||
58 | case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: | ||
59 | return "WFI"; | ||
60 | default: | ||
61 | return "?"; | ||
62 | } | ||
63 | } | ||
64 | |||
65 | static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode) | ||
66 | { | ||
67 | switch (compute_preempt_mode) { | ||
68 | case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: | ||
69 | return "WFI"; | ||
70 | case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: | ||
71 | return "CTA"; | ||
72 | default: | ||
73 | return "?"; | ||
74 | } | ||
75 | } | ||
76 | |||
77 | static void gk20a_channel_trace_sched_param( | ||
78 | void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice, | ||
79 | u32 timeout, const char *interleave, | ||
80 | const char *graphics_preempt_mode, | ||
81 | const char *compute_preempt_mode), | ||
82 | struct channel_gk20a *ch) | ||
83 | { | ||
84 | struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); | ||
85 | |||
86 | if (!tsg) | ||
87 | return; | ||
88 | |||
89 | 	trace(ch->chid, ch->tsgid, ch->pid, | ||
90 | 		tsg->timeslice_us, | ||
91 | ch->timeout_ms_max, | ||
92 | gk20a_fifo_interleave_level_name(tsg->interleave_level), | ||
93 | gr_gk20a_graphics_preempt_mode_name( | ||
94 | tsg->gr_ctx.graphics_preempt_mode), | ||
95 | gr_gk20a_compute_preempt_mode_name( | ||
96 | tsg->gr_ctx.compute_preempt_mode)); | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Although channels do have pointers back to the gk20a struct that they | ||
101 | * were created under, that pointer can be bad when the driver is killed: | ||
102 | * the channel memory can be freed before the release() function for a given | ||
103 | * channel is called. This happens when the driver dies and userspace doesn't | ||
104 | * get a chance to call release() until after the entire gk20a driver data | ||
105 | * has been unloaded and freed. | ||
106 | */ | ||
107 | struct channel_priv { | ||
108 | struct gk20a *g; | ||
109 | struct channel_gk20a *c; | ||
110 | }; | ||
111 | |||
112 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
113 | |||
114 | void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch) | ||
115 | { | ||
116 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
117 | |||
118 | /* disable existing cyclestats buffer */ | ||
119 | nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); | ||
120 | if (priv->cyclestate_buffer_handler) { | ||
121 | dma_buf_vunmap(priv->cyclestate_buffer_handler, | ||
122 | ch->cyclestate.cyclestate_buffer); | ||
123 | dma_buf_put(priv->cyclestate_buffer_handler); | ||
124 | priv->cyclestate_buffer_handler = NULL; | ||
125 | ch->cyclestate.cyclestate_buffer = NULL; | ||
126 | ch->cyclestate.cyclestate_buffer_size = 0; | ||
127 | } | ||
128 | nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); | ||
129 | } | ||
130 | |||
131 | int gk20a_channel_cycle_stats(struct channel_gk20a *ch, int dmabuf_fd) | ||
132 | { | ||
133 | struct dma_buf *dmabuf; | ||
134 | void *virtual_address; | ||
135 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
136 | |||
137 | 	/* are cycle stats supported on the current GPU? */ | ||
138 | if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS)) | ||
139 | return -ENOSYS; | ||
140 | |||
141 | if (dmabuf_fd && !priv->cyclestate_buffer_handler) { | ||
142 | |||
143 | /* set up new cyclestats buffer */ | ||
144 | dmabuf = dma_buf_get(dmabuf_fd); | ||
145 | if (IS_ERR(dmabuf)) | ||
146 | return PTR_ERR(dmabuf); | ||
147 | 		virtual_address = dma_buf_vmap(dmabuf); | ||
148 | 		if (!virtual_address) { /* don't leak the dmabuf ref */ | ||
149 | 			dma_buf_put(dmabuf); return -ENOMEM; } | ||
150 | |||
151 | priv->cyclestate_buffer_handler = dmabuf; | ||
152 | ch->cyclestate.cyclestate_buffer = virtual_address; | ||
153 | ch->cyclestate.cyclestate_buffer_size = dmabuf->size; | ||
154 | return 0; | ||
155 | |||
156 | } else if (!dmabuf_fd && priv->cyclestate_buffer_handler) { | ||
157 | gk20a_channel_free_cycle_stats_buffer(ch); | ||
158 | return 0; | ||
159 | |||
160 | } else if (!dmabuf_fd && !priv->cyclestate_buffer_handler) { | ||
161 | /* no request from GL */ | ||
162 | return 0; | ||
163 | |||
164 | } else { | ||
165 | pr_err("channel already has cyclestats buffer\n"); | ||
166 | return -EINVAL; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) | ||
171 | { | ||
172 | int ret; | ||
173 | |||
174 | nvgpu_mutex_acquire(&ch->cs_client_mutex); | ||
175 | if (ch->cs_client) | ||
176 | ret = gr_gk20a_css_flush(ch, ch->cs_client); | ||
177 | else | ||
178 | ret = -EBADF; | ||
179 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
180 | |||
181 | return ret; | ||
182 | } | ||
183 | |||
184 | int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, | ||
185 | u32 dmabuf_fd, | ||
186 | u32 perfmon_id_count, | ||
187 | u32 *perfmon_id_start) | ||
188 | { | ||
189 | int ret = 0; | ||
190 | struct gk20a *g = ch->g; | ||
191 | struct gk20a_cs_snapshot_client_linux *client_linux; | ||
192 | struct gk20a_cs_snapshot_client *client; | ||
193 | |||
194 | nvgpu_mutex_acquire(&ch->cs_client_mutex); | ||
195 | if (ch->cs_client) { | ||
196 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
197 | return -EEXIST; | ||
198 | } | ||
199 | |||
200 | client_linux = nvgpu_kzalloc(g, sizeof(*client_linux)); | ||
201 | if (!client_linux) { | ||
202 | ret = -ENOMEM; | ||
203 | goto err; | ||
204 | } | ||
205 | |||
206 | client_linux->dmabuf_fd = dmabuf_fd; | ||
207 | client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd); | ||
208 | if (IS_ERR(client_linux->dma_handler)) { | ||
209 | ret = PTR_ERR(client_linux->dma_handler); | ||
210 | client_linux->dma_handler = NULL; | ||
211 | goto err_free; | ||
212 | } | ||
213 | |||
214 | client = &client_linux->cs_client; | ||
215 | client->snapshot_size = client_linux->dma_handler->size; | ||
216 | if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { | ||
217 | ret = -ENOMEM; | ||
218 | goto err_put; | ||
219 | } | ||
220 | |||
221 | client->snapshot = (struct gk20a_cs_snapshot_fifo *) | ||
222 | dma_buf_vmap(client_linux->dma_handler); | ||
223 | if (!client->snapshot) { | ||
224 | ret = -ENOMEM; | ||
225 | goto err_put; | ||
226 | } | ||
227 | |||
228 | ch->cs_client = client; | ||
229 | |||
230 | ret = gr_gk20a_css_attach(ch, | ||
231 | perfmon_id_count, | ||
232 | perfmon_id_start, | ||
233 | ch->cs_client); | ||
234 | |||
235 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
236 | |||
237 | return ret; | ||
238 | |||
239 | err_put: | ||
240 | dma_buf_put(client_linux->dma_handler); | ||
241 | err_free: | ||
242 | nvgpu_kfree(g, client_linux); | ||
243 | err: | ||
244 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
245 | return ret; | ||
246 | } | ||
247 | |||
248 | int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) | ||
249 | { | ||
250 | int ret; | ||
251 | struct gk20a_cs_snapshot_client_linux *client_linux; | ||
252 | |||
253 | nvgpu_mutex_acquire(&ch->cs_client_mutex); | ||
254 | if (!ch->cs_client) { | ||
255 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | client_linux = container_of(ch->cs_client, | ||
260 | struct gk20a_cs_snapshot_client_linux, | ||
261 | cs_client); | ||
262 | |||
263 | ret = gr_gk20a_css_detach(ch, ch->cs_client); | ||
264 | |||
265 | if (client_linux->dma_handler) { | ||
266 | if (ch->cs_client->snapshot) | ||
267 | dma_buf_vunmap(client_linux->dma_handler, | ||
268 | ch->cs_client->snapshot); | ||
269 | dma_buf_put(client_linux->dma_handler); | ||
270 | } | ||
271 | |||
272 | ch->cs_client = NULL; | ||
273 | nvgpu_kfree(ch->g, client_linux); | ||
274 | |||
275 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
276 | |||
277 | return ret; | ||
278 | } | ||
279 | #endif | ||
280 | |||
281 | static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, | ||
282 | struct nvgpu_channel_wdt_args *args) | ||
283 | { | ||
284 | u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT | | ||
285 | NVGPU_IOCTL_CHANNEL_ENABLE_WDT); | ||
286 | |||
287 | if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT) | ||
288 | ch->timeout.enabled = false; | ||
289 | else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT) | ||
290 | ch->timeout.enabled = true; | ||
291 | else | ||
292 | return -EINVAL; | ||
293 | |||
294 | if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT) | ||
295 | ch->timeout.limit_ms = args->timeout_ms; | ||
296 | |||
297 | ch->timeout.debug_dump = (args->wdt_status & | ||
298 | NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0; | ||
299 | |||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) | ||
304 | { | ||
305 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
306 | |||
307 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
308 | if (priv->error_notifier.dmabuf) { | ||
309 | dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr); | ||
310 | dma_buf_put(priv->error_notifier.dmabuf); | ||
311 | priv->error_notifier.dmabuf = NULL; | ||
312 | priv->error_notifier.notification = NULL; | ||
313 | priv->error_notifier.vaddr = NULL; | ||
314 | } | ||
315 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
316 | } | ||
317 | |||
318 | static int gk20a_init_error_notifier(struct channel_gk20a *ch, | ||
319 | struct nvgpu_set_error_notifier *args) | ||
320 | { | ||
321 | struct dma_buf *dmabuf; | ||
322 | void *va; | ||
323 | u64 end = args->offset + sizeof(struct nvgpu_notification); | ||
324 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
325 | |||
326 | if (!args->mem) { | ||
327 | pr_err("gk20a_init_error_notifier: invalid memory handle\n"); | ||
328 | return -EINVAL; | ||
329 | } | ||
330 | |||
331 | dmabuf = dma_buf_get(args->mem); | ||
332 | |||
333 | gk20a_channel_free_error_notifiers(ch); | ||
334 | |||
335 | if (IS_ERR(dmabuf)) { | ||
336 | pr_err("Invalid handle: %d\n", args->mem); | ||
337 | return -EINVAL; | ||
338 | } | ||
339 | |||
340 | if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) { | ||
341 | dma_buf_put(dmabuf); | ||
342 | nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset"); | ||
343 | return -EINVAL; | ||
344 | } | ||
345 | |||
346 | nvgpu_speculation_barrier(); | ||
347 | |||
348 | /* map handle */ | ||
349 | va = dma_buf_vmap(dmabuf); | ||
350 | if (!va) { | ||
351 | dma_buf_put(dmabuf); | ||
352 | pr_err("Cannot map notifier handle\n"); | ||
353 | return -ENOMEM; | ||
354 | } | ||
355 | |||
356 | priv->error_notifier.notification = va + args->offset; | ||
357 | priv->error_notifier.vaddr = va; | ||
358 | memset(priv->error_notifier.notification, 0, | ||
359 | sizeof(struct nvgpu_notification)); | ||
360 | |||
361 | /* set channel notifiers pointer */ | ||
362 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
363 | priv->error_notifier.dmabuf = dmabuf; | ||
364 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
365 | |||
366 | return 0; | ||
367 | } | ||
368 | |||
369 | /* | ||
370 | * This returns the channel with a reference. The caller must | ||
371 | * gk20a_channel_put() the ref back after use. | ||
372 | * | ||
373 | * NULL is returned if the channel was not found. | ||
374 | */ | ||
375 | struct channel_gk20a *gk20a_get_channel_from_file(int fd) | ||
376 | { | ||
377 | struct channel_gk20a *ch; | ||
378 | struct channel_priv *priv; | ||
379 | struct file *f = fget(fd); | ||
380 | |||
381 | if (!f) | ||
382 | return NULL; | ||
383 | |||
384 | if (f->f_op != &gk20a_channel_ops) { | ||
385 | fput(f); | ||
386 | return NULL; | ||
387 | } | ||
388 | |||
389 | priv = (struct channel_priv *)f->private_data; | ||
390 | ch = gk20a_channel_get(priv->c); | ||
391 | fput(f); | ||
392 | return ch; | ||
393 | } | ||
394 | |||
395 | int gk20a_channel_release(struct inode *inode, struct file *filp) | ||
396 | { | ||
397 | struct channel_priv *priv = filp->private_data; | ||
398 | struct channel_gk20a *ch; | ||
399 | struct gk20a *g; | ||
400 | |||
401 | int err; | ||
402 | |||
403 | /* We could still end up here even if the channel_open failed, e.g. | ||
404 | * if we ran out of hw channel IDs. | ||
405 | */ | ||
406 | if (!priv) | ||
407 | return 0; | ||
408 | |||
409 | ch = priv->c; | ||
410 | g = priv->g; | ||
411 | |||
412 | err = gk20a_busy(g); | ||
413 | if (err) { | ||
414 | nvgpu_err(g, "failed to release a channel!"); | ||
415 | goto channel_release; | ||
416 | } | ||
417 | |||
418 | trace_gk20a_channel_release(dev_name(dev_from_gk20a(g))); | ||
419 | |||
420 | gk20a_channel_close(ch); | ||
421 | gk20a_channel_free_error_notifiers(ch); | ||
422 | |||
423 | gk20a_idle(g); | ||
424 | |||
425 | channel_release: | ||
426 | gk20a_put(g); | ||
427 | nvgpu_kfree(g, filp->private_data); | ||
428 | filp->private_data = NULL; | ||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | /* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */ | ||
433 | static int __gk20a_channel_open(struct gk20a *g, | ||
434 | struct file *filp, s32 runlist_id) | ||
435 | { | ||
436 | int err; | ||
437 | struct channel_gk20a *ch; | ||
438 | struct channel_priv *priv; | ||
439 | |||
440 | nvgpu_log_fn(g, " "); | ||
441 | |||
442 | g = gk20a_get(g); | ||
443 | if (!g) | ||
444 | return -ENODEV; | ||
445 | |||
446 | trace_gk20a_channel_open(dev_name(dev_from_gk20a(g))); | ||
447 | |||
448 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
449 | if (!priv) { | ||
450 | err = -ENOMEM; | ||
451 | goto free_ref; | ||
452 | } | ||
453 | |||
454 | err = gk20a_busy(g); | ||
455 | if (err) { | ||
456 | nvgpu_err(g, "failed to power on, %d", err); | ||
457 | goto fail_busy; | ||
458 | } | ||
459 | 	/* All user space channels should be non-privileged */ | ||
460 | ch = gk20a_open_new_channel(g, runlist_id, false, | ||
461 | nvgpu_current_pid(g), nvgpu_current_tid(g)); | ||
462 | gk20a_idle(g); | ||
463 | if (!ch) { | ||
464 | 		nvgpu_err(g, | ||
465 | 			"failed to open a new channel"); | ||
466 | err = -ENOMEM; | ||
467 | goto fail_busy; | ||
468 | } | ||
469 | |||
470 | gk20a_channel_trace_sched_param( | ||
471 | trace_gk20a_channel_sched_defaults, ch); | ||
472 | |||
473 | priv->g = g; | ||
474 | priv->c = ch; | ||
475 | |||
476 | filp->private_data = priv; | ||
477 | return 0; | ||
478 | |||
479 | fail_busy: | ||
480 | nvgpu_kfree(g, priv); | ||
481 | free_ref: | ||
482 | gk20a_put(g); | ||
483 | return err; | ||
484 | } | ||
485 | |||
486 | int gk20a_channel_open(struct inode *inode, struct file *filp) | ||
487 | { | ||
488 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
489 | struct nvgpu_os_linux, channel.cdev); | ||
490 | struct gk20a *g = &l->g; | ||
491 | int ret; | ||
492 | |||
493 | nvgpu_log_fn(g, "start"); | ||
494 | ret = __gk20a_channel_open(g, filp, -1); | ||
495 | |||
496 | nvgpu_log_fn(g, "end"); | ||
497 | return ret; | ||
498 | } | ||
499 | |||
500 | int gk20a_channel_open_ioctl(struct gk20a *g, | ||
501 | struct nvgpu_channel_open_args *args) | ||
502 | { | ||
503 | int err; | ||
504 | int fd; | ||
505 | struct file *file; | ||
506 | char name[64]; | ||
507 | s32 runlist_id = args->in.runlist_id; | ||
508 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
509 | |||
510 | err = get_unused_fd_flags(O_RDWR); | ||
511 | if (err < 0) | ||
512 | return err; | ||
513 | fd = err; | ||
514 | |||
515 | snprintf(name, sizeof(name), "nvhost-%s-fd%d", | ||
516 | dev_name(dev_from_gk20a(g)), fd); | ||
517 | |||
518 | file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR); | ||
519 | if (IS_ERR(file)) { | ||
520 | err = PTR_ERR(file); | ||
521 | goto clean_up; | ||
522 | } | ||
523 | |||
524 | err = __gk20a_channel_open(g, file, runlist_id); | ||
525 | if (err) | ||
526 | goto clean_up_file; | ||
527 | |||
528 | fd_install(fd, file); | ||
529 | args->out.channel_fd = fd; | ||
530 | return 0; | ||
531 | |||
532 | clean_up_file: | ||
533 | fput(file); | ||
534 | clean_up: | ||
535 | put_unused_fd(fd); | ||
536 | return err; | ||
537 | } | ||
538 | |||
539 | static u32 nvgpu_setup_bind_user_flags_to_common_flags(u32 user_flags) | ||
540 | { | ||
541 | u32 flags = 0; | ||
542 | |||
543 | if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_VPR_ENABLED) | ||
544 | flags |= NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR; | ||
545 | |||
546 | if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_DETERMINISTIC) | ||
547 | flags |= NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC; | ||
548 | |||
549 | if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) | ||
550 | flags |= NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE; | ||
551 | |||
552 | if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT) | ||
553 | flags |= NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT; | ||
554 | |||
555 | return flags; | ||
556 | } | ||
557 | |||
558 | static void nvgpu_get_setup_bind_args( | ||
559 | struct nvgpu_channel_setup_bind_args *channel_setup_bind_args, | ||
560 | struct nvgpu_setup_bind_args *setup_bind_args) | ||
561 | { | ||
562 | setup_bind_args->num_gpfifo_entries = | ||
563 | channel_setup_bind_args->num_gpfifo_entries; | ||
564 | setup_bind_args->num_inflight_jobs = | ||
565 | channel_setup_bind_args->num_inflight_jobs; | ||
566 | setup_bind_args->userd_dmabuf_fd = | ||
567 | channel_setup_bind_args->userd_dmabuf_fd; | ||
568 | setup_bind_args->userd_dmabuf_offset = | ||
569 | channel_setup_bind_args->userd_dmabuf_offset; | ||
570 | setup_bind_args->gpfifo_dmabuf_fd = | ||
571 | channel_setup_bind_args->gpfifo_dmabuf_fd; | ||
572 | setup_bind_args->gpfifo_dmabuf_offset = | ||
573 | channel_setup_bind_args->gpfifo_dmabuf_offset; | ||
574 | setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags( | ||
575 | channel_setup_bind_args->flags); | ||
576 | } | ||
577 | |||
578 | static void nvgpu_get_gpfifo_ex_args( | ||
579 | struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args, | ||
580 | struct nvgpu_setup_bind_args *setup_bind_args) | ||
581 | { | ||
582 | setup_bind_args->num_gpfifo_entries = alloc_gpfifo_ex_args->num_entries; | ||
583 | setup_bind_args->num_inflight_jobs = | ||
584 | alloc_gpfifo_ex_args->num_inflight_jobs; | ||
585 | setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags( | ||
586 | alloc_gpfifo_ex_args->flags); | ||
587 | } | ||
588 | |||
589 | static void nvgpu_get_gpfifo_args( | ||
590 | struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args, | ||
591 | struct nvgpu_setup_bind_args *setup_bind_args) | ||
592 | { | ||
593 | 	/* | ||
594 | 	 * The kernel can insert one extra gpfifo entry before the user's | ||
595 | 	 * submitted gpfifos and another one after, for internal usage. | ||
596 | 	 * Triple the requested size to leave headroom. | ||
597 | 	 */ | ||
598 | setup_bind_args->num_gpfifo_entries = | ||
599 | alloc_gpfifo_args->num_entries * 3; | ||
600 | setup_bind_args->num_inflight_jobs = 0; | ||
601 | setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags( | ||
602 | alloc_gpfifo_args->flags); | ||
603 | } | ||
604 | |||
605 | static void nvgpu_get_fence_args( | ||
606 | struct nvgpu_fence *fence_args_in, | ||
607 | struct nvgpu_channel_fence *fence_args_out) | ||
608 | { | ||
609 | fence_args_out->id = fence_args_in->id; | ||
610 | fence_args_out->value = fence_args_in->value; | ||
611 | } | ||
612 | |||
613 | static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, | ||
614 | ulong id, u32 offset, | ||
615 | u32 payload, u32 timeout) | ||
616 | { | ||
617 | struct dma_buf *dmabuf; | ||
618 | void *data; | ||
619 | u32 *semaphore; | ||
620 | int ret = 0; | ||
621 | |||
622 | /* do not wait if channel has timed out */ | ||
623 | if (gk20a_channel_check_timedout(ch)) { | ||
624 | return -ETIMEDOUT; | ||
625 | } | ||
626 | |||
627 | dmabuf = dma_buf_get(id); | ||
628 | if (IS_ERR(dmabuf)) { | ||
629 | nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id); | ||
630 | return -EINVAL; | ||
631 | } | ||
632 | |||
633 | data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); | ||
634 | if (!data) { | ||
635 | nvgpu_err(ch->g, "failed to map notifier memory"); | ||
636 | ret = -EINVAL; | ||
637 | goto cleanup_put; | ||
638 | } | ||
639 | |||
640 | semaphore = data + (offset & ~PAGE_MASK); | ||
641 | |||
642 | ret = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
643 | &ch->semaphore_wq, | ||
644 | *semaphore == payload || | ||
645 | gk20a_channel_check_timedout(ch), | ||
646 | timeout); | ||
647 | |||
648 | dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); | ||
649 | cleanup_put: | ||
650 | dma_buf_put(dmabuf); | ||
651 | return ret; | ||
652 | } | ||
653 | |||
654 | static int gk20a_channel_wait(struct channel_gk20a *ch, | ||
655 | struct nvgpu_wait_args *args) | ||
656 | { | ||
657 | struct dma_buf *dmabuf; | ||
658 | struct gk20a *g = ch->g; | ||
659 | struct notification *notif; | ||
660 | struct timespec tv; | ||
661 | u64 jiffies; | ||
662 | ulong id; | ||
663 | u32 offset; | ||
664 | int remain, ret = 0; | ||
665 | u64 end; | ||
666 | |||
667 | nvgpu_log_fn(g, " "); | ||
668 | |||
669 | if (gk20a_channel_check_timedout(ch)) { | ||
670 | return -ETIMEDOUT; | ||
671 | } | ||
672 | |||
673 | switch (args->type) { | ||
674 | case NVGPU_WAIT_TYPE_NOTIFIER: | ||
675 | id = args->condition.notifier.dmabuf_fd; | ||
676 | offset = args->condition.notifier.offset; | ||
677 | end = offset + sizeof(struct notification); | ||
678 | |||
679 | dmabuf = dma_buf_get(id); | ||
680 | if (IS_ERR(dmabuf)) { | ||
681 | nvgpu_err(g, "invalid notifier nvmap handle 0x%lx", | ||
682 | id); | ||
683 | return -EINVAL; | ||
684 | } | ||
685 | |||
686 | if (end > dmabuf->size || end < sizeof(struct notification)) { | ||
687 | dma_buf_put(dmabuf); | ||
688 | nvgpu_err(g, "invalid notifier offset"); | ||
689 | return -EINVAL; | ||
690 | } | ||
691 | |||
692 | nvgpu_speculation_barrier(); | ||
693 | |||
694 | 		notif = dma_buf_vmap(dmabuf); | ||
695 | 		if (!notif) { /* don't leak the dmabuf ref */ | ||
696 | 			nvgpu_err(g, "failed to map notifier memory"); | ||
697 | 			dma_buf_put(dmabuf); return -ENOMEM; | ||
698 | 		} | ||
699 | |||
700 | notif = (struct notification *)((uintptr_t)notif + offset); | ||
701 | |||
702 | 		/* the user is expected to set status to pending | ||
703 | 		 * before calling this ioctl */ | ||
704 | remain = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
705 | &ch->notifier_wq, | ||
706 | notif->status == 0 || | ||
707 | gk20a_channel_check_timedout(ch), | ||
708 | args->timeout); | ||
709 | |||
710 | if (remain == 0 && notif->status != 0) { | ||
711 | ret = -ETIMEDOUT; | ||
712 | goto notif_clean_up; | ||
713 | } else if (remain < 0) { | ||
714 | ret = -EINTR; | ||
715 | goto notif_clean_up; | ||
716 | } | ||
717 | |||
718 | /* TBD: fill in correct information */ | ||
719 | jiffies = get_jiffies_64(); | ||
720 | jiffies_to_timespec(jiffies, &tv); | ||
721 | notif->timestamp.nanoseconds[0] = tv.tv_nsec; | ||
722 | notif->timestamp.nanoseconds[1] = tv.tv_sec; | ||
723 | notif->info32 = 0xDEADBEEF; /* should be object name */ | ||
724 | notif->info16 = ch->chid; /* should be method offset */ | ||
725 | |||
726 | 	notif_clean_up: | ||
727 | 		/* vunmap the base mapping, not the offset pointer */ | ||
728 | 		dma_buf_vunmap(dmabuf, (void *)((uintptr_t)notif - offset)); | ||
729 | 		dma_buf_put(dmabuf); return ret; | ||
729 | |||
730 | case NVGPU_WAIT_TYPE_SEMAPHORE: | ||
731 | ret = gk20a_channel_wait_semaphore(ch, | ||
732 | args->condition.semaphore.dmabuf_fd, | ||
733 | args->condition.semaphore.offset, | ||
734 | args->condition.semaphore.payload, | ||
735 | args->timeout); | ||
736 | |||
737 | break; | ||
738 | |||
739 | default: | ||
740 | ret = -EINVAL; | ||
741 | break; | ||
742 | } | ||
743 | |||
744 | return ret; | ||
745 | } | ||
746 | |||
747 | static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, | ||
748 | struct nvgpu_zcull_bind_args *args) | ||
749 | { | ||
750 | struct gk20a *g = ch->g; | ||
751 | struct gr_gk20a *gr = &g->gr; | ||
752 | |||
753 | nvgpu_log_fn(gr->g, " "); | ||
754 | |||
755 | return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, | ||
756 | args->gpu_va, args->mode); | ||
757 | } | ||
758 | |||
759 | static int gk20a_ioctl_channel_submit_gpfifo( | ||
760 | struct channel_gk20a *ch, | ||
761 | struct nvgpu_submit_gpfifo_args *args) | ||
762 | { | ||
763 | struct nvgpu_channel_fence fence; | ||
764 | struct gk20a_fence *fence_out; | ||
765 | struct fifo_profile_gk20a *profile = NULL; | ||
766 | u32 submit_flags = 0; | ||
767 | int fd = -1; | ||
768 | struct gk20a *g = ch->g; | ||
769 | struct nvgpu_gpfifo_userdata userdata; | ||
770 | |||
771 | int ret = 0; | ||
772 | nvgpu_log_fn(g, " "); | ||
773 | |||
774 | profile = gk20a_fifo_profile_acquire(ch->g); | ||
775 | gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); | ||
776 | |||
777 | if (gk20a_channel_check_timedout(ch)) { | ||
778 | return -ETIMEDOUT; | ||
779 | } | ||
780 | |||
781 | nvgpu_get_fence_args(&args->fence, &fence); | ||
782 | submit_flags = | ||
783 | nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); | ||
784 | |||
785 | /* Try to allocate an fd here. */ | ||
786 | if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | ||
787 | && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { | ||
788 | fd = get_unused_fd_flags(O_RDWR); | ||
789 | if (fd < 0) | ||
790 | return fd; | ||
791 | } | ||
792 | |||
793 | userdata.entries = (struct nvgpu_gpfifo_entry __user *) | ||
794 | (uintptr_t)args->gpfifo; | ||
795 | userdata.context = NULL; | ||
796 | |||
797 | ret = nvgpu_submit_channel_gpfifo_user(ch, | ||
798 | userdata, args->num_entries, | ||
799 | submit_flags, &fence, &fence_out, profile); | ||
800 | |||
801 | if (ret) { | ||
802 | if (fd != -1) | ||
803 | put_unused_fd(fd); | ||
804 | goto clean_up; | ||
805 | } | ||
806 | |||
807 | /* Convert fence_out to something we can pass back to user space. */ | ||
808 | if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { | ||
809 | if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { | ||
810 | ret = gk20a_fence_install_fd(fence_out, fd); | ||
811 | if (ret) | ||
812 | put_unused_fd(fd); | ||
813 | else | ||
814 | args->fence.id = fd; | ||
815 | } else { | ||
816 | args->fence.id = fence_out->syncpt_id; | ||
817 | args->fence.value = fence_out->syncpt_value; | ||
818 | } | ||
819 | } | ||
820 | gk20a_fence_put(fence_out); | ||
821 | |||
822 | gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT); | ||
823 | if (profile) | ||
824 | gk20a_fifo_profile_release(ch->g, profile); | ||
825 | |||
826 | clean_up: | ||
827 | return ret; | ||
828 | } | ||
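
The two fence return modes in the flag handling above look like this from the caller's side. A hedged userspace sketch: the fields and flags are the ones the handler reads, while the helper name and the entry-buffer setup are assumptions:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Hypothetical helper: submit entries and return a sync-fence fd. */
static int submit_with_sync_fence(int channel_fd,
				  struct nvgpu_gpfifo_entry *entries,
				  uint32_t num_entries)
{
	struct nvgpu_submit_gpfifo_args args;

	memset(&args, 0, sizeof(args));
	args.gpfifo = (uint64_t)(uintptr_t)entries;
	args.num_entries = num_entries;
	args.flags = NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET |
		     NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE;

	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO, &args))
		return -1;

	/*
	 * With SYNC_FENCE requested, fence.id comes back as a pollable
	 * sync-fence fd; without it, fence.id/fence.value would instead
	 * hold a raw syncpoint id and threshold.
	 */
	return args.fence.id;
}
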
829 | |||
830 | /* | ||
831 | * Convert a Linux-specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_* | ||
832 | * to a common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_* | ||
833 | */ | ||
834 | u32 nvgpu_get_common_runlist_level(u32 level) | ||
835 | { | ||
836 | nvgpu_speculation_barrier(); | ||
837 | switch (level) { | ||
838 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: | ||
839 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; | ||
840 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: | ||
841 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM; | ||
842 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: | ||
843 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH; | ||
844 | default: | ||
845 | pr_err("%s: incorrect runlist level\n", __func__); | ||
846 | } | ||
847 | |||
848 | return level; | ||
849 | } | ||
850 | |||
851 | static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags) | ||
852 | { | ||
853 | u32 flags = 0; | ||
854 | |||
855 | if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) | ||
856 | flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP; | ||
857 | |||
858 | if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) | ||
859 | flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP; | ||
860 | |||
861 | return flags; | ||
862 | } | ||
863 | |||
864 | static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch, | ||
865 | u32 class_num, u32 user_flags) | ||
866 | { | ||
867 | return ch->g->ops.gr.alloc_obj_ctx(ch, class_num, | ||
868 | nvgpu_obj_ctx_user_flags_to_common_flags(user_flags)); | ||
869 | } | ||
870 | |||
871 | /* | ||
872 | * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
873 | * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
874 | */ | ||
875 | u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags) | ||
876 | { | ||
877 | u32 flags = 0; | ||
878 | |||
879 | if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI) | ||
880 | flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
881 | if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) | ||
882 | flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | ||
883 | |||
884 | return flags; | ||
885 | } | ||
886 | |||
887 | /* | ||
888 | * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
889 | * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
890 | */ | ||
891 | u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags) | ||
892 | { | ||
893 | u32 flags = 0; | ||
894 | |||
895 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI) | ||
896 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
897 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA) | ||
898 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA; | ||
899 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP) | ||
900 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | ||
901 | |||
902 | return flags; | ||
903 | } | ||
904 | |||
905 | /* | ||
906 | * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
907 | * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
908 | */ | ||
909 | u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode) | ||
910 | { | ||
911 | switch (graphics_preempt_mode) { | ||
912 | case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: | ||
913 | return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
914 | case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: | ||
915 | return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | ||
916 | } | ||
917 | |||
918 | return graphics_preempt_mode; | ||
919 | } | ||
920 | |||
921 | /* | ||
922 | * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
923 | * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
924 | */ | ||
925 | u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode) | ||
926 | { | ||
927 | switch (compute_preempt_mode) { | ||
928 | case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: | ||
929 | return NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
930 | case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: | ||
931 | return NVGPU_COMPUTE_PREEMPTION_MODE_CTA; | ||
932 | case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: | ||
933 | return NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | ||
934 | } | ||
935 | |||
936 | return compute_preempt_mode; | ||
937 | } | ||
938 | |||
939 | /* | ||
940 | * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
941 | * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
942 | */ | ||
943 | static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) | ||
944 | { | ||
945 | nvgpu_speculation_barrier(); | ||
946 | switch (graphics_preempt_mode) { | ||
947 | case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI: | ||
948 | return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; | ||
949 | case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP: | ||
950 | return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; | ||
951 | } | ||
952 | |||
953 | return graphics_preempt_mode; | ||
954 | } | ||
955 | |||
956 | /* | ||
957 | * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
958 | * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
959 | */ | ||
960 | static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode) | ||
961 | { | ||
962 | nvgpu_speculation_barrier(); | ||
963 | switch (compute_preempt_mode) { | ||
964 | case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: | ||
965 | return NVGPU_PREEMPTION_MODE_COMPUTE_WFI; | ||
966 | case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: | ||
967 | return NVGPU_PREEMPTION_MODE_COMPUTE_CTA; | ||
968 | case NVGPU_COMPUTE_PREEMPTION_MODE_CILP: | ||
969 | return NVGPU_PREEMPTION_MODE_COMPUTE_CILP; | ||
970 | } | ||
971 | |||
972 | return compute_preempt_mode; | ||
973 | } | ||
974 | |||
975 | static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch, | ||
976 | u32 graphics_preempt_mode, u32 compute_preempt_mode) | ||
977 | { | ||
978 | int err; | ||
979 | |||
980 | if (ch->g->ops.gr.set_preemption_mode) { | ||
981 | err = gk20a_busy(ch->g); | ||
982 | if (err) { | ||
983 | nvgpu_err(ch->g, "failed to power on, %d", err); | ||
984 | return err; | ||
985 | } | ||
986 | err = ch->g->ops.gr.set_preemption_mode(ch, | ||
987 | nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode), | ||
988 | nvgpu_get_common_compute_preempt_mode(compute_preempt_mode)); | ||
989 | gk20a_idle(ch->g); | ||
990 | } else { | ||
991 | err = -EINVAL; | ||
992 | } | ||
993 | |||
994 | return err; | ||
995 | } | ||
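
Callers reach the helper above through NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE, passing the two Linux-side mode values that the conversion helpers earlier in this file translate to common ones. A minimal sketch; the wrapper name and include path are assumptions:

#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Hypothetical helper: request GfxP graphics and CILP compute preemption. */
static int enable_gfxp_cilp(int channel_fd)
{
	struct nvgpu_preemption_mode_args args = {
		.graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP,
		.compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP,
	};

	return ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE,
		     &args);
}
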
996 | |||
997 | static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch, | ||
998 | struct nvgpu_get_user_syncpoint_args *args) | ||
999 | { | ||
1000 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
1001 | struct gk20a *g = ch->g; | ||
1002 | int err; | ||
1003 | |||
1004 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) { | ||
1005 | nvgpu_err(g, "user syncpoints not supported"); | ||
1006 | return -EINVAL; | ||
1007 | } | ||
1008 | |||
1009 | if (!nvgpu_has_syncpoints(g)) { | ||
1010 | nvgpu_err(g, "syncpoints not supported"); | ||
1011 | return -EINVAL; | ||
1012 | } | ||
1013 | |||
1014 | if (g->aggressive_sync_destroy_thresh) { | ||
1015 | nvgpu_err(g, "sufficient syncpoints not available"); | ||
1016 | return -EINVAL; | ||
1017 | } | ||
1018 | |||
1019 | nvgpu_mutex_acquire(&ch->sync_lock); | ||
1020 | if (ch->user_sync) { | ||
1021 | nvgpu_mutex_release(&ch->sync_lock); | ||
1022 | } else { | ||
1023 | ch->user_sync = nvgpu_channel_sync_create(ch, true); | ||
1024 | if (!ch->user_sync) { | ||
1025 | nvgpu_mutex_release(&ch->sync_lock); | ||
1026 | return -ENOMEM; | ||
1027 | } | ||
1028 | nvgpu_mutex_release(&ch->sync_lock); | ||
1029 | |||
1030 | if (g->ops.fifo.resetup_ramfc) { | ||
1031 | err = g->ops.fifo.resetup_ramfc(ch); | ||
1032 | if (err) | ||
1033 | return err; | ||
1034 | } | ||
1035 | } | ||
1036 | |||
1037 | args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync); | ||
1038 | args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev, | ||
1039 | args->syncpoint_id); | ||
1040 | if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS)) | ||
1041 | args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync); | ||
1042 | else | ||
1043 | args->gpu_va = 0; | ||
1044 | |||
1045 | return 0; | ||
1046 | #else | ||
1047 | return -EINVAL; | ||
1048 | #endif | ||
1049 | } | ||
1050 | |||
1051 | long gk20a_channel_ioctl(struct file *filp, | ||
1052 | unsigned int cmd, unsigned long arg) | ||
1053 | { | ||
1054 | struct channel_priv *priv = filp->private_data; | ||
1055 | struct channel_gk20a *ch = priv->c; | ||
1056 | struct device *dev = dev_from_gk20a(ch->g); | ||
1057 | u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0}; | ||
1058 | int err = 0; | ||
1059 | struct gk20a *g = ch->g; | ||
1060 | |||
1061 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
1062 | |||
1063 | if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) || | ||
1064 | (_IOC_NR(cmd) == 0) || | ||
1065 | (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) || | ||
1066 | (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE)) | ||
1067 | return -EINVAL; | ||
1068 | |||
1069 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
1070 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
1071 | return -EFAULT; | ||
1072 | } | ||
1073 | |||
1074 | /* take a ref or return timeout if channel refs can't be taken */ | ||
1075 | ch = gk20a_channel_get(ch); | ||
1076 | if (!ch) | ||
1077 | return -ETIMEDOUT; | ||
1078 | |||
1079 | /* protect our sanity for threaded userspace - most of the channel is | ||
1080 | * not thread safe */ | ||
1081 | nvgpu_mutex_acquire(&ch->ioctl_lock); | ||
1082 | |||
1083 | /* this ioctl call keeps a ref to the file which keeps a ref to the | ||
1084 | * channel */ | ||
1085 | |||
1086 | nvgpu_speculation_barrier(); | ||
1087 | switch (cmd) { | ||
1088 | case NVGPU_IOCTL_CHANNEL_OPEN: | ||
1089 | err = gk20a_channel_open_ioctl(ch->g, | ||
1090 | (struct nvgpu_channel_open_args *)buf); | ||
1091 | break; | ||
1092 | case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD: | ||
1093 | break; | ||
1094 | case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX: | ||
1095 | { | ||
1096 | struct nvgpu_alloc_obj_ctx_args *args = | ||
1097 | (struct nvgpu_alloc_obj_ctx_args *)buf; | ||
1098 | |||
1099 | err = gk20a_busy(ch->g); | ||
1100 | if (err) { | ||
1101 | dev_err(dev, | ||
1102 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1103 | __func__, cmd); | ||
1104 | break; | ||
1105 | } | ||
1106 | err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags); | ||
1107 | gk20a_idle(ch->g); | ||
1108 | break; | ||
1109 | } | ||
1110 | case NVGPU_IOCTL_CHANNEL_SETUP_BIND: | ||
1111 | { | ||
1112 | struct nvgpu_channel_setup_bind_args *channel_setup_bind_args = | ||
1113 | (struct nvgpu_channel_setup_bind_args *)buf; | ||
1114 | struct nvgpu_setup_bind_args setup_bind_args; | ||
1115 | |||
1116 | nvgpu_get_setup_bind_args(channel_setup_bind_args, | ||
1117 | &setup_bind_args); | ||
1118 | |||
1119 | err = gk20a_busy(ch->g); | ||
1120 | if (err) { | ||
1121 | dev_err(dev, | ||
1122 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1123 | __func__, cmd); | ||
1124 | break; | ||
1125 | } | ||
1126 | |||
1127 | if (!is_power_of_2(setup_bind_args.num_gpfifo_entries)) { | ||
1128 | err = -EINVAL; | ||
1129 | gk20a_idle(ch->g); | ||
1130 | break; | ||
1131 | } | ||
1132 | err = nvgpu_channel_setup_bind(ch, &setup_bind_args); | ||
1133 | channel_setup_bind_args->work_submit_token = | ||
1134 | setup_bind_args.work_submit_token; | ||
1135 | gk20a_idle(ch->g); | ||
1136 | break; | ||
1137 | } | ||
1138 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: | ||
1139 | { | ||
1140 | struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args = | ||
1141 | (struct nvgpu_alloc_gpfifo_ex_args *)buf; | ||
1142 | struct nvgpu_setup_bind_args setup_bind_args; | ||
1143 | |||
1144 | nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &setup_bind_args); | ||
1145 | |||
1146 | err = gk20a_busy(ch->g); | ||
1147 | if (err) { | ||
1148 | dev_err(dev, | ||
1149 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1150 | __func__, cmd); | ||
1151 | break; | ||
1152 | } | ||
1153 | |||
1154 | if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { | ||
1155 | err = -EINVAL; | ||
1156 | gk20a_idle(ch->g); | ||
1157 | break; | ||
1158 | } | ||
1159 | err = nvgpu_channel_setup_bind(ch, &setup_bind_args); | ||
1160 | gk20a_idle(ch->g); | ||
1161 | break; | ||
1162 | } | ||
1163 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: | ||
1164 | { | ||
1165 | struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = | ||
1166 | (struct nvgpu_alloc_gpfifo_args *)buf; | ||
1167 | struct nvgpu_setup_bind_args setup_bind_args; | ||
1168 | |||
1169 | nvgpu_get_gpfifo_args(alloc_gpfifo_args, &setup_bind_args); | ||
1170 | |||
1171 | err = gk20a_busy(ch->g); | ||
1172 | if (err) { | ||
1173 | dev_err(dev, | ||
1174 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1175 | __func__, cmd); | ||
1176 | break; | ||
1177 | } | ||
1178 | |||
1179 | err = nvgpu_channel_setup_bind(ch, &setup_bind_args); | ||
1180 | gk20a_idle(ch->g); | ||
1181 | break; | ||
1182 | } | ||
1183 | case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: | ||
1184 | err = gk20a_ioctl_channel_submit_gpfifo(ch, | ||
1185 | (struct nvgpu_submit_gpfifo_args *)buf); | ||
1186 | break; | ||
1187 | case NVGPU_IOCTL_CHANNEL_WAIT: | ||
1188 | err = gk20a_busy(ch->g); | ||
1189 | if (err) { | ||
1190 | dev_err(dev, | ||
1191 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1192 | __func__, cmd); | ||
1193 | break; | ||
1194 | } | ||
1195 | |||
1196 | /* waiting is thread-safe; holding this mutex across the | ||
1197 | * wait could deadlock in certain conditions */ | ||
1198 | nvgpu_mutex_release(&ch->ioctl_lock); | ||
1199 | |||
1200 | err = gk20a_channel_wait(ch, | ||
1201 | (struct nvgpu_wait_args *)buf); | ||
1202 | |||
1203 | nvgpu_mutex_acquire(&ch->ioctl_lock); | ||
1204 | |||
1205 | gk20a_idle(ch->g); | ||
1206 | break; | ||
1207 | case NVGPU_IOCTL_CHANNEL_ZCULL_BIND: | ||
1208 | err = gk20a_busy(ch->g); | ||
1209 | if (err) { | ||
1210 | dev_err(dev, | ||
1211 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1212 | __func__, cmd); | ||
1213 | break; | ||
1214 | } | ||
1215 | err = gk20a_channel_zcull_bind(ch, | ||
1216 | (struct nvgpu_zcull_bind_args *)buf); | ||
1217 | gk20a_idle(ch->g); | ||
1218 | break; | ||
1219 | case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: | ||
1220 | err = gk20a_busy(ch->g); | ||
1221 | if (err) { | ||
1222 | dev_err(dev, | ||
1223 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1224 | __func__, cmd); | ||
1225 | break; | ||
1226 | } | ||
1227 | err = gk20a_init_error_notifier(ch, | ||
1228 | (struct nvgpu_set_error_notifier *)buf); | ||
1229 | gk20a_idle(ch->g); | ||
1230 | break; | ||
1231 | case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT: | ||
1232 | { | ||
1233 | u32 timeout = | ||
1234 | (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; | ||
1235 | nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", | ||
1236 | timeout, ch->chid); | ||
1237 | ch->timeout_ms_max = timeout; | ||
1238 | gk20a_channel_trace_sched_param( | ||
1239 | trace_gk20a_channel_set_timeout, ch); | ||
1240 | break; | ||
1241 | } | ||
1242 | case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX: | ||
1243 | { | ||
1244 | u32 timeout = | ||
1245 | (u32)((struct nvgpu_set_timeout_ex_args *)buf)->timeout; | ||
1246 | bool timeout_debug_dump = !((u32) | ||
1247 | ((struct nvgpu_set_timeout_ex_args *)buf)->flags & | ||
1248 | (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP)); | ||
1249 | nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", | ||
1250 | timeout, ch->chid); | ||
1251 | ch->timeout_ms_max = timeout; | ||
1252 | ch->timeout_debug_dump = timeout_debug_dump; | ||
1253 | gk20a_channel_trace_sched_param( | ||
1254 | trace_gk20a_channel_set_timeout, ch); | ||
1255 | break; | ||
1256 | } | ||
1257 | case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: | ||
1258 | ((struct nvgpu_get_param_args *)buf)->value = | ||
1259 | gk20a_channel_check_timedout(ch); | ||
1260 | break; | ||
1261 | case NVGPU_IOCTL_CHANNEL_ENABLE: | ||
1262 | err = gk20a_busy(ch->g); | ||
1263 | if (err) { | ||
1264 | dev_err(dev, | ||
1265 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1266 | __func__, cmd); | ||
1267 | break; | ||
1268 | } | ||
1269 | if (ch->g->ops.fifo.enable_channel) | ||
1270 | ch->g->ops.fifo.enable_channel(ch); | ||
1271 | else | ||
1272 | err = -ENOSYS; | ||
1273 | gk20a_idle(ch->g); | ||
1274 | break; | ||
1275 | case NVGPU_IOCTL_CHANNEL_DISABLE: | ||
1276 | err = gk20a_busy(ch->g); | ||
1277 | if (err) { | ||
1278 | dev_err(dev, | ||
1279 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1280 | __func__, cmd); | ||
1281 | break; | ||
1282 | } | ||
1283 | if (ch->g->ops.fifo.disable_channel) | ||
1284 | ch->g->ops.fifo.disable_channel(ch); | ||
1285 | else | ||
1286 | err = -ENOSYS; | ||
1287 | gk20a_idle(ch->g); | ||
1288 | break; | ||
1289 | case NVGPU_IOCTL_CHANNEL_PREEMPT: | ||
1290 | err = gk20a_busy(ch->g); | ||
1291 | if (err) { | ||
1292 | dev_err(dev, | ||
1293 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1294 | __func__, cmd); | ||
1295 | break; | ||
1296 | } | ||
1297 | err = gk20a_fifo_preempt(ch->g, ch); | ||
1298 | gk20a_idle(ch->g); | ||
1299 | break; | ||
1300 | case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST: | ||
1301 | if (!capable(CAP_SYS_NICE)) { | ||
1302 | err = -EPERM; | ||
1303 | break; | ||
1304 | } | ||
1305 | if (!ch->g->ops.fifo.reschedule_runlist) { | ||
1306 | err = -ENOSYS; | ||
1307 | break; | ||
1308 | } | ||
1309 | err = gk20a_busy(ch->g); | ||
1310 | if (err) { | ||
1311 | dev_err(dev, | ||
1312 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1313 | __func__, cmd); | ||
1314 | break; | ||
1315 | } | ||
1316 | err = ch->g->ops.fifo.reschedule_runlist(ch, | ||
1317 | NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT & | ||
1318 | ((struct nvgpu_reschedule_runlist_args *)buf)->flags); | ||
1319 | gk20a_idle(ch->g); | ||
1320 | break; | ||
1321 | case NVGPU_IOCTL_CHANNEL_FORCE_RESET: | ||
1322 | err = gk20a_busy(ch->g); | ||
1323 | if (err) { | ||
1324 | dev_err(dev, | ||
1325 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1326 | __func__, cmd); | ||
1327 | break; | ||
1328 | } | ||
1329 | err = ch->g->ops.fifo.force_reset_ch(ch, | ||
1330 | NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); | ||
1331 | gk20a_idle(ch->g); | ||
1332 | break; | ||
1333 | case NVGPU_IOCTL_CHANNEL_WDT: | ||
1334 | err = gk20a_channel_set_wdt_status(ch, | ||
1335 | (struct nvgpu_channel_wdt_args *)buf); | ||
1336 | break; | ||
1337 | case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE: | ||
1338 | err = nvgpu_ioctl_channel_set_preemption_mode(ch, | ||
1339 | ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode, | ||
1340 | ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode); | ||
1341 | break; | ||
1342 | case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX: | ||
1343 | if (ch->g->ops.gr.set_boosted_ctx) { | ||
1344 | bool boost = | ||
1345 | ((struct nvgpu_boosted_ctx_args *)buf)->boost; | ||
1346 | |||
1347 | err = gk20a_busy(ch->g); | ||
1348 | if (err) { | ||
1349 | dev_err(dev, | ||
1350 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1351 | __func__, cmd); | ||
1352 | break; | ||
1353 | } | ||
1354 | err = ch->g->ops.gr.set_boosted_ctx(ch, boost); | ||
1355 | gk20a_idle(ch->g); | ||
1356 | } else { | ||
1357 | err = -EINVAL; | ||
1358 | } | ||
1359 | break; | ||
1360 | case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: | ||
1361 | err = gk20a_busy(ch->g); | ||
1362 | if (err) { | ||
1363 | dev_err(dev, | ||
1364 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1365 | __func__, cmd); | ||
1366 | break; | ||
1367 | } | ||
1368 | err = nvgpu_ioctl_channel_get_user_syncpoint(ch, | ||
1369 | (struct nvgpu_get_user_syncpoint_args *)buf); | ||
1370 | gk20a_idle(ch->g); | ||
1371 | break; | ||
1372 | default: | ||
1373 | dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); | ||
1374 | err = -ENOTTY; | ||
1375 | break; | ||
1376 | } | ||
1377 | |||
1378 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
1379 | err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)) ? -EFAULT : 0; | ||
1380 | |||
1381 | nvgpu_mutex_release(&ch->ioctl_lock); | ||
1382 | |||
1383 | gk20a_channel_put(ch); | ||
1384 | |||
1385 | nvgpu_log_fn(g, "end"); | ||
1386 | |||
1387 | return err; | ||
1388 | } | ||
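
gk20a_channel_ioctl above follows the standard validate/copy-in/dispatch/copy-out shape for a multiplexed ioctl. Stripped of the driver specifics, the skeleton is roughly the following; every name here is a generic placeholder, not an nvgpu symbol:

#include <linux/fs.h>
#include <linux/uaccess.h>

#define MY_IOCTL_MAGIC		'X'	/* placeholder magic */
#define MY_MAX_ARG_SIZE		256	/* largest argument struct */

static long my_dev_ioctl(struct file *filp, unsigned int cmd,
			 unsigned long arg)
{
	u8 buf[MY_MAX_ARG_SIZE] = {0};
	long err = 0;

	/* 1. Reject commands outside this device's magic/size range. */
	if (_IOC_TYPE(cmd) != MY_IOCTL_MAGIC || _IOC_SIZE(cmd) > sizeof(buf))
		return -EINVAL;

	/* 2. Copy the argument struct in if the command writes to us. */
	if ((_IOC_DIR(cmd) & _IOC_WRITE) &&
	    copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
		return -EFAULT;

	/* 3. Dispatch on cmd, operating on the stack copy. */
	switch (cmd) {
	default:
		err = -ENOTTY;
		break;
	}

	/* 4. Copy results back out if the command reads from us. */
	if (err == 0 && (_IOC_DIR(cmd) & _IOC_READ) &&
	    copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
		err = -EFAULT;

	return err;
}
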
diff --git a/include/os/linux/ioctl_channel.h b/include/os/linux/ioctl_channel.h new file mode 100644 index 0000000..3e80289 --- /dev/null +++ b/include/os/linux/ioctl_channel.h | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | #ifndef __NVGPU_IOCTL_CHANNEL_H__ | ||
14 | #define __NVGPU_IOCTL_CHANNEL_H__ | ||
15 | |||
16 | #include <linux/fs.h> | ||
17 | |||
18 | #include "gk20a/css_gr_gk20a.h" | ||
19 | |||
20 | struct inode; | ||
21 | struct file; | ||
22 | struct gk20a; | ||
23 | struct nvgpu_channel_open_args; | ||
24 | |||
25 | struct gk20a_cs_snapshot_client_linux { | ||
26 | struct gk20a_cs_snapshot_client cs_client; | ||
27 | |||
28 | u32 dmabuf_fd; | ||
29 | struct dma_buf *dma_handler; | ||
30 | }; | ||
31 | |||
32 | int gk20a_channel_open(struct inode *inode, struct file *filp); | ||
33 | int gk20a_channel_release(struct inode *inode, struct file *filp); | ||
34 | long gk20a_channel_ioctl(struct file *filp, | ||
35 | unsigned int cmd, unsigned long arg); | ||
36 | int gk20a_channel_open_ioctl(struct gk20a *g, | ||
37 | struct nvgpu_channel_open_args *args); | ||
38 | |||
39 | int gk20a_channel_cycle_stats(struct channel_gk20a *ch, int dmabuf_fd); | ||
40 | void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch); | ||
41 | |||
42 | int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, | ||
43 | u32 dmabuf_fd, | ||
44 | u32 perfmon_id_count, | ||
45 | u32 *perfmon_id_start); | ||
46 | int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch); | ||
47 | int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch); | ||
48 | |||
49 | extern const struct file_operations gk20a_channel_ops; | ||
50 | |||
51 | u32 nvgpu_get_common_runlist_level(u32 level); | ||
52 | |||
53 | u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags); | ||
54 | u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags); | ||
55 | u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode); | ||
56 | u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode); | ||
57 | #endif | ||
diff --git a/include/os/linux/ioctl_clk_arb.c b/include/os/linux/ioctl_clk_arb.c new file mode 100644 index 0000000..477222d --- /dev/null +++ b/include/os/linux/ioctl_clk_arb.c | |||
@@ -0,0 +1,574 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/cdev.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/anon_inodes.h> | ||
20 | #include <linux/uaccess.h> | ||
21 | #include <linux/poll.h> | ||
22 | #ifdef CONFIG_DEBUG_FS | ||
23 | #include <linux/debugfs.h> | ||
24 | #endif | ||
25 | #include <uapi/linux/nvgpu.h> | ||
26 | |||
27 | #include <nvgpu/bitops.h> | ||
28 | #include <nvgpu/lock.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/atomic.h> | ||
31 | #include <nvgpu/bug.h> | ||
32 | #include <nvgpu/kref.h> | ||
33 | #include <nvgpu/log.h> | ||
34 | #include <nvgpu/barrier.h> | ||
35 | #include <nvgpu/cond.h> | ||
36 | #include <nvgpu/list.h> | ||
37 | #include <nvgpu/clk_arb.h> | ||
38 | #include <nvgpu/gk20a.h> | ||
39 | |||
40 | #include "clk/clk.h" | ||
41 | #include "pstate/pstate.h" | ||
42 | #include "lpwr/lpwr.h" | ||
43 | #include "volt/volt.h" | ||
44 | |||
45 | #ifdef CONFIG_DEBUG_FS | ||
46 | #include "os_linux.h" | ||
47 | #endif | ||
48 | |||
49 | static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | ||
50 | struct file *filp) | ||
51 | { | ||
52 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
53 | struct nvgpu_clk_session *session = dev->session; | ||
54 | |||
55 | |||
56 | clk_arb_dbg(session->g, " "); | ||
57 | |||
58 | /* Drop the extra refcount taken in nvgpu_clk_arb_commit_request_fd, | ||
59 | * which is not released elsewhere when events are unsupported (iGPU) | ||
60 | */ | ||
61 | if (!session->g->clk_arb->clk_arb_events_supported) { | ||
62 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
63 | } | ||
64 | |||
65 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | ||
66 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask) | ||
71 | { | ||
72 | unsigned int poll_mask = 0; | ||
73 | |||
74 | if (nvgpu_poll_mask & NVGPU_POLLIN) | ||
75 | poll_mask |= POLLIN; | ||
76 | if (nvgpu_poll_mask & NVGPU_POLLPRI) | ||
77 | poll_mask |= POLLPRI; | ||
78 | if (nvgpu_poll_mask & NVGPU_POLLOUT) | ||
79 | poll_mask |= POLLOUT; | ||
80 | if (nvgpu_poll_mask & NVGPU_POLLRDNORM) | ||
81 | poll_mask |= POLLRDNORM; | ||
82 | if (nvgpu_poll_mask & NVGPU_POLLHUP) | ||
83 | poll_mask |= POLLHUP; | ||
84 | |||
85 | return poll_mask; | ||
86 | } | ||
87 | |||
88 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) | ||
89 | { | ||
90 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
91 | |||
92 | clk_arb_dbg(dev->session->g, " "); | ||
93 | |||
94 | poll_wait(filp, &dev->readout_wq.wq, wait); | ||
95 | return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0)); | ||
96 | } | ||
97 | |||
98 | void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev) | ||
99 | { | ||
100 | nvgpu_cond_broadcast_interruptible(&dev->readout_wq); | ||
101 | } | ||
102 | |||
103 | static int nvgpu_clk_arb_release_event_dev(struct inode *inode, | ||
104 | struct file *filp) | ||
105 | { | ||
106 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
107 | struct nvgpu_clk_session *session = dev->session; | ||
108 | struct nvgpu_clk_arb *arb; | ||
109 | |||
110 | arb = session->g->clk_arb; | ||
111 | |||
112 | clk_arb_dbg(session->g, " "); | ||
113 | |||
114 | if (arb) { | ||
115 | nvgpu_spinlock_acquire(&arb->users_lock); | ||
116 | nvgpu_list_del(&dev->link); | ||
117 | nvgpu_spinlock_release(&arb->users_lock); | ||
118 | nvgpu_clk_notification_queue_free(arb->g, &dev->queue); | ||
119 | } | ||
120 | |||
121 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | ||
122 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
123 | |||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event) | ||
128 | { | ||
129 | u32 nvgpu_gpu_event; | ||
130 | |||
131 | switch (nvgpu_event) { | ||
132 | case NVGPU_EVENT_VF_UPDATE: | ||
133 | nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE; | ||
134 | break; | ||
135 | case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE: | ||
136 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE; | ||
137 | break; | ||
138 | case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE: | ||
139 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE; | ||
140 | break; | ||
141 | case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED: | ||
142 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED; | ||
143 | break; | ||
144 | case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED: | ||
145 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED; | ||
146 | break; | ||
147 | case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD: | ||
148 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD; | ||
149 | break; | ||
150 | case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD: | ||
151 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD; | ||
152 | break; | ||
153 | case NVGPU_EVENT_ALARM_GPU_LOST: | ||
154 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST; | ||
155 | break; | ||
156 | default: | ||
157 | /* Control shouldn't come here */ | ||
158 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1; | ||
159 | break; | ||
160 | } | ||
161 | return nvgpu_gpu_event; | ||
162 | } | ||
163 | |||
164 | static inline u32 __pending_event(struct nvgpu_clk_dev *dev, | ||
165 | struct nvgpu_gpu_event_info *info) { | ||
166 | |||
167 | u32 tail, head; | ||
168 | u32 events = 0; | ||
169 | struct nvgpu_clk_notification *p_notif; | ||
170 | |||
171 | tail = nvgpu_atomic_read(&dev->queue.tail); | ||
172 | head = nvgpu_atomic_read(&dev->queue.head); | ||
173 | |||
174 | head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; | ||
175 | |||
176 | if (_WRAPGTEQ(tail, head) && info) { | ||
177 | head++; | ||
178 | p_notif = &dev->queue.notifications[head % dev->queue.size]; | ||
179 | events |= nvgpu_convert_gpu_event(p_notif->notification); | ||
180 | info->event_id = ffs(events) - 1; | ||
181 | info->timestamp = p_notif->timestamp; | ||
182 | nvgpu_atomic_set(&dev->queue.head, head); | ||
183 | } | ||
184 | |||
185 | return events; | ||
186 | } | ||
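
The head and tail in the queue above are free-running u32 counters indexed modulo queue.size, so the line "head = (tail - head) < size ? head : tail - size" fast-forwards a reader that the producer has lapped, silently dropping entries that were already overwritten. A standalone userspace illustration of just that arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t size = 32;
	uint32_t head = 10;	/* reader position (free-running)   */
	uint32_t tail = 100;	/* producer position: lapped reader */

	/* Same clamp as __pending_event: skip overwritten entries. */
	if ((tail - head) >= size)
		head = tail - size;

	printf("reader resumes at %u (slot %u)\n", head, head % size);
	return 0;
}
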
187 | |||
188 | static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, | ||
189 | size_t size, loff_t *off) | ||
190 | { | ||
191 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
192 | struct nvgpu_gpu_event_info info; | ||
193 | ssize_t err; | ||
194 | |||
195 | clk_arb_dbg(dev->session->g, | ||
196 | "filp=%p, buf=%p, size=%zu", filp, buf, size); | ||
197 | |||
198 | if ((size - *off) < sizeof(info)) | ||
199 | return 0; | ||
200 | |||
201 | memset(&info, 0, sizeof(info)); | ||
202 | /* Get the oldest event from the queue */ | ||
203 | while (!__pending_event(dev, &info)) { | ||
204 | if (filp->f_flags & O_NONBLOCK) | ||
205 | return -EAGAIN; | ||
206 | err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, | ||
207 | __pending_event(dev, &info), 0); | ||
208 | if (err) | ||
209 | return err; | ||
210 | if (info.timestamp) | ||
211 | break; | ||
212 | } | ||
213 | |||
214 | if (copy_to_user(buf + *off, &info, sizeof(info))) | ||
215 | return -EFAULT; | ||
216 | |||
217 | return sizeof(info); | ||
218 | } | ||
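
From userspace, the event fd served by this read handler is consumed with an ordinary poll()/read() loop, one fixed-size nvgpu_gpu_event_info record per read. A sketch, where the helper name and include path are assumptions:

#include <poll.h>
#include <unistd.h>
#include <linux/nvgpu.h>

/* Hypothetical helper: block for the next clock-arbiter event. */
static int read_one_event(int event_fd, struct nvgpu_gpu_event_info *info)
{
	struct pollfd pfd = { .fd = event_fd, .events = POLLIN };

	if (poll(&pfd, 1, -1) <= 0)
		return -1;

	/* A successful read() returns exactly one event record. */
	return read(event_fd, info, sizeof(*info)) == sizeof(*info) ? 0 : -1;
}
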
219 | |||
220 | static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, | ||
221 | struct nvgpu_gpu_set_event_filter_args *args) | ||
222 | { | ||
223 | struct gk20a *g = dev->session->g; | ||
224 | u32 mask; | ||
225 | |||
226 | nvgpu_log(g, gpu_dbg_fn, " "); | ||
227 | |||
228 | if (args->flags) | ||
229 | return -EINVAL; | ||
230 | |||
231 | if (args->size != 1) | ||
232 | return -EINVAL; | ||
233 | |||
234 | if (copy_from_user(&mask, (void __user *) args->buffer, | ||
235 | args->size * sizeof(u32))) | ||
236 | return -EFAULT; | ||
237 | |||
238 | /* update alarm mask */ | ||
239 | nvgpu_atomic_set(&dev->enabled_mask, mask); | ||
240 | |||
241 | return 0; | ||
242 | } | ||
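
The filter is a single u32 bitmask copied in from args->buffer; setting it from userspace looks like the sketch below. Which alarm each bit selects is defined by the uapi header, and the helper name and mask value here are placeholders:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Hypothetical helper: replace the event fd's alarm mask. */
static int set_event_mask(int event_fd, uint32_t mask)
{
	struct nvgpu_gpu_set_event_filter_args args = {
		.flags = 0,				/* must be zero    */
		.size = 1,				/* exactly one u32 */
		.buffer = (uint64_t)(uintptr_t)&mask,
	};

	return ioctl(event_fd, NVGPU_EVENT_IOCTL_SET_FILTER, &args);
}
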
243 | |||
244 | static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, | ||
245 | unsigned long arg) | ||
246 | { | ||
247 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
248 | struct gk20a *g = dev->session->g; | ||
249 | u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; | ||
250 | int err = 0; | ||
251 | |||
252 | nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); | ||
253 | |||
254 | if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) | ||
255 | || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) | ||
256 | return -EINVAL; | ||
257 | |||
258 | BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); | ||
259 | |||
260 | memset(buf, 0, sizeof(buf)); | ||
261 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
262 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
263 | return -EFAULT; | ||
264 | } | ||
265 | |||
266 | switch (cmd) { | ||
267 | case NVGPU_EVENT_IOCTL_SET_FILTER: | ||
268 | err = nvgpu_clk_arb_set_event_filter(dev, | ||
269 | (struct nvgpu_gpu_set_event_filter_args *)buf); | ||
270 | break; | ||
271 | default: | ||
272 | nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); | ||
273 | err = -ENOTTY; | ||
274 | } | ||
275 | |||
276 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
277 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)) ? -EFAULT : 0; | ||
278 | |||
279 | return err; | ||
280 | } | ||
281 | |||
282 | static const struct file_operations completion_dev_ops = { | ||
283 | .owner = THIS_MODULE, | ||
284 | .release = nvgpu_clk_arb_release_completion_dev, | ||
285 | .poll = nvgpu_clk_arb_poll_dev, | ||
286 | }; | ||
287 | |||
288 | static const struct file_operations event_dev_ops = { | ||
289 | .owner = THIS_MODULE, | ||
290 | .release = nvgpu_clk_arb_release_event_dev, | ||
291 | .poll = nvgpu_clk_arb_poll_dev, | ||
292 | .read = nvgpu_clk_arb_read_event_dev, | ||
293 | #ifdef CONFIG_COMPAT | ||
294 | .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev, | ||
295 | #endif | ||
296 | .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev, | ||
297 | }; | ||
298 | |||
299 | static int nvgpu_clk_arb_install_fd(struct gk20a *g, | ||
300 | struct nvgpu_clk_session *session, | ||
301 | const struct file_operations *fops, | ||
302 | struct nvgpu_clk_dev **_dev) | ||
303 | { | ||
304 | struct file *file; | ||
305 | int fd; | ||
306 | int err; | ||
307 | int status; | ||
308 | char name[64]; | ||
309 | struct nvgpu_clk_dev *dev; | ||
310 | |||
311 | clk_arb_dbg(g, " "); | ||
312 | |||
313 | dev = nvgpu_kzalloc(g, sizeof(*dev)); | ||
314 | if (!dev) | ||
315 | return -ENOMEM; | ||
316 | |||
317 | status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, | ||
318 | DEFAULT_EVENT_NUMBER); | ||
319 | if (status < 0) { | ||
320 | err = status; | ||
321 | goto fail; | ||
322 | } | ||
323 | |||
324 | fd = get_unused_fd_flags(O_RDWR); | ||
325 | if (fd < 0) { | ||
326 | err = fd; | ||
327 | goto fail; | ||
328 | } | ||
329 | |||
330 | snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); | ||
331 | file = anon_inode_getfile(name, fops, dev, O_RDWR); | ||
332 | if (IS_ERR(file)) { | ||
333 | err = PTR_ERR(file); | ||
334 | goto fail_fd; | ||
335 | } | ||
336 | |||
337 | fd_install(fd, file); | ||
338 | |||
339 | nvgpu_cond_init(&dev->readout_wq); | ||
340 | |||
341 | nvgpu_atomic_set(&dev->poll_mask, 0); | ||
342 | |||
343 | dev->session = session; | ||
344 | nvgpu_ref_init(&dev->refcount); | ||
345 | |||
346 | nvgpu_ref_get(&session->refcount); | ||
347 | |||
348 | *_dev = dev; | ||
349 | |||
350 | return fd; | ||
351 | |||
352 | fail_fd: | ||
353 | put_unused_fd(fd); | ||
354 | fail: | ||
355 | nvgpu_kfree(g, dev); | ||
356 | |||
357 | return err; | ||
358 | } | ||
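
nvgpu_clk_arb_install_fd is an instance of the stock anon-inode fd pattern: reserve a descriptor, wrap private state in an anonymous file, then publish the pair. Here is the pattern in isolation, as a kernel-side sketch rather than driver code; note that fd_install() makes the fd reachable by userspace immediately, so any state the fops rely on should be initialized before that call:

#include <linux/anon_inodes.h>
#include <linux/err.h>
#include <linux/file.h>
#include <linux/fs.h>

static int install_anon_fd(const char *name,
			   const struct file_operations *fops, void *priv)
{
	struct file *file;
	int fd = get_unused_fd_flags(O_RDWR);

	if (fd < 0)
		return fd;

	file = anon_inode_getfile(name, fops, priv, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		return PTR_ERR(file);
	}

	fd_install(fd, file);	/* userspace can use the fd from here on */
	return fd;
}
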
359 | |||
360 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | ||
361 | struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask) | ||
362 | { | ||
363 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
364 | struct nvgpu_clk_dev *dev; | ||
365 | int fd; | ||
366 | |||
367 | clk_arb_dbg(g, " "); | ||
368 | |||
369 | fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); | ||
370 | if (fd < 0) | ||
371 | return fd; | ||
372 | |||
373 | /* TODO: the alarm mask needs to be set to a default value to | ||
374 | * prevent failures in legacy tests. This will be removed when | ||
375 | * the sanity tests are updated. | ||
376 | */ | ||
377 | if (alarm_mask) | ||
378 | nvgpu_atomic_set(&dev->enabled_mask, alarm_mask); | ||
379 | else | ||
380 | nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); | ||
381 | |||
382 | dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); | ||
383 | |||
384 | nvgpu_spinlock_acquire(&arb->users_lock); | ||
385 | nvgpu_list_add_tail(&dev->link, &arb->users); | ||
386 | nvgpu_spinlock_release(&arb->users_lock); | ||
387 | |||
388 | *event_fd = fd; | ||
389 | |||
390 | return 0; | ||
391 | } | ||
392 | |||
393 | int nvgpu_clk_arb_install_request_fd(struct gk20a *g, | ||
394 | struct nvgpu_clk_session *session, int *request_fd) | ||
395 | { | ||
396 | struct nvgpu_clk_dev *dev; | ||
397 | int fd; | ||
398 | |||
399 | clk_arb_dbg(g, " "); | ||
400 | |||
401 | fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); | ||
402 | if (fd < 0) | ||
403 | return fd; | ||
404 | |||
405 | *request_fd = fd; | ||
406 | |||
407 | return 0; | ||
408 | } | ||
409 | |||
410 | int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, | ||
411 | struct nvgpu_clk_session *session, int request_fd) | ||
412 | { | ||
413 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
414 | struct nvgpu_clk_dev *dev; | ||
415 | struct fd fd; | ||
416 | int err = 0; | ||
417 | |||
418 | clk_arb_dbg(g, " "); | ||
419 | |||
420 | fd = fdget(request_fd); | ||
421 | if (!fd.file) | ||
422 | return -EINVAL; | ||
423 | |||
424 | if (fd.file->f_op != &completion_dev_ops) { | ||
425 | err = -EINVAL; | ||
426 | goto fdput_fd; | ||
427 | } | ||
428 | |||
429 | dev = (struct nvgpu_clk_dev *) fd.file->private_data; | ||
430 | |||
431 | if (!dev || dev->session != session) { | ||
432 | err = -EINVAL; | ||
433 | goto fdput_fd; | ||
434 | } | ||
435 | |||
436 | clk_arb_dbg(g, "requested target = %u\n", | ||
437 | (u32)dev->gpc2clk_target_mhz); | ||
438 | |||
439 | nvgpu_atomic_inc(&g->clk_arb_global_nr); | ||
440 | nvgpu_ref_get(&dev->refcount); | ||
441 | nvgpu_spinlock_acquire(&session->session_lock); | ||
442 | nvgpu_list_add(&dev->node, &session->targets); | ||
443 | nvgpu_spinlock_release(&session->session_lock); | ||
444 | nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); | ||
445 | |||
446 | fdput_fd: | ||
447 | fdput(fd); | ||
448 | return err; | ||
449 | } | ||
450 | |||
451 | int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, | ||
452 | int request_fd, u32 api_domain, u16 target_mhz) | ||
453 | { | ||
454 | struct nvgpu_clk_dev *dev; | ||
455 | struct fd fd; | ||
456 | int err = 0; | ||
457 | |||
458 | clk_arb_dbg(session->g, | ||
459 | "domain=0x%08x target_mhz=%u", api_domain, target_mhz); | ||
460 | |||
461 | fd = fdget(request_fd); | ||
462 | if (!fd.file) | ||
463 | return -EINVAL; | ||
464 | |||
465 | if (fd.file->f_op != &completion_dev_ops) { | ||
466 | err = -EINVAL; | ||
467 | goto fdput_fd; | ||
468 | } | ||
469 | |||
470 | dev = fd.file->private_data; | ||
471 | if (!dev || dev->session != session) { | ||
472 | err = -EINVAL; | ||
473 | goto fdput_fd; | ||
474 | } | ||
475 | |||
476 | switch (api_domain) { | ||
477 | case NVGPU_CLK_DOMAIN_MCLK: | ||
478 | dev->mclk_target_mhz = target_mhz; | ||
479 | break; | ||
480 | |||
481 | case NVGPU_CLK_DOMAIN_GPCCLK: | ||
482 | dev->gpc2clk_target_mhz = target_mhz * 2ULL; | ||
483 | break; | ||
484 | |||
485 | default: | ||
486 | err = -EINVAL; | ||
487 | } | ||
488 | |||
489 | fdput_fd: | ||
490 | fdput(fd); | ||
491 | return err; | ||
492 | } | ||
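
Taken together, the entry points above form a request lifecycle: install a completion fd, stage per-domain targets on it, then commit it so the arbiter worker applies them. A hedged caller-side sketch in this file's context, using only functions and constants defined or referenced here; the wrapper name is hypothetical, and in the real flow the fd is handed back to userspace, which polls it for completion:

/* Hypothetical helper: request a new GPC clock target on a session. */
static int request_gpcclk_mhz(struct gk20a *g,
			      struct nvgpu_clk_session *session, u16 mhz)
{
	int fd, err;

	err = nvgpu_clk_arb_install_request_fd(g, session, &fd);
	if (err)
		return err;

	err = nvgpu_clk_arb_set_session_target_mhz(session, fd,
					NVGPU_CLK_DOMAIN_GPCCLK, mhz);
	if (err == 0)
		err = nvgpu_clk_arb_commit_request_fd(g, session, fd);

	return err;
}
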
493 | |||
494 | u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) | ||
495 | { | ||
496 | u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); | ||
497 | u32 api_domains = 0; | ||
498 | |||
499 | if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) | ||
500 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); | ||
501 | |||
502 | if (clk_domains & CTRL_CLK_DOMAIN_MCLK) | ||
503 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); | ||
504 | |||
505 | return api_domains; | ||
506 | } | ||
507 | |||
508 | #ifdef CONFIG_DEBUG_FS | ||
509 | static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) | ||
510 | { | ||
511 | struct gk20a *g = s->private; | ||
512 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
513 | struct nvgpu_clk_arb_debug *debug; | ||
514 | |||
515 | u64 num; | ||
516 | s64 tmp, avg, std, max, min; | ||
517 | |||
518 | debug = NV_ACCESS_ONCE(arb->debug); | ||
519 | /* Make copy of structure and ensure no reordering */ | ||
520 | nvgpu_smp_rmb(); | ||
521 | if (!debug) | ||
522 | return -EINVAL; | ||
523 | |||
524 | std = debug->switch_std; | ||
525 | avg = debug->switch_avg; | ||
526 | max = debug->switch_max; | ||
527 | min = debug->switch_min; | ||
528 | num = debug->switch_num; | ||
529 | |||
530 | tmp = std; | ||
531 | do_div(tmp, num); | ||
532 | seq_printf(s, "Number of transitions: %lld\n", | ||
533 | num); | ||
534 | seq_printf(s, "max / min : %lld / %lld usec\n", | ||
535 | max, min); | ||
536 | seq_printf(s, "avg / std : %lld / %ld usec\n", | ||
537 | avg, int_sqrt(tmp)); | ||
538 | |||
539 | return 0; | ||
540 | } | ||
541 | |||
542 | static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) | ||
543 | { | ||
544 | return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); | ||
545 | } | ||
546 | |||
547 | static const struct file_operations nvgpu_clk_arb_stats_fops = { | ||
548 | .open = nvgpu_clk_arb_stats_open, | ||
549 | .read = seq_read, | ||
550 | .llseek = seq_lseek, | ||
551 | .release = single_release, | ||
552 | }; | ||
553 | |||
554 | |||
555 | int nvgpu_clk_arb_debugfs_init(struct gk20a *g) | ||
556 | { | ||
557 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
558 | struct dentry *gpu_root = l->debugfs; | ||
559 | struct dentry *d; | ||
560 | |||
561 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
562 | |||
563 | d = debugfs_create_file( | ||
564 | "arb_stats", | ||
565 | S_IRUGO, | ||
566 | gpu_root, | ||
567 | g, | ||
568 | &nvgpu_clk_arb_stats_fops); | ||
569 | if (!d) | ||
570 | return -ENOMEM; | ||
571 | |||
572 | return 0; | ||
573 | } | ||
574 | #endif | ||
diff --git a/include/os/linux/ioctl_ctrl.c b/include/os/linux/ioctl_ctrl.c new file mode 100644 index 0000000..ee141ff --- /dev/null +++ b/include/os/linux/ioctl_ctrl.c | |||
@@ -0,0 +1,2106 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011-2020, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/uaccess.h> | ||
18 | #include <linux/cdev.h> | ||
19 | #include <linux/file.h> | ||
20 | #include <linux/anon_inodes.h> | ||
21 | #include <linux/fs.h> | ||
22 | #include <linux/pm_runtime.h> | ||
23 | #include <uapi/linux/nvgpu.h> | ||
24 | |||
25 | #include <nvgpu/bitops.h> | ||
26 | #include <nvgpu/kmem.h> | ||
27 | #include <nvgpu/bug.h> | ||
28 | #include <nvgpu/ptimer.h> | ||
29 | #include <nvgpu/vidmem.h> | ||
30 | #include <nvgpu/log.h> | ||
31 | #include <nvgpu/enabled.h> | ||
32 | #include <nvgpu/sizes.h> | ||
33 | #include <nvgpu/list.h> | ||
34 | #include <nvgpu/clk_arb.h> | ||
35 | #include <nvgpu/gk20a.h> | ||
36 | #include <nvgpu/channel.h> | ||
37 | |||
38 | #include "ioctl_ctrl.h" | ||
39 | #include "ioctl_dbg.h" | ||
40 | #include "ioctl_as.h" | ||
41 | #include "ioctl_tsg.h" | ||
42 | #include "ioctl_channel.h" | ||
43 | #include "gk20a/fence_gk20a.h" | ||
44 | |||
45 | #include "platform_gk20a.h" | ||
46 | #include "os_linux.h" | ||
47 | #include "dmabuf.h" | ||
48 | #include "channel.h" | ||
49 | #include "dmabuf_vidmem.h" | ||
50 | |||
51 | #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? \ | ||
52 | (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) | ||
53 | #define MHZ_TO_HZ(a) ((u64)a * MHZ) | ||
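
HZ_TO_MHZ avoids a 64-bit division: for rates of 2^32 Hz and above it multiplies by 0x10C8 (4296, roughly 2^32 / 10^6 = 4294.97) and shifts right by 32, approximating division by one million to within about 0.03%, and rates above the 0xF414F9CD7 threshold (about 65.5 GHz) saturate to 0xffff MHz; below 2^32 Hz it falls back to a plain 32-bit divide by MHZ. A standalone check of the fixed-point branch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t hz = 5000000000ULL;	/* 5 GHz, above the 2^32 cutoff */
	uint32_t mhz = (uint32_t)((hz * 0x10C8ULL) >> 32);

	/* Prints 5001: a ~0.02% overestimate of the exact 5000 MHz. */
	printf("%llu Hz -> %u MHz\n", (unsigned long long)hz, mhz);
	return 0;
}
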
54 | |||
55 | struct gk20a_ctrl_priv { | ||
56 | struct device *dev; | ||
57 | struct gk20a *g; | ||
58 | struct nvgpu_clk_session *clk_session; | ||
59 | |||
60 | struct nvgpu_list_node list; | ||
61 | struct { | ||
62 | struct vm_area_struct *vma; | ||
63 | unsigned long flags; | ||
64 | bool vma_mapped; | ||
65 | } usermode_vma; | ||
66 | }; | ||
67 | |||
68 | static inline struct gk20a_ctrl_priv * | ||
69 | gk20a_ctrl_priv_from_list(struct nvgpu_list_node *node) | ||
70 | { | ||
71 | return (struct gk20a_ctrl_priv *) | ||
72 | ((uintptr_t)node - offsetof(struct gk20a_ctrl_priv, list)); | ||
73 | } | ||
74 | |||
75 | static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags) | ||
76 | { | ||
77 | u32 core_flags = 0; | ||
78 | |||
79 | if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) | ||
80 | core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED; | ||
81 | |||
82 | return core_flags; | ||
83 | } | ||
84 | |||
85 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) | ||
86 | { | ||
87 | struct nvgpu_os_linux *l; | ||
88 | struct gk20a *g; | ||
89 | struct gk20a_ctrl_priv *priv; | ||
90 | int err = 0; | ||
91 | |||
92 | l = container_of(inode->i_cdev, | ||
93 | struct nvgpu_os_linux, ctrl.cdev); | ||
94 | g = gk20a_get(&l->g); | ||
95 | if (!g) | ||
96 | return -ENODEV; | ||
97 | |||
98 | nvgpu_log_fn(g, " "); | ||
99 | |||
100 | priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv)); | ||
101 | if (!priv) { | ||
102 | err = -ENOMEM; | ||
103 | goto free_ref; | ||
104 | } | ||
105 | filp->private_data = priv; | ||
106 | priv->dev = dev_from_gk20a(g); | ||
107 | /* | ||
108 | * We don't close the arbiter fds after driver teardown, to support | ||
109 | * GPU_LOST events, so we store g here instead of dereferencing the | ||
110 | * dev structure on teardown. | ||
111 | */ | ||
112 | priv->g = g; | ||
113 | |||
114 | if (!g->sw_ready) { | ||
115 | err = gk20a_busy(g); | ||
116 | if (err) | ||
117 | goto free_ref; | ||
118 | gk20a_idle(g); | ||
119 | } | ||
120 | |||
121 | err = nvgpu_clk_arb_init_session(g, &priv->clk_session); | ||
122 | free_ref: | ||
123 | if (err != 0) { | ||
124 | gk20a_put(g); | ||
125 | if (priv) | ||
126 | nvgpu_kfree(g, priv); | ||
127 | } else { | ||
128 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
129 | nvgpu_list_add(&priv->list, &l->ctrl.privs); | ||
130 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
131 | } | ||
132 | |||
133 | return err; | ||
134 | } | ||
135 | int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp) | ||
136 | { | ||
137 | struct gk20a_ctrl_priv *priv = filp->private_data; | ||
138 | struct gk20a *g = priv->g; | ||
139 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
140 | |||
141 | nvgpu_log_fn(g, " "); | ||
142 | |||
143 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
144 | nvgpu_list_del(&priv->list); | ||
145 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
146 | |||
147 | if (priv->clk_session) | ||
148 | nvgpu_clk_arb_release_session(g, priv->clk_session); | ||
149 | |||
150 | gk20a_put(g); | ||
151 | nvgpu_kfree(g, priv); | ||
152 | |||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | struct nvgpu_flags_mapping { | ||
157 | u64 ioctl_flag; | ||
158 | int enabled_flag; | ||
159 | }; | ||
160 | |||
161 | static struct nvgpu_flags_mapping flags_mapping[] = { | ||
162 | {NVGPU_GPU_FLAGS_CAN_RAILGATE, | ||
163 | NVGPU_CAN_RAILGATE}, | ||
164 | {NVGPU_GPU_FLAGS_HAS_SYNCPOINTS, | ||
165 | NVGPU_HAS_SYNCPOINTS}, | ||
166 | {NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS, | ||
167 | NVGPU_SUPPORT_PARTIAL_MAPPINGS}, | ||
168 | {NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS, | ||
169 | NVGPU_SUPPORT_SPARSE_ALLOCS}, | ||
170 | {NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS, | ||
171 | NVGPU_SUPPORT_SYNC_FENCE_FDS}, | ||
172 | {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS, | ||
173 | NVGPU_SUPPORT_CYCLE_STATS}, | ||
174 | {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT, | ||
175 | NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT}, | ||
176 | {NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS, | ||
177 | NVGPU_SUPPORT_USERSPACE_MANAGED_AS}, | ||
178 | {NVGPU_GPU_FLAGS_SUPPORT_TSG, | ||
179 | NVGPU_SUPPORT_TSG}, | ||
180 | {NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS, | ||
181 | NVGPU_SUPPORT_CLOCK_CONTROLS}, | ||
182 | {NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE, | ||
183 | NVGPU_SUPPORT_GET_VOLTAGE}, | ||
184 | {NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT, | ||
185 | NVGPU_SUPPORT_GET_CURRENT}, | ||
186 | {NVGPU_GPU_FLAGS_SUPPORT_GET_POWER, | ||
187 | NVGPU_SUPPORT_GET_POWER}, | ||
188 | {NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE, | ||
189 | NVGPU_SUPPORT_GET_TEMPERATURE}, | ||
190 | {NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT, | ||
191 | NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT}, | ||
192 | {NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS, | ||
193 | NVGPU_SUPPORT_DEVICE_EVENTS}, | ||
194 | {NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE, | ||
195 | NVGPU_SUPPORT_FECS_CTXSW_TRACE}, | ||
196 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING, | ||
197 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING}, | ||
198 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL, | ||
199 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL}, | ||
200 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS, | ||
201 | NVGPU_SUPPORT_DETERMINISTIC_OPTS}, | ||
202 | {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS, | ||
203 | NVGPU_SUPPORT_SYNCPOINT_ADDRESS}, | ||
204 | {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT, | ||
205 | NVGPU_SUPPORT_USER_SYNCPOINT}, | ||
206 | {NVGPU_GPU_FLAGS_SUPPORT_USERMODE_SUBMIT, | ||
207 | NVGPU_SUPPORT_USERMODE_SUBMIT}, | ||
208 | {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, | ||
209 | NVGPU_SUPPORT_IO_COHERENCE}, | ||
210 | {NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, | ||
211 | NVGPU_SUPPORT_RESCHEDULE_RUNLIST}, | ||
212 | {NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL, | ||
213 | NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL}, | ||
214 | {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF, | ||
215 | NVGPU_ECC_ENABLED_SM_LRF}, | ||
216 | {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM, | ||
217 | NVGPU_ECC_ENABLED_SM_SHM}, | ||
218 | {NVGPU_GPU_FLAGS_ECC_ENABLED_TEX, | ||
219 | NVGPU_ECC_ENABLED_TEX}, | ||
220 | {NVGPU_GPU_FLAGS_ECC_ENABLED_LTC, | ||
221 | NVGPU_ECC_ENABLED_LTC}, | ||
222 | {NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS, | ||
223 | NVGPU_SUPPORT_TSG_SUBCONTEXTS}, | ||
224 | {NVGPU_GPU_FLAGS_SUPPORT_SCG, | ||
225 | NVGPU_SUPPORT_SCG}, | ||
226 | {NVGPU_GPU_FLAGS_SUPPORT_VPR, | ||
227 | NVGPU_SUPPORT_VPR}, | ||
228 | {NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE, | ||
229 | NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE}, | ||
230 | }; | ||
231 | |||
232 | static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) | ||
233 | { | ||
234 | unsigned int i; | ||
235 | u64 ioctl_flags = 0; | ||
236 | |||
237 | for (i = 0; i < sizeof(flags_mapping)/sizeof(*flags_mapping); i++) { | ||
238 | if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag)) | ||
239 | ioctl_flags |= flags_mapping[i].ioctl_flag; | ||
240 | } | ||
241 | |||
242 | if (!capable(CAP_SYS_NICE)) { | ||
243 | ioctl_flags &= ~NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST; | ||
244 | } | ||
245 | |||
246 | return ioctl_flags; | ||
247 | } | ||
248 | |||
249 | static void nvgpu_set_preemption_mode_flags(struct gk20a *g, | ||
250 | struct nvgpu_gpu_characteristics *gpu) | ||
251 | { | ||
252 | struct nvgpu_preemption_modes_rec preemption_mode_rec; | ||
253 | |||
254 | g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec); | ||
255 | |||
256 | gpu->graphics_preemption_mode_flags = | ||
257 | nvgpu_get_ioctl_graphics_preempt_mode_flags( | ||
258 | preemption_mode_rec.graphics_preemption_mode_flags); | ||
259 | gpu->compute_preemption_mode_flags = | ||
260 | nvgpu_get_ioctl_compute_preempt_mode_flags( | ||
261 | preemption_mode_rec.compute_preemption_mode_flags); | ||
262 | |||
263 | gpu->default_graphics_preempt_mode = | ||
264 | nvgpu_get_ioctl_graphics_preempt_mode( | ||
265 | preemption_mode_rec.default_graphics_preempt_mode); | ||
266 | gpu->default_compute_preempt_mode = | ||
267 | nvgpu_get_ioctl_compute_preempt_mode( | ||
268 | preemption_mode_rec.default_compute_preempt_mode); | ||
269 | } | ||
270 | |||
271 | static long | ||
272 | gk20a_ctrl_ioctl_gpu_characteristics( | ||
273 | struct gk20a *g, | ||
274 | struct nvgpu_gpu_get_characteristics *request) | ||
275 | { | ||
276 | struct nvgpu_gpu_characteristics gpu; | ||
277 | long err = 0; | ||
278 | |||
279 | if (gk20a_busy(g)) { | ||
280 | nvgpu_err(g, "failed to power on gpu"); | ||
281 | return -EINVAL; | ||
282 | } | ||
283 | |||
284 | memset(&gpu, 0, sizeof(gpu)); | ||
285 | |||
286 | gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); | ||
287 | gpu.on_board_video_memory_size = 0; /* integrated GPU */ | ||
288 | |||
289 | gpu.num_gpc = g->gr.gpc_count; | ||
290 | gpu.max_gpc_count = g->gr.max_gpc_count; | ||
291 | |||
292 | gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; | ||
293 | |||
294 | gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ | ||
295 | |||
296 | gpu.compression_page_size = g->ops.fb.compression_page_size(g); | ||
297 | |||
298 | if (g->ops.gr.get_gpc_mask) { | ||
299 | gpu.gpc_mask = g->ops.gr.get_gpc_mask(g); | ||
300 | } else { | ||
301 | gpu.gpc_mask = BIT32(g->gr.gpc_count) - 1; | ||
302 | } | ||
303 | |||
304 | gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); | ||
305 | |||
306 | gpu.arch = g->params.gpu_arch; | ||
307 | gpu.impl = g->params.gpu_impl; | ||
308 | gpu.rev = g->params.gpu_rev; | ||
309 | gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT; | ||
310 | gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ? | ||
311 | NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0; | ||
312 | gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS); | ||
313 | gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS); | ||
314 | gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS); | ||
315 | gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS); | ||
316 | gpu.inline_to_memory_class = | ||
317 | g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS); | ||
318 | gpu.dma_copy_class = | ||
319 | g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); | ||
320 | |||
321 | gpu.vbios_version = g->bios.vbios_version; | ||
322 | gpu.vbios_oem_version = g->bios.vbios_oem_version; | ||
323 | |||
324 | gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g); | ||
325 | gpu.pde_coverage_bit_count = | ||
326 | g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0]; | ||
327 | gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g); | ||
328 | |||
329 | gpu.sm_arch_sm_version = g->params.sm_arch_sm_version; | ||
330 | gpu.sm_arch_spa_version = g->params.sm_arch_spa_version; | ||
331 | gpu.sm_arch_warp_count = g->params.sm_arch_warp_count; | ||
332 | |||
333 | gpu.max_css_buffer_size = g->gr.max_css_buffer_size; | ||
334 | |||
335 | gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; | ||
336 | gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; | ||
337 | gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; | ||
338 | gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; | ||
339 | gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; | ||
340 | gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST; | ||
341 | gpu.gpu_va_bit_count = 40; | ||
342 | |||
343 | strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname)); | ||
344 | gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g); | ||
345 | gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); | ||
346 | gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); | ||
347 | gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); | ||
348 | gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; | ||
349 | gpu.gr_gobs_per_comptagline_per_slice = | ||
350 | g->gr.gobs_per_comptagline_per_slice; | ||
351 | gpu.num_ltc = g->ltc_count; | ||
352 | gpu.lts_per_ltc = g->gr.slices_per_ltc; | ||
353 | gpu.cbc_cache_line_size = g->gr.cacheline_size; | ||
354 | gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; | ||
355 | |||
356 | if (g->ops.clk.get_maxrate) | ||
357 | gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
358 | |||
359 | gpu.local_video_memory_size = g->mm.vidmem.size; | ||
360 | |||
361 | gpu.pci_vendor_id = g->pci_vendor_id; | ||
362 | gpu.pci_device_id = g->pci_device_id; | ||
363 | gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id; | ||
364 | gpu.pci_subsystem_device_id = g->pci_subsystem_device_id; | ||
365 | gpu.pci_class = g->pci_class; | ||
366 | gpu.pci_revision = g->pci_revision; | ||
367 | |||
368 | nvgpu_set_preemption_mode_flags(g, &gpu); | ||
369 | |||
370 | if (request->gpu_characteristics_buf_size > 0) { | ||
371 | size_t write_size = sizeof(gpu); | ||
372 | |||
373 | nvgpu_speculation_barrier(); | ||
374 | if (write_size > request->gpu_characteristics_buf_size) | ||
375 | write_size = request->gpu_characteristics_buf_size; | ||
376 | |||
377 | if (copy_to_user((void __user *)(uintptr_t) | ||
378 | request->gpu_characteristics_buf_addr, | ||
379 | &gpu, write_size)) | ||
| err = -EFAULT; | ||
380 | } | ||
381 | |||
382 | if (err == 0) | ||
383 | request->gpu_characteristics_buf_size = sizeof(gpu); | ||
384 | |||
385 | gk20a_idle(g); | ||
386 | |||
387 | return err; | ||
388 | } | ||
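/*
 * Usage sketch (illustrative only, hypothetical userspace code): the ioctl
 * implements the usual size-negotiation protocol - call once with a
 * zero-sized buffer to learn the struct size the kernel writes back, then
 * call again with a buffer of at least that size:
 *
 *   struct nvgpu_gpu_get_characteristics req = { 0 };
 *   ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req);
 *   void *buf = calloc(1, req.gpu_characteristics_buf_size);
 *   req.gpu_characteristics_buf_addr = (uintptr_t)buf;
 *   ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req);
 *
 * Older and newer userspace stay compatible because the kernel truncates
 * the copy to the caller's buffer size and reports the full size afterwards.
 */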
389 | |||
390 | static int gk20a_ctrl_prepare_compressible_read( | ||
391 | struct gk20a *g, | ||
392 | struct nvgpu_gpu_prepare_compressible_read_args *args) | ||
393 | { | ||
394 | int ret = -ENOSYS; | ||
395 | |||
396 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
397 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
398 | struct nvgpu_channel_fence fence; | ||
399 | struct gk20a_fence *fence_out = NULL; | ||
400 | int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( | ||
401 | args->submit_flags); | ||
402 | int fd = -1; | ||
403 | |||
404 | fence.id = args->fence.syncpt_id; | ||
405 | fence.value = args->fence.syncpt_value; | ||
406 | |||
407 | /* Try to allocate an fd here. */ | ||
408 | if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) | ||
409 | && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) { | ||
410 | fd = get_unused_fd_flags(O_RDWR); | ||
411 | if (fd < 0) | ||
412 | return fd; | ||
413 | } | ||
414 | |||
415 | ret = gk20a_prepare_compressible_read(l, args->handle, | ||
416 | args->request_compbits, args->offset, | ||
417 | args->compbits_hoffset, args->compbits_voffset, | ||
418 | args->scatterbuffer_offset, | ||
419 | args->width, args->height, args->block_height_log2, | ||
420 | submit_flags, &fence, &args->valid_compbits, | ||
421 | &args->zbc_color, &fence_out); | ||
422 | |||
423 | if (ret) { | ||
424 | if (fd != -1) | ||
425 | put_unused_fd(fd); | ||
426 | return ret; | ||
427 | } | ||
428 | |||
429 | /* Convert fence_out to something we can pass back to user space. */ | ||
430 | if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { | ||
431 | if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { | ||
432 | if (fence_out) { | ||
433 | ret = gk20a_fence_install_fd(fence_out, fd); | ||
434 | if (ret) | ||
435 | put_unused_fd(fd); | ||
436 | else | ||
437 | args->fence.fd = fd; | ||
438 | } else { | ||
439 | args->fence.fd = -1; | ||
440 | put_unused_fd(fd); | ||
441 | } | ||
442 | } else { | ||
443 | if (fence_out) { | ||
444 | args->fence.syncpt_id = fence_out->syncpt_id; | ||
445 | args->fence.syncpt_value = | ||
446 | fence_out->syncpt_value; | ||
447 | } else { | ||
448 | args->fence.syncpt_id = -1; | ||
449 | args->fence.syncpt_value = 0; | ||
450 | } | ||
451 | } | ||
452 | } | ||
453 | gk20a_fence_put(fence_out); | ||
454 | #endif | ||
455 | |||
456 | return ret; | ||
457 | } | ||
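/*
 * Note (descriptive, added for clarity): the fd is reserved with
 * get_unused_fd_flags() *before* the submit, so the only step left after a
 * successful submit - installing the fence into the fd - cannot fail in a
 * way that leaks a fence to userspace. On any failure the reserved fd is
 * returned to the pool with put_unused_fd() before userspace ever sees it.
 */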
458 | |||
459 | static int gk20a_ctrl_mark_compressible_write( | ||
460 | struct gk20a *g, | ||
461 | struct nvgpu_gpu_mark_compressible_write_args *args) | ||
462 | { | ||
463 | int ret = -ENOSYS; | ||
464 | |||
465 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
466 | ret = gk20a_mark_compressible_write(g, args->handle, | ||
467 | args->valid_compbits, args->offset, args->zbc_color); | ||
468 | #endif | ||
469 | |||
470 | return ret; | ||
471 | } | ||
472 | |||
473 | static int gk20a_ctrl_alloc_as( | ||
474 | struct gk20a *g, | ||
475 | struct nvgpu_alloc_as_args *args) | ||
476 | { | ||
477 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
478 | struct gk20a_as_share *as_share; | ||
479 | int err; | ||
480 | int fd; | ||
481 | struct file *file; | ||
482 | char name[64]; | ||
483 | |||
484 | err = get_unused_fd_flags(O_RDWR); | ||
485 | if (err < 0) | ||
486 | return err; | ||
487 | fd = err; | ||
488 | |||
489 | snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd); | ||
490 | |||
491 | file = anon_inode_getfile(name, l->as_dev.cdev.ops, NULL, O_RDWR); | ||
492 | if (IS_ERR(file)) { | ||
493 | err = PTR_ERR(file); | ||
494 | goto clean_up; | ||
495 | } | ||
496 | |||
497 | err = gk20a_as_alloc_share(g, args->big_page_size, | ||
498 | gk20a_as_translate_as_alloc_flags(g, | ||
499 | args->flags), | ||
500 | &as_share); | ||
501 | if (err) | ||
502 | goto clean_up_file; | ||
503 | |||
504 | file->private_data = as_share; | ||
505 | fd_install(fd, file); | ||
506 | |||
507 | args->as_fd = fd; | ||
508 | return 0; | ||
509 | |||
510 | clean_up_file: | ||
511 | fput(file); | ||
512 | clean_up: | ||
513 | put_unused_fd(fd); | ||
514 | return err; | ||
515 | } | ||
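/*
 * Note (descriptive, added for clarity): this follows the usual anonymous
 * inode pattern - reserve an fd, create the file, perform the failure-prone
 * allocation, and only then fd_install(), the point of no return that
 * publishes the fd to userspace (which is also why private_data must be set
 * before the install). Error paths use fput() for the file and
 * put_unused_fd() for the still-unpublished fd.
 */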
516 | |||
517 | static int gk20a_ctrl_open_tsg(struct gk20a *g, | ||
518 | struct nvgpu_gpu_open_tsg_args *args) | ||
519 | { | ||
520 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
521 | int err; | ||
522 | int fd; | ||
523 | struct file *file; | ||
524 | char name[64]; | ||
525 | |||
526 | err = get_unused_fd_flags(O_RDWR); | ||
527 | if (err < 0) | ||
528 | return err; | ||
529 | fd = err; | ||
530 | |||
531 | snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd); | ||
532 | |||
533 | file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR); | ||
534 | if (IS_ERR(file)) { | ||
535 | err = PTR_ERR(file); | ||
536 | goto clean_up; | ||
537 | } | ||
538 | |||
539 | err = nvgpu_ioctl_tsg_open(g, file); | ||
540 | if (err) | ||
541 | goto clean_up_file; | ||
542 | |||
543 | fd_install(fd, file); | ||
544 | args->tsg_fd = fd; | ||
545 | return 0; | ||
546 | |||
547 | clean_up_file: | ||
548 | fput(file); | ||
549 | clean_up: | ||
550 | put_unused_fd(fd); | ||
551 | return err; | ||
552 | } | ||
553 | |||
554 | static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, | ||
555 | struct nvgpu_gpu_get_tpc_masks_args *args) | ||
556 | { | ||
557 | struct gr_gk20a *gr = &g->gr; | ||
558 | int err = 0; | ||
559 | const u32 gpc_tpc_mask_size = sizeof(u32) * gr->max_gpc_count; | ||
560 | |||
561 | if (args->mask_buf_size > 0) { | ||
562 | size_t write_size = gpc_tpc_mask_size; | ||
563 | |||
564 | nvgpu_speculation_barrier(); | ||
565 | if (write_size > args->mask_buf_size) | ||
566 | write_size = args->mask_buf_size; | ||
567 | |||
568 | if (copy_to_user((void __user *)(uintptr_t) | ||
569 | args->mask_buf_addr, | ||
570 | gr->gpc_tpc_mask, write_size)) | ||
| err = -EFAULT; | ||
571 | } | ||
572 | |||
573 | if (err == 0) | ||
574 | args->mask_buf_size = gpc_tpc_mask_size; | ||
575 | |||
576 | return err; | ||
577 | } | ||
578 | |||
579 | static int gk20a_ctrl_get_fbp_l2_masks( | ||
580 | struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args) | ||
581 | { | ||
582 | struct gr_gk20a *gr = &g->gr; | ||
583 | int err = 0; | ||
584 | const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count; | ||
585 | |||
586 | if (args->mask_buf_size > 0) { | ||
587 | size_t write_size = fbp_l2_mask_size; | ||
588 | |||
589 | nvgpu_speculation_barrier(); | ||
590 | if (write_size > args->mask_buf_size) | ||
591 | write_size = args->mask_buf_size; | ||
592 | |||
593 | if (copy_to_user((void __user *)(uintptr_t) | ||
594 | args->mask_buf_addr, | ||
595 | gr->fbp_rop_l2_en_mask, write_size)) | ||
| err = -EFAULT; | ||
596 | } | ||
597 | |||
598 | if (err == 0) | ||
599 | args->mask_buf_size = fbp_l2_mask_size; | ||
600 | |||
601 | return err; | ||
602 | } | ||
603 | |||
604 | static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, | ||
605 | struct nvgpu_gpu_l2_fb_args *args) | ||
606 | { | ||
607 | int ret; | ||
608 | bool always_poweron; | ||
609 | |||
610 | if ((!args->l2_flush && !args->fb_flush) || | ||
611 | (!args->l2_flush && args->l2_invalidate)) | ||
612 | return -EINVAL; | ||
613 | |||
614 | /* Power is effectively always on with joint rails, or on dGPU where runtime PM is disabled */ | ||
615 | always_poweron = (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) || | ||
616 | !pm_runtime_enabled(dev_from_gk20a(g))); | ||
617 | |||
618 | /* If power is not always on, bail out early when the GPU is already powered off */ | ||
619 | if (!always_poweron && !gk20a_check_poweron(g)) { | ||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | /* There is a small window between a call to gk20a_idle() and railgate | ||
624 | * actually being triggered (setting g->power_on = false), during which | ||
625 | * l2_flush can race with railgating. It is better to take the busy lock | ||
626 | * to prevent gk20a_idle() from proceeding. There is still a very small | ||
627 | * chance that gk20a_idle() begins before gk20a_busy(); holding locked | ||
628 | * access to g->power_on further reduces the probability of gk20a_idle() | ||
629 | * being triggered before gk20a_busy(). | ||
630 | */ | ||
631 | ret = gk20a_busy(g); | ||
632 | |||
633 | if (ret != 0) { | ||
634 | nvgpu_err(g, "failed to take power ref"); | ||
635 | return ret; | ||
636 | } | ||
637 | |||
638 | if (args->l2_flush) | ||
639 | g->ops.mm.l2_flush(g, args->l2_invalidate != 0); | ||
640 | |||
641 | if (args->fb_flush) | ||
642 | g->ops.mm.fb_flush(g); | ||
643 | |||
644 | gk20a_idle(g); | ||
645 | |||
646 | return 0; | ||
647 | } | ||
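/*
 * Illustrative flag combinations (derived from the checks above):
 *
 *   l2_flush  fb_flush  l2_invalidate  ->  result
 *      0         0           x             -EINVAL (nothing to do)
 *      0         1           1             -EINVAL (invalidate needs flush)
 *      0         1           0             flush FB only
 *      1         x           x             flush L2, optionally invalidate
 */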
648 | |||
649 | static int nvgpu_gpu_ioctl_set_mmu_debug_mode( | ||
650 | struct gk20a *g, | ||
651 | struct nvgpu_gpu_mmu_debug_mode_args *args) | ||
652 | { | ||
653 | if (gk20a_busy(g)) { | ||
654 | nvgpu_err(g, "failed to power on gpu"); | ||
655 | return -EINVAL; | ||
656 | } | ||
657 | |||
658 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
659 | g->ops.fb.set_debug_mode(g, args->state == 1); | ||
660 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
661 | |||
662 | gk20a_idle(g); | ||
663 | return 0; | ||
664 | } | ||
665 | |||
666 | static int nvgpu_gpu_ioctl_set_debug_mode( | ||
667 | struct gk20a *g, | ||
668 | struct nvgpu_gpu_sm_debug_mode_args *args) | ||
669 | { | ||
670 | struct channel_gk20a *ch; | ||
671 | int err; | ||
672 | |||
673 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
674 | if (!ch) | ||
675 | return -EINVAL; | ||
676 | |||
677 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
678 | if (g->ops.gr.set_sm_debug_mode) | ||
679 | err = g->ops.gr.set_sm_debug_mode(g, ch, | ||
680 | args->sms, !!args->enable); | ||
681 | else | ||
682 | err = -ENOSYS; | ||
683 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
684 | |||
685 | gk20a_channel_put(ch); | ||
686 | return err; | ||
687 | } | ||
688 | |||
689 | static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) | ||
690 | { | ||
691 | int err; | ||
692 | |||
693 | err = gk20a_busy(g); | ||
694 | if (err) | ||
695 | return err; | ||
696 | |||
697 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
698 | err = gr_gk20a_elpg_protected_call(g, | ||
699 | g->ops.gr.trigger_suspend(g)); | ||
700 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
701 | |||
702 | gk20a_idle(g); | ||
703 | |||
704 | return err; | ||
705 | } | ||
706 | |||
707 | static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, | ||
708 | struct nvgpu_gpu_wait_pause_args *args) | ||
709 | { | ||
710 | int err; | ||
711 | struct warpstate *ioctl_w_state; | ||
712 | struct nvgpu_warpstate *w_state = NULL; | ||
713 | u32 sm_count, ioctl_size, size, sm_id; | ||
714 | |||
715 | sm_count = g->gr.gpc_count * g->gr.tpc_count; | ||
716 | |||
717 | ioctl_size = sm_count * sizeof(struct warpstate); | ||
718 | ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); | ||
719 | if (!ioctl_w_state) | ||
720 | return -ENOMEM; | ||
721 | |||
722 | size = sm_count * sizeof(struct nvgpu_warpstate); | ||
723 | w_state = nvgpu_kzalloc(g, size); | ||
724 | if (!w_state) { | ||
725 | err = -ENOMEM; | ||
726 | goto out_free; | ||
727 | } | ||
728 | |||
729 | err = gk20a_busy(g); | ||
730 | if (err) | ||
731 | goto out_free; | ||
732 | |||
733 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
734 | (void)gr_gk20a_elpg_protected_call(g, | ||
735 | g->ops.gr.wait_for_pause(g, w_state)); | ||
736 | |||
737 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { | ||
738 | ioctl_w_state[sm_id].valid_warps[0] = | ||
739 | w_state[sm_id].valid_warps[0]; | ||
740 | ioctl_w_state[sm_id].valid_warps[1] = | ||
741 | w_state[sm_id].valid_warps[1]; | ||
742 | ioctl_w_state[sm_id].trapped_warps[0] = | ||
743 | w_state[sm_id].trapped_warps[0]; | ||
744 | ioctl_w_state[sm_id].trapped_warps[1] = | ||
745 | w_state[sm_id].trapped_warps[1]; | ||
746 | ioctl_w_state[sm_id].paused_warps[0] = | ||
747 | w_state[sm_id].paused_warps[0]; | ||
748 | ioctl_w_state[sm_id].paused_warps[1] = | ||
749 | w_state[sm_id].paused_warps[1]; | ||
750 | } | ||
751 | /* Copy the converted array to user space, pointed to by "args->pwarpstate" */ | ||
752 | if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, | ||
753 | ioctl_w_state, ioctl_size)) { | ||
754 | nvgpu_err(g, "copy_to_user failed!"); | ||
755 | err = -EFAULT; | ||
756 | } | ||
757 | |||
758 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
759 | |||
760 | gk20a_idle(g); | ||
761 | |||
762 | out_free: | ||
763 | nvgpu_kfree(g, w_state); | ||
764 | nvgpu_kfree(g, ioctl_w_state); | ||
765 | |||
766 | return err; | ||
767 | } | ||
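/*
 * Usage note (illustrative, hypothetical userspace sketch): the caller must
 * provide room for one struct warpstate per SM; the SM count can be queried
 * first via NVGPU_GPU_IOCTL_NUM_VSMS:
 *
 *   struct nvgpu_gpu_num_vsms vsms = { 0 };
 *   ioctl(ctrl_fd, NVGPU_GPU_IOCTL_NUM_VSMS, &vsms);
 *   struct warpstate *ws = calloc(vsms.num_vsms, sizeof(*ws));
 */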
768 | |||
769 | static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) | ||
770 | { | ||
771 | int err; | ||
772 | |||
773 | err = gk20a_busy(g); | ||
774 | if (err) | ||
775 | return err; | ||
776 | |||
777 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
778 | err = gr_gk20a_elpg_protected_call(g, | ||
779 | g->ops.gr.resume_from_pause(g)); | ||
780 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
781 | |||
782 | gk20a_idle(g); | ||
783 | |||
784 | return err; | ||
785 | } | ||
786 | |||
787 | static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) | ||
788 | { | ||
789 | int err; | ||
790 | |||
791 | err = gk20a_busy(g); | ||
792 | if (err) | ||
793 | return err; | ||
794 | |||
795 | err = gr_gk20a_elpg_protected_call(g, | ||
796 | g->ops.gr.clear_sm_errors(g)); | ||
797 | |||
798 | gk20a_idle(g); | ||
799 | |||
800 | return err; | ||
801 | } | ||
802 | |||
803 | static int nvgpu_gpu_ioctl_has_any_exception( | ||
804 | struct gk20a *g, | ||
805 | struct nvgpu_gpu_tpc_exception_en_status_args *args) | ||
806 | { | ||
807 | u32 tpc_exception_en; | ||
808 | |||
809 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
810 | tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); | ||
811 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
812 | |||
813 | args->tpc_exception_en_sm_mask = tpc_exception_en; | ||
814 | |||
815 | return 0; | ||
816 | } | ||
817 | |||
818 | static int gk20a_ctrl_get_num_vsms(struct gk20a *g, | ||
819 | struct nvgpu_gpu_num_vsms *args) | ||
820 | { | ||
821 | struct gr_gk20a *gr = &g->gr; | ||
822 | args->num_vsms = gr->no_of_sm; | ||
823 | return 0; | ||
824 | } | ||
825 | |||
826 | static int gk20a_ctrl_vsm_mapping(struct gk20a *g, | ||
827 | struct nvgpu_gpu_vsms_mapping *args) | ||
828 | { | ||
829 | int err = 0; | ||
830 | struct gr_gk20a *gr = &g->gr; | ||
831 | size_t write_size = gr->no_of_sm * | ||
832 | sizeof(struct nvgpu_gpu_vsms_mapping_entry); | ||
833 | struct nvgpu_gpu_vsms_mapping_entry *vsms_buf; | ||
834 | u32 i; | ||
835 | |||
836 | vsms_buf = nvgpu_kzalloc(g, write_size); | ||
837 | if (vsms_buf == NULL) | ||
838 | return -ENOMEM; | ||
839 | |||
840 | for (i = 0; i < gr->no_of_sm; i++) { | ||
841 | vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index; | ||
842 | if (g->ops.gr.get_nonpes_aware_tpc) | ||
843 | vsms_buf[i].tpc_index = | ||
844 | g->ops.gr.get_nonpes_aware_tpc(g, | ||
845 | gr->sm_to_cluster[i].gpc_index, | ||
846 | gr->sm_to_cluster[i].tpc_index); | ||
847 | else | ||
848 | vsms_buf[i].tpc_index = | ||
849 | gr->sm_to_cluster[i].tpc_index; | ||
850 | } | ||
851 | |||
852 | if (copy_to_user((void __user *)(uintptr_t) | ||
853 | args->vsms_map_buf_addr, | ||
854 | vsms_buf, write_size)) | ||
| err = -EFAULT; | ||
855 | nvgpu_kfree(g, vsms_buf); | ||
856 | |||
857 | return err; | ||
858 | } | ||
859 | |||
860 | static int nvgpu_gpu_get_cpu_time_correlation_info( | ||
861 | struct gk20a *g, | ||
862 | struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) | ||
863 | { | ||
864 | struct nvgpu_cpu_time_correlation_sample *samples; | ||
865 | int err; | ||
866 | u32 i; | ||
867 | |||
868 | if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT || | ||
869 | args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) | ||
870 | return -EINVAL; | ||
871 | |||
872 | samples = nvgpu_kzalloc(g, args->count * | ||
873 | sizeof(struct nvgpu_cpu_time_correlation_sample)); | ||
874 | if (!samples) { | ||
875 | return -ENOMEM; | ||
876 | } | ||
877 | |||
878 | err = g->ops.ptimer.get_timestamps_zipper(g, | ||
879 | args->source_id, args->count, samples); | ||
880 | if (!err) { | ||
881 | for (i = 0; i < args->count; i++) { | ||
882 | args->samples[i].cpu_timestamp = samples[i].cpu_timestamp; | ||
883 | args->samples[i].gpu_timestamp = samples[i].gpu_timestamp; | ||
884 | } | ||
885 | } | ||
886 | |||
887 | nvgpu_kfree(g, samples); | ||
888 | |||
889 | return err; | ||
890 | } | ||
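/*
 * Usage sketch (illustrative, hypothetical userspace code): each returned
 * sample pairs a CPU TSC read with a GPU PTIMER read taken close together,
 * so a caller can estimate the CPU-GPU clock offset, e.g.:
 *
 *   offset = samples[i].gpu_timestamp - samples[i].cpu_timestamp;
 *
 * averaged (or min-filtered) over args->count samples to reduce jitter.
 */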
891 | |||
892 | static int nvgpu_gpu_get_gpu_time( | ||
893 | struct gk20a *g, | ||
894 | struct nvgpu_gpu_get_gpu_time_args *args) | ||
895 | { | ||
896 | u64 time; | ||
897 | int err; | ||
898 | |||
899 | err = gk20a_busy(g); | ||
900 | if (err) | ||
901 | return err; | ||
902 | |||
903 | err = g->ops.ptimer.read_ptimer(g, &time); | ||
904 | if (!err) | ||
905 | args->gpu_timestamp = time; | ||
906 | |||
907 | gk20a_idle(g); | ||
908 | return err; | ||
909 | } | ||
910 | |||
911 | static int nvgpu_gpu_get_engine_info( | ||
912 | struct gk20a *g, | ||
913 | struct nvgpu_gpu_get_engine_info_args *args) | ||
914 | { | ||
915 | int err = 0; | ||
916 | u32 engine_enum = ENGINE_INVAL_GK20A; | ||
917 | u32 report_index = 0; | ||
918 | u32 engine_id_idx; | ||
919 | const u32 max_buffer_engines = args->engine_info_buf_size / | ||
920 | sizeof(struct nvgpu_gpu_get_engine_info_item); | ||
921 | struct nvgpu_gpu_get_engine_info_item __user *dst_item_list = | ||
922 | (void __user *)(uintptr_t)args->engine_info_buf_addr; | ||
923 | |||
924 | for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; | ||
925 | ++engine_id_idx) { | ||
926 | u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx]; | ||
927 | const struct fifo_engine_info_gk20a *src_info = | ||
928 | &g->fifo.engine_info[active_engine_id]; | ||
929 | struct nvgpu_gpu_get_engine_info_item dst_info; | ||
930 | |||
931 | memset(&dst_info, 0, sizeof(dst_info)); | ||
932 | |||
933 | engine_enum = src_info->engine_enum; | ||
934 | |||
935 | switch (engine_enum) { | ||
936 | case ENGINE_GR_GK20A: | ||
937 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR; | ||
938 | break; | ||
939 | |||
940 | case ENGINE_GRCE_GK20A: | ||
941 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY; | ||
942 | break; | ||
943 | |||
944 | case ENGINE_ASYNC_CE_GK20A: | ||
945 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY; | ||
946 | break; | ||
947 | |||
948 | default: | ||
949 | nvgpu_err(g, "Unmapped engine enum %u", | ||
950 | engine_enum); | ||
951 | continue; | ||
952 | } | ||
953 | |||
954 | dst_info.engine_instance = src_info->inst_id; | ||
955 | dst_info.runlist_id = src_info->runlist_id; | ||
956 | |||
957 | if (report_index < max_buffer_engines) { | ||
958 | if (copy_to_user(&dst_item_list[report_index], | ||
959 | &dst_info, sizeof(dst_info))) { | ||
960 | err = -EFAULT; | ||
961 | goto clean_up; | ||
| } | ||
962 | } | ||
963 | |||
964 | ++report_index; | ||
965 | } | ||
966 | |||
967 | args->engine_info_buf_size = | ||
968 | report_index * sizeof(struct nvgpu_gpu_get_engine_info_item); | ||
969 | |||
970 | clean_up: | ||
971 | return err; | ||
972 | } | ||
973 | |||
974 | static int nvgpu_gpu_alloc_vidmem(struct gk20a *g, | ||
975 | struct nvgpu_gpu_alloc_vidmem_args *args) | ||
976 | { | ||
977 | u32 align = args->in.alignment ? args->in.alignment : SZ_4K; | ||
978 | int fd; | ||
979 | |||
980 | nvgpu_log_fn(g, " "); | ||
981 | |||
982 | /* not yet supported */ | ||
983 | if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK)) | ||
984 | return -EINVAL; | ||
985 | |||
986 | /* not yet supported */ | ||
987 | if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR)) | ||
988 | return -EINVAL; | ||
989 | |||
990 | if (args->in.size & (SZ_4K - 1)) | ||
991 | return -EINVAL; | ||
992 | |||
993 | if (!args->in.size) | ||
994 | return -EINVAL; | ||
995 | |||
996 | if (align & (align - 1)) | ||
997 | return -EINVAL; | ||
998 | |||
999 | if (align > roundup_pow_of_two(args->in.size)) { | ||
1000 | /* Log this special case; it is a buddy allocator limitation */ | ||
1001 | nvgpu_warn(g, | ||
1002 | "alignment larger than buffer size rounded up to power of 2 is not supported"); | ||
1003 | return -EINVAL; | ||
1004 | } | ||
1005 | |||
1006 | fd = nvgpu_vidmem_export_linux(g, args->in.size); | ||
1007 | if (fd < 0) | ||
1008 | return fd; | ||
1009 | |||
1010 | args->out.dmabuf_fd = fd; | ||
1011 | |||
1012 | nvgpu_log_fn(g, "done, fd=%d", fd); | ||
1013 | |||
1014 | return 0; | ||
1015 | } | ||
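/*
 * Note (descriptive, added for clarity): "align & (align - 1)" is the
 * standard power-of-two test - a power of two has a single set bit, so
 * subtracting 1 sets only the bits below it and the AND is zero, e.g.:
 *
 *   align = 0x1000:  0x1000 & 0x0fff == 0      (accepted)
 *   align = 0x1800:  0x1800 & 0x17ff == 0x1000 (rejected, -EINVAL)
 */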
1016 | |||
1017 | static int nvgpu_gpu_get_memory_state(struct gk20a *g, | ||
1018 | struct nvgpu_gpu_get_memory_state_args *args) | ||
1019 | { | ||
1020 | int err; | ||
1021 | |||
1022 | nvgpu_log_fn(g, " "); | ||
1023 | |||
1024 | if (args->reserved[0] || args->reserved[1] || | ||
1025 | args->reserved[2] || args->reserved[3]) | ||
1026 | return -EINVAL; | ||
1027 | |||
1028 | err = nvgpu_vidmem_get_space(g, &args->total_free_bytes); | ||
1029 | |||
1030 | nvgpu_log_fn(g, "done, err=%d, bytes=%lld", err, args->total_free_bytes); | ||
1031 | |||
1032 | return err; | ||
1033 | } | ||
1034 | |||
1035 | static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain) | ||
1036 | { | ||
1037 | u32 domain = 0; | ||
1038 | |||
1039 | if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK) | ||
1040 | domain = NVGPU_CLK_DOMAIN_MCLK; | ||
1041 | else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK) | ||
1042 | domain = NVGPU_CLK_DOMAIN_GPCCLK; | ||
1043 | else | ||
1044 | domain = NVGPU_CLK_DOMAIN_MAX + 1; | ||
1045 | |||
1046 | return domain; | ||
1047 | } | ||
1048 | |||
1049 | static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g, | ||
1050 | struct gk20a_ctrl_priv *priv, | ||
1051 | struct nvgpu_gpu_clk_vf_points_args *args) | ||
1052 | { | ||
1053 | struct nvgpu_gpu_clk_vf_point clk_point; | ||
1054 | struct nvgpu_gpu_clk_vf_point __user *entry; | ||
1055 | struct nvgpu_clk_session *session = priv->clk_session; | ||
1056 | u32 clk_domains = 0; | ||
1057 | int err; | ||
1058 | u16 last_mhz; | ||
1059 | u16 *fpoints; | ||
1060 | u32 i; | ||
1061 | u32 max_points = 0; | ||
1062 | u32 num_points = 0; | ||
1063 | u16 min_mhz; | ||
1064 | u16 max_mhz; | ||
1065 | |||
1066 | nvgpu_log_fn(g, " "); | ||
1067 | |||
1068 | if (!session || args->flags) | ||
1069 | return -EINVAL; | ||
1070 | |||
1071 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
1072 | args->num_entries = 0; | ||
1073 | |||
1074 | if (!nvgpu_clk_arb_is_valid_domain(g, | ||
1075 | nvgpu_gpu_convert_clk_domain(args->clk_domain))) | ||
1076 | return -EINVAL; | ||
1077 | |||
1078 | err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, | ||
1079 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
1080 | &max_points, NULL); | ||
1081 | if (err) | ||
1082 | return err; | ||
1083 | |||
1084 | if (!args->max_entries) { | ||
1085 | args->max_entries = max_points; | ||
1086 | return 0; | ||
1087 | } | ||
1088 | |||
1089 | if (args->max_entries < max_points) | ||
1090 | return -EINVAL; | ||
1091 | |||
1092 | err = nvgpu_clk_arb_get_arbiter_clk_range(g, | ||
1093 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
1094 | &min_mhz, &max_mhz); | ||
1095 | if (err) | ||
1096 | return err; | ||
1097 | |||
1098 | fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16)); | ||
1099 | if (!fpoints) | ||
1100 | return -ENOMEM; | ||
1101 | |||
1102 | err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, | ||
1103 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
1104 | &max_points, fpoints); | ||
1105 | if (err) | ||
1106 | goto fail; | ||
1107 | |||
1108 | entry = (struct nvgpu_gpu_clk_vf_point __user *) | ||
1109 | (uintptr_t)args->clk_vf_point_entries; | ||
1110 | |||
1111 | last_mhz = 0; | ||
1112 | num_points = 0; | ||
1113 | for (i = 0; (i < max_points) && !err; i++) { | ||
1114 | |||
1115 | /* filter out duplicate frequencies */ | ||
1116 | if (fpoints[i] == last_mhz) | ||
1117 | continue; | ||
1118 | |||
1119 | /* filter out out-of-range frequencies */ | ||
1120 | if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz)) | ||
1121 | continue; | ||
1122 | |||
1123 | last_mhz = fpoints[i]; | ||
1124 | clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]); | ||
1125 | |||
1126 | if (copy_to_user((void __user *)entry, &clk_point, | ||
1127 | sizeof(clk_point))) | ||
| err = -EFAULT; | ||
1128 | |||
1129 | num_points++; | ||
1130 | entry++; | ||
1131 | } | ||
1132 | |||
1133 | args->num_entries = num_points; | ||
1134 | |||
1135 | fail: | ||
1136 | nvgpu_kfree(g, fpoints); | ||
1137 | return err; | ||
1138 | } | ||
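/*
 * Usage sketch (illustrative, hypothetical userspace code): like the other
 * enumeration ioctls, pass max_entries == 0 first to learn how many VF
 * points exist, then allocate and fetch:
 *
 *   struct nvgpu_gpu_clk_vf_points_args a = { 0 };
 *   a.clk_domain = NVGPU_GPU_CLK_DOMAIN_GPCCLK;
 *   ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS, &a);  // a.max_entries set
 *   a.clk_vf_point_entries = (uintptr_t)calloc(a.max_entries,
 *                   sizeof(struct nvgpu_gpu_clk_vf_point));
 *   ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS, &a);  // a.num_entries set
 */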
1139 | |||
1140 | static int nvgpu_gpu_clk_get_range(struct gk20a *g, | ||
1141 | struct gk20a_ctrl_priv *priv, | ||
1142 | struct nvgpu_gpu_clk_range_args *args) | ||
1143 | { | ||
1144 | struct nvgpu_gpu_clk_range clk_range; | ||
1145 | struct nvgpu_gpu_clk_range __user *entry; | ||
1146 | struct nvgpu_clk_session *session = priv->clk_session; | ||
1147 | |||
1148 | u32 clk_domains = 0; | ||
1149 | u32 num_domains; | ||
1150 | u32 num_entries; | ||
1151 | u32 i; | ||
1152 | int bit; | ||
1153 | int err; | ||
1154 | u16 min_mhz, max_mhz; | ||
1155 | |||
1156 | nvgpu_log_fn(g, " "); | ||
1157 | |||
1158 | if (!session) | ||
1159 | return -EINVAL; | ||
1160 | |||
1161 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
1162 | num_domains = hweight_long(clk_domains); | ||
1163 | |||
1164 | if (!args->flags) { | ||
1165 | if (!args->num_entries) { | ||
1166 | args->num_entries = num_domains; | ||
1167 | return 0; | ||
1168 | } | ||
1169 | |||
1170 | if (args->num_entries < num_domains) | ||
1171 | return -EINVAL; | ||
1172 | |||
1173 | args->num_entries = 0; | ||
1174 | num_entries = num_domains; | ||
1175 | |||
1176 | } else { | ||
1177 | if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) | ||
1178 | return -EINVAL; | ||
1179 | |||
1180 | num_entries = args->num_entries; | ||
1181 | if (num_entries > num_domains) | ||
1182 | return -EINVAL; | ||
1183 | } | ||
1184 | |||
1185 | entry = (struct nvgpu_gpu_clk_range __user *) | ||
1186 | (uintptr_t)args->clk_range_entries; | ||
1187 | |||
1188 | for (i = 0; i < num_entries; i++, entry++) { | ||
1189 | |||
1190 | if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { | ||
1191 | if (copy_from_user(&clk_range, (void __user *)entry, | ||
1192 | sizeof(clk_range))) | ||
1193 | return -EFAULT; | ||
1194 | } else { | ||
1195 | bit = ffs(clk_domains) - 1; | ||
1196 | clk_range.clk_domain = bit; | ||
1197 | clk_domains &= ~BIT(bit); | ||
1198 | } | ||
1199 | |||
1200 | clk_range.flags = 0; | ||
1201 | err = nvgpu_clk_arb_get_arbiter_clk_range(g, | ||
1202 | nvgpu_gpu_convert_clk_domain(clk_range.clk_domain), | ||
1203 | &min_mhz, &max_mhz); | ||
1204 | clk_range.min_hz = MHZ_TO_HZ(min_mhz); | ||
1205 | clk_range.max_hz = MHZ_TO_HZ(max_mhz); | ||
1206 | |||
1207 | if (err) | ||
1208 | return err; | ||
1209 | |||
1210 | err = copy_to_user(entry, &clk_range, sizeof(clk_range)); | ||
1211 | if (err) | ||
1212 | return -EFAULT; | ||
1213 | } | ||
1214 | |||
1215 | args->num_entries = num_entries; | ||
1216 | |||
1217 | return 0; | ||
1218 | } | ||
1219 | |||
1220 | static int nvgpu_gpu_clk_set_info(struct gk20a *g, | ||
1221 | struct gk20a_ctrl_priv *priv, | ||
1222 | struct nvgpu_gpu_clk_set_info_args *args) | ||
1223 | { | ||
1224 | struct nvgpu_gpu_clk_info clk_info; | ||
1225 | struct nvgpu_gpu_clk_info __user *entry; | ||
1226 | struct nvgpu_clk_session *session = priv->clk_session; | ||
1227 | |||
1228 | int fd; | ||
1229 | u32 clk_domains = 0; | ||
1230 | u16 freq_mhz; | ||
1231 | int i; | ||
1232 | int ret; | ||
1233 | |||
1234 | nvgpu_log_fn(g, " "); | ||
1235 | |||
1236 | if (!session || args->flags) | ||
1237 | return -EINVAL; | ||
1238 | |||
1239 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
1240 | if (!clk_domains) | ||
1241 | return -EINVAL; | ||
1242 | |||
1243 | entry = (struct nvgpu_gpu_clk_info __user *) | ||
1244 | (uintptr_t)args->clk_info_entries; | ||
1245 | |||
1246 | for (i = 0; i < args->num_entries; i++, entry++) { | ||
1247 | |||
1248 | if (copy_from_user(&clk_info, entry, sizeof(clk_info))) | ||
1249 | return -EFAULT; | ||
1250 | |||
1251 | if (!nvgpu_clk_arb_is_valid_domain(g, | ||
1252 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain))) | ||
1253 | return -EINVAL; | ||
1254 | } | ||
1255 | nvgpu_speculation_barrier(); | ||
1256 | |||
1257 | entry = (struct nvgpu_gpu_clk_info __user *) | ||
1258 | (uintptr_t)args->clk_info_entries; | ||
1259 | |||
1260 | ret = nvgpu_clk_arb_install_request_fd(g, session, &fd); | ||
1261 | if (ret < 0) | ||
1262 | return ret; | ||
1263 | |||
1264 | for (i = 0; i < args->num_entries; i++, entry++) { | ||
1265 | |||
1266 | if (copy_from_user(&clk_info, (void __user *)entry, | ||
1267 | sizeof(clk_info))) | ||
1268 | return -EFAULT; | ||
1269 | freq_mhz = HZ_TO_MHZ(clk_info.freq_hz); | ||
1270 | |||
1271 | nvgpu_clk_arb_set_session_target_mhz(session, fd, | ||
1272 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz); | ||
1273 | } | ||
1274 | |||
1275 | nvgpu_speculation_barrier(); | ||
1276 | ret = nvgpu_clk_arb_commit_request_fd(g, session, fd); | ||
1277 | if (ret < 0) | ||
1278 | return ret; | ||
1279 | |||
1280 | args->completion_fd = fd; | ||
1281 | |||
1282 | return ret; | ||
1283 | } | ||
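/*
 * Usage sketch (illustrative, hypothetical userspace code; assumes the
 * completion fd supports poll, as the arbiter's request fds are designed
 * to): a caller can wait on completion_fd for the frequency change to be
 * applied before proceeding:
 *
 *   ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_SET_INFO, &args);
 *   struct pollfd p = { .fd = args.completion_fd, .events = POLLIN };
 *   poll(&p, 1, -1);
 *   close(args.completion_fd);
 */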
1284 | |||
1285 | static int nvgpu_gpu_clk_get_info(struct gk20a *g, | ||
1286 | struct gk20a_ctrl_priv *priv, | ||
1287 | struct nvgpu_gpu_clk_get_info_args *args) | ||
1288 | { | ||
1289 | struct nvgpu_gpu_clk_info clk_info; | ||
1290 | struct nvgpu_gpu_clk_info __user *entry; | ||
1291 | struct nvgpu_clk_session *session = priv->clk_session; | ||
1292 | u32 clk_domains = 0; | ||
1293 | u32 num_domains; | ||
1294 | u32 num_entries; | ||
1295 | u32 i; | ||
1296 | u16 freq_mhz; | ||
1297 | int err; | ||
1298 | int bit; | ||
1299 | |||
1300 | nvgpu_log_fn(g, " "); | ||
1301 | |||
1302 | if (!session) | ||
1303 | return -EINVAL; | ||
1304 | |||
1305 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
1306 | num_domains = hweight_long(clk_domains); | ||
1307 | |||
1308 | if (!args->flags) { | ||
1309 | if (!args->num_entries) { | ||
1310 | args->num_entries = num_domains; | ||
1311 | return 0; | ||
1312 | } | ||
1313 | |||
1314 | if (args->num_entries < num_domains) | ||
1315 | return -EINVAL; | ||
1316 | |||
1317 | args->num_entries = 0; | ||
1318 | num_entries = num_domains; | ||
1319 | |||
1320 | } else { | ||
1321 | if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) | ||
1322 | return -EINVAL; | ||
1323 | |||
1324 | num_entries = args->num_entries; | ||
1325 | if (num_entries > num_domains * 3) | ||
1326 | return -EINVAL; | ||
1327 | } | ||
1328 | |||
1329 | entry = (struct nvgpu_gpu_clk_info __user *) | ||
1330 | (uintptr_t)args->clk_info_entries; | ||
1331 | |||
1332 | for (i = 0; i < num_entries; i++, entry++) { | ||
1333 | |||
1334 | if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { | ||
1335 | if (copy_from_user(&clk_info, (void __user *)entry, | ||
1336 | sizeof(clk_info))) | ||
1337 | return -EFAULT; | ||
1338 | } else { | ||
1339 | bit = ffs(clk_domains) - 1; | ||
1340 | clk_info.clk_domain = bit; | ||
1341 | clk_domains &= ~BIT(bit); | ||
1342 | clk_info.clk_type = args->clk_type; | ||
1343 | } | ||
1344 | |||
1345 | nvgpu_speculation_barrier(); | ||
1346 | switch (clk_info.clk_type) { | ||
1347 | case NVGPU_GPU_CLK_TYPE_TARGET: | ||
1348 | err = nvgpu_clk_arb_get_session_target_mhz(session, | ||
1349 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
1350 | &freq_mhz); | ||
1351 | break; | ||
1352 | case NVGPU_GPU_CLK_TYPE_ACTUAL: | ||
1353 | err = nvgpu_clk_arb_get_arbiter_actual_mhz(g, | ||
1354 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
1355 | &freq_mhz); | ||
1356 | break; | ||
1357 | case NVGPU_GPU_CLK_TYPE_EFFECTIVE: | ||
1358 | err = nvgpu_clk_arb_get_arbiter_effective_mhz(g, | ||
1359 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
1360 | &freq_mhz); | ||
1361 | break; | ||
1362 | default: | ||
1363 | freq_mhz = 0; | ||
1364 | err = -EINVAL; | ||
1365 | break; | ||
1366 | } | ||
1367 | if (err) | ||
1368 | return err; | ||
1369 | |||
1370 | clk_info.flags = 0; | ||
1371 | clk_info.freq_hz = MHZ_TO_HZ(freq_mhz); | ||
1372 | |||
1373 | err = copy_to_user((void __user *)entry, &clk_info, | ||
1374 | sizeof(clk_info)); | ||
1375 | if (err) | ||
1376 | return -EFAULT; | ||
1377 | } | ||
1378 | |||
1379 | nvgpu_speculation_barrier(); | ||
1380 | args->num_entries = num_entries; | ||
1381 | |||
1382 | return 0; | ||
1383 | } | ||
1384 | |||
1385 | static int nvgpu_gpu_get_event_fd(struct gk20a *g, | ||
1386 | struct gk20a_ctrl_priv *priv, | ||
1387 | struct nvgpu_gpu_get_event_fd_args *args) | ||
1388 | { | ||
1389 | struct nvgpu_clk_session *session = priv->clk_session; | ||
1390 | |||
1391 | nvgpu_log_fn(g, " "); | ||
1392 | |||
1393 | if (!session) | ||
1394 | return -EINVAL; | ||
1395 | |||
1396 | return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd, | ||
1397 | args->flags); | ||
1398 | } | ||
1399 | |||
1400 | static int nvgpu_gpu_get_voltage(struct gk20a *g, | ||
1401 | struct nvgpu_gpu_get_voltage_args *args) | ||
1402 | { | ||
1403 | int err = -EINVAL; | ||
1404 | |||
1405 | nvgpu_log_fn(g, " "); | ||
1406 | |||
1407 | if (args->reserved) | ||
1408 | return -EINVAL; | ||
1409 | |||
1410 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE)) | ||
1411 | return -EINVAL; | ||
1412 | |||
1413 | err = gk20a_busy(g); | ||
1414 | if (err) | ||
1415 | return err; | ||
1416 | |||
1417 | nvgpu_speculation_barrier(); | ||
1418 | switch (args->which) { | ||
1419 | case NVGPU_GPU_VOLTAGE_CORE: | ||
1420 | err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage); | ||
1421 | break; | ||
1422 | case NVGPU_GPU_VOLTAGE_SRAM: | ||
1423 | err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage); | ||
1424 | break; | ||
1425 | case NVGPU_GPU_VOLTAGE_BUS: | ||
1426 | err = pmgr_pwr_devices_get_voltage(g, &args->voltage); | ||
1427 | break; | ||
1428 | default: | ||
1429 | err = -EINVAL; | ||
1430 | } | ||
1431 | |||
1432 | gk20a_idle(g); | ||
1433 | |||
1434 | return err; | ||
1435 | } | ||
1436 | |||
1437 | static int nvgpu_gpu_get_current(struct gk20a *g, | ||
1438 | struct nvgpu_gpu_get_current_args *args) | ||
1439 | { | ||
1440 | int err; | ||
1441 | |||
1442 | nvgpu_log_fn(g, " "); | ||
1443 | |||
1444 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
1445 | return -EINVAL; | ||
1446 | |||
1447 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT)) | ||
1448 | return -EINVAL; | ||
1449 | |||
1450 | err = gk20a_busy(g); | ||
1451 | if (err) | ||
1452 | return err; | ||
1453 | |||
1454 | err = pmgr_pwr_devices_get_current(g, &args->currnt); | ||
1455 | |||
1456 | gk20a_idle(g); | ||
1457 | |||
1458 | return err; | ||
1459 | } | ||
1460 | |||
1461 | static int nvgpu_gpu_get_power(struct gk20a *g, | ||
1462 | struct nvgpu_gpu_get_power_args *args) | ||
1463 | { | ||
1464 | int err; | ||
1465 | |||
1466 | nvgpu_log_fn(g, " "); | ||
1467 | |||
1468 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
1469 | return -EINVAL; | ||
1470 | |||
1471 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER)) | ||
1472 | return -EINVAL; | ||
1473 | |||
1474 | err = gk20a_busy(g); | ||
1475 | if (err) | ||
1476 | return err; | ||
1477 | |||
1478 | err = pmgr_pwr_devices_get_power(g, &args->power); | ||
1479 | |||
1480 | gk20a_idle(g); | ||
1481 | |||
1482 | return err; | ||
1483 | } | ||
1484 | |||
1485 | static int nvgpu_gpu_get_temperature(struct gk20a *g, | ||
1486 | struct nvgpu_gpu_get_temperature_args *args) | ||
1487 | { | ||
1488 | int err; | ||
1489 | u32 temp_f24_8; | ||
1490 | |||
1491 | nvgpu_log_fn(g, " "); | ||
1492 | |||
1493 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
1494 | return -EINVAL; | ||
1495 | |||
1496 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE)) | ||
1497 | return -EINVAL; | ||
1498 | |||
1499 | if (!g->ops.therm.get_internal_sensor_curr_temp) | ||
1500 | return -EINVAL; | ||
1501 | |||
1502 | err = gk20a_busy(g); | ||
1503 | if (err) | ||
1504 | return err; | ||
1505 | |||
1506 | err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8); | ||
1507 | |||
1508 | gk20a_idle(g); | ||
1509 | |||
1510 | args->temp_f24_8 = (s32)temp_f24_8; | ||
1511 | |||
1512 | return err; | ||
1513 | } | ||
1514 | |||
1515 | static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, | ||
1516 | struct nvgpu_gpu_set_therm_alert_limit_args *args) | ||
1517 | { | ||
1518 | int err; | ||
1519 | |||
1520 | nvgpu_log_fn(g, " "); | ||
1521 | |||
1522 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
1523 | return -EINVAL; | ||
1524 | |||
1525 | if (!g->ops.therm.configure_therm_alert) | ||
1526 | return -EINVAL; | ||
1527 | |||
1528 | err = gk20a_busy(g); | ||
1529 | if (err) | ||
1530 | return err; | ||
1531 | |||
1532 | err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8); | ||
1533 | |||
1534 | gk20a_idle(g); | ||
1535 | |||
1536 | return err; | ||
1537 | } | ||
1538 | |||
1539 | static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch, | ||
1540 | u32 flags) | ||
1541 | { | ||
1542 | int err = 0; | ||
1543 | bool allow; | ||
1544 | bool disallow; | ||
1545 | |||
1546 | allow = flags & | ||
1547 | NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING; | ||
1548 | |||
1549 | disallow = flags & | ||
1550 | NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING; | ||
1551 | |||
1552 | /* Can't be both at the same time */ | ||
1553 | if (allow && disallow) | ||
1554 | return -EINVAL; | ||
1555 | |||
1556 | /* Nothing to do */ | ||
1557 | if (!allow && !disallow) | ||
1558 | return 0; | ||
1559 | |||
1560 | /* | ||
1561 | * Moving into explicit idle or back from it? A call that doesn't | ||
1562 | * change the status is a no-op. | ||
1563 | */ | ||
1564 | if (!ch->deterministic_railgate_allowed && | ||
1565 | allow) { | ||
1566 | gk20a_idle(ch->g); | ||
1567 | } else if (ch->deterministic_railgate_allowed && | ||
1568 | !allow) { | ||
1569 | err = gk20a_busy(ch->g); | ||
1570 | if (err) { | ||
1571 | nvgpu_warn(ch->g, | ||
1572 | "cannot busy to restore deterministic ch"); | ||
1573 | return err; | ||
1574 | } | ||
1575 | } | ||
1576 | ch->deterministic_railgate_allowed = allow; | ||
1577 | |||
1578 | return err; | ||
1579 | } | ||
1580 | |||
1581 | static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags) | ||
1582 | { | ||
1583 | if (!ch->deterministic) | ||
1584 | return -EINVAL; | ||
1585 | |||
1586 | return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags); | ||
1587 | } | ||
1588 | |||
1589 | static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g, | ||
1590 | struct nvgpu_gpu_set_deterministic_opts_args *args) | ||
1591 | { | ||
1592 | int __user *user_channels; | ||
1593 | u32 i = 0; | ||
1594 | int err = 0; | ||
1595 | |||
1596 | nvgpu_log_fn(g, " "); | ||
1597 | |||
1598 | user_channels = (int __user *)(uintptr_t)args->channels; | ||
1599 | |||
1600 | /* Upper limit; prevent holding deterministic_busy for long */ | ||
1601 | if (args->num_channels > g->fifo.num_channels) { | ||
1602 | err = -EINVAL; | ||
1603 | goto out; | ||
1604 | } | ||
1605 | |||
1606 | /* Trivial sanity check first */ | ||
1607 | if (!access_ok(VERIFY_READ, user_channels, | ||
1608 | args->num_channels * sizeof(int))) { | ||
1609 | err = -EFAULT; | ||
1610 | goto out; | ||
1611 | } | ||
1612 | |||
1613 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
1614 | |||
1615 | /* note: we exit at the first failure */ | ||
1616 | for (; i < args->num_channels; i++) { | ||
1617 | int ch_fd = 0; | ||
1618 | struct channel_gk20a *ch; | ||
1619 | |||
1620 | if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) { | ||
1621 | /* User raced with above access_ok */ | ||
1622 | err = -EFAULT; | ||
1623 | break; | ||
1624 | } | ||
1625 | |||
1626 | ch = gk20a_get_channel_from_file(ch_fd); | ||
1627 | if (!ch) { | ||
1628 | err = -EINVAL; | ||
1629 | break; | ||
1630 | } | ||
1631 | |||
1632 | err = nvgpu_gpu_set_deterministic_ch(ch, args->flags); | ||
1633 | |||
1634 | gk20a_channel_put(ch); | ||
1635 | |||
1636 | if (err) | ||
1637 | break; | ||
1638 | } | ||
1639 | |||
1640 | nvgpu_speculation_barrier(); | ||
1641 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1642 | |||
1643 | out: | ||
1644 | args->num_channels = i; | ||
1645 | return err; | ||
1646 | } | ||
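/*
 * Usage sketch (illustrative, hypothetical userspace code): the ioctl takes
 * an array of channel fds; on return, num_channels holds how many entries
 * were processed before the first failure, if any:
 *
 *   int chans[2] = { ch_fd_a, ch_fd_b };
 *   struct nvgpu_gpu_set_deterministic_opts_args a = {
 *           .num_channels = 2,
 *           .channels = (uintptr_t)chans,
 *           .flags = NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING,
 *   };
 *   ioctl(ctrl_fd, NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS, &a);
 */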
1647 | |||
1648 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
1649 | { | ||
1650 | struct gk20a_ctrl_priv *priv = filp->private_data; | ||
1651 | struct gk20a *g = priv->g; | ||
1652 | struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args; | ||
1653 | struct nvgpu_gpu_zcull_get_info_args *get_info_args; | ||
1654 | struct nvgpu_gpu_zbc_set_table_args *set_table_args; | ||
1655 | struct nvgpu_gpu_zbc_query_table_args *query_table_args; | ||
1656 | u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE]; | ||
1657 | struct gr_zcull_info *zcull_info; | ||
1658 | struct zbc_entry *zbc_val; | ||
1659 | struct zbc_query_params *zbc_tbl; | ||
1660 | int i, err = 0; | ||
1661 | |||
1662 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
1663 | |||
1664 | if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) || | ||
1665 | (_IOC_NR(cmd) == 0) || | ||
1666 | (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) || | ||
1667 | (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE)) | ||
1668 | return -EINVAL; | ||
1669 | |||
1670 | memset(buf, 0, sizeof(buf)); | ||
1671 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
1672 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
1673 | return -EFAULT; | ||
1674 | } | ||
1675 | |||
1676 | if (!g->sw_ready) { | ||
1677 | err = gk20a_busy(g); | ||
1678 | if (err) | ||
1679 | return err; | ||
1680 | |||
1681 | gk20a_idle(g); | ||
1682 | } | ||
1683 | |||
1684 | nvgpu_speculation_barrier(); | ||
1685 | switch (cmd) { | ||
1686 | case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: | ||
1687 | get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; | ||
1688 | |||
1689 | get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); | ||
1690 | |||
1691 | break; | ||
1692 | case NVGPU_GPU_IOCTL_ZCULL_GET_INFO: | ||
1693 | get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf; | ||
1694 | |||
1695 | memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args)); | ||
1696 | |||
1697 | zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info)); | ||
1698 | if (zcull_info == NULL) | ||
1699 | return -ENOMEM; | ||
1700 | |||
1701 | err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); | ||
1702 | if (err) { | ||
1703 | nvgpu_kfree(g, zcull_info); | ||
1704 | break; | ||
1705 | } | ||
1706 | |||
1707 | get_info_args->width_align_pixels = zcull_info->width_align_pixels; | ||
1708 | get_info_args->height_align_pixels = zcull_info->height_align_pixels; | ||
1709 | get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots; | ||
1710 | get_info_args->aliquot_total = zcull_info->aliquot_total; | ||
1711 | get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier; | ||
1712 | get_info_args->region_header_size = zcull_info->region_header_size; | ||
1713 | get_info_args->subregion_header_size = zcull_info->subregion_header_size; | ||
1714 | get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels; | ||
1715 | get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels; | ||
1716 | get_info_args->subregion_count = zcull_info->subregion_count; | ||
1717 | |||
1718 | nvgpu_kfree(g, zcull_info); | ||
1719 | break; | ||
1720 | case NVGPU_GPU_IOCTL_ZBC_SET_TABLE: | ||
1721 | set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf; | ||
1722 | |||
1723 | zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry)); | ||
1724 | if (zbc_val == NULL) | ||
1725 | return -ENOMEM; | ||
1726 | |||
1727 | zbc_val->format = set_table_args->format; | ||
1728 | zbc_val->type = set_table_args->type; | ||
1729 | |||
1730 | nvgpu_speculation_barrier(); | ||
1731 | switch (zbc_val->type) { | ||
1732 | case GK20A_ZBC_TYPE_COLOR: | ||
1733 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
1734 | zbc_val->color_ds[i] = set_table_args->color_ds[i]; | ||
1735 | zbc_val->color_l2[i] = set_table_args->color_l2[i]; | ||
1736 | } | ||
1737 | break; | ||
1738 | case GK20A_ZBC_TYPE_DEPTH: | ||
1739 | case T19X_ZBC: | ||
1740 | zbc_val->depth = set_table_args->depth; | ||
1741 | break; | ||
1742 | default: | ||
1743 | err = -EINVAL; | ||
1744 | } | ||
1745 | |||
1746 | if (!err) { | ||
1747 | err = gk20a_busy(g); | ||
1748 | if (!err) { | ||
1749 | err = g->ops.gr.zbc_set_table(g, &g->gr, | ||
1750 | zbc_val); | ||
1751 | gk20a_idle(g); | ||
1752 | } | ||
1753 | } | ||
1754 | |||
1755 | if (zbc_val) | ||
1756 | nvgpu_kfree(g, zbc_val); | ||
1757 | break; | ||
1758 | case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE: | ||
1759 | query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf; | ||
1760 | |||
1761 | zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params)); | ||
1762 | if (zbc_tbl == NULL) | ||
1763 | return -ENOMEM; | ||
1764 | |||
1765 | zbc_tbl->type = query_table_args->type; | ||
1766 | zbc_tbl->index_size = query_table_args->index_size; | ||
1767 | |||
1768 | err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl); | ||
1769 | |||
1770 | if (!err) { | ||
1771 | switch (zbc_tbl->type) { | ||
1772 | case GK20A_ZBC_TYPE_COLOR: | ||
1773 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
1774 | query_table_args->color_ds[i] = zbc_tbl->color_ds[i]; | ||
1775 | query_table_args->color_l2[i] = zbc_tbl->color_l2[i]; | ||
1776 | } | ||
1777 | break; | ||
1778 | case GK20A_ZBC_TYPE_DEPTH: | ||
1779 | case T19X_ZBC: | ||
1780 | query_table_args->depth = zbc_tbl->depth; | ||
1781 | break; | ||
1782 | case GK20A_ZBC_TYPE_INVALID: | ||
1783 | query_table_args->index_size = zbc_tbl->index_size; | ||
1784 | break; | ||
1785 | default: | ||
1786 | err = -EINVAL; | ||
1787 | } | ||
1788 | if (!err) { | ||
1789 | query_table_args->format = zbc_tbl->format; | ||
1790 | query_table_args->ref_cnt = zbc_tbl->ref_cnt; | ||
1791 | } | ||
1792 | } | ||
1793 | |||
1794 | if (zbc_tbl) | ||
1795 | nvgpu_kfree(g, zbc_tbl); | ||
1796 | break; | ||
1797 | |||
1798 | case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS: | ||
1799 | err = gk20a_ctrl_ioctl_gpu_characteristics( | ||
1800 | g, (struct nvgpu_gpu_get_characteristics *)buf); | ||
1801 | break; | ||
1802 | case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ: | ||
1803 | err = gk20a_ctrl_prepare_compressible_read(g, | ||
1804 | (struct nvgpu_gpu_prepare_compressible_read_args *)buf); | ||
1805 | break; | ||
1806 | case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE: | ||
1807 | err = gk20a_ctrl_mark_compressible_write(g, | ||
1808 | (struct nvgpu_gpu_mark_compressible_write_args *)buf); | ||
1809 | break; | ||
1810 | case NVGPU_GPU_IOCTL_ALLOC_AS: | ||
1811 | err = gk20a_ctrl_alloc_as(g, | ||
1812 | (struct nvgpu_alloc_as_args *)buf); | ||
1813 | break; | ||
1814 | case NVGPU_GPU_IOCTL_OPEN_TSG: | ||
1815 | err = gk20a_ctrl_open_tsg(g, | ||
1816 | (struct nvgpu_gpu_open_tsg_args *)buf); | ||
1817 | break; | ||
1818 | case NVGPU_GPU_IOCTL_GET_TPC_MASKS: | ||
1819 | err = gk20a_ctrl_get_tpc_masks(g, | ||
1820 | (struct nvgpu_gpu_get_tpc_masks_args *)buf); | ||
1821 | break; | ||
1822 | case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS: | ||
1823 | err = gk20a_ctrl_get_fbp_l2_masks(g, | ||
1824 | (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf); | ||
1825 | break; | ||
1826 | case NVGPU_GPU_IOCTL_OPEN_CHANNEL: | ||
1827 | /* struct nvgpu_channel_open_args is used here; nvgpu.h also defines the | ||
1828 | * identical nvgpu_gpu_open_channel_args, kept for naming consistency */ | ||
1829 | err = gk20a_channel_open_ioctl(g, | ||
1830 | (struct nvgpu_channel_open_args *)buf); | ||
1831 | break; | ||
1832 | case NVGPU_GPU_IOCTL_FLUSH_L2: | ||
1833 | err = nvgpu_gpu_ioctl_l2_fb_ops(g, | ||
1834 | (struct nvgpu_gpu_l2_fb_args *)buf); | ||
1835 | break; | ||
1836 | |||
1837 | case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE: | ||
1838 | err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g, | ||
1839 | (struct nvgpu_gpu_mmu_debug_mode_args *)buf); | ||
1840 | break; | ||
1841 | |||
1842 | case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE: | ||
1843 | err = gr_gk20a_elpg_protected_call(g, | ||
1844 | nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf)); | ||
1845 | break; | ||
1846 | |||
1847 | case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND: | ||
1848 | err = nvgpu_gpu_ioctl_trigger_suspend(g); | ||
1849 | break; | ||
1850 | |||
1851 | case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE: | ||
1852 | err = nvgpu_gpu_ioctl_wait_for_pause(g, | ||
1853 | (struct nvgpu_gpu_wait_pause_args *)buf); | ||
1854 | break; | ||
1855 | |||
1856 | case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE: | ||
1857 | err = nvgpu_gpu_ioctl_resume_from_pause(g); | ||
1858 | break; | ||
1859 | |||
1860 | case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS: | ||
1861 | err = nvgpu_gpu_ioctl_clear_sm_errors(g); | ||
1862 | break; | ||
1863 | |||
1864 | case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS: | ||
1865 | err = nvgpu_gpu_ioctl_has_any_exception(g, | ||
1866 | (struct nvgpu_gpu_tpc_exception_en_status_args *)buf); | ||
1867 | break; | ||
1868 | |||
1869 | case NVGPU_GPU_IOCTL_NUM_VSMS: | ||
1870 | err = gk20a_ctrl_get_num_vsms(g, | ||
1871 | (struct nvgpu_gpu_num_vsms *)buf); | ||
1872 | break; | ||
1873 | case NVGPU_GPU_IOCTL_VSMS_MAPPING: | ||
1874 | err = gk20a_ctrl_vsm_mapping(g, | ||
1875 | (struct nvgpu_gpu_vsms_mapping *)buf); | ||
1876 | break; | ||
1877 | |||
1878 | case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO: | ||
1879 | err = nvgpu_gpu_get_cpu_time_correlation_info(g, | ||
1880 | (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf); | ||
1881 | break; | ||
1882 | |||
1883 | case NVGPU_GPU_IOCTL_GET_GPU_TIME: | ||
1884 | err = nvgpu_gpu_get_gpu_time(g, | ||
1885 | (struct nvgpu_gpu_get_gpu_time_args *)buf); | ||
1886 | break; | ||
1887 | |||
1888 | case NVGPU_GPU_IOCTL_GET_ENGINE_INFO: | ||
1889 | err = nvgpu_gpu_get_engine_info(g, | ||
1890 | (struct nvgpu_gpu_get_engine_info_args *)buf); | ||
1891 | break; | ||
1892 | |||
1893 | case NVGPU_GPU_IOCTL_ALLOC_VIDMEM: | ||
1894 | err = nvgpu_gpu_alloc_vidmem(g, | ||
1895 | (struct nvgpu_gpu_alloc_vidmem_args *)buf); | ||
1896 | break; | ||
1897 | |||
1898 | case NVGPU_GPU_IOCTL_GET_MEMORY_STATE: | ||
1899 | err = nvgpu_gpu_get_memory_state(g, | ||
1900 | (struct nvgpu_gpu_get_memory_state_args *)buf); | ||
1901 | break; | ||
1902 | |||
1903 | case NVGPU_GPU_IOCTL_CLK_GET_RANGE: | ||
1904 | err = nvgpu_gpu_clk_get_range(g, priv, | ||
1905 | (struct nvgpu_gpu_clk_range_args *)buf); | ||
1906 | break; | ||
1907 | |||
1908 | case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS: | ||
1909 | err = nvgpu_gpu_clk_get_vf_points(g, priv, | ||
1910 | (struct nvgpu_gpu_clk_vf_points_args *)buf); | ||
1911 | break; | ||
1912 | |||
1913 | case NVGPU_GPU_IOCTL_CLK_SET_INFO: | ||
1914 | err = nvgpu_gpu_clk_set_info(g, priv, | ||
1915 | (struct nvgpu_gpu_clk_set_info_args *)buf); | ||
1916 | break; | ||
1917 | |||
1918 | case NVGPU_GPU_IOCTL_CLK_GET_INFO: | ||
1919 | err = nvgpu_gpu_clk_get_info(g, priv, | ||
1920 | (struct nvgpu_gpu_clk_get_info_args *)buf); | ||
1921 | break; | ||
1922 | |||
1923 | case NVGPU_GPU_IOCTL_GET_EVENT_FD: | ||
1924 | err = nvgpu_gpu_get_event_fd(g, priv, | ||
1925 | (struct nvgpu_gpu_get_event_fd_args *)buf); | ||
1926 | break; | ||
1927 | |||
1928 | case NVGPU_GPU_IOCTL_GET_VOLTAGE: | ||
1929 | err = nvgpu_gpu_get_voltage(g, | ||
1930 | (struct nvgpu_gpu_get_voltage_args *)buf); | ||
1931 | break; | ||
1932 | |||
1933 | case NVGPU_GPU_IOCTL_GET_CURRENT: | ||
1934 | err = nvgpu_gpu_get_current(g, | ||
1935 | (struct nvgpu_gpu_get_current_args *)buf); | ||
1936 | break; | ||
1937 | |||
1938 | case NVGPU_GPU_IOCTL_GET_POWER: | ||
1939 | err = nvgpu_gpu_get_power(g, | ||
1940 | (struct nvgpu_gpu_get_power_args *)buf); | ||
1941 | break; | ||
1942 | |||
1943 | case NVGPU_GPU_IOCTL_GET_TEMPERATURE: | ||
1944 | err = nvgpu_gpu_get_temperature(g, | ||
1945 | (struct nvgpu_gpu_get_temperature_args *)buf); | ||
1946 | break; | ||
1947 | |||
1948 | case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT: | ||
1949 | err = nvgpu_gpu_set_therm_alert_limit(g, | ||
1950 | (struct nvgpu_gpu_set_therm_alert_limit_args *)buf); | ||
1951 | break; | ||
1952 | |||
1953 | case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS: | ||
1954 | err = nvgpu_gpu_set_deterministic_opts(g, | ||
1955 | (struct nvgpu_gpu_set_deterministic_opts_args *)buf); | ||
1956 | break; | ||
1957 | |||
1958 | default: | ||
1959 | nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); | ||
1960 | err = -ENOTTY; | ||
1961 | break; | ||
1962 | } | ||
1963 | |||
1964 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) { | ||
1965 | if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) | ||
| err = -EFAULT; | ||
| } | ||
1966 | |||
1967 | return err; | ||
1968 | } | ||
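/*
 * Note (descriptive, added for clarity): every argument struct is marshaled
 * through the on-stack buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE]. The _IOC_DIR bits
 * encoded in the command drive the copies: _IOC_WRITE means userspace passes
 * data in (copy_from_user before the switch), _IOC_READ means the handler
 * produces data (copy_to_user after it). Handlers therefore only ever touch
 * the kernel-side copy, never the raw user pointer.
 */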
1969 | |||
1970 | static void usermode_vma_close(struct vm_area_struct *vma) | ||
1971 | { | ||
1972 | struct gk20a_ctrl_priv *priv = vma->vm_private_data; | ||
1973 | struct gk20a *g = priv->g; | ||
1974 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
1975 | |||
1976 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
1977 | priv->usermode_vma.vma = NULL; | ||
1978 | priv->usermode_vma.vma_mapped = false; | ||
1979 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
1980 | } | ||
1981 | |||
1982 | struct vm_operations_struct usermode_vma_ops = { | ||
1983 | /* no .open - we use VM_DONTCOPY and don't support fork */ | ||
1984 | .close = usermode_vma_close, | ||
1985 | }; | ||
1986 | |||
1987 | int gk20a_ctrl_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||
1988 | { | ||
1989 | struct gk20a_ctrl_priv *priv = filp->private_data; | ||
1990 | struct gk20a *g = priv->g; | ||
1991 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
1992 | u64 addr; | ||
1993 | int err; | ||
1994 | |||
1995 | if (g->ops.fifo.usermode_base == NULL) | ||
1996 | return -ENOSYS; | ||
1997 | |||
1998 | if (priv->usermode_vma.vma != NULL) | ||
1999 | return -EBUSY; | ||
2000 | |||
2001 | if (vma->vm_end - vma->vm_start != SZ_4K) | ||
2002 | return -EINVAL; | ||
2003 | |||
2004 | if (vma->vm_pgoff != 0UL) | ||
2005 | return -EINVAL; | ||
2006 | |||
2007 | addr = l->regs_bus_addr + g->ops.fifo.usermode_base(g); | ||
2008 | |||
2009 | /* Sync with poweron/poweroff, and require valid regs */ | ||
2010 | err = gk20a_busy(g); | ||
2011 | if (err) { | ||
2012 | return err; | ||
2013 | } | ||
2014 | |||
2015 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
2016 | |||
2017 | vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | | ||
2018 | VM_DONTDUMP | VM_PFNMAP; | ||
2019 | vma->vm_ops = &usermode_vma_ops; | ||
2020 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | ||
2021 | |||
2022 | err = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, | ||
2023 | vma->vm_end - vma->vm_start, vma->vm_page_prot); | ||
2024 | if (!err) { | ||
2025 | priv->usermode_vma.vma = vma; | ||
2026 | priv->usermode_vma.flags = vma->vm_flags; | ||
2027 | vma->vm_private_data = priv; | ||
2028 | priv->usermode_vma.vma_mapped = true; | ||
2029 | } | ||
2030 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
2031 | |||
2032 | gk20a_idle(g); | ||
2033 | |||
2034 | return err; | ||
2035 | } | ||
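For reference, the userspace side of this handler is constrained to a single 4 KiB mapping at page offset 0, matching the checks above. A minimal sketch, assuming the ctrl fd was opened via gk20a_ctrl_dev_open:

#include <sys/mman.h>

/* Map the usermode register window. Length must be exactly 4 KiB and
 * offset must be 0 or the driver returns -EINVAL; a second mmap on the
 * same fd fails with -EBUSY while the first mapping is alive. */
static void *map_usermode_regs(int ctrl_fd)
{
	void *regs = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			  MAP_SHARED, ctrl_fd, 0);
	return regs == MAP_FAILED ? NULL : regs;
}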
2036 | |||
2037 | static void alter_usermode_mapping(struct gk20a *g, | ||
2038 | struct gk20a_ctrl_priv *priv, | ||
2039 | bool poweroff) | ||
2040 | { | ||
2041 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
2042 | struct vm_area_struct *vma = priv->usermode_vma.vma; | ||
2043 | bool vma_mapped = priv->usermode_vma.vma_mapped; | ||
2044 | u64 addr; | ||
2045 | int err; | ||
2046 | |||
2047 | if (!vma) { | ||
2048 | /* Nothing to do - no mmap called */ | ||
2049 | return; | ||
2050 | } | ||
2051 | |||
2052 | addr = l->regs_bus_addr + g->ops.fifo.usermode_base(g); | ||
2053 | |||
2054 | down_write(&vma->vm_mm->mmap_sem); | ||
2055 | |||
2056 | /* | ||
2057 | * This is a no-op for the below cases | ||
2058 | * a) poweroff and !vma_mapped -> do nothing as no map exists | ||
2059 | * b) !poweroff and vma_mapped -> do nothing as already mapped | ||
2060 | */ | ||
2061 | if (poweroff && vma_mapped) { | ||
2062 | err = zap_vma_ptes(vma, vma->vm_start, SZ_4K); | ||
2063 | if (err == 0) { | ||
2064 | vma->vm_flags = VM_NONE; | ||
2065 | priv->usermode_vma.vma_mapped = false; | ||
2066 | } else { | ||
2067 | nvgpu_err(g, "can't remove usermode mapping"); | ||
2068 | } | ||
2069 | } else if (!poweroff && !vma_mapped) { | ||
2070 | vma->vm_flags = priv->usermode_vma.flags; | ||
2071 | err = io_remap_pfn_range(vma, vma->vm_start, | ||
2072 | addr >> PAGE_SHIFT, | ||
2073 | SZ_4K, vma->vm_page_prot); | ||
2074 | if (err != 0) { | ||
2075 | nvgpu_err(g, "can't restore usermode mapping"); | ||
2076 | vma->vm_flags = VM_NONE; | ||
2077 | } else { | ||
2078 | priv->usermode_vma.vma_mapped = true; | ||
2079 | } | ||
2080 | } | ||
2081 | |||
2082 | up_write(&vma->vm_mm->mmap_sem); | ||
2083 | } | ||
2084 | |||
2085 | static void alter_usermode_mappings(struct gk20a *g, bool poweroff) | ||
2086 | { | ||
2087 | struct gk20a_ctrl_priv *priv; | ||
2088 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
2089 | |||
2090 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
2091 | nvgpu_list_for_each_entry(priv, &l->ctrl.privs, | ||
2092 | gk20a_ctrl_priv, list) { | ||
2093 | alter_usermode_mapping(g, priv, poweroff); | ||
2094 | } | ||
2095 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
2096 | } | ||
2097 | |||
2098 | void nvgpu_hide_usermode_for_poweroff(struct gk20a *g) | ||
2099 | { | ||
2100 | alter_usermode_mappings(g, true); | ||
2101 | } | ||
2102 | |||
2103 | void nvgpu_restore_usermode_for_poweron(struct gk20a *g) | ||
2104 | { | ||
2105 | alter_usermode_mappings(g, false); | ||
2106 | } | ||
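A minimal sketch of how these two entry points are expected to pair up around a power transition. The pm hook names here are hypothetical; only the two nvgpu_*_usermode_* calls are real:

/* hypothetical pm hooks, shown only to illustrate the call pairing */
static int example_gpu_suspend(struct gk20a *g)
{
	/* zap the PTEs of any live usermode mapping before the
	 * registers go away, so userspace faults instead of touching
	 * a dead device */
	nvgpu_hide_usermode_for_poweroff(g);
	/* ... actual poweroff sequence ... */
	return 0;
}

static int example_gpu_resume(struct gk20a *g)
{
	/* ... actual poweron sequence ... */
	/* re-insert the PFN range for mappings that were zapped */
	nvgpu_restore_usermode_for_poweron(g);
	return 0;
}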
diff --git a/include/os/linux/ioctl_ctrl.h b/include/os/linux/ioctl_ctrl.h
new file mode 100644
index 0000000..3e1f798
--- /dev/null
+++ b/include/os/linux/ioctl_ctrl.h
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef __NVGPU_IOCTL_CTRL_H__ | ||
17 | #define __NVGPU_IOCTL_CTRL_H__ | ||
18 | |||
19 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp); | ||
20 | int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp); | ||
21 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
22 | int gk20a_ctrl_dev_mmap(struct file *filp, struct vm_area_struct *vma); | ||
23 | |||
24 | void nvgpu_hide_usermode_for_poweroff(struct gk20a *g); | ||
25 | void nvgpu_restore_usermode_for_poweron(struct gk20a *g); | ||
26 | |||
27 | #endif | ||
diff --git a/include/os/linux/ioctl_dbg.c b/include/os/linux/ioctl_dbg.c
new file mode 100644
index 0000000..b5a1071
--- /dev/null
+++ b/include/os/linux/ioctl_dbg.c
@@ -0,0 +1,2210 @@ | |||
1 | /* | ||
2 | * Tegra GK20A GPU Debugger/Profiler Driver | ||
3 | * | ||
4 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/fs.h> | ||
20 | #include <linux/file.h> | ||
21 | #include <linux/cdev.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/dma-buf.h> | ||
24 | #include <linux/poll.h> | ||
25 | #include <uapi/linux/nvgpu.h> | ||
26 | |||
27 | #include <nvgpu/kmem.h> | ||
28 | #include <nvgpu/log.h> | ||
29 | #include <nvgpu/vm.h> | ||
30 | #include <nvgpu/atomic.h> | ||
31 | #include <nvgpu/cond.h> | ||
32 | #include <nvgpu/utils.h> | ||
33 | #include <nvgpu/gk20a.h> | ||
34 | #include <nvgpu/channel.h> | ||
35 | #include <nvgpu/tsg.h> | ||
36 | |||
37 | #include <nvgpu/linux/vm.h> | ||
38 | |||
39 | #include "gk20a/gr_gk20a.h" | ||
40 | #include "gk20a/regops_gk20a.h" | ||
41 | #include "gk20a/dbg_gpu_gk20a.h" | ||
42 | #include "os_linux.h" | ||
43 | #include "platform_gk20a.h" | ||
44 | #include "ioctl_dbg.h" | ||
45 | #include "ioctl_channel.h" | ||
46 | #include "dmabuf_vidmem.h" | ||
47 | |||
48 | struct dbg_session_gk20a_linux { | ||
49 | struct device *dev; | ||
50 | struct dbg_session_gk20a dbg_s; | ||
51 | }; | ||
52 | |||
53 | struct dbg_session_channel_data_linux { | ||
54 | /* | ||
55 | * We have to keep a ref to the _file_, not the channel, because | ||
56 | * close(channel_fd) is synchronous and would deadlock if we had an | ||
57 | * open debug session fd holding a channel ref at that time. Holding a | ||
58 | * ref to the file makes close(channel_fd) just drop a kernel ref to | ||
59 | * the file; the channel will close when the last file ref is dropped. | ||
60 | */ | ||
61 | struct file *ch_f; | ||
62 | struct dbg_session_channel_data ch_data; | ||
63 | }; | ||
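A distilled sketch of the ref discipline described in the comment above, as used by dbg_bind_channel_gk20a() and dbg_unbind_single_channel_gk20a() further down; error handling is trimmed:

#include <linux/file.h>
#include <linux/errno.h>

static int example_bind(struct dbg_session_channel_data_linux *cdl, int fd)
{
	struct file *f = fget(fd);	/* pin the file, not the channel */

	if (!f)
		return -ENODEV;
	cdl->ch_f = f;
	return 0;
}

static void example_unbind(struct dbg_session_channel_data_linux *cdl)
{
	/* may drop the last file ref; only then does the channel close,
	 * so close(channel_fd) itself never blocks on our session */
	fput(cdl->ch_f);
	cdl->ch_f = NULL;
}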
64 | /* turn seriously unwieldy names -> something shorter */ | ||
65 | #define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x | ||
66 | |||
67 | /* silly allocator - just increment id */ | ||
68 | static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); | ||
69 | static int generate_unique_id(void) | ||
70 | { | ||
71 | return nvgpu_atomic_add_return(1, &unique_id); | ||
72 | } | ||
73 | |||
74 | static int alloc_profiler(struct gk20a *g, | ||
75 | struct dbg_profiler_object_data **_prof) | ||
76 | { | ||
77 | struct dbg_profiler_object_data *prof; | ||
78 | *_prof = NULL; | ||
79 | |||
80 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
81 | |||
82 | prof = nvgpu_kzalloc(g, sizeof(*prof)); | ||
83 | if (!prof) | ||
84 | return -ENOMEM; | ||
85 | |||
86 | prof->prof_handle = generate_unique_id(); | ||
87 | *_prof = prof; | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux) | ||
92 | { | ||
93 | struct dbg_session_gk20a_linux *dbg_s_linux; | ||
94 | *_dbg_s_linux = NULL; | ||
95 | |||
96 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
97 | |||
98 | dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux)); | ||
99 | if (!dbg_s_linux) | ||
100 | return -ENOMEM; | ||
101 | |||
102 | dbg_s_linux->dbg_s.id = generate_unique_id(); | ||
103 | *_dbg_s_linux = dbg_s_linux; | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset); | ||
108 | |||
109 | static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
110 | struct nvgpu_dbg_gpu_exec_reg_ops_args *args); | ||
111 | |||
112 | static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | ||
113 | struct nvgpu_dbg_gpu_powergate_args *args); | ||
114 | |||
115 | static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
116 | struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); | ||
117 | |||
118 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
119 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); | ||
120 | |||
121 | static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode( | ||
122 | struct dbg_session_gk20a *dbg_s, | ||
123 | struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args); | ||
124 | |||
125 | static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | ||
126 | struct dbg_session_gk20a *dbg_s, | ||
127 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); | ||
128 | |||
129 | static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s, | ||
130 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); | ||
131 | |||
132 | static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux, | ||
133 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); | ||
134 | |||
135 | static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, | ||
136 | struct nvgpu_dbg_gpu_profiler_reserve_args *args); | ||
137 | |||
138 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
139 | struct nvgpu_dbg_gpu_perfbuf_map_args *args); | ||
140 | |||
141 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
142 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); | ||
143 | |||
144 | static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, | ||
145 | int timeout_mode); | ||
146 | |||
147 | static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, | ||
148 | u32 profiler_handle); | ||
149 | |||
150 | static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s); | ||
151 | |||
152 | static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s); | ||
153 | |||
154 | static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, | ||
155 | u32 profiler_handle); | ||
156 | |||
157 | static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s); | ||
158 | |||
159 | static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, | ||
160 | struct file *filp, bool is_profiler); | ||
161 | |||
162 | static int nvgpu_set_sm_exception_type_mask_locked( | ||
163 | struct dbg_session_gk20a *dbg_s, | ||
164 | u32 exception_mask); | ||
165 | |||
166 | unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) | ||
167 | { | ||
168 | unsigned int mask = 0; | ||
169 | struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data; | ||
170 | struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; | ||
171 | struct gk20a *g = dbg_s->g; | ||
172 | |||
173 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
174 | |||
175 | poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait); | ||
176 | |||
177 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
178 | |||
179 | if (dbg_s->dbg_events.events_enabled && | ||
180 | dbg_s->dbg_events.num_pending_events > 0) { | ||
181 | nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d", | ||
182 | dbg_s->id); | ||
183 | nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", | ||
184 | dbg_s->dbg_events.num_pending_events); | ||
185 | mask = (POLLPRI | POLLIN); | ||
186 | } | ||
187 | |||
188 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
189 | |||
190 | return mask; | ||
191 | } | ||
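On the userspace side, a debugger waits for session events with poll(); the handler above reports readiness as POLLPRI | POLLIN. A minimal sketch:

#include <poll.h>

/* Block until the dbg session fd signals a pending event. Returns 1 on
 * event, 0 on timeout, negative on error. */
static int wait_for_dbg_event(int dbg_fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = dbg_fd, .events = POLLPRI | POLLIN };
	int n = poll(&pfd, 1, timeout_ms);

	if (n <= 0)
		return n;
	return (pfd.revents & (POLLPRI | POLLIN)) ? 1 : 0;
}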
192 | |||
193 | int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) | ||
194 | { | ||
195 | struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data; | ||
196 | struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; | ||
197 | struct gk20a *g = dbg_s->g; | ||
198 | struct dbg_profiler_object_data *prof_obj, *tmp_obj; | ||
199 | |||
200 | nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name); | ||
201 | |||
202 | /* unbind channels */ | ||
203 | dbg_unbind_all_channels_gk20a(dbg_s); | ||
204 | |||
205 | /* Powergate/timeout enable is called here because a dbg session that | ||
206 | * issued the powergate/timeout disable ioctl may be killed before it | ||
207 | * ever issues the corresponding enable ioctl. | ||
208 | */ | ||
209 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
210 | if (dbg_s->is_pg_disabled) { | ||
211 | nvgpu_set_powergate_locked(dbg_s, false); | ||
212 | } | ||
213 | nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); | ||
214 | |||
215 | /* If this session owned the perf buffer, release it */ | ||
216 | if (g->perfbuf.owner == dbg_s) | ||
217 | gk20a_perfbuf_release_locked(g, g->perfbuf.offset); | ||
218 | |||
219 | /* Per-context profiler objects were released when we called | ||
220 | * dbg_unbind_all_channels. We could still have global ones. | ||
221 | */ | ||
222 | nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, | ||
223 | dbg_profiler_object_data, prof_obj_entry) { | ||
224 | if (prof_obj->session_id == dbg_s->id) { | ||
225 | if (prof_obj->has_reservation) | ||
226 | g->ops.dbg_session_ops. | ||
227 | release_profiler_reservation(dbg_s, prof_obj); | ||
228 | nvgpu_list_del(&prof_obj->prof_obj_entry); | ||
229 | nvgpu_kfree(g, prof_obj); | ||
230 | } | ||
231 | } | ||
232 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
233 | |||
234 | nvgpu_mutex_destroy(&dbg_s->ch_list_lock); | ||
235 | nvgpu_mutex_destroy(&dbg_s->ioctl_lock); | ||
236 | |||
237 | nvgpu_kfree(g, dbg_session_linux); | ||
238 | gk20a_put(g); | ||
239 | |||
240 | return 0; | ||
241 | } | ||
242 | |||
243 | int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) | ||
244 | { | ||
245 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
246 | struct nvgpu_os_linux, prof.cdev); | ||
247 | struct gk20a *g = &l->g; | ||
248 | |||
249 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
250 | return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); | ||
251 | } | ||
252 | |||
253 | static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, | ||
254 | struct nvgpu_dbg_gpu_timeout_args *args) | ||
255 | { | ||
256 | int err; | ||
257 | struct gk20a *g = dbg_s->g; | ||
258 | |||
259 | nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable); | ||
260 | |||
261 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
262 | err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); | ||
263 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
264 | |||
265 | return err; | ||
266 | } | ||
267 | |||
268 | static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( | ||
269 | struct dbg_session_gk20a *dbg_s, | ||
270 | struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) | ||
271 | { | ||
272 | struct gk20a *g = dbg_s->g; | ||
273 | struct gr_gk20a *gr = &g->gr; | ||
274 | struct nvgpu_tsg_sm_error_state *sm_error_state; | ||
275 | struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record = {0}; | ||
276 | struct channel_gk20a *ch; | ||
277 | struct tsg_gk20a *tsg; | ||
278 | u32 sm_id; | ||
279 | int err = 0; | ||
280 | |||
281 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
282 | if (ch == NULL) { | ||
283 | return -EINVAL; | ||
284 | } | ||
285 | |||
286 | tsg = tsg_gk20a_from_ch(ch); | ||
287 | if (tsg == NULL) { | ||
288 | nvgpu_err(g, "no valid tsg from ch"); | ||
289 | return -EINVAL; | ||
290 | } | ||
291 | |||
292 | sm_id = args->sm_id; | ||
293 | if (sm_id >= gr->no_of_sm) { | ||
294 | return -EINVAL; | ||
295 | } | ||
296 | |||
297 | if (tsg->sm_error_states == NULL) { | ||
298 | return -EINVAL; | ||
299 | } | ||
300 | |||
301 | nvgpu_speculation_barrier(); | ||
302 | |||
303 | sm_error_state = tsg->sm_error_states + sm_id; | ||
304 | sm_error_state_record.hww_global_esr = | ||
305 | sm_error_state->hww_global_esr; | ||
306 | sm_error_state_record.hww_warp_esr = | ||
307 | sm_error_state->hww_warp_esr; | ||
308 | sm_error_state_record.hww_warp_esr_pc = | ||
309 | sm_error_state->hww_warp_esr_pc; | ||
310 | sm_error_state_record.hww_global_esr_report_mask = | ||
311 | sm_error_state->hww_global_esr_report_mask; | ||
312 | sm_error_state_record.hww_warp_esr_report_mask = | ||
313 | sm_error_state->hww_warp_esr_report_mask; | ||
314 | |||
315 | if (args->sm_error_state_record_size > 0) { | ||
316 | size_t write_size = sizeof(sm_error_state_record); | ||
317 | |||
318 | nvgpu_speculation_barrier(); | ||
319 | if (write_size > args->sm_error_state_record_size) | ||
320 | write_size = args->sm_error_state_record_size; | ||
321 | |||
322 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
323 | err = copy_to_user((void __user *)(uintptr_t) | ||
324 | args->sm_error_state_record_mem, | ||
325 | &sm_error_state_record, | ||
326 | write_size); | ||
327 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
328 | if (err != 0) { | ||
329 | nvgpu_err(g, "copy_to_user failed!"); | ||
330 | return -EFAULT; /* err holds bytes not copied, not an errno */ | ||
331 | } | ||
332 | |||
333 | args->sm_error_state_record_size = write_size; | ||
334 | } | ||
335 | |||
336 | return 0; | ||
337 | } | ||
338 | |||
339 | |||
340 | static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( | ||
341 | struct dbg_session_gk20a *dbg_s, | ||
342 | struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args) | ||
343 | { | ||
344 | struct gk20a *g = dbg_s->g; | ||
345 | |||
346 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
347 | |||
348 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
349 | |||
350 | dbg_s->broadcast_stop_trigger = (args->broadcast != 0); | ||
351 | |||
352 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
353 | |||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, | ||
358 | int timeout_mode) | ||
359 | { | ||
360 | struct gk20a *g = dbg_s->g; | ||
361 | int err = 0; | ||
362 | |||
363 | nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d", | ||
364 | timeout_mode); | ||
365 | |||
366 | nvgpu_speculation_barrier(); | ||
367 | switch (timeout_mode) { | ||
368 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE: | ||
369 | if (dbg_s->is_timeout_disabled == true) | ||
370 | nvgpu_atomic_dec(&g->timeouts_disabled_refcount); | ||
371 | dbg_s->is_timeout_disabled = false; | ||
372 | break; | ||
373 | |||
374 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE: | ||
375 | if (dbg_s->is_timeout_disabled == false) | ||
376 | nvgpu_atomic_inc(&g->timeouts_disabled_refcount); | ||
377 | dbg_s->is_timeout_disabled = true; | ||
378 | break; | ||
379 | |||
380 | default: | ||
381 | nvgpu_err(g, | ||
382 | "unrecognized dbg gpu timeout mode : 0x%x", | ||
383 | timeout_mode); | ||
384 | err = -EINVAL; | ||
385 | break; | ||
386 | } | ||
387 | |||
388 | if (!err) | ||
389 | nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, " | ||
390 | "timeouts disabled refcount %d", | ||
391 | dbg_s->is_timeout_disabled ? "true" : "false", | ||
392 | nvgpu_atomic_read(&g->timeouts_disabled_refcount)); | ||
393 | return err; | ||
394 | } | ||
395 | |||
396 | static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, | ||
397 | struct file *filp, bool is_profiler) | ||
398 | { | ||
399 | struct nvgpu_os_linux *l; | ||
400 | struct dbg_session_gk20a_linux *dbg_session_linux; | ||
401 | struct dbg_session_gk20a *dbg_s; | ||
402 | struct gk20a *g; | ||
403 | |||
404 | struct device *dev; | ||
405 | |||
406 | int err; | ||
407 | |||
408 | if (!is_profiler) | ||
409 | l = container_of(inode->i_cdev, | ||
410 | struct nvgpu_os_linux, dbg.cdev); | ||
411 | else | ||
412 | l = container_of(inode->i_cdev, | ||
413 | struct nvgpu_os_linux, prof.cdev); | ||
414 | g = gk20a_get(&l->g); | ||
415 | if (!g) | ||
416 | return -ENODEV; | ||
417 | |||
418 | dev = dev_from_gk20a(g); | ||
419 | |||
420 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name); | ||
421 | |||
422 | err = alloc_session(g, &dbg_session_linux); | ||
423 | if (err) | ||
424 | goto free_ref; | ||
425 | |||
426 | dbg_s = &dbg_session_linux->dbg_s; | ||
427 | |||
428 | filp->private_data = dbg_session_linux; | ||
429 | dbg_session_linux->dev = dev; | ||
430 | dbg_s->g = g; | ||
431 | dbg_s->is_profiler = is_profiler; | ||
432 | dbg_s->is_pg_disabled = false; | ||
433 | dbg_s->is_timeout_disabled = false; | ||
434 | |||
435 | nvgpu_cond_init(&dbg_s->dbg_events.wait_queue); | ||
436 | nvgpu_init_list_node(&dbg_s->ch_list); | ||
437 | err = nvgpu_mutex_init(&dbg_s->ch_list_lock); | ||
438 | if (err) | ||
439 | goto err_free_session; | ||
440 | err = nvgpu_mutex_init(&dbg_s->ioctl_lock); | ||
441 | if (err) | ||
442 | goto err_destroy_lock; | ||
443 | dbg_s->dbg_events.events_enabled = false; | ||
444 | dbg_s->dbg_events.num_pending_events = 0; | ||
445 | |||
446 | return 0; | ||
447 | |||
448 | err_destroy_lock: | ||
449 | nvgpu_mutex_destroy(&dbg_s->ch_list_lock); | ||
450 | err_free_session: | ||
451 | nvgpu_kfree(g, dbg_session_linux); | ||
452 | free_ref: | ||
453 | gk20a_put(g); | ||
454 | return err; | ||
455 | } | ||
456 | |||
457 | void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) | ||
458 | { | ||
459 | nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue); | ||
460 | } | ||
461 | |||
462 | static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s, | ||
463 | struct dbg_session_channel_data *ch_data) | ||
464 | { | ||
465 | struct gk20a *g = dbg_s->g; | ||
466 | u32 chid; | ||
467 | struct dbg_session_data *session_data; | ||
468 | struct dbg_profiler_object_data *prof_obj, *tmp_obj; | ||
469 | struct dbg_session_channel_data_linux *ch_data_linux; | ||
470 | |||
471 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
472 | |||
473 | chid = ch_data->chid; | ||
474 | |||
475 | /* If there's a profiler ctx reservation record associated with this | ||
476 | * session/channel pair, release it. | ||
477 | */ | ||
478 | nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, | ||
479 | dbg_profiler_object_data, prof_obj_entry) { | ||
480 | if ((prof_obj->session_id == dbg_s->id) && | ||
481 | (prof_obj->ch->chid == chid)) { | ||
482 | if (prof_obj->has_reservation) { | ||
483 | g->ops.dbg_session_ops. | ||
484 | release_profiler_reservation(dbg_s, prof_obj); | ||
485 | } | ||
486 | nvgpu_list_del(&prof_obj->prof_obj_entry); | ||
487 | nvgpu_kfree(g, prof_obj); | ||
488 | } | ||
489 | } | ||
490 | |||
491 | nvgpu_list_del(&ch_data->ch_entry); | ||
492 | |||
493 | session_data = ch_data->session_data; | ||
494 | nvgpu_list_del(&session_data->dbg_s_entry); | ||
495 | nvgpu_kfree(dbg_s->g, session_data); | ||
496 | |||
497 | ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux, | ||
498 | ch_data); | ||
499 | |||
500 | fput(ch_data_linux->ch_f); | ||
501 | nvgpu_kfree(dbg_s->g, ch_data_linux); | ||
502 | |||
503 | return 0; | ||
504 | } | ||
505 | |||
506 | static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, | ||
507 | struct nvgpu_dbg_gpu_bind_channel_args *args) | ||
508 | { | ||
509 | struct file *f; | ||
510 | struct gk20a *g = dbg_s->g; | ||
511 | struct channel_gk20a *ch; | ||
512 | struct dbg_session_channel_data_linux *ch_data_linux; | ||
513 | struct dbg_session_data *session_data; | ||
514 | int err = 0; | ||
515 | |||
516 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", | ||
517 | g->name, args->channel_fd); | ||
518 | |||
519 | /* | ||
520 | * Although gk20a_get_channel_from_file gives us a channel ref, we need | ||
521 | * to hold a ref to the file during the session lifetime. See comment in | ||
522 | * struct dbg_session_channel_data. | ||
523 | */ | ||
524 | f = fget(args->channel_fd); | ||
525 | if (!f) | ||
526 | return -ENODEV; | ||
527 | |||
528 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
529 | if (!ch) { | ||
530 | nvgpu_log_fn(g, "no channel found for fd"); | ||
531 | err = -EINVAL; | ||
532 | goto out_fput; | ||
533 | } | ||
534 | |||
535 | nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid); | ||
536 | |||
537 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
538 | nvgpu_mutex_acquire(&ch->dbg_s_lock); | ||
539 | |||
540 | ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux)); | ||
541 | if (!ch_data_linux) { | ||
542 | err = -ENOMEM; | ||
543 | goto out_chput; | ||
544 | } | ||
545 | ch_data_linux->ch_f = f; | ||
546 | ch_data_linux->ch_data.channel_fd = args->channel_fd; | ||
547 | ch_data_linux->ch_data.chid = ch->chid; | ||
548 | ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a; | ||
549 | nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry); | ||
550 | |||
551 | session_data = nvgpu_kzalloc(g, sizeof(*session_data)); | ||
552 | if (!session_data) { | ||
553 | err = -ENOMEM; | ||
554 | goto out_kfree; | ||
555 | } | ||
556 | session_data->dbg_s = dbg_s; | ||
557 | nvgpu_init_list_node(&session_data->dbg_s_entry); | ||
558 | ch_data_linux->ch_data.session_data = session_data; | ||
559 | |||
560 | nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list); | ||
561 | |||
562 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
563 | nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list); | ||
564 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
565 | |||
566 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
567 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
568 | |||
569 | gk20a_channel_put(ch); | ||
570 | |||
571 | return 0; | ||
572 | |||
573 | out_kfree: | ||
574 | nvgpu_kfree(g, ch_data_linux); | ||
575 | out_chput: | ||
576 | gk20a_channel_put(ch); | ||
577 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
578 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
579 | out_fput: | ||
580 | fput(f); | ||
581 | return err; | ||
582 | } | ||
583 | |||
584 | static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s) | ||
585 | { | ||
586 | struct dbg_session_channel_data *ch_data, *tmp; | ||
587 | struct gk20a *g = dbg_s->g; | ||
588 | |||
589 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
590 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
591 | nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, | ||
592 | dbg_session_channel_data, ch_entry) | ||
593 | ch_data->unbind_single_channel(dbg_s, ch_data); | ||
594 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
595 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
596 | |||
597 | return 0; | ||
598 | } | ||
599 | |||
600 | /* | ||
601 | * Convert common regops op values of the form NVGPU_DBG_REG_OP_* | ||
602 | * into linux regops op values of the form NVGPU_DBG_GPU_REG_OP_* | ||
603 | */ | ||
604 | static u32 nvgpu_get_regops_op_values_linux(u32 regops_op) | ||
605 | { | ||
606 | switch (regops_op) { | ||
607 | case REGOP(READ_32): | ||
608 | return REGOP_LINUX(READ_32); | ||
609 | case REGOP(WRITE_32): | ||
610 | return REGOP_LINUX(WRITE_32); | ||
611 | case REGOP(READ_64): | ||
612 | return REGOP_LINUX(READ_64); | ||
613 | case REGOP(WRITE_64): | ||
614 | return REGOP_LINUX(WRITE_64); | ||
615 | case REGOP(READ_08): | ||
616 | return REGOP_LINUX(READ_08); | ||
617 | case REGOP(WRITE_08): | ||
618 | return REGOP_LINUX(WRITE_08); | ||
619 | } | ||
620 | |||
621 | return regops_op; | ||
622 | } | ||
623 | |||
624 | /* | ||
625 | * Convert linux regops op values of the form NVGPU_DBG_GPU_REG_OP_* | ||
626 | * into common regops op values of the form NVGPU_DBG_REG_OP_* | ||
627 | */ | ||
628 | static u32 nvgpu_get_regops_op_values_common(u32 regops_op) | ||
629 | { | ||
630 | switch (regops_op) { | ||
631 | case REGOP_LINUX(READ_32): | ||
632 | return REGOP(READ_32); | ||
633 | case REGOP_LINUX(WRITE_32): | ||
634 | return REGOP(WRITE_32); | ||
635 | case REGOP_LINUX(READ_64): | ||
636 | return REGOP(READ_64); | ||
637 | case REGOP_LINUX(WRITE_64): | ||
638 | return REGOP(WRITE_64); | ||
639 | case REGOP_LINUX(READ_08): | ||
640 | return REGOP(READ_08); | ||
641 | case REGOP_LINUX(WRITE_08): | ||
642 | return REGOP(WRITE_08); | ||
643 | } | ||
644 | |||
645 | return regops_op; | ||
646 | } | ||
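Since every case above maps a unique value both ways, the two converters are mutual inverses on the known ops (unknown values pass through unchanged in both directions). A small self-check sketch:

/* sketch only: holds for every op listed in the switches above */
static bool example_regops_op_roundtrip(u32 op)
{
	return nvgpu_get_regops_op_values_common(
			nvgpu_get_regops_op_values_linux(op)) == op;
}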
647 | |||
648 | /* | ||
649 | * Convert common regops type values of the form NVGPU_DBG_REG_OP_TYPE_* | ||
650 | * into linux regops type values of the form NVGPU_DBG_GPU_REG_OP_TYPE_* | ||
651 | */ | ||
652 | static u32 nvgpu_get_regops_type_values_linux(u32 regops_type) | ||
653 | { | ||
654 | switch (regops_type) { | ||
655 | case REGOP(TYPE_GLOBAL): | ||
656 | return REGOP_LINUX(TYPE_GLOBAL); | ||
657 | case REGOP(TYPE_GR_CTX): | ||
658 | return REGOP_LINUX(TYPE_GR_CTX); | ||
659 | case REGOP(TYPE_GR_CTX_TPC): | ||
660 | return REGOP_LINUX(TYPE_GR_CTX_TPC); | ||
661 | case REGOP(TYPE_GR_CTX_SM): | ||
662 | return REGOP_LINUX(TYPE_GR_CTX_SM); | ||
663 | case REGOP(TYPE_GR_CTX_CROP): | ||
664 | return REGOP_LINUX(TYPE_GR_CTX_CROP); | ||
665 | case REGOP(TYPE_GR_CTX_ZROP): | ||
666 | return REGOP_LINUX(TYPE_GR_CTX_ZROP); | ||
667 | case REGOP(TYPE_GR_CTX_QUAD): | ||
668 | return REGOP_LINUX(TYPE_GR_CTX_QUAD); | ||
669 | } | ||
670 | |||
671 | return regops_type; | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * Convert linux regops type values of the form NVGPU_DBG_GPU_REG_OP_TYPE_* | ||
676 | * into common regops type values of the form NVGPU_DBG_REG_OP_TYPE_* | ||
677 | */ | ||
678 | static u32 nvgpu_get_regops_type_values_common(u32 regops_type) | ||
679 | { | ||
680 | switch (regops_type) { | ||
681 | case REGOP_LINUX(TYPE_GLOBAL): | ||
682 | return REGOP(TYPE_GLOBAL); | ||
683 | case REGOP_LINUX(TYPE_GR_CTX): | ||
684 | return REGOP(TYPE_GR_CTX); | ||
685 | case REGOP_LINUX(TYPE_GR_CTX_TPC): | ||
686 | return REGOP(TYPE_GR_CTX_TPC); | ||
687 | case REGOP_LINUX(TYPE_GR_CTX_SM): | ||
688 | return REGOP(TYPE_GR_CTX_SM); | ||
689 | case REGOP_LINUX(TYPE_GR_CTX_CROP): | ||
690 | return REGOP(TYPE_GR_CTX_CROP); | ||
691 | case REGOP_LINUX(TYPE_GR_CTX_ZROP): | ||
692 | return REGOP(TYPE_GR_CTX_ZROP); | ||
693 | case REGOP_LINUX(TYPE_GR_CTX_QUAD): | ||
694 | return REGOP(TYPE_GR_CTX_QUAD); | ||
695 | } | ||
696 | |||
697 | return regops_type; | ||
698 | } | ||
699 | |||
700 | /* | ||
701 | * Convert common regops status values of the form NVGPU_DBG_REG_OP_STATUS_* | ||
702 | * into linux regops status values of the form NVGPU_DBG_GPU_REG_OP_STATUS_* | ||
703 | */ | ||
704 | static u32 nvgpu_get_regops_status_values_linux(u32 regops_status) | ||
705 | { | ||
706 | switch (regops_status) { | ||
707 | case REGOP(STATUS_SUCCESS): | ||
708 | return REGOP_LINUX(STATUS_SUCCESS); | ||
709 | case REGOP(STATUS_INVALID_OP): | ||
710 | return REGOP_LINUX(STATUS_INVALID_OP); | ||
711 | case REGOP(STATUS_INVALID_TYPE): | ||
712 | return REGOP_LINUX(STATUS_INVALID_TYPE); | ||
713 | case REGOP(STATUS_INVALID_OFFSET): | ||
714 | return REGOP_LINUX(STATUS_INVALID_OFFSET); | ||
715 | case REGOP(STATUS_UNSUPPORTED_OP): | ||
716 | return REGOP_LINUX(STATUS_UNSUPPORTED_OP); | ||
717 | case REGOP(STATUS_INVALID_MASK): | ||
718 | return REGOP_LINUX(STATUS_INVALID_MASK); | ||
719 | } | ||
720 | |||
721 | return regops_status; | ||
722 | } | ||
723 | |||
724 | /* | ||
725 | * Convert linux regops status values of the form NVGPU_DBG_GPU_REG_OP_STATUS_* | ||
726 | * into common regops status values of the form NVGPU_DBG_REG_OP_STATUS_* | ||
727 | */ | ||
728 | static u32 nvgpu_get_regops_status_values_common(u32 regops_status) | ||
729 | { | ||
730 | switch (regops_status) { | ||
731 | case REGOP_LINUX(STATUS_SUCCESS): | ||
732 | return REGOP(STATUS_SUCCESS); | ||
733 | case REGOP_LINUX(STATUS_INVALID_OP): | ||
734 | return REGOP(STATUS_INVALID_OP); | ||
735 | case REGOP_LINUX(STATUS_INVALID_TYPE): | ||
736 | return REGOP(STATUS_INVALID_TYPE); | ||
737 | case REGOP_LINUX(STATUS_INVALID_OFFSET): | ||
738 | return REGOP(STATUS_INVALID_OFFSET); | ||
739 | case REGOP_LINUX(STATUS_UNSUPPORTED_OP): | ||
740 | return REGOP(STATUS_UNSUPPORTED_OP); | ||
741 | case REGOP_LINUX(STATUS_INVALID_MASK): | ||
742 | return REGOP(STATUS_INVALID_MASK); | ||
743 | } | ||
744 | |||
745 | return regops_status; | ||
746 | } | ||
747 | |||
748 | static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in, | ||
749 | struct nvgpu_dbg_reg_op *out, u32 num_ops) | ||
750 | { | ||
751 | u32 i; | ||
752 | |||
753 | if (in == NULL || out == NULL) | ||
754 | return -ENOMEM; | ||
755 | |||
756 | for (i = 0; i < num_ops; i++) { | ||
757 | out[i].op = nvgpu_get_regops_op_values_common(in[i].op); | ||
758 | out[i].type = nvgpu_get_regops_type_values_common(in[i].type); | ||
759 | out[i].status = nvgpu_get_regops_status_values_common(in[i].status); | ||
760 | out[i].quad = in[i].quad; | ||
761 | out[i].group_mask = in[i].group_mask; | ||
762 | out[i].sub_group_mask = in[i].sub_group_mask; | ||
763 | out[i].offset = in[i].offset; | ||
764 | out[i].value_lo = in[i].value_lo; | ||
765 | out[i].value_hi = in[i].value_hi; | ||
766 | out[i].and_n_mask_lo = in[i].and_n_mask_lo; | ||
767 | out[i].and_n_mask_hi = in[i].and_n_mask_hi; | ||
768 | } | ||
769 | |||
770 | return 0; | ||
771 | } | ||
772 | |||
773 | static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in, | ||
774 | struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops) | ||
775 | { | ||
776 | u32 i; | ||
777 | |||
778 | if (in == NULL || out == NULL) | ||
779 | return -ENOMEM; | ||
780 | |||
781 | for (i = 0; i < num_ops; i++) { | ||
782 | out[i].op = nvgpu_get_regops_op_values_linux(in[i].op); | ||
783 | out[i].type = nvgpu_get_regops_type_values_linux(in[i].type); | ||
784 | out[i].status = nvgpu_get_regops_status_values_linux(in[i].status); | ||
785 | out[i].quad = in[i].quad; | ||
786 | out[i].group_mask = in[i].group_mask; | ||
787 | out[i].sub_group_mask = in[i].sub_group_mask; | ||
788 | out[i].offset = in[i].offset; | ||
789 | out[i].value_lo = in[i].value_lo; | ||
790 | out[i].value_hi = in[i].value_hi; | ||
791 | out[i].and_n_mask_lo = in[i].and_n_mask_lo; | ||
792 | out[i].and_n_mask_hi = in[i].and_n_mask_hi; | ||
793 | } | ||
794 | |||
795 | return 0; | ||
796 | } | ||
797 | |||
798 | static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
799 | struct nvgpu_dbg_gpu_exec_reg_ops_args *args) | ||
800 | { | ||
801 | int err = 0, powergate_err = 0; | ||
802 | bool is_pg_disabled = false; | ||
803 | |||
804 | struct gk20a *g = dbg_s->g; | ||
805 | struct channel_gk20a *ch; | ||
806 | |||
807 | bool is_current_ctx; | ||
808 | |||
809 | |||
810 | nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); | ||
811 | |||
812 | if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) { | ||
813 | nvgpu_err(g, "regops limit exceeded"); | ||
814 | return -EINVAL; | ||
815 | } | ||
816 | |||
817 | if (args->num_ops == 0) { | ||
818 | /* Nothing to do */ | ||
819 | return 0; | ||
820 | } | ||
821 | |||
822 | if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) { | ||
823 | nvgpu_err(g, "reg ops work buffer not allocated"); | ||
824 | return -ENODEV; | ||
825 | } | ||
826 | |||
827 | if (!dbg_s->id) { | ||
828 | nvgpu_err(g, "can't call reg_ops on an unbound debugger session"); | ||
829 | return -EINVAL; | ||
830 | } | ||
831 | |||
832 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
833 | if (!dbg_s->is_profiler && !ch) { | ||
834 | nvgpu_err(g, "bind a channel before regops for a debugging session"); | ||
835 | return -EINVAL; | ||
836 | } | ||
837 | |||
838 | /* since exec_reg_ops sends methods to the ucode, it must take the | ||
839 | * global gpu lock to protect against mixing methods from debug sessions | ||
840 | * on other channels */ | ||
841 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
842 | |||
843 | if (!dbg_s->is_pg_disabled && !g->is_virtual) { | ||
844 | /* In the virtual case, the server will handle | ||
845 | * disabling/enabling powergating when processing reg ops | ||
846 | */ | ||
847 | powergate_err = nvgpu_set_powergate_locked(dbg_s, true); | ||
848 | if (!powergate_err) { | ||
849 | is_pg_disabled = true; | ||
850 | } | ||
851 | } | ||
852 | |||
853 | if (!powergate_err) { | ||
854 | u64 ops_offset = 0; /* index offset */ | ||
855 | |||
856 | struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL; | ||
857 | |||
858 | linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops * | ||
859 | sizeof(struct nvgpu_dbg_gpu_reg_op)); | ||
860 | |||
861 | if (!linux_fragment) { | ||
 | /* must not return with dbg_sessions_lock held or | ||
 | * powergate left disabled */ | ||
 | if (is_pg_disabled) | ||
 | nvgpu_set_powergate_locked(dbg_s, false); | ||
 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
862 | return -ENOMEM; | ||
 | } | ||
863 | |||
864 | while (ops_offset < args->num_ops && !err) { | ||
865 | const u64 num_ops = | ||
866 | min(args->num_ops - ops_offset, | ||
867 | (u64)(g->dbg_regops_tmp_buf_ops)); | ||
868 | const u64 fragment_size = | ||
869 | num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op); | ||
870 | |||
871 | void __user *const fragment = | ||
872 | (void __user *)(uintptr_t) | ||
873 | (args->ops + | ||
874 | ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op)); | ||
875 | |||
876 | nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu", | ||
877 | ops_offset, num_ops); | ||
878 | |||
879 | nvgpu_log_fn(g, "Copying regops from userspace"); | ||
880 | |||
881 | if (copy_from_user(linux_fragment, | ||
882 | fragment, fragment_size)) { | ||
883 | nvgpu_err(g, "copy_from_user failed!"); | ||
884 | err = -EFAULT; | ||
885 | break; | ||
886 | } | ||
887 | |||
888 | err = nvgpu_get_regops_data_common(linux_fragment, | ||
889 | g->dbg_regops_tmp_buf, num_ops); | ||
890 | |||
891 | if (err) | ||
892 | break; | ||
893 | |||
894 | err = g->ops.regops.exec_regops( | ||
895 | dbg_s, g->dbg_regops_tmp_buf, num_ops, &is_current_ctx); | ||
896 | |||
897 | if (err) { | ||
898 | break; | ||
899 | } | ||
900 | |||
901 | if (ops_offset == 0) { | ||
902 | args->gr_ctx_resident = is_current_ctx; | ||
903 | } | ||
904 | |||
905 | err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf, | ||
906 | linux_fragment, num_ops); | ||
907 | |||
908 | if (err) | ||
909 | break; | ||
910 | |||
911 | nvgpu_log_fn(g, "Copying result to userspace"); | ||
912 | |||
913 | if (copy_to_user(fragment, linux_fragment, | ||
914 | fragment_size)) { | ||
915 | nvgpu_err(g, "copy_to_user failed!"); | ||
916 | err = -EFAULT; | ||
917 | break; | ||
918 | } | ||
919 | |||
920 | ops_offset += num_ops; | ||
921 | } | ||
922 | |||
923 | nvgpu_speculation_barrier(); | ||
924 | nvgpu_kfree(g, linux_fragment); | ||
925 | |||
926 | /* enable powergate, if previously disabled */ | ||
927 | if (is_pg_disabled) { | ||
928 | powergate_err = nvgpu_set_powergate_locked(dbg_s, | ||
929 | false); | ||
930 | } | ||
931 | } | ||
932 | |||
933 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
934 | |||
935 | if (!err && powergate_err) | ||
936 | err = powergate_err; | ||
937 | |||
938 | if (err) | ||
939 | nvgpu_err(g, "dbg regops failed"); | ||
940 | |||
941 | return err; | ||
942 | } | ||
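The loop above processes args->num_ops in fragments sized by the preallocated work buffer, copying each fragment in, executing it, and copying results back before advancing. A worked sketch of just the chunking arithmetic, assuming a 128-op buffer (the real size is g->dbg_regops_tmp_buf_ops):

/* e.g. num_ops = 300, buf_ops = 128 -> fragments of 128, 128, 44 */
static void example_fragment_walk(u64 num_ops, u64 buf_ops)
{
	u64 ops_offset = 0;

	while (ops_offset < num_ops) {
		u64 frag = min(num_ops - ops_offset, buf_ops);

		/* copy_from_user frag ops, exec_regops, copy back */
		ops_offset += frag;
	}
}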
943 | |||
944 | static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | ||
945 | struct nvgpu_dbg_gpu_powergate_args *args) | ||
946 | { | ||
947 | int err; | ||
948 | struct gk20a *g = dbg_s->g; | ||
949 | nvgpu_log_fn(g, "%s powergate mode = %d", | ||
950 | g->name, args->mode); | ||
951 | |||
952 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
953 | if ((args->mode != NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) && | ||
954 | (args->mode != NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE)) { | ||
955 | nvgpu_err(g, "invalid powergate mode"); | ||
956 | err = -EINVAL; | ||
957 | goto pg_err_end; | ||
958 | } | ||
959 | |||
960 | err = nvgpu_set_powergate_locked(dbg_s, | ||
961 | args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE); | ||
962 | pg_err_end: | ||
963 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
964 | return err; | ||
965 | } | ||
966 | |||
967 | static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
968 | struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) | ||
969 | { | ||
970 | int err; | ||
971 | struct gk20a *g = dbg_s->g; | ||
972 | struct channel_gk20a *ch_gk20a; | ||
973 | |||
974 | nvgpu_log_fn(g, "%s smpc ctxsw mode = %d", | ||
975 | g->name, args->mode); | ||
976 | |||
977 | err = gk20a_busy(g); | ||
978 | if (err) { | ||
979 | nvgpu_err(g, "failed to poweron"); | ||
980 | return err; | ||
981 | } | ||
982 | |||
983 | /* Take the global lock, since we'll be doing global regops */ | ||
984 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
985 | |||
986 | ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
987 | if (!ch_gk20a) { | ||
988 | nvgpu_err(g, | ||
989 | "no bound channel for smpc ctxsw mode update"); | ||
990 | err = -EINVAL; | ||
991 | goto clean_up; | ||
992 | } | ||
993 | |||
994 | err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a, | ||
995 | args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); | ||
996 | if (err) { | ||
997 | nvgpu_err(g, | ||
998 | "error (%d) during smpc ctxsw mode update", err); | ||
999 | } | ||
1000 | |||
1001 | clean_up: | ||
1002 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1003 | gk20a_idle(g); | ||
1004 | return err; | ||
1005 | } | ||
1006 | |||
1007 | /* | ||
1008 | * Convert linux hwpm ctxsw mode values of the form NVGPU_DBG_GPU_HWPM_CTXSW_MODE_* | ||
1009 | * into common hwpm ctxsw mode values of the form NVGPU_DBG_HWPM_CTXSW_MODE_* | ||
1010 | */ | ||
1011 | |||
1012 | static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode) | ||
1013 | { | ||
1014 | nvgpu_speculation_barrier(); | ||
1015 | switch (mode) { | ||
1016 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW: | ||
1017 | return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW; | ||
1018 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW: | ||
1019 | return NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW; | ||
1020 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: | ||
1021 | return NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW; | ||
1022 | } | ||
1023 | |||
1024 | return mode; | ||
1025 | } | ||
1026 | |||
1027 | |||
1028 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
1029 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) | ||
1030 | { | ||
1031 | int err; | ||
1032 | struct gk20a *g = dbg_s->g; | ||
1033 | struct channel_gk20a *ch_gk20a; | ||
1034 | u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode); | ||
1035 | |||
1036 | nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); | ||
1037 | |||
1038 | /* Must have a valid reservation to enable/disable hwpm ctxsw. | ||
1039 | * Just print an error message for now, but eventually this should | ||
1040 | * return an error, at the point where all client sw has been | ||
1041 | * cleaned up. | ||
1042 | */ | ||
1043 | if (!dbg_s->has_profiler_reservation) { | ||
1044 | nvgpu_err(g, | ||
1045 | "session doesn't have a valid reservation"); | ||
1046 | } | ||
1047 | |||
1048 | err = gk20a_busy(g); | ||
1049 | if (err) { | ||
1050 | nvgpu_err(g, "failed to poweron"); | ||
1051 | return err; | ||
1052 | } | ||
1053 | |||
1054 | /* Take the global lock, since we'll be doing global regops */ | ||
1055 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1056 | |||
1057 | ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1058 | if (!ch_gk20a) { | ||
1059 | nvgpu_err(g, | ||
1060 | "no bound channel for pm ctxsw mode update"); | ||
1061 | err = -EINVAL; | ||
1062 | goto clean_up; | ||
1063 | } | ||
1064 | if (g->dbg_powergating_disabled_refcount == 0) { | ||
1065 | nvgpu_err(g, "powergate is not disabled"); | ||
1066 | err = -ENOSYS; | ||
1067 | goto clean_up; | ||
1068 | } | ||
1069 | err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, | ||
1070 | mode); | ||
1071 | |||
1072 | if (err) | ||
1073 | nvgpu_err(g, | ||
1074 | "error (%d) during pm ctxsw mode update", err); | ||
1075 | /* gk20a would require a WAR to set the core PM_ENABLE bit, not | ||
1076 | * added here with gk20a being deprecated | ||
1077 | */ | ||
1078 | clean_up: | ||
1079 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1080 | gk20a_idle(g); | ||
1081 | return err; | ||
1082 | } | ||
1083 | |||
1084 | static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode( | ||
1085 | struct dbg_session_gk20a *dbg_s, | ||
1086 | struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args) | ||
1087 | { | ||
1088 | int err; | ||
1089 | struct gk20a *g = dbg_s->g; | ||
1090 | struct channel_gk20a *ch; | ||
1091 | bool enable = (args->mode == NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED); | ||
1092 | |||
1093 | nvgpu_log_fn(g, "mode=%u", args->mode); | ||
1094 | |||
1095 | if (args->reserved != 0U) { | ||
1096 | return -EINVAL; | ||
1097 | } | ||
1098 | |||
1099 | if ((g->ops.fb.set_mmu_debug_mode == NULL) && | ||
1100 | (g->ops.gr.set_mmu_debug_mode == NULL)) { | ||
1101 | return -ENOSYS; | ||
1102 | } | ||
1103 | |||
1104 | err = gk20a_busy(g); | ||
1105 | if (err) { | ||
1106 | nvgpu_err(g, "failed to poweron"); | ||
1107 | return err; | ||
1108 | } | ||
1109 | |||
1110 | /* Take the global lock, since we'll be doing global regops */ | ||
1111 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1112 | |||
1113 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1114 | if (!ch) { | ||
1115 | nvgpu_err(g, "no bound channel for mmu debug mode"); | ||
1116 | err = -EINVAL; | ||
1117 | goto clean_up; | ||
1118 | } | ||
1119 | |||
1120 | err = nvgpu_tsg_set_mmu_debug_mode(ch, enable); | ||
1121 | if (err) { | ||
1122 | nvgpu_err(g, "set mmu debug mode failed, err=%d", err); | ||
1123 | } | ||
1124 | |||
1125 | clean_up: | ||
1126 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1127 | gk20a_idle(g); | ||
1128 | return err; | ||
1129 | } | ||
1130 | |||
1131 | static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | ||
1132 | struct dbg_session_gk20a *dbg_s, | ||
1133 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) | ||
1134 | { | ||
1135 | struct gk20a *g = dbg_s->g; | ||
1136 | struct channel_gk20a *ch; | ||
1137 | int err = 0, action = args->mode; | ||
1138 | |||
1139 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode); | ||
1140 | |||
1141 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1142 | if (!ch) | ||
1143 | return -EINVAL; | ||
1144 | |||
1145 | err = gk20a_busy(g); | ||
1146 | if (err) { | ||
1147 | nvgpu_err(g, "failed to poweron"); | ||
1148 | return err; | ||
1149 | } | ||
1150 | |||
1151 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1152 | |||
1153 | /* Suspend GPU context switching */ | ||
1154 | err = gr_gk20a_disable_ctxsw(g); | ||
1155 | if (err) { | ||
1156 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
1157 | /* this should probably be ctx-fatal... */ | ||
1158 | goto clean_up; | ||
1159 | } | ||
1160 | |||
1161 | nvgpu_speculation_barrier(); | ||
1162 | switch (action) { | ||
1163 | case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: | ||
1164 | gr_gk20a_suspend_context(ch); | ||
1165 | break; | ||
1166 | |||
1167 | case NVGPU_DBG_GPU_RESUME_ALL_SMS: | ||
1168 | gr_gk20a_resume_context(ch); | ||
1169 | break; | ||
1170 | } | ||
1171 | |||
1172 | err = gr_gk20a_enable_ctxsw(g); | ||
1173 | if (err) | ||
1174 | nvgpu_err(g, "unable to restart ctxsw!"); | ||
1175 | |||
1176 | clean_up: | ||
1177 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1178 | gk20a_idle(g); | ||
1179 | |||
1180 | return err; | ||
1181 | } | ||
1182 | |||
1183 | static int nvgpu_ioctl_allocate_profiler_object( | ||
1184 | struct dbg_session_gk20a_linux *dbg_session_linux, | ||
1185 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) | ||
1186 | { | ||
1187 | int err = 0; | ||
1188 | struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; | ||
1189 | struct gk20a *g = get_gk20a(dbg_session_linux->dev); | ||
1190 | struct dbg_profiler_object_data *prof_obj; | ||
1191 | |||
1192 | nvgpu_log_fn(g, "%s", g->name); | ||
1193 | |||
1194 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1195 | |||
1196 | err = alloc_profiler(g, &prof_obj); | ||
1197 | if (err) | ||
1198 | goto clean_up; | ||
1199 | |||
1200 | prof_obj->session_id = dbg_s->id; | ||
1201 | |||
1202 | if (dbg_s->is_profiler) | ||
1203 | prof_obj->ch = NULL; | ||
1204 | else { | ||
1205 | prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1206 | if (prof_obj->ch == NULL) { | ||
1207 | nvgpu_err(g, | ||
1208 | "bind a channel for dbg session"); | ||
1209 | nvgpu_kfree(g, prof_obj); | ||
1210 | err = -EINVAL; | ||
1211 | goto clean_up; | ||
1212 | } | ||
1213 | } | ||
1214 | |||
1215 | /* Return handle to client */ | ||
1216 | args->profiler_handle = prof_obj->prof_handle; | ||
1217 | |||
1218 | nvgpu_init_list_node(&prof_obj->prof_obj_entry); | ||
1219 | |||
1220 | nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects); | ||
1221 | clean_up: | ||
1222 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1223 | return err; | ||
1224 | } | ||
1225 | |||
1226 | static int nvgpu_ioctl_free_profiler_object( | ||
1227 | struct dbg_session_gk20a_linux *dbg_s_linux, | ||
1228 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) | ||
1229 | { | ||
1230 | int err = 0; | ||
1231 | struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; | ||
1232 | struct gk20a *g = get_gk20a(dbg_s_linux->dev); | ||
1233 | struct dbg_profiler_object_data *prof_obj, *tmp_obj; | ||
1234 | bool obj_found = false; | ||
1235 | |||
1236 | nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x", | ||
1237 | g->name, dbg_s->id, args->profiler_handle); | ||
1238 | |||
1239 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1240 | |||
1241 | /* Remove profiler object from the list, if a match is found */ | ||
1242 | nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, | ||
1243 | dbg_profiler_object_data, prof_obj_entry) { | ||
1244 | if (prof_obj->prof_handle == args->profiler_handle) { | ||
1245 | if (prof_obj->session_id != dbg_s->id) { | ||
1246 | nvgpu_err(g, | ||
1247 | "invalid handle %x", | ||
1248 | args->profiler_handle); | ||
1249 | err = -EINVAL; | ||
1250 | break; | ||
1251 | } | ||
1252 | if (prof_obj->has_reservation) | ||
1253 | g->ops.dbg_session_ops. | ||
1254 | release_profiler_reservation(dbg_s, prof_obj); | ||
1255 | nvgpu_list_del(&prof_obj->prof_obj_entry); | ||
1256 | nvgpu_kfree(g, prof_obj); | ||
1257 | obj_found = true; | ||
1258 | break; | ||
1259 | } | ||
1260 | } | ||
1261 | if (!obj_found) { | ||
1262 | nvgpu_err(g, "profiler %x not found", | ||
1263 | args->profiler_handle); | ||
1264 | err = -EINVAL; | ||
1265 | } | ||
1266 | |||
1267 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1268 | return err; | ||
1269 | } | ||
1270 | |||
1271 | static struct dbg_profiler_object_data *find_matching_prof_obj( | ||
1272 | struct dbg_session_gk20a *dbg_s, | ||
1273 | u32 profiler_handle) | ||
1274 | { | ||
1275 | struct gk20a *g = dbg_s->g; | ||
1276 | struct dbg_profiler_object_data *prof_obj; | ||
1277 | |||
1278 | nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, | ||
1279 | dbg_profiler_object_data, prof_obj_entry) { | ||
1280 | if (prof_obj->prof_handle == profiler_handle) { | ||
1281 | if (prof_obj->session_id != dbg_s->id) { | ||
1282 | nvgpu_err(g, | ||
1283 | "invalid handle %x", | ||
1284 | profiler_handle); | ||
1285 | return NULL; | ||
1286 | } | ||
1287 | return prof_obj; | ||
1288 | } | ||
1289 | } | ||
1290 | return NULL; | ||
1291 | } | ||
1292 | |||
1293 | /* In the common case a debugger session can take just the per-channel | ||
1294 | * lock for performance, but a profiler session must take the per-gpu | ||
1295 | * lock since it might not have an associated channel. */ | ||
1296 | static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s) | ||
1297 | { | ||
1298 | struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1299 | |||
1300 | if (dbg_s->is_profiler || !ch) | ||
1301 | nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock); | ||
1302 | else | ||
1303 | nvgpu_mutex_acquire(&ch->dbg_s_lock); | ||
1304 | } | ||
1305 | |||
1306 | static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s) | ||
1307 | { | ||
1308 | struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1309 | |||
1310 | if (dbg_s->is_profiler || !ch) | ||
1311 | nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock); | ||
1312 | else | ||
1313 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
1314 | } | ||
1315 | |||
1316 | static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) | ||
1317 | { | ||
1318 | struct gk20a *g = dbg_s->g; | ||
1319 | |||
1320 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
1321 | |||
1322 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
1323 | |||
1324 | dbg_s->dbg_events.events_enabled = true; | ||
1325 | dbg_s->dbg_events.num_pending_events = 0; | ||
1326 | |||
1327 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
1328 | } | ||
1329 | |||
1330 | static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) | ||
1331 | { | ||
1332 | struct gk20a *g = dbg_s->g; | ||
1333 | |||
1334 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
1335 | |||
1336 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
1337 | |||
1338 | dbg_s->dbg_events.events_enabled = false; | ||
1339 | dbg_s->dbg_events.num_pending_events = 0; | ||
1340 | |||
1341 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
1342 | } | ||
1343 | |||
1344 | static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) | ||
1345 | { | ||
1346 | struct gk20a *g = dbg_s->g; | ||
1347 | |||
1348 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
1349 | |||
1350 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
1351 | |||
1352 | if (dbg_s->dbg_events.events_enabled && | ||
1353 | dbg_s->dbg_events.num_pending_events > 0) | ||
1354 | dbg_s->dbg_events.num_pending_events--; | ||
1355 | |||
1356 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
1357 | } | ||
1358 | |||
1359 | |||
1360 | static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, | ||
1361 | struct nvgpu_dbg_gpu_events_ctrl_args *args) | ||
1362 | { | ||
1363 | int ret = 0; | ||
1364 | struct channel_gk20a *ch; | ||
1365 | struct gk20a *g = dbg_s->g; | ||
1366 | |||
1367 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); | ||
1368 | |||
1369 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1370 | if (!ch) { | ||
1371 | nvgpu_err(g, "no channel bound to dbg session"); | ||
1372 | return -EINVAL; | ||
1373 | } | ||
1374 | |||
1375 | nvgpu_speculation_barrier(); | ||
1376 | switch (args->cmd) { | ||
1377 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: | ||
1378 | gk20a_dbg_gpu_events_enable(dbg_s); | ||
1379 | break; | ||
1380 | |||
1381 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: | ||
1382 | gk20a_dbg_gpu_events_disable(dbg_s); | ||
1383 | break; | ||
1384 | |||
1385 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: | ||
1386 | gk20a_dbg_gpu_events_clear(dbg_s); | ||
1387 | break; | ||
1388 | |||
1389 | default: | ||
1390 | nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x", | ||
1391 | args->cmd); | ||
1392 | ret = -EINVAL; | ||
1393 | break; | ||
1394 | } | ||
1395 | |||
1396 | return ret; | ||
1397 | } | ||
1398 | |||
1399 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
1400 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) | ||
1401 | { | ||
1402 | struct gk20a *g = dbg_s->g; | ||
1403 | struct mm_gk20a *mm = &g->mm; | ||
1404 | int err; | ||
1405 | u32 virt_size; | ||
1406 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
1407 | |||
1408 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1409 | |||
1410 | if (g->perfbuf.owner) { | ||
1411 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1412 | return -EBUSY; | ||
1413 | } | ||
1414 | |||
1415 | mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size, | ||
1416 | big_page_size << 10, | ||
1417 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
1418 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
1419 | false, false, "perfbuf"); | ||
1420 | if (!mm->perfbuf.vm) { | ||
1421 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1422 | return -ENOMEM; | ||
1423 | } | ||
1424 | |||
1425 | err = nvgpu_vm_map_buffer(mm->perfbuf.vm, | ||
1426 | args->dmabuf_fd, | ||
1427 | &args->offset, | ||
1428 | 0, | ||
1429 | SZ_4K, | ||
1430 | 0, | ||
1431 | 0, | ||
1432 | 0, | ||
1433 | 0, | ||
1434 | NULL); | ||
1435 | if (err) | ||
1436 | goto err_remove_vm; | ||
1437 | |||
1438 | /* perf output buffer may not cross a 4GB boundary */ | ||
1439 | virt_size = u64_lo32(args->mapping_size); | ||
1440 | if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size - 1)) { | ||
1441 | err = -EINVAL; | ||
1442 | goto err_unmap; | ||
1443 | } | ||
1444 | |||
1445 | err = g->ops.dbg_session_ops.perfbuffer_enable(g, | ||
1446 | args->offset, virt_size); | ||
1447 | if (err) | ||
1448 | goto err_unmap; | ||
1449 | |||
1450 | g->perfbuf.owner = dbg_s; | ||
1451 | g->perfbuf.offset = args->offset; | ||
1452 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1453 | |||
1454 | return 0; | ||
1455 | |||
1456 | err_unmap: | ||
1457 | nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL); | ||
1458 | err_remove_vm: | ||
1459 | nvgpu_vm_put(mm->perfbuf.vm); | ||
1460 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1461 | return err; | ||
1462 | } | ||
1463 | |||
1464 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
1465 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) | ||
1466 | { | ||
1467 | struct gk20a *g = dbg_s->g; | ||
1468 | int err; | ||
1469 | |||
1470 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1471 | if ((g->perfbuf.owner != dbg_s) || | ||
1472 | (g->perfbuf.offset != args->offset)) { | ||
1473 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1474 | return -EINVAL; | ||
1475 | } | ||
1476 | |||
1477 | err = gk20a_perfbuf_release_locked(g, args->offset); | ||
1478 | |||
1479 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1480 | |||
1481 | return err; | ||
1482 | } | ||
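
A hedged sketch of the matching user-space sequence: map a dmabuf as the perf output buffer, use it, then unmap it with the same offset. The origin of the dmabuf fd is an assumption; the argument fields and the single-owner/-EBUSY behaviour follow the two handlers above, and the map handler additionally rejects ranges that would cross a 4 GB boundary.

/* Hedged user-space sketch: perfbuf map/unmap round trip. */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int perfbuf_round_trip(int dbg_fd, int dmabuf_fd, __u64 size)
{
	struct nvgpu_dbg_gpu_perfbuf_map_args map = {
		.dmabuf_fd = dmabuf_fd,
		.mapping_size = size,
	};
	struct nvgpu_dbg_gpu_perfbuf_unmap_args unmap;

	/* Fails with -EBUSY while another session owns the perf buffer. */
	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP, &map) < 0)
		return -1;

	/* ... let the GPU stream perf records into the buffer ... */

	/* Unmap must quote the offset the kernel chose during map. */
	unmap.offset = map.offset;
	return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP, &unmap);
}
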
1483 | |||
1484 | static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s, | ||
1485 | struct nvgpu_dbg_gpu_pc_sampling_args *args) | ||
1486 | { | ||
1487 | struct channel_gk20a *ch; | ||
1488 | struct gk20a *g = dbg_s->g; | ||
1489 | |||
1490 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1491 | if (!ch) | ||
1492 | return -EINVAL; | ||
1493 | |||
1494 | nvgpu_log_fn(g, " "); | ||
1495 | |||
1496 | return g->ops.gr.update_pc_sampling ? | ||
1497 | g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL; | ||
1498 | } | ||
1499 | |||
1500 | static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( | ||
1501 | struct dbg_session_gk20a *dbg_s, | ||
1502 | struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) | ||
1503 | { | ||
1504 | struct gk20a *g = dbg_s->g; | ||
1505 | struct gr_gk20a *gr = &g->gr; | ||
1506 | u32 sm_id; | ||
1507 | struct channel_gk20a *ch; | ||
1508 | int err = 0; | ||
1509 | |||
1510 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1511 | if (ch == NULL) { | ||
1512 | return -EINVAL; | ||
1513 | } | ||
1514 | |||
1515 | sm_id = args->sm_id; | ||
1516 | if (sm_id >= gr->no_of_sm) | ||
1517 | return -EINVAL; | ||
1518 | |||
1519 | nvgpu_speculation_barrier(); | ||
1520 | |||
1521 | err = gk20a_busy(g); | ||
1522 | if (err != 0) { | ||
1523 | return err; | ||
1524 | } | ||
1525 | |||
1526 | err = gr_gk20a_elpg_protected_call(g, | ||
1527 | g->ops.gr.clear_sm_error_state(g, ch, sm_id)); | ||
1528 | |||
1529 | gk20a_idle(g); | ||
1530 | |||
1531 | return err; | ||
1532 | } | ||
1533 | |||
1534 | static int | ||
1535 | nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s, | ||
1536 | struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args) | ||
1537 | { | ||
1538 | struct gk20a *g = dbg_s->g; | ||
1539 | int err = 0; | ||
1540 | int ctx_resident_ch_fd = -1; | ||
1541 | |||
1542 | err = gk20a_busy(g); | ||
1543 | if (err) | ||
1544 | return err; | ||
1545 | |||
1546 | nvgpu_speculation_barrier(); | ||
1547 | switch (args->action) { | ||
1548 | case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS: | ||
1549 | err = g->ops.gr.suspend_contexts(g, dbg_s, | ||
1550 | &ctx_resident_ch_fd); | ||
1551 | break; | ||
1552 | |||
1553 | case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS: | ||
1554 | err = g->ops.gr.resume_contexts(g, dbg_s, | ||
1555 | &ctx_resident_ch_fd); | ||
1556 | break; | ||
1557 | } | ||
1558 | |||
1559 | if (ctx_resident_ch_fd < 0) { | ||
1560 | args->is_resident_context = 0; | ||
1561 | } else { | ||
1562 | args->is_resident_context = 1; | ||
1563 | args->resident_context_fd = ctx_resident_ch_fd; | ||
1564 | } | ||
1565 | |||
1566 | gk20a_idle(g); | ||
1567 | |||
1568 | return err; | ||
1569 | } | ||
1570 | |||
1571 | static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, | ||
1572 | struct nvgpu_dbg_gpu_access_fb_memory_args *args) | ||
1573 | { | ||
1574 | struct gk20a *g = dbg_s->g; | ||
1575 | struct dma_buf *dmabuf; | ||
1576 | void __user *user_buffer = (void __user *)(uintptr_t)args->buffer; | ||
1577 | void *buffer; | ||
1578 | u64 size, access_size, offset; | ||
1579 | u64 access_limit_size = SZ_4K; | ||
1580 | int err = 0; | ||
1581 | |||
1582 | if ((args->offset & 3) || (!args->size) || (args->size & 3)) | ||
1583 | return -EINVAL; | ||
1584 | |||
1585 | dmabuf = dma_buf_get(args->dmabuf_fd); | ||
1586 | if (IS_ERR(dmabuf)) | ||
1587 | return -EINVAL; | ||
1588 | |||
1589 | if ((args->offset > dmabuf->size) || | ||
1590 | (args->size > dmabuf->size) || | ||
1591 | (args->offset + args->size > dmabuf->size)) { | ||
1592 | err = -EINVAL; | ||
1593 | goto fail_dmabuf_put; | ||
1594 | } | ||
1595 | |||
1596 | buffer = nvgpu_big_zalloc(g, access_limit_size); | ||
1597 | if (!buffer) { | ||
1598 | err = -ENOMEM; | ||
1599 | goto fail_dmabuf_put; | ||
1600 | } | ||
1601 | |||
1602 | size = args->size; | ||
1603 | offset = 0; | ||
1604 | |||
1605 | err = gk20a_busy(g); | ||
1606 | if (err) | ||
1607 | goto fail_free_buffer; | ||
1608 | |||
1609 | while (size) { | ||
1610 | 		/* Access at most access_limit_size bytes per loop iteration */ | ||
1611 | access_size = min(access_limit_size, size); | ||
1612 | |||
1613 | if (args->cmd == | ||
1614 | NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) { | ||
1615 | 			err = copy_from_user(buffer, user_buffer + offset, | ||
1616 | 					access_size) ? -EFAULT : 0; | ||
1617 | if (err) | ||
1618 | goto fail_idle; | ||
1619 | } | ||
1620 | |||
1621 | err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer, | ||
1622 | args->offset + offset, access_size, | ||
1623 | args->cmd); | ||
1624 | if (err) | ||
1625 | goto fail_idle; | ||
1626 | |||
1627 | if (args->cmd == | ||
1628 | NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) { | ||
1629 | 			err = copy_to_user(user_buffer + offset, | ||
1630 | 					buffer, access_size) ? -EFAULT : 0; | ||
1631 | if (err) | ||
1632 | goto fail_idle; | ||
1633 | } | ||
1634 | |||
1635 | size -= access_size; | ||
1636 | offset += access_size; | ||
1637 | } | ||
1638 | nvgpu_speculation_barrier(); | ||
1639 | |||
1640 | fail_idle: | ||
1641 | gk20a_idle(g); | ||
1642 | fail_free_buffer: | ||
1643 | nvgpu_big_free(g, buffer); | ||
1644 | fail_dmabuf_put: | ||
1645 | dma_buf_put(dmabuf); | ||
1646 | |||
1647 | return err; | ||
1648 | } | ||
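
The handler above bounces user data through a 4 KiB kernel buffer, at most `access_limit_size` bytes per iteration, so arbitrarily large transfers stay bounded. A hedged user-space sketch of a read; the alignment requirements mirror the checks at the top of the handler.

/* Hedged user-space sketch: read vidmem dmabuf contents via the dbg node. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int fb_read(int dbg_fd, int dmabuf_fd, void *dst, __u64 len)
{
	struct nvgpu_dbg_gpu_access_fb_memory_args args = {
		.cmd       = NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ,
		.dmabuf_fd = dmabuf_fd,
		.offset    = 0,			/* must be 4-byte aligned */
		.buffer    = (__u64)(uintptr_t)dst,
		.size      = len,		/* non-zero, 4-byte aligned */
	};

	return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY, &args);
}
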
1649 | |||
1650 | static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, | ||
1651 | struct nvgpu_dbg_gpu_profiler_reserve_args *args) | ||
1652 | { | ||
1653 | if (args->acquire) | ||
1654 | return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle); | ||
1655 | |||
1656 | return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); | ||
1657 | } | ||
1658 | |||
1659 | static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, | ||
1660 | struct nvgpu_dbg_gpu_timeout_args *args) | ||
1661 | { | ||
1662 | bool status; | ||
1663 | struct gk20a *g = dbg_s->g; | ||
1664 | |||
1665 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1666 | status = nvgpu_is_timeouts_enabled(g); | ||
1667 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1668 | |||
1669 | if (status) | ||
1670 | args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE; | ||
1671 | else | ||
1672 | args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE; | ||
1673 | } | ||
1674 | |||
1675 | static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) | ||
1676 | { | ||
1677 | struct mm_gk20a *mm = &g->mm; | ||
1678 | struct vm_gk20a *vm = mm->perfbuf.vm; | ||
1679 | int err; | ||
1680 | |||
1681 | err = g->ops.dbg_session_ops.perfbuffer_disable(g); | ||
1682 | |||
1683 | nvgpu_vm_unmap(vm, offset, NULL); | ||
1684 | nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); | ||
1685 | nvgpu_vm_put(vm); | ||
1686 | |||
1687 | g->perfbuf.owner = NULL; | ||
1688 | g->perfbuf.offset = 0; | ||
1689 | return err; | ||
1690 | } | ||
1691 | |||
1692 | static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, | ||
1693 | u32 profiler_handle) | ||
1694 | { | ||
1695 | struct gk20a *g = dbg_s->g; | ||
1696 | struct dbg_profiler_object_data *prof_obj; | ||
1697 | int err = 0; | ||
1698 | |||
1699 | nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); | ||
1700 | |||
1701 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1702 | |||
1703 | /* Find matching object. */ | ||
1704 | prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); | ||
1705 | |||
1706 | if (!prof_obj) { | ||
1707 | nvgpu_err(g, "object not found"); | ||
1708 | err = -EINVAL; | ||
1709 | goto exit; | ||
1710 | } | ||
1711 | |||
1712 | if (prof_obj->has_reservation) | ||
1713 | g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj); | ||
1714 | else { | ||
1715 | nvgpu_err(g, "No reservation found"); | ||
1716 | err = -EINVAL; | ||
1717 | goto exit; | ||
1718 | } | ||
1719 | exit: | ||
1720 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1721 | return err; | ||
1722 | } | ||
1723 | |||
1724 | static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, | ||
1725 | u32 profiler_handle) | ||
1726 | { | ||
1727 | struct gk20a *g = dbg_s->g; | ||
1728 | struct dbg_profiler_object_data *prof_obj, *my_prof_obj; | ||
1729 | int err = 0; | ||
1730 | struct tsg_gk20a *tsg; | ||
1731 | |||
1732 | nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); | ||
1733 | |||
1734 | if (g->profiler_reservation_count < 0) { | ||
1735 | nvgpu_err(g, "Negative reservation count!"); | ||
1736 | return -EINVAL; | ||
1737 | } | ||
1738 | |||
1739 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1740 | |||
1741 | /* Find matching object. */ | ||
1742 | my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); | ||
1743 | |||
1744 | if (!my_prof_obj) { | ||
1745 | nvgpu_err(g, "object not found"); | ||
1746 | err = -EINVAL; | ||
1747 | goto exit; | ||
1748 | } | ||
1749 | |||
1750 | /* If we already have the reservation, we're done */ | ||
1751 | if (my_prof_obj->has_reservation) { | ||
1752 | err = 0; | ||
1753 | goto exit; | ||
1754 | } | ||
1755 | |||
1756 | if (my_prof_obj->ch == NULL) { | ||
1757 | /* Global reservations are only allowed if there are no other | ||
1758 | * global or per-context reservations currently held | ||
1759 | */ | ||
1760 | if (!g->ops.dbg_session_ops.check_and_set_global_reservation( | ||
1761 | dbg_s, my_prof_obj)) { | ||
1762 | nvgpu_err(g, | ||
1763 | "global reserve: have existing reservation"); | ||
1764 | err = -EBUSY; | ||
1765 | } | ||
1766 | } else if (g->global_profiler_reservation_held) { | ||
1767 | /* If there's a global reservation, | ||
1768 | * we can't take a per-context one. | ||
1769 | */ | ||
1770 | nvgpu_err(g, | ||
1771 | "per-ctxt reserve: global reservation in effect"); | ||
1772 | err = -EBUSY; | ||
1773 | } else if ((tsg = tsg_gk20a_from_ch(my_prof_obj->ch)) != NULL) { | ||
1774 | /* TSG: check that another channel in the TSG | ||
1775 | * doesn't already have the reservation | ||
1776 | */ | ||
1777 | u32 my_tsgid = tsg->tsgid; | ||
1778 | |||
1779 | nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, | ||
1780 | dbg_profiler_object_data, prof_obj_entry) { | ||
1781 | if (prof_obj->has_reservation && | ||
1782 | (prof_obj->ch->tsgid == my_tsgid)) { | ||
1783 | nvgpu_err(g, | ||
1784 | "per-ctxt reserve (tsg): already reserved"); | ||
1785 | err = -EBUSY; | ||
1786 | goto exit; | ||
1787 | } | ||
1788 | } | ||
1789 | |||
1790 | if (!g->ops.dbg_session_ops.check_and_set_context_reservation( | ||
1791 | dbg_s, my_prof_obj)) { | ||
1792 | /* Another guest OS has the global reservation */ | ||
1793 | nvgpu_err(g, | ||
1794 | "per-ctxt reserve: global reservation in effect"); | ||
1795 | err = -EBUSY; | ||
1796 | } | ||
1797 | } else { | ||
1798 | /* channel: check that some other profiler object doesn't | ||
1799 | * already have the reservation. | ||
1800 | */ | ||
1801 | struct channel_gk20a *my_ch = my_prof_obj->ch; | ||
1802 | |||
1803 | nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, | ||
1804 | dbg_profiler_object_data, prof_obj_entry) { | ||
1805 | if (prof_obj->has_reservation && | ||
1806 | (prof_obj->ch == my_ch)) { | ||
1807 | nvgpu_err(g, | ||
1808 | "per-ctxt reserve (ch): already reserved"); | ||
1809 | err = -EBUSY; | ||
1810 | goto exit; | ||
1811 | } | ||
1812 | } | ||
1813 | |||
1814 | if (!g->ops.dbg_session_ops.check_and_set_context_reservation( | ||
1815 | dbg_s, my_prof_obj)) { | ||
1816 | /* Another guest OS has the global reservation */ | ||
1817 | nvgpu_err(g, | ||
1818 | "per-ctxt reserve: global reservation in effect"); | ||
1819 | err = -EBUSY; | ||
1820 | } | ||
1821 | } | ||
1822 | exit: | ||
1823 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1824 | return err; | ||
1825 | } | ||
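
Reservation precedence in the function above: a global reservation (a profiler object with no channel) excludes everything else; a per-context reservation is refused while a global one is held; and at most one reservation may exist per channel or TSG. A hedged user-space sketch of the allocate/reserve/release lifecycle; beyond the ioctl names and fields used in this file, the flow is an assumption.

/* Hedged user-space sketch: take and drop a profiler reservation. */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int profile_with_reservation(int dbg_fd)
{
	struct nvgpu_dbg_gpu_profiler_obj_mgt_args obj = { 0 };
	struct nvgpu_dbg_gpu_profiler_reserve_args rsv = { 0 };

	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE, &obj) < 0)
		return -1;

	rsv.profiler_handle = obj.profiler_handle;
	rsv.acquire = 1;
	/* -EBUSY means a conflicting global/per-ctx reservation exists. */
	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv) < 0)
		return -1;

	/* ... program and read perfmon state, e.g. via REG_OPS ... */

	rsv.acquire = 0;
	ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv);
	return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_PROFILER_FREE, &obj);
}
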
1826 | |||
1827 | static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s, | ||
1828 | struct nvgpu_dbg_gpu_unbind_channel_args *args) | ||
1829 | { | ||
1830 | struct dbg_session_channel_data *ch_data; | ||
1831 | struct gk20a *g = dbg_s->g; | ||
1832 | bool channel_found = false; | ||
1833 | struct channel_gk20a *ch; | ||
1834 | int err; | ||
1835 | |||
1836 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", | ||
1837 | g->name, args->channel_fd); | ||
1838 | |||
1839 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
1840 | if (!ch) { | ||
1841 | nvgpu_log_fn(g, "no channel found for fd"); | ||
1842 | return -EINVAL; | ||
1843 | } | ||
1844 | |||
1845 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
1846 | nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list, | ||
1847 | dbg_session_channel_data, ch_entry) { | ||
1848 | if (ch->chid == ch_data->chid) { | ||
1849 | channel_found = true; | ||
1850 | break; | ||
1851 | } | ||
1852 | } | ||
1853 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
1854 | |||
1855 | if (!channel_found) { | ||
1856 | 		nvgpu_log_fn(g, "channel not bound, fd=%d\n", args->channel_fd); | ||
1857 | err = -EINVAL; | ||
1858 | goto out; | ||
1859 | } | ||
1860 | |||
1861 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1862 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
1863 | err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data); | ||
1864 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
1865 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1866 | |||
1867 | out: | ||
1868 | gk20a_channel_put(ch); | ||
1869 | return err; | ||
1870 | } | ||
1871 | |||
1872 | static int nvgpu_set_sm_exception_type_mask_locked( | ||
1873 | struct dbg_session_gk20a *dbg_s, | ||
1874 | u32 exception_mask) | ||
1875 | { | ||
1876 | struct gk20a *g = dbg_s->g; | ||
1877 | int err = 0; | ||
1878 | struct channel_gk20a *ch = NULL; | ||
1879 | |||
1880 | /* | ||
1881 | 	 * Obtain the first channel from the channel list in | ||
1882 | 	 * dbg_session, find the TSG associated with that channel, | ||
1883 | 	 * and set the sm_exception_mask_type on that TSG. | ||
1884 | */ | ||
1885 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1886 | if (ch != NULL) { | ||
1887 | struct tsg_gk20a *tsg; | ||
1888 | |||
1889 | tsg = tsg_gk20a_from_ch(ch); | ||
1890 | if (tsg != NULL) { | ||
1891 | tsg->sm_exception_mask_type = exception_mask; | ||
1892 | goto type_mask_end; | ||
1893 | } | ||
1894 | } | ||
1895 | |||
1896 | 	nvgpu_log_fn(g, "unable to find the TSG"); | ||
1897 | err = -EINVAL; | ||
1898 | |||
1899 | type_mask_end: | ||
1900 | return err; | ||
1901 | } | ||
1902 | |||
1903 | static int nvgpu_dbg_gpu_set_sm_exception_type_mask( | ||
1904 | struct dbg_session_gk20a *dbg_s, | ||
1905 | struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args *args) | ||
1906 | { | ||
1907 | int err = 0; | ||
1908 | struct gk20a *g = dbg_s->g; | ||
1909 | u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
1910 | |||
1911 | nvgpu_speculation_barrier(); | ||
1912 | switch (args->exception_type_mask) { | ||
1913 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL: | ||
1914 | sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL; | ||
1915 | break; | ||
1916 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_NONE: | ||
1917 | sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
1918 | break; | ||
1919 | default: | ||
1920 | nvgpu_err(g, | ||
1921 | "unrecognized dbg sm exception type mask: 0x%x", | ||
1922 | args->exception_type_mask); | ||
1923 | err = -EINVAL; | ||
1924 | break; | ||
1925 | } | ||
1926 | |||
1927 | if (err != 0) { | ||
1928 | return err; | ||
1929 | } | ||
1930 | |||
1931 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1932 | err = nvgpu_set_sm_exception_type_mask_locked(dbg_s, | ||
1933 | sm_exception_mask_type); | ||
1934 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1935 | |||
1936 | return err; | ||
1937 | } | ||
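
A short user-space sketch: restrict SM exception reporting to fatal-only for the TSG behind the bound channel. The field and flag names are exactly the ones dispatched above; everything else is assumed.

/* Hedged user-space sketch: restrict SM exception reporting to fatal. */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int sm_exceptions_fatal_only(int dbg_fd)
{
	struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args args = {
		.exception_type_mask =
			NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL,
	};

	/* Fails with -EINVAL when no channel/TSG is bound to the session. */
	return ioctl(dbg_fd,
		     NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK, &args);
}
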
1938 | |||
1939 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
1940 | static int nvgpu_dbg_gpu_cycle_stats(struct dbg_session_gk20a *dbg_s, | ||
1941 | struct nvgpu_dbg_gpu_cycle_stats_args *args) | ||
1942 | { | ||
1943 | struct channel_gk20a *ch = NULL; | ||
1944 | int err; | ||
1945 | |||
1946 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1947 | if (ch == NULL) { | ||
1948 | return -EINVAL; | ||
1949 | } | ||
1950 | |||
1951 | err = gk20a_busy(ch->g); | ||
1952 | if (err != 0) { | ||
1953 | return err; | ||
1954 | } | ||
1955 | |||
1956 | err = gk20a_channel_cycle_stats(ch, args->dmabuf_fd); | ||
1957 | |||
1958 | gk20a_idle(ch->g); | ||
1959 | return err; | ||
1960 | } | ||
1961 | |||
1962 | static int nvgpu_dbg_gpu_cycle_stats_snapshot(struct dbg_session_gk20a *dbg_s, | ||
1963 | struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *args) | ||
1964 | { | ||
1965 | struct channel_gk20a *ch = NULL; | ||
1966 | int err; | ||
1967 | |||
1968 | if (!args->dmabuf_fd) { | ||
1969 | return -EINVAL; | ||
1970 | } | ||
1971 | |||
1972 | nvgpu_speculation_barrier(); | ||
1973 | |||
1974 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1975 | if (ch == NULL) { | ||
1976 | return -EINVAL; | ||
1977 | } | ||
1978 | |||
1979 | 	/* check whether cycle stats snapshot calls are supported on this GPU */ | ||
1980 | if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT)) { | ||
1981 | return -ENOSYS; | ||
1982 | } | ||
1983 | |||
1984 | err = gk20a_busy(ch->g); | ||
1985 | if (err != 0) { | ||
1986 | return err; | ||
1987 | } | ||
1988 | |||
1989 | /* handle the command (most frequent cases first) */ | ||
1990 | switch (args->cmd) { | ||
1991 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: | ||
1992 | err = gk20a_flush_cycle_stats_snapshot(ch); | ||
1993 | args->extra = 0; | ||
1994 | break; | ||
1995 | |||
1996 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH: | ||
1997 | err = gk20a_attach_cycle_stats_snapshot(ch, | ||
1998 | args->dmabuf_fd, | ||
1999 | args->extra, | ||
2000 | &args->extra); | ||
2001 | break; | ||
2002 | |||
2003 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_DETACH: | ||
2004 | err = gk20a_channel_free_cycle_stats_snapshot(ch); | ||
2005 | args->extra = 0; | ||
2006 | break; | ||
2007 | |||
2008 | default: | ||
2009 | pr_err("cyclestats: unknown command %u\n", args->cmd); | ||
2010 | err = -EINVAL; | ||
2011 | break; | ||
2012 | } | ||
2013 | |||
2014 | gk20a_idle(ch->g); | ||
2015 | return err; | ||
2016 | } | ||
2017 | |||
2018 | #endif | ||
2019 | |||
2020 | int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp) | ||
2021 | { | ||
2022 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
2023 | struct nvgpu_os_linux, dbg.cdev); | ||
2024 | struct gk20a *g = &l->g; | ||
2025 | |||
2026 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
2027 | return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */); | ||
2028 | } | ||
2029 | |||
2030 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | ||
2031 | unsigned long arg) | ||
2032 | { | ||
2033 | struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data; | ||
2034 | struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; | ||
2035 | struct gk20a *g = dbg_s->g; | ||
2036 | u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE]; | ||
2037 | int err = 0; | ||
2038 | |||
2039 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
2040 | |||
2041 | if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) || | ||
2042 | (_IOC_NR(cmd) == 0) || | ||
2043 | (_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) || | ||
2044 | (_IOC_SIZE(cmd) > NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE)) | ||
2045 | return -EINVAL; | ||
2046 | |||
2047 | memset(buf, 0, sizeof(buf)); | ||
2048 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
2049 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
2050 | return -EFAULT; | ||
2051 | } | ||
2052 | |||
2053 | if (!g->sw_ready) { | ||
2054 | err = gk20a_busy(g); | ||
2055 | if (err) | ||
2056 | return err; | ||
2057 | |||
2058 | gk20a_idle(g); | ||
2059 | } | ||
2060 | |||
2061 | /* protect from threaded user space calls */ | ||
2062 | nvgpu_mutex_acquire(&dbg_s->ioctl_lock); | ||
2063 | |||
2064 | nvgpu_speculation_barrier(); | ||
2065 | switch (cmd) { | ||
2066 | case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: | ||
2067 | err = dbg_bind_channel_gk20a(dbg_s, | ||
2068 | (struct nvgpu_dbg_gpu_bind_channel_args *)buf); | ||
2069 | break; | ||
2070 | |||
2071 | case NVGPU_DBG_GPU_IOCTL_REG_OPS: | ||
2072 | err = nvgpu_ioctl_channel_reg_ops(dbg_s, | ||
2073 | (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); | ||
2074 | break; | ||
2075 | |||
2076 | case NVGPU_DBG_GPU_IOCTL_POWERGATE: | ||
2077 | err = nvgpu_ioctl_powergate_gk20a(dbg_s, | ||
2078 | (struct nvgpu_dbg_gpu_powergate_args *)buf); | ||
2079 | break; | ||
2080 | |||
2081 | case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: | ||
2082 | err = gk20a_dbg_gpu_events_ctrl(dbg_s, | ||
2083 | (struct nvgpu_dbg_gpu_events_ctrl_args *)buf); | ||
2084 | break; | ||
2085 | |||
2086 | case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: | ||
2087 | err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, | ||
2088 | (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); | ||
2089 | break; | ||
2090 | |||
2091 | case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE: | ||
2092 | err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s, | ||
2093 | (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf); | ||
2094 | break; | ||
2095 | |||
2096 | case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: | ||
2097 | err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, | ||
2098 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); | ||
2099 | break; | ||
2100 | |||
2101 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: | ||
2102 | err = gk20a_perfbuf_map(dbg_s, | ||
2103 | (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); | ||
2104 | break; | ||
2105 | |||
2106 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: | ||
2107 | err = gk20a_perfbuf_unmap(dbg_s, | ||
2108 | (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); | ||
2109 | break; | ||
2110 | |||
2111 | case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING: | ||
2112 | err = gk20a_dbg_pc_sampling(dbg_s, | ||
2113 | (struct nvgpu_dbg_gpu_pc_sampling_args *)buf); | ||
2114 | break; | ||
2115 | |||
2116 | case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE: | ||
2117 | err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s, | ||
2118 | (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf); | ||
2119 | break; | ||
2120 | |||
2121 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT: | ||
2122 | err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s, | ||
2123 | (struct nvgpu_dbg_gpu_timeout_args *)buf); | ||
2124 | break; | ||
2125 | |||
2126 | case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT: | ||
2127 | nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s, | ||
2128 | (struct nvgpu_dbg_gpu_timeout_args *)buf); | ||
2129 | break; | ||
2130 | |||
2131 | case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: | ||
2132 | err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, | ||
2133 | (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); | ||
2134 | break; | ||
2135 | |||
2136 | case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE: | ||
2137 | err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s, | ||
2138 | (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf); | ||
2139 | break; | ||
2140 | |||
2141 | case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL: | ||
2142 | err = dbg_unbind_channel_gk20a(dbg_s, | ||
2143 | (struct nvgpu_dbg_gpu_unbind_channel_args *)buf); | ||
2144 | break; | ||
2145 | |||
2146 | case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS: | ||
2147 | err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s, | ||
2148 | (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf); | ||
2149 | break; | ||
2150 | |||
2151 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY: | ||
2152 | err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s, | ||
2153 | (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf); | ||
2154 | break; | ||
2155 | |||
2156 | case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE: | ||
2157 | err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux, | ||
2158 | (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); | ||
2159 | break; | ||
2160 | |||
2161 | case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE: | ||
2162 | err = nvgpu_ioctl_free_profiler_object(dbg_s_linux, | ||
2163 | (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); | ||
2164 | break; | ||
2165 | |||
2166 | case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE: | ||
2167 | err = nvgpu_ioctl_profiler_reserve(dbg_s, | ||
2168 | (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf); | ||
2169 | break; | ||
2170 | |||
2171 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK: | ||
2172 | err = nvgpu_dbg_gpu_set_sm_exception_type_mask(dbg_s, | ||
2173 | (struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args *)buf); | ||
2174 | break; | ||
2175 | |||
2176 | case NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE: | ||
2177 | err = nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(dbg_s, | ||
2178 | (struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *)buf); | ||
2179 | break; | ||
2180 | |||
2181 | #ifdef CONFIG_GK20A_CYCLE_STATS | ||
2182 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS: | ||
2183 | err = nvgpu_dbg_gpu_cycle_stats(dbg_s, | ||
2184 | (struct nvgpu_dbg_gpu_cycle_stats_args *)buf); | ||
2185 | break; | ||
2186 | |||
2187 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT: | ||
2188 | err = nvgpu_dbg_gpu_cycle_stats_snapshot(dbg_s, | ||
2189 | (struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *)buf); | ||
2190 | break; | ||
2191 | #endif | ||
2192 | |||
2193 | default: | ||
2194 | nvgpu_err(g, | ||
2195 | "unrecognized dbg gpu ioctl cmd: 0x%x", | ||
2196 | cmd); | ||
2197 | err = -ENOTTY; | ||
2198 | break; | ||
2199 | } | ||
2200 | |||
2201 | nvgpu_mutex_release(&dbg_s->ioctl_lock); | ||
2202 | |||
2203 | nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err); | ||
2204 | |||
2205 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
2206 | 		err = copy_to_user((void __user *)arg, | ||
2207 | 				buf, _IOC_SIZE(cmd)) ? -EFAULT : 0; | ||
2208 | |||
2209 | return err; | ||
2210 | } | ||
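
Every command above funnels through the same marshaling pattern: validate the `_IOC_*` bounds against a fixed-size stack buffer, copy in for `_IOC_WRITE`, dispatch under `ioctl_lock`, copy out for `_IOC_READ` on success. A hedged sketch of the session setup that precedes any of these calls; the device node path is an assumption and can differ across platforms.

/* Hedged user-space sketch: open a dbg session and bind a channel to it. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int dbg_session_open(int channel_fd)
{
	struct nvgpu_dbg_gpu_bind_channel_args bind = { 0 };
	/* Assumed device node name. */
	int dbg_fd = open("/dev/nvhost-dbg-gpu", O_RDWR);

	if (dbg_fd < 0)
		return -1;

	bind.channel_fd = channel_fd;
	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL, &bind) < 0) {
		close(dbg_fd);
		return -1;
	}
	return dbg_fd;
}
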
diff --git a/include/os/linux/ioctl_dbg.h b/include/os/linux/ioctl_dbg.h new file mode 100644 index 0000000..2e188cc --- /dev/null +++ b/include/os/linux/ioctl_dbg.h | |||
@@ -0,0 +1,38 @@ | |||
1 | /* | ||
2 | * Tegra GK20A GPU Debugger Driver | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | #ifndef DBG_GPU_IOCTL_GK20A_H | ||
19 | #define DBG_GPU_IOCTL_GK20A_H | ||
20 | |||
21 | struct inode; | ||
22 | struct file; | ||
23 | typedef struct poll_table_struct poll_table; | ||
24 | |||
25 | /* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number | ||
26 | * of regops */ | ||
27 | #define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024 | ||
28 | |||
29 | /* module debug driver interface */ | ||
30 | int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp); | ||
31 | int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp); | ||
32 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
33 | unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait); | ||
34 | |||
35 | /* used by profiler driver interface */ | ||
36 | int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp); | ||
37 | |||
38 | #endif | ||
diff --git a/include/os/linux/ioctl_tsg.c b/include/os/linux/ioctl_tsg.c new file mode 100644 index 0000000..2f8cb3a --- /dev/null +++ b/include/os/linux/ioctl_tsg.c | |||
@@ -0,0 +1,750 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/fs.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/cdev.h> | ||
20 | #include <linux/uaccess.h> | ||
21 | #include <linux/poll.h> | ||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | #include <linux/anon_inodes.h> | ||
24 | |||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/log.h> | ||
27 | #include <nvgpu/os_sched.h> | ||
28 | #include <nvgpu/gk20a.h> | ||
29 | #include <nvgpu/channel.h> | ||
30 | #include <nvgpu/tsg.h> | ||
31 | |||
32 | #include "gv11b/fifo_gv11b.h" | ||
33 | #include "platform_gk20a.h" | ||
34 | #include "ioctl_tsg.h" | ||
35 | #include "ioctl_channel.h" | ||
36 | #include "os_linux.h" | ||
37 | |||
38 | struct tsg_private { | ||
39 | struct gk20a *g; | ||
40 | struct tsg_gk20a *tsg; | ||
41 | }; | ||
42 | |||
43 | static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) | ||
44 | { | ||
45 | struct channel_gk20a *ch; | ||
46 | int err; | ||
47 | |||
48 | ch = gk20a_get_channel_from_file(ch_fd); | ||
49 | if (!ch) | ||
50 | return -EINVAL; | ||
51 | |||
52 | err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); | ||
53 | |||
54 | gk20a_channel_put(ch); | ||
55 | return err; | ||
56 | } | ||
57 | |||
58 | static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, | ||
59 | struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) | ||
60 | { | ||
61 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
62 | struct channel_gk20a *ch; | ||
63 | struct gr_gk20a *gr = &g->gr; | ||
64 | int err = 0; | ||
65 | |||
66 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
67 | |||
68 | nvgpu_mutex_acquire(&sched->control_lock); | ||
69 | if (sched->control_locked) { | ||
70 | err = -EPERM; | ||
71 | goto mutex_release; | ||
72 | } | ||
73 | err = gk20a_busy(g); | ||
74 | if (err) { | ||
75 | nvgpu_err(g, "failed to power on gpu"); | ||
76 | goto mutex_release; | ||
77 | } | ||
78 | |||
79 | ch = gk20a_get_channel_from_file(arg->channel_fd); | ||
80 | if (!ch) { | ||
81 | err = -EINVAL; | ||
82 | goto idle; | ||
83 | } | ||
84 | |||
85 | if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { | ||
86 | if ((arg->num_active_tpcs > gr->max_tpc_count) || | ||
87 | !(arg->num_active_tpcs)) { | ||
88 | nvgpu_err(g, "Invalid num of active TPCs"); | ||
89 | err = -EINVAL; | ||
90 | goto ch_put; | ||
91 | } | ||
92 | tsg->tpc_num_initialized = true; | ||
93 | tsg->num_active_tpcs = arg->num_active_tpcs; | ||
94 | tsg->tpc_pg_enabled = true; | ||
95 | } else { | ||
96 | 		tsg->tpc_pg_enabled = false; | ||
96 | 		nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled"); | ||
97 | } | ||
98 | |||
99 | if (arg->subcontext_id < g->fifo.max_subctx_count) { | ||
100 | ch->subctx_id = arg->subcontext_id; | ||
101 | } else { | ||
102 | err = -EINVAL; | ||
103 | goto ch_put; | ||
104 | } | ||
105 | |||
106 | nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", | ||
107 | ch->chid, ch->subctx_id); | ||
108 | |||
109 | /* Use runqueue selector 1 for all ASYNC ids */ | ||
110 | if (ch->subctx_id > CHANNEL_INFO_VEID0) | ||
111 | ch->runqueue_sel = 1; | ||
112 | |||
113 | err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); | ||
114 | ch_put: | ||
115 | gk20a_channel_put(ch); | ||
116 | idle: | ||
117 | gk20a_idle(g); | ||
118 | mutex_release: | ||
119 | nvgpu_mutex_release(&sched->control_lock); | ||
120 | return err; | ||
121 | } | ||
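
The extended bind adds two knobs on top of the plain channel bind: an optional active-TPC count (latched once per TSG for TPC power gating) and a subcontext id, where any id above VEID0 is steered to runqueue 1. A hedged user-space sketch; the struct fields are the ones consumed above, the rest is assumed.

/* Hedged user-space sketch: bind a channel into a TSG with a subcontext. */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int tsg_bind_async(int tsg_fd, int channel_fd, __u32 veid)
{
	struct nvgpu_tsg_bind_channel_ex_args args = { 0 };

	args.channel_fd = channel_fd;
	args.subcontext_id = veid;	/* > VEID0 selects the async runqueue */

	return ioctl(tsg_fd, NVGPU_TSG_IOCTL_BIND_CHANNEL_EX, &args);
}
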
122 | |||
123 | static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) | ||
124 | { | ||
125 | struct channel_gk20a *ch; | ||
126 | int err = 0; | ||
127 | |||
128 | ch = gk20a_get_channel_from_file(ch_fd); | ||
129 | if (!ch) | ||
130 | return -EINVAL; | ||
131 | |||
132 | if (ch->tsgid != tsg->tsgid) { | ||
133 | err = -EINVAL; | ||
134 | goto out; | ||
135 | } | ||
136 | |||
137 | err = gk20a_tsg_unbind_channel(ch); | ||
138 | |||
139 | /* | ||
140 | 	 * Mark the channel as timed out: a channel unbound from its TSG | ||
141 | 	 * has no context of its own, so it cannot serve any more jobs. | ||
142 | */ | ||
143 | gk20a_channel_set_timedout(ch); | ||
144 | |||
145 | out: | ||
146 | gk20a_channel_put(ch); | ||
147 | return err; | ||
148 | } | ||
149 | |||
150 | static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, | ||
151 | unsigned int event_id, | ||
152 | struct gk20a_event_id_data **event_id_data) | ||
153 | { | ||
154 | struct gk20a_event_id_data *local_event_id_data; | ||
155 | bool event_found = false; | ||
156 | |||
157 | nvgpu_mutex_acquire(&tsg->event_id_list_lock); | ||
158 | nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list, | ||
159 | gk20a_event_id_data, event_id_node) { | ||
160 | if (local_event_id_data->event_id == event_id) { | ||
161 | event_found = true; | ||
162 | break; | ||
163 | } | ||
164 | } | ||
165 | nvgpu_mutex_release(&tsg->event_id_list_lock); | ||
166 | |||
167 | if (event_found) { | ||
168 | *event_id_data = local_event_id_data; | ||
169 | return 0; | ||
170 | } else { | ||
171 | return -1; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | /* | ||
176 | * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific | ||
177 | * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs | ||
178 | */ | ||
179 | static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id) | ||
180 | { | ||
181 | switch (event_id) { | ||
182 | case NVGPU_EVENT_ID_BPT_INT: | ||
183 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT; | ||
184 | case NVGPU_EVENT_ID_BPT_PAUSE: | ||
185 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE; | ||
186 | case NVGPU_EVENT_ID_BLOCKING_SYNC: | ||
187 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC; | ||
188 | case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED: | ||
189 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED; | ||
190 | case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE: | ||
191 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE; | ||
192 | case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN: | ||
193 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN; | ||
194 | } | ||
195 | |||
196 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX; | ||
197 | } | ||
198 | |||
199 | void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, | ||
200 | int __event_id) | ||
201 | { | ||
202 | struct gk20a_event_id_data *event_id_data; | ||
203 | u32 event_id; | ||
204 | int err = 0; | ||
205 | struct gk20a *g = tsg->g; | ||
206 | |||
207 | event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id); | ||
208 | if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) | ||
209 | return; | ||
210 | |||
211 | err = gk20a_tsg_get_event_data_from_id(tsg, event_id, | ||
212 | &event_id_data); | ||
213 | if (err) | ||
214 | return; | ||
215 | |||
216 | nvgpu_mutex_acquire(&event_id_data->lock); | ||
217 | |||
218 | nvgpu_log_info(g, | ||
219 | "posting event for event_id=%d on tsg=%d\n", | ||
220 | event_id, tsg->tsgid); | ||
221 | event_id_data->event_posted = true; | ||
222 | |||
223 | nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq); | ||
224 | |||
225 | nvgpu_mutex_release(&event_id_data->lock); | ||
226 | } | ||
227 | |||
228 | static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) | ||
229 | { | ||
230 | unsigned int mask = 0; | ||
231 | struct gk20a_event_id_data *event_id_data = filep->private_data; | ||
232 | struct gk20a *g = event_id_data->g; | ||
233 | u32 event_id = event_id_data->event_id; | ||
234 | struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; | ||
235 | |||
236 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " "); | ||
237 | |||
238 | poll_wait(filep, &event_id_data->event_id_wq.wq, wait); | ||
239 | |||
240 | nvgpu_mutex_acquire(&event_id_data->lock); | ||
241 | |||
242 | if (event_id_data->event_posted) { | ||
243 | nvgpu_log_info(g, | ||
244 | "found pending event_id=%d on TSG=%d\n", | ||
245 | event_id, tsg->tsgid); | ||
246 | mask = (POLLPRI | POLLIN); | ||
247 | event_id_data->event_posted = false; | ||
248 | } | ||
249 | |||
250 | nvgpu_mutex_release(&event_id_data->lock); | ||
251 | |||
252 | return mask; | ||
253 | } | ||
254 | |||
255 | static int gk20a_event_id_release(struct inode *inode, struct file *filp) | ||
256 | { | ||
257 | struct gk20a_event_id_data *event_id_data = filp->private_data; | ||
258 | struct gk20a *g = event_id_data->g; | ||
259 | struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; | ||
260 | |||
261 | nvgpu_mutex_acquire(&tsg->event_id_list_lock); | ||
262 | nvgpu_list_del(&event_id_data->event_id_node); | ||
263 | nvgpu_mutex_release(&tsg->event_id_list_lock); | ||
264 | |||
265 | nvgpu_mutex_destroy(&event_id_data->lock); | ||
266 | gk20a_put(g); | ||
267 | nvgpu_kfree(g, event_id_data); | ||
268 | filp->private_data = NULL; | ||
269 | |||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | const struct file_operations gk20a_event_id_ops = { | ||
274 | .owner = THIS_MODULE, | ||
275 | .poll = gk20a_event_id_poll, | ||
276 | .release = gk20a_event_id_release, | ||
277 | }; | ||
278 | |||
279 | static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, | ||
280 | int event_id, | ||
281 | int *fd) | ||
282 | { | ||
283 | int err = 0; | ||
284 | int local_fd; | ||
285 | struct file *file; | ||
286 | char name[64]; | ||
287 | struct gk20a_event_id_data *event_id_data; | ||
288 | struct gk20a *g; | ||
289 | |||
290 | g = gk20a_get(tsg->g); | ||
291 | if (!g) | ||
292 | return -ENODEV; | ||
293 | |||
294 | err = gk20a_tsg_get_event_data_from_id(tsg, | ||
295 | event_id, &event_id_data); | ||
296 | if (err == 0) { | ||
297 | /* We already have event enabled */ | ||
298 | err = -EINVAL; | ||
299 | goto free_ref; | ||
300 | } | ||
301 | |||
302 | err = get_unused_fd_flags(O_RDWR); | ||
303 | if (err < 0) | ||
304 | goto free_ref; | ||
305 | local_fd = err; | ||
306 | |||
307 | snprintf(name, sizeof(name), "nvgpu-event%d-fd%d", | ||
308 | event_id, local_fd); | ||
309 | |||
310 | file = anon_inode_getfile(name, &gk20a_event_id_ops, | ||
311 | NULL, O_RDWR); | ||
312 | if (IS_ERR(file)) { | ||
313 | err = PTR_ERR(file); | ||
314 | goto clean_up; | ||
315 | } | ||
316 | |||
317 | event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data)); | ||
318 | if (!event_id_data) { | ||
319 | err = -ENOMEM; | ||
320 | goto clean_up_file; | ||
321 | } | ||
322 | event_id_data->g = g; | ||
323 | event_id_data->id = tsg->tsgid; | ||
324 | event_id_data->event_id = event_id; | ||
325 | |||
326 | nvgpu_cond_init(&event_id_data->event_id_wq); | ||
327 | err = nvgpu_mutex_init(&event_id_data->lock); | ||
328 | if (err) | ||
329 | goto clean_up_free; | ||
330 | |||
331 | nvgpu_init_list_node(&event_id_data->event_id_node); | ||
332 | |||
333 | nvgpu_mutex_acquire(&tsg->event_id_list_lock); | ||
334 | nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); | ||
335 | nvgpu_mutex_release(&tsg->event_id_list_lock); | ||
336 | |||
337 | fd_install(local_fd, file); | ||
338 | file->private_data = event_id_data; | ||
339 | |||
340 | *fd = local_fd; | ||
341 | |||
342 | return 0; | ||
343 | |||
344 | clean_up_free: | ||
345 | nvgpu_kfree(g, event_id_data); | ||
346 | clean_up_file: | ||
347 | fput(file); | ||
348 | clean_up: | ||
349 | put_unused_fd(local_fd); | ||
350 | free_ref: | ||
351 | gk20a_put(g); | ||
352 | return err; | ||
353 | } | ||
354 | |||
355 | static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, | ||
356 | struct nvgpu_event_id_ctrl_args *args) | ||
357 | { | ||
358 | int err = 0; | ||
359 | int fd = -1; | ||
360 | |||
361 | if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) | ||
362 | return -EINVAL; | ||
363 | |||
364 | nvgpu_speculation_barrier(); | ||
365 | switch (args->cmd) { | ||
366 | case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: | ||
367 | err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); | ||
368 | if (!err) | ||
369 | args->event_fd = fd; | ||
370 | break; | ||
371 | |||
372 | default: | ||
373 | nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x", | ||
374 | args->cmd); | ||
375 | err = -EINVAL; | ||
376 | break; | ||
377 | } | ||
378 | |||
379 | return err; | ||
380 | } | ||
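
The enable path hands user space an anonymous-inode fd whose poll method is `gk20a_event_id_poll()` above: a posted event latches `event_posted` and wakes the queue, and a successful poll consumes it. A hedged user-space sketch of waiting on one TSG event; the constants and fields are the ones used in this file.

/* Hedged user-space sketch: wait for a blocking-sync event on a TSG. */
#include <poll.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int tsg_wait_blocking_sync(int tsg_fd)
{
	struct nvgpu_event_id_ctrl_args args = {
		.cmd = NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE,
		.event_id = NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC,
	};
	struct pollfd pfd;

	if (ioctl(tsg_fd, NVGPU_IOCTL_TSG_EVENT_ID_CTRL, &args) < 0)
		return -1;

	pfd.fd = args.event_fd;
	pfd.events = POLLIN | POLLPRI;
	/* Blocks until gk20a_tsg_event_id_post_event() fires for this id. */
	return poll(&pfd, 1, -1);
}
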
381 | |||
382 | int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp) | ||
383 | { | ||
384 | struct tsg_private *priv; | ||
385 | struct tsg_gk20a *tsg; | ||
386 | struct device *dev; | ||
387 | int err; | ||
388 | |||
389 | g = gk20a_get(g); | ||
390 | if (!g) | ||
391 | return -ENODEV; | ||
392 | |||
393 | dev = dev_from_gk20a(g); | ||
394 | |||
395 | nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev)); | ||
396 | |||
397 | priv = nvgpu_kmalloc(g, sizeof(*priv)); | ||
398 | if (!priv) { | ||
399 | err = -ENOMEM; | ||
400 | goto free_ref; | ||
401 | } | ||
402 | |||
403 | err = gk20a_busy(g); | ||
404 | if (err) { | ||
405 | nvgpu_err(g, "failed to power on, %d", err); | ||
406 | goto free_mem; | ||
407 | } | ||
408 | |||
409 | tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); | ||
410 | gk20a_idle(g); | ||
411 | if (!tsg) { | ||
412 | err = -ENOMEM; | ||
413 | goto free_mem; | ||
414 | } | ||
415 | |||
416 | priv->g = g; | ||
417 | priv->tsg = tsg; | ||
418 | filp->private_data = priv; | ||
419 | |||
420 | gk20a_sched_ctrl_tsg_added(g, tsg); | ||
421 | |||
422 | return 0; | ||
423 | |||
424 | free_mem: | ||
425 | nvgpu_kfree(g, priv); | ||
426 | free_ref: | ||
427 | gk20a_put(g); | ||
428 | return err; | ||
429 | } | ||
430 | |||
431 | int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp) | ||
432 | { | ||
433 | struct nvgpu_os_linux *l; | ||
434 | struct gk20a *g; | ||
435 | int ret; | ||
436 | |||
437 | l = container_of(inode->i_cdev, | ||
438 | struct nvgpu_os_linux, tsg.cdev); | ||
439 | g = &l->g; | ||
440 | |||
441 | nvgpu_log_fn(g, " "); | ||
442 | |||
443 | ret = gk20a_busy(g); | ||
444 | if (ret) { | ||
445 | nvgpu_err(g, "failed to power on, %d", ret); | ||
446 | return ret; | ||
447 | } | ||
448 | |||
449 | ret = nvgpu_ioctl_tsg_open(&l->g, filp); | ||
450 | |||
451 | gk20a_idle(g); | ||
452 | nvgpu_log_fn(g, "done"); | ||
453 | return ret; | ||
454 | } | ||
455 | |||
456 | void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref) | ||
457 | { | ||
458 | struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); | ||
459 | struct gk20a *g = tsg->g; | ||
460 | |||
461 | gk20a_sched_ctrl_tsg_removed(g, tsg); | ||
462 | |||
463 | gk20a_tsg_release(ref); | ||
464 | gk20a_put(g); | ||
465 | } | ||
466 | |||
467 | int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp) | ||
468 | { | ||
469 | struct tsg_private *priv = filp->private_data; | ||
470 | struct tsg_gk20a *tsg; | ||
471 | |||
472 | if (!priv) { | ||
473 | /* open failed, never got a tsg for this file */ | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | tsg = priv->tsg; | ||
478 | |||
479 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
480 | nvgpu_kfree(tsg->g, priv); | ||
481 | return 0; | ||
482 | } | ||
483 | |||
484 | static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, | ||
485 | struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg) | ||
486 | { | ||
487 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
488 | u32 level = arg->level; | ||
489 | int err; | ||
490 | |||
491 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
492 | |||
493 | nvgpu_mutex_acquire(&sched->control_lock); | ||
494 | if (sched->control_locked) { | ||
495 | err = -EPERM; | ||
496 | goto done; | ||
497 | } | ||
498 | err = gk20a_busy(g); | ||
499 | if (err) { | ||
500 | nvgpu_err(g, "failed to power on gpu"); | ||
501 | goto done; | ||
502 | } | ||
503 | |||
504 | level = nvgpu_get_common_runlist_level(level); | ||
505 | err = gk20a_tsg_set_runlist_interleave(tsg, level); | ||
506 | |||
507 | gk20a_idle(g); | ||
508 | done: | ||
509 | nvgpu_mutex_release(&sched->control_lock); | ||
510 | return err; | ||
511 | } | ||
512 | |||
513 | static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, | ||
514 | struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) | ||
515 | { | ||
516 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
517 | int err; | ||
518 | |||
519 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
520 | |||
521 | nvgpu_mutex_acquire(&sched->control_lock); | ||
522 | if (sched->control_locked) { | ||
523 | err = -EPERM; | ||
524 | goto done; | ||
525 | } | ||
526 | err = gk20a_busy(g); | ||
527 | if (err) { | ||
528 | nvgpu_err(g, "failed to power on gpu"); | ||
529 | goto done; | ||
530 | } | ||
531 | err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); | ||
532 | gk20a_idle(g); | ||
533 | done: | ||
534 | nvgpu_mutex_release(&sched->control_lock); | ||
535 | return err; | ||
536 | } | ||
537 | |||
538 | static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g, | ||
539 | struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) | ||
540 | { | ||
541 | arg->timeslice_us = gk20a_tsg_get_timeslice(tsg); | ||
542 | return 0; | ||
543 | } | ||
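
Set and get are deliberately asymmetric: setting goes through the scheduler control lock and powers the GPU on, while reading is a plain field fetch. A hedged usage sketch:

/* Hedged user-space sketch: double a TSG's timeslice. */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int tsg_double_timeslice(int tsg_fd)
{
	struct nvgpu_timeslice_args args = { 0 };

	if (ioctl(tsg_fd, NVGPU_IOCTL_TSG_GET_TIMESLICE, &args) < 0)
		return -1;

	args.timeslice_us *= 2;
	/* -EPERM while a sched-control client holds control_lock. */
	return ioctl(tsg_fd, NVGPU_IOCTL_TSG_SET_TIMESLICE, &args);
}
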
544 | |||
545 | static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g, | ||
546 | struct tsg_gk20a *tsg, | ||
547 | struct nvgpu_tsg_read_single_sm_error_state_args *args) | ||
548 | { | ||
549 | struct gr_gk20a *gr = &g->gr; | ||
550 | struct nvgpu_tsg_sm_error_state *sm_error_state; | ||
551 | struct nvgpu_tsg_sm_error_state_record sm_error_state_record; | ||
552 | u32 sm_id; | ||
553 | int err = 0; | ||
554 | |||
555 | sm_id = args->sm_id; | ||
556 | if (sm_id >= gr->no_of_sm) | ||
557 | return -EINVAL; | ||
558 | |||
559 | nvgpu_speculation_barrier(); | ||
560 | |||
561 | sm_error_state = tsg->sm_error_states + sm_id; | ||
562 | sm_error_state_record.global_esr = | ||
563 | sm_error_state->hww_global_esr; | ||
564 | sm_error_state_record.warp_esr = | ||
565 | sm_error_state->hww_warp_esr; | ||
566 | sm_error_state_record.warp_esr_pc = | ||
567 | sm_error_state->hww_warp_esr_pc; | ||
568 | sm_error_state_record.global_esr_report_mask = | ||
569 | sm_error_state->hww_global_esr_report_mask; | ||
570 | sm_error_state_record.warp_esr_report_mask = | ||
571 | sm_error_state->hww_warp_esr_report_mask; | ||
572 | |||
573 | if (args->record_size > 0) { | ||
574 | 		size_t write_size = sizeof(sm_error_state_record); | ||
575 | |||
576 | nvgpu_speculation_barrier(); | ||
577 | if (write_size > args->record_size) | ||
578 | write_size = args->record_size; | ||
579 | |||
580 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
581 | 		err = copy_to_user((void __user *)(uintptr_t) | ||
582 | 				args->record_mem, | ||
583 | 				&sm_error_state_record, | ||
584 | 				write_size) ? -EFAULT : 0; | ||
585 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
586 | if (err) { | ||
587 | nvgpu_err(g, "copy_to_user failed!"); | ||
588 | return err; | ||
589 | } | ||
590 | |||
591 | args->record_size = write_size; | ||
592 | } | ||
593 | |||
594 | return 0; | ||
595 | } | ||
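
The record is staged on the stack and truncated to the caller's `record_size`, so user space can pass a smaller (older) record layout and still receive the leading fields. A hedged sketch of the call, assuming the record struct is exported through the same uapi header:

/* Hedged user-space sketch: fetch the error state of one SM. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int tsg_read_sm_error(int tsg_fd, __u32 sm_id,
			     struct nvgpu_tsg_sm_error_state_record *rec)
{
	struct nvgpu_tsg_read_single_sm_error_state_args args = { 0 };

	args.sm_id = sm_id;
	args.record_mem = (__u64)(uintptr_t)rec;
	args.record_size = sizeof(*rec);

	/* On return, record_size holds the number of bytes actually written. */
	return ioctl(tsg_fd, NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE, &args);
}
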
596 | |||
597 | long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, | ||
598 | unsigned long arg) | ||
599 | { | ||
600 | struct tsg_private *priv = filp->private_data; | ||
601 | struct tsg_gk20a *tsg = priv->tsg; | ||
602 | struct gk20a *g = tsg->g; | ||
603 | u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE]; | ||
604 | int err = 0; | ||
605 | |||
606 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
607 | |||
608 | if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) || | ||
609 | (_IOC_NR(cmd) == 0) || | ||
610 | (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) || | ||
611 | (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE)) | ||
612 | return -EINVAL; | ||
613 | |||
614 | memset(buf, 0, sizeof(buf)); | ||
615 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
616 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
617 | return -EFAULT; | ||
618 | } | ||
619 | |||
620 | if (!g->sw_ready) { | ||
621 | err = gk20a_busy(g); | ||
622 | if (err) | ||
623 | return err; | ||
624 | |||
625 | gk20a_idle(g); | ||
626 | } | ||
627 | |||
628 | switch (cmd) { | ||
629 | case NVGPU_TSG_IOCTL_BIND_CHANNEL: | ||
630 | { | ||
631 | int ch_fd = *(int *)buf; | ||
632 | if (ch_fd < 0) { | ||
633 | err = -EINVAL; | ||
634 | break; | ||
635 | } | ||
636 | err = gk20a_tsg_bind_channel_fd(tsg, ch_fd); | ||
637 | break; | ||
638 | } | ||
639 | |||
640 | case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: | ||
641 | { | ||
642 | err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg, | ||
643 | (struct nvgpu_tsg_bind_channel_ex_args *)buf); | ||
644 | break; | ||
645 | } | ||
646 | |||
647 | case NVGPU_TSG_IOCTL_UNBIND_CHANNEL: | ||
648 | { | ||
649 | int ch_fd = *(int *)buf; | ||
650 | |||
651 | if (ch_fd < 0) { | ||
652 | err = -EINVAL; | ||
653 | break; | ||
654 | } | ||
655 | err = gk20a_busy(g); | ||
656 | if (err) { | ||
657 | nvgpu_err(g, | ||
658 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
659 | break; | ||
660 | } | ||
661 | err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd); | ||
662 | gk20a_idle(g); | ||
663 | break; | ||
664 | } | ||
665 | |||
666 | case NVGPU_IOCTL_TSG_ENABLE: | ||
667 | { | ||
668 | err = gk20a_busy(g); | ||
669 | if (err) { | ||
670 | nvgpu_err(g, | ||
671 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
672 | return err; | ||
673 | } | ||
674 | g->ops.fifo.enable_tsg(tsg); | ||
675 | gk20a_idle(g); | ||
676 | break; | ||
677 | } | ||
678 | |||
679 | case NVGPU_IOCTL_TSG_DISABLE: | ||
680 | { | ||
681 | err = gk20a_busy(g); | ||
682 | if (err) { | ||
683 | nvgpu_err(g, | ||
684 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
685 | return err; | ||
686 | } | ||
687 | g->ops.fifo.disable_tsg(tsg); | ||
688 | gk20a_idle(g); | ||
689 | break; | ||
690 | } | ||
691 | |||
692 | case NVGPU_IOCTL_TSG_PREEMPT: | ||
693 | { | ||
694 | err = gk20a_busy(g); | ||
695 | if (err) { | ||
696 | nvgpu_err(g, | ||
697 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
698 | return err; | ||
699 | } | ||
700 | /* preempt TSG */ | ||
701 | err = g->ops.fifo.preempt_tsg(g, tsg); | ||
702 | gk20a_idle(g); | ||
703 | break; | ||
704 | } | ||
705 | |||
706 | case NVGPU_IOCTL_TSG_EVENT_ID_CTRL: | ||
707 | { | ||
708 | err = gk20a_tsg_event_id_ctrl(g, tsg, | ||
709 | (struct nvgpu_event_id_ctrl_args *)buf); | ||
710 | break; | ||
711 | } | ||
712 | |||
713 | case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: | ||
714 | err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg, | ||
715 | (struct nvgpu_runlist_interleave_args *)buf); | ||
716 | break; | ||
717 | |||
718 | case NVGPU_IOCTL_TSG_SET_TIMESLICE: | ||
719 | { | ||
720 | err = gk20a_tsg_ioctl_set_timeslice(g, tsg, | ||
721 | (struct nvgpu_timeslice_args *)buf); | ||
722 | break; | ||
723 | } | ||
724 | case NVGPU_IOCTL_TSG_GET_TIMESLICE: | ||
725 | { | ||
726 | err = gk20a_tsg_ioctl_get_timeslice(g, tsg, | ||
727 | (struct nvgpu_timeslice_args *)buf); | ||
728 | break; | ||
729 | } | ||
730 | |||
731 | case NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE: | ||
732 | { | ||
733 | err = gk20a_tsg_ioctl_read_single_sm_error_state(g, tsg, | ||
734 | (struct nvgpu_tsg_read_single_sm_error_state_args *)buf); | ||
735 | break; | ||
736 | } | ||
737 | |||
738 | default: | ||
739 | nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", | ||
740 | cmd); | ||
741 | err = -ENOTTY; | ||
742 | break; | ||
743 | } | ||
744 | |||
745 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
746 | 		err = copy_to_user((void __user *)arg, | ||
747 | 				buf, _IOC_SIZE(cmd)) ? -EFAULT : 0; | ||
748 | |||
749 | return err; | ||
750 | } | ||
diff --git a/include/os/linux/ioctl_tsg.h b/include/os/linux/ioctl_tsg.h new file mode 100644 index 0000000..67399fd --- /dev/null +++ b/include/os/linux/ioctl_tsg.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | #ifndef NVGPU_IOCTL_TSG_H | ||
14 | #define NVGPU_IOCTL_TSG_H | ||
15 | |||
16 | struct inode; | ||
17 | struct file; | ||
18 | struct gk20a; | ||
19 | struct nvgpu_ref; | ||
20 | |||
21 | int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp); | ||
22 | int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp); | ||
23 | int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp); | ||
24 | long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, | ||
25 | unsigned int cmd, unsigned long arg); | ||
26 | void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref); | ||
27 | |||
28 | #endif | ||
diff --git a/include/os/linux/kmem.c b/include/os/linux/kmem.c new file mode 100644 index 0000000..395cc45 --- /dev/null +++ b/include/os/linux/kmem.c | |||
@@ -0,0 +1,653 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/mm.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include <linux/stacktrace.h> | ||
23 | |||
24 | #include <nvgpu/lock.h> | ||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/atomic.h> | ||
27 | #include <nvgpu/bug.h> | ||
28 | #include <nvgpu/gk20a.h> | ||
29 | |||
30 | #include "kmem_priv.h" | ||
31 | |||
32 | /* | ||
33 | * Statically declared because this needs to be shared across all nvgpu driver | ||
34 | * instances. This makes sure that all kmem caches are _definitely_ uniquely | ||
35 | * named. | ||
36 | */ | ||
37 | static atomic_t kmem_cache_id; | ||
38 | |||
39 | void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear) | ||
40 | { | ||
41 | void *p; | ||
42 | |||
43 | if (size > PAGE_SIZE) { | ||
44 | if (clear) | ||
45 | p = nvgpu_vzalloc(g, size); | ||
46 | else | ||
47 | p = nvgpu_vmalloc(g, size); | ||
48 | } else { | ||
49 | if (clear) | ||
50 | p = nvgpu_kzalloc(g, size); | ||
51 | else | ||
52 | p = nvgpu_kmalloc(g, size); | ||
53 | } | ||
54 | |||
55 | return p; | ||
56 | } | ||
57 | |||
58 | void nvgpu_big_free(struct gk20a *g, void *p) | ||
59 | { | ||
60 | /* | ||
61 | * This will have to be fixed eventually. Allocs that use | ||
62 | * nvgpu_big_[mz]alloc() will need to remember the size of the alloc | ||
63 | * when freeing. | ||
64 | */ | ||
65 | if (is_vmalloc_addr(p)) | ||
66 | nvgpu_vfree(g, p); | ||
67 | else | ||
68 | nvgpu_kfree(g, p); | ||
69 | } | ||
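
The pair above dispatches on a page-size threshold: small requests go through the slab allocator, larger ones through vmalloc, and the free side recovers which allocator produced the pointer from the address itself via is_vmalloc_addr(), so no size needs to be remembered. A minimal sketch of the same pattern using only stock kernel APIs:

```c
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/* Size-threshold dispatch, as in nvgpu_big_alloc()/nvgpu_big_free(). */
static void *big_alloc(size_t size)
{
	return size > PAGE_SIZE ? vmalloc(size) : kmalloc(size, GFP_KERNEL);
}

static void big_free(void *p)
{
	/* is_vmalloc_addr() tells us which allocator to return p to. */
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}
```

Modern kernels provide kvmalloc()/kvfree() for exactly this pattern, which is why the comment above notes this will have to be revisited eventually.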
70 | |||
71 | void *__nvgpu_kmalloc(struct gk20a *g, size_t size, void *ip) | ||
72 | { | ||
73 | void *alloc; | ||
74 | |||
75 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
76 | alloc = __nvgpu_track_kmalloc(g, size, ip); | ||
77 | #else | ||
78 | alloc = kmalloc(size, GFP_KERNEL); | ||
79 | #endif | ||
80 | |||
81 | 	kmem_dbg(g, "kmalloc: size=%-6zu addr=0x%p gfp=0x%08x", | ||
82 | size, alloc, GFP_KERNEL); | ||
83 | |||
84 | return alloc; | ||
85 | } | ||
86 | |||
87 | void *__nvgpu_kzalloc(struct gk20a *g, size_t size, void *ip) | ||
88 | { | ||
89 | void *alloc; | ||
90 | |||
91 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
92 | alloc = __nvgpu_track_kzalloc(g, size, ip); | ||
93 | #else | ||
94 | alloc = kzalloc(size, GFP_KERNEL); | ||
95 | #endif | ||
96 | |||
97 | 	kmem_dbg(g, "kzalloc: size=%-6zu addr=0x%p gfp=0x%08x", | ||
98 | size, alloc, GFP_KERNEL); | ||
99 | |||
100 | return alloc; | ||
101 | } | ||
102 | |||
103 | void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, void *ip) | ||
104 | { | ||
105 | void *alloc; | ||
106 | |||
107 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
108 | alloc = __nvgpu_track_kcalloc(g, n, size, ip); | ||
109 | #else | ||
110 | alloc = kcalloc(n, size, GFP_KERNEL); | ||
111 | #endif | ||
112 | |||
113 | 	kmem_dbg(g, "kcalloc: size=%-6zu addr=0x%p gfp=0x%08x", | ||
114 | n * size, alloc, GFP_KERNEL); | ||
115 | |||
116 | return alloc; | ||
117 | } | ||
118 | |||
119 | void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, void *ip) | ||
120 | { | ||
121 | void *alloc; | ||
122 | |||
123 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
124 | alloc = __nvgpu_track_vmalloc(g, size, ip); | ||
125 | #else | ||
126 | alloc = vmalloc(size); | ||
127 | #endif | ||
128 | |||
129 | kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc); | ||
130 | |||
131 | return alloc; | ||
132 | } | ||
133 | |||
134 | void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, void *ip) | ||
135 | { | ||
136 | void *alloc; | ||
137 | |||
138 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
139 | alloc = __nvgpu_track_vzalloc(g, size, ip); | ||
140 | #else | ||
141 | alloc = vzalloc(size); | ||
142 | #endif | ||
143 | |||
144 | kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc); | ||
145 | |||
146 | return alloc; | ||
147 | } | ||
148 | |||
149 | void __nvgpu_kfree(struct gk20a *g, void *addr) | ||
150 | { | ||
151 | kmem_dbg(g, "kfree: addr=0x%p", addr); | ||
152 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
153 | __nvgpu_track_kfree(g, addr); | ||
154 | #else | ||
155 | kfree(addr); | ||
156 | #endif | ||
157 | } | ||
158 | |||
159 | void __nvgpu_vfree(struct gk20a *g, void *addr) | ||
160 | { | ||
161 | kmem_dbg(g, "vfree: addr=0x%p", addr); | ||
162 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
163 | __nvgpu_track_vfree(g, addr); | ||
164 | #else | ||
165 | vfree(addr); | ||
166 | #endif | ||
167 | } | ||
168 | |||
169 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
170 | |||
171 | void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) | ||
172 | { | ||
173 | nvgpu_mutex_acquire(&tracker->lock); | ||
174 | } | ||
175 | |||
176 | void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) | ||
177 | { | ||
178 | nvgpu_mutex_release(&tracker->lock); | ||
179 | } | ||
180 | |||
181 | void kmem_print_mem_alloc(struct gk20a *g, | ||
182 | struct nvgpu_mem_alloc *alloc, | ||
183 | struct seq_file *s) | ||
184 | { | ||
185 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
186 | int i; | ||
187 | |||
188 | __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", | ||
189 | alloc->addr, alloc->size); | ||
190 | for (i = 0; i < alloc->stack_length; i++) | ||
191 | __pstat(s, " %3d [<%p>] %pS\n", i, | ||
192 | (void *)alloc->stack[i], | ||
193 | (void *)alloc->stack[i]); | ||
194 | __pstat(s, "\n"); | ||
195 | #else | ||
196 | __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", | ||
197 | alloc->addr, alloc->size, alloc->ip); | ||
198 | #endif | ||
199 | } | ||
200 | |||
201 | static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
202 | struct nvgpu_mem_alloc *alloc) | ||
203 | { | ||
204 | alloc->allocs_entry.key_start = alloc->addr; | ||
205 | alloc->allocs_entry.key_end = alloc->addr + alloc->size; | ||
206 | |||
207 | nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs); | ||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | static struct nvgpu_mem_alloc *nvgpu_rem_alloc( | ||
212 | struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr) | ||
213 | { | ||
214 | struct nvgpu_mem_alloc *alloc; | ||
215 | struct nvgpu_rbtree_node *node = NULL; | ||
216 | |||
217 | nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs); | ||
218 | if (!node) | ||
219 | return NULL; | ||
220 | |||
221 | alloc = nvgpu_mem_alloc_from_rbtree_node(node); | ||
222 | |||
223 | nvgpu_rbtree_unlink(node, &tracker->allocs); | ||
224 | |||
225 | return alloc; | ||
226 | } | ||
227 | |||
228 | static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
229 | unsigned long size, unsigned long real_size, | ||
230 | u64 addr, void *ip) | ||
231 | { | ||
232 | int ret; | ||
233 | struct nvgpu_mem_alloc *alloc; | ||
234 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
235 | struct stack_trace stack_trace; | ||
236 | #endif | ||
237 | |||
238 | alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); | ||
239 | if (!alloc) | ||
240 | return -ENOMEM; | ||
241 | |||
242 | alloc->owner = tracker; | ||
243 | alloc->size = size; | ||
244 | alloc->real_size = real_size; | ||
245 | alloc->addr = addr; | ||
246 | alloc->ip = ip; | ||
247 | |||
248 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
249 | stack_trace.max_entries = MAX_STACK_TRACE; | ||
250 | stack_trace.nr_entries = 0; | ||
251 | stack_trace.entries = alloc->stack; | ||
252 | 	/* | ||
253 | 	 * This skip count of 4 covers the two function calls that nvgpu adds | ||
254 | 	 * to every traced alloc: | ||
255 | 	 * | ||
256 | 	 * __nvgpu_save_kmem_alloc+0x7c/0x128 | ||
257 | 	 * __nvgpu_track_kzalloc+0xcc/0xf8 | ||
258 | 	 * | ||
259 | 	 * plus the function calls made by the stack trace code itself. If | ||
260 | 	 * the trace saving code changes, this will likely have to change | ||
261 | 	 * as well. | ||
262 | 	 */ | ||
263 | stack_trace.skip = 4; | ||
264 | save_stack_trace(&stack_trace); | ||
265 | alloc->stack_length = stack_trace.nr_entries; | ||
266 | #endif | ||
267 | |||
268 | nvgpu_lock_tracker(tracker); | ||
269 | tracker->bytes_alloced += size; | ||
270 | tracker->bytes_alloced_real += real_size; | ||
271 | tracker->nr_allocs++; | ||
272 | |||
273 | /* Keep track of this for building a histogram later on. */ | ||
274 | if (tracker->max_alloc < size) | ||
275 | tracker->max_alloc = size; | ||
276 | if (tracker->min_alloc > size) | ||
277 | tracker->min_alloc = size; | ||
278 | |||
279 | ret = nvgpu_add_alloc(tracker, alloc); | ||
280 | if (ret) { | ||
281 | WARN(1, "Duplicate alloc??? 0x%llx\n", addr); | ||
282 | kfree(alloc); | ||
283 | nvgpu_unlock_tracker(tracker); | ||
284 | return ret; | ||
285 | } | ||
286 | nvgpu_unlock_tracker(tracker); | ||
287 | |||
288 | return 0; | ||
289 | } | ||
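
The __NVGPU_SAVE_KALLOC_STACK_TRACES path above uses the old struct stack_trace API with a hand-tuned skip count. On newer kernels (v5.2+) that API was replaced by stack_trace_save(); a hedged sketch of the equivalent capture, where the helper name and the skip count of 2 are illustrative:

```c
#include <linux/stacktrace.h>

#define MAX_STACK_TRACE 20

/*
 * Capture a trimmed backtrace with the post-v5.2 API. skipnr = 2
 * drops this helper and its immediate caller, mirroring the
 * "skip the tracking wrappers" intent of stack_trace.skip above.
 */
static unsigned int capture_alloc_stack(unsigned long *entries)
{
	return stack_trace_save(entries, MAX_STACK_TRACE, 2);
}
```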
290 | |||
291 | static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
292 | u64 addr) | ||
293 | { | ||
294 | struct nvgpu_mem_alloc *alloc; | ||
295 | |||
296 | nvgpu_lock_tracker(tracker); | ||
297 | alloc = nvgpu_rem_alloc(tracker, addr); | ||
298 | if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { | ||
299 | nvgpu_unlock_tracker(tracker); | ||
300 | return -EINVAL; | ||
301 | } | ||
302 | |||
303 | memset((void *)alloc->addr, 0, alloc->size); | ||
304 | |||
305 | tracker->nr_frees++; | ||
306 | tracker->bytes_freed += alloc->size; | ||
307 | tracker->bytes_freed_real += alloc->real_size; | ||
308 | nvgpu_unlock_tracker(tracker); | ||
309 | |||
310 | return 0; | ||
311 | } | ||
312 | |||
313 | static void __nvgpu_check_valloc_size(unsigned long size) | ||
314 | { | ||
315 | WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size); | ||
316 | } | ||
317 | |||
318 | static void __nvgpu_check_kalloc_size(size_t size) | ||
319 | { | ||
320 | WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size); | ||
321 | } | ||
322 | |||
323 | void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, | ||
324 | void *ip) | ||
325 | { | ||
326 | void *alloc = vmalloc(size); | ||
327 | |||
328 | if (!alloc) | ||
329 | return NULL; | ||
330 | |||
331 | __nvgpu_check_valloc_size(size); | ||
332 | |||
333 | 	/* | ||
334 | 	 * Ignore the return value. If this fails, don't cause any issues | ||
335 | 	 * for the rest of the driver. | ||
336 | 	 */ | ||
337 | __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), | ||
338 | (u64)(uintptr_t)alloc, ip); | ||
339 | |||
340 | return alloc; | ||
341 | } | ||
342 | |||
343 | void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, | ||
344 | void *ip) | ||
345 | { | ||
346 | void *alloc = vzalloc(size); | ||
347 | |||
348 | if (!alloc) | ||
349 | return NULL; | ||
350 | |||
351 | __nvgpu_check_valloc_size(size); | ||
352 | |||
353 | 	/* | ||
354 | 	 * Ignore the return value. If this fails, don't cause any issues | ||
355 | 	 * for the rest of the driver. | ||
356 | 	 */ | ||
357 | __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), | ||
358 | (u64)(uintptr_t)alloc, ip); | ||
359 | |||
360 | return alloc; | ||
361 | } | ||
362 | |||
363 | void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, void *ip) | ||
364 | { | ||
365 | void *alloc = kmalloc(size, GFP_KERNEL); | ||
366 | |||
367 | if (!alloc) | ||
368 | return NULL; | ||
369 | |||
370 | __nvgpu_check_kalloc_size(size); | ||
371 | |||
372 | __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), | ||
373 | (u64)(uintptr_t)alloc, ip); | ||
374 | |||
375 | return alloc; | ||
376 | } | ||
377 | |||
378 | void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, void *ip) | ||
379 | { | ||
380 | void *alloc = kzalloc(size, GFP_KERNEL); | ||
381 | |||
382 | if (!alloc) | ||
383 | return NULL; | ||
384 | |||
385 | __nvgpu_check_kalloc_size(size); | ||
386 | |||
387 | __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), | ||
388 | (u64)(uintptr_t)alloc, ip); | ||
389 | |||
390 | return alloc; | ||
391 | } | ||
392 | |||
393 | void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, | ||
394 | void *ip) | ||
395 | { | ||
396 | void *alloc = kcalloc(n, size, GFP_KERNEL); | ||
397 | |||
398 | if (!alloc) | ||
399 | return NULL; | ||
400 | |||
401 | __nvgpu_check_kalloc_size(n * size); | ||
402 | |||
403 | __nvgpu_save_kmem_alloc(g->kmallocs, n * size, | ||
404 | roundup_pow_of_two(n * size), | ||
405 | (u64)(uintptr_t)alloc, ip); | ||
406 | |||
407 | return alloc; | ||
408 | } | ||
409 | |||
410 | void __nvgpu_track_vfree(struct gk20a *g, void *addr) | ||
411 | { | ||
412 | 	/* | ||
413 | 	 * It is accepted practice to pass NULL pointers into free | ||
414 | 	 * functions to simplify calling code. | ||
415 | 	 */ | ||
416 | if (!addr) | ||
417 | return; | ||
418 | |||
419 | __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); | ||
420 | |||
421 | vfree(addr); | ||
422 | } | ||
423 | |||
424 | void __nvgpu_track_kfree(struct gk20a *g, void *addr) | ||
425 | { | ||
426 | if (!addr) | ||
427 | return; | ||
428 | |||
429 | __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); | ||
430 | |||
431 | kfree(addr); | ||
432 | } | ||
433 | |||
434 | static int __do_check_for_outstanding_allocs( | ||
435 | struct gk20a *g, | ||
436 | struct nvgpu_mem_alloc_tracker *tracker, | ||
437 | const char *type, bool silent) | ||
438 | { | ||
439 | struct nvgpu_rbtree_node *node; | ||
440 | int count = 0; | ||
441 | |||
442 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
443 | while (node) { | ||
444 | struct nvgpu_mem_alloc *alloc = | ||
445 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
446 | |||
447 | if (!silent) | ||
448 | kmem_print_mem_alloc(g, alloc, NULL); | ||
449 | |||
450 | count++; | ||
451 | nvgpu_rbtree_enum_next(&node, node); | ||
452 | } | ||
453 | |||
454 | return count; | ||
455 | } | ||
456 | |||
457 | /** | ||
458 | * check_for_outstanding_allocs - Count and display outstanding allocs | ||
459 | * | ||
460 | * @g - The GPU. | ||
461 | * @silent - If set don't print anything about the allocs. | ||
462 | * | ||
463 | * Dump (or just count) the number of allocations left outstanding. | ||
464 | */ | ||
465 | static int check_for_outstanding_allocs(struct gk20a *g, bool silent) | ||
466 | { | ||
467 | int count = 0; | ||
468 | |||
469 | count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", | ||
470 | silent); | ||
471 | count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", | ||
472 | silent); | ||
473 | |||
474 | return count; | ||
475 | } | ||
476 | |||
477 | static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, | ||
478 | void (*force_free_func)(const void *)) | ||
479 | { | ||
480 | struct nvgpu_rbtree_node *node; | ||
481 | |||
482 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
483 | while (node) { | ||
484 | struct nvgpu_mem_alloc *alloc = | ||
485 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
486 | |||
487 | if (force_free_func) | ||
488 | force_free_func((void *)alloc->addr); | ||
489 | |||
490 | nvgpu_rbtree_unlink(node, &tracker->allocs); | ||
491 | kfree(alloc); | ||
492 | |||
493 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
494 | } | ||
495 | } | ||
496 | |||
497 | /** | ||
498 | * nvgpu_kmem_cleanup - Cleanup the kmem tracking | ||
499 | * | ||
500 | * @g - The GPU. | ||
501 | * @force_free - If set will also free leaked objects if possible. | ||
502 | * | ||
503 |  * Clean up all of the allocs made by the nvgpu_kmem tracking code. If | ||
504 |  * @force_free is set then the allocations made by nvgpu are also freed. This | ||
505 |  * is risky, though, as it is possible that the memory is still in use by | ||
506 |  * other parts of the GPU driver that are not aware this has happened. | ||
507 |  * | ||
508 |  * In theory it should be fine if the GPU driver has been deinitialized and | ||
509 |  * there are no bugs in that code. However, any bugs in that code could | ||
510 |  * manifest as odd crashes an indeterminate amount of time in the future. | ||
511 |  * So use @force_free at your own risk. | ||
512 | */ | ||
513 | static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free) | ||
514 | { | ||
515 | do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL); | ||
516 | do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL); | ||
517 | } | ||
518 | |||
519 | void nvgpu_kmem_fini(struct gk20a *g, int flags) | ||
520 | { | ||
521 | int count; | ||
522 | bool silent, force_free; | ||
523 | |||
524 | if (!flags) | ||
525 | return; | ||
526 | |||
527 | silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS); | ||
528 | force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP); | ||
529 | |||
530 | count = check_for_outstanding_allocs(g, silent); | ||
531 | nvgpu_kmem_cleanup(g, force_free); | ||
532 | |||
533 | 	/* | ||
534 | 	 * If we leak objects we can either BUG() out or just WARN(). In general | ||
535 | 	 * it doesn't make sense to BUG() here since leaking a few objects | ||
536 | 	 * won't crash the kernel, but doing so can be helpful for development. | ||
537 | 	 * | ||
538 | 	 * If neither flag is set then we just silently do nothing. | ||
539 | 	 */ | ||
540 | if (count > 0) { | ||
541 | if (flags & NVGPU_KMEM_FINI_WARN) { | ||
542 | WARN(1, "Letting %d allocs leak!!\n", count); | ||
543 | } else if (flags & NVGPU_KMEM_FINI_BUG) { | ||
544 | nvgpu_err(g, "Letting %d allocs leak!!", count); | ||
545 | BUG(); | ||
546 | } | ||
547 | } | ||
548 | } | ||
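
How the flags combine in practice: a hypothetical teardown call that dumps any leaked allocations and WARN()s about them, but neither force-frees them nor BUG()s, using the flag names consumed above:

```c
/* Hypothetical shutdown path: report leaks loudly, leave memory alone. */
static void my_driver_shutdown(struct gk20a *g)
{
	nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS |
			   NVGPU_KMEM_FINI_WARN);
}
```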
549 | |||
550 | int nvgpu_kmem_init(struct gk20a *g) | ||
551 | { | ||
552 | int err; | ||
553 | |||
554 | g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL); | ||
555 | g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL); | ||
556 | |||
557 | if (!g->vmallocs || !g->kmallocs) { | ||
558 | err = -ENOMEM; | ||
559 | goto fail; | ||
560 | } | ||
561 | |||
562 | g->vmallocs->name = "vmalloc"; | ||
563 | g->kmallocs->name = "kmalloc"; | ||
564 | |||
565 | g->vmallocs->allocs = NULL; | ||
566 | g->kmallocs->allocs = NULL; | ||
567 | |||
568 | nvgpu_mutex_init(&g->vmallocs->lock); | ||
569 | nvgpu_mutex_init(&g->kmallocs->lock); | ||
570 | |||
571 | g->vmallocs->min_alloc = PAGE_SIZE; | ||
572 | g->kmallocs->min_alloc = KMALLOC_MIN_SIZE; | ||
573 | |||
574 | /* | ||
575 | * This needs to go after all the other initialization since they use | ||
576 | * the nvgpu_kzalloc() API. | ||
577 | */ | ||
578 | g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, | ||
579 | sizeof(struct nvgpu_mem_alloc)); | ||
580 | g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, | ||
581 | sizeof(struct nvgpu_mem_alloc)); | ||
582 | |||
583 | if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { | ||
584 | err = -ENOMEM; | ||
585 | if (g->vmallocs->allocs_cache) | ||
586 | nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); | ||
587 | if (g->kmallocs->allocs_cache) | ||
588 | nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); | ||
589 | goto fail; | ||
590 | } | ||
591 | |||
592 | return 0; | ||
593 | |||
594 | fail: | ||
595 | 	/* kfree() is a no-op on a NULL pointer, so the allocations can | ||
596 | 	 * be freed unconditionally here. */ | ||
597 | 	kfree(g->vmallocs); | ||
598 | 	kfree(g->kmallocs); | ||
599 | 	return err; | ||
600 | } | ||
601 | |||
602 | #else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
603 | |||
604 | int nvgpu_kmem_init(struct gk20a *g) | ||
605 | { | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | void nvgpu_kmem_fini(struct gk20a *g, int flags) | ||
610 | { | ||
611 | } | ||
612 | #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
613 | |||
614 | struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) | ||
615 | { | ||
616 | struct nvgpu_kmem_cache *cache = | ||
617 | nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); | ||
618 | |||
619 | if (!cache) | ||
620 | return NULL; | ||
621 | |||
622 | cache->g = g; | ||
623 | |||
624 | snprintf(cache->name, sizeof(cache->name), | ||
625 | "nvgpu-cache-0x%p-%d-%d", g, (int)size, | ||
626 | atomic_inc_return(&kmem_cache_id)); | ||
627 | cache->cache = kmem_cache_create(cache->name, | ||
628 | size, size, 0, NULL); | ||
629 | if (!cache->cache) { | ||
630 | nvgpu_kfree(g, cache); | ||
631 | return NULL; | ||
632 | } | ||
633 | |||
634 | return cache; | ||
635 | } | ||
636 | |||
637 | void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) | ||
638 | { | ||
639 | struct gk20a *g = cache->g; | ||
640 | |||
641 | kmem_cache_destroy(cache->cache); | ||
642 | nvgpu_kfree(g, cache); | ||
643 | } | ||
644 | |||
645 | void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) | ||
646 | { | ||
647 | return kmem_cache_alloc(cache->cache, GFP_KERNEL); | ||
648 | } | ||
649 | |||
650 | void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr) | ||
651 | { | ||
652 | kmem_cache_free(cache->cache, ptr); | ||
653 | } | ||
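
A sketch of the wrapper's intended lifecycle, assuming a caller-defined struct foo carved from its own dedicated slab cache (the type and function name are illustrative):

```c
/* Hypothetical fixed-size object type. */
struct foo { int x; };

static int foo_demo(struct gk20a *g)
{
	struct nvgpu_kmem_cache *cache;
	struct foo *obj;

	cache = nvgpu_kmem_cache_create(g, sizeof(struct foo));
	if (!cache)
		return -ENOMEM;

	obj = nvgpu_kmem_cache_alloc(cache);
	if (obj) {
		/* ... use obj ... */
		nvgpu_kmem_cache_free(cache, obj);
	}

	nvgpu_kmem_cache_destroy(cache);
	return 0;
}
```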
diff --git a/include/os/linux/kmem_priv.h b/include/os/linux/kmem_priv.h new file mode 100644 index 0000000..a41762a --- /dev/null +++ b/include/os/linux/kmem_priv.h | |||
@@ -0,0 +1,105 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __KMEM_PRIV_H__ | ||
18 | #define __KMEM_PRIV_H__ | ||
19 | |||
20 | #include <nvgpu/rbtree.h> | ||
21 | #include <nvgpu/lock.h> | ||
22 | |||
23 | struct seq_file; | ||
24 | |||
25 | #define __pstat(s, fmt, msg...) \ | ||
26 | do { \ | ||
27 | if (s) \ | ||
28 | seq_printf(s, fmt, ##msg); \ | ||
29 | else \ | ||
30 | pr_info(fmt, ##msg); \ | ||
31 | } while (0) | ||
32 | |||
33 | #define MAX_STACK_TRACE 20 | ||
34 | |||
35 | /* | ||
36 | * Linux specific version of the nvgpu_kmem_cache struct. This type is | ||
37 | * completely opaque to the rest of the driver. | ||
38 | */ | ||
39 | struct nvgpu_kmem_cache { | ||
40 | struct gk20a *g; | ||
41 | struct kmem_cache *cache; | ||
42 | |||
43 | /* | ||
44 | * Memory to hold the kmem_cache unique name. Only necessary on our | ||
45 | * k3.10 kernel when not using the SLUB allocator but it's easier to | ||
46 | * just carry this on to newer kernels. | ||
47 | */ | ||
48 | char name[128]; | ||
49 | }; | ||
50 | |||
51 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
52 | |||
53 | struct nvgpu_mem_alloc { | ||
54 | struct nvgpu_mem_alloc_tracker *owner; | ||
55 | |||
56 | void *ip; | ||
57 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
58 | unsigned long stack[MAX_STACK_TRACE]; | ||
59 | int stack_length; | ||
60 | #endif | ||
61 | |||
62 | u64 addr; | ||
63 | |||
64 | unsigned long size; | ||
65 | unsigned long real_size; | ||
66 | |||
67 | struct nvgpu_rbtree_node allocs_entry; | ||
68 | }; | ||
69 | |||
70 | static inline struct nvgpu_mem_alloc * | ||
71 | nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) | ||
72 | { | ||
73 | return (struct nvgpu_mem_alloc *) | ||
74 | ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry)); | ||
75 | } | ||
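
The inline above is an open-coded container_of(); the standard helper expresses the same pointer arithmetic more idiomatically (the _alt suffix marks this as an illustrative alternative, not part of the header):

```c
#include <linux/kernel.h>

static inline struct nvgpu_mem_alloc *
nvgpu_mem_alloc_from_rbtree_node_alt(struct nvgpu_rbtree_node *node)
{
	/* Recover the enclosing struct from its embedded rbtree node. */
	return container_of(node, struct nvgpu_mem_alloc, allocs_entry);
}
```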
76 | |||
77 | /* | ||
78 | * Linux specific tracking of vmalloc, kmalloc, etc. | ||
79 | */ | ||
80 | struct nvgpu_mem_alloc_tracker { | ||
81 | const char *name; | ||
82 | struct nvgpu_kmem_cache *allocs_cache; | ||
83 | struct nvgpu_rbtree_node *allocs; | ||
84 | struct nvgpu_mutex lock; | ||
85 | |||
86 | u64 bytes_alloced; | ||
87 | u64 bytes_freed; | ||
88 | u64 bytes_alloced_real; | ||
89 | u64 bytes_freed_real; | ||
90 | u64 nr_allocs; | ||
91 | u64 nr_frees; | ||
92 | |||
93 | unsigned long min_alloc; | ||
94 | unsigned long max_alloc; | ||
95 | }; | ||
96 | |||
97 | void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker); | ||
98 | void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker); | ||
99 | |||
100 | void kmem_print_mem_alloc(struct gk20a *g, | ||
101 | struct nvgpu_mem_alloc *alloc, | ||
102 | struct seq_file *s); | ||
103 | #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
104 | |||
105 | #endif /* __KMEM_PRIV_H__ */ | ||
diff --git a/include/os/linux/linux-channel.c b/include/os/linux/linux-channel.c new file mode 100644 index 0000000..d035baf --- /dev/null +++ b/include/os/linux/linux-channel.c | |||
@@ -0,0 +1,657 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/enabled.h> | ||
18 | #include <nvgpu/debug.h> | ||
19 | #include <nvgpu/error_notifier.h> | ||
20 | #include <nvgpu/os_sched.h> | ||
21 | #include <nvgpu/gk20a.h> | ||
22 | #include <nvgpu/channel.h> | ||
23 | #include <nvgpu/dma.h> | ||
24 | |||
25 | /* | ||
26 | * This is required for nvgpu_vm_find_buf() which is used in the tracing | ||
27 | * code. Once we can get and access userspace buffers without requiring | ||
28 | * direct dma_buf usage this can be removed. | ||
29 | */ | ||
30 | #include <nvgpu/linux/vm.h> | ||
31 | |||
32 | #include "channel.h" | ||
33 | #include "ioctl_channel.h" | ||
34 | #include "os_linux.h" | ||
35 | #include "dmabuf.h" | ||
36 | |||
37 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
38 | |||
39 | #include <linux/uaccess.h> | ||
40 | #include <linux/dma-buf.h> | ||
41 | #include <trace/events/gk20a.h> | ||
42 | #include <uapi/linux/nvgpu.h> | ||
43 | |||
44 | #include "sync_sema_android.h" | ||
45 | |||
46 | u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) | ||
47 | { | ||
48 | u32 flags = 0; | ||
49 | |||
50 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) | ||
51 | flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; | ||
52 | |||
53 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | ||
54 | flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; | ||
55 | |||
56 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) | ||
57 | flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; | ||
58 | |||
59 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) | ||
60 | flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; | ||
61 | |||
62 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) | ||
63 | flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; | ||
64 | |||
65 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) | ||
66 | flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; | ||
67 | |||
68 | return flags; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 |  * Convert common-code error notifiers of the form NVGPU_ERR_NOTIFIER_* | ||
73 |  * into the Linux-specific error notifiers of the form NVGPU_CHANNEL_* | ||
74 |  * that are exposed to user space. | ||
75 |  */ | ||
76 | static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) | ||
77 | { | ||
78 | switch (error_notifier) { | ||
79 | case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: | ||
80 | return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; | ||
81 | case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: | ||
82 | return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; | ||
83 | case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: | ||
84 | return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; | ||
85 | case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: | ||
86 | return NVGPU_CHANNEL_GR_EXCEPTION; | ||
87 | case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: | ||
88 | return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; | ||
89 | case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: | ||
90 | return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; | ||
91 | case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: | ||
92 | return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; | ||
93 | case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: | ||
94 | return NVGPU_CHANNEL_PBDMA_ERROR; | ||
95 | case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: | ||
96 | return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; | ||
97 | case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: | ||
98 | return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; | ||
99 | case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: | ||
100 | return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; | ||
101 | } | ||
102 | |||
103 | pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); | ||
104 | |||
105 | return error_notifier; | ||
106 | } | ||
107 | |||
108 | /** | ||
109 |  * nvgpu_set_error_notifier_locked() | ||
110 |  * Must be called with priv->error_notifier.mutex held (priv = ch->os_priv) | ||
111 |  * | ||
112 |  * @error should be of the form NVGPU_ERR_NOTIFIER_* | ||
113 |  */ | ||
114 | void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) | ||
115 | { | ||
116 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
117 | |||
118 | error = nvgpu_error_notifier_to_channel_notifier(error); | ||
119 | |||
120 | if (priv->error_notifier.dmabuf) { | ||
121 | struct nvgpu_notification *notification = | ||
122 | priv->error_notifier.notification; | ||
123 | struct timespec time_data; | ||
124 | u64 nsec; | ||
125 | |||
126 | getnstimeofday(&time_data); | ||
127 | nsec = ((u64)time_data.tv_sec) * 1000000000u + | ||
128 | (u64)time_data.tv_nsec; | ||
129 | notification->time_stamp.nanoseconds[0] = | ||
130 | (u32)nsec; | ||
131 | notification->time_stamp.nanoseconds[1] = | ||
132 | (u32)(nsec >> 32); | ||
133 | notification->info32 = error; | ||
134 | notification->status = 0xffff; | ||
135 | |||
136 | nvgpu_err(ch->g, | ||
137 | "error notifier set to %d for ch %d", error, ch->chid); | ||
138 | } | ||
139 | } | ||
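
The notifier stores the 64-bit nanosecond timestamp as two 32-bit words, low word first. A sketch of how a user-space consumer would reassemble it (the helper is illustrative, not part of the uapi):

```c
#include <stdint.h>

static uint64_t notifier_timestamp_ns(const uint32_t nanoseconds[2])
{
	/* nanoseconds[0] holds the low 32 bits, [1] the high 32 bits. */
	return ((uint64_t)nanoseconds[1] << 32) | nanoseconds[0];
}
```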
140 | |||
141 | /* error should be of the form NVGPU_ERR_NOTIFIER_* */ | ||
142 | void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) | ||
143 | { | ||
144 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
145 | |||
146 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
147 | nvgpu_set_error_notifier_locked(ch, error); | ||
148 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
149 | } | ||
150 | |||
151 | void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) | ||
152 | { | ||
153 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
154 | |||
155 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
156 | if (priv->error_notifier.dmabuf) { | ||
157 | struct nvgpu_notification *notification = | ||
158 | priv->error_notifier.notification; | ||
159 | |||
160 | /* Don't overwrite error flag if it is already set */ | ||
161 | if (notification->status != 0xffff) | ||
162 | nvgpu_set_error_notifier_locked(ch, error); | ||
163 | } | ||
164 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
165 | } | ||
166 | |||
167 | /* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ | ||
168 | bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) | ||
169 | { | ||
170 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
171 | bool notifier_set = false; | ||
172 | |||
173 | error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); | ||
174 | |||
175 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
176 | if (priv->error_notifier.dmabuf) { | ||
177 | struct nvgpu_notification *notification = | ||
178 | priv->error_notifier.notification; | ||
179 | u32 err = notification->info32; | ||
180 | |||
181 | if (err == error_notifier) | ||
182 | notifier_set = true; | ||
183 | } | ||
184 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
185 | |||
186 | return notifier_set; | ||
187 | } | ||
188 | |||
189 | static void gk20a_channel_update_runcb_fn(struct work_struct *work) | ||
190 | { | ||
191 | struct nvgpu_channel_completion_cb *completion_cb = | ||
192 | container_of(work, struct nvgpu_channel_completion_cb, work); | ||
193 | struct nvgpu_channel_linux *priv = | ||
194 | container_of(completion_cb, | ||
195 | struct nvgpu_channel_linux, completion_cb); | ||
196 | struct channel_gk20a *ch = priv->ch; | ||
197 | void (*fn)(struct channel_gk20a *, void *); | ||
198 | void *user_data; | ||
199 | |||
200 | nvgpu_spinlock_acquire(&completion_cb->lock); | ||
201 | fn = completion_cb->fn; | ||
202 | user_data = completion_cb->user_data; | ||
203 | nvgpu_spinlock_release(&completion_cb->lock); | ||
204 | |||
205 | if (fn) | ||
206 | fn(ch, user_data); | ||
207 | } | ||
208 | |||
209 | static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) | ||
210 | { | ||
211 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
212 | |||
213 | priv->completion_cb.fn = NULL; | ||
214 | priv->completion_cb.user_data = NULL; | ||
215 | nvgpu_spinlock_init(&priv->completion_cb.lock); | ||
216 | INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); | ||
217 | } | ||
218 | |||
219 | static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) | ||
220 | { | ||
221 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
222 | |||
223 | nvgpu_spinlock_acquire(&priv->completion_cb.lock); | ||
224 | priv->completion_cb.fn = NULL; | ||
225 | priv->completion_cb.user_data = NULL; | ||
226 | nvgpu_spinlock_release(&priv->completion_cb.lock); | ||
227 | cancel_work_sync(&priv->completion_cb.work); | ||
228 | } | ||
229 | |||
230 | static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) | ||
231 | { | ||
232 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
233 | |||
234 | if (priv->completion_cb.fn) | ||
235 | schedule_work(&priv->completion_cb.work); | ||
236 | } | ||
237 | |||
238 | static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) | ||
239 | { | ||
240 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
241 | |||
242 | if (priv->completion_cb.fn) | ||
243 | cancel_work_sync(&priv->completion_cb.work); | ||
244 | } | ||
245 | |||
246 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | ||
247 | void (*update_fn)(struct channel_gk20a *, void *), | ||
248 | void *update_fn_data, | ||
249 | int runlist_id, | ||
250 | bool is_privileged_channel) | ||
251 | { | ||
252 | struct channel_gk20a *ch; | ||
253 | struct nvgpu_channel_linux *priv; | ||
254 | |||
255 | ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, | ||
256 | nvgpu_current_pid(g), nvgpu_current_tid(g)); | ||
257 | |||
258 | if (ch) { | ||
259 | priv = ch->os_priv; | ||
260 | nvgpu_spinlock_acquire(&priv->completion_cb.lock); | ||
261 | priv->completion_cb.fn = update_fn; | ||
262 | priv->completion_cb.user_data = update_fn_data; | ||
263 | nvgpu_spinlock_release(&priv->completion_cb.lock); | ||
264 | } | ||
265 | |||
266 | return ch; | ||
267 | } | ||
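
A hypothetical in-kernel user of this entry point: the callback is invoked later from the completion workqueue, so it runs in process context. The runlist id of -1 here is purely an illustrative "driver-chosen default" placeholder; real callers should pass a valid runlist id for their version of the driver:

```c
static void my_update_fn(struct channel_gk20a *ch, void *data)
{
	/* Runs via schedule_work() when submitted work completes. */
}

static struct channel_gk20a *open_with_notify(struct gk20a *g)
{
	return gk20a_open_new_channel_with_cb(g, my_update_fn,
					      NULL /* user_data */,
					      -1 /* runlist_id: placeholder */,
					      false /* not privileged */);
}
```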
268 | |||
269 | static void nvgpu_channel_open_linux(struct channel_gk20a *ch) | ||
270 | { | ||
271 | } | ||
272 | |||
273 | static void nvgpu_channel_close_linux(struct channel_gk20a *ch) | ||
274 | { | ||
275 | nvgpu_channel_work_completion_clear(ch); | ||
276 | |||
277 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
278 | gk20a_channel_free_cycle_stats_buffer(ch); | ||
279 | gk20a_channel_free_cycle_stats_snapshot(ch); | ||
280 | #endif | ||
281 | } | ||
282 | |||
283 | static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) | ||
284 | { | ||
285 | struct nvgpu_channel_linux *priv; | ||
286 | int err; | ||
287 | |||
288 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
289 | if (!priv) | ||
290 | return -ENOMEM; | ||
291 | |||
292 | ch->os_priv = priv; | ||
293 | priv->ch = ch; | ||
294 | |||
295 | #ifdef CONFIG_SYNC | ||
296 | ch->has_os_fence_framework_support = true; | ||
297 | #endif | ||
298 | |||
299 | err = nvgpu_mutex_init(&priv->error_notifier.mutex); | ||
300 | if (err) { | ||
301 | nvgpu_kfree(g, priv); | ||
302 | return err; | ||
303 | } | ||
304 | |||
305 | nvgpu_channel_work_completion_init(ch); | ||
306 | |||
307 | return 0; | ||
308 | } | ||
309 | |||
310 | static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) | ||
311 | { | ||
312 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
313 | |||
314 | nvgpu_mutex_destroy(&priv->error_notifier.mutex); | ||
315 | nvgpu_kfree(g, priv); | ||
316 | |||
317 | ch->os_priv = NULL; | ||
318 | |||
319 | #ifdef CONFIG_SYNC | ||
320 | ch->has_os_fence_framework_support = false; | ||
321 | #endif | ||
322 | } | ||
323 | |||
324 | static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, | ||
325 | const char *fmt, ...) | ||
326 | { | ||
327 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
328 | struct nvgpu_os_fence_framework *fence_framework; | ||
329 | char name[30]; | ||
330 | va_list args; | ||
331 | |||
332 | fence_framework = &priv->fence_framework; | ||
333 | |||
334 | va_start(args, fmt); | ||
335 | vsnprintf(name, sizeof(name), fmt, args); | ||
336 | va_end(args); | ||
337 | |||
338 | fence_framework->timeline = gk20a_sync_timeline_create(name); | ||
339 | |||
340 | if (!fence_framework->timeline) | ||
341 | return -EINVAL; | ||
342 | |||
343 | return 0; | ||
344 | } | ||
345 | static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) | ||
346 | { | ||
347 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
348 | struct nvgpu_os_fence_framework *fence_framework; | ||
349 | |||
350 | fence_framework = &priv->fence_framework; | ||
351 | |||
352 | gk20a_sync_timeline_signal(fence_framework->timeline); | ||
353 | } | ||
354 | |||
355 | static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) | ||
356 | { | ||
357 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
358 | struct nvgpu_os_fence_framework *fence_framework; | ||
359 | |||
360 | fence_framework = &priv->fence_framework; | ||
361 | |||
362 | gk20a_sync_timeline_destroy(fence_framework->timeline); | ||
363 | fence_framework->timeline = NULL; | ||
364 | } | ||
365 | |||
366 | static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) | ||
367 | { | ||
368 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
369 | struct nvgpu_os_fence_framework *fence_framework; | ||
370 | |||
371 | fence_framework = &priv->fence_framework; | ||
372 | |||
373 | return (fence_framework->timeline != NULL); | ||
374 | } | ||
375 | |||
376 | static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest, | ||
377 | struct nvgpu_gpfifo_userdata userdata, u32 start, u32 length) | ||
378 | { | ||
379 | struct nvgpu_gpfifo_entry __user *user_gpfifo = userdata.entries; | ||
380 | unsigned long n; | ||
381 | |||
382 | n = copy_from_user(dest, user_gpfifo + start, | ||
383 | length * sizeof(struct nvgpu_gpfifo_entry)); | ||
384 | |||
385 | return n == 0 ? 0 : -EFAULT; | ||
386 | } | ||
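
copy_from_user() returns the number of bytes it could *not* copy, so the helper above collapses any non-zero result, including a partial copy, into -EFAULT. Spelled out as a standalone sketch of the same kernel idiom:

```c
#include <linux/uaccess.h>

/* All-or-nothing copy: any shortfall from copy_from_user() is -EFAULT. */
static int copy_in_exact(void *dst, const void __user *src, size_t len)
{
	if (copy_from_user(dst, src, len) != 0)
		return -EFAULT;
	return 0;
}
```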
387 | |||
388 | int nvgpu_usermode_buf_from_dmabuf(struct gk20a *g, int dmabuf_fd, | ||
389 | struct nvgpu_mem *mem, struct nvgpu_usermode_buf_linux *buf) | ||
390 | { | ||
391 | struct device *dev = dev_from_gk20a(g); | ||
392 | struct dma_buf *dmabuf; | ||
393 | struct sg_table *sgt; | ||
394 | struct dma_buf_attachment *attachment; | ||
395 | int err; | ||
396 | |||
397 | dmabuf = dma_buf_get(dmabuf_fd); | ||
398 | if (IS_ERR(dmabuf)) { | ||
399 | return PTR_ERR(dmabuf); | ||
400 | } | ||
401 | |||
402 | if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) { | ||
403 | err = -EINVAL; | ||
404 | goto put_dmabuf; | ||
405 | } | ||
406 | |||
407 | err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev); | ||
408 | if (err != 0) { | ||
409 | goto put_dmabuf; | ||
410 | } | ||
411 | |||
412 | sgt = gk20a_mm_pin(dev, dmabuf, &attachment); | ||
413 | if (IS_ERR(sgt)) { | ||
414 | nvgpu_warn(g, "Failed to pin dma_buf!"); | ||
415 | err = PTR_ERR(sgt); | ||
416 | goto put_dmabuf; | ||
417 | } | ||
418 | |||
419 | buf->dmabuf = dmabuf; | ||
420 | buf->attachment = attachment; | ||
421 | buf->sgt = sgt; | ||
422 | |||
423 | /* | ||
424 | * This mem is unmapped and freed in a common path; for Linux, we'll | ||
425 | * also need to unref the dmabuf stuff (above) but the sgt here is only | ||
426 | * borrowed, so it cannot be freed by nvgpu_mem_*. | ||
427 | */ | ||
428 | mem->mem_flags = NVGPU_MEM_FLAG_FOREIGN_SGT; | ||
429 | mem->aperture = APERTURE_SYSMEM; | ||
430 | mem->skip_wmb = 0; | ||
431 | mem->size = dmabuf->size; | ||
432 | |||
433 | mem->priv.flags = 0; | ||
434 | mem->priv.pages = NULL; | ||
435 | mem->priv.sgt = sgt; | ||
436 | |||
437 | return 0; | ||
438 | put_dmabuf: | ||
439 | dma_buf_put(dmabuf); | ||
440 | return err; | ||
441 | } | ||
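
gk20a_mm_pin() wraps the generic dma-buf import sequence: take a reference on the buffer, attach the importing device, then map the attachment to get an sg_table. A sketch of that underlying sequence with the stock kernel API (import_dmabuf() is illustrative; error handling unwinds each step):

```c
#include <linux/dma-buf.h>
#include <linux/dma-direction.h>

static struct sg_table *import_dmabuf(struct device *dev, int fd,
				      struct dma_buf **out_buf,
				      struct dma_buf_attachment **out_att)
{
	struct dma_buf *buf = dma_buf_get(fd);	/* takes a reference */
	struct dma_buf_attachment *att;
	struct sg_table *sgt;

	if (IS_ERR(buf))
		return ERR_CAST(buf);

	att = dma_buf_attach(buf, dev);
	if (IS_ERR(att)) {
		dma_buf_put(buf);
		return ERR_CAST(att);
	}

	sgt = dma_buf_map_attachment(att, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt)) {
		dma_buf_detach(buf, att);
		dma_buf_put(buf);
		return ERR_CAST(sgt);
	}

	*out_buf = buf;
	*out_att = att;
	return sgt;
}
```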
442 | |||
443 | void nvgpu_channel_free_usermode_buffers(struct channel_gk20a *c) | ||
444 | { | ||
445 | struct nvgpu_channel_linux *priv = c->os_priv; | ||
446 | struct gk20a *g = c->g; | ||
447 | struct device *dev = dev_from_gk20a(g); | ||
448 | |||
449 | if (priv->usermode.gpfifo.dmabuf != NULL) { | ||
450 | gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf, | ||
451 | priv->usermode.gpfifo.attachment, | ||
452 | priv->usermode.gpfifo.sgt); | ||
453 | dma_buf_put(priv->usermode.gpfifo.dmabuf); | ||
454 | priv->usermode.gpfifo.dmabuf = NULL; | ||
455 | } | ||
456 | |||
457 | if (priv->usermode.userd.dmabuf != NULL) { | ||
458 | gk20a_mm_unpin(dev, priv->usermode.userd.dmabuf, | ||
459 | priv->usermode.userd.attachment, | ||
460 | priv->usermode.userd.sgt); | ||
461 | dma_buf_put(priv->usermode.userd.dmabuf); | ||
462 | priv->usermode.userd.dmabuf = NULL; | ||
463 | } | ||
464 | } | ||
465 | |||
466 | static int nvgpu_channel_alloc_usermode_buffers(struct channel_gk20a *c, | ||
467 | struct nvgpu_setup_bind_args *args) | ||
468 | { | ||
469 | struct nvgpu_channel_linux *priv = c->os_priv; | ||
470 | struct gk20a *g = c->g; | ||
471 | struct device *dev = dev_from_gk20a(g); | ||
472 | size_t gpfifo_size; | ||
473 | int err; | ||
474 | |||
475 | if (args->gpfifo_dmabuf_fd == 0 || args->userd_dmabuf_fd == 0) { | ||
476 | return -EINVAL; | ||
477 | } | ||
478 | |||
479 | if (args->gpfifo_dmabuf_offset != 0 || | ||
480 | args->userd_dmabuf_offset != 0) { | ||
481 | /* TODO - not yet supported */ | ||
482 | return -EINVAL; | ||
483 | } | ||
484 | |||
485 | err = nvgpu_usermode_buf_from_dmabuf(g, args->gpfifo_dmabuf_fd, | ||
486 | &c->usermode_gpfifo, &priv->usermode.gpfifo); | ||
487 | if (err < 0) { | ||
488 | return err; | ||
489 | } | ||
490 | |||
491 | gpfifo_size = max_t(u32, SZ_4K, | ||
492 | args->num_gpfifo_entries * | ||
493 | nvgpu_get_gpfifo_entry_size()); | ||
494 | |||
495 | if (c->usermode_gpfifo.size < gpfifo_size) { | ||
496 | err = -EINVAL; | ||
497 | goto free_gpfifo; | ||
498 | } | ||
499 | |||
500 | c->usermode_gpfifo.gpu_va = nvgpu_gmmu_map(c->vm, &c->usermode_gpfifo, | ||
501 | c->usermode_gpfifo.size, 0, gk20a_mem_flag_none, | ||
502 | false, c->usermode_gpfifo.aperture); | ||
503 | |||
504 | if (c->usermode_gpfifo.gpu_va == 0) { | ||
505 | err = -ENOMEM; | ||
506 | goto unmap_free_gpfifo; | ||
507 | } | ||
508 | |||
509 | err = nvgpu_usermode_buf_from_dmabuf(g, args->userd_dmabuf_fd, | ||
510 | &c->usermode_userd, &priv->usermode.userd); | ||
511 | if (err < 0) { | ||
512 | goto unmap_free_gpfifo; | ||
513 | } | ||
514 | |||
515 | args->work_submit_token = g->fifo.channel_base + c->chid; | ||
516 | |||
517 | return 0; | ||
518 | unmap_free_gpfifo: | ||
519 | nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo); | ||
520 | free_gpfifo: | ||
521 | gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf, | ||
522 | priv->usermode.gpfifo.attachment, | ||
523 | priv->usermode.gpfifo.sgt); | ||
524 | dma_buf_put(priv->usermode.gpfifo.dmabuf); | ||
525 | priv->usermode.gpfifo.dmabuf = NULL; | ||
526 | return err; | ||
527 | } | ||
528 | |||
529 | int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) | ||
530 | { | ||
531 | struct gk20a *g = &l->g; | ||
532 | struct fifo_gk20a *f = &g->fifo; | ||
533 | int chid; | ||
534 | int err; | ||
535 | |||
536 | for (chid = 0; chid < (int)f->num_channels; chid++) { | ||
537 | struct channel_gk20a *ch = &f->channel[chid]; | ||
538 | |||
539 | err = nvgpu_channel_alloc_linux(g, ch); | ||
540 | if (err) | ||
541 | goto err_clean; | ||
542 | } | ||
543 | |||
544 | g->os_channel.open = nvgpu_channel_open_linux; | ||
545 | g->os_channel.close = nvgpu_channel_close_linux; | ||
546 | g->os_channel.work_completion_signal = | ||
547 | nvgpu_channel_work_completion_signal; | ||
548 | g->os_channel.work_completion_cancel_sync = | ||
549 | nvgpu_channel_work_completion_cancel_sync; | ||
550 | |||
551 | g->os_channel.os_fence_framework_inst_exists = | ||
552 | nvgpu_channel_fence_framework_exists; | ||
553 | g->os_channel.init_os_fence_framework = | ||
554 | nvgpu_channel_init_os_fence_framework; | ||
555 | g->os_channel.signal_os_fence_framework = | ||
556 | nvgpu_channel_signal_os_fence_framework; | ||
557 | g->os_channel.destroy_os_fence_framework = | ||
558 | nvgpu_channel_destroy_os_fence_framework; | ||
559 | |||
560 | g->os_channel.copy_user_gpfifo = | ||
561 | nvgpu_channel_copy_user_gpfifo; | ||
562 | |||
563 | g->os_channel.alloc_usermode_buffers = | ||
564 | nvgpu_channel_alloc_usermode_buffers; | ||
565 | |||
566 | g->os_channel.free_usermode_buffers = | ||
567 | nvgpu_channel_free_usermode_buffers; | ||
568 | |||
569 | return 0; | ||
570 | |||
571 | err_clean: | ||
572 | for (; chid >= 0; chid--) { | ||
573 | struct channel_gk20a *ch = &f->channel[chid]; | ||
574 | |||
575 | nvgpu_channel_free_linux(g, ch); | ||
576 | } | ||
577 | return err; | ||
578 | } | ||
579 | |||
580 | void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) | ||
581 | { | ||
582 | struct gk20a *g = &l->g; | ||
583 | struct fifo_gk20a *f = &g->fifo; | ||
584 | unsigned int chid; | ||
585 | |||
586 | for (chid = 0; chid < f->num_channels; chid++) { | ||
587 | struct channel_gk20a *ch = &f->channel[chid]; | ||
588 | |||
589 | nvgpu_channel_free_linux(g, ch); | ||
590 | } | ||
591 | |||
592 | g->os_channel.os_fence_framework_inst_exists = NULL; | ||
593 | g->os_channel.init_os_fence_framework = NULL; | ||
594 | g->os_channel.signal_os_fence_framework = NULL; | ||
595 | g->os_channel.destroy_os_fence_framework = NULL; | ||
596 | } | ||
597 | |||
598 | u32 nvgpu_get_gpfifo_entry_size(void) | ||
599 | { | ||
600 | return sizeof(struct nvgpu_gpfifo_entry); | ||
601 | } | ||
602 | |||
603 | #ifdef CONFIG_DEBUG_FS | ||
604 | static void trace_write_pushbuffer(struct channel_gk20a *c, | ||
605 | struct nvgpu_gpfifo_entry *g) | ||
606 | { | ||
607 | void *mem = NULL; | ||
608 | unsigned int words; | ||
609 | u64 offset; | ||
610 | struct dma_buf *dmabuf = NULL; | ||
611 | |||
612 | if (gk20a_debug_trace_cmdbuf) { | ||
613 | u64 gpu_va = (u64)g->entry0 | | ||
614 | (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); | ||
615 | int err; | ||
616 | |||
617 | words = pbdma_gp_entry1_length_v(g->entry1); | ||
618 | err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); | ||
619 | if (!err) | ||
620 | mem = dma_buf_vmap(dmabuf); | ||
621 | } | ||
622 | |||
623 | if (mem) { | ||
624 | u32 i; | ||
625 | /* | ||
626 | * Write in batches of 128 as there seems to be a limit | ||
627 | * of how much you can output to ftrace at once. | ||
628 | */ | ||
629 | for (i = 0; i < words; i += 128U) { | ||
630 | trace_gk20a_push_cmdbuf( | ||
631 | c->g->name, | ||
632 | 0, | ||
633 | min(words - i, 128U), | ||
634 | offset + i * sizeof(u32), | ||
635 | mem); | ||
636 | } | ||
637 | dma_buf_vunmap(dmabuf, mem); | ||
638 | } | ||
639 | } | ||
640 | |||
641 | void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) | ||
642 | { | ||
643 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; | ||
644 | u32 n = c->gpfifo.entry_num; | ||
645 | u32 start = c->gpfifo.put; | ||
646 | u32 i; | ||
647 | |||
648 | if (!gk20a_debug_trace_cmdbuf) | ||
649 | return; | ||
650 | |||
651 | if (!gp) | ||
652 | return; | ||
653 | |||
654 | for (i = 0; i < count; i++) | ||
655 | trace_write_pushbuffer(c, &gp[(start + i) % n]); | ||
656 | } | ||
657 | #endif | ||
diff --git a/include/os/linux/linux-dma.c b/include/os/linux/linux-dma.c new file mode 100644 index 0000000..d704b2a --- /dev/null +++ b/include/os/linux/linux-dma.c | |||
@@ -0,0 +1,534 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/dma-mapping.h> | ||
18 | #include <linux/version.h> | ||
19 | |||
20 | #include <nvgpu/log.h> | ||
21 | #include <nvgpu/dma.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | #include <nvgpu/bug.h> | ||
24 | #include <nvgpu/gmmu.h> | ||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/enabled.h> | ||
27 | #include <nvgpu/vidmem.h> | ||
28 | #include <nvgpu/gk20a.h> | ||
29 | |||
30 | #include <nvgpu/linux/dma.h> | ||
31 | |||
32 | #include "platform_gk20a.h" | ||
33 | #include "os_linux.h" | ||
34 | #include "dmabuf_vidmem.h" | ||
35 | |||
36 | #ifdef __DMA_ATTRS_LONGS | ||
37 | #define NVGPU_DEFINE_DMA_ATTRS(x) \ | ||
38 | struct dma_attrs x = { \ | ||
39 | .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \ | ||
40 | } | ||
41 | #define NVGPU_DMA_ATTR(attrs) &attrs | ||
42 | #else | ||
43 | #define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0 | ||
44 | #define NVGPU_DMA_ATTR(attrs) attrs | ||
45 | #endif | ||
46 | |||
47 | /* | ||
48 | * Enough to hold all the possible flags in string form. When a new flag is | ||
49 | * added it must be added here as well!! | ||
50 | */ | ||
51 | #define NVGPU_DMA_STR_SIZE \ | ||
52 | sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS") | ||
53 | |||
54 | /* | ||
55 |  * The returned string is allocated with nvgpu_kzalloc() and must be freed by the caller. | ||
56 | */ | ||
57 | static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags) | ||
58 | { | ||
59 | char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE); | ||
60 | int bytes_available = NVGPU_DMA_STR_SIZE; | ||
61 | |||
62 | 	/* | ||
63 | 	 * Return the buffer as-is if allocation failed or there are no flags; | ||
64 | 	 * the calling code can then just print it without if (NULL) logic. | ||
65 | 	 */ | ||
66 | 	if (!buf || !flags) | ||
67 | return buf; | ||
68 | |||
69 | #define APPEND_FLAG(flag, str_flag) \ | ||
70 | do { \ | ||
71 | if (flags & flag) { \ | ||
72 | strncat(buf, str_flag, bytes_available); \ | ||
73 | bytes_available -= strlen(str_flag); \ | ||
74 | } \ | ||
75 | } while (0) | ||
76 | |||
77 | APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING "); | ||
78 | APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS "); | ||
79 | #undef APPEND_FLAG | ||
80 | |||
81 | return buf; | ||
82 | } | ||
83 | |||
84 | /** | ||
85 | * __dma_dbg - Debug print for DMA allocs and frees. | ||
86 | * | ||
87 | * @g - The GPU. | ||
88 | * @size - The requested size of the alloc (size_t). | ||
89 | * @flags - The flags (unsigned long). | ||
90 | * @type - A string describing the type (i.e: sysmem or vidmem). | ||
91 | * @what - A string with 'alloc' or 'free'. | ||
92 | * | ||
93 | * @flags is the DMA flags. If there are none or it doesn't make sense to print | ||
94 | * flags just pass 0. | ||
95 | * | ||
96 | * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function. | ||
97 | */ | ||
98 | static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags, | ||
99 | const char *type, const char *what, | ||
100 | const char *func, int line) | ||
101 | { | ||
102 | char *flags_str = NULL; | ||
103 | |||
104 | /* | ||
105 | * Don't bother making the flags_str if debugging is | ||
106 | * not enabled. This saves a malloc and a free. | ||
107 | */ | ||
108 | if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma)) | ||
109 | return; | ||
110 | |||
111 | flags_str = nvgpu_dma_flags_to_str(g, flags); | ||
112 | |||
113 | __nvgpu_log_dbg(g, gpu_dbg_dma, | ||
114 | func, line, | ||
115 | "DMA %s: [%s] size=%-7zu " | ||
116 | "aligned=%-7zu total=%-10llukB %s", | ||
117 | what, type, | ||
118 | size, PAGE_ALIGN(size), | ||
119 | g->dma_memory_used >> 10, | ||
120 | flags_str); | ||
121 | |||
122 | if (flags_str) | ||
123 | nvgpu_kfree(g, flags_str); | ||
124 | } | ||
125 | |||
126 | #define dma_dbg_alloc(g, size, flags, type) \ | ||
127 | __dma_dbg(g, size, flags, type, "alloc", __func__, __LINE__) | ||
128 | #define dma_dbg_free(g, size, flags, type) \ | ||
129 | __dma_dbg(g, size, flags, type, "free", __func__, __LINE__) | ||
130 | |||
131 | /* | ||
132 | * For after the DMA alloc is done. | ||
133 | */ | ||
134 | #define __dma_dbg_done(g, size, type, what) \ | ||
135 | nvgpu_log(g, gpu_dbg_dma, \ | ||
136 | "DMA %s: [%s] size=%-7zu Done!", \ | ||
137 | what, type, size); \ | ||
138 | |||
139 | #define dma_dbg_alloc_done(g, size, type) \ | ||
140 | __dma_dbg_done(g, size, type, "alloc") | ||
141 | #define dma_dbg_free_done(g, size, type) \ | ||
142 | __dma_dbg_done(g, size, type, "free") | ||
143 | |||
144 | #if defined(CONFIG_GK20A_VIDMEM) | ||
145 | static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at, | ||
146 | size_t size) | ||
147 | { | ||
148 | u64 addr = 0; | ||
149 | |||
150 | if (at) | ||
151 | addr = nvgpu_alloc_fixed(allocator, at, size, 0); | ||
152 | else | ||
153 | addr = nvgpu_alloc(allocator, size); | ||
154 | |||
155 | return addr; | ||
156 | } | ||
157 | #endif | ||
158 | |||
159 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) | ||
160 | static void nvgpu_dma_flags_to_attrs(unsigned long *attrs, | ||
161 | unsigned long flags) | ||
162 | #define ATTR_ARG(x) *x | ||
163 | #else | ||
164 | static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs, | ||
165 | unsigned long flags) | ||
166 | #define ATTR_ARG(x) x | ||
167 | #endif | ||
168 | { | ||
169 | if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) | ||
170 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs)); | ||
171 | if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) | ||
172 | dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs)); | ||
173 | #undef ATTR_ARG | ||
174 | } | ||
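
How the two macro variants are consumed: on kernels >= 4.9 the attributes are a plain unsigned long bitmask, earlier kernels use struct dma_attrs, and NVGPU_DMA_ATTR() hides the difference at the call site. A sketch for the newer case (the wrapper function is illustrative):

```c
/* Sketch, assuming a >= 4.9 kernel where attrs are a plain bitmask. */
static void *alloc_unmapped(struct device *dev, size_t size,
			    dma_addr_t *iova)
{
	NVGPU_DEFINE_DMA_ATTRS(dma_attrs);  /* unsigned long dma_attrs = 0; */

	nvgpu_dma_flags_to_attrs(&dma_attrs, NVGPU_DMA_NO_KERNEL_MAPPING);

	/*
	 * With NO_KERNEL_MAPPING the return value is an opaque cookie
	 * (a page array on ARM) rather than a CPU mapping, which is
	 * exactly what the sysmem path below stores in mem->priv.pages.
	 */
	return dma_alloc_attrs(dev, size, iova, GFP_KERNEL | __GFP_ZERO,
			       NVGPU_DMA_ATTR(dma_attrs));
}
```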
175 | |||
176 | int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | ||
177 | size_t size, struct nvgpu_mem *mem) | ||
178 | { | ||
179 | struct device *d = dev_from_gk20a(g); | ||
180 | int err; | ||
181 | dma_addr_t iova; | ||
182 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | ||
183 | void *alloc_ret; | ||
184 | |||
185 | if (nvgpu_mem_is_valid(mem)) { | ||
186 | nvgpu_warn(g, "memory leak !!"); | ||
187 | WARN_ON(1); | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * WAR for IO coherent chips: the DMA API does not seem to generate | ||
192 | * mappings that work correctly. Unclear why - Bug ID: 2040115. | ||
193 | * | ||
194 | 	 * Basically we just tell the DMA API to skip the kernel mapping | ||
195 | 	 * (NO_KERNEL_MAPPING) and then create the vmap() ourselves. | ||
196 | */ | ||
197 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
198 | flags |= NVGPU_DMA_NO_KERNEL_MAPPING; | ||
199 | |||
200 | /* | ||
201 | 	 * Update this before the debug print so the new total shows up | ||
202 | 	 * there. On the failure path it has to be subtracted back out. | ||
203 | */ | ||
204 | g->dma_memory_used += PAGE_ALIGN(size); | ||
205 | |||
206 | dma_dbg_alloc(g, size, flags, "sysmem"); | ||
207 | |||
208 | /* | ||
209 | 	 * Save the requested size; for actual allocation purposes the size | ||
210 | 	 * is page aligned. | ||
211 | */ | ||
212 | mem->size = size; | ||
213 | size = PAGE_ALIGN(size); | ||
214 | |||
215 | nvgpu_dma_flags_to_attrs(&dma_attrs, flags); | ||
216 | |||
217 | alloc_ret = dma_alloc_attrs(d, size, &iova, | ||
218 | GFP_KERNEL|__GFP_ZERO, | ||
219 | NVGPU_DMA_ATTR(dma_attrs)); | ||
220 | if (!alloc_ret) | ||
221 | return -ENOMEM; | ||
222 | |||
223 | if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { | ||
224 | mem->priv.pages = alloc_ret; | ||
225 | err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt, | ||
226 | mem->priv.pages, | ||
227 | iova, size); | ||
228 | } else { | ||
229 | mem->cpu_va = alloc_ret; | ||
230 | err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va, | ||
231 | iova, size, flags); | ||
232 | } | ||
233 | if (err) | ||
234 | goto fail_free_dma; | ||
235 | |||
236 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { | ||
237 | mem->cpu_va = vmap(mem->priv.pages, | ||
238 | size >> PAGE_SHIFT, | ||
239 | 0, PAGE_KERNEL); | ||
240 | if (!mem->cpu_va) { | ||
241 | err = -ENOMEM; | ||
242 | goto fail_free_sgt; | ||
243 | } | ||
244 | } | ||
245 | |||
246 | mem->aligned_size = size; | ||
247 | mem->aperture = APERTURE_SYSMEM; | ||
248 | mem->priv.flags = flags; | ||
249 | |||
250 | dma_dbg_alloc_done(g, mem->size, "sysmem"); | ||
251 | |||
252 | return 0; | ||
253 | |||
254 | fail_free_sgt: | ||
255 | nvgpu_free_sgtable(g, &mem->priv.sgt); | ||
256 | fail_free_dma: | ||
257 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); | ||
258 | mem->cpu_va = NULL; | ||
259 | mem->priv.sgt = NULL; | ||
260 | mem->size = 0; | ||
261 | g->dma_memory_used -= size; /* aligned_size is not set yet here */ | ||
262 | return err; | ||
263 | } | ||
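
A minimal caller-side sketch of the sysmem path (illustrative; assumes a powered-on struct gk20a *g): with no flags set, cpu_va is valid right after allocation, and the matching free goes through nvgpu_dma_free_sys().

	static int sysmem_alloc_example(struct gk20a *g)
	{
		struct nvgpu_mem mem = { };	/* must start out invalid */
		int err;

		err = nvgpu_dma_alloc_flags_sys(g, 0, PAGE_SIZE, &mem);
		if (err)
			return err;

		/* No NVGPU_DMA_NO_KERNEL_MAPPING flag, so cpu_va is mapped. */
		memset(mem.cpu_va, 0, mem.size);

		nvgpu_dma_free_sys(g, &mem);
		return 0;
	}

Note that when NVGPU_USE_COHERENT_SYSMEM is enabled, cpu_va is still valid: the allocator vmap()s the pages itself, per the WAR above.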
264 | |||
265 | int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, | ||
266 | size_t size, struct nvgpu_mem *mem, u64 at) | ||
267 | { | ||
268 | #if defined(CONFIG_GK20A_VIDMEM) | ||
269 | u64 addr; | ||
270 | int err; | ||
271 | struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? | ||
272 | &g->mm.vidmem.allocator : | ||
273 | &g->mm.vidmem.bootstrap_allocator; | ||
274 | u64 before_pending; | ||
275 | |||
276 | if (nvgpu_mem_is_valid(mem)) { | ||
277 | nvgpu_warn(g, "memory leak !!"); | ||
278 | WARN_ON(1); | ||
279 | } | ||
280 | |||
281 | dma_dbg_alloc(g, size, flags, "vidmem"); | ||
282 | |||
283 | mem->size = size; | ||
284 | size = PAGE_ALIGN(size); | ||
285 | |||
286 | if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) | ||
287 | return -ENOSYS; | ||
288 | |||
289 | /* | ||
290 | * Our own allocator doesn't have any flags yet, and we can't | ||
291 | * kernel-map these, so require explicit flags. | ||
292 | */ | ||
293 | WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); | ||
294 | |||
295 | nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); | ||
296 | before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var); | ||
297 | addr = __nvgpu_dma_alloc(vidmem_alloc, at, size); | ||
298 | nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); | ||
299 | if (!addr) { | ||
300 | /* | ||
301 | * If memory is known to be freed soon, let the user know that | ||
302 | * it may be available after a while. | ||
303 | */ | ||
304 | if (before_pending) | ||
305 | return -EAGAIN; | ||
306 | else | ||
307 | return -ENOMEM; | ||
308 | } | ||
309 | |||
310 | if (at) | ||
311 | mem->mem_flags |= NVGPU_MEM_FLAG_FIXED; | ||
312 | |||
313 | mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); | ||
314 | if (!mem->priv.sgt) { | ||
315 | err = -ENOMEM; | ||
316 | goto fail_physfree; | ||
317 | } | ||
318 | |||
319 | err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL); | ||
320 | if (err) | ||
321 | goto fail_kfree; | ||
322 | |||
323 | nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr); | ||
324 | sg_set_page(mem->priv.sgt->sgl, NULL, size, 0); | ||
325 | |||
326 | mem->aligned_size = size; | ||
327 | mem->aperture = APERTURE_VIDMEM; | ||
328 | mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr; | ||
329 | mem->allocator = vidmem_alloc; | ||
330 | mem->priv.flags = flags; | ||
331 | |||
332 | nvgpu_init_list_node(&mem->clear_list_entry); | ||
333 | |||
334 | dma_dbg_alloc_done(g, mem->size, "vidmem"); | ||
335 | |||
336 | return 0; | ||
337 | |||
338 | fail_kfree: | ||
339 | nvgpu_kfree(g, mem->priv.sgt); | ||
340 | fail_physfree: | ||
341 | nvgpu_free(&g->mm.vidmem.allocator, addr); | ||
342 | mem->size = 0; | ||
343 | return err; | ||
344 | #else | ||
345 | return -ENOSYS; | ||
346 | #endif | ||
347 | } | ||
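
The -EAGAIN return is a contract with the caller: vidmem may become available once pending clears complete. A hedged caller-side sketch of that contract (function name and retry policy are illustrative):

	static int vidmem_alloc_with_retry(struct gk20a *g, size_t size,
					   struct nvgpu_mem *mem)
	{
		int tries = 10;
		int err;

		do {
			err = nvgpu_dma_alloc_flags_vid_at(g,
					NVGPU_DMA_NO_KERNEL_MAPPING,
					size, mem, 0);
			if (err != -EAGAIN)
				return err;
			/* Pending frees are still being cleared; back off. */
			nvgpu_msleep(10);
		} while (--tries > 0);

		return -ENOMEM;
	}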
348 | |||
349 | void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) | ||
350 | { | ||
351 | struct device *d = dev_from_gk20a(g); | ||
352 | |||
353 | g->dma_memory_used -= mem->aligned_size; | ||
354 | |||
355 | dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem"); | ||
356 | |||
357 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && | ||
358 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && | ||
359 | (mem->cpu_va || mem->priv.pages)) { | ||
360 | /* | ||
361 | * Free side of WAR for bug 2040115. | ||
362 | */ | ||
363 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
364 | vunmap(mem->cpu_va); | ||
365 | |||
366 | if (mem->priv.flags) { | ||
367 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | ||
368 | |||
369 | nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags); | ||
370 | |||
371 | if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) { | ||
372 | dma_free_attrs(d, mem->aligned_size, mem->priv.pages, | ||
373 | sg_dma_address(mem->priv.sgt->sgl), | ||
374 | NVGPU_DMA_ATTR(dma_attrs)); | ||
375 | } else { | ||
376 | dma_free_attrs(d, mem->aligned_size, mem->cpu_va, | ||
377 | sg_dma_address(mem->priv.sgt->sgl), | ||
378 | NVGPU_DMA_ATTR(dma_attrs)); | ||
379 | } | ||
380 | } else { | ||
381 | dma_free_coherent(d, mem->aligned_size, mem->cpu_va, | ||
382 | sg_dma_address(mem->priv.sgt->sgl)); | ||
383 | } | ||
384 | mem->cpu_va = NULL; | ||
385 | mem->priv.pages = NULL; | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * When this flag is set we expect that pages is still populated but not | ||
390 | * by the DMA API. | ||
391 | */ | ||
392 | if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) | ||
393 | nvgpu_kfree(g, mem->priv.pages); | ||
394 | |||
395 | if ((mem->mem_flags & NVGPU_MEM_FLAG_FOREIGN_SGT) == 0 && | ||
396 | mem->priv.sgt != NULL) { | ||
397 | nvgpu_free_sgtable(g, &mem->priv.sgt); | ||
398 | } | ||
399 | |||
400 | dma_dbg_free_done(g, mem->size, "sysmem"); | ||
401 | |||
402 | mem->size = 0; | ||
403 | mem->aligned_size = 0; | ||
404 | mem->aperture = APERTURE_INVALID; | ||
405 | } | ||
406 | |||
407 | void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) | ||
408 | { | ||
409 | #if defined(CONFIG_GK20A_VIDMEM) | ||
410 | size_t mem_size = mem->size; | ||
411 | |||
412 | dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); | ||
413 | |||
414 | /* Sanity check - only this flag is supported when allocating. */ | ||
415 | WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); | ||
416 | |||
417 | if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { | ||
418 | int err = nvgpu_vidmem_clear_list_enqueue(g, mem); | ||
419 | |||
420 | /* | ||
421 | * If there's an error here then that means we can't clear the | ||
422 | * vidmem. That's too bad; however, we still own the nvgpu_mem | ||
423 | * buf so we have to free that. | ||
424 | * | ||
425 | * We don't need to worry about the vidmem allocator itself | ||
426 | * since when that gets cleaned up in the driver shutdown path | ||
427 | * all the outstanding allocs are force freed. | ||
428 | */ | ||
429 | if (err) | ||
430 | nvgpu_kfree(g, mem); | ||
431 | } else { | ||
432 | nvgpu_memset(g, mem, 0, 0, mem->aligned_size); | ||
433 | nvgpu_free(mem->allocator, | ||
434 | (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl)); | ||
435 | nvgpu_free_sgtable(g, &mem->priv.sgt); | ||
436 | |||
437 | mem->size = 0; | ||
438 | mem->aligned_size = 0; | ||
439 | mem->aperture = APERTURE_INVALID; | ||
440 | } | ||
441 | |||
442 | dma_dbg_free_done(g, mem_size, "vidmem"); | ||
443 | #endif | ||
444 | } | ||
445 | |||
446 | int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt, | ||
447 | void *cpuva, u64 iova, size_t size, unsigned long flags) | ||
448 | { | ||
449 | int err = 0; | ||
450 | struct sg_table *tbl; | ||
451 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | ||
452 | |||
453 | tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); | ||
454 | if (!tbl) { | ||
455 | err = -ENOMEM; | ||
456 | goto fail; | ||
457 | } | ||
458 | |||
459 | nvgpu_dma_flags_to_attrs(&dma_attrs, flags); | ||
460 | err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova, | ||
461 | size, NVGPU_DMA_ATTR(dma_attrs)); | ||
462 | if (err) | ||
463 | goto fail; | ||
464 | |||
465 | sg_dma_address(tbl->sgl) = iova; | ||
466 | *sgt = tbl; | ||
467 | |||
468 | return 0; | ||
469 | |||
470 | fail: | ||
471 | if (tbl) | ||
472 | nvgpu_kfree(g, tbl); | ||
473 | |||
474 | return err; | ||
475 | } | ||
476 | |||
477 | int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt, | ||
478 | void *cpuva, u64 iova, size_t size) | ||
479 | { | ||
480 | return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0); | ||
481 | } | ||
482 | |||
483 | int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt, | ||
484 | struct page **pages, u64 iova, size_t size) | ||
485 | { | ||
486 | int err = 0; | ||
487 | struct sg_table *tbl; | ||
488 | |||
489 | tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); | ||
490 | if (!tbl) { | ||
491 | err = -ENOMEM; | ||
492 | goto fail; | ||
493 | } | ||
494 | |||
495 | err = sg_alloc_table_from_pages(tbl, pages, | ||
496 | DIV_ROUND_UP(size, PAGE_SIZE), | ||
497 | 0, size, GFP_KERNEL); | ||
498 | if (err) | ||
499 | goto fail; | ||
500 | |||
501 | sg_dma_address(tbl->sgl) = iova; | ||
502 | *sgt = tbl; | ||
503 | |||
504 | return 0; | ||
505 | |||
506 | fail: | ||
507 | if (tbl) | ||
508 | nvgpu_kfree(g, tbl); | ||
509 | |||
510 | return err; | ||
511 | } | ||
512 | |||
513 | void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt) | ||
514 | { | ||
515 | sg_free_table(*sgt); | ||
516 | nvgpu_kfree(g, *sgt); | ||
517 | *sgt = NULL; | ||
518 | } | ||
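
A short sketch tying these helpers together (illustrative; assumes cpuva/iova came from an earlier dma_alloc_attrs() on this GPU's device):

	static int sgtable_roundtrip_example(struct gk20a *g, void *cpuva,
					     dma_addr_t iova, size_t size)
	{
		struct sg_table *sgt;
		int err;

		err = nvgpu_get_sgtable(g, &sgt, cpuva, iova, size);
		if (err)
			return err;

		/* ... hand sgt->sgl to the GMMU mapping code here ... */

		nvgpu_free_sgtable(g, &sgt);	/* frees table, NULLs sgt */
		return 0;
	}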
519 | |||
520 | bool nvgpu_iommuable(struct gk20a *g) | ||
521 | { | ||
522 | #ifdef CONFIG_TEGRA_GK20A | ||
523 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
524 | |||
525 | /* | ||
526 | * Check against the nvgpu device to see if it's been marked as | ||
527 | * IOMMU'able. | ||
528 | */ | ||
529 | if (!device_is_iommuable(l->dev)) | ||
530 | return false; | ||
531 | #endif | ||
532 | |||
533 | return true; | ||
534 | } | ||
diff --git a/include/os/linux/log.c b/include/os/linux/log.c new file mode 100644 index 0000000..bd9f67d --- /dev/null +++ b/include/os/linux/log.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/device.h> | ||
19 | |||
20 | #include <nvgpu/log.h> | ||
21 | #include <nvgpu/gk20a.h> | ||
22 | |||
23 | #include "platform_gk20a.h" | ||
24 | #include "os_linux.h" | ||
25 | |||
26 | /* | ||
27 | * Define a length for log buffers. This is the buffer that the 'fmt, ...' part | ||
28 | * of __nvgpu_do_log_print() prints into. This buffer lives on the stack so it | ||
29 | * needs to not be overly sized since we have limited kernel stack space. But at | ||
30 | * the same time we don't want it to be restrictive either. | ||
31 | */ | ||
32 | #define LOG_BUFFER_LENGTH 160 | ||
33 | |||
34 | /* | ||
35 | * Annoying quirk of Linux: this has to be a string literal since the printk() | ||
36 | * function and friends use the preprocessor to concatenate stuff to the start | ||
37 | * of this string when printing. | ||
38 | */ | ||
39 | #define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n" | ||
40 | |||
41 | static const char *log_types[] = { | ||
42 | "ERR", | ||
43 | "WRN", | ||
44 | "DBG", | ||
45 | "INFO", | ||
46 | }; | ||
47 | |||
48 | int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask) | ||
49 | { | ||
50 | return !!(g->log_mask & log_mask); | ||
51 | } | ||
52 | |||
53 | static inline const char *nvgpu_log_name(struct gk20a *g) | ||
54 | { | ||
55 | return dev_name(dev_from_gk20a(g)); | ||
56 | } | ||
57 | |||
58 | #ifdef CONFIG_GK20A_TRACE_PRINTK | ||
59 | static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name, | ||
60 | const char *func_name, int line, | ||
61 | const char *log_type, const char *log) | ||
62 | { | ||
63 | trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log); | ||
64 | } | ||
65 | #endif | ||
66 | |||
67 | static void __nvgpu_really_print_log(u32 trace, const char *gpu_name, | ||
68 | const char *func_name, int line, | ||
69 | enum nvgpu_log_type type, const char *log) | ||
70 | { | ||
71 | const char *name = gpu_name ? gpu_name : ""; | ||
72 | const char *log_type = log_types[type]; | ||
73 | |||
74 | #ifdef CONFIG_GK20A_TRACE_PRINTK | ||
75 | if (trace) | ||
76 | return __nvgpu_trace_printk_log(trace, name, func_name, | ||
77 | line, log_type, log); | ||
78 | #endif | ||
79 | switch (type) { | ||
80 | case NVGPU_DEBUG: | ||
81 | /* | ||
82 | * We could use pr_debug() here, but nvgpu controls debug enablement | ||
83 | * separately from the kernel's dynamic debug. Arguably that mismatch | ||
84 | * is a bug in nvgpu. | ||
85 | */ | ||
86 | pr_info(LOG_FMT, name, func_name, line, log_type, log); | ||
87 | break; | ||
88 | case NVGPU_INFO: | ||
89 | pr_info(LOG_FMT, name, func_name, line, log_type, log); | ||
90 | break; | ||
91 | case NVGPU_WARNING: | ||
92 | pr_warn(LOG_FMT, name, func_name, line, log_type, log); | ||
93 | break; | ||
94 | case NVGPU_ERROR: | ||
95 | pr_err(LOG_FMT, name, func_name, line, log_type, log); | ||
96 | break; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | __attribute__((format (printf, 5, 6))) | ||
101 | void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line, | ||
102 | enum nvgpu_log_type type, const char *fmt, ...) | ||
103 | { | ||
104 | char log[LOG_BUFFER_LENGTH]; | ||
105 | va_list args; | ||
106 | |||
107 | va_start(args, fmt); | ||
108 | vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); | ||
109 | va_end(args); | ||
110 | |||
111 | __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "", | ||
112 | func_name, line, type, log); | ||
113 | } | ||
114 | |||
115 | __attribute__((format (printf, 5, 6))) | ||
116 | void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask, | ||
117 | const char *func_name, int line, | ||
118 | const char *fmt, ...) | ||
119 | { | ||
120 | char log[LOG_BUFFER_LENGTH]; | ||
121 | va_list args; | ||
122 | |||
123 | if ((log_mask & g->log_mask) == 0) | ||
124 | return; | ||
125 | |||
126 | va_start(args, fmt); | ||
127 | vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); | ||
128 | va_end(args); | ||
129 | |||
130 | __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g), | ||
131 | func_name, line, NVGPU_DEBUG, log); | ||
132 | } | ||
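
Callers normally reach these entry points through wrapper macros in <nvgpu/log.h> (not shown in this diff) that supply __func__ and __LINE__. A direct-call sketch, purely illustrative:

	static void example_log(struct gk20a *g, u32 chid)
	{
		/* Roughly what an nvgpu_err()-style wrapper expands to: */
		__nvgpu_log_msg(g, __func__, __LINE__, NVGPU_ERROR,
				"channel %u in unexpected state", chid);
	}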
diff --git a/include/os/linux/ltc.c b/include/os/linux/ltc.c new file mode 100644 index 0000000..baeb20b --- /dev/null +++ b/include/os/linux/ltc.c | |||
@@ -0,0 +1,60 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <nvgpu/ltc.h> | ||
24 | #include <nvgpu/dma.h> | ||
25 | #include <nvgpu/nvgpu_mem.h> | ||
26 | #include <nvgpu/gk20a.h> | ||
27 | |||
28 | #include "gk20a/gr_gk20a.h" | ||
29 | |||
30 | int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size, | ||
31 | bool vidmem_alloc) | ||
32 | { | ||
33 | struct gr_gk20a *gr = &g->gr; | ||
34 | unsigned long flags = 0; | ||
35 | |||
36 | if (nvgpu_mem_is_valid(&gr->compbit_store.mem)) | ||
37 | return 0; | ||
38 | |||
39 | if (vidmem_alloc) { | ||
40 | /* | ||
41 | * The backing store MUST be physically contiguous and allocated | ||
42 | * in one chunk. | ||
43 | * The vidmem allocation API has no FORCE_CONTIGUOUS-style flag | ||
44 | * for requesting contiguous memory, but this allocation goes | ||
45 | * through the vidmem bootstrap allocator, which always hands | ||
46 | * out contiguous memory. | ||
47 | */ | ||
48 | return nvgpu_dma_alloc_vid(g, | ||
49 | compbit_backing_size, | ||
50 | &gr->compbit_store.mem); | ||
51 | } else { | ||
52 | if (!nvgpu_iommuable(g)) | ||
53 | flags = NVGPU_DMA_FORCE_CONTIGUOUS; | ||
54 | |||
55 | return nvgpu_dma_alloc_flags_sys(g, | ||
56 | flags, | ||
57 | compbit_backing_size, | ||
58 | &gr->compbit_store.mem); | ||
59 | } | ||
60 | } | ||
diff --git a/include/os/linux/module.c b/include/os/linux/module.c new file mode 100644 index 0000000..807df2c --- /dev/null +++ b/include/os/linux/module.c | |||
@@ -0,0 +1,1529 @@ | |||
1 | /* | ||
2 | * GK20A Graphics | ||
3 | * | ||
4 | * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/module.h> | ||
20 | #include <linux/of.h> | ||
21 | #include <linux/of_device.h> | ||
22 | #include <linux/of_platform.h> | ||
23 | #include <linux/of_address.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/pm_runtime.h> | ||
26 | #include <linux/reset.h> | ||
27 | #include <linux/reboot.h> | ||
28 | #include <linux/notifier.h> | ||
29 | #include <linux/platform/tegra/common.h> | ||
30 | #include <linux/pci.h> | ||
31 | |||
32 | #include <uapi/linux/nvgpu.h> | ||
33 | #include <dt-bindings/soc/gm20b-fuse.h> | ||
34 | #include <dt-bindings/soc/gp10b-fuse.h> | ||
35 | #include <dt-bindings/soc/gv11b-fuse.h> | ||
36 | |||
37 | #include <soc/tegra/fuse.h> | ||
38 | |||
39 | #include <nvgpu/hal_init.h> | ||
40 | #include <nvgpu/dma.h> | ||
41 | #include <nvgpu/kmem.h> | ||
42 | #include <nvgpu/nvgpu_common.h> | ||
43 | #include <nvgpu/soc.h> | ||
44 | #include <nvgpu/enabled.h> | ||
45 | #include <nvgpu/debug.h> | ||
46 | #include <nvgpu/ctxsw_trace.h> | ||
47 | #include <nvgpu/vidmem.h> | ||
48 | #include <nvgpu/sim.h> | ||
49 | #include <nvgpu/clk_arb.h> | ||
50 | #include <nvgpu/timers.h> | ||
51 | #include <nvgpu/channel.h> | ||
52 | |||
53 | #include "platform_gk20a.h" | ||
54 | #include "sysfs.h" | ||
55 | #include "vgpu/vgpu_linux.h" | ||
56 | #include "scale.h" | ||
57 | #include "pci.h" | ||
58 | #include "module.h" | ||
59 | #include "module_usermode.h" | ||
60 | #include "intr.h" | ||
61 | #include "ioctl.h" | ||
62 | #include "ioctl_ctrl.h" | ||
63 | |||
64 | #include "os_linux.h" | ||
65 | #include "os_ops.h" | ||
66 | #include "ctxsw_trace.h" | ||
67 | #include "driver_common.h" | ||
68 | #include "channel.h" | ||
69 | #include "debug_pmgr.h" | ||
70 | |||
71 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
72 | #include "cde.h" | ||
73 | #endif | ||
74 | |||
75 | #define CLASS_NAME "nvidia-gpu" | ||
76 | /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ | ||
77 | |||
78 | #define GK20A_WAIT_FOR_IDLE_MS 2000 | ||
79 | |||
80 | #define CREATE_TRACE_POINTS | ||
81 | #include <trace/events/gk20a.h> | ||
82 | |||
83 | static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb, | ||
84 | unsigned long event, void *unused) | ||
85 | { | ||
86 | struct gk20a *g = container_of(nb, struct gk20a, nvgpu_reboot_nb); | ||
87 | |||
88 | __nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true); | ||
89 | return NOTIFY_DONE; | ||
90 | } | ||
91 | |||
92 | struct device_node *nvgpu_get_node(struct gk20a *g) | ||
93 | { | ||
94 | struct device *dev = dev_from_gk20a(g); | ||
95 | |||
96 | if (dev_is_pci(dev)) { | ||
97 | struct pci_bus *bus = to_pci_dev(dev)->bus; | ||
98 | |||
99 | while (!pci_is_root_bus(bus)) | ||
100 | bus = bus->parent; | ||
101 | |||
102 | return bus->bridge->parent->of_node; | ||
103 | } | ||
104 | |||
105 | return dev->of_node; | ||
106 | } | ||
107 | |||
108 | void gk20a_busy_noresume(struct gk20a *g) | ||
109 | { | ||
110 | pm_runtime_get_noresume(dev_from_gk20a(g)); | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * Check if the device can go busy. | ||
115 | */ | ||
116 | static int nvgpu_can_busy(struct gk20a *g) | ||
117 | { | ||
118 | /* Can't do anything if the system is rebooting/shutting down. */ | ||
119 | if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING)) | ||
120 | return 0; | ||
121 | |||
122 | /* Can't do anything if the driver is restarting. */ | ||
123 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
124 | return 0; | ||
125 | |||
126 | return 1; | ||
127 | } | ||
128 | |||
129 | int gk20a_busy(struct gk20a *g) | ||
130 | { | ||
131 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
132 | int ret = 0; | ||
133 | struct device *dev; | ||
134 | |||
135 | if (!g) | ||
136 | return -ENODEV; | ||
137 | |||
138 | atomic_inc(&g->usage_count.atomic_var); | ||
139 | |||
140 | down_read(&l->busy_lock); | ||
141 | |||
142 | if (!nvgpu_can_busy(g)) { | ||
143 | ret = -ENODEV; | ||
144 | atomic_dec(&g->usage_count.atomic_var); | ||
145 | goto fail; | ||
146 | } | ||
147 | |||
148 | dev = dev_from_gk20a(g); | ||
149 | |||
150 | if (pm_runtime_enabled(dev)) { | ||
151 | /* Increment usage count and attempt to resume device */ | ||
152 | ret = pm_runtime_get_sync(dev); | ||
153 | if (ret < 0) { | ||
154 | /* Mark suspended so runtime pm will retry later */ | ||
155 | pm_runtime_set_suspended(dev); | ||
156 | pm_runtime_put_noidle(dev); | ||
157 | atomic_dec(&g->usage_count.atomic_var); | ||
158 | goto fail; | ||
159 | } | ||
160 | } else { | ||
161 | ret = gk20a_gpu_is_virtual(dev) ? | ||
162 | vgpu_pm_finalize_poweron(dev) : | ||
163 | gk20a_pm_finalize_poweron(dev); | ||
164 | if (ret) { | ||
165 | atomic_dec(&g->usage_count.atomic_var); | ||
166 | goto fail; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | fail: | ||
171 | up_read(&l->busy_lock); | ||
172 | |||
173 | return ret < 0 ? ret : 0; | ||
174 | } | ||
175 | |||
176 | void gk20a_idle_nosuspend(struct gk20a *g) | ||
177 | { | ||
178 | pm_runtime_put_noidle(dev_from_gk20a(g)); | ||
179 | } | ||
180 | |||
181 | void gk20a_idle(struct gk20a *g) | ||
182 | { | ||
183 | struct device *dev; | ||
184 | |||
185 | atomic_dec(&g->usage_count.atomic_var); | ||
186 | |||
187 | dev = dev_from_gk20a(g); | ||
188 | |||
189 | if (!(dev && nvgpu_can_busy(g))) | ||
190 | return; | ||
191 | |||
192 | if (pm_runtime_enabled(dev)) { | ||
193 | pm_runtime_mark_last_busy(dev); | ||
194 | pm_runtime_put_sync_autosuspend(dev); | ||
195 | } | ||
196 | } | ||
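
The two calls above form a bracket: every successful gk20a_busy() must be balanced with gk20a_idle(). A minimal sketch of the pattern (function name illustrative):

	static int example_gpu_work(struct gk20a *g)
	{
		int err = gk20a_busy(g);

		if (err)
			return err;	/* GPU could not be powered on */

		/* ... GPU register accesses are safe in this window ... */

		gk20a_idle(g);
		return 0;
	}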
197 | |||
198 | /* | ||
199 | * Undoes gk20a_lockout_registers(). | ||
200 | */ | ||
201 | static int gk20a_restore_registers(struct gk20a *g) | ||
202 | { | ||
203 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
204 | |||
205 | l->regs = l->regs_saved; | ||
206 | l->bar1 = l->bar1_saved; | ||
207 | |||
208 | nvgpu_restore_usermode_registers(g); | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l) | ||
214 | { | ||
215 | struct gk20a *g = &l->g; | ||
216 | int err; | ||
217 | |||
218 | if (l->init_done) | ||
219 | return 0; | ||
220 | |||
221 | err = nvgpu_init_channel_support_linux(l); | ||
222 | if (err) { | ||
223 | nvgpu_err(g, "failed to init linux channel support"); | ||
224 | return err; | ||
225 | } | ||
226 | |||
227 | if (l->ops.clk.init_debugfs) { | ||
228 | err = l->ops.clk.init_debugfs(g); | ||
229 | if (err) { | ||
230 | nvgpu_err(g, "failed to init linux clk debugfs"); | ||
231 | return err; | ||
232 | } | ||
233 | } | ||
234 | |||
235 | if (l->ops.therm.init_debugfs) { | ||
236 | err = l->ops.therm.init_debugfs(g); | ||
237 | if (err) { | ||
238 | nvgpu_err(g, "failed to init linux therm debugfs"); | ||
239 | return err; | ||
240 | } | ||
241 | } | ||
242 | |||
243 | if (l->ops.fecs_trace.init_debugfs) { | ||
244 | err = l->ops.fecs_trace.init_debugfs(g); | ||
245 | if (err) { | ||
246 | nvgpu_err(g, "failed to init linux fecs trace debugfs"); | ||
247 | return err; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | err = nvgpu_pmgr_init_debugfs_linux(l); | ||
252 | if (err) { | ||
253 | nvgpu_err(g, "failed to init linux pmgr debugfs"); | ||
254 | return err; | ||
255 | } | ||
256 | |||
257 | l->init_done = true; | ||
258 | |||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | bool gk20a_check_poweron(struct gk20a *g) | ||
263 | { | ||
264 | bool ret; | ||
265 | |||
266 | nvgpu_mutex_acquire(&g->power_lock); | ||
267 | ret = g->power_on; | ||
268 | nvgpu_mutex_release(&g->power_lock); | ||
269 | |||
270 | return ret; | ||
271 | } | ||
272 | |||
273 | int gk20a_pm_finalize_poweron(struct device *dev) | ||
274 | { | ||
275 | struct gk20a *g = get_gk20a(dev); | ||
276 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
277 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
278 | int err = 0; | ||
279 | |||
280 | nvgpu_log_fn(g, " "); | ||
281 | |||
282 | nvgpu_mutex_acquire(&g->power_lock); | ||
283 | |||
284 | if (g->power_on) | ||
285 | goto done; | ||
286 | |||
287 | trace_gk20a_finalize_poweron(dev_name(dev)); | ||
288 | |||
289 | /* Increment platform power refcount */ | ||
290 | if (platform->busy) { | ||
291 | err = platform->busy(dev); | ||
292 | if (err < 0) { | ||
293 | nvgpu_err(g, "failed to poweron platform dependency"); | ||
294 | goto done; | ||
295 | } | ||
296 | } | ||
297 | |||
298 | err = gk20a_restore_registers(g); | ||
299 | if (err) | ||
300 | goto done; | ||
301 | |||
302 | nvgpu_restore_usermode_for_poweron(g); | ||
303 | |||
304 | /* Enable interrupt workqueue */ | ||
305 | if (!l->nonstall_work_queue) { | ||
306 | l->nonstall_work_queue = alloc_workqueue("%s", | ||
307 | WQ_HIGHPRI, 1, "mc_nonstall"); | ||
308 | INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb); | ||
309 | } | ||
310 | |||
311 | err = nvgpu_detect_chip(g); | ||
312 | if (err) | ||
313 | goto done; | ||
314 | |||
315 | if (g->sim) { | ||
316 | if (g->sim->sim_init_late) | ||
317 | g->sim->sim_init_late(g); | ||
318 | } | ||
319 | |||
320 | err = gk20a_finalize_poweron(g); | ||
321 | if (err) | ||
322 | goto done; | ||
323 | |||
324 | err = nvgpu_init_os_linux_ops(l); | ||
325 | if (err) | ||
326 | goto done; | ||
327 | |||
328 | err = nvgpu_finalize_poweron_linux(l); | ||
329 | if (err) | ||
330 | goto done; | ||
331 | |||
332 | nvgpu_init_mm_ce_context(g); | ||
333 | |||
334 | nvgpu_vidmem_thread_unpause(&g->mm); | ||
335 | |||
336 | /* Initialize scaling; the scaling driver is only initialized once */ | ||
337 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) && | ||
338 | nvgpu_platform_is_silicon(g)) { | ||
339 | gk20a_scale_init(dev); | ||
340 | if (platform->initscale) | ||
341 | platform->initscale(dev); | ||
342 | } | ||
343 | |||
344 | trace_gk20a_finalize_poweron_done(dev_name(dev)); | ||
345 | |||
346 | enable_irq(g->irq_stall); | ||
347 | if (g->irq_stall != g->irq_nonstall) | ||
348 | enable_irq(g->irq_nonstall); | ||
349 | g->irqs_enabled = 1; | ||
350 | |||
351 | gk20a_scale_resume(dev_from_gk20a(g)); | ||
352 | |||
353 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
354 | if (platform->has_cde) | ||
355 | gk20a_init_cde_support(l); | ||
356 | #endif | ||
357 | |||
358 | err = gk20a_sched_ctrl_init(g); | ||
359 | if (err) { | ||
360 | nvgpu_err(g, "failed to init sched control"); | ||
361 | goto done; | ||
362 | } | ||
363 | |||
364 | g->sw_ready = true; | ||
365 | |||
366 | done: | ||
367 | if (err) | ||
368 | g->power_on = false; | ||
369 | |||
370 | nvgpu_mutex_release(&g->power_lock); | ||
371 | return err; | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * Locks out the driver from accessing GPU registers. This prevents access to | ||
376 | * these registers after the GPU has been clock or power gated. This should help | ||
377 | * find annoying bugs where register reads and writes are silently dropped | ||
378 | * after the GPU has been turned off. On older chips these reads and writes can | ||
379 | * also lock the entire CPU up. | ||
380 | */ | ||
381 | static int gk20a_lockout_registers(struct gk20a *g) | ||
382 | { | ||
383 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
384 | |||
385 | l->regs = NULL; | ||
386 | l->bar1 = NULL; | ||
387 | |||
388 | nvgpu_lockout_usermode_registers(g); | ||
389 | |||
390 | return 0; | ||
391 | } | ||
392 | |||
393 | static int gk20a_pm_prepare_poweroff(struct device *dev) | ||
394 | { | ||
395 | struct gk20a *g = get_gk20a(dev); | ||
396 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
397 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
398 | #endif | ||
399 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
400 | bool irqs_enabled; | ||
401 | int ret = 0; | ||
402 | |||
403 | nvgpu_log_fn(g, " "); | ||
404 | |||
405 | nvgpu_mutex_acquire(&g->power_lock); | ||
406 | |||
407 | if (!g->power_on) | ||
408 | goto done; | ||
409 | |||
410 | /* disable IRQs and wait for completion */ | ||
411 | irqs_enabled = g->irqs_enabled; | ||
412 | if (irqs_enabled) { | ||
413 | disable_irq(g->irq_stall); | ||
414 | if (g->irq_stall != g->irq_nonstall) | ||
415 | disable_irq(g->irq_nonstall); | ||
416 | g->irqs_enabled = 0; | ||
417 | } | ||
418 | |||
419 | gk20a_scale_suspend(dev); | ||
420 | |||
421 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
422 | gk20a_cde_suspend(l); | ||
423 | #endif | ||
424 | |||
425 | ret = gk20a_prepare_poweroff(g); | ||
426 | if (ret) | ||
427 | goto error; | ||
428 | |||
429 | /* Decrement platform power refcount */ | ||
430 | if (platform->idle) | ||
431 | platform->idle(dev); | ||
432 | |||
433 | /* Stop CPU from accessing the GPU registers. */ | ||
434 | gk20a_lockout_registers(g); | ||
435 | |||
436 | nvgpu_hide_usermode_for_poweroff(g); | ||
437 | nvgpu_mutex_release(&g->power_lock); | ||
438 | return 0; | ||
439 | |||
440 | error: | ||
441 | /* re-enable IRQs if previously enabled */ | ||
442 | if (irqs_enabled) { | ||
443 | enable_irq(g->irq_stall); | ||
444 | if (g->irq_stall != g->irq_nonstall) | ||
445 | enable_irq(g->irq_nonstall); | ||
446 | g->irqs_enabled = 1; | ||
447 | } | ||
448 | |||
449 | gk20a_scale_resume(dev); | ||
450 | done: | ||
451 | nvgpu_mutex_release(&g->power_lock); | ||
452 | |||
453 | return ret; | ||
454 | } | ||
455 | |||
456 | static struct of_device_id tegra_gk20a_of_match[] = { | ||
457 | #ifdef CONFIG_TEGRA_GK20A | ||
458 | { .compatible = "nvidia,tegra210-gm20b", | ||
459 | .data = &gm20b_tegra_platform }, | ||
460 | { .compatible = "nvidia,tegra186-gp10b", | ||
461 | .data = &gp10b_tegra_platform }, | ||
462 | { .compatible = "nvidia,gv11b", | ||
463 | .data = &gv11b_tegra_platform }, | ||
464 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
465 | { .compatible = "nvidia,gv11b-vgpu", | ||
466 | .data = &gv11b_vgpu_tegra_platform}, | ||
467 | #endif | ||
468 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
469 | { .compatible = "nvidia,tegra124-gk20a-vgpu", | ||
470 | .data = &vgpu_tegra_platform }, | ||
471 | #endif | ||
472 | #endif | ||
473 | |||
474 | { }, | ||
475 | }; | ||
476 | MODULE_DEVICE_TABLE(of, tegra_gk20a_of_match); | ||
477 | |||
478 | #ifdef CONFIG_PM | ||
479 | /** | ||
480 | * __gk20a_do_idle() - force the GPU to idle and railgate | ||
481 | * | ||
482 | * On success, this call MUST be balanced by the caller with __gk20a_do_unidle() | ||
483 | * | ||
484 | * Acquires two locks: &l->busy_lock and &platform->railgate_lock | ||
485 | * On success, we return while still holding these locks | ||
486 | * On failure, we release these locks and return | ||
487 | */ | ||
488 | int __gk20a_do_idle(struct gk20a *g, bool force_reset) | ||
489 | { | ||
490 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
491 | struct device *dev = dev_from_gk20a(g); | ||
492 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
493 | struct nvgpu_timeout timeout; | ||
494 | int ref_cnt; | ||
495 | int target_ref_cnt = 0; | ||
496 | bool is_railgated; | ||
497 | int err = 0; | ||
498 | |||
499 | /* | ||
500 | * Hold back deterministic submits and changes to deterministic | ||
501 | * channels - this must be outside the power busy locks. | ||
502 | */ | ||
503 | gk20a_channel_deterministic_idle(g); | ||
504 | |||
505 | /* acquire busy lock to block other busy() calls */ | ||
506 | down_write(&l->busy_lock); | ||
507 | |||
508 | /* acquire railgate lock to prevent unrailgate in midst of do_idle() */ | ||
509 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
510 | |||
511 | /* check if it is already railgated */ | ||
512 | if (platform->is_railgated(dev)) | ||
513 | return 0; | ||
514 | |||
515 | /* | ||
516 | * release railgate_lock, prevent suspend by incrementing usage counter, | ||
517 | * re-acquire railgate_lock | ||
518 | */ | ||
519 | nvgpu_mutex_release(&platform->railgate_lock); | ||
520 | pm_runtime_get_sync(dev); | ||
521 | |||
522 | /* | ||
523 | * One refcount is taken by this API. | ||
524 | * If the user has disabled rail gating, one more | ||
525 | * refcount is held on top of that. | ||
526 | */ | ||
527 | if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) | ||
528 | target_ref_cnt = 1; | ||
529 | else | ||
530 | target_ref_cnt = 2; | ||
531 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
532 | |||
533 | nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, | ||
534 | NVGPU_TIMER_CPU_TIMER); | ||
535 | |||
536 | /* check and wait until GPU is idle (with a timeout) */ | ||
537 | do { | ||
538 | nvgpu_usleep_range(1000, 1100); | ||
539 | ref_cnt = atomic_read(&dev->power.usage_count); | ||
540 | } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout)); | ||
541 | |||
542 | if (ref_cnt != target_ref_cnt) { | ||
543 | nvgpu_err(g, "failed to idle - refcount %d != target %d", | ||
544 | ref_cnt, target_ref_cnt); | ||
545 | goto fail_drop_usage_count; | ||
546 | } | ||
547 | |||
548 | /* check if global force_reset flag is set */ | ||
549 | force_reset |= platform->force_reset_in_do_idle; | ||
550 | |||
551 | nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, | ||
552 | NVGPU_TIMER_CPU_TIMER); | ||
553 | |||
554 | if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) && !force_reset) { | ||
555 | /* | ||
556 | * Case 1 : GPU railgate is supported | ||
557 | * | ||
558 | * if GPU is now idle, we will have only one ref count, | ||
559 | * drop this ref which will rail gate the GPU | ||
560 | */ | ||
561 | pm_runtime_put_sync(dev); | ||
562 | |||
563 | /* add sufficient delay to allow GPU to rail gate */ | ||
564 | nvgpu_msleep(g->railgate_delay); | ||
565 | |||
566 | /* check in loop if GPU is railgated or not */ | ||
567 | do { | ||
568 | nvgpu_usleep_range(1000, 1100); | ||
569 | is_railgated = platform->is_railgated(dev); | ||
570 | } while (!is_railgated && !nvgpu_timeout_expired(&timeout)); | ||
571 | |||
572 | if (is_railgated) { | ||
573 | return 0; | ||
574 | } else { | ||
575 | nvgpu_err(g, "failed to railgate within timeout"); | ||
576 | goto fail_timeout; | ||
577 | } | ||
578 | } else { | ||
579 | /* | ||
580 | * Case 2 : GPU railgate is not supported or we explicitly | ||
581 | * do not want to depend on runtime PM | ||
582 | * | ||
583 | * if GPU is now idle, call prepare_poweroff() to save the | ||
584 | * state and then do explicit railgate | ||
585 | * | ||
586 | * __gk20a_do_unidle() needs to unrailgate, call | ||
587 | * finalize_poweron(), and then call pm_runtime_put_sync() | ||
588 | * to balance the GPU usage counter | ||
589 | */ | ||
590 | |||
591 | /* Save the GPU state */ | ||
592 | err = gk20a_pm_prepare_poweroff(dev); | ||
593 | if (err) | ||
594 | goto fail_drop_usage_count; | ||
595 | |||
596 | /* railgate GPU */ | ||
597 | platform->railgate(dev); | ||
598 | |||
599 | nvgpu_udelay(10); | ||
600 | |||
601 | g->forced_reset = true; | ||
602 | return 0; | ||
603 | } | ||
604 | |||
605 | fail_drop_usage_count: | ||
606 | pm_runtime_put_noidle(dev); | ||
607 | fail_timeout: | ||
608 | nvgpu_mutex_release(&platform->railgate_lock); | ||
609 | up_write(&l->busy_lock); | ||
610 | gk20a_channel_deterministic_unidle(g); | ||
611 | return -EBUSY; | ||
612 | } | ||
613 | |||
614 | /** | ||
615 | * gk20a_do_idle() - wrapper for __gk20a_do_idle(), callable | ||
616 | * from outside the GPU driver | ||
617 | * | ||
618 | * On success, this call MUST be balanced by the caller with gk20a_do_unidle() | ||
619 | */ | ||
620 | static int gk20a_do_idle(void *_g) | ||
621 | { | ||
622 | struct gk20a *g = (struct gk20a *)_g; | ||
623 | |||
624 | return __gk20a_do_idle(g, true); | ||
625 | } | ||
626 | |||
627 | /** | ||
628 | * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle() | ||
629 | */ | ||
630 | int __gk20a_do_unidle(struct gk20a *g) | ||
631 | { | ||
632 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
633 | struct device *dev = dev_from_gk20a(g); | ||
634 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
635 | int err; | ||
636 | |||
637 | if (g->forced_reset) { | ||
638 | /* | ||
639 | * If we did a forced-reset/railgate | ||
640 | * then unrailgate the GPU here first | ||
641 | */ | ||
642 | platform->unrailgate(dev); | ||
643 | |||
644 | /* restore the GPU state */ | ||
645 | err = gk20a_pm_finalize_poweron(dev); | ||
646 | if (err) | ||
647 | return err; | ||
648 | |||
649 | /* balance GPU usage counter */ | ||
650 | pm_runtime_put_sync(dev); | ||
651 | |||
652 | g->forced_reset = false; | ||
653 | } | ||
654 | |||
655 | /* release the lock and open up all other busy() calls */ | ||
656 | nvgpu_mutex_release(&platform->railgate_lock); | ||
657 | up_write(&l->busy_lock); | ||
658 | |||
659 | gk20a_channel_deterministic_unidle(g); | ||
660 | |||
661 | return 0; | ||
662 | } | ||
663 | |||
664 | /** | ||
665 | * gk20a_do_unidle() - wrapper for __gk20a_do_unidle() | ||
666 | */ | ||
667 | static int gk20a_do_unidle(void *_g) | ||
668 | { | ||
669 | struct gk20a *g = (struct gk20a *)_g; | ||
670 | |||
671 | return __gk20a_do_unidle(g); | ||
672 | } | ||
673 | #endif | ||
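
Putting the idle/unidle contract together: on success __gk20a_do_idle() returns holding busy_lock and railgate_lock, so the window must be closed with __gk20a_do_unidle(). A sketch under that assumption (function name illustrative):

	static int example_forced_idle_window(struct gk20a *g)
	{
		int err = __gk20a_do_idle(g, true);

		if (err)
			return err;	/* locks already dropped on failure */

		/* GPU is idled (and possibly reset); do the disruptive work. */

		return __gk20a_do_unidle(g);
	}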
674 | |||
675 | void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i, | ||
676 | struct resource **out) | ||
677 | { | ||
678 | struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); | ||
679 | |||
680 | if (!r) | ||
681 | return NULL; | ||
682 | if (out) | ||
683 | *out = r; | ||
684 | return devm_ioremap_resource(&dev->dev, r); | ||
685 | } | ||
686 | |||
687 | void __iomem *nvgpu_devm_ioremap(struct device *dev, resource_size_t offset, | ||
688 | resource_size_t size) | ||
689 | { | ||
690 | return devm_ioremap(dev, offset, size); | ||
691 | } | ||
692 | |||
693 | u64 nvgpu_resource_addr(struct platform_device *dev, int i) | ||
694 | { | ||
695 | struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); | ||
696 | |||
697 | if (!r) | ||
698 | return 0; | ||
699 | |||
700 | return r->start; | ||
701 | } | ||
702 | |||
703 | static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) | ||
704 | { | ||
705 | struct gk20a *g = dev_id; | ||
706 | |||
707 | return nvgpu_intr_stall(g); | ||
708 | } | ||
709 | |||
710 | static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) | ||
711 | { | ||
712 | struct gk20a *g = dev_id; | ||
713 | |||
714 | return nvgpu_intr_nonstall(g); | ||
715 | } | ||
716 | |||
717 | static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) | ||
718 | { | ||
719 | struct gk20a *g = dev_id; | ||
720 | |||
721 | return nvgpu_intr_thread_stall(g); | ||
722 | } | ||
723 | |||
724 | void gk20a_remove_support(struct gk20a *g) | ||
725 | { | ||
726 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
727 | struct sim_nvgpu_linux *sim_linux; | ||
728 | |||
729 | tegra_unregister_idle_unidle(gk20a_do_idle); | ||
730 | |||
731 | nvgpu_kfree(g, g->dbg_regops_tmp_buf); | ||
732 | |||
733 | nvgpu_remove_channel_support_linux(l); | ||
734 | |||
735 | if (g->pmu.remove_support) | ||
736 | g->pmu.remove_support(&g->pmu); | ||
737 | |||
738 | if (g->acr.remove_support != NULL) { | ||
739 | g->acr.remove_support(&g->acr); | ||
740 | } | ||
741 | |||
742 | if (g->gr.remove_support) | ||
743 | g->gr.remove_support(&g->gr); | ||
744 | |||
745 | if (g->mm.remove_ce_support) | ||
746 | g->mm.remove_ce_support(&g->mm); | ||
747 | |||
748 | if (g->fifo.remove_support) | ||
749 | g->fifo.remove_support(&g->fifo); | ||
750 | |||
751 | if (g->mm.remove_support) | ||
752 | g->mm.remove_support(&g->mm); | ||
753 | |||
754 | if (g->sim) { | ||
755 | sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); | ||
756 | if (g->sim->remove_support) | ||
757 | g->sim->remove_support(g); | ||
758 | if (sim_linux->remove_support_linux) | ||
759 | sim_linux->remove_support_linux(g); | ||
760 | } | ||
761 | |||
762 | nvgpu_remove_usermode_support(g); | ||
763 | |||
764 | nvgpu_free_enabled_flags(g); | ||
765 | |||
766 | gk20a_lockout_registers(g); | ||
767 | } | ||
768 | |||
769 | static int gk20a_init_support(struct platform_device *pdev) | ||
770 | { | ||
771 | struct device *dev = &pdev->dev; | ||
772 | struct gk20a *g = get_gk20a(dev); | ||
773 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
774 | int err = -ENOMEM; | ||
775 | |||
776 | tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g); | ||
777 | |||
778 | l->regs = nvgpu_devm_ioremap_resource(pdev, | ||
779 | GK20A_BAR0_IORESOURCE_MEM, | ||
780 | &l->reg_mem); | ||
781 | if (IS_ERR(l->regs)) { | ||
782 | nvgpu_err(g, "failed to remap gk20a registers"); | ||
783 | err = PTR_ERR(l->regs); | ||
784 | goto fail; | ||
785 | } | ||
786 | |||
787 | l->regs_bus_addr = nvgpu_resource_addr(pdev, | ||
788 | GK20A_BAR0_IORESOURCE_MEM); | ||
789 | if (!l->regs_bus_addr) { | ||
790 | nvgpu_err(g, "failed to read register bus offset"); | ||
791 | err = -ENODEV; | ||
792 | goto fail; | ||
793 | } | ||
794 | |||
795 | l->bar1 = nvgpu_devm_ioremap_resource(pdev, | ||
796 | GK20A_BAR1_IORESOURCE_MEM, | ||
797 | &l->bar1_mem); | ||
798 | if (IS_ERR(l->bar1)) { | ||
799 | nvgpu_err(g, "failed to remap gk20a bar1"); | ||
800 | err = PTR_ERR(l->bar1); | ||
801 | goto fail; | ||
802 | } | ||
803 | |||
804 | err = nvgpu_init_sim_support_linux(g, pdev); | ||
805 | if (err) | ||
806 | goto fail; | ||
807 | err = nvgpu_init_sim_support(g); | ||
808 | if (err) | ||
809 | goto fail_sim; | ||
810 | |||
811 | nvgpu_init_usermode_support(g); | ||
812 | return 0; | ||
813 | |||
814 | fail_sim: | ||
815 | nvgpu_remove_sim_support_linux(g); | ||
816 | fail: | ||
817 | if (l->regs) | ||
818 | l->regs = NULL; | ||
819 | |||
820 | if (l->bar1) | ||
821 | l->bar1 = NULL; | ||
822 | |||
823 | return err; | ||
824 | } | ||
825 | |||
826 | static int gk20a_pm_railgate(struct device *dev) | ||
827 | { | ||
828 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
829 | int ret = 0; | ||
830 | struct gk20a *g = get_gk20a(dev); | ||
831 | |||
832 | /* return early if platform didn't implement railgate */ | ||
833 | if (!platform->railgate) | ||
834 | return 0; | ||
835 | |||
836 | /* if platform is already railgated, then just return */ | ||
837 | if (platform->is_railgated && platform->is_railgated(dev)) | ||
838 | return ret; | ||
839 | |||
840 | #ifdef CONFIG_DEBUG_FS | ||
841 | g->pstats.last_rail_gate_start = jiffies; | ||
842 | |||
843 | if (g->pstats.railgating_cycle_count >= 1) | ||
844 | g->pstats.total_rail_ungate_time_ms = | ||
845 | g->pstats.total_rail_ungate_time_ms + | ||
846 | jiffies_to_msecs(g->pstats.last_rail_gate_start - | ||
847 | g->pstats.last_rail_ungate_complete); | ||
848 | #endif | ||
849 | |||
850 | ret = platform->railgate(dev); | ||
851 | if (ret) { | ||
852 | nvgpu_err(g, "failed to railgate platform, err=%d", ret); | ||
853 | return ret; | ||
854 | } | ||
855 | |||
856 | #ifdef CONFIG_DEBUG_FS | ||
857 | g->pstats.last_rail_gate_complete = jiffies; | ||
858 | #endif | ||
859 | ret = tegra_fuse_clock_disable(); | ||
860 | if (ret) | ||
861 | nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret); | ||
862 | |||
863 | return ret; | ||
864 | } | ||
865 | |||
866 | static int gk20a_pm_unrailgate(struct device *dev) | ||
867 | { | ||
868 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
869 | int ret = 0; | ||
870 | struct gk20a *g = get_gk20a(dev); | ||
871 | |||
872 | /* return early if platform didn't implement unrailgate */ | ||
873 | if (!platform->unrailgate) | ||
874 | return 0; | ||
875 | |||
876 | ret = tegra_fuse_clock_enable(); | ||
877 | if (ret) { | ||
878 | nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret); | ||
879 | return ret; | ||
880 | } | ||
881 | #ifdef CONFIG_DEBUG_FS | ||
882 | g->pstats.last_rail_ungate_start = jiffies; | ||
883 | if (g->pstats.railgating_cycle_count >= 1) | ||
884 | g->pstats.total_rail_gate_time_ms = | ||
885 | g->pstats.total_rail_gate_time_ms + | ||
886 | jiffies_to_msecs(g->pstats.last_rail_ungate_start - | ||
887 | g->pstats.last_rail_gate_complete); | ||
888 | |||
889 | g->pstats.railgating_cycle_count++; | ||
890 | #endif | ||
891 | |||
892 | trace_gk20a_pm_unrailgate(dev_name(dev)); | ||
893 | |||
894 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
895 | ret = platform->unrailgate(dev); | ||
896 | nvgpu_mutex_release(&platform->railgate_lock); | ||
897 | |||
898 | #ifdef CONFIG_DEBUG_FS | ||
899 | g->pstats.last_rail_ungate_complete = jiffies; | ||
900 | #endif | ||
901 | |||
902 | return ret; | ||
903 | } | ||
904 | |||
905 | /* | ||
906 | * Remove the driver's association with the OS interrupt handlers | ||
907 | */ | ||
908 | void nvgpu_free_irq(struct gk20a *g) | ||
909 | { | ||
910 | struct device *dev = dev_from_gk20a(g); | ||
911 | |||
912 | devm_free_irq(dev, g->irq_stall, g); | ||
913 | if (g->irq_stall != g->irq_nonstall) | ||
914 | devm_free_irq(dev, g->irq_nonstall, g); | ||
915 | } | ||
916 | |||
917 | /* | ||
918 | * Idle the GPU in preparation for shutdown/remove. | ||
919 | * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW | ||
920 | * state to prevent further activity on the driver SW side. | ||
921 | * On driver removal, nvgpu_quiesce() should be called after start_unload(). | ||
922 | */ | ||
923 | int nvgpu_quiesce(struct gk20a *g) | ||
924 | { | ||
925 | int err; | ||
926 | struct device *dev = dev_from_gk20a(g); | ||
927 | |||
928 | if (g->power_on) { | ||
929 | err = gk20a_wait_for_idle(g); | ||
930 | if (err) { | ||
931 | nvgpu_err(g, "failed to idle GPU, err=%d", err); | ||
932 | return err; | ||
933 | } | ||
934 | |||
935 | err = gk20a_fifo_disable_all_engine_activity(g, true); | ||
936 | if (err) { | ||
937 | nvgpu_err(g, | ||
938 | "failed to disable engine activity, err=%d", | ||
939 | err); | ||
940 | return err; | ||
941 | } | ||
942 | |||
943 | err = gk20a_fifo_wait_engine_idle(g); | ||
944 | if (err) { | ||
945 | nvgpu_err(g, "failed to idle engines, err=%d", | ||
946 | err); | ||
947 | return err; | ||
948 | } | ||
949 | } | ||
950 | |||
951 | if (gk20a_gpu_is_virtual(dev)) | ||
952 | err = vgpu_pm_prepare_poweroff(dev); | ||
953 | else | ||
954 | err = gk20a_pm_prepare_poweroff(dev); | ||
955 | |||
956 | if (err) | ||
957 | nvgpu_err(g, "failed to prepare for poweroff, err=%d", | ||
958 | err); | ||
959 | |||
960 | return err; | ||
961 | } | ||
962 | |||
963 | static void gk20a_pm_shutdown(struct platform_device *pdev) | ||
964 | { | ||
965 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
966 | struct gk20a *g = platform->g; | ||
967 | int err; | ||
968 | |||
969 | nvgpu_info(g, "shutting down"); | ||
970 | |||
971 | /* vgpu has nothing to clean up currently */ | ||
972 | if (gk20a_gpu_is_virtual(&pdev->dev)) | ||
973 | return; | ||
974 | |||
975 | if (!g->power_on) | ||
976 | goto finish; | ||
977 | |||
978 | gk20a_driver_start_unload(g); | ||
979 | |||
980 | /* If GPU is already railgated, | ||
981 | * just prevent more requests, and return */ | ||
982 | if (platform->is_railgated && platform->is_railgated(&pdev->dev)) { | ||
983 | __pm_runtime_disable(&pdev->dev, false); | ||
984 | nvgpu_info(g, "already railgated, shut down complete"); | ||
985 | return; | ||
986 | } | ||
987 | |||
988 | /* Prevent more requests by disabling Runtime PM */ | ||
989 | __pm_runtime_disable(&pdev->dev, false); | ||
990 | |||
991 | err = nvgpu_quiesce(g); | ||
992 | if (err) | ||
993 | goto finish; | ||
994 | |||
995 | err = gk20a_pm_railgate(&pdev->dev); | ||
996 | if (err) | ||
997 | nvgpu_err(g, "failed to railgate, err=%d", err); | ||
998 | |||
999 | finish: | ||
1000 | nvgpu_info(g, "shut down complete"); | ||
1001 | } | ||
1002 | |||
1003 | #ifdef CONFIG_PM | ||
1004 | static int gk20a_pm_runtime_resume(struct device *dev) | ||
1005 | { | ||
1006 | int err = 0; | ||
1007 | |||
1008 | err = gk20a_pm_unrailgate(dev); | ||
1009 | if (err) | ||
1010 | goto fail; | ||
1011 | |||
1012 | if (gk20a_gpu_is_virtual(dev)) | ||
1013 | err = vgpu_pm_finalize_poweron(dev); | ||
1014 | else | ||
1015 | err = gk20a_pm_finalize_poweron(dev); | ||
1016 | if (err) | ||
1017 | goto fail_poweron; | ||
1018 | |||
1019 | return 0; | ||
1020 | |||
1021 | fail_poweron: | ||
1022 | gk20a_pm_railgate(dev); | ||
1023 | fail: | ||
1024 | return err; | ||
1025 | } | ||
1026 | |||
1027 | static int gk20a_pm_runtime_suspend(struct device *dev) | ||
1028 | { | ||
1029 | int err = 0; | ||
1030 | struct gk20a *g = get_gk20a(dev); | ||
1031 | |||
1032 | if (!g) | ||
1033 | return 0; | ||
1034 | |||
1035 | if (gk20a_gpu_is_virtual(dev)) | ||
1036 | err = vgpu_pm_prepare_poweroff(dev); | ||
1037 | else | ||
1038 | err = gk20a_pm_prepare_poweroff(dev); | ||
1039 | if (err) { | ||
1040 | nvgpu_err(g, "failed to power off, err=%d", err); | ||
1041 | goto fail; | ||
1042 | } | ||
1043 | |||
1044 | err = gk20a_pm_railgate(dev); | ||
1045 | if (err) | ||
1046 | goto fail; | ||
1047 | |||
1048 | return 0; | ||
1049 | |||
1050 | fail: | ||
1051 | gk20a_pm_finalize_poweron(dev); | ||
1052 | pm_runtime_mark_last_busy(dev); | ||
1053 | return err; | ||
1054 | } | ||
1055 | |||
1056 | static int gk20a_pm_suspend(struct device *dev) | ||
1057 | { | ||
1058 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
1059 | struct gk20a *g = get_gk20a(dev); | ||
1060 | int ret = 0; | ||
1061 | int usage_count; | ||
1062 | struct nvgpu_timeout timeout; | ||
1063 | |||
1064 | if (!g->power_on) { | ||
1065 | if (platform->suspend) | ||
1066 | ret = platform->suspend(dev); | ||
1067 | |||
1068 | if (ret) | ||
1069 | return ret; | ||
1070 | |||
1071 | if (!pm_runtime_enabled(dev)) | ||
1072 | ret = gk20a_pm_railgate(dev); | ||
1073 | |||
1074 | return ret; | ||
1075 | } | ||
1076 | |||
1077 | nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, | ||
1078 | NVGPU_TIMER_CPU_TIMER); | ||
1079 | /* | ||
1080 | * Hold back deterministic submits and changes to deterministic | ||
1081 | * channels - this must be outside the power busy locks. | ||
1082 | */ | ||
1083 | gk20a_channel_deterministic_idle(g); | ||
1084 | |||
1085 | /* check and wait until GPU is idle (with a timeout) */ | ||
1086 | do { | ||
1087 | nvgpu_usleep_range(1000, 1100); | ||
1088 | usage_count = nvgpu_atomic_read(&g->usage_count); | ||
1089 | } while (usage_count != 0 && !nvgpu_timeout_expired(&timeout)); | ||
1090 | |||
1091 | if (usage_count != 0) { | ||
1092 | nvgpu_err(g, "failed to idle - usage_count %d", usage_count); | ||
1093 | ret = -EINVAL; | ||
1094 | goto fail_idle; | ||
1095 | } | ||
1096 | |||
1097 | ret = gk20a_pm_runtime_suspend(dev); | ||
1098 | if (ret) | ||
1099 | goto fail_idle; | ||
1100 | |||
1101 | if (platform->suspend) | ||
1102 | ret = platform->suspend(dev); | ||
1103 | if (ret) | ||
1104 | goto fail_suspend; | ||
1105 | |||
1106 | g->suspended = true; | ||
1107 | |||
1108 | return 0; | ||
1109 | |||
1110 | fail_suspend: | ||
1111 | gk20a_pm_runtime_resume(dev); | ||
1112 | fail_idle: | ||
1113 | gk20a_channel_deterministic_unidle(g); | ||
1114 | return ret; | ||
1115 | } | ||
1116 | |||
1117 | static int gk20a_pm_resume(struct device *dev) | ||
1118 | { | ||
1119 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
1120 | struct gk20a *g = get_gk20a(dev); | ||
1121 | int ret = 0; | ||
1122 | |||
1123 | if (!g->suspended) { | ||
1124 | if (platform->resume) | ||
1125 | ret = platform->resume(dev); | ||
1126 | if (ret) | ||
1127 | return ret; | ||
1128 | |||
1129 | if (!pm_runtime_enabled(dev)) | ||
1130 | ret = gk20a_pm_unrailgate(dev); | ||
1131 | |||
1132 | return ret; | ||
1133 | } | ||
1134 | |||
1135 | if (platform->resume) | ||
1136 | ret = platform->resume(dev); | ||
1137 | if (ret) | ||
1138 | return ret; | ||
1139 | |||
1140 | ret = gk20a_pm_runtime_resume(dev); | ||
1141 | if (ret) | ||
1142 | return ret; | ||
1143 | |||
1144 | g->suspended = false; | ||
1145 | |||
1146 | gk20a_channel_deterministic_unidle(g); | ||
1147 | |||
1148 | return ret; | ||
1149 | } | ||
1150 | |||
1151 | static const struct dev_pm_ops gk20a_pm_ops = { | ||
1152 | .runtime_resume = gk20a_pm_runtime_resume, | ||
1153 | .runtime_suspend = gk20a_pm_runtime_suspend, | ||
1154 | .resume = gk20a_pm_resume, | ||
1155 | .suspend = gk20a_pm_suspend, | ||
1156 | }; | ||
1157 | #endif | ||
1158 | |||
1159 | static int gk20a_pm_init(struct device *dev) | ||
1160 | { | ||
1161 | struct gk20a *g = get_gk20a(dev); | ||
1162 | int err = 0; | ||
1163 | |||
1164 | nvgpu_log_fn(g, " "); | ||
1165 | |||
1166 | /* | ||
1167 | * Initialize runtime PM. When railgating is disabled, | ||
1168 | * set the autosuspend delay to a negative value, | ||
1169 | * which prevents runtime suspend. | ||
1170 | */ | ||
1171 | if (g->railgate_delay && nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) | ||
1172 | pm_runtime_set_autosuspend_delay(dev, | ||
1173 | g->railgate_delay); | ||
1174 | else | ||
1175 | pm_runtime_set_autosuspend_delay(dev, -1); | ||
1176 | |||
1177 | pm_runtime_use_autosuspend(dev); | ||
1178 | pm_runtime_enable(dev); | ||
1179 | |||
1180 | return err; | ||
1181 | } | ||
1182 | |||
1183 | static int gk20a_pm_deinit(struct device *dev) | ||
1184 | { | ||
1185 | pm_runtime_dont_use_autosuspend(dev); | ||
1186 | pm_runtime_disable(dev); | ||
1187 | return 0; | ||
1188 | } | ||
1189 | |||
1190 | /* | ||
1191 | * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING. | ||
1192 | */ | ||
1193 | void gk20a_driver_start_unload(struct gk20a *g) | ||
1194 | { | ||
1195 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
1196 | |||
1197 | nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n"); | ||
1198 | |||
1199 | down_write(&l->busy_lock); | ||
1200 | __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); | ||
1201 | /* GR SW ready needs to be invalidated at this time with the busy lock | ||
1202 | * held to prevent a race condition in the gr/mm code */ | ||
1203 | g->gr.sw_ready = false; | ||
1204 | g->sw_ready = false; | ||
1205 | up_write(&l->busy_lock); | ||
1206 | |||
1207 | if (g->is_virtual) | ||
1208 | return; | ||
1209 | |||
1210 | gk20a_wait_for_idle(g); | ||
1211 | |||
1212 | nvgpu_wait_for_deferred_interrupts(g); | ||
1213 | |||
1214 | if (l->nonstall_work_queue) { | ||
1215 | cancel_work_sync(&l->nonstall_fn_work); | ||
1216 | destroy_workqueue(l->nonstall_work_queue); | ||
1217 | l->nonstall_work_queue = NULL; | ||
1218 | } | ||
1219 | } | ||
1220 | |||
1221 | static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a) | ||
1222 | { | ||
1223 | gk20a_get_platform(&pdev->dev)->g = gk20a; | ||
1224 | } | ||
1225 | |||
1226 | static int nvgpu_read_fuse_overrides(struct gk20a *g) | ||
1227 | { | ||
1228 | struct device_node *np = nvgpu_get_node(g); | ||
1229 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
1230 | u32 *fuses; | ||
1231 | int count, i; | ||
1232 | |||
1233 | 	if (!np) /* may be a PCIe device */ | ||
1234 | return 0; | ||
1235 | |||
1236 | count = of_property_count_elems_of_size(np, "fuse-overrides", 8); | ||
1237 | if (count <= 0) | ||
1238 | return count; | ||
1239 | |||
1240 | fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2); | ||
1241 | if (!fuses) | ||
1242 | return -ENOMEM; | ||
1243 | of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2); | ||
1244 | for (i = 0; i < count; i++) { | ||
1245 | u32 fuse, value; | ||
1246 | |||
1247 | fuse = fuses[2 * i]; | ||
1248 | value = fuses[2 * i + 1]; | ||
1249 | switch (fuse) { | ||
1250 | case GM20B_FUSE_OPT_TPC_DISABLE: | ||
1251 | g->tpc_fs_mask_user = ~value; | ||
1252 | break; | ||
1253 | case GP10B_FUSE_OPT_ECC_EN: | ||
1254 | g->gr.fecs_feature_override_ecc_val = value; | ||
1255 | break; | ||
1256 | case GV11B_FUSE_OPT_TPC_DISABLE: | ||
1257 | if (platform->set_tpc_pg_mask != NULL) | ||
1258 | platform->set_tpc_pg_mask(dev_from_gk20a(g), | ||
1259 | value); | ||
1260 | break; | ||
1261 | default: | ||
1262 | 			nvgpu_err(g, "ignoring unknown fuse override %08x", fuse); | ||
1263 | break; | ||
1264 | } | ||
1265 | } | ||
1266 | |||
1267 | nvgpu_kfree(g, fuses); | ||
1268 | |||
1269 | return 0; | ||
1270 | } | ||
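
For reference, "fuse-overrides" is parsed as a flat array of (fuse-id, value) u32 pairs; the element size of 8 passed to of_property_count_elems_of_size() counts one pair per element. A hypothetical device-tree fragment (node name, fuse IDs, and values illustrative only, not real offsets):

    /*
     * gpu@17000000 {
     *         ...
     *         fuse-overrides = <0x0c38 0x1>,    (fuse-id, value) pair
     *                          <0x0da0 0x0>;    second pair
     * };
     */
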
1271 | |||
1272 | static int gk20a_probe(struct platform_device *dev) | ||
1273 | { | ||
1274 | struct nvgpu_os_linux *l = NULL; | ||
1275 | struct gk20a *gk20a; | ||
1276 | int err; | ||
1277 | struct gk20a_platform *platform = NULL; | ||
1278 | struct device_node *np; | ||
1279 | |||
1280 | if (dev->dev.of_node) { | ||
1281 | const struct of_device_id *match; | ||
1282 | |||
1283 | match = of_match_device(tegra_gk20a_of_match, &dev->dev); | ||
1284 | if (match) | ||
1285 | platform = (struct gk20a_platform *)match->data; | ||
1286 | } else | ||
1287 | platform = (struct gk20a_platform *)dev->dev.platform_data; | ||
1288 | |||
1289 | if (!platform) { | ||
1290 | dev_err(&dev->dev, "no platform data\n"); | ||
1291 | return -ENODATA; | ||
1292 | } | ||
1293 | |||
1294 | platform_set_drvdata(dev, platform); | ||
1295 | |||
1296 | if (gk20a_gpu_is_virtual(&dev->dev)) | ||
1297 | return vgpu_probe(dev); | ||
1298 | |||
1299 | l = kzalloc(sizeof(*l), GFP_KERNEL); | ||
1300 | if (!l) { | ||
1301 | dev_err(&dev->dev, "couldn't allocate gk20a support"); | ||
1302 | return -ENOMEM; | ||
1303 | } | ||
1304 | |||
1305 | hash_init(l->ecc_sysfs_stats_htable); | ||
1306 | |||
1307 | gk20a = &l->g; | ||
1308 | |||
1309 | nvgpu_log_fn(gk20a, " "); | ||
1310 | |||
1311 | nvgpu_init_gk20a(gk20a); | ||
1312 | set_gk20a(dev, gk20a); | ||
1313 | l->dev = &dev->dev; | ||
1314 | gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK; | ||
1315 | |||
1316 | nvgpu_kmem_init(gk20a); | ||
1317 | |||
1318 | err = nvgpu_init_enabled_flags(gk20a); | ||
1319 | if (err) | ||
1320 | goto return_err; | ||
1321 | |||
1322 | np = nvgpu_get_node(gk20a); | ||
1323 | if (of_dma_is_coherent(np)) { | ||
1324 | __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); | ||
1325 | __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
1326 | } | ||
1327 | |||
1328 | if (nvgpu_platform_is_simulation(gk20a)) | ||
1329 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | ||
1330 | |||
1331 | gk20a->irq_stall = platform_get_irq(dev, 0); | ||
1332 | gk20a->irq_nonstall = platform_get_irq(dev, 1); | ||
1333 | if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) { | ||
1334 | err = -ENXIO; | ||
1335 | goto return_err; | ||
1336 | } | ||
1337 | |||
1338 | err = devm_request_threaded_irq(&dev->dev, | ||
1339 | gk20a->irq_stall, | ||
1340 | gk20a_intr_isr_stall, | ||
1341 | gk20a_intr_thread_stall, | ||
1342 | 0, "gk20a_stall", gk20a); | ||
1343 | if (err) { | ||
1344 | dev_err(&dev->dev, | ||
1345 | "failed to request stall intr irq @ %d\n", | ||
1346 | gk20a->irq_stall); | ||
1347 | goto return_err; | ||
1348 | } | ||
1349 | err = devm_request_irq(&dev->dev, | ||
1350 | gk20a->irq_nonstall, | ||
1351 | gk20a_intr_isr_nonstall, | ||
1352 | 0, "gk20a_nonstall", gk20a); | ||
1353 | if (err) { | ||
1354 | dev_err(&dev->dev, | ||
1355 | "failed to request non-stall intr irq @ %d\n", | ||
1356 | gk20a->irq_nonstall); | ||
1357 | goto return_err; | ||
1358 | } | ||
1359 | disable_irq(gk20a->irq_stall); | ||
1360 | if (gk20a->irq_stall != gk20a->irq_nonstall) | ||
1361 | disable_irq(gk20a->irq_nonstall); | ||
1362 | |||
1363 | err = gk20a_init_support(dev); | ||
1364 | if (err) | ||
1365 | goto return_err; | ||
1366 | |||
1367 | 	err = nvgpu_read_fuse_overrides(gk20a); /* optional; failure is non-fatal */ | ||
1368 | |||
1369 | #ifdef CONFIG_RESET_CONTROLLER | ||
1370 | platform->reset_control = devm_reset_control_get(&dev->dev, NULL); | ||
1371 | if (IS_ERR(platform->reset_control)) | ||
1372 | platform->reset_control = NULL; | ||
1373 | #endif | ||
1374 | |||
1375 | err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class); | ||
1376 | if (err) | ||
1377 | goto return_err; | ||
1378 | |||
1379 | err = gk20a_pm_init(&dev->dev); | ||
1380 | if (err) { | ||
1381 | dev_err(&dev->dev, "pm init failed"); | ||
1382 | goto return_err; | ||
1383 | } | ||
1384 | |||
1385 | gk20a->nvgpu_reboot_nb.notifier_call = | ||
1386 | nvgpu_kernel_shutdown_notification; | ||
1387 | err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb); | ||
1388 | if (err) | ||
1389 | goto return_err; | ||
1390 | |||
1391 | return 0; | ||
1392 | |||
1393 | return_err: | ||
1394 | nvgpu_free_enabled_flags(gk20a); | ||
1395 | |||
1396 | /* | ||
1397 | * Last since the above allocs may use data structures in here. | ||
1398 | */ | ||
1399 | nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP); | ||
1400 | |||
1401 | kfree(l); | ||
1402 | |||
1403 | return err; | ||
1404 | } | ||
1405 | |||
1406 | int nvgpu_remove(struct device *dev, struct class *class) | ||
1407 | { | ||
1408 | struct gk20a *g = get_gk20a(dev); | ||
1409 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
1410 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
1411 | #endif | ||
1412 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
1413 | int err; | ||
1414 | |||
1415 | nvgpu_log_fn(g, " "); | ||
1416 | |||
1417 | err = nvgpu_quiesce(g); | ||
1418 | WARN(err, "gpu failed to idle during driver removal"); | ||
1419 | |||
1420 | if (nvgpu_mem_is_valid(&g->syncpt_mem)) | ||
1421 | nvgpu_dma_free(g, &g->syncpt_mem); | ||
1422 | |||
1423 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
1424 | if (platform->has_cde) | ||
1425 | gk20a_cde_destroy(l); | ||
1426 | #endif | ||
1427 | |||
1428 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
1429 | gk20a_ctxsw_trace_cleanup(g); | ||
1430 | #endif | ||
1431 | |||
1432 | gk20a_sched_ctrl_cleanup(g); | ||
1433 | |||
1434 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | ||
1435 | gk20a_scale_exit(dev); | ||
1436 | |||
1437 | nvgpu_clk_arb_cleanup_arbiter(g); | ||
1438 | |||
1439 | gk20a_user_deinit(dev, class); | ||
1440 | |||
1441 | gk20a_debug_deinit(g); | ||
1442 | |||
1443 | nvgpu_remove_sysfs(dev); | ||
1444 | |||
1445 | if (platform->secure_buffer.destroy) | ||
1446 | platform->secure_buffer.destroy(g, | ||
1447 | &platform->secure_buffer); | ||
1448 | |||
1449 | if (platform->remove) | ||
1450 | platform->remove(dev); | ||
1451 | |||
1452 | nvgpu_mutex_destroy(&g->clk_arb_enable_lock); | ||
1453 | |||
1454 | nvgpu_log_fn(g, "removed"); | ||
1455 | |||
1456 | return err; | ||
1457 | } | ||
1458 | |||
1459 | static int __exit gk20a_remove(struct platform_device *pdev) | ||
1460 | { | ||
1461 | int err; | ||
1462 | struct device *dev = &pdev->dev; | ||
1463 | struct gk20a *g = get_gk20a(dev); | ||
1464 | |||
1465 | if (gk20a_gpu_is_virtual(dev)) | ||
1466 | return vgpu_remove(pdev); | ||
1467 | |||
1468 | err = nvgpu_remove(dev, &nvgpu_class); | ||
1469 | |||
1470 | unregister_reboot_notifier(&g->nvgpu_reboot_nb); | ||
1471 | |||
1472 | set_gk20a(pdev, NULL); | ||
1473 | |||
1474 | gk20a_put(g); | ||
1475 | |||
1476 | gk20a_pm_deinit(dev); | ||
1477 | |||
1478 | return err; | ||
1479 | } | ||
1480 | |||
1481 | static struct platform_driver gk20a_driver = { | ||
1482 | .probe = gk20a_probe, | ||
1483 | .remove = __exit_p(gk20a_remove), | ||
1484 | .shutdown = gk20a_pm_shutdown, | ||
1485 | .driver = { | ||
1486 | .owner = THIS_MODULE, | ||
1487 | .name = "gk20a", | ||
1488 | .probe_type = PROBE_PREFER_ASYNCHRONOUS, | ||
1489 | #ifdef CONFIG_OF | ||
1490 | .of_match_table = tegra_gk20a_of_match, | ||
1491 | #endif | ||
1492 | #ifdef CONFIG_PM | ||
1493 | .pm = &gk20a_pm_ops, | ||
1494 | #endif | ||
1495 | .suppress_bind_attrs = true, | ||
1496 | } | ||
1497 | }; | ||
1498 | |||
1499 | struct class nvgpu_class = { | ||
1500 | .owner = THIS_MODULE, | ||
1501 | .name = CLASS_NAME, | ||
1502 | }; | ||
1503 | |||
1504 | static int __init gk20a_init(void) | ||
1505 | { | ||
1506 | |||
1507 | int ret; | ||
1508 | |||
1509 | ret = class_register(&nvgpu_class); | ||
1510 | if (ret) | ||
1511 | return ret; | ||
1512 | |||
1513 | ret = nvgpu_pci_init(); | ||
1514 | if (ret) | ||
1515 | return ret; | ||
1516 | |||
1517 | return platform_driver_register(&gk20a_driver); | ||
1518 | } | ||
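
Note that the error paths above leave earlier registrations in place: if nvgpu_pci_init() or platform_driver_register() fails, nvgpu_class stays registered (and, in the latter case, the PCI side stays initialised). A fully unwound variant would look like this sketch:

    static int __init gk20a_init(void)
    {
            int ret;

            ret = class_register(&nvgpu_class);
            if (ret)
                    return ret;

            ret = nvgpu_pci_init();
            if (ret)
                    goto fail_class;

            ret = platform_driver_register(&gk20a_driver);
            if (ret)
                    goto fail_pci;

            return 0;

    fail_pci:
            nvgpu_pci_exit();
    fail_class:
            class_unregister(&nvgpu_class);
            return ret;
    }
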
1519 | |||
1520 | static void __exit gk20a_exit(void) | ||
1521 | { | ||
1522 | nvgpu_pci_exit(); | ||
1523 | platform_driver_unregister(&gk20a_driver); | ||
1524 | class_unregister(&nvgpu_class); | ||
1525 | } | ||
1526 | |||
1527 | MODULE_LICENSE("GPL v2"); | ||
1528 | module_init(gk20a_init); | ||
1529 | module_exit(gk20a_exit); | ||
diff --git a/include/os/linux/module.h b/include/os/linux/module.h new file mode 100644 index 0000000..76c7274 --- /dev/null +++ b/include/os/linux/module.h | |||
@@ -0,0 +1,35 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | #ifndef __NVGPU_COMMON_LINUX_MODULE_H__ | ||
14 | #define __NVGPU_COMMON_LINUX_MODULE_H__ | ||
15 | |||
16 | struct gk20a; | ||
17 | struct device; | ||
18 | struct nvgpu_os_linux; | ||
19 | |||
20 | int gk20a_pm_finalize_poweron(struct device *dev); | ||
21 | int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l); | ||
22 | void gk20a_remove_support(struct gk20a *g); | ||
23 | void gk20a_driver_start_unload(struct gk20a *g); | ||
24 | int nvgpu_quiesce(struct gk20a *g); | ||
25 | int nvgpu_remove(struct device *dev, struct class *class); | ||
26 | void nvgpu_free_irq(struct gk20a *g); | ||
27 | struct device_node *nvgpu_get_node(struct gk20a *g); | ||
28 | void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i, | ||
29 | struct resource **out); | ||
30 | void __iomem *nvgpu_devm_ioremap(struct device *dev, resource_size_t offset, | ||
31 | resource_size_t size); | ||
32 | u64 nvgpu_resource_addr(struct platform_device *dev, int i); | ||
33 | extern struct class nvgpu_class; | ||
34 | |||
35 | #endif | ||
diff --git a/include/os/linux/module_usermode.c b/include/os/linux/module_usermode.c new file mode 100644 index 0000000..ea01c1b --- /dev/null +++ b/include/os/linux/module_usermode.c | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/types.h> | ||
18 | |||
19 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
20 | |||
21 | #include "os_linux.h" | ||
22 | |||
23 | /* | ||
24 | * Locks out the driver from accessing GPU registers. This prevents access to | ||
25 | * these registers after the GPU has been clock or power gated. This should help | ||
26 | * find annoying bugs where register reads and writes are silently dropped | ||
27 | * after the GPU has been turned off. On older chips these reads and writes can | ||
28 | * also lock the entire CPU up. | ||
29 | */ | ||
30 | void nvgpu_lockout_usermode_registers(struct gk20a *g) | ||
31 | { | ||
32 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
33 | |||
34 | l->usermode_regs = NULL; | ||
35 | } | ||
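
Any code that touches the usermode aperture must tolerate this lockout, since l->usermode_regs may be NULL while the GPU is gated. A hypothetical guarded write (function name and offset illustrative):

    /* Sketch: drop usermode writes while registers are locked out. */
    static void example_usermode_write(struct nvgpu_os_linux *l,
                                       u32 val, u32 offset)
    {
            if (!l->usermode_regs)
                    return; /* GPU is clock/power gated; silently drop */
            writel_relaxed(val, l->usermode_regs + offset);
    }
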
36 | |||
37 | /* | ||
38 | * Undoes nvgpu_lockout_usermode_registers(). | ||
39 | */ | ||
40 | void nvgpu_restore_usermode_registers(struct gk20a *g) | ||
41 | { | ||
42 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
43 | |||
44 | l->usermode_regs = l->usermode_regs_saved; | ||
45 | } | ||
46 | |||
47 | void nvgpu_remove_usermode_support(struct gk20a *g) | ||
48 | { | ||
49 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
50 | |||
51 | if (l->usermode_regs) { | ||
52 | l->usermode_regs = NULL; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | void nvgpu_init_usermode_support(struct gk20a *g) | ||
57 | { | ||
58 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
59 | |||
60 | l->usermode_regs = l->regs + usermode_cfg0_r(); | ||
61 | l->usermode_regs_saved = l->usermode_regs; | ||
62 | } | ||
diff --git a/include/os/linux/module_usermode.h b/include/os/linux/module_usermode.h new file mode 100644 index 0000000..b17053c --- /dev/null +++ b/include/os/linux/module_usermode.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __NVGPU_MODULE_USERMODE_H__ | ||
18 | #define __NVGPU_MODULE_USERMODE_H__ | ||
19 | |||
20 | struct gk20a; | ||
21 | |||
22 | void nvgpu_init_usermode_support(struct gk20a *g); | ||
23 | void nvgpu_remove_usermode_support(struct gk20a *g); | ||
24 | void nvgpu_lockout_usermode_registers(struct gk20a *g); | ||
25 | void nvgpu_restore_usermode_registers(struct gk20a *g); | ||
26 | |||
27 | #endif | ||
diff --git a/include/os/linux/nvgpu_mem.c b/include/os/linux/nvgpu_mem.c new file mode 100644 index 0000000..d6a3189 --- /dev/null +++ b/include/os/linux/nvgpu_mem.c | |||
@@ -0,0 +1,348 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/dma.h> | ||
18 | #include <nvgpu/gmmu.h> | ||
19 | #include <nvgpu/nvgpu_mem.h> | ||
20 | #include <nvgpu/page_allocator.h> | ||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/bug.h> | ||
23 | #include <nvgpu/enabled.h> | ||
24 | #include <nvgpu/kmem.h> | ||
25 | #include <nvgpu/vidmem.h> | ||
26 | #include <nvgpu/gk20a.h> | ||
27 | |||
28 | #include <nvgpu/linux/dma.h> | ||
29 | |||
30 | #include <linux/vmalloc.h> | ||
31 | #include <linux/dma-mapping.h> | ||
32 | |||
33 | #include "os_linux.h" | ||
34 | #include "dmabuf_vidmem.h" | ||
35 | |||
36 | #include "gk20a/mm_gk20a.h" | ||
37 | #include "platform_gk20a.h" | ||
38 | |||
39 | static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) | ||
40 | { | ||
41 | struct device *dev = dev_from_gk20a(g); | ||
42 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
43 | u64 ipa = sg_phys((struct scatterlist *)sgl); | ||
44 | |||
45 | if (platform->phys_addr) | ||
46 | return platform->phys_addr(g, ipa); | ||
47 | |||
48 | return ipa; | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Obtain a SYSMEM address from a Linux SGL. This should eventually go away | ||
53 | * and/or become private to this file once all bad usages of Linux SGLs are | ||
54 | * cleaned up in the driver. | ||
55 | */ | ||
56 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) | ||
57 | { | ||
58 | if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || | ||
59 | !nvgpu_iommuable(g)) | ||
60 | return g->ops.mm.gpu_phys_addr(g, NULL, | ||
61 | __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); | ||
62 | |||
63 | if (sg_dma_address(sgl) == 0) | ||
64 | return g->ops.mm.gpu_phys_addr(g, NULL, | ||
65 | __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); | ||
66 | |||
67 | if (sg_dma_address(sgl) == DMA_ERROR_CODE) | ||
68 | return 0; | ||
69 | |||
70 | return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl)); | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM | ||
75 | * allocation. | ||
76 | */ | ||
77 | static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem) | ||
78 | { | ||
79 | return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl); | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM | ||
84 | * allocation. | ||
85 | * | ||
86 | * Note: this API does not make sense to use for _VIDMEM_ buffers with greater | ||
87 | * than one scatterlist chunk. If there's more than one scatterlist chunk then | ||
88 | * the buffer will not be contiguous. As such the base address probably isn't | ||
89 | * very useful. This is true for SYSMEM as well, if there's no IOMMU. | ||
90 | * | ||
91 | * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's | ||
92 | * an IOMMU present and enabled for the GPU. | ||
93 | * | ||
94 | * Note: this function takes no nvgpu_gmmu_attrs, so the returned address is | ||
95 | * the raw base; callers must apply any PTE-specific adjustment themselves. | ||
96 | */ | ||
97 | u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) | ||
98 | { | ||
99 | struct nvgpu_page_alloc *alloc; | ||
100 | |||
101 | if (mem->aperture == APERTURE_SYSMEM) | ||
102 | return nvgpu_mem_get_addr_sysmem(g, mem); | ||
103 | |||
104 | /* | ||
105 | * Otherwise get the vidmem address. | ||
106 | */ | ||
107 | alloc = mem->vidmem_alloc; | ||
108 | |||
109 | /* This API should not be used with > 1 chunks */ | ||
110 | WARN_ON(alloc->nr_chunks != 1); | ||
111 | |||
112 | return alloc->base; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * This should only be used on contiguous buffers regardless of whether | ||
117 | * there's an IOMMU present/enabled. This applies to both SYSMEM and | ||
118 | * VIDMEM. | ||
119 | */ | ||
120 | u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) | ||
121 | { | ||
122 | /* | ||
123 | * For a VIDMEM buf, this is identical to simply get_addr() so just fall | ||
124 | * back to that. | ||
125 | */ | ||
126 | if (mem->aperture == APERTURE_VIDMEM) | ||
127 | return nvgpu_mem_get_addr(g, mem); | ||
128 | |||
129 | return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl); | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Be careful how you use this! You are responsible for correctly freeing this | ||
134 | * memory. | ||
135 | */ | ||
136 | int nvgpu_mem_create_from_mem(struct gk20a *g, | ||
137 | struct nvgpu_mem *dest, struct nvgpu_mem *src, | ||
138 | u64 start_page, int nr_pages) | ||
139 | { | ||
140 | int ret; | ||
141 | u64 start = start_page * PAGE_SIZE; | ||
142 | u64 size = nr_pages * PAGE_SIZE; | ||
143 | dma_addr_t new_iova; | ||
144 | |||
145 | if (src->aperture != APERTURE_SYSMEM) | ||
146 | return -EINVAL; | ||
147 | |||
148 | /* Some silly things a caller might do... */ | ||
149 | if (size > src->size) | ||
150 | return -EINVAL; | ||
151 | if ((start + size) > src->size) | ||
152 | return -EINVAL; | ||
153 | |||
154 | dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY; | ||
155 | dest->aperture = src->aperture; | ||
156 | dest->skip_wmb = src->skip_wmb; | ||
157 | dest->size = size; | ||
158 | |||
159 | /* | ||
160 | * Re-use the CPU mapping only if the mapping was made by the DMA API. | ||
161 | * | ||
162 | * Bug 2040115: the DMA API wrapper makes the mapping that we should | ||
163 | * re-use. | ||
164 | */ | ||
165 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || | ||
166 | nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
167 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); | ||
168 | |||
169 | dest->priv.pages = src->priv.pages + start_page; | ||
170 | dest->priv.flags = src->priv.flags; | ||
171 | |||
172 | new_iova = sg_dma_address(src->priv.sgt->sgl) ? | ||
173 | sg_dma_address(src->priv.sgt->sgl) + start : 0; | ||
174 | |||
175 | /* | ||
176 | * Make a new SG table that is based only on the subset of pages that | ||
177 | * is passed to us. This table gets freed by the dma free routines. | ||
178 | */ | ||
179 | if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) | ||
180 | ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt, | ||
181 | src->priv.pages + start_page, | ||
182 | new_iova, size); | ||
183 | else | ||
184 | ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va, | ||
185 | new_iova, size); | ||
186 | |||
187 | return ret; | ||
188 | } | ||
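
A usage sketch of the above (variable names hypothetical): carve a two-page window starting at page 2 out of an existing SYSMEM buffer. The destination aliases the source's pages, so it must be released through the normal DMA free path before the source goes away:

    struct nvgpu_mem sub;
    int err;

    err = nvgpu_mem_create_from_mem(g, &sub, &src_mem, 2, 2);
    if (err)
            return err;
    /* ... use sub ... */
    nvgpu_dma_free(g, &sub); /* frees the sub-SGT built above */
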
189 | |||
190 | int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, | ||
191 | struct page **pages, int nr_pages) | ||
192 | { | ||
193 | struct sg_table *sgt; | ||
194 | struct page **our_pages = | ||
195 | nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); | ||
196 | |||
197 | if (!our_pages) | ||
198 | return -ENOMEM; | ||
199 | |||
200 | memcpy(our_pages, pages, sizeof(struct page *) * nr_pages); | ||
201 | |||
202 | if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0, | ||
203 | nr_pages * PAGE_SIZE)) { | ||
204 | nvgpu_kfree(g, our_pages); | ||
205 | return -ENOMEM; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * If we are making an SGT from physical pages we can be reasonably | ||
210 | * certain that this should bypass the SMMU - thus we set the DMA (aka | ||
211 | * IOVA) address to 0. This tells the GMMU mapping code to not make a | ||
212 | * mapping directed to the SMMU. | ||
213 | */ | ||
214 | sg_dma_address(sgt->sgl) = 0; | ||
215 | |||
216 | dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA; | ||
217 | dest->aperture = APERTURE_SYSMEM; | ||
218 | dest->skip_wmb = 0; | ||
219 | dest->size = PAGE_SIZE * nr_pages; | ||
220 | |||
221 | dest->priv.flags = 0; | ||
222 | dest->priv.pages = our_pages; | ||
223 | dest->priv.sgt = sgt; | ||
224 | |||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
229 | int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, | ||
230 | u64 src_phys, int nr_pages) | ||
231 | { | ||
232 | struct page **pages = | ||
233 | nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); | ||
234 | int i, ret = 0; | ||
235 | |||
236 | if (!pages) | ||
237 | return -ENOMEM; | ||
238 | |||
239 | for (i = 0; i < nr_pages; i++) | ||
240 | pages[i] = phys_to_page(src_phys + PAGE_SIZE * i); | ||
241 | |||
242 | ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages); | ||
243 | nvgpu_kfree(g, pages); | ||
244 | |||
245 | return ret; | ||
246 | } | ||
247 | #endif | ||
248 | |||
249 | static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl) | ||
250 | { | ||
251 | return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl); | ||
252 | } | ||
253 | |||
254 | static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) | ||
255 | { | ||
256 | return (u64)__nvgpu_sgl_phys(g, sgl); | ||
257 | } | ||
258 | |||
259 | static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl) | ||
260 | { | ||
261 | return (u64)sg_dma_address((struct scatterlist *)sgl); | ||
262 | } | ||
263 | |||
264 | static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl) | ||
265 | { | ||
266 | return (u64)((struct scatterlist *)sgl)->length; | ||
267 | } | ||
268 | |||
269 | static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, | ||
270 | struct nvgpu_sgl *sgl, | ||
271 | struct nvgpu_gmmu_attrs *attrs) | ||
272 | { | ||
273 | if (sg_dma_address((struct scatterlist *)sgl) == 0) | ||
274 | return g->ops.mm.gpu_phys_addr(g, attrs, | ||
275 | __nvgpu_sgl_phys(g, sgl)); | ||
276 | |||
277 | if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) | ||
278 | return 0; | ||
279 | |||
280 | return nvgpu_mem_iommu_translate(g, | ||
281 | sg_dma_address((struct scatterlist *)sgl)); | ||
282 | } | ||
283 | |||
284 | static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g, | ||
285 | struct nvgpu_sgt *sgt) | ||
286 | { | ||
287 | if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG)) | ||
288 | return false; | ||
289 | return true; | ||
290 | } | ||
291 | |||
292 | static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
293 | { | ||
294 | /* | ||
295 | * Free only the nvgpu_sgt wrapper itself. The underlying Linux | ||
296 | * SGT/SGL needs to be freed separately by its owner. | ||
297 | */ | ||
298 | nvgpu_kfree(g, sgt); | ||
299 | } | ||
300 | |||
301 | static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { | ||
302 | .sgl_next = nvgpu_mem_linux_sgl_next, | ||
303 | .sgl_phys = nvgpu_mem_linux_sgl_phys, | ||
304 | .sgl_dma = nvgpu_mem_linux_sgl_dma, | ||
305 | .sgl_length = nvgpu_mem_linux_sgl_length, | ||
306 | .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, | ||
307 | .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable, | ||
308 | .sgt_free = nvgpu_mem_linux_sgl_free, | ||
309 | }; | ||
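
With these ops in place, callers can walk a Linux-backed nvgpu_sgt generically without knowing it wraps a scatterlist. A minimal sketch, assuming sgt came from nvgpu_linux_sgt_create() below:

    /* Sketch: iterate all chunks of an nvgpu_sgt through its ops vtable. */
    struct nvgpu_sgl *sgl;

    for (sgl = sgt->sgl; sgl != NULL; sgl = sgt->ops->sgl_next(sgl)) {
            u64 phys = sgt->ops->sgl_phys(g, sgl);
            u64 len  = sgt->ops->sgl_length(sgl);

            /* ... program phys/len into the GMMU ... */
    }
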
310 | |||
311 | static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( | ||
312 | struct gk20a *g, | ||
313 | struct scatterlist *linux_sgl) | ||
314 | { | ||
315 | struct nvgpu_page_alloc *vidmem_alloc; | ||
316 | |||
317 | vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl); | ||
318 | if (!vidmem_alloc) | ||
319 | return NULL; | ||
320 | |||
321 | return &vidmem_alloc->sgt; | ||
322 | } | ||
323 | |||
324 | struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) | ||
325 | { | ||
326 | struct nvgpu_sgt *nvgpu_sgt; | ||
327 | struct scatterlist *linux_sgl = sgt->sgl; | ||
328 | |||
329 | if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl))) | ||
330 | return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); | ||
331 | |||
332 | nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); | ||
333 | if (!nvgpu_sgt) | ||
334 | return NULL; | ||
335 | |||
336 | nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); | ||
337 | |||
338 | nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl; | ||
339 | nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; | ||
340 | |||
341 | return nvgpu_sgt; | ||
342 | } | ||
343 | |||
344 | struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, | ||
345 | struct nvgpu_mem *mem) | ||
346 | { | ||
347 | return nvgpu_linux_sgt_create(g, mem->priv.sgt); | ||
348 | } | ||
diff --git a/include/os/linux/nvhost.c b/include/os/linux/nvhost.c new file mode 100644 index 0000000..a9341c7 --- /dev/null +++ b/include/os/linux/nvhost.c | |||
@@ -0,0 +1,295 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/nvhost.h> | ||
18 | #include <linux/nvhost_t194.h> | ||
19 | #include <uapi/linux/nvhost_ioctl.h> | ||
20 | #include <linux/of_platform.h> | ||
21 | |||
22 | #include <nvgpu/gk20a.h> | ||
23 | #include <nvgpu/nvhost.h> | ||
24 | #include <nvgpu/enabled.h> | ||
25 | |||
26 | #include "nvhost_priv.h" | ||
27 | |||
28 | #include "os_linux.h" | ||
29 | #include "module.h" | ||
30 | |||
31 | int nvgpu_get_nvhost_dev(struct gk20a *g) | ||
32 | { | ||
33 | struct device_node *np = nvgpu_get_node(g); | ||
34 | struct platform_device *host1x_pdev = NULL; | ||
35 | const __be32 *host1x_ptr; | ||
36 | |||
37 | host1x_ptr = of_get_property(np, "nvidia,host1x", NULL); | ||
38 | if (host1x_ptr) { | ||
39 | struct device_node *host1x_node = | ||
40 | of_find_node_by_phandle(be32_to_cpup(host1x_ptr)); | ||
41 | |||
42 | host1x_pdev = of_find_device_by_node(host1x_node); | ||
43 | if (!host1x_pdev) { | ||
44 | nvgpu_warn(g, "host1x device not available"); | ||
45 | return -EPROBE_DEFER; | ||
46 | } | ||
47 | |||
48 | } else { | ||
49 | if (nvgpu_has_syncpoints(g)) { | ||
50 | 			nvgpu_warn(g, "host1x reference not found; assuming no syncpoint support"); | ||
51 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false); | ||
52 | } | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev)); | ||
57 | if (!g->nvhost_dev) | ||
58 | return -ENOMEM; | ||
59 | |||
60 | g->nvhost_dev->host1x_pdev = host1x_pdev; | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | void nvgpu_free_nvhost_dev(struct gk20a *g) | ||
66 | { | ||
67 | nvgpu_kfree(g, g->nvhost_dev); | ||
68 | } | ||
69 | |||
70 | int nvgpu_nvhost_module_busy_ext( | ||
71 | struct nvgpu_nvhost_dev *nvhost_dev) | ||
72 | { | ||
73 | return nvhost_module_busy_ext(nvhost_dev->host1x_pdev); | ||
74 | } | ||
75 | |||
76 | void nvgpu_nvhost_module_idle_ext( | ||
77 | struct nvgpu_nvhost_dev *nvhost_dev) | ||
78 | { | ||
79 | nvhost_module_idle_ext(nvhost_dev->host1x_pdev); | ||
80 | } | ||
81 | |||
82 | void nvgpu_nvhost_debug_dump_device( | ||
83 | struct nvgpu_nvhost_dev *nvhost_dev) | ||
84 | { | ||
85 | nvhost_debug_dump_device(nvhost_dev->host1x_pdev); | ||
86 | } | ||
87 | |||
88 | const char *nvgpu_nvhost_syncpt_get_name( | ||
89 | struct nvgpu_nvhost_dev *nvhost_dev, int id) | ||
90 | { | ||
91 | return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id); | ||
92 | } | ||
93 | |||
94 | bool nvgpu_nvhost_syncpt_is_valid_pt_ext( | ||
95 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
96 | { | ||
97 | return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id); | ||
98 | } | ||
99 | |||
100 | int nvgpu_nvhost_syncpt_is_expired_ext( | ||
101 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) | ||
102 | { | ||
103 | return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev, | ||
104 | id, thresh); | ||
105 | } | ||
106 | |||
107 | u32 nvgpu_nvhost_syncpt_incr_max_ext( | ||
108 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs) | ||
109 | { | ||
110 | return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs); | ||
111 | } | ||
112 | |||
113 | int nvgpu_nvhost_intr_register_notifier( | ||
114 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, | ||
115 | void (*callback)(void *, int), void *private_data) | ||
116 | { | ||
117 | return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev, | ||
118 | id, thresh, | ||
119 | callback, private_data); | ||
120 | } | ||
121 | |||
122 | void nvgpu_nvhost_syncpt_set_min_eq_max_ext( | ||
123 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
124 | { | ||
125 | nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id); | ||
126 | } | ||
127 | |||
128 | void nvgpu_nvhost_syncpt_put_ref_ext( | ||
129 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
130 | { | ||
131 | nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id); | ||
132 | } | ||
133 | |||
134 | u32 nvgpu_nvhost_get_syncpt_host_managed( | ||
135 | struct nvgpu_nvhost_dev *nvhost_dev, | ||
136 | u32 param, const char *syncpt_name) | ||
137 | { | ||
138 | return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev, | ||
139 | param, syncpt_name); | ||
140 | } | ||
141 | |||
142 | u32 nvgpu_nvhost_get_syncpt_client_managed( | ||
143 | struct nvgpu_nvhost_dev *nvhost_dev, | ||
144 | const char *syncpt_name) | ||
145 | { | ||
146 | return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev, | ||
147 | syncpt_name); | ||
148 | } | ||
149 | |||
150 | int nvgpu_nvhost_syncpt_wait_timeout_ext( | ||
151 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, | ||
152 | u32 thresh, u32 timeout, u32 *value, struct timespec *ts) | ||
153 | { | ||
154 | return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev, | ||
155 | id, thresh, timeout, value, ts); | ||
156 | } | ||
157 | |||
158 | int nvgpu_nvhost_syncpt_read_ext_check( | ||
159 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val) | ||
160 | { | ||
161 | return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val); | ||
162 | } | ||
163 | |||
164 | u32 nvgpu_nvhost_syncpt_read_maxval( | ||
165 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
166 | { | ||
167 | return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); | ||
168 | } | ||
169 | |||
170 | void nvgpu_nvhost_syncpt_set_safe_state( | ||
171 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
172 | { | ||
173 | u32 val; | ||
174 | |||
175 | /* | ||
176 | * Add large number of increments to current value | ||
177 | * so that all waiters on this syncpoint are released | ||
178 | * | ||
179 | * We don't expect any case where more than 0x10000 increments | ||
180 | * are pending | ||
181 | */ | ||
182 | val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id); | ||
183 | val += 0x10000; | ||
184 | |||
185 | nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val); | ||
186 | nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val); | ||
187 | } | ||
188 | |||
189 | int nvgpu_nvhost_create_symlink(struct gk20a *g) | ||
190 | { | ||
191 | struct device *dev = dev_from_gk20a(g); | ||
192 | int err = 0; | ||
193 | |||
194 | if (g->nvhost_dev && | ||
195 | (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { | ||
196 | err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj, | ||
197 | &dev->kobj, | ||
198 | dev_name(dev)); | ||
199 | } | ||
200 | |||
201 | return err; | ||
202 | } | ||
203 | |||
204 | void nvgpu_nvhost_remove_symlink(struct gk20a *g) | ||
205 | { | ||
206 | struct device *dev = dev_from_gk20a(g); | ||
207 | |||
208 | if (g->nvhost_dev && | ||
209 | (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { | ||
210 | sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj, | ||
211 | dev_name(dev)); | ||
212 | } | ||
213 | } | ||
214 | |||
215 | #ifdef CONFIG_SYNC | ||
216 | u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt) | ||
217 | { | ||
218 | return nvhost_sync_pt_id(pt); | ||
219 | } | ||
220 | |||
221 | u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt) | ||
222 | { | ||
223 | return nvhost_sync_pt_thresh(pt); | ||
224 | } | ||
225 | |||
226 | struct sync_fence *nvgpu_nvhost_sync_fdget(int fd) | ||
227 | { | ||
228 | return nvhost_sync_fdget(fd); | ||
229 | } | ||
230 | |||
231 | int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence) | ||
232 | { | ||
233 | return nvhost_sync_num_pts(fence); | ||
234 | } | ||
235 | |||
236 | struct sync_fence *nvgpu_nvhost_sync_create_fence( | ||
237 | struct nvgpu_nvhost_dev *nvhost_dev, | ||
238 | u32 id, u32 thresh, const char *name) | ||
239 | { | ||
240 | struct nvhost_ctrl_sync_fence_info pt = { | ||
241 | .id = id, | ||
242 | .thresh = thresh, | ||
243 | }; | ||
244 | |||
245 | return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name); | ||
246 | } | ||
247 | #endif /* CONFIG_SYNC */ | ||
248 | |||
249 | #ifdef CONFIG_TEGRA_T19X_GRHOST | ||
250 | int nvgpu_nvhost_syncpt_unit_interface_get_aperture( | ||
251 | struct nvgpu_nvhost_dev *nvhost_dev, | ||
252 | u64 *base, size_t *size) | ||
253 | { | ||
254 | return nvhost_syncpt_unit_interface_get_aperture( | ||
255 | nvhost_dev->host1x_pdev, (phys_addr_t *)base, size); | ||
256 | } | ||
257 | |||
258 | u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id) | ||
259 | { | ||
260 | return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id); | ||
261 | } | ||
262 | |||
263 | int nvgpu_nvhost_syncpt_init(struct gk20a *g) | ||
264 | { | ||
265 | int err = 0; | ||
266 | |||
267 | if (!nvgpu_has_syncpoints(g)) | ||
268 | return -ENOSYS; | ||
269 | |||
270 | err = nvgpu_get_nvhost_dev(g); | ||
271 | if (err) { | ||
272 | nvgpu_err(g, "host1x device not available"); | ||
273 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false); | ||
274 | return -ENOSYS; | ||
275 | } | ||
276 | |||
277 | err = nvgpu_nvhost_syncpt_unit_interface_get_aperture( | ||
278 | g->nvhost_dev, | ||
279 | &g->syncpt_unit_base, | ||
280 | &g->syncpt_unit_size); | ||
281 | if (err) { | ||
282 | nvgpu_err(g, "Failed to get syncpt interface"); | ||
283 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false); | ||
284 | return -ENOSYS; | ||
285 | } | ||
286 | |||
287 | g->syncpt_size = | ||
288 | nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); | ||
289 | nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", | ||
290 | g->syncpt_unit_base, g->syncpt_unit_size, | ||
291 | g->syncpt_size); | ||
292 | |||
293 | return 0; | ||
294 | } | ||
295 | #endif | ||
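
After nvgpu_nvhost_syncpt_init() succeeds, g->syncpt_size holds the per-syncpoint stride (the byte offset of syncpoint 1). A sketch of how a syncpoint's GPU-visible address could then be derived (helper name hypothetical; requires the unit interface above, i.e. CONFIG_TEGRA_T19X_GRHOST):

    /* Sketch: physical address of syncpoint 'id' inside the shim aperture. */
    static u64 example_syncpt_addr(struct gk20a *g, u32 id)
    {
            return g->syncpt_unit_base +
                   nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(id);
    }
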
diff --git a/include/os/linux/nvhost_priv.h b/include/os/linux/nvhost_priv.h new file mode 100644 index 0000000..c03390a --- /dev/null +++ b/include/os/linux/nvhost_priv.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __NVGPU_NVHOST_PRIV_H__ | ||
18 | #define __NVGPU_NVHOST_PRIV_H__ | ||
19 | |||
20 | struct nvgpu_nvhost_dev { | ||
21 | struct platform_device *host1x_pdev; | ||
22 | }; | ||
23 | |||
24 | #endif /* __NVGPU_NVHOST_PRIV_H__ */ | ||
diff --git a/include/os/linux/nvidia_p2p.c b/include/os/linux/nvidia_p2p.c new file mode 100644 index 0000000..87db8c5 --- /dev/null +++ b/include/os/linux/nvidia_p2p.c | |||
@@ -0,0 +1,299 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <linux/slab.h> | ||
24 | #include <linux/nv-p2p.h> | ||
25 | |||
26 | static void nvidia_p2p_mn_release(struct mmu_notifier *mn, | ||
27 | struct mm_struct *mm) | ||
28 | { | ||
29 | struct nvidia_p2p_page_table *page_table = container_of(mn, | ||
30 | struct nvidia_p2p_page_table, | ||
31 | mn); | ||
32 | |||
33 | page_table->free_callback(page_table->data); | ||
34 | } | ||
35 | |||
36 | static void nvidia_p2p_mn_invl_range_start(struct mmu_notifier *mn, | ||
37 | struct mm_struct *mm, unsigned long start, unsigned long end) | ||
38 | { | ||
39 | struct nvidia_p2p_page_table *page_table = container_of(mn, | ||
40 | struct nvidia_p2p_page_table, | ||
41 | mn); | ||
42 | u64 vaddr = 0; | ||
43 | u64 size = 0; | ||
44 | |||
45 | vaddr = page_table->vaddr; | ||
46 | size = page_table->size; | ||
47 | |||
48 | if (vaddr >= start && vaddr <= end) { | ||
49 | mmu_notifier_unregister_no_release(&page_table->mn, page_table->mm); | ||
50 | page_table->free_callback(page_table->data); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | static struct mmu_notifier_ops nvidia_p2p_mmu_ops = { | ||
55 | .release = nvidia_p2p_mn_release, | ||
56 | .invalidate_range_start = nvidia_p2p_mn_invl_range_start, | ||
57 | }; | ||
58 | |||
59 | int nvidia_p2p_get_pages(u64 vaddr, u64 size, | ||
60 | struct nvidia_p2p_page_table **page_table, | ||
61 | void (*free_callback)(void *data), void *data) | ||
62 | { | ||
63 | int ret = 0; | ||
64 | int user_pages = 0; | ||
65 | int locked = 0; | ||
66 | int nr_pages = size >> PAGE_SHIFT; | ||
67 | struct page **pages; | ||
68 | |||
69 | if (nr_pages <= 0) { | ||
70 | return -EINVAL; | ||
71 | } | ||
72 | |||
73 | *page_table = kzalloc(sizeof(**page_table), GFP_KERNEL); | ||
74 | if (!*page_table) { | ||
75 | return -ENOMEM; | ||
76 | } | ||
77 | |||
78 | pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); | ||
79 | if (!pages) { | ||
80 | ret = -ENOMEM; | ||
81 | goto free_page_table; | ||
82 | } | ||
83 | down_read(¤t->mm->mmap_sem); | ||
84 | locked = 1; | ||
85 | user_pages = get_user_pages_locked(vaddr & PAGE_MASK, nr_pages, | ||
86 | FOLL_WRITE | FOLL_FORCE, | ||
87 | pages, &locked); | ||
88 | 	if (locked) up_read(&current->mm->mmap_sem); /* gup may have dropped it */ | ||
89 | if (user_pages != nr_pages) { | ||
90 | ret = user_pages < 0 ? user_pages : -ENOMEM; | ||
91 | goto free_pages; | ||
92 | } | ||
93 | |||
94 | (*page_table)->version = NVIDIA_P2P_PAGE_TABLE_VERSION; | ||
95 | (*page_table)->pages = pages; | ||
96 | (*page_table)->entries = user_pages; | ||
97 | (*page_table)->page_size = NVIDIA_P2P_PAGE_SIZE_4KB; | ||
98 | (*page_table)->size = size; | ||
99 | |||
100 | (*page_table)->mn.ops = &nvidia_p2p_mmu_ops; | ||
101 | (*page_table)->mm = current->mm; | ||
102 | (*page_table)->free_callback = free_callback; | ||
103 | (*page_table)->data = data; | ||
104 | (*page_table)->vaddr = vaddr; | ||
105 | mutex_init(&(*page_table)->lock); | ||
106 | (*page_table)->mapped = NVIDIA_P2P_PINNED; | ||
107 | |||
108 | ret = mmu_notifier_register(&(*page_table)->mn, (*page_table)->mm); | ||
109 | if (ret) { | ||
110 | goto free_pages; | ||
111 | } | ||
112 | |||
113 | return 0; | ||
114 | free_pages: | ||
115 | while (--user_pages >= 0) { | ||
116 | put_page(pages[user_pages]); | ||
117 | } | ||
118 | kfree(pages); | ||
119 | free_page_table: | ||
120 | kfree(*page_table); | ||
121 | *page_table = NULL; | ||
122 | return ret; | ||
123 | } | ||
124 | EXPORT_SYMBOL(nvidia_p2p_get_pages); | ||
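
A hypothetical caller (names illustrative): pin a user VA range for peer-to-peer DMA and register a callback that tears down any device mappings if the address space is torn down first:

    static void example_free_cb(void *data)
    {
            /* The pages are about to go away; invalidate device mappings. */
    }

    /* ... */
    struct nvidia_p2p_page_table *pt;
    int err;

    err = nvidia_p2p_get_pages(vaddr, size, &pt, example_free_cb, ctx);
    if (err)
            return err;
    /* ... nvidia_p2p_dma_map_pages(dev, pt, &map, DMA_BIDIRECTIONAL); ... */
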
125 | |||
126 | int nvidia_p2p_put_pages(struct nvidia_p2p_page_table *page_table) | ||
127 | { | ||
128 | if (!page_table) { | ||
129 | return -EINVAL; | ||
130 | } | ||
131 | |||
132 | mmu_notifier_unregister(&page_table->mn, page_table->mm); | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | EXPORT_SYMBOL(nvidia_p2p_put_pages); | ||
137 | |||
138 | int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table) | ||
139 | { | ||
140 | int user_pages = 0; | ||
141 | struct page **pages = NULL; | ||
142 | |||
143 | if (!page_table) { | ||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | mutex_lock(&page_table->lock); | ||
148 | |||
149 | if (page_table->mapped & NVIDIA_P2P_MAPPED) { | ||
150 | 		WARN(1, "Attempting to free pages that are still DMA-mapped"); | ||
151 | } | ||
152 | |||
153 | if (page_table->mapped & NVIDIA_P2P_PINNED) { | ||
154 | pages = page_table->pages; | ||
155 | user_pages = page_table->entries; | ||
156 | |||
157 | while (--user_pages >= 0) { | ||
158 | put_page(pages[user_pages]); | ||
159 | } | ||
160 | |||
161 | kfree(pages); | ||
162 | page_table->mapped &= (u32)~NVIDIA_P2P_PINNED; | ||
163 | } | ||
164 | |||
165 | mutex_unlock(&page_table->lock); | ||
166 | |||
167 | return 0; | ||
168 | } | ||
169 | EXPORT_SYMBOL(nvidia_p2p_free_page_table); | ||
170 | |||
171 | int nvidia_p2p_dma_map_pages(struct device *dev, | ||
172 | struct nvidia_p2p_page_table *page_table, | ||
173 | struct nvidia_p2p_dma_mapping **dma_mapping, | ||
174 | enum dma_data_direction direction) | ||
175 | { | ||
176 | struct sg_table *sgt = NULL; | ||
177 | struct scatterlist *sg; | ||
178 | struct page **pages = NULL; | ||
179 | u32 nr_pages = 0; | ||
180 | int ret = 0; | ||
181 | int i, count; | ||
182 | |||
183 | if (!page_table) { | ||
184 | return -EINVAL; | ||
185 | } | ||
186 | |||
187 | mutex_lock(&page_table->lock); | ||
188 | |||
189 | pages = page_table->pages; | ||
190 | nr_pages = page_table->entries; | ||
191 | 	if (nr_pages == 0) { | ||
192 | mutex_unlock(&page_table->lock); | ||
193 | return -EINVAL; | ||
194 | } | ||
195 | |||
196 | *dma_mapping = kzalloc(sizeof(**dma_mapping), GFP_KERNEL); | ||
197 | if (!*dma_mapping) { | ||
198 | mutex_unlock(&page_table->lock); | ||
199 | return -ENOMEM; | ||
200 | } | ||
201 | sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); | ||
202 | if (!sgt) { | ||
203 | ret = -ENOMEM; | ||
204 | goto free_dma_mapping; | ||
205 | } | ||
206 | ret = sg_alloc_table_from_pages(sgt, pages, | ||
207 | nr_pages, 0, page_table->size, GFP_KERNEL); | ||
208 | if (ret) { | ||
209 | goto free_sgt; | ||
210 | } | ||
211 | |||
212 | (*dma_mapping)->version = NVIDIA_P2P_DMA_MAPPING_VERSION; | ||
213 | (*dma_mapping)->sgt = sgt; | ||
214 | (*dma_mapping)->dev = dev; | ||
215 | (*dma_mapping)->direction = direction; | ||
216 | (*dma_mapping)->page_table = page_table; | ||
217 | |||
218 | count = dma_map_sg(dev, sgt->sgl, sgt->nents, direction); | ||
219 | if (count < 1) { | ||
220 | goto free_sg_table; | ||
221 | } | ||
222 | |||
223 | (*dma_mapping)->entries = count; | ||
224 | |||
225 | (*dma_mapping)->hw_address = kcalloc(count, sizeof(u64), GFP_KERNEL); | ||
226 | if (!((*dma_mapping)->hw_address)) { | ||
227 | ret = -ENOMEM; | ||
228 | goto unmap_sg; | ||
229 | } | ||
230 | (*dma_mapping)->hw_len = kcalloc(count, sizeof(u64), GFP_KERNEL); | ||
231 | if (!((*dma_mapping)->hw_len)) { | ||
232 | ret = -ENOMEM; | ||
233 | goto free_hw_address; | ||
234 | } | ||
235 | |||
236 | for_each_sg(sgt->sgl, sg, count, i) { | ||
237 | (*dma_mapping)->hw_address[i] = sg_dma_address(sg); | ||
238 | (*dma_mapping)->hw_len[i] = sg_dma_len(sg); | ||
239 | } | ||
240 | (*dma_mapping)->page_table->mapped |= NVIDIA_P2P_MAPPED; | ||
241 | mutex_unlock(&page_table->lock); | ||
242 | |||
243 | return 0; | ||
244 | free_hw_address: | ||
245 | kfree((*dma_mapping)->hw_address); | ||
246 | unmap_sg: | ||
247 | dma_unmap_sg(dev, sgt->sgl, | ||
248 | sgt->nents, direction); | ||
249 | free_sg_table: | ||
250 | sg_free_table(sgt); | ||
251 | free_sgt: | ||
252 | kfree(sgt); | ||
253 | free_dma_mapping: | ||
254 | kfree(*dma_mapping); | ||
255 | *dma_mapping = NULL; | ||
256 | mutex_unlock(&page_table->lock); | ||
257 | |||
258 | return ret; | ||
259 | } | ||
260 | EXPORT_SYMBOL(nvidia_p2p_dma_map_pages); | ||
261 | |||
262 | int nvidia_p2p_dma_unmap_pages(struct nvidia_p2p_dma_mapping *dma_mapping) | ||
263 | { | ||
264 | struct nvidia_p2p_page_table *page_table = NULL; | ||
265 | |||
266 | if (!dma_mapping) { | ||
267 | return -EINVAL; | ||
268 | } | ||
269 | |||
270 | page_table = dma_mapping->page_table; | ||
271 | if (!page_table) { | ||
272 | return -EFAULT; | ||
273 | } | ||
274 | |||
275 | mutex_lock(&page_table->lock); | ||
276 | if (page_table->mapped & NVIDIA_P2P_MAPPED) { | ||
277 | kfree(dma_mapping->hw_len); | ||
278 | kfree(dma_mapping->hw_address); | ||
279 | if (dma_mapping->entries) | ||
280 | dma_unmap_sg(dma_mapping->dev, | ||
281 | dma_mapping->sgt->sgl, | ||
282 | dma_mapping->sgt->nents, | ||
283 | dma_mapping->direction); | ||
284 | sg_free_table(dma_mapping->sgt); | ||
285 | kfree(dma_mapping->sgt); | ||
286 | kfree(dma_mapping); | ||
287 | page_table->mapped &= (u32)~NVIDIA_P2P_MAPPED; | ||
288 | } | ||
289 | mutex_unlock(&page_table->lock); | ||
290 | |||
291 | return 0; | ||
292 | } | ||
293 | EXPORT_SYMBOL(nvidia_p2p_dma_unmap_pages); | ||
294 | |||
295 | int nvidia_p2p_free_dma_mapping(struct nvidia_p2p_dma_mapping *dma_mapping) | ||
296 | { | ||
297 | return nvidia_p2p_dma_unmap_pages(dma_mapping); | ||
298 | } | ||
299 | EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping); | ||
diff --git a/include/os/linux/nvlink.c b/include/os/linux/nvlink.c new file mode 100644 index 0000000..dd7c02c --- /dev/null +++ b/include/os/linux/nvlink.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifdef CONFIG_TEGRA_NVLINK | ||
18 | #include <linux/platform/tegra/tegra-nvlink.h> | ||
19 | #endif | ||
20 | |||
21 | #include <nvgpu/gk20a.h> | ||
22 | #include <nvgpu/nvlink.h> | ||
23 | #include <nvgpu/enabled.h> | ||
24 | #include "module.h" | ||
25 | |||
26 | #ifdef CONFIG_TEGRA_NVLINK | ||
27 | int nvgpu_nvlink_read_dt_props(struct gk20a *g) | ||
28 | { | ||
29 | struct device_node *np; | ||
30 | struct nvlink_device *ndev = g->nvlink.priv; | ||
31 | u32 local_dev_id; | ||
32 | u32 local_link_id; | ||
33 | u32 remote_dev_id; | ||
34 | u32 remote_link_id; | ||
35 | bool is_master; | ||
36 | |||
37 | /* Parse DT */ | ||
38 | np = nvgpu_get_node(g); | ||
39 | if (!np) | ||
40 | goto fail; | ||
41 | |||
42 | np = of_get_child_by_name(np, "nvidia,nvlink"); | ||
43 | if (!np) | ||
44 | goto fail; | ||
45 | |||
46 | np = of_get_child_by_name(np, "endpoint"); | ||
47 | if (!np) | ||
48 | goto fail; | ||
49 | |||
50 | /* Parse DT structure to detect endpoint topology */ | ||
51 | of_property_read_u32(np, "local_dev_id", &local_dev_id); | ||
52 | of_property_read_u32(np, "local_link_id", &local_link_id); | ||
53 | of_property_read_u32(np, "remote_dev_id", &remote_dev_id); | ||
54 | of_property_read_u32(np, "remote_link_id", &remote_link_id); | ||
55 | is_master = of_property_read_bool(np, "is_master"); | ||
56 | |||
57 | /* Check that we are in dGPU mode */ | ||
58 | if (local_dev_id != NVLINK_ENDPT_GV100) { | ||
59 | nvgpu_err(g, "Local nvlink device is not dGPU"); | ||
60 | return -EINVAL; | ||
61 | } | ||
62 | |||
63 | ndev->is_master = is_master; | ||
64 | ndev->device_id = local_dev_id; | ||
65 | ndev->link.link_id = local_link_id; | ||
66 | ndev->link.remote_dev_info.device_id = remote_dev_id; | ||
67 | ndev->link.remote_dev_info.link_id = remote_link_id; | ||
68 | |||
69 | return 0; | ||
70 | |||
71 | fail: | ||
72 | 	nvgpu_info(g, "nvlink endpoint not found or invalid in DT"); | ||
73 | return -ENODEV; | ||
74 | } | ||
75 | #endif /* CONFIG_TEGRA_NVLINK */ | ||
76 | |||
77 | void nvgpu_mss_nvlink_init_credits(struct gk20a *g) | ||
78 | { | ||
79 | /* MSS_NVLINK_1_BASE */ | ||
80 | void __iomem *soc1 = ioremap(0x01f20010, 4096); | ||
81 | /* MSS_NVLINK_2_BASE */ | ||
82 | void __iomem *soc2 = ioremap(0x01f40010, 4096); | ||
83 | /* MSS_NVLINK_3_BASE */ | ||
84 | void __iomem *soc3 = ioremap(0x01f60010, 4096); | ||
85 | /* MSS_NVLINK_4_BASE */ | ||
86 | void __iomem *soc4 = ioremap(0x01f80010, 4096); | ||
87 | u32 val; | ||
88 | |||
89 | nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits"); | ||
90 | |||
91 | val = readl_relaxed(soc1); | ||
92 | writel_relaxed(val, soc1); | ||
93 | val = readl_relaxed(soc1 + 4); | ||
94 | writel_relaxed(val, soc1 + 4); | ||
95 | |||
96 | val = readl_relaxed(soc2); | ||
97 | writel_relaxed(val, soc2); | ||
98 | val = readl_relaxed(soc2 + 4); | ||
99 | writel_relaxed(val, soc2 + 4); | ||
100 | |||
101 | val = readl_relaxed(soc3); | ||
102 | writel_relaxed(val, soc3); | ||
103 | val = readl_relaxed(soc3 + 4); | ||
104 | writel_relaxed(val, soc3 + 4); | ||
105 | |||
106 | val = readl_relaxed(soc4); | ||
107 | writel_relaxed(val, soc4); | ||
108 | val = readl_relaxed(soc4 + 4); | ||
109 | writel_relaxed(val, soc4 + 4); | ||
110 | } | ||
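
As written, the four ioremap() mappings above are never unmapped, so each call leaks four 4 KiB MMIO mappings (and none of the ioremap() results are checked for NULL). A leak-free variant would release each base after priming the credit registers; sketch of the function tail:

    /* ... read/write-back pairs as above ... */

    iounmap(soc1);
    iounmap(soc2);
    iounmap(soc3);
    iounmap(soc4);
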
111 | |||
112 | int nvgpu_nvlink_deinit(struct gk20a *g) | ||
113 | { | ||
114 | #ifdef CONFIG_TEGRA_NVLINK | ||
115 | struct nvlink_device *ndev = g->nvlink.priv; | ||
116 | int err; | ||
117 | |||
118 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) | ||
119 | return -ENODEV; | ||
120 | |||
121 | err = nvlink_shutdown(ndev); | ||
122 | if (err) { | ||
123 | nvgpu_err(g, "failed to shut down nvlink"); | ||
124 | return err; | ||
125 | } | ||
126 | |||
127 | nvgpu_nvlink_remove(g); | ||
128 | |||
129 | return 0; | ||
130 | #endif | ||
131 | return -ENODEV; | ||
132 | } | ||
diff --git a/include/os/linux/nvlink.h b/include/os/linux/nvlink.h new file mode 100644 index 0000000..4dc54f6 --- /dev/null +++ b/include/os/linux/nvlink.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef NVGPU_OS_LINUX_NVLINK_H | ||
18 | #define NVGPU_OS_LINUX_NVLINK_H | ||
19 | struct gk20a; | ||
20 | int nvgpu_nvlink_deinit(struct gk20a *g); | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/os_fence_android.c b/include/os/linux/os_fence_android.c new file mode 100644 index 0000000..013989e --- /dev/null +++ b/include/os/linux/os_fence_android.c | |||
@@ -0,0 +1,79 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #include <nvgpu/types.h> | ||
17 | #include <nvgpu/os_fence.h> | ||
18 | #include <nvgpu/linux/os_fence_android.h> | ||
19 | #include <nvgpu/gk20a.h> | ||
20 | #include <nvgpu/channel.h> | ||
21 | |||
22 | #include "../drivers/staging/android/sync.h" | ||
23 | |||
24 | inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s) | ||
25 | { | ||
26 | struct sync_fence *fence = (struct sync_fence *)s->priv; | ||
27 | return fence; | ||
28 | } | ||
29 | |||
30 | static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out) | ||
31 | { | ||
32 | fence_out->priv = NULL; | ||
33 | fence_out->g = NULL; | ||
34 | fence_out->ops = NULL; | ||
35 | } | ||
36 | |||
37 | void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out, | ||
38 | struct gk20a *g, const struct nvgpu_os_fence_ops *fops, | ||
39 | struct sync_fence *fence) | ||
40 | { | ||
41 | fence_out->g = g; | ||
42 | fence_out->ops = fops; | ||
43 | fence_out->priv = (void *)fence; | ||
44 | } | ||
45 | |||
46 | void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s) | ||
47 | { | ||
48 | struct sync_fence *fence = nvgpu_get_sync_fence(s); | ||
49 | |||
50 | sync_fence_put(fence); | ||
51 | |||
52 | nvgpu_os_fence_clear(s); | ||
53 | } | ||
54 | |||
55 | void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd) | ||
56 | { | ||
57 | struct sync_fence *fence = nvgpu_get_sync_fence(s); | ||
58 | |||
59 | sync_fence_get(fence); | ||
60 | sync_fence_install(fence, fd); | ||
61 | } | ||
62 | |||
63 | int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out, | ||
64 | struct channel_gk20a *c, int fd) | ||
65 | { | ||
66 | int err = -ENOSYS; | ||
67 | |||
68 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
69 | err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd); | ||
70 | #endif | ||
71 | |||
72 | if (err) | ||
73 | err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); | ||
74 | |||
75 | if (err) | ||
76 | nvgpu_err(c->g, "error obtaining fence from fd %d", fd); | ||
77 | |||
78 | return err; | ||
79 | } | ||
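nvgpu_os_fence_fdget() above first tries the syncpoint backend (when CONFIG_TEGRA_GK20A_NVHOST is set) and falls back to the semaphore backend, so callers stay backend-agnostic and drive the fence purely through its ops table. A hedged usage sketch, assuming a caller inside the driver that already holds a channel and a private command buffer slot:

```c
/*
 * Illustrative only: turn a pre-fence fd into wait commands via the
 * nvgpu_os_fence ops, then drop the reference taken by fdget.
 */
static int example_program_prefence(struct channel_gk20a *ch,
				    struct priv_cmd_entry *wait_cmd, int fd)
{
	struct nvgpu_os_fence fence = {0};
	int err;

	err = nvgpu_os_fence_fdget(&fence, ch, fd);
	if (err)
		return err;

	/* backend-specific: syncpoint or semaphore wait commands */
	err = fence.ops->program_waits(&fence, wait_cmd, ch, 0);

	/* release the sync_fence reference regardless of outcome */
	fence.ops->drop_ref(&fence);

	return err;
}
```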
diff --git a/include/os/linux/os_fence_android_sema.c b/include/os/linux/os_fence_android_sema.c new file mode 100644 index 0000000..eb60600 --- /dev/null +++ b/include/os/linux/os_fence_android_sema.c | |||
@@ -0,0 +1,112 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/errno.h> | ||
18 | |||
19 | #include <nvgpu/types.h> | ||
20 | #include <nvgpu/os_fence.h> | ||
21 | #include <nvgpu/linux/os_fence_android.h> | ||
22 | #include <nvgpu/semaphore.h> | ||
23 | #include <nvgpu/gk20a.h> | ||
24 | #include <nvgpu/channel.h> | ||
25 | #include <nvgpu/channel_sync.h> | ||
26 | |||
27 | #include "gk20a/mm_gk20a.h" | ||
28 | |||
29 | #include "sync_sema_android.h" | ||
30 | |||
31 | #include "../drivers/staging/android/sync.h" | ||
32 | |||
33 | int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s, | ||
34 | struct priv_cmd_entry *wait_cmd, | ||
35 | struct channel_gk20a *c, | ||
36 | int max_wait_cmds) | ||
37 | { | ||
38 | int err; | ||
39 | int wait_cmd_size; | ||
40 | int num_wait_cmds; | ||
41 | int i; | ||
42 | struct nvgpu_semaphore *sema; | ||
43 | struct sync_fence *sync_fence = nvgpu_get_sync_fence(s); | ||
44 | |||
45 | wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size(); | ||
46 | |||
47 | num_wait_cmds = sync_fence->num_fences; | ||
48 | if (num_wait_cmds == 0) | ||
49 | return 0; | ||
50 | |||
51 | if (max_wait_cmds && num_wait_cmds > max_wait_cmds) | ||
52 | return -EINVAL; | ||
53 | |||
54 | err = gk20a_channel_alloc_priv_cmdbuf(c, | ||
55 | wait_cmd_size * num_wait_cmds, | ||
56 | wait_cmd); | ||
57 | if (err) { | ||
58 | return err; | ||
59 | } | ||
60 | |||
61 | for (i = 0; i < num_wait_cmds; i++) { | ||
62 | struct sync_pt *pt = sync_pt_from_fence( | ||
63 | sync_fence->cbs[i].sync_pt); | ||
64 | |||
65 | sema = gk20a_sync_pt_sema(pt); | ||
66 | channel_sync_semaphore_gen_wait_cmd(c, sema, wait_cmd, | ||
67 | wait_cmd_size, i); | ||
68 | } | ||
69 | |||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | static const struct nvgpu_os_fence_ops sema_ops = { | ||
74 | .program_waits = nvgpu_os_fence_sema_wait_gen_cmd, | ||
75 | .drop_ref = nvgpu_os_fence_android_drop_ref, | ||
76 | .install_fence = nvgpu_os_fence_android_install_fd, | ||
77 | }; | ||
78 | |||
79 | int nvgpu_os_fence_sema_create( | ||
80 | struct nvgpu_os_fence *fence_out, | ||
81 | struct channel_gk20a *c, | ||
82 | struct nvgpu_semaphore *sema) | ||
83 | { | ||
84 | struct sync_fence *fence; | ||
85 | |||
86 | fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x", | ||
87 | nvgpu_semaphore_gpu_ro_va(sema)); | ||
88 | |||
89 | if (!fence) { | ||
90 | nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x", | ||
91 | (u32)nvgpu_semaphore_gpu_ro_va(sema)); | ||
92 | |||
93 | return -ENOMEM; | ||
94 | } | ||
95 | |||
96 | nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out, | ||
102 | struct channel_gk20a *c, int fd) | ||
103 | { | ||
104 | struct sync_fence *fence = gk20a_sync_fence_fdget(fd); | ||
105 | |||
106 | if (!fence) | ||
107 | return -EINVAL; | ||
108 | |||
109 | nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); | ||
110 | |||
111 | return 0; | ||
112 | } | ||
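Both wait-generation paths (the semaphore one above and the syncpoint one in the next file) size a single private command buffer as one fixed-size slot per sync point, then fill the slots by index. A self-contained toy model of that layout, with stand-in values where noted:

```c
#include <stdio.h>
#include <stdlib.h>

#define WAIT_CMD_WORDS 8u	/* stand-in for get_sema_wait_cmd_size() */

int main(void)
{
	unsigned int num_pts = 3;	/* stand-in for sync_fence->num_fences */
	unsigned int *buf = calloc((size_t)num_pts * WAIT_CMD_WORDS,
				   sizeof(*buf));
	unsigned int i;

	if (!buf)
		return 1;

	for (i = 0; i < num_pts; i++) {
		unsigned int *slot = buf + i * WAIT_CMD_WORDS;

		slot[0] = 0x1001cu;	/* stand-in for a wait method header */
		slot[1] = i;		/* stand-in for the per-point payload */
	}

	printf("filled %u wait slots of %u words each\n",
	       num_pts, WAIT_CMD_WORDS);
	free(buf);
	return 0;
}
```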
diff --git a/include/os/linux/os_fence_android_syncpt.c b/include/os/linux/os_fence_android_syncpt.c new file mode 100644 index 0000000..368a03c --- /dev/null +++ b/include/os/linux/os_fence_android_syncpt.c | |||
@@ -0,0 +1,121 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/err.h> | ||
18 | #include <nvgpu/errno.h> | ||
19 | |||
20 | #include <nvgpu/types.h> | ||
21 | #include <nvgpu/os_fence.h> | ||
22 | #include <nvgpu/linux/os_fence_android.h> | ||
23 | #include <nvgpu/nvhost.h> | ||
24 | #include <nvgpu/atomic.h> | ||
25 | #include <nvgpu/gk20a.h> | ||
26 | #include <nvgpu/channel.h> | ||
27 | #include <nvgpu/channel_sync.h> | ||
28 | |||
29 | #include "gk20a/mm_gk20a.h" | ||
30 | |||
31 | #include "../drivers/staging/android/sync.h" | ||
32 | |||
33 | int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s, | ||
34 | struct priv_cmd_entry *wait_cmd, | ||
35 | struct channel_gk20a *c, | ||
36 | int max_wait_cmds) | ||
37 | { | ||
38 | int err; | ||
39 | int wait_cmd_size; | ||
40 | int num_wait_cmds; | ||
41 | int i; | ||
42 | u32 wait_id; | ||
43 | struct sync_pt *pt; | ||
44 | |||
45 | struct sync_fence *sync_fence = (struct sync_fence *)s->priv; | ||
46 | |||
47 | if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) | ||
48 | return -EINVAL; | ||
49 | |||
50 | /* validate syncpt ids */ | ||
51 | for (i = 0; i < sync_fence->num_fences; i++) { | ||
52 | pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); | ||
53 | wait_id = nvgpu_nvhost_sync_pt_id(pt); | ||
54 | if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext( | ||
55 | c->g->nvhost_dev, wait_id)) { | ||
56 | return -EINVAL; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); | ||
61 | if (num_wait_cmds == 0) | ||
62 | return 0; | ||
63 | |||
64 | wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); | ||
65 | err = gk20a_channel_alloc_priv_cmdbuf(c, | ||
66 | wait_cmd_size * num_wait_cmds, wait_cmd); | ||
67 | if (err) { | ||
68 | return err; | ||
69 | } | ||
70 | |||
71 | for (i = 0; i < sync_fence->num_fences; i++) { | ||
72 | struct sync_pt *pt = sync_pt_from_fence( | ||
73 | sync_fence->cbs[i].sync_pt); | ||
74 | u32 wait_id = nvgpu_nvhost_sync_pt_id(pt); | ||
75 | u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt); | ||
76 | |||
77 | err = channel_sync_syncpt_gen_wait_cmd(c, wait_id, wait_value, | ||
78 | wait_cmd, wait_cmd_size, i, true); | ||
79 | } | ||
80 | |||
81 | WARN_ON(i != num_wait_cmds); | ||
82 | |||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static const struct nvgpu_os_fence_ops syncpt_ops = { | ||
87 | .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd, | ||
88 | .drop_ref = nvgpu_os_fence_android_drop_ref, | ||
89 | .install_fence = nvgpu_os_fence_android_install_fd, | ||
90 | }; | ||
91 | |||
92 | int nvgpu_os_fence_syncpt_create( | ||
93 | struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, | ||
94 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) | ||
95 | { | ||
96 | struct sync_fence *fence = nvgpu_nvhost_sync_create_fence( | ||
97 | nvhost_dev, id, thresh, "fence"); | ||
98 | |||
99 | if (IS_ERR(fence)) { | ||
100 | nvgpu_err(c->g, "error %d during construction of fence", (int)PTR_ERR(fence)); | ||
101 | return PTR_ERR(fence); | ||
102 | } | ||
103 | |||
104 | nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); | ||
105 | |||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out, | ||
110 | struct channel_gk20a *c, int fd) | ||
111 | { | ||
112 | struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd); | ||
113 | |||
114 | if (fence == NULL) { | ||
115 | return -ENOMEM; | ||
116 | } | ||
117 | |||
118 | nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); | ||
119 | |||
120 | return 0; | ||
121 | } | ||
diff --git a/include/os/linux/os_linux.h b/include/os/linux/os_linux.h new file mode 100644 index 0000000..25c6c03 --- /dev/null +++ b/include/os/linux/os_linux.h | |||
@@ -0,0 +1,187 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef NVGPU_OS_LINUX_H | ||
18 | #define NVGPU_OS_LINUX_H | ||
19 | |||
20 | #include <linux/cdev.h> | ||
21 | #include <linux/iommu.h> | ||
22 | #include <linux/hashtable.h> | ||
23 | |||
24 | #include <nvgpu/gk20a.h> | ||
25 | |||
26 | #include "cde.h" | ||
27 | #include "sched.h" | ||
28 | |||
29 | struct nvgpu_os_linux_ops { | ||
30 | struct { | ||
31 | void (*get_program_numbers)(struct gk20a *g, | ||
32 | u32 block_height_log2, | ||
33 | u32 shader_parameter, | ||
34 | int *hprog, int *vprog); | ||
35 | bool (*need_scatter_buffer)(struct gk20a *g); | ||
36 | int (*populate_scatter_buffer)(struct gk20a *g, | ||
37 | struct sg_table *sgt, | ||
38 | size_t surface_size, | ||
39 | void *scatter_buffer_ptr, | ||
40 | size_t scatter_buffer_size); | ||
41 | } cde; | ||
42 | |||
43 | struct { | ||
44 | int (*init_debugfs)(struct gk20a *g); | ||
45 | } clk; | ||
46 | |||
47 | struct { | ||
48 | int (*init_debugfs)(struct gk20a *g); | ||
49 | } therm; | ||
50 | |||
51 | struct { | ||
52 | int (*init_debugfs)(struct gk20a *g); | ||
53 | } fecs_trace; | ||
54 | }; | ||
55 | |||
56 | struct nvgpu_os_linux { | ||
57 | struct gk20a g; | ||
58 | struct device *dev; | ||
59 | |||
60 | struct { | ||
61 | struct cdev cdev; | ||
62 | struct device *node; | ||
63 | } channel; | ||
64 | |||
65 | struct { | ||
66 | struct cdev cdev; | ||
67 | struct device *node; | ||
68 | /* see gk20a_ctrl_priv */ | ||
69 | struct nvgpu_list_node privs; | ||
70 | /* guards modifications to the list and its contents */ | ||
71 | struct nvgpu_mutex privs_lock; | ||
72 | } ctrl; | ||
73 | |||
74 | struct { | ||
75 | struct cdev cdev; | ||
76 | struct device *node; | ||
77 | } as_dev; | ||
78 | |||
79 | struct { | ||
80 | struct cdev cdev; | ||
81 | struct device *node; | ||
82 | } dbg; | ||
83 | |||
84 | struct { | ||
85 | struct cdev cdev; | ||
86 | struct device *node; | ||
87 | } prof; | ||
88 | |||
89 | struct { | ||
90 | struct cdev cdev; | ||
91 | struct device *node; | ||
92 | } tsg; | ||
93 | |||
94 | struct { | ||
95 | struct cdev cdev; | ||
96 | struct device *node; | ||
97 | } ctxsw; | ||
98 | |||
99 | struct { | ||
100 | struct cdev cdev; | ||
101 | struct device *node; | ||
102 | } sched; | ||
103 | |||
104 | dev_t cdev_region; | ||
105 | |||
106 | struct devfreq *devfreq; | ||
107 | |||
108 | struct device_dma_parameters dma_parms; | ||
109 | |||
110 | atomic_t hw_irq_stall_count; | ||
111 | atomic_t hw_irq_nonstall_count; | ||
112 | |||
113 | struct nvgpu_cond sw_irq_stall_last_handled_wq; | ||
114 | atomic_t sw_irq_stall_last_handled; | ||
115 | |||
116 | atomic_t nonstall_ops; | ||
117 | |||
118 | struct nvgpu_cond sw_irq_nonstall_last_handled_wq; | ||
119 | atomic_t sw_irq_nonstall_last_handled; | ||
120 | |||
121 | struct work_struct nonstall_fn_work; | ||
122 | struct workqueue_struct *nonstall_work_queue; | ||
123 | |||
124 | struct resource *reg_mem; | ||
125 | void __iomem *regs; | ||
126 | void __iomem *regs_saved; | ||
127 | |||
128 | struct resource *bar1_mem; | ||
129 | void __iomem *bar1; | ||
130 | void __iomem *bar1_saved; | ||
131 | |||
132 | void __iomem *usermode_regs; | ||
133 | void __iomem *usermode_regs_saved; | ||
134 | |||
135 | u64 regs_bus_addr; | ||
136 | |||
137 | struct nvgpu_os_linux_ops ops; | ||
138 | |||
139 | #ifdef CONFIG_DEBUG_FS | ||
140 | struct dentry *debugfs; | ||
141 | struct dentry *debugfs_alias; | ||
142 | |||
143 | struct dentry *debugfs_ltc_enabled; | ||
144 | struct dentry *debugfs_timeouts_enabled; | ||
145 | struct dentry *debugfs_gr_idle_timeout_default; | ||
146 | struct dentry *debugfs_disable_bigpage; | ||
147 | struct dentry *debugfs_gr_default_attrib_cb_size; | ||
148 | |||
149 | struct dentry *debugfs_timeslice_low_priority_us; | ||
150 | struct dentry *debugfs_timeslice_medium_priority_us; | ||
151 | struct dentry *debugfs_timeslice_high_priority_us; | ||
152 | struct dentry *debugfs_runlist_interleave; | ||
153 | struct dentry *debugfs_allocators; | ||
154 | struct dentry *debugfs_xve; | ||
155 | struct dentry *debugfs_kmem; | ||
156 | struct dentry *debugfs_hal; | ||
157 | struct dentry *debugfs_ltc; | ||
158 | |||
159 | struct dentry *debugfs_force_preemption_cilp; | ||
160 | struct dentry *debugfs_force_preemption_gfxp; | ||
161 | struct dentry *debugfs_dump_ctxsw_stats; | ||
162 | #endif | ||
163 | DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5); | ||
164 | struct dev_ext_attribute *ecc_attrs; | ||
165 | |||
166 | struct gk20a_cde_app cde_app; | ||
167 | |||
168 | struct rw_semaphore busy_lock; | ||
169 | |||
170 | bool init_done; | ||
171 | }; | ||
172 | |||
173 | static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) | ||
174 | { | ||
175 | return container_of(g, struct nvgpu_os_linux, g); | ||
176 | } | ||
177 | |||
178 | static inline struct device *dev_from_gk20a(struct gk20a *g) | ||
179 | { | ||
180 | return nvgpu_os_linux_from_gk20a(g)->dev; | ||
181 | } | ||
182 | |||
183 | #define INTERFACE_NAME "nvhost%s-gpu" | ||
184 | |||
185 | #define totalram_size_in_mb (totalram_pages >> (10 - (PAGE_SHIFT - 10))) | ||
186 | |||
187 | #endif | ||
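Two notes on the helpers above. First, struct gk20a is embedded directly in struct nvgpu_os_linux, so nvgpu_os_linux_from_gk20a() is a plain container_of() with no allocation or lookup. Second, totalram_size_in_mb converts pages to MiB: pages << PAGE_SHIFT would give bytes and bytes >> 20 gives MiB, so the net right-shift is (20 - PAGE_SHIFT), spelled here as (10 - (PAGE_SHIFT - 10)). An illustrative use, assuming a valid g:

```c
#include <linux/device.h>

static void example_report(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	/* l->dev is the same pointer dev_from_gk20a(g) returns */
	dev_info(l->dev, "system memory: %lu MiB\n",
		 (unsigned long)totalram_size_in_mb);
}
```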
diff --git a/include/os/linux/os_ops.c b/include/os/linux/os_ops.c new file mode 100644 index 0000000..f1ab4b1 --- /dev/null +++ b/include/os/linux/os_ops.c | |||
@@ -0,0 +1,61 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include "os_ops_gm20b.h" | ||
20 | #include "os_ops_gp10b.h" | ||
21 | #include "os_ops_gp106.h" | ||
22 | #include "os_ops_gv11b.h" | ||
23 | #include "os_ops_gv100.h" | ||
24 | |||
25 | #if defined(CONFIG_TEGRA_GPU_NEXT) | ||
26 | #include "nvgpu_gpuid_next.h" | ||
27 | #endif | ||
28 | |||
29 | int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l) | ||
30 | { | ||
31 | struct gk20a *g = &l->g; | ||
32 | u32 ver = g->params.gpu_arch + g->params.gpu_impl; | ||
33 | |||
34 | switch (ver) { | ||
35 | case GK20A_GPUID_GM20B: | ||
36 | case GK20A_GPUID_GM20B_B: | ||
37 | nvgpu_gm20b_init_os_ops(l); | ||
38 | break; | ||
39 | case NVGPU_GPUID_GP10B: | ||
40 | nvgpu_gp10b_init_os_ops(l); | ||
41 | break; | ||
42 | case NVGPU_GPUID_GP106: | ||
43 | nvgpu_gp106_init_os_ops(l); | ||
44 | break; | ||
45 | case NVGPU_GPUID_GV100: | ||
46 | nvgpu_gv100_init_os_ops(l); | ||
47 | break; | ||
48 | case NVGPU_GPUID_GV11B: | ||
49 | nvgpu_gv11b_init_os_ops(l); | ||
50 | break; | ||
51 | #if defined(CONFIG_TEGRA_GPU_NEXT) | ||
52 | case NVGPU_GPUID_NEXT: | ||
53 | NVGPU_NEXT_INIT_OS_OPS(l); | ||
54 | break; | ||
55 | #endif | ||
56 | default: | ||
57 | break; | ||
58 | } | ||
59 | |||
60 | return 0; | ||
61 | } | ||
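The version switched on above is the sum of the architecture and implementation fields reported by the GPU. A worked decomposition, with values assumed from the usual nvgpu encoding rather than stated in this file:

```c
/*
 * Assumed encoding, for illustration only:
 *   gpu_arch 0x130 (Pascal) + gpu_impl 0x0b = 0x13b == NVGPU_GPUID_GP10B
 *   gpu_arch 0x150 (Volta)  + gpu_impl 0x0b = 0x15b == NVGPU_GPUID_GV11B
 */
```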
diff --git a/include/os/linux/os_ops.h b/include/os/linux/os_ops.h new file mode 100644 index 0000000..af3ce0a --- /dev/null +++ b/include/os/linux/os_ops.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __LINUX_OS_OPS_H | ||
18 | #define __LINUX_OS_OPS_H | ||
19 | |||
20 | int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l); | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/os_ops_gm20b.c b/include/os/linux/os_ops_gm20b.c new file mode 100644 index 0000000..77aee39 --- /dev/null +++ b/include/os/linux/os_ops_gm20b.c | |||
@@ -0,0 +1,47 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include "cde_gm20b.h" | ||
20 | #include "debug_clk_gm20b.h" | ||
21 | #include "debug_fecs_trace.h" | ||
22 | |||
23 | |||
24 | static struct nvgpu_os_linux_ops gm20b_os_linux_ops = { | ||
25 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
26 | .cde = { | ||
27 | .get_program_numbers = gm20b_cde_get_program_numbers, | ||
28 | }, | ||
29 | #endif | ||
30 | .clk = { | ||
31 | .init_debugfs = gm20b_clk_init_debugfs, | ||
32 | }, | ||
33 | |||
34 | .fecs_trace = { | ||
35 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
36 | }, | ||
37 | }; | ||
38 | |||
39 | void nvgpu_gm20b_init_os_ops(struct nvgpu_os_linux *l) | ||
40 | { | ||
41 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
42 | l->ops.cde = gm20b_os_linux_ops.cde; | ||
43 | #endif | ||
44 | l->ops.clk = gm20b_os_linux_ops.clk; | ||
45 | |||
46 | l->ops.fecs_trace = gm20b_os_linux_ops.fecs_trace; | ||
47 | } | ||
diff --git a/include/os/linux/os_ops_gm20b.h b/include/os/linux/os_ops_gm20b.h new file mode 100644 index 0000000..7d27e40 --- /dev/null +++ b/include/os/linux/os_ops_gm20b.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __LINUX_OS_OPS_GM20B_H | ||
18 | #define __LINUX_OS_OPS_GM20B_H | ||
19 | |||
20 | void nvgpu_gm20b_init_os_ops(struct nvgpu_os_linux *l); | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/os_ops_gp106.c b/include/os/linux/os_ops_gp106.c new file mode 100644 index 0000000..14f1b00 --- /dev/null +++ b/include/os/linux/os_ops_gp106.c | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include "debug_clk_gp106.h" | ||
20 | #include "debug_therm_gp106.h" | ||
21 | #include "debug_fecs_trace.h" | ||
22 | |||
23 | static struct nvgpu_os_linux_ops gp106_os_linux_ops = { | ||
24 | .clk = { | ||
25 | .init_debugfs = gp106_clk_init_debugfs, | ||
26 | }, | ||
27 | .therm = { | ||
28 | .init_debugfs = gp106_therm_init_debugfs, | ||
29 | }, | ||
30 | .fecs_trace = { | ||
31 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
32 | }, | ||
33 | }; | ||
34 | |||
35 | void nvgpu_gp106_init_os_ops(struct nvgpu_os_linux *l) | ||
36 | { | ||
37 | l->ops.clk = gp106_os_linux_ops.clk; | ||
38 | l->ops.therm = gp106_os_linux_ops.therm; | ||
39 | l->ops.fecs_trace = gp106_os_linux_ops.fecs_trace; | ||
40 | } | ||
diff --git a/include/os/linux/os_ops_gp106.h b/include/os/linux/os_ops_gp106.h new file mode 100644 index 0000000..7d423d5 --- /dev/null +++ b/include/os/linux/os_ops_gp106.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __LINUX_OS_OPS_GP106_H | ||
18 | #define __LINUX_OS_OPS_GP106_H | ||
19 | |||
20 | void nvgpu_gp106_init_os_ops(struct nvgpu_os_linux *l); | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/os_ops_gp10b.c b/include/os/linux/os_ops_gp10b.c new file mode 100644 index 0000000..e2891f7 --- /dev/null +++ b/include/os/linux/os_ops_gp10b.c | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include "cde_gp10b.h" | ||
20 | #include "debug_fecs_trace.h" | ||
21 | |||
22 | static struct nvgpu_os_linux_ops gp10b_os_linux_ops = { | ||
23 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
24 | .cde = { | ||
25 | .get_program_numbers = gp10b_cde_get_program_numbers, | ||
26 | .need_scatter_buffer = gp10b_need_scatter_buffer, | ||
27 | .populate_scatter_buffer = gp10b_populate_scatter_buffer, | ||
28 | }, | ||
29 | #endif | ||
30 | .fecs_trace = { | ||
31 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
32 | }, | ||
33 | }; | ||
34 | |||
35 | void nvgpu_gp10b_init_os_ops(struct nvgpu_os_linux *l) | ||
36 | { | ||
37 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
38 | l->ops.cde = gp10b_os_linux_ops.cde; | ||
39 | #endif | ||
40 | l->ops.fecs_trace = gp10b_os_linux_ops.fecs_trace; | ||
41 | } | ||
diff --git a/include/os/linux/os_ops_gp10b.h b/include/os/linux/os_ops_gp10b.h new file mode 100644 index 0000000..0be1bca --- /dev/null +++ b/include/os/linux/os_ops_gp10b.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __LINUX_OS_OPS_GP10B_H | ||
18 | #define __LINUX_OS_OPS_GP10B_H | ||
19 | |||
20 | void nvgpu_gp10b_init_os_ops(struct nvgpu_os_linux *l); | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/os_ops_gv100.c b/include/os/linux/os_ops_gv100.c new file mode 100644 index 0000000..9d92bdf --- /dev/null +++ b/include/os/linux/os_ops_gv100.c | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include "debug_clk_gv100.h" | ||
20 | #include "debug_therm_gp106.h" | ||
21 | #include "debug_fecs_trace.h" | ||
22 | |||
23 | static struct nvgpu_os_linux_ops gv100_os_linux_ops = { | ||
24 | .clk = { | ||
25 | .init_debugfs = gv100_clk_init_debugfs, | ||
26 | }, | ||
27 | .therm = { | ||
28 | .init_debugfs = gp106_therm_init_debugfs, | ||
29 | }, | ||
30 | .fecs_trace = { | ||
31 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
32 | }, | ||
33 | }; | ||
34 | |||
35 | void nvgpu_gv100_init_os_ops(struct nvgpu_os_linux *l) | ||
36 | { | ||
37 | l->ops.clk = gv100_os_linux_ops.clk; | ||
38 | l->ops.therm = gv100_os_linux_ops.therm; | ||
39 | l->ops.fecs_trace = gv100_os_linux_ops.fecs_trace; | ||
40 | } | ||
diff --git a/include/os/linux/os_ops_gv100.h b/include/os/linux/os_ops_gv100.h new file mode 100644 index 0000000..43923b2 --- /dev/null +++ b/include/os/linux/os_ops_gv100.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __LINUX_OS_OPS_GV100_H | ||
18 | #define __LINUX_OS_OPS_GV100_H | ||
19 | |||
20 | void nvgpu_gv100_init_os_ops(struct nvgpu_os_linux *l); | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/os_ops_gv11b.c b/include/os/linux/os_ops_gv11b.c new file mode 100644 index 0000000..a82ad0a --- /dev/null +++ b/include/os/linux/os_ops_gv11b.c | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include "debug_fecs_trace.h" | ||
20 | |||
21 | static struct nvgpu_os_linux_ops gv11b_os_linux_ops = { | ||
22 | .fecs_trace = { | ||
23 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
24 | }, | ||
25 | }; | ||
26 | |||
27 | void nvgpu_gv11b_init_os_ops(struct nvgpu_os_linux *l) | ||
28 | { | ||
29 | l->ops.fecs_trace = gv11b_os_linux_ops.fecs_trace; | ||
30 | } | ||
diff --git a/include/os/linux/os_ops_gv11b.h b/include/os/linux/os_ops_gv11b.h new file mode 100644 index 0000000..eef6c4a --- /dev/null +++ b/include/os/linux/os_ops_gv11b.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef LINUX_OS_OPS_GV11B_H | ||
18 | #define LINUX_OS_OPS_GV11B_H | ||
19 | |||
20 | struct nvgpu_os_linux; | ||
21 | |||
22 | void nvgpu_gv11b_init_os_ops(struct nvgpu_os_linux *l); | ||
23 | |||
24 | #endif | ||
diff --git a/include/os/linux/os_sched.c b/include/os/linux/os_sched.c new file mode 100644 index 0000000..9a25da1 --- /dev/null +++ b/include/os/linux/os_sched.c | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/os_sched.h> | ||
15 | |||
16 | #include <linux/sched.h> | ||
17 | |||
18 | int nvgpu_current_tid(struct gk20a *g) | ||
19 | { | ||
20 | return current->pid; | ||
21 | } | ||
22 | |||
23 | int nvgpu_current_pid(struct gk20a *g) | ||
24 | { | ||
25 | return current->tgid; | ||
26 | } | ||
27 | |||
28 | void __nvgpu_print_current(struct gk20a *g, const char *func_name, int line, | ||
29 | void *ctx, enum nvgpu_log_type type) | ||
30 | { | ||
31 | __nvgpu_log_msg(g, func_name, line, type, current->comm); | ||
32 | } | ||
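A note to forestall a misreading of the two functions above: they are not swapped. In Linux, current->pid is the per-task (thread) id and current->tgid is the thread-group id shared by all threads of a process, so the thread id maps to pid and the process id maps to tgid.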
diff --git a/include/os/linux/pci.c b/include/os/linux/pci.c new file mode 100644 index 0000000..07071d1 --- /dev/null +++ b/include/os/linux/pci.c | |||
@@ -0,0 +1,854 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/pci.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/pm_runtime.h> | ||
20 | #include <linux/of_platform.h> | ||
21 | #include <linux/of_address.h> | ||
22 | |||
23 | #include <nvgpu/nvhost.h> | ||
24 | #include <nvgpu/nvgpu_common.h> | ||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/enabled.h> | ||
27 | #include <nvgpu/nvlink.h> | ||
28 | #include <nvgpu/soc.h> | ||
29 | #include <nvgpu/sim.h> | ||
30 | #include <nvgpu/gk20a.h> | ||
31 | |||
32 | #include "nvlink.h" | ||
33 | #include "clk/clk.h" | ||
34 | #include "clk/clk_mclk.h" | ||
35 | #include "module.h" | ||
36 | #include "intr.h" | ||
37 | #include "sysfs.h" | ||
38 | #include "os_linux.h" | ||
39 | #include "platform_gk20a.h" | ||
40 | |||
41 | #include "pci.h" | ||
42 | #include "pci_usermode.h" | ||
43 | |||
44 | #include "driver_common.h" | ||
45 | |||
46 | #define PCI_INTERFACE_NAME "card-%s%%s" | ||
47 | |||
48 | static int nvgpu_pci_tegra_probe(struct device *dev) | ||
49 | { | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | static int nvgpu_pci_tegra_remove(struct device *dev) | ||
54 | { | ||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | static bool nvgpu_pci_tegra_is_railgated(struct device *pdev) | ||
59 | { | ||
60 | return false; | ||
61 | } | ||
62 | |||
63 | static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate) | ||
64 | { | ||
65 | long ret = (long)rate; | ||
66 | |||
67 | if (rate == UINT_MAX) | ||
68 | ret = BOOT_GPC2CLK_MHZ * 1000000UL; | ||
69 | |||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | static struct gk20a_platform nvgpu_pci_device[] = { | ||
74 | { /* DEVICE=0x1c35 */ | ||
75 | /* ptimer src frequency in hz */ | ||
76 | .ptimer_src_freq = 31250000, | ||
77 | |||
78 | .probe = nvgpu_pci_tegra_probe, | ||
79 | .remove = nvgpu_pci_tegra_remove, | ||
80 | |||
81 | /* power management configuration */ | ||
82 | .railgate_delay_init = 500, | ||
83 | .can_railgate_init = false, | ||
84 | .can_elpg_init = true, | ||
85 | .enable_elpg = true, | ||
86 | .enable_elcg = false, | ||
87 | .enable_slcg = true, | ||
88 | .enable_blcg = true, | ||
89 | .enable_mscg = true, | ||
90 | .can_slcg = true, | ||
91 | .can_blcg = true, | ||
92 | .can_elcg = true, | ||
93 | |||
94 | .disable_aspm = true, | ||
95 | |||
96 | /* power management callbacks */ | ||
97 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
98 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
99 | |||
100 | .ch_wdt_timeout_ms = 7000, | ||
101 | |||
102 | .honors_aperture = true, | ||
103 | .dma_mask = DMA_BIT_MASK(40), | ||
104 | .vbios_min_version = 0x86063000, | ||
105 | .hardcode_sw_threshold = true, | ||
106 | .ina3221_dcb_index = 0, | ||
107 | .ina3221_i2c_address = 0x84, | ||
108 | .ina3221_i2c_port = 0x2, | ||
109 | }, | ||
110 | { /* DEVICE=0x1c36 */ | ||
111 | /* ptimer src frequency in hz */ | ||
112 | .ptimer_src_freq = 31250000, | ||
113 | |||
114 | .probe = nvgpu_pci_tegra_probe, | ||
115 | .remove = nvgpu_pci_tegra_remove, | ||
116 | |||
117 | /* power management configuration */ | ||
118 | .railgate_delay_init = 500, | ||
119 | .can_railgate_init = false, | ||
120 | .can_elpg_init = true, | ||
121 | .enable_elpg = true, | ||
122 | .enable_elcg = false, | ||
123 | .enable_slcg = true, | ||
124 | .enable_blcg = true, | ||
125 | .enable_mscg = true, | ||
126 | .can_slcg = true, | ||
127 | .can_blcg = true, | ||
128 | .can_elcg = true, | ||
129 | |||
130 | .disable_aspm = true, | ||
131 | |||
132 | /* power management callbacks */ | ||
133 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
134 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
135 | |||
136 | .ch_wdt_timeout_ms = 7000, | ||
137 | |||
138 | .honors_aperture = true, | ||
139 | .dma_mask = DMA_BIT_MASK(40), | ||
140 | .vbios_min_version = 0x86062d00, | ||
141 | .hardcode_sw_threshold = true, | ||
142 | .ina3221_dcb_index = 0, | ||
143 | .ina3221_i2c_address = 0x84, | ||
144 | .ina3221_i2c_port = 0x2, | ||
145 | }, | ||
146 | { /* DEVICE=0x1c37 */ | ||
147 | /* ptimer src frequency in hz */ | ||
148 | .ptimer_src_freq = 31250000, | ||
149 | |||
150 | .probe = nvgpu_pci_tegra_probe, | ||
151 | .remove = nvgpu_pci_tegra_remove, | ||
152 | |||
153 | /* power management configuration */ | ||
154 | .railgate_delay_init = 500, | ||
155 | .can_railgate_init = false, | ||
156 | .can_elpg_init = true, | ||
157 | .enable_elpg = true, | ||
158 | .enable_elcg = false, | ||
159 | .enable_slcg = true, | ||
160 | .enable_blcg = true, | ||
161 | .enable_mscg = true, | ||
162 | .can_slcg = true, | ||
163 | .can_blcg = true, | ||
164 | .can_elcg = true, | ||
165 | |||
166 | .disable_aspm = true, | ||
167 | |||
168 | /* power management callbacks */ | ||
169 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
170 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
171 | |||
172 | .ch_wdt_timeout_ms = 7000, | ||
173 | |||
174 | .honors_aperture = true, | ||
175 | .dma_mask = DMA_BIT_MASK(40), | ||
176 | .vbios_min_version = 0x86063000, | ||
177 | .hardcode_sw_threshold = true, | ||
178 | .ina3221_dcb_index = 0, | ||
179 | .ina3221_i2c_address = 0x84, | ||
180 | .ina3221_i2c_port = 0x2, | ||
181 | }, | ||
182 | { /* DEVICE=0x1c75 */ | ||
183 | /* ptimer src frequency in hz */ | ||
184 | .ptimer_src_freq = 31250000, | ||
185 | |||
186 | .probe = nvgpu_pci_tegra_probe, | ||
187 | .remove = nvgpu_pci_tegra_remove, | ||
188 | |||
189 | /* power management configuration */ | ||
190 | .railgate_delay_init = 500, | ||
191 | .can_railgate_init = false, | ||
192 | .can_elpg_init = true, | ||
193 | .enable_elpg = true, | ||
194 | .enable_elcg = false, | ||
195 | .enable_slcg = true, | ||
196 | .enable_blcg = true, | ||
197 | .enable_mscg = true, | ||
198 | .can_slcg = true, | ||
199 | .can_blcg = true, | ||
200 | .can_elcg = true, | ||
201 | |||
202 | .disable_aspm = true, | ||
203 | |||
204 | /* power management callbacks */ | ||
205 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
206 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
207 | |||
208 | .ch_wdt_timeout_ms = 7000, | ||
209 | |||
210 | .honors_aperture = true, | ||
211 | .dma_mask = DMA_BIT_MASK(40), | ||
212 | .vbios_min_version = 0x86065300, | ||
213 | .hardcode_sw_threshold = false, | ||
214 | .ina3221_dcb_index = 1, | ||
215 | .ina3221_i2c_address = 0x80, | ||
216 | .ina3221_i2c_port = 0x1, | ||
217 | }, | ||
218 | { /* DEVICE=PG503 SKU 201 */ | ||
219 | /* ptimer src frequency in hz */ | ||
220 | .ptimer_src_freq = 31250000, | ||
221 | |||
222 | .probe = nvgpu_pci_tegra_probe, | ||
223 | .remove = nvgpu_pci_tegra_remove, | ||
224 | |||
225 | /* power management configuration */ | ||
226 | .railgate_delay_init = 500, | ||
227 | .can_railgate_init = false, | ||
228 | .can_elpg_init = false, | ||
229 | .enable_elpg = false, | ||
230 | .enable_elcg = false, | ||
231 | .enable_slcg = false, | ||
232 | .enable_blcg = false, | ||
233 | .enable_mscg = false, | ||
234 | .can_slcg = false, | ||
235 | .can_blcg = false, | ||
236 | .can_elcg = false, | ||
237 | |||
238 | .disable_aspm = true, | ||
239 | |||
240 | /* power management callbacks */ | ||
241 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
242 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
243 | |||
244 | .ch_wdt_timeout_ms = 7000, | ||
245 | |||
246 | .honors_aperture = true, | ||
247 | .dma_mask = DMA_BIT_MASK(40), | ||
248 | .vbios_min_version = 0x88001e00, | ||
249 | .hardcode_sw_threshold = false, | ||
250 | .run_preos = true, | ||
251 | }, | ||
252 | { /* DEVICE=PG503 SKU 200 ES */ | ||
253 | /* ptimer src frequency in hz */ | ||
254 | .ptimer_src_freq = 31250000, | ||
255 | |||
256 | .probe = nvgpu_pci_tegra_probe, | ||
257 | .remove = nvgpu_pci_tegra_remove, | ||
258 | |||
259 | /* power management configuration */ | ||
260 | .railgate_delay_init = 500, | ||
261 | .can_railgate_init = false, | ||
262 | .can_elpg_init = false, | ||
263 | .enable_elpg = false, | ||
264 | .enable_elcg = false, | ||
265 | .enable_slcg = false, | ||
266 | .enable_blcg = false, | ||
267 | .enable_mscg = false, | ||
268 | .can_slcg = false, | ||
269 | .can_blcg = false, | ||
270 | .can_elcg = false, | ||
271 | |||
272 | .disable_aspm = true, | ||
273 | |||
274 | /* power management callbacks */ | ||
275 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
276 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
277 | |||
278 | .ch_wdt_timeout_ms = 7000, | ||
279 | |||
280 | .honors_aperture = true, | ||
281 | .dma_mask = DMA_BIT_MASK(40), | ||
282 | .vbios_min_version = 0x88001e00, | ||
283 | .hardcode_sw_threshold = false, | ||
284 | .run_preos = true, | ||
285 | }, | ||
286 | { | ||
287 | /* ptimer src frequency in hz */ | ||
288 | .ptimer_src_freq = 31250000, | ||
289 | |||
290 | .probe = nvgpu_pci_tegra_probe, | ||
291 | .remove = nvgpu_pci_tegra_remove, | ||
292 | |||
293 | /* power management configuration */ | ||
294 | .railgate_delay_init = 500, | ||
295 | .can_railgate_init = false, | ||
296 | .can_elpg_init = false, | ||
297 | .enable_elpg = false, | ||
298 | .enable_elcg = false, | ||
299 | .enable_slcg = false, | ||
300 | .enable_blcg = false, | ||
301 | .enable_mscg = false, | ||
302 | .can_slcg = false, | ||
303 | .can_blcg = false, | ||
304 | .can_elcg = false, | ||
305 | |||
306 | .disable_aspm = true, | ||
307 | |||
308 | /* power management callbacks */ | ||
309 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
310 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
311 | |||
312 | .ch_wdt_timeout_ms = 7000, | ||
313 | |||
314 | .honors_aperture = true, | ||
315 | .dma_mask = DMA_BIT_MASK(40), | ||
316 | .vbios_min_version = 0x88000126, | ||
317 | .hardcode_sw_threshold = false, | ||
318 | .run_preos = true, | ||
319 | .has_syncpoints = true, | ||
320 | }, | ||
321 | { /* SKU250 */ | ||
322 | /* ptimer src frequency in hz */ | ||
323 | .ptimer_src_freq = 31250000, | ||
324 | |||
325 | .probe = nvgpu_pci_tegra_probe, | ||
326 | .remove = nvgpu_pci_tegra_remove, | ||
327 | |||
328 | /* power management configuration */ | ||
329 | .railgate_delay_init = 500, | ||
330 | .can_railgate_init = false, | ||
331 | .can_elpg_init = false, | ||
332 | .enable_elpg = false, | ||
333 | .enable_elcg = false, | ||
334 | .enable_slcg = true, | ||
335 | .enable_blcg = true, | ||
336 | .enable_mscg = false, | ||
337 | .can_slcg = true, | ||
338 | .can_blcg = true, | ||
339 | .can_elcg = false, | ||
340 | |||
341 | .disable_aspm = true, | ||
342 | |||
343 | /* power management callbacks */ | ||
344 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
345 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
346 | |||
347 | .ch_wdt_timeout_ms = 7000, | ||
348 | |||
349 | .honors_aperture = true, | ||
350 | .dma_mask = DMA_BIT_MASK(40), | ||
351 | .vbios_min_version = 0x1, | ||
352 | .hardcode_sw_threshold = false, | ||
353 | .run_preos = true, | ||
354 | .has_syncpoints = true, | ||
355 | }, | ||
356 | { /* SKU 0x1e3f */ | ||
357 | /* ptimer src frequency in hz */ | ||
358 | .ptimer_src_freq = 31250000, | ||
359 | |||
360 | .probe = nvgpu_pci_tegra_probe, | ||
361 | .remove = nvgpu_pci_tegra_remove, | ||
362 | |||
363 | /* power management configuration */ | ||
364 | .railgate_delay_init = 500, | ||
365 | .can_railgate_init = false, | ||
366 | .can_elpg_init = false, | ||
367 | .enable_elpg = false, | ||
368 | .enable_elcg = false, | ||
369 | .enable_slcg = false, | ||
370 | .enable_blcg = false, | ||
371 | .enable_mscg = false, | ||
372 | .can_slcg = false, | ||
373 | .can_blcg = false, | ||
374 | .can_elcg = false, | ||
375 | |||
376 | .disable_aspm = true, | ||
377 | |||
378 | /* power management callbacks */ | ||
379 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
380 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
381 | |||
382 | /* | ||
383 | * WAR: PCIe x1 is very slow; set a very high value until nvlink is up | ||
384 | */ | ||
385 | .ch_wdt_timeout_ms = 30000, | ||
386 | |||
387 | .honors_aperture = true, | ||
388 | .dma_mask = DMA_BIT_MASK(40), | ||
389 | .vbios_min_version = 0x1, | ||
390 | .hardcode_sw_threshold = false, | ||
391 | .unified_memory = false, | ||
392 | }, | ||
393 | |||
394 | }; | ||
395 | |||
396 | static struct pci_device_id nvgpu_pci_table[] = { | ||
397 | { | ||
398 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35), | ||
399 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
400 | .class_mask = 0xff << 16, | ||
401 | .driver_data = 0, | ||
402 | }, | ||
403 | { | ||
404 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36), | ||
405 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
406 | .class_mask = 0xff << 16, | ||
407 | .driver_data = 1, | ||
408 | }, | ||
409 | { | ||
410 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37), | ||
411 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
412 | .class_mask = 0xff << 16, | ||
413 | .driver_data = 2, | ||
414 | }, | ||
415 | { | ||
416 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75), | ||
417 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
418 | .class_mask = 0xff << 16, | ||
419 | .driver_data = 3, | ||
420 | }, | ||
421 | { | ||
422 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1), | ||
423 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
424 | .class_mask = 0xff << 16, | ||
425 | .driver_data = 4, | ||
426 | }, | ||
427 | { | ||
428 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0), | ||
429 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
430 | .class_mask = 0xff << 16, | ||
431 | .driver_data = 5, | ||
432 | }, | ||
433 | { | ||
434 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe), | ||
435 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
436 | .class_mask = 0xff << 16, | ||
437 | .driver_data = 6, | ||
438 | }, | ||
439 | { | ||
440 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1), | ||
441 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
442 | .class_mask = 0xff << 16, | ||
443 | .driver_data = 7, | ||
444 | }, | ||
445 | { | ||
446 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f), | ||
447 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
448 | .class_mask = 0xff << 16, | ||
449 | .driver_data = 8, | ||
450 | }, | ||
451 | {} | ||
452 | }; | ||
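Each entry's .driver_data above is an index into the nvgpu_pci_device[] platform table earlier in this file; nvgpu_pci_probe() below range-checks the index before copying the matching platform entry into freshly allocated platform data.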
453 | |||
454 | static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id) | ||
455 | { | ||
456 | struct gk20a *g = dev_id; | ||
457 | irqreturn_t ret_stall; | ||
458 | irqreturn_t ret_nonstall; | ||
459 | |||
460 | ret_stall = nvgpu_intr_stall(g); | ||
461 | ret_nonstall = nvgpu_intr_nonstall(g); | ||
462 | |||
463 | #if defined(CONFIG_PCI_MSI) | ||
464 | /* Send MSI EOI */ | ||
465 | if (g->ops.xve.rearm_msi && g->msi_enabled) | ||
466 | g->ops.xve.rearm_msi(g); | ||
467 | #endif | ||
468 | |||
469 | return (ret_stall == IRQ_NONE) ? ret_nonstall : IRQ_WAKE_THREAD; | ||
470 | } | ||
471 | |||
472 | static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id) | ||
473 | { | ||
474 | struct gk20a *g = dev_id; | ||
475 | |||
476 | return nvgpu_intr_thread_stall(g); | ||
477 | } | ||
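These two handlers implement the hard-IRQ/threaded-IRQ split expected by devm_request_threaded_irq(), used later in nvgpu_pci_probe(): nvgpu_pci_isr() runs in hard-IRQ context, services nonstall interrupts inline, and returns IRQ_WAKE_THREAD when a stall interrupt needs the heavier processing that nvgpu_pci_intr_thread() performs in thread context.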
478 | |||
479 | static int nvgpu_pci_init_support(struct pci_dev *pdev) | ||
480 | { | ||
481 | int err = 0; | ||
482 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
483 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
484 | struct device *dev = &pdev->dev; | ||
485 | |||
486 | l->regs = nvgpu_devm_ioremap(dev, pci_resource_start(pdev, 0), | ||
487 | pci_resource_len(pdev, 0)); | ||
488 | if (IS_ERR(l->regs)) { | ||
489 | nvgpu_err(g, "failed to remap gk20a registers"); | ||
490 | err = PTR_ERR(l->regs); | ||
491 | goto fail; | ||
492 | } | ||
493 | |||
494 | l->regs_bus_addr = pci_resource_start(pdev, 0); | ||
495 | if (!l->regs_bus_addr) { | ||
496 | nvgpu_err(g, "failed to read register bus offset"); | ||
497 | err = -ENODEV; | ||
498 | goto fail; | ||
499 | } | ||
500 | |||
501 | l->bar1 = nvgpu_devm_ioremap(dev, pci_resource_start(pdev, 1), | ||
502 | pci_resource_len(pdev, 1)); | ||
503 | if (IS_ERR(l->bar1)) { | ||
504 | nvgpu_err(g, "failed to remap gk20a bar1"); | ||
505 | err = PTR_ERR(l->bar1); | ||
506 | goto fail; | ||
507 | } | ||
508 | |||
509 | err = nvgpu_init_sim_support_linux_pci(g); | ||
510 | if (err) | ||
511 | goto fail; | ||
512 | err = nvgpu_init_sim_support_pci(g); | ||
513 | if (err) | ||
514 | goto fail_sim; | ||
515 | |||
516 | nvgpu_pci_init_usermode_support(l); | ||
517 | |||
518 | return 0; | ||
519 | |||
520 | fail_sim: | ||
521 | nvgpu_remove_sim_support_linux_pci(g); | ||
522 | fail: | ||
523 | if (l->regs) | ||
524 | l->regs = NULL; | ||
525 | |||
526 | if (l->bar1) | ||
527 | l->bar1 = NULL; | ||
528 | |||
529 | return err; | ||
530 | } | ||
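Note that the fail path above only clears l->regs and l->bar1 rather than unmapping them: nvgpu_devm_ioremap(), as the devm prefix suggests, creates device-managed mappings that the driver core releases automatically, so clearing the pointers just guards against stale use.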
531 | |||
532 | static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode) | ||
533 | { | ||
534 | if (mode) | ||
535 | *mode = S_IRUGO | S_IWUGO; | ||
536 | return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev)); | ||
537 | } | ||
538 | |||
539 | static struct class nvgpu_pci_class = { | ||
540 | .owner = THIS_MODULE, | ||
541 | .name = "nvidia-pci-gpu", | ||
542 | .devnode = nvgpu_pci_devnode, | ||
543 | }; | ||
544 | |||
545 | #ifdef CONFIG_PM | ||
546 | static int nvgpu_pci_pm_runtime_resume(struct device *dev) | ||
547 | { | ||
548 | return gk20a_pm_finalize_poweron(dev); | ||
549 | } | ||
550 | |||
551 | static int nvgpu_pci_pm_runtime_suspend(struct device *dev) | ||
552 | { | ||
553 | return 0; | ||
554 | } | ||
555 | |||
556 | static int nvgpu_pci_pm_resume(struct device *dev) | ||
557 | { | ||
558 | return gk20a_pm_finalize_poweron(dev); | ||
559 | } | ||
560 | |||
561 | static int nvgpu_pci_pm_suspend(struct device *dev) | ||
562 | { | ||
563 | return 0; | ||
564 | } | ||
565 | |||
566 | static const struct dev_pm_ops nvgpu_pci_pm_ops = { | ||
567 | .runtime_resume = nvgpu_pci_pm_runtime_resume, | ||
568 | .runtime_suspend = nvgpu_pci_pm_runtime_suspend, | ||
569 | .resume = nvgpu_pci_pm_resume, | ||
570 | .suspend = nvgpu_pci_pm_suspend, | ||
571 | }; | ||
572 | #endif | ||
573 | |||
574 | static int nvgpu_pci_pm_init(struct device *dev) | ||
575 | { | ||
576 | #ifdef CONFIG_PM | ||
577 | struct gk20a *g = get_gk20a(dev); | ||
578 | |||
579 | if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) { | ||
580 | pm_runtime_disable(dev); | ||
581 | } else { | ||
582 | if (g->railgate_delay) | ||
583 | pm_runtime_set_autosuspend_delay(dev, | ||
584 | g->railgate_delay); | ||
585 | |||
586 | /* | ||
587 | * set gpu dev's use_autosuspend flag to allow | ||
588 | * runtime power management of GPU | ||
589 | */ | ||
590 | pm_runtime_use_autosuspend(dev); | ||
591 | |||
592 | /* | ||
593 | * runtime PM for PCI devices is forbidden | ||
594 | * by default, so unblock RTPM of GPU | ||
595 | */ | ||
596 | pm_runtime_put_noidle(dev); | ||
597 | pm_runtime_allow(dev); | ||
598 | } | ||
599 | #endif | ||
600 | return 0; | ||
601 | } | ||
602 | |||
603 | static int nvgpu_pci_pm_deinit(struct device *dev) | ||
604 | { | ||
605 | #ifdef CONFIG_PM | ||
606 | struct gk20a *g = get_gk20a(dev); | ||
607 | |||
608 | if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) | ||
609 | pm_runtime_enable(dev); | ||
610 | else | ||
611 | pm_runtime_forbid(dev); | ||
612 | #endif | ||
613 | return 0; | ||
614 | } | ||
615 | |||
616 | static int nvgpu_pci_probe(struct pci_dev *pdev, | ||
617 | const struct pci_device_id *pent) | ||
618 | { | ||
619 | struct gk20a_platform *platform = NULL; | ||
620 | struct nvgpu_os_linux *l; | ||
621 | struct gk20a *g; | ||
622 | int err; | ||
623 | char nodefmt[64]; | ||
624 | struct device_node *np; | ||
625 | |||
626 | /* make sure driver_data is a sane index */ | ||
627 | if (pent->driver_data >= | ||
628 | ARRAY_SIZE(nvgpu_pci_device)) { | ||
629 | return -EINVAL; | ||
630 | } | ||
631 | |||
632 | l = kzalloc(sizeof(*l), GFP_KERNEL); | ||
633 | if (!l) { | ||
634 | dev_err(&pdev->dev, "couldn't allocate gk20a support"); | ||
635 | return -ENOMEM; | ||
636 | } | ||
637 | |||
638 | hash_init(l->ecc_sysfs_stats_htable); | ||
639 | |||
640 | g = &l->g; | ||
641 | |||
642 | g->log_mask = NVGPU_DEFAULT_DBG_MASK; | ||
643 | |||
644 | nvgpu_init_gk20a(g); | ||
645 | |||
646 | nvgpu_kmem_init(g); | ||
647 | |||
648 | /* Allocate memory to hold platform data */ | ||
649 | platform = (struct gk20a_platform *)nvgpu_kzalloc(g, | ||
650 | sizeof(struct gk20a_platform)); | ||
651 | if (!platform) { | ||
652 | dev_err(&pdev->dev, "couldn't allocate platform data"); | ||
653 | err = -ENOMEM; | ||
654 | goto err_free_l; | ||
655 | } | ||
656 | |||
657 | /* copy detected device data to allocated platform space */ | ||
658 | memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data], | ||
659 | sizeof(struct gk20a_platform)); | ||
660 | |||
661 | pci_set_drvdata(pdev, platform); | ||
662 | |||
663 | err = nvgpu_init_enabled_flags(g); | ||
664 | if (err) | ||
665 | goto err_free_platform; | ||
666 | |||
667 | platform->g = g; | ||
668 | l->dev = &pdev->dev; | ||
669 | |||
670 | np = nvgpu_get_node(g); | ||
671 | if (of_dma_is_coherent(np)) { | ||
672 | __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); | ||
673 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
674 | } | ||
675 | |||
676 | err = pci_enable_device(pdev); | ||
677 | if (err) | ||
678 | goto err_free_platform; | ||
679 | pci_set_master(pdev); | ||
680 | |||
681 | g->pci_vendor_id = pdev->vendor; | ||
682 | g->pci_device_id = pdev->device; | ||
683 | g->pci_subsystem_vendor_id = pdev->subsystem_vendor; | ||
684 | g->pci_subsystem_device_id = pdev->subsystem_device; | ||
685 | g->pci_class = (pdev->class >> 8) & 0xFFFFU; /* we only want base/sub */ | ||
686 | g->pci_revision = pdev->revision; | ||
687 | |||
688 | g->ina3221_dcb_index = platform->ina3221_dcb_index; | ||
689 | g->ina3221_i2c_address = platform->ina3221_i2c_address; | ||
690 | g->ina3221_i2c_port = platform->ina3221_i2c_port; | ||
691 | g->hardcode_sw_threshold = platform->hardcode_sw_threshold; | ||
692 | |||
693 | #if defined(CONFIG_PCI_MSI) | ||
694 | err = pci_enable_msi(pdev); | ||
695 | if (err) { | ||
696 | nvgpu_err(g, | ||
697 | "MSI could not be enabled, falling back to legacy"); | ||
698 | g->msi_enabled = false; | ||
699 | } else | ||
700 | g->msi_enabled = true; | ||
701 | #endif | ||
702 | |||
703 | g->irq_stall = pdev->irq; | ||
704 | g->irq_nonstall = pdev->irq; | ||
705 | if (g->irq_stall < 0) { | ||
706 | err = -ENXIO; | ||
707 | goto err_disable_msi; | ||
708 | } | ||
709 | |||
710 | err = devm_request_threaded_irq(&pdev->dev, | ||
711 | g->irq_stall, | ||
712 | nvgpu_pci_isr, | ||
713 | nvgpu_pci_intr_thread, | ||
714 | #if defined(CONFIG_PCI_MSI) | ||
715 | g->msi_enabled ? 0 : | ||
716 | #endif | ||
717 | IRQF_SHARED, "nvgpu", g); | ||
718 | if (err) { | ||
719 | nvgpu_err(g, | ||
720 | "failed to request irq @ %d", g->irq_stall); | ||
721 | goto err_disable_msi; | ||
722 | } | ||
723 | disable_irq(g->irq_stall); | ||
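
	/*
	 * Note: the handler is installed early so the driver owns the vector,
	 * but the line is left disabled here; it is only re-enabled later in
	 * the power-on path, once the GPU is powered and can raise interrupts.
	 */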
724 | |||
725 | err = nvgpu_pci_init_support(pdev); | ||
726 | if (err) | ||
727 | goto err_free_irq; | ||
728 | |||
729 | if (strchr(dev_name(&pdev->dev), '%')) { | ||
730 | nvgpu_err(g, "illegal character in device name"); | ||
731 | err = -EINVAL; | ||
732 | goto err_free_irq; | ||
733 | } | ||
734 | |||
735 | snprintf(nodefmt, sizeof(nodefmt), | ||
736 | PCI_INTERFACE_NAME, dev_name(&pdev->dev)); | ||
737 | |||
738 | err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class); | ||
739 | if (err) | ||
740 | goto err_free_irq; | ||
741 | |||
742 | err = nvgpu_pci_pm_init(&pdev->dev); | ||
743 | if (err) { | ||
744 | nvgpu_err(g, "pm init failed"); | ||
745 | goto err_free_irq; | ||
746 | } | ||
747 | |||
748 | err = nvgpu_nvlink_probe(g); | ||
749 | /* | ||
750 | * ENODEV is a legal error: it simply means there is no NVLINK. | ||
751 | * Any other error is fatal. | ||
752 | */ | ||
753 | if (err) { | ||
754 | if (err != -ENODEV) { | ||
755 | nvgpu_err(g, "fatal error probing nvlink, bailing out"); | ||
756 | goto err_free_irq; | ||
757 | } | ||
758 | /* The semaphore SHIM is supported on NVLINK only for now; no NVLINK, so disable both. */ | ||
759 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false); | ||
760 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false); | ||
761 | } else { | ||
762 | err = nvgpu_nvhost_syncpt_init(g); | ||
763 | if (err) { | ||
764 | if (err != -ENOSYS) { | ||
765 | nvgpu_err(g, "syncpt init failed"); | ||
766 | goto err_free_irq; | ||
767 | } | ||
768 | } | ||
769 | } | ||
770 | |||
771 | return 0; | ||
772 | |||
773 | err_free_irq: | ||
774 | nvgpu_free_irq(g); | ||
775 | err_disable_msi: | ||
776 | #if defined(CONFIG_PCI_MSI) | ||
777 | if (g->msi_enabled) | ||
778 | pci_disable_msi(pdev); | ||
779 | #endif | ||
780 | err_free_platform: | ||
781 | nvgpu_kfree(g, platform); | ||
782 | err_free_l: | ||
783 | kfree(l); | ||
784 | return err; | ||
785 | } | ||
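
/*
 * Note: the error labels above unwind in strict reverse order of setup
 * (irq -> MSI -> platform copy -> l), the usual goto-ladder pattern for
 * kernel probe functions: each label releases only what was successfully
 * set up before the failing step.
 */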
786 | |||
787 | static void nvgpu_pci_remove(struct pci_dev *pdev) | ||
788 | { | ||
789 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
790 | struct device *dev = dev_from_gk20a(g); | ||
791 | int err; | ||
792 | |||
793 | /* no support yet for unbind if DGPU is in VGPU mode */ | ||
794 | if (gk20a_gpu_is_virtual(dev)) | ||
795 | return; | ||
796 | |||
797 | err = nvgpu_nvlink_deinit(g); | ||
798 | WARN(err, "gpu failed to remove nvlink"); | ||
799 | |||
800 | gk20a_driver_start_unload(g); | ||
801 | |||
802 | err = nvgpu_quiesce(g); | ||
803 | /* TODO: handle failure to idle */ | ||
804 | WARN(err, "gpu failed to idle during driver removal"); | ||
805 | |||
806 | nvgpu_free_irq(g); | ||
807 | |||
808 | nvgpu_remove(dev, &nvgpu_pci_class); | ||
809 | |||
810 | #if defined(CONFIG_PCI_MSI) | ||
811 | if (g->msi_enabled) | ||
812 | pci_disable_msi(pdev); | ||
813 | else { | ||
814 | /* An MSI vector is not a shared line, so it needs no re-enable; | ||
815 | * a legacy IRQ may be shared, so balance the earlier disable_irq() | ||
816 | */ | ||
817 | enable_irq(g->irq_stall); | ||
818 | } | ||
819 | #endif | ||
820 | nvgpu_pci_pm_deinit(&pdev->dev); | ||
821 | |||
822 | /* free allocated platform data space */ | ||
823 | gk20a_get_platform(&pdev->dev)->g = NULL; | ||
824 | nvgpu_kfree(g, gk20a_get_platform(&pdev->dev)); | ||
825 | |||
826 | gk20a_put(g); | ||
827 | } | ||
828 | |||
829 | static struct pci_driver nvgpu_pci_driver = { | ||
830 | .name = "nvgpu", | ||
831 | .id_table = nvgpu_pci_table, | ||
832 | .probe = nvgpu_pci_probe, | ||
833 | .remove = nvgpu_pci_remove, | ||
834 | #ifdef CONFIG_PM | ||
835 | .driver.pm = &nvgpu_pci_pm_ops, | ||
836 | #endif | ||
837 | }; | ||
838 | |||
839 | int __init nvgpu_pci_init(void) | ||
840 | { | ||
841 | int ret; | ||
842 | |||
843 | ret = class_register(&nvgpu_pci_class); | ||
844 | if (ret) | ||
845 | return ret; | ||
846 | |||
847 | return pci_register_driver(&nvgpu_pci_driver); | ||
848 | } | ||
849 | |||
850 | void __exit nvgpu_pci_exit(void) | ||
851 | { | ||
852 | pci_unregister_driver(&nvgpu_pci_driver); | ||
853 | class_unregister(&nvgpu_pci_class); | ||
854 | } | ||
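
/*
 * Sketch of the assumed module wiring (it lives elsewhere in the driver,
 * not in this file), shown only to illustrate that registration and
 * unregistration stay symmetric around the pair above:
 */
#include <linux/module.h>

static int __init example_module_init(void)
{
	return nvgpu_pci_init();
}

static void __exit example_module_exit(void)
{
	nvgpu_pci_exit();
}

module_init(example_module_init);
module_exit(example_module_exit);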
diff --git a/include/os/linux/pci.h b/include/os/linux/pci.h new file mode 100644 index 0000000..cc6b77b --- /dev/null +++ b/include/os/linux/pci.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef NVGPU_PCI_H | ||
17 | #define NVGPU_PCI_H | ||
18 | |||
19 | #ifdef CONFIG_GK20A_PCI | ||
20 | int nvgpu_pci_init(void); | ||
21 | void nvgpu_pci_exit(void); | ||
22 | #else | ||
23 | static inline int nvgpu_pci_init(void) { return 0; } | ||
24 | static inline void nvgpu_pci_exit(void) {} | ||
25 | #endif | ||
26 | |||
27 | #endif | ||
diff --git a/include/os/linux/pci_usermode.c b/include/os/linux/pci_usermode.c new file mode 100644 index 0000000..270b834 --- /dev/null +++ b/include/os/linux/pci_usermode.c | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/types.h> | ||
15 | |||
16 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
17 | |||
18 | #include "os_linux.h" | ||
19 | |||
20 | void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l) | ||
21 | { | ||
22 | l->usermode_regs = l->regs + usermode_cfg0_r(); | ||
23 | l->usermode_regs_saved = l->usermode_regs; | ||
24 | } | ||
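
/*
 * Illustrative sketch, not driver code: usermode_regs points at the
 * usermode aperture inside the BAR0 mapping, so registers in that window
 * are read relative to it. The offset parameter is a placeholder for a
 * real register offset within the aperture.
 */
#include <linux/io.h>

static inline u32 example_usermode_read(struct nvgpu_os_linux *l, u32 offset)
{
	return readl(l->usermode_regs + offset);
}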
diff --git a/include/os/linux/pci_usermode.h b/include/os/linux/pci_usermode.h new file mode 100644 index 0000000..25a08d2 --- /dev/null +++ b/include/os/linux/pci_usermode.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef __NVGPU_PCI_USERMODE_H__ | ||
17 | #define __NVGPU_PCI_USERMODE_H__ | ||
18 | |||
19 | struct nvgpu_os_linux; | ||
20 | |||
21 | void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l); | ||
22 | |||
23 | #endif | ||
diff --git a/include/os/linux/platform_gk20a.h b/include/os/linux/platform_gk20a.h new file mode 100644 index 0000000..adec860 --- /dev/null +++ b/include/os/linux/platform_gk20a.h | |||
@@ -0,0 +1,329 @@ | |||
1 | /* | ||
2 | * GK20A Platform (SoC) Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #ifndef _GK20A_PLATFORM_H_ | ||
17 | #define _GK20A_PLATFORM_H_ | ||
18 | |||
19 | #include <linux/device.h> | ||
20 | |||
21 | #include <nvgpu/lock.h> | ||
22 | #include <nvgpu/gk20a.h> | ||
23 | |||
24 | #define GK20A_CLKS_MAX 4 | ||
25 | |||
26 | struct gk20a; | ||
27 | struct channel_gk20a; | ||
28 | struct gr_ctx_buffer_desc; | ||
29 | struct gk20a_scale_profile; | ||
30 | |||
31 | struct secure_page_buffer { | ||
32 | void (*destroy)(struct gk20a *, struct secure_page_buffer *); | ||
33 | size_t size; | ||
34 | dma_addr_t phys; | ||
35 | size_t used; | ||
36 | }; | ||
37 | |||
38 | struct gk20a_platform { | ||
39 | /* Populated by the gk20a driver before probing the platform. */ | ||
40 | struct gk20a *g; | ||
41 | |||
42 | /* Should be populated at probe. */ | ||
43 | bool can_railgate_init; | ||
44 | |||
45 | /* Should be populated at probe. */ | ||
46 | bool can_tpc_powergate; | ||
47 | |||
48 | /* Should be populated at probe. */ | ||
49 | bool can_elpg_init; | ||
50 | |||
51 | /* Should be populated at probe. */ | ||
52 | bool has_syncpoints; | ||
53 | |||
54 | /* channel limit after which to start aggressive sync destroy */ | ||
55 | unsigned int aggressive_sync_destroy_thresh; | ||
56 | |||
57 | /* flag to set sync destroy aggressiveness */ | ||
58 | bool aggressive_sync_destroy; | ||
59 | |||
60 | /* set if ASPM should be disabled on boot; only makes sense for PCI */ | ||
61 | bool disable_aspm; | ||
62 | |||
63 | /* Set if the platform can unify the small/large address spaces. */ | ||
64 | bool unify_address_spaces; | ||
65 | |||
66 | /* Clock configuration is stored here. Platform probe is responsible | ||
67 | * for filling this data. */ | ||
68 | struct clk *clk[GK20A_CLKS_MAX]; | ||
69 | int num_clks; | ||
70 | int maxmin_clk_id; | ||
71 | |||
72 | #ifdef CONFIG_RESET_CONTROLLER | ||
73 | /* Reset control for device */ | ||
74 | struct reset_control *reset_control; | ||
75 | #endif | ||
76 | /* valid TPC-MASK */ | ||
77 | u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS]; | ||
78 | |||
79 | /* Delay before rail gated */ | ||
80 | int railgate_delay_init; | ||
81 | |||
82 | /* init value for slowdown factor */ | ||
83 | u8 ldiv_slowdown_factor_init; | ||
84 | |||
85 | /* Second Level Clock Gating: true = enable false = disable */ | ||
86 | bool enable_slcg; | ||
87 | |||
88 | /* Block Level Clock Gating: true = enable false = disable */ | ||
89 | bool enable_blcg; | ||
90 | |||
91 | /* Engine Level Clock Gating: true = enable false = disable */ | ||
92 | bool enable_elcg; | ||
93 | |||
94 | /* Should be populated at probe. */ | ||
95 | bool can_slcg; | ||
96 | |||
97 | /* Should be populated at probe. */ | ||
98 | bool can_blcg; | ||
99 | |||
100 | /* Should be populated at probe. */ | ||
101 | bool can_elcg; | ||
102 | |||
103 | /* Engine Level Power Gating: true = enable false = disable */ | ||
104 | bool enable_elpg; | ||
105 | |||
106 | /* Adaptive ELPG: true = enable false = disable */ | ||
107 | bool enable_aelpg; | ||
108 | |||
109 | /* PMU Perfmon: true = enable false = disable */ | ||
110 | bool enable_perfmon; | ||
111 | |||
112 | /* Memory System Clock Gating: true = enable false = disable */ | ||
113 | bool enable_mscg; | ||
114 | |||
115 | /* Timeout for per-channel watchdog (in mS) */ | ||
116 | u32 ch_wdt_timeout_ms; | ||
117 | |||
118 | /* Disable big page support */ | ||
119 | bool disable_bigpage; | ||
120 | |||
121 | /* | ||
122 | * gk20a_do_idle() API can take GPU either into rail gate or CAR reset | ||
123 | * This flag can be used to force CAR reset case instead of rail gate | ||
124 | */ | ||
125 | bool force_reset_in_do_idle; | ||
126 | |||
127 | /* guest/vm id, needed for IPA to PA translation */ | ||
128 | int vmid; | ||
129 | |||
130 | /* Initialize the platform interface of the gk20a driver. | ||
131 | * | ||
132 | * The platform implementation of this function must | ||
133 | * - set the power and clocks of the gk20a device to a known | ||
134 | * state, and | ||
135 | * - populate the gk20a_platform structure (a pointer to the | ||
136 | * structure can be obtained by calling gk20a_get_platform). | ||
137 | * | ||
138 | * After this function is finished, the driver will initialise | ||
139 | * pm runtime and genpd based on the platform configuration. | ||
140 | */ | ||
141 | int (*probe)(struct device *dev); | ||
142 | |||
143 | /* Second stage initialisation - called once all power management | ||
144 | * initialisations are done. | ||
145 | */ | ||
146 | int (*late_probe)(struct device *dev); | ||
147 | |||
148 | /* Remove device after power management has been done | ||
149 | */ | ||
150 | int (*remove)(struct device *dev); | ||
151 | |||
152 | /* Poweron platform dependencies */ | ||
153 | int (*busy)(struct device *dev); | ||
154 | |||
155 | /* Powerdown platform dependencies */ | ||
156 | void (*idle)(struct device *dev); | ||
157 | |||
158 | /* Preallocated VPR buffer for kernel */ | ||
159 | size_t secure_buffer_size; | ||
160 | struct secure_page_buffer secure_buffer; | ||
161 | |||
162 | /* Device is going to be suspended */ | ||
163 | int (*suspend)(struct device *); | ||
164 | |||
165 | /* Device is going to be resumed */ | ||
166 | int (*resume)(struct device *); | ||
167 | |||
168 | /* Called to turn off the device */ | ||
169 | int (*railgate)(struct device *dev); | ||
170 | |||
171 | /* Called to turn on the device */ | ||
172 | int (*unrailgate)(struct device *dev); | ||
173 | struct nvgpu_mutex railgate_lock; | ||
174 | |||
175 | /* Called to check state of device */ | ||
176 | bool (*is_railgated)(struct device *dev); | ||
177 | |||
178 | /* get supported frequency list */ | ||
179 | int (*get_clk_freqs)(struct device *pdev, | ||
180 | unsigned long **freqs, int *num_freqs); | ||
181 | |||
182 | /* clk related supported functions */ | ||
183 | long (*clk_round_rate)(struct device *dev, | ||
184 | unsigned long rate); | ||
185 | |||
186 | /* Called to register GPCPLL with common clk framework */ | ||
187 | int (*clk_register)(struct gk20a *g); | ||
188 | |||
189 | /* platform specific scale init quirks */ | ||
190 | void (*initscale)(struct device *dev); | ||
191 | |||
192 | /* Postscale callback is called after frequency change */ | ||
193 | void (*postscale)(struct device *dev, | ||
194 | unsigned long freq); | ||
195 | |||
196 | /* Pre callback is called before frequency change */ | ||
197 | void (*prescale)(struct device *dev); | ||
198 | |||
199 | /* Set TPC_PG_MASK during probe */ | ||
200 | void (*set_tpc_pg_mask)(struct device *dev, u32 tpc_pg_mask); | ||
201 | |||
202 | /* Devfreq governor name. If scaling is enabled, we request | ||
203 | * this governor to be used in scaling */ | ||
204 | const char *devfreq_governor; | ||
205 | |||
206 | /* Quality of service notifier callback. If this is set, the scaling | ||
207 | * routines will register a callback with QoS. Each time we receive | ||
208 | * a new value, this callback gets called. */ | ||
209 | int (*qos_notify)(struct notifier_block *nb, | ||
210 | unsigned long n, void *p); | ||
211 | |||
212 | /* Called as part of debug dump. If the gpu gets hung, this function | ||
213 | * is responsible for delivering all necessary debug data of other | ||
214 | * hw units which may interact with the gpu without direct supervision | ||
215 | * of the CPU. | ||
216 | */ | ||
217 | void (*dump_platform_dependencies)(struct device *dev); | ||
218 | |||
219 | /* Defined when SMMU stage-2 is enabled, and we need to use physical | ||
220 | * addresses (not IPA). This is the case for GV100 nvlink in HV+L | ||
221 | * configuration, when dGPU is in pass-through mode. | ||
222 | */ | ||
223 | u64 (*phys_addr)(struct gk20a *g, u64 ipa); | ||
224 | |||
225 | /* Callbacks to assert/deassert GPU reset */ | ||
226 | int (*reset_assert)(struct device *dev); | ||
227 | int (*reset_deassert)(struct device *dev); | ||
228 | struct clk *clk_reset; | ||
229 | struct dvfs_rail *gpu_rail; | ||
230 | |||
231 | bool virtual_dev; | ||
232 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
233 | void *vgpu_priv; | ||
234 | #endif | ||
235 | /* source frequency for ptimer in hz */ | ||
236 | u32 ptimer_src_freq; | ||
237 | |||
238 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
239 | bool has_cde; | ||
240 | #endif | ||
241 | |||
242 | /* soc name for finding firmware files */ | ||
243 | const char *soc_name; | ||
244 | |||
245 | /* false if vidmem aperture actually points to sysmem */ | ||
246 | bool honors_aperture; | ||
247 | /* unified or split memory with separate vidmem? */ | ||
248 | bool unified_memory; | ||
249 | /* WAR for gm20b chips. */ | ||
250 | bool force_128K_pmu_vm; | ||
251 | |||
252 | /* | ||
253 | * DMA mask for Linux (both coh and non-coh). If not set defaults to | ||
254 | * 0x3ffffffff (i.e., a 34-bit mask). | ||
255 | */ | ||
256 | u64 dma_mask; | ||
257 | |||
258 | /* minimum supported VBIOS version */ | ||
259 | u32 vbios_min_version; | ||
260 | |||
261 | /* true if we run preos microcode on this board */ | ||
262 | bool run_preos; | ||
263 | |||
264 | /* true if we need to program sw threshold for | ||
265 | * power limits | ||
266 | */ | ||
267 | bool hardcode_sw_threshold; | ||
268 | |||
269 | /* i2c device index, port and address for INA3221 */ | ||
270 | u32 ina3221_dcb_index; | ||
271 | u32 ina3221_i2c_address; | ||
272 | u32 ina3221_i2c_port; | ||
273 | |||
274 | /* stream id to use */ | ||
275 | u32 ltc_streamid; | ||
276 | |||
277 | /* synchronized access to platform->clk_get_freqs */ | ||
278 | struct nvgpu_mutex clk_get_freq_lock; | ||
279 | }; | ||
280 | |||
281 | static inline struct gk20a_platform *gk20a_get_platform( | ||
282 | struct device *dev) | ||
283 | { | ||
284 | return (struct gk20a_platform *)dev_get_drvdata(dev); | ||
285 | } | ||
286 | |||
287 | #ifdef CONFIG_TEGRA_GK20A | ||
288 | extern struct gk20a_platform gm20b_tegra_platform; | ||
289 | extern struct gk20a_platform gp10b_tegra_platform; | ||
290 | extern struct gk20a_platform gv11b_tegra_platform; | ||
291 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
292 | extern struct gk20a_platform vgpu_tegra_platform; | ||
293 | extern struct gk20a_platform gv11b_vgpu_tegra_platform; | ||
294 | #endif | ||
295 | #endif | ||
296 | |||
297 | int gk20a_tegra_busy(struct device *dev); | ||
298 | void gk20a_tegra_idle(struct device *dev); | ||
299 | void gk20a_tegra_debug_dump(struct device *pdev); | ||
300 | |||
301 | static inline struct gk20a *get_gk20a(struct device *dev) | ||
302 | { | ||
303 | return gk20a_get_platform(dev)->g; | ||
304 | } | ||
305 | static inline struct gk20a *gk20a_from_dev(struct device *dev) | ||
306 | { | ||
307 | if (!dev) | ||
308 | return NULL; | ||
309 | |||
310 | return ((struct gk20a_platform *)dev_get_drvdata(dev))->g; | ||
311 | } | ||
312 | static inline bool gk20a_gpu_is_virtual(struct device *dev) | ||
313 | { | ||
314 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
315 | |||
316 | return platform->virtual_dev; | ||
317 | } | ||
318 | |||
319 | static inline int support_gk20a_pmu(struct device *dev) | ||
320 | { | ||
321 | if (IS_ENABLED(CONFIG_GK20A_PMU)) { | ||
322 | /* gPMU is not supported for vgpu */ | ||
323 | return !gk20a_gpu_is_virtual(dev); | ||
324 | } | ||
325 | |||
326 | return 0; | ||
327 | } | ||
328 | |||
329 | #endif | ||
diff --git a/include/os/linux/platform_gk20a_tegra.c b/include/os/linux/platform_gk20a_tegra.c new file mode 100644 index 0000000..c39e4f0 --- /dev/null +++ b/include/os/linux/platform_gk20a_tegra.c | |||
@@ -0,0 +1,966 @@ | |||
1 | /* | ||
2 | * GK20A Tegra Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/clkdev.h> | ||
17 | #include <linux/of_platform.h> | ||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/platform_data/tegra_edp.h> | ||
20 | #include <linux/delay.h> | ||
21 | #include <uapi/linux/nvgpu.h> | ||
22 | #include <linux/dma-buf.h> | ||
23 | #include <linux/dma-attrs.h> | ||
24 | #include <linux/nvmap.h> | ||
25 | #include <linux/reset.h> | ||
26 | #if defined(CONFIG_TEGRA_DVFS) | ||
27 | #include <linux/tegra_soctherm.h> | ||
28 | #endif | ||
29 | #include <linux/platform/tegra/common.h> | ||
30 | #include <linux/platform/tegra/mc.h> | ||
31 | #include <linux/clk/tegra.h> | ||
32 | #if defined(CONFIG_COMMON_CLK) | ||
33 | #include <soc/tegra/tegra-dvfs.h> | ||
34 | #endif | ||
35 | #ifdef CONFIG_TEGRA_BWMGR | ||
36 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
37 | #endif | ||
38 | |||
39 | #include <linux/platform/tegra/tegra_emc.h> | ||
40 | #include <soc/tegra/chip-id.h> | ||
41 | |||
42 | #include <nvgpu/kmem.h> | ||
43 | #include <nvgpu/bug.h> | ||
44 | #include <nvgpu/enabled.h> | ||
45 | #include <nvgpu/gk20a.h> | ||
46 | #include <nvgpu/nvhost.h> | ||
47 | |||
48 | #include <nvgpu/linux/dma.h> | ||
49 | |||
50 | #include "gm20b/clk_gm20b.h" | ||
51 | |||
52 | #include "scale.h" | ||
53 | #include "platform_gk20a.h" | ||
54 | #include "clk.h" | ||
55 | #include "os_linux.h" | ||
56 | |||
57 | #include "../../../arch/arm/mach-tegra/iomap.h" | ||
58 | #include <soc/tegra/pmc.h> | ||
59 | |||
60 | #define TEGRA_GK20A_BW_PER_FREQ 32 | ||
61 | #define TEGRA_GM20B_BW_PER_FREQ 64 | ||
62 | #define TEGRA_DDR3_BW_PER_FREQ 16 | ||
63 | #define TEGRA_DDR4_BW_PER_FREQ 16 | ||
64 | #define MC_CLIENT_GPU 34 | ||
65 | #define PMC_GPU_RG_CNTRL_0 0x2d4 | ||
66 | |||
67 | #ifdef CONFIG_COMMON_CLK | ||
68 | #define GPU_RAIL_NAME "vdd-gpu" | ||
69 | #else | ||
70 | #define GPU_RAIL_NAME "vdd_gpu" | ||
71 | #endif | ||
72 | |||
73 | extern struct device tegra_vpr_dev; | ||
74 | |||
75 | #ifdef CONFIG_TEGRA_BWMGR | ||
76 | struct gk20a_emc_params { | ||
77 | unsigned long bw_ratio; | ||
78 | unsigned long freq_last_set; | ||
79 | struct tegra_bwmgr_client *bwmgr_cl; | ||
80 | }; | ||
81 | #else | ||
82 | struct gk20a_emc_params { | ||
83 | unsigned long bw_ratio; | ||
84 | unsigned long freq_last_set; | ||
85 | }; | ||
86 | #endif | ||
87 | |||
88 | #define MHZ_TO_HZ(x) ((x) * 1000000) | ||
89 | #define HZ_TO_MHZ(x) ((x) / 1000000) | ||
90 | |||
91 | static void gk20a_tegra_secure_page_destroy(struct gk20a *g, | ||
92 | struct secure_page_buffer *secure_buffer) | ||
93 | { | ||
94 | DEFINE_DMA_ATTRS(attrs); | ||
95 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); | ||
96 | dma_free_attrs(&tegra_vpr_dev, secure_buffer->size, | ||
97 | (void *)(uintptr_t)secure_buffer->phys, | ||
98 | secure_buffer->phys, __DMA_ATTR(attrs)); | ||
99 | |||
100 | secure_buffer->destroy = NULL; | ||
101 | } | ||
102 | |||
103 | static int gk20a_tegra_secure_alloc(struct gk20a *g, | ||
104 | struct gr_ctx_buffer_desc *desc, | ||
105 | size_t size) | ||
106 | { | ||
107 | struct device *dev = dev_from_gk20a(g); | ||
108 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
109 | struct secure_page_buffer *secure_buffer = &platform->secure_buffer; | ||
110 | dma_addr_t phys; | ||
111 | struct sg_table *sgt; | ||
112 | struct page *page; | ||
113 | int err = 0; | ||
114 | size_t aligned_size = PAGE_ALIGN(size); | ||
115 | |||
116 | if (nvgpu_mem_is_valid(&desc->mem)) | ||
117 | return 0; | ||
118 | |||
119 | /* We ran out of preallocated memory */ | ||
120 | if (secure_buffer->used + aligned_size > secure_buffer->size) { | ||
121 | nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used", | ||
122 | size, secure_buffer->used, secure_buffer->size); | ||
123 | return -ENOMEM; | ||
124 | } | ||
125 | |||
126 | phys = secure_buffer->phys + secure_buffer->used; | ||
127 | |||
128 | sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt)); | ||
129 | if (!sgt) { | ||
130 | nvgpu_err(platform->g, "failed to allocate memory"); | ||
131 | return -ENOMEM; | ||
132 | } | ||
133 | err = sg_alloc_table(sgt, 1, GFP_KERNEL); | ||
134 | if (err) { | ||
135 | nvgpu_err(platform->g, "failed to allocate sg_table"); | ||
136 | goto fail_sgt; | ||
137 | } | ||
138 | page = phys_to_page(phys); | ||
139 | sg_set_page(sgt->sgl, page, size, 0); | ||
140 | /* This bypasses SMMU for VPR during gmmu_map. */ | ||
141 | sg_dma_address(sgt->sgl) = 0; | ||
142 | |||
143 | desc->destroy = NULL; | ||
144 | |||
145 | desc->mem.priv.sgt = sgt; | ||
146 | desc->mem.size = size; | ||
147 | desc->mem.aperture = APERTURE_SYSMEM; | ||
148 | |||
149 | secure_buffer->used += aligned_size; | ||
150 | |||
151 | return err; | ||
152 | |||
153 | fail_sgt: | ||
154 | nvgpu_kfree(platform->g, sgt); | ||
155 | return err; | ||
156 | } | ||
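
/*
 * Note on the allocator above: this is a simple bump allocator over the
 * preallocated VPR carve-out. secure_buffer->used only ever grows,
 * individual allocations are never freed, and the whole carve-out is
 * released in one shot by gk20a_tegra_secure_page_destroy().
 */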
157 | |||
158 | /* | ||
159 | * gk20a_tegra_get_emc_rate() | ||
160 | * | ||
161 | * This function returns the minimum EMC clock rate required for the current GPU frequency | ||
162 | */ | ||
163 | |||
164 | static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, | ||
165 | struct gk20a_emc_params *emc_params) | ||
166 | { | ||
167 | unsigned long gpu_freq, gpu_fmax_at_vmin; | ||
168 | unsigned long emc_rate, emc_scale; | ||
169 | |||
170 | gpu_freq = clk_get_rate(g->clk.tegra_clk); | ||
171 | gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t( | ||
172 | clk_get_parent(g->clk.tegra_clk)); | ||
173 | |||
174 | /* When scaling emc, account for the gpu load when the | ||
175 | * gpu frequency is less than or equal to fmax@vmin. */ | ||
176 | if (gpu_freq <= gpu_fmax_at_vmin) | ||
177 | emc_scale = min(g->pmu.load_avg, g->emc3d_ratio); | ||
178 | else | ||
179 | emc_scale = g->emc3d_ratio; | ||
180 | |||
181 | emc_rate = | ||
182 | (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000; | ||
183 | |||
184 | return MHZ_TO_HZ(emc_rate); | ||
185 | } | ||
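
/*
 * Worked example with assumed numbers: on a GM20B at gpu_freq = 800 MHz,
 * bw_ratio = 64 / 16 = 4 (see gk20a_tegra_calibrate_emc() below), and a
 * load average of 500 (assuming load is reported per-mille), the loaded
 * case gives emc_rate = 800 * 4 * 500 / 1000 = 1600 MHz. At full load the
 * scale is capped by g->emc3d_ratio instead.
 */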
186 | |||
187 | /* | ||
188 | * gk20a_tegra_prescale() | ||
189 | * | ||
190 | * This function informs EDP about changed constraints. | ||
191 | */ | ||
192 | |||
193 | static void gk20a_tegra_prescale(struct device *dev) | ||
194 | { | ||
195 | struct gk20a *g = get_gk20a(dev); | ||
196 | u32 avg = 0; | ||
197 | |||
198 | nvgpu_pmu_load_norm(g, &avg); | ||
199 | tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk)); | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * gk20a_tegra_calibrate_emc() | ||
204 | * Computes the GPU-to-EMC bandwidth ratio for the detected SoC. | ||
205 | */ | ||
206 | |||
207 | static void gk20a_tegra_calibrate_emc(struct device *dev, | ||
208 | struct gk20a_emc_params *emc_params) | ||
209 | { | ||
210 | enum tegra_chipid cid = tegra_get_chip_id(); | ||
211 | long gpu_bw, emc_bw; | ||
212 | |||
213 | /* store gpu bw based on soc */ | ||
214 | switch (cid) { | ||
215 | case TEGRA210: | ||
216 | gpu_bw = TEGRA_GM20B_BW_PER_FREQ; | ||
217 | break; | ||
218 | case TEGRA124: | ||
219 | case TEGRA132: | ||
220 | gpu_bw = TEGRA_GK20A_BW_PER_FREQ; | ||
221 | break; | ||
222 | default: | ||
223 | gpu_bw = 0; | ||
224 | break; | ||
225 | } | ||
226 | |||
227 | /* TODO detect DDR type. | ||
228 | * Okay for now since DDR3 and DDR4 have the same BW ratio */ | ||
229 | emc_bw = TEGRA_DDR3_BW_PER_FREQ; | ||
230 | |||
231 | /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq | ||
232 | * NOTE the ratio must come out as an integer */ | ||
233 | emc_params->bw_ratio = (gpu_bw / emc_bw); | ||
234 | } | ||
235 | |||
236 | #ifdef CONFIG_TEGRA_BWMGR | ||
237 | #ifdef CONFIG_TEGRA_DVFS | ||
238 | static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb) | ||
239 | { | ||
240 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
241 | struct gk20a_emc_params *params; | ||
242 | unsigned long rate; | ||
243 | |||
244 | if (!profile || !profile->private_data) | ||
245 | return; | ||
246 | |||
247 | params = (struct gk20a_emc_params *)profile->private_data; | ||
248 | rate = (enb) ? params->freq_last_set : 0; | ||
249 | tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
250 | } | ||
251 | #endif | ||
252 | |||
253 | static void gm20b_tegra_postscale(struct device *dev, unsigned long freq) | ||
254 | { | ||
255 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
256 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
257 | struct gk20a_emc_params *emc_params; | ||
258 | unsigned long emc_rate; | ||
259 | |||
260 | if (!profile || !profile->private_data) | ||
261 | return; | ||
262 | |||
263 | emc_params = profile->private_data; | ||
264 | emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params); | ||
265 | |||
266 | if (emc_rate > tegra_bwmgr_get_max_emc_rate()) | ||
267 | emc_rate = tegra_bwmgr_get_max_emc_rate(); | ||
268 | |||
269 | emc_params->freq_last_set = emc_rate; | ||
270 | if (platform->is_railgated && platform->is_railgated(dev)) | ||
271 | return; | ||
272 | |||
273 | tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate, | ||
274 | TEGRA_BWMGR_SET_EMC_FLOOR); | ||
275 | |||
276 | } | ||
277 | |||
278 | #endif | ||
279 | |||
280 | #if defined(CONFIG_TEGRA_DVFS) | ||
281 | /* | ||
282 | * gk20a_tegra_is_railgated() | ||
283 | * | ||
284 | * Check status of gk20a power rail | ||
285 | */ | ||
286 | |||
287 | static bool gk20a_tegra_is_railgated(struct device *dev) | ||
288 | { | ||
289 | struct gk20a *g = get_gk20a(dev); | ||
290 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
291 | bool ret = false; | ||
292 | |||
293 | if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
294 | ret = !tegra_dvfs_is_rail_up(platform->gpu_rail); | ||
295 | |||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * gm20b_tegra_railgate() | ||
301 | * | ||
302 | * Gate (disable) gm20b power rail | ||
303 | */ | ||
304 | |||
305 | static int gm20b_tegra_railgate(struct device *dev) | ||
306 | { | ||
307 | struct gk20a *g = get_gk20a(dev); | ||
308 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
309 | int ret = 0; | ||
310 | |||
311 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) || | ||
312 | !tegra_dvfs_is_rail_up(platform->gpu_rail)) | ||
313 | return 0; | ||
314 | |||
315 | tegra_mc_flush(MC_CLIENT_GPU); | ||
316 | |||
317 | udelay(10); | ||
318 | |||
319 | /* enable clamp */ | ||
320 | tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0); | ||
321 | tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); | ||
322 | |||
323 | udelay(10); | ||
324 | |||
325 | platform->reset_assert(dev); | ||
326 | |||
327 | udelay(10); | ||
328 | |||
329 | /* | ||
330 | * GPCPLL is already disabled before entering this function; reference | ||
331 | * clocks are enabled until now - disable them just before rail gating | ||
332 | */ | ||
333 | clk_disable_unprepare(platform->clk_reset); | ||
334 | clk_disable_unprepare(platform->clk[0]); | ||
335 | clk_disable_unprepare(platform->clk[1]); | ||
336 | if (platform->clk[3]) | ||
337 | clk_disable_unprepare(platform->clk[3]); | ||
338 | |||
339 | udelay(10); | ||
340 | |||
341 | tegra_soctherm_gpu_tsens_invalidate(1); | ||
342 | |||
343 | if (tegra_dvfs_is_rail_up(platform->gpu_rail)) { | ||
344 | ret = tegra_dvfs_rail_power_down(platform->gpu_rail); | ||
345 | if (ret) | ||
346 | goto err_power_off; | ||
347 | } else | ||
348 | pr_info("No GPU regulator?\n"); | ||
349 | |||
350 | #ifdef CONFIG_TEGRA_BWMGR | ||
351 | gm20b_bwmgr_set_rate(platform, false); | ||
352 | #endif | ||
353 | |||
354 | return 0; | ||
355 | |||
356 | err_power_off: | ||
357 | nvgpu_err(platform->g, "Could not railgate GPU"); | ||
358 | return ret; | ||
359 | } | ||
360 | |||
361 | |||
362 | /* | ||
363 | * gm20b_tegra_unrailgate() | ||
364 | * | ||
365 | * Ungate (enable) gm20b power rail | ||
366 | */ | ||
367 | |||
368 | static int gm20b_tegra_unrailgate(struct device *dev) | ||
369 | { | ||
370 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
371 | struct gk20a *g = platform->g; | ||
372 | int ret = 0; | ||
373 | bool first = false; | ||
374 | |||
375 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
376 | return 0; | ||
377 | |||
378 | ret = tegra_dvfs_rail_power_up(platform->gpu_rail); | ||
379 | if (ret) | ||
380 | return ret; | ||
381 | |||
382 | #ifdef CONFIG_TEGRA_BWMGR | ||
383 | gm20b_bwmgr_set_rate(platform, true); | ||
384 | #endif | ||
385 | |||
386 | tegra_soctherm_gpu_tsens_invalidate(0); | ||
387 | |||
388 | if (!platform->clk_reset) { | ||
389 | platform->clk_reset = clk_get(dev, "gpu_gate"); | ||
390 | if (IS_ERR(platform->clk_reset)) { | ||
391 | nvgpu_err(g, "failed to get gpu reset clk"); | ||
392 | goto err_clk_on; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | if (!first) { | ||
397 | ret = clk_prepare_enable(platform->clk_reset); | ||
398 | if (ret) { | ||
399 | nvgpu_err(g, "could not turn on gpu_gate"); | ||
400 | goto err_clk_on; | ||
401 | } | ||
402 | |||
403 | ret = clk_prepare_enable(platform->clk[0]); | ||
404 | if (ret) { | ||
405 | nvgpu_err(g, "could not turn on gpu pll"); | ||
406 | goto err_clk_on; | ||
407 | } | ||
408 | ret = clk_prepare_enable(platform->clk[1]); | ||
409 | if (ret) { | ||
410 | nvgpu_err(g, "could not turn on pwr clock"); | ||
411 | goto err_clk_on; | ||
412 | } | ||
413 | |||
414 | if (platform->clk[3]) { | ||
415 | ret = clk_prepare_enable(platform->clk[3]); | ||
416 | if (ret) { | ||
417 | nvgpu_err(g, "could not turn on fuse clock"); | ||
418 | goto err_clk_on; | ||
419 | } | ||
420 | } | ||
421 | } | ||
422 | |||
423 | udelay(10); | ||
424 | |||
425 | platform->reset_assert(dev); | ||
426 | |||
427 | udelay(10); | ||
428 | |||
429 | tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0); | ||
430 | tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); | ||
431 | |||
432 | udelay(10); | ||
433 | |||
434 | clk_disable(platform->clk_reset); | ||
435 | platform->reset_deassert(dev); | ||
436 | clk_enable(platform->clk_reset); | ||
437 | |||
438 | /* Flush MC after boot/railgate/SC7 */ | ||
439 | tegra_mc_flush(MC_CLIENT_GPU); | ||
440 | |||
441 | udelay(10); | ||
442 | |||
443 | tegra_mc_flush_done(MC_CLIENT_GPU); | ||
444 | |||
445 | udelay(10); | ||
446 | |||
447 | return 0; | ||
448 | |||
449 | err_clk_on: | ||
450 | tegra_dvfs_rail_power_down(platform->gpu_rail); | ||
451 | |||
452 | return ret; | ||
453 | } | ||
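
/*
 * Note: gm20b_tegra_unrailgate() is roughly gm20b_tegra_railgate() run in
 * reverse: rail up, reference clocks back on, clamp released via
 * PMC_GPU_RG_CNTRL_0 with the reset cycled around the release, then an MC
 * flush/flush-done pair before the GPU is touched again.
 */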
454 | #endif | ||
455 | |||
456 | |||
457 | static struct { | ||
458 | char *name; | ||
459 | unsigned long default_rate; | ||
460 | } tegra_gk20a_clocks[] = { | ||
461 | {"gpu_ref", UINT_MAX}, | ||
462 | {"pll_p_out5", 204000000}, | ||
463 | {"emc", UINT_MAX}, | ||
464 | {"fuse", UINT_MAX}, | ||
465 | }; | ||
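
/*
 * Note: UINT_MAX as a default rate appears deliberate; clk_round_rate()
 * below clamps it to the highest rate each clock supports, so gpu_ref,
 * emc and fuse come up at their maximum while pll_p_out5 is pinned near
 * 204 MHz.
 */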
466 | |||
467 | |||
468 | |||
469 | /* | ||
470 | * gk20a_tegra_get_clocks() | ||
471 | * | ||
472 | * This function finds clocks in tegra platform and populates | ||
473 | * the clock information to gk20a platform data. | ||
474 | */ | ||
475 | |||
476 | static int gk20a_tegra_get_clocks(struct device *dev) | ||
477 | { | ||
478 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
479 | char devname[16]; | ||
480 | unsigned int i; | ||
481 | int ret = 0; | ||
482 | |||
483 | BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks)); | ||
484 | |||
485 | snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev)); | ||
486 | |||
487 | platform->num_clks = 0; | ||
488 | for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) { | ||
489 | long rate = tegra_gk20a_clocks[i].default_rate; | ||
490 | struct clk *c; | ||
491 | |||
492 | c = clk_get_sys(devname, tegra_gk20a_clocks[i].name); | ||
493 | if (IS_ERR(c)) { | ||
494 | ret = PTR_ERR(c); | ||
495 | goto err_get_clock; | ||
496 | } | ||
497 | rate = clk_round_rate(c, rate); | ||
498 | clk_set_rate(c, rate); | ||
499 | platform->clk[i] = c; | ||
500 | } | ||
501 | platform->num_clks = i; | ||
502 | |||
503 | return 0; | ||
504 | |||
505 | err_get_clock: | ||
506 | |||
507 | while (i--) | ||
508 | clk_put(platform->clk[i]); | ||
509 | return ret; | ||
510 | } | ||
511 | |||
512 | #if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) | ||
513 | static int gm20b_tegra_reset_assert(struct device *dev) | ||
514 | { | ||
515 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
516 | |||
517 | if (!platform->reset_control) { | ||
518 | WARN(1, "Reset control not initialized\n"); | ||
519 | return -ENOSYS; | ||
520 | } | ||
521 | |||
522 | return reset_control_assert(platform->reset_control); | ||
523 | } | ||
524 | |||
525 | static int gm20b_tegra_reset_deassert(struct device *dev) | ||
526 | { | ||
527 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
528 | |||
529 | if (!platform->reset_control) { | ||
530 | WARN(1, "Reset control not initialized\n"); | ||
531 | return -ENOSYS; | ||
532 | } | ||
533 | |||
534 | return reset_control_deassert(platform->reset_control); | ||
535 | } | ||
536 | #endif | ||
537 | |||
538 | static void gk20a_tegra_scale_init(struct device *dev) | ||
539 | { | ||
540 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
541 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
542 | struct gk20a_emc_params *emc_params; | ||
543 | struct gk20a *g = platform->g; | ||
544 | |||
545 | if (!profile) | ||
546 | return; | ||
547 | |||
548 | if (profile->private_data) | ||
549 | return; | ||
550 | |||
551 | emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params)); | ||
552 | if (!emc_params) | ||
553 | return; | ||
554 | |||
555 | emc_params->freq_last_set = -1; | ||
556 | gk20a_tegra_calibrate_emc(dev, emc_params); | ||
557 | |||
558 | #ifdef CONFIG_TEGRA_BWMGR | ||
559 | emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); | ||
560 | if (!emc_params->bwmgr_cl) { | ||
561 | nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__); | ||
562 | return; | ||
563 | } | ||
564 | #endif | ||
565 | |||
566 | profile->private_data = emc_params; | ||
567 | } | ||
568 | |||
569 | static void gk20a_tegra_scale_exit(struct device *dev) | ||
570 | { | ||
571 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
572 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
573 | struct gk20a_emc_params *emc_params; | ||
574 | |||
575 | if (!profile) | ||
576 | return; | ||
577 | |||
578 | emc_params = profile->private_data; | ||
579 | #ifdef CONFIG_TEGRA_BWMGR | ||
580 | tegra_bwmgr_unregister(emc_params->bwmgr_cl); | ||
581 | #endif | ||
582 | |||
583 | nvgpu_kfree(platform->g, profile->private_data); | ||
584 | } | ||
585 | |||
586 | void gk20a_tegra_debug_dump(struct device *dev) | ||
587 | { | ||
588 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
589 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
590 | struct gk20a *g = platform->g; | ||
591 | |||
592 | if (g->nvhost_dev) | ||
593 | nvgpu_nvhost_debug_dump_device(g->nvhost_dev); | ||
594 | #endif | ||
595 | } | ||
596 | |||
597 | int gk20a_tegra_busy(struct device *dev) | ||
598 | { | ||
599 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
600 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
601 | struct gk20a *g = platform->g; | ||
602 | |||
603 | if (g->nvhost_dev) | ||
604 | return nvgpu_nvhost_module_busy_ext(g->nvhost_dev); | ||
605 | #endif | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | void gk20a_tegra_idle(struct device *dev) | ||
610 | { | ||
611 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
612 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
613 | struct gk20a *g = platform->g; | ||
614 | |||
615 | if (g->nvhost_dev) | ||
616 | nvgpu_nvhost_module_idle_ext(g->nvhost_dev); | ||
617 | #endif | ||
618 | } | ||
619 | |||
620 | int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform) | ||
621 | { | ||
622 | struct gk20a *g = platform->g; | ||
623 | struct secure_page_buffer *secure_buffer = &platform->secure_buffer; | ||
624 | DEFINE_DMA_ATTRS(attrs); | ||
625 | dma_addr_t iova; | ||
626 | |||
627 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
628 | return 0; | ||
629 | |||
630 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); | ||
631 | (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova, | ||
632 | GFP_KERNEL, __DMA_ATTR(attrs)); | ||
633 | /* Some platforms disable VPR. In that case VPR allocations always | ||
634 | * fail. Just disable VPR usage in nvgpu in that case. */ | ||
635 | if (dma_mapping_error(&tegra_vpr_dev, iova)) | ||
636 | return 0; | ||
637 | |||
638 | secure_buffer->size = platform->secure_buffer_size; | ||
639 | secure_buffer->phys = iova; | ||
640 | secure_buffer->destroy = gk20a_tegra_secure_page_destroy; | ||
641 | |||
642 | g->ops.secure_alloc = gk20a_tegra_secure_alloc; | ||
643 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true); | ||
644 | |||
645 | return 0; | ||
646 | } | ||
647 | |||
648 | #ifdef CONFIG_COMMON_CLK | ||
649 | static struct clk *gk20a_clk_get(struct gk20a *g) | ||
650 | { | ||
651 | if (!g->clk.tegra_clk) { | ||
652 | struct clk *clk, *clk_parent; | ||
653 | char clk_dev_id[32]; | ||
654 | struct device *dev = dev_from_gk20a(g); | ||
655 | |||
656 | snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev)); | ||
657 | |||
658 | clk = clk_get_sys(clk_dev_id, "gpu"); | ||
659 | if (IS_ERR(clk)) { | ||
660 | nvgpu_err(g, "failed to get tegra gpu clk %s/gpu", | ||
661 | clk_dev_id); | ||
662 | return NULL; | ||
663 | } | ||
664 | |||
665 | clk_parent = clk_get_parent(clk); | ||
666 | if (IS_ERR_OR_NULL(clk_parent)) { | ||
667 | nvgpu_err(g, "failed to get tegra gpu clk parent %s/gpu", | ||
668 | clk_dev_id); | ||
669 | return NULL; | ||
670 | } | ||
671 | |||
672 | g->clk.tegra_clk = clk; | ||
673 | g->clk.tegra_clk_parent = clk_parent; | ||
674 | } | ||
675 | |||
676 | return g->clk.tegra_clk; | ||
677 | } | ||
678 | |||
679 | static int gm20b_clk_prepare_ops(struct clk_hw *hw) | ||
680 | { | ||
681 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
682 | return gm20b_clk_prepare(clk); | ||
683 | } | ||
684 | |||
685 | static void gm20b_clk_unprepare_ops(struct clk_hw *hw) | ||
686 | { | ||
687 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
688 | gm20b_clk_unprepare(clk); | ||
689 | } | ||
690 | |||
691 | static int gm20b_clk_is_prepared_ops(struct clk_hw *hw) | ||
692 | { | ||
693 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
694 | return gm20b_clk_is_prepared(clk); | ||
695 | } | ||
696 | |||
697 | static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate) | ||
698 | { | ||
699 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
700 | return gm20b_recalc_rate(clk, parent_rate); | ||
701 | } | ||
702 | |||
703 | static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate, | ||
704 | unsigned long parent_rate) | ||
705 | { | ||
706 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
707 | return gm20b_gpcclk_set_rate(clk, rate, parent_rate); | ||
708 | } | ||
709 | |||
710 | static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate, | ||
711 | unsigned long *parent_rate) | ||
712 | { | ||
713 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
714 | return gm20b_round_rate(clk, rate, parent_rate); | ||
715 | } | ||
716 | |||
717 | static const struct clk_ops gm20b_clk_ops = { | ||
718 | .prepare = gm20b_clk_prepare_ops, | ||
719 | .unprepare = gm20b_clk_unprepare_ops, | ||
720 | .is_prepared = gm20b_clk_is_prepared_ops, | ||
721 | .recalc_rate = gm20b_recalc_rate_ops, | ||
722 | .set_rate = gm20b_gpcclk_set_rate_ops, | ||
723 | .round_rate = gm20b_round_rate_ops, | ||
724 | }; | ||
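
/*
 * The shims above assume a container_of()-style helper, presumably along
 * the lines of:
 *
 *	#define to_clk_gk20a(hw) container_of((hw), struct clk_gk20a, hw)
 *
 * so each common-clock callback recovers the driver's clock state from
 * the embedded clk_hw and stays a one-line adapter.
 */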
725 | |||
726 | static int gm20b_register_gpcclk(struct gk20a *g) | ||
727 | { | ||
728 | const char *parent_name = "pllg_ref"; | ||
729 | struct clk_gk20a *clk = &g->clk; | ||
730 | struct clk_init_data init; | ||
731 | struct clk *c; | ||
732 | int err = 0; | ||
733 | |||
734 | /* make sure the clock is available */ | ||
735 | if (!gk20a_clk_get(g)) | ||
736 | return -ENOSYS; | ||
737 | |||
738 | err = gm20b_init_clk_setup_sw(g); | ||
739 | if (err) | ||
740 | return err; | ||
741 | |||
742 | init.name = "gpcclk"; | ||
743 | init.ops = &gm20b_clk_ops; | ||
744 | init.parent_names = &parent_name; | ||
745 | init.num_parents = 1; | ||
746 | init.flags = 0; | ||
747 | |||
748 | /* Data in .init is copied by clk_register(), so stack variable OK */ | ||
749 | clk->hw.init = &init; | ||
750 | c = clk_register(dev_from_gk20a(g), &clk->hw); | ||
751 | if (IS_ERR(c)) { | ||
752 | nvgpu_err(g, "Failed to register GPCPLL clock"); | ||
753 | return -EINVAL; | ||
754 | } | ||
755 | |||
756 | clk->g = g; | ||
757 | clk_register_clkdev(c, "gpcclk", "gpcclk"); | ||
758 | |||
759 | return err; | ||
760 | } | ||
761 | #endif /* CONFIG_COMMON_CLK */ | ||
762 | |||
763 | static int gk20a_tegra_probe(struct device *dev) | ||
764 | { | ||
765 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
766 | struct device_node *np = dev->of_node; | ||
767 | bool joint_xpu_rail = false; | ||
768 | int ret; | ||
769 | struct gk20a *g = platform->g; | ||
770 | |||
771 | #ifdef CONFIG_COMMON_CLK | ||
772 | /* DVFS is not guaranteed to be initialized at the time of probe on | ||
773 | * kernels with Common Clock Framework enabled. | ||
774 | */ | ||
775 | if (!platform->gpu_rail) { | ||
776 | platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME); | ||
777 | if (!platform->gpu_rail) { | ||
778 | nvgpu_log_info(g, "deferring probe: no gpu_rail"); | ||
779 | return -EPROBE_DEFER; | ||
780 | } | ||
781 | } | ||
782 | |||
783 | if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) { | ||
784 | nvgpu_log_info(g, "deferring probe: gpu_rail not ready"); | ||
785 | return -EPROBE_DEFER; | ||
786 | } | ||
787 | #endif | ||
788 | |||
789 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
790 | ret = nvgpu_get_nvhost_dev(platform->g); | ||
791 | if (ret) | ||
792 | return ret; | ||
793 | #endif | ||
794 | |||
795 | #ifdef CONFIG_OF | ||
796 | joint_xpu_rail = of_property_read_bool(of_chosen, | ||
797 | "nvidia,tegra-joint_xpu_rail"); | ||
798 | #endif | ||
799 | |||
800 | if (joint_xpu_rail) { | ||
801 | nvgpu_log_info(g, "XPU rails are joint"); | ||
802 | platform->can_railgate_init = false; | ||
803 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
804 | } | ||
805 | |||
806 | platform->g->clk.gpc_pll.id = GK20A_GPC_PLL; | ||
807 | if (tegra_get_chip_id() == TEGRA210) { | ||
808 | /* WAR for bug 1547668: Disable railgating and scaling | ||
809 | irrespective of platform data if the rework was not made. */ | ||
810 | np = of_find_node_by_path("/gpu-dvfs-rework"); | ||
811 | if (!(np && of_device_is_available(np))) { | ||
812 | platform->devfreq_governor = ""; | ||
813 | dev_warn(dev, "board does not support scaling"); | ||
814 | } | ||
815 | platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1; | ||
816 | if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p) | ||
817 | platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1; | ||
818 | } | ||
819 | |||
820 | if (tegra_get_chip_id() == TEGRA132) | ||
821 | platform->soc_name = "tegra13x"; | ||
822 | |||
823 | gk20a_tegra_get_clocks(dev); | ||
824 | nvgpu_linux_init_clk_support(platform->g); | ||
825 | ret = gk20a_tegra_init_secure_alloc(platform); | ||
826 | if (ret) | ||
827 | return ret; | ||
828 | |||
829 | if (platform->clk_register) { | ||
830 | ret = platform->clk_register(platform->g); | ||
831 | if (ret) | ||
832 | return ret; | ||
833 | } | ||
834 | |||
835 | return 0; | ||
836 | } | ||
837 | |||
838 | static int gk20a_tegra_late_probe(struct device *dev) | ||
839 | { | ||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | static int gk20a_tegra_remove(struct device *dev) | ||
844 | { | ||
845 | /* deinitialise tegra specific scaling quirks */ | ||
846 | gk20a_tegra_scale_exit(dev); | ||
847 | |||
848 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
849 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
850 | #endif | ||
851 | |||
852 | return 0; | ||
853 | } | ||
854 | |||
855 | static int gk20a_tegra_suspend(struct device *dev) | ||
856 | { | ||
857 | tegra_edp_notify_gpu_load(0, 0); | ||
858 | return 0; | ||
859 | } | ||
860 | |||
861 | #if defined(CONFIG_COMMON_CLK) | ||
862 | static long gk20a_round_clk_rate(struct device *dev, unsigned long rate) | ||
863 | { | ||
864 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
865 | struct gk20a *g = platform->g; | ||
866 | |||
867 | /* make sure the clock is available */ | ||
868 | if (!gk20a_clk_get(g)) | ||
869 | return rate; | ||
870 | |||
871 | return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate); | ||
872 | } | ||
873 | |||
874 | static int gk20a_clk_get_freqs(struct device *dev, | ||
875 | unsigned long **freqs, int *num_freqs) | ||
876 | { | ||
877 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
878 | struct gk20a *g = platform->g; | ||
879 | |||
880 | /* make sure the clock is available */ | ||
881 | if (!gk20a_clk_get(g)) | ||
882 | return -ENOSYS; | ||
883 | |||
884 | return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), | ||
885 | freqs, num_freqs); | ||
886 | } | ||
887 | #endif | ||
888 | |||
889 | struct gk20a_platform gm20b_tegra_platform = { | ||
890 | .has_syncpoints = true, | ||
891 | .aggressive_sync_destroy_thresh = 64, | ||
892 | |||
893 | /* power management configuration */ | ||
894 | .railgate_delay_init = 500, | ||
895 | .can_railgate_init = true, | ||
896 | .can_elpg_init = true, | ||
897 | .enable_slcg = true, | ||
898 | .enable_blcg = true, | ||
899 | .enable_elcg = true, | ||
900 | .can_slcg = true, | ||
901 | .can_blcg = true, | ||
902 | .can_elcg = true, | ||
903 | .enable_elpg = true, | ||
904 | .enable_aelpg = true, | ||
905 | .enable_perfmon = true, | ||
906 | .ptimer_src_freq = 19200000, | ||
907 | |||
908 | .force_reset_in_do_idle = false, | ||
909 | |||
910 | .ch_wdt_timeout_ms = 5000, | ||
911 | |||
912 | .probe = gk20a_tegra_probe, | ||
913 | .late_probe = gk20a_tegra_late_probe, | ||
914 | .remove = gk20a_tegra_remove, | ||
915 | /* power management callbacks */ | ||
916 | .suspend = gk20a_tegra_suspend, | ||
917 | |||
918 | #if defined(CONFIG_TEGRA_DVFS) | ||
919 | .railgate = gm20b_tegra_railgate, | ||
920 | .unrailgate = gm20b_tegra_unrailgate, | ||
921 | .is_railgated = gk20a_tegra_is_railgated, | ||
922 | #endif | ||
923 | |||
924 | .busy = gk20a_tegra_busy, | ||
925 | .idle = gk20a_tegra_idle, | ||
926 | |||
927 | #if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) | ||
928 | .reset_assert = gm20b_tegra_reset_assert, | ||
929 | .reset_deassert = gm20b_tegra_reset_deassert, | ||
930 | #else | ||
931 | .reset_assert = gk20a_tegra_reset_assert, | ||
932 | .reset_deassert = gk20a_tegra_reset_deassert, | ||
933 | #endif | ||
934 | |||
935 | #if defined(CONFIG_COMMON_CLK) | ||
936 | .clk_round_rate = gk20a_round_clk_rate, | ||
937 | .get_clk_freqs = gk20a_clk_get_freqs, | ||
938 | #endif | ||
939 | |||
940 | #ifdef CONFIG_COMMON_CLK | ||
941 | .clk_register = gm20b_register_gpcclk, | ||
942 | #endif | ||
943 | |||
944 | /* frequency scaling configuration */ | ||
945 | .initscale = gk20a_tegra_scale_init, | ||
946 | .prescale = gk20a_tegra_prescale, | ||
947 | #ifdef CONFIG_TEGRA_BWMGR | ||
948 | .postscale = gm20b_tegra_postscale, | ||
949 | #endif | ||
950 | .devfreq_governor = "nvhost_podgov", | ||
951 | .qos_notify = gk20a_scale_qos_notify, | ||
952 | |||
953 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | ||
954 | |||
955 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
956 | .has_cde = true, | ||
957 | #endif | ||
958 | |||
959 | .soc_name = "tegra21x", | ||
960 | |||
961 | .unified_memory = true, | ||
962 | .dma_mask = DMA_BIT_MASK(34), | ||
963 | .force_128K_pmu_vm = true, | ||
964 | |||
965 | .secure_buffer_size = 335872, | ||
966 | }; | ||
diff --git a/include/os/linux/platform_gk20a_tegra.h b/include/os/linux/platform_gk20a_tegra.h new file mode 100644 index 0000000..f7d5040 --- /dev/null +++ b/include/os/linux/platform_gk20a_tegra.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * GK20A Platform (SoC) Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_ | ||
17 | #define _NVGPU_PLATFORM_GK20A_TEGRA_H_ | ||
18 | |||
19 | struct gk20a_platform; | ||
20 | |||
21 | int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform); | ||
22 | |||
23 | #endif | ||
diff --git a/include/os/linux/platform_gp10b.h b/include/os/linux/platform_gp10b.h new file mode 100644 index 0000000..d256d12 --- /dev/null +++ b/include/os/linux/platform_gp10b.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * GP10B Platform (SoC) Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _GP10B_PLATFORM_H_ | ||
26 | #define _GP10B_PLATFORM_H_ | ||
27 | |||
28 | struct device; | ||
29 | |||
30 | int gp10b_tegra_get_clocks(struct device *dev); | ||
31 | int gp10b_tegra_reset_assert(struct device *dev); | ||
32 | int gp10b_tegra_reset_deassert(struct device *dev); | ||
33 | void gp10b_tegra_scale_init(struct device *dev); | ||
34 | long gp10b_round_clk_rate(struct device *dev, unsigned long rate); | ||
35 | int gp10b_clk_get_freqs(struct device *dev, | ||
36 | unsigned long **freqs, int *num_freqs); | ||
37 | void gp10b_tegra_prescale(struct device *dev); | ||
38 | void gp10b_tegra_postscale(struct device *pdev, unsigned long freq); | ||
39 | #endif | ||
diff --git a/include/os/linux/platform_gp10b_tegra.c b/include/os/linux/platform_gp10b_tegra.c new file mode 100644 index 0000000..9bf8d63 --- /dev/null +++ b/include/os/linux/platform_gp10b_tegra.c | |||
@@ -0,0 +1,510 @@ | |||
1 | /* | ||
2 | * GP10B Tegra Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/of_platform.h> | ||
17 | #include <linux/debugfs.h> | ||
18 | #include <linux/dma-buf.h> | ||
19 | #include <linux/nvmap.h> | ||
20 | #include <linux/reset.h> | ||
21 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
22 | |||
23 | #include <uapi/linux/nvgpu.h> | ||
24 | |||
25 | #include <soc/tegra/tegra_bpmp.h> | ||
26 | #include <soc/tegra/tegra_powergate.h> | ||
27 | #include <soc/tegra/tegra-bpmp-dvfs.h> | ||
28 | |||
29 | #include <dt-bindings/memory/tegra-swgroup.h> | ||
30 | |||
31 | #include <nvgpu/kmem.h> | ||
32 | #include <nvgpu/bug.h> | ||
33 | #include <nvgpu/enabled.h> | ||
34 | #include <nvgpu/hashtable.h> | ||
35 | #include <nvgpu/gk20a.h> | ||
36 | #include <nvgpu/nvhost.h> | ||
37 | |||
38 | #include "os_linux.h" | ||
39 | |||
40 | #include "clk.h" | ||
41 | |||
42 | #include "platform_gk20a.h" | ||
43 | #include "platform_gk20a_tegra.h" | ||
44 | #include "platform_gp10b.h" | ||
45 | #include "platform_gp10b_tegra.h" | ||
46 | #include "scale.h" | ||
47 | |||
48 | /* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */ | ||
49 | #define GP10B_FREQ_SELECT_STEP 8 | ||
50 | /* Allow limited set of frequencies to be available */ | ||
51 | #define GP10B_NUM_SUPPORTED_FREQS 15 | ||
52 | /* Max number of freq supported in h/w */ | ||
53 | #define GP10B_MAX_SUPPORTED_FREQS 120 | ||
54 | static unsigned long | ||
55 | gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; | ||
56 | |||
57 | static bool freq_table_init_complete; | ||
58 | static int num_supported_freq; | ||
59 | |||
60 | #define TEGRA_GP10B_BW_PER_FREQ 64 | ||
61 | #define TEGRA_DDR4_BW_PER_FREQ 16 | ||
62 | |||
63 | #define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ) | ||
64 | |||
65 | #define GPCCLK_INIT_RATE 1000000000 | ||
66 | |||
67 | static struct { | ||
68 | char *name; | ||
69 | unsigned long default_rate; | ||
70 | } tegra_gp10b_clocks[] = { | ||
71 | {"gpu", GPCCLK_INIT_RATE}, | ||
72 | {"gpu_sys", 204000000} }; | ||
73 | |||
74 | /* | ||
75 | * gp10b_tegra_get_clocks() | ||
76 | * | ||
77 | * This function looks up the clocks on the Tegra platform and | ||
78 | * populates the clock information in the gp10b platform data. | ||
79 | */ | ||
80 | |||
81 | int gp10b_tegra_get_clocks(struct device *dev) | ||
82 | { | ||
83 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
84 | unsigned int i; | ||
85 | |||
86 | platform->num_clks = 0; | ||
87 | for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) { | ||
88 | long rate = tegra_gp10b_clocks[i].default_rate; | ||
89 | struct clk *c; | ||
90 | |||
91 | c = clk_get(dev, tegra_gp10b_clocks[i].name); | ||
92 | if (IS_ERR(c)) { | ||
93 | nvgpu_err(platform->g, "cannot get clock %s", | ||
94 | tegra_gp10b_clocks[i].name); | ||
95 | } else { | ||
96 | clk_set_rate(c, rate); | ||
97 | platform->clk[i] = c; | ||
98 | } | ||
99 | } | ||
100 | platform->num_clks = i; | ||
101 | |||
102 | if (platform->clk[0]) { | ||
103 | int clk_id = tegra_bpmp_dvfs_get_clk_id(dev->of_node, | ||
104 | tegra_gp10b_clocks[0].name); | ||
105 | if (clk_id > 0) | ||
106 | platform->maxmin_clk_id = clk_id; | ||
107 | } | ||
108 | |||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | void gp10b_tegra_scale_init(struct device *dev) | ||
113 | { | ||
114 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
115 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
116 | struct tegra_bwmgr_client *bwmgr_handle; | ||
117 | |||
118 | if (!profile) | ||
119 | return; | ||
120 | |||
121 | if (profile->private_data) | ||
122 | return; | ||
123 | |||
124 | bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); | ||
125 | if (!bwmgr_handle) | ||
126 | return; | ||
127 | |||
128 | profile->private_data = (void *)bwmgr_handle; | ||
129 | } | ||
130 | |||
131 | static void gp10b_tegra_scale_exit(struct device *dev) | ||
132 | { | ||
133 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
134 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
135 | |||
136 | if (profile && profile->private_data) | ||
137 | tegra_bwmgr_unregister( | ||
138 | (struct tegra_bwmgr_client *)profile->private_data); | ||
139 | } | ||
140 | |||
141 | static int gp10b_tegra_probe(struct device *dev) | ||
142 | { | ||
143 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
144 | bool joint_xpu_rail = false; | ||
145 | struct gk20a *g = platform->g; | ||
146 | int ret; | ||
147 | |||
148 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
149 | ret = nvgpu_get_nvhost_dev(platform->g); | ||
150 | if (ret) | ||
151 | return ret; | ||
152 | #endif | ||
153 | |||
154 | ret = gk20a_tegra_init_secure_alloc(platform); | ||
155 | if (ret) | ||
156 | return ret; | ||
157 | |||
158 | platform->disable_bigpage = !device_is_iommuable(dev); | ||
159 | |||
160 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
161 | = false; | ||
164 | |||
165 | platform->g->gr.ctx_vars.force_preemption_gfxp = false; | ||
166 | platform->g->gr.ctx_vars.force_preemption_cilp = false; | ||
167 | |||
168 | #ifdef CONFIG_OF | ||
169 | joint_xpu_rail = of_property_read_bool(of_chosen, | ||
170 | "nvidia,tegra-joint_xpu_rail"); | ||
171 | #endif | ||
172 | |||
173 | if (joint_xpu_rail) { | ||
174 | nvgpu_log_info(g, "XPU rails are joint\n"); | ||
175 | platform->can_railgate_init = false; | ||
176 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
177 | } | ||
178 | |||
179 | gp10b_tegra_get_clocks(dev); | ||
180 | nvgpu_linux_init_clk_support(platform->g); | ||
181 | |||
182 | nvgpu_mutex_init(&platform->clk_get_freq_lock); | ||
183 | |||
184 | platform->g->ops.clk.support_clk_freq_controller = true; | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | static int gp10b_tegra_late_probe(struct device *dev) | ||
190 | { | ||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | static int gp10b_tegra_remove(struct device *dev) | ||
195 | { | ||
196 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
197 | |||
198 | /* deinitialise tegra specific scaling quirks */ | ||
199 | gp10b_tegra_scale_exit(dev); | ||
200 | |||
201 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
202 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
203 | #endif | ||
204 | |||
205 | nvgpu_mutex_destroy(&platform->clk_get_freq_lock); | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static bool gp10b_tegra_is_railgated(struct device *dev) | ||
211 | { | ||
212 | bool ret = false; | ||
213 | |||
214 | if (tegra_bpmp_running()) | ||
215 | ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU); | ||
216 | |||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | static int gp10b_tegra_railgate(struct device *dev) | ||
221 | { | ||
222 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
223 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
224 | |||
225 | /* remove emc frequency floor */ | ||
226 | if (profile) | ||
227 | tegra_bwmgr_set_emc( | ||
228 | (struct tegra_bwmgr_client *)profile->private_data, | ||
229 | 0, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
230 | |||
231 | if (tegra_bpmp_running() && | ||
232 | tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) { | ||
233 | int i; | ||
234 | for (i = 0; i < platform->num_clks; i++) { | ||
235 | if (platform->clk[i]) | ||
236 | clk_disable_unprepare(platform->clk[i]); | ||
237 | } | ||
238 | tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU); | ||
239 | } | ||
240 | return 0; | ||
241 | } | ||
242 | |||
243 | static int gp10b_tegra_unrailgate(struct device *dev) | ||
244 | { | ||
245 | int ret = 0; | ||
246 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
247 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
248 | |||
249 | if (tegra_bpmp_running()) { | ||
250 | int i; | ||
251 | ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU); | ||
252 | for (i = 0; i < platform->num_clks; i++) { | ||
253 | if (platform->clk[i]) | ||
254 | clk_prepare_enable(platform->clk[i]); | ||
255 | } | ||
256 | } | ||
257 | |||
258 | /* to start with, set emc frequency floor to max rate */ | ||
259 | if (profile) | ||
260 | tegra_bwmgr_set_emc( | ||
261 | (struct tegra_bwmgr_client *)profile->private_data, | ||
262 | tegra_bwmgr_get_max_emc_rate(), | ||
263 | TEGRA_BWMGR_SET_EMC_FLOOR); | ||
264 | return ret; | ||
265 | } | ||
266 | |||
267 | static int gp10b_tegra_suspend(struct device *dev) | ||
268 | { | ||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | int gp10b_tegra_reset_assert(struct device *dev) | ||
273 | { | ||
274 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
275 | int ret = 0; | ||
276 | |||
277 | if (!platform->reset_control) | ||
278 | return -EINVAL; | ||
279 | |||
280 | ret = reset_control_assert(platform->reset_control); | ||
281 | |||
282 | return ret; | ||
283 | } | ||
284 | |||
285 | int gp10b_tegra_reset_deassert(struct device *dev) | ||
286 | { | ||
287 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
288 | int ret = 0; | ||
289 | |||
290 | if (!platform->reset_control) | ||
291 | return -EINVAL; | ||
292 | |||
293 | ret = reset_control_deassert(platform->reset_control); | ||
294 | |||
295 | return ret; | ||
296 | } | ||
297 | |||
298 | void gp10b_tegra_prescale(struct device *dev) | ||
299 | { | ||
300 | struct gk20a *g = get_gk20a(dev); | ||
301 | u32 avg = 0; | ||
302 | |||
303 | nvgpu_log_fn(g, " "); | ||
304 | |||
305 | nvgpu_pmu_load_norm(g, &avg); | ||
306 | |||
307 | nvgpu_log_fn(g, "done"); | ||
308 | } | ||
309 | |||
310 | void gp10b_tegra_postscale(struct device *pdev, | ||
311 | unsigned long freq) | ||
312 | { | ||
313 | struct gk20a_platform *platform = gk20a_get_platform(pdev); | ||
314 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
315 | struct gk20a *g = get_gk20a(pdev); | ||
316 | unsigned long emc_rate; | ||
317 | |||
318 | nvgpu_log_fn(g, " "); | ||
319 | if (profile && profile->private_data && | ||
320 | !platform->is_railgated(pdev)) { | ||
321 | unsigned long emc_scale; | ||
322 | |||
323 | if (freq <= gp10b_freq_table[0]) | ||
324 | emc_scale = 0; | ||
325 | else | ||
326 | emc_scale = g->emc3d_ratio; | ||
327 | |||
328 | emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; | ||
329 | |||
330 | if (emc_rate > tegra_bwmgr_get_max_emc_rate()) | ||
331 | emc_rate = tegra_bwmgr_get_max_emc_rate(); | ||
332 | |||
333 | tegra_bwmgr_set_emc( | ||
334 | (struct tegra_bwmgr_client *)profile->private_data, | ||
335 | emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
336 | } | ||
337 | nvgpu_log_fn(g, "done"); | ||
338 | } | ||
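The EMC floor above is a pure ratio computation: the GPU frequency times the GPU-to-DDR4 bandwidth ratio (EMC_BW_RATIO = 64 / 16 = 4) times the per-chip emc3d scale factor, divided by 1000, so emc3d_ratio is effectively a value in thousandths. A minimal standalone sketch of the arithmetic, assuming an emc3d_ratio of 500 (the value the gv11b platform sets later in this series):

```c
#include <stdio.h>

/* Worked example of the EMC floor formula above:
 *   emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000
 * with EMC_BW_RATIO = 64 / 16 = 4 and an assumed emc3d_ratio of 500.
 */
int main(void)
{
	unsigned long freq = 1000000000UL;	/* 1 GHz GPU clock target */
	unsigned long emc_scale = 500;		/* assumed g->emc3d_ratio */
	unsigned long emc_rate;

	emc_rate = (freq * 4UL * emc_scale) / 1000UL;
	printf("EMC floor: %lu Hz\n", emc_rate);	/* prints 2000000000 */
	return 0;
}
```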
339 | |||
340 | long gp10b_round_clk_rate(struct device *dev, unsigned long rate) | ||
341 | { | ||
342 | struct gk20a *g = get_gk20a(dev); | ||
343 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
344 | unsigned long *freq_table = profile->devfreq_profile.freq_table; | ||
345 | int max_states = profile->devfreq_profile.max_state; | ||
346 | int i; | ||
347 | |||
348 | for (i = 0; i < max_states; ++i) | ||
349 | if (freq_table[i] >= rate) | ||
350 | return freq_table[i]; | ||
351 | |||
352 | return freq_table[max_states - 1]; | ||
353 | } | ||
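The rounding policy here is round-up: the first table entry at or above the requested rate wins, and a request above the fastest entry clips to the table maximum. A hedged sketch of the same selection over a made-up three-entry table:

```c
/* Sketch of the round-up selection in gp10b_round_clk_rate() over a
 * hypothetical three-entry table (the rates are made up). */
static long round_rate_example(unsigned long rate)
{
	static const unsigned long freq_table[] = {
		114750000UL, 204000000UL, 306000000UL };
	int max_states = 3;
	int i;

	for (i = 0; i < max_states; ++i)
		if (freq_table[i] >= rate)
			return freq_table[i];	/* first entry >= request */

	/* request is above the fastest rate: clip to the maximum */
	return freq_table[max_states - 1];
}
```

With this table, a 150 MHz request returns 204 MHz, and a 400 MHz request clips to 306 MHz.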
354 | |||
355 | int gp10b_clk_get_freqs(struct device *dev, | ||
356 | unsigned long **freqs, int *num_freqs) | ||
357 | { | ||
358 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
359 | struct gk20a *g = platform->g; | ||
360 | unsigned long max_rate; | ||
361 | unsigned long new_rate = 0, prev_rate = 0; | ||
362 | int i, freq_counter = 0; | ||
363 | int sel_freq_cnt; | ||
364 | unsigned long loc_freq_table[GP10B_MAX_SUPPORTED_FREQS]; | ||
365 | |||
366 | nvgpu_mutex_acquire(&platform->clk_get_freq_lock); | ||
367 | |||
368 | if (freq_table_init_complete) { | ||
369 | |||
370 | *freqs = gp10b_freq_table; | ||
371 | *num_freqs = num_supported_freq; | ||
372 | |||
373 | nvgpu_mutex_release(&platform->clk_get_freq_lock); | ||
374 | |||
375 | return 0; | ||
376 | } | ||
377 | |||
378 | max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); | ||
379 | |||
380 | /* | ||
381 | * Walk the h/w frequency table and update the local table | ||
382 | */ | ||
383 | for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { | ||
384 | prev_rate = new_rate; | ||
385 | new_rate = clk_round_rate(platform->clk[0], | ||
386 | prev_rate + 1); | ||
387 | loc_freq_table[i] = new_rate; | ||
388 | if (new_rate == max_rate) | ||
389 | break; | ||
390 | } | ||
391 | freq_counter = i + 1; | ||
392 | WARN_ON(freq_counter > GP10B_MAX_SUPPORTED_FREQS); | ||
393 | |||
394 | /* | ||
395 | * If the number of achievable frequencies is less than or | ||
396 | * equal to GP10B_NUM_SUPPORTED_FREQS, select all frequencies | ||
397 | * else, select one out of every 8 frequencies | ||
398 | */ | ||
399 | if (freq_counter <= GP10B_NUM_SUPPORTED_FREQS) { | ||
400 | for (sel_freq_cnt = 0; sel_freq_cnt < freq_counter; ++sel_freq_cnt) | ||
401 | gp10b_freq_table[sel_freq_cnt] = | ||
402 | loc_freq_table[sel_freq_cnt]; | ||
403 | } else { | ||
404 | /* | ||
405 | * Walk the h/w frequency table and only select | ||
406 | * GP10B_FREQ_SELECT_STEP'th frequencies and | ||
407 | * add MAX freq to last | ||
408 | */ | ||
409 | sel_freq_cnt = 0; | ||
410 | for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { | ||
411 | new_rate = loc_freq_table[i]; | ||
412 | |||
413 | if (i % GP10B_FREQ_SELECT_STEP == 0 || | ||
414 | new_rate == max_rate) { | ||
415 | gp10b_freq_table[sel_freq_cnt++] = | ||
416 | new_rate; | ||
417 | |||
418 | if (new_rate == max_rate) | ||
419 | break; | ||
420 | } | ||
421 | } | ||
422 | WARN_ON(sel_freq_cnt > ARRAY_SIZE(gp10b_freq_table)); | ||
423 | } | ||
424 | |||
425 | /* Fill freq table */ | ||
426 | *freqs = gp10b_freq_table; | ||
427 | *num_freqs = sel_freq_cnt; | ||
428 | num_supported_freq = sel_freq_cnt; | ||
429 | |||
430 | freq_table_init_complete = true; | ||
431 | |||
432 | nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n", | ||
433 | gp10b_freq_table[0], max_rate, *num_freqs); | ||
434 | |||
435 | nvgpu_mutex_release(&platform->clk_get_freq_lock); | ||
436 | |||
437 | return 0; | ||
438 | } | ||
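When more than GP10B_NUM_SUPPORTED_FREQS rates are achievable, the code keeps every GP10B_FREQ_SELECT_STEP'th entry and always finishes with the maximum. A minimal sketch of that decimation over a synthetic ascending table (the sizes and values are assumptions, not real gp10b rates):

```c
/* Sketch of the every-STEP'th-plus-max decimation above, over a
 * synthetic ascending table; sizes and values are illustrative. */
#define STEP 8

static int decimate(const unsigned long *in, int n, unsigned long *out)
{
	unsigned long max_rate = in[n - 1];
	int i, count = 0;

	for (i = 0; i < n; ++i) {
		if (i % STEP == 0 || in[i] == max_rate) {
			out[count++] = in[i];
			if (in[i] == max_rate)
				break;
		}
	}
	return count;	/* n = 20 keeps indices 0, 8, 16 and 19 */
}
```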
439 | |||
440 | struct gk20a_platform gp10b_tegra_platform = { | ||
441 | .has_syncpoints = true, | ||
442 | |||
443 | /* power management configuration */ | ||
444 | .railgate_delay_init = 500, | ||
445 | |||
446 | /* ldiv slowdown factor */ | ||
447 | .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16, | ||
448 | |||
449 | /* power management configuration */ | ||
450 | .can_railgate_init = true, | ||
451 | .enable_elpg = true, | ||
452 | .can_elpg_init = true, | ||
453 | .enable_blcg = true, | ||
454 | .enable_slcg = true, | ||
455 | .enable_elcg = true, | ||
456 | .can_slcg = true, | ||
457 | .can_blcg = true, | ||
458 | .can_elcg = true, | ||
459 | .enable_aelpg = true, | ||
460 | .enable_perfmon = true, | ||
461 | |||
462 | /* ptimer src frequency in hz */ | ||
463 | .ptimer_src_freq = 31250000, | ||
464 | |||
465 | .ch_wdt_timeout_ms = 5000, | ||
466 | |||
467 | .probe = gp10b_tegra_probe, | ||
468 | .late_probe = gp10b_tegra_late_probe, | ||
469 | .remove = gp10b_tegra_remove, | ||
470 | |||
471 | /* power management callbacks */ | ||
472 | .suspend = gp10b_tegra_suspend, | ||
473 | .railgate = gp10b_tegra_railgate, | ||
474 | .unrailgate = gp10b_tegra_unrailgate, | ||
475 | .is_railgated = gp10b_tegra_is_railgated, | ||
476 | |||
477 | .busy = gk20a_tegra_busy, | ||
478 | .idle = gk20a_tegra_idle, | ||
479 | |||
480 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | ||
481 | |||
482 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
483 | .has_cde = true, | ||
484 | #endif | ||
485 | |||
486 | .clk_round_rate = gp10b_round_clk_rate, | ||
487 | .get_clk_freqs = gp10b_clk_get_freqs, | ||
488 | |||
489 | /* frequency scaling configuration */ | ||
490 | .initscale = gp10b_tegra_scale_init, | ||
491 | .prescale = gp10b_tegra_prescale, | ||
492 | .postscale = gp10b_tegra_postscale, | ||
493 | .devfreq_governor = "nvhost_podgov", | ||
494 | |||
495 | .qos_notify = gk20a_scale_qos_notify, | ||
496 | |||
497 | .reset_assert = gp10b_tegra_reset_assert, | ||
498 | .reset_deassert = gp10b_tegra_reset_deassert, | ||
499 | |||
500 | .force_reset_in_do_idle = false, | ||
501 | |||
502 | .soc_name = "tegra18x", | ||
503 | |||
504 | .unified_memory = true, | ||
505 | .dma_mask = DMA_BIT_MASK(36), | ||
506 | |||
507 | .ltc_streamid = TEGRA_SID_GPUB, | ||
508 | |||
509 | .secure_buffer_size = 401408, | ||
510 | }; | ||
diff --git a/include/os/linux/platform_gp10b_tegra.h b/include/os/linux/platform_gp10b_tegra.h new file mode 100644 index 0000000..85b46b9 --- /dev/null +++ b/include/os/linux/platform_gp10b_tegra.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _PLATFORM_GP10B_TEGRA_H_ | ||
18 | #define _PLATFORM_GP10B_TEGRA_H_ | ||
19 | |||
20 | #include "gp10b/gr_gp10b.h" | ||
21 | |||
22 | #endif | ||
diff --git a/include/os/linux/platform_gv11b_tegra.c b/include/os/linux/platform_gv11b_tegra.c new file mode 100644 index 0000000..6c9d0f5 --- /dev/null +++ b/include/os/linux/platform_gv11b_tegra.c | |||
@@ -0,0 +1,331 @@ | |||
1 | /* | ||
2 | * GV11B Tegra Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/of_platform.h> | ||
20 | #include <linux/debugfs.h> | ||
21 | #include <linux/dma-buf.h> | ||
22 | #include <linux/nvmap.h> | ||
23 | #include <linux/reset.h> | ||
24 | #include <linux/hashtable.h> | ||
25 | #include <linux/clk.h> | ||
26 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
27 | |||
28 | #include <nvgpu/gk20a.h> | ||
29 | #include <nvgpu/nvhost.h> | ||
30 | |||
31 | #include <uapi/linux/nvgpu.h> | ||
32 | |||
33 | #include <soc/tegra/tegra_bpmp.h> | ||
34 | #include <soc/tegra/tegra_powergate.h> | ||
35 | |||
36 | #include "platform_gk20a.h" | ||
37 | #include "clk.h" | ||
38 | #include "scale.h" | ||
39 | |||
40 | #include "platform_gp10b.h" | ||
41 | #include "platform_gp10b_tegra.h" | ||
42 | |||
43 | #include "os_linux.h" | ||
44 | #include "platform_gk20a_tegra.h" | ||
45 | #include "gv11b/gr_gv11b.h" | ||
46 | |||
47 | #define EMC3D_GV11B_RATIO 500 | ||
48 | |||
49 | void gv11b_tegra_scale_init(struct device *dev) | ||
50 | { | ||
51 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
52 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
53 | |||
54 | if (!profile) | ||
55 | return; | ||
56 | |||
57 | platform->g->emc3d_ratio = EMC3D_GV11B_RATIO; | ||
58 | |||
59 | gp10b_tegra_scale_init(dev); | ||
60 | } | ||
61 | |||
62 | static void gv11b_tegra_scale_exit(struct device *dev) | ||
63 | { | ||
64 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
65 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
66 | |||
67 | if (profile && profile->private_data) | ||
68 | tegra_bwmgr_unregister( | ||
69 | (struct tegra_bwmgr_client *)profile->private_data); | ||
70 | } | ||
71 | |||
72 | static int gv11b_tegra_probe(struct device *dev) | ||
73 | { | ||
74 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
75 | int err; | ||
76 | bool joint_xpu_rail = false; | ||
77 | struct gk20a *g = platform->g; | ||
78 | |||
79 | err = nvgpu_nvhost_syncpt_init(platform->g); | ||
80 | if (err) { | ||
81 | if (err != -ENOSYS) | ||
82 | return err; | ||
83 | } | ||
84 | |||
85 | err = gk20a_tegra_init_secure_alloc(platform); | ||
86 | if (err) | ||
87 | return err; | ||
88 | |||
89 | platform->disable_bigpage = !device_is_iommuable(dev); | ||
90 | |||
91 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
92 | = false; | ||
95 | |||
96 | platform->g->gr.ctx_vars.force_preemption_gfxp = false; | ||
97 | platform->g->gr.ctx_vars.force_preemption_cilp = false; | ||
98 | |||
99 | #ifdef CONFIG_OF | ||
100 | joint_xpu_rail = of_property_read_bool(of_chosen, | ||
101 | "nvidia,tegra-joint_xpu_rail"); | ||
102 | #endif | ||
103 | |||
104 | if (joint_xpu_rail) { | ||
105 | nvgpu_log_info(g, "XPU rails are joint\n"); | ||
106 | platform->can_railgate_init = false; | ||
107 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
108 | } | ||
109 | |||
110 | gp10b_tegra_get_clocks(dev); | ||
111 | nvgpu_linux_init_clk_support(platform->g); | ||
112 | |||
113 | nvgpu_mutex_init(&platform->clk_get_freq_lock); | ||
114 | |||
115 | platform->g->ops.clk.support_clk_freq_controller = true; | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static int gv11b_tegra_late_probe(struct device *dev) | ||
121 | { | ||
122 | return 0; | ||
123 | } | ||
124 | |||
126 | static int gv11b_tegra_remove(struct device *dev) | ||
127 | { | ||
128 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
129 | |||
130 | gv11b_tegra_scale_exit(dev); | ||
131 | |||
132 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
133 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
134 | #endif | ||
135 | |||
136 | nvgpu_mutex_destroy(&platform->clk_get_freq_lock); | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | static bool gv11b_tegra_is_railgated(struct device *dev) | ||
142 | { | ||
143 | bool ret = false; | ||
144 | #ifdef TEGRA194_POWER_DOMAIN_GPU | ||
145 | struct gk20a *g = get_gk20a(dev); | ||
146 | |||
147 | if (tegra_bpmp_running()) { | ||
148 | nvgpu_log(g, gpu_dbg_info, "bpmp running"); | ||
149 | ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU); | ||
150 | |||
151 | nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no"); | ||
152 | } else { | ||
153 | nvgpu_log(g, gpu_dbg_info, "bpmp not running"); | ||
154 | } | ||
155 | #endif | ||
156 | return ret; | ||
157 | } | ||
158 | |||
159 | static int gv11b_tegra_railgate(struct device *dev) | ||
160 | { | ||
161 | #ifdef TEGRA194_POWER_DOMAIN_GPU | ||
162 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
163 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
164 | struct gk20a *g = get_gk20a(dev); | ||
165 | int i; | ||
166 | |||
167 | /* remove emc frequency floor */ | ||
168 | if (profile) | ||
169 | tegra_bwmgr_set_emc( | ||
170 | (struct tegra_bwmgr_client *)profile->private_data, | ||
171 | 0, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
172 | |||
173 | if (tegra_bpmp_running()) { | ||
174 | nvgpu_log(g, gpu_dbg_info, "bpmp running"); | ||
175 | if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) { | ||
176 | nvgpu_log(g, gpu_dbg_info, "powergate is not powered"); | ||
177 | return 0; | ||
178 | } | ||
179 | nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare"); | ||
180 | for (i = 0; i < platform->num_clks; i++) { | ||
181 | if (platform->clk[i]) | ||
182 | clk_disable_unprepare(platform->clk[i]); | ||
183 | } | ||
184 | nvgpu_log(g, gpu_dbg_info, "powergate_partition"); | ||
185 | tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU); | ||
186 | } else { | ||
187 | nvgpu_log(g, gpu_dbg_info, "bpmp not running"); | ||
188 | } | ||
189 | #endif | ||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | static int gv11b_tegra_unrailgate(struct device *dev) | ||
194 | { | ||
195 | int ret = 0; | ||
196 | #ifdef TEGRA194_POWER_DOMAIN_GPU | ||
197 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
198 | struct gk20a *g = get_gk20a(dev); | ||
199 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
200 | int i; | ||
201 | |||
202 | if (tegra_bpmp_running()) { | ||
203 | nvgpu_log(g, gpu_dbg_info, "bpmp running"); | ||
204 | ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU); | ||
205 | if (ret) { | ||
206 | nvgpu_log(g, gpu_dbg_info, | ||
207 | "unpowergate partition failed"); | ||
208 | return ret; | ||
209 | } | ||
210 | nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable"); | ||
211 | for (i = 0; i < platform->num_clks; i++) { | ||
212 | if (platform->clk[i]) | ||
213 | clk_prepare_enable(platform->clk[i]); | ||
214 | } | ||
215 | } else { | ||
216 | nvgpu_log(g, gpu_dbg_info, "bpmp not running"); | ||
217 | } | ||
218 | |||
219 | /* to start with, set emc frequency floor to max rate */ | ||
220 | if (profile) | ||
221 | tegra_bwmgr_set_emc( | ||
222 | (struct tegra_bwmgr_client *)profile->private_data, | ||
223 | tegra_bwmgr_get_max_emc_rate(), | ||
224 | TEGRA_BWMGR_SET_EMC_FLOOR); | ||
225 | #endif | ||
226 | return ret; | ||
227 | } | ||
228 | |||
229 | static int gv11b_tegra_suspend(struct device *dev) | ||
230 | { | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static bool is_tpc_mask_valid(struct gk20a_platform *platform, u32 tpc_pg_mask) | ||
235 | { | ||
236 | u32 i; | ||
237 | bool valid = false; | ||
238 | |||
239 | for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) { | ||
240 | if (tpc_pg_mask == platform->valid_tpc_mask[i]) { | ||
241 | valid = true; | ||
242 | break; | ||
243 | } | ||
244 | } | ||
245 | return valid; | ||
246 | } | ||
247 | |||
248 | static void gv11b_tegra_set_tpc_pg_mask(struct device *dev, u32 tpc_pg_mask) | ||
249 | { | ||
250 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
251 | struct gk20a *g = get_gk20a(dev); | ||
252 | |||
253 | if (is_tpc_mask_valid(platform, tpc_pg_mask)) { | ||
254 | g->tpc_pg_mask = tpc_pg_mask; | ||
255 | } | ||
256 | |||
257 | } | ||
258 | |||
259 | struct gk20a_platform gv11b_tegra_platform = { | ||
260 | .has_syncpoints = true, | ||
261 | |||
262 | /* ptimer src frequency in hz */ | ||
263 | .ptimer_src_freq = 31250000, | ||
264 | |||
265 | .ch_wdt_timeout_ms = 5000, | ||
266 | |||
267 | .probe = gv11b_tegra_probe, | ||
268 | .late_probe = gv11b_tegra_late_probe, | ||
269 | .remove = gv11b_tegra_remove, | ||
270 | .railgate_delay_init = 500, | ||
271 | .can_railgate_init = true, | ||
272 | |||
273 | .can_tpc_powergate = true, | ||
274 | .valid_tpc_mask[0] = 0x0, | ||
275 | .valid_tpc_mask[1] = 0x1, | ||
276 | .valid_tpc_mask[2] = 0x2, | ||
277 | .valid_tpc_mask[3] = 0x4, | ||
278 | .valid_tpc_mask[4] = 0x8, | ||
279 | .valid_tpc_mask[5] = 0x5, | ||
280 | .valid_tpc_mask[6] = 0x6, | ||
281 | .valid_tpc_mask[7] = 0x9, | ||
282 | .valid_tpc_mask[8] = 0xa, | ||
283 | |||
284 | .set_tpc_pg_mask = gv11b_tegra_set_tpc_pg_mask, | ||
285 | |||
286 | .can_slcg = true, | ||
287 | .can_blcg = true, | ||
288 | .can_elcg = true, | ||
289 | .enable_slcg = true, | ||
290 | .enable_blcg = true, | ||
291 | .enable_elcg = true, | ||
292 | .enable_perfmon = true, | ||
293 | |||
294 | /* power management configuration */ | ||
295 | .enable_elpg = true, | ||
296 | .can_elpg_init = true, | ||
297 | .enable_aelpg = true, | ||
298 | |||
299 | /* power management callbacks */ | ||
300 | .suspend = gv11b_tegra_suspend, | ||
301 | .railgate = gv11b_tegra_railgate, | ||
302 | .unrailgate = gv11b_tegra_unrailgate, | ||
303 | .is_railgated = gv11b_tegra_is_railgated, | ||
304 | |||
305 | .busy = gk20a_tegra_busy, | ||
306 | .idle = gk20a_tegra_idle, | ||
307 | |||
308 | .clk_round_rate = gp10b_round_clk_rate, | ||
309 | .get_clk_freqs = gp10b_clk_get_freqs, | ||
310 | |||
311 | /* frequency scaling configuration */ | ||
312 | .initscale = gv11b_tegra_scale_init, | ||
313 | .prescale = gp10b_tegra_prescale, | ||
314 | .postscale = gp10b_tegra_postscale, | ||
315 | .devfreq_governor = "nvhost_podgov", | ||
316 | |||
317 | .qos_notify = gk20a_scale_qos_notify, | ||
318 | |||
319 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | ||
320 | |||
321 | .soc_name = "tegra19x", | ||
322 | |||
323 | .honors_aperture = true, | ||
324 | .unified_memory = true, | ||
325 | .dma_mask = DMA_BIT_MASK(36), | ||
326 | |||
327 | .reset_assert = gp10b_tegra_reset_assert, | ||
328 | .reset_deassert = gp10b_tegra_reset_deassert, | ||
329 | |||
330 | .secure_buffer_size = 667648, | ||
331 | }; | ||
diff --git a/include/os/linux/rwsem.c b/include/os/linux/rwsem.c new file mode 100644 index 0000000..297ddf1 --- /dev/null +++ b/include/os/linux/rwsem.c | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/rwsem.h> | ||
15 | |||
16 | void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem) | ||
17 | { | ||
18 | init_rwsem(&rwsem->rwsem); | ||
19 | } | ||
20 | |||
21 | void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem) | ||
22 | { | ||
23 | up_read(&rwsem->rwsem); | ||
24 | } | ||
25 | |||
26 | void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem) | ||
27 | { | ||
28 | down_read(&rwsem->rwsem); | ||
29 | } | ||
30 | |||
31 | void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem) | ||
32 | { | ||
33 | up_write(&rwsem->rwsem); | ||
34 | } | ||
35 | |||
36 | void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem) | ||
37 | { | ||
38 | down_write(&rwsem->rwsem); | ||
39 | } | ||
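These wrappers are thin pass-throughs to the kernel's rw_semaphore, so they inherit its semantics: any number of concurrent readers, or one exclusive writer. A usage sketch, under the assumption that the semaphore is embedded in some driver-private structure (example_state below is hypothetical):

```c
#include <nvgpu/rwsem.h>

/* Usage sketch: many concurrent readers, one exclusive writer.
 * "example_state" is hypothetical; call nvgpu_rwsem_init() on the
 * embedded semaphore once before first use. */
struct example_state {
	struct nvgpu_rwsem lock;
	int value;
};

static int example_read(struct example_state *s)
{
	int v;

	nvgpu_rwsem_down_read(&s->lock);
	v = s->value;			/* shared, read-only access */
	nvgpu_rwsem_up_read(&s->lock);
	return v;
}

static void example_write(struct example_state *s, int v)
{
	nvgpu_rwsem_down_write(&s->lock);
	s->value = v;			/* exclusive access */
	nvgpu_rwsem_up_write(&s->lock);
}
```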
diff --git a/include/os/linux/scale.c b/include/os/linux/scale.c new file mode 100644 index 0000000..388e168 --- /dev/null +++ b/include/os/linux/scale.c | |||
@@ -0,0 +1,435 @@ | |||
1 | /* | ||
2 | * gk20a clock scaling profile | ||
3 | * | ||
4 | * Copyright (c) 2013-2020, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/devfreq.h> | ||
20 | #include <linux/export.h> | ||
21 | #include <soc/tegra/chip-id.h> | ||
22 | #include <linux/pm_qos.h> | ||
23 | |||
24 | #include <governor.h> | ||
25 | |||
26 | #include <nvgpu/kmem.h> | ||
27 | #include <nvgpu/log.h> | ||
28 | #include <nvgpu/gk20a.h> | ||
29 | #include <nvgpu/clk_arb.h> | ||
30 | |||
31 | #include "platform_gk20a.h" | ||
32 | #include "scale.h" | ||
33 | #include "os_linux.h" | ||
34 | |||
35 | /* | ||
36 | * gk20a_scale_qos_notify() | ||
37 | * | ||
38 | * This function is called when the minimum QoS requirement for the device | ||
39 | * has changed. The function calls the postscaling callback if one is defined. | ||
40 | */ | ||
41 | |||
42 | #if defined(CONFIG_GK20A_PM_QOS) && defined(CONFIG_COMMON_CLK) | ||
43 | int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
44 | unsigned long n, void *p) | ||
45 | { | ||
46 | struct gk20a_scale_profile *profile = | ||
47 | container_of(nb, struct gk20a_scale_profile, | ||
48 | qos_notify_block); | ||
49 | struct gk20a *g = get_gk20a(profile->dev); | ||
50 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
51 | struct devfreq *devfreq = l->devfreq; | ||
52 | |||
53 | if (!devfreq) | ||
54 | return NOTIFY_OK; | ||
55 | |||
56 | mutex_lock(&devfreq->lock); | ||
57 | /* check for pm_qos min and max frequency requirement */ | ||
58 | profile->qos_min_freq = | ||
59 | (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; | ||
60 | profile->qos_max_freq = | ||
61 | (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; | ||
62 | |||
63 | if (profile->qos_min_freq > profile->qos_max_freq) { | ||
64 | nvgpu_err(g, | ||
65 | "QoS: setting invalid limit, min_freq=%lu max_freq=%lu", | ||
66 | profile->qos_min_freq, profile->qos_max_freq); | ||
67 | profile->qos_min_freq = profile->qos_max_freq; | ||
68 | } | ||
69 | |||
70 | update_devfreq(devfreq); | ||
71 | mutex_unlock(&devfreq->lock); | ||
72 | |||
73 | return NOTIFY_OK; | ||
74 | } | ||
75 | #elif defined(CONFIG_GK20A_PM_QOS) | ||
76 | int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
77 | unsigned long n, void *p) | ||
78 | { | ||
79 | struct gk20a_scale_profile *profile = | ||
80 | container_of(nb, struct gk20a_scale_profile, | ||
81 | qos_notify_block); | ||
82 | struct gk20a_platform *platform = dev_get_drvdata(profile->dev); | ||
83 | struct gk20a *g = get_gk20a(profile->dev); | ||
84 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
85 | unsigned long freq; | ||
86 | |||
87 | if (!platform->postscale) | ||
88 | return NOTIFY_OK; | ||
89 | |||
90 | /* get the frequency requirement. if devfreq is enabled, check if it | ||
91 | * has higher demand than qos */ | ||
92 | freq = platform->clk_round_rate(profile->dev, | ||
93 | (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS)); | ||
94 | if (l->devfreq) | ||
95 | freq = max(l->devfreq->previous_freq, freq); | ||
96 | |||
97 | /* Update gpu load because we may scale the emc target | ||
98 | * if the gpu load changed. */ | ||
99 | nvgpu_pmu_load_update(g); | ||
100 | platform->postscale(profile->dev, freq); | ||
101 | |||
102 | return NOTIFY_OK; | ||
103 | } | ||
104 | #else | ||
105 | int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
106 | unsigned long n, void *p) | ||
107 | { | ||
108 | return 0; | ||
109 | } | ||
110 | #endif | ||
111 | |||
112 | /* | ||
113 | * gk20a_scale_make_freq_table(profile) | ||
114 | * | ||
115 | * This function initialises the frequency table for the given device profile | ||
116 | */ | ||
117 | |||
118 | static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) | ||
119 | { | ||
120 | struct gk20a_platform *platform = dev_get_drvdata(profile->dev); | ||
121 | int num_freqs, err; | ||
122 | unsigned long *freqs; | ||
123 | |||
124 | if (platform->get_clk_freqs) { | ||
125 | /* get gpu frequency table */ | ||
126 | err = platform->get_clk_freqs(profile->dev, &freqs, | ||
127 | &num_freqs); | ||
128 | |||
129 | if (err) | ||
130 | return -ENOSYS; | ||
131 | } else | ||
132 | return -ENOSYS; | ||
133 | |||
134 | profile->devfreq_profile.freq_table = (unsigned long *)freqs; | ||
135 | profile->devfreq_profile.max_state = num_freqs; | ||
136 | |||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * gk20a_scale_target(dev, *freq, flags) | ||
142 | * | ||
143 | * This function scales the clock | ||
144 | */ | ||
145 | |||
146 | static int gk20a_scale_target(struct device *dev, unsigned long *freq, | ||
147 | u32 flags) | ||
148 | { | ||
149 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
150 | struct gk20a *g = platform->g; | ||
151 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
152 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
153 | struct devfreq *devfreq = l->devfreq; | ||
154 | unsigned long local_freq = *freq; | ||
155 | unsigned long rounded_rate; | ||
156 | unsigned long min_freq = 0, max_freq = 0; | ||
157 | |||
158 | if (nvgpu_clk_arb_has_active_req(g)) | ||
159 | return 0; | ||
160 | /* | ||
161 | * Calculate floor and cap frequency values | ||
162 | * | ||
163 | * Policy : | ||
164 | * We have two APIs to clip the frequency | ||
165 | * 1. devfreq | ||
166 | * 2. pm_qos | ||
167 | * | ||
168 | * To calculate floor (min) freq, we select MAX of floor frequencies | ||
169 | * requested from both APIs | ||
170 | * To get cap (max) freq, we select MIN of max frequencies | ||
171 | * | ||
172 | * In case we have conflict (min_freq > max_freq) after above | ||
173 | * steps, we ensure that max_freq wins over min_freq | ||
174 | */ | ||
175 | min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq); | ||
176 | max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq); | ||
177 | |||
178 | if (min_freq > max_freq) | ||
179 | min_freq = max_freq; | ||
180 | |||
181 | /* Clip requested frequency */ | ||
182 | if (local_freq < min_freq) | ||
183 | local_freq = min_freq; | ||
184 | |||
185 | if (local_freq > max_freq) | ||
186 | local_freq = max_freq; | ||
187 | |||
188 | /* set the final frequency */ | ||
189 | rounded_rate = platform->clk_round_rate(dev, local_freq); | ||
190 | |||
191 | /* Check for duplicate request */ | ||
192 | if (rounded_rate == g->last_freq) | ||
193 | return 0; | ||
194 | |||
195 | if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate) | ||
196 | *freq = rounded_rate; | ||
197 | else { | ||
198 | g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate); | ||
199 | *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
200 | } | ||
201 | |||
202 | g->last_freq = *freq; | ||
203 | |||
204 | /* postscale will only scale emc (dram clock) if evaluating | ||
205 | * gk20a_tegra_get_emc_rate() produces a new or different emc | ||
206 | * target because the load and/or gpufreq has changed */ | ||
207 | if (platform->postscale) | ||
208 | platform->postscale(dev, rounded_rate); | ||
209 | |||
210 | return 0; | ||
211 | } | ||
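The clipping policy spelled out in the comment above reduces to two clamps, with the cap winning on conflict. A standalone sketch of the same logic that mirrors the max_t/min_t computation in gk20a_scale_target(), with assumed limit values (200-800 MHz from devfreq, 300-600 MHz from QoS):

```c
/* Sketch of the floor/cap policy in gk20a_scale_target(): the floor is
 * the larger of the two minimums, the cap the smaller of the two
 * maximums, and on conflict the cap wins. All values are assumed. */
static unsigned long clip_freq_example(unsigned long requested)
{
	unsigned long devfreq_min = 200000000UL, devfreq_max = 800000000UL;
	unsigned long qos_min = 300000000UL, qos_max = 600000000UL;
	unsigned long min_freq, max_freq;

	min_freq = devfreq_min > qos_min ? devfreq_min : qos_min; /* 300 MHz */
	max_freq = devfreq_max < qos_max ? devfreq_max : qos_max; /* 600 MHz */

	if (min_freq > max_freq)
		min_freq = max_freq;	/* cap wins over floor */

	if (requested < min_freq)
		requested = min_freq;
	if (requested > max_freq)
		requested = max_freq;

	return requested;	/* a 1 GHz request yields 600 MHz */
}
```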
212 | |||
213 | /* | ||
214 | * update_load_estimate_busy_cycles(dev) | ||
215 | * | ||
216 | * Update load estimate using pmu idle counters. The result is | ||
217 | * normalised to the time elapsed since the previous query. | ||
218 | */ | ||
219 | |||
220 | static void update_load_estimate_busy_cycles(struct device *dev) | ||
221 | { | ||
222 | struct gk20a *g = get_gk20a(dev); | ||
223 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
224 | unsigned long dt; | ||
225 | u32 busy_cycles_norm; | ||
226 | ktime_t t; | ||
227 | |||
228 | t = ktime_get(); | ||
229 | dt = ktime_us_delta(t, profile->last_event_time); | ||
230 | |||
231 | profile->dev_stat.total_time = dt; | ||
232 | profile->last_event_time = t; | ||
233 | nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm); | ||
234 | profile->dev_stat.busy_time = | ||
235 | (busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX; | ||
236 | } | ||
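The busy-time estimate is simply the elapsed window scaled by the normalized busy-cycle reading. A worked example, assuming a full-scale PMU_BUSY_CYCLES_NORM_MAX of 1000 (illustrative only; the real constant comes from the PMU headers):

```c
/* Worked example of the normalisation above. The full-scale value
 * PMU_BUSY_CYCLES_NORM_MAX is assumed to be 1000 for illustration. */
static unsigned long busy_time_example(void)
{
	unsigned long dt = 1000;	/* us elapsed since last query */
	unsigned int busy_norm = 500;	/* PMU reading at half scale */

	return (busy_norm * dt) / 1000;	/* 500 us of busy time */
}
```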
237 | |||
238 | /* | ||
239 | * gk20a_scale_suspend(dev) | ||
240 | * | ||
241 | * This function informs devfreq of suspend | ||
242 | */ | ||
243 | |||
244 | void gk20a_scale_suspend(struct device *dev) | ||
245 | { | ||
246 | struct gk20a *g = get_gk20a(dev); | ||
247 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
248 | struct devfreq *devfreq = l->devfreq; | ||
249 | |||
250 | if (!devfreq) | ||
251 | return; | ||
252 | |||
253 | devfreq_suspend_device(devfreq); | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * gk20a_scale_resume(dev) | ||
258 | * | ||
259 | * This function informs devfreq of resume | ||
260 | */ | ||
261 | |||
262 | void gk20a_scale_resume(struct device *dev) | ||
263 | { | ||
264 | struct gk20a *g = get_gk20a(dev); | ||
265 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
266 | struct devfreq *devfreq = l->devfreq; | ||
267 | |||
268 | if (!devfreq) | ||
269 | return; | ||
270 | |||
271 | g->last_freq = 0; | ||
272 | devfreq_resume_device(devfreq); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * gk20a_scale_get_dev_status(dev, *stat) | ||
277 | * | ||
278 | * This function queries the current device status. | ||
279 | */ | ||
280 | |||
281 | static int gk20a_scale_get_dev_status(struct device *dev, | ||
282 | struct devfreq_dev_status *stat) | ||
283 | { | ||
284 | struct gk20a *g = get_gk20a(dev); | ||
285 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
286 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
287 | |||
288 | /* inform edp about new constraint */ | ||
289 | if (platform->prescale) | ||
290 | platform->prescale(dev); | ||
291 | |||
292 | /* Make sure there are correct values for the current frequency */ | ||
293 | profile->dev_stat.current_frequency = | ||
294 | g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
295 | |||
296 | /* Update load estimate */ | ||
297 | update_load_estimate_busy_cycles(dev); | ||
298 | |||
299 | /* Copy the contents of the current device status */ | ||
300 | *stat = profile->dev_stat; | ||
301 | |||
302 | /* Finally, clear out the local values */ | ||
303 | profile->dev_stat.total_time = 0; | ||
304 | profile->dev_stat.busy_time = 0; | ||
305 | |||
306 | return 0; | ||
307 | } | ||
308 | |||
309 | /* | ||
310 | * get_cur_freq(struct device *dev, unsigned long *freq) | ||
311 | * | ||
312 | * This function gets the current GPU clock rate. | ||
313 | */ | ||
314 | |||
315 | static int get_cur_freq(struct device *dev, unsigned long *freq) | ||
316 | { | ||
317 | struct gk20a *g = get_gk20a(dev); | ||
318 | *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | |||
323 | /* | ||
324 | * gk20a_scale_init(dev) | ||
325 | */ | ||
326 | |||
327 | void gk20a_scale_init(struct device *dev) | ||
328 | { | ||
329 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
330 | struct gk20a *g = platform->g; | ||
331 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
332 | struct gk20a_scale_profile *profile; | ||
333 | int err; | ||
334 | |||
335 | if (g->scale_profile) | ||
336 | return; | ||
337 | |||
338 | if (!platform->devfreq_governor && !platform->qos_notify) | ||
339 | return; | ||
340 | |||
341 | profile = nvgpu_kzalloc(g, sizeof(*profile)); | ||
342 | if (!profile) | ||
343 | return; | ||
344 | |||
345 | profile->dev = dev; | ||
346 | profile->dev_stat.busy = false; | ||
347 | |||
348 | /* Create frequency table */ | ||
349 | err = gk20a_scale_make_freq_table(profile); | ||
350 | if (err || !profile->devfreq_profile.max_state) | ||
351 | goto err_get_freqs; | ||
352 | |||
353 | profile->qos_min_freq = 0; | ||
354 | profile->qos_max_freq = UINT_MAX; | ||
355 | |||
356 | /* Store the device profile so we can access it if the devfreq | ||
357 | * governor init needs it */ | ||
358 | g->scale_profile = profile; | ||
359 | |||
360 | if (platform->devfreq_governor) { | ||
361 | struct devfreq *devfreq; | ||
362 | |||
363 | profile->devfreq_profile.initial_freq = | ||
364 | profile->devfreq_profile.freq_table[0]; | ||
365 | profile->devfreq_profile.target = gk20a_scale_target; | ||
366 | profile->devfreq_profile.get_dev_status = | ||
367 | gk20a_scale_get_dev_status; | ||
368 | profile->devfreq_profile.get_cur_freq = get_cur_freq; | ||
369 | profile->devfreq_profile.polling_ms = 25; | ||
370 | |||
371 | devfreq = devm_devfreq_add_device(dev, | ||
372 | &profile->devfreq_profile, | ||
373 | platform->devfreq_governor, NULL); | ||
374 | |||
375 | if (IS_ERR_OR_NULL(devfreq)) | ||
376 | devfreq = NULL; | ||
377 | |||
378 | l->devfreq = devfreq; | ||
379 | } | ||
380 | |||
381 | #ifdef CONFIG_GK20A_PM_QOS | ||
382 | /* Should we register QoS callback for this device? */ | ||
383 | if (platform->qos_notify) { | ||
384 | profile->qos_notify_block.notifier_call = | ||
385 | platform->qos_notify; | ||
386 | |||
387 | pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
388 | &profile->qos_notify_block); | ||
389 | pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
390 | &profile->qos_notify_block); | ||
391 | } | ||
392 | #endif | ||
393 | |||
394 | return; | ||
395 | |||
396 | err_get_freqs: | ||
397 | nvgpu_kfree(g, profile); | ||
398 | } | ||
399 | |||
400 | void gk20a_scale_exit(struct device *dev) | ||
401 | { | ||
402 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
403 | struct gk20a *g = platform->g; | ||
404 | |||
405 | #ifdef CONFIG_GK20A_PM_QOS | ||
406 | if (platform->qos_notify) { | ||
407 | pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
408 | &g->scale_profile->qos_notify_block); | ||
409 | pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
410 | &g->scale_profile->qos_notify_block); | ||
411 | } | ||
412 | #endif | ||
413 | |||
414 | nvgpu_kfree(g, g->scale_profile); | ||
415 | g->scale_profile = NULL; | ||
416 | } | ||
417 | |||
418 | /* | ||
419 | * gk20a_scale_hw_init(dev) | ||
420 | * | ||
421 | * Initialize hardware portion of the device | ||
422 | */ | ||
423 | |||
424 | void gk20a_scale_hw_init(struct device *dev) | ||
425 | { | ||
426 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
427 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
428 | |||
429 | /* make sure that scaling has been initialised */ | ||
430 | if (!profile) | ||
431 | return; | ||
432 | |||
433 | profile->dev_stat.total_time = 0; | ||
434 | profile->last_event_time = ktime_get(); | ||
435 | } | ||
diff --git a/include/os/linux/scale.h b/include/os/linux/scale.h new file mode 100644 index 0000000..c1e6fe8 --- /dev/null +++ b/include/os/linux/scale.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * gk20a clock scaling profile | ||
3 | * | ||
4 | * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef GK20A_SCALE_H | ||
20 | #define GK20A_SCALE_H | ||
21 | |||
22 | #include <linux/devfreq.h> | ||
23 | |||
24 | struct clk; | ||
25 | |||
26 | struct gk20a_scale_profile { | ||
27 | struct device *dev; | ||
28 | ktime_t last_event_time; | ||
29 | struct devfreq_dev_profile devfreq_profile; | ||
30 | struct devfreq_dev_status dev_stat; | ||
31 | struct notifier_block qos_notify_block; | ||
32 | unsigned long qos_min_freq; | ||
33 | unsigned long qos_max_freq; | ||
34 | void *private_data; | ||
35 | }; | ||
36 | |||
37 | /* Initialization and de-initialization for module */ | ||
38 | void gk20a_scale_init(struct device *); | ||
39 | void gk20a_scale_exit(struct device *); | ||
40 | void gk20a_scale_hw_init(struct device *dev); | ||
41 | |||
42 | #if defined(CONFIG_GK20A_DEVFREQ) | ||
43 | /* | ||
44 | * call when performing submit to notify scaling mechanism that the module is | ||
45 | * in use | ||
46 | */ | ||
47 | void gk20a_scale_notify_busy(struct device *); | ||
48 | void gk20a_scale_notify_idle(struct device *); | ||
49 | |||
50 | void gk20a_scale_suspend(struct device *); | ||
51 | void gk20a_scale_resume(struct device *); | ||
52 | int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
53 | unsigned long n, void *p); | ||
54 | #else | ||
55 | static inline void gk20a_scale_notify_busy(struct device *dev) {} | ||
56 | static inline void gk20a_scale_notify_idle(struct device *dev) {} | ||
57 | static inline void gk20a_scale_suspend(struct device *dev) {} | ||
58 | static inline void gk20a_scale_resume(struct device *dev) {} | ||
59 | static inline int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
60 | unsigned long n, void *p) | ||
61 | { | ||
62 | return -ENOSYS; | ||
63 | } | ||
64 | #endif | ||
65 | |||
66 | #endif | ||
diff --git a/include/os/linux/sched.c b/include/os/linux/sched.c new file mode 100644 index 0000000..30c58a1 --- /dev/null +++ b/include/os/linux/sched.c | |||
@@ -0,0 +1,666 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #include <asm/barrier.h> | ||
17 | #include <linux/wait.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <linux/poll.h> | ||
20 | #include <uapi/linux/nvgpu.h> | ||
21 | |||
22 | #include <nvgpu/kmem.h> | ||
23 | #include <nvgpu/log.h> | ||
24 | #include <nvgpu/bug.h> | ||
25 | #include <nvgpu/barrier.h> | ||
26 | #include <nvgpu/gk20a.h> | ||
27 | |||
28 | #include "gk20a/gr_gk20a.h" | ||
29 | #include "sched.h" | ||
30 | #include "os_linux.h" | ||
31 | #include "ioctl_tsg.h" | ||
32 | |||
33 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
34 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
35 | |||
36 | ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf, | ||
37 | size_t size, loff_t *off) | ||
38 | { | ||
39 | struct gk20a *g = filp->private_data; | ||
40 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
41 | struct nvgpu_sched_event_arg event = { 0 }; | ||
42 | int err; | ||
43 | |||
44 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, | ||
45 | "filp=%p buf=%p size=%zu", filp, buf, size); | ||
46 | |||
47 | if (size < sizeof(event)) | ||
48 | return -EINVAL; | ||
49 | size = sizeof(event); | ||
50 | |||
51 | nvgpu_mutex_acquire(&sched->status_lock); | ||
52 | while (!sched->status) { | ||
53 | nvgpu_mutex_release(&sched->status_lock); | ||
54 | if (filp->f_flags & O_NONBLOCK) | ||
55 | return -EAGAIN; | ||
56 | err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq, | ||
57 | sched->status, 0); | ||
58 | if (err) | ||
59 | return err; | ||
60 | nvgpu_mutex_acquire(&sched->status_lock); | ||
61 | } | ||
62 | |||
63 | event.reserved = 0; | ||
64 | event.status = sched->status; | ||
65 | |||
66 | if (copy_to_user(buf, &event, size)) { | ||
67 | nvgpu_mutex_release(&sched->status_lock); | ||
68 | return -EFAULT; | ||
69 | } | ||
70 | |||
71 | sched->status = 0; | ||
72 | |||
73 | nvgpu_mutex_release(&sched->status_lock); | ||
74 | |||
75 | return size; | ||
76 | } | ||
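From userspace, this read() path delivers fixed-size nvgpu_sched_event_arg records, blocking until the driver posts a status change (or returning -EAGAIN with O_NONBLOCK). A hypothetical consumer is sketched below; the device-node path and the userspace header name are assumptions made for illustration:

```c
/* Hypothetical userspace consumer of the sched event stream. The
 * device-node path below is an assumption made for this sketch. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* struct nvgpu_sched_event_arg */

int main(void)
{
	struct nvgpu_sched_event_arg event;
	int fd = open("/dev/nvhost-sched-gpu", O_RDONLY);	/* assumed path */

	if (fd < 0)
		return 1;

	/* blocks until the driver posts a non-zero status */
	if (read(fd, &event, sizeof(event)) == (ssize_t)sizeof(event))
		printf("sched status: 0x%llx\n",
		       (unsigned long long)event.status);

	close(fd);
	return 0;
}
```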
77 | |||
78 | unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait) | ||
79 | { | ||
80 | struct gk20a *g = filp->private_data; | ||
81 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
82 | unsigned int mask = 0; | ||
83 | |||
84 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
85 | |||
86 | nvgpu_mutex_acquire(&sched->status_lock); | ||
87 | poll_wait(filp, &sched->readout_wq.wq, wait); | ||
88 | if (sched->status) | ||
89 | mask |= POLLIN | POLLRDNORM; | ||
90 | nvgpu_mutex_release(&sched->status_lock); | ||
91 | |||
92 | return mask; | ||
93 | } | ||
94 | |||
95 | static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a *g, | ||
96 | struct nvgpu_sched_get_tsgs_args *arg) | ||
97 | { | ||
98 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
99 | |||
100 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", | ||
101 | arg->size, arg->buffer); | ||
102 | |||
103 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
104 | arg->size = sched->bitmap_size; | ||
105 | return -ENOSPC; | ||
106 | } | ||
107 | |||
108 | nvgpu_mutex_acquire(&sched->status_lock); | ||
109 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
110 | sched->active_tsg_bitmap, sched->bitmap_size)) { | ||
111 | nvgpu_mutex_release(&sched->status_lock); | ||
112 | return -EFAULT; | ||
113 | } | ||
114 | nvgpu_mutex_release(&sched->status_lock); | ||
115 | |||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a *g, | ||
120 | struct nvgpu_sched_get_tsgs_args *arg) | ||
121 | { | ||
122 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
123 | |||
124 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", | ||
125 | arg->size, arg->buffer); | ||
126 | |||
127 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
128 | arg->size = sched->bitmap_size; | ||
129 | return -ENOSPC; | ||
130 | } | ||
131 | |||
132 | nvgpu_mutex_acquire(&sched->status_lock); | ||
133 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
134 | sched->recent_tsg_bitmap, sched->bitmap_size)) { | ||
135 | nvgpu_mutex_release(&sched->status_lock); | ||
136 | return -EFAULT; | ||
137 | } | ||
138 | |||
139 | memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); | ||
140 | nvgpu_mutex_release(&sched->status_lock); | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a *g, | ||
146 | struct nvgpu_sched_get_tsgs_by_pid_args *arg) | ||
147 | { | ||
148 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
149 | struct fifo_gk20a *f = &g->fifo; | ||
150 | struct tsg_gk20a *tsg; | ||
151 | u64 *bitmap; | ||
152 | unsigned int tsgid; | ||
153 | /* pid at user level corresponds to kernel tgid */ | ||
154 | pid_t tgid = (pid_t)arg->pid; | ||
155 | int err = 0; | ||
156 | |||
157 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx", | ||
158 | (pid_t)arg->pid, arg->size, arg->buffer); | ||
159 | |||
160 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
161 | arg->size = sched->bitmap_size; | ||
162 | return -ENOSPC; | ||
163 | } | ||
164 | |||
165 | bitmap = nvgpu_kzalloc(g, sched->bitmap_size); | ||
166 | if (!bitmap) | ||
167 | return -ENOMEM; | ||
168 | |||
169 | nvgpu_mutex_acquire(&sched->status_lock); | ||
170 | for (tsgid = 0; tsgid < f->num_channels; tsgid++) { | ||
171 | if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { | ||
172 | tsg = &f->tsg[tsgid]; | ||
173 | if (tsg->tgid == tgid) | ||
174 | NVGPU_SCHED_SET(tsgid, bitmap); | ||
175 | } | ||
176 | } | ||
177 | nvgpu_mutex_release(&sched->status_lock); | ||
178 | |||
179 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
180 | bitmap, sched->bitmap_size)) | ||
181 | err = -EFAULT; | ||
182 | |||
183 | nvgpu_kfree(g, bitmap); | ||
184 | |||
185 | return err; | ||
186 | } | ||
187 | |||
188 | static int gk20a_sched_dev_ioctl_get_params(struct gk20a *g, | ||
189 | struct nvgpu_sched_tsg_get_params_args *arg) | ||
190 | { | ||
191 | struct fifo_gk20a *f = &g->fifo; | ||
192 | struct tsg_gk20a *tsg; | ||
193 | u32 tsgid = arg->tsgid; | ||
194 | |||
195 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
196 | |||
197 | if (tsgid >= f->num_channels) | ||
198 | return -EINVAL; | ||
199 | |||
200 | nvgpu_speculation_barrier(); | ||
201 | |||
202 | tsg = &f->tsg[tsgid]; | ||
203 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
204 | return -ENXIO; | ||
205 | |||
206 | arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */ | ||
207 | arg->runlist_interleave = tsg->interleave_level; | ||
208 | arg->timeslice = gk20a_tsg_get_timeslice(tsg); | ||
209 | |||
210 | arg->graphics_preempt_mode = | ||
211 | tsg->gr_ctx.graphics_preempt_mode; | ||
212 | arg->compute_preempt_mode = | ||
213 | tsg->gr_ctx.compute_preempt_mode; | ||
214 | |||
215 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
216 | |||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | static int gk20a_sched_dev_ioctl_tsg_set_timeslice( | ||
221 | struct gk20a *g, | ||
222 | struct nvgpu_sched_tsg_timeslice_args *arg) | ||
223 | { | ||
224 | struct fifo_gk20a *f = &g->fifo; | ||
225 | struct tsg_gk20a *tsg; | ||
226 | u32 tsgid = arg->tsgid; | ||
227 | int err; | ||
228 | |||
229 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
230 | |||
231 | if (tsgid >= f->num_channels) | ||
232 | return -EINVAL; | ||
233 | |||
234 | nvgpu_speculation_barrier(); | ||
235 | |||
236 | tsg = &f->tsg[tsgid]; | ||
237 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
238 | return -ENXIO; | ||
239 | |||
240 | err = gk20a_busy(g); | ||
241 | if (err) | ||
242 | goto done; | ||
243 | |||
244 | err = gk20a_tsg_set_timeslice(tsg, arg->timeslice); | ||
245 | |||
246 | gk20a_idle(g); | ||
247 | |||
248 | done: | ||
249 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
250 | |||
251 | return err; | ||
252 | } | ||
253 | |||
254 | static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave( | ||
255 | struct gk20a *g, | ||
256 | struct nvgpu_sched_tsg_runlist_interleave_args *arg) | ||
257 | { | ||
258 | struct fifo_gk20a *f = &g->fifo; | ||
259 | struct tsg_gk20a *tsg; | ||
260 | u32 tsgid = arg->tsgid; | ||
261 | int err; | ||
262 | |||
263 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
264 | |||
265 | if (tsgid >= f->num_channels) | ||
266 | return -EINVAL; | ||
267 | |||
268 | nvgpu_speculation_barrier(); | ||
269 | |||
270 | tsg = &f->tsg[tsgid]; | ||
271 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
272 | return -ENXIO; | ||
273 | |||
274 | err = gk20a_busy(g); | ||
275 | if (err) | ||
276 | goto done; | ||
277 | |||
278 | err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave); | ||
279 | |||
280 | gk20a_idle(g); | ||
281 | |||
282 | done: | ||
283 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
284 | |||
285 | return err; | ||
286 | } | ||
287 | |||
288 | static int gk20a_sched_dev_ioctl_lock_control(struct gk20a *g) | ||
289 | { | ||
290 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
291 | |||
292 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
293 | |||
294 | nvgpu_mutex_acquire(&sched->control_lock); | ||
295 | sched->control_locked = true; | ||
296 | nvgpu_mutex_release(&sched->control_lock); | ||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a *g) | ||
301 | { | ||
302 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
303 | |||
304 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
305 | |||
306 | nvgpu_mutex_acquire(&sched->control_lock); | ||
307 | sched->control_locked = false; | ||
308 | nvgpu_mutex_release(&sched->control_lock); | ||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a *g, | ||
313 | struct nvgpu_sched_api_version_args *args) | ||
314 | { | ||
315 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
316 | |||
317 | args->version = NVGPU_SCHED_API_VERSION; | ||
318 | return 0; | ||
319 | } | ||
320 | |||
321 | static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a *g, | ||
322 | struct nvgpu_sched_tsg_refcount_args *arg) | ||
323 | { | ||
324 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
325 | struct fifo_gk20a *f = &g->fifo; | ||
326 | struct tsg_gk20a *tsg; | ||
327 | u32 tsgid = arg->tsgid; | ||
328 | |||
329 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
330 | |||
331 | if (tsgid >= f->num_channels) | ||
332 | return -EINVAL; | ||
333 | |||
334 | nvgpu_speculation_barrier(); | ||
335 | |||
336 | tsg = &f->tsg[tsgid]; | ||
337 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
338 | return -ENXIO; | ||
339 | |||
340 | nvgpu_mutex_acquire(&sched->status_lock); | ||
341 | if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { | ||
342 | nvgpu_warn(g, "tsgid=%d already referenced", tsgid); | ||
343 | /* unlock status_lock as nvgpu_ioctl_tsg_release locks it */ | ||
344 | nvgpu_mutex_release(&sched->status_lock); | ||
345 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
346 | return -ENXIO; | ||
347 | } | ||
348 | |||
349 | /* keep a reference on the TSG; it will be released by the | ||
350 | * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or when the fd is closed | ||
351 | */ | ||
352 | NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); | ||
353 | nvgpu_mutex_release(&sched->status_lock); | ||
354 | |||
355 | return 0; | ||
356 | } | ||
357 | |||
358 | static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a *g, | ||
359 | struct nvgpu_sched_tsg_refcount_args *arg) | ||
360 | { | ||
361 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
362 | struct fifo_gk20a *f = &g->fifo; | ||
363 | struct tsg_gk20a *tsg; | ||
364 | u32 tsgid = arg->tsgid; | ||
365 | |||
366 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
367 | |||
368 | if (tsgid >= f->num_channels) | ||
369 | return -EINVAL; | ||
370 | |||
371 | nvgpu_speculation_barrier(); | ||
372 | |||
373 | nvgpu_mutex_acquire(&sched->status_lock); | ||
374 | if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { | ||
375 | nvgpu_mutex_release(&sched->status_lock); | ||
376 | nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid); | ||
377 | return -ENXIO; | ||
378 | } | ||
379 | NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); | ||
380 | nvgpu_mutex_release(&sched->status_lock); | ||
381 | |||
382 | tsg = &f->tsg[tsgid]; | ||
383 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
384 | |||
385 | return 0; | ||
386 | } | ||
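
GET_TSG and PUT_TSG bracket a window during which the TSG cannot be freed underneath the app manager: pin the TSG, query or adjust its parameters, then drop the reference. A hedged userspace sketch; the command macros and argument structs come from the nvgpu uapi header (install path assumed), while the wrapper itself is hypothetical:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed install path of the nvgpu uapi header */

/* Hypothetical helper: pin a TSG, read its params, release it. */
static int query_tsg_params(int fd, uint32_t tsgid,
			    struct nvgpu_sched_tsg_get_params_args *params)
{
	struct nvgpu_sched_tsg_refcount_args ref = { .tsgid = tsgid };
	int err;

	if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSG, &ref))
		return -1;	/* TSG gone, or already referenced by us */

	params->tsgid = tsgid;
	err = ioctl(fd, NVGPU_SCHED_IOCTL_TSG_GET_PARAMS, params);

	/* always drop the reference, even if the query failed */
	ioctl(fd, NVGPU_SCHED_IOCTL_PUT_TSG, &ref);
	return err;
}
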
387 | |||
388 | int gk20a_sched_dev_open(struct inode *inode, struct file *filp) | ||
389 | { | ||
390 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
391 | struct nvgpu_os_linux, sched.cdev); | ||
392 | struct gk20a *g; | ||
393 | struct nvgpu_sched_ctrl *sched; | ||
394 | int err = 0; | ||
395 | |||
396 | g = gk20a_get(&l->g); | ||
397 | if (!g) | ||
398 | return -ENODEV; | ||
399 | sched = &g->sched_ctrl; | ||
400 | |||
401 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g); | ||
402 | |||
403 | if (!sched->sw_ready) { | ||
404 | err = gk20a_busy(g); | ||
405 | if (err) | ||
406 | goto free_ref; | ||
407 | |||
408 | gk20a_idle(g); | ||
409 | } | ||
410 | |||
411 | if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) { | ||
412 | err = -EBUSY; | ||
413 | goto free_ref; | ||
414 | } | ||
415 | |||
416 | memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, | ||
417 | sched->bitmap_size); | ||
418 | memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size); | ||
419 | |||
420 | filp->private_data = g; | ||
421 | nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched); | ||
422 | |||
423 | free_ref: | ||
424 | if (err) | ||
425 | gk20a_put(g); | ||
426 | return err; | ||
427 | } | ||
428 | |||
429 | long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, | ||
430 | unsigned long arg) | ||
431 | { | ||
432 | struct gk20a *g = filp->private_data; | ||
433 | u8 buf[NVGPU_SCHED_IOCTL_MAX_ARG_SIZE]; | ||
434 | int err = 0; | ||
435 | |||
436 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd)); | ||
437 | |||
438 | if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) || | ||
439 | (_IOC_NR(cmd) == 0) || | ||
440 | (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) || | ||
441 | (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE)) | ||
442 | return -EINVAL; | ||
443 | |||
444 | memset(buf, 0, sizeof(buf)); | ||
445 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
446 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
447 | return -EFAULT; | ||
448 | } | ||
449 | |||
450 | nvgpu_speculation_barrier(); | ||
451 | switch (cmd) { | ||
452 | case NVGPU_SCHED_IOCTL_GET_TSGS: | ||
453 | err = gk20a_sched_dev_ioctl_get_tsgs(g, | ||
454 | (struct nvgpu_sched_get_tsgs_args *)buf); | ||
455 | break; | ||
456 | case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS: | ||
457 | err = gk20a_sched_dev_ioctl_get_recent_tsgs(g, | ||
458 | (struct nvgpu_sched_get_tsgs_args *)buf); | ||
459 | break; | ||
460 | case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID: | ||
461 | err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(g, | ||
462 | (struct nvgpu_sched_get_tsgs_by_pid_args *)buf); | ||
463 | break; | ||
464 | case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS: | ||
465 | err = gk20a_sched_dev_ioctl_get_params(g, | ||
466 | (struct nvgpu_sched_tsg_get_params_args *)buf); | ||
467 | break; | ||
468 | case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE: | ||
469 | err = gk20a_sched_dev_ioctl_tsg_set_timeslice(g, | ||
470 | (struct nvgpu_sched_tsg_timeslice_args *)buf); | ||
471 | break; | ||
472 | case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: | ||
473 | err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(g, | ||
474 | (struct nvgpu_sched_tsg_runlist_interleave_args *)buf); | ||
475 | break; | ||
476 | case NVGPU_SCHED_IOCTL_LOCK_CONTROL: | ||
477 | err = gk20a_sched_dev_ioctl_lock_control(g); | ||
478 | break; | ||
479 | case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: | ||
480 | err = gk20a_sched_dev_ioctl_unlock_control(g); | ||
481 | break; | ||
482 | case NVGPU_SCHED_IOCTL_GET_API_VERSION: | ||
483 | err = gk20a_sched_dev_ioctl_get_api_version(g, | ||
484 | (struct nvgpu_sched_api_version_args *)buf); | ||
485 | break; | ||
486 | case NVGPU_SCHED_IOCTL_GET_TSG: | ||
487 | err = gk20a_sched_dev_ioctl_get_tsg(g, | ||
488 | (struct nvgpu_sched_tsg_refcount_args *)buf); | ||
489 | break; | ||
490 | case NVGPU_SCHED_IOCTL_PUT_TSG: | ||
491 | err = gk20a_sched_dev_ioctl_put_tsg(g, | ||
492 | (struct nvgpu_sched_tsg_refcount_args *)buf); | ||
493 | break; | ||
494 | default: | ||
495 | nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); | ||
496 | err = -ENOTTY; | ||
497 | } | ||
498 | |||
499 | /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on | ||
500 | * purpose with a NULL buffer and/or zero size to discover the TSG | ||
501 | * bitmap size. We need to update the user arguments in this case | ||
502 | * too, even if we return an error. | ||
503 | */ | ||
504 | if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) { | ||
505 | if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) | ||
506 | err = -EFAULT; | ||
507 | } | ||
508 | |||
509 | return err; | ||
510 | } | ||
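
The size-discovery convention called out in the comment above is worth making concrete: a first GET_TSGS call with a NULL buffer fails with -ENOSPC but still copies the updated size back to userspace, so the caller can allocate and retry. A hedged sketch; the field names mirror the kernel's usage of arg->size and arg->buffer, but the real layout is struct nvgpu_sched_get_tsgs_args in uapi/linux/nvgpu.h:

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed install path of the nvgpu uapi header */

/* Hypothetical helper: fetch the active-TSG bitmap in two calls. */
static uint64_t *read_active_tsgs(int fd, uint32_t *size_out)
{
	struct nvgpu_sched_get_tsgs_args args = { 0 };
	uint64_t *bitmap;

	/* first call: NULL buffer -> -ENOSPC, args.size holds bitmap_size */
	if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) == 0 ||
	    errno != ENOSPC)
		return NULL;

	bitmap = calloc(1, args.size);
	if (!bitmap)
		return NULL;

	args.buffer = (uint64_t)(uintptr_t)bitmap;
	if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args)) {
		free(bitmap);
		return NULL;
	}

	*size_out = args.size;
	return bitmap;
}
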
511 | |||
512 | int gk20a_sched_dev_release(struct inode *inode, struct file *filp) | ||
513 | { | ||
514 | struct gk20a *g = filp->private_data; | ||
515 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
516 | struct fifo_gk20a *f = &g->fifo; | ||
517 | struct tsg_gk20a *tsg; | ||
518 | unsigned int tsgid; | ||
519 | |||
520 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched); | ||
521 | |||
522 | /* release any reference to TSGs */ | ||
523 | for (tsgid = 0; tsgid < f->num_channels; tsgid++) { | ||
524 | if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { | ||
525 | tsg = &f->tsg[tsgid]; | ||
526 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
527 | } | ||
528 | } | ||
529 | |||
530 | /* unlock control */ | ||
531 | nvgpu_mutex_acquire(&sched->control_lock); | ||
532 | sched->control_locked = false; | ||
533 | nvgpu_mutex_release(&sched->control_lock); | ||
534 | |||
535 | nvgpu_mutex_release(&sched->busy_lock); | ||
536 | gk20a_put(g); | ||
537 | return 0; | ||
538 | } | ||
539 | |||
540 | void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) | ||
541 | { | ||
542 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
543 | int err; | ||
544 | |||
545 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
546 | |||
547 | if (!sched->sw_ready) { | ||
548 | err = gk20a_busy(g); | ||
549 | if (err) { | ||
550 | WARN_ON(err); | ||
551 | return; | ||
552 | } | ||
553 | |||
554 | gk20a_idle(g); | ||
555 | } | ||
556 | |||
557 | nvgpu_mutex_acquire(&sched->status_lock); | ||
558 | NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap); | ||
559 | NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap); | ||
560 | sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN; | ||
561 | nvgpu_mutex_release(&sched->status_lock); | ||
562 | nvgpu_cond_signal_interruptible(&sched->readout_wq); | ||
563 | } | ||
564 | |||
565 | void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg) | ||
566 | { | ||
567 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
568 | |||
569 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
570 | |||
571 | nvgpu_mutex_acquire(&sched->status_lock); | ||
572 | NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap); | ||
573 | |||
574 | /* clear recent_tsg_bitmap as well: if the app manager did not | ||
575 | * notice that the TSG was previously added, there is no need to | ||
576 | * notify it once the TSG has been released in the meantime. If the | ||
577 | * TSG gets reallocated, the app manager is notified as usual. | ||
578 | */ | ||
579 | NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap); | ||
580 | |||
581 | /* do not raise a sched status event here: we only want to notify | ||
582 | * the app manager when TSGs are added, so that it can apply sched params | ||
583 | */ | ||
584 | nvgpu_mutex_release(&sched->status_lock); | ||
585 | } | ||
586 | |||
587 | int gk20a_sched_ctrl_init(struct gk20a *g) | ||
588 | { | ||
589 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
590 | struct fifo_gk20a *f = &g->fifo; | ||
591 | int err; | ||
592 | |||
593 | if (sched->sw_ready) | ||
594 | return 0; | ||
595 | |||
596 | sched->bitmap_size = roundup(f->num_channels, 64) / 8; | ||
597 | sched->status = 0; | ||
598 | |||
599 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu", | ||
600 | g, sched, sched->bitmap_size); | ||
601 | |||
602 | sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); | ||
603 | if (!sched->active_tsg_bitmap) | ||
604 | return -ENOMEM; | ||
605 | |||
606 | sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); | ||
607 | if (!sched->recent_tsg_bitmap) { | ||
608 | err = -ENOMEM; | ||
609 | goto free_active; | ||
610 | } | ||
611 | |||
612 | sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); | ||
613 | if (!sched->ref_tsg_bitmap) { | ||
614 | err = -ENOMEM; | ||
615 | goto free_recent; | ||
616 | } | ||
617 | |||
618 | nvgpu_cond_init(&sched->readout_wq); | ||
619 | |||
620 | err = nvgpu_mutex_init(&sched->status_lock); | ||
621 | if (err) | ||
622 | goto free_ref; | ||
623 | |||
624 | err = nvgpu_mutex_init(&sched->control_lock); | ||
625 | if (err) | ||
626 | goto free_status_lock; | ||
627 | |||
628 | err = nvgpu_mutex_init(&sched->busy_lock); | ||
629 | if (err) | ||
630 | goto free_control_lock; | ||
631 | |||
632 | sched->sw_ready = true; | ||
633 | |||
634 | return 0; | ||
635 | |||
636 | free_control_lock: | ||
637 | nvgpu_mutex_destroy(&sched->control_lock); | ||
638 | free_status_lock: | ||
639 | nvgpu_mutex_destroy(&sched->status_lock); | ||
640 | free_ref: | ||
641 | nvgpu_kfree(g, sched->ref_tsg_bitmap); | ||
642 | free_recent: | ||
643 | nvgpu_kfree(g, sched->recent_tsg_bitmap); | ||
644 | free_active: | ||
645 | nvgpu_kfree(g, sched->active_tsg_bitmap); | ||
646 | |||
647 | return err; | ||
648 | } | ||
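
The bitmap sizing in gk20a_sched_ctrl_init() allots one bit per channel/TSG id, padded up to whole 64-bit words and expressed in bytes. A worked check of the arithmetic; the channel counts are sample values, not taken from this file:

#include <assert.h>

#define ROUNDUP(x, n)	((((x) + (n) - 1) / (n)) * (n))

int main(void)
{
	/* one bit per TSG id, rounded up to whole u64 words, in bytes */
	assert(ROUNDUP(512, 64) / 8 == 64);	/* 512 ids -> 8 u64 words */
	assert(ROUNDUP(100, 64) / 8 == 16);	/* 100 ids -> 2 u64 words */
	return 0;
}
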
649 | |||
650 | void gk20a_sched_ctrl_cleanup(struct gk20a *g) | ||
651 | { | ||
652 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
653 | |||
654 | nvgpu_kfree(g, sched->active_tsg_bitmap); | ||
655 | nvgpu_kfree(g, sched->recent_tsg_bitmap); | ||
656 | nvgpu_kfree(g, sched->ref_tsg_bitmap); | ||
657 | sched->active_tsg_bitmap = NULL; | ||
658 | sched->recent_tsg_bitmap = NULL; | ||
659 | sched->ref_tsg_bitmap = NULL; | ||
660 | |||
661 | nvgpu_mutex_destroy(&sched->status_lock); | ||
662 | nvgpu_mutex_destroy(&sched->control_lock); | ||
663 | nvgpu_mutex_destroy(&sched->busy_lock); | ||
664 | |||
665 | sched->sw_ready = false; | ||
666 | } | ||
diff --git a/include/os/linux/sched.h b/include/os/linux/sched.h new file mode 100644 index 0000000..e88f37f --- /dev/null +++ b/include/os/linux/sched.h | |||
@@ -0,0 +1,36 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef __NVGPU_SCHED_H | ||
17 | #define __NVGPU_SCHED_H | ||
18 | |||
19 | struct gk20a; | ||
20 | struct gpu_ops; | ||
21 | struct tsg_gk20a; | ||
22 | struct poll_table_struct; | ||
23 | |||
24 | int gk20a_sched_dev_release(struct inode *inode, struct file *filp); | ||
25 | int gk20a_sched_dev_open(struct inode *inode, struct file *filp); | ||
26 | long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long); | ||
27 | ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *); | ||
28 | unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *); | ||
29 | |||
30 | void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *); | ||
31 | void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *); | ||
32 | int gk20a_sched_ctrl_init(struct gk20a *); | ||
33 | |||
34 | void gk20a_sched_ctrl_cleanup(struct gk20a *g); | ||
35 | |||
36 | #endif /* __NVGPU_SCHED_H */ | ||
diff --git a/include/os/linux/sim.c b/include/os/linux/sim.c new file mode 100644 index 0000000..792ce80 --- /dev/null +++ b/include/os/linux/sim.c | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/io.h> | ||
18 | #include <linux/highmem.h> | ||
19 | #include <linux/platform_device.h> | ||
20 | |||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/linux/vm.h> | ||
23 | #include <nvgpu/bitops.h> | ||
24 | #include <nvgpu/nvgpu_mem.h> | ||
25 | #include <nvgpu/dma.h> | ||
26 | #include <nvgpu/soc.h> | ||
27 | #include <nvgpu/hw_sim.h> | ||
28 | #include <nvgpu/sim.h> | ||
29 | #include <nvgpu/gk20a.h> | ||
30 | |||
31 | #include "platform_gk20a.h" | ||
32 | #include "os_linux.h" | ||
33 | #include "module.h" | ||
34 | |||
35 | void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v) | ||
36 | { | ||
37 | struct sim_nvgpu_linux *sim_linux = | ||
38 | container_of(sim, struct sim_nvgpu_linux, sim); | ||
39 | |||
40 | writel(v, sim_linux->regs + r); | ||
41 | } | ||
42 | |||
43 | u32 sim_readl(struct sim_nvgpu *sim, u32 r) | ||
44 | { | ||
45 | struct sim_nvgpu_linux *sim_linux = | ||
46 | container_of(sim, struct sim_nvgpu_linux, sim); | ||
47 | |||
48 | return readl(sim_linux->regs + r); | ||
49 | } | ||
50 | |||
51 | void nvgpu_remove_sim_support_linux(struct gk20a *g) | ||
52 | { | ||
53 | struct sim_nvgpu_linux *sim_linux; | ||
54 | |||
55 | if (!g->sim) | ||
56 | return; | ||
57 | |||
58 | sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); | ||
59 | if (sim_linux->regs) { | ||
60 | sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); | ||
61 | iounmap(sim_linux->regs); | ||
62 | sim_linux->regs = NULL; | ||
63 | } | ||
64 | nvgpu_kfree(g, sim_linux); | ||
65 | g->sim = NULL; | ||
66 | } | ||
67 | |||
68 | int nvgpu_init_sim_support_linux(struct gk20a *g, | ||
69 | struct platform_device *dev) | ||
70 | { | ||
71 | struct sim_nvgpu_linux *sim_linux; | ||
72 | int err = -ENOMEM; | ||
73 | |||
74 | if (!nvgpu_platform_is_simulation(g)) | ||
75 | return 0; | ||
76 | |||
77 | sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); | ||
78 | if (!sim_linux) | ||
79 | return err; | ||
80 | g->sim = &sim_linux->sim; | ||
81 | g->sim->g = g; | ||
82 | sim_linux->regs = nvgpu_devm_ioremap_resource(dev, | ||
83 | GK20A_SIM_IORESOURCE_MEM, | ||
84 | &sim_linux->reg_mem); | ||
85 | if (IS_ERR(sim_linux->regs)) { | ||
86 | nvgpu_err(g, "failed to remap gk20a sim regs"); | ||
87 | err = PTR_ERR(sim_linux->regs); | ||
88 | goto fail; | ||
89 | } | ||
90 | sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux; | ||
91 | return 0; | ||
92 | |||
93 | fail: | ||
94 | nvgpu_remove_sim_support_linux(g); | ||
95 | return err; | ||
96 | } | ||
diff --git a/include/os/linux/sim_pci.c b/include/os/linux/sim_pci.c new file mode 100644 index 0000000..340f1fa --- /dev/null +++ b/include/os/linux/sim_pci.c | |||
@@ -0,0 +1,93 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/io.h> | ||
18 | #include <linux/highmem.h> | ||
19 | #include <linux/platform_device.h> | ||
20 | |||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/linux/vm.h> | ||
23 | #include <nvgpu/bitops.h> | ||
24 | #include <nvgpu/nvgpu_mem.h> | ||
25 | #include <nvgpu/dma.h> | ||
26 | #include <nvgpu/hw_sim_pci.h> | ||
27 | #include <nvgpu/sim.h> | ||
28 | #include <nvgpu/io.h> | ||
29 | #include <nvgpu/gk20a.h> | ||
30 | |||
31 | #include "os_linux.h" | ||
32 | #include "module.h" | ||
33 | |||
34 | static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base) | ||
35 | { | ||
36 | u32 cfg; | ||
37 | bool is_simulation = false; | ||
38 | |||
39 | cfg = nvgpu_readl(g, sim_base + sim_config_r()); | ||
40 | if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v()) | ||
41 | is_simulation = true; | ||
42 | |||
43 | return is_simulation; | ||
44 | } | ||
45 | |||
46 | void nvgpu_remove_sim_support_linux_pci(struct gk20a *g) | ||
47 | { | ||
48 | struct sim_nvgpu_linux *sim_linux; | ||
49 | bool is_simulation; | ||
50 | |||
51 | is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); | ||
52 | |||
53 | if (!is_simulation) { | ||
54 | return; | ||
55 | } | ||
56 | |||
57 | if (!g->sim) { | ||
58 | nvgpu_warn(g, "sim_gk20a not allocated"); | ||
59 | return; | ||
60 | } | ||
61 | sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); | ||
62 | |||
63 | if (sim_linux->regs) { | ||
64 | sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); | ||
65 | sim_linux->regs = NULL; | ||
66 | } | ||
67 | nvgpu_kfree(g, sim_linux); | ||
68 | g->sim = NULL; | ||
69 | } | ||
70 | |||
71 | int nvgpu_init_sim_support_linux_pci(struct gk20a *g) | ||
72 | { | ||
73 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
74 | struct sim_nvgpu_linux *sim_linux; | ||
75 | int err = -ENOMEM; | ||
76 | bool is_simulation; | ||
77 | |||
78 | is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); | ||
79 | __nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation); | ||
80 | |||
81 | if (!is_simulation) | ||
82 | return 0; | ||
83 | |||
84 | sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); | ||
85 | if (!sim_linux) | ||
86 | return err; | ||
87 | g->sim = &sim_linux->sim; | ||
88 | g->sim->g = g; | ||
89 | sim_linux->regs = l->regs + sim_r(); | ||
90 | sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci; | ||
91 | |||
92 | return 0; | ||
93 | } | ||
diff --git a/include/os/linux/soc.c b/include/os/linux/soc.c new file mode 100644 index 0000000..1b27d6f --- /dev/null +++ b/include/os/linux/soc.c | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <soc/tegra/chip-id.h> | ||
15 | #include <soc/tegra/fuse.h> | ||
16 | #include <soc/tegra/tegra_bpmp.h> | ||
17 | #ifdef CONFIG_TEGRA_HV_MANAGER | ||
18 | #include <soc/tegra/virt/syscalls.h> | ||
19 | #endif | ||
20 | |||
21 | #include <nvgpu/soc.h> | ||
22 | #include "os_linux.h" | ||
23 | #include "platform_gk20a.h" | ||
24 | |||
25 | bool nvgpu_platform_is_silicon(struct gk20a *g) | ||
26 | { | ||
27 | return tegra_platform_is_silicon(); | ||
28 | } | ||
29 | |||
30 | bool nvgpu_platform_is_simulation(struct gk20a *g) | ||
31 | { | ||
32 | return tegra_platform_is_vdk(); | ||
33 | } | ||
34 | |||
35 | bool nvgpu_platform_is_fpga(struct gk20a *g) | ||
36 | { | ||
37 | return tegra_platform_is_fpga(); | ||
38 | } | ||
39 | |||
40 | bool nvgpu_is_hypervisor_mode(struct gk20a *g) | ||
41 | { | ||
42 | return is_tegra_hypervisor_mode(); | ||
43 | } | ||
44 | |||
45 | bool nvgpu_is_bpmp_running(struct gk20a *g) | ||
46 | { | ||
47 | return tegra_bpmp_running(); | ||
48 | } | ||
49 | |||
50 | bool nvgpu_is_soc_t194_a01(struct gk20a *g) | ||
51 | { | ||
52 | return (tegra_get_chip_id() == TEGRA194) && | ||
53 | (tegra_chip_get_revision() == | ||
54 | TEGRA194_REVISION_A01); | ||
55 | } | ||
56 | |||
57 | #ifdef CONFIG_TEGRA_HV_MANAGER | ||
58 | /* When nvlink is enabled on dGPU, we need to use physical memory addresses. | ||
59 | * There is no SMMU translation. However, the device initially enumerates as a | ||
60 | * PCIe device. As such, when allocating memory for this PCIe device, the DMA | ||
61 | * framework ends up allocating memory using SMMU (if enabled in device tree). | ||
62 | * As a result, when we switch to nvlink, we need to use underlying physical | ||
63 | * addresses, even if memory mappings exist in SMMU. | ||
64 | * In addition, when stage-2 SMMU translation is enabled (for instance when HV | ||
65 | * is enabled), the addresses we get from dma_alloc are IPAs. We need to | ||
66 | * convert them to PA. | ||
67 | */ | ||
68 | static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa) | ||
69 | { | ||
70 | struct device *dev = dev_from_gk20a(g); | ||
71 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
72 | struct hyp_ipa_pa_info info; | ||
73 | int err; | ||
74 | u64 pa = 0ULL; | ||
75 | |||
76 | err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa); | ||
77 | if (err < 0) { | ||
78 | /* WAR for bug 2096877: | ||
79 | * hyp_read_ipa_pa_info only looks up RAM mappings, so | ||
80 | * assume a one-to-one IPA:PA mapping for the syncpt aperture | ||
81 | */ | ||
82 | u64 start = g->syncpt_unit_base; | ||
83 | u64 end = g->syncpt_unit_base + g->syncpt_unit_size; | ||
84 | if ((ipa >= start) && (ipa < end)) { | ||
85 | pa = ipa; | ||
86 | nvgpu_log(g, gpu_dbg_map_v, | ||
87 | "ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n", | ||
88 | ipa, platform->vmid, pa); | ||
89 | } else { | ||
90 | nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d", | ||
91 | ipa, platform->vmid, err); | ||
92 | } | ||
93 | } else { | ||
94 | pa = info.base + info.offset; | ||
95 | nvgpu_log(g, gpu_dbg_map_v, | ||
96 | "ipa=%llx vmid=%d -> pa=%llx " | ||
97 | "base=%llx offset=%llx size=%llx\n", | ||
98 | ipa, platform->vmid, pa, info.base, | ||
99 | info.offset, info.size); | ||
100 | } | ||
101 | return pa; | ||
102 | } | ||
103 | #endif | ||
104 | |||
105 | int nvgpu_init_soc_vars(struct gk20a *g) | ||
106 | { | ||
107 | #ifdef CONFIG_TEGRA_HV_MANAGER | ||
108 | struct device *dev = dev_from_gk20a(g); | ||
109 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
110 | int err; | ||
111 | |||
112 | if (nvgpu_is_hypervisor_mode(g)) { | ||
113 | err = hyp_read_gid(&platform->vmid); | ||
114 | if (err) { | ||
115 | nvgpu_err(g, "failed to read vmid"); | ||
116 | return err; | ||
117 | } | ||
118 | platform->phys_addr = nvgpu_tegra_hv_ipa_pa; | ||
119 | } | ||
120 | #endif | ||
121 | return 0; | ||
122 | } | ||
diff --git a/include/os/linux/sync_sema_android.c b/include/os/linux/sync_sema_android.c new file mode 100644 index 0000000..59e3b7a --- /dev/null +++ b/include/os/linux/sync_sema_android.c | |||
@@ -0,0 +1,418 @@ | |||
1 | /* | ||
2 | * Semaphore Sync Framework Integration | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/hrtimer.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | |||
24 | #include <nvgpu/kmem.h> | ||
25 | #include <nvgpu/semaphore.h> | ||
26 | #include <nvgpu/bug.h> | ||
27 | #include <nvgpu/kref.h> | ||
28 | #include <nvgpu/channel.h> | ||
29 | #include "../linux/channel.h" | ||
30 | |||
31 | #include "../drivers/staging/android/sync.h" | ||
32 | |||
33 | #include "sync_sema_android.h" | ||
34 | |||
35 | static const struct sync_timeline_ops gk20a_sync_timeline_ops; | ||
36 | |||
37 | struct gk20a_sync_timeline { | ||
38 | struct sync_timeline obj; | ||
39 | u32 max; | ||
40 | u32 min; | ||
41 | }; | ||
42 | |||
43 | /** | ||
44 | * The sync framework dups pts when merging fences. We share a single | ||
45 | * refcounted gk20a_sync_pt for each duped pt. | ||
46 | */ | ||
47 | struct gk20a_sync_pt { | ||
48 | struct gk20a *g; | ||
49 | struct nvgpu_ref refcount; | ||
50 | u32 thresh; | ||
51 | struct nvgpu_semaphore *sema; | ||
52 | struct gk20a_sync_timeline *obj; | ||
53 | |||
54 | /* | ||
55 | * Use a spin lock here since it will have better performance | ||
56 | * than a mutex - there should be very little contention on this | ||
57 | * lock. | ||
58 | */ | ||
59 | struct nvgpu_spinlock lock; | ||
60 | }; | ||
61 | |||
62 | struct gk20a_sync_pt_inst { | ||
63 | struct sync_pt pt; | ||
64 | struct gk20a_sync_pt *shared; | ||
65 | }; | ||
66 | |||
67 | /** | ||
68 | * Compares sync pt values a and b, both of which will trigger either before | ||
69 | * or after ref (i.e. a and b trigger before ref, or a and b trigger after | ||
70 | * ref). Supplying ref allows us to handle wrapping correctly. | ||
71 | * | ||
72 | * Returns -1 if a < b (a triggers before b) | ||
73 | * 0 if a = b (a and b trigger at the same time) | ||
74 | * 1 if a > b (b triggers before a) | ||
75 | */ | ||
76 | static int __gk20a_sync_pt_compare_ref( | ||
77 | u32 ref, | ||
78 | u32 a, | ||
79 | u32 b) | ||
80 | { | ||
81 | /* | ||
82 | * We normalize both a and b by subtracting ref from them. | ||
83 | * Denote the normalized values by a_n and b_n. Note that because | ||
84 | * of wrapping, a_n and/or b_n may be negative. | ||
85 | * | ||
86 | * The normalized values a_n and b_n satisfy: | ||
87 | * - a positive value triggers before a negative value | ||
88 | * - a smaller positive value triggers before a greater positive value | ||
89 | * - a smaller negative value (greater in absolute value) triggers | ||
90 | * before a greater negative value (smaller in absolute value). | ||
91 | * | ||
92 | * Thus we can just stick to unsigned arithmetic and compare | ||
93 | * (u32)a_n to (u32)b_n. | ||
94 | * | ||
95 | * Just to reiterate the possible cases: | ||
96 | * | ||
97 | * 1A) ...ref..a....b.... | ||
98 | * 1B) ...ref..b....a.... | ||
99 | * 2A) ...b....ref..a.... b_n < 0 | ||
100 | * 2B) ...a....ref..b.... a_n > 0 | ||
101 | * 3A) ...a....b....ref.. a_n < 0, b_n < 0 | ||
102 | * 3B) ...b....a....ref.. a_n < 0, b_n < 0 | ||
103 | */ | ||
104 | u32 a_n = a - ref; | ||
105 | u32 b_n = b - ref; | ||
106 | if (a_n < b_n) | ||
107 | return -1; | ||
108 | else if (a_n > b_n) | ||
109 | return 1; | ||
110 | else | ||
111 | return 0; | ||
112 | } | ||
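
Because thresholds come from a free-running 32-bit counter, the subtraction-based normalization above stays correct across wrap-around. A worked example with invented values: with ref = 0xFFFFFFF0, a threshold of 0xFFFFFFF8 normalizes to 8 while 0x00000005 (which has wrapped past ref) normalizes to 21, so the former triggers first:

#include <assert.h>
#include <stdint.h>

/* standalone copy of the normalization logic, for illustration only */
static int compare_ref(uint32_t ref, uint32_t a, uint32_t b)
{
	uint32_t a_n = a - ref;	/* unsigned wrap-around does the work */
	uint32_t b_n = b - ref;

	if (a_n < b_n)
		return -1;
	if (a_n > b_n)
		return 1;
	return 0;
}

int main(void)
{
	/* 0xFFFFFFF8 triggers before the wrapped 0x00000005 */
	assert(compare_ref(0xFFFFFFF0u, 0xFFFFFFF8u, 0x00000005u) == -1);
	assert(compare_ref(0xFFFFFFF0u, 0x00000005u, 0xFFFFFFF8u) == 1);
	assert(compare_ref(0xFFFFFFF0u, 0x00000005u, 0x00000005u) == 0);
	return 0;
}
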
113 | |||
114 | static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt) | ||
115 | { | ||
116 | struct gk20a_sync_pt_inst *pti = | ||
117 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
118 | return pti->shared; | ||
119 | } | ||
120 | static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj) | ||
121 | { | ||
122 | if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops)) | ||
123 | return NULL; | ||
124 | return (struct gk20a_sync_timeline *)obj; | ||
125 | } | ||
126 | |||
127 | static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) | ||
128 | { | ||
129 | struct gk20a_sync_pt *pt = | ||
130 | container_of(ref, struct gk20a_sync_pt, refcount); | ||
131 | struct gk20a *g = pt->g; | ||
132 | |||
133 | if (pt->sema) | ||
134 | nvgpu_semaphore_put(pt->sema); | ||
135 | nvgpu_kfree(g, pt); | ||
136 | } | ||
137 | |||
138 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | ||
139 | struct gk20a *g, | ||
140 | struct gk20a_sync_timeline *obj, | ||
141 | struct nvgpu_semaphore *sema) | ||
142 | { | ||
143 | struct gk20a_sync_pt *shared; | ||
144 | |||
145 | shared = nvgpu_kzalloc(g, sizeof(*shared)); | ||
146 | if (!shared) | ||
147 | return NULL; | ||
148 | |||
149 | nvgpu_ref_init(&shared->refcount); | ||
150 | shared->g = g; | ||
151 | shared->obj = obj; | ||
152 | shared->sema = sema; | ||
153 | shared->thresh = ++obj->max; /* sync framework has a lock */ | ||
154 | |||
155 | nvgpu_spinlock_init(&shared->lock); | ||
156 | |||
157 | nvgpu_semaphore_get(sema); | ||
158 | |||
159 | return shared; | ||
160 | } | ||
161 | |||
162 | static struct sync_pt *gk20a_sync_pt_create_inst( | ||
163 | struct gk20a *g, | ||
164 | struct gk20a_sync_timeline *obj, | ||
165 | struct nvgpu_semaphore *sema) | ||
166 | { | ||
167 | struct gk20a_sync_pt_inst *pti; | ||
168 | |||
169 | pti = (struct gk20a_sync_pt_inst *) | ||
170 | sync_pt_create(&obj->obj, sizeof(*pti)); | ||
171 | if (!pti) | ||
172 | return NULL; | ||
173 | |||
174 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); | ||
175 | if (!pti->shared) { | ||
176 | sync_pt_free(&pti->pt); | ||
177 | return NULL; | ||
178 | } | ||
179 | return &pti->pt; | ||
180 | } | ||
181 | |||
182 | static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) | ||
183 | { | ||
184 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
185 | if (pt) | ||
186 | nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); | ||
187 | } | ||
188 | |||
189 | static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) | ||
190 | { | ||
191 | struct gk20a_sync_pt_inst *pti; | ||
192 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
193 | |||
194 | pti = (struct gk20a_sync_pt_inst *) | ||
195 | sync_pt_create(&pt->obj->obj, sizeof(*pti)); | ||
196 | if (!pti) | ||
197 | return NULL; | ||
198 | pti->shared = pt; | ||
199 | nvgpu_ref_get(&pt->refcount); | ||
200 | return &pti->pt; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * This function may be called concurrently on the same sync_pt, so a | ||
205 | * lock is required to protect the sync_pt's internal data structures, | ||
206 | * which are modified as a side effect of calling this function. | ||
207 | */ | ||
208 | static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | ||
209 | { | ||
210 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
211 | struct gk20a_sync_timeline *obj = pt->obj; | ||
212 | bool signaled = true; | ||
213 | |||
214 | nvgpu_spinlock_acquire(&pt->lock); | ||
215 | if (!pt->sema) | ||
216 | goto done; | ||
217 | |||
218 | /* Acquired == not released yet == active == not signaled. */ | ||
219 | signaled = !nvgpu_semaphore_is_acquired(pt->sema); | ||
220 | |||
221 | if (signaled) { | ||
222 | /* Update min if necessary. */ | ||
223 | if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh, | ||
224 | obj->min) == 1) | ||
225 | obj->min = pt->thresh; | ||
226 | |||
227 | /* Release the semaphore to the pool. */ | ||
228 | nvgpu_semaphore_put(pt->sema); | ||
229 | pt->sema = NULL; | ||
230 | } | ||
231 | done: | ||
232 | nvgpu_spinlock_release(&pt->lock); | ||
233 | |||
234 | return signaled; | ||
235 | } | ||
236 | |||
237 | static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) | ||
238 | { | ||
239 | bool a_expired; | ||
240 | bool b_expired; | ||
241 | struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a); | ||
242 | struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b); | ||
243 | |||
244 | if (WARN_ON(pt_a->obj != pt_b->obj)) | ||
245 | return 0; | ||
246 | |||
247 | /* Early out */ | ||
248 | if (a == b) | ||
249 | return 0; | ||
250 | |||
251 | a_expired = gk20a_sync_pt_has_signaled(a); | ||
252 | b_expired = gk20a_sync_pt_has_signaled(b); | ||
253 | if (a_expired && !b_expired) { | ||
254 | /* Easy, a was earlier */ | ||
255 | return -1; | ||
256 | } else if (!a_expired && b_expired) { | ||
257 | /* Easy, b was earlier */ | ||
258 | return 1; | ||
259 | } | ||
260 | |||
261 | /* Both a and b are expired (trigger before min) or not | ||
262 | * expired (trigger after min), so we can use min | ||
263 | * as a reference value for __gk20a_sync_pt_compare_ref. | ||
264 | */ | ||
265 | return __gk20a_sync_pt_compare_ref(pt_a->obj->min, | ||
266 | pt_a->thresh, pt_b->thresh); | ||
267 | } | ||
268 | |||
269 | static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj) | ||
270 | { | ||
271 | return obj->min; | ||
272 | } | ||
273 | |||
274 | static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, | ||
275 | char *str, int size) | ||
276 | { | ||
277 | struct gk20a_sync_timeline *obj = | ||
278 | (struct gk20a_sync_timeline *)timeline; | ||
279 | snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); | ||
280 | } | ||
281 | |||
282 | static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, | ||
283 | char *str, int size) | ||
284 | { | ||
285 | struct nvgpu_semaphore *s = pt->sema; | ||
286 | |||
287 | snprintf(str, size, "S: pool=%llu [v=%u,r_v=%u]", | ||
288 | s->location.pool->page_idx, | ||
289 | nvgpu_semaphore_get_value(s), | ||
290 | nvgpu_semaphore_read(s)); | ||
291 | } | ||
292 | |||
293 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | ||
294 | int size) | ||
295 | { | ||
296 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
297 | |||
298 | if (pt->sema) { | ||
299 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | snprintf(str, size, "%d", pt->thresh); | ||
304 | } | ||
305 | |||
306 | static const struct sync_timeline_ops gk20a_sync_timeline_ops = { | ||
307 | .driver_name = "nvgpu_semaphore", | ||
308 | .dup = gk20a_sync_pt_dup_inst, | ||
309 | .has_signaled = gk20a_sync_pt_has_signaled, | ||
310 | .compare = gk20a_sync_pt_compare, | ||
311 | .free_pt = gk20a_sync_pt_free_inst, | ||
312 | .timeline_value_str = gk20a_sync_timeline_value_str, | ||
313 | .pt_value_str = gk20a_sync_pt_value_str, | ||
314 | }; | ||
315 | |||
316 | /* Public API */ | ||
317 | |||
318 | struct sync_fence *gk20a_sync_fence_fdget(int fd) | ||
319 | { | ||
320 | struct sync_fence *fence = sync_fence_fdget(fd); | ||
321 | int i; | ||
322 | |||
323 | if (!fence) | ||
324 | return NULL; | ||
325 | |||
326 | for (i = 0; i < fence->num_fences; i++) { | ||
327 | struct sync_pt *spt = sync_pt_from_fence(fence->cbs[i].sync_pt); | ||
328 | struct sync_timeline *t; | ||
329 | |||
330 | if (spt == NULL) { | ||
331 | sync_fence_put(fence); | ||
332 | return NULL; | ||
333 | } | ||
334 | |||
335 | t = sync_pt_parent(spt); | ||
336 | if (t->ops != &gk20a_sync_timeline_ops) { | ||
337 | sync_fence_put(fence); | ||
338 | return NULL; | ||
339 | } | ||
340 | } | ||
341 | |||
342 | return fence; | ||
343 | } | ||
344 | |||
345 | struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) | ||
346 | { | ||
347 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); | ||
348 | struct nvgpu_semaphore *sema; | ||
349 | |||
350 | nvgpu_spinlock_acquire(&pt->lock); | ||
351 | sema = pt->sema; | ||
352 | if (sema) | ||
353 | nvgpu_semaphore_get(sema); | ||
354 | nvgpu_spinlock_release(&pt->lock); | ||
355 | |||
356 | return sema; | ||
357 | } | ||
358 | |||
359 | void gk20a_sync_timeline_signal(struct sync_timeline *timeline) | ||
360 | { | ||
361 | sync_timeline_signal(timeline, 0); | ||
362 | } | ||
363 | |||
364 | void gk20a_sync_timeline_destroy(struct sync_timeline *timeline) | ||
365 | { | ||
366 | sync_timeline_destroy(timeline); | ||
367 | } | ||
368 | |||
369 | struct sync_timeline *gk20a_sync_timeline_create( | ||
370 | const char *name) | ||
371 | { | ||
372 | struct gk20a_sync_timeline *obj; | ||
373 | |||
374 | obj = (struct gk20a_sync_timeline *) | ||
375 | sync_timeline_create(&gk20a_sync_timeline_ops, | ||
376 | sizeof(struct gk20a_sync_timeline), | ||
377 | name); | ||
378 | if (!obj) | ||
379 | return NULL; | ||
380 | obj->max = 0; | ||
381 | obj->min = 0; | ||
382 | return &obj->obj; | ||
383 | } | ||
384 | |||
385 | struct sync_fence *gk20a_sync_fence_create( | ||
386 | struct channel_gk20a *c, | ||
387 | struct nvgpu_semaphore *sema, | ||
388 | const char *fmt, ...) | ||
389 | { | ||
390 | char name[30]; | ||
391 | va_list args; | ||
392 | struct sync_pt *pt; | ||
393 | struct sync_fence *fence; | ||
394 | struct gk20a *g = c->g; | ||
395 | |||
396 | struct nvgpu_channel_linux *os_channel_priv = c->os_priv; | ||
397 | struct nvgpu_os_fence_framework *fence_framework = NULL; | ||
398 | struct gk20a_sync_timeline *timeline = NULL; | ||
399 | |||
400 | fence_framework = &os_channel_priv->fence_framework; | ||
401 | |||
402 | timeline = to_gk20a_timeline(fence_framework->timeline); | ||
403 | |||
404 | pt = gk20a_sync_pt_create_inst(g, timeline, sema); | ||
405 | if (pt == NULL) | ||
406 | return NULL; | ||
407 | |||
408 | va_start(args, fmt); | ||
409 | vsnprintf(name, sizeof(name), fmt, args); | ||
410 | va_end(args); | ||
411 | |||
412 | fence = sync_fence_create(name, pt); | ||
413 | if (fence == NULL) { | ||
414 | sync_pt_free(pt); | ||
415 | return NULL; | ||
416 | } | ||
417 | return fence; | ||
418 | } | ||
diff --git a/include/os/linux/sync_sema_android.h b/include/os/linux/sync_sema_android.h new file mode 100644 index 0000000..4fca7be --- /dev/null +++ b/include/os/linux/sync_sema_android.h | |||
@@ -0,0 +1,51 @@ | |||
1 | /* | ||
2 | * Semaphore Sync Framework Integration | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _GK20A_SYNC_H_ | ||
20 | #define _GK20A_SYNC_H_ | ||
21 | |||
22 | struct sync_timeline; | ||
23 | struct sync_fence; | ||
24 | struct sync_pt; | ||
25 | struct nvgpu_semaphore; | ||
26 | struct fence; | ||
27 | |||
28 | #ifdef CONFIG_SYNC | ||
29 | struct sync_timeline *gk20a_sync_timeline_create(const char *name); | ||
30 | void gk20a_sync_timeline_destroy(struct sync_timeline *); | ||
31 | void gk20a_sync_timeline_signal(struct sync_timeline *); | ||
32 | struct sync_fence *gk20a_sync_fence_create( | ||
33 | struct channel_gk20a *c, | ||
34 | struct nvgpu_semaphore *, | ||
35 | const char *fmt, ...); | ||
36 | struct sync_fence *gk20a_sync_fence_fdget(int fd); | ||
37 | struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt); | ||
38 | #else | ||
39 | static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {} | ||
40 | static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {} | ||
41 | static inline struct sync_fence *gk20a_sync_fence_fdget(int fd) | ||
42 | { | ||
43 | return NULL; | ||
44 | } | ||
45 | static inline struct sync_timeline *gk20a_sync_timeline_create( | ||
46 | const char *name) { | ||
47 | return NULL; | ||
48 | } | ||
49 | #endif | ||
50 | |||
51 | #endif | ||
diff --git a/include/os/linux/sysfs.c b/include/os/linux/sysfs.c new file mode 100644 index 0000000..221ea0c --- /dev/null +++ b/include/os/linux/sysfs.c | |||
@@ -0,0 +1,1275 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/device.h> | ||
18 | #include <linux/pm_runtime.h> | ||
19 | #include <linux/fb.h> | ||
20 | |||
21 | #include <nvgpu/kmem.h> | ||
22 | #include <nvgpu/nvhost.h> | ||
23 | #include <nvgpu/ptimer.h> | ||
24 | #include <nvgpu/power_features/cg.h> | ||
25 | #include <nvgpu/power_features/pg.h> | ||
26 | |||
27 | #include "os_linux.h" | ||
28 | #include "sysfs.h" | ||
29 | #include "platform_gk20a.h" | ||
30 | #include "gk20a/gr_gk20a.h" | ||
31 | #include "gv11b/gr_gv11b.h" | ||
32 | |||
33 | #define PTIMER_FP_FACTOR 1000000 | ||
34 | |||
35 | #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) | ||
36 | |||
37 | #define TPC_MASK_FOR_ALL_ACTIVE_TPCs (u32) 0x0 | ||
38 | |||
39 | static ssize_t elcg_enable_store(struct device *dev, | ||
40 | struct device_attribute *attr, const char *buf, size_t count) | ||
41 | { | ||
42 | struct gk20a *g = get_gk20a(dev); | ||
43 | unsigned long val = 0; | ||
44 | int err; | ||
45 | |||
46 | if (kstrtoul(buf, 10, &val) < 0) | ||
47 | return -EINVAL; | ||
48 | |||
49 | err = gk20a_busy(g); | ||
50 | if (err) | ||
51 | return err; | ||
52 | |||
53 | if (val) { | ||
54 | nvgpu_cg_elcg_set_elcg_enabled(g, true); | ||
55 | } else { | ||
56 | nvgpu_cg_elcg_set_elcg_enabled(g, false); | ||
57 | } | ||
58 | |||
59 | gk20a_idle(g); | ||
60 | |||
61 | nvgpu_info(g, "ELCG is %s.", val ? "enabled" : | ||
62 | "disabled"); | ||
63 | |||
64 | return count; | ||
65 | } | ||
66 | |||
67 | static ssize_t elcg_enable_read(struct device *dev, | ||
68 | struct device_attribute *attr, char *buf) | ||
69 | { | ||
70 | struct gk20a *g = get_gk20a(dev); | ||
71 | |||
72 | return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0); | ||
73 | } | ||
74 | |||
75 | static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store); | ||
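
Each DEVICE_ATTR() pair like the one above becomes a root-writable sysfs file. A hedged userspace sketch of flipping ELCG through it; the sysfs path depends on the platform device name and is an assumption here:

#include <fcntl.h>
#include <unistd.h>

static int set_elcg(int enable)
{
	/* illustrative path; locate the real node under /sys/devices */
	int fd = open("/sys/devices/gpu.0/elcg_enable", O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;

	/* elcg_enable_store() treats any non-zero value as "enable" */
	n = write(fd, enable ? "1" : "0", 1);
	close(fd);

	return n == 1 ? 0 : -1;
}
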
76 | |||
77 | static ssize_t blcg_enable_store(struct device *dev, | ||
78 | struct device_attribute *attr, const char *buf, size_t count) | ||
79 | { | ||
80 | struct gk20a *g = get_gk20a(dev); | ||
81 | unsigned long val = 0; | ||
82 | int err; | ||
83 | |||
84 | if (kstrtoul(buf, 10, &val) < 0) | ||
85 | return -EINVAL; | ||
86 | |||
87 | err = gk20a_busy(g); | ||
88 | if (err) | ||
89 | return err; | ||
90 | |||
91 | if (val) { | ||
92 | nvgpu_cg_blcg_set_blcg_enabled(g, true); | ||
93 | } else { | ||
94 | nvgpu_cg_blcg_set_blcg_enabled(g, false); | ||
95 | } | ||
96 | |||
97 | gk20a_idle(g); | ||
98 | |||
99 | nvgpu_info(g, "BLCG is %s.", val ? "enabled" : | ||
100 | "disabled"); | ||
101 | |||
102 | return count; | ||
103 | } | ||
104 | |||
105 | static ssize_t blcg_enable_read(struct device *dev, | ||
106 | struct device_attribute *attr, char *buf) | ||
107 | { | ||
108 | struct gk20a *g = get_gk20a(dev); | ||
109 | |||
110 | return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0); | ||
111 | } | ||
112 | |||
113 | |||
114 | static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store); | ||
115 | |||
116 | static ssize_t slcg_enable_store(struct device *dev, | ||
117 | struct device_attribute *attr, const char *buf, size_t count) | ||
118 | { | ||
119 | struct gk20a *g = get_gk20a(dev); | ||
120 | unsigned long val = 0; | ||
121 | int err; | ||
122 | |||
123 | if (kstrtoul(buf, 10, &val) < 0) | ||
124 | return -EINVAL; | ||
125 | |||
126 | err = gk20a_busy(g); | ||
127 | if (err) { | ||
128 | return err; | ||
129 | } | ||
130 | |||
131 | if (val) { | ||
132 | nvgpu_cg_slcg_set_slcg_enabled(g, true); | ||
133 | } else { | ||
134 | nvgpu_cg_slcg_set_slcg_enabled(g, false); | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * TODO: slcg_therm_load_gating is not enabled anywhere during | ||
139 | * init. Therefore, it would be incongruous to add it here. Once | ||
140 | * it is added to init, we should add it here too. | ||
141 | */ | ||
142 | gk20a_idle(g); | ||
143 | |||
144 | nvgpu_info(g, "SLCG is %s.", val ? "enabled" : | ||
145 | "disabled"); | ||
146 | |||
147 | return count; | ||
148 | } | ||
149 | |||
150 | static ssize_t slcg_enable_read(struct device *dev, | ||
151 | struct device_attribute *attr, char *buf) | ||
152 | { | ||
153 | struct gk20a *g = get_gk20a(dev); | ||
154 | |||
155 | return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 1 : 0); | ||
156 | } | ||
157 | |||
158 | static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store); | ||
159 | |||
160 | static ssize_t ptimer_scale_factor_show(struct device *dev, | ||
161 | struct device_attribute *attr, | ||
162 | char *buf) | ||
163 | { | ||
164 | struct gk20a *g = get_gk20a(dev); | ||
165 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
166 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
167 | u32 scaling_factor_fp; | ||
168 | ssize_t res; | ||
169 | |||
170 | if (!src_freq_hz) { | ||
171 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
172 | return -EINVAL; | ||
173 | } | ||
174 | |||
175 | scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) / | ||
176 | ((u32)(src_freq_hz) / | ||
177 | (u32)(PTIMER_FP_FACTOR)); | ||
178 | res = snprintf(buf, | ||
179 | PAGE_SIZE, | ||
180 | "%u.%u\n", | ||
181 | scaling_factor_fp / PTIMER_FP_FACTOR, | ||
182 | scaling_factor_fp % PTIMER_FP_FACTOR); | ||
183 | |||
184 | return res; | ||
185 | |||
186 | } | ||
187 | |||
188 | static DEVICE_ATTR(ptimer_scale_factor, | ||
189 | S_IRUGO, | ||
190 | ptimer_scale_factor_show, | ||
191 | NULL); | ||
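
The scale factor printed above is the ratio of the ptimer reference frequency to the actual source clock, carried in decimal fixed point. Worked numbers, as a sketch: PTIMER_REF_FREQ_HZ is assumed to be 31250000 (it is defined elsewhere in nvgpu, not in this file), and the 19.2 MHz source rate is an invented sample value:

#include <stdio.h>

int main(void)
{
	const unsigned ref_freq_hz = 31250000;	/* assumed PTIMER_REF_FREQ_HZ */
	const unsigned fp_factor = 1000000;	/* PTIMER_FP_FACTOR */
	const unsigned src_freq_hz = 19200000;	/* sample clk_m rate */

	/* same integer math as ptimer_scale_factor_show() */
	unsigned scale_fp = ref_freq_hz / (src_freq_hz / fp_factor);

	/* prints "1.644736": reference ticks ~1.64x per source clock tick */
	printf("%u.%u\n", scale_fp / fp_factor, scale_fp % fp_factor);
	return 0;
}
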
192 | |||
193 | static ssize_t ptimer_ref_freq_show(struct device *dev, | ||
194 | struct device_attribute *attr, | ||
195 | char *buf) | ||
196 | { | ||
197 | struct gk20a *g = get_gk20a(dev); | ||
198 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
199 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
200 | ssize_t res; | ||
201 | |||
202 | if (!src_freq_hz) { | ||
203 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
204 | return -EINVAL; | ||
205 | } | ||
206 | |||
207 | res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ); | ||
208 | |||
209 | return res; | ||
210 | |||
211 | } | ||
212 | |||
213 | static DEVICE_ATTR(ptimer_ref_freq, | ||
214 | S_IRUGO, | ||
215 | ptimer_ref_freq_show, | ||
216 | NULL); | ||
217 | |||
218 | static ssize_t ptimer_src_freq_show(struct device *dev, | ||
219 | struct device_attribute *attr, | ||
220 | char *buf) | ||
221 | { | ||
222 | struct gk20a *g = get_gk20a(dev); | ||
223 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
224 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
225 | ssize_t res; | ||
226 | |||
227 | if (!src_freq_hz) { | ||
228 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
229 | return -EINVAL; | ||
230 | } | ||
231 | |||
232 | res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz); | ||
233 | |||
234 | return res; | ||
235 | |||
236 | } | ||
237 | |||
238 | static DEVICE_ATTR(ptimer_src_freq, | ||
239 | S_IRUGO, | ||
240 | ptimer_src_freq_show, | ||
241 | NULL); | ||
242 | |||
243 | |||
244 | static ssize_t gpu_powered_on_show(struct device *dev, | ||
245 | struct device_attribute *attr, | ||
246 | char *buf) | ||
247 | { | ||
248 | struct gk20a *g = get_gk20a(dev); | ||
249 | |||
250 | return snprintf(buf, PAGE_SIZE, "%u\n", g->power_on); | ||
251 | } | ||
252 | |||
253 | static DEVICE_ATTR(gpu_powered_on, S_IRUGO, gpu_powered_on_show, NULL); | ||
254 | |||
255 | #if defined(CONFIG_PM) | ||
256 | static ssize_t railgate_enable_store(struct device *dev, | ||
257 | struct device_attribute *attr, const char *buf, size_t count) | ||
258 | { | ||
259 | unsigned long railgate_enable = 0; | ||
260 | /* dev is guaranteed to be valid here. Ok to de-reference */ | ||
261 | struct gk20a *g = get_gk20a(dev); | ||
262 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
263 | bool enabled = nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE); | ||
264 | int err; | ||
265 | |||
266 | if (kstrtoul(buf, 10, &railgate_enable) < 0) | ||
267 | return -EINVAL; | ||
268 | |||
269 | /* convert to boolean */ | ||
270 | railgate_enable = !!railgate_enable; | ||
271 | |||
272 | /* writing same value should be treated as nop and successful */ | ||
273 | if (railgate_enable == enabled) | ||
274 | goto out; | ||
275 | |||
276 | if (!platform->can_railgate_init) { | ||
277 | nvgpu_err(g, "Railgating is not supported"); | ||
278 | return -EINVAL; | ||
279 | } | ||
280 | |||
281 | if (railgate_enable) { | ||
282 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, true); | ||
283 | pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); | ||
284 | } else { | ||
285 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
286 | pm_runtime_set_autosuspend_delay(dev, -1); | ||
287 | } | ||
288 | /* wake-up system to make rail-gating setting effective */ | ||
289 | err = gk20a_busy(g); | ||
290 | if (err) | ||
291 | return err; | ||
292 | gk20a_idle(g); | ||
293 | |||
294 | out: | ||
295 | nvgpu_info(g, "railgate is %s.", | ||
296 | nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ? | ||
297 | "enabled" : "disabled"); | ||
298 | |||
299 | return count; | ||
300 | } | ||
301 | |||
302 | static ssize_t railgate_enable_read(struct device *dev, | ||
303 | struct device_attribute *attr, char *buf) | ||
304 | { | ||
305 | struct gk20a *g = get_gk20a(dev); | ||
306 | |||
307 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
308 | nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ? 1 : 0); | ||
309 | } | ||
310 | |||
311 | static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read, | ||
312 | railgate_enable_store); | ||
313 | #endif | ||
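/*
 * Usage sketch (sysfs path assumed): toggling railgating goes through
 * the attribute above; the store hook also cycles gk20a_busy()/
 * gk20a_idle() once so the new pm_runtime autosuspend delay takes
 * effect immediately:
 *
 *   # echo 1 > /sys/devices/.../railgate_enable   (allow railgating)
 *   # echo 0 > /sys/devices/.../railgate_enable   (keep rails on)
 *   # cat /sys/devices/.../railgate_enable
 */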
314 | |||
315 | static ssize_t railgate_delay_store(struct device *dev, | ||
316 | struct device_attribute *attr, | ||
317 | const char *buf, size_t count) | ||
318 | { | ||
319 | int railgate_delay = 0, ret = 0; | ||
320 | struct gk20a *g = get_gk20a(dev); | ||
321 | int err; | ||
322 | |||
323 | if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) { | ||
324 | nvgpu_info(g, "does not support power-gating"); | ||
325 | return count; | ||
326 | } | ||
327 | |||
328 | ret = sscanf(buf, "%d", &railgate_delay); | ||
329 | if (ret == 1 && railgate_delay >= 0) { | ||
330 | g->railgate_delay = railgate_delay; | ||
331 | pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); | ||
332 | } else | ||
333 | nvgpu_err(g, "Invalid powergate delay"); | ||
334 | |||
335 | /* wake-up system to make rail-gating delay effective immediately */ | ||
336 | err = gk20a_busy(g); | ||
337 | if (err) | ||
338 | return err; | ||
339 | gk20a_idle(g); | ||
340 | |||
341 | return count; | ||
342 | } | ||
343 | static ssize_t railgate_delay_show(struct device *dev, | ||
344 | struct device_attribute *attr, char *buf) | ||
345 | { | ||
346 | struct gk20a *g = get_gk20a(dev); | ||
347 | |||
348 | return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay); | ||
349 | } | ||
350 | static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show, | ||
351 | railgate_delay_store); | ||
352 | |||
353 | static ssize_t is_railgated_show(struct device *dev, | ||
354 | struct device_attribute *attr, char *buf) | ||
355 | { | ||
356 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
357 | bool is_railgated = false; | ||
358 | |||
359 | if (platform->is_railgated) | ||
360 | is_railgated = platform->is_railgated(dev); | ||
361 | |||
362 | return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no"); | ||
363 | } | ||
364 | static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL); | ||
365 | |||
366 | static ssize_t counters_show(struct device *dev, | ||
367 | struct device_attribute *attr, char *buf) | ||
368 | { | ||
369 | struct gk20a *g = get_gk20a(dev); | ||
370 | u32 busy_cycles, total_cycles; | ||
371 | ssize_t res; | ||
372 | |||
373 | nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles); | ||
374 | |||
375 | res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); | ||
376 | |||
377 | return res; | ||
378 | } | ||
379 | static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); | ||
380 | |||
381 | static ssize_t counters_show_reset(struct device *dev, | ||
382 | struct device_attribute *attr, char *buf) | ||
383 | { | ||
384 | ssize_t res = counters_show(dev, attr, buf); | ||
385 | struct gk20a *g = get_gk20a(dev); | ||
386 | |||
387 | nvgpu_pmu_reset_load_counters(g); | ||
388 | |||
389 | return res; | ||
390 | } | ||
391 | static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); | ||
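/*
 * The two values read back above are cumulative busy and total cycle
 * counts. A sketch of how a userspace sampler could derive utilization
 * from two reads (read_counters() and sleep_interval() are hypothetical
 * helpers, not part of this driver):
 *
 *   u32 busy0, total0, busy1, total1, load_pct;
 *
 *   read_counters(&busy0, &total0);    // parse first read of "counters"
 *   sleep_interval();
 *   read_counters(&busy1, &total1);    // parse second read
 *   load_pct = 100 * (busy1 - busy0) / (total1 - total0);
 *
 * Reading "counters_reset" instead returns the same two values but
 * zeroes the counters afterwards, so one read per interval suffices.
 */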
392 | |||
393 | static ssize_t gk20a_load_show(struct device *dev, | ||
394 | struct device_attribute *attr, | ||
395 | char *buf) | ||
396 | { | ||
397 | struct gk20a *g = get_gk20a(dev); | ||
398 | u32 busy_time; | ||
399 | ssize_t res; | ||
400 | int err; | ||
401 | |||
402 | if (!g->power_on) { | ||
403 | busy_time = 0; | ||
404 | } else { | ||
405 | err = gk20a_busy(g); | ||
406 | if (err) | ||
407 | return err; | ||
408 | |||
409 | nvgpu_pmu_load_update(g); | ||
410 | nvgpu_pmu_load_norm(g, &busy_time); | ||
411 | gk20a_idle(g); | ||
412 | } | ||
413 | |||
414 | res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); | ||
415 | |||
416 | return res; | ||
417 | } | ||
418 | static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL); | ||
419 | |||
420 | static ssize_t elpg_enable_store(struct device *dev, | ||
421 | struct device_attribute *attr, const char *buf, size_t count) | ||
422 | { | ||
423 | struct gk20a *g = get_gk20a(dev); | ||
424 | unsigned long val = 0; | ||
425 | int err; | ||
426 | |||
427 | if (kstrtoul(buf, 10, &val) < 0) | ||
428 | return -EINVAL; | ||
429 | |||
430 | if (!g->power_on) { | ||
431 | return -EINVAL; | ||
432 | } else { | ||
433 | err = gk20a_busy(g); | ||
434 | if (err) | ||
435 | return -EAGAIN; | ||
436 | /* | ||
437 | * Since elpg is refcounted, we should not unnecessarily call | ||
438 | * enable/disable if it is already so. | ||
439 | */ | ||
440 | nvgpu_pg_elpg_set_elpg_enabled(g, val != 0); | ||
445 | gk20a_idle(g); | ||
446 | } | ||
447 | nvgpu_info(g, "ELPG is %s.", val ? "enabled" : | ||
448 | "disabled"); | ||
449 | |||
450 | return count; | ||
451 | } | ||
452 | |||
453 | static ssize_t elpg_enable_read(struct device *dev, | ||
454 | struct device_attribute *attr, char *buf) | ||
455 | { | ||
456 | struct gk20a *g = get_gk20a(dev); | ||
457 | |||
458 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
459 | nvgpu_pg_elpg_is_enabled(g) ? 1 : 0); | ||
460 | } | ||
461 | |||
462 | static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store); | ||
463 | |||
464 | static ssize_t ldiv_slowdown_factor_store(struct device *dev, | ||
465 | struct device_attribute *attr, const char *buf, size_t count) | ||
466 | { | ||
467 | struct gk20a *g = get_gk20a(dev); | ||
468 | unsigned long val = 0; | ||
469 | int err; | ||
470 | |||
471 | if (kstrtoul(buf, 10, &val) < 0) { | ||
472 | nvgpu_err(g, "parse error for input SLOWDOWN factor\n"); | ||
473 | return -EINVAL; | ||
474 | } | ||
475 | |||
476 | if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) { | ||
477 | nvgpu_err(g, "Invalid SLOWDOWN factor\n"); | ||
478 | return -EINVAL; | ||
479 | } | ||
480 | |||
481 | if (val == g->ldiv_slowdown_factor) | ||
482 | return count; | ||
483 | |||
484 | if (!g->power_on) { | ||
485 | g->ldiv_slowdown_factor = val; | ||
486 | } else { | ||
487 | err = gk20a_busy(g); | ||
488 | if (err) | ||
489 | return -EAGAIN; | ||
490 | |||
491 | g->ldiv_slowdown_factor = val; | ||
492 | |||
493 | if (g->ops.pmu.pmu_pg_init_param) | ||
494 | g->ops.pmu.pmu_pg_init_param(g, | ||
495 | PMU_PG_ELPG_ENGINE_ID_GRAPHICS); | ||
496 | |||
497 | gk20a_idle(g); | ||
498 | } | ||
499 | |||
500 | nvgpu_info(g, "ldiv_slowdown_factor is %x\n", g->ldiv_slowdown_factor); | ||
501 | |||
502 | return count; | ||
503 | } | ||
504 | |||
505 | static ssize_t ldiv_slowdown_factor_read(struct device *dev, | ||
506 | struct device_attribute *attr, char *buf) | ||
507 | { | ||
508 | struct gk20a *g = get_gk20a(dev); | ||
509 | |||
510 | return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor); | ||
511 | } | ||
512 | |||
513 | static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW, | ||
514 | ldiv_slowdown_factor_read, ldiv_slowdown_factor_store); | ||
515 | |||
516 | static ssize_t mscg_enable_store(struct device *dev, | ||
517 | struct device_attribute *attr, const char *buf, size_t count) | ||
518 | { | ||
519 | struct gk20a *g = get_gk20a(dev); | ||
520 | struct nvgpu_pmu *pmu = &g->pmu; | ||
521 | unsigned long val = 0; | ||
522 | int err; | ||
523 | |||
524 | if (kstrtoul(buf, 10, &val) < 0) | ||
525 | return -EINVAL; | ||
526 | |||
527 | if (!g->power_on) { | ||
528 | g->mscg_enabled = val ? true : false; | ||
529 | } else { | ||
530 | err = gk20a_busy(g); | ||
531 | if (err) | ||
532 | return -EAGAIN; | ||
533 | /* | ||
534 | * Since elpg is refcounted, we should not unnecessarily call | ||
535 | * enable/disable if it is already so. | ||
536 | */ | ||
537 | if (val && !g->mscg_enabled) { | ||
538 | g->mscg_enabled = true; | ||
539 | if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, | ||
540 | PMU_PG_LPWR_FEATURE_MSCG)) { | ||
541 | if (!ACCESS_ONCE(pmu->mscg_stat)) { | ||
542 | WRITE_ONCE(pmu->mscg_stat, | ||
543 | PMU_MSCG_ENABLED); | ||
544 | /* make status visible */ | ||
545 | smp_mb(); | ||
546 | } | ||
547 | } | ||
548 | |||
549 | } else if (!val && g->mscg_enabled) { | ||
550 | if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, | ||
551 | PMU_PG_LPWR_FEATURE_MSCG)) { | ||
552 | nvgpu_pmu_pg_global_enable(g, false); | ||
553 | WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); | ||
554 | /* make status visible */ | ||
555 | smp_mb(); | ||
556 | g->mscg_enabled = false; | ||
557 | if (nvgpu_pg_elpg_is_enabled(g)) { | ||
558 | nvgpu_pg_elpg_enable(g); | ||
559 | } | ||
560 | } | ||
561 | g->mscg_enabled = false; | ||
562 | } | ||
563 | gk20a_idle(g); | ||
564 | } | ||
565 | nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? "enabled" : | ||
566 | "disabled"); | ||
567 | |||
568 | return count; | ||
569 | } | ||
570 | |||
571 | static ssize_t mscg_enable_read(struct device *dev, | ||
572 | struct device_attribute *attr, char *buf) | ||
573 | { | ||
574 | struct gk20a *g = get_gk20a(dev); | ||
575 | |||
576 | return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0); | ||
577 | } | ||
578 | |||
579 | static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store); | ||
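/*
 * A note on the WRITE_ONCE()/smp_mb() pairing in the store hook above
 * (a sketch; the consumer side lives elsewhere and is assumed, not
 * shown in this file): pmu->mscg_stat is published for lockless
 * readers, so the writer makes the new status visible with smp_mb()
 * before idling the GPU, and readers are expected to use a matching
 * one-shot load, e.g.:
 *
 *   if (READ_ONCE(pmu->mscg_stat) == PMU_MSCG_ENABLED)
 *           ...;    // take the MSCG path
 *
 * as the store path above does with ACCESS_ONCE().
 */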
580 | |||
581 | static ssize_t aelpg_param_store(struct device *dev, | ||
582 | struct device_attribute *attr, const char *buf, size_t count) | ||
583 | { | ||
584 | struct gk20a *g = get_gk20a(dev); | ||
585 | int status = 0; | ||
586 | union pmu_ap_cmd ap_cmd; | ||
587 | int *paramlist = (int *)g->pmu.aelpg_param; | ||
588 | u32 defaultparam[5] = { | ||
589 | APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US, | ||
590 | APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US, | ||
591 | APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US, | ||
592 | APCTRL_POWER_BREAKEVEN_DEFAULT_US, | ||
593 | APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT | ||
594 | }; | ||
595 | |||
596 | /* Get each parameter value from the input string */ | ||
597 | sscanf(buf, "%d %d %d %d %d", ¶mlist[0], ¶mlist[1], | ||
598 | ¶mlist[2], ¶mlist[3], ¶mlist[4]); | ||
599 | |||
600 | /* If all parameter values are 0 then reset to SW default values */ | ||
601 | if ((paramlist[0] | paramlist[1] | paramlist[2] | ||
602 | | paramlist[3] | paramlist[4]) == 0x00) { | ||
603 | memcpy(paramlist, defaultparam, sizeof(defaultparam)); | ||
604 | } | ||
605 | |||
606 | /* If AELPG is enabled and the PMU is ready then post the values to | ||
607 |  * the PMU now; otherwise they are stored and posted later. | ||
608 |  */ | ||
609 | if (g->aelpg_enabled && g->pmu.pmu_ready) { | ||
610 | /* Disable AELPG */ | ||
611 | ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; | ||
612 | ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; | ||
613 | status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); | ||
614 | |||
615 | /* Enable AELPG */ | ||
616 | nvgpu_aelpg_init(g); | ||
617 | nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); | ||
618 | } | ||
619 | |||
620 | return count; | ||
621 | } | ||
622 | |||
623 | static ssize_t aelpg_param_read(struct device *dev, | ||
624 | struct device_attribute *attr, char *buf) | ||
625 | { | ||
626 | struct gk20a *g = get_gk20a(dev); | ||
627 | |||
628 | return snprintf(buf, PAGE_SIZE, | ||
629 | "%d %d %d %d %d\n", g->pmu.aelpg_param[0], | ||
630 | g->pmu.aelpg_param[1], g->pmu.aelpg_param[2], | ||
631 | g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]); | ||
632 | } | ||
633 | |||
634 | static DEVICE_ATTR(aelpg_param, ROOTRW, | ||
635 | aelpg_param_read, aelpg_param_store); | ||
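/*
 * Usage sketch (sysfs path and the custom numbers are made up for
 * illustration): the five AELPG parameters are written and read as one
 * whitespace-separated list, in the order of the defaultparam[] table
 * above, and writing all zeroes restores the SW defaults:
 *
 *   # echo "500000 5000 10000 1000 25" > .../aelpg_param   (custom)
 *   # echo "0 0 0 0 0" > .../aelpg_param                   (defaults)
 *   # cat .../aelpg_param
 */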
636 | |||
637 | static ssize_t aelpg_enable_store(struct device *dev, | ||
638 | struct device_attribute *attr, const char *buf, size_t count) | ||
639 | { | ||
640 | struct gk20a *g = get_gk20a(dev); | ||
641 | unsigned long val = 0; | ||
642 | int status = 0; | ||
643 | union pmu_ap_cmd ap_cmd; | ||
644 | int err; | ||
645 | |||
646 | if (kstrtoul(buf, 10, &val) < 0) | ||
647 | return -EINVAL; | ||
648 | |||
649 | err = gk20a_busy(g); | ||
650 | if (err) | ||
651 | return err; | ||
652 | |||
653 | if (g->pmu.pmu_ready) { | ||
654 | if (val && !g->aelpg_enabled) { | ||
655 | g->aelpg_enabled = true; | ||
656 | /* Enable AELPG */ | ||
657 | ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL; | ||
658 | ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; | ||
659 | status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); | ||
660 | } else if (!val && g->aelpg_enabled) { | ||
661 | g->aelpg_enabled = false; | ||
662 | /* Disable AELPG */ | ||
663 | ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; | ||
664 | ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; | ||
665 | status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); | ||
666 | } | ||
667 | } else { | ||
668 | nvgpu_info(g, "PMU is not ready, AELPG request failed"); | ||
669 | } | ||
670 | gk20a_idle(g); | ||
671 | |||
672 | nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" : | ||
673 | "disabled"); | ||
674 | |||
675 | return count; | ||
676 | } | ||
677 | |||
678 | static ssize_t aelpg_enable_read(struct device *dev, | ||
679 | struct device_attribute *attr, char *buf) | ||
680 | { | ||
681 | struct gk20a *g = get_gk20a(dev); | ||
682 | |||
683 | return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 1 : 0); | ||
684 | } | ||
685 | |||
686 | static DEVICE_ATTR(aelpg_enable, ROOTRW, | ||
687 | aelpg_enable_read, aelpg_enable_store); | ||
688 | |||
689 | |||
690 | static ssize_t allow_all_enable_read(struct device *dev, | ||
691 | struct device_attribute *attr, char *buf) | ||
692 | { | ||
693 | struct gk20a *g = get_gk20a(dev); | ||
694 | |||
695 | return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0); | ||
696 | } | ||
697 | |||
698 | static ssize_t allow_all_enable_store(struct device *dev, | ||
699 | struct device_attribute *attr, const char *buf, size_t count) | ||
700 | { | ||
701 | struct gk20a *g = get_gk20a(dev); | ||
702 | unsigned long val = 0; | ||
703 | int err; | ||
704 | |||
705 | if (kstrtoul(buf, 10, &val) < 0) | ||
706 | return -EINVAL; | ||
707 | |||
708 | err = gk20a_busy(g); | ||
709 | if (err) | ||
710 | return err; | ||
711 | g->allow_all = (val ? true : false); | ||
712 | gk20a_idle(g); | ||
711 | |||
712 | return count; | ||
713 | } | ||
714 | |||
715 | static DEVICE_ATTR(allow_all, ROOTRW, | ||
716 | allow_all_enable_read, allow_all_enable_store); | ||
717 | |||
718 | static ssize_t emc3d_ratio_store(struct device *dev, | ||
719 | struct device_attribute *attr, const char *buf, size_t count) | ||
720 | { | ||
721 | struct gk20a *g = get_gk20a(dev); | ||
722 | unsigned long val = 0; | ||
723 | |||
724 | if (kstrtoul(buf, 10, &val) < 0) | ||
725 | return -EINVAL; | ||
726 | |||
727 | g->emc3d_ratio = val; | ||
728 | |||
729 | return count; | ||
730 | } | ||
731 | |||
732 | static ssize_t emc3d_ratio_read(struct device *dev, | ||
733 | struct device_attribute *attr, char *buf) | ||
734 | { | ||
735 | struct gk20a *g = get_gk20a(dev); | ||
736 | |||
737 | return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio); | ||
738 | } | ||
739 | |||
740 | static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); | ||
741 | |||
742 | static ssize_t fmax_at_vmin_safe_read(struct device *dev, | ||
743 | struct device_attribute *attr, char *buf) | ||
744 | { | ||
745 | struct gk20a *g = get_gk20a(dev); | ||
746 | unsigned long gpu_fmax_at_vmin_hz = 0; | ||
747 | |||
748 | if (g->ops.clk.get_fmax_at_vmin_safe) | ||
749 | gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g); | ||
750 | |||
751 | return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz)); | ||
752 | } | ||
753 | |||
754 | static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL); | ||
755 | |||
756 | #ifdef CONFIG_PM | ||
757 | static ssize_t force_idle_store(struct device *dev, | ||
758 | struct device_attribute *attr, const char *buf, size_t count) | ||
759 | { | ||
760 | struct gk20a *g = get_gk20a(dev); | ||
761 | unsigned long val = 0; | ||
762 | int err = 0; | ||
763 | |||
764 | if (kstrtoul(buf, 10, &val) < 0) | ||
765 | return -EINVAL; | ||
766 | |||
767 | if (val) { | ||
768 | if (g->forced_idle) | ||
769 | return count; /* do nothing */ | ||
770 | else { | ||
771 | err = __gk20a_do_idle(g, false); | ||
772 | if (!err) { | ||
773 | g->forced_idle = 1; | ||
774 | nvgpu_info(g, "gpu is idle : %d", | ||
775 | g->forced_idle); | ||
776 | } | ||
777 | } | ||
778 | } else { | ||
779 | if (!g->forced_idle) | ||
780 | return count; /* do nothing */ | ||
781 | else { | ||
782 | err = __gk20a_do_unidle(g); | ||
783 | if (!err) { | ||
784 | g->forced_idle = 0; | ||
785 | nvgpu_info(g, "gpu is idle : %d", | ||
786 | g->forced_idle); | ||
787 | } | ||
788 | } | ||
789 | } | ||
790 | |||
791 | return count; | ||
792 | } | ||
793 | |||
794 | static ssize_t force_idle_read(struct device *dev, | ||
795 | struct device_attribute *attr, char *buf) | ||
796 | { | ||
797 | struct gk20a *g = get_gk20a(dev); | ||
798 | |||
799 | return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 1 : 0); | ||
800 | } | ||
801 | |||
802 | static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store); | ||
803 | #endif | ||
804 | |||
805 | static bool is_tpc_mask_valid(struct gk20a *g, u32 tpc_mask) | ||
806 | { | ||
807 | u32 i; | ||
808 | bool valid = false; | ||
809 | |||
810 | for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) { | ||
811 | if (tpc_mask == g->valid_tpc_mask[i]) { | ||
812 | valid = true; | ||
813 | break; | ||
814 | } | ||
815 | } | ||
816 | return valid; | ||
817 | } | ||
818 | |||
819 | static ssize_t tpc_pg_mask_read(struct device *dev, | ||
820 | struct device_attribute *attr, char *buf) | ||
821 | { | ||
822 | struct gk20a *g = get_gk20a(dev); | ||
823 | |||
824 | return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask); | ||
825 | } | ||
826 | |||
827 | static ssize_t tpc_pg_mask_store(struct device *dev, | ||
828 | struct device_attribute *attr, const char *buf, size_t count) | ||
829 | { | ||
830 | struct gk20a *g = get_gk20a(dev); | ||
831 | struct gr_gk20a *gr = &g->gr; | ||
832 | unsigned long val = 0; | ||
833 | |||
834 | nvgpu_mutex_acquire(&g->tpc_pg_lock); | ||
835 | |||
836 | if (kstrtoul(buf, 10, &val) < 0) { | ||
837 | nvgpu_err(g, "invalid value"); | ||
838 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
839 | return -EINVAL; | ||
840 | } | ||
841 | |||
842 | if (val == g->tpc_pg_mask) { | ||
843 | nvgpu_info(g, "no value change, same mask already set"); | ||
844 | goto exit; | ||
845 | } | ||
846 | |||
847 | if (gr->ctx_vars.golden_image_size) { | ||
848 | nvgpu_err(g, "golden image size already initialized"); | ||
849 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
850 | return -ENODEV; | ||
851 | } | ||
852 | |||
853 | /* Check that the value from userspace is one of the | ||
854 |  * possible valid TPC configurations. | ||
855 |  */ | ||
856 | if (is_tpc_mask_valid(g, (u32)val)) { | ||
857 | g->tpc_pg_mask = val; | ||
858 | } else { | ||
859 | nvgpu_err(g, "TPC-PG mask is invalid"); | ||
860 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
861 | return -EINVAL; | ||
862 | } | ||
863 | exit: | ||
864 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
865 | |||
866 | return count; | ||
867 | } | ||
868 | |||
869 | static DEVICE_ATTR(tpc_pg_mask, ROOTRW, tpc_pg_mask_read, tpc_pg_mask_store); | ||
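/*
 * Usage sketch: a mask written here must match one of the entries in
 * g->valid_tpc_mask[] (checked by is_tpc_mask_valid() above) and can
 * only change before the golden context image is initialized, i.e.
 * before first channel setup. Example (mask value assumed to be valid
 * on the target chip):
 *
 *   # echo 1 > .../tpc_pg_mask    # power-gate per the 0x1 config
 *   # cat .../tpc_pg_mask
 */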
870 | |||
871 | static ssize_t tpc_fs_mask_store(struct device *dev, | ||
872 | struct device_attribute *attr, const char *buf, size_t count) | ||
873 | { | ||
874 | struct gk20a *g = get_gk20a(dev); | ||
875 | unsigned long val = 0; | ||
876 | |||
877 | if (kstrtoul(buf, 10, &val) < 0) | ||
878 | return -EINVAL; | ||
879 | |||
880 | if (!g->gr.gpc_tpc_mask) | ||
881 | return -ENODEV; | ||
882 | |||
883 | if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { | ||
884 | g->gr.gpc_tpc_mask[0] = val; | ||
885 | g->tpc_fs_mask_user = val; | ||
886 | |||
887 | g->ops.gr.set_gpc_tpc_mask(g, 0); | ||
888 | |||
889 | nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image); | ||
890 | g->gr.ctx_vars.local_golden_image = NULL; | ||
891 | g->gr.ctx_vars.golden_image_initialized = false; | ||
892 | g->gr.ctx_vars.golden_image_size = 0; | ||
893 | /* Cause next poweron to reinit just gr */ | ||
894 | g->gr.sw_ready = false; | ||
895 | } | ||
896 | |||
897 | return count; | ||
898 | } | ||
899 | |||
900 | static ssize_t tpc_fs_mask_read(struct device *dev, | ||
901 | struct device_attribute *attr, char *buf) | ||
902 | { | ||
903 | struct gk20a *g = get_gk20a(dev); | ||
904 | struct gr_gk20a *gr = &g->gr; | ||
905 | u32 gpc_index; | ||
906 | u32 tpc_fs_mask = 0; | ||
907 | int err = 0; | ||
908 | |||
909 | err = gk20a_busy(g); | ||
910 | if (err) | ||
911 | return err; | ||
912 | |||
913 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
914 | if (g->ops.gr.get_gpc_tpc_mask) | ||
915 | tpc_fs_mask |= | ||
916 | g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << | ||
917 | (gr->max_tpc_per_gpc_count * gpc_index); | ||
918 | } | ||
919 | |||
920 | gk20a_idle(g); | ||
921 | |||
922 | return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask); | ||
923 | } | ||
924 | |||
925 | static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store); | ||
926 | |||
927 | static ssize_t min_timeslice_us_read(struct device *dev, | ||
928 | struct device_attribute *attr, char *buf) | ||
929 | { | ||
930 | struct gk20a *g = get_gk20a(dev); | ||
931 | |||
932 | return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us); | ||
933 | } | ||
934 | |||
935 | static ssize_t min_timeslice_us_store(struct device *dev, | ||
936 | struct device_attribute *attr, const char *buf, size_t count) | ||
937 | { | ||
938 | struct gk20a *g = get_gk20a(dev); | ||
939 | unsigned long val; | ||
940 | |||
941 | if (kstrtoul(buf, 10, &val) < 0) | ||
942 | return -EINVAL; | ||
943 | |||
944 | if (val > g->max_timeslice_us) | ||
945 | return -EINVAL; | ||
946 | |||
947 | g->min_timeslice_us = val; | ||
948 | |||
949 | return count; | ||
950 | } | ||
951 | |||
952 | static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read, | ||
953 | min_timeslice_us_store); | ||
954 | |||
955 | static ssize_t max_timeslice_us_read(struct device *dev, | ||
956 | struct device_attribute *attr, char *buf) | ||
957 | { | ||
958 | struct gk20a *g = get_gk20a(dev); | ||
959 | |||
960 | return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us); | ||
961 | } | ||
962 | |||
963 | static ssize_t max_timeslice_us_store(struct device *dev, | ||
964 | struct device_attribute *attr, const char *buf, size_t count) | ||
965 | { | ||
966 | struct gk20a *g = get_gk20a(dev); | ||
967 | unsigned long val; | ||
968 | |||
969 | if (kstrtoul(buf, 10, &val) < 0) | ||
970 | return -EINVAL; | ||
971 | |||
972 | if (val < g->min_timeslice_us) | ||
973 | return -EINVAL; | ||
974 | |||
975 | g->max_timeslice_us = val; | ||
976 | |||
977 | return count; | ||
978 | } | ||
979 | |||
980 | static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read, | ||
981 | max_timeslice_us_store); | ||
982 | |||
983 | static ssize_t czf_bypass_store(struct device *dev, | ||
984 | struct device_attribute *attr, const char *buf, size_t count) | ||
985 | { | ||
986 | struct gk20a *g = get_gk20a(dev); | ||
987 | unsigned long val; | ||
988 | |||
989 | if (kstrtoul(buf, 10, &val) < 0) | ||
990 | return -EINVAL; | ||
991 | |||
992 | if (val >= 4) | ||
993 | return -EINVAL; | ||
994 | |||
995 | g->gr.czf_bypass = val; | ||
996 | |||
997 | return count; | ||
998 | } | ||
999 | |||
1000 | static ssize_t czf_bypass_read(struct device *dev, | ||
1001 | struct device_attribute *attr, char *buf) | ||
1002 | { | ||
1003 | struct gk20a *g = get_gk20a(dev); | ||
1004 | |||
1005 | return snprintf(buf, PAGE_SIZE, "%d\n", g->gr.czf_bypass); | ||
1006 | } | ||
1007 | |||
1008 | static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); | ||
1009 | |||
1010 | static ssize_t pd_max_batches_store(struct device *dev, | ||
1011 | struct device_attribute *attr, const char *buf, size_t count) | ||
1012 | { | ||
1013 | struct gk20a *g = get_gk20a(dev); | ||
1014 | unsigned long val; | ||
1015 | |||
1016 | if (kstrtoul(buf, 10, &val) < 0) | ||
1017 | return -EINVAL; | ||
1018 | |||
1019 | if (val > 64) | ||
1020 | return -EINVAL; | ||
1021 | |||
1022 | g->gr.pd_max_batches = val; | ||
1023 | |||
1024 | return count; | ||
1025 | } | ||
1026 | |||
1027 | static ssize_t pd_max_batches_read(struct device *dev, | ||
1028 | struct device_attribute *attr, char *buf) | ||
1029 | { | ||
1030 | struct gk20a *g = get_gk20a(dev); | ||
1031 | |||
1032 | return snprintf(buf, PAGE_SIZE, "%d\n", g->gr.pd_max_batches); | ||
1033 | } | ||
1034 | |||
1035 | static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store); | ||
1036 | |||
1037 | static ssize_t gfxp_wfi_timeout_count_store(struct device *dev, | ||
1038 | struct device_attribute *attr, const char *buf, size_t count) | ||
1039 | { | ||
1040 | struct gk20a *g = get_gk20a(dev); | ||
1041 | struct gr_gk20a *gr = &g->gr; | ||
1042 | unsigned long val = 0; | ||
1043 | int err = -1; | ||
1044 | |||
1045 | if (kstrtoul(buf, 10, &val) < 0) | ||
1046 | return -EINVAL; | ||
1047 | |||
1048 | if (g->ops.gr.get_max_gfxp_wfi_timeout_count) { | ||
1049 | if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g)) | ||
1050 | return -EINVAL; | ||
1051 | } | ||
1052 | |||
1053 | gr->gfxp_wfi_timeout_count = val; | ||
1054 | |||
1055 | if (g->ops.gr.init_preemption_state && g->power_on) { | ||
1056 | err = gk20a_busy(g); | ||
1057 | if (err) | ||
1058 | return err; | ||
1059 | |||
1060 | err = gr_gk20a_elpg_protected_call(g, | ||
1061 | g->ops.gr.init_preemption_state(g)); | ||
1062 | |||
1063 | gk20a_idle(g); | ||
1064 | |||
1065 | if (err) | ||
1066 | return err; | ||
1067 | } | ||
1068 | return count; | ||
1069 | } | ||
1070 | |||
1071 | static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev, | ||
1072 | struct device_attribute *attr, const char *buf, size_t count) | ||
1073 | { | ||
1074 | struct gk20a *g = get_gk20a(dev); | ||
1075 | struct gr_gk20a *gr = &g->gr; | ||
1076 | int err = -1; | ||
1077 | |||
1078 | if (count > 0 && buf[0] == 's') | ||
1079 | /* sysclk */ | ||
1080 | gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK; | ||
1081 | else | ||
1082 | /* usec */ | ||
1083 | gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC; | ||
1084 | |||
1085 | if (g->ops.gr.init_preemption_state && g->power_on) { | ||
1086 | err = gk20a_busy(g); | ||
1087 | if (err) | ||
1088 | return err; | ||
1089 | |||
1090 | err = gr_gk20a_elpg_protected_call(g, | ||
1091 | g->ops.gr.init_preemption_state(g)); | ||
1092 | |||
1093 | gk20a_idle(g); | ||
1094 | |||
1095 | if (err) | ||
1096 | return err; | ||
1097 | } | ||
1098 | |||
1099 | return count; | ||
1100 | } | ||
1101 | |||
1102 | static ssize_t gfxp_wfi_timeout_count_read(struct device *dev, | ||
1103 | struct device_attribute *attr, char *buf) | ||
1104 | { | ||
1105 | struct gk20a *g = get_gk20a(dev); | ||
1106 | struct gr_gk20a *gr = &g->gr; | ||
1107 | u32 val = gr->gfxp_wfi_timeout_count; | ||
1108 | |||
1109 | return snprintf(buf, PAGE_SIZE, "%d\n", val); | ||
1110 | } | ||
1111 | |||
1112 | static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev, | ||
1113 | struct device_attribute *attr, char *buf) | ||
1114 | { | ||
1115 | struct gk20a *g = get_gk20a(dev); | ||
1116 | struct gr_gk20a *gr = &g->gr; | ||
1117 | |||
1118 | if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC) | ||
1119 | return snprintf(buf, PAGE_SIZE, "usec\n"); | ||
1120 | else | ||
1121 | return snprintf(buf, PAGE_SIZE, "sysclk\n"); | ||
1122 | } | ||
1123 | |||
1124 | static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH), | ||
1125 | gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store); | ||
1126 | |||
1127 | static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH), | ||
1128 | gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store); | ||
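/*
 * Usage sketch (sysfs path assumed): the unit store hook keys off the
 * first character only, so any string starting with 's' selects sysclk
 * and anything else selects microseconds:
 *
 *   # echo sysclk > .../gfxp_wfi_timeout_unit
 *   # echo usec   > .../gfxp_wfi_timeout_unit
 *   # cat .../gfxp_wfi_timeout_unit
 */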
1129 | |||
1130 | static ssize_t comptag_mem_deduct_store(struct device *dev, | ||
1131 | struct device_attribute *attr, | ||
1132 | const char *buf, size_t count) | ||
1133 | { | ||
1134 | struct gk20a *g = get_gk20a(dev); | ||
1135 | unsigned long val; | ||
1136 | |||
1137 | if (kstrtoul(buf, 10, &val) < 0) | ||
1138 | return -EINVAL; | ||
1139 | |||
1140 | if (val >= totalram_size_in_mb) { | ||
1141 | dev_err(dev, "comptag_mem_deduct cannot be set above %lu", | ||
1142 | totalram_size_in_mb); | ||
1143 | return -EINVAL; | ||
1144 | } | ||
1145 | |||
1146 | g->gr.comptag_mem_deduct = val; | ||
1147 | /* Deduct the part taken by the running system */ | ||
1148 | g->gr.max_comptag_mem -= val; | ||
1149 | |||
1150 | return count; | ||
1151 | } | ||
1152 | |||
1153 | static ssize_t comptag_mem_deduct_show(struct device *dev, | ||
1154 | struct device_attribute *attr, char *buf) | ||
1155 | { | ||
1156 | struct gk20a *g = get_gk20a(dev); | ||
1157 | |||
1158 | return snprintf(buf, PAGE_SIZE, "%d\n", g->gr.comptag_mem_deduct); | ||
1159 | } | ||
1160 | |||
1161 | static DEVICE_ATTR(comptag_mem_deduct, ROOTRW, | ||
1162 | comptag_mem_deduct_show, comptag_mem_deduct_store); | ||
1163 | |||
1164 | void nvgpu_remove_sysfs(struct device *dev) | ||
1165 | { | ||
1166 | device_remove_file(dev, &dev_attr_elcg_enable); | ||
1167 | device_remove_file(dev, &dev_attr_blcg_enable); | ||
1168 | device_remove_file(dev, &dev_attr_slcg_enable); | ||
1169 | device_remove_file(dev, &dev_attr_ptimer_scale_factor); | ||
1170 | device_remove_file(dev, &dev_attr_ptimer_ref_freq); | ||
1171 | device_remove_file(dev, &dev_attr_ptimer_src_freq); | ||
1172 | device_remove_file(dev, &dev_attr_elpg_enable); | ||
1173 | device_remove_file(dev, &dev_attr_mscg_enable); | ||
1174 | device_remove_file(dev, &dev_attr_emc3d_ratio); | ||
1175 | device_remove_file(dev, &dev_attr_ldiv_slowdown_factor); | ||
1176 | |||
1177 | device_remove_file(dev, &dev_attr_fmax_at_vmin_safe); | ||
1178 | |||
1179 | device_remove_file(dev, &dev_attr_counters); | ||
1180 | device_remove_file(dev, &dev_attr_counters_reset); | ||
1181 | device_remove_file(dev, &dev_attr_load); | ||
1182 | device_remove_file(dev, &dev_attr_railgate_delay); | ||
1183 | device_remove_file(dev, &dev_attr_is_railgated); | ||
1184 | #ifdef CONFIG_PM | ||
1185 | device_remove_file(dev, &dev_attr_force_idle); | ||
1186 | device_remove_file(dev, &dev_attr_railgate_enable); | ||
1187 | #endif | ||
1188 | device_remove_file(dev, &dev_attr_aelpg_param); | ||
1189 | device_remove_file(dev, &dev_attr_aelpg_enable); | ||
1190 | device_remove_file(dev, &dev_attr_allow_all); | ||
1191 | device_remove_file(dev, &dev_attr_tpc_fs_mask); | ||
1192 | device_remove_file(dev, &dev_attr_tpc_pg_mask); | ||
1193 | device_remove_file(dev, &dev_attr_min_timeslice_us); | ||
1194 | device_remove_file(dev, &dev_attr_max_timeslice_us); | ||
1195 | |||
1196 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
1197 | nvgpu_nvhost_remove_symlink(get_gk20a(dev)); | ||
1198 | #endif | ||
1199 | |||
1200 | device_remove_file(dev, &dev_attr_czf_bypass); | ||
1201 | device_remove_file(dev, &dev_attr_pd_max_batches); | ||
1202 | device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count); | ||
1203 | device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit); | ||
1204 | device_remove_file(dev, &dev_attr_gpu_powered_on); | ||
1205 | |||
1206 | device_remove_file(dev, &dev_attr_comptag_mem_deduct); | ||
1207 | |||
1208 | if (strcmp(dev_name(dev), "gpu.0")) { | ||
1209 | struct kobject *kobj = &dev->kobj; | ||
1210 | struct device *parent = container_of((kobj->parent), | ||
1211 | struct device, kobj); | ||
1212 | sysfs_remove_link(&parent->kobj, "gpu.0"); | ||
1213 | } | ||
1214 | } | ||
1215 | |||
1216 | int nvgpu_create_sysfs(struct device *dev) | ||
1217 | { | ||
1218 | struct gk20a *g = get_gk20a(dev); | ||
1219 | int error = 0; | ||
1220 | |||
1221 | error |= device_create_file(dev, &dev_attr_elcg_enable); | ||
1222 | error |= device_create_file(dev, &dev_attr_blcg_enable); | ||
1223 | error |= device_create_file(dev, &dev_attr_slcg_enable); | ||
1224 | error |= device_create_file(dev, &dev_attr_ptimer_scale_factor); | ||
1225 | error |= device_create_file(dev, &dev_attr_ptimer_ref_freq); | ||
1226 | error |= device_create_file(dev, &dev_attr_ptimer_src_freq); | ||
1227 | error |= device_create_file(dev, &dev_attr_elpg_enable); | ||
1228 | error |= device_create_file(dev, &dev_attr_mscg_enable); | ||
1229 | error |= device_create_file(dev, &dev_attr_emc3d_ratio); | ||
1230 | error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor); | ||
1231 | |||
1232 | error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe); | ||
1233 | |||
1234 | error |= device_create_file(dev, &dev_attr_counters); | ||
1235 | error |= device_create_file(dev, &dev_attr_counters_reset); | ||
1236 | error |= device_create_file(dev, &dev_attr_load); | ||
1237 | error |= device_create_file(dev, &dev_attr_railgate_delay); | ||
1238 | error |= device_create_file(dev, &dev_attr_is_railgated); | ||
1239 | #ifdef CONFIG_PM | ||
1240 | error |= device_create_file(dev, &dev_attr_force_idle); | ||
1241 | error |= device_create_file(dev, &dev_attr_railgate_enable); | ||
1242 | #endif | ||
1243 | error |= device_create_file(dev, &dev_attr_aelpg_param); | ||
1244 | error |= device_create_file(dev, &dev_attr_aelpg_enable); | ||
1245 | error |= device_create_file(dev, &dev_attr_allow_all); | ||
1246 | error |= device_create_file(dev, &dev_attr_tpc_fs_mask); | ||
1247 | error |= device_create_file(dev, &dev_attr_tpc_pg_mask); | ||
1248 | error |= device_create_file(dev, &dev_attr_min_timeslice_us); | ||
1249 | error |= device_create_file(dev, &dev_attr_max_timeslice_us); | ||
1250 | |||
1251 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
1252 | error |= nvgpu_nvhost_create_symlink(g); | ||
1253 | #endif | ||
1254 | |||
1255 | error |= device_create_file(dev, &dev_attr_czf_bypass); | ||
1256 | error |= device_create_file(dev, &dev_attr_pd_max_batches); | ||
1257 | error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count); | ||
1258 | error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit); | ||
1259 | error |= device_create_file(dev, &dev_attr_gpu_powered_on); | ||
1260 | |||
1261 | error |= device_create_file(dev, &dev_attr_comptag_mem_deduct); | ||
1262 | |||
1263 | if (strcmp(dev_name(dev), "gpu.0")) { | ||
1264 | struct kobject *kobj = &dev->kobj; | ||
1265 | struct device *parent = container_of((kobj->parent), | ||
1266 | struct device, kobj); | ||
1267 | error |= sysfs_create_link(&parent->kobj, | ||
1268 | &dev->kobj, "gpu.0"); | ||
1269 | } | ||
1270 | |||
1271 | if (error) | ||
1272 | nvgpu_err(g, "Failed to create sysfs attributes!"); | ||
1273 | |||
1274 | return error; | ||
1275 | } | ||
diff --git a/include/os/linux/sysfs.h b/include/os/linux/sysfs.h new file mode 100644 index 0000000..8092584 --- /dev/null +++ b/include/os/linux/sysfs.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef NVGPU_SYSFS_H | ||
17 | #define NVGPU_SYSFS_H | ||
18 | |||
19 | struct device; | ||
20 | |||
21 | int nvgpu_create_sysfs(struct device *dev); | ||
22 | void nvgpu_remove_sysfs(struct device *dev); | ||
23 | |||
24 | #endif | ||
diff --git a/include/os/linux/thread.c b/include/os/linux/thread.c new file mode 100644 index 0000000..c56bff6 --- /dev/null +++ b/include/os/linux/thread.c | |||
@@ -0,0 +1,70 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kthread.h> | ||
18 | |||
19 | #include <nvgpu/thread.h> | ||
20 | #include <nvgpu/timers.h> | ||
21 | |||
22 | int nvgpu_thread_proxy(void *threaddata) | ||
23 | { | ||
24 | struct nvgpu_thread *thread = threaddata; | ||
25 | int ret = thread->fn(thread->data); | ||
26 | |||
27 | thread->running = false; | ||
28 | return ret; | ||
29 | } | ||
30 | |||
31 | int nvgpu_thread_create(struct nvgpu_thread *thread, | ||
32 | void *data, | ||
33 | int (*threadfn)(void *data), const char *name) | ||
34 | { | ||
35 | struct task_struct *task = kthread_create(nvgpu_thread_proxy, | ||
36 | thread, name); | ||
37 | if (IS_ERR(task)) | ||
38 | return PTR_ERR(task); | ||
39 | |||
40 | thread->task = task; | ||
41 | thread->fn = threadfn; | ||
42 | thread->data = data; | ||
43 | thread->running = true; | ||
44 | wake_up_process(task); | ||
45 | return 0; | ||
46 | } | ||
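/*
 * Usage sketch (the worker function and variable names are
 * hypothetical): the thread body should poll nvgpu_thread_should_stop()
 * so that nvgpu_thread_stop() can wind it down through kthread_stop():
 *
 *   static int worker(void *data)
 *   {
 *           struct nvgpu_thread *t = data;
 *
 *           while (!nvgpu_thread_should_stop(t))
 *                   nvgpu_msleep(10);    // do periodic work here
 *           return 0;
 *   }
 *
 *   err = nvgpu_thread_create(&thr, &thr, worker, "nvgpu_worker");
 *   ...
 *   nvgpu_thread_stop(&thr);   // blocks until the worker returns
 */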
47 | |||
48 | void nvgpu_thread_stop(struct nvgpu_thread *thread) | ||
49 | { | ||
50 | if (thread->task) { | ||
51 | kthread_stop(thread->task); | ||
52 | thread->task = NULL; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) | ||
57 | { | ||
58 | return kthread_should_stop(); | ||
59 | } | ||
60 | |||
61 | bool nvgpu_thread_is_running(struct nvgpu_thread *thread) | ||
62 | { | ||
63 | return ACCESS_ONCE(thread->running); | ||
64 | } | ||
65 | |||
66 | void nvgpu_thread_join(struct nvgpu_thread *thread) | ||
67 | { | ||
68 | while (ACCESS_ONCE(thread->running)) | ||
69 | nvgpu_msleep(10); | ||
70 | } | ||
diff --git a/include/os/linux/timers.c b/include/os/linux/timers.c new file mode 100644 index 0000000..018fd2d --- /dev/null +++ b/include/os/linux/timers.c | |||
@@ -0,0 +1,269 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/ktime.h> | ||
18 | #include <linux/delay.h> | ||
19 | |||
20 | #include <nvgpu/timers.h> | ||
21 | #include <nvgpu/soc.h> | ||
22 | #include <nvgpu/gk20a.h> | ||
23 | |||
24 | #include "platform_gk20a.h" | ||
25 | |||
26 | /* | ||
27 | * Returns 1 if the platform is pre-silicon, in which case timeout checking | ||
28 | * should be skipped. Setting %NVGPU_TIMER_NO_PRE_SI makes this always return | ||
29 | * 0 (i.e. do the timeout check regardless of platform). | ||
30 | */ | ||
31 | static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout) | ||
32 | { | ||
33 | if (timeout->flags & NVGPU_TIMER_NO_PRE_SI) | ||
34 | return 0; | ||
35 | |||
36 | return !nvgpu_platform_is_silicon(timeout->g); | ||
37 | } | ||
38 | |||
39 | /** | ||
40 | * nvgpu_timeout_init - Init timer. | ||
41 | * | ||
42 | * @g - nvgpu device. | ||
43 | * @timeout - The timer. | ||
44 | * @duration - Timeout in milliseconds or number of retries. | ||
45 | * @flags - Flags for timer. | ||
46 | * | ||
47 | * This configures the timeout to start counting now, i.e. when this | ||
48 | * function is called. Available flags to pass to @flags: | ||
49 | * | ||
50 | * %NVGPU_TIMER_CPU_TIMER | ||
51 | * %NVGPU_TIMER_RETRY_TIMER | ||
52 | * %NVGPU_TIMER_NO_PRE_SI | ||
53 | * %NVGPU_TIMER_SILENT_TIMEOUT | ||
54 | * | ||
55 | * If neither %NVGPU_TIMER_CPU_TIMER nor %NVGPU_TIMER_RETRY_TIMER is passed then | ||
56 | * a CPU timer is used by default. | ||
57 | */ | ||
58 | int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout, | ||
59 | u32 duration, unsigned long flags) | ||
60 | { | ||
61 | if (flags & ~NVGPU_TIMER_FLAG_MASK) | ||
62 | return -EINVAL; | ||
63 | |||
64 | memset(timeout, 0, sizeof(*timeout)); | ||
65 | |||
66 | timeout->g = g; | ||
67 | timeout->flags = flags; | ||
68 | |||
69 | if (flags & NVGPU_TIMER_RETRY_TIMER) | ||
70 | timeout->retries.max = duration; | ||
71 | else | ||
72 | timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(), | ||
73 | (s64)NSEC_PER_MSEC * duration)); | ||
74 | |||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout, | ||
79 | void *caller, | ||
80 | const char *fmt, va_list args) | ||
81 | { | ||
82 | struct gk20a *g = timeout->g; | ||
83 | ktime_t now = ktime_get(); | ||
84 | |||
85 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
86 | return 0; | ||
87 | |||
88 | if (ktime_after(now, ns_to_ktime(timeout->time))) { | ||
89 | if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { | ||
90 | char buf[128]; | ||
91 | |||
92 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
93 | |||
94 | nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf); | ||
95 | } | ||
96 | |||
97 | return -ETIMEDOUT; | ||
98 | } | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout, | ||
104 | void *caller, | ||
105 | const char *fmt, va_list args) | ||
106 | { | ||
107 | struct gk20a *g = timeout->g; | ||
108 | |||
109 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
110 | return 0; | ||
111 | |||
112 | if (timeout->retries.attempted >= timeout->retries.max) { | ||
113 | if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { | ||
114 | char buf[128]; | ||
115 | |||
116 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
117 | |||
118 | nvgpu_err(g, "No more retries @ %pF %s", caller, buf); | ||
119 | } | ||
120 | |||
121 | return -ETIMEDOUT; | ||
122 | } | ||
123 | |||
124 | timeout->retries.attempted++; | ||
125 | |||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | /** | ||
130 | * __nvgpu_timeout_expired_msg - Check if a timeout has expired. | ||
131 | * | ||
132 | * @timeout - The timeout to check. | ||
133 | * @caller - Address of the caller of this function. | ||
134 | * @fmt - The fmt string. | ||
135 | * | ||
136 | * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise. | ||
137 | * | ||
138 | * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout | ||
139 | * then a message is printed based on %fmt. | ||
140 | */ | ||
141 | int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout, | ||
142 | void *caller, const char *fmt, ...) | ||
143 | { | ||
144 | int ret; | ||
145 | va_list args; | ||
146 | |||
147 | va_start(args, fmt); | ||
148 | if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) | ||
149 | ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt, | ||
150 | args); | ||
151 | else | ||
152 | ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt, | ||
153 | args); | ||
154 | va_end(args); | ||
155 | |||
156 | return ret; | ||
157 | } | ||
158 | |||
159 | /** | ||
160 | * nvgpu_timeout_peek_expired - Check the status of a timeout. | ||
161 | * | ||
162 | * @timeout - The timeout to check. | ||
163 | * | ||
164 | * Returns non-zero if the timeout is expired, zero otherwise. In the case of | ||
165 | * retry timers this will not increment the underlying retry count. Also if the | ||
166 | * timer has expired no messages will be printed. | ||
167 | * | ||
168 | * This function honors the pre-Si check as well. | ||
169 | */ | ||
170 | int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout) | ||
171 | { | ||
172 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
173 | return 0; | ||
174 | |||
175 | if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) | ||
176 | return timeout->retries.attempted >= timeout->retries.max; | ||
177 | else | ||
178 | return ktime_after(ktime_get(), ns_to_ktime(timeout->time)); | ||
179 | } | ||
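/*
 * Usage sketch of the timeout API (condition_met() is hypothetical;
 * nvgpu_timeout_expired_msg() is assumed to be the wrapper macro from
 * <nvgpu/timers.h> that feeds the caller address into
 * __nvgpu_timeout_expired_msg()):
 *
 *   struct nvgpu_timeout timeout;
 *
 *   nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_CPU_TIMER);
 *   do {
 *           if (condition_met(g))
 *                   return 0;
 *           nvgpu_usleep_range(10, 20);
 *   } while (nvgpu_timeout_expired_msg(&timeout,
 *                                      "waiting for condition") == 0);
 *   return -ETIMEDOUT;
 *
 * With %NVGPU_TIMER_RETRY_TIMER the same loop is bounded by @duration
 * iterations instead of wall-clock time.
 */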
180 | |||
181 | /** | ||
182 | * nvgpu_udelay - Delay for some number of microseconds. | ||
183 | * | ||
184 | * @usecs - Microseconds to wait for. | ||
185 | * | ||
186 | * Wait for at least @usecs microseconds. This is not guaranteed to be perfectly | ||
187 | * accurate. It is normally backed by a busy-loop, so waits should be kept | ||
188 | * short, below 100us. If longer delays are necessary then | ||
189 | * nvgpu_msleep() should be preferred. | ||
190 | * | ||
191 | * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This | ||
192 | * function will attempt to not use a busy-loop. | ||
193 | */ | ||
194 | void nvgpu_udelay(unsigned int usecs) | ||
195 | { | ||
196 | udelay(usecs); | ||
197 | } | ||
198 | |||
199 | /** | ||
200 | * nvgpu_usleep_range - Sleep for a range of microseconds. | ||
201 | * | ||
202 | * @min_us - Minimum wait time. | ||
203 | * @max_us - Maximum wait time. | ||
204 | * | ||
205 | * Wait for some number of microseconds between @min_us and @max_us. This, | ||
206 | * unlike nvgpu_udelay(), will attempt to sleep for the passed number of | ||
207 | * microseconds instead of busy looping. Not all platforms support this, | ||
208 | * and in that case this reduces to nvgpu_udelay(min_us). | ||
209 | * | ||
210 | * Linux note: this is not safe to use in atomic context. If you are in | ||
211 | * atomic context you must use nvgpu_udelay(). | ||
212 | */ | ||
213 | void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us) | ||
214 | { | ||
215 | usleep_range(min_us, max_us); | ||
216 | } | ||
217 | |||
218 | /** | ||
219 | * nvgpu_msleep - Sleep for some milliseconds. | ||
220 | * | ||
221 | * @msecs - Sleep for at least this many milliseconds. | ||
222 | * | ||
223 | * Sleep for at least @msecs milliseconds. For small @msecs (less than 20 ms | ||
224 | * or so) the sleep can be significantly longer due to scheduling overhead | ||
225 | * and timer granularity. | ||
226 | */ | ||
227 | void nvgpu_msleep(unsigned int msecs) | ||
228 | { | ||
229 | msleep(msecs); | ||
230 | } | ||
231 | |||
232 | /** | ||
233 | * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock. | ||
234 | * | ||
235 | * Return a clock in millisecond units. The start time of the clock is | ||
236 | * unspecified; the time returned can be compared with older ones to measure | ||
237 | * durations. The source clock does not jump when the system clock is adjusted. | ||
238 | */ | ||
239 | s64 nvgpu_current_time_ms(void) | ||
240 | { | ||
241 | return ktime_to_ms(ktime_get()); | ||
242 | } | ||
243 | |||
244 | /** | ||
245 | * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock. | ||
246 | * | ||
247 | * Return a clock in nanosecond units. The start time of the clock is | ||
248 | * unspecified; the time returned can be compared with older ones to measure | ||
249 | * durations. The source clock does not jump when the system clock is adjusted. | ||
250 | */ | ||
251 | s64 nvgpu_current_time_ns(void) | ||
252 | { | ||
253 | return ktime_to_ns(ktime_get()); | ||
254 | } | ||
255 | |||
256 | /** | ||
257 | * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp. | ||
258 | * | ||
259 | * Return a "high resolution" time stamp. Exactly what it counts does not | ||
260 | * matter, so long as it generally returns unique values and increases | ||
261 | * monotonically; wrap-around is possible in a system that runs for long | ||
262 | * enough. | ||
263 | * | ||
264 | * Note: what high resolution means is system dependent. | ||
265 | */ | ||
266 | u64 nvgpu_hr_timestamp(void) | ||
267 | { | ||
268 | return get_cycles(); | ||
269 | } | ||
diff --git a/include/os/linux/vgpu/fecs_trace_vgpu.c b/include/os/linux/vgpu/fecs_trace_vgpu.c new file mode 100644 index 0000000..02a381e --- /dev/null +++ b/include/os/linux/vgpu/fecs_trace_vgpu.c | |||
@@ -0,0 +1,225 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <uapi/linux/nvgpu.h> | ||
18 | |||
19 | #include <nvgpu/kmem.h> | ||
20 | #include <nvgpu/bug.h> | ||
21 | #include <nvgpu/enabled.h> | ||
22 | #include <nvgpu/ctxsw_trace.h> | ||
23 | #include <nvgpu/vgpu/vgpu_ivm.h> | ||
24 | #include <nvgpu/vgpu/tegra_vgpu.h> | ||
25 | #include <nvgpu/vgpu/vgpu.h> | ||
26 | #include <nvgpu/gk20a.h> | ||
27 | |||
28 | #include "os/linux/os_linux.h" | ||
29 | #include "gk20a/fecs_trace_gk20a.h" | ||
30 | #include "vgpu/fecs_trace_vgpu.h" | ||
31 | |||
32 | struct vgpu_fecs_trace { | ||
33 | struct tegra_hv_ivm_cookie *cookie; | ||
34 | struct nvgpu_ctxsw_ring_header *header; | ||
35 | struct nvgpu_gpu_ctxsw_trace_entry *entries; | ||
36 | int num_entries; | ||
37 | bool enabled; | ||
38 | void *buf; | ||
39 | }; | ||
40 | |||
41 | int vgpu_fecs_trace_init(struct gk20a *g) | ||
42 | { | ||
43 | struct device *dev = dev_from_gk20a(g); | ||
44 | struct device_node *np = dev->of_node; | ||
45 | struct of_phandle_args args; | ||
46 | struct vgpu_fecs_trace *vcst; | ||
47 | u32 mempool; | ||
48 | int err; | ||
49 | |||
50 | nvgpu_log_fn(g, " "); | ||
51 | |||
52 | vcst = nvgpu_kzalloc(g, sizeof(*vcst)); | ||
53 | if (!vcst) | ||
54 | return -ENOMEM; | ||
55 | |||
56 | err = of_parse_phandle_with_fixed_args(np, | ||
57 | "mempool-fecs-trace", 1, 0, &args); | ||
58 | if (err) { | ||
59 | nvgpu_info(g, "does not support fecs trace"); | ||
60 | goto fail; | ||
61 | } | ||
62 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | ||
63 | |||
64 | mempool = args.args[0]; | ||
65 | vcst->cookie = vgpu_ivm_mempool_reserve(mempool); | ||
66 | if (IS_ERR(vcst->cookie)) { | ||
67 | nvgpu_info(g, | ||
68 | "mempool %u reserve failed", mempool); | ||
69 | vcst->cookie = NULL; | ||
70 | err = -EINVAL; | ||
71 | goto fail; | ||
72 | } | ||
73 | |||
74 | vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie), | ||
75 | vgpu_ivm_get_size(vcst->cookie)); | ||
76 | if (!vcst->buf) { | ||
77 | nvgpu_info(g, "ioremap_cache failed"); | ||
78 | err = -EINVAL; | ||
79 | goto fail; | ||
80 | } | ||
81 | vcst->header = vcst->buf; | ||
82 | vcst->num_entries = vcst->header->num_ents; | ||
83 | if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { | ||
84 | nvgpu_err(g, "entry size mismatch"); | ||
85 | err = -EINVAL; | ||
86 | goto fail; | ||
86 | } | ||
87 | vcst->entries = vcst->buf + sizeof(*vcst->header); | ||
88 | g->fecs_trace = (struct gk20a_fecs_trace *)vcst; | ||
89 | |||
90 | return 0; | ||
91 | fail: | ||
92 | if (vcst->buf) | ||
93 | iounmap(vcst->buf); | ||
93 | if (vcst->cookie) | ||
94 | vgpu_ivm_mempool_unreserve(vcst->cookie); | ||
95 | nvgpu_kfree(g, vcst); | ||
96 | return err; | ||
97 | } | ||
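/*
 * The mempool handle is taken from the device tree. A sketch of the
 * property this parses (the phandle target and pool index are
 * illustrative only):
 *
 *   gpu {
 *           mempool-fecs-trace = <&tegra_hv 2>;
 *   };
 *
 * args.args[0] then names the hypervisor IVM mempool that backs the
 * shared trace ring mapped above with ioremap_cache().
 */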
98 | |||
99 | int vgpu_fecs_trace_deinit(struct gk20a *g) | ||
100 | { | ||
101 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
102 | |||
103 | iounmap(vcst->buf); | ||
104 | vgpu_ivm_mempool_unreserve(vcst->cookie); | ||
105 | nvgpu_kfree(g, vcst); | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | int vgpu_fecs_trace_enable(struct gk20a *g) | ||
110 | { | ||
111 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
112 | struct tegra_vgpu_cmd_msg msg = { | ||
113 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, | ||
114 | .handle = vgpu_get_handle(g), | ||
115 | }; | ||
116 | int err; | ||
117 | |||
118 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
119 | err = err ? err : msg.ret; | ||
120 | WARN_ON(err); | ||
121 | vcst->enabled = !err; | ||
122 | return err; | ||
123 | } | ||
124 | |||
125 | int vgpu_fecs_trace_disable(struct gk20a *g) | ||
126 | { | ||
127 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
128 | struct tegra_vgpu_cmd_msg msg = { | ||
129 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, | ||
130 | .handle = vgpu_get_handle(g), | ||
131 | }; | ||
132 | int err; | ||
133 | |||
134 | vcst->enabled = false; | ||
135 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
136 | err = err ? err : msg.ret; | ||
137 | WARN_ON(err); | ||
138 | return err; | ||
139 | } | ||
140 | |||
141 | bool vgpu_fecs_trace_is_enabled(struct gk20a *g) | ||
142 | { | ||
143 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
144 | |||
145 | return (vcst && vcst->enabled); | ||
146 | } | ||
147 | |||
148 | int vgpu_fecs_trace_poll(struct gk20a *g) | ||
149 | { | ||
150 | struct tegra_vgpu_cmd_msg msg = { | ||
151 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, | ||
152 | .handle = vgpu_get_handle(g), | ||
153 | }; | ||
154 | int err; | ||
155 | |||
156 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
157 | err = err ? err : msg.ret; | ||
158 | WARN_ON(err); | ||
159 | return err; | ||
160 | } | ||
161 | |||
162 | int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size) | ||
163 | { | ||
164 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
165 | |||
166 | *buf = vcst->buf; | ||
167 | *size = vgpu_ivm_get_size(vcst->cookie); | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | int vgpu_free_user_buffer(struct gk20a *g) | ||
172 | { | ||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma) | ||
177 | { | ||
178 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
179 | unsigned long size = vgpu_ivm_get_size(vcst->cookie); | ||
180 | unsigned long vsize = vma->vm_end - vma->vm_start; | ||
181 | |||
182 | size = min(size, vsize); | ||
183 | size = round_up(size, PAGE_SIZE); | ||
184 | |||
185 | return remap_pfn_range(vma, vma->vm_start, | ||
186 | vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT, | ||
187 | size, | ||
188 | vma->vm_page_prot); | ||
189 | } | ||
190 | |||
191 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
192 | int vgpu_fecs_trace_max_entries(struct gk20a *g, | ||
193 | struct nvgpu_gpu_ctxsw_trace_filter *filter) | ||
194 | { | ||
195 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
196 | |||
197 | return vcst->header->num_ents; | ||
198 | } | ||
199 | |||
200 | #if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE | ||
201 | #error "FECS trace filter size mismatch!" | ||
202 | #endif | ||
203 | |||
204 | int vgpu_fecs_trace_set_filter(struct gk20a *g, | ||
205 | struct nvgpu_gpu_ctxsw_trace_filter *filter) | ||
206 | { | ||
207 | struct tegra_vgpu_cmd_msg msg = { | ||
208 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, | ||
209 | .handle = vgpu_get_handle(g), | ||
210 | }; | ||
211 | struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; | ||
212 | int err; | ||
213 | |||
214 | memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); | ||
215 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
216 | err = err ? err : msg.ret; | ||
217 | WARN_ON(err); | ||
218 | return err; | ||
219 | } | ||
220 | |||
221 | void vgpu_fecs_trace_data_update(struct gk20a *g) | ||
222 | { | ||
223 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
224 | } | ||
225 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ | ||
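For orientation, a minimal sketch of how a consumer could drain the IVM-backed trace ring that vgpu_fecs_trace_init() maps above. The read/write index fields and the per-entry consumer are assumptions for illustration; the real ring layout comes from the ctxsw trace headers, which are not shown here.

/* Sketch only: drain entries from the shared trace ring. The
 * read_idx/write_idx fields are hypothetical stand-ins for the real
 * header layout; vcst->entries and vcst->num_entries are as set up in
 * vgpu_fecs_trace_init() above. */
static void example_drain_fecs_trace(struct vgpu_fecs_trace *vcst)
{
	u32 rd = vcst->header->read_idx;		/* assumed field */

	while (rd != vcst->header->write_idx) {		/* assumed field */
		example_process_entry(&vcst->entries[rd]); /* hypothetical */
		rd = (rd + 1U) % vcst->num_entries;
	}
	vcst->header->read_idx = rd;	/* publish consumed position */
}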
diff --git a/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c new file mode 100644 index 0000000..0304bcc --- /dev/null +++ b/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | |||
@@ -0,0 +1,103 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/platform_device.h> | ||
18 | |||
19 | #include <nvgpu/nvhost.h> | ||
20 | #include <nvgpu/gk20a.h> | ||
21 | |||
22 | #include "vgpu/clk_vgpu.h" | ||
23 | #include "os/linux/platform_gk20a.h" | ||
24 | #include "os/linux/os_linux.h" | ||
25 | #include "os/linux/vgpu/vgpu_linux.h" | ||
26 | #include "os/linux/vgpu/platform_vgpu_tegra.h" | ||
27 | |||
28 | static int gv11b_vgpu_probe(struct device *dev) | ||
29 | { | ||
30 | struct platform_device *pdev = to_platform_device(dev); | ||
31 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
32 | struct resource *r; | ||
33 | void __iomem *regs; | ||
34 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g); | ||
35 | struct gk20a *g = platform->g; | ||
36 | int ret; | ||
37 | |||
38 | r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode"); | ||
39 | if (!r) { | ||
40 | nvgpu_err(g, "failed to get usermode regs"); | ||
41 | return -ENXIO; | ||
42 | } | ||
43 | regs = devm_ioremap_resource(dev, r); | ||
44 | if (IS_ERR(regs)) { | ||
45 | nvgpu_err(g, "failed to map usermode regs"); | ||
46 | return PTR_ERR(regs); | ||
47 | } | ||
48 | l->usermode_regs = regs; | ||
49 | |||
50 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
51 | ret = nvgpu_get_nvhost_dev(g); | ||
52 | if (ret) { | ||
53 | l->usermode_regs = NULL; | ||
54 | return ret; | ||
55 | } | ||
56 | |||
57 | ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev, | ||
58 | &g->syncpt_unit_base, | ||
59 | &g->syncpt_unit_size); | ||
60 | if (ret) { | ||
61 | nvgpu_err(g, "Failed to get syncpt interface"); | ||
62 | return -ENOSYS; | ||
63 | } | ||
64 | g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); | ||
65 | nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x", | ||
66 | g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size); | ||
67 | #endif | ||
68 | vgpu_init_clk_support(platform->g); | ||
69 | |||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | struct gk20a_platform gv11b_vgpu_tegra_platform = { | ||
74 | .has_syncpoints = true, | ||
75 | |||
76 | /* power management configuration */ | ||
77 | .can_railgate_init = false, | ||
78 | .can_elpg_init = false, | ||
79 | .enable_slcg = false, | ||
80 | .enable_blcg = false, | ||
81 | .enable_elcg = false, | ||
82 | .enable_elpg = false, | ||
83 | .enable_aelpg = false, | ||
84 | .can_slcg = false, | ||
85 | .can_blcg = false, | ||
86 | .can_elcg = false, | ||
87 | |||
88 | .ch_wdt_timeout_ms = 5000, | ||
89 | |||
90 | .probe = gv11b_vgpu_probe, | ||
91 | |||
92 | .clk_round_rate = vgpu_plat_clk_round_rate, | ||
93 | .get_clk_freqs = vgpu_plat_clk_get_freqs, | ||
94 | |||
95 | /* frequency scaling configuration */ | ||
96 | .devfreq_governor = "userspace", | ||
97 | |||
98 | .virtual_dev = true, | ||
99 | |||
100 | /* power management callbacks */ | ||
101 | .suspend = vgpu_tegra_suspend, | ||
102 | .resume = vgpu_tegra_resume, | ||
103 | }; | ||
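The probe above derives the per-syncpoint stride from the byte offset of syncpoint 1, so syncpoint apertures are assumed to sit at a fixed stride from syncpt_unit_base. A sketch of the arithmetic this implies; the helper is illustrative, not part of the driver:

/* Illustrative only: physical address of syncpoint `id` inside the
 * syncpoint unit aperture, given the stride computed in the probe. */
static u64 example_syncpt_aperture_addr(struct gk20a *g, u32 id)
{
	return g->syncpt_unit_base + (u64)id * g->syncpt_size;
}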
diff --git a/include/os/linux/vgpu/platform_vgpu_tegra.c b/include/os/linux/vgpu/platform_vgpu_tegra.c new file mode 100644 index 0000000..948323e --- /dev/null +++ b/include/os/linux/vgpu/platform_vgpu_tegra.c | |||
@@ -0,0 +1,97 @@ | |||
1 | /* | ||
2 | * Tegra Virtualized GPU Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <nvgpu/nvhost.h> | ||
20 | #include <nvgpu/gk20a.h> | ||
21 | |||
22 | #include "os/linux/platform_gk20a.h" | ||
23 | #include "vgpu/clk_vgpu.h" | ||
24 | #include "vgpu_linux.h" | ||
25 | |||
26 | static int gk20a_tegra_probe(struct device *dev) | ||
27 | { | ||
28 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
29 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
30 | int ret; | ||
31 | |||
32 | ret = nvgpu_get_nvhost_dev(platform->g); | ||
33 | if (ret) | ||
34 | return ret; | ||
35 | |||
36 | vgpu_init_clk_support(platform->g); | ||
37 | return 0; | ||
38 | #else | ||
39 | return 0; | ||
40 | #endif | ||
41 | } | ||
42 | |||
43 | long vgpu_plat_clk_round_rate(struct device *dev, unsigned long rate) | ||
44 | { | ||
45 | /* server will handle frequency rounding */ | ||
46 | return rate; | ||
47 | } | ||
48 | |||
49 | int vgpu_plat_clk_get_freqs(struct device *dev, unsigned long **freqs, | ||
50 | int *num_freqs) | ||
51 | { | ||
52 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
53 | struct gk20a *g = platform->g; | ||
54 | |||
55 | return vgpu_clk_get_freqs(g, freqs, num_freqs); | ||
56 | } | ||
57 | |||
58 | int vgpu_plat_clk_cap_rate(struct device *dev, unsigned long rate) | ||
59 | { | ||
60 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
61 | struct gk20a *g = platform->g; | ||
62 | |||
63 | return vgpu_clk_cap_rate(g, rate); | ||
64 | } | ||
65 | |||
66 | struct gk20a_platform vgpu_tegra_platform = { | ||
67 | .has_syncpoints = true, | ||
68 | .aggressive_sync_destroy_thresh = 64, | ||
69 | |||
70 | /* power management configuration */ | ||
71 | .can_railgate_init = false, | ||
72 | .can_elpg_init = false, | ||
73 | .enable_slcg = false, | ||
74 | .enable_blcg = false, | ||
75 | .enable_elcg = false, | ||
76 | .enable_elpg = false, | ||
77 | .enable_aelpg = false, | ||
78 | .can_slcg = false, | ||
79 | .can_blcg = false, | ||
80 | .can_elcg = false, | ||
81 | |||
82 | .ch_wdt_timeout_ms = 5000, | ||
83 | |||
84 | .probe = gk20a_tegra_probe, | ||
85 | |||
86 | .clk_round_rate = vgpu_plat_clk_round_rate, | ||
87 | .get_clk_freqs = vgpu_plat_clk_get_freqs, | ||
88 | |||
89 | /* frequency scaling configuration */ | ||
90 | .devfreq_governor = "userspace", | ||
91 | |||
92 | .virtual_dev = true, | ||
93 | |||
94 | /* power management callbacks */ | ||
95 | .suspend = vgpu_tegra_suspend, | ||
96 | .resume = vgpu_tegra_resume, | ||
97 | }; | ||
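Because vgpu_plat_clk_round_rate() defers all rounding to the server, a caller that needs a concrete rate has to snap to the table returned by vgpu_plat_clk_get_freqs(). A sketch under the assumption that the table is sorted ascending; the helper name is hypothetical:

/* Sketch: pick the highest server-advertised frequency not above `rate`.
 * Assumes the table from vgpu_plat_clk_get_freqs() is ascending. */
static unsigned long example_snap_rate(struct device *dev, unsigned long rate)
{
	unsigned long *freqs;
	int i, num_freqs;

	if (vgpu_plat_clk_get_freqs(dev, &freqs, &num_freqs) || num_freqs < 1)
		return rate;	/* fall back to server-side rounding */

	for (i = num_freqs - 1; i > 0; i--)
		if (freqs[i] <= rate)
			break;
	return freqs[i];
}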
diff --git a/include/os/linux/vgpu/platform_vgpu_tegra.h b/include/os/linux/vgpu/platform_vgpu_tegra.h new file mode 100644 index 0000000..fef346d --- /dev/null +++ b/include/os/linux/vgpu/platform_vgpu_tegra.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _VGPU_PLATFORM_H_ | ||
18 | #define _VGPU_PLATFORM_H_ | ||
19 | |||
20 | long vgpu_plat_clk_round_rate(struct device *dev, unsigned long rate); | ||
21 | int vgpu_plat_clk_get_freqs(struct device *dev, unsigned long **freqs, | ||
22 | int *num_freqs); | ||
23 | int vgpu_plat_clk_cap_rate(struct device *dev, unsigned long rate); | ||
24 | #endif | ||
diff --git a/include/os/linux/vgpu/sysfs_vgpu.c b/include/os/linux/vgpu/sysfs_vgpu.c new file mode 100644 index 0000000..ade5d82 --- /dev/null +++ b/include/os/linux/vgpu/sysfs_vgpu.c | |||
@@ -0,0 +1,143 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/device.h> | ||
18 | #include <nvgpu/vgpu/vgpu.h> | ||
19 | |||
20 | #include "os/linux/platform_gk20a.h" | ||
21 | #include "os/linux/os_linux.h" | ||
22 | #include "vgpu/ecc_vgpu.h" | ||
23 | |||
24 | static ssize_t vgpu_load_show(struct device *dev, | ||
25 | struct device_attribute *attr, | ||
26 | char *buf) | ||
27 | { | ||
28 | struct gk20a *g = get_gk20a(dev); | ||
29 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
30 | struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load; | ||
31 | int err; | ||
32 | |||
33 | msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD; | ||
34 | msg.handle = vgpu_get_handle(g); | ||
35 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
36 | if (err) | ||
37 | return err; | ||
38 | |||
39 | return snprintf(buf, PAGE_SIZE, "%u\n", p->load); | ||
40 | } | ||
41 | static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); | ||
42 | |||
43 | static ssize_t vgpu_ecc_stat_show(struct device *dev, | ||
44 | struct device_attribute *attr, | ||
45 | char *buf) | ||
46 | { | ||
47 | struct gk20a *g = get_gk20a(dev); | ||
48 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
49 | struct tegra_vgpu_ecc_counter_params *p = &msg.params.ecc_counter; | ||
50 | struct dev_ext_attribute *ext_attr = container_of(attr, | ||
51 | struct dev_ext_attribute, attr); | ||
52 | struct vgpu_ecc_stat *ecc_stat = ext_attr->var; | ||
53 | int err; | ||
54 | |||
55 | p->ecc_id = ecc_stat->ecc_id; | ||
56 | |||
57 | msg.cmd = TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE; | ||
58 | msg.handle = vgpu_get_handle(g); | ||
59 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
60 | err = err ? err : msg.ret; | ||
61 | if (unlikely(err)) { | ||
62 | nvgpu_err(g, "ecc: cannot get ECC counter value: %d", err); | ||
63 | return err; | ||
64 | } | ||
65 | |||
66 | return snprintf(buf, PAGE_SIZE, "%u\n", p->value); | ||
67 | } | ||
68 | |||
69 | static int vgpu_create_ecc_sysfs(struct device *dev) | ||
70 | { | ||
71 | struct gk20a *g = get_gk20a(dev); | ||
72 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
73 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
74 | struct vgpu_ecc_stat *stats; | ||
75 | struct dev_ext_attribute *attrs; | ||
76 | int err, i, count; | ||
77 | |||
78 | err = vgpu_ecc_get_info(g); | ||
79 | if (unlikely(err)) { | ||
80 | nvgpu_err(g, "ecc: cannot get ECC info: %d", err); | ||
81 | return err; | ||
82 | } | ||
83 | |||
84 | stats = priv->ecc_stats; | ||
85 | count = priv->ecc_stats_count; | ||
86 | |||
87 | attrs = nvgpu_kzalloc(g, count * sizeof(*attrs)); | ||
88 | if (unlikely(!attrs)) { | ||
89 | nvgpu_err(g, "ecc: no memory"); | ||
90 | vgpu_ecc_remove_info(g); | ||
91 | return -ENOMEM; | ||
92 | } | ||
93 | |||
94 | for (i = 0; i < count; i++) { | ||
95 | sysfs_attr_init(&attrs[i].attr.attr); | ||
96 | attrs[i].attr.attr.name = stats[i].name; | ||
97 | attrs[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO); | ||
98 | attrs[i].attr.show = vgpu_ecc_stat_show; | ||
99 | attrs[i].attr.store = NULL; | ||
100 | attrs[i].var = &stats[i]; | ||
101 | |||
102 | err = device_create_file(dev, &attrs[i].attr); | ||
103 | if (unlikely(err)) { | ||
104 | nvgpu_warn(g, "ecc: cannot create file \"%s\": %d", | ||
105 | stats[i].name, err); | ||
106 | } | ||
107 | } | ||
108 | |||
109 | l->ecc_attrs = attrs; | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static void vgpu_remove_ecc_sysfs(struct device *dev) | ||
114 | { | ||
115 | struct gk20a *g = get_gk20a(dev); | ||
116 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
117 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
118 | int i; | ||
119 | |||
120 | if (l->ecc_attrs) { | ||
121 | for (i = 0; i < priv->ecc_stats_count; i++) | ||
122 | device_remove_file(dev, &l->ecc_attrs[i].attr); | ||
123 | |||
124 | nvgpu_kfree(g, l->ecc_attrs); | ||
125 | l->ecc_attrs = NULL; | ||
126 | } | ||
127 | |||
128 | vgpu_ecc_remove_info(g); | ||
129 | } | ||
130 | |||
131 | void vgpu_create_sysfs(struct device *dev) | ||
132 | { | ||
133 | if (device_create_file(dev, &dev_attr_load)) | ||
134 | dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); | ||
135 | |||
136 | vgpu_create_ecc_sysfs(dev); | ||
137 | } | ||
138 | |||
139 | void vgpu_remove_sysfs(struct device *dev) | ||
140 | { | ||
141 | device_remove_file(dev, &dev_attr_load); | ||
142 | vgpu_remove_ecc_sysfs(dev); | ||
143 | } | ||
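From userspace, the `load` attribute created above reads back as a decimal integer. A hypothetical consumer; the sysfs path is an assumption for a typical gpu.0 device layout:

/* Hypothetical userspace poll of the vgpu load attribute. */
#include <stdio.h>

int main(void)
{
	/* Path is assumed; the real node lives under the GPU device. */
	FILE *f = fopen("/sys/devices/gpu.0/load", "r");
	unsigned int load;

	if (f && fscanf(f, "%u", &load) == 1)
		printf("GPU load: %u\n", load);
	if (f)
		fclose(f);
	return 0;
}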
diff --git a/include/os/linux/vgpu/vgpu_ivc.c b/include/os/linux/vgpu/vgpu_ivc.c new file mode 100644 index 0000000..950f0d4 --- /dev/null +++ b/include/os/linux/vgpu/vgpu_ivc.c | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/types.h> | ||
18 | #include <linux/tegra_gr_comm.h> | ||
19 | |||
20 | #include "os/linux/os_linux.h" | ||
21 | |||
22 | int vgpu_ivc_init(struct gk20a *g, u32 elems, | ||
23 | const size_t *queue_sizes, u32 queue_start, u32 num_queues) | ||
24 | { | ||
25 | struct platform_device *pdev = to_platform_device(dev_from_gk20a(g)); | ||
26 | |||
27 | return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start, | ||
28 | num_queues); | ||
29 | } | ||
30 | |||
31 | void vgpu_ivc_deinit(u32 queue_start, u32 num_queues) | ||
32 | { | ||
33 | tegra_gr_comm_deinit(queue_start, num_queues); | ||
34 | } | ||
35 | |||
36 | void vgpu_ivc_release(void *handle) | ||
37 | { | ||
38 | tegra_gr_comm_release(handle); | ||
39 | } | ||
40 | |||
41 | u32 vgpu_ivc_get_server_vmid(void) | ||
42 | { | ||
43 | return tegra_gr_comm_get_server_vmid(); | ||
44 | } | ||
45 | |||
46 | int vgpu_ivc_recv(u32 index, void **handle, void **data, | ||
47 | size_t *size, u32 *sender) | ||
48 | { | ||
49 | return tegra_gr_comm_recv(index, handle, data, size, sender); | ||
50 | } | ||
51 | |||
52 | int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size) | ||
53 | { | ||
54 | return tegra_gr_comm_send(peer, index, data, size); | ||
55 | } | ||
56 | |||
57 | int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle, | ||
58 | void **data, size_t *size) | ||
59 | { | ||
60 | return tegra_gr_comm_sendrecv(peer, index, handle, data, size); | ||
61 | } | ||
62 | |||
63 | u32 vgpu_ivc_get_peer_self(void) | ||
64 | { | ||
65 | return TEGRA_GR_COMM_ID_SELF; | ||
66 | } | ||
67 | |||
68 | void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr, | ||
69 | size_t *size) | ||
70 | { | ||
71 | return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size); | ||
72 | } | ||
73 | |||
74 | void vgpu_ivc_oob_put_ptr(void *handle) | ||
75 | { | ||
76 | tegra_gr_comm_oob_put_ptr(handle); | ||
77 | } | ||
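These thin wrappers compose into the request/reply pattern that vgpu_comm_sendrecv() implements elsewhere. A simplified sketch of one round trip; queue index 0 and the absence of retry handling are simplifications for illustration:

/* Sketch: one request/reply exchange with the server over IVC. */
static int example_ivc_roundtrip(void *req, size_t req_size)
{
	void *handle, *data;
	size_t size;
	u32 sender;
	int err;

	err = vgpu_ivc_send(vgpu_ivc_get_server_vmid(), 0, req, req_size);
	if (err)
		return err;

	err = vgpu_ivc_recv(0, &handle, &data, &size, &sender);
	if (!err)
		vgpu_ivc_release(handle);	/* done with the reply buffer */
	return err;
}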
diff --git a/include/os/linux/vgpu/vgpu_ivm.c b/include/os/linux/vgpu/vgpu_ivm.c new file mode 100644 index 0000000..bbd444d --- /dev/null +++ b/include/os/linux/vgpu/vgpu_ivm.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/vgpu/vgpu_ivm.h> | ||
18 | |||
19 | #include <linux/tegra-ivc.h> | ||
20 | |||
21 | #include "os/linux/os_linux.h" | ||
22 | |||
23 | struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id) | ||
24 | { | ||
25 | return tegra_hv_mempool_reserve(id); | ||
26 | } | ||
27 | |||
28 | int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie) | ||
29 | { | ||
30 | return tegra_hv_mempool_unreserve(cookie); | ||
31 | } | ||
32 | |||
33 | u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie) | ||
34 | { | ||
35 | return cookie->ipa; | ||
36 | } | ||
37 | |||
38 | u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie) | ||
39 | { | ||
40 | return cookie->size; | ||
41 | } | ||
42 | |||
43 | void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie) | ||
44 | { | ||
45 | return ioremap_cache(vgpu_ivm_get_ipa(cookie), | ||
46 | vgpu_ivm_get_size(cookie)); | ||
47 | } | ||
48 | |||
49 | void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie, | ||
50 | void *addr) | ||
51 | { | ||
52 | iounmap(addr); | ||
53 | } | ||
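With these helpers, the open-coded ioremap_cache() sequence in vgpu_fecs_trace_init() earlier in this series can be expressed as a reserve-then-map pair. A sketch, equivalent by construction; the function name is hypothetical:

/* Sketch: reserve an IVM mempool and map it through the wrappers above. */
static void *example_reserve_and_map(unsigned int mempool,
				     struct tegra_hv_ivm_cookie **out)
{
	struct tegra_hv_ivm_cookie *cookie = vgpu_ivm_mempool_reserve(mempool);

	if (IS_ERR(cookie))
		return NULL;

	*out = cookie;
	return vgpu_ivm_mempool_map(cookie);	/* ioremap_cache(ipa, size) */
}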
diff --git a/include/os/linux/vgpu/vgpu_linux.c b/include/os/linux/vgpu/vgpu_linux.c new file mode 100644 index 0000000..80bcfff --- /dev/null +++ b/include/os/linux/vgpu/vgpu_linux.c | |||
@@ -0,0 +1,525 @@ | |||
1 | /* | ||
2 | * Virtualized GPU for Linux | ||
3 | * | ||
4 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/mm.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/dma-mapping.h> | ||
22 | #include <linux/pm_runtime.h> | ||
23 | #include <linux/pm_qos.h> | ||
24 | #include <linux/platform_device.h> | ||
25 | #include <soc/tegra/chip-id.h> | ||
26 | |||
27 | #include <nvgpu/kmem.h> | ||
28 | #include <nvgpu/bug.h> | ||
29 | #include <nvgpu/enabled.h> | ||
30 | #include <nvgpu/debug.h> | ||
31 | #include <nvgpu/soc.h> | ||
32 | #include <nvgpu/ctxsw_trace.h> | ||
33 | #include <nvgpu/defaults.h> | ||
34 | #include <nvgpu/ltc.h> | ||
35 | #include <nvgpu/channel.h> | ||
36 | #include <nvgpu/clk_arb.h> | ||
37 | |||
38 | #include "vgpu_linux.h" | ||
39 | #include "vgpu/fecs_trace_vgpu.h" | ||
40 | #include "vgpu/clk_vgpu.h" | ||
41 | #include "gk20a/regops_gk20a.h" | ||
42 | #include "gm20b/hal_gm20b.h" | ||
43 | |||
44 | #include "os/linux/module.h" | ||
45 | #include "os/linux/os_linux.h" | ||
46 | #include "os/linux/ioctl.h" | ||
47 | #include "os/linux/scale.h" | ||
48 | #include "os/linux/driver_common.h" | ||
49 | #include "os/linux/platform_gk20a.h" | ||
50 | #include "os/linux/vgpu/platform_vgpu_tegra.h" | ||
51 | |||
52 | struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g) | ||
53 | { | ||
54 | struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g)); | ||
55 | |||
56 | return (struct vgpu_priv_data *)plat->vgpu_priv; | ||
57 | } | ||
58 | |||
59 | static void vgpu_remove_support(struct gk20a *g) | ||
60 | { | ||
61 | vgpu_remove_support_common(g); | ||
62 | } | ||
63 | |||
64 | static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform) | ||
65 | { | ||
66 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
67 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
68 | |||
69 | nvgpu_mutex_init(&g->power_lock); | ||
70 | nvgpu_mutex_init(&g->ctxsw_disable_lock); | ||
71 | nvgpu_mutex_init(&g->clk_arb_enable_lock); | ||
72 | nvgpu_mutex_init(&g->cg_pg_lock); | ||
73 | |||
74 | nvgpu_mutex_init(&priv->vgpu_clk_get_freq_lock); | ||
75 | |||
76 | nvgpu_mutex_init(&l->ctrl.privs_lock); | ||
77 | nvgpu_init_list_node(&l->ctrl.privs); | ||
78 | |||
79 | l->regs_saved = l->regs; | ||
80 | l->bar1_saved = l->bar1; | ||
81 | |||
82 | nvgpu_atomic_set(&g->clk_arb_global_nr, 0); | ||
83 | |||
84 | g->aggressive_sync_destroy = platform->aggressive_sync_destroy; | ||
85 | g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; | ||
86 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints); | ||
87 | g->ptimer_src_freq = platform->ptimer_src_freq; | ||
88 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); | ||
89 | g->railgate_delay = platform->railgate_delay_init; | ||
90 | |||
91 | __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, | ||
92 | platform->unify_address_spaces); | ||
93 | } | ||
94 | |||
95 | static int vgpu_init_support(struct platform_device *pdev) | ||
96 | { | ||
97 | struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
98 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
99 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
100 | void __iomem *regs; | ||
101 | int err = 0; | ||
102 | |||
103 | if (!r) { | ||
104 | nvgpu_err(g, "failed to get gk20a bar1"); | ||
105 | err = -ENXIO; | ||
106 | goto fail; | ||
107 | } | ||
108 | |||
109 | if (r->name && !strcmp(r->name, "/vgpu")) { | ||
110 | regs = devm_ioremap_resource(&pdev->dev, r); | ||
111 | if (IS_ERR(regs)) { | ||
112 | nvgpu_err(g, "failed to remap gk20a bar1"); | ||
113 | err = PTR_ERR(regs); | ||
114 | goto fail; | ||
115 | } | ||
116 | l->bar1 = regs; | ||
117 | l->bar1_mem = r; | ||
118 | } | ||
119 | |||
120 | nvgpu_mutex_init(&g->dbg_sessions_lock); | ||
121 | nvgpu_mutex_init(&g->client_lock); | ||
122 | |||
123 | nvgpu_init_list_node(&g->profiler_objects); | ||
124 | |||
125 | g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); | ||
126 | if (!g->dbg_regops_tmp_buf) { | ||
127 | nvgpu_err(g, "couldn't allocate regops tmp buf"); | ||
128 | return -ENOMEM; | ||
129 | } | ||
130 | g->dbg_regops_tmp_buf_ops = | ||
131 | SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); | ||
132 | |||
133 | g->remove_support = vgpu_remove_support; | ||
134 | return 0; | ||
135 | |||
136 | fail: | ||
137 | vgpu_remove_support(g); | ||
138 | return err; | ||
139 | } | ||
140 | |||
141 | int vgpu_pm_prepare_poweroff(struct device *dev) | ||
142 | { | ||
143 | struct gk20a *g = get_gk20a(dev); | ||
144 | int ret = 0; | ||
145 | |||
146 | nvgpu_log_fn(g, " "); | ||
147 | |||
148 | nvgpu_mutex_acquire(&g->power_lock); | ||
149 | |||
150 | if (!g->power_on) | ||
151 | goto done; | ||
152 | |||
153 | if (g->ops.fifo.channel_suspend) | ||
154 | ret = g->ops.fifo.channel_suspend(g); | ||
155 | if (ret) | ||
156 | goto done; | ||
157 | |||
158 | g->power_on = false; | ||
159 | done: | ||
160 | nvgpu_mutex_release(&g->power_lock); | ||
161 | |||
162 | return ret; | ||
163 | } | ||
164 | |||
165 | int vgpu_pm_finalize_poweron(struct device *dev) | ||
166 | { | ||
167 | struct gk20a *g = get_gk20a(dev); | ||
168 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
169 | int err = 0; | ||
170 | |||
171 | nvgpu_log_fn(g, " "); | ||
172 | |||
173 | nvgpu_mutex_acquire(&g->power_lock); | ||
174 | |||
175 | if (g->power_on) | ||
176 | goto done; | ||
177 | |||
178 | g->power_on = true; | ||
179 | |||
180 | vgpu_detect_chip(g); | ||
181 | err = vgpu_init_hal(g); | ||
182 | if (err) | ||
183 | goto done; | ||
184 | |||
185 | if (g->ops.ltc.init_fs_state) | ||
186 | g->ops.ltc.init_fs_state(g); | ||
187 | |||
188 | err = nvgpu_init_ltc_support(g); | ||
189 | if (err) { | ||
190 | nvgpu_err(g, "failed to init ltc"); | ||
191 | goto done; | ||
192 | } | ||
193 | |||
194 | err = vgpu_init_mm_support(g); | ||
195 | if (err) { | ||
196 | nvgpu_err(g, "failed to init gk20a mm"); | ||
197 | goto done; | ||
198 | } | ||
199 | |||
200 | err = vgpu_init_fifo_support(g); | ||
201 | if (err) { | ||
202 | nvgpu_err(g, "failed to init gk20a fifo"); | ||
203 | goto done; | ||
204 | } | ||
205 | |||
206 | err = vgpu_init_gr_support(g); | ||
207 | if (err) { | ||
208 | nvgpu_err(g, "failed to init gk20a gr"); | ||
209 | goto done; | ||
210 | } | ||
211 | |||
212 | err = nvgpu_clk_arb_init_arbiter(g); | ||
213 | if (err) { | ||
214 | nvgpu_err(g, "failed to init clk arb"); | ||
215 | goto done; | ||
216 | } | ||
217 | |||
218 | err = g->ops.chip_init_gpu_characteristics(g); | ||
219 | if (err) { | ||
220 | nvgpu_err(g, "failed to init gk20a gpu characteristics"); | ||
221 | goto done; | ||
222 | } | ||
223 | |||
224 | err = nvgpu_finalize_poweron_linux(l); | ||
225 | if (err) | ||
226 | goto done; | ||
227 | |||
228 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
229 | gk20a_ctxsw_trace_init(g); | ||
230 | #endif | ||
231 | gk20a_sched_ctrl_init(g); | ||
232 | gk20a_channel_resume(g); | ||
233 | |||
234 | g->sw_ready = true; | ||
235 | |||
236 | done: | ||
237 | if (err) | ||
238 | g->power_on = false; | ||
239 | |||
240 | nvgpu_mutex_release(&g->power_lock); | ||
241 | return err; | ||
242 | } | ||
243 | |||
244 | static int vgpu_qos_notify(struct notifier_block *nb, | ||
245 | unsigned long n, void *data) | ||
246 | { | ||
247 | struct gk20a_scale_profile *profile = | ||
248 | container_of(nb, struct gk20a_scale_profile, | ||
249 | qos_notify_block); | ||
250 | struct gk20a *g = get_gk20a(profile->dev); | ||
251 | u32 max_freq; | ||
252 | int err; | ||
253 | |||
254 | nvgpu_log_fn(g, " "); | ||
255 | |||
256 | max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS); | ||
257 | err = vgpu_plat_clk_cap_rate(profile->dev, max_freq); | ||
258 | if (err) | ||
259 | nvgpu_err(g, "%s failed, err=%d", __func__, err); | ||
260 | |||
261 | return NOTIFY_OK; /* let further notifier callbacks run */ | ||
262 | } | ||
263 | |||
264 | static int vgpu_pm_qos_init(struct device *dev) | ||
265 | { | ||
266 | struct gk20a *g = get_gk20a(dev); | ||
267 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
268 | |||
269 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) { | ||
270 | if (!profile) | ||
271 | return -EINVAL; | ||
272 | } else { | ||
273 | profile = nvgpu_kzalloc(g, sizeof(*profile)); | ||
274 | if (!profile) | ||
275 | return -ENOMEM; | ||
276 | g->scale_profile = profile; | ||
277 | } | ||
278 | |||
279 | profile->dev = dev; | ||
280 | profile->qos_notify_block.notifier_call = vgpu_qos_notify; | ||
281 | pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
282 | &profile->qos_notify_block); | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | static void vgpu_pm_qos_remove(struct device *dev) | ||
287 | { | ||
288 | struct gk20a *g = get_gk20a(dev); | ||
289 | |||
290 | pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
291 | &g->scale_profile->qos_notify_block); | ||
292 | nvgpu_kfree(g, g->scale_profile); | ||
293 | g->scale_profile = NULL; | ||
294 | } | ||
295 | |||
296 | static int vgpu_pm_init(struct device *dev) | ||
297 | { | ||
298 | struct gk20a *g = get_gk20a(dev); | ||
299 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
300 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
301 | unsigned long *freqs; | ||
302 | int num_freqs; | ||
303 | int err = 0; | ||
304 | |||
305 | nvgpu_log_fn(g, " "); | ||
306 | |||
307 | if (nvgpu_platform_is_simulation(g)) | ||
308 | return 0; | ||
309 | |||
310 | __pm_runtime_disable(dev, false); | ||
311 | |||
312 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | ||
313 | gk20a_scale_init(dev); | ||
314 | |||
315 | if (l->devfreq) { | ||
316 | /* set min/max frequency based on frequency table */ | ||
317 | err = platform->get_clk_freqs(dev, &freqs, &num_freqs); | ||
318 | if (err) | ||
319 | return err; | ||
320 | |||
321 | if (num_freqs < 1) | ||
322 | return -EINVAL; | ||
323 | |||
324 | l->devfreq->min_freq = freqs[0]; | ||
325 | l->devfreq->max_freq = freqs[num_freqs - 1]; | ||
326 | } | ||
327 | |||
328 | err = vgpu_pm_qos_init(dev); | ||
329 | if (err) | ||
330 | return err; | ||
331 | |||
332 | return err; | ||
333 | } | ||
334 | |||
335 | int vgpu_probe(struct platform_device *pdev) | ||
336 | { | ||
337 | struct nvgpu_os_linux *l; | ||
338 | struct gk20a *gk20a; | ||
339 | int err; | ||
340 | struct device *dev = &pdev->dev; | ||
341 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
342 | struct vgpu_priv_data *priv; | ||
343 | |||
344 | if (!platform) { | ||
345 | dev_err(dev, "no platform data\n"); | ||
346 | return -ENODATA; | ||
347 | } | ||
348 | |||
349 | l = kzalloc(sizeof(*l), GFP_KERNEL); | ||
350 | if (!l) { | ||
351 | dev_err(dev, "couldn't allocate gk20a support\n"); | ||
352 | return -ENOMEM; | ||
353 | } | ||
354 | gk20a = &l->g; | ||
355 | |||
356 | nvgpu_log_fn(gk20a, " "); | ||
357 | |||
358 | nvgpu_init_gk20a(gk20a); | ||
359 | |||
360 | nvgpu_kmem_init(gk20a); | ||
361 | |||
362 | err = nvgpu_init_enabled_flags(gk20a); | ||
363 | if (err) { | ||
364 | kfree(gk20a); | ||
365 | return err; | ||
366 | } | ||
367 | |||
368 | l->dev = dev; | ||
369 | if (tegra_platform_is_vdk()) | ||
370 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | ||
371 | |||
372 | gk20a->is_virtual = true; | ||
373 | |||
374 | priv = nvgpu_kzalloc(gk20a, sizeof(*priv)); | ||
375 | if (!priv) { | ||
376 | kfree(gk20a); | ||
377 | return -ENOMEM; | ||
378 | } | ||
379 | |||
380 | platform->g = gk20a; | ||
381 | platform->vgpu_priv = priv; | ||
382 | |||
383 | err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); | ||
384 | if (err) | ||
385 | return err; | ||
386 | |||
387 | vgpu_init_support(pdev); | ||
388 | |||
389 | vgpu_init_vars(gk20a, platform); | ||
390 | |||
391 | init_rwsem(&l->busy_lock); | ||
392 | |||
393 | nvgpu_spinlock_init(&gk20a->mc_enable_lock); | ||
394 | |||
395 | gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; | ||
396 | |||
397 | /* Initialize the platform interface. */ | ||
398 | err = platform->probe(dev); | ||
399 | if (err) { | ||
400 | if (err == -EPROBE_DEFER) | ||
401 | nvgpu_info(gk20a, "platform probe failed"); | ||
402 | else | ||
403 | nvgpu_err(gk20a, "platform probe failed"); | ||
404 | return err; | ||
405 | } | ||
406 | |||
407 | if (platform->late_probe) { | ||
408 | err = platform->late_probe(dev); | ||
409 | if (err) { | ||
410 | nvgpu_err(gk20a, "late probe failed"); | ||
411 | return err; | ||
412 | } | ||
413 | } | ||
414 | |||
415 | err = vgpu_comm_init(gk20a); | ||
416 | if (err) { | ||
417 | nvgpu_err(gk20a, "failed to init comm interface"); | ||
418 | return -ENOSYS; | ||
419 | } | ||
420 | |||
421 | priv->virt_handle = vgpu_connect(); | ||
422 | if (!priv->virt_handle) { | ||
423 | nvgpu_err(gk20a, "failed to connect to server node"); | ||
424 | vgpu_comm_deinit(); | ||
425 | return -ENOSYS; | ||
426 | } | ||
427 | |||
428 | err = vgpu_get_constants(gk20a); | ||
429 | if (err) { | ||
430 | vgpu_comm_deinit(); | ||
431 | return err; | ||
432 | } | ||
433 | |||
434 | err = vgpu_pm_init(dev); | ||
435 | if (err) { | ||
436 | nvgpu_err(gk20a, "pm init failed"); | ||
437 | return err; | ||
438 | } | ||
439 | |||
440 | err = nvgpu_thread_create(&priv->intr_handler, gk20a, | ||
441 | vgpu_intr_thread, "gk20a"); | ||
442 | if (err) | ||
443 | return err; | ||
444 | |||
445 | gk20a_debug_init(gk20a, "gpu.0"); | ||
446 | |||
447 | /* Set DMA parameters to allow larger sgt lists */ | ||
448 | dev->dma_parms = &l->dma_parms; | ||
449 | dma_set_max_seg_size(dev, UINT_MAX); | ||
450 | |||
451 | gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; | ||
452 | gk20a->timeouts_disabled_by_user = false; | ||
453 | nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0); | ||
454 | |||
455 | vgpu_create_sysfs(dev); | ||
456 | gk20a_init_gr(gk20a); | ||
457 | |||
458 | nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages); | ||
459 | gk20a->gr.max_comptag_mem = totalram_size_in_mb; | ||
460 | |||
461 | nvgpu_ref_init(&gk20a->refcount); | ||
462 | |||
463 | return 0; | ||
464 | } | ||
465 | |||
466 | int vgpu_remove(struct platform_device *pdev) | ||
467 | { | ||
468 | struct device *dev = &pdev->dev; | ||
469 | struct gk20a *g = get_gk20a(dev); | ||
470 | |||
471 | nvgpu_log_fn(g, " "); | ||
472 | |||
473 | vgpu_pm_qos_remove(dev); | ||
474 | if (g->remove_support) | ||
475 | g->remove_support(g); | ||
476 | |||
477 | vgpu_comm_deinit(); | ||
478 | gk20a_sched_ctrl_cleanup(g); | ||
479 | gk20a_user_deinit(dev, &nvgpu_class); | ||
480 | vgpu_remove_sysfs(dev); | ||
481 | gk20a_get_platform(dev)->g = NULL; | ||
482 | gk20a_put(g); | ||
483 | |||
484 | return 0; | ||
485 | } | ||
486 | |||
487 | bool vgpu_is_reduced_bar1(struct gk20a *g) | ||
488 | { | ||
489 | struct fifo_gk20a *f = &g->fifo; | ||
490 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
491 | |||
492 | return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size; | ||
493 | } | ||
494 | |||
495 | int vgpu_tegra_suspend(struct device *dev) | ||
496 | { | ||
497 | struct tegra_vgpu_cmd_msg msg = {}; | ||
498 | struct gk20a *g = get_gk20a(dev); | ||
499 | int err = 0; | ||
500 | |||
501 | msg.cmd = TEGRA_VGPU_CMD_SUSPEND; | ||
502 | msg.handle = vgpu_get_handle(g); | ||
503 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
504 | err = err ? err : msg.ret; | ||
505 | if (err) | ||
506 | nvgpu_err(g, "vGPU suspend failed"); | ||
507 | |||
508 | return err; | ||
509 | } | ||
510 | |||
511 | int vgpu_tegra_resume(struct device *dev) | ||
512 | { | ||
513 | struct tegra_vgpu_cmd_msg msg = {}; | ||
514 | struct gk20a *g = get_gk20a(dev); | ||
515 | int err = 0; | ||
516 | |||
517 | msg.cmd = TEGRA_VGPU_CMD_RESUME; | ||
518 | msg.handle = vgpu_get_handle(g); | ||
519 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
520 | err = err ? err : msg.ret; | ||
521 | if (err) | ||
522 | nvgpu_err(g, "vGPU resume failed"); | ||
523 | |||
524 | return err; | ||
525 | } | ||
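Nearly every command in this file repeats the `err = err ? err : msg.ret` fold. A hypothetical helper that would consolidate the parameterless cases; it is not present in the driver:

/* Hypothetical helper: issue a parameterless vgpu command and fold the
 * transport error and the server's return code into one value. */
static int example_simple_cmd(struct gk20a *g, u32 cmd)
{
	struct tegra_vgpu_cmd_msg msg = {
		.cmd = cmd,
		.handle = vgpu_get_handle(g),
	};
	int err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	return err ? err : msg.ret;
}

With such a helper, vgpu_tegra_suspend(), vgpu_tegra_resume() and the FECS trace enable/disable/poll paths would each reduce to a single call.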
diff --git a/include/os/linux/vgpu/vgpu_linux.h b/include/os/linux/vgpu/vgpu_linux.h new file mode 100644 index 0000000..ff7d3a6 --- /dev/null +++ b/include/os/linux/vgpu/vgpu_linux.h | |||
@@ -0,0 +1,68 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Linux Interfaces | ||
3 | * | ||
4 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef __VGPU_LINUX_H__ | ||
20 | #define __VGPU_LINUX_H__ | ||
21 | |||
22 | struct device; | ||
23 | struct platform_device; | ||
24 | |||
25 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
26 | |||
27 | #include <nvgpu/vgpu/vgpu.h> | ||
28 | |||
29 | int vgpu_pm_prepare_poweroff(struct device *dev); | ||
30 | int vgpu_pm_finalize_poweron(struct device *dev); | ||
31 | int vgpu_probe(struct platform_device *dev); | ||
32 | int vgpu_remove(struct platform_device *dev); | ||
33 | |||
34 | void vgpu_create_sysfs(struct device *dev); | ||
35 | void vgpu_remove_sysfs(struct device *dev); | ||
36 | |||
37 | int vgpu_tegra_suspend(struct device *dev); | ||
38 | int vgpu_tegra_resume(struct device *dev); | ||
39 | #else | ||
40 | /* define placeholders for functions used outside of vgpu */ | ||
41 | |||
42 | static inline int vgpu_pm_prepare_poweroff(struct device *dev) | ||
43 | { | ||
44 | return -ENOSYS; | ||
45 | } | ||
46 | static inline int vgpu_pm_finalize_poweron(struct device *dev) | ||
47 | { | ||
48 | return -ENOSYS; | ||
49 | } | ||
50 | static inline int vgpu_probe(struct platform_device *dev) | ||
51 | { | ||
52 | return -ENOSYS; | ||
53 | } | ||
54 | static inline int vgpu_remove(struct platform_device *dev) | ||
55 | { | ||
56 | return -ENOSYS; | ||
57 | } | ||
58 | static inline int vgpu_tegra_suspend(struct device *dev) | ||
59 | { | ||
60 | return -ENOSYS; | ||
61 | } | ||
62 | static inline int vgpu_tegra_resume(struct device *dev) | ||
63 | { | ||
64 | return -ENOSYS; | ||
65 | } | ||
66 | #endif | ||
67 | |||
68 | #endif | ||
diff --git a/include/os/linux/vm.c b/include/os/linux/vm.c new file mode 100644 index 0000000..dc807ab --- /dev/null +++ b/include/os/linux/vm.c | |||
@@ -0,0 +1,356 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/dma-buf.h> | ||
18 | #include <linux/scatterlist.h> | ||
19 | #include <uapi/linux/nvgpu.h> | ||
20 | |||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | #include <nvgpu/rbtree.h> | ||
24 | #include <nvgpu/vm_area.h> | ||
25 | #include <nvgpu/nvgpu_mem.h> | ||
26 | #include <nvgpu/page_allocator.h> | ||
27 | #include <nvgpu/vidmem.h> | ||
28 | #include <nvgpu/utils.h> | ||
29 | #include <nvgpu/gk20a.h> | ||
30 | |||
31 | #include <nvgpu/linux/vm.h> | ||
32 | #include <nvgpu/linux/nvgpu_mem.h> | ||
33 | |||
34 | #include "gk20a/mm_gk20a.h" | ||
35 | |||
36 | #include "platform_gk20a.h" | ||
37 | #include "os_linux.h" | ||
38 | #include "dmabuf.h" | ||
39 | #include "dmabuf_vidmem.h" | ||
40 | |||
41 | static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) | ||
42 | { | ||
43 | u32 core_flags = 0; | ||
44 | |||
45 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) | ||
46 | core_flags |= NVGPU_VM_MAP_FIXED_OFFSET; | ||
47 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) | ||
48 | core_flags |= NVGPU_VM_MAP_CACHEABLE; | ||
49 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT) | ||
50 | core_flags |= NVGPU_VM_MAP_IO_COHERENT; | ||
51 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE) | ||
52 | core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE; | ||
53 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC) | ||
54 | core_flags |= NVGPU_VM_MAP_L3_ALLOC; | ||
55 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) | ||
56 | core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; | ||
57 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC) | ||
58 | core_flags |= NVGPU_VM_MAP_PLATFORM_ATOMIC; | ||
59 | |||
60 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) | ||
61 | nvgpu_warn(g, "Ignoring deprecated flag: " | ||
62 | "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS"); | ||
63 | |||
64 | return core_flags; | ||
65 | } | ||
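As a worked example of the translation above, a fixed-offset cacheable map from userspace (a fragment, assuming `g` is in scope):

/* Worked example: userspace flags in, core flags out. */
u32 core = nvgpu_vm_translate_linux_flags(g,
		NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET |
		NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE);
/* core == (NVGPU_VM_MAP_FIXED_OFFSET | NVGPU_VM_MAP_CACHEABLE); all
 * other core flags are clear. */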
66 | |||
67 | static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( | ||
68 | struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind) | ||
69 | { | ||
70 | struct nvgpu_rbtree_node *node = NULL; | ||
71 | struct nvgpu_rbtree_node *root = vm->mapped_buffers; | ||
72 | |||
73 | nvgpu_rbtree_enum_start(0, &node, root); | ||
74 | |||
75 | while (node) { | ||
76 | struct nvgpu_mapped_buf *mapped_buffer = | ||
77 | mapped_buffer_from_rbtree_node(node); | ||
78 | |||
79 | if (mapped_buffer->os_priv.dmabuf == dmabuf && | ||
80 | mapped_buffer->kind == kind) | ||
81 | return mapped_buffer; | ||
82 | |||
83 | nvgpu_rbtree_enum_next(&node, node); | ||
84 | } | ||
85 | |||
86 | return NULL; | ||
87 | } | ||
88 | |||
89 | int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, | ||
90 | struct dma_buf **dmabuf, | ||
91 | u64 *offset) | ||
92 | { | ||
93 | struct nvgpu_mapped_buf *mapped_buffer; | ||
94 | struct gk20a *g = gk20a_from_vm(vm); | ||
95 | |||
96 | nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va); | ||
97 | |||
98 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | ||
99 | |||
100 | mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va); | ||
101 | if (!mapped_buffer) { | ||
102 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
103 | return -EINVAL; | ||
104 | } | ||
105 | |||
106 | *dmabuf = mapped_buffer->os_priv.dmabuf; | ||
107 | *offset = gpu_va - mapped_buffer->addr; | ||
108 | |||
109 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf) | ||
115 | { | ||
116 | return os_buf->dmabuf->size; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * vm->update_gmmu_lock must be held. This checks to see if we already have | ||
121 | * mapped the passed buffer into this VM. If so, just return the existing | ||
122 | * mapping address. | ||
123 | */ | ||
124 | struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | ||
125 | struct nvgpu_os_buffer *os_buf, | ||
126 | u64 map_addr, | ||
127 | u32 flags, | ||
128 | int kind) | ||
129 | { | ||
130 | struct gk20a *g = gk20a_from_vm(vm); | ||
131 | struct nvgpu_mapped_buf *mapped_buffer = NULL; | ||
132 | |||
133 | if (flags & NVGPU_VM_MAP_FIXED_OFFSET) { | ||
134 | mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr); | ||
135 | if (!mapped_buffer) | ||
136 | return NULL; | ||
137 | |||
138 | if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf || | ||
139 | mapped_buffer->kind != (u32)kind) | ||
140 | return NULL; | ||
141 | } else { | ||
142 | mapped_buffer = | ||
143 | __nvgpu_vm_find_mapped_buf_reverse(vm, | ||
144 | os_buf->dmabuf, | ||
145 | kind); | ||
146 | if (!mapped_buffer) | ||
147 | return NULL; | ||
148 | } | ||
149 | |||
150 | if (mapped_buffer->flags != flags) | ||
151 | return NULL; | ||
152 | |||
153 | /* | ||
154 | * If we find the mapping here then that means we have mapped it already | ||
155 | * and the prior pin and get must be undone. | ||
156 | */ | ||
157 | gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment, | ||
158 | mapped_buffer->os_priv.sgt); | ||
159 | dma_buf_put(os_buf->dmabuf); | ||
160 | |||
161 | nvgpu_log(g, gpu_dbg_map, | ||
162 | "gv: 0x%04x_%08x + 0x%-7zu " | ||
163 | "[dma: 0x%010llx, pa: 0x%010llx] " | ||
164 | "pgsz=%-3dKb as=%-2d " | ||
165 | "flags=0x%x apt=%s (reused)", | ||
166 | u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), | ||
167 | os_buf->dmabuf->size, | ||
168 | (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl), | ||
169 | (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), | ||
170 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | ||
171 | vm_aspace_id(vm), | ||
172 | mapped_buffer->flags, | ||
173 | nvgpu_aperture_str(g, | ||
174 | gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | ||
175 | |||
176 | return mapped_buffer; | ||
177 | } | ||
178 | |||
179 | int nvgpu_vm_map_linux(struct vm_gk20a *vm, | ||
180 | struct dma_buf *dmabuf, | ||
181 | u64 map_addr, | ||
182 | u32 flags, | ||
183 | u32 page_size, | ||
184 | s16 compr_kind, | ||
185 | s16 incompr_kind, | ||
186 | int rw_flag, | ||
187 | u64 buffer_offset, | ||
188 | u64 mapping_size, | ||
189 | struct vm_gk20a_mapping_batch *batch, | ||
190 | u64 *gpu_va) | ||
191 | { | ||
192 | struct gk20a *g = gk20a_from_vm(vm); | ||
193 | struct device *dev = dev_from_gk20a(g); | ||
194 | struct nvgpu_os_buffer os_buf; | ||
195 | struct sg_table *sgt; | ||
196 | struct nvgpu_sgt *nvgpu_sgt = NULL; | ||
197 | struct nvgpu_mapped_buf *mapped_buffer = NULL; | ||
198 | struct dma_buf_attachment *attachment; | ||
199 | int err = 0; | ||
200 | |||
201 | sgt = gk20a_mm_pin(dev, dmabuf, &attachment); | ||
202 | if (IS_ERR(sgt)) { | ||
203 | nvgpu_warn(g, "Failed to pin dma_buf!"); | ||
204 | return PTR_ERR(sgt); | ||
205 | } | ||
206 | os_buf.dmabuf = dmabuf; | ||
207 | os_buf.attachment = attachment; | ||
208 | os_buf.dev = dev; | ||
209 | |||
210 | if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) { | ||
211 | err = -EINVAL; | ||
212 | goto clean_up; | ||
213 | } | ||
214 | |||
215 | nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt); | ||
216 | if (!nvgpu_sgt) { | ||
217 | err = -ENOMEM; | ||
218 | goto clean_up; | ||
219 | } | ||
220 | |||
221 | mapped_buffer = nvgpu_vm_map(vm, | ||
222 | &os_buf, | ||
223 | nvgpu_sgt, | ||
224 | map_addr, | ||
225 | mapping_size, | ||
226 | buffer_offset, | ||
227 | rw_flag, | ||
228 | flags, | ||
229 | compr_kind, | ||
230 | incompr_kind, | ||
231 | batch, | ||
232 | gk20a_dmabuf_aperture(g, dmabuf)); | ||
233 | |||
234 | nvgpu_sgt_free(g, nvgpu_sgt); | ||
235 | |||
236 | if (IS_ERR(mapped_buffer)) { | ||
237 | err = PTR_ERR(mapped_buffer); | ||
238 | goto clean_up; | ||
239 | } | ||
240 | |||
241 | mapped_buffer->os_priv.dmabuf = dmabuf; | ||
242 | mapped_buffer->os_priv.attachment = attachment; | ||
243 | mapped_buffer->os_priv.sgt = sgt; | ||
244 | |||
245 | *gpu_va = mapped_buffer->addr; | ||
246 | return 0; | ||
247 | |||
248 | clean_up: | ||
249 | gk20a_mm_unpin(dev, dmabuf, attachment, sgt); | ||
250 | |||
251 | return err; | ||
252 | } | ||
253 | |||
254 | int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | ||
255 | int dmabuf_fd, | ||
256 | u64 *map_addr, | ||
257 | u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ | ||
258 | u32 page_size, | ||
259 | s16 compr_kind, | ||
260 | s16 incompr_kind, | ||
261 | u64 buffer_offset, | ||
262 | u64 mapping_size, | ||
263 | struct vm_gk20a_mapping_batch *batch) | ||
264 | { | ||
265 | struct gk20a *g = gk20a_from_vm(vm); | ||
266 | struct dma_buf *dmabuf; | ||
267 | u64 ret_va; | ||
268 | int err = 0; | ||
269 | |||
270 | /* get ref to the mem handle (released on unmap_locked) */ | ||
271 | dmabuf = dma_buf_get(dmabuf_fd); | ||
272 | if (IS_ERR(dmabuf)) { | ||
273 | nvgpu_warn(g, "%s: fd %d is not a dmabuf", | ||
274 | __func__, dmabuf_fd); | ||
275 | return PTR_ERR(dmabuf); | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * For regular maps we do not accept either an input address or a | ||
280 | * buffer_offset. | ||
281 | */ | ||
282 | if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) && | ||
283 | (buffer_offset || *map_addr)) { | ||
284 | nvgpu_err(g, | ||
285 | "Regular map with addr/buf offset is not supported!"); | ||
286 | dma_buf_put(dmabuf); | ||
287 | return -EINVAL; | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * Map size is always the buffer size for non-fixed mappings, so | ||
292 | * userspace should leave the map size as zero for non-fixed maps. | ||
293 | */ | ||
294 | if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { | ||
295 | nvgpu_err(g, "map_size && non-fixed-mapping!"); | ||
296 | dma_buf_put(dmabuf); | ||
297 | return -EINVAL; | ||
298 | } | ||
299 | |||
300 | /* verify that we're not overflowing the buffer, i.e. reject | ||
301 | * (buffer_offset + mapping_size) > dmabuf->size. | ||
302 | * | ||
303 | * Since buffer_offset + mapping_size could overflow, first check | ||
304 | * that mapping size < dmabuf_size, at which point we can subtract | ||
305 | * mapping_size from both sides for the final comparison. | ||
306 | */ | ||
307 | if ((mapping_size > dmabuf->size) || | ||
308 | (buffer_offset > (dmabuf->size - mapping_size))) { | ||
309 | nvgpu_err(g, | ||
310 | "buf size %llx < (offset(%llx) + map_size(%llx))", | ||
311 | (u64)dmabuf->size, buffer_offset, mapping_size); | ||
312 | dma_buf_put(dmabuf); | ||
313 | return -EINVAL; | ||
314 | } | ||
315 | |||
316 | err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); | ||
317 | if (err) { | ||
318 | dma_buf_put(dmabuf); | ||
319 | return err; | ||
320 | } | ||
321 | |||
322 | err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr, | ||
323 | nvgpu_vm_translate_linux_flags(g, flags), | ||
324 | page_size, | ||
325 | compr_kind, incompr_kind, | ||
326 | gk20a_mem_flag_none, | ||
327 | buffer_offset, | ||
328 | mapping_size, | ||
329 | batch, | ||
330 | &ret_va); | ||
331 | |||
332 | if (!err) | ||
333 | *map_addr = ret_va; | ||
334 | else | ||
335 | dma_buf_put(dmabuf); | ||
336 | |||
337 | return err; | ||
338 | } | ||
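The bounds check above deliberately avoids computing buffer_offset + mapping_size, which could wrap in u64. Isolated as a predicate, with a concrete wrap case it guards against; the function is illustrative only:

/* True when [offset, offset + size) fits in a buffer of buf_size bytes,
 * with no u64 wrap-around. E.g. buf_size = 0x1000, offset =
 * 0xffffffffffffff00, size = 0x200: the naive sum wraps to 0x100 and
 * would pass, while this form correctly rejects it. */
static bool example_window_fits(u64 buf_size, u64 offset, u64 size)
{
	return size <= buf_size && offset <= buf_size - size;
}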
339 | |||
340 | /* | ||
341 | * This is the function call-back for freeing OS specific components of an | ||
342 | * nvgpu_mapped_buf. This should most likely never be called outside of the | ||
343 | * core MM framework! | ||
344 | * | ||
345 | * Note: the VM lock will be held. | ||
346 | */ | ||
347 | void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer) | ||
348 | { | ||
349 | struct vm_gk20a *vm = mapped_buffer->vm; | ||
350 | |||
351 | gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf, | ||
352 | mapped_buffer->os_priv.attachment, | ||
353 | mapped_buffer->os_priv.sgt); | ||
354 | |||
355 | dma_buf_put(mapped_buffer->os_priv.dmabuf); | ||
356 | } | ||
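Taken together, the paths in this file keep the dma-buf reference and pin counts balanced; in outline:

/*
 * Reference/pin lifecycle (outline of the code above):
 *
 *   nvgpu_vm_map_buffer()    dma_buf_get()           +1 ref
 *   nvgpu_vm_map_linux()     gk20a_mm_pin()          +1 pin (sg_table)
 *   nvgpu_vm_find_mapping()  unpin + dma_buf_put()   drops the duplicate
 *                                                    taken for a re-map
 *   nvgpu_vm_unmap_system()  gk20a_mm_unpin()
 *                            + dma_buf_put()         releases the originals
 */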
diff --git a/include/os/linux/vpr.c b/include/os/linux/vpr.c new file mode 100644 index 0000000..3a98125 --- /dev/null +++ b/include/os/linux/vpr.c | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/vpr.h> | ||
15 | |||
16 | #include <linux/init.h> | ||
17 | #include <linux/platform/tegra/common.h> | ||
18 | |||
19 | bool nvgpu_is_vpr_resize_enabled(void) | ||
20 | { | ||
21 | return tegra_is_vpr_resize_supported(); | ||
22 | } | ||