Diffstat (limited to 'include/os/linux')
 146 files changed, 31469 insertions, 0 deletions

diff --git a/include/os/linux/cde.c b/include/os/linux/cde.c
new file mode 100644
index 0000000..715513c
--- /dev/null
+++ b/include/os/linux/cde.c
@@ -0,0 +1,1794 @@
/*
 * Color decompression engine support
 *
 * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/dma-buf.h>
#include <uapi/linux/nvgpu.h>

#include <trace/events/gk20a.h>

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/timers.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/firmware.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/channel.h>
#include <nvgpu/utils.h>
#include <nvgpu/gk20a.h>

#include <nvgpu/linux/vm.h>

#include "gk20a/mm_gk20a.h"
#include "gk20a/fence_gk20a.h"
#include "gk20a/gr_gk20a.h"

#include "cde.h"
#include "os_linux.h"
#include "dmabuf.h"
#include "channel.h"
#include "cde_gm20b.h"
#include "cde_gp10b.h"

#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);

#define CTX_DELETE_TIME 1000

#define MAX_CTX_USE_COUNT 42
#define MAX_CTX_RETRY_TIME 2000

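/*
 * Translate a GPU virtual address within @vm back to the base IO virtual
 * address of the underlying buffer, by looking up the mapped buffer and
 * taking the first entry of its scatter-gather list. Returns 0 when no
 * mapping covers @gpu_vaddr.
 */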
static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
{
	struct nvgpu_mapped_buf *buffer;
	dma_addr_t addr = 0;
	struct gk20a *g = gk20a_from_vm(vm);

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
	if (buffer)
		addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return addr;
}

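/*
 * Free everything that gk20a_init_cde_img() set up for a context: the
 * DMA buffers described by the firmware image and the combined gpfifo
 * command buffer, then reset the bookkeeping fields.
 */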
static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
{
	unsigned int i;

	for (i = 0; i < cde_ctx->num_bufs; i++) {
		struct nvgpu_mem *mem = cde_ctx->mem + i;
		nvgpu_dma_unmap_free(cde_ctx->vm, mem);
	}

	nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);

	cde_ctx->convert_cmd = NULL;
	cde_ctx->init_convert_cmd = NULL;
	cde_ctx->num_bufs = 0;
	cde_ctx->num_params = 0;
	cde_ctx->init_cmd_num_entries = 0;
	cde_ctx->convert_cmd_num_entries = 0;
	cde_ctx->init_cmd_executed = false;
}

static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
__must_hold(&cde_app->mutex)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct channel_gk20a *ch = cde_ctx->ch;
	struct vm_gk20a *vm = ch->vm;

	trace_gk20a_cde_remove_ctx(cde_ctx);

	/* release mapped memory */
	gk20a_deinit_cde_img(cde_ctx);
	nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
			 cde_ctx->backing_store_vaddr);

	/*
	 * free the channel
	 * gk20a_channel_close() will also unbind the channel from TSG
	 */
	gk20a_channel_close(ch);
	nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);

	/* housekeeping on app */
	nvgpu_list_del(&cde_ctx->list);
	l->cde_app.ctx_count--;
	nvgpu_kfree(g, cde_ctx);
}

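/*
 * Cancel the delayed deleter work of a temporary context. When
 * @wait_finish is set, the app mutex is dropped around the synchronous
 * cancel so that a deleter work already running (which itself takes the
 * mutex) cannot deadlock against us.
 */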
static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
		bool wait_finish)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;

	/* permanent contexts do not have deleter works */
	if (!cde_ctx->is_temporary)
		return;

	if (wait_finish) {
		nvgpu_mutex_release(&cde_app->mutex);
		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
		nvgpu_mutex_acquire(&cde_app->mutex);
	} else {
		cancel_delayed_work(&cde_ctx->ctx_deleter_work);
	}
}

static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	/* safe to go off the mutex in cancel_deleter since the app is
	 * deinitialised; no new jobs are started. deleter works can only be
	 * waiting on the mutex or earlier, and will abort once they run */

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}
}

static void gk20a_cde_stop(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;

	/* prevent further conversions and delayed works from working */
	cde_app->initialised = false;
	/* free all data, empty the list */
	gk20a_cde_remove_contexts(l);
}

void gk20a_cde_destroy(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;

	if (!cde_app->initialised)
		return;

	nvgpu_mutex_acquire(&cde_app->mutex);
	gk20a_cde_stop(l);
	nvgpu_mutex_release(&cde_app->mutex);

	nvgpu_mutex_destroy(&cde_app->mutex);
}

void gk20a_cde_suspend(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	if (!cde_app->initialised)
		return;

	nvgpu_mutex_acquire(&cde_app->mutex);

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	nvgpu_mutex_release(&cde_app->mutex);
}

static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	cde_ctx = gk20a_cde_allocate_context(l);
	if (IS_ERR(cde_ctx))
		return PTR_ERR(cde_ctx);

	nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
	cde_app->ctx_count++;
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;

	return 0;
}

static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	int err;
	int i;

	for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
		err = gk20a_cde_create_context(l);
		if (err)
			goto out;
	}

	return 0;
out:
	gk20a_cde_remove_contexts(l);
	return err;
}

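/*
 * TYPE_BUF element handler: allocate a sysmem buffer of the requested
 * size, map it into the context's VM and, when the element carries a
 * data section, seed the buffer with the payload from the firmware
 * image.
 */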
static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
			      struct nvgpu_firmware *img,
			      struct gk20a_cde_hdr_buf *buf)
{
	struct nvgpu_mem *mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* check that the file can hold the buf */
	if (buf->data_byte_offset != 0 &&
	    buf->data_byte_offset + buf->num_bytes > img->size) {
		nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	/* check that we have enough buf elems available */
	if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
		nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -ENOMEM;
	}

	/* allocate buf */
	mem = cde_ctx->mem + cde_ctx->num_bufs;
	err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
	if (err) {
		nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -ENOMEM;
	}

	/* copy the content */
	if (buf->data_byte_offset != 0)
		memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
		       buf->num_bytes);

	cde_ctx->num_bufs++;

	return 0;
}

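/*
 * Patch a value into a buffer word (or double word) at @target: the new
 * @value is first shifted (left for a positive @shift, right for a
 * negative one), then masked, and finally merged with the bits of the
 * existing data that fall outside @mask. For example, a
 * TYPE_PARAM_TYPE_U32 patch with shift=-8 and mask=0xffffffff writes
 * bits 8..39 of a 64-bit GPU VA into the target word.
 * TYPE_PARAM_TYPE_U64_BIG additionally swaps the two 32-bit halves on
 * both read and write.
 */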
static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
			      int type, s32 shift, u64 mask, u64 value)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr = target;
	u64 *target_mem_ptr_u64 = target;
	u64 current_value, new_value;

	value = (shift >= 0) ? value << shift : value >> -shift;
	value &= mask;

	/* read current data from the location */
	current_value = 0;
	if (type == TYPE_PARAM_TYPE_U32) {
		if (mask != 0xfffffffful)
			current_value = *target_mem_ptr;
	} else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
		if (mask != ~0ul)
			current_value = *target_mem_ptr_u64;
	} else if (type == TYPE_PARAM_TYPE_U64_BIG) {
		current_value = *target_mem_ptr_u64;
		current_value = (u64)(current_value >> 32) |
			(u64)(current_value << 32);
	} else {
		nvgpu_warn(g, "cde: unknown type. type=%d",
			   type);
		return -EINVAL;
	}

	current_value &= ~mask;
	new_value = current_value | value;

	/* store the element data back */
	if (type == TYPE_PARAM_TYPE_U32)
		*target_mem_ptr = (u32)new_value;
	else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
		*target_mem_ptr_u64 = new_value;
	else {
		new_value = (u64)(new_value >> 32) |
			(u64)(new_value << 32);
		*target_mem_ptr_u64 = new_value;
	}

	return 0;
}

static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
				  struct nvgpu_firmware *img,
				  struct gk20a_cde_hdr_replace *replace)
{
	struct nvgpu_mem *source_mem;
	struct nvgpu_mem *target_mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr;
	u64 vaddr;
	int err;

	if (replace->target_buf >= cde_ctx->num_bufs ||
	    replace->source_buf >= cde_ctx->num_bufs) {
		nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
			   replace->target_buf, replace->source_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	source_mem = cde_ctx->mem + replace->source_buf;
	target_mem = cde_ctx->mem + replace->target_buf;
	target_mem_ptr = target_mem->cpu_va;

	if (source_mem->size < (replace->source_byte_offset + 3) ||
	    target_mem->size < (replace->target_byte_offset + 3)) {
		nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
			   replace->target_byte_offset,
			   replace->source_byte_offset,
			   source_mem->size,
			   target_mem->size);
		return -EINVAL;
	}

	/* calculate the target pointer */
	target_mem_ptr += (replace->target_byte_offset / sizeof(u32));

	/* determine patch value */
	vaddr = source_mem->gpu_va + replace->source_byte_offset;
	err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
				 replace->shift, replace->mask,
				 vaddr);
	if (err) {
		nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
			   err, replace->target_buf,
			   replace->target_byte_offset,
			   replace->source_buf,
			   replace->source_byte_offset);
	}

	return err;
}

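/*
 * Resolve all runtime parameters of a context right before submission:
 * reserved parameter ids are filled from the hardware configuration and
 * from the buffer addresses of the current conversion, user parameter
 * ids from the values passed in through gk20a_cde_convert(), and each
 * resolved value is patched into its target command buffer word.
 */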
static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_mem *target_mem;
	u32 *target_mem_ptr;
	u64 new_data;
	int user_id = 0, err;
	unsigned int i;

	for (i = 0; i < cde_ctx->num_params; i++) {
		struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
		target_mem = cde_ctx->mem + param->target_buf;
		target_mem_ptr = target_mem->cpu_va;
		target_mem_ptr += (param->target_byte_offset / sizeof(u32));

		switch (param->id) {
		case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
			new_data = g->gr.comptags_per_cacheline;
			break;
		case TYPE_PARAM_GPU_CONFIGURATION:
			new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
				g->gr.cacheline_size;
			break;
		case TYPE_PARAM_FIRSTPAGEOFFSET:
			new_data = cde_ctx->surf_param_offset;
			break;
		case TYPE_PARAM_NUMPAGES:
			new_data = cde_ctx->surf_param_lines;
			break;
		case TYPE_PARAM_BACKINGSTORE:
			new_data = cde_ctx->backing_store_vaddr;
			break;
		case TYPE_PARAM_DESTINATION:
			new_data = cde_ctx->compbit_vaddr;
			break;
		case TYPE_PARAM_DESTINATION_SIZE:
			new_data = cde_ctx->compbit_size;
			break;
		case TYPE_PARAM_BACKINGSTORE_SIZE:
			new_data = g->gr.compbit_store.mem.size;
			break;
		case TYPE_PARAM_SOURCE_SMMU_ADDR:
			new_data = gpuva_to_iova_base(cde_ctx->vm,
						      cde_ctx->surf_vaddr);
			if (new_data == 0) {
				nvgpu_warn(g, "cde: failed to find 0x%llx",
					   cde_ctx->surf_vaddr);
				return -EINVAL;
			}
			break;
		case TYPE_PARAM_BACKINGSTORE_BASE_HW:
			new_data = g->gr.compbit_store.base_hw;
			break;
		case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
			new_data = g->gr.gobs_per_comptagline_per_slice;
			break;
		case TYPE_PARAM_SCATTERBUFFER:
			new_data = cde_ctx->scatterbuffer_vaddr;
			break;
		case TYPE_PARAM_SCATTERBUFFER_SIZE:
			new_data = cde_ctx->scatterbuffer_size;
			break;
		default:
			user_id = param->id - NUM_RESERVED_PARAMS;
			if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
				continue;
			new_data = cde_ctx->user_param_values[user_id];
		}

		nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
			  i, param->id, param->target_buf,
			  param->target_byte_offset, new_data,
			  param->data_offset, param->type, param->shift,
			  param->mask);

		new_data += param->data_offset;

		err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
					 param->shift, param->mask, new_data);

		if (err) {
			nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
				   err, i, param->id, param->target_buf,
				   param->target_byte_offset, new_data);
			return err;
		}
	}

	return 0;
}

static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
				struct nvgpu_firmware *img,
				struct gk20a_cde_hdr_param *param)
{
	struct nvgpu_mem *target_mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;

	if (param->target_buf >= cde_ctx->num_bufs) {
		nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
			   cde_ctx->num_params, param->target_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	target_mem = cde_ctx->mem + param->target_buf;
	if (target_mem->size < (param->target_byte_offset + 3)) {
		nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
			   cde_ctx->num_params, param->target_byte_offset,
			   target_mem->size);
		return -EINVAL;
	}

	/* does this parameter fit into our parameter structure */
	if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
		nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
			   cde_ctx->num_params);
		return -ENOMEM;
	}

	/* is the given id valid? */
	if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
		nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
			   cde_ctx->num_params, param->id,
			   NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
		return -EINVAL;
	}

	cde_ctx->params[cde_ctx->num_params] = *param;
	cde_ctx->num_params++;

	return 0;
}

static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
					 struct nvgpu_firmware *img,
					 u32 required_class)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* CDE enabled */
	cde_ctx->ch->cde = true;

	err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
	if (err) {
		nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
			   err);
		return err;
	}

	return 0;
}

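/*
 * TYPE_COMMAND element handler: translate the firmware's command
 * elements into an array of gpfifo entries, one per element, each
 * pointing at a region of a previously created buffer. The INIT and
 * CONVERT commands are collected separately and later merged by
 * gk20a_cde_pack_cmdbufs().
 */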
static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
				  struct nvgpu_firmware *img,
				  u32 op,
				  struct gk20a_cde_cmd_elem *cmd_elem,
				  u32 num_elems)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem;
	u32 *num_entries;
	unsigned int i;

	/* check command type */
	if (op == TYPE_BUF_COMMAND_INIT) {
		gpfifo = &cde_ctx->init_convert_cmd;
		num_entries = &cde_ctx->init_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
		gpfifo = &cde_ctx->convert_cmd;
		num_entries = &cde_ctx->convert_cmd_num_entries;
	} else {
		nvgpu_warn(g, "cde: unknown command. op=%u",
			   op);
		return -EINVAL;
	}

	/* allocate gpfifo entries to be pushed */
	*gpfifo = nvgpu_kzalloc(g,
				sizeof(struct nvgpu_gpfifo_entry) * num_elems);
	if (!*gpfifo) {
		nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
		return -ENOMEM;
	}

	gpfifo_elem = *gpfifo;
	for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
		struct nvgpu_mem *target_mem;

		/* validate the current entry */
		if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
			nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
				   cmd_elem->target_buf, cde_ctx->num_bufs);
			return -EINVAL;
		}

		target_mem = cde_ctx->mem + cmd_elem->target_buf;
		if (target_mem->size <
		    cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
			nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
				   target_mem->size,
				   cmd_elem->target_byte_offset,
				   cmd_elem->num_bytes);
			return -EINVAL;
		}

		/* store the element into gpfifo */
		gpfifo_elem->entry0 =
			u64_lo32(target_mem->gpu_va +
			cmd_elem->target_byte_offset);
		gpfifo_elem->entry1 =
			u64_hi32(target_mem->gpu_va +
			cmd_elem->target_byte_offset) |
			pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
						 sizeof(u32));
	}

	*num_entries = num_elems;
	return 0;
}

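/*
 * Merge the INIT and CONVERT gpfifo arrays into one contiguous
 * allocation, init entries first. This lets the first submission push
 * init and convert together as a single gpfifo range, while later
 * submissions can start at the convert_cmd pointer and skip the init
 * part.
 */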
static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
		sizeof(struct nvgpu_gpfifo_entry);
	unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
		sizeof(struct nvgpu_gpfifo_entry);
	unsigned long total_bytes = init_bytes + conv_bytes;
	struct nvgpu_gpfifo_entry *combined_cmd;

	/* allocate buffer that has space for both */
	combined_cmd = nvgpu_kzalloc(g, total_bytes);
	if (!combined_cmd) {
		nvgpu_warn(g,
			   "cde: could not allocate memory for gpfifo entries");
		return -ENOMEM;
	}

	/* move the original init here and append convert */
	memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
	memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
	       cde_ctx->convert_cmd, conv_bytes);

	nvgpu_kfree(g, cde_ctx->init_convert_cmd);
	nvgpu_kfree(g, cde_ctx->convert_cmd);

	cde_ctx->init_convert_cmd = combined_cmd;
	cde_ctx->convert_cmd = combined_cmd
		+ cde_ctx->init_cmd_num_entries;

	return 0;
}

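/*
 * Parse the CDE firmware image. The image starts with two u32s, the
 * firmware version and the element count, followed by an array of
 * header elements: buffers to allocate, address replacements, runtime
 * parameters, the required engine class, command buffers and constant
 * arrays. Everything set up here is torn down again on failure.
 */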
static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
			      struct nvgpu_firmware *img)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	u32 *data = (u32 *)img->data;
	u32 num_of_elems;
	struct gk20a_cde_hdr_elem *elem;
	u32 min_size = 0;
	int err = 0;
	unsigned int i;

	min_size += 2 * sizeof(u32);
	if (img->size < min_size) {
		nvgpu_warn(g, "cde: invalid image header");
		return -EINVAL;
	}

	cde_app->firmware_version = data[0];
	num_of_elems = data[1];

	min_size += num_of_elems * sizeof(*elem);
	if (img->size < min_size) {
		nvgpu_warn(g, "cde: bad image");
		return -EINVAL;
	}

	elem = (struct gk20a_cde_hdr_elem *)&data[2];
	for (i = 0; i < num_of_elems; i++) {
		int err = 0;
		switch (elem->type) {
		case TYPE_BUF:
			err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
			break;
		case TYPE_REPLACE:
			err = gk20a_init_cde_replace(cde_ctx, img,
						     &elem->replace);
			break;
		case TYPE_PARAM:
			err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
			break;
		case TYPE_REQUIRED_CLASS:
			err = gk20a_init_cde_required_class(cde_ctx, img,
							    elem->required_class);
			break;
		case TYPE_COMMAND:
		{
			struct gk20a_cde_cmd_elem *cmd = (void *)
				&img->data[elem->command.data_byte_offset];
			err = gk20a_init_cde_command(cde_ctx, img,
						     elem->command.op, cmd,
						     elem->command.num_entries);
			break;
		}
		case TYPE_ARRAY:
			memcpy(&cde_app->arrays[elem->array.id][0],
			       elem->array.data,
			       MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
			break;
		default:
			nvgpu_warn(g, "cde: unknown header element");
			err = -EINVAL;
		}

		if (err)
			goto deinit_image;

		elem++;
	}

	if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
		nvgpu_warn(g, "cde: init command not defined");
		err = -EINVAL;
		goto deinit_image;
	}

	if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
		nvgpu_warn(g, "cde: convert command not defined");
		err = -EINVAL;
		goto deinit_image;
	}

	err = gk20a_cde_pack_cmdbufs(cde_ctx);
	if (err)
		goto deinit_image;

	return 0;

deinit_image:
	gk20a_deinit_cde_img(cde_ctx);
	return err;
}

static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
				    u32 op, struct nvgpu_channel_fence *fence,
				    u32 flags, struct gk20a_fence **fence_out)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_gpfifo_entry *gpfifo = NULL;
	int num_entries = 0;

	/* check command type */
	if (op == TYPE_BUF_COMMAND_INIT) {
		/* both init and convert combined */
		gpfifo = cde_ctx->init_convert_cmd;
		num_entries = cde_ctx->init_cmd_num_entries
			+ cde_ctx->convert_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
		gpfifo = cde_ctx->convert_cmd;
		num_entries = cde_ctx->convert_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_NOOP) {
		/* Any non-null gpfifo will suffice with 0 num_entries */
		gpfifo = cde_ctx->init_convert_cmd;
		num_entries = 0;
	} else {
		nvgpu_warn(g, "cde: unknown buffer");
		return -EINVAL;
	}

	if (gpfifo == NULL) {
		nvgpu_warn(g, "cde: buffer not available");
		return -ENOSYS;
	}

	return nvgpu_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo,
			num_entries, flags, fence, fence_out);
}

static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
	struct gk20a *g = &cde_ctx->l->g;

	nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
	trace_gk20a_cde_release(cde_ctx);

	nvgpu_mutex_acquire(&cde_app->mutex);

	if (cde_ctx->in_use) {
		cde_ctx->in_use = false;
		nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
		cde_app->ctx_usecount--;
	} else {
		nvgpu_log_info(g, "double release cde context %p", cde_ctx);
	}

	nvgpu_mutex_release(&cde_app->mutex);
}

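/*
 * Delayed work that frees a temporary context once it has stayed idle
 * for CTX_DELETE_TIME milliseconds. It bails out if the context was
 * taken into use again or the app is shutting down, and simply returns
 * when the GPU cannot be powered on, leaving deletion for a later
 * attempt.
 */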
static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct delayed_work *delay_work = to_delayed_work(work);
	struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
			struct gk20a_cde_ctx, ctx_deleter_work);
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* someone has just taken it? engine deletion started? */
	if (cde_ctx->in_use || !cde_app->initialised)
		return;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: attempting to delete temporary %p", cde_ctx);

	err = gk20a_busy(g);
	if (err) {
		/* this context would find new use anyway later, so not freeing
		 * here does not leak anything */
		nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
			   " temp ctx deletion");
		return;
	}

	nvgpu_mutex_acquire(&cde_app->mutex);
	if (cde_ctx->in_use || !cde_app->initialised) {
		nvgpu_log(g, gpu_dbg_cde_ctx,
			  "cde: context use raced, not deleting %p",
			  cde_ctx);
		goto out;
	}

	WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
	     "double pending %p", cde_ctx);

	gk20a_cde_remove_ctx(cde_ctx);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: destroyed %p count=%d use=%d max=%d",
		  cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
		  cde_app->ctx_count_top);

out:
	nvgpu_mutex_release(&cde_app->mutex);
	gk20a_idle(g);
}

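/*
 * Context acquisition policy: refuse with -EAGAIN when too many
 * contexts are already in flight, hand out an idle pre-allocated
 * context when one exists, and otherwise allocate a temporary context
 * that gets torn down by the deleter work after use.
 */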
static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
__must_hold(&cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	/* exhausted? */

	if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
		return ERR_PTR(-EAGAIN);

	/* idle context available? */

	if (!nvgpu_list_empty(&cde_app->free_contexts)) {
		cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
				gk20a_cde_ctx, list);
		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
			  "cde: got free %p count=%d use=%d max=%d",
			  cde_ctx, cde_app->ctx_count,
			  cde_app->ctx_usecount,
			  cde_app->ctx_count_top);
		trace_gk20a_cde_get_context(cde_ctx);

		/* deleter work may be scheduled, but in_use prevents it */
		cde_ctx->in_use = true;
		nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
		cde_app->ctx_usecount++;

		/* cancel any deletions now that ctx is in use */
		gk20a_cde_cancel_deleter(cde_ctx, true);
		return cde_ctx;
	}

	/* no free contexts, get a temporary one */

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: no free contexts, count=%d",
		  cde_app->ctx_count);

	cde_ctx = gk20a_cde_allocate_context(l);
	if (IS_ERR(cde_ctx)) {
		nvgpu_warn(g, "cde: cannot allocate context: %ld",
			   PTR_ERR(cde_ctx));
		return cde_ctx;
	}

	trace_gk20a_cde_get_context(cde_ctx);
	cde_ctx->in_use = true;
	cde_ctx->is_temporary = true;
	cde_app->ctx_usecount++;
	cde_app->ctx_count++;
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;
	nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);

	return cde_ctx;
}

static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx = NULL;
	struct nvgpu_timeout timeout;

	nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
			   NVGPU_TIMER_CPU_TIMER);

	do {
		cde_ctx = gk20a_cde_do_get_context(l);
		if (PTR_ERR(cde_ctx) != -EAGAIN)
			break;

		/* exhausted, retry */
		nvgpu_mutex_release(&cde_app->mutex);
		cond_resched();
		nvgpu_mutex_acquire(&cde_app->mutex);
	} while (!nvgpu_timeout_expired(&timeout));

	return cde_ctx;
}

static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_ctx *cde_ctx;
	int ret;

	cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
	if (!cde_ctx)
		return ERR_PTR(-ENOMEM);

	cde_ctx->l = l;
	cde_ctx->dev = dev_from_gk20a(g);

	ret = gk20a_cde_load(cde_ctx);
	if (ret) {
		nvgpu_kfree(g, cde_ctx);
		return ERR_PTR(ret);
	}

	nvgpu_init_list_node(&cde_ctx->list);
	cde_ctx->is_temporary = false;
	cde_ctx->in_use = false;
	INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
			  gk20a_cde_ctx_deleter_fn);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
	trace_gk20a_cde_allocate_context(cde_ctx);
	return cde_ctx;
}

static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm,
				       u32 map_offset, u32 map_size)
{
	struct gk20a *g = gk20a_from_vm(vm);

	/*
	 * To be simple we will just make the map size depend on the
	 * iommu'ability of the driver. If there's an IOMMU we can rely on
	 * buffers being contiguous. If not, then we'll use 4k pages since we
	 * know that will work for any buffer.
	 */
	if (!nvgpu_iommuable(g))
		return SZ_4K;

	/*
	 * If map size or offset is not 64K aligned then use small pages.
	 */
	if (map_size & (vm->big_page_size - 1) ||
	    map_offset & (vm->big_page_size - 1))
		return SZ_4K;

	return vm->big_page_size;
}

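/*
 * Main conversion entry point. The dma-buf passed in holds the
 * compression bits starting at @compbits_byte_offset and, optionally,
 * a scatter buffer behind them at @scatterbuffer_byte_offset. The
 * region is mapped into the CDE context's VM, the scatter buffer is
 * populated from the buffer's sg table if the hardware needs one, the
 * firmware parameters are patched, and finally the conversion command
 * buffer is submitted on the context's channel.
 */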
int gk20a_cde_convert(struct nvgpu_os_linux *l,
		      struct dma_buf *compbits_scatter_buf,
		      u64 compbits_byte_offset,
		      u64 scatterbuffer_byte_offset,
		      struct nvgpu_channel_fence *fence,
		      u32 __flags, struct gk20a_cde_param *params,
		      int num_params, struct gk20a_fence **fence_out)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_ctx *cde_ctx = NULL;
	struct gk20a_comptags comptags;
	struct nvgpu_os_buffer os_buf = {
		compbits_scatter_buf,
		NULL,
		dev_from_gk20a(g)
	};
	u64 mapped_compbits_offset = 0;
	u64 compbits_size = 0;
	u64 mapped_scatterbuffer_offset = 0;
	u64 scatterbuffer_size = 0;
	u64 map_vaddr = 0;
	u64 map_offset = 0;
	u64 map_size = 0;
	u8 *surface = NULL;
	u64 big_page_mask = 0;
	u32 flags;
	int err, i;
	const s16 compbits_kind = 0;
	u32 submit_op;
	struct dma_buf_attachment *attachment;

	nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
		  compbits_byte_offset, scatterbuffer_byte_offset);

	/* scatter buffer must be after compbits buffer */
	if (scatterbuffer_byte_offset &&
	    scatterbuffer_byte_offset < compbits_byte_offset)
		return -EINVAL;

	err = gk20a_busy(g);
	if (err)
		return err;

	nvgpu_mutex_acquire(&l->cde_app.mutex);
	cde_ctx = gk20a_cde_get_context(l);
	nvgpu_mutex_release(&l->cde_app.mutex);
	if (IS_ERR(cde_ctx)) {
		err = PTR_ERR(cde_ctx);
		goto exit_idle;
	}

	/* First, map the buffer to local va */

	/* ensure that the compbits buffer has drvdata */
	err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
					 dev_from_gk20a(g));
	if (err)
		goto exit_idle;

	/* compbits don't start at page aligned offset, so we need to align
	   the region to be mapped */
	big_page_mask = cde_ctx->vm->big_page_size - 1;
	map_offset = compbits_byte_offset & ~big_page_mask;
	map_size = compbits_scatter_buf->size - map_offset;

	/* compute compbit start offset from the beginning of the mapped
	   area */
	mapped_compbits_offset = compbits_byte_offset - map_offset;
	if (scatterbuffer_byte_offset) {
		compbits_size = scatterbuffer_byte_offset -
				compbits_byte_offset;
		mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
					      map_offset;
		scatterbuffer_size = compbits_scatter_buf->size -
				     scatterbuffer_byte_offset;
	} else {
		compbits_size = compbits_scatter_buf->size -
				compbits_byte_offset;
	}

	nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu",
		  map_offset, map_size);
	nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
		  mapped_compbits_offset, compbits_size);
	nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
		  mapped_scatterbuffer_offset, scatterbuffer_size);

	/* map the destination buffer */
	get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */
	err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
				 NVGPU_VM_MAP_CACHEABLE |
				 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
				 gk20a_cde_mapping_page_size(cde_ctx->vm,
							     map_offset,
							     map_size),
				 NV_KIND_INVALID,
				 compbits_kind, /* incompressible kind */
				 gk20a_mem_flag_none,
				 map_offset, map_size,
				 NULL,
				 &map_vaddr);
	if (err) {
		nvgpu_warn(g, "cde: failed to map compbits scatter buf at %lld size %lld",
			   map_offset, map_size);
		dma_buf_put(compbits_scatter_buf);
		err = -EINVAL;
		goto exit_idle;
	}

	if (scatterbuffer_byte_offset &&
	    l->ops.cde.need_scatter_buffer &&
	    l->ops.cde.need_scatter_buffer(g)) {
		struct sg_table *sgt;
		void *scatter_buffer;

		surface = dma_buf_vmap(compbits_scatter_buf);
		if (!surface) {
			nvgpu_warn(g,
				   "dma_buf_vmap failed");
			err = -EINVAL;
			goto exit_unmap_vaddr;
		}

		scatter_buffer = surface + scatterbuffer_byte_offset;

		nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
			  surface, scatter_buffer);
		sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf,
				   &attachment);
		if (IS_ERR(sgt)) {
			nvgpu_warn(g,
				   "mm_pin failed");
			err = -EINVAL;
			goto exit_unmap_surface;
		} else {
			err = l->ops.cde.populate_scatter_buffer(g, sgt,
					compbits_byte_offset, scatter_buffer,
					scatterbuffer_size);
			WARN_ON(err);

			gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
				       attachment, sgt);
			if (err)
				goto exit_unmap_surface;
		}

		__cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
		dma_buf_vunmap(compbits_scatter_buf, surface);
		surface = NULL;
	}

	/* store source buffer compression tags */
	gk20a_get_comptags(&os_buf, &comptags);
	cde_ctx->surf_param_offset = comptags.offset;
	cde_ctx->surf_param_lines = comptags.lines;

	/* store surface vaddr. This is actually compbit vaddr, but since
	   compbits live in the same surface, and we can get the alloc base
	   address by using gpuva_to_iova_base, this will do */
	cde_ctx->surf_vaddr = map_vaddr;

	/* store information about destination */
	cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
	cde_ctx->compbit_size = compbits_size;

	cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
	cde_ctx->scatterbuffer_size = scatterbuffer_size;

	/* remove existing argument data */
	memset(cde_ctx->user_param_values, 0,
	       sizeof(cde_ctx->user_param_values));

	/* read user space arguments for the conversion */
	for (i = 0; i < num_params; i++) {
		struct gk20a_cde_param *param = params + i;
		int id = param->id - NUM_RESERVED_PARAMS;

		if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
			nvgpu_warn(g, "cde: unknown user parameter");
			err = -EINVAL;
			goto exit_unmap_surface;
		}
		cde_ctx->user_param_values[id] = param->value;
	}

	/* patch data */
	err = gk20a_cde_patch_params(cde_ctx);
	if (err) {
		nvgpu_warn(g, "cde: failed to patch parameters");
		goto exit_unmap_surface;
	}

	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
		  g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
		  cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
		  cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);

	/* take always the postfence as it is needed for protecting the
	 * cde context */
	flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;

	/* gk20a_cde_execute_buffer() will grab a power reference of its own */
	gk20a_idle(g);

	if (comptags.lines == 0) {
		/*
		 * Nothing to do on the buffer, but do a null kickoff for
		 * managing the pre and post fences.
		 */
		submit_op = TYPE_BUF_COMMAND_NOOP;
	} else if (!cde_ctx->init_cmd_executed) {
		/*
		 * First time, so include the init pushbuf too in addition to
		 * the conversion code.
		 */
		submit_op = TYPE_BUF_COMMAND_INIT;
	} else {
		/*
		 * The usual condition: execute just the conversion.
		 */
		submit_op = TYPE_BUF_COMMAND_CONVERT;
	}
	err = gk20a_cde_execute_buffer(cde_ctx, submit_op,
				       fence, flags, fence_out);

	if (comptags.lines != 0 && !err)
		cde_ctx->init_cmd_executed = true;

	/* unmap the buffers - channel holds references to them now */
	nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);

	return err;

exit_unmap_surface:
	if (surface)
		dma_buf_vunmap(compbits_scatter_buf, surface);
exit_unmap_vaddr:
	nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
exit_idle:
	gk20a_idle(g);
	return err;
}

static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_ctx *cde_ctx = data;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	bool channel_idle;

	channel_gk20a_joblist_lock(ch);
	channel_idle = channel_gk20a_joblist_is_empty(ch);
	channel_gk20a_joblist_unlock(ch);

	if (!channel_idle)
		return;

	trace_gk20a_cde_finished_ctx_cb(cde_ctx);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
	if (!cde_ctx->in_use)
		nvgpu_log_info(g, "double finish cde context %p on channel %p",
			       cde_ctx, ch);

	if (gk20a_channel_check_timedout(ch)) {
		if (cde_ctx->is_temporary) {
			nvgpu_warn(g,
				   "cde: channel had timed out"
				   " (temporary channel)");
			/* going to be deleted anyway */
		} else {
			nvgpu_warn(g,
				   "cde: channel had timed out"
				   ", reloading");
			/* mark it to be deleted, replace with a new one */
			nvgpu_mutex_acquire(&cde_app->mutex);
			cde_ctx->is_temporary = true;
			if (gk20a_cde_create_context(l)) {
				nvgpu_err(g, "cde: can't replace context");
			}
			nvgpu_mutex_release(&cde_app->mutex);
		}
	}

	/* delete temporary contexts later (watch for doubles) */
	if (cde_ctx->is_temporary && cde_ctx->in_use) {
		WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
		schedule_delayed_work(&cde_ctx->ctx_deleter_work,
				      msecs_to_jiffies(CTX_DELETE_TIME));
	}

	if (!gk20a_channel_check_timedout(ch)) {
		gk20a_cde_ctx_release(cde_ctx);
	}
}

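/*
 * Bring up one CDE context: fetch the gpu2cde firmware, open a TSG and
 * a channel with gk20a_cde_finished_ctx_cb() as its completion
 * callback, bind the channel to the CDE VM and to the TSG, set up the
 * gpfifo, map the compbit backing store read-only, and parse the
 * firmware image into buffers and command lists.
 */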
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_firmware *img;
	struct channel_gk20a *ch;
	struct tsg_gk20a *tsg;
	struct gr_gk20a *gr = &g->gr;
	struct nvgpu_setup_bind_args setup_bind_args;
	int err = 0;
	u64 vaddr;

	img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
	if (!img) {
		nvgpu_err(g, "cde: could not fetch the firmware");
		return -ENOSYS;
	}

	tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
	if (!tsg) {
		nvgpu_err(g, "cde: could not create TSG");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
					    cde_ctx,
					    -1,
					    false);
	if (!ch) {
		nvgpu_warn(g, "cde: gk20a channel not available");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	ch->timeout.enabled = false;

	/* bind the channel to the vm */
	err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
	if (err) {
		nvgpu_warn(g, "cde: could not bind vm");
		goto err_commit_va;
	}

	err = gk20a_tsg_bind_channel(tsg, ch);
	if (err) {
		nvgpu_err(g, "cde: unable to bind to tsg");
		goto err_setup_bind;
	}

	setup_bind_args.num_gpfifo_entries = 1024;
	setup_bind_args.num_inflight_jobs = 0;
	setup_bind_args.flags = 0;
	err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
	if (err) {
		nvgpu_warn(g, "cde: unable to setup channel");
		goto err_setup_bind;
	}

	/* map backing store to gpu virtual space */
	vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
			       g->gr.compbit_store.mem.size,
			       NVGPU_VM_MAP_CACHEABLE,
			       gk20a_mem_flag_read_only,
			       false,
			       gr->compbit_store.mem.aperture);

	if (!vaddr) {
		nvgpu_warn(g, "cde: cannot map compression bit backing store");
		err = -ENOMEM;
		goto err_map_backingstore;
	}

	/* store initialisation data */
	cde_ctx->ch = ch;
	cde_ctx->tsg = tsg;
	cde_ctx->vm = ch->vm;
	cde_ctx->backing_store_vaddr = vaddr;

	/* initialise the firmware */
	err = gk20a_init_cde_img(cde_ctx, img);
	if (err) {
		nvgpu_warn(g, "cde: image initialisation failed");
		goto err_init_cde_img;
	}

	/* initialisation done */
	nvgpu_release_firmware(g, img);

	return 0;

err_init_cde_img:
	nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
err_map_backingstore:
err_setup_bind:
	nvgpu_vm_put(ch->vm);
err_commit_va:
err_get_gk20a_channel:
	nvgpu_release_firmware(g, img);
	nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
	return err;
}

int gk20a_cde_reload(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	int err;

	if (!cde_app->initialised)
		return -ENOSYS;

	err = gk20a_busy(g);
	if (err)
		return err;

	nvgpu_mutex_acquire(&cde_app->mutex);

	gk20a_cde_stop(l);

	err = gk20a_cde_create_contexts(l);
	if (!err)
		cde_app->initialised = true;

	nvgpu_mutex_release(&cde_app->mutex);

	gk20a_idle(g);
	return err;
}

int gk20a_init_cde_support(struct nvgpu_os_linux *l)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a *g = &l->g;
	int err;

	if (cde_app->initialised)
		return 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");

	err = nvgpu_mutex_init(&cde_app->mutex);
	if (err)
		return err;

	nvgpu_mutex_acquire(&cde_app->mutex);

	nvgpu_init_list_node(&cde_app->free_contexts);
	nvgpu_init_list_node(&cde_app->used_contexts);
	cde_app->ctx_count = 0;
	cde_app->ctx_count_top = 0;
	cde_app->ctx_usecount = 0;

	err = gk20a_cde_create_contexts(l);
	if (!err)
		cde_app->initialised = true;

	nvgpu_mutex_release(&cde_app->mutex);
	nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err);

	if (err)
		nvgpu_mutex_destroy(&cde_app->mutex);

	return err;
}

| 1477 | enum cde_launch_patch_id { | ||
| 1478 | PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024, | ||
| 1479 | PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025, | ||
| 1480 | PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */ | ||
| 1481 | PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027, | ||
| 1482 | PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028, | ||
| 1483 | PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */ | ||
| 1484 | PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */ | ||
| 1485 | PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */ | ||
| 1486 | PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032, | ||
| 1487 | PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */ | ||
| 1488 | PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */ | ||
| 1489 | PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035, | ||
| 1490 | PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036, | ||
| 1491 | PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037, | ||
| 1492 | PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038, | ||
| 1493 | PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039, | ||
| 1494 | PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040, | ||
| 1495 | PATCH_USER_CONST_XBLOCKS_ID = 1041, | ||
| 1496 | PATCH_H_USER_CONST_DSTOFFSET_ID = 1042, | ||
| 1497 | PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043, | ||
| 1498 | PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044, | ||
| 1499 | PATCH_V_USER_CONST_DSTOFFSET_ID = 1045, | ||
| 1500 | PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046, | ||
| 1501 | PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047, | ||
| 1502 | PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048, | ||
| 1503 | PATCH_H_LAUNCH_WORD1_ID = 1049, | ||
| 1504 | PATCH_H_LAUNCH_WORD2_ID = 1050, | ||
| 1505 | PATCH_V_LAUNCH_WORD1_ID = 1051, | ||
| 1506 | PATCH_V_LAUNCH_WORD2_ID = 1052, | ||
| 1507 | PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053, | ||
| 1508 | PATCH_H_QMD_REGISTER_COUNT_ID = 1054, | ||
| 1509 | PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055, | ||
| 1510 | PATCH_V_QMD_REGISTER_COUNT_ID = 1056, | ||
| 1511 | }; | ||
| 1512 | |||
| 1513 | /* maximum number of WRITE_PATCHes in the below function */ | ||
| 1514 | #define MAX_CDE_LAUNCH_PATCHES 32 | ||
| 1515 | |||
| 1516 | static int gk20a_buffer_convert_gpu_to_cde_v1( | ||
| 1517 | struct nvgpu_os_linux *l, | ||
| 1518 | struct dma_buf *dmabuf, u32 consumer, | ||
| 1519 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
| 1520 | u64 scatterbuffer_offset, | ||
| 1521 | u32 width, u32 height, u32 block_height_log2, | ||
| 1522 | u32 submit_flags, struct nvgpu_channel_fence *fence_in, | ||
| 1523 | struct gk20a_buffer_state *state) | ||
| 1524 | { | ||
| 1525 | struct gk20a *g = &l->g; | ||
| 1526 | struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; | ||
| 1527 | int param = 0; | ||
| 1528 | int err = 0; | ||
| 1529 | struct gk20a_fence *new_fence = NULL; | ||
| 1530 | const int wgx = 8; | ||
| 1531 | const int wgy = 8; | ||
| 1532 | const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ | ||
| 1533 | const int xalign = compbits_per_byte * wgx; | ||
| 1534 | const int yalign = wgy; | ||
| 1535 | |||
| 1536 | /* Compute per launch parameters */ | ||
| 1537 | const int xtiles = (width + 7) >> 3; | ||
| 1538 | const int ytiles = (height + 7) >> 3; | ||
| 1539 | const int gridw_h = roundup(xtiles, xalign) / xalign; | ||
| 1540 | const int gridh_h = roundup(ytiles, yalign) / yalign; | ||
| 1541 | const int gridw_v = roundup(ytiles, xalign) / xalign; | ||
| 1542 | const int gridh_v = roundup(xtiles, yalign) / yalign; | ||
| 1543 | const int xblocks = (xtiles + 1) >> 1; | ||
| 1544 | const int voffset = compbits_voffset - compbits_hoffset; | ||
| 1545 | |||
| 1546 | int hprog = -1; | ||
| 1547 | int vprog = -1; | ||
| 1548 | |||
| 1549 | if (l->ops.cde.get_program_numbers) { | ||
| 1550 | l->ops.cde.get_program_numbers(g, block_height_log2, | ||
| 1551 | l->cde_app.shader_parameter, | ||
| 1552 | &hprog, &vprog); | ||
| 1553 | } else { | ||
| 1554 | nvgpu_warn(g, "cde: chip not supported"); | ||
| 1555 | return -ENOSYS; | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | if (hprog < 0 || vprog < 0) { | ||
| 1559 | nvgpu_warn(g, "cde: could not determine programs"); | ||
| 1560 | return -ENOSYS; | ||
| 1561 | } | ||
| 1562 | |||
| 1563 | if (xtiles > 8192 / 8 || ytiles > 8192 / 8) | ||
| 1564 | nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", | ||
| 1565 | xtiles, ytiles); | ||
| 1566 | |||
| 1567 | nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", | ||
| 1568 | width, height, block_height_log2, | ||
| 1569 | compbits_hoffset, compbits_voffset, scatterbuffer_offset); | ||
| 1570 | nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", | ||
| 1571 | width, height, xtiles, ytiles); | ||
| 1572 | nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", | ||
| 1573 | wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); | ||
| 1574 | nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", | ||
| 1575 | hprog, | ||
| 1576 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], | ||
| 1577 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], | ||
| 1578 | vprog, | ||
| 1579 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], | ||
| 1580 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); | ||
| 1581 | |||
| 1582 | /* Write parameters */ | ||
| 1583 | #define WRITE_PATCH(NAME, VALUE) \ | ||
| 1584 | params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} | ||
| 1585 | WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks); | ||
| 1586 | WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, | ||
| 1587 | block_height_log2); | ||
| 1588 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); | ||
| 1589 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); | ||
| 1590 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); | ||
| 1591 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); | ||
| 1592 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); | ||
| 1593 | |||
| 1594 | WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h); | ||
| 1595 | WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h); | ||
| 1596 | WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0); | ||
| 1597 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h); | ||
| 1598 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h); | ||
| 1599 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1); | ||
| 1600 | |||
| 1601 | WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v); | ||
| 1602 | WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v); | ||
| 1603 | WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset); | ||
| 1604 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v); | ||
| 1605 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v); | ||
| 1606 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); | ||
| 1607 | |||
| 1608 | WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, | ||
| 1609 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); | ||
| 1610 | WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, | ||
| 1611 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); | ||
| 1612 | WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, | ||
| 1613 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); | ||
| 1614 | WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, | ||
| 1615 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); | ||
| 1616 | |||
| 1617 | if (consumer & NVGPU_GPU_COMPBITS_CDEH) { | ||
| 1618 | WRITE_PATCH(PATCH_H_LAUNCH_WORD1, | ||
| 1619 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); | ||
| 1620 | WRITE_PATCH(PATCH_H_LAUNCH_WORD2, | ||
| 1621 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); | ||
| 1622 | } else { | ||
| 1623 | WRITE_PATCH(PATCH_H_LAUNCH_WORD1, | ||
| 1624 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); | ||
| 1625 | WRITE_PATCH(PATCH_H_LAUNCH_WORD2, | ||
| 1626 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); | ||
| 1627 | } | ||
| 1628 | |||
| 1629 | if (consumer & NVGPU_GPU_COMPBITS_CDEV) { | ||
| 1630 | WRITE_PATCH(PATCH_V_LAUNCH_WORD1, | ||
| 1631 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); | ||
| 1632 | WRITE_PATCH(PATCH_V_LAUNCH_WORD2, | ||
| 1633 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); | ||
| 1634 | } else { | ||
| 1635 | WRITE_PATCH(PATCH_V_LAUNCH_WORD1, | ||
| 1636 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); | ||
| 1637 | WRITE_PATCH(PATCH_V_LAUNCH_WORD2, | ||
| 1638 | l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); | ||
| 1639 | } | ||
| 1640 | #undef WRITE_PATCH | ||
| 1641 | |||
| 1642 | err = gk20a_cde_convert(l, dmabuf, | ||
| 1643 | compbits_hoffset, | ||
| 1644 | scatterbuffer_offset, | ||
| 1645 | fence_in, submit_flags, | ||
| 1646 | params, param, &new_fence); | ||
| 1647 | if (err) | ||
| 1648 | goto out; | ||
| 1649 | |||
| 1650 | /* compbits generated, update state & fence */ | ||
| 1651 | gk20a_fence_put(state->fence); | ||
| 1652 | state->fence = new_fence; | ||
| 1653 | state->valid_compbits |= consumer & | ||
| 1654 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
| 1655 | out: | ||
| 1656 | return err; | ||
| 1657 | } | ||
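To make the launch geometry above concrete, here is a hedged, self-contained sketch of the same math for an illustrative 1920x1080 surface. ROUNDUP stands in for the kernel's roundup(); every constant is taken from the function itself.

    #include <stdio.h>

    #define ROUNDUP(x, a) ((((x) + (a) - 1) / (a)) * (a))

    int main(void)
    {
            const int width = 1920, height = 1080;       /* illustrative surface */
            const int wgx = 8, wgy = 8;                  /* work group size */
            const int compbits_per_byte = 4;             /* 4 compbit pairs per byte */
            const int xalign = compbits_per_byte * wgx;  /* 32 */
            const int yalign = wgy;                      /* 8 */
            const int xtiles = (width + 7) >> 3;         /* 240: one tile per 8 pixels */
            const int ytiles = (height + 7) >> 3;        /* 135 */

            printf("gridH = %d x %d\n",
                   ROUNDUP(xtiles, xalign) / xalign,     /* 8 */
                   ROUNDUP(ytiles, yalign) / yalign);    /* 17 */
            printf("gridV = %d x %d\n",
                   ROUNDUP(ytiles, xalign) / xalign,     /* 5 */
                   ROUNDUP(xtiles, yalign) / yalign);    /* 30 */
            printf("xblocks = %d\n", (xtiles + 1) >> 1); /* 120 */
            return 0;
    }

Note how the vertical pass swaps the roles of xtiles and ytiles in its grid computation.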
| 1658 | |||
| 1659 | static int gk20a_buffer_convert_gpu_to_cde( | ||
| 1660 | struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, | ||
| 1661 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
| 1662 | u64 scatterbuffer_offset, | ||
| 1663 | u32 width, u32 height, u32 block_height_log2, | ||
| 1664 | u32 submit_flags, struct nvgpu_channel_fence *fence_in, | ||
| 1665 | struct gk20a_buffer_state *state) | ||
| 1666 | { | ||
| 1667 | struct gk20a *g = &l->g; | ||
| 1668 | int err = 0; | ||
| 1669 | |||
| 1670 | if (!l->cde_app.initialised) | ||
| 1671 | return -ENOSYS; | ||
| 1672 | |||
| 1673 | nvgpu_log(g, gpu_dbg_cde, "firmware version = %u", | ||
| 1674 | l->cde_app.firmware_version); | ||
| 1675 | |||
| 1676 | if (l->cde_app.firmware_version == 1) { | ||
| 1677 | err = gk20a_buffer_convert_gpu_to_cde_v1( | ||
| 1678 | l, dmabuf, consumer, offset, compbits_hoffset, | ||
| 1679 | compbits_voffset, scatterbuffer_offset, | ||
| 1680 | width, height, block_height_log2, | ||
| 1681 | submit_flags, fence_in, state); | ||
| 1682 | } else { | ||
| 1683 | nvgpu_err(g, "unsupported CDE firmware version %d", | ||
| 1684 | l->cde_app.firmware_version); | ||
| 1685 | err = -EINVAL; | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | return err; | ||
| 1689 | } | ||
| 1690 | |||
| 1691 | int gk20a_prepare_compressible_read( | ||
| 1692 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
| 1693 | u64 compbits_hoffset, u64 compbits_voffset, | ||
| 1694 | u64 scatterbuffer_offset, | ||
| 1695 | u32 width, u32 height, u32 block_height_log2, | ||
| 1696 | u32 submit_flags, struct nvgpu_channel_fence *fence, | ||
| 1697 | u32 *valid_compbits, u32 *zbc_color, | ||
| 1698 | struct gk20a_fence **fence_out) | ||
| 1699 | { | ||
| 1700 | struct gk20a *g = &l->g; | ||
| 1701 | int err = 0; | ||
| 1702 | struct gk20a_buffer_state *state; | ||
| 1703 | struct dma_buf *dmabuf; | ||
| 1704 | u32 missing_bits; | ||
| 1705 | |||
| 1706 | dmabuf = dma_buf_get(buffer_fd); | ||
| 1707 | if (IS_ERR(dmabuf)) | ||
| 1708 | return -EINVAL; | ||
| 1709 | |||
| 1710 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
| 1711 | if (err) { | ||
| 1712 | dma_buf_put(dmabuf); | ||
| 1713 | return err; | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | missing_bits = (state->valid_compbits ^ request) & request; | ||
| 1717 | |||
| 1718 | nvgpu_mutex_acquire(&state->lock); | ||
| 1719 | |||
| 1720 | if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { | ||
| 1721 | |||
| 1722 | gk20a_fence_put(state->fence); | ||
| 1723 | state->fence = NULL; | ||
| 1724 | /* state->fence = decompress(); | ||
| 1725 | state->valid_compbits = 0; */ | ||
| 1726 | err = -EINVAL; | ||
| 1727 | goto out; | ||
| 1728 | } else if (missing_bits) { | ||
| 1729 | u32 missing_cde_bits = missing_bits & | ||
| 1730 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
| 1731 | if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && | ||
| 1732 | missing_cde_bits) { | ||
| 1733 | err = gk20a_buffer_convert_gpu_to_cde( | ||
| 1734 | l, dmabuf, | ||
| 1735 | missing_cde_bits, | ||
| 1736 | offset, compbits_hoffset, | ||
| 1737 | compbits_voffset, scatterbuffer_offset, | ||
| 1738 | width, height, block_height_log2, | ||
| 1739 | submit_flags, fence, | ||
| 1740 | state); | ||
| 1741 | if (err) | ||
| 1742 | goto out; | ||
| 1743 | } | ||
| 1744 | } | ||
| 1745 | |||
| 1746 | if (state->fence && fence_out) | ||
| 1747 | *fence_out = gk20a_fence_get(state->fence); | ||
| 1748 | |||
| 1749 | if (valid_compbits) | ||
| 1750 | *valid_compbits = state->valid_compbits; | ||
| 1751 | |||
| 1752 | if (zbc_color) | ||
| 1753 | *zbc_color = state->zbc_color; | ||
| 1754 | |||
| 1755 | out: | ||
| 1756 | nvgpu_mutex_release(&state->lock); | ||
| 1757 | dma_buf_put(dmabuf); | ||
| 1758 | return err; | ||
| 1759 | } | ||
| 1760 | |||
| 1761 | int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, | ||
| 1762 | u32 valid_compbits, u64 offset, u32 zbc_color) | ||
| 1763 | { | ||
| 1764 | int err; | ||
| 1765 | struct gk20a_buffer_state *state; | ||
| 1766 | struct dma_buf *dmabuf; | ||
| 1767 | |||
| 1768 | dmabuf = dma_buf_get(buffer_fd); | ||
| 1769 | if (IS_ERR(dmabuf)) { | ||
| 1770 | nvgpu_err(g, "invalid dmabuf"); | ||
| 1771 | return -EINVAL; | ||
| 1772 | } | ||
| 1773 | |||
| 1774 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
| 1775 | if (err) { | ||
| 1776 | nvgpu_err(g, "could not get state from dmabuf"); | ||
| 1777 | dma_buf_put(dmabuf); | ||
| 1778 | return err; | ||
| 1779 | } | ||
| 1780 | |||
| 1781 | nvgpu_mutex_acquire(&state->lock); | ||
| 1782 | |||
| 1783 | /* Update the compbits state. */ | ||
| 1784 | state->valid_compbits = valid_compbits; | ||
| 1785 | state->zbc_color = zbc_color; | ||
| 1786 | |||
| 1787 | /* Discard previous compbit job fence. */ | ||
| 1788 | gk20a_fence_put(state->fence); | ||
| 1789 | state->fence = NULL; | ||
| 1790 | |||
| 1791 | nvgpu_mutex_release(&state->lock); | ||
| 1792 | dma_buf_put(dmabuf); | ||
| 1793 | return 0; | ||
| 1794 | } | ||
diff --git a/include/os/linux/cde.h b/include/os/linux/cde.h new file mode 100644 index 0000000..5928b62 --- /dev/null +++ b/include/os/linux/cde.h | |||
| @@ -0,0 +1,326 @@ | |||
| 1 | /* | ||
| 2 | * GK20A color decompression engine support | ||
| 3 | * | ||
| 4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef _CDE_GK20A_H_ | ||
| 20 | #define _CDE_GK20A_H_ | ||
| 21 | |||
| 22 | #include <nvgpu/nvgpu_mem.h> | ||
| 23 | #include <nvgpu/list.h> | ||
| 24 | #include <nvgpu/lock.h> | ||
| 25 | |||
| 26 | #include <linux/kobject.h> | ||
| 27 | #include <linux/workqueue.h> | ||
| 28 | |||
| 29 | #define MAX_CDE_BUFS 10 | ||
| 30 | #define MAX_CDE_PARAMS 64 | ||
| 31 | #define MAX_CDE_USER_PARAMS 40 | ||
| 32 | #define MAX_CDE_ARRAY_ENTRIES 9 | ||
| 33 | |||
| 34 | /* | ||
| 35 | * The size of the context ring buffer that is dedicated for handling cde | ||
| 36 | * jobs. Re-using a context (=channel) for a different cde job forces a cpu | ||
| 37 | * wait on the previous job submitted to that channel, so increasing this value | ||
| 38 | * reduces the likelihood of stalls. | ||
| 39 | */ | ||
| 40 | #define NUM_CDE_CONTEXTS 4 | ||
| 41 | |||
| 42 | struct dma_buf; | ||
| 43 | struct device; | ||
| 44 | struct nvgpu_os_linux; | ||
| 45 | struct gk20a; | ||
| 46 | struct gk20a_fence; | ||
| 47 | struct nvgpu_channel_fence; | ||
| 48 | struct channel_gk20a; | ||
| 49 | struct vm_gk20a; | ||
| 50 | struct nvgpu_gpfifo_entry; | ||
| 51 | |||
| 52 | /* | ||
| 53 | * This element defines a buffer that is allocated and mapped into gpu address | ||
| 54 | * space. data_byte_offset defines where the buffer's contents begin inside the | ||
| 55 | * firmware image. num_bytes defines how many bytes the buffer contains. | ||
| 56 | * | ||
| 57 | * If data_byte_offset is zero, we allocate an empty buffer. | ||
| 58 | */ | ||
| 59 | |||
| 60 | struct gk20a_cde_hdr_buf { | ||
| 61 | u64 data_byte_offset; | ||
| 62 | u64 num_bytes; | ||
| 63 | }; | ||
| 64 | |||
| 65 | /* | ||
| 66 | * This element defines a constant patch in buffers. It computes the | ||
| 67 | * physical address of <source_buf>+source_byte_offset. The address is | ||
| 68 | * then folded into the patch value as per: | ||
| 69 | * value = (current_value & ~mask) | ((address << shift) & mask) . | ||
| 70 | * | ||
| 71 | * The type field defines the register size as: | ||
| 72 | * 0=u32, | ||
| 73 | * 1=u64 (little endian), | ||
| 74 | * 2=u64 (big endian) | ||
| 75 | */ | ||
| 76 | |||
| 77 | struct gk20a_cde_hdr_replace { | ||
| 78 | u32 target_buf; | ||
| 79 | u32 source_buf; | ||
| 80 | s32 shift; | ||
| 81 | u32 type; | ||
| 82 | u64 target_byte_offset; | ||
| 83 | u64 source_byte_offset; | ||
| 84 | u64 mask; | ||
| 85 | }; | ||
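A minimal sketch of the patch formula quoted above; the negative-shift handling is an assumption, since the header only shows the left-shift form:

    /* Fold a resolved address into the word currently in the target buffer. */
    static u64 apply_patch(u64 cur_val, u64 address, s32 shift, u64 mask)
    {
            u64 shifted = (shift >= 0) ? (address << shift)
                                       : (address >> -shift);

            return (cur_val & ~mask) | (shifted & mask);
    }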
| 86 | |||
| 87 | enum { | ||
| 88 | TYPE_PARAM_TYPE_U32 = 0, | ||
| 89 | TYPE_PARAM_TYPE_U64_LITTLE, | ||
| 90 | TYPE_PARAM_TYPE_U64_BIG | ||
| 91 | }; | ||
| 92 | |||
| 93 | /* | ||
| 94 | * This element defines a runtime patch in buffers. Parameters with ids from | ||
| 95 | * 0 to 1023 are reserved for special usage as follows: | ||
| 96 | * 0 = comptags_per_cacheline, | ||
| 97 | * 1 = slices_per_fbp, | ||
| 98 | * 2 = num_fbps, | ||
| 99 | * 3 = source buffer first page offset, | ||
| 100 | * 4 = source buffer block height log2, | ||
| 101 | * 5 = backing store memory address, | ||
| 102 | * 6 = destination memory address, | ||
| 103 | * 7 = destination size (bytes), | ||
| 104 | * 8 = backing store size (bytes), | ||
| 105 | * 9 = cache line size | ||
| 106 | * | ||
| 107 | * Parameters with id 1024 and above are user-specified, i.e. they determine | ||
| 108 | * where parameters from user space should be placed in buffers, what their | ||
| 109 | * type is, etc. | ||
| 110 | * | ||
| 111 | * Once the value is available, we add data_offset to the value. | ||
| 112 | * | ||
| 113 | * The value address is then modified into patch value as per: | ||
| 114 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
| 115 | * | ||
| 116 | * The type field defines the register size as: | ||
| 117 | * 0=u32, | ||
| 118 | * 1=u64 (little endian), | ||
| 119 | * 2=u64 (big endian) | ||
| 120 | */ | ||
| 121 | |||
| 122 | struct gk20a_cde_hdr_param { | ||
| 123 | u32 id; | ||
| 124 | u32 target_buf; | ||
| 125 | s32 shift; | ||
| 126 | u32 type; | ||
| 127 | s64 data_offset; | ||
| 128 | u64 target_byte_offset; | ||
| 129 | u64 mask; | ||
| 130 | }; | ||
| 131 | |||
| 132 | enum { | ||
| 133 | TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0, | ||
| 134 | TYPE_PARAM_GPU_CONFIGURATION, | ||
| 135 | TYPE_PARAM_FIRSTPAGEOFFSET, | ||
| 136 | TYPE_PARAM_NUMPAGES, | ||
| 137 | TYPE_PARAM_BACKINGSTORE, | ||
| 138 | TYPE_PARAM_DESTINATION, | ||
| 139 | TYPE_PARAM_DESTINATION_SIZE, | ||
| 140 | TYPE_PARAM_BACKINGSTORE_SIZE, | ||
| 141 | TYPE_PARAM_SOURCE_SMMU_ADDR, | ||
| 142 | TYPE_PARAM_BACKINGSTORE_BASE_HW, | ||
| 143 | TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, | ||
| 144 | TYPE_PARAM_SCATTERBUFFER, | ||
| 145 | TYPE_PARAM_SCATTERBUFFER_SIZE, | ||
| 146 | NUM_RESERVED_PARAMS = 1024, | ||
| 147 | }; | ||
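As a hedged sketch of how some of the reserved ids could resolve to values at patch time (the real resolver lives in cde.c, not this header; the fields used here are the ones declared in struct gk20a_cde_ctx further below):

    static u64 resolve_reserved_param(struct gk20a_cde_ctx *cde_ctx, u32 id)
    {
            switch (id) {
            case TYPE_PARAM_BACKINGSTORE:
                    return cde_ctx->backing_store_vaddr;
            case TYPE_PARAM_DESTINATION:
                    return cde_ctx->compbit_vaddr;
            case TYPE_PARAM_DESTINATION_SIZE:
                    return cde_ctx->compbit_size;
            case TYPE_PARAM_SCATTERBUFFER:
                    return cde_ctx->scatterbuffer_vaddr;
            default:
                    return 0; /* remaining ids come from gk20a/HW queries */
            }
    }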
| 148 | |||
| 149 | /* | ||
| 150 | * This header element defines a command. The op field determines whether the | ||
| 151 | * element defines an init (0) or a convert (1) command. data_byte_offset | ||
| 152 | * denotes where the command's elements begin in the file. | ||
| 153 | */ | ||
| 154 | |||
| 155 | struct gk20a_cde_hdr_command { | ||
| 156 | u32 op; | ||
| 157 | u32 num_entries; | ||
| 158 | u64 data_byte_offset; | ||
| 159 | }; | ||
| 160 | |||
| 161 | enum { | ||
| 162 | TYPE_BUF_COMMAND_INIT = 0, | ||
| 163 | TYPE_BUF_COMMAND_CONVERT, | ||
| 164 | TYPE_BUF_COMMAND_NOOP | ||
| 165 | }; | ||
| 166 | |||
| 167 | /* | ||
| 168 | * This command element defines one entry inside a push buffer. target_buf | ||
| 169 | * identifies the buffer containing the pushbuffer entries, target_byte_offset | ||
| 170 | * the offset inside that buffer, and num_bytes the size of the entry in bytes. | ||
| 171 | */ | ||
| 172 | |||
| 173 | struct gk20a_cde_cmd_elem { | ||
| 174 | u32 target_buf; | ||
| 175 | u32 padding; | ||
| 176 | u64 target_byte_offset; | ||
| 177 | u64 num_bytes; | ||
| 178 | }; | ||
| 179 | |||
| 180 | /* | ||
| 181 | * This element is used for storing a small array of data. | ||
| 182 | */ | ||
| 183 | |||
| 184 | enum { | ||
| 185 | ARRAY_PROGRAM_OFFSET = 0, | ||
| 186 | ARRAY_REGISTER_COUNT, | ||
| 187 | ARRAY_LAUNCH_COMMAND, | ||
| 188 | NUM_CDE_ARRAYS | ||
| 189 | }; | ||
| 190 | |||
| 191 | struct gk20a_cde_hdr_array { | ||
| 192 | u32 id; | ||
| 193 | u32 data[MAX_CDE_ARRAY_ENTRIES]; | ||
| 194 | }; | ||
| 195 | |||
| 196 | /* | ||
| 197 | * The following defines a single header element. Each element has a type | ||
| 198 | * and carries one of the data structures above, selected by that type. | ||
| 199 | */ | ||
| 200 | |||
| 201 | struct gk20a_cde_hdr_elem { | ||
| 202 | u32 type; | ||
| 203 | u32 padding; | ||
| 204 | union { | ||
| 205 | struct gk20a_cde_hdr_buf buf; | ||
| 206 | struct gk20a_cde_hdr_replace replace; | ||
| 207 | struct gk20a_cde_hdr_param param; | ||
| 208 | u32 required_class; | ||
| 209 | struct gk20a_cde_hdr_command command; | ||
| 210 | struct gk20a_cde_hdr_array array; | ||
| 211 | }; | ||
| 212 | }; | ||
| 213 | |||
| 214 | enum { | ||
| 215 | TYPE_BUF = 0, | ||
| 216 | TYPE_REPLACE, | ||
| 217 | TYPE_PARAM, | ||
| 218 | TYPE_REQUIRED_CLASS, | ||
| 219 | TYPE_COMMAND, | ||
| 220 | TYPE_ARRAY | ||
| 221 | }; | ||
| 222 | |||
| 223 | struct gk20a_cde_param { | ||
| 224 | u32 id; | ||
| 225 | u32 padding; | ||
| 226 | u64 value; | ||
| 227 | }; | ||
| 228 | |||
| 229 | struct gk20a_cde_ctx { | ||
| 230 | struct nvgpu_os_linux *l; | ||
| 231 | struct device *dev; | ||
| 232 | |||
| 233 | /* channel related data */ | ||
| 234 | struct channel_gk20a *ch; | ||
| 235 | struct tsg_gk20a *tsg; | ||
| 236 | struct vm_gk20a *vm; | ||
| 237 | |||
| 238 | /* buf converter configuration */ | ||
| 239 | struct nvgpu_mem mem[MAX_CDE_BUFS]; | ||
| 240 | unsigned int num_bufs; | ||
| 241 | |||
| 242 | /* buffer patching params (where should patching be done) */ | ||
| 243 | struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS]; | ||
| 244 | unsigned int num_params; | ||
| 245 | |||
| 246 | /* storage for user space parameter values */ | ||
| 247 | u32 user_param_values[MAX_CDE_USER_PARAMS]; | ||
| 248 | |||
| 249 | u32 surf_param_offset; | ||
| 250 | u32 surf_param_lines; | ||
| 251 | u64 surf_vaddr; | ||
| 252 | |||
| 253 | u64 compbit_vaddr; | ||
| 254 | u64 compbit_size; | ||
| 255 | |||
| 256 | u64 scatterbuffer_vaddr; | ||
| 257 | u64 scatterbuffer_size; | ||
| 258 | |||
| 259 | u64 backing_store_vaddr; | ||
| 260 | |||
| 261 | struct nvgpu_gpfifo_entry *init_convert_cmd; | ||
| 262 | int init_cmd_num_entries; | ||
| 263 | |||
| 264 | struct nvgpu_gpfifo_entry *convert_cmd; | ||
| 265 | int convert_cmd_num_entries; | ||
| 266 | |||
| 267 | struct kobj_attribute attr; | ||
| 268 | |||
| 269 | bool init_cmd_executed; | ||
| 270 | |||
| 271 | struct nvgpu_list_node list; | ||
| 272 | bool is_temporary; | ||
| 273 | bool in_use; | ||
| 274 | struct delayed_work ctx_deleter_work; | ||
| 275 | }; | ||
| 276 | |||
| 277 | static inline struct gk20a_cde_ctx * | ||
| 278 | gk20a_cde_ctx_from_list(struct nvgpu_list_node *node) | ||
| 279 | { | ||
| 280 | return (struct gk20a_cde_ctx *) | ||
| 281 | ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list)); | ||
| 282 | } | ||
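gk20a_cde_ctx_from_list spells out the container_of pattern by hand, presumably so the expression also builds where the Linux helper is unavailable; under Linux the body would be equivalent to:

    return container_of(node, struct gk20a_cde_ctx, list);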
| 283 | |||
| 284 | struct gk20a_cde_app { | ||
| 285 | bool initialised; | ||
| 286 | struct nvgpu_mutex mutex; | ||
| 287 | |||
| 288 | struct nvgpu_list_node free_contexts; | ||
| 289 | struct nvgpu_list_node used_contexts; | ||
| 290 | unsigned int ctx_count; | ||
| 291 | unsigned int ctx_usecount; | ||
| 292 | unsigned int ctx_count_top; | ||
| 293 | |||
| 294 | u32 firmware_version; | ||
| 295 | |||
| 296 | u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES]; | ||
| 297 | |||
| 298 | u32 shader_parameter; | ||
| 299 | }; | ||
| 300 | |||
| 301 | void gk20a_cde_destroy(struct nvgpu_os_linux *l); | ||
| 302 | void gk20a_cde_suspend(struct nvgpu_os_linux *l); | ||
| 303 | int gk20a_init_cde_support(struct nvgpu_os_linux *l); | ||
| 304 | int gk20a_cde_reload(struct nvgpu_os_linux *l); | ||
| 305 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | ||
| 306 | struct dma_buf *compbits_buf, | ||
| 307 | u64 compbits_byte_offset, | ||
| 308 | u64 scatterbuffer_byte_offset, | ||
| 309 | struct nvgpu_channel_fence *fence, | ||
| 310 | u32 __flags, struct gk20a_cde_param *params, | ||
| 311 | int num_params, struct gk20a_fence **fence_out); | ||
| 312 | |||
| 313 | int gk20a_prepare_compressible_read( | ||
| 314 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
| 315 | u64 compbits_hoffset, u64 compbits_voffset, | ||
| 316 | u64 scatterbuffer_offset, | ||
| 317 | u32 width, u32 height, u32 block_height_log2, | ||
| 318 | u32 submit_flags, struct nvgpu_channel_fence *fence, | ||
| 319 | u32 *valid_compbits, u32 *zbc_color, | ||
| 320 | struct gk20a_fence **fence_out); | ||
| 321 | int gk20a_mark_compressible_write( | ||
| 322 | struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, | ||
| 323 | u32 zbc_color); | ||
| 324 | int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); | ||
| 325 | |||
| 326 | #endif | ||
diff --git a/include/os/linux/cde_gm20b.c b/include/os/linux/cde_gm20b.c new file mode 100644 index 0000000..a9a4754 --- /dev/null +++ b/include/os/linux/cde_gm20b.c | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | /* | ||
| 2 | * GM20B CDE | ||
| 3 | * | ||
| 4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 7 | * copy of this software and associated documentation files (the "Software"), | ||
| 8 | * to deal in the Software without restriction, including without limitation | ||
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 11 | * Software is furnished to do so, subject to the following conditions: | ||
| 12 | * | ||
| 13 | * The above copyright notice and this permission notice shall be included in | ||
| 14 | * all copies or substantial portions of the Software. | ||
| 15 | * | ||
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| 22 | * DEALINGS IN THE SOFTWARE. | ||
| 23 | */ | ||
| 24 | |||
| 25 | #include <nvgpu/gk20a.h> | ||
| 26 | |||
| 27 | #include "cde_gm20b.h" | ||
| 28 | |||
| 29 | enum programs { | ||
| 30 | PROG_HPASS = 0, | ||
| 31 | PROG_VPASS_LARGE = 1, | ||
| 32 | PROG_VPASS_SMALL = 2, | ||
| 33 | PROG_HPASS_DEBUG = 3, | ||
| 34 | PROG_VPASS_LARGE_DEBUG = 4, | ||
| 35 | PROG_VPASS_SMALL_DEBUG = 5, | ||
| 36 | PROG_PASSTHROUGH = 6, | ||
| 37 | }; | ||
| 38 | |||
| 39 | void gm20b_cde_get_program_numbers(struct gk20a *g, | ||
| 40 | u32 block_height_log2, | ||
| 41 | u32 shader_parameter, | ||
| 42 | int *hprog_out, int *vprog_out) | ||
| 43 | { | ||
| 44 | int hprog = PROG_HPASS; | ||
| 45 | int vprog = (block_height_log2 >= 2) ? | ||
| 46 | PROG_VPASS_LARGE : PROG_VPASS_SMALL; | ||
| 47 | if (shader_parameter == 1) { | ||
| 48 | hprog = PROG_PASSTHROUGH; | ||
| 49 | vprog = PROG_PASSTHROUGH; | ||
| 50 | } else if (shader_parameter == 2) { | ||
| 51 | hprog = PROG_HPASS_DEBUG; | ||
| 52 | vprog = (block_height_log2 >= 2) ? | ||
| 53 | PROG_VPASS_LARGE_DEBUG : | ||
| 54 | PROG_VPASS_SMALL_DEBUG; | ||
| 55 | } | ||
| 56 | |||
| 57 | *hprog_out = hprog; | ||
| 58 | *vprog_out = vprog; | ||
| 59 | } | ||
diff --git a/include/os/linux/cde_gm20b.h b/include/os/linux/cde_gm20b.h new file mode 100644 index 0000000..fac8aaf --- /dev/null +++ b/include/os/linux/cde_gm20b.h | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | /* | ||
| 2 | * GM20B CDE | ||
| 3 | * | ||
| 4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 7 | * copy of this software and associated documentation files (the "Software"), | ||
| 8 | * to deal in the Software without restriction, including without limitation | ||
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 11 | * Software is furnished to do so, subject to the following conditions: | ||
| 12 | * | ||
| 13 | * The above copyright notice and this permission notice shall be included in | ||
| 14 | * all copies or substantial portions of the Software. | ||
| 15 | * | ||
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| 22 | * DEALINGS IN THE SOFTWARE. | ||
| 23 | */ | ||
| 24 | |||
| 25 | #ifndef _NVHOST_GM20B_CDE | ||
| 26 | #define _NVHOST_GM20B_CDE | ||
| 27 | |||
| 28 | void gm20b_cde_get_program_numbers(struct gk20a *g, | ||
| 29 | u32 block_height_log2, | ||
| 30 | u32 shader_parameter, | ||
| 31 | int *hprog_out, int *vprog_out); | ||
| 32 | |||
| 33 | #endif | ||
diff --git a/include/os/linux/cde_gp10b.c b/include/os/linux/cde_gp10b.c new file mode 100644 index 0000000..6356d33 --- /dev/null +++ b/include/os/linux/cde_gp10b.c | |||
| @@ -0,0 +1,153 @@ | |||
| 1 | /* | ||
| 2 | * GP10B CDE | ||
| 3 | * | ||
| 4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 7 | * copy of this software and associated documentation files (the "Software"), | ||
| 8 | * to deal in the Software without restriction, including without limitation | ||
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 11 | * Software is furnished to do so, subject to the following conditions: | ||
| 12 | * | ||
| 13 | * The above copyright notice and this permission notice shall be included in | ||
| 14 | * all copies or substantial portions of the Software. | ||
| 15 | * | ||
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| 22 | * DEALINGS IN THE SOFTWARE. | ||
| 23 | */ | ||
| 24 | |||
| 25 | #include <nvgpu/log.h> | ||
| 26 | #include <nvgpu/dma.h> | ||
| 27 | #include <nvgpu/gk20a.h> | ||
| 28 | |||
| 29 | #include "cde_gp10b.h" | ||
| 30 | |||
| 31 | enum gp10b_programs { | ||
| 32 | GP10B_PROG_HPASS = 0, | ||
| 33 | GP10B_PROG_HPASS_4K = 1, | ||
| 34 | GP10B_PROG_VPASS = 2, | ||
| 35 | GP10B_PROG_VPASS_4K = 3, | ||
| 36 | GP10B_PROG_HPASS_DEBUG = 4, | ||
| 37 | GP10B_PROG_HPASS_4K_DEBUG = 5, | ||
| 38 | GP10B_PROG_VPASS_DEBUG = 6, | ||
| 39 | GP10B_PROG_VPASS_4K_DEBUG = 7, | ||
| 40 | GP10B_PROG_PASSTHROUGH = 8, | ||
| 41 | }; | ||
| 42 | |||
| 43 | void gp10b_cde_get_program_numbers(struct gk20a *g, | ||
| 44 | u32 block_height_log2, | ||
| 45 | u32 shader_parameter, | ||
| 46 | int *hprog_out, int *vprog_out) | ||
| 47 | { | ||
| 48 | int hprog, vprog; | ||
| 49 | |||
| 50 | if (shader_parameter == 1) { | ||
| 51 | hprog = GP10B_PROG_PASSTHROUGH; | ||
| 52 | vprog = GP10B_PROG_PASSTHROUGH; | ||
| 53 | } else { | ||
| 54 | hprog = GP10B_PROG_HPASS; | ||
| 55 | vprog = GP10B_PROG_VPASS; | ||
| 56 | if (shader_parameter == 2) { | ||
| 57 | hprog = GP10B_PROG_HPASS_DEBUG; | ||
| 58 | vprog = GP10B_PROG_VPASS_DEBUG; | ||
| 59 | } | ||
| 60 | if (!nvgpu_iommuable(g)) { | ||
| 61 | if (!g->mm.disable_bigpage) { | ||
| 62 | nvgpu_warn(g, | ||
| 63 | "When no IOMMU big pages cannot be used"); | ||
| 64 | } | ||
| 65 | hprog |= 1; | ||
| 66 | vprog |= 1; | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | *hprog_out = hprog; | ||
| 71 | *vprog_out = vprog; | ||
| 72 | } | ||
| 73 | |||
| 74 | bool gp10b_need_scatter_buffer(struct gk20a *g) | ||
| 75 | { | ||
| 76 | return !nvgpu_iommuable(g); | ||
| 77 | } | ||
| 78 | |||
| 79 | static u8 parity(u32 a) | ||
| 80 | { | ||
| 81 | a ^= a>>16u; | ||
| 82 | a ^= a>>8u; | ||
| 83 | a ^= a>>4u; | ||
| 84 | a &= 0xfu; | ||
| 85 | return (0x6996u >> a) & 1u; | ||
| 86 | } | ||
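The three XOR folds reduce the 32-bit word to 4 bits while preserving parity, and 0x6996 (binary 0110 1001 1001 0110) is then a 16-entry lookup table: bit a of 0x6996 is the parity of a. A minimal self-check, using nothing beyond the C standard library:

    #include <assert.h>
    #include <stdint.h>

    /* naive reference: count set bits mod 2 */
    static uint8_t parity_ref(uint32_t a)
    {
            uint8_t p = 0;

            while (a) {
                    p ^= a & 1u;
                    a >>= 1;
            }
            return p;
    }

    int main(void)
    {
            uint32_t a;

            /* checking all 4-bit inputs suffices, since the folds preserve parity */
            for (a = 0; a < 16; a++)
                    assert(((0x6996u >> a) & 1u) == parity_ref(a));
            return 0;
    }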
| 87 | |||
| 88 | int gp10b_populate_scatter_buffer(struct gk20a *g, | ||
| 89 | struct sg_table *sgt, | ||
| 90 | size_t surface_size, | ||
| 91 | void *scatter_buffer_ptr, | ||
| 92 | size_t scatter_buffer_size) | ||
| 93 | { | ||
| 94 | /* map scatter buffer to CPU VA and fill it */ | ||
| 95 | const u32 page_size_log2 = 12; | ||
| 96 | const u32 page_size = 1 << page_size_log2; | ||
| 97 | const u32 page_size_shift = page_size_log2 - 7u; | ||
| 98 | |||
| 99 | /* 0011 1111 1111 1111 1111 1110 0100 1000 */ | ||
| 100 | const u32 getSliceMaskGP10B = 0x3ffffe48; | ||
| 101 | u8 *scatter_buffer = scatter_buffer_ptr; | ||
| 102 | |||
| 103 | size_t i; | ||
| 104 | struct scatterlist *sg = NULL; | ||
| 105 | u8 d = 0; | ||
| 106 | size_t page = 0; | ||
| 107 | size_t pages_left; | ||
| 108 | |||
| 109 | surface_size = round_up(surface_size, page_size); | ||
| 110 | |||
| 111 | pages_left = surface_size >> page_size_log2; | ||
| 112 | if (DIV_ROUND_UP(pages_left, 8) > scatter_buffer_size) | ||
| 113 | return -ENOMEM; | ||
| 114 | |||
| 115 | for_each_sg(sgt->sgl, sg, sgt->nents, i) { | ||
| 116 | unsigned int j; | ||
| 117 | u64 surf_pa = sg_phys(sg); | ||
| 118 | unsigned int n = (unsigned int)(sg->length >> page_size_log2); | ||
| 119 | |||
| 120 | nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %u pages", surf_pa, n); | ||
| 121 | |||
| 122 | for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) { | ||
| 123 | u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift; | ||
| 124 | u8 scatter_bit = parity(addr); | ||
| 125 | u8 bit = page & 7; | ||
| 126 | |||
| 127 | d |= scatter_bit << bit; | ||
| 128 | if (bit == 7) { | ||
| 129 | scatter_buffer[page >> 3] = d; | ||
| 130 | d = 0; | ||
| 131 | } | ||
| 132 | |||
| 133 | ++page; | ||
| 134 | --pages_left; | ||
| 135 | } | ||
| 136 | |||
| 137 | if (pages_left == 0) | ||
| 138 | break; | ||
| 139 | } | ||
| 140 | |||
| 141 | /* write the last byte in case the number of pages is not divisible by 8 */ | ||
| 142 | if ((page & 7) != 0) | ||
| 143 | scatter_buffer[page >> 3] = d; | ||
| 144 | |||
| 145 | if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) { | ||
| 146 | nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:"); | ||
| 147 | for (i = 0; i < page >> 3; i++) { | ||
| 148 | nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | return 0; | ||
| 153 | } | ||
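Each surface page contributes one scatter bit, packed eight per byte, so the caller must size the scatter buffer to at least the rounded-up byte count that the guard at the top of the function enforces. A hedged sizing sketch (the helper name is ours, not the driver's):

    #include <stddef.h>

    /* one scatter bit per 4 KiB page, eight bits per byte, rounded up */
    static size_t scatter_buffer_bytes_needed(size_t surface_size)
    {
            const size_t page_size = 4096;
            size_t pages = (surface_size + page_size - 1) / page_size;

            return (pages + 7) / 8;
    }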
diff --git a/include/os/linux/cde_gp10b.h b/include/os/linux/cde_gp10b.h new file mode 100644 index 0000000..3ecca2a --- /dev/null +++ b/include/os/linux/cde_gp10b.h | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | /* | ||
| 2 | * GP10B CDE | ||
| 3 | * | ||
| 4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 7 | * copy of this software and associated documentation files (the "Software"), | ||
| 8 | * to deal in the Software without restriction, including without limitation | ||
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 11 | * Software is furnished to do so, subject to the following conditions: | ||
| 12 | * | ||
| 13 | * The above copyright notice and this permission notice shall be included in | ||
| 14 | * all copies or substantial portions of the Software. | ||
| 15 | * | ||
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| 22 | * DEALINGS IN THE SOFTWARE. | ||
| 23 | */ | ||
| 24 | |||
| 25 | #ifndef _NVHOST_GP10B_CDE | ||
| 26 | #define _NVHOST_GP10B_CDE | ||
| 27 | |||
| 28 | #include "os_linux.h" | ||
| 29 | |||
| 30 | void gp10b_cde_get_program_numbers(struct gk20a *g, | ||
| 31 | u32 block_height_log2, | ||
| 32 | u32 shader_parameter, | ||
| 33 | int *hprog_out, int *vprog_out); | ||
| 34 | bool gp10b_need_scatter_buffer(struct gk20a *g); | ||
| 35 | int gp10b_populate_scatter_buffer(struct gk20a *g, | ||
| 36 | struct sg_table *sgt, | ||
| 37 | size_t surface_size, | ||
| 38 | void *scatter_buffer_ptr, | ||
| 39 | size_t scatter_buffer_size); | ||
| 40 | #endif | ||
diff --git a/include/os/linux/channel.h b/include/os/linux/channel.h new file mode 100644 index 0000000..e6326fa --- /dev/null +++ b/include/os/linux/channel.h | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | #ifndef NVGPU_LINUX_CHANNEL_H | ||
| 17 | #define NVGPU_LINUX_CHANNEL_H | ||
| 18 | |||
| 19 | #include <linux/workqueue.h> | ||
| 20 | #include <linux/dma-buf.h> | ||
| 21 | |||
| 22 | #include <nvgpu/types.h> | ||
| 23 | |||
| 24 | struct channel_gk20a; | ||
| 25 | struct nvgpu_gpfifo; | ||
| 26 | struct nvgpu_submit_gpfifo_args; | ||
| 27 | struct nvgpu_channel_fence; | ||
| 28 | struct gk20a_fence; | ||
| 29 | struct fifo_profile_gk20a; | ||
| 30 | struct nvgpu_os_linux; | ||
| 31 | |||
| 32 | struct sync_fence; | ||
| 33 | struct sync_timeline; | ||
| 34 | |||
| 35 | struct nvgpu_channel_completion_cb { | ||
| 36 | /* | ||
| 37 | * Signal channel owner via a callback, if set, in job cleanup with | ||
| 38 | * schedule_work. Means that something finished on the channel (perhaps | ||
| 39 | * more than one job). | ||
| 40 | */ | ||
| 41 | void (*fn)(struct channel_gk20a *, void *); | ||
| 42 | void *user_data; | ||
| 43 | /* Make access to the two above atomic */ | ||
| 44 | struct nvgpu_spinlock lock; | ||
| 45 | /* Per-channel async work task, cannot reschedule itself */ | ||
| 46 | struct work_struct work; | ||
| 47 | }; | ||
| 48 | |||
| 49 | struct nvgpu_error_notifier { | ||
| 50 | struct dma_buf *dmabuf; | ||
| 51 | void *vaddr; | ||
| 52 | |||
| 53 | struct nvgpu_notification *notification; | ||
| 54 | |||
| 55 | struct nvgpu_mutex mutex; | ||
| 56 | }; | ||
| 57 | |||
| 58 | /* | ||
| 59 | * This struct contains fence_related data. | ||
| 60 | * e.g. sync_timeline for sync_fences. | ||
| 61 | */ | ||
| 62 | struct nvgpu_os_fence_framework { | ||
| 63 | struct sync_timeline *timeline; | ||
| 64 | }; | ||
| 65 | |||
| 66 | struct nvgpu_usermode_bufs_linux { | ||
| 67 | /* | ||
| 68 | * Common low level info of these is stored in nvgpu_mems in | ||
| 69 | * channel_gk20a; these hold lifetimes for the actual dmabuf and its | ||
| 70 | * dma mapping. | ||
| 71 | */ | ||
| 72 | struct nvgpu_usermode_buf_linux { | ||
| 73 | struct dma_buf *dmabuf; | ||
| 74 | struct dma_buf_attachment *attachment; | ||
| 75 | struct sg_table *sgt; | ||
| 76 | } gpfifo, userd; | ||
| 77 | }; | ||
| 78 | |||
| 79 | struct nvgpu_channel_linux { | ||
| 80 | struct channel_gk20a *ch; | ||
| 81 | |||
| 82 | struct nvgpu_os_fence_framework fence_framework; | ||
| 83 | |||
| 84 | struct nvgpu_channel_completion_cb completion_cb; | ||
| 85 | struct nvgpu_error_notifier error_notifier; | ||
| 86 | |||
| 87 | struct dma_buf *cyclestate_buffer_handler; | ||
| 88 | |||
| 89 | struct nvgpu_usermode_bufs_linux usermode; | ||
| 90 | }; | ||
| 91 | |||
| 92 | u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags); | ||
| 93 | int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); | ||
| 94 | void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l); | ||
| 95 | |||
| 96 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | ||
| 97 | void (*update_fn)(struct channel_gk20a *, void *), | ||
| 98 | void *update_fn_data, | ||
| 99 | int runlist_id, | ||
| 100 | bool is_privileged_channel); | ||
| 101 | |||
| 102 | #endif | ||
diff --git a/include/os/linux/clk.c b/include/os/linux/clk.c new file mode 100644 index 0000000..e9796ea --- /dev/null +++ b/include/os/linux/clk.c | |||
| @@ -0,0 +1,286 @@ | |||
| 1 | /* | ||
| 2 | * Linux clock support | ||
| 3 | * | ||
| 4 | * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/clk.h> | ||
| 20 | |||
| 21 | #include <soc/tegra/tegra-dvfs.h> | ||
| 22 | #include <soc/tegra/tegra-bpmp-dvfs.h> | ||
| 23 | |||
| 24 | #include "clk.h" | ||
| 25 | #include "os_linux.h" | ||
| 26 | #include "platform_gk20a.h" | ||
| 27 | |||
| 28 | #include <nvgpu/gk20a.h> | ||
| 29 | #include <nvgpu/clk_arb.h> | ||
| 30 | |||
| 31 | #define HZ_TO_MHZ(x) ((x) / 1000000) | ||
| 32 | |||
| 33 | static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) | ||
| 34 | { | ||
| 35 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
| 36 | unsigned long ret; | ||
| 37 | |||
| 38 | switch (api_domain) { | ||
| 39 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
| 40 | if (g->clk.tegra_clk) | ||
| 41 | ret = clk_get_rate(g->clk.tegra_clk); | ||
| 42 | else | ||
| 43 | ret = clk_get_rate(platform->clk[0]); | ||
| 44 | break; | ||
| 45 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
| 46 | ret = clk_get_rate(platform->clk[1]); | ||
| 47 | break; | ||
| 48 | default: | ||
| 49 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
| 50 | ret = 0; | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | |||
| 54 | return ret; | ||
| 55 | } | ||
| 56 | |||
| 57 | static int nvgpu_linux_clk_set_rate(struct gk20a *g, | ||
| 58 | u32 api_domain, unsigned long rate) | ||
| 59 | { | ||
| 60 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
| 61 | int ret; | ||
| 62 | |||
| 63 | switch (api_domain) { | ||
| 64 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
| 65 | if (g->clk.tegra_clk) | ||
| 66 | ret = clk_set_rate(g->clk.tegra_clk, rate); | ||
| 67 | else | ||
| 68 | ret = clk_set_rate(platform->clk[0], rate); | ||
| 69 | break; | ||
| 70 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
| 71 | ret = clk_set_rate(platform->clk[1], rate); | ||
| 72 | break; | ||
| 73 | default: | ||
| 74 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
| 75 | ret = -EINVAL; | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | |||
| 79 | return ret; | ||
| 80 | } | ||
| 81 | |||
| 82 | static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g) | ||
| 83 | { | ||
| 84 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
| 85 | |||
| 86 | /* | ||
| 87 | * On Tegra platforms with a GPCPLL bus (gbus), the GPU tegra_clk clock | ||
| 88 | * exposed to the frequency governor is a shared user on the gbus. The gbus | ||
| 89 | * can be accessed as the GPU clock parent and incorporates DVFS-related data. | ||
| 90 | */ | ||
| 91 | if (g->clk.tegra_clk) | ||
| 92 | return tegra_dvfs_get_fmax_at_vmin_safe_t( | ||
| 93 | g->clk.tegra_clk_parent); | ||
| 94 | |||
| 95 | if (platform->maxmin_clk_id) | ||
| 96 | return tegra_bpmp_dvfs_get_fmax_at_vmin( | ||
| 97 | platform->maxmin_clk_id); | ||
| 98 | |||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g) | ||
| 103 | { | ||
| 104 | struct clk *c; | ||
| 105 | |||
| 106 | c = clk_get_sys("gpu_ref", "gpu_ref"); | ||
| 107 | if (IS_ERR(c)) { | ||
| 108 | nvgpu_err(g, "failed to get GPCPLL reference clock"); | ||
| 109 | return 0; | ||
| 110 | } | ||
| 111 | |||
| 112 | return clk_get_rate(c); | ||
| 113 | } | ||
| 114 | |||
| 115 | static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk, | ||
| 116 | unsigned long rate) | ||
| 117 | { | ||
| 118 | return tegra_dvfs_predict_mv_at_hz_cur_tfloor( | ||
| 119 | clk->tegra_clk_parent, rate); | ||
| 120 | } | ||
| 121 | |||
| 122 | static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain) | ||
| 123 | { | ||
| 124 | int ret; | ||
| 125 | u16 min_mhz, max_mhz; | ||
| 126 | |||
| 127 | switch (api_domain) { | ||
| 128 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
| 129 | ret = tegra_dvfs_get_maxrate(g->clk.tegra_clk_parent); | ||
| 130 | /* If dvfs not supported */ | ||
| 131 | if (ret == 0) { | ||
| 132 | int err = nvgpu_clk_arb_get_arbiter_clk_range(g, | ||
| 133 | NVGPU_CLK_DOMAIN_GPCCLK, | ||
| 134 | &min_mhz, &max_mhz); | ||
| 135 | if (err == 0) { | ||
| 136 | ret = max_mhz * 1000000L; | ||
| 137 | } | ||
| 138 | } | ||
| 139 | break; | ||
| 140 | default: | ||
| 141 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
| 142 | ret = 0; | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | |||
| 146 | return ret; | ||
| 147 | } | ||
| 148 | |||
| 149 | /* | ||
| 150 | * This API returns the list of frequencies supported by the iGPU. | ||
| 151 | * Set *num_points to 0 to query the size of the freqs list, which is | ||
| 152 | * returned through *num_points itself. The freqs array must be provided | ||
| 153 | * by the caller. If *num_points is non-zero, the freqs array size must | ||
| 154 | * be at least *num_points. | ||
| 155 | */ | ||
| 156 | static int nvgpu_linux_clk_get_f_points(struct gk20a *g, | ||
| 157 | u32 api_domain, u32 *num_points, u16 *freqs) | ||
| 158 | { | ||
| 159 | struct device *dev = dev_from_gk20a(g); | ||
| 160 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 161 | unsigned long *gpu_freq_table; | ||
| 162 | int ret = 0; | ||
| 163 | int num_supported_freq = 0; | ||
| 164 | u32 i; | ||
| 165 | |||
| 166 | switch (api_domain) { | ||
| 167 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
| 168 | ret = platform->get_clk_freqs(dev, &gpu_freq_table, | ||
| 169 | &num_supported_freq); | ||
| 170 | |||
| 171 | if (ret) { | ||
| 172 | return ret; | ||
| 173 | } | ||
| 174 | |||
| 175 | if (num_points == NULL) { | ||
| 176 | return -EINVAL; | ||
| 177 | } | ||
| 178 | |||
| 179 | if (*num_points != 0U) { | ||
| 180 | if (freqs == NULL || (*num_points > (u32)num_supported_freq)) { | ||
| 181 | return -EINVAL; | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 185 | if (*num_points == 0) { | ||
| 186 | *num_points = num_supported_freq; | ||
| 187 | } else { | ||
| 188 | for (i = 0; i < *num_points; i++) { | ||
| 189 | freqs[i] = HZ_TO_MHZ(gpu_freq_table[i]); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | break; | ||
| 193 | default: | ||
| 194 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
| 195 | ret = -EINVAL; | ||
| 196 | break; | ||
| 197 | } | ||
| 198 | |||
| 199 | return ret; | ||
| 200 | } | ||
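A hedged usage sketch of the size-query protocol described in the comment above; error handling is omitted and the allocation helper is assumed from nvgpu/kmem.h:

    u32 num_points = 0;
    u16 *freqs;

    /* first call: *num_points == 0 asks only for the count */
    g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
                                       &num_points, NULL);

    freqs = nvgpu_kcalloc(g, num_points, sizeof(*freqs));

    /* second call: fill num_points entries, in MHz */
    g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
                                       &num_points, freqs);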
| 201 | |||
| 202 | static int nvgpu_clk_get_range(struct gk20a *g, u32 api_domain, | ||
| 203 | u16 *min_mhz, u16 *max_mhz) | ||
| 204 | { | ||
| 205 | struct device *dev = dev_from_gk20a(g); | ||
| 206 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 207 | unsigned long *freqs; | ||
| 208 | int num_freqs; | ||
| 209 | int ret; | ||
| 210 | |||
| 211 | switch (api_domain) { | ||
| 212 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
| 213 | ret = platform->get_clk_freqs(dev, &freqs, &num_freqs); | ||
| 214 | |||
| 215 | if (!ret) { | ||
| 216 | *min_mhz = HZ_TO_MHZ(freqs[0]); | ||
| 217 | *max_mhz = HZ_TO_MHZ(freqs[num_freqs - 1]); | ||
| 218 | } | ||
| 219 | break; | ||
| 220 | default: | ||
| 221 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
| 222 | ret = -EINVAL; | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | |||
| 226 | return ret; | ||
| 227 | } | ||
| 228 | |||
| 229 | /* rate_target should be passed in Hz; | ||
| 230 | * rounded_rate is returned in Hz. */ | ||
| 231 | static int nvgpu_clk_get_round_rate(struct gk20a *g, | ||
| 232 | u32 api_domain, unsigned long rate_target, | ||
| 233 | unsigned long *rounded_rate) | ||
| 234 | { | ||
| 235 | struct device *dev = dev_from_gk20a(g); | ||
| 236 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 237 | unsigned long *freqs; | ||
| 238 | int num_freqs; | ||
| 239 | int i, ret = 0; | ||
| 240 | |||
| 241 | switch (api_domain) { | ||
| 242 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
| 243 | ret = platform->get_clk_freqs(dev, &freqs, &num_freqs); | ||
| 244 | |||
| 245 | for (i = 0; i < num_freqs; ++i) { | ||
| 246 | if (freqs[i] >= rate_target) { | ||
| 247 | *rounded_rate = freqs[i]; | ||
| 248 | return 0; | ||
| 249 | } | ||
| 250 | } | ||
| 251 | *rounded_rate = freqs[num_freqs - 1]; | ||
| 252 | break; | ||
| 253 | default: | ||
| 254 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
| 255 | ret = -EINVAL; | ||
| 256 | break; | ||
| 257 | } | ||
| 258 | |||
| 259 | return ret; | ||
| 260 | } | ||
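In effect the function rounds up to the next supported frequency and clamps at the table maximum: with a frequency table of {76.8, 153.6, 230.4} MHz, a 100 MHz target rounds to 153.6 MHz and a 300 MHz target clamps to 230.4 MHz.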
| 261 | |||
| 262 | static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) | ||
| 263 | { | ||
| 264 | return clk_prepare_enable(clk->tegra_clk); | ||
| 265 | } | ||
| 266 | |||
| 267 | static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk) | ||
| 268 | { | ||
| 269 | clk_disable_unprepare(clk->tegra_clk); | ||
| 270 | } | ||
| 271 | |||
| 272 | void nvgpu_linux_init_clk_support(struct gk20a *g) | ||
| 273 | { | ||
| 274 | g->ops.clk.get_rate = nvgpu_linux_clk_get_rate; | ||
| 275 | g->ops.clk.set_rate = nvgpu_linux_clk_set_rate; | ||
| 276 | g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe; | ||
| 277 | g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate; | ||
| 278 | g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor; | ||
| 279 | g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; | ||
| 280 | g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; | ||
| 281 | g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; | ||
| 282 | g->ops.clk.clk_domain_get_f_points = nvgpu_linux_clk_get_f_points; | ||
| 283 | g->ops.clk.get_clk_range = nvgpu_clk_get_range; | ||
| 284 | g->ops.clk.clk_get_round_rate = nvgpu_clk_get_round_rate; | ||
| 285 | g->ops.clk.measure_freq = nvgpu_clk_measure_freq; | ||
| 286 | } | ||
diff --git a/include/os/linux/clk.h b/include/os/linux/clk.h new file mode 100644 index 0000000..614a7fd --- /dev/null +++ b/include/os/linux/clk.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef NVGPU_COMMON_LINUX_CLK_H | ||
| 18 | #define NVGPU_COMMON_LINUX_CLK_H | ||
| 19 | struct gk20a; | ||
| 20 | void nvgpu_linux_init_clk_support(struct gk20a *g); | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/comptags.c b/include/os/linux/comptags.c new file mode 100644 index 0000000..ab37197 --- /dev/null +++ b/include/os/linux/comptags.c | |||
| @@ -0,0 +1,140 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/dma-buf.h> | ||
| 18 | |||
| 19 | #include <nvgpu/comptags.h> | ||
| 20 | #include <nvgpu/gk20a.h> | ||
| 21 | |||
| 22 | #include <nvgpu/linux/vm.h> | ||
| 23 | |||
| 24 | #include "dmabuf.h" | ||
| 25 | |||
| 26 | void gk20a_get_comptags(struct nvgpu_os_buffer *buf, | ||
| 27 | struct gk20a_comptags *comptags) | ||
| 28 | { | ||
| 29 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
| 30 | buf->dev); | ||
| 31 | |||
| 32 | if (!comptags) | ||
| 33 | return; | ||
| 34 | |||
| 35 | if (!priv) { | ||
| 36 | memset(comptags, 0, sizeof(*comptags)); | ||
| 37 | return; | ||
| 38 | } | ||
| 39 | |||
| 40 | nvgpu_mutex_acquire(&priv->lock); | ||
| 41 | *comptags = priv->comptags; | ||
| 42 | nvgpu_mutex_release(&priv->lock); | ||
| 43 | } | ||
| 44 | |||
| 45 | int gk20a_alloc_or_get_comptags(struct gk20a *g, | ||
| 46 | struct nvgpu_os_buffer *buf, | ||
| 47 | struct gk20a_comptag_allocator *allocator, | ||
| 48 | struct gk20a_comptags *comptags) | ||
| 49 | { | ||
| 50 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
| 51 | buf->dev); | ||
| 52 | u32 offset; | ||
| 53 | int err; | ||
| 54 | unsigned int ctag_granularity; | ||
| 55 | u32 lines; | ||
| 56 | |||
| 57 | if (!priv) | ||
| 58 | return -ENOSYS; | ||
| 59 | |||
| 60 | nvgpu_mutex_acquire(&priv->lock); | ||
| 61 | |||
| 62 | if (priv->comptags.allocated) { | ||
| 63 | /* | ||
| 64 | * already allocated | ||
| 65 | */ | ||
| 66 | *comptags = priv->comptags; | ||
| 67 | |||
| 68 | err = 0; | ||
| 69 | goto exit_locked; | ||
| 70 | } | ||
| 71 | |||
| 72 | ctag_granularity = g->ops.fb.compression_page_size(g); | ||
| 73 | lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity); | ||
| 74 | |||
| 75 | /* 0-sized buffer? Shouldn't occur, but check anyway. */ | ||
| 76 | if (lines < 1) { | ||
| 77 | err = -EINVAL; | ||
| 78 | goto exit_locked; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* store the allocator so we can use it when we free the ctags */ | ||
| 82 | priv->comptag_allocator = allocator; | ||
| 83 | err = gk20a_comptaglines_alloc(allocator, &offset, lines); | ||
| 84 | if (!err) { | ||
| 85 | priv->comptags.offset = offset; | ||
| 86 | priv->comptags.lines = lines; | ||
| 87 | priv->comptags.needs_clear = true; | ||
| 88 | } else { | ||
| 89 | priv->comptags.offset = 0; | ||
| 90 | priv->comptags.lines = 0; | ||
| 91 | priv->comptags.needs_clear = false; | ||
| 92 | } | ||
| 93 | |||
| 94 | /* | ||
| 95 | * We don't report an error here if comptag alloc failed. The | ||
| 96 | * caller will simply fall back to incompressible kinds. It | ||
| 97 | * would not be safe to re-allocate comptags anyway on | ||
| 98 | * successive calls, as that would break map aliasing. | ||
| 99 | */ | ||
| 100 | err = 0; | ||
| 101 | priv->comptags.allocated = true; | ||
| 102 | |||
| 103 | *comptags = priv->comptags; | ||
| 104 | |||
| 105 | exit_locked: | ||
| 106 | nvgpu_mutex_release(&priv->lock); | ||
| 107 | |||
| 108 | return err; | ||
| 109 | } | ||
| 110 | |||
| 111 | bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf) | ||
| 112 | { | ||
| 113 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
| 114 | buf->dev); | ||
| 115 | bool clear_started = false; | ||
| 116 | |||
| 117 | if (priv) { | ||
| 118 | nvgpu_mutex_acquire(&priv->lock); | ||
| 119 | |||
| 120 | clear_started = priv->comptags.needs_clear; | ||
| 121 | |||
| 122 | if (!clear_started) | ||
| 123 | nvgpu_mutex_release(&priv->lock); | ||
| 124 | } | ||
| 125 | |||
| 126 | return clear_started; | ||
| 127 | } | ||
| 128 | |||
| 129 | void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf, | ||
| 130 | bool clear_successful) | ||
| 131 | { | ||
| 132 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
| 133 | buf->dev); | ||
| 134 | if (priv) { | ||
| 135 | if (clear_successful) | ||
| 136 | priv->comptags.needs_clear = false; | ||
| 137 | |||
| 138 | nvgpu_mutex_release(&priv->lock); | ||
| 139 | } | ||
| 140 | } | ||
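The start_clear/finish_clear pair above implements a hand-off lock: gk20a_comptags_start_clear() returns true with priv->lock still held, and gk20a_comptags_finish_clear() must then be called to drop it (clearing needs_clear only if the clear succeeded). A minimal caller sketch, assuming a hypothetical HW clear helper clear_comptags_hw() that is not part of this file:

	static void maybe_clear_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf)
	{
		if (gk20a_comptags_start_clear(buf)) {
			/* priv->lock is held here until finish_clear() runs */
			int err = clear_comptags_hw(g, buf); /* hypothetical */

			/* drops the lock; needs_clear stays set if the clear failed */
			gk20a_comptags_finish_clear(buf, err == 0);
		}
	}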
diff --git a/include/os/linux/cond.c b/include/os/linux/cond.c new file mode 100644 index 0000000..633c34f --- /dev/null +++ b/include/os/linux/cond.c | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/wait.h> | ||
| 18 | #include <linux/sched.h> | ||
| 19 | |||
| 20 | #include <nvgpu/cond.h> | ||
| 21 | |||
| 22 | int nvgpu_cond_init(struct nvgpu_cond *cond) | ||
| 23 | { | ||
| 24 | init_waitqueue_head(&cond->wq); | ||
| 25 | cond->initialized = true; | ||
| 26 | |||
| 27 | return 0; | ||
| 28 | } | ||
| 29 | |||
| 30 | void nvgpu_cond_destroy(struct nvgpu_cond *cond) | ||
| 31 | { | ||
| 32 | cond->initialized = false; | ||
| 33 | } | ||
| 34 | |||
| 35 | int nvgpu_cond_signal(struct nvgpu_cond *cond) | ||
| 36 | { | ||
| 37 | if (!cond->initialized) | ||
| 38 | return -EINVAL; | ||
| 39 | |||
| 40 | wake_up(&cond->wq); | ||
| 41 | |||
| 42 | return 0; | ||
| 43 | } | ||
| 44 | |||
| 45 | int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond) | ||
| 46 | { | ||
| 47 | if (!cond->initialized) | ||
| 48 | return -EINVAL; | ||
| 49 | |||
| 50 | wake_up_interruptible(&cond->wq); | ||
| 51 | |||
| 52 | return 0; | ||
| 53 | } | ||
| 54 | |||
| 55 | int nvgpu_cond_broadcast(struct nvgpu_cond *cond) | ||
| 56 | { | ||
| 57 | if (!cond->initialized) | ||
| 58 | return -EINVAL; | ||
| 59 | |||
| 60 | wake_up_all(&cond->wq); | ||
| 61 | |||
| 62 | return 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond) | ||
| 66 | { | ||
| 67 | if (!cond->initialized) | ||
| 68 | return -EINVAL; | ||
| 69 | |||
| 70 | wake_up_interruptible_all(&cond->wq); | ||
| 71 | |||
| 72 | return 0; | ||
| 73 | } | ||
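These wrappers pair with the NVGPU_COND_WAIT* macros, as in the ctxsw trace reader below. A minimal producer/consumer sketch; the data_ready flag and both function names are illustrative:

	static struct nvgpu_cond cond;
	static bool data_ready;

	/* consumer: sleeps until data_ready is observed true (0 = no timeout) */
	static int wait_for_data(void)
	{
		return NVGPU_COND_WAIT_INTERRUPTIBLE(&cond, data_ready, 0);
	}

	/* producer: publish the data first, then wake all interruptible waiters */
	static void publish_data(void)
	{
		data_ready = true;
		nvgpu_cond_broadcast_interruptible(&cond);
	}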
diff --git a/include/os/linux/ctxsw_trace.c b/include/os/linux/ctxsw_trace.c new file mode 100644 index 0000000..2d36d9c --- /dev/null +++ b/include/os/linux/ctxsw_trace.c | |||
| @@ -0,0 +1,792 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/wait.h> | ||
| 18 | #include <linux/ktime.h> | ||
| 19 | #include <linux/uaccess.h> | ||
| 20 | #include <linux/poll.h> | ||
| 21 | #include <trace/events/gk20a.h> | ||
| 22 | #include <uapi/linux/nvgpu.h> | ||
| 23 | #include <nvgpu/ctxsw_trace.h> | ||
| 24 | #include <nvgpu/kmem.h> | ||
| 25 | #include <nvgpu/log.h> | ||
| 26 | #include <nvgpu/atomic.h> | ||
| 27 | #include <nvgpu/barrier.h> | ||
| 28 | #include <nvgpu/gk20a.h> | ||
| 29 | #include <nvgpu/channel.h> | ||
| 30 | |||
| 31 | #include "gk20a/gr_gk20a.h" | ||
| 32 | #include "gk20a/fecs_trace_gk20a.h" | ||
| 33 | |||
| 34 | #include "platform_gk20a.h" | ||
| 35 | #include "os_linux.h" | ||
| 36 | #include "ctxsw_trace.h" | ||
| 37 | |||
| 38 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
| 39 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
| 40 | |||
| 41 | #define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) | ||
| 42 | |||
| 43 | /* Userland-facing FIFO (one global + eventually one per VM) */ | ||
| 44 | struct gk20a_ctxsw_dev { | ||
| 45 | struct gk20a *g; | ||
| 46 | |||
| 47 | struct nvgpu_ctxsw_ring_header *hdr; | ||
| 48 | struct nvgpu_gpu_ctxsw_trace_entry *ents; | ||
| 49 | struct nvgpu_gpu_ctxsw_trace_filter filter; | ||
| 50 | bool write_enabled; | ||
| 51 | struct nvgpu_cond readout_wq; | ||
| 52 | size_t size; | ||
| 53 | u32 num_ents; | ||
| 54 | |||
| 55 | nvgpu_atomic_t vma_ref; | ||
| 56 | |||
| 57 | struct nvgpu_mutex write_lock; | ||
| 58 | }; | ||
| 59 | |||
| 60 | |||
| 61 | struct gk20a_ctxsw_trace { | ||
| 62 | struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; | ||
| 63 | }; | ||
| 64 | |||
| 65 | static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) | ||
| 66 | { | ||
| 67 | return (hdr->write_idx == hdr->read_idx); | ||
| 68 | } | ||
| 69 | |||
| 70 | static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) | ||
| 71 | { | ||
| 72 | return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; | ||
| 73 | } | ||
| 74 | |||
| 75 | static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) | ||
| 76 | { | ||
| 77 | return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; | ||
| 78 | } | ||
| 79 | |||
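| | /* | ||
| | * These helpers use the one-slot-free ring convention: ring_is_full() | ||
| | * reports full one slot early, so a ring with num_ents slots holds at | ||
| | * most num_ents - 1 entries. Example: num_ents = 4, read_idx = 0, | ||
| | * write_idx = 3 gives ring_is_full() true ((3 + 1) % 4 == 0) and | ||
| | * ring_len() == 3. The unsigned modulo in ring_len() is exact only | ||
| | * when num_ents divides 2^32; it is used for debug logging only. | ||
| | */ | ||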
| 80 | static void nvgpu_set_ctxsw_trace_entry(struct nvgpu_ctxsw_trace_entry *entry_dst, | ||
| 81 | struct nvgpu_gpu_ctxsw_trace_entry *entry_src) | ||
| 82 | { | ||
| 83 | entry_dst->tag = entry_src->tag; | ||
| 84 | entry_dst->vmid = entry_src->vmid; | ||
| 85 | entry_dst->seqno = entry_src->seqno; | ||
| 86 | entry_dst->context_id = entry_src->context_id; | ||
| 87 | entry_dst->pid = entry_src->pid; | ||
| 88 | entry_dst->timestamp = entry_src->timestamp; | ||
| 89 | } | ||
| 90 | |||
| 91 | ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, | ||
| 92 | loff_t *off) | ||
| 93 | { | ||
| 94 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
| 95 | struct gk20a *g = dev->g; | ||
| 96 | struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; | ||
| 97 | struct nvgpu_ctxsw_trace_entry __user *entry = | ||
| 98 | (struct nvgpu_ctxsw_trace_entry __user *) buf; | ||
| 99 | struct nvgpu_ctxsw_trace_entry user_entry; | ||
| 100 | size_t copied = 0; | ||
| 101 | int err; | ||
| 102 | |||
| 103 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, | ||
| 104 | "filp=%p buf=%p size=%zu", filp, buf, size); | ||
| 105 | |||
| 106 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 107 | while (ring_is_empty(hdr)) { | ||
| 108 | nvgpu_mutex_release(&dev->write_lock); | ||
| 109 | if (filp->f_flags & O_NONBLOCK) | ||
| 110 | return -EAGAIN; | ||
| 111 | err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, | ||
| 112 | !ring_is_empty(hdr), 0); | ||
| 113 | if (err) | ||
| 114 | return err; | ||
| 115 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 116 | } | ||
| 117 | |||
| 118 | while (size >= sizeof(struct nvgpu_gpu_ctxsw_trace_entry)) { | ||
| 119 | if (ring_is_empty(hdr)) | ||
| 120 | break; | ||
| 121 | |||
| 122 | nvgpu_set_ctxsw_trace_entry(&user_entry, &dev->ents[hdr->read_idx]); | ||
| 123 | if (copy_to_user(entry, &user_entry, | ||
| 124 | sizeof(*entry))) { | ||
| 125 | nvgpu_mutex_release(&dev->write_lock); | ||
| 126 | return -EFAULT; | ||
| 127 | } | ||
| 128 | |||
| 129 | hdr->read_idx++; | ||
| 130 | if (hdr->read_idx >= hdr->num_ents) | ||
| 131 | hdr->read_idx = 0; | ||
| 132 | |||
| 133 | entry++; | ||
| 134 | copied += sizeof(*entry); | ||
| 135 | size -= sizeof(*entry); | ||
| 136 | } | ||
| 137 | |||
| 138 | nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied, | ||
| 139 | hdr->read_idx); | ||
| 140 | |||
| 141 | *off = hdr->read_idx; | ||
| 142 | nvgpu_mutex_release(&dev->write_lock); | ||
| 143 | |||
| 144 | return copied; | ||
| 145 | } | ||
| 146 | |||
| 147 | static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) | ||
| 148 | { | ||
| 149 | struct gk20a *g = dev->g; | ||
| 150 | |||
| 151 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); | ||
| 152 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 153 | dev->write_enabled = true; | ||
| 154 | nvgpu_mutex_release(&dev->write_lock); | ||
| 155 | dev->g->ops.fecs_trace.enable(dev->g); | ||
| 156 | return 0; | ||
| 157 | } | ||
| 158 | |||
| 159 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) | ||
| 160 | { | ||
| 161 | struct gk20a *g = dev->g; | ||
| 162 | |||
| 163 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); | ||
| 164 | dev->g->ops.fecs_trace.disable(dev->g); | ||
| 165 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 166 | dev->write_enabled = false; | ||
| 167 | nvgpu_mutex_release(&dev->write_lock); | ||
| 168 | return 0; | ||
| 169 | } | ||
| 170 | |||
| 171 | static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, | ||
| 172 | size_t size) | ||
| 173 | { | ||
| 174 | struct gk20a *g = dev->g; | ||
| 175 | void *buf; | ||
| 176 | int err; | ||
| 177 | |||
| 178 | if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref))) | ||
| 179 | return -EBUSY; | ||
| 180 | |||
| 181 | err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); | ||
| 182 | if (err) | ||
| 183 | return err; | ||
| 184 | |||
| 185 | |||
| 186 | dev->hdr = buf; | ||
| 187 | dev->ents = (struct nvgpu_gpu_ctxsw_trace_entry *) (dev->hdr + 1); | ||
| 188 | dev->size = size; | ||
| 189 | dev->num_ents = dev->hdr->num_ents; | ||
| 190 | |||
| 191 | nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", | ||
| 192 | dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); | ||
| 193 | return 0; | ||
| 194 | } | ||
| 195 | |||
| 196 | int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, | ||
| 197 | void **buf, size_t *size) | ||
| 198 | { | ||
| 199 | struct nvgpu_ctxsw_ring_header *hdr; | ||
| 200 | |||
| 201 | *size = roundup(*size, PAGE_SIZE); | ||
| 202 | hdr = vmalloc_user(*size); | ||
| 203 | if (!hdr) | ||
| 204 | return -ENOMEM; | ||
| 205 | |||
| 206 | hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; | ||
| 207 | hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; | ||
| 208 | hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) | ||
| 209 | / sizeof(struct nvgpu_gpu_ctxsw_trace_entry); | ||
| 210 | hdr->ent_size = sizeof(struct nvgpu_gpu_ctxsw_trace_entry); | ||
| 211 | hdr->drop_count = 0; | ||
| 212 | hdr->read_idx = 0; | ||
| 213 | hdr->write_idx = 0; | ||
| 214 | hdr->write_seqno = 0; | ||
| 215 | |||
| 216 | *buf = hdr; | ||
| 217 | return 0; | ||
| 218 | } | ||
| 219 | |||
| 220 | int gk20a_ctxsw_dev_ring_free(struct gk20a *g) | ||
| 221 | { | ||
| 222 | struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; | ||
| 223 | |||
| 224 | nvgpu_vfree(g, dev->hdr); | ||
| 225 | return 0; | ||
| 226 | } | ||
| 227 | |||
| 228 | static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, | ||
| 229 | struct nvgpu_ctxsw_ring_setup_args *args) | ||
| 230 | { | ||
| 231 | struct gk20a *g = dev->g; | ||
| 232 | size_t size = args->size; | ||
| 233 | int ret; | ||
| 234 | |||
| 235 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); | ||
| 236 | |||
| 237 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) | ||
| 238 | return -EINVAL; | ||
| 239 | |||
| 240 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 241 | ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); | ||
| 242 | nvgpu_mutex_release(&dev->write_lock); | ||
| 243 | |||
| 244 | return ret; | ||
| 245 | } | ||
| 246 | |||
| 247 | static void nvgpu_set_ctxsw_trace_filter_args(struct nvgpu_gpu_ctxsw_trace_filter *filter_dst, | ||
| 248 | struct nvgpu_ctxsw_trace_filter *filter_src) | ||
| 249 | { | ||
| 250 | memcpy(filter_dst->tag_bits, filter_src->tag_bits, sizeof(filter_dst->tag_bits)); | ||
| 251 | } | ||
| 252 | |||
| 253 | static void nvgpu_get_ctxsw_trace_filter_args(struct nvgpu_ctxsw_trace_filter *filter_dst, | ||
| 254 | struct nvgpu_gpu_ctxsw_trace_filter *filter_src) | ||
| 255 | { | ||
| 256 | memcpy(filter_dst->tag_bits, filter_src->tag_bits, sizeof(filter_dst->tag_bits)); | ||
| 257 | } | ||
| 258 | |||
| 259 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | ||
| 260 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
| 261 | { | ||
| 262 | struct gk20a *g = dev->g; | ||
| 263 | |||
| 264 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 265 | nvgpu_set_ctxsw_trace_filter_args(&dev->filter, &args->filter); | ||
| 266 | nvgpu_mutex_release(&dev->write_lock); | ||
| 267 | |||
| 268 | if (g->ops.fecs_trace.set_filter) | ||
| 269 | g->ops.fecs_trace.set_filter(g, &dev->filter); | ||
| 270 | return 0; | ||
| 271 | } | ||
| 272 | |||
| 273 | static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, | ||
| 274 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
| 275 | { | ||
| 276 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 277 | nvgpu_get_ctxsw_trace_filter_args(&args->filter, &dev->filter); | ||
| 278 | nvgpu_mutex_release(&dev->write_lock); | ||
| 279 | |||
| 280 | return 0; | ||
| 281 | } | ||
| 282 | |||
| 283 | static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) | ||
| 284 | { | ||
| 285 | struct gk20a *g = dev->g; | ||
| 286 | int err; | ||
| 287 | |||
| 288 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); | ||
| 289 | |||
| 290 | err = gk20a_busy(g); | ||
| 291 | if (err) | ||
| 292 | return err; | ||
| 293 | |||
| 294 | if (g->ops.fecs_trace.flush) | ||
| 295 | err = g->ops.fecs_trace.flush(g); | ||
| 296 | |||
| 297 | if (likely(!err)) | ||
| 298 | err = g->ops.fecs_trace.poll(g); | ||
| 299 | |||
| 300 | gk20a_idle(g); | ||
| 301 | return err; | ||
| 302 | } | ||
| 303 | |||
| 304 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) | ||
| 305 | { | ||
| 306 | struct nvgpu_os_linux *l; | ||
| 307 | struct gk20a *g; | ||
| 308 | struct gk20a_ctxsw_trace *trace; | ||
| 309 | struct gk20a_ctxsw_dev *dev; | ||
| 310 | int err; | ||
| 311 | size_t size; | ||
| 312 | u32 n; | ||
| 313 | |||
| 314 | /* only one VM for now */ | ||
| 315 | const int vmid = 0; | ||
| 316 | |||
| 317 | l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev); | ||
| 318 | g = gk20a_get(&l->g); | ||
| 319 | if (!g) | ||
| 320 | return -ENODEV; | ||
| 321 | |||
| 322 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); | ||
| 323 | |||
| 324 | err = gk20a_busy(g); | ||
| 325 | if (err) | ||
| 326 | goto free_ref; | ||
| 327 | |||
| 328 | trace = g->ctxsw_trace; | ||
| 329 | if (!trace) { | ||
| 330 | err = -ENODEV; | ||
| 331 | goto idle; | ||
| 332 | } | ||
| 333 | |||
| 334 | /* Allow only one user for this device */ | ||
| 335 | dev = &trace->devs[vmid]; | ||
| 336 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 337 | if (dev->hdr) { | ||
| 338 | err = -EBUSY; | ||
| 339 | goto done; | ||
| 340 | } | ||
| 341 | |||
| 342 | /* By default, allocate a ring buffer big enough to accommodate | ||
| 343 | * FECS records with the default event filter */ | ||
| 344 | |||
| 345 | /* enable all traces by default */ | ||
| 346 | NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); | ||
| 347 | |||
| 348 | /* compute max number of entries generated with this filter */ | ||
| 349 | n = g->ops.fecs_trace.max_entries(g, &dev->filter); | ||
| 350 | |||
| 351 | size = sizeof(struct nvgpu_ctxsw_ring_header) + | ||
| 352 | n * sizeof(struct nvgpu_gpu_ctxsw_trace_entry); | ||
| 353 | nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", | ||
| 354 | size, n, sizeof(struct nvgpu_gpu_ctxsw_trace_entry)); | ||
| 355 | |||
| 356 | err = gk20a_ctxsw_dev_alloc_buffer(dev, size); | ||
| 357 | if (!err) { | ||
| 358 | filp->private_data = dev; | ||
| 359 | nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", | ||
| 360 | filp, dev, size); | ||
| 361 | } | ||
| 362 | |||
| 363 | done: | ||
| 364 | nvgpu_mutex_release(&dev->write_lock); | ||
| 365 | |||
| 366 | idle: | ||
| 367 | gk20a_idle(g); | ||
| 368 | free_ref: | ||
| 369 | if (err) | ||
| 370 | gk20a_put(g); | ||
| 371 | return err; | ||
| 372 | } | ||
| 373 | |||
| 374 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) | ||
| 375 | { | ||
| 376 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
| 377 | struct gk20a *g = dev->g; | ||
| 378 | |||
| 379 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); | ||
| 380 | |||
| 381 | g->ops.fecs_trace.disable(g); | ||
| 382 | |||
| 383 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 384 | dev->write_enabled = false; | ||
| 385 | nvgpu_mutex_release(&dev->write_lock); | ||
| 386 | |||
| 387 | if (dev->hdr) { | ||
| 388 | dev->g->ops.fecs_trace.free_user_buffer(dev->g); | ||
| 389 | dev->hdr = NULL; | ||
| 390 | } | ||
| 391 | gk20a_put(g); | ||
| 392 | return 0; | ||
| 393 | } | ||
| 394 | |||
| 395 | long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, | ||
| 396 | unsigned long arg) | ||
| 397 | { | ||
| 398 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
| 399 | struct gk20a *g = dev->g; | ||
| 400 | u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; | ||
| 401 | int err = 0; | ||
| 402 | |||
| 403 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); | ||
| 404 | |||
| 405 | if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || | ||
| 406 | (_IOC_NR(cmd) == 0) || | ||
| 407 | (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) || | ||
| 408 | (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE)) | ||
| 409 | return -EINVAL; | ||
| 410 | |||
| 411 | memset(buf, 0, sizeof(buf)); | ||
| 412 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
| 413 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
| 414 | return -EFAULT; | ||
| 415 | } | ||
| 416 | |||
| 417 | switch (cmd) { | ||
| 418 | case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: | ||
| 419 | err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); | ||
| 420 | break; | ||
| 421 | case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: | ||
| 422 | err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); | ||
| 423 | break; | ||
| 424 | case NVGPU_CTXSW_IOCTL_RING_SETUP: | ||
| 425 | err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, | ||
| 426 | (struct nvgpu_ctxsw_ring_setup_args *) buf); | ||
| 427 | break; | ||
| 428 | case NVGPU_CTXSW_IOCTL_SET_FILTER: | ||
| 429 | err = gk20a_ctxsw_dev_ioctl_set_filter(dev, | ||
| 430 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
| 431 | break; | ||
| 432 | case NVGPU_CTXSW_IOCTL_GET_FILTER: | ||
| 433 | err = gk20a_ctxsw_dev_ioctl_get_filter(dev, | ||
| 434 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
| 435 | break; | ||
| 436 | case NVGPU_CTXSW_IOCTL_POLL: | ||
| 437 | err = gk20a_ctxsw_dev_ioctl_poll(dev); | ||
| 438 | break; | ||
| 439 | default: | ||
| 440 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", | ||
| 441 | cmd); | ||
| 442 | err = -ENOTTY; | ||
| 443 | } | ||
| 444 | |||
| 445 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) && | ||
| 446 | copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd))) | ||
| | err = -EFAULT; /* copy_to_user() returns bytes not copied, not -errno */ | ||
| 447 | |||
| 448 | return err; | ||
| 449 | } | ||
| 450 | |||
| 451 | unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) | ||
| 452 | { | ||
| 453 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
| 454 | struct gk20a *g = dev->g; | ||
| 455 | struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; | ||
| 456 | unsigned int mask = 0; | ||
| 457 | |||
| 458 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); | ||
| 459 | |||
| 460 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 461 | poll_wait(filp, &dev->readout_wq.wq, wait); | ||
| 462 | if (!ring_is_empty(hdr)) | ||
| 463 | mask |= POLLIN | POLLRDNORM; | ||
| 464 | nvgpu_mutex_release(&dev->write_lock); | ||
| 465 | |||
| 466 | return mask; | ||
| 467 | } | ||
| 468 | |||
| 469 | static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) | ||
| 470 | { | ||
| 471 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
| 472 | struct gk20a *g = dev->g; | ||
| 473 | |||
| 474 | nvgpu_atomic_inc(&dev->vma_ref); | ||
| 475 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
| 476 | nvgpu_atomic_read(&dev->vma_ref)); | ||
| 477 | } | ||
| 478 | |||
| 479 | static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) | ||
| 480 | { | ||
| 481 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
| 482 | struct gk20a *g = dev->g; | ||
| 483 | |||
| 484 | nvgpu_atomic_dec(&dev->vma_ref); | ||
| 485 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
| 486 | nvgpu_atomic_read(&dev->vma_ref)); | ||
| 487 | } | ||
| 488 | |||
| 489 | static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { | ||
| 490 | .open = gk20a_ctxsw_dev_vma_open, | ||
| 491 | .close = gk20a_ctxsw_dev_vma_close, | ||
| 492 | }; | ||
| 493 | |||
| 494 | int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, | ||
| 495 | struct vm_area_struct *vma) | ||
| 496 | { | ||
| 497 | return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); | ||
| 498 | } | ||
| 499 | |||
| 500 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||
| 501 | { | ||
| 502 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
| 503 | struct gk20a *g = dev->g; | ||
| 504 | int ret; | ||
| 505 | |||
| 506 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", | ||
| 507 | vma->vm_start, vma->vm_end); | ||
| 508 | |||
| 509 | ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); | ||
| 510 | if (likely(!ret)) { | ||
| 511 | vma->vm_private_data = dev; | ||
| 512 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; | ||
| 513 | vma->vm_ops->open(vma); | ||
| 514 | } | ||
| 515 | |||
| 516 | return ret; | ||
| 517 | } | ||
| 518 | |||
| 519 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 520 | static int gk20a_ctxsw_init_devs(struct gk20a *g) | ||
| 521 | { | ||
| 522 | struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; | ||
| 523 | struct gk20a_ctxsw_dev *dev = trace->devs; | ||
| 524 | int err; | ||
| 525 | int i; | ||
| 526 | |||
| 527 | for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { | ||
| 528 | dev->g = g; | ||
| 529 | dev->hdr = NULL; | ||
| 530 | dev->write_enabled = false; | ||
| 531 | nvgpu_cond_init(&dev->readout_wq); | ||
| 532 | err = nvgpu_mutex_init(&dev->write_lock); | ||
| 533 | if (err) | ||
| 534 | return err; | ||
| 535 | nvgpu_atomic_set(&dev->vma_ref, 0); | ||
| 536 | dev++; | ||
| 537 | } | ||
| 538 | return 0; | ||
| 539 | } | ||
| 540 | #endif | ||
| 541 | |||
| 542 | int gk20a_ctxsw_trace_init(struct gk20a *g) | ||
| 543 | { | ||
| 544 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 545 | struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; | ||
| 546 | int err; | ||
| 547 | |||
| 548 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace); | ||
| 549 | |||
| 550 | /* if tracing is not supported, skip this */ | ||
| 551 | if (!g->ops.fecs_trace.init) | ||
| 552 | return 0; | ||
| 553 | |||
| 554 | if (likely(trace)) { | ||
| 555 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | ||
| 556 | return 0; | ||
| 557 | } | ||
| 558 | |||
| 559 | trace = nvgpu_kzalloc(g, sizeof(*trace)); | ||
| 560 | if (unlikely(!trace)) | ||
| 561 | return -ENOMEM; | ||
| 562 | g->ctxsw_trace = trace; | ||
| 563 | |||
| 564 | err = gk20a_ctxsw_init_devs(g); | ||
| 565 | if (err) | ||
| 566 | goto fail; | ||
| 567 | |||
| 568 | err = g->ops.fecs_trace.init(g); | ||
| 569 | if (unlikely(err)) | ||
| 570 | goto fail; | ||
| 571 | |||
| 572 | return 0; | ||
| 573 | |||
| 574 | fail: | ||
| 575 | memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); | ||
| 576 | nvgpu_kfree(g, trace); | ||
| 577 | g->ctxsw_trace = NULL; | ||
| 578 | return err; | ||
| 579 | #else | ||
| 580 | return 0; | ||
| 581 | #endif | ||
| 582 | } | ||
| 583 | |||
| 584 | void gk20a_ctxsw_trace_cleanup(struct gk20a *g) | ||
| 585 | { | ||
| 586 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 587 | struct gk20a_ctxsw_trace *trace; | ||
| 588 | struct gk20a_ctxsw_dev *dev; | ||
| 589 | int i; | ||
| 590 | |||
| 591 | if (!g->ctxsw_trace) | ||
| 592 | return; | ||
| 593 | |||
| 594 | trace = g->ctxsw_trace; | ||
| 595 | dev = trace->devs; | ||
| 596 | |||
| 597 | for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { | ||
| 598 | nvgpu_mutex_destroy(&dev->write_lock); | ||
| 599 | dev++; | ||
| 600 | } | ||
| 601 | |||
| 602 | nvgpu_kfree(g, g->ctxsw_trace); | ||
| 603 | g->ctxsw_trace = NULL; | ||
| 604 | |||
| 605 | g->ops.fecs_trace.deinit(g); | ||
| 606 | #endif | ||
| 607 | } | ||
| 608 | |||
| 609 | int gk20a_ctxsw_trace_write(struct gk20a *g, | ||
| 610 | struct nvgpu_gpu_ctxsw_trace_entry *entry) | ||
| 611 | { | ||
| 612 | struct nvgpu_ctxsw_ring_header *hdr; | ||
| 613 | struct gk20a_ctxsw_dev *dev; | ||
| 614 | int ret = 0; | ||
| 615 | const char *reason; | ||
| 616 | u32 write_idx; | ||
| 617 | |||
| 618 | if (!g->ctxsw_trace) | ||
| 619 | return 0; | ||
| 620 | |||
| 621 | if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS)) | ||
| 622 | return -ENODEV; | ||
| 623 | |||
| 624 | dev = &g->ctxsw_trace->devs[entry->vmid]; | ||
| 625 | hdr = dev->hdr; | ||
| 626 | |||
| 627 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, | ||
| 628 | "dev=%p hdr=%p", dev, hdr); | ||
| 629 | |||
| 630 | nvgpu_mutex_acquire(&dev->write_lock); | ||
| 631 | |||
| 632 | if (unlikely(!hdr)) { | ||
| 633 | /* device has been released */ | ||
| 634 | ret = -ENODEV; | ||
| 635 | goto done; | ||
| 636 | } | ||
| 637 | |||
| 638 | write_idx = hdr->write_idx; | ||
| 639 | if (write_idx >= dev->num_ents) { | ||
| 640 | nvgpu_err(dev->g, | ||
| 641 | "write_idx=%u out of range [0..%u]", | ||
| 642 | write_idx, dev->num_ents); | ||
| 643 | ret = -ENOSPC; | ||
| 644 | reason = "write_idx out of range"; | ||
| 645 | goto disable; | ||
| 646 | } | ||
| 647 | |||
| 648 | entry->seqno = hdr->write_seqno++; | ||
| 649 | |||
| 650 | if (!dev->write_enabled) { | ||
| 651 | ret = -EBUSY; | ||
| 652 | reason = "write disabled"; | ||
| 653 | goto drop; | ||
| 654 | } | ||
| 655 | |||
| 656 | if (unlikely(ring_is_full(hdr))) { | ||
| 657 | ret = -ENOSPC; | ||
| 658 | reason = "user fifo full"; | ||
| 659 | goto drop; | ||
| 660 | } | ||
| 661 | |||
| 662 | if (!NVGPU_GPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) { | ||
| 663 | reason = "filtered out"; | ||
| 664 | goto filter; | ||
| 665 | } | ||
| 666 | |||
| 667 | nvgpu_log(g, gpu_dbg_ctxsw, | ||
| 668 | "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx", | ||
| 669 | entry->seqno, entry->context_id, entry->pid, | ||
| 670 | entry->tag, entry->timestamp); | ||
| 671 | |||
| 672 | dev->ents[write_idx] = *entry; | ||
| 673 | |||
| 674 | /* ensure record is written before updating write index */ | ||
| 675 | nvgpu_smp_wmb(); | ||
| 676 | |||
| 677 | write_idx++; | ||
| 678 | if (unlikely(write_idx >= hdr->num_ents)) | ||
| 679 | write_idx = 0; | ||
| 680 | hdr->write_idx = write_idx; | ||
| 681 | nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", | ||
| 682 | hdr->read_idx, hdr->write_idx, ring_len(hdr)); | ||
| 683 | |||
| 684 | nvgpu_mutex_release(&dev->write_lock); | ||
| 685 | return ret; | ||
| 686 | |||
| 687 | disable: | ||
| 688 | g->ops.fecs_trace.disable(g); | ||
| 689 | |||
| 690 | drop: | ||
| 691 | hdr->drop_count++; | ||
| 692 | |||
| 693 | filter: | ||
| 694 | nvgpu_log(g, gpu_dbg_ctxsw, | ||
| 695 | "dropping seqno=%d context_id=%08x pid=%lld " | ||
| 696 | "tag=%x time=%llx (%s)", | ||
| 697 | entry->seqno, entry->context_id, entry->pid, | ||
| 698 | entry->tag, entry->timestamp, reason); | ||
| 699 | |||
| 700 | done: | ||
| 701 | nvgpu_mutex_release(&dev->write_lock); | ||
| 702 | return ret; | ||
| 703 | } | ||
| 704 | |||
| 705 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) | ||
| 706 | { | ||
| 707 | struct gk20a_ctxsw_dev *dev; | ||
| 708 | |||
| 709 | if (!g->ctxsw_trace) | ||
| 710 | return; | ||
| 711 | |||
| 712 | dev = &g->ctxsw_trace->devs[vmid]; | ||
| 713 | nvgpu_cond_signal_interruptible(&dev->readout_wq); | ||
| 714 | } | ||
| 715 | |||
| 716 | void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) | ||
| 717 | { | ||
| 718 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 719 | struct nvgpu_gpu_ctxsw_trace_entry entry = { | ||
| 720 | .vmid = 0, | ||
| 721 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, | ||
| 722 | .context_id = 0, | ||
| 723 | .pid = ch->tgid, | ||
| 724 | }; | ||
| 725 | |||
| 726 | if (!g->ctxsw_trace) | ||
| 727 | return; | ||
| 728 | |||
| 729 | g->ops.ptimer.read_ptimer(g, &entry.timestamp); | ||
| 730 | gk20a_ctxsw_trace_write(g, &entry); | ||
| 731 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
| 732 | #endif | ||
| 733 | trace_gk20a_channel_reset(ch->chid, ch->tsgid); | ||
| 734 | } | ||
| 735 | |||
| 736 | void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) | ||
| 737 | { | ||
| 738 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 739 | struct nvgpu_gpu_ctxsw_trace_entry entry = { | ||
| 740 | .vmid = 0, | ||
| 741 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, | ||
| 742 | .context_id = 0, | ||
| 743 | .pid = tsg->tgid, | ||
| 744 | }; | ||
| 745 | |||
| 746 | if (!g->ctxsw_trace) | ||
| 747 | return; | ||
| 748 | |||
| 749 | g->ops.ptimer.read_ptimer(g, &entry.timestamp); | ||
| 750 | gk20a_ctxsw_trace_write(g, &entry); | ||
| 751 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
| 752 | #endif | ||
| 753 | trace_gk20a_channel_reset(~0, tsg->tsgid); | ||
| 754 | } | ||
| 755 | |||
| 756 | /* | ||
| 757 | * Convert Linux nvgpu ctxsw tags of the form NVGPU_CTXSW_TAG_* | ||
| 758 | * into common nvgpu ctxsw tags of the form NVGPU_GPU_CTXSW_TAG_* | ||
| 759 | */ | ||
| 760 | |||
| 761 | u8 nvgpu_gpu_ctxsw_tags_to_common_tags(u8 tags) | ||
| 762 | { | ||
| 763 | switch (tags) { | ||
| 764 | case NVGPU_CTXSW_TAG_SOF: | ||
| 765 | return NVGPU_GPU_CTXSW_TAG_SOF; | ||
| 766 | case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST: | ||
| 767 | return NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST; | ||
| 768 | case NVGPU_CTXSW_TAG_FE_ACK: | ||
| 769 | return NVGPU_GPU_CTXSW_TAG_FE_ACK; | ||
| 770 | case NVGPU_CTXSW_TAG_FE_ACK_WFI: | ||
| 771 | return NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI; | ||
| 772 | case NVGPU_CTXSW_TAG_FE_ACK_GFXP: | ||
| 773 | return NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP; | ||
| 774 | case NVGPU_CTXSW_TAG_FE_ACK_CTAP: | ||
| 775 | return NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP; | ||
| 776 | case NVGPU_CTXSW_TAG_FE_ACK_CILP: | ||
| 777 | return NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP; | ||
| 778 | case NVGPU_CTXSW_TAG_SAVE_END: | ||
| 779 | return NVGPU_GPU_CTXSW_TAG_SAVE_END; | ||
| 780 | case NVGPU_CTXSW_TAG_RESTORE_START: | ||
| 781 | return NVGPU_GPU_CTXSW_TAG_RESTORE_START; | ||
| 782 | case NVGPU_CTXSW_TAG_CONTEXT_START: | ||
| 783 | return NVGPU_GPU_CTXSW_TAG_CONTEXT_START; | ||
| 784 | case NVGPU_CTXSW_TAG_ENGINE_RESET: | ||
| 785 | return NVGPU_GPU_CTXSW_TAG_ENGINE_RESET; | ||
| 786 | case NVGPU_CTXSW_TAG_INVALID_TIMESTAMP: | ||
| 787 | return NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP; | ||
| 788 | } | ||
| 789 | |||
| 790 | WARN_ON(1); | ||
| 791 | return tags; | ||
| 792 | } | ||
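Taken together, the cdev above is driven from userspace by enabling the trace, draining fixed-size entries, and disabling it again. A hypothetical consumer sketch; the device node path and the userspace header path are assumptions:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>	/* assumed location of the nvgpu uapi */

	static void drain_ctxsw_trace(void)
	{
		struct nvgpu_ctxsw_trace_entry ent;
		/* O_NONBLOCK makes read() fail with EAGAIN once the ring is empty */
		int fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY | O_NONBLOCK);

		if (fd < 0)
			return;

		ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE);
		/* ... workload runs here ... */
		while (read(fd, &ent, sizeof(ent)) == (ssize_t)sizeof(ent))
			printf("seqno=%u tag=%02x pid=%lld ts=%llx\n",
			       ent.seqno, ent.tag, (long long)ent.pid,
			       (unsigned long long)ent.timestamp);
		ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_DISABLE);
		close(fd);
	}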
diff --git a/include/os/linux/ctxsw_trace.h b/include/os/linux/ctxsw_trace.h new file mode 100644 index 0000000..88ca7f2 --- /dev/null +++ b/include/os/linux/ctxsw_trace.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __CTXSW_TRACE_H__ | ||
| 18 | #define __CTXSW_TRACE_H__ | ||
| 19 | |||
| 20 | #include <nvgpu/types.h> | ||
| 21 | |||
| 22 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 | ||
| 23 | |||
| 24 | struct file; | ||
| 25 | struct inode; | ||
| 26 | struct poll_table_struct; | ||
| 27 | |||
| 28 | struct gk20a; | ||
| 29 | |||
| 30 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); | ||
| 31 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); | ||
| 32 | long gk20a_ctxsw_dev_ioctl(struct file *filp, | ||
| 33 | unsigned int cmd, unsigned long arg); | ||
| 34 | ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, | ||
| 35 | size_t size, loff_t *offs); | ||
| 36 | unsigned int gk20a_ctxsw_dev_poll(struct file *filp, | ||
| 37 | struct poll_table_struct *pts); | ||
| 38 | |||
| 39 | #endif /* __CTXSW_TRACE_H__ */ | ||
diff --git a/include/os/linux/debug.c b/include/os/linux/debug.c new file mode 100644 index 0000000..5f0703c --- /dev/null +++ b/include/os/linux/debug.c | |||
| @@ -0,0 +1,457 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_cde.h" | ||
| 16 | #include "debug_ce.h" | ||
| 17 | #include "debug_fifo.h" | ||
| 18 | #include "debug_gr.h" | ||
| 19 | #include "debug_allocator.h" | ||
| 20 | #include "debug_kmem.h" | ||
| 21 | #include "debug_pmu.h" | ||
| 22 | #include "debug_sched.h" | ||
| 23 | #include "debug_hal.h" | ||
| 24 | #include "debug_xve.h" | ||
| 25 | #include "debug_ltc.h" | ||
| 26 | #include "debug_bios.h" | ||
| 27 | #include "os_linux.h" | ||
| 28 | #include "platform_gk20a.h" | ||
| 29 | |||
| 30 | #include <nvgpu/gk20a.h> | ||
| 31 | |||
| 32 | #include <linux/debugfs.h> | ||
| 33 | #include <linux/seq_file.h> | ||
| 34 | #include <linux/uaccess.h> | ||
| 35 | |||
| 36 | #include <nvgpu/debug.h> | ||
| 37 | |||
| 38 | unsigned int gk20a_debug_trace_cmdbuf; | ||
| 39 | |||
| 40 | static inline void gk20a_debug_write_printk(void *ctx, const char *str, | ||
| 41 | size_t len) | ||
| 42 | { | ||
| 43 | pr_info("%s", str); | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, | ||
| 47 | size_t len) | ||
| 48 | { | ||
| 49 | seq_write((struct seq_file *)ctx, str, len); | ||
| 50 | } | ||
| 51 | |||
| 52 | void gk20a_debug_output(struct gk20a_debug_output *o, | ||
| 53 | const char *fmt, ...) | ||
| 54 | { | ||
| 55 | va_list args; | ||
| 56 | int len; | ||
| 57 | |||
| 58 | va_start(args, fmt); | ||
| 59 | len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); | ||
| 60 | va_end(args); | ||
| 61 | o->fn(o->ctx, o->buf, len); | ||
| 62 | } | ||
| 63 | |||
| 64 | static int gk20a_gr_dump_regs(struct gk20a *g, | ||
| 65 | struct gk20a_debug_output *o) | ||
| 66 | { | ||
| 67 | if (g->ops.gr.dump_gr_regs) | ||
| 68 | gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); | ||
| 69 | |||
| 70 | return 0; | ||
| 71 | } | ||
| 72 | |||
| 73 | int gk20a_gr_debug_dump(struct gk20a *g) | ||
| 74 | { | ||
| 75 | struct gk20a_debug_output o = { | ||
| 76 | .fn = gk20a_debug_write_printk | ||
| 77 | }; | ||
| 78 | |||
| 79 | gk20a_gr_dump_regs(g, &o); | ||
| 80 | |||
| 81 | return 0; | ||
| 82 | } | ||
| 83 | |||
| 84 | static int gk20a_gr_debug_show(struct seq_file *s, void *unused) | ||
| 85 | { | ||
| 86 | struct device *dev = s->private; | ||
| 87 | struct gk20a *g = gk20a_get_platform(dev)->g; | ||
| 88 | struct gk20a_debug_output o = { | ||
| 89 | .fn = gk20a_debug_write_to_seqfile, | ||
| 90 | .ctx = s, | ||
| 91 | }; | ||
| 92 | int err; | ||
| 93 | |||
| 94 | err = gk20a_busy(g); | ||
| 95 | if (err) { | ||
| 96 | nvgpu_err(g, "failed to power on gpu: %d", err); | ||
| 97 | return -EINVAL; | ||
| 98 | } | ||
| 99 | |||
| 100 | gk20a_gr_dump_regs(g, &o); | ||
| 101 | |||
| 102 | gk20a_idle(g); | ||
| 103 | |||
| 104 | return 0; | ||
| 105 | } | ||
| 106 | |||
| 107 | void gk20a_debug_dump(struct gk20a *g) | ||
| 108 | { | ||
| 109 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
| 110 | struct gk20a_debug_output o = { | ||
| 111 | .fn = gk20a_debug_write_printk | ||
| 112 | }; | ||
| 113 | |||
| 114 | if (platform->dump_platform_dependencies) | ||
| 115 | platform->dump_platform_dependencies(dev_from_gk20a(g)); | ||
| 116 | |||
| 117 | /* HAL only initialized after 1st power-on */ | ||
| 118 | if (g->ops.debug.show_dump) | ||
| 119 | g->ops.debug.show_dump(g, &o); | ||
| 120 | } | ||
| 121 | |||
| 122 | static int gk20a_debug_show(struct seq_file *s, void *unused) | ||
| 123 | { | ||
| 124 | struct device *dev = s->private; | ||
| 125 | struct gk20a_debug_output o = { | ||
| 126 | .fn = gk20a_debug_write_to_seqfile, | ||
| 127 | .ctx = s, | ||
| 128 | }; | ||
| 129 | struct gk20a *g; | ||
| 130 | int err; | ||
| 131 | |||
| 132 | g = gk20a_get_platform(dev)->g; | ||
| 133 | |||
| 134 | err = gk20a_busy(g); | ||
| 135 | if (err) { | ||
| 136 | nvgpu_err(g, "failed to power on gpu: %d", err); | ||
| 137 | return -EFAULT; | ||
| 138 | } | ||
| 139 | |||
| 140 | /* HAL only initialized after 1st power-on */ | ||
| 141 | if (g->ops.debug.show_dump) | ||
| 142 | g->ops.debug.show_dump(g, &o); | ||
| 143 | |||
| 144 | gk20a_idle(g); | ||
| 145 | return 0; | ||
| 146 | } | ||
| 147 | |||
| 148 | static int gk20a_gr_debug_open(struct inode *inode, struct file *file) | ||
| 149 | { | ||
| 150 | return single_open(file, gk20a_gr_debug_show, inode->i_private); | ||
| 151 | } | ||
| 152 | |||
| 153 | static int gk20a_debug_open(struct inode *inode, struct file *file) | ||
| 154 | { | ||
| 155 | return single_open(file, gk20a_debug_show, inode->i_private); | ||
| 156 | } | ||
| 157 | |||
| 158 | static const struct file_operations gk20a_gr_debug_fops = { | ||
| 159 | .open = gk20a_gr_debug_open, | ||
| 160 | .read = seq_read, | ||
| 161 | .llseek = seq_lseek, | ||
| 162 | .release = single_release, | ||
| 163 | }; | ||
| 164 | |||
| 165 | static const struct file_operations gk20a_debug_fops = { | ||
| 166 | .open = gk20a_debug_open, | ||
| 167 | .read = seq_read, | ||
| 168 | .llseek = seq_lseek, | ||
| 169 | .release = single_release, | ||
| 170 | }; | ||
| 171 | |||
| 172 | void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) | ||
| 173 | { | ||
| 174 | g->ops.fifo.dump_pbdma_status(g, o); | ||
| 175 | g->ops.fifo.dump_eng_status(g, o); | ||
| 176 | |||
| 177 | gk20a_debug_dump_all_channel_status_ramfc(g, o); | ||
| 178 | } | ||
| 179 | |||
| 180 | static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) | ||
| 181 | { | ||
| 182 | char buf[3]; | ||
| 183 | struct gk20a *g = file->private_data; | ||
| 184 | |||
| 185 | if (g->mm.disable_bigpage) | ||
| 186 | buf[0] = 'Y'; | ||
| 187 | else | ||
| 188 | buf[0] = 'N'; | ||
| 189 | buf[1] = '\n'; | ||
| 190 | buf[2] = 0x00; | ||
| 191 | return simple_read_from_buffer(user_buf, count, ppos, buf, 2); | ||
| 192 | } | ||
| 193 | |||
| 194 | static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) | ||
| 195 | { | ||
| 196 | char buf[32]; | ||
| 197 | int buf_size; | ||
| 198 | bool bv; | ||
| 199 | struct gk20a *g = file->private_data; | ||
| 200 | |||
| 201 | buf_size = min(count, (sizeof(buf)-1)); | ||
| 202 | if (copy_from_user(buf, user_buf, buf_size)) | ||
| 203 | return -EFAULT; | ||
| 204 | |||
| 205 | if (strtobool(buf, &bv) == 0) { | ||
| 206 | g->mm.disable_bigpage = bv; | ||
| 207 | gk20a_init_gpu_characteristics(g); | ||
| 208 | } | ||
| 209 | |||
| 210 | return count; | ||
| 211 | } | ||
| 212 | |||
| 213 | static const struct file_operations disable_bigpage_fops = { | ||
| 214 | .open = simple_open, | ||
| 215 | .read = disable_bigpage_read, | ||
| 216 | .write = disable_bigpage_write, | ||
| 217 | }; | ||
| 218 | |||
| 219 | static int railgate_residency_show(struct seq_file *s, void *data) | ||
| 220 | { | ||
| 221 | struct gk20a *g = s->private; | ||
| 222 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
| 223 | unsigned long time_since_last_state_transition_ms; | ||
| 224 | unsigned long total_rail_gate_time_ms; | ||
| 225 | unsigned long total_rail_ungate_time_ms; | ||
| 226 | |||
| 227 | if (platform->is_railgated(dev_from_gk20a(g))) { | ||
| 228 | time_since_last_state_transition_ms = | ||
| 229 | jiffies_to_msecs(jiffies - | ||
| 230 | g->pstats.last_rail_gate_complete); | ||
| 231 | total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms; | ||
| 232 | total_rail_gate_time_ms = | ||
| 233 | g->pstats.total_rail_gate_time_ms + | ||
| 234 | time_since_last_state_transition_ms; | ||
| 235 | } else { | ||
| 236 | time_since_last_state_transition_ms = | ||
| 237 | jiffies_to_msecs(jiffies - | ||
| 238 | g->pstats.last_rail_ungate_complete); | ||
| 239 | total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms; | ||
| 240 | total_rail_ungate_time_ms = | ||
| 241 | g->pstats.total_rail_ungate_time_ms + | ||
| 242 | time_since_last_state_transition_ms; | ||
| 243 | } | ||
| 244 | |||
| 245 | seq_printf(s, "Time with Rails Gated: %lu ms\n" | ||
| 246 | "Time with Rails UnGated: %lu ms\n" | ||
| 247 | "Total railgating cycles: %lu\n", | ||
| 248 | total_rail_gate_time_ms, | ||
| 249 | total_rail_ungate_time_ms, | ||
| 250 | g->pstats.railgating_cycle_count - 1); | ||
| 251 | return 0; | ||
| 252 | |||
| 253 | } | ||
| 254 | |||
| 255 | static int railgate_residency_open(struct inode *inode, struct file *file) | ||
| 256 | { | ||
| 257 | return single_open(file, railgate_residency_show, inode->i_private); | ||
| 258 | } | ||
| 259 | |||
| 260 | static const struct file_operations railgate_residency_fops = { | ||
| 261 | .open = railgate_residency_open, | ||
| 262 | .read = seq_read, | ||
| 263 | .llseek = seq_lseek, | ||
| 264 | .release = single_release, | ||
| 265 | }; | ||
| 266 | |||
| 267 | static int gk20a_railgating_debugfs_init(struct gk20a *g) | ||
| 268 | { | ||
| 269 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 270 | struct dentry *d; | ||
| 271 | |||
| 272 | d = debugfs_create_file( | ||
| 273 | "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
| 274 | &railgate_residency_fops); | ||
| 275 | if (!d) | ||
| 276 | return -ENOMEM; | ||
| 277 | |||
| 278 | return 0; | ||
| 279 | } | ||
| 280 | static ssize_t timeouts_enabled_read(struct file *file, | ||
| 281 | char __user *user_buf, size_t count, loff_t *ppos) | ||
| 282 | { | ||
| 283 | char buf[3]; | ||
| 284 | struct gk20a *g = file->private_data; | ||
| 285 | |||
| 286 | if (nvgpu_is_timeouts_enabled(g)) | ||
| 287 | buf[0] = 'Y'; | ||
| 288 | else | ||
| 289 | buf[0] = 'N'; | ||
| 290 | buf[1] = '\n'; | ||
| 291 | buf[2] = 0x00; | ||
| 292 | return simple_read_from_buffer(user_buf, count, ppos, buf, 2); | ||
| 293 | } | ||
| 294 | |||
| 295 | static ssize_t timeouts_enabled_write(struct file *file, | ||
| 296 | const char __user *user_buf, size_t count, loff_t *ppos) | ||
| 297 | { | ||
| 298 | char buf[3]; | ||
| 299 | int buf_size; | ||
| 300 | bool timeouts_enabled; | ||
| 301 | struct gk20a *g = file->private_data; | ||
| 302 | |||
| 303 | buf_size = min(count, (sizeof(buf)-1)); | ||
| 304 | if (copy_from_user(buf, user_buf, buf_size)) | ||
| 305 | return -EFAULT; | ||
| 306 | |||
| 307 | if (strtobool(buf, &timeouts_enabled) == 0) { | ||
| 308 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 309 | if (timeouts_enabled == false) { | ||
| 310 | /* requesting to disable timeouts */ | ||
| 311 | if (g->timeouts_disabled_by_user == false) { | ||
| 312 | nvgpu_atomic_inc(&g->timeouts_disabled_refcount); | ||
| 313 | g->timeouts_disabled_by_user = true; | ||
| 314 | } | ||
| 315 | } else { | ||
| 316 | /* requesting to enable timeouts */ | ||
| 317 | if (g->timeouts_disabled_by_user == true) { | ||
| 318 | nvgpu_atomic_dec(&g->timeouts_disabled_refcount); | ||
| 319 | g->timeouts_disabled_by_user = false; | ||
| 320 | } | ||
| 321 | } | ||
| 322 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 323 | } | ||
| 324 | |||
| 325 | return count; | ||
| 326 | } | ||
| 327 | |||
| 328 | static const struct file_operations timeouts_enabled_fops = { | ||
| 329 | .open = simple_open, | ||
| 330 | .read = timeouts_enabled_read, | ||
| 331 | .write = timeouts_enabled_write, | ||
| 332 | }; | ||
| 333 | |||
| 334 | void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) | ||
| 335 | { | ||
| 336 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 337 | struct device *dev = dev_from_gk20a(g); | ||
| 338 | |||
| 339 | l->debugfs = debugfs_create_dir(dev_name(dev), NULL); | ||
| 340 | if (!l->debugfs) | ||
| 341 | return; | ||
| 342 | |||
| 343 | if (debugfs_symlink) | ||
| 344 | l->debugfs_alias = | ||
| 345 | debugfs_create_symlink(debugfs_symlink, | ||
| 346 | NULL, dev_name(dev)); | ||
| 347 | |||
| 348 | debugfs_create_file("status", S_IRUGO, l->debugfs, | ||
| 349 | dev, &gk20a_debug_fops); | ||
| 350 | debugfs_create_file("gr_status", S_IRUGO, l->debugfs, | ||
| 351 | dev, &gk20a_gr_debug_fops); | ||
| 352 | debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, | ||
| 353 | l->debugfs, &gk20a_debug_trace_cmdbuf); | ||
| 354 | |||
| 355 | debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR, | ||
| 356 | l->debugfs, &g->ch_wdt_timeout_ms); | ||
| 357 | |||
| 358 | debugfs_create_u32("disable_syncpoints", S_IRUGO, | ||
| 359 | l->debugfs, &g->disable_syncpoints); | ||
| 360 | |||
| 361 | /* New debug logging API. */ | ||
| 362 | debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR, | ||
| 363 | l->debugfs, &g->log_mask); | ||
| 364 | debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR, | ||
| 365 | l->debugfs, &g->log_trace); | ||
| 366 | |||
| 367 | l->debugfs_ltc_enabled = | ||
| 368 | debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, | ||
| 369 | l->debugfs, | ||
| 370 | &g->mm.ltc_enabled_target); | ||
| 371 | |||
| 372 | l->debugfs_gr_idle_timeout_default = | ||
| 373 | debugfs_create_u32("gr_idle_timeout_default_us", | ||
| 374 | S_IRUGO|S_IWUSR, l->debugfs, | ||
| 375 | &g->gr_idle_timeout_default); | ||
| 376 | l->debugfs_timeouts_enabled = | ||
| 377 | debugfs_create_file("timeouts_enabled", | ||
| 378 | S_IRUGO|S_IWUSR, | ||
| 379 | l->debugfs, | ||
| 380 | g, | ||
| 381 | &timeouts_enabled_fops); | ||
| 382 | |||
| 383 | l->debugfs_disable_bigpage = | ||
| 384 | debugfs_create_file("disable_bigpage", | ||
| 385 | S_IRUGO|S_IWUSR, | ||
| 386 | l->debugfs, | ||
| 387 | g, | ||
| 388 | &disable_bigpage_fops); | ||
| 389 | |||
| 390 | l->debugfs_timeslice_low_priority_us = | ||
| 391 | debugfs_create_u32("timeslice_low_priority_us", | ||
| 392 | S_IRUGO|S_IWUSR, | ||
| 393 | l->debugfs, | ||
| 394 | &g->timeslice_low_priority_us); | ||
| 395 | l->debugfs_timeslice_medium_priority_us = | ||
| 396 | debugfs_create_u32("timeslice_medium_priority_us", | ||
| 397 | S_IRUGO|S_IWUSR, | ||
| 398 | l->debugfs, | ||
| 399 | &g->timeslice_medium_priority_us); | ||
| 400 | l->debugfs_timeslice_high_priority_us = | ||
| 401 | debugfs_create_u32("timeslice_high_priority_us", | ||
| 402 | S_IRUGO|S_IWUSR, | ||
| 403 | l->debugfs, | ||
| 404 | &g->timeslice_high_priority_us); | ||
| 405 | l->debugfs_runlist_interleave = | ||
| 406 | debugfs_create_bool("runlist_interleave", | ||
| 407 | S_IRUGO|S_IWUSR, | ||
| 408 | l->debugfs, | ||
| 409 | &g->runlist_interleave); | ||
| 410 | l->debugfs_force_preemption_gfxp = | ||
| 411 | debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR, | ||
| 412 | l->debugfs, | ||
| 413 | &g->gr.ctx_vars.force_preemption_gfxp); | ||
| 414 | |||
| 415 | l->debugfs_force_preemption_cilp = | ||
| 416 | debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR, | ||
| 417 | l->debugfs, | ||
| 418 | &g->gr.ctx_vars.force_preemption_cilp); | ||
| 419 | |||
| 420 | l->debugfs_dump_ctxsw_stats = | ||
| 421 | debugfs_create_bool("dump_ctxsw_stats_on_channel_close", | ||
| 422 | S_IRUGO|S_IWUSR, l->debugfs, | ||
| 423 | &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close); | ||
| 424 | |||
| 425 | gr_gk20a_debugfs_init(g); | ||
| 426 | gk20a_pmu_debugfs_init(g); | ||
| 427 | gk20a_railgating_debugfs_init(g); | ||
| 428 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 429 | gk20a_cde_debugfs_init(g); | ||
| 430 | #endif | ||
| 431 | gk20a_ce_debugfs_init(g); | ||
| 432 | nvgpu_alloc_debugfs_init(g); | ||
| 433 | nvgpu_hal_debugfs_init(g); | ||
| 434 | gk20a_fifo_debugfs_init(g); | ||
| 435 | gk20a_sched_debugfs_init(g); | ||
| 436 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 437 | nvgpu_kmem_debugfs_init(g); | ||
| 438 | #endif | ||
| 439 | nvgpu_ltc_debugfs_init(g); | ||
| 440 | if (g->pci_vendor_id) { | ||
| 441 | nvgpu_xve_debugfs_init(g); | ||
| 442 | nvgpu_bios_debugfs_init(g); | ||
| 443 | } | ||
| 444 | } | ||
| 445 | |||
| 446 | void gk20a_debug_deinit(struct gk20a *g) | ||
| 447 | { | ||
| 448 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 449 | |||
| 450 | if (!l->debugfs) | ||
| 451 | return; | ||
| 452 | |||
| 453 | gk20a_fifo_debugfs_deinit(g); | ||
| 454 | |||
| 455 | debugfs_remove_recursive(l->debugfs); | ||
| 456 | debugfs_remove(l->debugfs_alias); | ||
| 457 | } | ||
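gk20a_debug_output() decouples formatting from the output sink via the fn callback; printk and seq_file sinks are defined above. A sketch of routing the same dump to a custom sink; my_sink and dump_to_custom_log are illustrative names, not part of this file:

	static void my_sink(void *ctx, const char *str, size_t len)
	{
		pr_notice("gpu-dump: %s", str);	/* forward each formatted chunk */
	}

	static void dump_to_custom_log(struct gk20a *g)
	{
		struct gk20a_debug_output o = {
			.fn = my_sink,
			.ctx = NULL,
		};

		if (g->ops.debug.show_dump)
			g->ops.debug.show_dump(g, &o);
	}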
diff --git a/include/os/linux/debug_allocator.c b/include/os/linux/debug_allocator.c new file mode 100644 index 0000000..d63a903 --- /dev/null +++ b/include/os/linux/debug_allocator.c | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_allocator.h" | ||
| 16 | #include "os_linux.h" | ||
| 17 | |||
| 18 | #include <linux/debugfs.h> | ||
| 19 | #include <linux/seq_file.h> | ||
| 20 | |||
| 21 | #include <nvgpu/allocator.h> | ||
| 22 | |||
| 23 | static int __alloc_show(struct seq_file *s, void *unused) | ||
| 24 | { | ||
| 25 | struct nvgpu_allocator *a = s->private; | ||
| 26 | |||
| 27 | nvgpu_alloc_print_stats(a, s, 1); | ||
| 28 | |||
| 29 | return 0; | ||
| 30 | } | ||
| 31 | |||
| 32 | static int __alloc_open(struct inode *inode, struct file *file) | ||
| 33 | { | ||
| 34 | return single_open(file, __alloc_show, inode->i_private); | ||
| 35 | } | ||
| 36 | |||
| 37 | static const struct file_operations __alloc_fops = { | ||
| 38 | .open = __alloc_open, | ||
| 39 | .read = seq_read, | ||
| 40 | .llseek = seq_lseek, | ||
| 41 | .release = single_release, | ||
| 42 | }; | ||
| 43 | |||
| 44 | void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) | ||
| 45 | { | ||
| 46 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 47 | |||
| 48 | if (!l->debugfs_allocators) | ||
| 49 | return; | ||
| 50 | |||
| 51 | a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, | ||
| 52 | l->debugfs_allocators, | ||
| 53 | a, &__alloc_fops); | ||
| 54 | } | ||
| 55 | |||
| 56 | void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) | ||
| 57 | { | ||
| 58 | } | ||
| 59 | |||
| 60 | void nvgpu_alloc_debugfs_init(struct gk20a *g) | ||
| 61 | { | ||
| 62 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 63 | |||
| 64 | l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs); | ||
| 65 | if (IS_ERR_OR_NULL(l->debugfs_allocators)) { | ||
| 66 | l->debugfs_allocators = NULL; | ||
| 67 | return; | ||
| 68 | } | ||
| 69 | } | ||
diff --git a/include/os/linux/debug_allocator.h b/include/os/linux/debug_allocator.h new file mode 100644 index 0000000..1b21cfc --- /dev/null +++ b/include/os/linux/debug_allocator.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_ALLOCATOR_H__ | ||
| 16 | #define __NVGPU_DEBUG_ALLOCATOR_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | void nvgpu_alloc_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */ | ||
diff --git a/include/os/linux/debug_bios.c b/include/os/linux/debug_bios.c new file mode 100644 index 0000000..f69ccf3 --- /dev/null +++ b/include/os/linux/debug_bios.c | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <nvgpu/types.h> | ||
| 16 | |||
| 17 | #include "debug_bios.h" | ||
| 18 | #include "os_linux.h" | ||
| 19 | |||
| 20 | #include <linux/debugfs.h> | ||
| 21 | #include <linux/uaccess.h> | ||
| 22 | |||
| 23 | static int bios_version_show(struct seq_file *s, void *unused) | ||
| 24 | { | ||
| 25 | struct gk20a *g = s->private; | ||
| 26 | |||
| 27 | seq_printf(s, "Version %02x.%02x.%02x.%02x.%02x\n", | ||
| 28 | (g->bios.vbios_version >> 24) & 0xFF, | ||
| 29 | (g->bios.vbios_version >> 16) & 0xFF, | ||
| 30 | (g->bios.vbios_version >> 8) & 0xFF, | ||
| 31 | (g->bios.vbios_version >> 0) & 0xFF, | ||
| 32 | (g->bios.vbios_oem_version) & 0xFF); | ||
| 33 | |||
| 34 | return 0; | ||
| 35 | } | ||
| 36 | |||
| 37 | static int bios_version_open(struct inode *inode, struct file *file) | ||
| 38 | { | ||
| 39 | return single_open(file, bios_version_show, inode->i_private); | ||
| 40 | } | ||
| 41 | |||
| 42 | static const struct file_operations bios_version_fops = { | ||
| 43 | .open = bios_version_open, | ||
| 44 | .read = seq_read, | ||
| 45 | .llseek = seq_lseek, | ||
| 46 | .release = single_release, | ||
| 47 | }; | ||
| 48 | |||
| 49 | |||
| 50 | int nvgpu_bios_debugfs_init(struct gk20a *g) | ||
| 51 | { | ||
| 52 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 53 | struct dentry *gpu_root = l->debugfs; | ||
| 54 | |||
| 55 | debugfs_create_file("bios", S_IRUGO, | ||
| 56 | gpu_root, g, | ||
| 57 | &bios_version_fops); | ||
| 58 | |||
| 59 | return 0; | ||
| 60 | } | ||
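For reference, bios_version_show() above just slices the packed 32-bit
g->bios.vbios_version into bytes, most significant first, and appends the
low byte of the OEM version. A standalone sketch with made-up sample values:

    /* Sketch of the byte slicing done by bios_version_show(); the sample
     * values are invented purely for illustration. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t vbios_version = 0x86064d00;    /* hypothetical */
            uint32_t vbios_oem_version = 0x12;      /* hypothetical */

            printf("Version %02x.%02x.%02x.%02x.%02x\n",
                   (vbios_version >> 24) & 0xFF,
                   (vbios_version >> 16) & 0xFF,
                   (vbios_version >> 8) & 0xFF,
                   (vbios_version >> 0) & 0xFF,
                   vbios_oem_version & 0xFF);
            return 0;       /* prints "Version 86.06.4d.00.12" */
    }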
diff --git a/include/os/linux/debug_bios.h b/include/os/linux/debug_bios.h new file mode 100644 index 0000000..f8e7783 --- /dev/null +++ b/include/os/linux/debug_bios.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_BIOS_H__ | ||
| 16 | #define __NVGPU_DEBUG_BIOS_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | int nvgpu_bios_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_BIOS_H__ */ | ||
diff --git a/include/os/linux/debug_cde.c b/include/os/linux/debug_cde.c new file mode 100644 index 0000000..f0afa6e --- /dev/null +++ b/include/os/linux/debug_cde.c | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_cde.h" | ||
| 16 | #include "platform_gk20a.h" | ||
| 17 | #include "os_linux.h" | ||
| 18 | |||
| 19 | #include <linux/debugfs.h> | ||
| 20 | |||
| 21 | |||
| 22 | static ssize_t gk20a_cde_reload_write(struct file *file, | ||
| 23 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
| 24 | { | ||
| 25 | struct nvgpu_os_linux *l = file->private_data; | ||
| 26 | gk20a_cde_reload(l); | ||
| 27 | return count; | ||
| 28 | } | ||
| 29 | |||
| 30 | static const struct file_operations gk20a_cde_reload_fops = { | ||
| 31 | .open = simple_open, | ||
| 32 | .write = gk20a_cde_reload_write, | ||
| 33 | }; | ||
| 34 | |||
| 35 | void gk20a_cde_debugfs_init(struct gk20a *g) | ||
| 36 | { | ||
| 37 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 38 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
| 39 | |||
| 40 | if (!platform->has_cde) | ||
| 41 | return; | ||
| 42 | |||
| 43 | debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, | ||
| 44 | l->debugfs, &l->cde_app.shader_parameter); | ||
| 45 | debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, | ||
| 46 | l->debugfs, &l->cde_app.ctx_count); | ||
| 47 | debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, | ||
| 48 | l->debugfs, &l->cde_app.ctx_usecount); | ||
| 49 | debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, | ||
| 50 | l->debugfs, &l->cde_app.ctx_count_top); | ||
| 51 | debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, | ||
| 52 | l, &gk20a_cde_reload_fops); | ||
| 53 | } | ||
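Note: the reload node relies on simple_open() from fs/libfs.c, which copies
the i_private cookie passed to debugfs_create_file() into file->private_data;
that is why gk20a_cde_reload_write() can recover the nvgpu_os_linux pointer
directly. A sketch of the equivalent open handler (the function name here is
hypothetical):

    #include <linux/fs.h>

    /* What simple_open() amounts to: forward the debugfs cookie. */
    static int cde_reload_open_equiv(struct inode *inode, struct file *file)
    {
            if (inode->i_private)
                    file->private_data = inode->i_private;
            return 0;
    }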
diff --git a/include/os/linux/debug_cde.h b/include/os/linux/debug_cde.h new file mode 100644 index 0000000..4895edd --- /dev/null +++ b/include/os/linux/debug_cde.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_CDE_H__ | ||
| 16 | #define __NVGPU_DEBUG_CDE_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | void gk20a_cde_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_CDE_H__ */ | ||
diff --git a/include/os/linux/debug_ce.c b/include/os/linux/debug_ce.c new file mode 100644 index 0000000..cea0bb4 --- /dev/null +++ b/include/os/linux/debug_ce.c | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_ce.h" | ||
| 16 | #include "os_linux.h" | ||
| 17 | |||
| 18 | #include <linux/debugfs.h> | ||
| 19 | |||
| 20 | void gk20a_ce_debugfs_init(struct gk20a *g) | ||
| 21 | { | ||
| 22 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 23 | |||
| 24 | debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, | ||
| 25 | l->debugfs, &g->ce_app.ctx_count); | ||
| 26 | debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, | ||
| 27 | l->debugfs, &g->ce_app.app_state); | ||
| 28 | debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, | ||
| 29 | l->debugfs, &g->ce_app.next_ctx_id); | ||
| 30 | } | ||
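Note: debugfs_create_u32() exposes the given u32 in place: reads and writes
go straight to the variable's address with no callbacks or locking, so the
three nodes above always reflect the live ce_app fields. A minimal sketch of
the pattern (the "demo" names are hypothetical):

    #include <linux/debugfs.h>

    static u32 demo_count;

    static void demo_debugfs_init(struct dentry *parent)
    {
            /* Reads return the current demo_count; writes store into it. */
            debugfs_create_u32("demo_count", S_IWUSR | S_IRUGO, parent,
                               &demo_count);
    }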
diff --git a/include/os/linux/debug_ce.h b/include/os/linux/debug_ce.h new file mode 100644 index 0000000..2a8750c --- /dev/null +++ b/include/os/linux/debug_ce.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_CE_H__ | ||
| 16 | #define __NVGPU_DEBUG_CE_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | void gk20a_ce_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_CE_H__ */ | ||
diff --git a/include/os/linux/debug_clk_gm20b.c b/include/os/linux/debug_clk_gm20b.c new file mode 100644 index 0000000..b8b95fd --- /dev/null +++ b/include/os/linux/debug_clk_gm20b.c | |||
| @@ -0,0 +1,280 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/uaccess.h> | ||
| 16 | #include <linux/debugfs.h> | ||
| 17 | #include <linux/seq_file.h> | ||
| 18 | |||
| 19 | #include <nvgpu/io.h> | ||
| 20 | #include <nvgpu/clk_arb.h> | ||
| 21 | |||
| 22 | #include "gm20b/clk_gm20b.h" | ||
| 23 | #include "os_linux.h" | ||
| 24 | #include "platform_gk20a.h" | ||
| 25 | |||
| 26 | static int rate_get(void *data, u64 *val) | ||
| 27 | { | ||
| 28 | struct gk20a *g = (struct gk20a *)data; | ||
| 29 | struct clk_gk20a *clk = &g->clk; | ||
| 30 | |||
| 31 | *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
| 32 | return 0; | ||
| 33 | } | ||
| 34 | static int rate_set(void *data, u64 val) | ||
| 35 | { | ||
| 36 | struct gk20a *g = (struct gk20a *)data; | ||
| 37 | if (nvgpu_clk_arb_has_active_req(g)) | ||
| 38 | return 0; | ||
| 39 | return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val); | ||
| 40 | } | ||
| 41 | DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); | ||
| 42 | |||
| 43 | static int pll_reg_show(struct seq_file *s, void *data) | ||
| 44 | { | ||
| 45 | struct gk20a *g = s->private; | ||
| 46 | struct nvgpu_clk_pll_debug_data d; | ||
| 47 | u32 reg, m, n, pl, f; | ||
| 48 | int err = 0; | ||
| 49 | |||
| 50 | if (g->ops.clk.get_pll_debug_data) { | ||
| 51 | err = g->ops.clk.get_pll_debug_data(g, &d); | ||
| 52 | if (err) | ||
| 53 | return err; | ||
| 54 | } else { | ||
| 55 | return -EINVAL; | ||
| 56 | } | ||
| 57 | |||
| 58 | seq_printf(s, "bypassctrl = %s, ", | ||
| 59 | d.trim_sys_bypassctrl_val ? "bypass" : "vco"); | ||
| 60 | seq_printf(s, "sel_vco = %s, ", | ||
| 61 | d.trim_sys_sel_vco_val ? "vco" : "bypass"); | ||
| 62 | |||
| 63 | seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val, | ||
| 64 | d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled", | ||
| 65 | d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked", | ||
| 66 | d.trim_sys_gpcpll_cfg_sync_on ? "sync_on" : "sync_off"); | ||
| 67 | |||
| 68 | reg = d.trim_sys_gpcpll_coeff_val; | ||
| 69 | m = d.trim_sys_gpcpll_coeff_mdiv; | ||
| 70 | n = d.trim_sys_gpcpll_coeff_ndiv; | ||
| 71 | pl = d.trim_sys_gpcpll_coeff_pldiv; | ||
| 72 | f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl)); | ||
| 73 | seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); | ||
| 74 | seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); | ||
| 75 | |||
| 76 | seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n", | ||
| 77 | d.trim_sys_gpcpll_dvfs0_val, | ||
| 78 | d.trim_sys_gpcpll_dvfs0_dfs_coeff, | ||
| 79 | d.trim_sys_gpcpll_dvfs0_dfs_det_max, | ||
| 80 | d.trim_sys_gpcpll_dvfs0_dfs_dc_offset); | ||
| 81 | |||
| 82 | return 0; | ||
| 83 | } | ||
| 84 | |||
| 85 | static int pll_reg_open(struct inode *inode, struct file *file) | ||
| 86 | { | ||
| 87 | return single_open(file, pll_reg_show, inode->i_private); | ||
| 88 | } | ||
| 89 | |||
| 90 | static const struct file_operations pll_reg_fops = { | ||
| 91 | .open = pll_reg_open, | ||
| 92 | .read = seq_read, | ||
| 93 | .llseek = seq_lseek, | ||
| 94 | .release = single_release, | ||
| 95 | }; | ||
| 96 | |||
| 97 | static int pll_reg_raw_show(struct seq_file *s, void *data) | ||
| 98 | { | ||
| 99 | struct gk20a *g = s->private; | ||
| 100 | struct nvgpu_clk_pll_debug_data d; | ||
| 101 | u32 reg; | ||
| 102 | int err = 0; | ||
| 103 | |||
| 104 | if (g->ops.clk.get_pll_debug_data) { | ||
| 105 | err = g->ops.clk.get_pll_debug_data(g, &d); | ||
| 106 | if (err) | ||
| 107 | return err; | ||
| 108 | } else { | ||
| 109 | return -EINVAL; | ||
| 110 | } | ||
| 111 | |||
| 112 | seq_puts(s, "GPCPLL REGISTERS:\n"); | ||
| 113 | for (reg = d.trim_sys_gpcpll_cfg_reg; | ||
| 114 | reg < d.trim_sys_gpcpll_dvfs2_reg; | ||
| 115 | reg += sizeof(u32)) | ||
| 116 | seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); | ||
| 117 | |||
| 118 | reg = d.trim_bcast_gpcpll_dvfs2_reg; | ||
| 119 | if (reg) | ||
| 120 | seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); | ||
| 121 | |||
| 122 | seq_puts(s, "\nGPC CLK OUT REGISTERS:\n"); | ||
| 123 | |||
| 124 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg, | ||
| 125 | d.trim_sys_sel_vco_val); | ||
| 126 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg, | ||
| 127 | d.trim_sys_gpc2clk_out_val); | ||
| 128 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg, | ||
| 129 | d.trim_sys_bypassctrl_val); | ||
| 130 | |||
| 131 | return 0; | ||
| 132 | } | ||
| 133 | |||
| 134 | static int pll_reg_raw_open(struct inode *inode, struct file *file) | ||
| 135 | { | ||
| 136 | return single_open(file, pll_reg_raw_show, inode->i_private); | ||
| 137 | } | ||
| 138 | |||
| 139 | static ssize_t pll_reg_raw_write(struct file *file, | ||
| 140 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
| 141 | { | ||
| 142 | struct gk20a *g = file->f_path.dentry->d_inode->i_private; | ||
| 143 | char buf[80]; | ||
| 144 | u32 reg, val; | ||
| 145 | int err = 0; | ||
| 146 | |||
| 147 | if (sizeof(buf) <= count) | ||
| 148 | return -EINVAL; | ||
| 149 | |||
| 150 | if (copy_from_user(buf, userbuf, count)) | ||
| 151 | return -EFAULT; | ||
| 152 | |||
| 153 | /* terminate buffer and trim - whitespace may be appended | ||
| 154 | * at the end when invoked from shell command line */ | ||
| 155 | buf[count] = '\0'; | ||
| 156 | strim(buf); | ||
| 157 | |||
| 158 | if (sscanf(buf, "[0x%x] = 0x%x", ®, &val) != 2) | ||
| 159 | return -EINVAL; | ||
| 160 | |||
| 161 | if (g->ops.clk.pll_reg_write) | ||
| 162 | err = g->ops.clk.pll_reg_write(g, reg, val); | ||
| 163 | else | ||
| 164 | err = -EINVAL; | ||
| 165 | |||
| 166 | return err ? err : count; | ||
| 167 | } | ||
| 168 | |||
| 169 | static const struct file_operations pll_reg_raw_fops = { | ||
| 170 | .open = pll_reg_raw_open, | ||
| 171 | .read = seq_read, | ||
| 172 | .write = pll_reg_raw_write, | ||
| 173 | .llseek = seq_lseek, | ||
| 174 | .release = single_release, | ||
| 175 | }; | ||
| 176 | |||
| 177 | static int monitor_get(void *data, u64 *val) | ||
| 178 | { | ||
| 179 | struct gk20a *g = (struct gk20a *)data; | ||
| 180 | int err = 0; | ||
| 181 | |||
| 182 | if (g->ops.clk.get_gpcclk_clock_counter) | ||
| 183 | err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val); | ||
| 184 | else | ||
| 185 | err = -EINVAL; | ||
| 186 | |||
| 187 | return err; | ||
| 188 | } | ||
| 189 | DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); | ||
| 190 | |||
| 191 | static int voltage_get(void *data, u64 *val) | ||
| 192 | { | ||
| 193 | struct gk20a *g = (struct gk20a *)data; | ||
| 194 | int err = 0; | ||
| 195 | |||
| 196 | if (g->ops.clk.get_voltage) | ||
| 197 | err = g->ops.clk.get_voltage(&g->clk, val); | ||
| 198 | else | ||
| 199 | err = -EINVAL; | ||
| 200 | |||
| 201 | return err; | ||
| 202 | } | ||
| 203 | DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n"); | ||
| 204 | |||
| 205 | static int pll_param_show(struct seq_file *s, void *data) | ||
| 206 | { | ||
| 207 | struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms(); | ||
| 208 | |||
| 209 | seq_printf(s, "ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n", | ||
| 210 | gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope, | ||
| 211 | gpc_pll_params->vco_ctrl); | ||
| 212 | return 0; | ||
| 213 | } | ||
| 214 | |||
| 215 | static int pll_param_open(struct inode *inode, struct file *file) | ||
| 216 | { | ||
| 217 | return single_open(file, pll_param_show, inode->i_private); | ||
| 218 | } | ||
| 219 | |||
| 220 | static const struct file_operations pll_param_fops = { | ||
| 221 | .open = pll_param_open, | ||
| 222 | .read = seq_read, | ||
| 223 | .llseek = seq_lseek, | ||
| 224 | .release = single_release, | ||
| 225 | }; | ||
| 226 | |||
| 227 | int gm20b_clk_init_debugfs(struct gk20a *g) | ||
| 228 | { | ||
| 229 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 230 | struct dentry *d; | ||
| 231 | |||
| 232 | if (!l->debugfs) | ||
| 233 | return -EINVAL; | ||
| 234 | |||
| 235 | d = debugfs_create_file( | ||
| 236 | "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops); | ||
| 237 | if (!d) | ||
| 238 | goto err_out; | ||
| 239 | |||
| 240 | d = debugfs_create_file( | ||
| 241 | "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops); | ||
| 242 | if (!d) | ||
| 243 | goto err_out; | ||
| 244 | |||
| 245 | d = debugfs_create_file("pll_reg_raw", | ||
| 246 | S_IRUGO, l->debugfs, g, &pll_reg_raw_fops); | ||
| 247 | if (!d) | ||
| 248 | goto err_out; | ||
| 249 | |||
| 250 | d = debugfs_create_file( | ||
| 251 | "monitor", S_IRUGO, l->debugfs, g, &monitor_fops); | ||
| 252 | if (!d) | ||
| 253 | goto err_out; | ||
| 254 | |||
| 255 | d = debugfs_create_file( | ||
| 256 | "voltage", S_IRUGO, l->debugfs, g, &voltage_fops); | ||
| 257 | if (!d) | ||
| 258 | goto err_out; | ||
| 259 | |||
| 260 | d = debugfs_create_file( | ||
| 261 | "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops); | ||
| 262 | if (!d) | ||
| 263 | goto err_out; | ||
| 264 | |||
| 265 | d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs, | ||
| 266 | (u32 *)&g->clk.gpc_pll.mode); | ||
| 267 | if (!d) | ||
| 268 | goto err_out; | ||
| 269 | |||
| 270 | d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO, | ||
| 271 | l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq); | ||
| 272 | if (!d) | ||
| 273 | goto err_out; | ||
| 274 | |||
| 275 | return 0; | ||
| 276 | |||
| 277 | err_out: | ||
| 278 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
| 279 | return -ENOMEM; | ||
| 280 | } | ||
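Note: most scalar nodes in this file are built with DEFINE_SIMPLE_ATTRIBUTE,
which wraps a get/set pair into file_operations that print and parse a u64
using the supplied format string. A self-contained sketch of that pattern
(all "demo" names are hypothetical):

    #include <linux/debugfs.h>
    #include <linux/fs.h>

    static u64 demo_rate;

    static int demo_rate_get(void *data, u64 *val)
    {
            *val = demo_rate;       /* reads print this via "%llu\n" */
            return 0;
    }

    static int demo_rate_set(void *data, u64 val)
    {
            demo_rate = val;        /* writes are parsed into val */
            return 0;
    }
    DEFINE_SIMPLE_ATTRIBUTE(demo_rate_fops, demo_rate_get, demo_rate_set,
                            "%llu\n");

    /* Hooked up the same way as "rate" above:
     * debugfs_create_file("demo_rate", S_IRUGO | S_IWUSR, parent, NULL,
     *                     &demo_rate_fops);
     */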
diff --git a/include/os/linux/debug_clk_gm20b.h b/include/os/linux/debug_clk_gm20b.h new file mode 100644 index 0000000..850ad89 --- /dev/null +++ b/include/os/linux/debug_clk_gm20b.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __DEBUG_CLK_GM20B_H | ||
| 18 | #define __DEBUG_CLK_GM20B_H | ||
| 19 | |||
| 20 | #ifdef CONFIG_DEBUG_FS | ||
| 21 | int gm20b_clk_init_debugfs(struct gk20a *g); | ||
| 22 | #else | ||
| 23 | static inline int gm20b_clk_init_debugfs(struct gk20a *g) | ||
| 24 | { | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | #endif | ||
| 28 | |||
| 29 | #endif | ||
diff --git a/include/os/linux/debug_clk_gp106.c b/include/os/linux/debug_clk_gp106.c new file mode 100644 index 0000000..4900c00 --- /dev/null +++ b/include/os/linux/debug_clk_gp106.c | |||
| @@ -0,0 +1,193 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/debugfs.h> | ||
| 18 | |||
| 19 | #include <nvgpu/clk.h> | ||
| 20 | |||
| 21 | #include "os_linux.h" | ||
| 22 | |||
| 23 | void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock); | ||
| 24 | |||
| 25 | static int gp106_get_rate_show(void *data, u64 *val) | ||
| 26 | { | ||
| 27 | struct namemap_cfg *c = (struct namemap_cfg *)data; | ||
| 28 | struct gk20a *g = c->g; | ||
| 29 | |||
| 30 | if (!g->ops.clk.get_rate_cntr) | ||
| 31 | return -EINVAL; | ||
| 32 | |||
| 33 | *val = c->is_counter ? (u64)c->scale * g->ops.clk.get_rate_cntr(g, c) : | ||
| 34 | 0 /* TODO PLL read */; | ||
| 35 | |||
| 36 | return 0; | ||
| 37 | } | ||
| 38 | DEFINE_SIMPLE_ATTRIBUTE(get_rate_fops, gp106_get_rate_show, NULL, "%llu\n"); | ||
| 39 | |||
| 40 | static int sys_cfc_read(void *data, u64 *val) | ||
| 41 | { | ||
| 42 | struct gk20a *g = (struct gk20a *)data; | ||
| 43 | bool bload = boardobjgrpmask_bitget( | ||
| 44 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
| 45 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS); | ||
| 46 | |||
| 47 | /* val = 1 implies CLFC is loaded or enabled */ | ||
| 48 | *val = bload ? 1 : 0; | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | static int sys_cfc_write(void *data, u64 val) | ||
| 52 | { | ||
| 53 | struct gk20a *g = (struct gk20a *)data; | ||
| 54 | int status; | ||
| 55 | /* val = 1 implies load or enable the CLFC */ | ||
| 56 | bool bload = val ? true : false; | ||
| 57 | |||
| 58 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
| 59 | status = clk_pmu_freq_controller_load(g, bload, | ||
| 60 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS); | ||
| 61 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
| 62 | |||
| 63 | return status; | ||
| 64 | } | ||
| 65 | DEFINE_SIMPLE_ATTRIBUTE(sys_cfc_fops, sys_cfc_read, sys_cfc_write, "%llu\n"); | ||
| 66 | |||
| 67 | static int ltc_cfc_read(void *data, u64 *val) | ||
| 68 | { | ||
| 69 | struct gk20a *g = (struct gk20a *)data; | ||
| 70 | bool bload = boardobjgrpmask_bitget( | ||
| 71 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
| 72 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC); | ||
| 73 | |||
| 74 | /* val = 1 implies CLFC is loaded or enabled */ | ||
| 75 | *val = bload ? 1 : 0; | ||
| 76 | return 0; | ||
| 77 | } | ||
| 78 | static int ltc_cfc_write(void *data, u64 val) | ||
| 79 | { | ||
| 80 | struct gk20a *g = (struct gk20a *)data; | ||
| 81 | int status; | ||
| 82 | /* val = 1 implies load or enable the CLFC */ | ||
| 83 | bool bload = val ? true : false; | ||
| 84 | |||
| 85 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
| 86 | status = clk_pmu_freq_controller_load(g, bload, | ||
| 87 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC); | ||
| 88 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
| 89 | |||
| 90 | return status; | ||
| 91 | } | ||
| 92 | DEFINE_SIMPLE_ATTRIBUTE(ltc_cfc_fops, ltc_cfc_read, ltc_cfc_write, "%llu\n"); | ||
| 93 | |||
| 94 | static int xbar_cfc_read(void *data, u64 *val) | ||
| 95 | { | ||
| 96 | struct gk20a *g = (struct gk20a *)data; | ||
| 97 | bool bload = boardobjgrpmask_bitget( | ||
| 98 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
| 99 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR); | ||
| 100 | |||
| 101 | /* val = 1 implies CLFC is loaded or enabled */ | ||
| 102 | *val = bload ? 1 : 0; | ||
| 103 | return 0; | ||
| 104 | } | ||
| 105 | static int xbar_cfc_write(void *data, u64 val) | ||
| 106 | { | ||
| 107 | struct gk20a *g = (struct gk20a *)data; | ||
| 108 | int status; | ||
| 109 | /* val = 1 implies load or enable the CLFC */ | ||
| 110 | bool bload = val ? true : false; | ||
| 111 | |||
| 112 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
| 113 | status = clk_pmu_freq_controller_load(g, bload, | ||
| 114 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR); | ||
| 115 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
| 116 | |||
| 117 | return status; | ||
| 118 | } | ||
| 119 | DEFINE_SIMPLE_ATTRIBUTE(xbar_cfc_fops, xbar_cfc_read, | ||
| 120 | xbar_cfc_write, "%llu\n"); | ||
| 121 | |||
| 122 | static int gpc_cfc_read(void *data, u64 *val) | ||
| 123 | { | ||
| 124 | struct gk20a *g = (struct gk20a *)data; | ||
| 125 | bool bload = boardobjgrpmask_bitget( | ||
| 126 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
| 127 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0); | ||
| 128 | |||
| 129 | /* val = 1 implies CLFC is loaded or enabled */ | ||
| 130 | *val = bload ? 1 : 0; | ||
| 131 | return 0; | ||
| 132 | } | ||
| 133 | static int gpc_cfc_write(void *data, u64 val) | ||
| 134 | { | ||
| 135 | struct gk20a *g = (struct gk20a *)data; | ||
| 136 | int status; | ||
| 137 | /* val = 1 implies load or enable the CLFC */ | ||
| 138 | bool bload = val ? true : false; | ||
| 139 | |||
| 140 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
| 141 | status = clk_pmu_freq_controller_load(g, bload, | ||
| 142 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0); | ||
| 143 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
| 144 | |||
| 145 | return status; | ||
| 146 | } | ||
| 147 | DEFINE_SIMPLE_ATTRIBUTE(gpc_cfc_fops, gpc_cfc_read, gpc_cfc_write, "%llu\n"); | ||
| 148 | |||
| 149 | int gp106_clk_init_debugfs(struct gk20a *g) | ||
| 150 | { | ||
| 151 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 152 | struct dentry *gpu_root = l->debugfs; | ||
| 153 | struct dentry *clocks_root, *clk_freq_ctlr_root; | ||
| 154 | struct dentry *d; | ||
| 155 | unsigned int i; | ||
| 156 | |||
| 157 | if (NULL == (clocks_root = debugfs_create_dir("clocks", gpu_root))) | ||
| 158 | return -ENOMEM; | ||
| 159 | |||
| 160 | clk_freq_ctlr_root = debugfs_create_dir("clk_freq_ctlr", gpu_root); | ||
| 161 | if (clk_freq_ctlr_root == NULL) | ||
| 162 | return -ENOMEM; | ||
| 163 | |||
| 164 | d = debugfs_create_file("sys", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
| 165 | g, &sys_cfc_fops); | ||
| 166 | d = debugfs_create_file("ltc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
| 167 | g, <c_cfc_fops); | ||
| 168 | d = debugfs_create_file("xbar", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
| 169 | g, &xbar_cfc_fops); | ||
| 170 | d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
| 171 | g, &gpc_cfc_fops); | ||
| 172 | |||
| 173 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
| 174 | |||
| 175 | for (i = 0; i < g->clk.namemap_num; i++) { | ||
| 176 | if (g->clk.clk_namemap[i].is_enable) { | ||
| 177 | d = debugfs_create_file( | ||
| 178 | g->clk.clk_namemap[i].name, | ||
| 179 | S_IRUGO, | ||
| 180 | clocks_root, | ||
| 181 | &g->clk.clk_namemap[i], | ||
| 182 | &get_rate_fops); | ||
| 183 | if (!d) | ||
| 184 | goto err_out; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | return 0; | ||
| 188 | |||
| 189 | err_out: | ||
| 190 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
| 191 | debugfs_remove_recursive(clocks_root); | ||
| 192 | return -ENOMEM; | ||
| 193 | } | ||
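Note: the four CLFC read/write pairs above differ only in the controller ID
they pass to clk_pmu_freq_controller_load(). A table-driven variant could
collapse them; this is a hypothetical refactor sketch, not how the driver is
actually structured:

    /* One write handler parameterized by a descriptor instead of four
     * near-identical copies. The cfc_node type is invented here. */
    struct cfc_node {
            struct gk20a *g;
            u32 ctrl_id;    /* e.g. CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS */
    };

    static int cfc_write(void *data, u64 val)
    {
            struct cfc_node *n = data;
            int status;

            nvgpu_clk_arb_pstate_change_lock(n->g, true);
            status = clk_pmu_freq_controller_load(n->g, val != 0,
                                                  n->ctrl_id);
            nvgpu_clk_arb_pstate_change_lock(n->g, false);

            return status;
    }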
diff --git a/include/os/linux/debug_clk_gp106.h b/include/os/linux/debug_clk_gp106.h new file mode 100644 index 0000000..b1d031d --- /dev/null +++ b/include/os/linux/debug_clk_gp106.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __DEBUG_CLK_GP106_H | ||
| 18 | #define __DEBUG_CLK_GP106_H | ||
| 19 | |||
| 20 | #ifdef CONFIG_DEBUG_FS | ||
| 21 | int gp106_clk_init_debugfs(struct gk20a *g); | ||
| 22 | #else | ||
| 23 | static inline int gp106_clk_init_debugfs(struct gk20a *g) | ||
| 24 | { | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | #endif | ||
| 28 | |||
| 29 | #endif | ||
diff --git a/include/os/linux/debug_clk_gv100.c b/include/os/linux/debug_clk_gv100.c new file mode 100644 index 0000000..623f2b6 --- /dev/null +++ b/include/os/linux/debug_clk_gv100.c | |||
| @@ -0,0 +1,193 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/debugfs.h> | ||
| 18 | |||
| 19 | #include "gv100/clk_gv100.h" | ||
| 20 | |||
| 21 | #include "os_linux.h" | ||
| 22 | |||
| 23 | void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock); | ||
| 24 | |||
| 25 | static int gv100_get_rate_show(void *data, u64 *val) | ||
| 26 | { | ||
| 27 | struct namemap_cfg *c = (struct namemap_cfg *)data; | ||
| 28 | struct gk20a *g = c->g; | ||
| 29 | |||
| 30 | if (!g->ops.clk.get_rate_cntr) | ||
| 31 | return -EINVAL; | ||
| 32 | |||
| 33 | *val = c->is_counter ? (u64)c->scale * g->ops.clk.get_rate_cntr(g, c) : | ||
| 34 | 0 /* TODO PLL read */; | ||
| 35 | |||
| 36 | return 0; | ||
| 37 | } | ||
| 38 | DEFINE_SIMPLE_ATTRIBUTE(get_rate_fops, gv100_get_rate_show, NULL, "%llu\n"); | ||
| 39 | |||
| 40 | static int sys_cfc_read(void *data, u64 *val) | ||
| 41 | { | ||
| 42 | struct gk20a *g = (struct gk20a *)data; | ||
| 43 | bool bload = boardobjgrpmask_bitget( | ||
| 44 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
| 45 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS); | ||
| 46 | |||
| 47 | /* val = 1 implies CLFC is loaded or enabled */ | ||
| 48 | *val = bload ? 1 : 0; | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | static int sys_cfc_write(void *data, u64 val) | ||
| 52 | { | ||
| 53 | struct gk20a *g = (struct gk20a *)data; | ||
| 54 | int status; | ||
| 55 | /* val = 1 implies load or enable the CLFC */ | ||
| 56 | bool bload = val ? true : false; | ||
| 57 | |||
| 58 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
| 59 | status = clk_pmu_freq_controller_load(g, bload, | ||
| 60 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS); | ||
| 61 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
| 62 | |||
| 63 | return status; | ||
| 64 | } | ||
| 65 | DEFINE_SIMPLE_ATTRIBUTE(sys_cfc_fops, sys_cfc_read, sys_cfc_write, "%llu\n"); | ||
| 66 | |||
| 67 | static int ltc_cfc_read(void *data, u64 *val) | ||
| 68 | { | ||
| 69 | struct gk20a *g = (struct gk20a *)data; | ||
| 70 | bool bload = boardobjgrpmask_bitget( | ||
| 71 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
| 72 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC); | ||
| 73 | |||
| 74 | /* val = 1 implies CLFC is loaded or enabled */ | ||
| 75 | *val = bload ? 1 : 0; | ||
| 76 | return 0; | ||
| 77 | } | ||
| 78 | static int ltc_cfc_write(void *data, u64 val) | ||
| 79 | { | ||
| 80 | struct gk20a *g = (struct gk20a *)data; | ||
| 81 | int status; | ||
| 82 | /* val = 1 implies load or enable the CLFC */ | ||
| 83 | bool bload = val ? true : false; | ||
| 84 | |||
| 85 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
| 86 | status = clk_pmu_freq_controller_load(g, bload, | ||
| 87 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC); | ||
| 88 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
| 89 | |||
| 90 | return status; | ||
| 91 | } | ||
| 92 | DEFINE_SIMPLE_ATTRIBUTE(ltc_cfc_fops, ltc_cfc_read, ltc_cfc_write, "%llu\n"); | ||
| 93 | |||
| 94 | static int xbar_cfc_read(void *data, u64 *val) | ||
| 95 | { | ||
| 96 | struct gk20a *g = (struct gk20a *)data; | ||
| 97 | bool bload = boardobjgrpmask_bitget( | ||
| 98 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
| 99 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR); | ||
| 100 | |||
| 101 | /* val = 1 implies CLFC is loaded or enabled */ | ||
| 102 | *val = bload ? 1 : 0; | ||
| 103 | return 0; | ||
| 104 | } | ||
| 105 | static int xbar_cfc_write(void *data, u64 val) | ||
| 106 | { | ||
| 107 | struct gk20a *g = (struct gk20a *)data; | ||
| 108 | int status; | ||
| 109 | /* val = 1 implies load or enable the CLFC */ | ||
| 110 | bool bload = val ? true : false; | ||
| 111 | |||
| 112 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
| 113 | status = clk_pmu_freq_controller_load(g, bload, | ||
| 114 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR); | ||
| 115 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
| 116 | |||
| 117 | return status; | ||
| 118 | } | ||
| 119 | DEFINE_SIMPLE_ATTRIBUTE(xbar_cfc_fops, xbar_cfc_read, | ||
| 120 | xbar_cfc_write, "%llu\n"); | ||
| 121 | |||
| 122 | static int gpc_cfc_read(void *data, u64 *val) | ||
| 123 | { | ||
| 124 | struct gk20a *g = (struct gk20a *)data; | ||
| 125 | bool bload = boardobjgrpmask_bitget( | ||
| 126 | &g->clk_pmu.clk_freq_controllers.freq_ctrl_load_mask.super, | ||
| 127 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0); | ||
| 128 | |||
| 129 | /* val = 1 implies CLFC is loaded or enabled */ | ||
| 130 | *val = bload ? 1 : 0; | ||
| 131 | return 0; | ||
| 132 | } | ||
| 133 | static int gpc_cfc_write(void *data, u64 val) | ||
| 134 | { | ||
| 135 | struct gk20a *g = (struct gk20a *)data; | ||
| 136 | int status; | ||
| 137 | /* val = 1 implies load or enable the CLFC */ | ||
| 138 | bool bload = val ? true : false; | ||
| 139 | |||
| 140 | nvgpu_clk_arb_pstate_change_lock(g, true); | ||
| 141 | status = clk_pmu_freq_controller_load(g, bload, | ||
| 142 | CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0); | ||
| 143 | nvgpu_clk_arb_pstate_change_lock(g, false); | ||
| 144 | |||
| 145 | return status; | ||
| 146 | } | ||
| 147 | DEFINE_SIMPLE_ATTRIBUTE(gpc_cfc_fops, gpc_cfc_read, gpc_cfc_write, "%llu\n"); | ||
| 148 | |||
| 149 | int gv100_clk_init_debugfs(struct gk20a *g) | ||
| 150 | { | ||
| 151 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 152 | struct dentry *gpu_root = l->debugfs; | ||
| 153 | struct dentry *clocks_root, *clk_freq_ctlr_root; | ||
| 154 | struct dentry *d; | ||
| 155 | unsigned int i; | ||
| 156 | |||
| 157 | if (NULL == (clocks_root = debugfs_create_dir("clocks", gpu_root))) | ||
| 158 | return -ENOMEM; | ||
| 159 | |||
| 160 | clk_freq_ctlr_root = debugfs_create_dir("clk_freq_ctlr", gpu_root); | ||
| 161 | if (clk_freq_ctlr_root == NULL) | ||
| 162 | return -ENOMEM; | ||
| 163 | |||
| 164 | d = debugfs_create_file("sys", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
| 165 | g, &sys_cfc_fops); | ||
| 166 | d = debugfs_create_file("ltc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
| 167 | g, <c_cfc_fops); | ||
| 168 | d = debugfs_create_file("xbar", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
| 169 | g, &xbar_cfc_fops); | ||
| 170 | d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, | ||
| 171 | g, &gpc_cfc_fops); | ||
| 172 | |||
| 173 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
| 174 | |||
| 175 | for (i = 0; i < g->clk.namemap_num; i++) { | ||
| 176 | if (g->clk.clk_namemap[i].is_enable) { | ||
| 177 | d = debugfs_create_file( | ||
| 178 | g->clk.clk_namemap[i].name, | ||
| 179 | S_IRUGO, | ||
| 180 | clocks_root, | ||
| 181 | &g->clk.clk_namemap[i], | ||
| 182 | &get_rate_fops); | ||
| 183 | if (!d) | ||
| 184 | goto err_out; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | return 0; | ||
| 188 | |||
| 189 | err_out: | ||
| 190 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
| 191 | debugfs_remove_recursive(clocks_root); | ||
| 192 | return -ENOMEM; | ||
| 193 | } | ||
diff --git a/include/os/linux/debug_clk_gv100.h b/include/os/linux/debug_clk_gv100.h new file mode 100644 index 0000000..419b4ab --- /dev/null +++ b/include/os/linux/debug_clk_gv100.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __DEBUG_CLK_GV100_H | ||
| 18 | #define __DEBUG_CLK_GV100_H | ||
| 19 | |||
| 20 | #ifdef CONFIG_DEBUG_FS | ||
| 21 | int gv100_clk_init_debugfs(struct gk20a *g); | ||
| 22 | #else | ||
| 23 | static inline int gv100_clk_init_debugfs(struct gk20a *g) | ||
| 24 | { | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | #endif | ||
| 28 | |||
| 29 | #endif | ||
diff --git a/include/os/linux/debug_fecs_trace.c b/include/os/linux/debug_fecs_trace.c new file mode 100644 index 0000000..7786053 --- /dev/null +++ b/include/os/linux/debug_fecs_trace.c | |||
| @@ -0,0 +1,151 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/debugfs.h> | ||
| 18 | |||
| 19 | #include <nvgpu/fecs_trace.h> | ||
| 20 | |||
| 21 | #include "os_linux.h" | ||
| 22 | |||
| 23 | /* | ||
| 24 | * The sequence iterator functions. We simply use the index of the | ||
| 25 | * next record as our internal position. | ||
| 26 | */ | ||
| 27 | static void *gk20a_fecs_trace_debugfs_ring_seq_start( | ||
| 28 | struct seq_file *s, loff_t *pos) | ||
| 29 | { | ||
| 30 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
| 31 | return NULL; | ||
| 32 | |||
| 33 | return pos; | ||
| 34 | } | ||
| 35 | |||
| 36 | static void *gk20a_fecs_trace_debugfs_ring_seq_next( | ||
| 37 | struct seq_file *s, void *v, loff_t *pos) | ||
| 38 | { | ||
| 39 | ++(*pos); | ||
| 40 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
| 41 | return NULL; | ||
| 42 | return pos; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void gk20a_fecs_trace_debugfs_ring_seq_stop( | ||
| 46 | struct seq_file *s, void *v) | ||
| 47 | { | ||
| 48 | } | ||
| 49 | |||
| 50 | static int gk20a_fecs_trace_debugfs_ring_seq_show( | ||
| 51 | struct seq_file *s, void *v) | ||
| 52 | { | ||
| 53 | loff_t *pos = (loff_t *) v; | ||
| 54 | struct gk20a *g = *(struct gk20a **)s->private; | ||
| 55 | struct gk20a_fecs_trace_record *r = | ||
| 56 | gk20a_fecs_trace_get_record(g, *pos); | ||
| 57 | int i; | ||
| 58 | const u32 invalid_tag = gk20a_fecs_trace_record_ts_tag_invalid_ts_v(); | ||
| 59 | u32 tag; | ||
| 60 | u64 timestamp; | ||
| 61 | |||
| 62 | seq_printf(s, "record #%lld (%p)\n", *pos, r); | ||
| 63 | seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo); | ||
| 64 | seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi); | ||
| 65 | if (gk20a_fecs_trace_is_valid_record(r)) { | ||
| 66 | seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr); | ||
| 67 | seq_printf(s, "\tcontext_id=%08x\n", r->context_id); | ||
| 68 | seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr); | ||
| 69 | seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id); | ||
| 70 | for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { | ||
| 71 | tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); | ||
| 72 | if (tag == invalid_tag) | ||
| 73 | continue; | ||
| 74 | timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); | ||
| 75 | timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; | ||
| 76 | seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | return 0; | ||
| 80 | } | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Tie them all together into a set of seq_operations. | ||
| 84 | */ | ||
| 85 | static const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = { | ||
| 86 | .start = gk20a_fecs_trace_debugfs_ring_seq_start, | ||
| 87 | .next = gk20a_fecs_trace_debugfs_ring_seq_next, | ||
| 88 | .stop = gk20a_fecs_trace_debugfs_ring_seq_stop, | ||
| 89 | .show = gk20a_fecs_trace_debugfs_ring_seq_show | ||
| 90 | }; | ||
| 91 | |||
| 92 | /* | ||
| 93 | * Time to set up the file operations for our debugfs file. In this case, | ||
| 94 | * all we need is an open function which sets up the sequence ops. | ||
| 95 | */ | ||
| 96 | |||
| 97 | static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode, | ||
| 98 | struct file *file) | ||
| 99 | { | ||
| 100 | struct gk20a **p; | ||
| 101 | |||
| 102 | p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops, | ||
| 103 | sizeof(struct gk20a *)); | ||
| 104 | if (!p) | ||
| 105 | return -ENOMEM; | ||
| 106 | |||
| 107 | *p = (struct gk20a *)inode->i_private; | ||
| 108 | return 0; | ||
| 109 | } | ||
| 110 | |||
| 111 | /* | ||
| 112 | * The file operations structure contains our open function along with | ||
| 113 | * the set of canned seq_ ops. | ||
| 114 | */ | ||
| 115 | static const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = { | ||
| 116 | .owner = THIS_MODULE, | ||
| 117 | .open = gk20a_ctxsw_debugfs_ring_open, | ||
| 118 | .read = seq_read, | ||
| 119 | .llseek = seq_lseek, | ||
| 120 | .release = seq_release_private | ||
| 121 | }; | ||
| 122 | |||
| 123 | static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val) | ||
| 124 | { | ||
| 125 | *val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg); | ||
| 126 | return 0; | ||
| 127 | } | ||
| 128 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops, | ||
| 129 | gk20a_fecs_trace_debugfs_read, NULL, "%llu\n"); | ||
| 130 | |||
| 131 | static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val) | ||
| 132 | { | ||
| 133 | *val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg); | ||
| 134 | return 0; | ||
| 135 | } | ||
| 136 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops, | ||
| 137 | gk20a_fecs_trace_debugfs_write, NULL, "%llu\n"); | ||
| 138 | |||
| 139 | int nvgpu_fecs_trace_init_debugfs(struct gk20a *g) | ||
| 140 | { | ||
| 141 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 142 | |||
| 143 | debugfs_create_file("ctxsw_trace_read", 0600, l->debugfs, g, | ||
| 144 | &gk20a_fecs_trace_debugfs_read_fops); | ||
| 145 | debugfs_create_file("ctxsw_trace_write", 0600, l->debugfs, g, | ||
| 146 | &gk20a_fecs_trace_debugfs_write_fops); | ||
| 147 | debugfs_create_file("ctxsw_trace_ring", 0600, l->debugfs, g, | ||
| 148 | &gk20a_fecs_trace_debugfs_ring_fops); | ||
| 149 | |||
| 150 | return 0; | ||
| 151 | } | ||
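Note: the ring file above exercises the full seq_file iterator protocol:
start() positions the cursor, next() advances it, show() formats one record,
and stop() releases any resources. A minimal self-contained sketch of the
same protocol over a fixed array (all "demo" names are hypothetical):

    #include <linux/seq_file.h>

    #define DEMO_NUM_RECORDS 4

    static const int demo_records[DEMO_NUM_RECORDS] = { 10, 20, 30, 40 };

    static void *demo_seq_start(struct seq_file *s, loff_t *pos)
    {
            if (*pos >= DEMO_NUM_RECORDS)
                    return NULL;
            return pos;
    }

    static void *demo_seq_next(struct seq_file *s, void *v, loff_t *pos)
    {
            ++(*pos);
            if (*pos >= DEMO_NUM_RECORDS)
                    return NULL;
            return pos;
    }

    static void demo_seq_stop(struct seq_file *s, void *v)
    {
            /* nothing to release: the records are static */
    }

    static int demo_seq_show(struct seq_file *s, void *v)
    {
            loff_t *pos = v;

            seq_printf(s, "record #%lld = %d\n", *pos, demo_records[*pos]);
            return 0;
    }

    static const struct seq_operations demo_seq_ops = {
            .start = demo_seq_start,
            .next  = demo_seq_next,
            .stop  = demo_seq_stop,
            .show  = demo_seq_show,
    };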
diff --git a/include/os/linux/debug_fecs_trace.h b/include/os/linux/debug_fecs_trace.h new file mode 100644 index 0000000..54ebaaf --- /dev/null +++ b/include/os/linux/debug_fecs_trace.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef LINUX_DEBUG_FECS_TRACE_H | ||
| 18 | #define LINUX_DEBUG_FECS_TRACE_H | ||
| 19 | |||
| 20 | struct gk20a; | ||
| 21 | |||
| 22 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_GK20A_CTXSW_TRACE) | ||
| 23 | int nvgpu_fecs_trace_init_debugfs(struct gk20a *g); | ||
| 24 | #else | ||
| 25 | static inline int nvgpu_fecs_trace_init_debugfs(struct gk20a *g) | ||
| 26 | { | ||
| 27 | return 0; | ||
| 28 | } | ||
| 29 | #endif | ||
| 30 | #endif | ||
diff --git a/include/os/linux/debug_fifo.c b/include/os/linux/debug_fifo.c new file mode 100644 index 0000000..98da8bc --- /dev/null +++ b/include/os/linux/debug_fifo.c | |||
| @@ -0,0 +1,376 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_fifo.h" | ||
| 16 | #include "os_linux.h" | ||
| 17 | |||
| 18 | #include <linux/debugfs.h> | ||
| 19 | #include <linux/seq_file.h> | ||
| 20 | |||
| 21 | #include <nvgpu/sort.h> | ||
| 22 | #include <nvgpu/timers.h> | ||
| 23 | #include <nvgpu/channel.h> | ||
| 24 | |||
| 25 | void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); | ||
| 26 | |||
| 27 | static void *gk20a_fifo_sched_debugfs_seq_start( | ||
| 28 | struct seq_file *s, loff_t *pos) | ||
| 29 | { | ||
| 30 | struct gk20a *g = s->private; | ||
| 31 | struct fifo_gk20a *f = &g->fifo; | ||
| 32 | |||
| 33 | if (*pos >= f->num_channels) | ||
| 34 | return NULL; | ||
| 35 | |||
| 36 | return &f->channel[*pos]; | ||
| 37 | } | ||
| 38 | |||
| 39 | static void *gk20a_fifo_sched_debugfs_seq_next( | ||
| 40 | struct seq_file *s, void *v, loff_t *pos) | ||
| 41 | { | ||
| 42 | struct gk20a *g = s->private; | ||
| 43 | struct fifo_gk20a *f = &g->fifo; | ||
| 44 | |||
| 45 | ++(*pos); | ||
| 46 | if (*pos >= f->num_channels) | ||
| 47 | return NULL; | ||
| 48 | |||
| 49 | return &f->channel[*pos]; | ||
| 50 | } | ||
| 51 | |||
| 52 | static void gk20a_fifo_sched_debugfs_seq_stop( | ||
| 53 | struct seq_file *s, void *v) | ||
| 54 | { | ||
| 55 | } | ||
| 56 | |||
| 57 | static int gk20a_fifo_sched_debugfs_seq_show( | ||
| 58 | struct seq_file *s, void *v) | ||
| 59 | { | ||
| 60 | struct gk20a *g = s->private; | ||
| 61 | struct fifo_gk20a *f = &g->fifo; | ||
| 62 | struct channel_gk20a *ch = v; | ||
| 63 | struct tsg_gk20a *tsg = NULL; | ||
| 64 | |||
| 65 | struct fifo_engine_info_gk20a *engine_info; | ||
| 66 | struct fifo_runlist_info_gk20a *runlist; | ||
| 67 | u32 runlist_id; | ||
| 68 | int ret = SEQ_SKIP; | ||
| 69 | u32 engine_id; | ||
| 70 | |||
| 71 | engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
| 72 | engine_info = (f->engine_info + engine_id); | ||
| 73 | runlist_id = engine_info->runlist_id; | ||
| 74 | runlist = &f->runlist_info[runlist_id]; | ||
| 75 | |||
| 76 | if (ch == f->channel) { | ||
| 77 | seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n"); | ||
| 78 | seq_puts(s, " (usecs) (msecs)\n"); | ||
| 79 | ret = 0; | ||
| 80 | } | ||
| 81 | |||
| 82 | if (!test_bit(ch->chid, runlist->active_channels)) | ||
| 83 | return ret; | ||
| 84 | |||
| 85 | if (gk20a_channel_get(ch)) { | ||
| 86 | tsg = tsg_gk20a_from_ch(ch); | ||
| 87 | |||
| 88 | if (tsg) | ||
| 89 | seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n", | ||
| 90 | ch->chid, | ||
| 91 | ch->tsgid, | ||
| 92 | ch->tgid, | ||
| 93 | tsg->timeslice_us, | ||
| 94 | ch->timeout_ms_max, | ||
| 95 | tsg->interleave_level, | ||
| 96 | tsg->gr_ctx.graphics_preempt_mode, | ||
| 97 | tsg->gr_ctx.compute_preempt_mode); | ||
| 98 | gk20a_channel_put(ch); | ||
| 99 | } | ||
| 100 | return 0; | ||
| 101 | } | ||
| 102 | |||
| 103 | static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { | ||
| 104 | .start = gk20a_fifo_sched_debugfs_seq_start, | ||
| 105 | .next = gk20a_fifo_sched_debugfs_seq_next, | ||
| 106 | .stop = gk20a_fifo_sched_debugfs_seq_stop, | ||
| 107 | .show = gk20a_fifo_sched_debugfs_seq_show | ||
| 108 | }; | ||
| 109 | |||
| 110 | static int gk20a_fifo_sched_debugfs_open(struct inode *inode, | ||
| 111 | struct file *file) | ||
| 112 | { | ||
| 113 | struct gk20a *g = inode->i_private; | ||
| 114 | int err; | ||
| 115 | |||
| 116 | err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); | ||
| 117 | if (err) | ||
| 118 | return err; | ||
| 119 | |||
| 120 | nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private); | ||
| 121 | |||
| 122 | ((struct seq_file *)file->private_data)->private = inode->i_private; | ||
| 123 | return 0; | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * The file operations structure contains our open function along with | ||
| 128 | * the set of canned seq_ ops. | ||
| 129 | */ | ||
| 130 | static const struct file_operations gk20a_fifo_sched_debugfs_fops = { | ||
| 131 | .owner = THIS_MODULE, | ||
| 132 | .open = gk20a_fifo_sched_debugfs_open, | ||
| 133 | .read = seq_read, | ||
| 134 | .llseek = seq_lseek, | ||
| 135 | .release = seq_release | ||
| 136 | }; | ||
| 137 | |||
| 138 | static int gk20a_fifo_profile_enable(void *data, u64 val) | ||
| 139 | { | ||
| 140 | struct gk20a *g = (struct gk20a *) data; | ||
| 141 | struct fifo_gk20a *f = &g->fifo; | ||
| 142 | |||
| 143 | |||
| 144 | nvgpu_mutex_acquire(&f->profile.lock); | ||
| 145 | if (val == 0) { | ||
| 146 | if (f->profile.enabled) { | ||
| 147 | f->profile.enabled = false; | ||
| 148 | nvgpu_ref_put(&f->profile.ref, | ||
| 149 | __gk20a_fifo_profile_free); | ||
| 150 | } | ||
| 151 | } else { | ||
| 152 | if (!f->profile.enabled) { | ||
| 153 | /* not kref init as it can have a race condition if | ||
| 154 | * we enable/disable/enable while kickoff is happening | ||
| 155 | */ | ||
| 156 | if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) { | ||
| 157 | f->profile.data = nvgpu_vzalloc(g, | ||
| 158 | FIFO_PROFILING_ENTRIES * | ||
| 159 | sizeof(struct fifo_profile_gk20a)); | ||
| 160 | f->profile.sorted = nvgpu_vzalloc(g, | ||
| 161 | FIFO_PROFILING_ENTRIES * | ||
| 162 | sizeof(u64)); | ||
| 163 | if (!(f->profile.data && f->profile.sorted)) { | ||
| 164 | nvgpu_vfree(g, f->profile.data); | ||
| 165 | nvgpu_vfree(g, f->profile.sorted); | ||
| 166 | nvgpu_mutex_release(&f->profile.lock); | ||
| 167 | return -ENOMEM; | ||
| 168 | } | ||
| 169 | nvgpu_ref_init(&f->profile.ref); | ||
| 170 | } | ||
| 171 | atomic_set(&f->profile.get.atomic_var, 0); | ||
| 172 | f->profile.enabled = true; | ||
| 173 | } | ||
| 174 | } | ||
| 175 | nvgpu_mutex_release(&f->profile.lock); | ||
| 176 | |||
| 177 | return 0; | ||
| 178 | } | ||
| 179 | |||
| 180 | DEFINE_SIMPLE_ATTRIBUTE( | ||
| 181 | gk20a_fifo_profile_enable_debugfs_fops, | ||
| 182 | NULL, | ||
| 183 | gk20a_fifo_profile_enable, | ||
| 184 | "%llu\n" | ||
| 185 | ); | ||
| 186 | |||
| 187 | static int __profile_cmp(const void *a, const void *b) | ||
| 188 | { | ||
| 189 | return (*(const u64 *) a > *(const u64 *) b) - (*(const u64 *) a < *(const u64 *) b); | ||
| 190 | } | ||
| 191 | |||
| 192 | /* | ||
| 193 | * This uses about 800 bytes of stack, but the function using it is only | ||
| 194 | * called from the debugfs handler, where the call chain is shallow, so it is fine. | ||
| 195 | */ | ||
| 196 | #define PERCENTILE_WIDTH 5 | ||
| 197 | #define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) | ||
| 198 | |||
| 199 | static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, | ||
| 200 | u64 *percentiles, u32 index_end, u32 index_start) | ||
| 201 | { | ||
| 202 | unsigned int nelem = 0; | ||
| 203 | unsigned int index; | ||
| 204 | struct fifo_profile_gk20a *profile; | ||
| 205 | |||
| 206 | for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { | ||
| 207 | profile = &g->fifo.profile.data[index]; | ||
| 208 | |||
| 209 | if (profile->timestamp[index_end] > | ||
| 210 | profile->timestamp[index_start]) { | ||
| 211 | /* This is a valid element */ | ||
| 212 | g->fifo.profile.sorted[nelem] = | ||
| 213 | profile->timestamp[index_end] - | ||
| 214 | profile->timestamp[index_start]; | ||
| 215 | nelem++; | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | /* sort it */ | ||
| 220 | sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), | ||
| 221 | __profile_cmp, NULL); | ||
| 222 | |||
| 223 | /* build ranges */ | ||
| 224 | for (index = 0; index < PERCENTILE_RANGES; index++) { | ||
| 225 | percentiles[index] = nelem < PERCENTILE_RANGES ? 0 : | ||
| 226 | g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * | ||
| 227 | nelem)/100 - 1]; | ||
| 228 | } | ||
| 229 | return nelem; | ||
| 230 | } | ||
| 231 | |||
| 232 | static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) | ||
| 233 | { | ||
| 234 | struct gk20a *g = s->private; | ||
| 235 | unsigned int get, nelem, index; | ||
| 236 | /* | ||
| 237 | * About 800 B of stack, but the function is static and only | ||
| 238 | * called from the debugfs handler | ||
| 239 | */ | ||
| 240 | u64 percentiles_ioctl[PERCENTILE_RANGES]; | ||
| 241 | u64 percentiles_kickoff[PERCENTILE_RANGES]; | ||
| 242 | u64 percentiles_jobtracking[PERCENTILE_RANGES]; | ||
| 243 | u64 percentiles_append[PERCENTILE_RANGES]; | ||
| 244 | u64 percentiles_userd[PERCENTILE_RANGES]; | ||
| 245 | |||
| 246 | if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) { | ||
| 247 | seq_puts(s, "Profiling disabled\n"); | ||
| 248 | return 0; | ||
| 249 | } | ||
| 250 | |||
| 251 | get = atomic_read(&g->fifo.profile.get.atomic_var); | ||
| 252 | |||
| 253 | __gk20a_fifo_create_stats(g, percentiles_ioctl, | ||
| 254 | PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); | ||
| 255 | __gk20a_fifo_create_stats(g, percentiles_kickoff, | ||
| 256 | PROFILE_END, PROFILE_ENTRY); | ||
| 257 | __gk20a_fifo_create_stats(g, percentiles_jobtracking, | ||
| 258 | PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); | ||
| 259 | __gk20a_fifo_create_stats(g, percentiles_append, | ||
| 260 | PROFILE_APPEND, PROFILE_JOB_TRACKING); | ||
| 261 | nelem = __gk20a_fifo_create_stats(g, percentiles_userd, | ||
| 262 | PROFILE_END, PROFILE_APPEND); | ||
| 263 | |||
| 264 | seq_printf(s, "Number of kickoffs: %u\n", nelem); | ||
| 265 | seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); | ||
| 266 | |||
| 267 | for (index = 0; index < PERCENTILE_RANGES; index++) | ||
| 268 | seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", | ||
| 269 | PERCENTILE_WIDTH * (index+1), | ||
| 270 | percentiles_ioctl[index], | ||
| 271 | percentiles_kickoff[index], | ||
| 272 | percentiles_append[index], | ||
| 273 | percentiles_jobtracking[index], | ||
| 274 | percentiles_userd[index]); | ||
| 275 | |||
| 276 | nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
| 277 | |||
| 278 | return 0; | ||
| 279 | } | ||
| 280 | |||
| 281 | static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) | ||
| 282 | { | ||
| 283 | return single_open(file, gk20a_fifo_profile_stats, inode->i_private); | ||
| 284 | } | ||
| 285 | |||
| 286 | static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { | ||
| 287 | .open = gk20a_fifo_profile_stats_open, | ||
| 288 | .read = seq_read, | ||
| 289 | .llseek = seq_lseek, | ||
| 290 | .release = single_release, | ||
| 291 | }; | ||
| 292 | |||
| 294 | void gk20a_fifo_debugfs_init(struct gk20a *g) | ||
| 295 | { | ||
| 296 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 297 | struct dentry *gpu_root = l->debugfs; | ||
| 298 | struct dentry *fifo_root; | ||
| 299 | struct dentry *profile_root; | ||
| 300 | |||
| 301 | fifo_root = debugfs_create_dir("fifo", gpu_root); | ||
| 302 | if (IS_ERR_OR_NULL(fifo_root)) | ||
| 303 | return; | ||
| 304 | |||
| 305 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
| 306 | |||
| 307 | debugfs_create_file("sched", 0600, fifo_root, g, | ||
| 308 | &gk20a_fifo_sched_debugfs_fops); | ||
| 309 | |||
| 310 | profile_root = debugfs_create_dir("profile", fifo_root); | ||
| 311 | if (IS_ERR_OR_NULL(profile_root)) | ||
| 312 | return; | ||
| 313 | |||
| 314 | nvgpu_mutex_init(&g->fifo.profile.lock); | ||
| 315 | g->fifo.profile.enabled = false; | ||
| 316 | atomic_set(&g->fifo.profile.get.atomic_var, 0); | ||
| 317 | atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0); | ||
| 318 | |||
| 319 | debugfs_create_file("enable", 0600, profile_root, g, | ||
| 320 | &gk20a_fifo_profile_enable_debugfs_fops); | ||
| 321 | |||
| 322 | debugfs_create_file("stats", 0600, profile_root, g, | ||
| 323 | &gk20a_fifo_profile_stats_debugfs_fops); | ||
| 324 | |||
| 325 | } | ||
| 326 | |||
| 327 | void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx) | ||
| 328 | { | ||
| 329 | if (profile) | ||
| 330 | profile->timestamp[idx] = nvgpu_current_time_ns(); | ||
| 331 | } | ||
| 332 | |||
| 333 | void __gk20a_fifo_profile_free(struct nvgpu_ref *ref) | ||
| 334 | { | ||
| 335 | struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, | ||
| 336 | profile.ref); | ||
| 337 | nvgpu_vfree(f->g, f->profile.data); | ||
| 338 | nvgpu_vfree(f->g, f->profile.sorted); | ||
| 339 | } | ||
| 340 | |||
| 341 | /* Get the next element in the ring buffer of profile entries | ||
| 342 | * and grab a reference to the structure | ||
| 343 | */ | ||
| 344 | struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) | ||
| 345 | { | ||
| 346 | struct fifo_gk20a *f = &g->fifo; | ||
| 347 | struct fifo_profile_gk20a *profile; | ||
| 348 | unsigned int index; | ||
| 349 | |||
| 350 | /* If kref is zero, profiling is not enabled */ | ||
| 351 | if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) | ||
| 352 | return NULL; | ||
| 353 | index = atomic_inc_return(&f->profile.get.atomic_var); | ||
| 354 | profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; | ||
| 355 | |||
| 356 | return profile; | ||
| 357 | } | ||
| 358 | |||
| 359 | /* Free the reference to the structure. This allows deferred cleanups */ | ||
| 360 | void gk20a_fifo_profile_release(struct gk20a *g, | ||
| 361 | struct fifo_profile_gk20a *profile) | ||
| 362 | { | ||
| 363 | nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
| 364 | } | ||
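Putting the three helpers together, a submit path would use them roughly as below. This is an illustrative sketch only; the wrapping function is invented, though the PROFILE_ENTRY/PROFILE_END indices are the ones used by the stats code earlier in this file:

/* Illustrative sketch only: how a kickoff path could use the API above. */
static void example_kickoff_profiled(struct gk20a *g)
{
	struct fifo_profile_gk20a *profile;

	/* Returns NULL (and takes no reference) while profiling is off. */
	profile = gk20a_fifo_profile_acquire(g);

	gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY);
	/* ... build and submit the pushbuffer ... */
	gk20a_fifo_profile_snapshot(profile, PROFILE_END);

	if (profile)
		gk20a_fifo_profile_release(g, profile);
}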
| 365 | |||
| 366 | void gk20a_fifo_debugfs_deinit(struct gk20a *g) | ||
| 367 | { | ||
| 368 | struct fifo_gk20a *f = &g->fifo; | ||
| 369 | |||
| 370 | nvgpu_mutex_acquire(&f->profile.lock); | ||
| 371 | if (f->profile.enabled) { | ||
| 372 | f->profile.enabled = false; | ||
| 373 | nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); | ||
| 374 | } | ||
| 375 | nvgpu_mutex_release(&f->profile.lock); | ||
| 376 | } | ||
diff --git a/include/os/linux/debug_fifo.h b/include/os/linux/debug_fifo.h new file mode 100644 index 0000000..46ac853 --- /dev/null +++ b/include/os/linux/debug_fifo.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_FIFO_H__ | ||
| 16 | #define __NVGPU_DEBUG_FIFO_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | void gk20a_fifo_debugfs_init(struct gk20a *g); | ||
| 20 | void gk20a_fifo_debugfs_deinit(struct gk20a *g); | ||
| 21 | |||
| 22 | #endif /* __NVGPU_DEBUG_FIFO_H__ */ | ||
diff --git a/include/os/linux/debug_gr.c b/include/os/linux/debug_gr.c new file mode 100644 index 0000000..d54c6d6 --- /dev/null +++ b/include/os/linux/debug_gr.c | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_gr.h" | ||
| 16 | #include "os_linux.h" | ||
| 17 | |||
| 18 | #include <linux/debugfs.h> | ||
| 19 | |||
| 20 | int gr_gk20a_debugfs_init(struct gk20a *g) | ||
| 21 | { | ||
| 22 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 23 | |||
| 24 | l->debugfs_gr_default_attrib_cb_size = | ||
| 25 | debugfs_create_u32("gr_default_attrib_cb_size", | ||
| 26 | S_IRUGO|S_IWUSR, l->debugfs, | ||
| 27 | &g->gr.attrib_cb_default_size); | ||
| 28 | |||
| 29 | return 0; | ||
| 30 | } | ||
| 31 | |||
diff --git a/include/os/linux/debug_gr.h b/include/os/linux/debug_gr.h new file mode 100644 index 0000000..4b46acb --- /dev/null +++ b/include/os/linux/debug_gr.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_GR_H__ | ||
| 16 | #define __NVGPU_DEBUG_GR_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | int gr_gk20a_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_GR_H__ */ | ||
diff --git a/include/os/linux/debug_hal.c b/include/os/linux/debug_hal.c new file mode 100644 index 0000000..031e335 --- /dev/null +++ b/include/os/linux/debug_hal.c | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_hal.h" | ||
| 16 | #include "os_linux.h" | ||
| 17 | |||
| 18 | #include <linux/debugfs.h> | ||
| 19 | #include <linux/seq_file.h> | ||
| 20 | |||
| 21 | /* Format and print a single function pointer to the specified seq_file. */ | ||
| 22 | static void __hal_print_op(struct seq_file *s, void *op_ptr) | ||
| 23 | { | ||
| 24 | seq_printf(s, "%pF\n", op_ptr); | ||
| 25 | } | ||
| 26 | |||
| 27 | /* | ||
| 28 | * Prints an array of function pointer addresses in op_ptrs to the | ||
| 29 | * specified seq_file | ||
| 30 | */ | ||
| 31 | static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops) | ||
| 32 | { | ||
| 33 | int i; | ||
| 34 | |||
| 35 | for (i = 0; i < num_ops; i++) | ||
| 36 | __hal_print_op(s, op_ptrs[i]); | ||
| 37 | } | ||
| 38 | |||
| 39 | /* | ||
| 40 | * Show file operation; generates the file content once. Prints the list | ||
| 41 | * of gpu operations defined by gops and the corresponding function pointer | ||
| 42 | * destination addresses. Relies on the compiler not reordering struct | ||
| 43 | * fields and on the assumption that all members are function pointers. | ||
| 44 | */ | ||
| 45 | static int __hal_show(struct seq_file *s, void *unused) | ||
| 46 | { | ||
| 47 | struct gpu_ops *gops = s->private; | ||
| 48 | |||
| 49 | __hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *)); | ||
| 50 | |||
| 51 | return 0; | ||
| 52 | } | ||
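The cast in __hal_show() views the whole gpu_ops struct as a flat array of pointer-sized slots. A standalone sketch of the same trick, with made-up types; it relies, as the driver does, on function pointers being pointer-sized:

#include <stdio.h>

struct example_ops {		/* hypothetical stand-in for gpu_ops */
	int (*init)(void);
	void (*fini)(void);
};

static int ex_init(void) { return 0; }
static void ex_fini(void) { }

int main(void)
{
	struct example_ops ops = { .init = ex_init, .fini = ex_fini };
	void **slots = (void **)&ops;	/* same flattening as __hal_show() */
	size_t i, n = sizeof(ops) / sizeof(void *);

	for (i = 0; i < n; i++)
		printf("op %zu -> %p\n", i, slots[i]);
	return 0;
}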
| 53 | |||
| 54 | static int __hal_open(struct inode *inode, struct file *file) | ||
| 55 | { | ||
| 56 | return single_open(file, __hal_show, inode->i_private); | ||
| 57 | } | ||
| 58 | |||
| 59 | static const struct file_operations __hal_fops = { | ||
| 60 | .open = __hal_open, | ||
| 61 | .read = seq_read, | ||
| 62 | .llseek = seq_lseek, | ||
| 63 | .release = single_release, | ||
| 64 | }; | ||
| 65 | |||
| 66 | void nvgpu_hal_debugfs_fini(struct gk20a *g) | ||
| 67 | { | ||
| 68 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 69 | |||
| 70 | if (l->debugfs_hal != NULL) | ||
| 71 | debugfs_remove_recursive(l->debugfs_hal); | ||
| 72 | } | ||
| 73 | |||
| 74 | void nvgpu_hal_debugfs_init(struct gk20a *g) | ||
| 75 | { | ||
| 76 | struct dentry *d; | ||
| 77 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 78 | |||
| 79 | if (!l->debugfs) | ||
| 80 | return; | ||
| 81 | l->debugfs_hal = debugfs_create_dir("hal", l->debugfs); | ||
| 82 | if (IS_ERR_OR_NULL(l->debugfs_hal)) { | ||
| 83 | l->debugfs_hal = NULL; | ||
| 84 | return; | ||
| 85 | } | ||
| 86 | |||
| 87 | /* Pass along reference to the gpu_ops struct as private data */ | ||
| 88 | d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal, | ||
| 89 | &g->ops, &__hal_fops); | ||
| 90 | if (!d) { | ||
| 91 | nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__); | ||
| 92 | debugfs_remove_recursive(l->debugfs_hal); | ||
| 93 | return; | ||
| 94 | } | ||
| 95 | } | ||
diff --git a/include/os/linux/debug_hal.h b/include/os/linux/debug_hal.h new file mode 100644 index 0000000..eee6f23 --- /dev/null +++ b/include/os/linux/debug_hal.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_HAL_H__ | ||
| 16 | #define __NVGPU_DEBUG_HAL_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | void nvgpu_hal_debugfs_fini(struct gk20a *g); | ||
| 20 | void nvgpu_hal_debugfs_init(struct gk20a *g); | ||
| 21 | |||
| 22 | #endif /* __NVGPU_DEBUG_HAL_H__ */ | ||
diff --git a/include/os/linux/debug_kmem.c b/include/os/linux/debug_kmem.c new file mode 100644 index 0000000..a0c7d47 --- /dev/null +++ b/include/os/linux/debug_kmem.c | |||
| @@ -0,0 +1,312 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/debugfs.h> | ||
| 15 | #include <linux/seq_file.h> | ||
| 16 | |||
| 17 | #include "os_linux.h" | ||
| 18 | #include "debug_kmem.h" | ||
| 19 | #include "kmem_priv.h" | ||
| 20 | |||
| 21 | /** | ||
| 22 | * to_human_readable_bytes - Determine suffix for passed size. | ||
| 23 | * | ||
| 24 | * @bytes - Number of bytes to generate a suffix for. | ||
| 25 | * @hr_bytes [out] - The human readable number of bytes. | ||
| 26 | * @hr_suffix [out] - The suffix for the HR number of bytes. | ||
| 27 | * | ||
| 28 | * Computes a human readable decomposition of the passed number of bytes. The | ||
| 29 | * suffix for the bytes is passed back through the @hr_suffix pointer, and | ||
| 30 | * the scaled byte count through @hr_bytes. The result falls in one of the | ||
| 31 | * following ranges: | ||
| 32 | * | ||
| 33 | * 0 - 1023 B | ||
| 34 | * 1 - 1023 KB | ||
| 35 | * 1 - 1023 MB | ||
| 36 | * 1 - 1023 GB | ||
| 37 | * 1 - 1023 TB | ||
| 38 | * 1 - ... PB | ||
| 39 | */ | ||
| 40 | static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, | ||
| 41 | const char **hr_suffix) | ||
| 42 | { | ||
| 43 | static const char *suffixes[] = | ||
| 44 | { "B", "KB", "MB", "GB", "TB", "PB" }; | ||
| 45 | |||
| 46 | u64 suffix_ind = 0; | ||
| 47 | |||
| 48 | while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) { | ||
| 49 | bytes >>= 10; | ||
| 50 | suffix_ind++; | ||
| 51 | } | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Handle case where bytes > 1023PB. | ||
| 55 | */ | ||
| 56 | suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? | ||
| 57 | suffix_ind : ARRAY_SIZE(suffixes) - 1; | ||
| 58 | |||
| 59 | *hr_bytes = bytes; | ||
| 60 | *hr_suffix = suffixes[suffix_ind]; | ||
| 61 | } | ||
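A short usage sketch, with assumed inputs, of what the decomposition yields:

/* Usage sketch; inputs are assumed values. */
u64 hr_bytes;
const char *hr_suffix;

__to_human_readable_bytes(800, &hr_bytes, &hr_suffix);        /* -> 800, "B"  */
__to_human_readable_bytes(4096, &hr_bytes, &hr_suffix);       /* -> 4,   "KB" */
__to_human_readable_bytes(3ULL << 30, &hr_bytes, &hr_suffix); /* -> 3,   "GB" */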
| 62 | |||
| 63 | /** | ||
| 64 | * print_hr_bytes - Print human readable bytes | ||
| 65 | * | ||
| 66 | * @s - A seq_file to print to. May be NULL. | ||
| 67 | * @msg - A message to print before the bytes. | ||
| 68 | * @bytes - Number of bytes. | ||
| 69 | * | ||
| 70 | * Print @msg followed by the human readable decomposition of the passed number | ||
| 71 | * of bytes. | ||
| 72 | * | ||
| 73 | * If @s is NULL then the prints are made to the kernel log. | ||
| 74 | */ | ||
| 75 | static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) | ||
| 76 | { | ||
| 77 | u64 hr_bytes; | ||
| 78 | const char *hr_suffix; | ||
| 79 | |||
| 80 | __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); | ||
| 81 | __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); | ||
| 82 | } | ||
| 83 | |||
| 84 | /** | ||
| 85 | * print_histogram - Build a histogram of the memory usage. | ||
| 86 | * | ||
| 87 | * @tracker The tracking to pull data from. | ||
| 88 | * @s A seq_file to dump info into. | ||
| 89 | */ | ||
| 90 | static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, | ||
| 91 | struct seq_file *s) | ||
| 92 | { | ||
| 93 | int i; | ||
| 94 | u64 pot_min, pot_max; | ||
| 95 | u64 nr_buckets; | ||
| 96 | unsigned int *buckets; | ||
| 97 | unsigned int total_allocs; | ||
| 98 | struct nvgpu_rbtree_node *node; | ||
| 99 | static const char histogram_line[] = | ||
| 100 | "++++++++++++++++++++++++++++++++++++++++"; | ||
| 101 | |||
| 102 | /* | ||
| 103 | * pot_min is essentially a round down to the nearest power of 2. This | ||
| 104 | * is the start of the histogram. pot_max is just a round up to the | ||
| 105 | * nearest power of two. Each bucket spans one power of two, so the | ||
| 106 | * bucket widths grow exponentially. | ||
| 107 | */ | ||
| 108 | pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); | ||
| 109 | pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); | ||
| 110 | |||
| 111 | nr_buckets = __ffs(pot_max) - __ffs(pot_min); | ||
| 112 | |||
| 113 | buckets = kcalloc(nr_buckets, sizeof(*buckets), GFP_KERNEL); | ||
| 114 | if (!buckets) { | ||
| 115 | __pstat(s, "OOM: could not allocate bucket storage!?\n"); | ||
| 116 | return; | ||
| 117 | } | ||
| 118 | |||
| 119 | /* | ||
| 120 | * Iterate across all of the allocs and determine what bucket they | ||
| 121 | * should go in. Round the size down to the nearest power of two to | ||
| 122 | * find the right bucket. | ||
| 123 | */ | ||
| 124 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
| 125 | while (node) { | ||
| 126 | int b; | ||
| 127 | u64 bucket_min; | ||
| 128 | struct nvgpu_mem_alloc *alloc = | ||
| 129 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
| 130 | |||
| 131 | bucket_min = (u64)rounddown_pow_of_two(alloc->size); | ||
| 132 | if (bucket_min < tracker->min_alloc) | ||
| 133 | bucket_min = tracker->min_alloc; | ||
| 134 | |||
| 135 | b = __ffs(bucket_min) - __ffs(pot_min); | ||
| 136 | |||
| 137 | /* | ||
| 138 | * Handle the one case where there's an alloc exactly as big as | ||
| 139 | * the maximum bucket size of the largest bucket. Most of the | ||
| 140 | * buckets have an inclusive minimum and exclusive maximum. But | ||
| 141 | * the largest bucket needs to have an _inclusive_ maximum as | ||
| 142 | * well. | ||
| 143 | */ | ||
| 144 | if (b == (int)nr_buckets) | ||
| 145 | b--; | ||
| 146 | |||
| 147 | buckets[b]++; | ||
| 148 | |||
| 149 | nvgpu_rbtree_enum_next(&node, node); | ||
| 150 | } | ||
| 151 | |||
| 152 | total_allocs = 0; | ||
| 153 | for (i = 0; i < (int)nr_buckets; i++) | ||
| 154 | total_allocs += buckets[i]; | ||
| 155 | |||
| 156 | __pstat(s, "Alloc histogram:\n"); | ||
| 157 | |||
| 158 | /* | ||
| 159 | * Actually compute the histogram lines. | ||
| 160 | */ | ||
| 161 | for (i = 0; i < (int)nr_buckets; i++) { | ||
| 162 | char this_line[sizeof(histogram_line) + 1]; | ||
| 163 | u64 line_length; | ||
| 164 | u64 hr_bytes; | ||
| 165 | const char *hr_suffix; | ||
| 166 | |||
| 167 | memset(this_line, 0, sizeof(this_line)); | ||
| 168 | |||
| 169 | /* | ||
| 170 | * Compute the normalized line length. Can't use floating point | ||
| 171 | * so we will just multiply everything by 1000 and use fixed | ||
| 172 | * point. | ||
| 173 | */ | ||
| 174 | line_length = (1000 * buckets[i]) / total_allocs; | ||
| 175 | line_length *= sizeof(histogram_line); | ||
| 176 | line_length /= 1000; | ||
| 177 | |||
| 178 | memset(this_line, '+', line_length); | ||
| 179 | |||
| 180 | __to_human_readable_bytes(1ULL << (__ffs(pot_min) + i), | ||
| 181 | &hr_bytes, &hr_suffix); | ||
| 182 | __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", | ||
| 183 | hr_bytes, hr_bytes << 1, | ||
| 184 | hr_suffix, buckets[i], this_line); | ||
| 185 | } | ||
| 186 | } | ||
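A worked example of the bucket arithmetic above, with assumed numbers:

/* Assumed example: tracker->min_alloc = 64, so pot_min = 64 and
 * __ffs(pot_min) = 6. An alloc of 3000 bytes rounds down to 2048, so
 *   b = __ffs(2048) - __ffs(64) = 11 - 6 = 5
 * and the alloc is counted in the [2 KB, 4 KB) bucket.
 */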
| 187 | |||
| 188 | /** | ||
| 189 | * nvgpu_kmem_print_stats - Print kmem tracking stats. | ||
| 190 | * | ||
| 191 | * @tracker The tracking to pull data from. | ||
| 192 | * @s A seq_file to dump info into. | ||
| 193 | * | ||
| 194 | * Print stats from a tracker. If @s is non-null then seq_printf() will be | ||
| 195 | * used with @s. Otherwise the stats are pr_info()ed. | ||
| 196 | */ | ||
| 197 | void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, | ||
| 198 | struct seq_file *s) | ||
| 199 | { | ||
| 200 | nvgpu_lock_tracker(tracker); | ||
| 201 | |||
| 202 | __pstat(s, "Mem tracker: %s\n\n", tracker->name); | ||
| 203 | |||
| 204 | __pstat(s, "Basic Stats:\n"); | ||
| 205 | __pstat(s, " Number of allocs %lld\n", | ||
| 206 | tracker->nr_allocs); | ||
| 207 | __pstat(s, " Number of frees %lld\n", | ||
| 208 | tracker->nr_frees); | ||
| 209 | print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); | ||
| 210 | print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); | ||
| 211 | print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); | ||
| 212 | print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); | ||
| 213 | print_hr_bytes(s, " Bytes allocated (real) ", | ||
| 214 | tracker->bytes_alloced_real); | ||
| 215 | print_hr_bytes(s, " Bytes freed (real) ", | ||
| 216 | tracker->bytes_freed_real); | ||
| 217 | __pstat(s, "\n"); | ||
| 218 | |||
| 219 | print_histogram(tracker, s); | ||
| 220 | |||
| 221 | nvgpu_unlock_tracker(tracker); | ||
| 222 | } | ||
| 223 | |||
| 224 | static int __kmem_tracking_show(struct seq_file *s, void *unused) | ||
| 225 | { | ||
| 226 | struct nvgpu_mem_alloc_tracker *tracker = s->private; | ||
| 227 | |||
| 228 | nvgpu_kmem_print_stats(tracker, s); | ||
| 229 | |||
| 230 | return 0; | ||
| 231 | } | ||
| 232 | |||
| 233 | static int __kmem_tracking_open(struct inode *inode, struct file *file) | ||
| 234 | { | ||
| 235 | return single_open(file, __kmem_tracking_show, inode->i_private); | ||
| 236 | } | ||
| 237 | |||
| 238 | static const struct file_operations __kmem_tracking_fops = { | ||
| 239 | .open = __kmem_tracking_open, | ||
| 240 | .read = seq_read, | ||
| 241 | .llseek = seq_lseek, | ||
| 242 | .release = single_release, | ||
| 243 | }; | ||
| 244 | |||
| 245 | static int __kmem_traces_dump_tracker(struct gk20a *g, | ||
| 246 | struct nvgpu_mem_alloc_tracker *tracker, | ||
| 247 | struct seq_file *s) | ||
| 248 | { | ||
| 249 | struct nvgpu_rbtree_node *node; | ||
| 250 | |||
| 251 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
| 252 | while (node) { | ||
| 253 | struct nvgpu_mem_alloc *alloc = | ||
| 254 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
| 255 | |||
| 256 | kmem_print_mem_alloc(g, alloc, s); | ||
| 257 | |||
| 258 | nvgpu_rbtree_enum_next(&node, node); | ||
| 259 | } | ||
| 260 | |||
| 261 | return 0; | ||
| 262 | } | ||
| 263 | |||
| 264 | static int __kmem_traces_show(struct seq_file *s, void *unused) | ||
| 265 | { | ||
| 266 | struct gk20a *g = s->private; | ||
| 267 | |||
| 268 | nvgpu_lock_tracker(g->vmallocs); | ||
| 269 | seq_puts(s, "Outstanding vmallocs:\n"); | ||
| 270 | __kmem_traces_dump_tracker(g, g->vmallocs, s); | ||
| 271 | seq_puts(s, "\n"); | ||
| 272 | nvgpu_unlock_tracker(g->vmallocs); | ||
| 273 | |||
| 274 | nvgpu_lock_tracker(g->kmallocs); | ||
| 275 | seq_puts(s, "Outstanding kmallocs:\n"); | ||
| 276 | __kmem_traces_dump_tracker(g, g->kmallocs, s); | ||
| 277 | nvgpu_unlock_tracker(g->kmallocs); | ||
| 278 | |||
| 279 | return 0; | ||
| 280 | } | ||
| 281 | |||
| 282 | static int __kmem_traces_open(struct inode *inode, struct file *file) | ||
| 283 | { | ||
| 284 | return single_open(file, __kmem_traces_show, inode->i_private); | ||
| 285 | } | ||
| 286 | |||
| 287 | static const struct file_operations __kmem_traces_fops = { | ||
| 288 | .open = __kmem_traces_open, | ||
| 289 | .read = seq_read, | ||
| 290 | .llseek = seq_lseek, | ||
| 291 | .release = single_release, | ||
| 292 | }; | ||
| 293 | |||
| 294 | void nvgpu_kmem_debugfs_init(struct gk20a *g) | ||
| 295 | { | ||
| 296 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 297 | struct dentry *node; | ||
| 298 | |||
| 299 | l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs); | ||
| 300 | if (IS_ERR_OR_NULL(l->debugfs_kmem)) | ||
| 301 | return; | ||
| 302 | |||
| 303 | node = debugfs_create_file(g->vmallocs->name, S_IRUGO, | ||
| 304 | l->debugfs_kmem, | ||
| 305 | g->vmallocs, &__kmem_tracking_fops); | ||
| 306 | node = debugfs_create_file(g->kmallocs->name, S_IRUGO, | ||
| 307 | l->debugfs_kmem, | ||
| 308 | g->kmallocs, &__kmem_tracking_fops); | ||
| 309 | node = debugfs_create_file("traces", S_IRUGO, | ||
| 310 | l->debugfs_kmem, | ||
| 311 | g, &__kmem_traces_fops); | ||
| 312 | } | ||
diff --git a/include/os/linux/debug_kmem.h b/include/os/linux/debug_kmem.h new file mode 100644 index 0000000..44322b5 --- /dev/null +++ b/include/os/linux/debug_kmem.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_KMEM_H__ | ||
| 16 | #define __NVGPU_DEBUG_KMEM_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 20 | void nvgpu_kmem_debugfs_init(struct gk20a *g); | ||
| 21 | #endif | ||
| 22 | |||
| 23 | #endif /* __NVGPU_DEBUG_KMEM_H__ */ | ||
diff --git a/include/os/linux/debug_ltc.c b/include/os/linux/debug_ltc.c new file mode 100644 index 0000000..1b4c221 --- /dev/null +++ b/include/os/linux/debug_ltc.c | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_ltc.h" | ||
| 16 | #include "os_linux.h" | ||
| 17 | |||
| 18 | #include <nvgpu/gk20a.h> | ||
| 19 | |||
| 20 | #include <linux/debugfs.h> | ||
| 21 | #include <linux/uaccess.h> | ||
| 22 | |||
| 23 | static ssize_t ltc_intr_illegal_compstat_read(struct file *file, | ||
| 24 | char __user *user_buf, size_t count, loff_t *ppos) | ||
| 25 | { | ||
| 26 | char buf[3]; | ||
| 27 | struct gk20a *g = file->private_data; | ||
| 28 | |||
| 29 | if (g->ltc_intr_en_illegal_compstat) | ||
| 30 | buf[0] = 'Y'; | ||
| 31 | else | ||
| 32 | buf[0] = 'N'; | ||
| 33 | buf[1] = '\n'; | ||
| 34 | buf[2] = 0x00; | ||
| 35 | |||
| 36 | return simple_read_from_buffer(user_buf, count, ppos, buf, 2); | ||
| 37 | } | ||
| 38 | |||
| 39 | static ssize_t ltc_intr_illegal_compstat_write(struct file *file, | ||
| 40 | const char __user *user_buf, size_t count, loff_t *ppos) | ||
| 41 | { | ||
| 42 | char buf[3] = { 0 }; /* keep the buffer NUL-terminated for strtobool() */ | ||
| 43 | int buf_size; | ||
| 44 | bool intr_illegal_compstat_enabled; | ||
| 45 | struct gk20a *g = file->private_data; | ||
| 46 | int err; | ||
| 47 | |||
| 48 | if (!g->ops.ltc.intr_en_illegal_compstat) | ||
| 49 | return -EINVAL; | ||
| 50 | |||
| 51 | buf_size = min(count, (sizeof(buf)-1)); | ||
| 52 | if (copy_from_user(buf, user_buf, buf_size)) | ||
| 53 | return -EFAULT; | ||
| 54 | |||
| 55 | err = gk20a_busy(g); | ||
| 56 | if (err) | ||
| 57 | return err; | ||
| 58 | |||
| 59 | if (strtobool(buf, &intr_illegal_compstat_enabled) == 0) { | ||
| 60 | g->ops.ltc.intr_en_illegal_compstat(g, | ||
| 61 | intr_illegal_compstat_enabled); | ||
| 62 | g->ltc_intr_en_illegal_compstat = intr_illegal_compstat_enabled; | ||
| 63 | } | ||
| 64 | |||
| 65 | gk20a_idle(g); | ||
| 66 | |||
| 67 | return buf_size; | ||
| 68 | } | ||
| 69 | |||
| 70 | static const struct file_operations ltc_intr_illegal_compstat_fops = { | ||
| 71 | .open = simple_open, | ||
| 72 | .read = ltc_intr_illegal_compstat_read, | ||
| 73 | .write = ltc_intr_illegal_compstat_write, | ||
| 74 | }; | ||
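Expected interaction from userspace, with an assumed debugfs path:

/* Expected usage (debugfs mount point and GPU node name are assumed):
 *
 *   cat /sys/kernel/debug/gpu.0/ltc/intr_illegal_compstat_enable    # "Y"/"N"
 *   echo Y > /sys/kernel/debug/gpu.0/ltc/intr_illegal_compstat_enable
 *
 * strtobool() accepts "y"/"Y"/"1" and "n"/"N"/"0"; other input is ignored
 * and the write still reports success.
 */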
| 75 | |||
| 76 | int nvgpu_ltc_debugfs_init(struct gk20a *g) | ||
| 77 | { | ||
| 78 | struct dentry *d; | ||
| 79 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 80 | struct dentry *gpu_root = l->debugfs; | ||
| 81 | |||
| 82 | l->debugfs_ltc = debugfs_create_dir("ltc", gpu_root); | ||
| 83 | if (IS_ERR_OR_NULL(l->debugfs_ltc)) | ||
| 84 | return -ENODEV; | ||
| 85 | |||
| 86 | /* Debug fs node to enable/disable illegal_compstat */ | ||
| 87 | d = debugfs_create_file("intr_illegal_compstat_enable", 0600, | ||
| 88 | l->debugfs_ltc, g, | ||
| 89 | <c_intr_illegal_compstat_fops); | ||
| 90 | if (!d) | ||
| 91 | return -ENOMEM; | ||
| 92 | |||
| 93 | return 0; | ||
| 94 | } | ||
diff --git a/include/os/linux/debug_ltc.h b/include/os/linux/debug_ltc.h new file mode 100644 index 0000000..3ad734c --- /dev/null +++ b/include/os/linux/debug_ltc.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_LTC_H__ | ||
| 16 | #define __NVGPU_DEBUG_LTC_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | int nvgpu_ltc_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_LTC_H__ */ | ||
diff --git a/include/os/linux/debug_pmgr.c b/include/os/linux/debug_pmgr.c new file mode 100644 index 0000000..c264978 --- /dev/null +++ b/include/os/linux/debug_pmgr.c | |||
| @@ -0,0 +1,104 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/debugfs.h> | ||
| 18 | |||
| 19 | #include "os_linux.h" | ||
| 20 | |||
| 21 | #include "pmgr/pmgr.h" | ||
| 22 | |||
| 23 | static int pmgr_pwr_devices_get_power_u64(void *data, u64 *p) | ||
| 24 | { | ||
| 25 | struct gk20a *g = (struct gk20a *)data; | ||
| 26 | int err; | ||
| 27 | u32 val; | ||
| 28 | |||
| 29 | err = pmgr_pwr_devices_get_power(g, &val); | ||
| 30 | *p = val; | ||
| 31 | |||
| 32 | return err; | ||
| 33 | } | ||
| 34 | |||
| 35 | static int pmgr_pwr_devices_get_current_u64(void *data, u64 *p) | ||
| 36 | { | ||
| 37 | struct gk20a *g = (struct gk20a *)data; | ||
| 38 | int err; | ||
| 39 | u32 val; | ||
| 40 | |||
| 41 | err = pmgr_pwr_devices_get_current(g, &val); | ||
| 42 | *p = val; | ||
| 43 | |||
| 44 | return err; | ||
| 45 | } | ||
| 46 | |||
| 47 | static int pmgr_pwr_devices_get_voltage_u64(void *data, u64 *p) | ||
| 48 | { | ||
| 49 | struct gk20a *g = (struct gk20a *)data; | ||
| 50 | int err; | ||
| 51 | u32 val; | ||
| 52 | |||
| 53 | err = pmgr_pwr_devices_get_voltage(g, &val); | ||
| 54 | *p = val; | ||
| 55 | |||
| 56 | return err; | ||
| 57 | } | ||
| 58 | |||
| 59 | DEFINE_SIMPLE_ATTRIBUTE( | ||
| 60 | pmgr_power_ctrl_fops, pmgr_pwr_devices_get_power_u64, NULL, "%llu\n"); | ||
| 61 | |||
| 62 | DEFINE_SIMPLE_ATTRIBUTE( | ||
| 63 | pmgr_current_ctrl_fops, pmgr_pwr_devices_get_current_u64, NULL, "%llu\n"); | ||
| 64 | |||
| 65 | DEFINE_SIMPLE_ATTRIBUTE( | ||
| 66 | pmgr_voltage_ctrl_fops, pmgr_pwr_devices_get_voltage_u64, NULL, "%llu\n"); | ||
| 67 | |||
| 68 | static void pmgr_debugfs_init(struct gk20a *g) | ||
| 69 | { | ||
| 70 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 71 | struct dentry *dbgentry; | ||
| 72 | |||
| 73 | dbgentry = debugfs_create_file( | ||
| 74 | "power", S_IRUGO, l->debugfs, g, &pmgr_power_ctrl_fops); | ||
| 75 | if (!dbgentry) | ||
| 76 | nvgpu_err(g, "debugfs entry create failed for power"); | ||
| 77 | |||
| 78 | dbgentry = debugfs_create_file( | ||
| 79 | "current", S_IRUGO, l->debugfs, g, &pmgr_current_ctrl_fops); | ||
| 80 | if (!dbgentry) | ||
| 81 | nvgpu_err(g, "debugfs entry create failed for current"); | ||
| 82 | |||
| 83 | dbgentry = debugfs_create_file( | ||
| 84 | "voltage", S_IRUGO, l->debugfs, g, &pmgr_voltage_ctrl_fops); | ||
| 85 | if (!dbgentry) | ||
| 86 | nvgpu_err(g, "debugfs entry create failed for voltage"); | ||
| 87 | } | ||
| 88 | |||
| 89 | int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l) | ||
| 90 | { | ||
| 91 | struct gk20a *g = &l->g; | ||
| 92 | int ret = 0; | ||
| 93 | |||
| 94 | if (!nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) | ||
| 95 | return ret; | ||
| 96 | |||
| 97 | if (!g->ops.clk.support_pmgr_domain) | ||
| 98 | return ret; | ||
| 99 | |||
| 100 | pmgr_debugfs_init(g); | ||
| 101 | |||
| 102 | return ret; | ||
| 103 | } | ||
| 104 | |||
diff --git a/include/os/linux/debug_pmgr.h b/include/os/linux/debug_pmgr.h new file mode 100644 index 0000000..bd6c556 --- /dev/null +++ b/include/os/linux/debug_pmgr.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __LINUX_DEBUG_PMGR_H | ||
| 18 | #define __LINUX_DEBUG_PMGR_H | ||
| 19 | struct nvgpu_os_linux; | ||
| 20 | #ifdef CONFIG_DEBUG_FS | ||
| 21 | int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l); | ||
| 22 | #else | ||
| 23 | static inline int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l) | ||
| 24 | { | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | #endif | ||
| 28 | #endif | ||
diff --git a/include/os/linux/debug_pmu.c b/include/os/linux/debug_pmu.c new file mode 100644 index 0000000..f3e36d0 --- /dev/null +++ b/include/os/linux/debug_pmu.c | |||
| @@ -0,0 +1,484 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <nvgpu/enabled.h> | ||
| 16 | #include "debug_pmu.h" | ||
| 17 | #include "os_linux.h" | ||
| 18 | |||
| 19 | #include <linux/debugfs.h> | ||
| 20 | #include <linux/seq_file.h> | ||
| 21 | #include <linux/uaccess.h> | ||
| 22 | |||
| 23 | static int lpwr_debug_show(struct seq_file *s, void *data) | ||
| 24 | { | ||
| 25 | struct gk20a *g = s->private; | ||
| 26 | |||
| 27 | if (g->ops.pmu.pmu_pg_engines_feature_list && | ||
| 28 | g->ops.pmu.pmu_pg_engines_feature_list(g, | ||
| 29 | PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != | ||
| 30 | NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { | ||
| 31 | seq_printf(s, "PSTATE: %u\n" | ||
| 32 | "RPPG Enabled: %u\n" | ||
| 33 | "RPPG ref count: %u\n" | ||
| 34 | "RPPG state: %u\n" | ||
| 35 | "MSCG Enabled: %u\n" | ||
| 36 | "MSCG pstate state: %u\n" | ||
| 37 | "MSCG transition state: %u\n", | ||
| 38 | g->ops.clk_arb.get_current_pstate(g), | ||
| 39 | g->elpg_enabled, g->pmu.elpg_refcnt, | ||
| 40 | g->pmu.elpg_stat, g->mscg_enabled, | ||
| 41 | g->pmu.mscg_stat, g->pmu.mscg_transition_state); | ||
| 42 | |||
| 43 | } else | ||
| 44 | seq_printf(s, "ELPG Enabled: %u\n" | ||
| 45 | "ELPG ref count: %u\n" | ||
| 46 | "ELPG state: %u\n", | ||
| 47 | g->elpg_enabled, g->pmu.elpg_refcnt, | ||
| 48 | g->pmu.elpg_stat); | ||
| 49 | |||
| 50 | return 0; | ||
| 51 | |||
| 52 | } | ||
| 53 | |||
| 54 | static int lpwr_debug_open(struct inode *inode, struct file *file) | ||
| 55 | { | ||
| 56 | return single_open(file, lpwr_debug_show, inode->i_private); | ||
| 57 | } | ||
| 58 | |||
| 59 | static const struct file_operations lpwr_debug_fops = { | ||
| 60 | .open = lpwr_debug_open, | ||
| 61 | .read = seq_read, | ||
| 62 | .llseek = seq_lseek, | ||
| 63 | .release = single_release, | ||
| 64 | }; | ||
| 65 | |||
| 66 | static int mscg_stat_show(struct seq_file *s, void *data) | ||
| 67 | { | ||
| 68 | struct gk20a *g = s->private; | ||
| 69 | u64 total_ingating, total_ungating, residency, divisor, dividend; | ||
| 70 | struct pmu_pg_stats_data pg_stat_data = { 0 }; | ||
| 71 | int err; | ||
| 72 | |||
| 73 | /* Don't unnecessarily power on the device */ | ||
| 74 | if (g->power_on) { | ||
| 75 | err = gk20a_busy(g); | ||
| 76 | if (err) | ||
| 77 | return err; | ||
| 78 | |||
| 79 | nvgpu_pmu_get_pg_stats(g, | ||
| 80 | PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); | ||
| 81 | gk20a_idle(g); | ||
| 82 | } | ||
| 83 | total_ingating = g->pg_ingating_time_us + | ||
| 84 | (u64)pg_stat_data.ingating_time; | ||
| 85 | total_ungating = g->pg_ungating_time_us + | ||
| 86 | (u64)pg_stat_data.ungating_time; | ||
| 87 | |||
| 88 | divisor = total_ingating + total_ungating; | ||
| 89 | |||
| 90 | /* We compute the residency on a scale of 1000 */ | ||
| 91 | dividend = total_ingating * 1000; | ||
| 92 | |||
| 93 | if (divisor) | ||
| 94 | residency = div64_u64(dividend, divisor); | ||
| 95 | else | ||
| 96 | residency = 0; | ||
| 97 | |||
| 98 | seq_printf(s, | ||
| 99 | "Time in MSCG: %llu us\n" | ||
| 100 | "Time out of MSCG: %llu us\n" | ||
| 101 | "MSCG residency ratio: %llu\n" | ||
| 102 | "MSCG Entry Count: %u\n" | ||
| 103 | "MSCG Avg Entry latency %u\n" | ||
| 104 | "MSCG Avg Exit latency %u\n", | ||
| 105 | total_ingating, total_ungating, | ||
| 106 | residency, pg_stat_data.gating_cnt, | ||
| 107 | pg_stat_data.avg_entry_latency_us, | ||
| 108 | pg_stat_data.avg_exit_latency_us); | ||
| 109 | return 0; | ||
| 110 | |||
| 111 | } | ||
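The scale-by-1000 residency arithmetic, worked with assumed numbers:

/* Assumed example: 900 us gated (in MSCG), 100 us ungated.
 *
 *   residency = div64_u64(900 * 1000, 900 + 100) = 900
 *
 * i.e. a reported ratio of 900 means 90.0% of the time spent in MSCG.
 */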
| 112 | |||
| 113 | static int mscg_stat_open(struct inode *inode, struct file *file) | ||
| 114 | { | ||
| 115 | return single_open(file, mscg_stat_show, inode->i_private); | ||
| 116 | } | ||
| 117 | |||
| 118 | static const struct file_operations mscg_stat_fops = { | ||
| 119 | .open = mscg_stat_open, | ||
| 120 | .read = seq_read, | ||
| 121 | .llseek = seq_lseek, | ||
| 122 | .release = single_release, | ||
| 123 | }; | ||
| 124 | |||
| 125 | static int mscg_transitions_show(struct seq_file *s, void *data) | ||
| 126 | { | ||
| 127 | struct gk20a *g = s->private; | ||
| 128 | struct pmu_pg_stats_data pg_stat_data = { 0 }; | ||
| 129 | u32 total_gating_cnt; | ||
| 130 | int err; | ||
| 131 | |||
| 132 | if (g->power_on) { | ||
| 133 | err = gk20a_busy(g); | ||
| 134 | if (err) | ||
| 135 | return err; | ||
| 136 | |||
| 137 | nvgpu_pmu_get_pg_stats(g, | ||
| 138 | PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); | ||
| 139 | gk20a_idle(g); | ||
| 140 | } | ||
| 141 | total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; | ||
| 142 | |||
| 143 | seq_printf(s, "%u\n", total_gating_cnt); | ||
| 144 | return 0; | ||
| 145 | |||
| 146 | } | ||
| 147 | |||
| 148 | static int mscg_transitions_open(struct inode *inode, struct file *file) | ||
| 149 | { | ||
| 150 | return single_open(file, mscg_transitions_show, inode->i_private); | ||
| 151 | } | ||
| 152 | |||
| 153 | static const struct file_operations mscg_transitions_fops = { | ||
| 154 | .open = mscg_transitions_open, | ||
| 155 | .read = seq_read, | ||
| 156 | .llseek = seq_lseek, | ||
| 157 | .release = single_release, | ||
| 158 | }; | ||
| 159 | |||
| 160 | static int elpg_stat_show(struct seq_file *s, void *data) | ||
| 161 | { | ||
| 162 | struct gk20a *g = s->private; | ||
| 163 | struct pmu_pg_stats_data pg_stat_data = { 0 }; | ||
| 164 | u64 total_ingating, total_ungating, residency, divisor, dividend; | ||
| 165 | int err; | ||
| 166 | |||
| 167 | /* Don't unnecessarily power on the device */ | ||
| 168 | if (g->power_on) { | ||
| 169 | err = gk20a_busy(g); | ||
| 170 | if (err) | ||
| 171 | return err; | ||
| 172 | |||
| 173 | nvgpu_pmu_get_pg_stats(g, | ||
| 174 | PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); | ||
| 175 | gk20a_idle(g); | ||
| 176 | } | ||
| 177 | total_ingating = g->pg_ingating_time_us + | ||
| 178 | (u64)pg_stat_data.ingating_time; | ||
| 179 | total_ungating = g->pg_ungating_time_us + | ||
| 180 | (u64)pg_stat_data.ungating_time; | ||
| 181 | divisor = total_ingating + total_ungating; | ||
| 182 | |||
| 183 | /* We compute the residency on a scale of 1000 */ | ||
| 184 | dividend = total_ingating * 1000; | ||
| 185 | |||
| 186 | if (divisor) | ||
| 187 | residency = div64_u64(dividend, divisor); | ||
| 188 | else | ||
| 189 | residency = 0; | ||
| 190 | |||
| 191 | seq_printf(s, | ||
| 192 | "Time in ELPG: %llu us\n" | ||
| 193 | "Time out of ELPG: %llu us\n" | ||
| 194 | "ELPG residency ratio: %llu\n" | ||
| 195 | "ELPG Entry Count: %u\n" | ||
| 196 | "ELPG Avg Entry latency %u us\n" | ||
| 197 | "ELPG Avg Exit latency %u us\n", | ||
| 198 | total_ingating, total_ungating, | ||
| 199 | residency, pg_stat_data.gating_cnt, | ||
| 200 | pg_stat_data.avg_entry_latency_us, | ||
| 201 | pg_stat_data.avg_exit_latency_us); | ||
| 202 | return 0; | ||
| 203 | |||
| 204 | } | ||
| 205 | |||
| 206 | static int elpg_stat_open(struct inode *inode, struct file *file) | ||
| 207 | { | ||
| 208 | return single_open(file, elpg_stat_show, inode->i_private); | ||
| 209 | } | ||
| 210 | |||
| 211 | static const struct file_operations elpg_stat_fops = { | ||
| 212 | .open = elpg_stat_open, | ||
| 213 | .read = seq_read, | ||
| 214 | .llseek = seq_lseek, | ||
| 215 | .release = single_release, | ||
| 216 | }; | ||
| 217 | |||
| 218 | static int elpg_transitions_show(struct seq_file *s, void *data) | ||
| 219 | { | ||
| 220 | struct gk20a *g = s->private; | ||
| 221 | struct pmu_pg_stats_data pg_stat_data = { 0 }; | ||
| 222 | u32 total_gating_cnt; | ||
| 223 | int err; | ||
| 224 | |||
| 225 | if (g->power_on) { | ||
| 226 | err = gk20a_busy(g); | ||
| 227 | if (err) | ||
| 228 | return err; | ||
| 229 | |||
| 230 | nvgpu_pmu_get_pg_stats(g, | ||
| 231 | PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); | ||
| 232 | gk20a_idle(g); | ||
| 233 | } | ||
| 234 | total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; | ||
| 235 | |||
| 236 | seq_printf(s, "%u\n", total_gating_cnt); | ||
| 237 | return 0; | ||
| 238 | |||
| 239 | } | ||
| 240 | |||
| 241 | static int elpg_transitions_open(struct inode *inode, struct file *file) | ||
| 242 | { | ||
| 243 | return single_open(file, elpg_transitions_show, inode->i_private); | ||
| 244 | } | ||
| 245 | |||
| 246 | static const struct file_operations elpg_transitions_fops = { | ||
| 247 | .open = elpg_transitions_open, | ||
| 248 | .read = seq_read, | ||
| 249 | .llseek = seq_lseek, | ||
| 250 | .release = single_release, | ||
| 251 | }; | ||
| 252 | |||
| 253 | static int falc_trace_show(struct seq_file *s, void *data) | ||
| 254 | { | ||
| 255 | struct gk20a *g = s->private; | ||
| 256 | struct nvgpu_pmu *pmu = &g->pmu; | ||
| 257 | u32 i = 0, j = 0, k, l, m; | ||
| 258 | char part_str[40]; | ||
| 259 | void *tracebuffer; | ||
| 260 | char *trace; | ||
| 261 | u32 *trace1; | ||
| 262 | |||
| 263 | /* allocate system memory to copy pmu trace buffer */ | ||
| 264 | tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE); | ||
| 265 | if (tracebuffer == NULL) | ||
| 266 | return -ENOMEM; | ||
| 267 | |||
| 268 | /* read pmu traces into system memory buffer */ | ||
| 269 | nvgpu_mem_rd_n(g, &pmu->trace_buf, | ||
| 270 | 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE); | ||
| 271 | |||
| 272 | trace = (char *)tracebuffer; | ||
| 273 | trace1 = (u32 *)tracebuffer; | ||
| 274 | |||
| 275 | for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { | ||
| 276 | for (j = 0; j < 0x40; j++) | ||
| 277 | if (trace1[(i / 4) + j]) | ||
| 278 | break; | ||
| 279 | if (j == 0x40) | ||
| 280 | break; | ||
| 281 | seq_printf(s, "Index %x: ", trace1[(i / 4)]); | ||
| 282 | l = 0; | ||
| 283 | m = 0; | ||
| 284 | while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { | ||
| 285 | if (k >= 40) | ||
| 286 | break; | ||
| 287 | strncpy(part_str, (trace+i+20+m), k); | ||
| 288 | part_str[k] = 0; | ||
| 289 | seq_printf(s, "%s0x%x", part_str, | ||
| 290 | trace1[(i / 4) + 1 + l]); | ||
| 291 | l++; | ||
| 292 | m += k + 2; | ||
| 293 | } | ||
| 294 | seq_printf(s, "%s", (trace+i+20+m)); | ||
| 295 | } | ||
| 296 | |||
| 297 | nvgpu_kfree(g, tracebuffer); | ||
| 298 | return 0; | ||
| 299 | } | ||
| 300 | |||
| 301 | static int falc_trace_open(struct inode *inode, struct file *file) | ||
| 302 | { | ||
| 303 | return single_open(file, falc_trace_show, inode->i_private); | ||
| 304 | } | ||
| 305 | |||
| 306 | static const struct file_operations falc_trace_fops = { | ||
| 307 | .open = falc_trace_open, | ||
| 308 | .read = seq_read, | ||
| 309 | .llseek = seq_lseek, | ||
| 310 | .release = single_release, | ||
| 311 | }; | ||
| 312 | |||
| 313 | static int perfmon_events_enable_show(struct seq_file *s, void *data) | ||
| 314 | { | ||
| 315 | struct gk20a *g = s->private; | ||
| 316 | |||
| 317 | seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); | ||
| 318 | return 0; | ||
| 319 | |||
| 320 | } | ||
| 321 | |||
| 322 | static int perfmon_events_enable_open(struct inode *inode, struct file *file) | ||
| 323 | { | ||
| 324 | return single_open(file, perfmon_events_enable_show, inode->i_private); | ||
| 325 | } | ||
| 326 | |||
| 327 | static ssize_t perfmon_events_enable_write(struct file *file, | ||
| 328 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
| 329 | { | ||
| 330 | struct seq_file *s = file->private_data; | ||
| 331 | struct gk20a *g = s->private; | ||
| 332 | unsigned long val = 0; | ||
| 333 | char buf[40]; | ||
| 334 | int buf_size; | ||
| 335 | int err; | ||
| 336 | |||
| 337 | memset(buf, 0, sizeof(buf)); | ||
| 338 | buf_size = min(count, (sizeof(buf)-1)); | ||
| 339 | |||
| 340 | if (copy_from_user(buf, userbuf, buf_size)) | ||
| 341 | return -EFAULT; | ||
| 342 | |||
| 343 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 344 | return -EINVAL; | ||
| 345 | |||
| 346 | /* Don't turn on gk20a unnecessarily */ | ||
| 347 | if (g->power_on) { | ||
| 348 | err = gk20a_busy(g); | ||
| 349 | if (err) | ||
| 350 | return err; | ||
| 351 | |||
| 352 | if (val && !g->pmu.perfmon_sampling_enabled && | ||
| 353 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { | ||
| 354 | g->pmu.perfmon_sampling_enabled = true; | ||
| 355 | g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); | ||
| 356 | } else if (!val && g->pmu.perfmon_sampling_enabled && | ||
| 357 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { | ||
| 358 | g->pmu.perfmon_sampling_enabled = false; | ||
| 359 | g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); | ||
| 360 | } | ||
| 361 | gk20a_idle(g); | ||
| 362 | } else { | ||
| 363 | g->pmu.perfmon_sampling_enabled = val ? true : false; | ||
| 364 | } | ||
| 365 | |||
| 366 | return count; | ||
| 367 | } | ||
| 368 | |||
| 369 | static const struct file_operations perfmon_events_enable_fops = { | ||
| 370 | .open = perfmon_events_enable_open, | ||
| 371 | .read = seq_read, | ||
| 372 | .write = perfmon_events_enable_write, | ||
| 373 | .llseek = seq_lseek, | ||
| 374 | .release = single_release, | ||
| 375 | }; | ||
| 376 | |||
| 377 | static int perfmon_events_count_show(struct seq_file *s, void *data) | ||
| 378 | { | ||
| 379 | struct gk20a *g = s->private; | ||
| 380 | |||
| 381 | seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); | ||
| 382 | return 0; | ||
| 383 | |||
| 384 | } | ||
| 385 | |||
| 386 | static int perfmon_events_count_open(struct inode *inode, struct file *file) | ||
| 387 | { | ||
| 388 | return single_open(file, perfmon_events_count_show, inode->i_private); | ||
| 389 | } | ||
| 390 | |||
| 391 | static const struct file_operations perfmon_events_count_fops = { | ||
| 392 | .open = perfmon_events_count_open, | ||
| 393 | .read = seq_read, | ||
| 394 | .llseek = seq_lseek, | ||
| 395 | .release = single_release, | ||
| 396 | }; | ||
| 397 | |||
| 398 | static int security_show(struct seq_file *s, void *data) | ||
| 399 | { | ||
| 400 | struct gk20a *g = s->private; | ||
| 401 | |||
| 402 | seq_printf(s, "%d\n", g->pmu.pmu_mode); | ||
| 403 | return 0; | ||
| 404 | |||
| 405 | } | ||
| 406 | |||
| 407 | static int security_open(struct inode *inode, struct file *file) | ||
| 408 | { | ||
| 409 | return single_open(file, security_show, inode->i_private); | ||
| 410 | } | ||
| 411 | |||
| 412 | static const struct file_operations security_fops = { | ||
| 413 | .open = security_open, | ||
| 414 | .read = seq_read, | ||
| 415 | .llseek = seq_lseek, | ||
| 416 | .release = single_release, | ||
| 417 | }; | ||
| 418 | |||
| 419 | int gk20a_pmu_debugfs_init(struct gk20a *g) | ||
| 420 | { | ||
| 421 | struct dentry *d; | ||
| 422 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 423 | |||
| 424 | d = debugfs_create_file( | ||
| 425 | "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
| 426 | &lpwr_debug_fops); | ||
| 427 | if (!d) | ||
| 428 | goto err_out; | ||
| 429 | |||
| 430 | d = debugfs_create_file( | ||
| 431 | "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
| 432 | &mscg_stat_fops); | ||
| 433 | if (!d) | ||
| 434 | goto err_out; | ||
| 435 | |||
| 436 | d = debugfs_create_file( | ||
| 437 | "mscg_transitions", S_IRUGO, l->debugfs, g, | ||
| 438 | &mscg_transitions_fops); | ||
| 439 | if (!d) | ||
| 440 | goto err_out; | ||
| 441 | |||
| 442 | d = debugfs_create_file( | ||
| 443 | "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
| 444 | &elpg_stat_fops); | ||
| 445 | if (!d) | ||
| 446 | goto err_out; | ||
| 447 | |||
| 448 | d = debugfs_create_file( | ||
| 449 | "elpg_transitions", S_IRUGO, l->debugfs, g, | ||
| 450 | &elpg_transitions_fops); | ||
| 451 | if (!d) | ||
| 452 | goto err_out; | ||
| 453 | |||
| 454 | d = debugfs_create_file( | ||
| 455 | "pmu_security", S_IRUGO, l->debugfs, g, | ||
| 456 | &security_fops); | ||
| 457 | if (!d) | ||
| 458 | goto err_out; | ||
| 459 | |||
| 460 | /* No access to PMU if virtual */ | ||
| 461 | if (!g->is_virtual) { | ||
| 462 | d = debugfs_create_file( | ||
| 463 | "falc_trace", S_IRUGO, l->debugfs, g, | ||
| 464 | &falc_trace_fops); | ||
| 465 | if (!d) | ||
| 466 | goto err_out; | ||
| 467 | |||
| 468 | d = debugfs_create_file( | ||
| 469 | "perfmon_events_enable", S_IRUGO, l->debugfs, g, | ||
| 470 | &perfmon_events_enable_fops); | ||
| 471 | if (!d) | ||
| 472 | goto err_out; | ||
| 473 | |||
| 474 | d = debugfs_create_file( | ||
| 475 | "perfmon_events_count", S_IRUGO, l->debugfs, g, | ||
| 476 | &perfmon_events_count_fops); | ||
| 477 | if (!d) | ||
| 478 | goto err_out; | ||
| 479 | } | ||
| 480 | return 0; | ||
| 481 | err_out: | ||
| 482 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
| 483 | return -ENOMEM; | ||
| 484 | } | ||
diff --git a/include/os/linux/debug_pmu.h b/include/os/linux/debug_pmu.h new file mode 100644 index 0000000..c4e3243 --- /dev/null +++ b/include/os/linux/debug_pmu.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_PMU_H__ | ||
| 16 | #define __NVGPU_DEBUG_PMU_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | int gk20a_pmu_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_PMU_H__ */ | ||
diff --git a/include/os/linux/debug_sched.c b/include/os/linux/debug_sched.c new file mode 100644 index 0000000..fa43dc4 --- /dev/null +++ b/include/os/linux/debug_sched.c | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_sched.h" | ||
| 16 | #include "os_linux.h" | ||
| 17 | |||
| 18 | #include <linux/debugfs.h> | ||
| 19 | #include <linux/seq_file.h> | ||
| 20 | |||
| 21 | static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) | ||
| 22 | { | ||
| 23 | struct gk20a *g = s->private; | ||
| 24 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 25 | bool sched_busy = true; | ||
| 26 | |||
| 27 | int n = sched->bitmap_size / sizeof(u64); | ||
| 28 | int i; | ||
| 29 | int err; | ||
| 30 | |||
| 31 | err = gk20a_busy(g); | ||
| 32 | if (err) | ||
| 33 | return err; | ||
| 34 | |||
| 35 | if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { | ||
| 36 | sched_busy = false; | ||
| 37 | nvgpu_mutex_release(&sched->busy_lock); | ||
| 38 | } | ||
| 39 | |||
| 40 | seq_printf(s, "control_locked=%d\n", sched->control_locked); | ||
| 41 | seq_printf(s, "busy=%d\n", sched_busy); | ||
| 42 | seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); | ||
| 43 | |||
| 44 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 45 | |||
| 46 | seq_puts(s, "active_tsg_bitmap\n"); | ||
| 47 | for (i = 0; i < n; i++) | ||
| 48 | seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]); | ||
| 49 | |||
| 50 | seq_puts(s, "recent_tsg_bitmap\n"); | ||
| 51 | for (i = 0; i < n; i++) | ||
| 52 | seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); | ||
| 53 | |||
| 54 | nvgpu_mutex_release(&sched->status_lock); | ||
| 55 | |||
| 56 | gk20a_idle(g); | ||
| 57 | |||
| 58 | return 0; | ||
| 59 | } | ||
| 60 | |||
| 61 | static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file) | ||
| 62 | { | ||
| 63 | return single_open(file, gk20a_sched_debugfs_show, inode->i_private); | ||
| 64 | } | ||
| 65 | |||
| 66 | static const struct file_operations gk20a_sched_debugfs_fops = { | ||
| 67 | .open = gk20a_sched_debugfs_open, | ||
| 68 | .read = seq_read, | ||
| 69 | .llseek = seq_lseek, | ||
| 70 | .release = single_release, | ||
| 71 | }; | ||
| 72 | |||
| 73 | void gk20a_sched_debugfs_init(struct gk20a *g) | ||
| 74 | { | ||
| 75 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 76 | |||
| 77 | debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs, | ||
| 78 | g, &gk20a_sched_debugfs_fops); | ||
| 79 | } | ||
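The busy probe in gk20a_sched_debugfs_show() works by opportunistically taking and immediately releasing busy_lock: success means nobody held it. A minimal sketch of that idiom, factored out (the helper is illustrative, not driver API):

/* Snapshot whether a lock is currently contended; inherently racy. */
static bool nvgpu_lock_is_busy(struct nvgpu_mutex *lock)
{
	if (nvgpu_mutex_tryacquire(lock)) {
		nvgpu_mutex_release(lock);
		return false;	/* we took it, so it was free */
	}
	return true;		/* someone else holds it right now */
}

The result is only a point-in-time snapshot, which is acceptable here because the value is reported for debugging, not used for synchronization.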
diff --git a/include/os/linux/debug_sched.h b/include/os/linux/debug_sched.h new file mode 100644 index 0000000..34a8f55 --- /dev/null +++ b/include/os/linux/debug_sched.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_SCHED_H__ | ||
| 16 | #define __NVGPU_DEBUG_SCHED_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | void gk20a_sched_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_SCHED_H__ */ | ||
diff --git a/include/os/linux/debug_therm_gp106.c b/include/os/linux/debug_therm_gp106.c new file mode 100644 index 0000000..dfe3946 --- /dev/null +++ b/include/os/linux/debug_therm_gp106.c | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/debugfs.h> | ||
| 18 | |||
| 19 | #include "os_linux.h" | ||
| 20 | |||
| 21 | static int therm_get_internal_sensor_curr_temp(void *data, u64 *val) | ||
| 22 | { | ||
| 23 | struct gk20a *g = (struct gk20a *)data; | ||
| 24 | u32 readval; | ||
| 25 | int err; | ||
| 26 | |||
| 27 | if (!g->ops.therm.get_internal_sensor_curr_temp) | ||
| 28 | return -EINVAL; | ||
| 29 | |||
| 30 | err = g->ops.therm.get_internal_sensor_curr_temp(g, &readval); | ||
| 31 | if (!err) | ||
| 32 | *val = readval; | ||
| 33 | |||
| 34 | return err; | ||
| 35 | } | ||
| 36 | DEFINE_SIMPLE_ATTRIBUTE(therm_ctrl_fops, therm_get_internal_sensor_curr_temp, NULL, "%llu\n"); | ||
| 37 | |||
| 38 | int gp106_therm_init_debugfs(struct gk20a *g) | ||
| 39 | { | ||
| 40 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 41 | struct dentry *dbgentry; | ||
| 42 | |||
| 43 | dbgentry = debugfs_create_file( | ||
| 44 | "temp", S_IRUGO, l->debugfs, g, &therm_ctrl_fops); | ||
| 45 | if (!dbgentry) | ||
| 46 | nvgpu_err(g, "debugfs entry create failed for therm_curr_temp"); | ||
| 47 | |||
| 48 | return 0; | ||
| 49 | } | ||
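DEFINE_SIMPLE_ATTRIBUTE() takes both a get and a set callback; the "temp" node above passes NULL for set, making it read-only. As a hedged sketch, a writable node would supply the set side as well (the set_fan_pwm HAL hook below is hypothetical and only illustrates the shape):

/* Hypothetical writable attribute: set fan PWM via an assumed HAL hook. */
static int therm_set_fan_pwm(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *)data;

	if (!g->ops.therm.set_fan_pwm)	/* assumed hook, not in this driver */
		return -EINVAL;

	return g->ops.therm.set_fan_pwm(g, (u32)val);
}
DEFINE_SIMPLE_ATTRIBUTE(fan_pwm_fops, NULL, therm_set_fan_pwm, "%llu\n");

Such a node would also need S_IWUSR in its mode when passed to debugfs_create_file().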
diff --git a/include/os/linux/debug_therm_gp106.h b/include/os/linux/debug_therm_gp106.h new file mode 100644 index 0000000..3e9380d --- /dev/null +++ b/include/os/linux/debug_therm_gp106.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __DEBUG_THERM_GP106_H | ||
| 18 | #define __DEBUG_THERM_GP106_H | ||
| 19 | |||
| 20 | #ifdef CONFIG_DEBUG_FS | ||
| 21 | int gp106_therm_init_debugfs(struct gk20a *g); | ||
| 22 | #else | ||
| 23 | static inline int gp106_therm_init_debugfs(struct gk20a *g) | ||
| 24 | { | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | #endif | ||
| 28 | |||
| 29 | #endif | ||
diff --git a/include/os/linux/debug_xve.c b/include/os/linux/debug_xve.c new file mode 100644 index 0000000..128d316 --- /dev/null +++ b/include/os/linux/debug_xve.c | |||
| @@ -0,0 +1,177 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <nvgpu/types.h> | ||
| 16 | #include <nvgpu/xve.h> | ||
| 17 | #include <nvgpu/timers.h> | ||
| 18 | |||
| 19 | #include "debug_xve.h" | ||
| 20 | #include "os_linux.h" | ||
| 21 | |||
| 22 | #include <linux/debugfs.h> | ||
| 23 | #include <linux/uaccess.h> | ||
| 24 | |||
| 25 | static ssize_t xve_link_speed_write(struct file *filp, | ||
| 26 | const char __user *buff, | ||
| 27 | size_t len, loff_t *off) | ||
| 28 | { | ||
| 29 | struct gk20a *g = ((struct seq_file *)filp->private_data)->private; | ||
| 30 | char kbuff[16]; | ||
| 31 | u32 buff_size, check_len; | ||
| 32 | u32 link_speed = 0; | ||
| 33 | int ret; | ||
| 34 | |||
| 35 | buff_size = min_t(size_t, sizeof(kbuff) - 1, len); /* keep a NUL */ | ||
| 36 | |||
| 37 | memset(kbuff, 0, sizeof(kbuff)); | ||
| 38 | if (copy_from_user(kbuff, buff, buff_size)) | ||
| 39 | return -EFAULT; | ||
| 40 | |||
| 41 | check_len = strlen("Gen1"); | ||
| 42 | if (strncmp(kbuff, "Gen1", check_len) == 0) | ||
| 43 | link_speed = GPU_XVE_SPEED_2P5; | ||
| 44 | else if (strncmp(kbuff, "Gen2", check_len) == 0) | ||
| 45 | link_speed = GPU_XVE_SPEED_5P0; | ||
| 46 | else if (strncmp(kbuff, "Gen3", check_len) == 0) | ||
| 47 | link_speed = GPU_XVE_SPEED_8P0; | ||
| 48 | else | ||
| 49 | nvgpu_err(g, "%s: Unknown PCIe speed: %s", | ||
| 50 | __func__, kbuff); | ||
| 51 | |||
| 52 | if (!link_speed) | ||
| 53 | return -EINVAL; | ||
| 54 | |||
| 55 | /* Brief pause... To help rate limit this. */ | ||
| 56 | nvgpu_msleep(250); | ||
| 57 | |||
| 58 | /* | ||
| 59 | * And actually set the speed. Yay. | ||
| 60 | */ | ||
| 61 | ret = g->ops.xve.set_speed(g, link_speed); | ||
| 62 | if (ret) | ||
| 63 | return ret; | ||
| 64 | |||
| 65 | return len; | ||
| 66 | } | ||
| 67 | |||
| 68 | static int xve_link_speed_show(struct seq_file *s, void *unused) | ||
| 69 | { | ||
| 70 | struct gk20a *g = s->private; | ||
| 71 | u32 speed; | ||
| 72 | int err; | ||
| 73 | |||
| 74 | err = g->ops.xve.get_speed(g, &speed); | ||
| 75 | if (err) | ||
| 76 | return err; | ||
| 77 | |||
| 78 | seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed)); | ||
| 79 | |||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | static int xve_link_speed_open(struct inode *inode, struct file *file) | ||
| 84 | { | ||
| 85 | return single_open(file, xve_link_speed_show, inode->i_private); | ||
| 86 | } | ||
| 87 | |||
| 88 | static const struct file_operations xve_link_speed_fops = { | ||
| 89 | .open = xve_link_speed_open, | ||
| 90 | .read = seq_read, | ||
| 91 | .write = xve_link_speed_write, | ||
| 92 | .llseek = seq_lseek, | ||
| 93 | .release = single_release, | ||
| 94 | }; | ||
| 95 | |||
| 96 | static int xve_available_speeds_show(struct seq_file *s, void *unused) | ||
| 97 | { | ||
| 98 | struct gk20a *g = s->private; | ||
| 99 | u32 available_speeds; | ||
| 100 | |||
| 101 | g->ops.xve.available_speeds(g, &available_speeds); | ||
| 102 | |||
| 103 | seq_puts(s, "Available PCIe bus speeds:\n"); | ||
| 104 | if (available_speeds & GPU_XVE_SPEED_2P5) | ||
| 105 | seq_puts(s, " Gen1\n"); | ||
| 106 | if (available_speeds & GPU_XVE_SPEED_5P0) | ||
| 107 | seq_puts(s, " Gen2\n"); | ||
| 108 | if (available_speeds & GPU_XVE_SPEED_8P0) | ||
| 109 | seq_puts(s, " Gen3\n"); | ||
| 110 | |||
| 111 | return 0; | ||
| 112 | } | ||
| 113 | |||
| 114 | static int xve_available_speeds_open(struct inode *inode, struct file *file) | ||
| 115 | { | ||
| 116 | return single_open(file, xve_available_speeds_show, inode->i_private); | ||
| 117 | } | ||
| 118 | |||
| 119 | static const struct file_operations xve_available_speeds_fops = { | ||
| 120 | .open = xve_available_speeds_open, | ||
| 121 | .read = seq_read, | ||
| 122 | .llseek = seq_lseek, | ||
| 123 | .release = single_release, | ||
| 124 | }; | ||
| 125 | |||
| 126 | static int xve_link_control_status_show(struct seq_file *s, void *unused) | ||
| 127 | { | ||
| 128 | struct gk20a *g = s->private; | ||
| 129 | u32 link_status; | ||
| 130 | |||
| 131 | link_status = g->ops.xve.get_link_control_status(g); | ||
| 132 | seq_printf(s, "0x%08x\n", link_status); | ||
| 133 | |||
| 134 | return 0; | ||
| 135 | } | ||
| 136 | |||
| 137 | static int xve_link_control_status_open(struct inode *inode, struct file *file) | ||
| 138 | { | ||
| 139 | return single_open(file, xve_link_control_status_show, inode->i_private); | ||
| 140 | } | ||
| 141 | |||
| 142 | static const struct file_operations xve_link_control_status_fops = { | ||
| 143 | .open = xve_link_control_status_open, | ||
| 144 | .read = seq_read, | ||
| 145 | .llseek = seq_lseek, | ||
| 146 | .release = single_release, | ||
| 147 | }; | ||
| 148 | |||
| 149 | int nvgpu_xve_debugfs_init(struct gk20a *g) | ||
| 150 | { | ||
| 151 | int err = -ENODEV; | ||
| 152 | |||
| 153 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 154 | struct dentry *gpu_root = l->debugfs; | ||
| 155 | |||
| 156 | l->debugfs_xve = debugfs_create_dir("xve", gpu_root); | ||
| 157 | if (IS_ERR_OR_NULL(l->debugfs_xve)) | ||
| 158 | goto fail; | ||
| 159 | |||
| 160 | /* | ||
| 161 | * These are just debug nodes. If they fail to get made it's not worth | ||
| 162 | * worrying the higher level SW. | ||
| 163 | */ | ||
| 164 | debugfs_create_file("link_speed", S_IRUGO, | ||
| 165 | l->debugfs_xve, g, | ||
| 166 | &xve_link_speed_fops); | ||
| 167 | debugfs_create_file("available_speeds", S_IRUGO, | ||
| 168 | l->debugfs_xve, g, | ||
| 169 | &xve_available_speeds_fops); | ||
| 170 | debugfs_create_file("link_control_status", S_IRUGO, | ||
| 171 | l->debugfs_xve, g, | ||
| 172 | &xve_link_control_status_fops); | ||
| 173 | |||
| 174 | err = 0; | ||
| 175 | fail: | ||
| 176 | return err; | ||
| 177 | } | ||
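The Gen-string comparison in xve_link_speed_write() could equally be table-driven. A sketch under the same assumptions (sysfs_streq() is used because it tolerates the trailing newline that echo(1) appends; the helper itself is illustrative):

static const char * const xve_gen_names[] = { "Gen1", "Gen2", "Gen3" };
static const u32 xve_gen_speeds[] = {
	GPU_XVE_SPEED_2P5, GPU_XVE_SPEED_5P0, GPU_XVE_SPEED_8P0,
};

/* Map a user-supplied "GenN" string to an XVE speed constant. */
static int xve_parse_gen(const char *buf, u32 *speed)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(xve_gen_names); i++) {
		if (sysfs_streq(buf, xve_gen_names[i])) {
			*speed = xve_gen_speeds[i];
			return 0;
		}
	}

	return -EINVAL;
}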
diff --git a/include/os/linux/debug_xve.h b/include/os/linux/debug_xve.h new file mode 100644 index 0000000..f3b1ac5 --- /dev/null +++ b/include/os/linux/debug_xve.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __NVGPU_DEBUG_XVE_H__ | ||
| 16 | #define __NVGPU_DEBUG_XVE_H__ | ||
| 17 | |||
| 18 | struct gk20a; | ||
| 19 | int nvgpu_xve_debugfs_init(struct gk20a *g); | ||
| 20 | |||
| 21 | #endif /* __NVGPU_DEBUG_XVE_H__ */ | ||
diff --git a/include/os/linux/dmabuf.c b/include/os/linux/dmabuf.c new file mode 100644 index 0000000..e8e3313 --- /dev/null +++ b/include/os/linux/dmabuf.c | |||
| @@ -0,0 +1,219 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/device.h> | ||
| 18 | #include <linux/dma-buf.h> | ||
| 19 | #include <linux/scatterlist.h> | ||
| 20 | |||
| 21 | #include <nvgpu/comptags.h> | ||
| 22 | #include <nvgpu/enabled.h> | ||
| 23 | #include <nvgpu/gk20a.h> | ||
| 24 | |||
| 25 | #include <nvgpu/linux/vm.h> | ||
| 26 | |||
| 27 | #include "gk20a/fence_gk20a.h" | ||
| 28 | |||
| 29 | #include "platform_gk20a.h" | ||
| 30 | #include "dmabuf.h" | ||
| 31 | #include "os_linux.h" | ||
| 32 | #include "dmabuf_vidmem.h" | ||
| 33 | |||
| 34 | static void gk20a_mm_delete_priv(void *_priv) | ||
| 35 | { | ||
| 36 | struct gk20a_buffer_state *s, *s_tmp; | ||
| 37 | struct gk20a_dmabuf_priv *priv = _priv; | ||
| 38 | struct gk20a *g; | ||
| 39 | |||
| 40 | if (!priv) | ||
| 41 | return; | ||
| 42 | |||
| 43 | g = priv->g; | ||
| 44 | |||
| 45 | if (priv->comptags.allocated && priv->comptags.lines) { | ||
| 46 | BUG_ON(!priv->comptag_allocator); | ||
| 47 | gk20a_comptaglines_free(priv->comptag_allocator, | ||
| 48 | priv->comptags.offset, | ||
| 49 | priv->comptags.lines); | ||
| 50 | } | ||
| 51 | |||
| 52 | /* Free buffer states */ | ||
| 53 | nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states, | ||
| 54 | gk20a_buffer_state, list) { | ||
| 55 | gk20a_fence_put(s->fence); | ||
| 56 | nvgpu_list_del(&s->list); | ||
| 57 | nvgpu_kfree(g, s); | ||
| 58 | } | ||
| 59 | |||
| 60 | nvgpu_kfree(g, priv); | ||
| 61 | } | ||
| 62 | |||
| 63 | enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, | ||
| 64 | struct dma_buf *dmabuf) | ||
| 65 | { | ||
| 66 | struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf); | ||
| 67 | bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY); | ||
| 68 | |||
| 69 | if (buf_owner == NULL) { | ||
| 70 | /* Not nvgpu-allocated, assume system memory */ | ||
| 71 | return APERTURE_SYSMEM; | ||
| 72 | } else if (WARN_ON(buf_owner == g && unified_memory)) { | ||
| 73 | /* Looks like our video memory, but this gpu doesn't support | ||
| 74 | * it. Warn about a bug and bail out */ | ||
| 75 | nvgpu_warn(g, | ||
| 76 | "dmabuf is our vidmem but we don't have local vidmem"); | ||
| 77 | return APERTURE_INVALID; | ||
| 78 | } else if (buf_owner != g) { | ||
| 79 | /* Someone else's vidmem */ | ||
| 80 | return APERTURE_INVALID; | ||
| 81 | } else { | ||
| 82 | /* Yay, buf_owner == g */ | ||
| 83 | return APERTURE_VIDMEM; | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, | ||
| 88 | struct dma_buf_attachment **attachment) | ||
| 89 | { | ||
| 90 | struct gk20a_dmabuf_priv *priv; | ||
| 91 | |||
| 92 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
| 93 | if (WARN_ON(!priv)) | ||
| 94 | return ERR_PTR(-EINVAL); | ||
| 95 | |||
| 96 | nvgpu_mutex_acquire(&priv->lock); | ||
| 97 | |||
| 98 | if (priv->pin_count == 0) { | ||
| 99 | priv->attach = dma_buf_attach(dmabuf, dev); | ||
| 100 | if (IS_ERR(priv->attach)) { | ||
| 101 | nvgpu_mutex_release(&priv->lock); | ||
| 102 | return ERR_CAST(priv->attach); | ||
| 103 | } | ||
| 104 | |||
| 105 | priv->sgt = dma_buf_map_attachment(priv->attach, | ||
| 106 | DMA_BIDIRECTIONAL); | ||
| 107 | if (IS_ERR(priv->sgt)) { | ||
| 108 | dma_buf_detach(dmabuf, priv->attach); | ||
| 109 | nvgpu_mutex_release(&priv->lock); | ||
| 110 | return priv->sgt; | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | priv->pin_count++; | ||
| 115 | nvgpu_mutex_release(&priv->lock); | ||
| 116 | *attachment = priv->attach; | ||
| 117 | return priv->sgt; | ||
| 118 | } | ||
| 119 | |||
| 120 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
| 121 | struct dma_buf_attachment *attachment, | ||
| 122 | struct sg_table *sgt) | ||
| 123 | { | ||
| 124 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
| 126 | |||
| 127 | if (IS_ERR_OR_NULL(priv)) | ||
| 128 | return; | ||
| 129 | |||
| 130 | nvgpu_mutex_acquire(&priv->lock); | ||
| 131 | WARN_ON(priv->sgt != sgt); | ||
| 132 | WARN_ON(priv->attach != attachment); | ||
| 133 | priv->pin_count--; | ||
| 134 | WARN_ON(priv->pin_count < 0); | ||
| 136 | if (priv->pin_count == 0) { | ||
| 137 | dma_buf_unmap_attachment(priv->attach, priv->sgt, | ||
| 138 | DMA_BIDIRECTIONAL); | ||
| 139 | dma_buf_detach(dmabuf, priv->attach); | ||
| 140 | } | ||
| 141 | nvgpu_mutex_release(&priv->lock); | ||
| 142 | } | ||
| 143 | |||
| 144 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) | ||
| 145 | { | ||
| 146 | struct gk20a *g = gk20a_get_platform(dev)->g; | ||
| 147 | struct gk20a_dmabuf_priv *priv; | ||
| 148 | |||
| 149 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
| 150 | if (likely(priv)) | ||
| 151 | return 0; | ||
| 152 | |||
| 153 | nvgpu_mutex_acquire(&g->mm.priv_lock); | ||
| 154 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
| 155 | if (priv) | ||
| 156 | goto priv_exist_or_err; | ||
| 157 | |||
| 158 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
| 159 | if (!priv) { | ||
| 160 | priv = ERR_PTR(-ENOMEM); | ||
| 161 | goto priv_exist_or_err; | ||
| 162 | } | ||
| 163 | |||
| 164 | nvgpu_mutex_init(&priv->lock); | ||
| 165 | nvgpu_init_list_node(&priv->states); | ||
| 166 | priv->g = g; | ||
| 167 | dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); | ||
| 168 | |||
| 169 | priv_exist_or_err: | ||
| 170 | nvgpu_mutex_release(&g->mm.priv_lock); | ||
| 171 | if (IS_ERR(priv)) | ||
| 172 | return -ENOMEM; | ||
| 173 | |||
| 174 | return 0; | ||
| 175 | } | ||
| 176 | |||
| 177 | int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, | ||
| 178 | u64 offset, struct gk20a_buffer_state **state) | ||
| 179 | { | ||
| 180 | int err = 0; | ||
| 181 | struct gk20a_dmabuf_priv *priv; | ||
| 182 | struct gk20a_buffer_state *s; | ||
| 183 | struct device *dev = dev_from_gk20a(g); | ||
| 184 | |||
| 185 | if (WARN_ON(offset >= (u64)dmabuf->size)) | ||
| 186 | return -EINVAL; | ||
| 187 | |||
| 188 | err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev); | ||
| 189 | if (err) | ||
| 190 | return err; | ||
| 191 | |||
| 192 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
| 193 | if (WARN_ON(!priv)) | ||
| 194 | return -ENOSYS; | ||
| 195 | |||
| 196 | nvgpu_mutex_acquire(&priv->lock); | ||
| 197 | |||
| 198 | nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list) | ||
| 199 | if (s->offset == offset) | ||
| 200 | goto out; | ||
| 201 | |||
| 202 | /* State not found, create state. */ | ||
| 203 | s = nvgpu_kzalloc(g, sizeof(*s)); | ||
| 204 | if (!s) { | ||
| 205 | err = -ENOMEM; | ||
| 206 | goto out; | ||
| 207 | } | ||
| 208 | |||
| 209 | s->offset = offset; | ||
| 210 | nvgpu_init_list_node(&s->list); | ||
| 211 | nvgpu_mutex_init(&s->lock); | ||
| 212 | nvgpu_list_add_tail(&s->list, &priv->states); | ||
| 213 | |||
| 214 | out: | ||
| 215 | nvgpu_mutex_release(&priv->lock); | ||
| 216 | if (!err) | ||
| 217 | *state = s; | ||
| 218 | return err; | ||
| 219 | } | ||
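gk20a_mm_pin() and gk20a_mm_unpin() reference-count the dma-buf attachment in priv, so only the first pin attaches and maps, and only the last unpin tears the mapping down. A hypothetical caller (assuming gk20a_dmabuf_alloc_drvdata() has already attached the priv data) looks like:

/* Illustrative only: pin a dmabuf, peek at its base IOVA, then unpin. */
static int example_peek_iova(struct device *dev, struct dma_buf *dmabuf)
{
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	sgt = gk20a_mm_pin(dev, dmabuf, &attach);
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);

	pr_info("dmabuf base iova: %pad\n", &sg_dma_address(sgt->sgl));

	/* every pin must be balanced by exactly one unpin */
	gk20a_mm_unpin(dev, dmabuf, attach, sgt);

	return 0;
}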
diff --git a/include/os/linux/dmabuf.h b/include/os/linux/dmabuf.h new file mode 100644 index 0000000..8399eaa --- /dev/null +++ b/include/os/linux/dmabuf.h | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __COMMON_LINUX_DMABUF_H__ | ||
| 18 | #define __COMMON_LINUX_DMABUF_H__ | ||
| 19 | |||
| 20 | #include <nvgpu/comptags.h> | ||
| 21 | #include <nvgpu/list.h> | ||
| 22 | #include <nvgpu/lock.h> | ||
| 23 | #include <nvgpu/gmmu.h> | ||
| 24 | |||
| 25 | struct sg_table; | ||
| 26 | struct dma_buf; | ||
| 27 | struct dma_buf_attachment; | ||
| 28 | struct device; | ||
| 29 | |||
| 30 | struct gk20a; | ||
| 31 | struct gk20a_buffer_state; | ||
| 32 | |||
| 33 | struct gk20a_dmabuf_priv { | ||
| 34 | struct nvgpu_mutex lock; | ||
| 35 | |||
| 36 | struct gk20a *g; | ||
| 37 | |||
| 38 | struct gk20a_comptag_allocator *comptag_allocator; | ||
| 39 | struct gk20a_comptags comptags; | ||
| 40 | |||
| 41 | struct dma_buf_attachment *attach; | ||
| 42 | struct sg_table *sgt; | ||
| 43 | |||
| 44 | int pin_count; | ||
| 45 | |||
| 46 | struct nvgpu_list_node states; | ||
| 47 | |||
| 48 | u64 buffer_id; | ||
| 49 | }; | ||
| 50 | |||
| 51 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, | ||
| 52 | struct dma_buf_attachment **attachment); | ||
| 53 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
| 54 | struct dma_buf_attachment *attachment, | ||
| 55 | struct sg_table *sgt); | ||
| 56 | |||
| 57 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); | ||
| 58 | |||
| 59 | int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, | ||
| 60 | u64 offset, struct gk20a_buffer_state **state); | ||
| 61 | |||
| 62 | #endif | ||
diff --git a/include/os/linux/dmabuf_vidmem.c b/include/os/linux/dmabuf_vidmem.c new file mode 100644 index 0000000..bada5dc --- /dev/null +++ b/include/os/linux/dmabuf_vidmem.c | |||
| @@ -0,0 +1,269 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/dma-buf.h> | ||
| 18 | #include <linux/version.h> | ||
| 19 | #include <uapi/linux/nvgpu.h> | ||
| 20 | |||
| 21 | #ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD | ||
| 22 | #include <linux/platform/tegra/tegra_fd.h> | ||
| 23 | #endif | ||
| 24 | |||
| 25 | #include <nvgpu/dma.h> | ||
| 26 | #include <nvgpu/enabled.h> | ||
| 27 | #include <nvgpu/vidmem.h> | ||
| 28 | #include <nvgpu/nvgpu_mem.h> | ||
| 29 | #include <nvgpu/page_allocator.h> | ||
| 30 | #include <nvgpu/gk20a.h> | ||
| 31 | |||
| 32 | #include <nvgpu/linux/vm.h> | ||
| 33 | #include <nvgpu/linux/dma.h> | ||
| 34 | |||
| 35 | #include "gk20a/mm_gk20a.h" | ||
| 36 | #include "dmabuf_vidmem.h" | ||
| 37 | |||
| 38 | bool nvgpu_addr_is_vidmem_page_alloc(u64 addr) | ||
| 39 | { | ||
| 40 | return !!(addr & 1ULL); | ||
| 41 | } | ||
| 42 | |||
| 43 | void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr) | ||
| 44 | { | ||
| 45 | /* set bit 0 to indicate vidmem allocation */ | ||
| 46 | sg_dma_address(sgl) = (addr | 1ULL); | ||
| 47 | } | ||
| 48 | |||
| 49 | struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl) | ||
| 50 | { | ||
| 51 | u64 addr; | ||
| 52 | |||
| 53 | addr = sg_dma_address(sgl); | ||
| 54 | |||
| 55 | if (nvgpu_addr_is_vidmem_page_alloc(addr)) | ||
| 56 | addr = addr & ~1ULL; | ||
| 57 | else | ||
| 58 | WARN_ON(1); | ||
| 59 | |||
| 60 | return (struct nvgpu_page_alloc *)(uintptr_t)addr; | ||
| 61 | } | ||
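The encoding above works only because an nvgpu_page_alloc pointer comes from the kernel allocator and is therefore at least 2-byte aligned, leaving bit 0 free for the vidmem tag. A defensive encode helper (a sketch; the driver stores the tagged value directly via nvgpu_vidmem_set_page_alloc()) would make that assumption explicit:

/* Tag a page-alloc pointer as vidmem; bit 0 must be free to borrow. */
static inline u64 vidmem_encode_page_alloc(struct nvgpu_page_alloc *alloc)
{
	u64 addr = (u64)(uintptr_t)alloc;

	WARN_ON(addr & 1ULL);	/* allocator guarantees >= 2-byte alignment */

	return addr | 1ULL;
}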
| 62 | |||
| 63 | static struct sg_table *gk20a_vidbuf_map_dma_buf( | ||
| 64 | struct dma_buf_attachment *attach, enum dma_data_direction dir) | ||
| 65 | { | ||
| 66 | struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv; | ||
| 67 | |||
| 68 | return buf->mem->priv.sgt; | ||
| 69 | } | ||
| 70 | |||
| 71 | static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, | ||
| 72 | struct sg_table *sgt, | ||
| 73 | enum dma_data_direction dir) | ||
| 74 | { | ||
| 75 | } | ||
| 76 | |||
| 77 | static void gk20a_vidbuf_release(struct dma_buf *dmabuf) | ||
| 78 | { | ||
| 79 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
| 80 | struct nvgpu_vidmem_linux *linux_buf = buf->priv; | ||
| 81 | struct gk20a *g = buf->g; | ||
| 82 | |||
| 83 | vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", | ||
| 84 | dmabuf, buf->mem->size >> 10); | ||
| 85 | |||
| 86 | if (linux_buf && linux_buf->dmabuf_priv_delete) | ||
| 87 | linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv); | ||
| 88 | |||
| 89 | nvgpu_kfree(g, linux_buf); | ||
| 90 | nvgpu_vidmem_buf_free(g, buf); | ||
| 91 | |||
| 92 | gk20a_put(g); | ||
| 93 | } | ||
| 94 | |||
| 95 | static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) | ||
| 96 | { | ||
| 97 | WARN_ON("Not supported"); | ||
| 98 | return NULL; | ||
| 99 | } | ||
| 100 | |||
| 101 | static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf, | ||
| 102 | unsigned long page_num) | ||
| 103 | { | ||
| 104 | WARN_ON("Not supported"); | ||
| 105 | return NULL; | ||
| 106 | } | ||
| 107 | |||
| 108 | static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) | ||
| 109 | { | ||
| 110 | return -EINVAL; | ||
| 111 | } | ||
| 112 | |||
| 113 | static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf, | ||
| 114 | struct device *dev, void *priv, void (*delete)(void *priv)) | ||
| 115 | { | ||
| 116 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
| 117 | struct nvgpu_vidmem_linux *linux_buf = buf->priv; | ||
| 118 | |||
| 119 | linux_buf->dmabuf_priv = priv; | ||
| 120 | linux_buf->dmabuf_priv_delete = delete; | ||
| 121 | |||
| 122 | return 0; | ||
| 123 | } | ||
| 124 | |||
| 125 | static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf, | ||
| 126 | struct device *dev) | ||
| 127 | { | ||
| 128 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
| 129 | struct nvgpu_vidmem_linux *linux_buf = buf->priv; | ||
| 130 | |||
| 131 | return linux_buf->dmabuf_priv; | ||
| 132 | } | ||
| 133 | |||
| 134 | static const struct dma_buf_ops gk20a_vidbuf_ops = { | ||
| 135 | .map_dma_buf = gk20a_vidbuf_map_dma_buf, | ||
| 136 | .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf, | ||
| 137 | .release = gk20a_vidbuf_release, | ||
| 138 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) | ||
| 139 | .map_atomic = gk20a_vidbuf_kmap_atomic, | ||
| 140 | .map = gk20a_vidbuf_kmap, | ||
| 141 | #else | ||
| 142 | .kmap_atomic = gk20a_vidbuf_kmap_atomic, | ||
| 143 | .kmap = gk20a_vidbuf_kmap, | ||
| 144 | #endif | ||
| 145 | .mmap = gk20a_vidbuf_mmap, | ||
| 146 | .set_drvdata = gk20a_vidbuf_set_private, | ||
| 147 | .get_drvdata = gk20a_vidbuf_get_private, | ||
| 148 | }; | ||
| 149 | |||
| 150 | static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf) | ||
| 151 | { | ||
| 152 | DEFINE_DMA_BUF_EXPORT_INFO(exp_info); | ||
| 153 | |||
| 154 | exp_info.priv = buf; | ||
| 155 | exp_info.ops = &gk20a_vidbuf_ops; | ||
| 156 | exp_info.size = buf->mem->size; | ||
| 157 | exp_info.flags = O_RDWR; | ||
| 158 | |||
| 159 | return dma_buf_export(&exp_info); | ||
| 160 | } | ||
| 161 | |||
| 162 | struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) | ||
| 163 | { | ||
| 164 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
| 165 | |||
| 166 | if (dmabuf->ops != &gk20a_vidbuf_ops) | ||
| 167 | return NULL; | ||
| 168 | |||
| 169 | return buf->g; | ||
| 170 | } | ||
| 171 | |||
| 172 | int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) | ||
| 173 | { | ||
| 174 | struct nvgpu_vidmem_buf *buf = NULL; | ||
| 175 | struct nvgpu_vidmem_linux *priv; | ||
| 176 | int err, fd; | ||
| 177 | |||
| 178 | /* | ||
| 179 | * This ref is released when the dma_buf is closed. | ||
| 180 | */ | ||
| 181 | if (!gk20a_get(g)) | ||
| 182 | return -ENODEV; | ||
| 183 | |||
| 184 | vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes); | ||
| 185 | |||
| 186 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
| 187 | if (!priv) { | ||
| 188 | err = -ENOMEM; | ||
| 189 | goto fail; | ||
| 190 | } | ||
| 191 | |||
| 192 | buf = nvgpu_vidmem_user_alloc(g, bytes); | ||
| 193 | if (IS_ERR(buf)) { | ||
| 194 | err = PTR_ERR(buf); | ||
| 195 | goto fail; | ||
| 196 | } | ||
| 197 | |||
| 198 | priv->dmabuf = gk20a_vidbuf_export(buf); | ||
| 199 | if (IS_ERR(priv->dmabuf)) { | ||
| 200 | err = PTR_ERR(priv->dmabuf); | ||
| 201 | goto fail; | ||
| 202 | } | ||
| 203 | |||
| 204 | buf->priv = priv; | ||
| 205 | |||
| 206 | #ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD | ||
| 207 | fd = tegra_alloc_fd(current->files, 1024, O_RDWR); | ||
| 208 | #else | ||
| 209 | fd = get_unused_fd_flags(O_RDWR); | ||
| 210 | #endif | ||
| 211 | if (fd < 0) { | ||
| 212 | /* the dmabuf ->release callback frees what we set up above */ | ||
| 213 | dma_buf_put(priv->dmabuf); | ||
| 214 | return fd; | ||
| 215 | } | ||
| 216 | |||
| 217 | /* close() on this fd drops one ref, freeing the dma buf */ | ||
| 218 | fd_install(fd, priv->dmabuf->file); | ||
| 219 | |||
| 220 | vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", | ||
| 221 | priv->dmabuf, buf->mem->size >> 10); | ||
| 222 | |||
| 223 | return fd; | ||
| 224 | |||
| 225 | fail: | ||
| 226 | nvgpu_vidmem_buf_free(g, buf); | ||
| 227 | nvgpu_kfree(g, priv); | ||
| 228 | gk20a_put(g); | ||
| 229 | |||
| 230 | vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err); | ||
| 231 | return err; | ||
| 232 | } | ||
| 233 | |||
| 234 | int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, | ||
| 235 | void *buffer, u64 offset, u64 size, u32 cmd) | ||
| 236 | { | ||
| 237 | struct nvgpu_vidmem_buf *vidmem_buf; | ||
| 238 | struct nvgpu_mem *mem; | ||
| 239 | int err = 0; | ||
| 240 | |||
| 241 | if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM) | ||
| 242 | return -EINVAL; | ||
| 243 | |||
| 244 | vidmem_buf = dmabuf->priv; | ||
| 245 | mem = vidmem_buf->mem; | ||
| 246 | |||
| 247 | nvgpu_speculation_barrier(); | ||
| 248 | switch (cmd) { | ||
| 249 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ: | ||
| 250 | nvgpu_mem_rd_n(g, mem, offset, buffer, size); | ||
| 251 | break; | ||
| 252 | |||
| 253 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE: | ||
| 254 | nvgpu_mem_wr_n(g, mem, offset, buffer, size); | ||
| 255 | break; | ||
| 256 | |||
| 257 | default: | ||
| 258 | err = -EINVAL; | ||
| 259 | } | ||
| 260 | |||
| 261 | return err; | ||
| 262 | } | ||
| 263 | |||
| 264 | void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem) | ||
| 265 | { | ||
| 266 | nvgpu_free(vidmem->allocator, | ||
| 267 | (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl)); | ||
| 268 | nvgpu_free_sgtable(g, &vidmem->priv.sgt); | ||
| 269 | } | ||
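nvgpu_vidmem_buf_access_memory() is the CPU window into an exported vidmem dma-buf: it validates the aperture, then copies through nvgpu_mem_rd_n()/nvgpu_mem_wr_n(). A hypothetical read of the first 4 KiB (the helper and buffer sizing are illustrative only):

/* Illustrative only: copy the first 4 KiB of a vidmem dmabuf to dst. */
static int vidmem_read_head(struct gk20a *g, struct dma_buf *dmabuf,
			    void *dst)
{
	return nvgpu_vidmem_buf_access_memory(g, dmabuf, dst, 0, SZ_4K,
			NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ);
}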
diff --git a/include/os/linux/dmabuf_vidmem.h b/include/os/linux/dmabuf_vidmem.h new file mode 100644 index 0000000..977fd78 --- /dev/null +++ b/include/os/linux/dmabuf_vidmem.h | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __NVGPU_LINUX_DMABUF_VIDMEM_H__ | ||
| 18 | #define __NVGPU_LINUX_DMABUF_VIDMEM_H__ | ||
| 19 | |||
| 20 | #include <nvgpu/types.h> | ||
| 21 | |||
| 22 | struct dma_buf; | ||
| 23 | |||
| 24 | struct gk20a; | ||
| 25 | struct scatterlist; | ||
| 26 | |||
| 27 | #ifdef CONFIG_GK20A_VIDMEM | ||
| 28 | |||
| 29 | struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf); | ||
| 30 | int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes); | ||
| 31 | |||
| 32 | void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr); | ||
| 33 | struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl); | ||
| 34 | |||
| 35 | int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, | ||
| 36 | void *buffer, u64 offset, u64 size, u32 cmd); | ||
| 37 | |||
| 38 | #else /* !CONFIG_GK20A_VIDMEM */ | ||
| 39 | |||
| 40 | static inline struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) | ||
| 41 | { | ||
| 42 | return NULL; | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) | ||
| 46 | { | ||
| 47 | return -ENOSYS; | ||
| 48 | } | ||
| 49 | |||
| 50 | static inline void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, | ||
| 51 | u64 addr) | ||
| 52 | { | ||
| 53 | } | ||
| 54 | |||
| 55 | static inline struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc( | ||
| 56 | struct scatterlist *sgl) | ||
| 57 | { | ||
| 58 | return NULL; | ||
| 59 | } | ||
| 60 | |||
| 61 | static inline int nvgpu_vidmem_buf_access_memory(struct gk20a *g, | ||
| 62 | struct dma_buf *dmabuf, | ||
| 63 | void *buffer, u64 offset, | ||
| 64 | u64 size, u32 cmd) | ||
| 65 | { | ||
| 66 | return -ENOSYS; | ||
| 67 | } | ||
| 68 | |||
| 69 | #endif | ||
| 70 | |||
| 71 | |||
| 72 | struct nvgpu_vidmem_linux { | ||
| 73 | struct dma_buf *dmabuf; | ||
| 74 | void *dmabuf_priv; | ||
| 75 | void (*dmabuf_priv_delete)(void *); | ||
| 76 | }; | ||
| 77 | |||
| 78 | #endif | ||
diff --git a/include/os/linux/driver_common.c b/include/os/linux/driver_common.c new file mode 100644 index 0000000..c76dabe --- /dev/null +++ b/include/os/linux/driver_common.c | |||
| @@ -0,0 +1,351 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/reboot.h> | ||
| 18 | #include <linux/dma-mapping.h> | ||
| 19 | #include <linux/mm.h> | ||
| 20 | #include <linux/slab.h> | ||
| 21 | #include <uapi/linux/nvgpu.h> | ||
| 22 | |||
| 23 | #include <nvgpu/defaults.h> | ||
| 24 | #include <nvgpu/kmem.h> | ||
| 25 | #include <nvgpu/nvgpu_common.h> | ||
| 26 | #include <nvgpu/soc.h> | ||
| 27 | #include <nvgpu/bug.h> | ||
| 28 | #include <nvgpu/enabled.h> | ||
| 29 | #include <nvgpu/debug.h> | ||
| 30 | #include <nvgpu/sizes.h> | ||
| 31 | #include <nvgpu/gk20a.h> | ||
| 32 | |||
| 33 | #include "platform_gk20a.h" | ||
| 34 | #include "module.h" | ||
| 35 | #include "os_linux.h" | ||
| 36 | #include "sysfs.h" | ||
| 37 | #include "ioctl.h" | ||
| 38 | #include "gk20a/regops_gk20a.h" | ||
| 39 | |||
| 40 | #define EMC3D_DEFAULT_RATIO 750 | ||
| 41 | |||
| 42 | void nvgpu_kernel_restart(void *cmd) | ||
| 43 | { | ||
| 44 | kernel_restart(cmd); | ||
| 45 | } | ||
| 46 | |||
| 47 | static void nvgpu_init_vars(struct gk20a *g) | ||
| 48 | { | ||
| 49 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 50 | struct device *dev = dev_from_gk20a(g); | ||
| 51 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 52 | |||
| 53 | nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq); | ||
| 54 | nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq); | ||
| 55 | |||
| 56 | init_rwsem(&l->busy_lock); | ||
| 57 | nvgpu_rwsem_init(&g->deterministic_busy); | ||
| 58 | |||
| 59 | nvgpu_spinlock_init(&g->mc_enable_lock); | ||
| 60 | |||
| 61 | nvgpu_mutex_init(&platform->railgate_lock); | ||
| 62 | nvgpu_mutex_init(&g->dbg_sessions_lock); | ||
| 63 | nvgpu_mutex_init(&g->client_lock); | ||
| 64 | nvgpu_mutex_init(&g->power_lock); | ||
| 65 | nvgpu_mutex_init(&g->ctxsw_disable_lock); | ||
| 66 | nvgpu_mutex_init(&g->tpc_pg_lock); | ||
| 67 | nvgpu_mutex_init(&g->clk_arb_enable_lock); | ||
| 68 | nvgpu_mutex_init(&g->cg_pg_lock); | ||
| 69 | |||
| 70 | /* Init the clock req count to 0 */ | ||
| 71 | nvgpu_atomic_set(&g->clk_arb_global_nr, 0); | ||
| 72 | |||
| 73 | nvgpu_mutex_init(&l->ctrl.privs_lock); | ||
| 74 | nvgpu_init_list_node(&l->ctrl.privs); | ||
| 75 | |||
| 76 | l->regs_saved = l->regs; | ||
| 77 | l->bar1_saved = l->bar1; | ||
| 78 | |||
| 79 | g->emc3d_ratio = EMC3D_DEFAULT_RATIO; | ||
| 80 | |||
| 81 | /* Set DMA parameters to allow larger sgt lists */ | ||
| 82 | dev->dma_parms = &l->dma_parms; | ||
| 83 | dma_set_max_seg_size(dev, UINT_MAX); | ||
| 84 | |||
| 85 | /* | ||
| 86 | * A default of 16GB is the largest supported DMA size that is | ||
| 87 | * acceptable to all currently supported Tegra SoCs. | ||
| 88 | */ | ||
| 89 | if (!platform->dma_mask) | ||
| 90 | platform->dma_mask = DMA_BIT_MASK(34); | ||
| 91 | |||
| 92 | dma_set_mask(dev, platform->dma_mask); | ||
| 93 | dma_set_coherent_mask(dev, platform->dma_mask); | ||
| 94 | |||
| 95 | nvgpu_init_list_node(&g->profiler_objects); | ||
| 96 | |||
| 97 | nvgpu_init_list_node(&g->boardobj_head); | ||
| 98 | nvgpu_init_list_node(&g->boardobjgrp_head); | ||
| 99 | |||
| 100 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints); | ||
| 101 | } | ||
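The DMA_BIT_MASK(34) default corresponds to the 16GB mentioned in the comment: a 34-bit mask addresses 2^34 bytes. A compile-time sanity sketch (not in the driver) that ties the two numbers together:

/* Sketch: check that a 34-bit DMA mask really spans 16 GiB. */
static inline void nvgpu_dma_mask_sanity(void)
{
	BUILD_BUG_ON(DMA_BIT_MASK(34) != 16ULL * SZ_1G - 1);
}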
| 102 | |||
| 103 | static void nvgpu_init_gr_vars(struct gk20a *g) | ||
| 104 | { | ||
| 105 | gk20a_init_gr(g); | ||
| 106 | |||
| 107 | nvgpu_log_info(g, "total ram pages : %lu", totalram_pages); | ||
| 108 | g->gr.max_comptag_mem = totalram_size_in_mb; | ||
| 109 | } | ||
| 110 | |||
| 111 | static void nvgpu_init_timeout(struct gk20a *g) | ||
| 112 | { | ||
| 113 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
| 114 | |||
| 115 | g->timeouts_disabled_by_user = false; | ||
| 116 | nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0); | ||
| 117 | |||
| 118 | if (nvgpu_platform_is_silicon(g)) { | ||
| 119 | g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; | ||
| 120 | } else if (nvgpu_platform_is_fpga(g)) { | ||
| 121 | g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA; | ||
| 122 | } else { | ||
| 123 | g->gr_idle_timeout_default = (u32)ULONG_MAX; | ||
| 124 | } | ||
| 125 | g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; | ||
| 126 | g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US; | ||
| 127 | } | ||
| 128 | |||
| 129 | static void nvgpu_init_timeslice(struct gk20a *g) | ||
| 130 | { | ||
| 131 | g->runlist_interleave = true; | ||
| 132 | |||
| 133 | g->timeslice_low_priority_us = 1300; | ||
| 134 | g->timeslice_medium_priority_us = 2600; | ||
| 135 | g->timeslice_high_priority_us = 5200; | ||
| 136 | |||
| 137 | g->min_timeslice_us = 1000; | ||
| 138 | g->max_timeslice_us = 50000; | ||
| 139 | } | ||
| 140 | |||
| 141 | static void nvgpu_init_pm_vars(struct gk20a *g) | ||
| 142 | { | ||
| 143 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
| 144 | u32 i = 0; | ||
| 145 | |||
| 146 | /* | ||
| 147 | * Set up initial power settings. For non-silicon platforms, disable | ||
| 148 | * all power features; for silicon platforms, read them from platform data. | ||
| 149 | */ | ||
| 150 | g->slcg_enabled = | ||
| 151 | nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false; | ||
| 152 | g->blcg_enabled = | ||
| 153 | nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false; | ||
| 154 | g->elcg_enabled = | ||
| 155 | nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false; | ||
| 156 | g->elpg_enabled = | ||
| 157 | nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false; | ||
| 158 | g->aelpg_enabled = | ||
| 159 | nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false; | ||
| 160 | g->mscg_enabled = | ||
| 161 | nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false; | ||
| 162 | g->can_elpg = | ||
| 163 | nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false; | ||
| 164 | |||
| 165 | __nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG, | ||
| 166 | nvgpu_platform_is_silicon(g) ? platform->can_elcg : false); | ||
| 167 | __nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG, | ||
| 168 | nvgpu_platform_is_silicon(g) ? platform->can_slcg : false); | ||
| 169 | __nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG, | ||
| 170 | nvgpu_platform_is_silicon(g) ? platform->can_blcg : false); | ||
| 171 | |||
| 172 | g->aggressive_sync_destroy = platform->aggressive_sync_destroy; | ||
| 173 | g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; | ||
| 174 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 175 | g->has_cde = platform->has_cde; | ||
| 176 | #endif | ||
| 177 | g->ptimer_src_freq = platform->ptimer_src_freq; | ||
| 178 | g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); | ||
| 179 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); | ||
| 180 | g->can_tpc_powergate = platform->can_tpc_powergate; | ||
| 181 | |||
| 182 | for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) | ||
| 183 | g->valid_tpc_mask[i] = platform->valid_tpc_mask[i]; | ||
| 184 | |||
| 185 | g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; | ||
| 186 | /* use the platform railgate delay if set, else default to 500 ms */ | ||
| 187 | if (platform->railgate_delay_init) | ||
| 188 | g->railgate_delay = platform->railgate_delay_init; | ||
| 189 | else | ||
| 190 | g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT; | ||
| 191 | __nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon); | ||
| 192 | |||
| 193 | /* set default values to aelpg parameters */ | ||
| 194 | g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; | ||
| 195 | g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; | ||
| 196 | g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; | ||
| 197 | g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US; | ||
| 198 | g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; | ||
| 199 | |||
| 200 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm); | ||
| 201 | } | ||
| 202 | |||
| 203 | static void nvgpu_init_vbios_vars(struct gk20a *g) | ||
| 204 | { | ||
| 205 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
| 206 | |||
| 207 | __nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos); | ||
| 208 | g->vbios_min_version = platform->vbios_min_version; | ||
| 209 | } | ||
| 210 | |||
| 211 | static void nvgpu_init_ltc_vars(struct gk20a *g) | ||
| 212 | { | ||
| 213 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
| 214 | |||
| 215 | g->ltc_streamid = platform->ltc_streamid; | ||
| 216 | } | ||
| 217 | |||
| 218 | static void nvgpu_init_mm_vars(struct gk20a *g) | ||
| 219 | { | ||
| 220 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
| 221 | |||
| 222 | g->mm.disable_bigpage = platform->disable_bigpage; | ||
| 223 | __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE, | ||
| 224 | platform->honors_aperture); | ||
| 225 | __nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY, | ||
| 226 | platform->unified_memory); | ||
| 227 | __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, | ||
| 228 | platform->unify_address_spaces); | ||
| 229 | __nvgpu_set_enabled(g, NVGPU_MM_FORCE_128K_PMU_VM, | ||
| 230 | platform->force_128K_pmu_vm); | ||
| 231 | |||
| 232 | nvgpu_mutex_init(&g->mm.tlb_lock); | ||
| 233 | nvgpu_mutex_init(&g->mm.priv_lock); | ||
| 234 | } | ||
| 235 | |||
| 236 | int nvgpu_probe(struct gk20a *g, | ||
| 237 | const char *debugfs_symlink, | ||
| 238 | const char *interface_name, | ||
| 239 | struct class *class) | ||
| 240 | { | ||
| 241 | struct device *dev = dev_from_gk20a(g); | ||
| 242 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 243 | int err = 0; | ||
| 244 | |||
| 245 | nvgpu_init_vars(g); | ||
| 246 | nvgpu_init_gr_vars(g); | ||
| 247 | nvgpu_init_timeout(g); | ||
| 248 | nvgpu_init_timeslice(g); | ||
| 249 | nvgpu_init_pm_vars(g); | ||
| 250 | nvgpu_init_vbios_vars(g); | ||
| 251 | nvgpu_init_ltc_vars(g); | ||
| 252 | err = nvgpu_init_soc_vars(g); | ||
| 253 | if (err) { | ||
| 254 | nvgpu_err(g, "init soc vars failed"); | ||
| 255 | return err; | ||
| 256 | } | ||
| 257 | |||
| 258 | /* Initialize the platform interface. */ | ||
| 259 | err = platform->probe(dev); | ||
| 260 | if (err) { | ||
| 261 | if (err == -EPROBE_DEFER) | ||
| 262 | nvgpu_info(g, "platform probe failed"); | ||
| 263 | else | ||
| 264 | nvgpu_err(g, "platform probe failed"); | ||
| 265 | return err; | ||
| 266 | } | ||
| 267 | |||
| 268 | nvgpu_init_mm_vars(g); | ||
| 269 | |||
| 270 | /* platform probe can defer; do user init only once probe succeeds */ | ||
| 271 | err = gk20a_user_init(dev, interface_name, class); | ||
| 272 | if (err) | ||
| 273 | return err; | ||
| 274 | |||
| 275 | if (platform->late_probe) { | ||
| 276 | err = platform->late_probe(dev); | ||
| 277 | if (err) { | ||
| 278 | nvgpu_err(g, "late probe failed"); | ||
| 279 | return err; | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 283 | nvgpu_create_sysfs(dev); | ||
| 284 | gk20a_debug_init(g, debugfs_symlink); | ||
| 285 | |||
| 286 | g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); | ||
| 287 | if (!g->dbg_regops_tmp_buf) { | ||
| 288 | nvgpu_err(g, "couldn't allocate regops tmp buf"); | ||
| 289 | return -ENOMEM; | ||
| 290 | } | ||
| 291 | g->dbg_regops_tmp_buf_ops = | ||
| 292 | SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); | ||
| 293 | |||
| 294 | g->remove_support = gk20a_remove_support; | ||
| 295 | |||
| 296 | nvgpu_ref_init(&g->refcount); | ||
| 297 | |||
| 298 | return 0; | ||
| 299 | } | ||
| 300 | |||
| 301 | /** | ||
| 302 | * cyclic_delta - Returns delta of cyclic integers a and b. | ||
| 303 | * | ||
| 304 | * @a: First integer | ||
| 305 | * @b: Second integer | ||
| 306 | * | ||
| 307 | * Note: if a is ahead of b, delta is positive. | ||
| 308 | */ | ||
| 309 | static int cyclic_delta(int a, int b) | ||
| 310 | { | ||
| 311 | return a - b; | ||
| 312 | } | ||
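Plain subtraction is correct for cyclic counters because the kernel builds with -fno-strict-overflow, so signed subtraction wraps. A worked example (sketch only) with counters that have crossed INT_MAX:

/* Sketch: cyclic_delta() stays correct across counter wraparound. */
static void cyclic_delta_example(void)
{
	int before_wrap = INT_MAX - 10;	/* counter just before wrapping */
	int after_wrap = INT_MIN + 5;	/* same counter, 16 increments later */

	/* 16, the true forward distance, even though after_wrap < before_wrap */
	WARN_ON(cyclic_delta(after_wrap, before_wrap) != 16);
}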
| 313 | |||
| 314 | /** | ||
| 315 | * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete | ||
| 316 | * | ||
| 317 | * @g: The GPU to wait on. | ||
| 318 | * | ||
| 319 | * Waits until all interrupt handlers that have been scheduled to run have | ||
| 320 | * completed. | ||
| 321 | */ | ||
| 322 | void nvgpu_wait_for_deferred_interrupts(struct gk20a *g) | ||
| 323 | { | ||
| 324 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 325 | int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count); | ||
| 326 | int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count); | ||
| 327 | |||
| 328 | /* wait until all stalling irqs are handled */ | ||
| 329 | NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq, | ||
| 330 | cyclic_delta(stall_irq_threshold, | ||
| 331 | atomic_read(&l->sw_irq_stall_last_handled)) | ||
| 332 | <= 0, 0); | ||
| 333 | |||
| 334 | /* wait until all non-stalling irqs are handled */ | ||
| 335 | NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq, | ||
| 336 | cyclic_delta(nonstall_irq_threshold, | ||
| 337 | atomic_read(&l->sw_irq_nonstall_last_handled)) | ||
| 338 | <= 0, 0); | ||
| 339 | } | ||
| 340 | |||
| 341 | static void nvgpu_free_gk20a(struct gk20a *g) | ||
| 342 | { | ||
| 343 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 344 | |||
| 345 | kfree(l); | ||
| 346 | } | ||
| 347 | |||
| 348 | void nvgpu_init_gk20a(struct gk20a *g) | ||
| 349 | { | ||
| 350 | g->free = nvgpu_free_gk20a; | ||
| 351 | } | ||
diff --git a/include/os/linux/driver_common.h b/include/os/linux/driver_common.h new file mode 100644 index 0000000..6f42f77 --- /dev/null +++ b/include/os/linux/driver_common.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef NVGPU_LINUX_DRIVER_COMMON | ||
| 18 | #define NVGPU_LINUX_DRIVER_COMMON | ||
| 19 | |||
| 20 | void nvgpu_init_gk20a(struct gk20a *g); | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/dt.c b/include/os/linux/dt.c new file mode 100644 index 0000000..88e391e --- /dev/null +++ b/include/os/linux/dt.c | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/dt.h> | ||
| 18 | #include <linux/of.h> | ||
| 19 | |||
| 20 | #include "os_linux.h" | ||
| 21 | |||
| 22 | int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name, | ||
| 23 | u32 index, u32 *value) | ||
| 24 | { | ||
| 25 | struct device *dev = dev_from_gk20a(g); | ||
| 26 | struct device_node *np = dev->of_node; | ||
| 27 | |||
| 28 | return of_property_read_u32_index(np, name, index, value); | ||
| 29 | } | ||
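nvgpu_dt_read_u32_index() is a thin wrapper that resolves the device node from the gk20a handle and defers to of_property_read_u32_index(). A hypothetical call site (the property name below is illustrative, not a real binding):

/* Illustrative only: read the second cell of a multi-cell DT property. */
static int example_read_prop(struct gk20a *g, u32 *val)
{
	return nvgpu_dt_read_u32_index(g, "nvidia,example-prop", 1, val);
}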
diff --git a/include/os/linux/ecc_sysfs.c b/include/os/linux/ecc_sysfs.c new file mode 100644 index 0000000..73ae3dc --- /dev/null +++ b/include/os/linux/ecc_sysfs.c | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/ecc.h> | ||
| 18 | #include <nvgpu/gk20a.h> | ||
| 19 | |||
| 20 | #include "os_linux.h" | ||
| 21 | |||
| 22 | int nvgpu_ecc_sysfs_init(struct gk20a *g) | ||
| 23 | { | ||
| 24 | struct device *dev = dev_from_gk20a(g); | ||
| 25 | struct nvgpu_ecc *ecc = &g->ecc; | ||
| 26 | struct dev_ext_attribute *attr; | ||
| 27 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 28 | struct nvgpu_ecc_stat *stat; | ||
| 29 | int i = 0, err = 0; /* stays 0 if the stats list is empty */ | ||
| 30 | |||
| 31 | attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count); | ||
| 32 | if (!attr) | ||
| 33 | return -ENOMEM; | ||
| 34 | |||
| 35 | nvgpu_list_for_each_entry(stat, | ||
| 36 | &ecc->stats_list, nvgpu_ecc_stat, node) { | ||
| 37 | if (i >= ecc->stats_count) { | ||
| 38 | err = -EINVAL; | ||
| 39 | nvgpu_err(g, "stats_list longer than stats_count %d", | ||
| 40 | ecc->stats_count); | ||
| 41 | break; | ||
| 42 | } | ||
| 43 | sysfs_attr_init(&attr[i].attr.attr); | ||
| 44 | attr[i].attr.attr.name = stat->name; | ||
| 45 | attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO); | ||
| 46 | attr[i].var = &stat->counter; | ||
| 47 | attr[i].attr.show = device_show_int; | ||
| 48 | err = device_create_file(dev, &attr[i].attr); | ||
| 49 | if (err) { | ||
| 50 | nvgpu_err(g, "sysfs node create failed for %s", | ||
| 51 | stat->name); | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | i++; | ||
| 55 | } | ||
| 56 | |||
| 57 | if (err) { | ||
| 58 | while (i-- > 0) | ||
| 59 | device_remove_file(dev, &attr[i].attr); | ||
| 60 | nvgpu_kfree(g, attr); | ||
| 61 | return err; | ||
| 62 | } | ||
| 63 | |||
| 64 | l->ecc_attrs = attr; | ||
| 65 | |||
| 66 | return 0; | ||
| 67 | } | ||
| 68 | |||
| 69 | void nvgpu_ecc_sysfs_remove(struct gk20a *g) | ||
| 70 | { | ||
| 71 | struct device *dev = dev_from_gk20a(g); | ||
| 72 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 73 | struct nvgpu_ecc *ecc = &g->ecc; | ||
| 74 | int i; | ||
| 75 | |||
| 76 | for (i = 0; i < ecc->stats_count; i++) | ||
| 77 | device_remove_file(dev, &l->ecc_attrs[i].attr); | ||
| 78 | nvgpu_kfree(g, l->ecc_attrs); | ||
| 79 | l->ecc_attrs = NULL; | ||
| 80 | } | ||
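For context, each stat above is published through a dev_ext_attribute whose ->var points at the live counter, and the stock device_show_int() helper formats it. A minimal sketch of the equivalent show callback, assuming the counter is a plain int:

	#include <linux/device.h>

	/* Roughly what device_show_int() does for the attributes created
	 * above: recover the dev_ext_attribute and print the int that
	 * ->var points at (here, an ECC error counter). */
	static ssize_t ecc_stat_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
	{
		struct dev_ext_attribute *ea =
			container_of(attr, struct dev_ext_attribute, attr);

		return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)ea->var);
	}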
diff --git a/include/os/linux/firmware.c b/include/os/linux/firmware.c new file mode 100644 index 0000000..8f0344b --- /dev/null +++ b/include/os/linux/firmware.c | |||
| @@ -0,0 +1,117 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/firmware.h> | ||
| 18 | |||
| 19 | #include <nvgpu/kmem.h> | ||
| 20 | #include <nvgpu/bug.h> | ||
| 21 | #include <nvgpu/firmware.h> | ||
| 22 | #include <nvgpu/gk20a.h> | ||
| 23 | |||
| 24 | #include "platform_gk20a.h" | ||
| 25 | #include "os_linux.h" | ||
| 26 | |||
| 27 | static const struct firmware *do_request_firmware(struct device *dev, | ||
| 28 | const char *prefix, const char *fw_name, int flags) | ||
| 29 | { | ||
| 30 | const struct firmware *fw; | ||
| 31 | char *fw_path = NULL; | ||
| 32 | int path_len, err; | ||
| 33 | |||
| 34 | if (prefix) { | ||
| 35 | path_len = strlen(prefix) + strlen(fw_name); | ||
| 36 | path_len += 2; /* for the path separator and zero terminator */ | ||
| 37 | |||
| 38 | fw_path = nvgpu_kzalloc(get_gk20a(dev), | ||
| 39 | sizeof(*fw_path) * path_len); | ||
| 40 | if (!fw_path) | ||
| 41 | return NULL; | ||
| 42 | |||
| 43 | sprintf(fw_path, "%s/%s", prefix, fw_name); | ||
| 44 | fw_name = fw_path; | ||
| 45 | } | ||
| 46 | |||
| 47 | if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN) | ||
| 48 | err = request_firmware_direct(&fw, fw_name, dev); | ||
| 49 | else | ||
| 50 | err = request_firmware(&fw, fw_name, dev); | ||
| 51 | |||
| 52 | nvgpu_kfree(get_gk20a(dev), fw_path); | ||
| 53 | if (err) | ||
| 54 | return NULL; | ||
| 55 | return fw; | ||
| 56 | } | ||
| 57 | |||
| 58 | /* This is a simple wrapper around request_firmware that takes 'fw_name' and | ||
| 59 | * applies an IP specific relative path prefix to it. The caller is | ||
| 60 | * responsible for calling nvgpu_release_firmware later. */ | ||
| 61 | struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g, | ||
| 62 | const char *fw_name, | ||
| 63 | int flags) | ||
| 64 | { | ||
| 65 | struct device *dev = dev_from_gk20a(g); | ||
| 66 | struct nvgpu_firmware *fw; | ||
| 67 | const struct firmware *linux_fw; | ||
| 68 | |||
| 69 | /* current->fs is NULL when called from SYS_EXIT; check for it | ||
| 70 | * here to prevent a crash in request_firmware. */ | ||
| 71 | if (!current->fs || !fw_name) | ||
| 72 | return NULL; | ||
| 73 | |||
| 74 | fw = nvgpu_kzalloc(g, sizeof(*fw)); | ||
| 75 | if (!fw) | ||
| 76 | return NULL; | ||
| 77 | |||
| 78 | linux_fw = do_request_firmware(dev, g->name, fw_name, flags); | ||
| 79 | |||
| 80 | #ifdef CONFIG_TEGRA_GK20A | ||
| 81 | /* TO BE REMOVED - Support loading from legacy SOC specific path. */ | ||
| 82 | if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) { | ||
| 83 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 84 | linux_fw = do_request_firmware(dev, | ||
| 85 | platform->soc_name, fw_name, flags); | ||
| 86 | } | ||
| 87 | #endif | ||
| 88 | |||
| 89 | if (!linux_fw) | ||
| 90 | goto err; | ||
| 91 | |||
| 92 | fw->data = nvgpu_kmalloc(g, linux_fw->size); | ||
| 93 | if (!fw->data) | ||
| 94 | goto err_release; | ||
| 95 | |||
| 96 | memcpy(fw->data, linux_fw->data, linux_fw->size); | ||
| 97 | fw->size = linux_fw->size; | ||
| 98 | |||
| 99 | release_firmware(linux_fw); | ||
| 100 | |||
| 101 | return fw; | ||
| 102 | |||
| 103 | err_release: | ||
| 104 | release_firmware(linux_fw); | ||
| 105 | err: | ||
| 106 | nvgpu_kfree(g, fw); | ||
| 107 | return NULL; | ||
| 108 | } | ||
| 109 | |||
| 110 | void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw) | ||
| 111 | { | ||
| 112 | if (!fw) | ||
| 113 | return; | ||
| 114 | |||
| 115 | nvgpu_kfree(g, fw->data); | ||
| 116 | nvgpu_kfree(g, fw); | ||
| 117 | } | ||
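A typical call pattern for the wrapper above (editorial sketch; the firmware file name is illustrative). Passing NVGPU_REQUEST_FIRMWARE_NO_WARN routes the request through request_firmware_direct(), so a missing optional blob does not log a warning:

	static int example_load_blob(struct gk20a *g)
	{
		struct nvgpu_firmware *fw;

		/* "example.bin" is a placeholder name */
		fw = nvgpu_request_firmware(g, "example.bin",
					    NVGPU_REQUEST_FIRMWARE_NO_WARN);
		if (!fw)
			return -ENOENT;

		/* ... consume fw->data, fw->size ... */

		nvgpu_release_firmware(g, fw);
		return 0;
	}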
diff --git a/include/os/linux/fuse.c b/include/os/linux/fuse.c new file mode 100644 index 0000000..27851f9 --- /dev/null +++ b/include/os/linux/fuse.c | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <soc/tegra/fuse.h> | ||
| 15 | |||
| 16 | #include <nvgpu/fuse.h> | ||
| 17 | |||
| 18 | int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g) | ||
| 19 | { | ||
| 20 | return tegra_sku_info.gpu_speedo_id; | ||
| 21 | } | ||
| 22 | |||
| 23 | /* | ||
| 24 | * Use the tegra_fuse_control_read/write() APIs for fuse offsets up to 0x100. | ||
| 25 | * Use the tegra_fuse_readl/writel() APIs for fuse offsets above 0x100. | ||
| 26 | */ | ||
| 27 | void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val) | ||
| 28 | { | ||
| 29 | tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0); | ||
| 30 | } | ||
| 31 | |||
| 32 | void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val) | ||
| 33 | { | ||
| 34 | tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0); | ||
| 35 | } | ||
| 36 | |||
| 37 | void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val) | ||
| 38 | { | ||
| 39 | tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0); | ||
| 40 | } | ||
| 41 | |||
| 42 | void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val) | ||
| 43 | { | ||
| 44 | tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0); | ||
| 45 | } | ||
| 46 | |||
| 47 | int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val) | ||
| 48 | { | ||
| 49 | return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val); | ||
| 50 | } | ||
| 51 | |||
| 52 | int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val) | ||
| 53 | { | ||
| 54 | return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val); | ||
| 55 | } | ||
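Usage sketch: tegra_fuse_readl() returns non-zero when the fuse cannot be read, so callers are expected to check the return value before trusting *val. The bit tested below is illustrative only:

	static bool example_fuse_bit_set(struct gk20a *g)
	{
		u32 val = 0;

		if (nvgpu_tegra_fuse_read_gcplex_config_fuse(g, &val) != 0)
			return false; /* fuse unreadable on this platform */

		return (val & BIT(0)) != 0U; /* bit position is illustrative */
	}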
diff --git a/include/os/linux/intr.c b/include/os/linux/intr.c new file mode 100644 index 0000000..8838b72 --- /dev/null +++ b/include/os/linux/intr.c | |||
| @@ -0,0 +1,136 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <trace/events/gk20a.h> | ||
| 15 | #include <linux/irqreturn.h> | ||
| 16 | |||
| 17 | #include <nvgpu/gk20a.h> | ||
| 18 | |||
| 19 | #include <nvgpu/atomic.h> | ||
| 20 | #include <nvgpu/unit.h> | ||
| 21 | #include "os_linux.h" | ||
| 22 | |||
| 23 | irqreturn_t nvgpu_intr_stall(struct gk20a *g) | ||
| 24 | { | ||
| 25 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 26 | u32 mc_intr_0; | ||
| 27 | |||
| 28 | trace_mc_gk20a_intr_stall(g->name); | ||
| 29 | |||
| 30 | if (!g->power_on) | ||
| 31 | return IRQ_NONE; | ||
| 32 | |||
| 33 | /* not from gpu when sharing irq with others */ | ||
| 34 | mc_intr_0 = g->ops.mc.intr_stall(g); | ||
| 35 | if (unlikely(!mc_intr_0)) | ||
| 36 | return IRQ_NONE; | ||
| 37 | |||
| 38 | g->ops.mc.intr_stall_pause(g); | ||
| 39 | |||
| 40 | atomic_inc(&l->hw_irq_stall_count); | ||
| 41 | |||
| 42 | trace_mc_gk20a_intr_stall_done(g->name); | ||
| 43 | |||
| 44 | return IRQ_WAKE_THREAD; | ||
| 45 | } | ||
| 46 | |||
| 47 | irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g) | ||
| 48 | { | ||
| 49 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 50 | int hw_irq_count; | ||
| 51 | |||
| 52 | nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched"); | ||
| 53 | |||
| 54 | trace_mc_gk20a_intr_thread_stall(g->name); | ||
| 55 | |||
| 56 | hw_irq_count = atomic_read(&l->hw_irq_stall_count); | ||
| 57 | g->ops.mc.isr_stall(g); | ||
| 58 | g->ops.mc.intr_stall_resume(g); | ||
| 59 | /* sync handled irq counter before re-enabling interrupts */ | ||
| 60 | atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count); | ||
| 61 | |||
| 62 | nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq); | ||
| 63 | |||
| 64 | trace_mc_gk20a_intr_thread_stall_done(g->name); | ||
| 65 | |||
| 66 | return IRQ_HANDLED; | ||
| 67 | } | ||
| 68 | |||
| 69 | irqreturn_t nvgpu_intr_nonstall(struct gk20a *g) | ||
| 70 | { | ||
| 71 | u32 non_stall_intr_val; | ||
| 72 | u32 hw_irq_count; | ||
| 73 | int ops_old, ops_new, ops = 0; | ||
| 74 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 75 | |||
| 76 | if (!g->power_on) | ||
| 77 | return IRQ_NONE; | ||
| 78 | |||
| 79 | /* not from gpu when sharing irq with others */ | ||
| 80 | non_stall_intr_val = g->ops.mc.intr_nonstall(g); | ||
| 81 | if (unlikely(!non_stall_intr_val)) | ||
| 82 | return IRQ_NONE; | ||
| 83 | |||
| 84 | g->ops.mc.intr_nonstall_pause(g); | ||
| 85 | |||
| 86 | ops = g->ops.mc.isr_nonstall(g); | ||
| 87 | if (ops) { | ||
| 88 | do { | ||
| 89 | ops_old = atomic_read(&l->nonstall_ops); | ||
| 90 | ops_new = ops_old | ops; | ||
| 91 | } while (ops_old != atomic_cmpxchg(&l->nonstall_ops, | ||
| 92 | ops_old, ops_new)); | ||
| 93 | |||
| 94 | queue_work(l->nonstall_work_queue, &l->nonstall_fn_work); | ||
| 95 | } | ||
| 96 | |||
| 97 | hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count); | ||
| 98 | |||
| 99 | /* sync handled irq counter before re-enabling interrupts */ | ||
| 100 | atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count); | ||
| 101 | |||
| 102 | g->ops.mc.intr_nonstall_resume(g); | ||
| 103 | |||
| 104 | nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq); | ||
| 105 | |||
| 106 | return IRQ_HANDLED; | ||
| 107 | } | ||
| 108 | |||
| 109 | static void mc_gk20a_handle_intr_nonstall(struct gk20a *g, u32 ops) | ||
| 110 | { | ||
| 111 | bool semaphore_wakeup, post_events; | ||
| 112 | |||
| 113 | semaphore_wakeup = | ||
| 114 | (((ops & GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE) != 0U) ? | ||
| 115 | true : false); | ||
| 116 | post_events = (((ops & GK20A_NONSTALL_OPS_POST_EVENTS) != 0U) ? | ||
| 117 | true : false); | ||
| 118 | |||
| 119 | if (semaphore_wakeup) { | ||
| 120 | g->ops.semaphore_wakeup(g, post_events); | ||
| 121 | } | ||
| 122 | } | ||
| 123 | |||
| 124 | void nvgpu_intr_nonstall_cb(struct work_struct *work) | ||
| 125 | { | ||
| 126 | struct nvgpu_os_linux *l = | ||
| 127 | container_of(work, struct nvgpu_os_linux, nonstall_fn_work); | ||
| 128 | struct gk20a *g = &l->g; | ||
| 129 | |||
| 130 | do { | ||
| 131 | u32 ops; | ||
| 132 | |||
| 133 | ops = atomic_xchg(&l->nonstall_ops, 0); | ||
| 134 | mc_gk20a_handle_intr_nonstall(g, ops); | ||
| 135 | } while (atomic_read(&l->nonstall_ops) != 0); | ||
| 136 | } | ||
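The stall handlers above form a top-half/bottom-half pair: nvgpu_intr_stall() runs in hard-IRQ context and returns IRQ_WAKE_THREAD when there is work, and nvgpu_intr_thread_stall() then runs in the IRQ thread. A hedged sketch of how such a pair is typically registered (the real registration lives elsewhere in the driver; the local wrapper names are illustrative):

	#include <linux/interrupt.h>

	static irqreturn_t example_stall_isr(int irq, void *dev_id)
	{
		struct gk20a *g = dev_id;

		return nvgpu_intr_stall(g);	/* IRQ_WAKE_THREAD on work */
	}

	static irqreturn_t example_stall_thread(int irq, void *dev_id)
	{
		struct gk20a *g = dev_id;

		return nvgpu_intr_thread_stall(g);
	}

	/* err = request_threaded_irq(irq, example_stall_isr,
	 *			      example_stall_thread, 0,
	 *			      "nvgpu_stall", g);
	 */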
diff --git a/include/os/linux/intr.h b/include/os/linux/intr.h new file mode 100644 index 0000000..d43cdcc --- /dev/null +++ b/include/os/linux/intr.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #ifndef __NVGPU_LINUX_INTR_H__ | ||
| 15 | #define __NVGPU_LINUX_INTR_H__ | ||
| 16 | struct gk20a; | ||
| 17 | |||
| 18 | irqreturn_t nvgpu_intr_stall(struct gk20a *g); | ||
| 19 | irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g); | ||
| 20 | irqreturn_t nvgpu_intr_nonstall(struct gk20a *g); | ||
| 21 | void nvgpu_intr_nonstall_cb(struct work_struct *work); | ||
| 22 | #endif | ||
diff --git a/include/os/linux/io.c b/include/os/linux/io.c new file mode 100644 index 0000000..3e84e88 --- /dev/null +++ b/include/os/linux/io.c | |||
| @@ -0,0 +1,130 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <nvgpu/io.h> | ||
| 15 | #include <nvgpu/types.h> | ||
| 16 | #include <nvgpu/gk20a.h> | ||
| 17 | |||
| 18 | #include "os_linux.h" | ||
| 19 | |||
| 20 | void nvgpu_writel(struct gk20a *g, u32 r, u32 v) | ||
| 21 | { | ||
| 22 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 23 | |||
| 24 | if (unlikely(!l->regs)) { | ||
| 25 | __gk20a_warn_on_no_regs(); | ||
| 26 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | ||
| 27 | } else { | ||
| 28 | writel_relaxed(v, l->regs + r); | ||
| 29 | nvgpu_wmb(); | ||
| 30 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | void nvgpu_writel_relaxed(struct gk20a *g, u32 r, u32 v) | ||
| 35 | { | ||
| 36 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 37 | |||
| 38 | if (unlikely(!l->regs)) { | ||
| 39 | __gk20a_warn_on_no_regs(); | ||
| 40 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | ||
| 41 | } else { | ||
| 42 | writel_relaxed(v, l->regs + r); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | u32 nvgpu_readl(struct gk20a *g, u32 r) | ||
| 47 | { | ||
| 48 | u32 v = __nvgpu_readl(g, r); | ||
| 49 | |||
| 50 | if (v == 0xffffffff) | ||
| 51 | __nvgpu_check_gpu_state(g); | ||
| 52 | |||
| 53 | return v; | ||
| 54 | } | ||
| 55 | |||
| 56 | u32 __nvgpu_readl(struct gk20a *g, u32 r) | ||
| 57 | { | ||
| 58 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 59 | u32 v = 0xffffffff; | ||
| 60 | |||
| 61 | if (unlikely(!l->regs)) { | ||
| 62 | __gk20a_warn_on_no_regs(); | ||
| 63 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | ||
| 64 | } else { | ||
| 65 | v = readl(l->regs + r); | ||
| 66 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); | ||
| 67 | } | ||
| 68 | |||
| 69 | return v; | ||
| 70 | } | ||
| 71 | |||
| 72 | void nvgpu_writel_loop(struct gk20a *g, u32 r, u32 v) | ||
| 73 | { | ||
| 74 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 75 | |||
| 76 | if (unlikely(!l->regs)) { | ||
| 77 | __gk20a_warn_on_no_regs(); | ||
| 78 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | ||
| 79 | } else { | ||
| 80 | nvgpu_wmb(); | ||
| 81 | do { | ||
| 82 | writel_relaxed(v, l->regs + r); | ||
| 83 | } while (readl(l->regs + r) != v); | ||
| 84 | nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | |||
| 88 | void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v) | ||
| 89 | { | ||
| 90 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 91 | |||
| 92 | if (unlikely(!l->bar1)) { | ||
| 93 | __gk20a_warn_on_no_regs(); | ||
| 94 | nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); | ||
| 95 | } else { | ||
| 96 | nvgpu_wmb(); | ||
| 97 | writel_relaxed(v, l->bar1 + b); | ||
| 98 | nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | |||
| 102 | u32 nvgpu_bar1_readl(struct gk20a *g, u32 b) | ||
| 103 | { | ||
| 104 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 105 | u32 v = 0xffffffff; | ||
| 106 | |||
| 107 | if (unlikely(!l->bar1)) { | ||
| 108 | __gk20a_warn_on_no_regs(); | ||
| 109 | nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); | ||
| 110 | } else { | ||
| 111 | v = readl(l->bar1 + b); | ||
| 112 | nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); | ||
| 113 | } | ||
| 114 | |||
| 115 | return v; | ||
| 116 | } | ||
| 117 | |||
| 118 | bool nvgpu_io_exists(struct gk20a *g) | ||
| 119 | { | ||
| 120 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 121 | |||
| 122 | return l->regs != NULL; | ||
| 123 | } | ||
| 124 | |||
| 125 | bool nvgpu_io_valid_reg(struct gk20a *g, u32 r) | ||
| 126 | { | ||
| 127 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 128 | |||
| 129 | return r < resource_size(l->regs); | ||
| 130 | } | ||
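Usage sketch for the accessors above: nvgpu_writel() posts the write and then fences with nvgpu_wmb(), nvgpu_readl() flags a dead GPU when the bus returns all-ones, and nvgpu_writel_loop() keeps re-writing until the read-back matches (useful for registers that can drop writes). The offset below is hypothetical:

	static void example_poke_register(struct gk20a *g)
	{
		const u32 reg = 0x100;	/* hypothetical register offset */

		nvgpu_writel(g, reg, 0x1);	/* write + barrier + log */

		if (nvgpu_readl(g, reg) == 0xffffffff)
			return;	/* nvgpu_readl() already checked GPU state */

		nvgpu_writel_loop(g, reg, 0x2);	/* retry until it sticks */
	}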
diff --git a/include/os/linux/io_usermode.c b/include/os/linux/io_usermode.c new file mode 100644 index 0000000..ffc532f --- /dev/null +++ b/include/os/linux/io_usermode.c | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <nvgpu/io.h> | ||
| 15 | #include <nvgpu/types.h> | ||
| 16 | #include <nvgpu/gk20a.h> | ||
| 17 | |||
| 18 | #include "os_linux.h" | ||
| 19 | |||
| 20 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
| 21 | |||
| 22 | void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v) | ||
| 23 | { | ||
| 24 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 25 | void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r()); | ||
| 26 | |||
| 27 | writel_relaxed(v, reg); | ||
| 28 | nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v); | ||
| 29 | } | ||
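The usermode region is a separate mapping from BAR0, so offsets are rebased against usermode_cfg0_r() before the write. A hedged sketch of the doorbell-style use this helper exists for; the register accessor name comes from the gv11b usermode header included above, and the token encoding is illustrative:

	static void example_ring_doorbell(struct gk20a *g, u32 work_token)
	{
		/* token value/encoding is illustrative only */
		nvgpu_usermode_writel(g, usermode_notify_channel_pending_r(),
				      work_token);
	}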
diff --git a/include/os/linux/ioctl.c b/include/os/linux/ioctl.c new file mode 100644 index 0000000..a40df2a --- /dev/null +++ b/include/os/linux/ioctl.c | |||
| @@ -0,0 +1,297 @@ | |||
| 1 | /* | ||
| 2 | * NVGPU IOCTLs | ||
| 3 | * | ||
| 4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/file.h> | ||
| 20 | |||
| 21 | #include <nvgpu/nvgpu_common.h> | ||
| 22 | #include <nvgpu/ctxsw_trace.h> | ||
| 23 | #include <nvgpu/gk20a.h> | ||
| 24 | |||
| 25 | #include "gk20a/dbg_gpu_gk20a.h" | ||
| 26 | |||
| 27 | #include "ioctl_channel.h" | ||
| 28 | #include "ioctl_ctrl.h" | ||
| 29 | #include "ioctl_as.h" | ||
| 30 | #include "ioctl_tsg.h" | ||
| 31 | #include "ioctl_dbg.h" | ||
| 32 | #include "module.h" | ||
| 33 | #include "os_linux.h" | ||
| 34 | #include "ctxsw_trace.h" | ||
| 35 | #include "platform_gk20a.h" | ||
| 36 | |||
| 37 | #define GK20A_NUM_CDEVS 8 /* covers the optional -ctxsw node */ | ||
| 38 | |||
| 39 | const struct file_operations gk20a_channel_ops = { | ||
| 40 | .owner = THIS_MODULE, | ||
| 41 | .release = gk20a_channel_release, | ||
| 42 | .open = gk20a_channel_open, | ||
| 43 | #ifdef CONFIG_COMPAT | ||
| 44 | .compat_ioctl = gk20a_channel_ioctl, | ||
| 45 | #endif | ||
| 46 | .unlocked_ioctl = gk20a_channel_ioctl, | ||
| 47 | }; | ||
| 48 | |||
| 49 | static const struct file_operations gk20a_ctrl_ops = { | ||
| 50 | .owner = THIS_MODULE, | ||
| 51 | .release = gk20a_ctrl_dev_release, | ||
| 52 | .open = gk20a_ctrl_dev_open, | ||
| 53 | .unlocked_ioctl = gk20a_ctrl_dev_ioctl, | ||
| 54 | #ifdef CONFIG_COMPAT | ||
| 55 | .compat_ioctl = gk20a_ctrl_dev_ioctl, | ||
| 56 | #endif | ||
| 57 | .mmap = gk20a_ctrl_dev_mmap, | ||
| 58 | }; | ||
| 59 | |||
| 60 | static const struct file_operations gk20a_dbg_ops = { | ||
| 61 | .owner = THIS_MODULE, | ||
| 62 | .release = gk20a_dbg_gpu_dev_release, | ||
| 63 | .open = gk20a_dbg_gpu_dev_open, | ||
| 64 | .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
| 65 | .poll = gk20a_dbg_gpu_dev_poll, | ||
| 66 | #ifdef CONFIG_COMPAT | ||
| 67 | .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
| 68 | #endif | ||
| 69 | }; | ||
| 70 | |||
| 71 | static const struct file_operations gk20a_as_ops = { | ||
| 72 | .owner = THIS_MODULE, | ||
| 73 | .release = gk20a_as_dev_release, | ||
| 74 | .open = gk20a_as_dev_open, | ||
| 75 | #ifdef CONFIG_COMPAT | ||
| 76 | .compat_ioctl = gk20a_as_dev_ioctl, | ||
| 77 | #endif | ||
| 78 | .unlocked_ioctl = gk20a_as_dev_ioctl, | ||
| 79 | }; | ||
| 80 | |||
| 81 | /* | ||
| 82 | * Note: we use a different 'open' to trigger handling of the profiler session. | ||
| 83 | * Most of the code is shared between the two paths; if handling both in the | ||
| 84 | * same path ever becomes too tangled, they can be separated | ||
| 85 | * cleanly. | ||
| 86 | */ | ||
| 87 | static const struct file_operations gk20a_prof_ops = { | ||
| 88 | .owner = THIS_MODULE, | ||
| 89 | .release = gk20a_dbg_gpu_dev_release, | ||
| 90 | .open = gk20a_prof_gpu_dev_open, | ||
| 91 | .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
| 92 | #ifdef CONFIG_COMPAT | ||
| 93 | .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
| 94 | #endif | ||
| 95 | }; | ||
| 96 | |||
| 97 | static const struct file_operations gk20a_tsg_ops = { | ||
| 98 | .owner = THIS_MODULE, | ||
| 99 | .release = nvgpu_ioctl_tsg_dev_release, | ||
| 100 | .open = nvgpu_ioctl_tsg_dev_open, | ||
| 101 | #ifdef CONFIG_COMPAT | ||
| 102 | .compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl, | ||
| 103 | #endif | ||
| 104 | .unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl, | ||
| 105 | }; | ||
| 106 | |||
| 107 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 108 | static const struct file_operations gk20a_ctxsw_ops = { | ||
| 109 | .owner = THIS_MODULE, | ||
| 110 | .release = gk20a_ctxsw_dev_release, | ||
| 111 | .open = gk20a_ctxsw_dev_open, | ||
| 112 | #ifdef CONFIG_COMPAT | ||
| 113 | .compat_ioctl = gk20a_ctxsw_dev_ioctl, | ||
| 114 | #endif | ||
| 115 | .unlocked_ioctl = gk20a_ctxsw_dev_ioctl, | ||
| 116 | .poll = gk20a_ctxsw_dev_poll, | ||
| 117 | .read = gk20a_ctxsw_dev_read, | ||
| 118 | .mmap = gk20a_ctxsw_dev_mmap, | ||
| 119 | }; | ||
| 120 | #endif | ||
| 121 | |||
| 122 | static const struct file_operations gk20a_sched_ops = { | ||
| 123 | .owner = THIS_MODULE, | ||
| 124 | .release = gk20a_sched_dev_release, | ||
| 125 | .open = gk20a_sched_dev_open, | ||
| 126 | #ifdef CONFIG_COMPAT | ||
| 127 | .compat_ioctl = gk20a_sched_dev_ioctl, | ||
| 128 | #endif | ||
| 129 | .unlocked_ioctl = gk20a_sched_dev_ioctl, | ||
| 130 | .poll = gk20a_sched_dev_poll, | ||
| 131 | .read = gk20a_sched_dev_read, | ||
| 132 | }; | ||
| 133 | |||
| 134 | static int gk20a_create_device( | ||
| 135 | struct device *dev, int devno, | ||
| 136 | const char *interface_name, const char *cdev_name, | ||
| 137 | struct cdev *cdev, struct device **out, | ||
| 138 | const struct file_operations *ops, | ||
| 139 | struct class *class) | ||
| 140 | { | ||
| 141 | struct device *subdev; | ||
| 142 | int err; | ||
| 143 | struct gk20a *g = gk20a_from_dev(dev); | ||
| 144 | |||
| 145 | nvgpu_log_fn(g, " "); | ||
| 146 | |||
| 147 | cdev_init(cdev, ops); | ||
| 148 | cdev->owner = THIS_MODULE; | ||
| 149 | |||
| 150 | err = cdev_add(cdev, devno, 1); | ||
| 151 | if (err) { | ||
| 152 | dev_err(dev, "failed to add %s cdev\n", cdev_name); | ||
| 153 | return err; | ||
| 154 | } | ||
| 155 | |||
| 156 | subdev = device_create(class, NULL, devno, NULL, | ||
| 157 | interface_name, cdev_name); | ||
| 158 | |||
| 159 | if (IS_ERR(subdev)) { | ||
| 160 | err = PTR_ERR(subdev); | ||
| 161 | cdev_del(cdev); | ||
| 162 | dev_err(dev, "failed to create %s device for %s\n", | ||
| 163 | cdev_name, dev_name(dev)); | ||
| 164 | return err; | ||
| 165 | } | ||
| 166 | |||
| 167 | *out = subdev; | ||
| 168 | return 0; | ||
| 169 | } | ||
| 170 | |||
| 171 | void gk20a_user_deinit(struct device *dev, struct class *class) | ||
| 172 | { | ||
| 173 | struct gk20a *g = gk20a_from_dev(dev); | ||
| 174 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 175 | |||
| 176 | if (l->channel.node) { | ||
| 177 | device_destroy(class, l->channel.cdev.dev); | ||
| 178 | cdev_del(&l->channel.cdev); | ||
| 179 | } | ||
| 180 | |||
| 181 | if (l->as_dev.node) { | ||
| 182 | device_destroy(class, l->as_dev.cdev.dev); | ||
| 183 | cdev_del(&l->as_dev.cdev); | ||
| 184 | } | ||
| 185 | |||
| 186 | if (l->ctrl.node) { | ||
| 187 | device_destroy(class, l->ctrl.cdev.dev); | ||
| 188 | cdev_del(&l->ctrl.cdev); | ||
| 189 | } | ||
| 190 | |||
| 191 | if (l->dbg.node) { | ||
| 192 | device_destroy(class, l->dbg.cdev.dev); | ||
| 193 | cdev_del(&l->dbg.cdev); | ||
| 194 | } | ||
| 195 | |||
| 196 | if (l->prof.node) { | ||
| 197 | device_destroy(class, l->prof.cdev.dev); | ||
| 198 | cdev_del(&l->prof.cdev); | ||
| 199 | } | ||
| 200 | |||
| 201 | if (l->tsg.node) { | ||
| 202 | device_destroy(class, l->tsg.cdev.dev); | ||
| 203 | cdev_del(&l->tsg.cdev); | ||
| 204 | } | ||
| 205 | |||
| 206 | if (l->ctxsw.node) { | ||
| 207 | device_destroy(class, l->ctxsw.cdev.dev); | ||
| 208 | cdev_del(&l->ctxsw.cdev); | ||
| 209 | } | ||
| 210 | |||
| 211 | if (l->sched.node) { | ||
| 212 | device_destroy(class, l->sched.cdev.dev); | ||
| 213 | cdev_del(&l->sched.cdev); | ||
| 214 | } | ||
| 215 | |||
| 216 | if (l->cdev_region) | ||
| 217 | unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS); | ||
| 218 | } | ||
| 219 | |||
| 220 | int gk20a_user_init(struct device *dev, const char *interface_name, | ||
| 221 | struct class *class) | ||
| 222 | { | ||
| 223 | int err; | ||
| 224 | dev_t devno; | ||
| 225 | struct gk20a *g = gk20a_from_dev(dev); | ||
| 226 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 227 | |||
| 228 | err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev)); | ||
| 229 | if (err) { | ||
| 230 | dev_err(dev, "failed to allocate devno\n"); | ||
| 231 | goto fail; | ||
| 232 | } | ||
| 233 | l->cdev_region = devno; | ||
| 234 | |||
| 235 | err = gk20a_create_device(dev, devno++, interface_name, "", | ||
| 236 | &l->channel.cdev, &l->channel.node, | ||
| 237 | &gk20a_channel_ops, | ||
| 238 | class); | ||
| 239 | if (err) | ||
| 240 | goto fail; | ||
| 241 | |||
| 242 | err = gk20a_create_device(dev, devno++, interface_name, "-as", | ||
| 243 | &l->as_dev.cdev, &l->as_dev.node, | ||
| 244 | &gk20a_as_ops, | ||
| 245 | class); | ||
| 246 | if (err) | ||
| 247 | goto fail; | ||
| 248 | |||
| 249 | err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", | ||
| 250 | &l->ctrl.cdev, &l->ctrl.node, | ||
| 251 | &gk20a_ctrl_ops, | ||
| 252 | class); | ||
| 253 | if (err) | ||
| 254 | goto fail; | ||
| 255 | |||
| 256 | err = gk20a_create_device(dev, devno++, interface_name, "-dbg", | ||
| 257 | &l->dbg.cdev, &l->dbg.node, | ||
| 258 | &gk20a_dbg_ops, | ||
| 259 | class); | ||
| 260 | if (err) | ||
| 261 | goto fail; | ||
| 262 | |||
| 263 | err = gk20a_create_device(dev, devno++, interface_name, "-prof", | ||
| 264 | &l->prof.cdev, &l->prof.node, | ||
| 265 | &gk20a_prof_ops, | ||
| 266 | class); | ||
| 267 | if (err) | ||
| 268 | goto fail; | ||
| 269 | |||
| 270 | err = gk20a_create_device(dev, devno++, interface_name, "-tsg", | ||
| 271 | &l->tsg.cdev, &l->tsg.node, | ||
| 272 | &gk20a_tsg_ops, | ||
| 273 | class); | ||
| 274 | if (err) | ||
| 275 | goto fail; | ||
| 276 | |||
| 277 | #if defined(CONFIG_GK20A_CTXSW_TRACE) | ||
| 278 | err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", | ||
| 279 | &l->ctxsw.cdev, &l->ctxsw.node, | ||
| 280 | &gk20a_ctxsw_ops, | ||
| 281 | class); | ||
| 282 | if (err) | ||
| 283 | goto fail; | ||
| 284 | #endif | ||
| 285 | |||
| 286 | err = gk20a_create_device(dev, devno++, interface_name, "-sched", | ||
| 287 | &l->sched.cdev, &l->sched.node, | ||
| 288 | &gk20a_sched_ops, | ||
| 289 | class); | ||
| 290 | if (err) | ||
| 291 | goto fail; | ||
| 292 | |||
| 293 | return 0; | ||
| 294 | fail: | ||
| 295 | gk20a_user_deinit(dev, class); | ||
| 296 | return err; | ||
| 297 | } | ||
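Seen from userspace (editorial sketch, not part of the diff): each cdev created above surfaces as a character device node named from interface_name plus the suffix. The exact path below is illustrative only, since it depends on how udev materializes the class devices:

	#include <fcntl.h>
	#include <unistd.h>

	int example_open_ctrl_node(void)
	{
		/* hypothetical path; real node name depends on interface_name */
		int fd = open("/dev/nvgpu/igpu0/ctrl", O_RDWR);

		if (fd < 0)
			return -1;

		/* ... issue NVGPU_GPU_IOCTL_* calls on fd ... */
		return fd;
	}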
diff --git a/include/os/linux/ioctl.h b/include/os/linux/ioctl.h new file mode 100644 index 0000000..7bf1671 --- /dev/null +++ b/include/os/linux/ioctl.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | #ifndef __NVGPU_IOCTL_H__ | ||
| 14 | #define __NVGPU_IOCTL_H__ | ||
| 15 | |||
| 16 | struct device; | ||
| 17 | struct class; | ||
| 18 | |||
| 19 | int gk20a_user_init(struct device *dev, const char *interface_name, | ||
| 20 | struct class *class); | ||
| 21 | void gk20a_user_deinit(struct device *dev, struct class *class); | ||
| 22 | |||
| 23 | #endif | ||
diff --git a/include/os/linux/ioctl_as.c b/include/os/linux/ioctl_as.c new file mode 100644 index 0000000..f0cec17 --- /dev/null +++ b/include/os/linux/ioctl_as.c | |||
| @@ -0,0 +1,427 @@ | |||
| 1 | /* | ||
| 2 | * GK20A Address Spaces | ||
| 3 | * | ||
| 4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include <linux/cdev.h> | ||
| 17 | #include <linux/uaccess.h> | ||
| 18 | #include <linux/fs.h> | ||
| 19 | |||
| 20 | #include <trace/events/gk20a.h> | ||
| 21 | |||
| 22 | #include <uapi/linux/nvgpu.h> | ||
| 23 | |||
| 24 | #include <nvgpu/gmmu.h> | ||
| 25 | #include <nvgpu/vm_area.h> | ||
| 26 | #include <nvgpu/log2.h> | ||
| 27 | #include <nvgpu/gk20a.h> | ||
| 28 | #include <nvgpu/channel.h> | ||
| 29 | |||
| 30 | #include <nvgpu/linux/vm.h> | ||
| 31 | |||
| 32 | #include "platform_gk20a.h" | ||
| 33 | #include "ioctl_as.h" | ||
| 34 | #include "os_linux.h" | ||
| 35 | |||
| 36 | static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags) | ||
| 37 | { | ||
| 38 | u32 core_flags = 0; | ||
| 39 | |||
| 40 | if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | ||
| 41 | core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET; | ||
| 42 | if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) | ||
| 43 | core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE; | ||
| 44 | |||
| 45 | return core_flags; | ||
| 46 | } | ||
| 47 | |||
| 48 | static int gk20a_as_ioctl_bind_channel( | ||
| 49 | struct gk20a_as_share *as_share, | ||
| 50 | struct nvgpu_as_bind_channel_args *args) | ||
| 51 | { | ||
| 52 | int err = 0; | ||
| 53 | struct channel_gk20a *ch; | ||
| 54 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
| 55 | |||
| 56 | nvgpu_log_fn(g, " "); | ||
| 57 | |||
| 58 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
| 59 | if (!ch) | ||
| 60 | return -EINVAL; | ||
| 61 | |||
| 62 | if (gk20a_channel_as_bound(ch)) { | ||
| 63 | err = -EINVAL; | ||
| 64 | goto out; | ||
| 65 | } | ||
| 66 | |||
| 67 | /* this will set channel_gk20a->vm */ | ||
| 68 | err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch); | ||
| 69 | |||
| 70 | out: | ||
| 71 | gk20a_channel_put(ch); | ||
| 72 | return err; | ||
| 73 | } | ||
| 74 | |||
| 75 | static int gk20a_as_ioctl_alloc_space( | ||
| 76 | struct gk20a_as_share *as_share, | ||
| 77 | struct nvgpu_as_alloc_space_args *args) | ||
| 78 | { | ||
| 79 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
| 80 | |||
| 81 | nvgpu_log_fn(g, " "); | ||
| 82 | return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size, | ||
| 83 | &args->o_a.offset, | ||
| 84 | gk20a_as_translate_as_alloc_space_flags(g, | ||
| 85 | args->flags)); | ||
| 86 | } | ||
| 87 | |||
| 88 | static int gk20a_as_ioctl_free_space( | ||
| 89 | struct gk20a_as_share *as_share, | ||
| 90 | struct nvgpu_as_free_space_args *args) | ||
| 91 | { | ||
| 92 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
| 93 | |||
| 94 | nvgpu_log_fn(g, " "); | ||
| 95 | return nvgpu_vm_area_free(as_share->vm, args->offset); | ||
| 96 | } | ||
| 97 | |||
| 98 | static int gk20a_as_ioctl_map_buffer_ex( | ||
| 99 | struct gk20a_as_share *as_share, | ||
| 100 | struct nvgpu_as_map_buffer_ex_args *args) | ||
| 101 | { | ||
| 102 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
| 103 | |||
| 104 | nvgpu_log_fn(g, " "); | ||
| 105 | |||
| 106 | /* unsupported unless direct kind control is requested */ | ||
| 107 | if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) { | ||
| 108 | /* reuse the function-scope g instead of re-deriving it */ | ||
| 109 | nvgpu_log_info(g, "Direct kind control must be requested"); | ||
| 110 | return -EINVAL; | ||
| 111 | } | ||
| 112 | |||
| 113 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, | ||
| 114 | &args->offset, args->flags, | ||
| 115 | args->page_size, | ||
| 116 | args->compr_kind, | ||
| 117 | args->incompr_kind, | ||
| 118 | args->buffer_offset, | ||
| 119 | args->mapping_size, | ||
| 120 | NULL); | ||
| 121 | } | ||
| 122 | |||
| 123 | static int gk20a_as_ioctl_unmap_buffer( | ||
| 124 | struct gk20a_as_share *as_share, | ||
| 125 | struct nvgpu_as_unmap_buffer_args *args) | ||
| 126 | { | ||
| 127 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
| 128 | |||
| 129 | nvgpu_log_fn(g, " "); | ||
| 130 | |||
| 131 | nvgpu_vm_unmap(as_share->vm, args->offset, NULL); | ||
| 132 | |||
| 133 | return 0; | ||
| 134 | } | ||
| 135 | |||
| 136 | static int gk20a_as_ioctl_map_buffer_batch( | ||
| 137 | struct gk20a_as_share *as_share, | ||
| 138 | struct nvgpu_as_map_buffer_batch_args *args) | ||
| 139 | { | ||
| 140 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
| 141 | u32 i; | ||
| 142 | int err = 0; | ||
| 143 | |||
| 144 | struct nvgpu_as_unmap_buffer_args __user *user_unmap_args = | ||
| 145 | (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t) | ||
| 146 | args->unmaps; | ||
| 147 | struct nvgpu_as_map_buffer_ex_args __user *user_map_args = | ||
| 148 | (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t) | ||
| 149 | args->maps; | ||
| 150 | |||
| 151 | struct vm_gk20a_mapping_batch batch; | ||
| 152 | |||
| 153 | nvgpu_log_fn(g, " "); | ||
| 154 | |||
| 155 | if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT || | ||
| 156 | args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT) | ||
| 157 | return -EINVAL; | ||
| 158 | |||
| 159 | nvgpu_vm_mapping_batch_start(&batch); | ||
| 160 | |||
| 161 | for (i = 0; i < args->num_unmaps; ++i) { | ||
| 162 | struct nvgpu_as_unmap_buffer_args unmap_args; | ||
| 163 | |||
| 164 | if (copy_from_user(&unmap_args, &user_unmap_args[i], | ||
| 165 | sizeof(unmap_args))) { | ||
| 166 | err = -EFAULT; | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | |||
| 170 | nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch); | ||
| 171 | } | ||
| 172 | |||
| 173 | nvgpu_speculation_barrier(); | ||
| 174 | if (err) { | ||
| 175 | nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); | ||
| 176 | |||
| 177 | args->num_unmaps = i; | ||
| 178 | args->num_maps = 0; | ||
| 179 | return err; | ||
| 180 | } | ||
| 181 | |||
| 182 | for (i = 0; i < args->num_maps; ++i) { | ||
| 183 | s16 compressible_kind; | ||
| 184 | s16 incompressible_kind; | ||
| 185 | |||
| 186 | struct nvgpu_as_map_buffer_ex_args map_args; | ||
| 187 | memset(&map_args, 0, sizeof(map_args)); | ||
| 188 | |||
| 189 | if (copy_from_user(&map_args, &user_map_args[i], | ||
| 190 | sizeof(map_args))) { | ||
| 191 | err = -EFAULT; | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | |||
| 195 | if (map_args.flags & | ||
| 196 | NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { | ||
| 197 | compressible_kind = map_args.compr_kind; | ||
| 198 | incompressible_kind = map_args.incompr_kind; | ||
| 199 | } else { | ||
| 200 | /* direct kind control must be used */ | ||
| 201 | err = -EINVAL; | ||
| 202 | break; | ||
| 203 | } | ||
| 204 | |||
| 205 | err = nvgpu_vm_map_buffer( | ||
| 206 | as_share->vm, map_args.dmabuf_fd, | ||
| 207 | &map_args.offset, map_args.flags, map_args.page_size, | ||
| 208 | compressible_kind, incompressible_kind, | ||
| 209 | map_args.buffer_offset, | ||
| 210 | map_args.mapping_size, | ||
| 211 | &batch); | ||
| 212 | if (err) | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | |||
| 216 | nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); | ||
| 217 | |||
| 218 | if (err) | ||
| 219 | args->num_maps = i; | ||
| 220 | /* note: args->num_unmaps will be unmodified, which is ok | ||
| 221 | * since all unmaps are done */ | ||
| 222 | |||
| 223 | return err; | ||
| 224 | } | ||
| 225 | |||
| 226 | static int gk20a_as_ioctl_get_va_regions( | ||
| 227 | struct gk20a_as_share *as_share, | ||
| 228 | struct nvgpu_as_get_va_regions_args *args) | ||
| 229 | { | ||
| 230 | unsigned int i; | ||
| 231 | unsigned int write_entries; | ||
| 232 | struct nvgpu_as_va_region __user *user_region_ptr; | ||
| 233 | struct vm_gk20a *vm = as_share->vm; | ||
| 234 | struct gk20a *g = gk20a_from_vm(vm); | ||
| 235 | unsigned int page_sizes = GMMU_PAGE_SIZE_KERNEL; | ||
| 236 | |||
| 237 | nvgpu_log_fn(g, " "); | ||
| 238 | |||
| 239 | if (!vm->big_pages) | ||
| 240 | page_sizes--; | ||
| 241 | |||
| 242 | write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region); | ||
| 243 | if (write_entries > page_sizes) | ||
| 244 | write_entries = page_sizes; | ||
| 245 | |||
| 246 | user_region_ptr = | ||
| 247 | (struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr; | ||
| 248 | |||
| 249 | for (i = 0; i < write_entries; ++i) { | ||
| 250 | struct nvgpu_as_va_region region; | ||
| 251 | struct nvgpu_allocator *vma = vm->vma[i]; | ||
| 252 | |||
| 253 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); | ||
| 254 | |||
| 255 | region.page_size = vm->gmmu_page_sizes[i]; | ||
| 256 | region.offset = nvgpu_alloc_base(vma); | ||
| 257 | /* No __aeabi_uldivmod() on some platforms... */ | ||
| 258 | region.pages = (nvgpu_alloc_end(vma) - | ||
| 259 | nvgpu_alloc_base(vma)) >> ilog2(region.page_size); | ||
| 260 | |||
| 261 | if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) | ||
| 262 | return -EFAULT; | ||
| 263 | } | ||
| 264 | |||
| 265 | args->buf_size = | ||
| 266 | page_sizes * sizeof(struct nvgpu_as_va_region); | ||
| 267 | |||
| 268 | return 0; | ||
| 269 | } | ||
| 270 | |||
| 271 | static int nvgpu_as_ioctl_get_sync_ro_map( | ||
| 272 | struct gk20a_as_share *as_share, | ||
| 273 | struct nvgpu_as_get_sync_ro_map_args *args) | ||
| 274 | { | ||
| 275 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 276 | struct vm_gk20a *vm = as_share->vm; | ||
| 277 | struct gk20a *g = gk20a_from_vm(vm); | ||
| 278 | u64 base_gpuva; | ||
| 279 | u32 sync_size; | ||
| 280 | int err = 0; | ||
| 281 | |||
| 282 | if (!g->ops.fifo.get_sync_ro_map) | ||
| 283 | return -EINVAL; | ||
| 284 | |||
| 285 | if (!nvgpu_has_syncpoints(g)) | ||
| 286 | return -EINVAL; | ||
| 287 | |||
| 288 | err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size); | ||
| 289 | if (err) | ||
| 290 | return err; | ||
| 291 | |||
| 292 | args->base_gpuva = base_gpuva; | ||
| 293 | args->sync_size = sync_size; | ||
| 294 | |||
| 295 | return err; | ||
| 296 | #else | ||
| 297 | return -EINVAL; | ||
| 298 | #endif | ||
| 299 | } | ||
| 300 | |||
| 301 | int gk20a_as_dev_open(struct inode *inode, struct file *filp) | ||
| 302 | { | ||
| 303 | struct nvgpu_os_linux *l; | ||
| 304 | struct gk20a_as_share *as_share; | ||
| 305 | struct gk20a *g; | ||
| 306 | int err; | ||
| 307 | |||
| 308 | l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev); | ||
| 309 | g = &l->g; | ||
| 310 | |||
| 311 | nvgpu_log_fn(g, " "); | ||
| 312 | |||
| 313 | err = gk20a_as_alloc_share(g, 0, 0, &as_share); | ||
| 314 | if (err) { | ||
| 315 | nvgpu_log_fn(g, "failed to alloc share"); | ||
| 316 | return err; | ||
| 317 | } | ||
| 318 | |||
| 319 | filp->private_data = as_share; | ||
| 320 | return 0; | ||
| 321 | } | ||
| 322 | |||
| 323 | int gk20a_as_dev_release(struct inode *inode, struct file *filp) | ||
| 324 | { | ||
| 325 | struct gk20a_as_share *as_share = filp->private_data; | ||
| 326 | |||
| 327 | if (!as_share) | ||
| 328 | return 0; | ||
| 329 | |||
| 330 | return gk20a_as_release_share(as_share); | ||
| 331 | } | ||
| 332 | |||
| 333 | long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
| 334 | { | ||
| 335 | int err = 0; | ||
| 336 | struct gk20a_as_share *as_share = filp->private_data; | ||
| 337 | struct gk20a *g = gk20a_from_as(as_share->as); | ||
| 338 | |||
| 339 | u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE]; | ||
| 340 | |||
| 341 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
| 342 | |||
| 343 | if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) || | ||
| 344 | (_IOC_NR(cmd) == 0) || | ||
| 345 | (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) || | ||
| 346 | (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE)) | ||
| 347 | return -EINVAL; | ||
| 348 | |||
| 349 | memset(buf, 0, sizeof(buf)); | ||
| 350 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
| 351 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
| 352 | return -EFAULT; | ||
| 353 | } | ||
| 354 | |||
| 355 | err = gk20a_busy(g); | ||
| 356 | if (err) | ||
| 357 | return err; | ||
| 358 | |||
| 359 | nvgpu_speculation_barrier(); | ||
| 360 | switch (cmd) { | ||
| 361 | case NVGPU_AS_IOCTL_BIND_CHANNEL: | ||
| 362 | trace_gk20a_as_ioctl_bind_channel(g->name); | ||
| 363 | err = gk20a_as_ioctl_bind_channel(as_share, | ||
| 364 | (struct nvgpu_as_bind_channel_args *)buf); | ||
| 365 | |||
| 366 | break; | ||
| 367 | case NVGPU32_AS_IOCTL_ALLOC_SPACE: | ||
| 368 | { | ||
| 369 | struct nvgpu32_as_alloc_space_args *args32 = | ||
| 370 | (struct nvgpu32_as_alloc_space_args *)buf; | ||
| 371 | struct nvgpu_as_alloc_space_args args; | ||
| 372 | |||
| 373 | args.pages = args32->pages; | ||
| 374 | args.page_size = args32->page_size; | ||
| 375 | args.flags = args32->flags; | ||
| 376 | args.o_a.offset = args32->o_a.offset; | ||
| 377 | trace_gk20a_as_ioctl_alloc_space(g->name); | ||
| 378 | err = gk20a_as_ioctl_alloc_space(as_share, &args); | ||
| 379 | args32->o_a.offset = args.o_a.offset; | ||
| 380 | break; | ||
| 381 | } | ||
| 382 | case NVGPU_AS_IOCTL_ALLOC_SPACE: | ||
| 383 | trace_gk20a_as_ioctl_alloc_space(g->name); | ||
| 384 | err = gk20a_as_ioctl_alloc_space(as_share, | ||
| 385 | (struct nvgpu_as_alloc_space_args *)buf); | ||
| 386 | break; | ||
| 387 | case NVGPU_AS_IOCTL_FREE_SPACE: | ||
| 388 | trace_gk20a_as_ioctl_free_space(g->name); | ||
| 389 | err = gk20a_as_ioctl_free_space(as_share, | ||
| 390 | (struct nvgpu_as_free_space_args *)buf); | ||
| 391 | break; | ||
| 392 | case NVGPU_AS_IOCTL_MAP_BUFFER_EX: | ||
| 393 | trace_gk20a_as_ioctl_map_buffer(g->name); | ||
| 394 | err = gk20a_as_ioctl_map_buffer_ex(as_share, | ||
| 395 | (struct nvgpu_as_map_buffer_ex_args *)buf); | ||
| 396 | break; | ||
| 397 | case NVGPU_AS_IOCTL_UNMAP_BUFFER: | ||
| 398 | trace_gk20a_as_ioctl_unmap_buffer(g->name); | ||
| 399 | err = gk20a_as_ioctl_unmap_buffer(as_share, | ||
| 400 | (struct nvgpu_as_unmap_buffer_args *)buf); | ||
| 401 | break; | ||
| 402 | case NVGPU_AS_IOCTL_GET_VA_REGIONS: | ||
| 403 | trace_gk20a_as_ioctl_get_va_regions(g->name); | ||
| 404 | err = gk20a_as_ioctl_get_va_regions(as_share, | ||
| 405 | (struct nvgpu_as_get_va_regions_args *)buf); | ||
| 406 | break; | ||
| 407 | case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH: | ||
| 408 | err = gk20a_as_ioctl_map_buffer_batch(as_share, | ||
| 409 | (struct nvgpu_as_map_buffer_batch_args *)buf); | ||
| 410 | break; | ||
| 411 | case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP: | ||
| 412 | err = nvgpu_as_ioctl_get_sync_ro_map(as_share, | ||
| 413 | (struct nvgpu_as_get_sync_ro_map_args *)buf); | ||
| 414 | break; | ||
| 415 | default: | ||
| 416 | err = -ENOTTY; | ||
| 417 | break; | ||
| 418 | } | ||
| 419 | |||
| 420 | gk20a_idle(g); | ||
| 421 | |||
| 422 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
| 423 | if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) | ||
| 424 | err = -EFAULT; | ||
| 425 | |||
| 426 | return err; | ||
| 427 | } | ||
diff --git a/include/os/linux/ioctl_as.h b/include/os/linux/ioctl_as.h new file mode 100644 index 0000000..b3de378 --- /dev/null +++ b/include/os/linux/ioctl_as.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | /* | ||
| 2 | * GK20A Address Spaces | ||
| 3 | * | ||
| 4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | */ | ||
| 15 | #ifndef __NVGPU_COMMON_LINUX_AS_H__ | ||
| 16 | #define __NVGPU_COMMON_LINUX_AS_H__ | ||
| 17 | |||
| 18 | struct inode; | ||
| 19 | struct file; | ||
| 20 | |||
| 21 | /* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and | ||
| 22 | * num_maps */ | ||
| 23 | #define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256 | ||
| 24 | |||
| 25 | /* struct file_operations driver interface */ | ||
| 26 | int gk20a_as_dev_open(struct inode *inode, struct file *filp); | ||
| 27 | int gk20a_as_dev_release(struct inode *inode, struct file *filp); | ||
| 28 | long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
| 29 | |||
| 30 | #endif | ||
diff --git a/include/os/linux/ioctl_channel.c b/include/os/linux/ioctl_channel.c new file mode 100644 index 0000000..0f39cc7 --- /dev/null +++ b/include/os/linux/ioctl_channel.c | |||
| @@ -0,0 +1,1388 @@ | |||
| 1 | /* | ||
| 2 | * GK20A Graphics channel | ||
| 3 | * | ||
| 4 | * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <trace/events/gk20a.h> | ||
| 20 | #include <linux/file.h> | ||
| 21 | #include <linux/anon_inodes.h> | ||
| 22 | #include <linux/dma-buf.h> | ||
| 23 | #include <linux/poll.h> | ||
| 24 | #include <uapi/linux/nvgpu.h> | ||
| 25 | |||
| 26 | #include <nvgpu/semaphore.h> | ||
| 27 | #include <nvgpu/timers.h> | ||
| 28 | #include <nvgpu/kmem.h> | ||
| 29 | #include <nvgpu/log.h> | ||
| 30 | #include <nvgpu/list.h> | ||
| 31 | #include <nvgpu/debug.h> | ||
| 32 | #include <nvgpu/enabled.h> | ||
| 33 | #include <nvgpu/error_notifier.h> | ||
| 34 | #include <nvgpu/barrier.h> | ||
| 35 | #include <nvgpu/nvhost.h> | ||
| 36 | #include <nvgpu/os_sched.h> | ||
| 37 | #include <nvgpu/gk20a.h> | ||
| 38 | #include <nvgpu/channel.h> | ||
| 39 | #include <nvgpu/channel_sync.h> | ||
| 40 | |||
| 41 | #include "gk20a/dbg_gpu_gk20a.h" | ||
| 42 | #include "gk20a/fence_gk20a.h" | ||
| 43 | |||
| 44 | #include "platform_gk20a.h" | ||
| 45 | #include "ioctl_channel.h" | ||
| 46 | #include "channel.h" | ||
| 47 | #include "os_linux.h" | ||
| 48 | #include "ctxsw_trace.h" | ||
| 49 | |||
| 50 | /* the minimal size of client buffer */ | ||
| 51 | #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ | ||
| 52 | (sizeof(struct gk20a_cs_snapshot_fifo) + \ | ||
| 53 | sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) | ||
| 54 | |||
| 55 | static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) | ||
| 56 | { | ||
| 57 | switch (graphics_preempt_mode) { | ||
| 58 | case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: | ||
| 59 | return "WFI"; | ||
| 60 | default: | ||
| 61 | return "?"; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode) | ||
| 66 | { | ||
| 67 | switch (compute_preempt_mode) { | ||
| 68 | case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: | ||
| 69 | return "WFI"; | ||
| 70 | case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: | ||
| 71 | return "CTA"; | ||
| 72 | default: | ||
| 73 | return "?"; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | static void gk20a_channel_trace_sched_param( | ||
| 78 | void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice, | ||
| 79 | u32 timeout, const char *interleave, | ||
| 80 | const char *graphics_preempt_mode, | ||
| 81 | const char *compute_preempt_mode), | ||
| 82 | struct channel_gk20a *ch) | ||
| 83 | { | ||
| 84 | struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); | ||
| 85 | |||
| 86 | if (!tsg) | ||
| 87 | return; | ||
| 88 | |||
| 89 | (trace)(ch->chid, ch->tsgid, ch->pid, | ||
| 90 | tsg->timeslice_us, | ||
| 91 | ch->timeout_ms_max, | ||
| 92 | gk20a_fifo_interleave_level_name(tsg->interleave_level), | ||
| 93 | gr_gk20a_graphics_preempt_mode_name( | ||
| 94 | tsg->gr_ctx.graphics_preempt_mode), | ||
| 95 | gr_gk20a_compute_preempt_mode_name( | ||
| 96 | tsg->gr_ctx.compute_preempt_mode)); | ||
| 97 | } | ||
| 98 | |||
| 99 | /* | ||
| 100 | * Although channels do have pointers back to the gk20a struct that they were | ||
| 101 | * created under, in cases where the driver is killed that pointer can be bad. | ||
| 102 | * The channel memory can be freed before the release() function for a given | ||
| 103 | * channel is called. This happens when the driver dies and userspace doesn't | ||
| 104 | * get a chance to call release() until after the entire gk20a driver data is | ||
| 105 | * unloaded and freed. | ||
| 106 | */ | ||
| 107 | struct channel_priv { | ||
| 108 | struct gk20a *g; | ||
| 109 | struct channel_gk20a *c; | ||
| 110 | }; | ||
| 111 | |||
| 112 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
| 113 | |||
| 114 | void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch) | ||
| 115 | { | ||
| 116 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 117 | |||
| 118 | /* disable existing cyclestats buffer */ | ||
| 119 | nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); | ||
| 120 | if (priv->cyclestate_buffer_handler) { | ||
| 121 | dma_buf_vunmap(priv->cyclestate_buffer_handler, | ||
| 122 | ch->cyclestate.cyclestate_buffer); | ||
| 123 | dma_buf_put(priv->cyclestate_buffer_handler); | ||
| 124 | priv->cyclestate_buffer_handler = NULL; | ||
| 125 | ch->cyclestate.cyclestate_buffer = NULL; | ||
| 126 | ch->cyclestate.cyclestate_buffer_size = 0; | ||
| 127 | } | ||
| 128 | nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); | ||
| 129 | } | ||
| 130 | |||
| 131 | int gk20a_channel_cycle_stats(struct channel_gk20a *ch, int dmabuf_fd) | ||
| 132 | { | ||
| 133 | struct dma_buf *dmabuf; | ||
| 134 | void *virtual_address; | ||
| 135 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 136 | |||
| 137 | /* is it allowed to handle calls for current GPU? */ | ||
| 138 | if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS)) | ||
| 139 | return -ENOSYS; | ||
| 140 | |||
| 141 | if (dmabuf_fd && !priv->cyclestate_buffer_handler) { | ||
| 142 | |||
| 143 | /* set up new cyclestats buffer */ | ||
| 144 | dmabuf = dma_buf_get(dmabuf_fd); | ||
| 145 | if (IS_ERR(dmabuf)) | ||
| 146 | return PTR_ERR(dmabuf); | ||
| 147 | virtual_address = dma_buf_vmap(dmabuf); | ||
| 148 | if (!virtual_address) | ||
| 149 | return -ENOMEM; | ||
| 150 | |||
| 151 | priv->cyclestate_buffer_handler = dmabuf; | ||
| 152 | ch->cyclestate.cyclestate_buffer = virtual_address; | ||
| 153 | ch->cyclestate.cyclestate_buffer_size = dmabuf->size; | ||
| 154 | return 0; | ||
| 155 | |||
| 156 | } else if (!dmabuf_fd && priv->cyclestate_buffer_handler) { | ||
| 157 | gk20a_channel_free_cycle_stats_buffer(ch); | ||
| 158 | return 0; | ||
| 159 | |||
| 160 | } else if (!dmabuf_fd && !priv->cyclestate_buffer_handler) { | ||
| 161 | /* no request from GL */ | ||
| 162 | return 0; | ||
| 163 | |||
| 164 | } else { | ||
| 165 | pr_err("channel already has cyclestats buffer\n"); | ||
| 166 | return -EINVAL; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) | ||
| 171 | { | ||
| 172 | int ret; | ||
| 173 | |||
| 174 | nvgpu_mutex_acquire(&ch->cs_client_mutex); | ||
| 175 | if (ch->cs_client) | ||
| 176 | ret = gr_gk20a_css_flush(ch, ch->cs_client); | ||
| 177 | else | ||
| 178 | ret = -EBADF; | ||
| 179 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
| 180 | |||
| 181 | return ret; | ||
| 182 | } | ||
| 183 | |||
| 184 | int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, | ||
| 185 | u32 dmabuf_fd, | ||
| 186 | u32 perfmon_id_count, | ||
| 187 | u32 *perfmon_id_start) | ||
| 188 | { | ||
| 189 | int ret = 0; | ||
| 190 | struct gk20a *g = ch->g; | ||
| 191 | struct gk20a_cs_snapshot_client_linux *client_linux; | ||
| 192 | struct gk20a_cs_snapshot_client *client; | ||
| 193 | |||
| 194 | nvgpu_mutex_acquire(&ch->cs_client_mutex); | ||
| 195 | if (ch->cs_client) { | ||
| 196 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
| 197 | return -EEXIST; | ||
| 198 | } | ||
| 199 | |||
| 200 | client_linux = nvgpu_kzalloc(g, sizeof(*client_linux)); | ||
| 201 | if (!client_linux) { | ||
| 202 | ret = -ENOMEM; | ||
| 203 | goto err; | ||
| 204 | } | ||
| 205 | |||
| 206 | client_linux->dmabuf_fd = dmabuf_fd; | ||
| 207 | client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd); | ||
| 208 | if (IS_ERR(client_linux->dma_handler)) { | ||
| 209 | ret = PTR_ERR(client_linux->dma_handler); | ||
| 210 | client_linux->dma_handler = NULL; | ||
| 211 | goto err_free; | ||
| 212 | } | ||
| 213 | |||
| 214 | client = &client_linux->cs_client; | ||
| 215 | client->snapshot_size = client_linux->dma_handler->size; | ||
| 216 | if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { | ||
| 217 | ret = -ENOMEM; | ||
| 218 | goto err_put; | ||
| 219 | } | ||
| 220 | |||
| 221 | client->snapshot = (struct gk20a_cs_snapshot_fifo *) | ||
| 222 | dma_buf_vmap(client_linux->dma_handler); | ||
| 223 | if (!client->snapshot) { | ||
| 224 | ret = -ENOMEM; | ||
| 225 | goto err_put; | ||
| 226 | } | ||
| 227 | |||
| 228 | ch->cs_client = client; | ||
| 229 | |||
| 230 | ret = gr_gk20a_css_attach(ch, | ||
| 231 | perfmon_id_count, | ||
| 232 | perfmon_id_start, | ||
| 233 | ch->cs_client); | ||
| 234 | |||
| 235 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
| 236 | |||
| 237 | return ret; | ||
| 238 | |||
| 239 | err_put: | ||
| 240 | dma_buf_put(client_linux->dma_handler); | ||
| 241 | err_free: | ||
| 242 | nvgpu_kfree(g, client_linux); | ||
| 243 | err: | ||
| 244 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
| 245 | return ret; | ||
| 246 | } | ||
| 247 | |||
| 248 | int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) | ||
| 249 | { | ||
| 250 | int ret; | ||
| 251 | struct gk20a_cs_snapshot_client_linux *client_linux; | ||
| 252 | |||
| 253 | nvgpu_mutex_acquire(&ch->cs_client_mutex); | ||
| 254 | if (!ch->cs_client) { | ||
| 255 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
| 256 | return 0; | ||
| 257 | } | ||
| 258 | |||
| 259 | client_linux = container_of(ch->cs_client, | ||
| 260 | struct gk20a_cs_snapshot_client_linux, | ||
| 261 | cs_client); | ||
| 262 | |||
| 263 | ret = gr_gk20a_css_detach(ch, ch->cs_client); | ||
| 264 | |||
| 265 | if (client_linux->dma_handler) { | ||
| 266 | if (ch->cs_client->snapshot) | ||
| 267 | dma_buf_vunmap(client_linux->dma_handler, | ||
| 268 | ch->cs_client->snapshot); | ||
| 269 | dma_buf_put(client_linux->dma_handler); | ||
| 270 | } | ||
| 271 | |||
| 272 | ch->cs_client = NULL; | ||
| 273 | nvgpu_kfree(ch->g, client_linux); | ||
| 274 | |||
| 275 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
| 276 | |||
| 277 | return ret; | ||
| 278 | } | ||
| 279 | #endif | ||
| 280 | |||
| 281 | static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, | ||
| 282 | struct nvgpu_channel_wdt_args *args) | ||
| 283 | { | ||
| 284 | u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT | | ||
| 285 | NVGPU_IOCTL_CHANNEL_ENABLE_WDT); | ||
| 286 | |||
| 287 | if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT) | ||
| 288 | ch->timeout.enabled = false; | ||
| 289 | else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT) | ||
| 290 | ch->timeout.enabled = true; | ||
| 291 | else | ||
| 292 | return -EINVAL; | ||
| 293 | |||
| 294 | if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT) | ||
| 295 | ch->timeout.limit_ms = args->timeout_ms; | ||
| 296 | |||
| 297 | ch->timeout.debug_dump = (args->wdt_status & | ||
| 298 | NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0; | ||
| 299 | |||
| 300 | return 0; | ||
| 301 | } | ||
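The watchdog bits above compose in a single call. A minimal user-space sketch
(assuming a channel_fd already opened on the channel node; error handling
elided, not code from this driver):

	struct nvgpu_channel_wdt_args wdt = {0};

	/* enable the watchdog, set a 5 s limit, keep debug dumps on timeout */
	wdt.wdt_status = NVGPU_IOCTL_CHANNEL_ENABLE_WDT |
			 NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT;
	wdt.timeout_ms = 5000;

	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_WDT, &wdt) != 0)
		perror("NVGPU_IOCTL_CHANNEL_WDT");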
| 302 | |||
| 303 | static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) | ||
| 304 | { | ||
| 305 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 306 | |||
| 307 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
| 308 | if (priv->error_notifier.dmabuf) { | ||
| 309 | dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr); | ||
| 310 | dma_buf_put(priv->error_notifier.dmabuf); | ||
| 311 | priv->error_notifier.dmabuf = NULL; | ||
| 312 | priv->error_notifier.notification = NULL; | ||
| 313 | priv->error_notifier.vaddr = NULL; | ||
| 314 | } | ||
| 315 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
| 316 | } | ||
| 317 | |||
| 318 | static int gk20a_init_error_notifier(struct channel_gk20a *ch, | ||
| 319 | struct nvgpu_set_error_notifier *args) | ||
| 320 | { | ||
| 321 | struct dma_buf *dmabuf; | ||
| 322 | void *va; | ||
| 323 | u64 end = args->offset + sizeof(struct nvgpu_notification); | ||
| 324 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 325 | |||
| 326 | if (!args->mem) { | ||
| 327 | pr_err("gk20a_init_error_notifier: invalid memory handle\n"); | ||
| 328 | return -EINVAL; | ||
| 329 | } | ||
| 330 | |||
| 331 | dmabuf = dma_buf_get(args->mem); | ||
| 332 | |||
| 333 | gk20a_channel_free_error_notifiers(ch); | ||
| 334 | |||
| 335 | if (IS_ERR(dmabuf)) { | ||
| 336 | pr_err("Invalid handle: %d\n", args->mem); | ||
| 337 | return -EINVAL; | ||
| 338 | } | ||
| 339 | |||
| 340 | if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) { | ||
| 341 | dma_buf_put(dmabuf); | ||
| 342 | nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset"); | ||
| 343 | return -EINVAL; | ||
| 344 | } | ||
| 345 | |||
| 346 | nvgpu_speculation_barrier(); | ||
| 347 | |||
| 348 | /* map handle */ | ||
| 349 | va = dma_buf_vmap(dmabuf); | ||
| 350 | if (!va) { | ||
| 351 | dma_buf_put(dmabuf); | ||
| 352 | pr_err("Cannot map notifier handle\n"); | ||
| 353 | return -ENOMEM; | ||
| 354 | } | ||
| 355 | |||
| 356 | priv->error_notifier.notification = va + args->offset; | ||
| 357 | priv->error_notifier.vaddr = va; | ||
| 358 | memset(priv->error_notifier.notification, 0, | ||
| 359 | sizeof(struct nvgpu_notification)); | ||
| 360 | |||
| 361 | /* set channel notifiers pointer */ | ||
| 362 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
| 363 | priv->error_notifier.dmabuf = dmabuf; | ||
| 364 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
| 365 | |||
| 366 | return 0; | ||
| 367 | } | ||
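For reference, the matching user-space side is one ioctl handing the driver a
dma-buf fd plus an offset into it (hypothetical fds; a sketch, not code from
this driver):

	struct nvgpu_set_error_notifier notif = {0};

	notif.mem = notifier_dmabuf_fd;	/* dma-buf holding the notification */
	notif.offset = 0;	/* must leave room for one struct nvgpu_notification */

	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER, &notif) != 0)
		perror("NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER");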
| 368 | |||
| 369 | /* | ||
| 370 | * This returns the channel with a reference. The caller must | ||
| 371 | * gk20a_channel_put() the ref back after use. | ||
| 372 | * | ||
| 373 | * NULL is returned if the channel was not found. | ||
| 374 | */ | ||
| 375 | struct channel_gk20a *gk20a_get_channel_from_file(int fd) | ||
| 376 | { | ||
| 377 | struct channel_gk20a *ch; | ||
| 378 | struct channel_priv *priv; | ||
| 379 | struct file *f = fget(fd); | ||
| 380 | |||
| 381 | if (!f) | ||
| 382 | return NULL; | ||
| 383 | |||
| 384 | if (f->f_op != &gk20a_channel_ops) { | ||
| 385 | fput(f); | ||
| 386 | return NULL; | ||
| 387 | } | ||
| 388 | |||
| 389 | priv = (struct channel_priv *)f->private_data; | ||
| 390 | ch = gk20a_channel_get(priv->c); | ||
| 391 | fput(f); | ||
| 392 | return ch; | ||
| 393 | } | ||
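A minimal sketch of the get/put contract documented above (hypothetical
caller; the helper name is illustrative only):

	static int example_with_channel(int fd)
	{
		struct channel_gk20a *ch = gk20a_get_channel_from_file(fd);

		if (!ch)
			return -EINVAL;	/* not a channel fd, or channel is dying */

		/* ... use ch while the reference is held ... */

		gk20a_channel_put(ch);	/* balance the reference taken above */
		return 0;
	}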
| 394 | |||
| 395 | int gk20a_channel_release(struct inode *inode, struct file *filp) | ||
| 396 | { | ||
| 397 | struct channel_priv *priv = filp->private_data; | ||
| 398 | struct channel_gk20a *ch; | ||
| 399 | struct gk20a *g; | ||
| 400 | |||
| 401 | int err; | ||
| 402 | |||
| 403 | /* We could still end up here even if the channel_open failed, e.g. | ||
| 404 | * if we ran out of hw channel IDs. | ||
| 405 | */ | ||
| 406 | if (!priv) | ||
| 407 | return 0; | ||
| 408 | |||
| 409 | ch = priv->c; | ||
| 410 | g = priv->g; | ||
| 411 | |||
| 412 | err = gk20a_busy(g); | ||
| 413 | if (err) { | ||
| 414 | nvgpu_err(g, "failed to release a channel!"); | ||
| 415 | goto channel_release; | ||
| 416 | } | ||
| 417 | |||
| 418 | trace_gk20a_channel_release(dev_name(dev_from_gk20a(g))); | ||
| 419 | |||
| 420 | gk20a_channel_close(ch); | ||
| 421 | gk20a_channel_free_error_notifiers(ch); | ||
| 422 | |||
| 423 | gk20a_idle(g); | ||
| 424 | |||
| 425 | channel_release: | ||
| 426 | gk20a_put(g); | ||
| 427 | nvgpu_kfree(g, filp->private_data); | ||
| 428 | filp->private_data = NULL; | ||
| 429 | return 0; | ||
| 430 | } | ||
| 431 | |||
| 432 | /* note: runlist_id -1 is a synonym for the ENGINE_GR_GK20A runlist id */ | ||
| 433 | static int __gk20a_channel_open(struct gk20a *g, | ||
| 434 | struct file *filp, s32 runlist_id) | ||
| 435 | { | ||
| 436 | int err; | ||
| 437 | struct channel_gk20a *ch; | ||
| 438 | struct channel_priv *priv; | ||
| 439 | |||
| 440 | nvgpu_log_fn(g, " "); | ||
| 441 | |||
| 442 | g = gk20a_get(g); | ||
| 443 | if (!g) | ||
| 444 | return -ENODEV; | ||
| 445 | |||
| 446 | trace_gk20a_channel_open(dev_name(dev_from_gk20a(g))); | ||
| 447 | |||
| 448 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
| 449 | if (!priv) { | ||
| 450 | err = -ENOMEM; | ||
| 451 | goto free_ref; | ||
| 452 | } | ||
| 453 | |||
| 454 | err = gk20a_busy(g); | ||
| 455 | if (err) { | ||
| 456 | nvgpu_err(g, "failed to power on, %d", err); | ||
| 457 | goto fail_busy; | ||
| 458 | } | ||
| 459 | /* All user space channels should be non-privileged */ | ||
| 460 | ch = gk20a_open_new_channel(g, runlist_id, false, | ||
| 461 | nvgpu_current_pid(g), nvgpu_current_tid(g)); | ||
| 462 | gk20a_idle(g); | ||
| 463 | if (!ch) { | ||
| 464 | nvgpu_err(g, | ||
| 465 | "failed to open new channel"); | ||
| 466 | err = -ENOMEM; | ||
| 467 | goto fail_busy; | ||
| 468 | } | ||
| 469 | |||
| 470 | gk20a_channel_trace_sched_param( | ||
| 471 | trace_gk20a_channel_sched_defaults, ch); | ||
| 472 | |||
| 473 | priv->g = g; | ||
| 474 | priv->c = ch; | ||
| 475 | |||
| 476 | filp->private_data = priv; | ||
| 477 | return 0; | ||
| 478 | |||
| 479 | fail_busy: | ||
| 480 | nvgpu_kfree(g, priv); | ||
| 481 | free_ref: | ||
| 482 | gk20a_put(g); | ||
| 483 | return err; | ||
| 484 | } | ||
| 485 | |||
| 486 | int gk20a_channel_open(struct inode *inode, struct file *filp) | ||
| 487 | { | ||
| 488 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
| 489 | struct nvgpu_os_linux, channel.cdev); | ||
| 490 | struct gk20a *g = &l->g; | ||
| 491 | int ret; | ||
| 492 | |||
| 493 | nvgpu_log_fn(g, "start"); | ||
| 494 | ret = __gk20a_channel_open(g, filp, -1); | ||
| 495 | |||
| 496 | nvgpu_log_fn(g, "end"); | ||
| 497 | return ret; | ||
| 498 | } | ||
| 499 | |||
| 500 | int gk20a_channel_open_ioctl(struct gk20a *g, | ||
| 501 | struct nvgpu_channel_open_args *args) | ||
| 502 | { | ||
| 503 | int err; | ||
| 504 | int fd; | ||
| 505 | struct file *file; | ||
| 506 | char name[64]; | ||
| 507 | s32 runlist_id = args->in.runlist_id; | ||
| 508 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 509 | |||
| 510 | err = get_unused_fd_flags(O_RDWR); | ||
| 511 | if (err < 0) | ||
| 512 | return err; | ||
| 513 | fd = err; | ||
| 514 | |||
| 515 | snprintf(name, sizeof(name), "nvhost-%s-fd%d", | ||
| 516 | dev_name(dev_from_gk20a(g)), fd); | ||
| 517 | |||
| 518 | file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR); | ||
| 519 | if (IS_ERR(file)) { | ||
| 520 | err = PTR_ERR(file); | ||
| 521 | goto clean_up; | ||
| 522 | } | ||
| 523 | |||
| 524 | err = __gk20a_channel_open(g, file, runlist_id); | ||
| 525 | if (err) | ||
| 526 | goto clean_up_file; | ||
| 527 | |||
| 528 | fd_install(fd, file); | ||
| 529 | args->out.channel_fd = fd; | ||
| 530 | return 0; | ||
| 531 | |||
| 532 | clean_up_file: | ||
| 533 | fput(file); | ||
| 534 | clean_up: | ||
| 535 | put_unused_fd(fd); | ||
| 536 | return err; | ||
| 537 | } | ||
| 538 | |||
| 539 | static u32 nvgpu_setup_bind_user_flags_to_common_flags(u32 user_flags) | ||
| 540 | { | ||
| 541 | u32 flags = 0; | ||
| 542 | |||
| 543 | if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_VPR_ENABLED) | ||
| 544 | flags |= NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR; | ||
| 545 | |||
| 546 | if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_DETERMINISTIC) | ||
| 547 | flags |= NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC; | ||
| 548 | |||
| 549 | if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) | ||
| 550 | flags |= NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE; | ||
| 551 | |||
| 552 | if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT) | ||
| 553 | flags |= NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT; | ||
| 554 | |||
| 555 | return flags; | ||
| 556 | } | ||
| 557 | |||
| 558 | static void nvgpu_get_setup_bind_args( | ||
| 559 | struct nvgpu_channel_setup_bind_args *channel_setup_bind_args, | ||
| 560 | struct nvgpu_setup_bind_args *setup_bind_args) | ||
| 561 | { | ||
| 562 | setup_bind_args->num_gpfifo_entries = | ||
| 563 | channel_setup_bind_args->num_gpfifo_entries; | ||
| 564 | setup_bind_args->num_inflight_jobs = | ||
| 565 | channel_setup_bind_args->num_inflight_jobs; | ||
| 566 | setup_bind_args->userd_dmabuf_fd = | ||
| 567 | channel_setup_bind_args->userd_dmabuf_fd; | ||
| 568 | setup_bind_args->userd_dmabuf_offset = | ||
| 569 | channel_setup_bind_args->userd_dmabuf_offset; | ||
| 570 | setup_bind_args->gpfifo_dmabuf_fd = | ||
| 571 | channel_setup_bind_args->gpfifo_dmabuf_fd; | ||
| 572 | setup_bind_args->gpfifo_dmabuf_offset = | ||
| 573 | channel_setup_bind_args->gpfifo_dmabuf_offset; | ||
| 574 | setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags( | ||
| 575 | channel_setup_bind_args->flags); | ||
| 576 | } | ||
| 577 | |||
| 578 | static void nvgpu_get_gpfifo_ex_args( | ||
| 579 | struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args, | ||
| 580 | struct nvgpu_setup_bind_args *setup_bind_args) | ||
| 581 | { | ||
| 582 | setup_bind_args->num_gpfifo_entries = alloc_gpfifo_ex_args->num_entries; | ||
| 583 | setup_bind_args->num_inflight_jobs = | ||
| 584 | alloc_gpfifo_ex_args->num_inflight_jobs; | ||
| 585 | setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags( | ||
| 586 | alloc_gpfifo_ex_args->flags); | ||
| 587 | } | ||
| 588 | |||
| 589 | static void nvgpu_get_gpfifo_args( | ||
| 590 | struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args, | ||
| 591 | struct nvgpu_setup_bind_args *setup_bind_args) | ||
| 592 | { | ||
| 593 | /* | ||
| 594 | * The kernel can insert one extra gpfifo entry before the | ||
| 595 | * user-submitted gpfifos and another one after, for internal | ||
| 596 | * usage. Triple the requested size to be safe. | ||
| 597 | */ | ||
| 598 | setup_bind_args->num_gpfifo_entries = | ||
| 599 | alloc_gpfifo_args->num_entries * 3; | ||
| 600 | setup_bind_args->num_inflight_jobs = 0; | ||
| 601 | setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags( | ||
| 602 | alloc_gpfifo_args->flags); | ||
| 603 | } | ||
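To make the sizing concrete, a worked example with hypothetical values:

	struct nvgpu_alloc_gpfifo_args req = { .num_entries = 64, .flags = 0 };
	struct nvgpu_setup_bind_args bind;

	nvgpu_get_gpfifo_args(&req, &bind);
	/* bind.num_gpfifo_entries == 192, bind.num_inflight_jobs == 0 */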
| 604 | |||
| 605 | static void nvgpu_get_fence_args( | ||
| 606 | struct nvgpu_fence *fence_args_in, | ||
| 607 | struct nvgpu_channel_fence *fence_args_out) | ||
| 608 | { | ||
| 609 | fence_args_out->id = fence_args_in->id; | ||
| 610 | fence_args_out->value = fence_args_in->value; | ||
| 611 | } | ||
| 612 | |||
| 613 | static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, | ||
| 614 | ulong id, u32 offset, | ||
| 615 | u32 payload, u32 timeout) | ||
| 616 | { | ||
| 617 | struct dma_buf *dmabuf; | ||
| 618 | void *data; | ||
| 619 | u32 *semaphore; | ||
| 620 | int ret = 0; | ||
| 621 | |||
| 622 | /* do not wait if channel has timed out */ | ||
| 623 | if (gk20a_channel_check_timedout(ch)) { | ||
| 624 | return -ETIMEDOUT; | ||
| 625 | } | ||
| 626 | |||
| 627 | dmabuf = dma_buf_get(id); | ||
| 628 | if (IS_ERR(dmabuf)) { | ||
| 629 | nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id); | ||
| 630 | return -EINVAL; | ||
| 631 | } | ||
| 632 | |||
| 633 | data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); | ||
| 634 | if (!data) { | ||
| 635 | nvgpu_err(ch->g, "failed to map notifier memory"); | ||
| 636 | ret = -EINVAL; | ||
| 637 | goto cleanup_put; | ||
| 638 | } | ||
| 639 | |||
| 640 | semaphore = data + (offset & ~PAGE_MASK); | ||
| 641 | |||
| 642 | ret = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
| 643 | &ch->semaphore_wq, | ||
| 644 | *semaphore == payload || | ||
| 645 | gk20a_channel_check_timedout(ch), | ||
| 646 | timeout); | ||
| 647 | |||
| 648 | dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); | ||
| 649 | cleanup_put: | ||
| 650 | dma_buf_put(dmabuf); | ||
| 651 | return ret; | ||
| 652 | } | ||
| 653 | |||
| 654 | static int gk20a_channel_wait(struct channel_gk20a *ch, | ||
| 655 | struct nvgpu_wait_args *args) | ||
| 656 | { | ||
| 657 | struct dma_buf *dmabuf; | ||
| 658 | struct gk20a *g = ch->g; | ||
| 659 | struct notification *notif; | ||
| 660 | struct timespec tv; | ||
| 661 | u64 jiffies; | ||
| 662 | ulong id; | ||
| 663 | u32 offset; | ||
| 664 | int remain, ret = 0; | ||
| 665 | u64 end; | ||
| 666 | |||
| 667 | nvgpu_log_fn(g, " "); | ||
| 668 | |||
| 669 | if (gk20a_channel_check_timedout(ch)) { | ||
| 670 | return -ETIMEDOUT; | ||
| 671 | } | ||
| 672 | |||
| 673 | switch (args->type) { | ||
| 674 | case NVGPU_WAIT_TYPE_NOTIFIER: | ||
| 675 | id = args->condition.notifier.dmabuf_fd; | ||
| 676 | offset = args->condition.notifier.offset; | ||
| 677 | end = offset + sizeof(struct notification); | ||
| 678 | |||
| 679 | dmabuf = dma_buf_get(id); | ||
| 680 | if (IS_ERR(dmabuf)) { | ||
| 681 | nvgpu_err(g, "invalid notifier nvmap handle 0x%lx", | ||
| 682 | id); | ||
| 683 | return -EINVAL; | ||
| 684 | } | ||
| 685 | |||
| 686 | if (end > dmabuf->size || end < sizeof(struct notification)) { | ||
| 687 | dma_buf_put(dmabuf); | ||
| 688 | nvgpu_err(g, "invalid notifier offset"); | ||
| 689 | return -EINVAL; | ||
| 690 | } | ||
| 691 | |||
| 692 | nvgpu_speculation_barrier(); | ||
| 693 | |||
| 694 | notif = dma_buf_vmap(dmabuf); | ||
| 695 | if (!notif) { | ||
| 696 | nvgpu_err(g, "failed to map notifier memory"); | ||
| 697 | return -ENOMEM; | ||
| 698 | } | ||
| 699 | |||
| 700 | notif = (struct notification *)((uintptr_t)notif + offset); | ||
| 701 | |||
| 702 | /* the user should set the status to pending before | ||
| 703 | * calling this ioctl */ | ||
| 704 | remain = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
| 705 | &ch->notifier_wq, | ||
| 706 | notif->status == 0 || | ||
| 707 | gk20a_channel_check_timedout(ch), | ||
| 708 | args->timeout); | ||
| 709 | |||
| 710 | if (remain == 0 && notif->status != 0) { | ||
| 711 | ret = -ETIMEDOUT; | ||
| 712 | goto notif_clean_up; | ||
| 713 | } else if (remain < 0) { | ||
| 714 | ret = -EINTR; | ||
| 715 | goto notif_clean_up; | ||
| 716 | } | ||
| 717 | |||
| 718 | /* TBD: fill in correct information */ | ||
| 719 | jiffies = get_jiffies_64(); | ||
| 720 | jiffies_to_timespec(jiffies, &tv); | ||
| 721 | notif->timestamp.nanoseconds[0] = tv.tv_nsec; | ||
| 722 | notif->timestamp.nanoseconds[1] = tv.tv_sec; | ||
| 723 | notif->info32 = 0xDEADBEEF; /* should be object name */ | ||
| 724 | notif->info16 = ch->chid; /* should be method offset */ | ||
| 725 | |||
| 726 | notif_clean_up: | ||
| 727 | dma_buf_vunmap(dmabuf, notif); | ||
| 728 | return ret; | ||
| 729 | |||
| 730 | case NVGPU_WAIT_TYPE_SEMAPHORE: | ||
| 731 | ret = gk20a_channel_wait_semaphore(ch, | ||
| 732 | args->condition.semaphore.dmabuf_fd, | ||
| 733 | args->condition.semaphore.offset, | ||
| 734 | args->condition.semaphore.payload, | ||
| 735 | args->timeout); | ||
| 736 | |||
| 737 | break; | ||
| 738 | |||
| 739 | default: | ||
| 740 | ret = -EINVAL; | ||
| 741 | break; | ||
| 742 | } | ||
| 743 | |||
| 744 | return ret; | ||
| 745 | } | ||
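From user space the semaphore variant of this wait is driven through
NVGPU_IOCTL_CHANNEL_WAIT; a hedged sketch with hypothetical fds and values
(the uapi fields mirror the accesses above):

	struct nvgpu_wait_args wait = {0};

	wait.type = NVGPU_WAIT_TYPE_SEMAPHORE;
	wait.timeout = 1000;	/* ms */
	wait.condition.semaphore.dmabuf_fd = sem_dmabuf_fd;
	wait.condition.semaphore.offset = 0;	/* byte offset of the 32-bit word */
	wait.condition.semaphore.payload = 42;	/* value being waited for */

	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_WAIT, &wait) != 0)
		perror("NVGPU_IOCTL_CHANNEL_WAIT");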
| 746 | |||
| 747 | static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, | ||
| 748 | struct nvgpu_zcull_bind_args *args) | ||
| 749 | { | ||
| 750 | struct gk20a *g = ch->g; | ||
| 751 | struct gr_gk20a *gr = &g->gr; | ||
| 752 | |||
| 753 | nvgpu_log_fn(gr->g, " "); | ||
| 754 | |||
| 755 | return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, | ||
| 756 | args->gpu_va, args->mode); | ||
| 757 | } | ||
| 758 | |||
| 759 | static int gk20a_ioctl_channel_submit_gpfifo( | ||
| 760 | struct channel_gk20a *ch, | ||
| 761 | struct nvgpu_submit_gpfifo_args *args) | ||
| 762 | { | ||
| 763 | struct nvgpu_channel_fence fence; | ||
| 764 | struct gk20a_fence *fence_out; | ||
| 765 | struct fifo_profile_gk20a *profile = NULL; | ||
| 766 | u32 submit_flags = 0; | ||
| 767 | int fd = -1; | ||
| 768 | struct gk20a *g = ch->g; | ||
| 769 | struct nvgpu_gpfifo_userdata userdata; | ||
| 770 | |||
| 771 | int ret = 0; | ||
| 772 | nvgpu_log_fn(g, " "); | ||
| 773 | |||
| 774 | profile = gk20a_fifo_profile_acquire(ch->g); | ||
| 775 | gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); | ||
| 776 | |||
| 777 | if (gk20a_channel_check_timedout(ch)) { | ||
| 778 | return -ETIMEDOUT; | ||
| 779 | } | ||
| 780 | |||
| 781 | nvgpu_get_fence_args(&args->fence, &fence); | ||
| 782 | submit_flags = | ||
| 783 | nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); | ||
| 784 | |||
| 785 | /* Try to allocate an fd here */ | ||
| 786 | if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | ||
| 787 | && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { | ||
| 788 | fd = get_unused_fd_flags(O_RDWR); | ||
| 789 | if (fd < 0) | ||
| 790 | return fd; | ||
| 791 | } | ||
| 792 | |||
| 793 | userdata.entries = (struct nvgpu_gpfifo_entry __user *) | ||
| 794 | (uintptr_t)args->gpfifo; | ||
| 795 | userdata.context = NULL; | ||
| 796 | |||
| 797 | ret = nvgpu_submit_channel_gpfifo_user(ch, | ||
| 798 | userdata, args->num_entries, | ||
| 799 | submit_flags, &fence, &fence_out, profile); | ||
| 800 | |||
| 801 | if (ret) { | ||
| 802 | if (fd != -1) | ||
| 803 | put_unused_fd(fd); | ||
| 804 | goto clean_up; | ||
| 805 | } | ||
| 806 | |||
| 807 | /* Convert fence_out to something we can pass back to user space. */ | ||
| 808 | if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { | ||
| 809 | if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { | ||
| 810 | ret = gk20a_fence_install_fd(fence_out, fd); | ||
| 811 | if (ret) | ||
| 812 | put_unused_fd(fd); | ||
| 813 | else | ||
| 814 | args->fence.id = fd; | ||
| 815 | } else { | ||
| 816 | args->fence.id = fence_out->syncpt_id; | ||
| 817 | args->fence.value = fence_out->syncpt_value; | ||
| 818 | } | ||
| 819 | } | ||
| 820 | gk20a_fence_put(fence_out); | ||
| 821 | |||
| 822 | gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT); | ||
| 823 | if (profile) | ||
| 824 | gk20a_fifo_profile_release(ch->g, profile); | ||
| 825 | |||
| 826 | clean_up: | ||
| 827 | return ret; | ||
| 828 | } | ||
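The fence plumbing above lets user space get a sync-fence fd back from a
submit. A sketch (hypothetical gpfifo array and counters; error handling
elided):

	struct nvgpu_submit_gpfifo_args submit = {0};

	submit.gpfifo = (__u64)(uintptr_t)entries;	/* user gpfifo entries */
	submit.num_entries = num_entries;
	submit.flags = NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET |
		       NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE;

	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO, &submit) == 0)
		sync_fd = submit.fence.id;	/* fd installed by the kernel */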
| 829 | |||
| 830 | /* | ||
| 831 | * Convert a Linux-specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_* | ||
| 832 | * to a common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_* | ||
| 833 | */ | ||
| 834 | u32 nvgpu_get_common_runlist_level(u32 level) | ||
| 835 | { | ||
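	/* 'level' is user-controlled: the barrier below serializes execution
	 * before the switch, as Spectre-v1 style hardening against
	 * speculative misuse of the value. */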
| 836 | nvgpu_speculation_barrier(); | ||
| 837 | switch (level) { | ||
| 838 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: | ||
| 839 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; | ||
| 840 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: | ||
| 841 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM; | ||
| 842 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: | ||
| 843 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH; | ||
| 844 | default: | ||
| 845 | pr_err("%s: incorrect runlist level\n", __func__); | ||
| 846 | } | ||
| 847 | |||
| 848 | return level; | ||
| 849 | } | ||
| 850 | |||
| 851 | static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags) | ||
| 852 | { | ||
| 853 | u32 flags = 0; | ||
| 854 | |||
| 855 | if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) | ||
| 856 | flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP; | ||
| 857 | |||
| 858 | if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) | ||
| 859 | flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP; | ||
| 860 | |||
| 861 | return flags; | ||
| 862 | } | ||
| 863 | |||
| 864 | static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch, | ||
| 865 | u32 class_num, u32 user_flags) | ||
| 866 | { | ||
| 867 | return ch->g->ops.gr.alloc_obj_ctx(ch, class_num, | ||
| 868 | nvgpu_obj_ctx_user_flags_to_common_flags(user_flags)); | ||
| 869 | } | ||
| 870 | |||
| 871 | /* | ||
| 872 | * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
| 873 | * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
| 874 | */ | ||
| 875 | u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags) | ||
| 876 | { | ||
| 877 | u32 flags = 0; | ||
| 878 | |||
| 879 | if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI) | ||
| 880 | flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
| 881 | if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) | ||
| 882 | flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | ||
| 883 | |||
| 884 | return flags; | ||
| 885 | } | ||
| 886 | |||
| 887 | /* | ||
| 888 | * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
| 889 | * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
| 890 | */ | ||
| 891 | u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags) | ||
| 892 | { | ||
| 893 | u32 flags = 0; | ||
| 894 | |||
| 895 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI) | ||
| 896 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
| 897 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA) | ||
| 898 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA; | ||
| 899 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP) | ||
| 900 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | ||
| 901 | |||
| 902 | return flags; | ||
| 903 | } | ||
| 904 | |||
| 905 | /* | ||
| 906 | * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
| 907 | * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
| 908 | */ | ||
| 909 | u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode) | ||
| 910 | { | ||
| 911 | switch (graphics_preempt_mode) { | ||
| 912 | case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: | ||
| 913 | return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
| 914 | case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: | ||
| 915 | return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | ||
| 916 | } | ||
| 917 | |||
| 918 | return graphics_preempt_mode; | ||
| 919 | } | ||
| 920 | |||
| 921 | /* | ||
| 922 | * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
| 923 | * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
| 924 | */ | ||
| 925 | u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode) | ||
| 926 | { | ||
| 927 | switch (compute_preempt_mode) { | ||
| 928 | case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: | ||
| 929 | return NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
| 930 | case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: | ||
| 931 | return NVGPU_COMPUTE_PREEMPTION_MODE_CTA; | ||
| 932 | case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: | ||
| 933 | return NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | ||
| 934 | } | ||
| 935 | |||
| 936 | return compute_preempt_mode; | ||
| 937 | } | ||
| 938 | |||
| 939 | /* | ||
| 940 | * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
| 941 | * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
| 942 | */ | ||
| 943 | static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) | ||
| 944 | { | ||
| 945 | nvgpu_speculation_barrier(); | ||
| 946 | switch (graphics_preempt_mode) { | ||
| 947 | case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI: | ||
| 948 | return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; | ||
| 949 | case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP: | ||
| 950 | return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; | ||
| 951 | } | ||
| 952 | |||
| 953 | return graphics_preempt_mode; | ||
| 954 | } | ||
| 955 | |||
| 956 | /* | ||
| 957 | * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
| 958 | * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
| 959 | */ | ||
| 960 | static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode) | ||
| 961 | { | ||
| 962 | nvgpu_speculation_barrier(); | ||
| 963 | switch (compute_preempt_mode) { | ||
| 964 | case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: | ||
| 965 | return NVGPU_PREEMPTION_MODE_COMPUTE_WFI; | ||
| 966 | case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: | ||
| 967 | return NVGPU_PREEMPTION_MODE_COMPUTE_CTA; | ||
| 968 | case NVGPU_COMPUTE_PREEMPTION_MODE_CILP: | ||
| 969 | return NVGPU_PREEMPTION_MODE_COMPUTE_CILP; | ||
| 970 | } | ||
| 971 | |||
| 972 | return compute_preempt_mode; | ||
| 973 | } | ||
| 974 | |||
| 975 | static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch, | ||
| 976 | u32 graphics_preempt_mode, u32 compute_preempt_mode) | ||
| 977 | { | ||
| 978 | int err; | ||
| 979 | |||
| 980 | if (ch->g->ops.gr.set_preemption_mode) { | ||
| 981 | err = gk20a_busy(ch->g); | ||
| 982 | if (err) { | ||
| 983 | nvgpu_err(ch->g, "failed to power on, %d", err); | ||
| 984 | return err; | ||
| 985 | } | ||
| 986 | err = ch->g->ops.gr.set_preemption_mode(ch, | ||
| 987 | nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode), | ||
| 988 | nvgpu_get_common_compute_preempt_mode(compute_preempt_mode)); | ||
| 989 | gk20a_idle(ch->g); | ||
| 990 | } else { | ||
| 991 | err = -EINVAL; | ||
| 992 | } | ||
| 993 | |||
| 994 | return err; | ||
| 995 | } | ||
| 996 | |||
| 997 | static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch, | ||
| 998 | struct nvgpu_get_user_syncpoint_args *args) | ||
| 999 | { | ||
| 1000 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 1001 | struct gk20a *g = ch->g; | ||
| 1002 | int err; | ||
| 1003 | |||
| 1004 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) { | ||
| 1005 | nvgpu_err(g, "user syncpoints not supported"); | ||
| 1006 | return -EINVAL; | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | if (!nvgpu_has_syncpoints(g)) { | ||
| 1010 | nvgpu_err(g, "syncpoints not supported"); | ||
| 1011 | return -EINVAL; | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | if (g->aggressive_sync_destroy_thresh) { | ||
| 1015 | nvgpu_err(g, "sufficient syncpoints not available"); | ||
| 1016 | return -EINVAL; | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | nvgpu_mutex_acquire(&ch->sync_lock); | ||
| 1020 | if (ch->user_sync) { | ||
| 1021 | nvgpu_mutex_release(&ch->sync_lock); | ||
| 1022 | } else { | ||
| 1023 | ch->user_sync = nvgpu_channel_sync_create(ch, true); | ||
| 1024 | if (!ch->user_sync) { | ||
| 1025 | nvgpu_mutex_release(&ch->sync_lock); | ||
| 1026 | return -ENOMEM; | ||
| 1027 | } | ||
| 1028 | nvgpu_mutex_release(&ch->sync_lock); | ||
| 1029 | |||
| 1030 | if (g->ops.fifo.resetup_ramfc) { | ||
| 1031 | err = g->ops.fifo.resetup_ramfc(ch); | ||
| 1032 | if (err) | ||
| 1033 | return err; | ||
| 1034 | } | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync); | ||
| 1038 | args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev, | ||
| 1039 | args->syncpoint_id); | ||
| 1040 | if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS)) | ||
| 1041 | args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync); | ||
| 1042 | else | ||
| 1043 | args->gpu_va = 0; | ||
| 1044 | |||
| 1045 | return 0; | ||
| 1046 | #else | ||
| 1047 | return -EINVAL; | ||
| 1048 | #endif | ||
| 1049 | } | ||
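User space retrieves the syncpoint with a single ioctl; a sketch with a
hypothetical channel fd:

	struct nvgpu_get_user_syncpoint_args sp = {0};

	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT, &sp) == 0) {
		/* sp.syncpoint_id / sp.syncpoint_max are now valid; sp.gpu_va
		 * is non-zero only when syncpoint addresses are supported */
	}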
| 1050 | |||
| 1051 | long gk20a_channel_ioctl(struct file *filp, | ||
| 1052 | unsigned int cmd, unsigned long arg) | ||
| 1053 | { | ||
| 1054 | struct channel_priv *priv = filp->private_data; | ||
| 1055 | struct channel_gk20a *ch = priv->c; | ||
| 1056 | struct device *dev = dev_from_gk20a(ch->g); | ||
| 1057 | u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0}; | ||
| 1058 | int err = 0; | ||
| 1059 | struct gk20a *g = ch->g; | ||
| 1060 | |||
| 1061 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
| 1062 | |||
| 1063 | if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) || | ||
| 1064 | (_IOC_NR(cmd) == 0) || | ||
| 1065 | (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) || | ||
| 1066 | (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE)) | ||
| 1067 | return -EINVAL; | ||
| 1068 | |||
| 1069 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
| 1070 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
| 1071 | return -EFAULT; | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | /* take a ref or return timeout if channel refs can't be taken */ | ||
| 1075 | ch = gk20a_channel_get(ch); | ||
| 1076 | if (!ch) | ||
| 1077 | return -ETIMEDOUT; | ||
| 1078 | |||
| 1079 | /* protect our sanity for threaded userspace - most of the channel is | ||
| 1080 | * not thread safe */ | ||
| 1081 | nvgpu_mutex_acquire(&ch->ioctl_lock); | ||
| 1082 | |||
| 1083 | /* this ioctl call keeps a ref to the file which keeps a ref to the | ||
| 1084 | * channel */ | ||
| 1085 | |||
| 1086 | nvgpu_speculation_barrier(); | ||
| 1087 | switch (cmd) { | ||
| 1088 | case NVGPU_IOCTL_CHANNEL_OPEN: | ||
| 1089 | err = gk20a_channel_open_ioctl(ch->g, | ||
| 1090 | (struct nvgpu_channel_open_args *)buf); | ||
| 1091 | break; | ||
| 1092 | case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD: | ||
| 1093 | break; | ||
| 1094 | case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX: | ||
| 1095 | { | ||
| 1096 | struct nvgpu_alloc_obj_ctx_args *args = | ||
| 1097 | (struct nvgpu_alloc_obj_ctx_args *)buf; | ||
| 1098 | |||
| 1099 | err = gk20a_busy(ch->g); | ||
| 1100 | if (err) { | ||
| 1101 | dev_err(dev, | ||
| 1102 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1103 | __func__, cmd); | ||
| 1104 | break; | ||
| 1105 | } | ||
| 1106 | err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags); | ||
| 1107 | gk20a_idle(ch->g); | ||
| 1108 | break; | ||
| 1109 | } | ||
| 1110 | case NVGPU_IOCTL_CHANNEL_SETUP_BIND: | ||
| 1111 | { | ||
| 1112 | struct nvgpu_channel_setup_bind_args *channel_setup_bind_args = | ||
| 1113 | (struct nvgpu_channel_setup_bind_args *)buf; | ||
| 1114 | struct nvgpu_setup_bind_args setup_bind_args; | ||
| 1115 | |||
| 1116 | nvgpu_get_setup_bind_args(channel_setup_bind_args, | ||
| 1117 | &setup_bind_args); | ||
| 1118 | |||
| 1119 | err = gk20a_busy(ch->g); | ||
| 1120 | if (err) { | ||
| 1121 | dev_err(dev, | ||
| 1122 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1123 | __func__, cmd); | ||
| 1124 | break; | ||
| 1125 | } | ||
| 1126 | |||
| 1127 | if (!is_power_of_2(setup_bind_args.num_gpfifo_entries)) { | ||
| 1128 | err = -EINVAL; | ||
| 1129 | gk20a_idle(ch->g); | ||
| 1130 | break; | ||
| 1131 | } | ||
| 1132 | err = nvgpu_channel_setup_bind(ch, &setup_bind_args); | ||
| 1133 | channel_setup_bind_args->work_submit_token = | ||
| 1134 | setup_bind_args.work_submit_token; | ||
| 1135 | gk20a_idle(ch->g); | ||
| 1136 | break; | ||
| 1137 | } | ||
| 1138 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: | ||
| 1139 | { | ||
| 1140 | struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args = | ||
| 1141 | (struct nvgpu_alloc_gpfifo_ex_args *)buf; | ||
| 1142 | struct nvgpu_setup_bind_args setup_bind_args; | ||
| 1143 | |||
| 1144 | nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &setup_bind_args); | ||
| 1145 | |||
| 1146 | err = gk20a_busy(ch->g); | ||
| 1147 | if (err) { | ||
| 1148 | dev_err(dev, | ||
| 1149 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1150 | __func__, cmd); | ||
| 1151 | break; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { | ||
| 1155 | err = -EINVAL; | ||
| 1156 | gk20a_idle(ch->g); | ||
| 1157 | break; | ||
| 1158 | } | ||
| 1159 | err = nvgpu_channel_setup_bind(ch, &setup_bind_args); | ||
| 1160 | gk20a_idle(ch->g); | ||
| 1161 | break; | ||
| 1162 | } | ||
| 1163 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: | ||
| 1164 | { | ||
| 1165 | struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = | ||
| 1166 | (struct nvgpu_alloc_gpfifo_args *)buf; | ||
| 1167 | struct nvgpu_setup_bind_args setup_bind_args; | ||
| 1168 | |||
| 1169 | nvgpu_get_gpfifo_args(alloc_gpfifo_args, &setup_bind_args); | ||
| 1170 | |||
| 1171 | err = gk20a_busy(ch->g); | ||
| 1172 | if (err) { | ||
| 1173 | dev_err(dev, | ||
| 1174 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1175 | __func__, cmd); | ||
| 1176 | break; | ||
| 1177 | } | ||
| 1178 | |||
| 1179 | err = nvgpu_channel_setup_bind(ch, &setup_bind_args); | ||
| 1180 | gk20a_idle(ch->g); | ||
| 1181 | break; | ||
| 1182 | } | ||
| 1183 | case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: | ||
| 1184 | err = gk20a_ioctl_channel_submit_gpfifo(ch, | ||
| 1185 | (struct nvgpu_submit_gpfifo_args *)buf); | ||
| 1186 | break; | ||
| 1187 | case NVGPU_IOCTL_CHANNEL_WAIT: | ||
| 1188 | err = gk20a_busy(ch->g); | ||
| 1189 | if (err) { | ||
| 1190 | dev_err(dev, | ||
| 1191 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1192 | __func__, cmd); | ||
| 1193 | break; | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | /* waiting is thread-safe, not dropping this mutex could | ||
| 1197 | * deadlock in certain conditions */ | ||
| 1198 | nvgpu_mutex_release(&ch->ioctl_lock); | ||
| 1199 | |||
| 1200 | err = gk20a_channel_wait(ch, | ||
| 1201 | (struct nvgpu_wait_args *)buf); | ||
| 1202 | |||
| 1203 | nvgpu_mutex_acquire(&ch->ioctl_lock); | ||
| 1204 | |||
| 1205 | gk20a_idle(ch->g); | ||
| 1206 | break; | ||
| 1207 | case NVGPU_IOCTL_CHANNEL_ZCULL_BIND: | ||
| 1208 | err = gk20a_busy(ch->g); | ||
| 1209 | if (err) { | ||
| 1210 | dev_err(dev, | ||
| 1211 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1212 | __func__, cmd); | ||
| 1213 | break; | ||
| 1214 | } | ||
| 1215 | err = gk20a_channel_zcull_bind(ch, | ||
| 1216 | (struct nvgpu_zcull_bind_args *)buf); | ||
| 1217 | gk20a_idle(ch->g); | ||
| 1218 | break; | ||
| 1219 | case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: | ||
| 1220 | err = gk20a_busy(ch->g); | ||
| 1221 | if (err) { | ||
| 1222 | dev_err(dev, | ||
| 1223 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1224 | __func__, cmd); | ||
| 1225 | break; | ||
| 1226 | } | ||
| 1227 | err = gk20a_init_error_notifier(ch, | ||
| 1228 | (struct nvgpu_set_error_notifier *)buf); | ||
| 1229 | gk20a_idle(ch->g); | ||
| 1230 | break; | ||
| 1231 | case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT: | ||
| 1232 | { | ||
| 1233 | u32 timeout = | ||
| 1234 | (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; | ||
| 1235 | nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", | ||
| 1236 | timeout, ch->chid); | ||
| 1237 | ch->timeout_ms_max = timeout; | ||
| 1238 | gk20a_channel_trace_sched_param( | ||
| 1239 | trace_gk20a_channel_set_timeout, ch); | ||
| 1240 | break; | ||
| 1241 | } | ||
| 1242 | case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX: | ||
| 1243 | { | ||
| 1244 | u32 timeout = | ||
| 1245 | (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; | ||
| 1246 | bool timeout_debug_dump = !((u32) | ||
| 1247 | ((struct nvgpu_set_timeout_ex_args *)buf)->flags & | ||
| 1248 | (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP)); | ||
| 1249 | nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", | ||
| 1250 | timeout, ch->chid); | ||
| 1251 | ch->timeout_ms_max = timeout; | ||
| 1252 | ch->timeout_debug_dump = timeout_debug_dump; | ||
| 1253 | gk20a_channel_trace_sched_param( | ||
| 1254 | trace_gk20a_channel_set_timeout, ch); | ||
| 1255 | break; | ||
| 1256 | } | ||
| 1257 | case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: | ||
| 1258 | ((struct nvgpu_get_param_args *)buf)->value = | ||
| 1259 | gk20a_channel_check_timedout(ch); | ||
| 1260 | break; | ||
| 1261 | case NVGPU_IOCTL_CHANNEL_ENABLE: | ||
| 1262 | err = gk20a_busy(ch->g); | ||
| 1263 | if (err) { | ||
| 1264 | dev_err(dev, | ||
| 1265 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1266 | __func__, cmd); | ||
| 1267 | break; | ||
| 1268 | } | ||
| 1269 | if (ch->g->ops.fifo.enable_channel) | ||
| 1270 | ch->g->ops.fifo.enable_channel(ch); | ||
| 1271 | else | ||
| 1272 | err = -ENOSYS; | ||
| 1273 | gk20a_idle(ch->g); | ||
| 1274 | break; | ||
| 1275 | case NVGPU_IOCTL_CHANNEL_DISABLE: | ||
| 1276 | err = gk20a_busy(ch->g); | ||
| 1277 | if (err) { | ||
| 1278 | dev_err(dev, | ||
| 1279 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1280 | __func__, cmd); | ||
| 1281 | break; | ||
| 1282 | } | ||
| 1283 | if (ch->g->ops.fifo.disable_channel) | ||
| 1284 | ch->g->ops.fifo.disable_channel(ch); | ||
| 1285 | else | ||
| 1286 | err = -ENOSYS; | ||
| 1287 | gk20a_idle(ch->g); | ||
| 1288 | break; | ||
| 1289 | case NVGPU_IOCTL_CHANNEL_PREEMPT: | ||
| 1290 | err = gk20a_busy(ch->g); | ||
| 1291 | if (err) { | ||
| 1292 | dev_err(dev, | ||
| 1293 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1294 | __func__, cmd); | ||
| 1295 | break; | ||
| 1296 | } | ||
| 1297 | err = gk20a_fifo_preempt(ch->g, ch); | ||
| 1298 | gk20a_idle(ch->g); | ||
| 1299 | break; | ||
| 1300 | case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST: | ||
| 1301 | if (!capable(CAP_SYS_NICE)) { | ||
| 1302 | err = -EPERM; | ||
| 1303 | break; | ||
| 1304 | } | ||
| 1305 | if (!ch->g->ops.fifo.reschedule_runlist) { | ||
| 1306 | err = -ENOSYS; | ||
| 1307 | break; | ||
| 1308 | } | ||
| 1309 | err = gk20a_busy(ch->g); | ||
| 1310 | if (err) { | ||
| 1311 | dev_err(dev, | ||
| 1312 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1313 | __func__, cmd); | ||
| 1314 | break; | ||
| 1315 | } | ||
| 1316 | err = ch->g->ops.fifo.reschedule_runlist(ch, | ||
| 1317 | NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT & | ||
| 1318 | ((struct nvgpu_reschedule_runlist_args *)buf)->flags); | ||
| 1319 | gk20a_idle(ch->g); | ||
| 1320 | break; | ||
| 1321 | case NVGPU_IOCTL_CHANNEL_FORCE_RESET: | ||
| 1322 | err = gk20a_busy(ch->g); | ||
| 1323 | if (err) { | ||
| 1324 | dev_err(dev, | ||
| 1325 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1326 | __func__, cmd); | ||
| 1327 | break; | ||
| 1328 | } | ||
| 1329 | err = ch->g->ops.fifo.force_reset_ch(ch, | ||
| 1330 | NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); | ||
| 1331 | gk20a_idle(ch->g); | ||
| 1332 | break; | ||
| 1333 | case NVGPU_IOCTL_CHANNEL_WDT: | ||
| 1334 | err = gk20a_channel_set_wdt_status(ch, | ||
| 1335 | (struct nvgpu_channel_wdt_args *)buf); | ||
| 1336 | break; | ||
| 1337 | case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE: | ||
| 1338 | err = nvgpu_ioctl_channel_set_preemption_mode(ch, | ||
| 1339 | ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode, | ||
| 1340 | ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode); | ||
| 1341 | break; | ||
| 1342 | case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX: | ||
| 1343 | if (ch->g->ops.gr.set_boosted_ctx) { | ||
| 1344 | bool boost = | ||
| 1345 | ((struct nvgpu_boosted_ctx_args *)buf)->boost; | ||
| 1346 | |||
| 1347 | err = gk20a_busy(ch->g); | ||
| 1348 | if (err) { | ||
| 1349 | dev_err(dev, | ||
| 1350 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1351 | __func__, cmd); | ||
| 1352 | break; | ||
| 1353 | } | ||
| 1354 | err = ch->g->ops.gr.set_boosted_ctx(ch, boost); | ||
| 1355 | gk20a_idle(ch->g); | ||
| 1356 | } else { | ||
| 1357 | err = -EINVAL; | ||
| 1358 | } | ||
| 1359 | break; | ||
| 1360 | case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: | ||
| 1361 | err = gk20a_busy(ch->g); | ||
| 1362 | if (err) { | ||
| 1363 | dev_err(dev, | ||
| 1364 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
| 1365 | __func__, cmd); | ||
| 1366 | break; | ||
| 1367 | } | ||
| 1368 | err = nvgpu_ioctl_channel_get_user_syncpoint(ch, | ||
| 1369 | (struct nvgpu_get_user_syncpoint_args *)buf); | ||
| 1370 | gk20a_idle(ch->g); | ||
| 1371 | break; | ||
| 1372 | default: | ||
| 1373 | dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); | ||
| 1374 | err = -ENOTTY; | ||
| 1375 | break; | ||
| 1376 | } | ||
| 1377 | |||
| 1378 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
| 1379 | err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); | ||
| 1380 | |||
| 1381 | nvgpu_mutex_release(&ch->ioctl_lock); | ||
| 1382 | |||
| 1383 | gk20a_channel_put(ch); | ||
| 1384 | |||
| 1385 | nvgpu_log_fn(g, "end"); | ||
| 1386 | |||
| 1387 | return err; | ||
| 1388 | } | ||
diff --git a/include/os/linux/ioctl_channel.h b/include/os/linux/ioctl_channel.h new file mode 100644 index 0000000..3e80289 --- /dev/null +++ b/include/os/linux/ioctl_channel.h | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | #ifndef __NVGPU_IOCTL_CHANNEL_H__ | ||
| 14 | #define __NVGPU_IOCTL_CHANNEL_H__ | ||
| 15 | |||
| 16 | #include <linux/fs.h> | ||
| 17 | |||
| 18 | #include "gk20a/css_gr_gk20a.h" | ||
| 19 | |||
| 20 | struct inode; | ||
| 21 | struct file; | ||
| 22 | struct gk20a; | ||
| 23 | struct channel_gk20a; | ||
| 24 | struct nvgpu_channel_open_args; | ||
| 24 | |||
| 25 | struct gk20a_cs_snapshot_client_linux { | ||
| 26 | struct gk20a_cs_snapshot_client cs_client; | ||
| 27 | |||
| 28 | u32 dmabuf_fd; | ||
| 29 | struct dma_buf *dma_handler; | ||
| 30 | }; | ||
| 31 | |||
| 32 | int gk20a_channel_open(struct inode *inode, struct file *filp); | ||
| 33 | int gk20a_channel_release(struct inode *inode, struct file *filp); | ||
| 34 | long gk20a_channel_ioctl(struct file *filp, | ||
| 35 | unsigned int cmd, unsigned long arg); | ||
| 36 | int gk20a_channel_open_ioctl(struct gk20a *g, | ||
| 37 | struct nvgpu_channel_open_args *args); | ||
| 38 | |||
| 39 | int gk20a_channel_cycle_stats(struct channel_gk20a *ch, int dmabuf_fd); | ||
| 40 | void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch); | ||
| 41 | |||
| 42 | int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, | ||
| 43 | u32 dmabuf_fd, | ||
| 44 | u32 perfmon_id_count, | ||
| 45 | u32 *perfmon_id_start); | ||
| 46 | int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch); | ||
| 47 | int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch); | ||
| 48 | |||
| 49 | extern const struct file_operations gk20a_channel_ops; | ||
| 50 | |||
| 51 | u32 nvgpu_get_common_runlist_level(u32 level); | ||
| 52 | |||
| 53 | u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags); | ||
| 54 | u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags); | ||
| 55 | u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode); | ||
| 56 | u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode); | ||
| 57 | #endif | ||
diff --git a/include/os/linux/ioctl_clk_arb.c b/include/os/linux/ioctl_clk_arb.c new file mode 100644 index 0000000..477222d --- /dev/null +++ b/include/os/linux/ioctl_clk_arb.c | |||
| @@ -0,0 +1,574 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/cdev.h> | ||
| 18 | #include <linux/file.h> | ||
| 19 | #include <linux/anon_inodes.h> | ||
| 20 | #include <linux/uaccess.h> | ||
| 21 | #include <linux/poll.h> | ||
| 22 | #ifdef CONFIG_DEBUG_FS | ||
| 23 | #include <linux/debugfs.h> | ||
| 24 | #endif | ||
| 25 | #include <uapi/linux/nvgpu.h> | ||
| 26 | |||
| 27 | #include <nvgpu/bitops.h> | ||
| 28 | #include <nvgpu/lock.h> | ||
| 29 | #include <nvgpu/kmem.h> | ||
| 30 | #include <nvgpu/atomic.h> | ||
| 31 | #include <nvgpu/bug.h> | ||
| 32 | #include <nvgpu/kref.h> | ||
| 33 | #include <nvgpu/log.h> | ||
| 34 | #include <nvgpu/barrier.h> | ||
| 35 | #include <nvgpu/cond.h> | ||
| 36 | #include <nvgpu/list.h> | ||
| 37 | #include <nvgpu/clk_arb.h> | ||
| 38 | #include <nvgpu/gk20a.h> | ||
| 39 | |||
| 40 | #include "clk/clk.h" | ||
| 41 | #include "pstate/pstate.h" | ||
| 42 | #include "lpwr/lpwr.h" | ||
| 43 | #include "volt/volt.h" | ||
| 44 | |||
| 45 | #ifdef CONFIG_DEBUG_FS | ||
| 46 | #include "os_linux.h" | ||
| 47 | #endif | ||
| 48 | |||
| 49 | static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | ||
| 50 | struct file *filp) | ||
| 51 | { | ||
| 52 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
| 53 | struct nvgpu_clk_session *session = dev->session; | ||
| 54 | |||
| 55 | |||
| 56 | clk_arb_dbg(session->g, " "); | ||
| 57 | |||
| 58 | /* Drop the extra refcount taken in nvgpu_clk_arb_commit_request_fd | ||
| 59 | * when events are not supported (iGPU). | ||
| 60 | */ | ||
| 61 | if (!session->g->clk_arb->clk_arb_events_supported) { | ||
| 62 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
| 63 | } | ||
| 64 | |||
| 65 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | ||
| 66 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
| 67 | return 0; | ||
| 68 | } | ||
| 69 | |||
| 70 | static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask) | ||
| 71 | { | ||
| 72 | unsigned int poll_mask = 0; | ||
| 73 | |||
| 74 | if (nvgpu_poll_mask & NVGPU_POLLIN) | ||
| 75 | poll_mask |= POLLIN; | ||
| 76 | if (nvgpu_poll_mask & NVGPU_POLLPRI) | ||
| 77 | poll_mask |= POLLPRI; | ||
| 78 | if (nvgpu_poll_mask & NVGPU_POLLOUT) | ||
| 79 | poll_mask |= POLLOUT; | ||
| 80 | if (nvgpu_poll_mask & NVGPU_POLLRDNORM) | ||
| 81 | poll_mask |= POLLRDNORM; | ||
| 82 | if (nvgpu_poll_mask & NVGPU_POLLHUP) | ||
| 83 | poll_mask |= POLLHUP; | ||
| 84 | |||
| 85 | return poll_mask; | ||
| 86 | } | ||
| 87 | |||
| 88 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) | ||
| 89 | { | ||
| 90 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
| 91 | |||
| 92 | clk_arb_dbg(dev->session->g, " "); | ||
| 93 | |||
| 94 | poll_wait(filp, &dev->readout_wq.wq, wait); | ||
| 95 | return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0)); | ||
| 96 | } | ||
| 97 | |||
| 98 | void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev) | ||
| 99 | { | ||
| 100 | nvgpu_cond_broadcast_interruptible(&dev->readout_wq); | ||
| 101 | } | ||
| 102 | |||
| 103 | static int nvgpu_clk_arb_release_event_dev(struct inode *inode, | ||
| 104 | struct file *filp) | ||
| 105 | { | ||
| 106 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
| 107 | struct nvgpu_clk_session *session = dev->session; | ||
| 108 | struct nvgpu_clk_arb *arb; | ||
| 109 | |||
| 110 | arb = session->g->clk_arb; | ||
| 111 | |||
| 112 | clk_arb_dbg(session->g, " "); | ||
| 113 | |||
| 114 | if (arb) { | ||
| 115 | nvgpu_spinlock_acquire(&arb->users_lock); | ||
| 116 | nvgpu_list_del(&dev->link); | ||
| 117 | nvgpu_spinlock_release(&arb->users_lock); | ||
| 118 | nvgpu_clk_notification_queue_free(arb->g, &dev->queue); | ||
| 119 | } | ||
| 120 | |||
| 121 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | ||
| 122 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
| 123 | |||
| 124 | return 0; | ||
| 125 | } | ||
| 126 | |||
| 127 | static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event) | ||
| 128 | { | ||
| 129 | u32 nvgpu_gpu_event; | ||
| 130 | |||
| 131 | switch (nvgpu_event) { | ||
| 132 | case NVGPU_EVENT_VF_UPDATE: | ||
| 133 | nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE; | ||
| 134 | break; | ||
| 135 | case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE: | ||
| 136 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE; | ||
| 137 | break; | ||
| 138 | case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE: | ||
| 139 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE; | ||
| 140 | break; | ||
| 141 | case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED: | ||
| 142 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED; | ||
| 143 | break; | ||
| 144 | case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED: | ||
| 145 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED; | ||
| 146 | break; | ||
| 147 | case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD: | ||
| 148 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD; | ||
| 149 | break; | ||
| 150 | case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD: | ||
| 151 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD; | ||
| 152 | break; | ||
| 153 | case NVGPU_EVENT_ALARM_GPU_LOST: | ||
| 154 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST; | ||
| 155 | break; | ||
| 156 | default: | ||
| 157 | /* Control shouldn't come here */ | ||
| 158 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1; | ||
| 159 | break; | ||
| 160 | } | ||
| 161 | return nvgpu_gpu_event; | ||
| 162 | } | ||
| 163 | |||
| 164 | static inline u32 __pending_event(struct nvgpu_clk_dev *dev, | ||
| 165 | struct nvgpu_gpu_event_info *info) { | ||
| 166 | |||
| 167 | u32 tail, head; | ||
| 168 | u32 events = 0; | ||
| 169 | struct nvgpu_clk_notification *p_notif; | ||
| 170 | |||
| 171 | tail = nvgpu_atomic_read(&dev->queue.tail); | ||
| 172 | head = nvgpu_atomic_read(&dev->queue.head); | ||
| 173 | |||
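	/* If the producer has overrun this consumer, drop the oldest
	 * notifications so at most queue.size of the newest remain. */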
| 174 | head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; | ||
| 175 | |||
| 176 | if (_WRAPGTEQ(tail, head) && info) { | ||
| 177 | head++; | ||
| 178 | p_notif = &dev->queue.notifications[head % dev->queue.size]; | ||
| 179 | events |= nvgpu_convert_gpu_event(p_notif->notification); | ||
| 180 | info->event_id = ffs(events) - 1; | ||
| 181 | info->timestamp = p_notif->timestamp; | ||
| 182 | nvgpu_atomic_set(&dev->queue.head, head); | ||
| 183 | } | ||
| 184 | |||
| 185 | return events; | ||
| 186 | } | ||
| 187 | |||
| 188 | static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, | ||
| 189 | size_t size, loff_t *off) | ||
| 190 | { | ||
| 191 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
| 192 | struct nvgpu_gpu_event_info info; | ||
| 193 | ssize_t err; | ||
| 194 | |||
| 195 | clk_arb_dbg(dev->session->g, | ||
| 196 | "filp=%p, buf=%p, size=%zu", filp, buf, size); | ||
| 197 | |||
| 198 | if ((size - *off) < sizeof(info)) | ||
| 199 | return 0; | ||
| 200 | |||
| 201 | memset(&info, 0, sizeof(info)); | ||
| 202 | /* Get the oldest event from the queue */ | ||
| 203 | while (!__pending_event(dev, &info)) { | ||
| 204 | if (filp->f_flags & O_NONBLOCK) | ||
| 205 | return -EAGAIN; | ||
| 206 | err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, | ||
| 207 | __pending_event(dev, &info), 0); | ||
| 208 | if (err) | ||
| 209 | return err; | ||
| 210 | if (info.timestamp) | ||
| 211 | break; | ||
| 212 | } | ||
| 213 | |||
| 214 | if (copy_to_user(buf + *off, &info, sizeof(info))) | ||
| 215 | return -EFAULT; | ||
| 216 | |||
| 217 | return sizeof(info); | ||
| 218 | } | ||
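On the consuming side each read() returns one fixed-size record; a blocking
user-space loop can be as simple as (hypothetical event fd and handler):

	struct nvgpu_gpu_event_info info;

	while (read(event_fd, &info, sizeof(info)) == (ssize_t)sizeof(info)) {
		/* info.event_id is an NVGPU_GPU_EVENT_* number and
		 * info.timestamp records when it was queued */
		handle_event(&info);	/* hypothetical consumer */
	}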
| 219 | |||
| 220 | static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, | ||
| 221 | struct nvgpu_gpu_set_event_filter_args *args) | ||
| 222 | { | ||
| 223 | struct gk20a *g = dev->session->g; | ||
| 224 | u32 mask; | ||
| 225 | |||
| 226 | nvgpu_log(g, gpu_dbg_fn, " "); | ||
| 227 | |||
| 228 | if (args->flags) | ||
| 229 | return -EINVAL; | ||
| 230 | |||
| 231 | if (args->size != 1) | ||
| 232 | return -EINVAL; | ||
| 233 | |||
| 234 | if (copy_from_user(&mask, (void __user *) args->buffer, | ||
| 235 | args->size * sizeof(u32))) | ||
| 236 | return -EFAULT; | ||
| 237 | |||
| 238 | /* update alarm mask */ | ||
| 239 | nvgpu_atomic_set(&dev->enabled_mask, mask); | ||
| 240 | |||
| 241 | return 0; | ||
| 242 | } | ||
| 243 | |||
| 244 | static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, | ||
| 245 | unsigned long arg) | ||
| 246 | { | ||
| 247 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
| 248 | struct gk20a *g = dev->session->g; | ||
| 249 | u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; | ||
| 250 | int err = 0; | ||
| 251 | |||
| 252 | nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); | ||
| 253 | |||
| 254 | if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) | ||
| 255 | || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) | ||
| 256 | return -EINVAL; | ||
| 257 | |||
| 258 | BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); | ||
| 259 | |||
| 260 | memset(buf, 0, sizeof(buf)); | ||
| 261 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
| 262 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
| 263 | return -EFAULT; | ||
| 264 | } | ||
| 265 | |||
| 266 | switch (cmd) { | ||
| 267 | case NVGPU_EVENT_IOCTL_SET_FILTER: | ||
| 268 | err = nvgpu_clk_arb_set_event_filter(dev, | ||
| 269 | (struct nvgpu_gpu_set_event_filter_args *)buf); | ||
| 270 | break; | ||
| 271 | default: | ||
| 272 | nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); | ||
| 273 | err = -ENOTTY; | ||
| 274 | } | ||
| 275 | |||
| 276 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
| 277 | 		err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)) ? -EFAULT : 0; | ||
| 278 | |||
| 279 | return err; | ||
| 280 | } | ||
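The only event ioctl dispatched here is `NVGPU_EVENT_IOCTL_SET_FILTER`, which accepts exactly one 32-bit mask word. A hypothetical caller that enables only VF-update notifications, matching the `EVENT(VF_UPDATE)` default applied in `nvgpu_clk_arb_install_event_fd()` below (struct and constant spellings are taken from the handler above and the uapi header, and should be treated as assumptions):

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int enable_vf_updates_only(int event_fd)
{
	uint32_t mask = 1u << NVGPU_GPU_EVENT_VF_UPDATE;
	struct nvgpu_gpu_set_event_filter_args args = {
		.flags = 0,			/* must be zero */
		.size = 1,			/* exactly one mask word */
		.buffer = (uintptr_t)&mask,
	};

	return ioctl(event_fd, NVGPU_EVENT_IOCTL_SET_FILTER, &args);
}
```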
| 281 | |||
| 282 | static const struct file_operations completion_dev_ops = { | ||
| 283 | .owner = THIS_MODULE, | ||
| 284 | .release = nvgpu_clk_arb_release_completion_dev, | ||
| 285 | .poll = nvgpu_clk_arb_poll_dev, | ||
| 286 | }; | ||
| 287 | |||
| 288 | static const struct file_operations event_dev_ops = { | ||
| 289 | .owner = THIS_MODULE, | ||
| 290 | .release = nvgpu_clk_arb_release_event_dev, | ||
| 291 | .poll = nvgpu_clk_arb_poll_dev, | ||
| 292 | .read = nvgpu_clk_arb_read_event_dev, | ||
| 293 | #ifdef CONFIG_COMPAT | ||
| 294 | .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev, | ||
| 295 | #endif | ||
| 296 | .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev, | ||
| 297 | }; | ||
| 298 | |||
| 299 | static int nvgpu_clk_arb_install_fd(struct gk20a *g, | ||
| 300 | struct nvgpu_clk_session *session, | ||
| 301 | const struct file_operations *fops, | ||
| 302 | struct nvgpu_clk_dev **_dev) | ||
| 303 | { | ||
| 304 | struct file *file; | ||
| 305 | int fd; | ||
| 306 | int err; | ||
| 307 | int status; | ||
| 308 | char name[64]; | ||
| 309 | struct nvgpu_clk_dev *dev; | ||
| 310 | |||
| 311 | clk_arb_dbg(g, " "); | ||
| 312 | |||
| 313 | dev = nvgpu_kzalloc(g, sizeof(*dev)); | ||
| 314 | if (!dev) | ||
| 315 | return -ENOMEM; | ||
| 316 | |||
| 317 | status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, | ||
| 318 | DEFAULT_EVENT_NUMBER); | ||
| 319 | if (status < 0) { | ||
| 320 | err = status; | ||
| 321 | goto fail; | ||
| 322 | } | ||
| 323 | |||
| 324 | fd = get_unused_fd_flags(O_RDWR); | ||
| 325 | if (fd < 0) { | ||
| 326 | err = fd; | ||
| 327 | goto fail; | ||
| 328 | } | ||
| 329 | |||
| 330 | snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); | ||
| 331 | file = anon_inode_getfile(name, fops, dev, O_RDWR); | ||
| 332 | if (IS_ERR(file)) { | ||
| 333 | err = PTR_ERR(file); | ||
| 334 | goto fail_fd; | ||
| 335 | } | ||
| 336 | |||
| 337 | fd_install(fd, file); | ||
| 338 | |||
| 339 | nvgpu_cond_init(&dev->readout_wq); | ||
| 340 | |||
| 341 | nvgpu_atomic_set(&dev->poll_mask, 0); | ||
| 342 | |||
| 343 | dev->session = session; | ||
| 344 | nvgpu_ref_init(&dev->refcount); | ||
| 345 | |||
| 346 | nvgpu_ref_get(&session->refcount); | ||
| 347 | |||
| 348 | *_dev = dev; | ||
| 349 | |||
| 350 | return fd; | ||
| 351 | |||
| 352 | fail_fd: | ||
| 353 | put_unused_fd(fd); | ||
| 354 | fail: | ||
| 355 | nvgpu_kfree(g, dev); | ||
| 356 | |||
| 357 | return err; | ||
| 358 | } | ||
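`nvgpu_clk_arb_install_fd()` follows the usual anon-inode sequence: reserve an fd number, create the backing `struct file`, and only then publish the pair with `fd_install()`, because a published fd can no longer be unwound with `put_unused_fd()`. A stripped-down sketch of that ordering with hypothetical names (strictly, all of the private data should be ready before `fd_install()`; the function above finishes initializing `dev` slightly after it):

```c
#include <linux/anon_inodes.h>
#include <linux/err.h>
#include <linux/file.h>
#include <linux/fs.h>

static int my_install_fd(const struct file_operations *fops, void *priv)
{
	struct file *file;
	int fd = get_unused_fd_flags(O_RDWR);	/* reserve a number */

	if (fd < 0)
		return fd;

	file = anon_inode_getfile("my-dev", fops, priv, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);	/* safe: fd not yet published */
		return PTR_ERR(file);
	}

	/* Finish initializing priv, then publish as the very last step. */
	fd_install(fd, file);		/* now visible to user space */
	return fd;
}
```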
| 359 | |||
| 360 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | ||
| 361 | struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask) | ||
| 362 | { | ||
| 363 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
| 364 | struct nvgpu_clk_dev *dev; | ||
| 365 | int fd; | ||
| 366 | |||
| 367 | clk_arb_dbg(g, " "); | ||
| 368 | |||
| 369 | fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); | ||
| 370 | if (fd < 0) | ||
| 371 | return fd; | ||
| 372 | |||
| 373 | /* TODO: alarm mask needs to be set to default value to prevent | ||
| 374 | * failures of legacy tests. This will be removed when sanity is | ||
| 375 | * updated | ||
| 376 | */ | ||
| 377 | if (alarm_mask) | ||
| 378 | nvgpu_atomic_set(&dev->enabled_mask, alarm_mask); | ||
| 379 | else | ||
| 380 | nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); | ||
| 381 | |||
| 382 | dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); | ||
| 383 | |||
| 384 | nvgpu_spinlock_acquire(&arb->users_lock); | ||
| 385 | nvgpu_list_add_tail(&dev->link, &arb->users); | ||
| 386 | nvgpu_spinlock_release(&arb->users_lock); | ||
| 387 | |||
| 388 | *event_fd = fd; | ||
| 389 | |||
| 390 | return 0; | ||
| 391 | } | ||
| 392 | |||
| 393 | int nvgpu_clk_arb_install_request_fd(struct gk20a *g, | ||
| 394 | struct nvgpu_clk_session *session, int *request_fd) | ||
| 395 | { | ||
| 396 | struct nvgpu_clk_dev *dev; | ||
| 397 | int fd; | ||
| 398 | |||
| 399 | clk_arb_dbg(g, " "); | ||
| 400 | |||
| 401 | fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); | ||
| 402 | if (fd < 0) | ||
| 403 | return fd; | ||
| 404 | |||
| 405 | *request_fd = fd; | ||
| 406 | |||
| 407 | return 0; | ||
| 408 | } | ||
| 409 | |||
| 410 | int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, | ||
| 411 | struct nvgpu_clk_session *session, int request_fd) | ||
| 412 | { | ||
| 413 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
| 414 | struct nvgpu_clk_dev *dev; | ||
| 415 | struct fd fd; | ||
| 416 | int err = 0; | ||
| 417 | |||
| 418 | clk_arb_dbg(g, " "); | ||
| 419 | |||
| 420 | fd = fdget(request_fd); | ||
| 421 | if (!fd.file) | ||
| 422 | return -EINVAL; | ||
| 423 | |||
| 424 | if (fd.file->f_op != &completion_dev_ops) { | ||
| 425 | err = -EINVAL; | ||
| 426 | goto fdput_fd; | ||
| 427 | } | ||
| 428 | |||
| 429 | dev = (struct nvgpu_clk_dev *) fd.file->private_data; | ||
| 430 | |||
| 431 | if (!dev || dev->session != session) { | ||
| 432 | err = -EINVAL; | ||
| 433 | goto fdput_fd; | ||
| 434 | } | ||
| 435 | |||
| 436 | 	clk_arb_dbg(g, "requested target = %u", | ||
| 437 | (u32)dev->gpc2clk_target_mhz); | ||
| 438 | |||
| 439 | nvgpu_atomic_inc(&g->clk_arb_global_nr); | ||
| 440 | nvgpu_ref_get(&dev->refcount); | ||
| 441 | nvgpu_spinlock_acquire(&session->session_lock); | ||
| 442 | nvgpu_list_add(&dev->node, &session->targets); | ||
| 443 | nvgpu_spinlock_release(&session->session_lock); | ||
| 444 | nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); | ||
| 445 | |||
| 446 | fdput_fd: | ||
| 447 | fdput(fd); | ||
| 448 | return err; | ||
| 449 | } | ||
| 450 | |||
| 451 | int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, | ||
| 452 | int request_fd, u32 api_domain, u16 target_mhz) | ||
| 453 | { | ||
| 454 | struct nvgpu_clk_dev *dev; | ||
| 455 | struct fd fd; | ||
| 456 | int err = 0; | ||
| 457 | |||
| 458 | clk_arb_dbg(session->g, | ||
| 459 | "domain=0x%08x target_mhz=%u", api_domain, target_mhz); | ||
| 460 | |||
| 461 | fd = fdget(request_fd); | ||
| 462 | if (!fd.file) | ||
| 463 | return -EINVAL; | ||
| 464 | |||
| 465 | if (fd.file->f_op != &completion_dev_ops) { | ||
| 466 | err = -EINVAL; | ||
| 467 | goto fdput_fd; | ||
| 468 | } | ||
| 469 | |||
| 470 | dev = fd.file->private_data; | ||
| 471 | if (!dev || dev->session != session) { | ||
| 472 | err = -EINVAL; | ||
| 473 | goto fdput_fd; | ||
| 474 | } | ||
| 475 | |||
| 476 | switch (api_domain) { | ||
| 477 | case NVGPU_CLK_DOMAIN_MCLK: | ||
| 478 | dev->mclk_target_mhz = target_mhz; | ||
| 479 | break; | ||
| 480 | |||
| 481 | case NVGPU_CLK_DOMAIN_GPCCLK: | ||
| 482 | dev->gpc2clk_target_mhz = target_mhz * 2ULL; | ||
| 483 | break; | ||
| 484 | |||
| 485 | default: | ||
| 486 | err = -EINVAL; | ||
| 487 | } | ||
| 488 | |||
| 489 | fdput_fd: | ||
| 490 | fdput(fd); | ||
| 491 | return err; | ||
| 492 | } | ||
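Together, the helpers above form a small request protocol: install a request ("completion") fd, stage per-domain targets against it, then commit, which queues an arbiter work item and later signals the fd. A sketch of the sequence as an ioctl path would drive it (error unwinding trimmed; `nvgpu_gpu_clk_set_info()` in ioctl_ctrl.c below is the real caller):

```c
/* Sketch only: assumes a valid g/session pair from a ctrl fd. */
static int set_gpcclk_target(struct gk20a *g,
			     struct nvgpu_clk_session *session, u16 mhz)
{
	int fd, err;

	err = nvgpu_clk_arb_install_request_fd(g, session, &fd);
	if (err < 0)
		return err;

	/* Stage the target; GPCCLK is tracked internally as 2x (gpc2clk). */
	err = nvgpu_clk_arb_set_session_target_mhz(session, fd,
			NVGPU_CLK_DOMAIN_GPCCLK, mhz);
	if (err == 0)
		err = nvgpu_clk_arb_commit_request_fd(g, session, fd);

	/* On success the caller polls 'fd' to observe completion. */
	return err < 0 ? err : fd;
}
```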
| 493 | |||
| 494 | u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) | ||
| 495 | { | ||
| 496 | u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); | ||
| 497 | u32 api_domains = 0; | ||
| 498 | |||
| 499 | if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) | ||
| 500 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); | ||
| 501 | |||
| 502 | if (clk_domains & CTRL_CLK_DOMAIN_MCLK) | ||
| 503 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); | ||
| 504 | |||
| 505 | return api_domains; | ||
| 506 | } | ||
| 507 | |||
| 508 | #ifdef CONFIG_DEBUG_FS | ||
| 509 | static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) | ||
| 510 | { | ||
| 511 | struct gk20a *g = s->private; | ||
| 512 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
| 513 | struct nvgpu_clk_arb_debug *debug; | ||
| 514 | |||
| 515 | u64 num; | ||
| 516 | s64 tmp, avg, std, max, min; | ||
| 517 | |||
| 518 | debug = NV_ACCESS_ONCE(arb->debug); | ||
| 519 | /* Make copy of structure and ensure no reordering */ | ||
| 520 | nvgpu_smp_rmb(); | ||
| 521 | if (!debug) | ||
| 522 | return -EINVAL; | ||
| 523 | |||
| 524 | std = debug->switch_std; | ||
| 525 | avg = debug->switch_avg; | ||
| 526 | max = debug->switch_max; | ||
| 527 | min = debug->switch_min; | ||
| 528 | num = debug->switch_num; | ||
| 529 | |||
| 530 | tmp = std; | ||
| 531 | do_div(tmp, num); | ||
| 532 | 	seq_printf(s, "Number of transitions: %llu\n", | ||
| 533 | num); | ||
| 534 | seq_printf(s, "max / min : %lld / %lld usec\n", | ||
| 535 | max, min); | ||
| 536 | 	seq_printf(s, "avg / std : %lld / %lu usec\n", | ||
| 537 | avg, int_sqrt(tmp)); | ||
| 538 | |||
| 539 | return 0; | ||
| 540 | } | ||
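The arithmetic in the dump assumes `switch_avg`, `switch_max`, and `switch_min` are in microseconds and that `switch_std` accumulates the sum of squared deviations (µs²) over `switch_num` samples, so `int_sqrt(switch_std / switch_num)` prints the population standard deviation:

```latex
\sigma = \sqrt{\frac{1}{N}\sum_{i=1}^{N}\left(t_i - \bar{t}\right)^2}
```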
| 541 | |||
| 542 | static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) | ||
| 543 | { | ||
| 544 | return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); | ||
| 545 | } | ||
| 546 | |||
| 547 | static const struct file_operations nvgpu_clk_arb_stats_fops = { | ||
| 548 | .open = nvgpu_clk_arb_stats_open, | ||
| 549 | .read = seq_read, | ||
| 550 | .llseek = seq_lseek, | ||
| 551 | .release = single_release, | ||
| 552 | }; | ||
| 553 | |||
| 554 | |||
| 555 | int nvgpu_clk_arb_debugfs_init(struct gk20a *g) | ||
| 556 | { | ||
| 557 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 558 | struct dentry *gpu_root = l->debugfs; | ||
| 559 | struct dentry *d; | ||
| 560 | |||
| 561 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
| 562 | |||
| 563 | d = debugfs_create_file( | ||
| 564 | "arb_stats", | ||
| 565 | S_IRUGO, | ||
| 566 | gpu_root, | ||
| 567 | g, | ||
| 568 | &nvgpu_clk_arb_stats_fops); | ||
| 569 | if (!d) | ||
| 570 | return -ENOMEM; | ||
| 571 | |||
| 572 | return 0; | ||
| 573 | } | ||
| 574 | #endif | ||
diff --git a/include/os/linux/ioctl_ctrl.c b/include/os/linux/ioctl_ctrl.c new file mode 100644 index 0000000..ee141ff --- /dev/null +++ b/include/os/linux/ioctl_ctrl.c | |||
| @@ -0,0 +1,2106 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2011-2020, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/uaccess.h> | ||
| 18 | #include <linux/cdev.h> | ||
| 19 | #include <linux/file.h> | ||
| 20 | #include <linux/anon_inodes.h> | ||
| 21 | #include <linux/fs.h> | ||
| 22 | #include <linux/pm_runtime.h> | ||
| 23 | #include <uapi/linux/nvgpu.h> | ||
| 24 | |||
| 25 | #include <nvgpu/bitops.h> | ||
| 26 | #include <nvgpu/kmem.h> | ||
| 27 | #include <nvgpu/bug.h> | ||
| 28 | #include <nvgpu/ptimer.h> | ||
| 29 | #include <nvgpu/vidmem.h> | ||
| 30 | #include <nvgpu/log.h> | ||
| 31 | #include <nvgpu/enabled.h> | ||
| 32 | #include <nvgpu/sizes.h> | ||
| 33 | #include <nvgpu/list.h> | ||
| 34 | #include <nvgpu/clk_arb.h> | ||
| 35 | #include <nvgpu/gk20a.h> | ||
| 36 | #include <nvgpu/channel.h> | ||
| 37 | |||
| 38 | #include "ioctl_ctrl.h" | ||
| 39 | #include "ioctl_dbg.h" | ||
| 40 | #include "ioctl_as.h" | ||
| 41 | #include "ioctl_tsg.h" | ||
| 42 | #include "ioctl_channel.h" | ||
| 43 | #include "gk20a/fence_gk20a.h" | ||
| 44 | |||
| 45 | #include "platform_gk20a.h" | ||
| 46 | #include "os_linux.h" | ||
| 47 | #include "dmabuf.h" | ||
| 48 | #include "channel.h" | ||
| 49 | #include "dmabuf_vidmem.h" | ||
| 50 | |||
| 51 | #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? \ | ||
| 52 | (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) | ||
| 53 | #define MHZ_TO_HZ(a) ((u64)a * MHZ) | ||
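`HZ_TO_MHZ()` is a fixed-point divide-by-10^6 that avoids a 64-bit division on 32-bit hosts. Inputs that fit in 32 bits are divided directly; larger inputs multiply by 0x10C8 = 4296 (a slight over-approximation of 2^32/10^6 ≈ 4294.97) and shift right by 32:

```latex
\frac{a}{10^{6}} \;\approx\; \frac{a \cdot \mathtt{0x10C8}}{2^{32}},
\qquad \text{relative error} = \frac{4296}{4294.967\ldots} - 1 \approx 0.024\%.
```

The guard constant 0xF414F9CD7 = 65,520,245,975 is exactly the largest input for which `(a * 0x10C8) >> 32` still fits in 16 bits (it evaluates to 65535), so anything larger saturates to 0xffff instead of wrapping.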
| 54 | |||
| 55 | struct gk20a_ctrl_priv { | ||
| 56 | struct device *dev; | ||
| 57 | struct gk20a *g; | ||
| 58 | struct nvgpu_clk_session *clk_session; | ||
| 59 | |||
| 60 | struct nvgpu_list_node list; | ||
| 61 | struct { | ||
| 62 | struct vm_area_struct *vma; | ||
| 63 | unsigned long flags; | ||
| 64 | bool vma_mapped; | ||
| 65 | } usermode_vma; | ||
| 66 | }; | ||
| 67 | |||
| 68 | static inline struct gk20a_ctrl_priv * | ||
| 69 | gk20a_ctrl_priv_from_list(struct nvgpu_list_node *node) | ||
| 70 | { | ||
| 71 | return (struct gk20a_ctrl_priv *) | ||
| 72 | ((uintptr_t)node - offsetof(struct gk20a_ctrl_priv, list)); | ||
| 73 | } | ||
| 74 | |||
| 75 | static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags) | ||
| 76 | { | ||
| 77 | u32 core_flags = 0; | ||
| 78 | |||
| 79 | if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) | ||
| 80 | core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED; | ||
| 81 | |||
| 82 | return core_flags; | ||
| 83 | } | ||
| 84 | |||
| 85 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) | ||
| 86 | { | ||
| 87 | struct nvgpu_os_linux *l; | ||
| 88 | struct gk20a *g; | ||
| 89 | struct gk20a_ctrl_priv *priv; | ||
| 90 | int err = 0; | ||
| 91 | |||
| 92 | l = container_of(inode->i_cdev, | ||
| 93 | struct nvgpu_os_linux, ctrl.cdev); | ||
| 94 | g = gk20a_get(&l->g); | ||
| 95 | if (!g) | ||
| 96 | return -ENODEV; | ||
| 97 | |||
| 98 | nvgpu_log_fn(g, " "); | ||
| 99 | |||
| 100 | priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv)); | ||
| 101 | if (!priv) { | ||
| 102 | err = -ENOMEM; | ||
| 103 | goto free_ref; | ||
| 104 | } | ||
| 105 | filp->private_data = priv; | ||
| 106 | priv->dev = dev_from_gk20a(g); | ||
| 107 | /* | ||
| 108 | 	 * We don't close the arbiter fds after driver teardown, in order to | ||
| 109 | 	 * support GPU_LOST events, so we store g here instead of | ||
| 110 | 	 * dereferencing the dev structure on teardown. | ||
| 111 | */ | ||
| 112 | priv->g = g; | ||
| 113 | |||
| 114 | if (!g->sw_ready) { | ||
| 115 | err = gk20a_busy(g); | ||
| 116 | if (err) | ||
| 117 | goto free_ref; | ||
| 118 | gk20a_idle(g); | ||
| 119 | } | ||
| 120 | |||
| 121 | err = nvgpu_clk_arb_init_session(g, &priv->clk_session); | ||
| 122 | free_ref: | ||
| 123 | if (err != 0) { | ||
| 124 | gk20a_put(g); | ||
| 125 | if (priv) | ||
| 126 | nvgpu_kfree(g, priv); | ||
| 127 | } else { | ||
| 128 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
| 129 | nvgpu_list_add(&priv->list, &l->ctrl.privs); | ||
| 130 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
| 131 | } | ||
| 132 | |||
| 133 | return err; | ||
| 134 | } | ||
| 135 | int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp) | ||
| 136 | { | ||
| 137 | struct gk20a_ctrl_priv *priv = filp->private_data; | ||
| 138 | struct gk20a *g = priv->g; | ||
| 139 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 140 | |||
| 141 | nvgpu_log_fn(g, " "); | ||
| 142 | |||
| 143 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
| 144 | nvgpu_list_del(&priv->list); | ||
| 145 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
| 146 | |||
| 147 | if (priv->clk_session) | ||
| 148 | nvgpu_clk_arb_release_session(g, priv->clk_session); | ||
| 149 | |||
| 150 | gk20a_put(g); | ||
| 151 | nvgpu_kfree(g, priv); | ||
| 152 | |||
| 153 | return 0; | ||
| 154 | } | ||
| 155 | |||
| 156 | struct nvgpu_flags_mapping { | ||
| 157 | u64 ioctl_flag; | ||
| 158 | int enabled_flag; | ||
| 159 | }; | ||
| 160 | |||
| 161 | static struct nvgpu_flags_mapping flags_mapping[] = { | ||
| 162 | {NVGPU_GPU_FLAGS_CAN_RAILGATE, | ||
| 163 | NVGPU_CAN_RAILGATE}, | ||
| 164 | {NVGPU_GPU_FLAGS_HAS_SYNCPOINTS, | ||
| 165 | NVGPU_HAS_SYNCPOINTS}, | ||
| 166 | {NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS, | ||
| 167 | NVGPU_SUPPORT_PARTIAL_MAPPINGS}, | ||
| 168 | {NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS, | ||
| 169 | NVGPU_SUPPORT_SPARSE_ALLOCS}, | ||
| 170 | {NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS, | ||
| 171 | NVGPU_SUPPORT_SYNC_FENCE_FDS}, | ||
| 172 | {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS, | ||
| 173 | NVGPU_SUPPORT_CYCLE_STATS}, | ||
| 174 | {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT, | ||
| 175 | NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT}, | ||
| 176 | {NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS, | ||
| 177 | NVGPU_SUPPORT_USERSPACE_MANAGED_AS}, | ||
| 178 | {NVGPU_GPU_FLAGS_SUPPORT_TSG, | ||
| 179 | NVGPU_SUPPORT_TSG}, | ||
| 180 | {NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS, | ||
| 181 | NVGPU_SUPPORT_CLOCK_CONTROLS}, | ||
| 182 | {NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE, | ||
| 183 | NVGPU_SUPPORT_GET_VOLTAGE}, | ||
| 184 | {NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT, | ||
| 185 | NVGPU_SUPPORT_GET_CURRENT}, | ||
| 186 | {NVGPU_GPU_FLAGS_SUPPORT_GET_POWER, | ||
| 187 | NVGPU_SUPPORT_GET_POWER}, | ||
| 188 | {NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE, | ||
| 189 | NVGPU_SUPPORT_GET_TEMPERATURE}, | ||
| 190 | {NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT, | ||
| 191 | NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT}, | ||
| 192 | {NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS, | ||
| 193 | NVGPU_SUPPORT_DEVICE_EVENTS}, | ||
| 194 | {NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE, | ||
| 195 | NVGPU_SUPPORT_FECS_CTXSW_TRACE}, | ||
| 196 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING, | ||
| 197 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING}, | ||
| 198 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL, | ||
| 199 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL}, | ||
| 200 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS, | ||
| 201 | NVGPU_SUPPORT_DETERMINISTIC_OPTS}, | ||
| 202 | {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS, | ||
| 203 | NVGPU_SUPPORT_SYNCPOINT_ADDRESS}, | ||
| 204 | {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT, | ||
| 205 | NVGPU_SUPPORT_USER_SYNCPOINT}, | ||
| 206 | {NVGPU_GPU_FLAGS_SUPPORT_USERMODE_SUBMIT, | ||
| 207 | NVGPU_SUPPORT_USERMODE_SUBMIT}, | ||
| 208 | {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, | ||
| 209 | NVGPU_SUPPORT_IO_COHERENCE}, | ||
| 210 | {NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, | ||
| 211 | NVGPU_SUPPORT_RESCHEDULE_RUNLIST}, | ||
| 212 | {NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL, | ||
| 213 | NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL}, | ||
| 214 | {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF, | ||
| 215 | NVGPU_ECC_ENABLED_SM_LRF}, | ||
| 216 | {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM, | ||
| 217 | NVGPU_ECC_ENABLED_SM_SHM}, | ||
| 218 | {NVGPU_GPU_FLAGS_ECC_ENABLED_TEX, | ||
| 219 | NVGPU_ECC_ENABLED_TEX}, | ||
| 220 | {NVGPU_GPU_FLAGS_ECC_ENABLED_LTC, | ||
| 221 | NVGPU_ECC_ENABLED_LTC}, | ||
| 222 | {NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS, | ||
| 223 | NVGPU_SUPPORT_TSG_SUBCONTEXTS}, | ||
| 224 | {NVGPU_GPU_FLAGS_SUPPORT_SCG, | ||
| 225 | NVGPU_SUPPORT_SCG}, | ||
| 226 | {NVGPU_GPU_FLAGS_SUPPORT_VPR, | ||
| 227 | NVGPU_SUPPORT_VPR}, | ||
| 228 | {NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE, | ||
| 229 | NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE}, | ||
| 230 | }; | ||
| 231 | |||
| 232 | static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) | ||
| 233 | { | ||
| 234 | unsigned int i; | ||
| 235 | u64 ioctl_flags = 0; | ||
| 236 | |||
| 237 | 	for (i = 0; i < ARRAY_SIZE(flags_mapping); i++) { | ||
| 238 | if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag)) | ||
| 239 | ioctl_flags |= flags_mapping[i].ioctl_flag; | ||
| 240 | } | ||
| 241 | |||
| 242 | if (!capable(CAP_SYS_NICE)) { | ||
| 243 | ioctl_flags &= ~NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST; | ||
| 244 | } | ||
| 245 | |||
| 246 | return ioctl_flags; | ||
| 247 | } | ||
| 248 | |||
| 249 | static void nvgpu_set_preemption_mode_flags(struct gk20a *g, | ||
| 250 | struct nvgpu_gpu_characteristics *gpu) | ||
| 251 | { | ||
| 252 | struct nvgpu_preemption_modes_rec preemption_mode_rec; | ||
| 253 | |||
| 254 | g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec); | ||
| 255 | |||
| 256 | gpu->graphics_preemption_mode_flags = | ||
| 257 | nvgpu_get_ioctl_graphics_preempt_mode_flags( | ||
| 258 | preemption_mode_rec.graphics_preemption_mode_flags); | ||
| 259 | gpu->compute_preemption_mode_flags = | ||
| 260 | nvgpu_get_ioctl_compute_preempt_mode_flags( | ||
| 261 | preemption_mode_rec.compute_preemption_mode_flags); | ||
| 262 | |||
| 263 | gpu->default_graphics_preempt_mode = | ||
| 264 | nvgpu_get_ioctl_graphics_preempt_mode( | ||
| 265 | preemption_mode_rec.default_graphics_preempt_mode); | ||
| 266 | gpu->default_compute_preempt_mode = | ||
| 267 | nvgpu_get_ioctl_compute_preempt_mode( | ||
| 268 | preemption_mode_rec.default_compute_preempt_mode); | ||
| 269 | } | ||
| 270 | |||
| 271 | static long | ||
| 272 | gk20a_ctrl_ioctl_gpu_characteristics( | ||
| 273 | struct gk20a *g, | ||
| 274 | struct nvgpu_gpu_get_characteristics *request) | ||
| 275 | { | ||
| 276 | struct nvgpu_gpu_characteristics gpu; | ||
| 277 | long err = 0; | ||
| 278 | |||
| 279 | if (gk20a_busy(g)) { | ||
| 280 | nvgpu_err(g, "failed to power on gpu"); | ||
| 281 | return -EINVAL; | ||
| 282 | } | ||
| 283 | |||
| 284 | memset(&gpu, 0, sizeof(gpu)); | ||
| 285 | |||
| 286 | gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); | ||
| 287 | gpu.on_board_video_memory_size = 0; /* integrated GPU */ | ||
| 288 | |||
| 289 | gpu.num_gpc = g->gr.gpc_count; | ||
| 290 | gpu.max_gpc_count = g->gr.max_gpc_count; | ||
| 291 | |||
| 292 | gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; | ||
| 293 | |||
| 294 | gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ | ||
| 295 | |||
| 296 | gpu.compression_page_size = g->ops.fb.compression_page_size(g); | ||
| 297 | |||
| 298 | if (g->ops.gr.get_gpc_mask) { | ||
| 299 | gpu.gpc_mask = g->ops.gr.get_gpc_mask(g); | ||
| 300 | } else { | ||
| 301 | gpu.gpc_mask = BIT32(g->gr.gpc_count) - 1; | ||
| 302 | } | ||
| 303 | |||
| 304 | gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); | ||
| 305 | |||
| 306 | gpu.arch = g->params.gpu_arch; | ||
| 307 | gpu.impl = g->params.gpu_impl; | ||
| 308 | gpu.rev = g->params.gpu_rev; | ||
| 309 | gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT; | ||
| 310 | gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ? | ||
| 311 | NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0; | ||
| 312 | gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS); | ||
| 313 | gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS); | ||
| 314 | gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS); | ||
| 315 | gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS); | ||
| 316 | gpu.inline_to_memory_class = | ||
| 317 | g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS); | ||
| 318 | gpu.dma_copy_class = | ||
| 319 | g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); | ||
| 320 | |||
| 321 | gpu.vbios_version = g->bios.vbios_version; | ||
| 322 | gpu.vbios_oem_version = g->bios.vbios_oem_version; | ||
| 323 | |||
| 324 | gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g); | ||
| 325 | gpu.pde_coverage_bit_count = | ||
| 326 | g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0]; | ||
| 327 | gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g); | ||
| 328 | |||
| 329 | gpu.sm_arch_sm_version = g->params.sm_arch_sm_version; | ||
| 330 | gpu.sm_arch_spa_version = g->params.sm_arch_spa_version; | ||
| 331 | gpu.sm_arch_warp_count = g->params.sm_arch_warp_count; | ||
| 332 | |||
| 333 | gpu.max_css_buffer_size = g->gr.max_css_buffer_size; | ||
| 334 | |||
| 335 | gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; | ||
| 336 | gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; | ||
| 337 | gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; | ||
| 338 | gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; | ||
| 339 | gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; | ||
| 340 | gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST; | ||
| 341 | gpu.gpu_va_bit_count = 40; | ||
| 342 | |||
| 343 | strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname)); | ||
| 344 | gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g); | ||
| 345 | gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); | ||
| 346 | gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); | ||
| 347 | gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); | ||
| 348 | gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; | ||
| 349 | gpu.gr_gobs_per_comptagline_per_slice = | ||
| 350 | g->gr.gobs_per_comptagline_per_slice; | ||
| 351 | gpu.num_ltc = g->ltc_count; | ||
| 352 | gpu.lts_per_ltc = g->gr.slices_per_ltc; | ||
| 353 | gpu.cbc_cache_line_size = g->gr.cacheline_size; | ||
| 354 | gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; | ||
| 355 | |||
| 356 | if (g->ops.clk.get_maxrate) | ||
| 357 | gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
| 358 | |||
| 359 | gpu.local_video_memory_size = g->mm.vidmem.size; | ||
| 360 | |||
| 361 | gpu.pci_vendor_id = g->pci_vendor_id; | ||
| 362 | gpu.pci_device_id = g->pci_device_id; | ||
| 363 | gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id; | ||
| 364 | gpu.pci_subsystem_device_id = g->pci_subsystem_device_id; | ||
| 365 | gpu.pci_class = g->pci_class; | ||
| 366 | gpu.pci_revision = g->pci_revision; | ||
| 367 | |||
| 368 | nvgpu_set_preemption_mode_flags(g, &gpu); | ||
| 369 | |||
| 370 | if (request->gpu_characteristics_buf_size > 0) { | ||
| 371 | size_t write_size = sizeof(gpu); | ||
| 372 | |||
| 373 | nvgpu_speculation_barrier(); | ||
| 374 | if (write_size > request->gpu_characteristics_buf_size) | ||
| 375 | write_size = request->gpu_characteristics_buf_size; | ||
| 376 | |||
| 377 | err = copy_to_user((void __user *)(uintptr_t) | ||
| 378 | request->gpu_characteristics_buf_addr, | ||
| 379 | 			&gpu, write_size) ? -EFAULT : 0; | ||
| 380 | } | ||
| 381 | |||
| 382 | if (err == 0) | ||
| 383 | request->gpu_characteristics_buf_size = sizeof(gpu); | ||
| 384 | |||
| 385 | gk20a_idle(g); | ||
| 386 | |||
| 387 | return err; | ||
| 388 | } | ||
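The handler implements a size-probe convention: calling with `gpu_characteristics_buf_size == 0` performs no copy but writes back the kernel's `sizeof(struct nvgpu_gpu_characteristics)`, so user space can allocate exactly that much and call again. A hypothetical caller (the ioctl name is assumed from the uapi header):

```c
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static struct nvgpu_gpu_characteristics *query_gpu(int ctrl_fd)
{
	struct nvgpu_gpu_get_characteristics req = { 0 };
	struct nvgpu_gpu_characteristics *gpu;

	/* Probe: the kernel writes back its own struct size. */
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req))
		return NULL;

	gpu = calloc(1, req.gpu_characteristics_buf_size);
	if (!gpu)
		return NULL;

	req.gpu_characteristics_buf_addr = (uintptr_t)gpu;
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req)) {
		free(gpu);
		return NULL;
	}

	return gpu;	/* caller frees; struct is now filled in */
}
```

The same probe-then-fetch pattern recurs in `gk20a_ctrl_get_tpc_masks()` and `gk20a_ctrl_get_fbp_l2_masks()` below.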
| 389 | |||
| 390 | static int gk20a_ctrl_prepare_compressible_read( | ||
| 391 | struct gk20a *g, | ||
| 392 | struct nvgpu_gpu_prepare_compressible_read_args *args) | ||
| 393 | { | ||
| 394 | int ret = -ENOSYS; | ||
| 395 | |||
| 396 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 397 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 398 | struct nvgpu_channel_fence fence; | ||
| 399 | struct gk20a_fence *fence_out = NULL; | ||
| 400 | int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( | ||
| 401 | args->submit_flags); | ||
| 402 | int fd = -1; | ||
| 403 | |||
| 404 | fence.id = args->fence.syncpt_id; | ||
| 405 | fence.value = args->fence.syncpt_value; | ||
| 406 | |||
| 407 | 	/* Try to allocate an fd here */ | ||
| 408 | if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) | ||
| 409 | && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) { | ||
| 410 | fd = get_unused_fd_flags(O_RDWR); | ||
| 411 | if (fd < 0) | ||
| 412 | return fd; | ||
| 413 | } | ||
| 414 | |||
| 415 | ret = gk20a_prepare_compressible_read(l, args->handle, | ||
| 416 | args->request_compbits, args->offset, | ||
| 417 | args->compbits_hoffset, args->compbits_voffset, | ||
| 418 | args->scatterbuffer_offset, | ||
| 419 | args->width, args->height, args->block_height_log2, | ||
| 420 | submit_flags, &fence, &args->valid_compbits, | ||
| 421 | &args->zbc_color, &fence_out); | ||
| 422 | |||
| 423 | if (ret) { | ||
| 424 | if (fd != -1) | ||
| 425 | put_unused_fd(fd); | ||
| 426 | return ret; | ||
| 427 | } | ||
| 428 | |||
| 429 | /* Convert fence_out to something we can pass back to user space. */ | ||
| 430 | if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { | ||
| 431 | if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { | ||
| 432 | if (fence_out) { | ||
| 433 | ret = gk20a_fence_install_fd(fence_out, fd); | ||
| 434 | if (ret) | ||
| 435 | put_unused_fd(fd); | ||
| 436 | else | ||
| 437 | args->fence.fd = fd; | ||
| 438 | } else { | ||
| 439 | args->fence.fd = -1; | ||
| 440 | put_unused_fd(fd); | ||
| 441 | } | ||
| 442 | } else { | ||
| 443 | if (fence_out) { | ||
| 444 | args->fence.syncpt_id = fence_out->syncpt_id; | ||
| 445 | args->fence.syncpt_value = | ||
| 446 | fence_out->syncpt_value; | ||
| 447 | } else { | ||
| 448 | args->fence.syncpt_id = -1; | ||
| 449 | args->fence.syncpt_value = 0; | ||
| 450 | } | ||
| 451 | } | ||
| 452 | } | ||
| 453 | gk20a_fence_put(fence_out); | ||
| 454 | #endif | ||
| 455 | |||
| 456 | return ret; | ||
| 457 | } | ||
| 458 | |||
| 459 | static int gk20a_ctrl_mark_compressible_write( | ||
| 460 | struct gk20a *g, | ||
| 461 | struct nvgpu_gpu_mark_compressible_write_args *args) | ||
| 462 | { | ||
| 463 | int ret = -ENOSYS; | ||
| 464 | |||
| 465 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 466 | ret = gk20a_mark_compressible_write(g, args->handle, | ||
| 467 | args->valid_compbits, args->offset, args->zbc_color); | ||
| 468 | #endif | ||
| 469 | |||
| 470 | return ret; | ||
| 471 | } | ||
| 472 | |||
| 473 | static int gk20a_ctrl_alloc_as( | ||
| 474 | struct gk20a *g, | ||
| 475 | struct nvgpu_alloc_as_args *args) | ||
| 476 | { | ||
| 477 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 478 | struct gk20a_as_share *as_share; | ||
| 479 | int err; | ||
| 480 | int fd; | ||
| 481 | struct file *file; | ||
| 482 | char name[64]; | ||
| 483 | |||
| 484 | err = get_unused_fd_flags(O_RDWR); | ||
| 485 | if (err < 0) | ||
| 486 | return err; | ||
| 487 | fd = err; | ||
| 488 | |||
| 489 | snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd); | ||
| 490 | |||
| 491 | file = anon_inode_getfile(name, l->as_dev.cdev.ops, NULL, O_RDWR); | ||
| 492 | if (IS_ERR(file)) { | ||
| 493 | err = PTR_ERR(file); | ||
| 494 | goto clean_up; | ||
| 495 | } | ||
| 496 | |||
| 497 | err = gk20a_as_alloc_share(g, args->big_page_size, | ||
| 498 | gk20a_as_translate_as_alloc_flags(g, | ||
| 499 | args->flags), | ||
| 500 | &as_share); | ||
| 501 | if (err) | ||
| 502 | goto clean_up_file; | ||
| 503 | |||
| 504 | fd_install(fd, file); | ||
| 505 | file->private_data = as_share; | ||
| 506 | |||
| 507 | args->as_fd = fd; | ||
| 508 | return 0; | ||
| 509 | |||
| 510 | clean_up_file: | ||
| 511 | fput(file); | ||
| 512 | clean_up: | ||
| 513 | put_unused_fd(fd); | ||
| 514 | return err; | ||
| 515 | } | ||
| 516 | |||
| 517 | static int gk20a_ctrl_open_tsg(struct gk20a *g, | ||
| 518 | struct nvgpu_gpu_open_tsg_args *args) | ||
| 519 | { | ||
| 520 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 521 | int err; | ||
| 522 | int fd; | ||
| 523 | struct file *file; | ||
| 524 | char name[64]; | ||
| 525 | |||
| 526 | err = get_unused_fd_flags(O_RDWR); | ||
| 527 | if (err < 0) | ||
| 528 | return err; | ||
| 529 | fd = err; | ||
| 530 | |||
| 531 | snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd); | ||
| 532 | |||
| 533 | file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR); | ||
| 534 | if (IS_ERR(file)) { | ||
| 535 | err = PTR_ERR(file); | ||
| 536 | goto clean_up; | ||
| 537 | } | ||
| 538 | |||
| 539 | err = nvgpu_ioctl_tsg_open(g, file); | ||
| 540 | if (err) | ||
| 541 | goto clean_up_file; | ||
| 542 | |||
| 543 | fd_install(fd, file); | ||
| 544 | args->tsg_fd = fd; | ||
| 545 | return 0; | ||
| 546 | |||
| 547 | clean_up_file: | ||
| 548 | fput(file); | ||
| 549 | clean_up: | ||
| 550 | put_unused_fd(fd); | ||
| 551 | return err; | ||
| 552 | } | ||
| 553 | |||
| 554 | static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, | ||
| 555 | struct nvgpu_gpu_get_tpc_masks_args *args) | ||
| 556 | { | ||
| 557 | struct gr_gk20a *gr = &g->gr; | ||
| 558 | int err = 0; | ||
| 559 | const u32 gpc_tpc_mask_size = sizeof(u32) * gr->max_gpc_count; | ||
| 560 | |||
| 561 | if (args->mask_buf_size > 0) { | ||
| 562 | size_t write_size = gpc_tpc_mask_size; | ||
| 563 | |||
| 564 | nvgpu_speculation_barrier(); | ||
| 565 | if (write_size > args->mask_buf_size) | ||
| 566 | write_size = args->mask_buf_size; | ||
| 567 | |||
| 568 | err = copy_to_user((void __user *)(uintptr_t) | ||
| 569 | args->mask_buf_addr, | ||
| 570 | 			gr->gpc_tpc_mask, write_size) ? -EFAULT : 0; | ||
| 571 | } | ||
| 572 | |||
| 573 | if (err == 0) | ||
| 574 | args->mask_buf_size = gpc_tpc_mask_size; | ||
| 575 | |||
| 576 | return err; | ||
| 577 | } | ||
| 578 | |||
| 579 | static int gk20a_ctrl_get_fbp_l2_masks( | ||
| 580 | struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args) | ||
| 581 | { | ||
| 582 | struct gr_gk20a *gr = &g->gr; | ||
| 583 | int err = 0; | ||
| 584 | const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count; | ||
| 585 | |||
| 586 | if (args->mask_buf_size > 0) { | ||
| 587 | size_t write_size = fbp_l2_mask_size; | ||
| 588 | |||
| 589 | nvgpu_speculation_barrier(); | ||
| 590 | if (write_size > args->mask_buf_size) | ||
| 591 | write_size = args->mask_buf_size; | ||
| 592 | |||
| 593 | err = copy_to_user((void __user *)(uintptr_t) | ||
| 594 | args->mask_buf_addr, | ||
| 595 | 			gr->fbp_rop_l2_en_mask, write_size) ? -EFAULT : 0; | ||
| 596 | } | ||
| 597 | |||
| 598 | if (err == 0) | ||
| 599 | args->mask_buf_size = fbp_l2_mask_size; | ||
| 600 | |||
| 601 | return err; | ||
| 602 | } | ||
| 603 | |||
| 604 | static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, | ||
| 605 | struct nvgpu_gpu_l2_fb_args *args) | ||
| 606 | { | ||
| 607 | int ret; | ||
| 608 | bool always_poweron; | ||
| 609 | |||
| 610 | if ((!args->l2_flush && !args->fb_flush) || | ||
| 611 | (!args->l2_flush && args->l2_invalidate)) | ||
| 612 | return -EINVAL; | ||
| 613 | |||
| 614 | /* Handle this case for joint rails or DGPU */ | ||
| 615 | always_poweron = (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) || | ||
| 616 | !pm_runtime_enabled(dev_from_gk20a(g))); | ||
| 617 | |||
| 618 | /* In case of not always power_on, exit if g->power_on is false */ | ||
| 619 | if (!always_poweron && !gk20a_check_poweron(g)) { | ||
| 620 | return 0; | ||
| 621 | } | ||
| 622 | |||
| 623 | 	/* There is a small window between the point where gk20a_idle() has | ||
| 624 | 	 * occurred and the point where railgating is actually triggered | ||
| 625 | 	 * (setting g->power_on = false), during which l2_flush can race with | ||
| 626 | 	 * railgating. It is better to take the busy lock to prevent | ||
| 627 | 	 * gk20a_idle() from proceeding. There is still a very small chance | ||
| 628 | 	 * that gk20a_idle() begins before gk20a_busy(); locked access to | ||
| 629 | 	 * g->power_on further reduces the probability of that ordering. | ||
| 630 | 	 */ | ||
| 631 | ret = gk20a_busy(g); | ||
| 632 | |||
| 633 | if (ret != 0) { | ||
| 634 | nvgpu_err(g, "failed to take power ref"); | ||
| 635 | return ret; | ||
| 636 | } | ||
| 637 | |||
| 638 | if (args->l2_flush) | ||
| 639 | g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); | ||
| 640 | |||
| 641 | if (args->fb_flush) | ||
| 642 | g->ops.mm.fb_flush(g); | ||
| 643 | |||
| 644 | gk20a_idle(g); | ||
| 645 | |||
| 646 | return 0; | ||
| 647 | } | ||
| 648 | |||
| 649 | static int nvgpu_gpu_ioctl_set_mmu_debug_mode( | ||
| 650 | struct gk20a *g, | ||
| 651 | struct nvgpu_gpu_mmu_debug_mode_args *args) | ||
| 652 | { | ||
| 653 | if (gk20a_busy(g)) { | ||
| 654 | nvgpu_err(g, "failed to power on gpu"); | ||
| 655 | return -EINVAL; | ||
| 656 | } | ||
| 657 | |||
| 658 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 659 | g->ops.fb.set_debug_mode(g, args->state == 1); | ||
| 660 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 661 | |||
| 662 | gk20a_idle(g); | ||
| 663 | return 0; | ||
| 664 | } | ||
| 665 | |||
| 666 | static int nvgpu_gpu_ioctl_set_debug_mode( | ||
| 667 | struct gk20a *g, | ||
| 668 | struct nvgpu_gpu_sm_debug_mode_args *args) | ||
| 669 | { | ||
| 670 | struct channel_gk20a *ch; | ||
| 671 | int err; | ||
| 672 | |||
| 673 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
| 674 | if (!ch) | ||
| 675 | return -EINVAL; | ||
| 676 | |||
| 677 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 678 | if (g->ops.gr.set_sm_debug_mode) | ||
| 679 | err = g->ops.gr.set_sm_debug_mode(g, ch, | ||
| 680 | args->sms, !!args->enable); | ||
| 681 | else | ||
| 682 | err = -ENOSYS; | ||
| 683 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 684 | |||
| 685 | gk20a_channel_put(ch); | ||
| 686 | return err; | ||
| 687 | } | ||
| 688 | |||
| 689 | static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) | ||
| 690 | { | ||
| 691 | int err; | ||
| 692 | |||
| 693 | err = gk20a_busy(g); | ||
| 694 | if (err) | ||
| 695 | return err; | ||
| 696 | |||
| 697 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 698 | err = gr_gk20a_elpg_protected_call(g, | ||
| 699 | g->ops.gr.trigger_suspend(g)); | ||
| 700 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 701 | |||
| 702 | gk20a_idle(g); | ||
| 703 | |||
| 704 | return err; | ||
| 705 | } | ||
| 706 | |||
| 707 | static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, | ||
| 708 | struct nvgpu_gpu_wait_pause_args *args) | ||
| 709 | { | ||
| 710 | int err; | ||
| 711 | struct warpstate *ioctl_w_state; | ||
| 712 | struct nvgpu_warpstate *w_state = NULL; | ||
| 713 | u32 sm_count, ioctl_size, size, sm_id; | ||
| 714 | |||
| 715 | sm_count = g->gr.gpc_count * g->gr.tpc_count; | ||
| 716 | |||
| 717 | ioctl_size = sm_count * sizeof(struct warpstate); | ||
| 718 | ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); | ||
| 719 | if (!ioctl_w_state) | ||
| 720 | return -ENOMEM; | ||
| 721 | |||
| 722 | size = sm_count * sizeof(struct nvgpu_warpstate); | ||
| 723 | w_state = nvgpu_kzalloc(g, size); | ||
| 724 | if (!w_state) { | ||
| 725 | err = -ENOMEM; | ||
| 726 | goto out_free; | ||
| 727 | } | ||
| 728 | |||
| 729 | err = gk20a_busy(g); | ||
| 730 | if (err) | ||
| 731 | goto out_free; | ||
| 732 | |||
| 733 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 734 | (void)gr_gk20a_elpg_protected_call(g, | ||
| 735 | g->ops.gr.wait_for_pause(g, w_state)); | ||
| 736 | |||
| 737 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { | ||
| 738 | ioctl_w_state[sm_id].valid_warps[0] = | ||
| 739 | w_state[sm_id].valid_warps[0]; | ||
| 740 | ioctl_w_state[sm_id].valid_warps[1] = | ||
| 741 | w_state[sm_id].valid_warps[1]; | ||
| 742 | ioctl_w_state[sm_id].trapped_warps[0] = | ||
| 743 | w_state[sm_id].trapped_warps[0]; | ||
| 744 | ioctl_w_state[sm_id].trapped_warps[1] = | ||
| 745 | w_state[sm_id].trapped_warps[1]; | ||
| 746 | ioctl_w_state[sm_id].paused_warps[0] = | ||
| 747 | w_state[sm_id].paused_warps[0]; | ||
| 748 | ioctl_w_state[sm_id].paused_warps[1] = | ||
| 749 | w_state[sm_id].paused_warps[1]; | ||
| 750 | } | ||
| 751 | 	/* Copy the converted state to user space, pointed to by "args->pwarpstate" */ | ||
| 752 | 	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, | ||
| 753 | 	    ioctl_w_state, ioctl_size)) { | ||
| 754 | nvgpu_log_fn(g, "copy_to_user failed!"); | ||
| 755 | err = -EFAULT; | ||
| 756 | } | ||
| 757 | |||
| 758 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 759 | |||
| 760 | gk20a_idle(g); | ||
| 761 | |||
| 762 | out_free: | ||
| 763 | nvgpu_kfree(g, w_state); | ||
| 764 | nvgpu_kfree(g, ioctl_w_state); | ||
| 765 | |||
| 766 | return err; | ||
| 767 | } | ||
| 768 | |||
| 769 | static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) | ||
| 770 | { | ||
| 771 | int err; | ||
| 772 | |||
| 773 | err = gk20a_busy(g); | ||
| 774 | if (err) | ||
| 775 | return err; | ||
| 776 | |||
| 777 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 778 | err = gr_gk20a_elpg_protected_call(g, | ||
| 779 | g->ops.gr.resume_from_pause(g)); | ||
| 780 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 781 | |||
| 782 | gk20a_idle(g); | ||
| 783 | |||
| 784 | return err; | ||
| 785 | } | ||
| 786 | |||
| 787 | static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) | ||
| 788 | { | ||
| 789 | int err; | ||
| 790 | |||
| 791 | err = gk20a_busy(g); | ||
| 792 | if (err) | ||
| 793 | return err; | ||
| 794 | |||
| 795 | err = gr_gk20a_elpg_protected_call(g, | ||
| 796 | g->ops.gr.clear_sm_errors(g)); | ||
| 797 | |||
| 798 | gk20a_idle(g); | ||
| 799 | |||
| 800 | return err; | ||
| 801 | } | ||
| 802 | |||
| 803 | static int nvgpu_gpu_ioctl_has_any_exception( | ||
| 804 | struct gk20a *g, | ||
| 805 | struct nvgpu_gpu_tpc_exception_en_status_args *args) | ||
| 806 | { | ||
| 807 | u32 tpc_exception_en; | ||
| 808 | |||
| 809 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 810 | tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); | ||
| 811 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 812 | |||
| 813 | args->tpc_exception_en_sm_mask = tpc_exception_en; | ||
| 814 | |||
| 815 | return 0; | ||
| 816 | } | ||
| 817 | |||
| 818 | static int gk20a_ctrl_get_num_vsms(struct gk20a *g, | ||
| 819 | struct nvgpu_gpu_num_vsms *args) | ||
| 820 | { | ||
| 821 | struct gr_gk20a *gr = &g->gr; | ||
| 822 | args->num_vsms = gr->no_of_sm; | ||
| 823 | return 0; | ||
| 824 | } | ||
| 825 | |||
| 826 | static int gk20a_ctrl_vsm_mapping(struct gk20a *g, | ||
| 827 | struct nvgpu_gpu_vsms_mapping *args) | ||
| 828 | { | ||
| 829 | int err = 0; | ||
| 830 | struct gr_gk20a *gr = &g->gr; | ||
| 831 | size_t write_size = gr->no_of_sm * | ||
| 832 | sizeof(struct nvgpu_gpu_vsms_mapping_entry); | ||
| 833 | struct nvgpu_gpu_vsms_mapping_entry *vsms_buf; | ||
| 834 | u32 i; | ||
| 835 | |||
| 836 | vsms_buf = nvgpu_kzalloc(g, write_size); | ||
| 837 | if (vsms_buf == NULL) | ||
| 838 | return -ENOMEM; | ||
| 839 | |||
| 840 | for (i = 0; i < gr->no_of_sm; i++) { | ||
| 841 | vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index; | ||
| 842 | if (g->ops.gr.get_nonpes_aware_tpc) | ||
| 843 | vsms_buf[i].tpc_index = | ||
| 844 | g->ops.gr.get_nonpes_aware_tpc(g, | ||
| 845 | gr->sm_to_cluster[i].gpc_index, | ||
| 846 | gr->sm_to_cluster[i].tpc_index); | ||
| 847 | else | ||
| 848 | vsms_buf[i].tpc_index = | ||
| 849 | gr->sm_to_cluster[i].tpc_index; | ||
| 850 | } | ||
| 851 | |||
| 852 | err = copy_to_user((void __user *)(uintptr_t) | ||
| 853 | args->vsms_map_buf_addr, | ||
| 854 | 			vsms_buf, write_size) ? -EFAULT : 0; | ||
| 855 | nvgpu_kfree(g, vsms_buf); | ||
| 856 | |||
| 857 | return err; | ||
| 858 | } | ||
| 859 | |||
| 860 | static int nvgpu_gpu_get_cpu_time_correlation_info( | ||
| 861 | struct gk20a *g, | ||
| 862 | struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) | ||
| 863 | { | ||
| 864 | struct nvgpu_cpu_time_correlation_sample *samples; | ||
| 865 | int err; | ||
| 866 | u32 i; | ||
| 867 | |||
| 868 | if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT || | ||
| 869 | args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) | ||
| 870 | return -EINVAL; | ||
| 871 | |||
| 872 | samples = nvgpu_kzalloc(g, args->count * | ||
| 873 | sizeof(struct nvgpu_cpu_time_correlation_sample)); | ||
| 874 | if (!samples) { | ||
| 875 | return -ENOMEM; | ||
| 876 | } | ||
| 877 | |||
| 878 | err = g->ops.ptimer.get_timestamps_zipper(g, | ||
| 879 | args->source_id, args->count, samples); | ||
| 880 | if (!err) { | ||
| 881 | for (i = 0; i < args->count; i++) { | ||
| 882 | args->samples[i].cpu_timestamp = samples[i].cpu_timestamp; | ||
| 883 | args->samples[i].gpu_timestamp = samples[i].gpu_timestamp; | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | nvgpu_kfree(g, samples); | ||
| 888 | |||
| 889 | return err; | ||
| 890 | } | ||
| 891 | |||
| 892 | static int nvgpu_gpu_get_gpu_time( | ||
| 893 | struct gk20a *g, | ||
| 894 | struct nvgpu_gpu_get_gpu_time_args *args) | ||
| 895 | { | ||
| 896 | u64 time; | ||
| 897 | int err; | ||
| 898 | |||
| 899 | err = gk20a_busy(g); | ||
| 900 | if (err) | ||
| 901 | return err; | ||
| 902 | |||
| 903 | err = g->ops.ptimer.read_ptimer(g, &time); | ||
| 904 | if (!err) | ||
| 905 | args->gpu_timestamp = time; | ||
| 906 | |||
| 907 | gk20a_idle(g); | ||
| 908 | return err; | ||
| 909 | } | ||
| 910 | |||
| 911 | static int nvgpu_gpu_get_engine_info( | ||
| 912 | struct gk20a *g, | ||
| 913 | struct nvgpu_gpu_get_engine_info_args *args) | ||
| 914 | { | ||
| 915 | int err = 0; | ||
| 916 | u32 engine_enum = ENGINE_INVAL_GK20A; | ||
| 917 | u32 report_index = 0; | ||
| 918 | u32 engine_id_idx; | ||
| 919 | const u32 max_buffer_engines = args->engine_info_buf_size / | ||
| 920 | sizeof(struct nvgpu_gpu_get_engine_info_item); | ||
| 921 | struct nvgpu_gpu_get_engine_info_item __user *dst_item_list = | ||
| 922 | (void __user *)(uintptr_t)args->engine_info_buf_addr; | ||
| 923 | |||
| 924 | for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; | ||
| 925 | ++engine_id_idx) { | ||
| 926 | u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx]; | ||
| 927 | const struct fifo_engine_info_gk20a *src_info = | ||
| 928 | &g->fifo.engine_info[active_engine_id]; | ||
| 929 | struct nvgpu_gpu_get_engine_info_item dst_info; | ||
| 930 | |||
| 931 | memset(&dst_info, 0, sizeof(dst_info)); | ||
| 932 | |||
| 933 | engine_enum = src_info->engine_enum; | ||
| 934 | |||
| 935 | switch (engine_enum) { | ||
| 936 | case ENGINE_GR_GK20A: | ||
| 937 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR; | ||
| 938 | break; | ||
| 939 | |||
| 940 | case ENGINE_GRCE_GK20A: | ||
| 941 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY; | ||
| 942 | break; | ||
| 943 | |||
| 944 | case ENGINE_ASYNC_CE_GK20A: | ||
| 945 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY; | ||
| 946 | break; | ||
| 947 | |||
| 948 | default: | ||
| 949 | nvgpu_err(g, "Unmapped engine enum %u", | ||
| 950 | engine_enum); | ||
| 951 | continue; | ||
| 952 | } | ||
| 953 | |||
| 954 | dst_info.engine_instance = src_info->inst_id; | ||
| 955 | dst_info.runlist_id = src_info->runlist_id; | ||
| 956 | |||
| 957 | if (report_index < max_buffer_engines) { | ||
| 958 | err = copy_to_user(&dst_item_list[report_index], | ||
| 959 | 				&dst_info, sizeof(dst_info)) ? -EFAULT : 0; | ||
| 960 | if (err) | ||
| 961 | goto clean_up; | ||
| 962 | } | ||
| 963 | |||
| 964 | ++report_index; | ||
| 965 | } | ||
| 966 | |||
| 967 | args->engine_info_buf_size = | ||
| 968 | report_index * sizeof(struct nvgpu_gpu_get_engine_info_item); | ||
| 969 | |||
| 970 | clean_up: | ||
| 971 | return err; | ||
| 972 | } | ||
| 973 | |||
| 974 | static int nvgpu_gpu_alloc_vidmem(struct gk20a *g, | ||
| 975 | struct nvgpu_gpu_alloc_vidmem_args *args) | ||
| 976 | { | ||
| 977 | u32 align = args->in.alignment ? args->in.alignment : SZ_4K; | ||
| 978 | int fd; | ||
| 979 | |||
| 980 | nvgpu_log_fn(g, " "); | ||
| 981 | |||
| 982 | /* not yet supported */ | ||
| 983 | if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK)) | ||
| 984 | return -EINVAL; | ||
| 985 | |||
| 986 | /* not yet supported */ | ||
| 987 | if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR)) | ||
| 988 | return -EINVAL; | ||
| 989 | |||
| 990 | if (args->in.size & (SZ_4K - 1)) | ||
| 991 | return -EINVAL; | ||
| 992 | |||
| 993 | if (!args->in.size) | ||
| 994 | return -EINVAL; | ||
| 995 | |||
| 996 | if (align & (align - 1)) | ||
| 997 | return -EINVAL; | ||
| 998 | |||
| 999 | if (align > roundup_pow_of_two(args->in.size)) { | ||
| 1000 | /* log this special case, buddy allocator detail */ | ||
| 1001 | nvgpu_warn(g, | ||
| 1002 | "alignment larger than buffer size rounded up to power of 2 is not supported"); | ||
| 1003 | return -EINVAL; | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | fd = nvgpu_vidmem_export_linux(g, args->in.size); | ||
| 1007 | if (fd < 0) | ||
| 1008 | return fd; | ||
| 1009 | |||
| 1010 | args->out.dmabuf_fd = fd; | ||
| 1011 | |||
| 1012 | nvgpu_log_fn(g, "done, fd=%d", fd); | ||
| 1013 | |||
| 1014 | return 0; | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | static int nvgpu_gpu_get_memory_state(struct gk20a *g, | ||
| 1018 | struct nvgpu_gpu_get_memory_state_args *args) | ||
| 1019 | { | ||
| 1020 | int err; | ||
| 1021 | |||
| 1022 | nvgpu_log_fn(g, " "); | ||
| 1023 | |||
| 1024 | if (args->reserved[0] || args->reserved[1] || | ||
| 1025 | args->reserved[2] || args->reserved[3]) | ||
| 1026 | return -EINVAL; | ||
| 1027 | |||
| 1028 | err = nvgpu_vidmem_get_space(g, &args->total_free_bytes); | ||
| 1029 | |||
| 1030 | 	nvgpu_log_fn(g, "done, err=%d, bytes=%llu", err, args->total_free_bytes); | ||
| 1031 | |||
| 1032 | return err; | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain) | ||
| 1036 | { | ||
| 1037 | u32 domain = 0; | ||
| 1038 | |||
| 1039 | if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK) | ||
| 1040 | domain = NVGPU_CLK_DOMAIN_MCLK; | ||
| 1041 | else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK) | ||
| 1042 | domain = NVGPU_CLK_DOMAIN_GPCCLK; | ||
| 1043 | else | ||
| 1044 | domain = NVGPU_CLK_DOMAIN_MAX + 1; | ||
| 1045 | |||
| 1046 | return domain; | ||
| 1047 | } | ||
| 1048 | |||
| 1049 | static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g, | ||
| 1050 | struct gk20a_ctrl_priv *priv, | ||
| 1051 | struct nvgpu_gpu_clk_vf_points_args *args) | ||
| 1052 | { | ||
| 1053 | struct nvgpu_gpu_clk_vf_point clk_point; | ||
| 1054 | struct nvgpu_gpu_clk_vf_point __user *entry; | ||
| 1055 | struct nvgpu_clk_session *session = priv->clk_session; | ||
| 1056 | u32 clk_domains = 0; | ||
| 1057 | int err; | ||
| 1058 | u16 last_mhz; | ||
| 1059 | u16 *fpoints; | ||
| 1060 | u32 i; | ||
| 1061 | u32 max_points = 0; | ||
| 1062 | u32 num_points = 0; | ||
| 1063 | u16 min_mhz; | ||
| 1064 | u16 max_mhz; | ||
| 1065 | |||
| 1066 | nvgpu_log_fn(g, " "); | ||
| 1067 | |||
| 1068 | if (!session || args->flags) | ||
| 1069 | return -EINVAL; | ||
| 1070 | |||
| 1071 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
| 1072 | args->num_entries = 0; | ||
| 1073 | |||
| 1074 | if (!nvgpu_clk_arb_is_valid_domain(g, | ||
| 1075 | nvgpu_gpu_convert_clk_domain(args->clk_domain))) | ||
| 1076 | return -EINVAL; | ||
| 1077 | |||
| 1078 | err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, | ||
| 1079 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
| 1080 | &max_points, NULL); | ||
| 1081 | if (err) | ||
| 1082 | return err; | ||
| 1083 | |||
| 1084 | if (!args->max_entries) { | ||
| 1085 | args->max_entries = max_points; | ||
| 1086 | return 0; | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | if (args->max_entries < max_points) | ||
| 1090 | return -EINVAL; | ||
| 1091 | |||
| 1092 | err = nvgpu_clk_arb_get_arbiter_clk_range(g, | ||
| 1093 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
| 1094 | &min_mhz, &max_mhz); | ||
| 1095 | if (err) | ||
| 1096 | return err; | ||
| 1097 | |||
| 1098 | fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16)); | ||
| 1099 | if (!fpoints) | ||
| 1100 | return -ENOMEM; | ||
| 1101 | |||
| 1102 | err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, | ||
| 1103 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
| 1104 | &max_points, fpoints); | ||
| 1105 | if (err) | ||
| 1106 | goto fail; | ||
| 1107 | |||
| 1108 | entry = (struct nvgpu_gpu_clk_vf_point __user *) | ||
| 1109 | (uintptr_t)args->clk_vf_point_entries; | ||
| 1110 | |||
| 1111 | last_mhz = 0; | ||
| 1112 | num_points = 0; | ||
| 1113 | for (i = 0; (i < max_points) && !err; i++) { | ||
| 1114 | |||
| 1115 | /* filter out duplicate frequencies */ | ||
| 1116 | if (fpoints[i] == last_mhz) | ||
| 1117 | continue; | ||
| 1118 | |||
| 1119 | /* filter out out-of-range frequencies */ | ||
| 1120 | if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz)) | ||
| 1121 | continue; | ||
| 1122 | |||
| 1123 | last_mhz = fpoints[i]; | ||
| 1124 | clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]); | ||
| 1125 | |||
| 1126 | err = copy_to_user((void __user *)entry, &clk_point, | ||
| 1127 | 				sizeof(clk_point)) ? -EFAULT : 0; | ||
| 1128 | |||
| 1129 | num_points++; | ||
| 1130 | entry++; | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | args->num_entries = num_points; | ||
| 1134 | |||
| 1135 | fail: | ||
| 1136 | nvgpu_kfree(g, fpoints); | ||
| 1137 | return err; | ||
| 1138 | } | ||
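From user space this is a two-phase query: call once with `max_entries == 0` to learn the table size, then allocate and fetch; the kernel has already filtered duplicate and out-of-range frequencies. A hypothetical sketch (ioctl name and field spellings assumed from the uapi header):

```c
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static struct nvgpu_gpu_clk_vf_point *get_vf_points(int ctrl_fd,
		uint32_t domain, uint32_t *num)
{
	struct nvgpu_gpu_clk_vf_points_args args = { .clk_domain = domain };
	struct nvgpu_gpu_clk_vf_point *points;

	/* Phase 1: max_entries == 0 asks for the table size. */
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS, &args))
		return NULL;

	points = calloc(args.max_entries, sizeof(*points));
	if (!points)
		return NULL;

	/* Phase 2: fetch the filtered points. */
	args.clk_vf_point_entries = (uintptr_t)points;
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS, &args)) {
		free(points);
		return NULL;
	}

	*num = args.num_entries;
	return points;
}
```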
| 1139 | |||
| 1140 | static int nvgpu_gpu_clk_get_range(struct gk20a *g, | ||
| 1141 | struct gk20a_ctrl_priv *priv, | ||
| 1142 | struct nvgpu_gpu_clk_range_args *args) | ||
| 1143 | { | ||
| 1144 | struct nvgpu_gpu_clk_range clk_range; | ||
| 1145 | struct nvgpu_gpu_clk_range __user *entry; | ||
| 1146 | struct nvgpu_clk_session *session = priv->clk_session; | ||
| 1147 | |||
| 1148 | u32 clk_domains = 0; | ||
| 1149 | u32 num_domains; | ||
| 1150 | u32 num_entries; | ||
| 1151 | u32 i; | ||
| 1152 | int bit; | ||
| 1153 | int err; | ||
| 1154 | u16 min_mhz, max_mhz; | ||
| 1155 | |||
| 1156 | nvgpu_log_fn(g, " "); | ||
| 1157 | |||
| 1158 | if (!session) | ||
| 1159 | return -EINVAL; | ||
| 1160 | |||
| 1161 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
| 1162 | num_domains = hweight_long(clk_domains); | ||
| 1163 | |||
| 1164 | if (!args->flags) { | ||
| 1165 | if (!args->num_entries) { | ||
| 1166 | args->num_entries = num_domains; | ||
| 1167 | return 0; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | if (args->num_entries < num_domains) | ||
| 1171 | return -EINVAL; | ||
| 1172 | |||
| 1173 | args->num_entries = 0; | ||
| 1174 | num_entries = num_domains; | ||
| 1175 | |||
| 1176 | } else { | ||
| 1177 | if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) | ||
| 1178 | return -EINVAL; | ||
| 1179 | |||
| 1180 | num_entries = args->num_entries; | ||
| 1181 | if (num_entries > num_domains) | ||
| 1182 | return -EINVAL; | ||
| 1183 | } | ||
| 1184 | |||
| 1185 | entry = (struct nvgpu_gpu_clk_range __user *) | ||
| 1186 | (uintptr_t)args->clk_range_entries; | ||
| 1187 | |||
| 1188 | for (i = 0; i < num_entries; i++, entry++) { | ||
| 1189 | |||
| 1190 | if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { | ||
| 1191 | if (copy_from_user(&clk_range, (void __user *)entry, | ||
| 1192 | sizeof(clk_range))) | ||
| 1193 | return -EFAULT; | ||
| 1194 | } else { | ||
| 1195 | bit = ffs(clk_domains) - 1; | ||
| 1196 | clk_range.clk_domain = bit; | ||
| 1197 | clk_domains &= ~BIT(bit); | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | clk_range.flags = 0; | ||
| 1201 | err = nvgpu_clk_arb_get_arbiter_clk_range(g, | ||
| 1202 | nvgpu_gpu_convert_clk_domain(clk_range.clk_domain), | ||
| 1203 | &min_mhz, &max_mhz); | ||
| 1204 | clk_range.min_hz = MHZ_TO_HZ(min_mhz); | ||
| 1205 | clk_range.max_hz = MHZ_TO_HZ(max_mhz); | ||
| 1206 | |||
| 1207 | if (err) | ||
| 1208 | return err; | ||
| 1209 | |||
| 1210 | err = copy_to_user(entry, &clk_range, sizeof(clk_range)); | ||
| 1211 | if (err) | ||
| 1212 | return -EFAULT; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | args->num_entries = num_entries; | ||
| 1216 | |||
| 1217 | return 0; | ||
| 1218 | } | ||
| 1219 | |||
| 1220 | static int nvgpu_gpu_clk_set_info(struct gk20a *g, | ||
| 1221 | struct gk20a_ctrl_priv *priv, | ||
| 1222 | struct nvgpu_gpu_clk_set_info_args *args) | ||
| 1223 | { | ||
| 1224 | struct nvgpu_gpu_clk_info clk_info; | ||
| 1225 | struct nvgpu_gpu_clk_info __user *entry; | ||
| 1226 | struct nvgpu_clk_session *session = priv->clk_session; | ||
| 1227 | |||
| 1228 | int fd; | ||
| 1229 | u32 clk_domains = 0; | ||
| 1230 | u16 freq_mhz; | ||
| 1231 | int i; | ||
| 1232 | int ret; | ||
| 1233 | |||
| 1234 | nvgpu_log_fn(g, " "); | ||
| 1235 | |||
| 1236 | if (!session || args->flags) | ||
| 1237 | return -EINVAL; | ||
| 1238 | |||
| 1239 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
| 1240 | if (!clk_domains) | ||
| 1241 | return -EINVAL; | ||
| 1242 | |||
| 1243 | entry = (struct nvgpu_gpu_clk_info __user *) | ||
| 1244 | (uintptr_t)args->clk_info_entries; | ||
| 1245 | |||
| 1246 | for (i = 0; i < args->num_entries; i++, entry++) { | ||
| 1247 | |||
| 1248 | if (copy_from_user(&clk_info, entry, sizeof(clk_info))) | ||
| 1249 | return -EFAULT; | ||
| 1250 | |||
| 1251 | if (!nvgpu_clk_arb_is_valid_domain(g, | ||
| 1252 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain))) | ||
| 1253 | return -EINVAL; | ||
| 1254 | } | ||
| 1255 | nvgpu_speculation_barrier(); | ||
| 1256 | |||
| 1257 | entry = (struct nvgpu_gpu_clk_info __user *) | ||
| 1258 | (uintptr_t)args->clk_info_entries; | ||
| 1259 | |||
| 1260 | ret = nvgpu_clk_arb_install_request_fd(g, session, &fd); | ||
| 1261 | if (ret < 0) | ||
| 1262 | return ret; | ||
| 1263 | |||
| 1264 | for (i = 0; i < args->num_entries; i++, entry++) { | ||
| 1265 | |||
| 1266 | if (copy_from_user(&clk_info, (void __user *)entry, | ||
| 1267 | sizeof(clk_info))) | ||
| 1268 | return -EFAULT; | ||
| 1269 | freq_mhz = HZ_TO_MHZ(clk_info.freq_hz); | ||
| 1270 | |||
| 1271 | nvgpu_clk_arb_set_session_target_mhz(session, fd, | ||
| 1272 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz); | ||
| 1273 | } | ||
| 1274 | |||
| 1275 | nvgpu_speculation_barrier(); | ||
| 1276 | ret = nvgpu_clk_arb_commit_request_fd(g, session, fd); | ||
| 1277 | if (ret < 0) | ||
| 1278 | return ret; | ||
| 1279 | |||
| 1280 | args->completion_fd = fd; | ||
| 1281 | |||
| 1282 | return ret; | ||
| 1283 | } | ||
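
set_info validates every entry, installs a request fd, queues the per-domain targets, and commits them to the arbiter as one batch. A hedged userspace sketch is below; that the returned completion_fd becomes readable once the arbiter has acted on the request is an assumption about the clk-arb fd semantics, not something this file states:

```c
#include <string.h>
#include <stdint.h>
#include <poll.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <uapi/linux/nvgpu.h>	/* install path of the uapi header may vary */

/* Request a 1 GHz GPC clock target and wait for the arbiter. */
static int set_gpc_target_1ghz(int ctrl_fd)
{
	struct nvgpu_gpu_clk_info info;
	struct nvgpu_gpu_clk_set_info_args args;
	struct pollfd pfd;

	memset(&info, 0, sizeof(info));
	info.clk_domain = NVGPU_GPU_CLK_DOMAIN_GPCCLK;
	info.freq_hz = 1000000000ULL;

	memset(&args, 0, sizeof(args));
	args.num_entries = 1;
	args.clk_info_entries = (uint64_t)(uintptr_t)&info;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_SET_INFO, &args) != 0)
		return -1;

	/* Assumption: the request fd signals readability once the clock
	 * arbiter has processed the batch. */
	pfd.fd = args.completion_fd;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, -1) < 0) {
		close(args.completion_fd);
		return -1;
	}

	close(args.completion_fd);
	return 0;
}
```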
| 1284 | |||
| 1285 | static int nvgpu_gpu_clk_get_info(struct gk20a *g, | ||
| 1286 | struct gk20a_ctrl_priv *priv, | ||
| 1287 | struct nvgpu_gpu_clk_get_info_args *args) | ||
| 1288 | { | ||
| 1289 | struct nvgpu_gpu_clk_info clk_info; | ||
| 1290 | struct nvgpu_gpu_clk_info __user *entry; | ||
| 1291 | struct nvgpu_clk_session *session = priv->clk_session; | ||
| 1292 | u32 clk_domains = 0; | ||
| 1293 | u32 num_domains; | ||
| 1294 | u32 num_entries; | ||
| 1295 | u32 i; | ||
| 1296 | u16 freq_mhz; | ||
| 1297 | int err; | ||
| 1298 | int bit; | ||
| 1299 | |||
| 1300 | nvgpu_log_fn(g, " "); | ||
| 1301 | |||
| 1302 | if (!session) | ||
| 1303 | return -EINVAL; | ||
| 1304 | |||
| 1305 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
| 1306 | num_domains = hweight_long(clk_domains); | ||
| 1307 | |||
| 1308 | if (!args->flags) { | ||
| 1309 | if (!args->num_entries) { | ||
| 1310 | args->num_entries = num_domains; | ||
| 1311 | return 0; | ||
| 1312 | } | ||
| 1313 | |||
| 1314 | if (args->num_entries < num_domains) | ||
| 1315 | return -EINVAL; | ||
| 1316 | |||
| 1317 | args->num_entries = 0; | ||
| 1318 | num_entries = num_domains; | ||
| 1319 | |||
| 1320 | } else { | ||
| 1321 | if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) | ||
| 1322 | return -EINVAL; | ||
| 1323 | |||
| 1324 | num_entries = args->num_entries; | ||
| 1325 | if (num_entries > num_domains * 3) | ||
| 1326 | return -EINVAL; | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | entry = (struct nvgpu_gpu_clk_info __user *) | ||
| 1330 | (uintptr_t)args->clk_info_entries; | ||
| 1331 | |||
| 1332 | for (i = 0; i < num_entries; i++, entry++) { | ||
| 1333 | |||
| 1334 | if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { | ||
| 1335 | if (copy_from_user(&clk_info, (void __user *)entry, | ||
| 1336 | sizeof(clk_info))) | ||
| 1337 | return -EFAULT; | ||
| 1338 | } else { | ||
| 1339 | bit = ffs(clk_domains) - 1; | ||
| 1340 | clk_info.clk_domain = bit; | ||
| 1341 | clk_domains &= ~BIT(bit); | ||
| 1342 | clk_info.clk_type = args->clk_type; | ||
| 1343 | } | ||
| 1344 | |||
| 1345 | nvgpu_speculation_barrier(); | ||
| 1346 | switch (clk_info.clk_type) { | ||
| 1347 | case NVGPU_GPU_CLK_TYPE_TARGET: | ||
| 1348 | err = nvgpu_clk_arb_get_session_target_mhz(session, | ||
| 1349 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
| 1350 | &freq_mhz); | ||
| 1351 | break; | ||
| 1352 | case NVGPU_GPU_CLK_TYPE_ACTUAL: | ||
| 1353 | err = nvgpu_clk_arb_get_arbiter_actual_mhz(g, | ||
| 1354 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
| 1355 | &freq_mhz); | ||
| 1356 | break; | ||
| 1357 | case NVGPU_GPU_CLK_TYPE_EFFECTIVE: | ||
| 1358 | err = nvgpu_clk_arb_get_arbiter_effective_mhz(g, | ||
| 1359 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
| 1360 | &freq_mhz); | ||
| 1361 | break; | ||
| 1362 | default: | ||
| 1363 | freq_mhz = 0; | ||
| 1364 | err = -EINVAL; | ||
| 1365 | break; | ||
| 1366 | } | ||
| 1367 | if (err) | ||
| 1368 | return err; | ||
| 1369 | |||
| 1370 | clk_info.flags = 0; | ||
| 1371 | clk_info.freq_hz = MHZ_TO_HZ(freq_mhz); | ||
| 1372 | |||
| 1373 | err = copy_to_user((void __user *)entry, &clk_info, | ||
| 1374 | sizeof(clk_info)); | ||
| 1375 | if (err) | ||
| 1376 | return -EFAULT; | ||
| 1377 | } | ||
| 1378 | |||
| 1379 | nvgpu_speculation_barrier(); | ||
| 1380 | args->num_entries = num_entries; | ||
| 1381 | |||
| 1382 | return 0; | ||
| 1383 | } | ||
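
With NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS the caller picks the domain and the clk_type per entry; with flags == 0 the driver enumerates all domains using args->clk_type. A sketch reading one effective frequency via the specific-domains path (uapi header path assumed):

```c
#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <uapi/linux/nvgpu.h>	/* install path of the uapi header may vary */

/* Read the effective frequency of the GPC domain only. */
static int get_effective_gpc_hz(int ctrl_fd, uint64_t *hz)
{
	struct nvgpu_gpu_clk_info info;
	struct nvgpu_gpu_clk_get_info_args args;

	memset(&info, 0, sizeof(info));
	info.clk_domain = NVGPU_GPU_CLK_DOMAIN_GPCCLK;
	info.clk_type = NVGPU_GPU_CLK_TYPE_EFFECTIVE;

	memset(&args, 0, sizeof(args));
	args.flags = NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS;
	args.num_entries = 1;
	args.clk_info_entries = (uint64_t)(uintptr_t)&info;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_INFO, &args) != 0)
		return -1;

	*hz = info.freq_hz;	/* driver copied the filled entry back */
	return 0;
}
```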
| 1384 | |||
| 1385 | static int nvgpu_gpu_get_event_fd(struct gk20a *g, | ||
| 1386 | struct gk20a_ctrl_priv *priv, | ||
| 1387 | struct nvgpu_gpu_get_event_fd_args *args) | ||
| 1388 | { | ||
| 1389 | struct nvgpu_clk_session *session = priv->clk_session; | ||
| 1390 | |||
| 1391 | nvgpu_log_fn(g, " "); | ||
| 1392 | |||
| 1393 | if (!session) | ||
| 1394 | return -EINVAL; | ||
| 1395 | |||
| 1396 | return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd, | ||
| 1397 | args->flags); | ||
| 1398 | } | ||
| 1399 | |||
| 1400 | static int nvgpu_gpu_get_voltage(struct gk20a *g, | ||
| 1401 | struct nvgpu_gpu_get_voltage_args *args) | ||
| 1402 | { | ||
| 1403 | int err = -EINVAL; | ||
| 1404 | |||
| 1405 | nvgpu_log_fn(g, " "); | ||
| 1406 | |||
| 1407 | if (args->reserved) | ||
| 1408 | return -EINVAL; | ||
| 1409 | |||
| 1410 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE)) | ||
| 1411 | return -EINVAL; | ||
| 1412 | |||
| 1413 | err = gk20a_busy(g); | ||
| 1414 | if (err) | ||
| 1415 | return err; | ||
| 1416 | |||
| 1417 | nvgpu_speculation_barrier(); | ||
| 1418 | switch (args->which) { | ||
| 1419 | case NVGPU_GPU_VOLTAGE_CORE: | ||
| 1420 | err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage); | ||
| 1421 | break; | ||
| 1422 | case NVGPU_GPU_VOLTAGE_SRAM: | ||
| 1423 | err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage); | ||
| 1424 | break; | ||
| 1425 | case NVGPU_GPU_VOLTAGE_BUS: | ||
| 1426 | err = pmgr_pwr_devices_get_voltage(g, &args->voltage); | ||
| 1427 | break; | ||
| 1428 | default: | ||
| 1429 | err = -EINVAL; | ||
| 1430 | } | ||
| 1431 | |||
| 1432 | gk20a_idle(g); | ||
| 1433 | |||
| 1434 | return err; | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | static int nvgpu_gpu_get_current(struct gk20a *g, | ||
| 1438 | struct nvgpu_gpu_get_current_args *args) | ||
| 1439 | { | ||
| 1440 | int err; | ||
| 1441 | |||
| 1442 | nvgpu_log_fn(g, " "); | ||
| 1443 | |||
| 1444 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
| 1445 | return -EINVAL; | ||
| 1446 | |||
| 1447 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT)) | ||
| 1448 | return -EINVAL; | ||
| 1449 | |||
| 1450 | err = gk20a_busy(g); | ||
| 1451 | if (err) | ||
| 1452 | return err; | ||
| 1453 | |||
| 1454 | err = pmgr_pwr_devices_get_current(g, &args->currnt); | ||
| 1455 | |||
| 1456 | gk20a_idle(g); | ||
| 1457 | |||
| 1458 | return err; | ||
| 1459 | } | ||
| 1460 | |||
| 1461 | static int nvgpu_gpu_get_power(struct gk20a *g, | ||
| 1462 | struct nvgpu_gpu_get_power_args *args) | ||
| 1463 | { | ||
| 1464 | int err; | ||
| 1465 | |||
| 1466 | nvgpu_log_fn(g, " "); | ||
| 1467 | |||
| 1468 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
| 1469 | return -EINVAL; | ||
| 1470 | |||
| 1471 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER)) | ||
| 1472 | return -EINVAL; | ||
| 1473 | |||
| 1474 | err = gk20a_busy(g); | ||
| 1475 | if (err) | ||
| 1476 | return err; | ||
| 1477 | |||
| 1478 | err = pmgr_pwr_devices_get_power(g, &args->power); | ||
| 1479 | |||
| 1480 | gk20a_idle(g); | ||
| 1481 | |||
| 1482 | return err; | ||
| 1483 | } | ||
| 1484 | |||
| 1485 | static int nvgpu_gpu_get_temperature(struct gk20a *g, | ||
| 1486 | struct nvgpu_gpu_get_temperature_args *args) | ||
| 1487 | { | ||
| 1488 | int err; | ||
| 1489 | u32 temp_f24_8; | ||
| 1490 | |||
| 1491 | nvgpu_log_fn(g, " "); | ||
| 1492 | |||
| 1493 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
| 1494 | return -EINVAL; | ||
| 1495 | |||
| 1496 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE)) | ||
| 1497 | return -EINVAL; | ||
| 1498 | |||
| 1499 | if (!g->ops.therm.get_internal_sensor_curr_temp) | ||
| 1500 | return -EINVAL; | ||
| 1501 | |||
| 1502 | err = gk20a_busy(g); | ||
| 1503 | if (err) | ||
| 1504 | return err; | ||
| 1505 | |||
| 1506 | err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8); | ||
| 1507 | |||
| 1508 | gk20a_idle(g); | ||
| 1509 | |||
| 1510 | args->temp_f24_8 = (s32)temp_f24_8; | ||
| 1511 | |||
| 1512 | return err; | ||
| 1513 | } | ||
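
The temperature comes back as signed 24.8 fixed point, so userspace divides by 256 to recover degrees Celsius, e.g.:

```c
/* args as filled in by NVGPU_GPU_IOCTL_GET_TEMPERATURE (24.8 fixed point) */
double temp_c = (double)args.temp_f24_8 / 256.0;
```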
| 1514 | |||
| 1515 | static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, | ||
| 1516 | struct nvgpu_gpu_set_therm_alert_limit_args *args) | ||
| 1517 | { | ||
| 1518 | int err; | ||
| 1519 | |||
| 1520 | nvgpu_log_fn(g, " "); | ||
| 1521 | |||
| 1522 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
| 1523 | return -EINVAL; | ||
| 1524 | |||
| 1525 | if (!g->ops.therm.configure_therm_alert) | ||
| 1526 | return -EINVAL; | ||
| 1527 | |||
| 1528 | err = gk20a_busy(g); | ||
| 1529 | if (err) | ||
| 1530 | return err; | ||
| 1531 | |||
| 1532 | err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8); | ||
| 1533 | |||
| 1534 | gk20a_idle(g); | ||
| 1535 | |||
| 1536 | return err; | ||
| 1537 | } | ||
| 1538 | |||
| 1539 | static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch, | ||
| 1540 | u32 flags) | ||
| 1541 | { | ||
| 1542 | int err = 0; | ||
| 1543 | bool allow; | ||
| 1544 | bool disallow; | ||
| 1545 | |||
| 1546 | allow = flags & | ||
| 1547 | NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING; | ||
| 1548 | |||
| 1549 | disallow = flags & | ||
| 1550 | NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING; | ||
| 1551 | |||
| 1552 | /* Can't be both at the same time */ | ||
| 1553 | if (allow && disallow) | ||
| 1554 | return -EINVAL; | ||
| 1555 | |||
| 1556 | /* Nothing to do */ | ||
| 1557 | if (!allow && !disallow) | ||
| 1558 | return 0; | ||
| 1559 | |||
| 1560 | /* | ||
| 1561 | * Moving into explicit idle or back from it? A call that doesn't | ||
| 1562 | * change the status is a no-op. | ||
| 1563 | */ | ||
| 1564 | if (!ch->deterministic_railgate_allowed && | ||
| 1565 | allow) { | ||
| 1566 | gk20a_idle(ch->g); | ||
| 1567 | } else if (ch->deterministic_railgate_allowed && | ||
| 1568 | !allow) { | ||
| 1569 | err = gk20a_busy(ch->g); | ||
| 1570 | if (err) { | ||
| 1571 | nvgpu_warn(ch->g, | ||
| 1572 | "cannot busy to restore deterministic ch"); | ||
| 1573 | return err; | ||
| 1574 | } | ||
| 1575 | } | ||
| 1576 | ch->deterministic_railgate_allowed = allow; | ||
| 1577 | |||
| 1578 | return err; | ||
| 1579 | } | ||
| 1580 | |||
| 1581 | static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags) | ||
| 1582 | { | ||
| 1583 | if (!ch->deterministic) | ||
| 1584 | return -EINVAL; | ||
| 1585 | |||
| 1586 | return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags); | ||
| 1587 | } | ||
| 1588 | |||
| 1589 | static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g, | ||
| 1590 | struct nvgpu_gpu_set_deterministic_opts_args *args) | ||
| 1591 | { | ||
| 1592 | int __user *user_channels; | ||
| 1593 | u32 i = 0; | ||
| 1594 | int err = 0; | ||
| 1595 | |||
| 1596 | nvgpu_log_fn(g, " "); | ||
| 1597 | |||
| 1598 | user_channels = (int __user *)(uintptr_t)args->channels; | ||
| 1599 | |||
| 1600 | /* Upper limit; prevent holding deterministic_busy for long */ | ||
| 1601 | if (args->num_channels > g->fifo.num_channels) { | ||
| 1602 | err = -EINVAL; | ||
| 1603 | goto out; | ||
| 1604 | } | ||
| 1605 | |||
| 1606 | /* Trivial sanity check first */ | ||
| 1607 | if (!access_ok(VERIFY_READ, user_channels, | ||
| 1608 | args->num_channels * sizeof(int))) { | ||
| 1609 | err = -EFAULT; | ||
| 1610 | goto out; | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
| 1614 | |||
| 1615 | /* note: we exit at the first failure */ | ||
| 1616 | for (; i < args->num_channels; i++) { | ||
| 1617 | int ch_fd = 0; | ||
| 1618 | struct channel_gk20a *ch; | ||
| 1619 | |||
| 1620 | if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) { | ||
| 1621 | /* User raced with above access_ok */ | ||
| 1622 | err = -EFAULT; | ||
| 1623 | break; | ||
| 1624 | } | ||
| 1625 | |||
| 1626 | ch = gk20a_get_channel_from_file(ch_fd); | ||
| 1627 | if (!ch) { | ||
| 1628 | err = -EINVAL; | ||
| 1629 | break; | ||
| 1630 | } | ||
| 1631 | |||
| 1632 | err = nvgpu_gpu_set_deterministic_ch(ch, args->flags); | ||
| 1633 | |||
| 1634 | gk20a_channel_put(ch); | ||
| 1635 | |||
| 1636 | if (err) | ||
| 1637 | break; | ||
| 1638 | } | ||
| 1639 | |||
| 1640 | nvgpu_speculation_barrier(); | ||
| 1641 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
| 1642 | |||
| 1643 | out: | ||
| 1644 | args->num_channels = i; | ||
| 1645 | return err; | ||
| 1646 | } | ||
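
The ioctl takes an array of channel fds and applies the railgating flags to each while holding the deterministic_busy read lock; on return, num_channels tells the caller how many fds were actually processed. A minimal userspace sketch (uapi header path assumed):

```c
#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <uapi/linux/nvgpu.h>	/* install path of the uapi header may vary */

/* Allow railgating for two deterministic channels. */
static int allow_railgate(int ctrl_fd, int ch_fd0, int ch_fd1)
{
	int channels[2] = { ch_fd0, ch_fd1 };
	struct nvgpu_gpu_set_deterministic_opts_args args;

	memset(&args, 0, sizeof(args));
	args.num_channels = 2;
	args.channels = (uint64_t)(uintptr_t)channels;
	args.flags = NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS, &args) != 0)
		return -1;

	/* On success args.num_channels equals the count submitted. */
	return (int)args.num_channels;
}
```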
| 1647 | |||
| 1648 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
| 1649 | { | ||
| 1650 | struct gk20a_ctrl_priv *priv = filp->private_data; | ||
| 1651 | struct gk20a *g = priv->g; | ||
| 1652 | struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args; | ||
| 1653 | struct nvgpu_gpu_zcull_get_info_args *get_info_args; | ||
| 1654 | struct nvgpu_gpu_zbc_set_table_args *set_table_args; | ||
| 1655 | struct nvgpu_gpu_zbc_query_table_args *query_table_args; | ||
| 1656 | u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE]; | ||
| 1657 | struct gr_zcull_info *zcull_info; | ||
| 1658 | struct zbc_entry *zbc_val; | ||
| 1659 | struct zbc_query_params *zbc_tbl; | ||
| 1660 | int i, err = 0; | ||
| 1661 | |||
| 1662 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
| 1663 | |||
| 1664 | if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) || | ||
| 1665 | (_IOC_NR(cmd) == 0) || | ||
| 1666 | (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) || | ||
| 1667 | (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE)) | ||
| 1668 | return -EINVAL; | ||
| 1669 | |||
| 1670 | memset(buf, 0, sizeof(buf)); | ||
| 1671 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
| 1672 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
| 1673 | return -EFAULT; | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | if (!g->sw_ready) { | ||
| 1677 | err = gk20a_busy(g); | ||
| 1678 | if (err) | ||
| 1679 | return err; | ||
| 1680 | |||
| 1681 | gk20a_idle(g); | ||
| 1682 | } | ||
| 1683 | |||
| 1684 | nvgpu_speculation_barrier(); | ||
| 1685 | switch (cmd) { | ||
| 1686 | case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: | ||
| 1687 | get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; | ||
| 1688 | |||
| 1689 | get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); | ||
| 1690 | |||
| 1691 | break; | ||
| 1692 | case NVGPU_GPU_IOCTL_ZCULL_GET_INFO: | ||
| 1693 | get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf; | ||
| 1694 | |||
| 1695 | memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args)); | ||
| 1696 | |||
| 1697 | zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info)); | ||
| 1698 | if (zcull_info == NULL) | ||
| 1699 | return -ENOMEM; | ||
| 1700 | |||
| 1701 | err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); | ||
| 1702 | if (err) { | ||
| 1703 | nvgpu_kfree(g, zcull_info); | ||
| 1704 | break; | ||
| 1705 | } | ||
| 1706 | |||
| 1707 | get_info_args->width_align_pixels = zcull_info->width_align_pixels; | ||
| 1708 | get_info_args->height_align_pixels = zcull_info->height_align_pixels; | ||
| 1709 | get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots; | ||
| 1710 | get_info_args->aliquot_total = zcull_info->aliquot_total; | ||
| 1711 | get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier; | ||
| 1712 | get_info_args->region_header_size = zcull_info->region_header_size; | ||
| 1713 | get_info_args->subregion_header_size = zcull_info->subregion_header_size; | ||
| 1714 | get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels; | ||
| 1715 | get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels; | ||
| 1716 | get_info_args->subregion_count = zcull_info->subregion_count; | ||
| 1717 | |||
| 1718 | nvgpu_kfree(g, zcull_info); | ||
| 1719 | break; | ||
| 1720 | case NVGPU_GPU_IOCTL_ZBC_SET_TABLE: | ||
| 1721 | set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf; | ||
| 1722 | |||
| 1723 | zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry)); | ||
| 1724 | if (zbc_val == NULL) | ||
| 1725 | return -ENOMEM; | ||
| 1726 | |||
| 1727 | zbc_val->format = set_table_args->format; | ||
| 1728 | zbc_val->type = set_table_args->type; | ||
| 1729 | |||
| 1730 | nvgpu_speculation_barrier(); | ||
| 1731 | switch (zbc_val->type) { | ||
| 1732 | case GK20A_ZBC_TYPE_COLOR: | ||
| 1733 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
| 1734 | zbc_val->color_ds[i] = set_table_args->color_ds[i]; | ||
| 1735 | zbc_val->color_l2[i] = set_table_args->color_l2[i]; | ||
| 1736 | } | ||
| 1737 | break; | ||
| 1738 | case GK20A_ZBC_TYPE_DEPTH: | ||
| 1739 | case T19X_ZBC: | ||
| 1740 | zbc_val->depth = set_table_args->depth; | ||
| 1741 | break; | ||
| 1742 | default: | ||
| 1743 | err = -EINVAL; | ||
| 1744 | } | ||
| 1745 | |||
| 1746 | if (!err) { | ||
| 1747 | err = gk20a_busy(g); | ||
| 1748 | if (!err) { | ||
| 1749 | err = g->ops.gr.zbc_set_table(g, &g->gr, | ||
| 1750 | zbc_val); | ||
| 1751 | gk20a_idle(g); | ||
| 1752 | } | ||
| 1753 | } | ||
| 1754 | |||
| 1755 | if (zbc_val) | ||
| 1756 | nvgpu_kfree(g, zbc_val); | ||
| 1757 | break; | ||
| 1758 | case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE: | ||
| 1759 | query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf; | ||
| 1760 | |||
| 1761 | zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params)); | ||
| 1762 | if (zbc_tbl == NULL) | ||
| 1763 | return -ENOMEM; | ||
| 1764 | |||
| 1765 | zbc_tbl->type = query_table_args->type; | ||
| 1766 | zbc_tbl->index_size = query_table_args->index_size; | ||
| 1767 | |||
| 1768 | err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl); | ||
| 1769 | |||
| 1770 | if (!err) { | ||
| 1771 | switch (zbc_tbl->type) { | ||
| 1772 | case GK20A_ZBC_TYPE_COLOR: | ||
| 1773 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
| 1774 | query_table_args->color_ds[i] = zbc_tbl->color_ds[i]; | ||
| 1775 | query_table_args->color_l2[i] = zbc_tbl->color_l2[i]; | ||
| 1776 | } | ||
| 1777 | break; | ||
| 1778 | case GK20A_ZBC_TYPE_DEPTH: | ||
| 1779 | case T19X_ZBC: | ||
| 1780 | query_table_args->depth = zbc_tbl->depth; | ||
| 1781 | break; | ||
| 1782 | case GK20A_ZBC_TYPE_INVALID: | ||
| 1783 | query_table_args->index_size = zbc_tbl->index_size; | ||
| 1784 | break; | ||
| 1785 | default: | ||
| 1786 | err = -EINVAL; | ||
| 1787 | } | ||
| 1788 | if (!err) { | ||
| 1789 | query_table_args->format = zbc_tbl->format; | ||
| 1790 | query_table_args->ref_cnt = zbc_tbl->ref_cnt; | ||
| 1791 | } | ||
| 1792 | } | ||
| 1793 | |||
| 1794 | if (zbc_tbl) | ||
| 1795 | nvgpu_kfree(g, zbc_tbl); | ||
| 1796 | break; | ||
| 1797 | |||
| 1798 | case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS: | ||
| 1799 | err = gk20a_ctrl_ioctl_gpu_characteristics( | ||
| 1800 | g, (struct nvgpu_gpu_get_characteristics *)buf); | ||
| 1801 | break; | ||
| 1802 | case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ: | ||
| 1803 | err = gk20a_ctrl_prepare_compressible_read(g, | ||
| 1804 | (struct nvgpu_gpu_prepare_compressible_read_args *)buf); | ||
| 1805 | break; | ||
| 1806 | case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE: | ||
| 1807 | err = gk20a_ctrl_mark_compressible_write(g, | ||
| 1808 | (struct nvgpu_gpu_mark_compressible_write_args *)buf); | ||
| 1809 | break; | ||
| 1810 | case NVGPU_GPU_IOCTL_ALLOC_AS: | ||
| 1811 | err = gk20a_ctrl_alloc_as(g, | ||
| 1812 | (struct nvgpu_alloc_as_args *)buf); | ||
| 1813 | break; | ||
| 1814 | case NVGPU_GPU_IOCTL_OPEN_TSG: | ||
| 1815 | err = gk20a_ctrl_open_tsg(g, | ||
| 1816 | (struct nvgpu_gpu_open_tsg_args *)buf); | ||
| 1817 | break; | ||
| 1818 | case NVGPU_GPU_IOCTL_GET_TPC_MASKS: | ||
| 1819 | err = gk20a_ctrl_get_tpc_masks(g, | ||
| 1820 | (struct nvgpu_gpu_get_tpc_masks_args *)buf); | ||
| 1821 | break; | ||
| 1822 | case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS: | ||
| 1823 | err = gk20a_ctrl_get_fbp_l2_masks(g, | ||
| 1824 | (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf); | ||
| 1825 | break; | ||
| 1826 | case NVGPU_GPU_IOCTL_OPEN_CHANNEL: | ||
| 1827 | /* the arg type here is nvgpu_channel_open_args; nvgpu.h also names | ||
| 1828 | * it ..gpu_open_channel_args for consistency - the two are identical */ | ||
| 1829 | err = gk20a_channel_open_ioctl(g, | ||
| 1830 | (struct nvgpu_channel_open_args *)buf); | ||
| 1831 | break; | ||
| 1832 | case NVGPU_GPU_IOCTL_FLUSH_L2: | ||
| 1833 | err = nvgpu_gpu_ioctl_l2_fb_ops(g, | ||
| 1834 | (struct nvgpu_gpu_l2_fb_args *)buf); | ||
| 1835 | break; | ||
| 1836 | |||
| 1837 | case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE: | ||
| 1838 | err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g, | ||
| 1839 | (struct nvgpu_gpu_mmu_debug_mode_args *)buf); | ||
| 1840 | break; | ||
| 1841 | |||
| 1842 | case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE: | ||
| 1843 | err = gr_gk20a_elpg_protected_call(g, | ||
| 1844 | nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf)); | ||
| 1845 | break; | ||
| 1846 | |||
| 1847 | case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND: | ||
| 1848 | err = nvgpu_gpu_ioctl_trigger_suspend(g); | ||
| 1849 | break; | ||
| 1850 | |||
| 1851 | case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE: | ||
| 1852 | err = nvgpu_gpu_ioctl_wait_for_pause(g, | ||
| 1853 | (struct nvgpu_gpu_wait_pause_args *)buf); | ||
| 1854 | break; | ||
| 1855 | |||
| 1856 | case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE: | ||
| 1857 | err = nvgpu_gpu_ioctl_resume_from_pause(g); | ||
| 1858 | break; | ||
| 1859 | |||
| 1860 | case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS: | ||
| 1861 | err = nvgpu_gpu_ioctl_clear_sm_errors(g); | ||
| 1862 | break; | ||
| 1863 | |||
| 1864 | case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS: | ||
| 1865 | err = nvgpu_gpu_ioctl_has_any_exception(g, | ||
| 1866 | (struct nvgpu_gpu_tpc_exception_en_status_args *)buf); | ||
| 1867 | break; | ||
| 1868 | |||
| 1869 | case NVGPU_GPU_IOCTL_NUM_VSMS: | ||
| 1870 | err = gk20a_ctrl_get_num_vsms(g, | ||
| 1871 | (struct nvgpu_gpu_num_vsms *)buf); | ||
| 1872 | break; | ||
| 1873 | case NVGPU_GPU_IOCTL_VSMS_MAPPING: | ||
| 1874 | err = gk20a_ctrl_vsm_mapping(g, | ||
| 1875 | (struct nvgpu_gpu_vsms_mapping *)buf); | ||
| 1876 | break; | ||
| 1877 | |||
| 1878 | case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO: | ||
| 1879 | err = nvgpu_gpu_get_cpu_time_correlation_info(g, | ||
| 1880 | (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf); | ||
| 1881 | break; | ||
| 1882 | |||
| 1883 | case NVGPU_GPU_IOCTL_GET_GPU_TIME: | ||
| 1884 | err = nvgpu_gpu_get_gpu_time(g, | ||
| 1885 | (struct nvgpu_gpu_get_gpu_time_args *)buf); | ||
| 1886 | break; | ||
| 1887 | |||
| 1888 | case NVGPU_GPU_IOCTL_GET_ENGINE_INFO: | ||
| 1889 | err = nvgpu_gpu_get_engine_info(g, | ||
| 1890 | (struct nvgpu_gpu_get_engine_info_args *)buf); | ||
| 1891 | break; | ||
| 1892 | |||
| 1893 | case NVGPU_GPU_IOCTL_ALLOC_VIDMEM: | ||
| 1894 | err = nvgpu_gpu_alloc_vidmem(g, | ||
| 1895 | (struct nvgpu_gpu_alloc_vidmem_args *)buf); | ||
| 1896 | break; | ||
| 1897 | |||
| 1898 | case NVGPU_GPU_IOCTL_GET_MEMORY_STATE: | ||
| 1899 | err = nvgpu_gpu_get_memory_state(g, | ||
| 1900 | (struct nvgpu_gpu_get_memory_state_args *)buf); | ||
| 1901 | break; | ||
| 1902 | |||
| 1903 | case NVGPU_GPU_IOCTL_CLK_GET_RANGE: | ||
| 1904 | err = nvgpu_gpu_clk_get_range(g, priv, | ||
| 1905 | (struct nvgpu_gpu_clk_range_args *)buf); | ||
| 1906 | break; | ||
| 1907 | |||
| 1908 | case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS: | ||
| 1909 | err = nvgpu_gpu_clk_get_vf_points(g, priv, | ||
| 1910 | (struct nvgpu_gpu_clk_vf_points_args *)buf); | ||
| 1911 | break; | ||
| 1912 | |||
| 1913 | case NVGPU_GPU_IOCTL_CLK_SET_INFO: | ||
| 1914 | err = nvgpu_gpu_clk_set_info(g, priv, | ||
| 1915 | (struct nvgpu_gpu_clk_set_info_args *)buf); | ||
| 1916 | break; | ||
| 1917 | |||
| 1918 | case NVGPU_GPU_IOCTL_CLK_GET_INFO: | ||
| 1919 | err = nvgpu_gpu_clk_get_info(g, priv, | ||
| 1920 | (struct nvgpu_gpu_clk_get_info_args *)buf); | ||
| 1921 | break; | ||
| 1922 | |||
| 1923 | case NVGPU_GPU_IOCTL_GET_EVENT_FD: | ||
| 1924 | err = nvgpu_gpu_get_event_fd(g, priv, | ||
| 1925 | (struct nvgpu_gpu_get_event_fd_args *)buf); | ||
| 1926 | break; | ||
| 1927 | |||
| 1928 | case NVGPU_GPU_IOCTL_GET_VOLTAGE: | ||
| 1929 | err = nvgpu_gpu_get_voltage(g, | ||
| 1930 | (struct nvgpu_gpu_get_voltage_args *)buf); | ||
| 1931 | break; | ||
| 1932 | |||
| 1933 | case NVGPU_GPU_IOCTL_GET_CURRENT: | ||
| 1934 | err = nvgpu_gpu_get_current(g, | ||
| 1935 | (struct nvgpu_gpu_get_current_args *)buf); | ||
| 1936 | break; | ||
| 1937 | |||
| 1938 | case NVGPU_GPU_IOCTL_GET_POWER: | ||
| 1939 | err = nvgpu_gpu_get_power(g, | ||
| 1940 | (struct nvgpu_gpu_get_power_args *)buf); | ||
| 1941 | break; | ||
| 1942 | |||
| 1943 | case NVGPU_GPU_IOCTL_GET_TEMPERATURE: | ||
| 1944 | err = nvgpu_gpu_get_temperature(g, | ||
| 1945 | (struct nvgpu_gpu_get_temperature_args *)buf); | ||
| 1946 | break; | ||
| 1947 | |||
| 1948 | case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT: | ||
| 1949 | err = nvgpu_gpu_set_therm_alert_limit(g, | ||
| 1950 | (struct nvgpu_gpu_set_therm_alert_limit_args *)buf); | ||
| 1951 | break; | ||
| 1952 | |||
| 1953 | case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS: | ||
| 1954 | err = nvgpu_gpu_set_deterministic_opts(g, | ||
| 1955 | (struct nvgpu_gpu_set_deterministic_opts_args *)buf); | ||
| 1956 | break; | ||
| 1957 | |||
| 1958 | default: | ||
| 1959 | nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); | ||
| 1960 | err = -ENOTTY; | ||
| 1961 | break; | ||
| 1962 | } | ||
| 1963 | |||
| 1964 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) { | ||
| 1965 | if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) | ||
| | err = -EFAULT; | ||
| | } | ||
| 1966 | |||
| 1967 | return err; | ||
| 1968 | } | ||
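
Every command above goes through the same marshalling: _IOC_WRITE arguments are copied into the on-stack buf, the handler runs, and _IOC_READ results are copied back. From userspace this reduces to one ioctl() per command; a sketch for GET_CHARACTERISTICS, which additionally passes a second-level buffer pointer (the /dev node name varies by platform and is an assumption here):

```c
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <uapi/linux/nvgpu.h>	/* install path of the uapi header may vary */

/* GET_CHARACTERISTICS: the ioctl arg itself carries only the size and
 * address of a caller-provided characteristics buffer. */
static int print_gpu_arch(void)
{
	struct nvgpu_gpu_characteristics chars;
	struct nvgpu_gpu_get_characteristics req;
	int fd;

	fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);	/* node name: assumption */
	if (fd < 0)
		return -1;

	memset(&chars, 0, sizeof(chars));
	memset(&req, 0, sizeof(req));
	req.gpu_characteristics_buf_size = sizeof(chars);
	req.gpu_characteristics_buf_addr = (uint64_t)(uintptr_t)&chars;

	if (ioctl(fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) != 0) {
		close(fd);
		return -1;
	}

	printf("arch 0x%x impl 0x%x\n", chars.arch, chars.impl);
	close(fd);
	return 0;
}
```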
| 1969 | |||
| 1970 | static void usermode_vma_close(struct vm_area_struct *vma) | ||
| 1971 | { | ||
| 1972 | struct gk20a_ctrl_priv *priv = vma->vm_private_data; | ||
| 1973 | struct gk20a *g = priv->g; | ||
| 1974 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 1975 | |||
| 1976 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
| 1977 | priv->usermode_vma.vma = NULL; | ||
| 1978 | priv->usermode_vma.vma_mapped = false; | ||
| 1979 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
| 1980 | } | ||
| 1981 | |||
| 1982 | struct vm_operations_struct usermode_vma_ops = { | ||
| 1983 | /* no .open - we use VM_DONTCOPY and don't support fork */ | ||
| 1984 | .close = usermode_vma_close, | ||
| 1985 | }; | ||
| 1986 | |||
| 1987 | int gk20a_ctrl_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||
| 1988 | { | ||
| 1989 | struct gk20a_ctrl_priv *priv = filp->private_data; | ||
| 1990 | struct gk20a *g = priv->g; | ||
| 1991 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 1992 | u64 addr; | ||
| 1993 | int err; | ||
| 1994 | |||
| 1995 | if (g->ops.fifo.usermode_base == NULL) | ||
| 1996 | return -ENOSYS; | ||
| 1997 | |||
| 1998 | if (priv->usermode_vma.vma != NULL) | ||
| 1999 | return -EBUSY; | ||
| 2000 | |||
| 2001 | if (vma->vm_end - vma->vm_start != SZ_4K) | ||
| 2002 | return -EINVAL; | ||
| 2003 | |||
| 2004 | if (vma->vm_pgoff != 0UL) | ||
| 2005 | return -EINVAL; | ||
| 2006 | |||
| 2007 | addr = l->regs_bus_addr + g->ops.fifo.usermode_base(g); | ||
| 2008 | |||
| 2009 | /* Sync with poweron/poweroff, and require valid regs */ | ||
| 2010 | err = gk20a_busy(g); | ||
| 2011 | if (err) { | ||
| 2012 | return err; | ||
| 2013 | } | ||
| 2014 | |||
| 2015 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
| 2016 | |||
| 2017 | vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | | ||
| 2018 | VM_DONTDUMP | VM_PFNMAP; | ||
| 2019 | vma->vm_ops = &usermode_vma_ops; | ||
| 2020 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | ||
| 2021 | |||
| 2022 | err = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, | ||
| 2023 | vma->vm_end - vma->vm_start, vma->vm_page_prot); | ||
| 2024 | if (!err) { | ||
| 2025 | priv->usermode_vma.vma = vma; | ||
| 2026 | priv->usermode_vma.flags = vma->vm_flags; | ||
| 2027 | vma->vm_private_data = priv; | ||
| 2028 | priv->usermode_vma.vma_mapped = true; | ||
| 2029 | } | ||
| 2030 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
| 2031 | |||
| 2032 | gk20a_idle(g); | ||
| 2033 | |||
| 2034 | return err; | ||
| 2035 | } | ||
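
Userspace obtains the usermode region by mapping exactly one page at offset 0 of the ctrl device, matching the SZ_4K and vm_pgoff checks above. A sketch; what the page contains (e.g. a work-submit doorbell) is hardware-specific and not specified here:

```c
#include <stddef.h>
#include <sys/mman.h>

/* Map the single 4 KiB usermode page exposed at offset 0. */
static void *map_usermode_region(int ctrl_fd)
{
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_SHARED, ctrl_fd, 0);

	return (p == MAP_FAILED) ? NULL : p;
}
```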
| 2036 | |||
| 2037 | static void alter_usermode_mapping(struct gk20a *g, | ||
| 2038 | struct gk20a_ctrl_priv *priv, | ||
| 2039 | bool poweroff) | ||
| 2040 | { | ||
| 2041 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 2042 | struct vm_area_struct *vma = priv->usermode_vma.vma; | ||
| 2043 | bool vma_mapped = priv->usermode_vma.vma_mapped; | ||
| 2044 | u64 addr; | ||
| 2045 | int err; | ||
| 2046 | |||
| 2047 | if (!vma) { | ||
| 2048 | /* Nothing to do - no mmap called */ | ||
| 2049 | return; | ||
| 2050 | } | ||
| 2051 | |||
| 2052 | addr = l->regs_bus_addr + g->ops.fifo.usermode_base(g); | ||
| 2053 | |||
| 2054 | down_write(&vma->vm_mm->mmap_sem); | ||
| 2055 | |||
| 2056 | /* | ||
| 2057 | * This is a no-op for the cases below: | ||
| 2058 | * a) poweroff and !vma_mapped -> do nothing, as no mapping exists | ||
| 2059 | * b) !poweroff and vma_mapped -> do nothing, as already mapped | ||
| 2060 | */ | ||
| 2061 | if (poweroff && vma_mapped) { | ||
| 2062 | err = zap_vma_ptes(vma, vma->vm_start, SZ_4K); | ||
| 2063 | if (err == 0) { | ||
| 2064 | vma->vm_flags = VM_NONE; | ||
| 2065 | priv->usermode_vma.vma_mapped = false; | ||
| 2066 | } else { | ||
| 2067 | nvgpu_err(g, "can't remove usermode mapping"); | ||
| 2068 | } | ||
| 2069 | } else if (!poweroff && !vma_mapped) { | ||
| 2070 | vma->vm_flags = priv->usermode_vma.flags; | ||
| 2071 | err = io_remap_pfn_range(vma, vma->vm_start, | ||
| 2072 | addr >> PAGE_SHIFT, | ||
| 2073 | SZ_4K, vma->vm_page_prot); | ||
| 2074 | if (err != 0) { | ||
| 2075 | nvgpu_err(g, "can't restore usermode mapping"); | ||
| 2076 | vma->vm_flags = VM_NONE; | ||
| 2077 | } else { | ||
| 2078 | priv->usermode_vma.vma_mapped = true; | ||
| 2079 | } | ||
| 2080 | } | ||
| 2081 | |||
| 2082 | up_write(&vma->vm_mm->mmap_sem); | ||
| 2083 | } | ||
| 2084 | |||
| 2085 | static void alter_usermode_mappings(struct gk20a *g, bool poweroff) | ||
| 2086 | { | ||
| 2087 | struct gk20a_ctrl_priv *priv; | ||
| 2088 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 2089 | |||
| 2090 | nvgpu_mutex_acquire(&l->ctrl.privs_lock); | ||
| 2091 | nvgpu_list_for_each_entry(priv, &l->ctrl.privs, | ||
| 2092 | gk20a_ctrl_priv, list) { | ||
| 2093 | alter_usermode_mapping(g, priv, poweroff); | ||
| 2094 | } | ||
| 2095 | nvgpu_mutex_release(&l->ctrl.privs_lock); | ||
| 2096 | } | ||
| 2097 | |||
| 2098 | void nvgpu_hide_usermode_for_poweroff(struct gk20a *g) | ||
| 2099 | { | ||
| 2100 | alter_usermode_mappings(g, true); | ||
| 2101 | } | ||
| 2102 | |||
| 2103 | void nvgpu_restore_usermode_for_poweron(struct gk20a *g) | ||
| 2104 | { | ||
| 2105 | alter_usermode_mappings(g, false); | ||
| 2106 | } | ||
diff --git a/include/os/linux/ioctl_ctrl.h b/include/os/linux/ioctl_ctrl.h new file mode 100644 index 0000000..3e1f798 --- /dev/null +++ b/include/os/linux/ioctl_ctrl.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | #ifndef __NVGPU_IOCTL_CTRL_H__ | ||
| 17 | #define __NVGPU_IOCTL_CTRL_H__ | ||
| 18 | |||
| 19 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp); | ||
| 20 | int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp); | ||
| 21 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
| 22 | int gk20a_ctrl_dev_mmap(struct file *filp, struct vm_area_struct *vma); | ||
| 23 | |||
| 24 | void nvgpu_hide_usermode_for_poweroff(struct gk20a *g); | ||
| 25 | void nvgpu_restore_usermode_for_poweron(struct gk20a *g); | ||
| 26 | |||
| 27 | #endif | ||
diff --git a/include/os/linux/ioctl_dbg.c b/include/os/linux/ioctl_dbg.c new file mode 100644 index 0000000..b5a1071 --- /dev/null +++ b/include/os/linux/ioctl_dbg.c | |||
| @@ -0,0 +1,2210 @@ | |||
| 1 | /* | ||
| 2 | * Tegra GK20A GPU Debugger/Profiler Driver | ||
| 3 | * | ||
| 4 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/fs.h> | ||
| 20 | #include <linux/file.h> | ||
| 21 | #include <linux/cdev.h> | ||
| 22 | #include <linux/uaccess.h> | ||
| 23 | #include <linux/dma-buf.h> | ||
| 24 | #include <linux/poll.h> | ||
| 25 | #include <uapi/linux/nvgpu.h> | ||
| 26 | |||
| 27 | #include <nvgpu/kmem.h> | ||
| 28 | #include <nvgpu/log.h> | ||
| 29 | #include <nvgpu/vm.h> | ||
| 30 | #include <nvgpu/atomic.h> | ||
| 31 | #include <nvgpu/cond.h> | ||
| 32 | #include <nvgpu/utils.h> | ||
| 33 | #include <nvgpu/gk20a.h> | ||
| 34 | #include <nvgpu/channel.h> | ||
| 35 | #include <nvgpu/tsg.h> | ||
| 36 | |||
| 37 | #include <nvgpu/linux/vm.h> | ||
| 38 | |||
| 39 | #include "gk20a/gr_gk20a.h" | ||
| 40 | #include "gk20a/regops_gk20a.h" | ||
| 41 | #include "gk20a/dbg_gpu_gk20a.h" | ||
| 42 | #include "os_linux.h" | ||
| 43 | #include "platform_gk20a.h" | ||
| 44 | #include "ioctl_dbg.h" | ||
| 45 | #include "ioctl_channel.h" | ||
| 46 | #include "dmabuf_vidmem.h" | ||
| 47 | |||
| 48 | struct dbg_session_gk20a_linux { | ||
| 49 | struct device *dev; | ||
| 50 | struct dbg_session_gk20a dbg_s; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct dbg_session_channel_data_linux { | ||
| 54 | /* | ||
| 55 | * We have to keep a ref to the _file_, not the channel, because | ||
| 56 | * close(channel_fd) is synchronous and would deadlock if we had an | ||
| 57 | * open debug session fd holding a channel ref at that time. Holding a | ||
| 58 | * ref to the file makes close(channel_fd) just drop a kernel ref to | ||
| 59 | * the file; the channel will close when the last file ref is dropped. | ||
| 60 | */ | ||
| 61 | struct file *ch_f; | ||
| 62 | struct dbg_session_channel_data ch_data; | ||
| 63 | }; | ||
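
The file-reference scheme described in the comment is what lets a debugger close a channel fd while a session still has it bound; a hypothetical userspace sequence that relies on it (node name and helper are assumptions):

```c
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <uapi/linux/nvgpu.h>	/* install path of the uapi header may vary */

/* Bind a channel to a fresh debug session, then close the channel fd.
 * close() stays safe because the session pinned the *file*, not the
 * channel: it just drops one kernel file ref. */
static int bind_then_close(int ch_fd)
{
	struct nvgpu_dbg_gpu_bind_channel_args bind = { 0 };
	int dbg_fd = open("/dev/nvhost-dbg-gpu", O_RDWR);	/* node name: assumption */

	if (dbg_fd < 0)
		return -1;

	bind.channel_fd = ch_fd;
	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL, &bind) != 0) {
		close(dbg_fd);
		return -1;
	}

	close(ch_fd);	/* session's fput() releases the file later */
	return dbg_fd;
}
```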
| 64 | /* turn seriously unwieldy names -> something shorter */ | ||
| 65 | #define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x | ||
| 66 | |||
| 67 | /* silly allocator - just increment id */ | ||
| 68 | static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); | ||
| 69 | static int generate_unique_id(void) | ||
| 70 | { | ||
| 71 | return nvgpu_atomic_add_return(1, &unique_id); | ||
| 72 | } | ||
| 73 | |||
| 74 | static int alloc_profiler(struct gk20a *g, | ||
| 75 | struct dbg_profiler_object_data **_prof) | ||
| 76 | { | ||
| 77 | struct dbg_profiler_object_data *prof; | ||
| 78 | *_prof = NULL; | ||
| 79 | |||
| 80 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 81 | |||
| 82 | prof = nvgpu_kzalloc(g, sizeof(*prof)); | ||
| 83 | if (!prof) | ||
| 84 | return -ENOMEM; | ||
| 85 | |||
| 86 | prof->prof_handle = generate_unique_id(); | ||
| 87 | *_prof = prof; | ||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | |||
| 91 | static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux) | ||
| 92 | { | ||
| 93 | struct dbg_session_gk20a_linux *dbg_s_linux; | ||
| 94 | *_dbg_s_linux = NULL; | ||
| 95 | |||
| 96 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 97 | |||
| 98 | dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux)); | ||
| 99 | if (!dbg_s_linux) | ||
| 100 | return -ENOMEM; | ||
| 101 | |||
| 102 | dbg_s_linux->dbg_s.id = generate_unique_id(); | ||
| 103 | *_dbg_s_linux = dbg_s_linux; | ||
| 104 | return 0; | ||
| 105 | } | ||
| 106 | |||
| 107 | static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset); | ||
| 108 | |||
| 109 | static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
| 110 | struct nvgpu_dbg_gpu_exec_reg_ops_args *args); | ||
| 111 | |||
| 112 | static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | ||
| 113 | struct nvgpu_dbg_gpu_powergate_args *args); | ||
| 114 | |||
| 115 | static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
| 116 | struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); | ||
| 117 | |||
| 118 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
| 119 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); | ||
| 120 | |||
| 121 | static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode( | ||
| 122 | struct dbg_session_gk20a *dbg_s, | ||
| 123 | struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args); | ||
| 124 | |||
| 125 | static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | ||
| 126 | struct dbg_session_gk20a *dbg_s, | ||
| 127 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); | ||
| 128 | |||
| 129 | static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s, | ||
| 130 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); | ||
| 131 | |||
| 132 | static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux, | ||
| 133 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); | ||
| 134 | |||
| 135 | static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, | ||
| 136 | struct nvgpu_dbg_gpu_profiler_reserve_args *args); | ||
| 137 | |||
| 138 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
| 139 | struct nvgpu_dbg_gpu_perfbuf_map_args *args); | ||
| 140 | |||
| 141 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
| 142 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); | ||
| 143 | |||
| 144 | static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, | ||
| 145 | int timeout_mode); | ||
| 146 | |||
| 147 | static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, | ||
| 148 | u32 profiler_handle); | ||
| 149 | |||
| 150 | static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s); | ||
| 151 | |||
| 152 | static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s); | ||
| 153 | |||
| 154 | static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, | ||
| 155 | u32 profiler_handle); | ||
| 156 | |||
| 157 | static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s); | ||
| 158 | |||
| 159 | static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, | ||
| 160 | struct file *filp, bool is_profiler); | ||
| 161 | |||
| 162 | static int nvgpu_set_sm_exception_type_mask_locked( | ||
| 163 | struct dbg_session_gk20a *dbg_s, | ||
| 164 | u32 exception_mask); | ||
| 165 | |||
| 166 | unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) | ||
| 167 | { | ||
| 168 | unsigned int mask = 0; | ||
| 169 | struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data; | ||
| 170 | struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; | ||
| 171 | struct gk20a *g = dbg_s->g; | ||
| 172 | |||
| 173 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 174 | |||
| 175 | poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait); | ||
| 176 | |||
| 177 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
| 178 | |||
| 179 | if (dbg_s->dbg_events.events_enabled && | ||
| 180 | dbg_s->dbg_events.num_pending_events > 0) { | ||
| 181 | nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d", | ||
| 182 | dbg_s->id); | ||
| 183 | nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", | ||
| 184 | dbg_s->dbg_events.num_pending_events); | ||
| 185 | mask = (POLLPRI | POLLIN); | ||
| 186 | } | ||
| 187 | |||
| 188 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
| 189 | |||
| 190 | return mask; | ||
| 191 | } | ||
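
Since pending events are signalled as POLLPRI | POLLIN, a debugger blocks on the session fd like this (a minimal sketch):

```c
#include <poll.h>

/* Block until the debug session fd reports a pending event. */
static int wait_dbg_event(int dbg_fd)
{
	struct pollfd pfd = { .fd = dbg_fd, .events = POLLPRI | POLLIN };

	if (poll(&pfd, 1, -1) < 0)
		return -1;

	return (pfd.revents & (POLLPRI | POLLIN)) ? 0 : -1;
}
```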
| 192 | |||
| 193 | int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) | ||
| 194 | { | ||
| 195 | struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data; | ||
| 196 | struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; | ||
| 197 | struct gk20a *g = dbg_s->g; | ||
| 198 | struct dbg_profiler_object_data *prof_obj, *tmp_obj; | ||
| 199 | |||
| 200 | nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name); | ||
| 201 | |||
| 202 | /* unbind channels */ | ||
| 203 | dbg_unbind_all_channels_gk20a(dbg_s); | ||
| 204 | |||
| 205 | /* Powergate/timeout enable is called here because a dbg_session that | ||
| 206 | * disabled powergating or timeouts via ioctl may be killed without | ||
| 207 | * ever calling the corresponding enable ioctl. | ||
| 208 | */ | ||
| 209 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 210 | if (dbg_s->is_pg_disabled) { | ||
| 211 | nvgpu_set_powergate_locked(dbg_s, false); | ||
| 212 | } | ||
| 213 | nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); | ||
| 214 | |||
| 215 | /* If this session owned the perf buffer, release it */ | ||
| 216 | if (g->perfbuf.owner == dbg_s) | ||
| 217 | gk20a_perfbuf_release_locked(g, g->perfbuf.offset); | ||
| 218 | |||
| 219 | /* Per-context profiler objects were released when we called | ||
| 220 | * dbg_unbind_all_channels. We could still have global ones. | ||
| 221 | */ | ||
| 222 | nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, | ||
| 223 | dbg_profiler_object_data, prof_obj_entry) { | ||
| 224 | if (prof_obj->session_id == dbg_s->id) { | ||
| 225 | if (prof_obj->has_reservation) | ||
| 226 | g->ops.dbg_session_ops. | ||
| 227 | release_profiler_reservation(dbg_s, prof_obj); | ||
| 228 | nvgpu_list_del(&prof_obj->prof_obj_entry); | ||
| 229 | nvgpu_kfree(g, prof_obj); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 233 | |||
| 234 | nvgpu_mutex_destroy(&dbg_s->ch_list_lock); | ||
| 235 | nvgpu_mutex_destroy(&dbg_s->ioctl_lock); | ||
| 236 | |||
| 237 | nvgpu_kfree(g, dbg_session_linux); | ||
| 238 | gk20a_put(g); | ||
| 239 | |||
| 240 | return 0; | ||
| 241 | } | ||
| 242 | |||
| 243 | int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) | ||
| 244 | { | ||
| 245 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
| 246 | struct nvgpu_os_linux, prof.cdev); | ||
| 247 | struct gk20a *g = &l->g; | ||
| 248 | |||
| 249 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 250 | return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); | ||
| 251 | } | ||
| 252 | |||
| 253 | static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, | ||
| 254 | struct nvgpu_dbg_gpu_timeout_args *args) | ||
| 255 | { | ||
| 256 | int err; | ||
| 257 | struct gk20a *g = dbg_s->g; | ||
| 258 | |||
| 259 | nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable); | ||
| 260 | |||
| 261 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 262 | err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); | ||
| 263 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 264 | |||
| 265 | return err; | ||
| 266 | } | ||
| 267 | |||
| 268 | static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( | ||
| 269 | struct dbg_session_gk20a *dbg_s, | ||
| 270 | struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) | ||
| 271 | { | ||
| 272 | struct gk20a *g = dbg_s->g; | ||
| 273 | struct gr_gk20a *gr = &g->gr; | ||
| 274 | struct nvgpu_tsg_sm_error_state *sm_error_state; | ||
| 275 | struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; | ||
| 276 | struct channel_gk20a *ch; | ||
| 277 | struct tsg_gk20a *tsg; | ||
| 278 | u32 sm_id; | ||
| 279 | int err = 0; | ||
| 280 | |||
| 281 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 282 | if (ch == NULL) { | ||
| 283 | return -EINVAL; | ||
| 284 | } | ||
| 285 | |||
| 286 | tsg = tsg_gk20a_from_ch(ch); | ||
| 287 | if (tsg == NULL) { | ||
| 288 | nvgpu_err(g, "no valid tsg from ch"); | ||
| 289 | return -EINVAL; | ||
| 290 | } | ||
| 291 | |||
| 292 | sm_id = args->sm_id; | ||
| 293 | if (sm_id >= gr->no_of_sm) { | ||
| 294 | return -EINVAL; | ||
| 295 | } | ||
| 296 | |||
| 297 | if (tsg->sm_error_states == NULL) { | ||
| 298 | return -EINVAL; | ||
| 299 | } | ||
| 300 | |||
| 301 | nvgpu_speculation_barrier(); | ||
| 302 | |||
| 303 | sm_error_state = tsg->sm_error_states + sm_id; | ||
| 304 | sm_error_state_record.hww_global_esr = | ||
| 305 | sm_error_state->hww_global_esr; | ||
| 306 | sm_error_state_record.hww_warp_esr = | ||
| 307 | sm_error_state->hww_warp_esr; | ||
| 308 | sm_error_state_record.hww_warp_esr_pc = | ||
| 309 | sm_error_state->hww_warp_esr_pc; | ||
| 310 | sm_error_state_record.hww_global_esr_report_mask = | ||
| 311 | sm_error_state->hww_global_esr_report_mask; | ||
| 312 | sm_error_state_record.hww_warp_esr_report_mask = | ||
| 313 | sm_error_state->hww_warp_esr_report_mask; | ||
| 314 | |||
| 315 | if (args->sm_error_state_record_size > 0) { | ||
| 316 | size_t write_size = sizeof(sm_error_state_record); | ||
| 317 | |||
| 318 | nvgpu_speculation_barrier(); | ||
| 319 | if (write_size > args->sm_error_state_record_size) | ||
| 320 | write_size = args->sm_error_state_record_size; | ||
| 321 | |||
| 322 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 323 | err = copy_to_user((void __user *)(uintptr_t) | ||
| 324 | args->sm_error_state_record_mem, | ||
| 325 | &sm_error_state_record, | ||
| 326 | write_size); | ||
| 327 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 328 | if (err != 0) { | ||
| 329 | nvgpu_err(g, "copy_to_user failed!"); | ||
| 330 | return err; | ||
| 331 | } | ||
| 332 | |||
| 333 | args->sm_error_state_record_size = write_size; | ||
| 334 | } | ||
| 335 | |||
| 336 | return 0; | ||
| 337 | } | ||
| 338 | |||
| 339 | |||
| 340 | static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( | ||
| 341 | struct dbg_session_gk20a *dbg_s, | ||
| 342 | struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args) | ||
| 343 | { | ||
| 344 | struct gk20a *g = dbg_s->g; | ||
| 345 | |||
| 346 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 347 | |||
| 348 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
| 349 | |||
| 350 | dbg_s->broadcast_stop_trigger = (args->broadcast != 0); | ||
| 351 | |||
| 352 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
| 353 | |||
| 354 | return 0; | ||
| 355 | } | ||
| 356 | |||
| 357 | static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, | ||
| 358 | int timeout_mode) | ||
| 359 | { | ||
| 360 | struct gk20a *g = dbg_s->g; | ||
| 361 | int err = 0; | ||
| 362 | |||
| 363 | nvgpu_log(g, gpu_dbg_gpu_dbg, "timeout mode requested: %d", | ||
| 364 | timeout_mode); | ||
| 365 | |||
| 366 | nvgpu_speculation_barrier(); | ||
| 367 | switch (timeout_mode) { | ||
| 368 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE: | ||
| 369 | if (dbg_s->is_timeout_disabled == true) | ||
| 370 | nvgpu_atomic_dec(&g->timeouts_disabled_refcount); | ||
| 371 | dbg_s->is_timeout_disabled = false; | ||
| 372 | break; | ||
| 373 | |||
| 374 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE: | ||
| 375 | if (dbg_s->is_timeout_disabled == false) | ||
| 376 | nvgpu_atomic_inc(&g->timeouts_disabled_refcount); | ||
| 377 | dbg_s->is_timeout_disabled = true; | ||
| 378 | break; | ||
| 379 | |||
| 380 | default: | ||
| 381 | nvgpu_err(g, | ||
| 382 | "unrecognized dbg gpu timeout mode : 0x%x", | ||
| 383 | timeout_mode); | ||
| 384 | err = -EINVAL; | ||
| 385 | break; | ||
| 386 | } | ||
| 387 | |||
| 388 | if (!err) | ||
| 389 | nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, " | ||
| 390 | "timeouts disabled refcount %d", | ||
| 391 | dbg_s->is_timeout_disabled ? "true" : "false", | ||
| 392 | nvgpu_atomic_read(&g->timeouts_disabled_refcount)); | ||
| 393 | return err; | ||
| 394 | } | ||
| 395 | |||
| 396 | static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, | ||
| 397 | struct file *filp, bool is_profiler) | ||
| 398 | { | ||
| 399 | struct nvgpu_os_linux *l; | ||
| 400 | struct dbg_session_gk20a_linux *dbg_session_linux; | ||
| 401 | struct dbg_session_gk20a *dbg_s; | ||
| 402 | struct gk20a *g; | ||
| 403 | |||
| 404 | struct device *dev; | ||
| 405 | |||
| 406 | int err; | ||
| 407 | |||
| 408 | if (!is_profiler) | ||
| 409 | l = container_of(inode->i_cdev, | ||
| 410 | struct nvgpu_os_linux, dbg.cdev); | ||
| 411 | else | ||
| 412 | l = container_of(inode->i_cdev, | ||
| 413 | struct nvgpu_os_linux, prof.cdev); | ||
| 414 | g = gk20a_get(&l->g); | ||
| 415 | if (!g) | ||
| 416 | return -ENODEV; | ||
| 417 | |||
| 418 | dev = dev_from_gk20a(g); | ||
| 419 | |||
| 420 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name); | ||
| 421 | |||
| 422 | err = alloc_session(g, &dbg_session_linux); | ||
| 423 | if (err) | ||
| 424 | goto free_ref; | ||
| 425 | |||
| 426 | dbg_s = &dbg_session_linux->dbg_s; | ||
| 427 | |||
| 428 | filp->private_data = dbg_session_linux; | ||
| 429 | dbg_session_linux->dev = dev; | ||
| 430 | dbg_s->g = g; | ||
| 431 | dbg_s->is_profiler = is_profiler; | ||
| 432 | dbg_s->is_pg_disabled = false; | ||
| 433 | dbg_s->is_timeout_disabled = false; | ||
| 434 | |||
| 435 | nvgpu_cond_init(&dbg_s->dbg_events.wait_queue); | ||
| 436 | nvgpu_init_list_node(&dbg_s->ch_list); | ||
| 437 | err = nvgpu_mutex_init(&dbg_s->ch_list_lock); | ||
| 438 | if (err) | ||
| 439 | goto err_free_session; | ||
| 440 | err = nvgpu_mutex_init(&dbg_s->ioctl_lock); | ||
| 441 | if (err) | ||
| 442 | goto err_destroy_lock; | ||
| 443 | dbg_s->dbg_events.events_enabled = false; | ||
| 444 | dbg_s->dbg_events.num_pending_events = 0; | ||
| 445 | |||
| 446 | return 0; | ||
| 447 | |||
| 448 | err_destroy_lock: | ||
| 449 | nvgpu_mutex_destroy(&dbg_s->ch_list_lock); | ||
| 450 | err_free_session: | ||
| 451 | nvgpu_kfree(g, dbg_session_linux); | ||
| 452 | free_ref: | ||
| 453 | gk20a_put(g); | ||
| 454 | return err; | ||
| 455 | } | ||
| 456 | |||
| 457 | void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) | ||
| 458 | { | ||
| 459 | nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue); | ||
| 460 | } | ||
| 461 | |||
| 462 | static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s, | ||
| 463 | struct dbg_session_channel_data *ch_data) | ||
| 464 | { | ||
| 465 | struct gk20a *g = dbg_s->g; | ||
| 466 | u32 chid; | ||
| 467 | struct dbg_session_data *session_data; | ||
| 468 | struct dbg_profiler_object_data *prof_obj, *tmp_obj; | ||
| 469 | struct dbg_session_channel_data_linux *ch_data_linux; | ||
| 470 | |||
| 471 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 472 | |||
| 473 | chid = ch_data->chid; | ||
| 474 | |||
| 475 | /* If there's a profiler ctx reservation record associated with this | ||
| 476 | * session/channel pair, release it. | ||
| 477 | */ | ||
| 478 | nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, | ||
| 479 | dbg_profiler_object_data, prof_obj_entry) { | ||
| 480 | if ((prof_obj->session_id == dbg_s->id) && | ||
| 481 | (prof_obj->ch->chid == chid)) { | ||
| 482 | if (prof_obj->has_reservation) { | ||
| 483 | g->ops.dbg_session_ops. | ||
| 484 | release_profiler_reservation(dbg_s, prof_obj); | ||
| 485 | } | ||
| 486 | nvgpu_list_del(&prof_obj->prof_obj_entry); | ||
| 487 | nvgpu_kfree(g, prof_obj); | ||
| 488 | } | ||
| 489 | } | ||
| 490 | |||
| 491 | nvgpu_list_del(&ch_data->ch_entry); | ||
| 492 | |||
| 493 | session_data = ch_data->session_data; | ||
| 494 | nvgpu_list_del(&session_data->dbg_s_entry); | ||
| 495 | nvgpu_kfree(dbg_s->g, session_data); | ||
| 496 | |||
| 497 | ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux, | ||
| 498 | ch_data); | ||
| 499 | |||
| 500 | fput(ch_data_linux->ch_f); | ||
| 501 | nvgpu_kfree(dbg_s->g, ch_data_linux); | ||
| 502 | |||
| 503 | return 0; | ||
| 504 | } | ||
| 505 | |||
| 506 | static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, | ||
| 507 | struct nvgpu_dbg_gpu_bind_channel_args *args) | ||
| 508 | { | ||
| 509 | struct file *f; | ||
| 510 | struct gk20a *g = dbg_s->g; | ||
| 511 | struct channel_gk20a *ch; | ||
| 512 | struct dbg_session_channel_data_linux *ch_data_linux; | ||
| 513 | struct dbg_session_data *session_data; | ||
| 514 | int err = 0; | ||
| 515 | |||
| 516 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", | ||
| 517 | g->name, args->channel_fd); | ||
| 518 | |||
| 519 | /* | ||
| 520 | * Although gk20a_get_channel_from_file gives us a channel ref, need to | ||
| 521 | * hold a ref to the file during the session lifetime. See comment in | ||
| 522 | * struct dbg_session_channel_data. | ||
| 523 | */ | ||
| 524 | f = fget(args->channel_fd); | ||
| 525 | if (!f) | ||
| 526 | return -ENODEV; | ||
| 527 | |||
| 528 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
| 529 | if (!ch) { | ||
| 530 | nvgpu_log_fn(g, "no channel found for fd"); | ||
| 531 | err = -EINVAL; | ||
| 532 | goto out_fput; | ||
| 533 | } | ||
| 534 | |||
| 535 | nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid); | ||
| 536 | |||
| 537 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 538 | nvgpu_mutex_acquire(&ch->dbg_s_lock); | ||
| 539 | |||
| 540 | ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux)); | ||
| 541 | if (!ch_data_linux) { | ||
| 542 | err = -ENOMEM; | ||
| 543 | goto out_chput; | ||
| 544 | } | ||
| 545 | ch_data_linux->ch_f = f; | ||
| 546 | ch_data_linux->ch_data.channel_fd = args->channel_fd; | ||
| 547 | ch_data_linux->ch_data.chid = ch->chid; | ||
| 548 | ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a; | ||
| 549 | nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry); | ||
| 550 | |||
| 551 | session_data = nvgpu_kzalloc(g, sizeof(*session_data)); | ||
| 552 | if (!session_data) { | ||
| 553 | err = -ENOMEM; | ||
| 554 | goto out_kfree; | ||
| 555 | } | ||
| 556 | session_data->dbg_s = dbg_s; | ||
| 557 | nvgpu_init_list_node(&session_data->dbg_s_entry); | ||
| 558 | ch_data_linux->ch_data.session_data = session_data; | ||
| 559 | |||
| 560 | nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list); | ||
| 561 | |||
| 562 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
| 563 | nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list); | ||
| 564 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
| 565 | |||
| 566 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
| 567 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 568 | |||
| 569 | gk20a_channel_put(ch); | ||
| 570 | |||
| 571 | return 0; | ||
| 572 | |||
| 573 | out_kfree: | ||
| 574 | nvgpu_kfree(g, ch_data_linux); | ||
| 575 | out_chput: | ||
| 576 | gk20a_channel_put(ch); | ||
| 577 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
| 578 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 579 | out_fput: | ||
| 580 | fput(f); | ||
| 581 | return err; | ||
| 582 | } | ||
| 583 | |||
| 584 | static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s) | ||
| 585 | { | ||
| 586 | struct dbg_session_channel_data *ch_data, *tmp; | ||
| 587 | struct gk20a *g = dbg_s->g; | ||
| 588 | |||
| 589 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 590 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
| 591 | nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, | ||
| 592 | dbg_session_channel_data, ch_entry) | ||
| 593 | ch_data->unbind_single_channel(dbg_s, ch_data); | ||
| 594 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
| 595 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 596 | |||
| 597 | return 0; | ||
| 598 | } | ||
| 599 | |||
| 600 | /* | ||
| 601 | * Convert common regops op values of the form NVGPU_DBG_REG_OP_* | ||
| 602 | * into linux regops op values of the form NVGPU_DBG_GPU_REG_OP_* | ||
| 603 | */ | ||
| 604 | static u32 nvgpu_get_regops_op_values_linux(u32 regops_op) | ||
| 605 | { | ||
| 606 | switch (regops_op) { | ||
| 607 | case REGOP(READ_32): | ||
| 608 | return REGOP_LINUX(READ_32); | ||
| 609 | case REGOP(WRITE_32): | ||
| 610 | return REGOP_LINUX(WRITE_32); | ||
| 611 | case REGOP(READ_64): | ||
| 612 | return REGOP_LINUX(READ_64); | ||
| 613 | case REGOP(WRITE_64): | ||
| 614 | return REGOP_LINUX(WRITE_64); | ||
| 615 | case REGOP(READ_08): | ||
| 616 | return REGOP_LINUX(READ_08); | ||
| 617 | case REGOP(WRITE_08): | ||
| 618 | return REGOP_LINUX(WRITE_08); | ||
| 619 | } | ||
| 620 | |||
| 621 | return regops_op; | ||
| 622 | } | ||
| 623 | |||
| 624 | /* | ||
| 625 | * Convert linux regops op values of the form NVGPU_DBG_GPU_REG_OP_* | ||
| 626 | * into common regops op values of the form NVGPU_DBG_REG_OP_* | ||
| 627 | */ | ||
| 628 | static u32 nvgpu_get_regops_op_values_common(u32 regops_op) | ||
| 629 | { | ||
| 630 | switch (regops_op) { | ||
| 631 | case REGOP_LINUX(READ_32): | ||
| 632 | return REGOP(READ_32); | ||
| 633 | case REGOP_LINUX(WRITE_32): | ||
| 634 | return REGOP(WRITE_32); | ||
| 635 | case REGOP_LINUX(READ_64): | ||
| 636 | return REGOP(READ_64); | ||
| 637 | case REGOP_LINUX(WRITE_64): | ||
| 638 | return REGOP(WRITE_64); | ||
| 639 | case REGOP_LINUX(READ_08): | ||
| 640 | return REGOP(READ_08); | ||
| 641 | case REGOP_LINUX(WRITE_08): | ||
| 642 | return REGOP(WRITE_08); | ||
| 643 | } | ||
| 644 | |||
| 645 | return regops_op; | ||
| 646 | } | ||
| 647 | |||
| 648 | /* | ||
| 649 | * Convert common regops type values of the form NVGPU_DBG_REG_OP_TYPE_* | ||
| 650 | * into linux regops type values of the form NVGPU_DBG_GPU_REG_OP_TYPE_* | ||
| 651 | */ | ||
| 652 | static u32 nvgpu_get_regops_type_values_linux(u32 regops_type) | ||
| 653 | { | ||
| 654 | switch (regops_type) { | ||
| 655 | case REGOP(TYPE_GLOBAL): | ||
| 656 | return REGOP_LINUX(TYPE_GLOBAL); | ||
| 657 | case REGOP(TYPE_GR_CTX): | ||
| 658 | return REGOP_LINUX(TYPE_GR_CTX); | ||
| 659 | case REGOP(TYPE_GR_CTX_TPC): | ||
| 660 | return REGOP_LINUX(TYPE_GR_CTX_TPC); | ||
| 661 | case REGOP(TYPE_GR_CTX_SM): | ||
| 662 | return REGOP_LINUX(TYPE_GR_CTX_SM); | ||
| 663 | case REGOP(TYPE_GR_CTX_CROP): | ||
| 664 | return REGOP_LINUX(TYPE_GR_CTX_CROP); | ||
| 665 | case REGOP(TYPE_GR_CTX_ZROP): | ||
| 666 | return REGOP_LINUX(TYPE_GR_CTX_ZROP); | ||
| 667 | case REGOP(TYPE_GR_CTX_QUAD): | ||
| 668 | return REGOP_LINUX(TYPE_GR_CTX_QUAD); | ||
| 669 | } | ||
| 670 | |||
| 671 | return regops_type; | ||
| 672 | } | ||
| 673 | |||
| 674 | /* | ||
| 675 | * Convert linux regops type values of the form NVGPU_DBG_GPU_REG_OP_TYPE_* | ||
| 676 | * into common regops type values of the form NVGPU_DBG_REG_OP_TYPE_* | ||
| 677 | */ | ||
| 678 | static u32 nvgpu_get_regops_type_values_common(u32 regops_type) | ||
| 679 | { | ||
| 680 | switch (regops_type) { | ||
| 681 | case REGOP_LINUX(TYPE_GLOBAL): | ||
| 682 | return REGOP(TYPE_GLOBAL); | ||
| 683 | case REGOP_LINUX(TYPE_GR_CTX): | ||
| 684 | return REGOP(TYPE_GR_CTX); | ||
| 685 | case REGOP_LINUX(TYPE_GR_CTX_TPC): | ||
| 686 | return REGOP(TYPE_GR_CTX_TPC); | ||
| 687 | case REGOP_LINUX(TYPE_GR_CTX_SM): | ||
| 688 | return REGOP(TYPE_GR_CTX_SM); | ||
| 689 | case REGOP_LINUX(TYPE_GR_CTX_CROP): | ||
| 690 | return REGOP(TYPE_GR_CTX_CROP); | ||
| 691 | case REGOP_LINUX(TYPE_GR_CTX_ZROP): | ||
| 692 | return REGOP(TYPE_GR_CTX_ZROP); | ||
| 693 | case REGOP_LINUX(TYPE_GR_CTX_QUAD): | ||
| 694 | return REGOP(TYPE_GR_CTX_QUAD); | ||
| 695 | } | ||
| 696 | |||
| 697 | return regops_type; | ||
| 698 | } | ||
| 699 | |||
| 700 | /* | ||
| 701 | * Convert common regops status values of the form NVGPU_DBG_REG_OP_STATUS_* | ||
| 702 | * into linux regops status values of the form NVGPU_DBG_GPU_REG_OP_STATUS_* | ||
| 703 | */ | ||
| 704 | static u32 nvgpu_get_regops_status_values_linux(u32 regops_status) | ||
| 705 | { | ||
| 706 | switch (regops_status) { | ||
| 707 | case REGOP(STATUS_SUCCESS): | ||
| 708 | return REGOP_LINUX(STATUS_SUCCESS); | ||
| 709 | case REGOP(STATUS_INVALID_OP): | ||
| 710 | return REGOP_LINUX(STATUS_INVALID_OP); | ||
| 711 | case REGOP(STATUS_INVALID_TYPE): | ||
| 712 | return REGOP_LINUX(STATUS_INVALID_TYPE); | ||
| 713 | case REGOP(STATUS_INVALID_OFFSET): | ||
| 714 | return REGOP_LINUX(STATUS_INVALID_OFFSET); | ||
| 715 | case REGOP(STATUS_UNSUPPORTED_OP): | ||
| 716 | return REGOP_LINUX(STATUS_UNSUPPORTED_OP); | ||
| 717 | case REGOP(STATUS_INVALID_MASK): | ||
| 718 | return REGOP_LINUX(STATUS_INVALID_MASK); | ||
| 719 | } | ||
| 720 | |||
| 721 | return regops_status; | ||
| 722 | } | ||
| 723 | |||
| 724 | /* | ||
| 725 | * Convert linux regops status values of the form NVGPU_DBG_GPU_REG_OP_STATUS_* | ||
| 726 | * into common regops status values of the form NVGPU_DBG_REG_OP_STATUS_* | ||
| 727 | */ | ||
| 728 | static u32 nvgpu_get_regops_status_values_common(u32 regops_status) | ||
| 729 | { | ||
| 730 | switch (regops_status) { | ||
| 731 | case REGOP_LINUX(STATUS_SUCCESS): | ||
| 732 | return REGOP(STATUS_SUCCESS); | ||
| 733 | case REGOP_LINUX(STATUS_INVALID_OP): | ||
| 734 | return REGOP(STATUS_INVALID_OP); | ||
| 735 | case REGOP_LINUX(STATUS_INVALID_TYPE): | ||
| 736 | return REGOP(STATUS_INVALID_TYPE); | ||
| 737 | case REGOP_LINUX(STATUS_INVALID_OFFSET): | ||
| 738 | return REGOP(STATUS_INVALID_OFFSET); | ||
| 739 | case REGOP_LINUX(STATUS_UNSUPPORTED_OP): | ||
| 740 | return REGOP(STATUS_UNSUPPORTED_OP); | ||
| 741 | case REGOP_LINUX(STATUS_INVALID_MASK): | ||
| 742 | return REGOP(STATUS_INVALID_MASK); | ||
| 743 | } | ||
| 744 | |||
| 745 | return regops_status; | ||
| 746 | } | ||
| 747 | |||
| 748 | static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in, | ||
| 749 | struct nvgpu_dbg_reg_op *out, u32 num_ops) | ||
| 750 | { | ||
| 751 | u32 i; | ||
| 752 | |||
| 753 | if (in == NULL || out == NULL) | ||
| 754 | return -ENOMEM; | ||
| 755 | |||
| 756 | for (i = 0; i < num_ops; i++) { | ||
| 757 | out[i].op = nvgpu_get_regops_op_values_common(in[i].op); | ||
| 758 | out[i].type = nvgpu_get_regops_type_values_common(in[i].type); | ||
| 759 | out[i].status = nvgpu_get_regops_status_values_common(in[i].status); | ||
| 760 | out[i].quad = in[i].quad; | ||
| 761 | out[i].group_mask = in[i].group_mask; | ||
| 762 | out[i].sub_group_mask = in[i].sub_group_mask; | ||
| 763 | out[i].offset = in[i].offset; | ||
| 764 | out[i].value_lo = in[i].value_lo; | ||
| 765 | out[i].value_hi = in[i].value_hi; | ||
| 766 | out[i].and_n_mask_lo = in[i].and_n_mask_lo; | ||
| 767 | out[i].and_n_mask_hi = in[i].and_n_mask_hi; | ||
| 768 | } | ||
| 769 | |||
| 770 | return 0; | ||
| 771 | } | ||
| 772 | |||
| 773 | static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in, | ||
| 774 | struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops) | ||
| 775 | { | ||
| 776 | u32 i; | ||
| 777 | |||
| 778 | if (in == NULL || out == NULL) | ||
| 779 | return -ENOMEM; | ||
| 780 | |||
| 781 | for (i = 0; i < num_ops; i++) { | ||
| 782 | out[i].op = nvgpu_get_regops_op_values_linux(in[i].op); | ||
| 783 | out[i].type = nvgpu_get_regops_type_values_linux(in[i].type); | ||
| 784 | out[i].status = nvgpu_get_regops_status_values_linux(in[i].status); | ||
| 785 | out[i].quad = in[i].quad; | ||
| 786 | out[i].group_mask = in[i].group_mask; | ||
| 787 | out[i].sub_group_mask = in[i].sub_group_mask; | ||
| 788 | out[i].offset = in[i].offset; | ||
| 789 | out[i].value_lo = in[i].value_lo; | ||
| 790 | out[i].value_hi = in[i].value_hi; | ||
| 791 | out[i].and_n_mask_lo = in[i].and_n_mask_lo; | ||
| 792 | out[i].and_n_mask_hi = in[i].and_n_mask_hi; | ||
| 793 | } | ||
| 794 | |||
| 795 | return 0; | ||
| 796 | } | ||
| 797 | |||
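| | /* | ||
| | * Reg ops are processed in fragments of at most g->dbg_regops_tmp_buf_ops | ||
| | * entries: each fragment is copied in from userspace, converted to the | ||
| | * common representation, executed through g->ops.regops.exec_regops, | ||
| | * converted back, and copied out in place. | ||
| | */ | ||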
| 798 | static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
| 799 | struct nvgpu_dbg_gpu_exec_reg_ops_args *args) | ||
| 800 | { | ||
| 801 | int err = 0, powergate_err = 0; | ||
| 802 | bool is_pg_disabled = false; | ||
| 803 | |||
| 804 | struct gk20a *g = dbg_s->g; | ||
| 805 | struct channel_gk20a *ch; | ||
| 806 | |||
| 807 | bool is_current_ctx; | ||
| 808 | |||
| 810 | nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); | ||
| 811 | |||
| 812 | if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) { | ||
| 813 | nvgpu_err(g, "regops limit exceeded"); | ||
| 814 | return -EINVAL; | ||
| 815 | } | ||
| 816 | |||
| 817 | if (args->num_ops == 0) { | ||
| 818 | /* Nothing to do */ | ||
| 819 | return 0; | ||
| 820 | } | ||
| 821 | |||
| 822 | if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) { | ||
| 823 | nvgpu_err(g, "reg ops work buffer not allocated"); | ||
| 824 | return -ENODEV; | ||
| 825 | } | ||
| 826 | |||
| 827 | if (!dbg_s->id) { | ||
| 828 | nvgpu_err(g, "can't call reg_ops on an unbound debugger session"); | ||
| 829 | return -EINVAL; | ||
| 830 | } | ||
| 831 | |||
| 832 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 833 | if (!dbg_s->is_profiler && !ch) { | ||
| 834 | nvgpu_err(g, "bind a channel before regops for a debugging session"); | ||
| 835 | return -EINVAL; | ||
| 836 | } | ||
| 837 | |||
| 838 | /* since exec_reg_ops sends methods to the ucode, it must take the | ||
| 839 | * global gpu lock to protect against mixing methods from debug sessions | ||
| 840 | * on other channels */ | ||
| 841 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 842 | |||
| 843 | if (!dbg_s->is_pg_disabled && !g->is_virtual) { | ||
| 844 | /* In the virtual case, the server will handle | ||
| 845 | * disabling/enabling powergating when processing reg ops | ||
| 846 | */ | ||
| 847 | powergate_err = nvgpu_set_powergate_locked(dbg_s, true); | ||
| 848 | if (!powergate_err) { | ||
| 849 | is_pg_disabled = true; | ||
| 850 | } | ||
| 851 | } | ||
| 852 | |||
| 853 | if (!powergate_err) { | ||
| 854 | u64 ops_offset = 0; /* index offset */ | ||
| 855 | |||
| 856 | struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL; | ||
| 857 | |||
| 858 | linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops * | ||
| 859 | sizeof(struct nvgpu_dbg_gpu_reg_op)); | ||
| 860 | |||
| 861 | if (!linux_fragment) | ||
| 862 | err = -ENOMEM; /* skip the loop; lock and powergate are still restored below */ | ||
| 863 | |||
| 864 | while (ops_offset < args->num_ops && !err) { | ||
| 865 | const u64 num_ops = | ||
| 866 | min(args->num_ops - ops_offset, | ||
| 867 | (u64)(g->dbg_regops_tmp_buf_ops)); | ||
| 868 | const u64 fragment_size = | ||
| 869 | num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op); | ||
| 870 | |||
| 871 | void __user *const fragment = | ||
| 872 | (void __user *)(uintptr_t) | ||
| 873 | (args->ops + | ||
| 874 | ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op)); | ||
| 875 | |||
| 876 | nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu", | ||
| 877 | ops_offset, num_ops); | ||
| 878 | |||
| 879 | nvgpu_log_fn(g, "Copying regops from userspace"); | ||
| 880 | |||
| 881 | if (copy_from_user(linux_fragment, | ||
| 882 | fragment, fragment_size)) { | ||
| 883 | nvgpu_err(g, "copy_from_user failed!"); | ||
| 884 | err = -EFAULT; | ||
| 885 | break; | ||
| 886 | } | ||
| 887 | |||
| 888 | err = nvgpu_get_regops_data_common(linux_fragment, | ||
| 889 | g->dbg_regops_tmp_buf, num_ops); | ||
| 890 | |||
| 891 | if (err) | ||
| 892 | break; | ||
| 893 | |||
| 894 | err = g->ops.regops.exec_regops( | ||
| 895 | dbg_s, g->dbg_regops_tmp_buf, num_ops, &is_current_ctx); | ||
| 896 | |||
| 897 | if (err) { | ||
| 898 | break; | ||
| 899 | } | ||
| 900 | |||
| 901 | if (ops_offset == 0) { | ||
| 902 | args->gr_ctx_resident = is_current_ctx; | ||
| 903 | } | ||
| 904 | |||
| 905 | err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf, | ||
| 906 | linux_fragment, num_ops); | ||
| 907 | |||
| 908 | if (err) | ||
| 909 | break; | ||
| 910 | |||
| 911 | nvgpu_log_fn(g, "Copying result to userspace"); | ||
| 912 | |||
| 913 | if (copy_to_user(fragment, linux_fragment, | ||
| 914 | fragment_size)) { | ||
| 915 | nvgpu_err(g, "copy_to_user failed!"); | ||
| 916 | err = -EFAULT; | ||
| 917 | break; | ||
| 918 | } | ||
| 919 | |||
| 920 | ops_offset += num_ops; | ||
| 921 | } | ||
| 922 | |||
| 923 | nvgpu_speculation_barrier(); | ||
| 924 | nvgpu_kfree(g, linux_fragment); | ||
| 925 | |||
| 926 | /* enable powergate, if previously disabled */ | ||
| 927 | if (is_pg_disabled) { | ||
| 928 | powergate_err = nvgpu_set_powergate_locked(dbg_s, | ||
| 929 | false); | ||
| 930 | } | ||
| 931 | } | ||
| 932 | |||
| 933 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 934 | |||
| 935 | if (!err && powergate_err) | ||
| 936 | err = powergate_err; | ||
| 937 | |||
| 938 | if (err) | ||
| 939 | nvgpu_err(g, "dbg regops failed"); | ||
| 940 | |||
| 941 | return err; | ||
| 942 | } | ||
| 943 | |||
| 944 | static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | ||
| 945 | struct nvgpu_dbg_gpu_powergate_args *args) | ||
| 946 | { | ||
| 947 | int err; | ||
| 948 | struct gk20a *g = dbg_s->g; | ||
| 949 | nvgpu_log_fn(g, "%s powergate mode = %d", | ||
| 950 | g->name, args->mode); | ||
| 951 | |||
| 952 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 953 | if ((args->mode != NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) && | ||
| 954 | (args->mode != NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE)) { | ||
| 955 | nvgpu_err(g, "invalid powergate mode"); | ||
| 956 | err = -EINVAL; | ||
| 957 | goto pg_err_end; | ||
| 958 | } | ||
| 959 | |||
| 960 | err = nvgpu_set_powergate_locked(dbg_s, | ||
| 961 | args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE); | ||
| 962 | pg_err_end: | ||
| 963 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 964 | return err; | ||
| 965 | } | ||
| 966 | |||
| 967 | static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
| 968 | struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) | ||
| 969 | { | ||
| 970 | int err; | ||
| 971 | struct gk20a *g = dbg_s->g; | ||
| 972 | struct channel_gk20a *ch_gk20a; | ||
| 973 | |||
| 974 | nvgpu_log_fn(g, "%s smpc ctxsw mode = %d", | ||
| 975 | g->name, args->mode); | ||
| 976 | |||
| 977 | err = gk20a_busy(g); | ||
| 978 | if (err) { | ||
| 979 | nvgpu_err(g, "failed to poweron"); | ||
| 980 | return err; | ||
| 981 | } | ||
| 982 | |||
| 983 | /* Take the global lock, since we'll be doing global regops */ | ||
| 984 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 985 | |||
| 986 | ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 987 | if (!ch_gk20a) { | ||
| 988 | nvgpu_err(g, | ||
| 989 | "no bound channel for smpc ctxsw mode update"); | ||
| 990 | err = -EINVAL; | ||
| 991 | goto clean_up; | ||
| 992 | } | ||
| 993 | |||
| 994 | err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a, | ||
| 995 | args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); | ||
| 996 | if (err) { | ||
| 997 | nvgpu_err(g, | ||
| 998 | "error (%d) during smpc ctxsw mode update", err); | ||
| 999 | } | ||
| 1000 | |||
| 1001 | clean_up: | ||
| 1002 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1003 | gk20a_idle(g); | ||
| 1004 | return err; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | /* | ||
| 1008 | * Convert linux hwpm ctxsw mode values of the form NVGPU_DBG_GPU_HWPM_CTXSW_MODE_* | ||
| 1009 | * into common hwpm ctxsw mode values of the form NVGPU_DBG_HWPM_CTXSW_MODE_* | ||
| 1010 | */ | ||
| 1012 | static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode) | ||
| 1013 | { | ||
| 1014 | nvgpu_speculation_barrier(); | ||
| 1015 | switch (mode) { | ||
| 1016 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW: | ||
| 1017 | return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW; | ||
| 1018 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW: | ||
| 1019 | return NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW; | ||
| 1020 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: | ||
| 1021 | return NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW; | ||
| 1022 | } | ||
| 1023 | |||
| 1024 | return mode; | ||
| 1025 | } | ||
| 1026 | |||
| 1028 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
| 1029 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) | ||
| 1030 | { | ||
| 1031 | int err; | ||
| 1032 | struct gk20a *g = dbg_s->g; | ||
| 1033 | struct channel_gk20a *ch_gk20a; | ||
| 1034 | u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode); | ||
| 1035 | |||
| 1036 | nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); | ||
| 1037 | |||
| 1038 | /* Must have a valid reservation to enable/disable hwpm ctxsw. | ||
| 1039 | * Just print an error message for now, but eventually this should | ||
| 1040 | * return an error, at the point where all client sw has been | ||
| 1041 | * cleaned up. | ||
| 1042 | */ | ||
| 1043 | if (!dbg_s->has_profiler_reservation) { | ||
| 1044 | nvgpu_err(g, | ||
| 1045 | "session doesn't have a valid reservation"); | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | err = gk20a_busy(g); | ||
| 1049 | if (err) { | ||
| 1050 | nvgpu_err(g, "failed to poweron"); | ||
| 1051 | return err; | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | /* Take the global lock, since we'll be doing global regops */ | ||
| 1055 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1056 | |||
| 1057 | ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1058 | if (!ch_gk20a) { | ||
| 1059 | nvgpu_err(g, | ||
| 1060 | "no bound channel for pm ctxsw mode update"); | ||
| 1061 | err = -EINVAL; | ||
| 1062 | goto clean_up; | ||
| 1063 | } | ||
| 1064 | if (g->dbg_powergating_disabled_refcount == 0) { | ||
| 1065 | nvgpu_err(g, "powergate is not disabled"); | ||
| 1066 | err = -ENOSYS; | ||
| 1067 | goto clean_up; | ||
| 1068 | } | ||
| 1069 | err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, | ||
| 1070 | mode); | ||
| 1071 | |||
| 1072 | if (err) | ||
| 1073 | nvgpu_err(g, | ||
| 1074 | "error (%d) during pm ctxsw mode update", err); | ||
| 1075 | /* gk20a would require a WAR to set the core PM_ENABLE bit; that is | ||
| 1076 | * not added here since gk20a is deprecated | ||
| 1077 | */ | ||
| 1078 | clean_up: | ||
| 1079 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1080 | gk20a_idle(g); | ||
| 1081 | return err; | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode( | ||
| 1085 | struct dbg_session_gk20a *dbg_s, | ||
| 1086 | struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args) | ||
| 1087 | { | ||
| 1088 | int err; | ||
| 1089 | struct gk20a *g = dbg_s->g; | ||
| 1090 | struct channel_gk20a *ch; | ||
| 1091 | bool enable = (args->mode == NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED); | ||
| 1092 | |||
| 1093 | nvgpu_log_fn(g, "mode=%u", args->mode); | ||
| 1094 | |||
| 1095 | if (args->reserved != 0U) { | ||
| 1096 | return -EINVAL; | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | if ((g->ops.fb.set_mmu_debug_mode == NULL) && | ||
| 1100 | (g->ops.gr.set_mmu_debug_mode == NULL)) { | ||
| 1101 | return -ENOSYS; | ||
| 1102 | } | ||
| 1103 | |||
| 1104 | err = gk20a_busy(g); | ||
| 1105 | if (err) { | ||
| 1106 | nvgpu_err(g, "failed to poweron"); | ||
| 1107 | return err; | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | /* Take the global lock, since we'll be doing global regops */ | ||
| 1111 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1112 | |||
| 1113 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1114 | if (!ch) { | ||
| 1115 | nvgpu_err(g, "no bound channel for mmu debug mode"); | ||
| 1116 | err = -EINVAL; | ||
| 1117 | goto clean_up; | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | err = nvgpu_tsg_set_mmu_debug_mode(ch, enable); | ||
| 1121 | if (err) { | ||
| 1122 | nvgpu_err(g, "set mmu debug mode failed, err=%d", err); | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | clean_up: | ||
| 1126 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1127 | gk20a_idle(g); | ||
| 1128 | return err; | ||
| 1129 | } | ||
| 1130 | |||
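| | /* | ||
| | * SM suspend/resume runs with GR context switching disabled so that the | ||
| | * bound context cannot be switched out while its SMs are being stopped | ||
| | * or restarted; ctxsw is re-enabled before returning. | ||
| | */ | ||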
| 1131 | static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | ||
| 1132 | struct dbg_session_gk20a *dbg_s, | ||
| 1133 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) | ||
| 1134 | { | ||
| 1135 | struct gk20a *g = dbg_s->g; | ||
| 1136 | struct channel_gk20a *ch; | ||
| 1137 | int err = 0, action = args->mode; | ||
| 1138 | |||
| 1139 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode); | ||
| 1140 | |||
| 1141 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1142 | if (!ch) | ||
| 1143 | return -EINVAL; | ||
| 1144 | |||
| 1145 | err = gk20a_busy(g); | ||
| 1146 | if (err) { | ||
| 1147 | nvgpu_err(g, "failed to poweron"); | ||
| 1148 | return err; | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1152 | |||
| 1153 | /* Suspend GPU context switching */ | ||
| 1154 | err = gr_gk20a_disable_ctxsw(g); | ||
| 1155 | if (err) { | ||
| 1156 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
| 1157 | /* this should probably be ctx-fatal... */ | ||
| 1158 | goto clean_up; | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | nvgpu_speculation_barrier(); | ||
| 1162 | switch (action) { | ||
| 1163 | case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: | ||
| 1164 | gr_gk20a_suspend_context(ch); | ||
| 1165 | break; | ||
| 1166 | |||
| 1167 | case NVGPU_DBG_GPU_RESUME_ALL_SMS: | ||
| 1168 | gr_gk20a_resume_context(ch); | ||
| 1169 | break; | ||
| 1170 | } | ||
| 1171 | |||
| 1172 | err = gr_gk20a_enable_ctxsw(g); | ||
| 1173 | if (err) | ||
| 1174 | nvgpu_err(g, "unable to restart ctxsw!"); | ||
| 1175 | |||
| 1176 | clean_up: | ||
| 1177 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1178 | gk20a_idle(g); | ||
| 1179 | |||
| 1180 | return err; | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | static int nvgpu_ioctl_allocate_profiler_object( | ||
| 1184 | struct dbg_session_gk20a_linux *dbg_session_linux, | ||
| 1185 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) | ||
| 1186 | { | ||
| 1187 | int err = 0; | ||
| 1188 | struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; | ||
| 1189 | struct gk20a *g = get_gk20a(dbg_session_linux->dev); | ||
| 1190 | struct dbg_profiler_object_data *prof_obj; | ||
| 1191 | |||
| 1192 | nvgpu_log_fn(g, "%s", g->name); | ||
| 1193 | |||
| 1194 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1195 | |||
| 1196 | err = alloc_profiler(g, &prof_obj); | ||
| 1197 | if (err) | ||
| 1198 | goto clean_up; | ||
| 1199 | |||
| 1200 | prof_obj->session_id = dbg_s->id; | ||
| 1201 | |||
| 1202 | if (dbg_s->is_profiler) | ||
| 1203 | prof_obj->ch = NULL; | ||
| 1204 | else { | ||
| 1205 | prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1206 | if (prof_obj->ch == NULL) { | ||
| 1207 | nvgpu_err(g, | ||
| 1208 | "bind a channel for dbg session"); | ||
| 1209 | nvgpu_kfree(g, prof_obj); | ||
| 1210 | err = -EINVAL; | ||
| 1211 | goto clean_up; | ||
| 1212 | } | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | /* Return handle to client */ | ||
| 1216 | args->profiler_handle = prof_obj->prof_handle; | ||
| 1217 | |||
| 1218 | nvgpu_init_list_node(&prof_obj->prof_obj_entry); | ||
| 1219 | |||
| 1220 | nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects); | ||
| 1221 | clean_up: | ||
| 1222 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1223 | return err; | ||
| 1224 | } | ||
| 1225 | |||
| 1226 | static int nvgpu_ioctl_free_profiler_object( | ||
| 1227 | struct dbg_session_gk20a_linux *dbg_s_linux, | ||
| 1228 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) | ||
| 1229 | { | ||
| 1230 | int err = 0; | ||
| 1231 | struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; | ||
| 1232 | struct gk20a *g = get_gk20a(dbg_s_linux->dev); | ||
| 1233 | struct dbg_profiler_object_data *prof_obj, *tmp_obj; | ||
| 1234 | bool obj_found = false; | ||
| 1235 | |||
| 1236 | nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x", | ||
| 1237 | g->name, dbg_s->id, args->profiler_handle); | ||
| 1238 | |||
| 1239 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1240 | |||
| 1241 | /* Remove profiler object from the list, if a match is found */ | ||
| 1242 | nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, | ||
| 1243 | dbg_profiler_object_data, prof_obj_entry) { | ||
| 1244 | if (prof_obj->prof_handle == args->profiler_handle) { | ||
| 1245 | if (prof_obj->session_id != dbg_s->id) { | ||
| 1246 | nvgpu_err(g, | ||
| 1247 | "invalid handle %x", | ||
| 1248 | args->profiler_handle); | ||
| 1249 | err = -EINVAL; | ||
| 1250 | break; | ||
| 1251 | } | ||
| 1252 | if (prof_obj->has_reservation) | ||
| 1253 | g->ops.dbg_session_ops. | ||
| 1254 | release_profiler_reservation(dbg_s, prof_obj); | ||
| 1255 | nvgpu_list_del(&prof_obj->prof_obj_entry); | ||
| 1256 | nvgpu_kfree(g, prof_obj); | ||
| 1257 | obj_found = true; | ||
| 1258 | break; | ||
| 1259 | } | ||
| 1260 | } | ||
| 1261 | if (!obj_found) { | ||
| 1262 | nvgpu_err(g, "profiler %x not found", | ||
| 1263 | args->profiler_handle); | ||
| 1264 | err = -EINVAL; | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1268 | return err; | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | static struct dbg_profiler_object_data *find_matching_prof_obj( | ||
| 1272 | struct dbg_session_gk20a *dbg_s, | ||
| 1273 | u32 profiler_handle) | ||
| 1274 | { | ||
| 1275 | struct gk20a *g = dbg_s->g; | ||
| 1276 | struct dbg_profiler_object_data *prof_obj; | ||
| 1277 | |||
| 1278 | nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, | ||
| 1279 | dbg_profiler_object_data, prof_obj_entry) { | ||
| 1280 | if (prof_obj->prof_handle == profiler_handle) { | ||
| 1281 | if (prof_obj->session_id != dbg_s->id) { | ||
| 1282 | nvgpu_err(g, | ||
| 1283 | "invalid handle %x", | ||
| 1284 | profiler_handle); | ||
| 1285 | return NULL; | ||
| 1286 | } | ||
| 1287 | return prof_obj; | ||
| 1288 | } | ||
| 1289 | } | ||
| 1290 | return NULL; | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | /* used in scenarios where the debugger session can take just the inter-session | ||
| 1294 | * lock for performance, but the profiler session must take the per-gpu lock | ||
| 1295 | * since it might not have an associated channel. */ | ||
| 1296 | static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s) | ||
| 1297 | { | ||
| 1298 | struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1299 | |||
| 1300 | if (dbg_s->is_profiler || !ch) | ||
| 1301 | nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock); | ||
| 1302 | else | ||
| 1303 | nvgpu_mutex_acquire(&ch->dbg_s_lock); | ||
| 1304 | } | ||
| 1305 | |||
| 1306 | static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s) | ||
| 1307 | { | ||
| 1308 | struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1309 | |||
| 1310 | if (dbg_s->is_profiler || !ch) | ||
| 1311 | nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock); | ||
| 1312 | else | ||
| 1313 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
| 1314 | } | ||
| 1315 | |||
| 1316 | static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) | ||
| 1317 | { | ||
| 1318 | struct gk20a *g = dbg_s->g; | ||
| 1319 | |||
| 1320 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 1321 | |||
| 1322 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
| 1323 | |||
| 1324 | dbg_s->dbg_events.events_enabled = true; | ||
| 1325 | dbg_s->dbg_events.num_pending_events = 0; | ||
| 1326 | |||
| 1327 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
| 1328 | } | ||
| 1329 | |||
| 1330 | static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) | ||
| 1331 | { | ||
| 1332 | struct gk20a *g = dbg_s->g; | ||
| 1333 | |||
| 1334 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 1335 | |||
| 1336 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
| 1337 | |||
| 1338 | dbg_s->dbg_events.events_enabled = false; | ||
| 1339 | dbg_s->dbg_events.num_pending_events = 0; | ||
| 1340 | |||
| 1341 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) | ||
| 1345 | { | ||
| 1346 | struct gk20a *g = dbg_s->g; | ||
| 1347 | |||
| 1348 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 1349 | |||
| 1350 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
| 1351 | |||
| 1352 | if (dbg_s->dbg_events.events_enabled && | ||
| 1353 | dbg_s->dbg_events.num_pending_events > 0) | ||
| 1354 | dbg_s->dbg_events.num_pending_events--; | ||
| 1355 | |||
| 1356 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
| 1357 | } | ||
| 1358 | |||
| 1359 | |||
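| | /* | ||
| | * Dispatcher for the dbg events ctrl ioctl: ENABLE/DISABLE toggle event | ||
| | * delivery and reset the pending-event count, while CLEAR consumes one | ||
| | * pending event. Each helper above takes the session/channel lock via | ||
| | * gk20a_dbg_session_nvgpu_mutex_acquire(). | ||
| | */ | ||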
| 1360 | static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, | ||
| 1361 | struct nvgpu_dbg_gpu_events_ctrl_args *args) | ||
| 1362 | { | ||
| 1363 | int ret = 0; | ||
| 1364 | struct channel_gk20a *ch; | ||
| 1365 | struct gk20a *g = dbg_s->g; | ||
| 1366 | |||
| 1367 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); | ||
| 1368 | |||
| 1369 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1370 | if (!ch) { | ||
| 1371 | nvgpu_err(g, "no channel bound to dbg session"); | ||
| 1372 | return -EINVAL; | ||
| 1373 | } | ||
| 1374 | |||
| 1375 | nvgpu_speculation_barrier(); | ||
| 1376 | switch (args->cmd) { | ||
| 1377 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: | ||
| 1378 | gk20a_dbg_gpu_events_enable(dbg_s); | ||
| 1379 | break; | ||
| 1380 | |||
| 1381 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: | ||
| 1382 | gk20a_dbg_gpu_events_disable(dbg_s); | ||
| 1383 | break; | ||
| 1384 | |||
| 1385 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: | ||
| 1386 | gk20a_dbg_gpu_events_clear(dbg_s); | ||
| 1387 | break; | ||
| 1388 | |||
| 1389 | default: | ||
| 1390 | nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x", | ||
| 1391 | args->cmd); | ||
| 1392 | ret = -EINVAL; | ||
| 1393 | break; | ||
| 1394 | } | ||
| 1395 | |||
| 1396 | return ret; | ||
| 1397 | } | ||
| 1398 | |||
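| | /* | ||
| | * Map a dmabuf as the perf output buffer. Only one owner is allowed at a | ||
| | * time (-EBUSY otherwise); the buffer is mapped into a dedicated perfbuf | ||
| | * VM and must not cross a 4GB boundary, presumably because the hardware | ||
| | * takes a 32-bit buffer size. | ||
| | */ | ||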
| 1399 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
| 1400 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) | ||
| 1401 | { | ||
| 1402 | struct gk20a *g = dbg_s->g; | ||
| 1403 | struct mm_gk20a *mm = &g->mm; | ||
| 1404 | int err; | ||
| 1405 | u32 virt_size; | ||
| 1406 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
| 1407 | |||
| 1408 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1409 | |||
| 1410 | if (g->perfbuf.owner) { | ||
| 1411 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1412 | return -EBUSY; | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size, | ||
| 1416 | big_page_size << 10, | ||
| 1417 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
| 1418 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
| 1419 | false, false, "perfbuf"); | ||
| 1420 | if (!mm->perfbuf.vm) { | ||
| 1421 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1422 | return -ENOMEM; | ||
| 1423 | } | ||
| 1424 | |||
| 1425 | err = nvgpu_vm_map_buffer(mm->perfbuf.vm, | ||
| 1426 | args->dmabuf_fd, | ||
| 1427 | &args->offset, | ||
| 1428 | 0, | ||
| 1429 | SZ_4K, | ||
| 1430 | 0, | ||
| 1431 | 0, | ||
| 1432 | 0, | ||
| 1433 | 0, | ||
| 1434 | NULL); | ||
| 1435 | if (err) | ||
| 1436 | goto err_remove_vm; | ||
| 1437 | |||
| 1438 | /* perf output buffer may not cross a 4GB boundary */ | ||
| 1439 | virt_size = u64_lo32(args->mapping_size); | ||
| 1440 | if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size - 1)) { | ||
| 1441 | err = -EINVAL; | ||
| 1442 | goto err_unmap; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | err = g->ops.dbg_session_ops.perfbuffer_enable(g, | ||
| 1446 | args->offset, virt_size); | ||
| 1447 | if (err) | ||
| 1448 | goto err_unmap; | ||
| 1449 | |||
| 1450 | g->perfbuf.owner = dbg_s; | ||
| 1451 | g->perfbuf.offset = args->offset; | ||
| 1452 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1453 | |||
| 1454 | return 0; | ||
| 1455 | |||
| 1456 | err_unmap: | ||
| 1457 | nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL); | ||
| 1458 | err_remove_vm: | ||
| 1459 | nvgpu_vm_put(mm->perfbuf.vm); | ||
| 1460 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1461 | return err; | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
| 1465 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) | ||
| 1466 | { | ||
| 1467 | struct gk20a *g = dbg_s->g; | ||
| 1468 | int err; | ||
| 1469 | |||
| 1470 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1471 | if ((g->perfbuf.owner != dbg_s) || | ||
| 1472 | (g->perfbuf.offset != args->offset)) { | ||
| 1473 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1474 | return -EINVAL; | ||
| 1475 | } | ||
| 1476 | |||
| 1477 | err = gk20a_perfbuf_release_locked(g, args->offset); | ||
| 1478 | |||
| 1479 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1480 | |||
| 1481 | return err; | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s, | ||
| 1485 | struct nvgpu_dbg_gpu_pc_sampling_args *args) | ||
| 1486 | { | ||
| 1487 | struct channel_gk20a *ch; | ||
| 1488 | struct gk20a *g = dbg_s->g; | ||
| 1489 | |||
| 1490 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1491 | if (!ch) | ||
| 1492 | return -EINVAL; | ||
| 1493 | |||
| 1494 | nvgpu_log_fn(g, " "); | ||
| 1495 | |||
| 1496 | return g->ops.gr.update_pc_sampling ? | ||
| 1497 | g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL; | ||
| 1498 | } | ||
| 1499 | |||
| 1500 | static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( | ||
| 1501 | struct dbg_session_gk20a *dbg_s, | ||
| 1502 | struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) | ||
| 1503 | { | ||
| 1504 | struct gk20a *g = dbg_s->g; | ||
| 1505 | struct gr_gk20a *gr = &g->gr; | ||
| 1506 | u32 sm_id; | ||
| 1507 | struct channel_gk20a *ch; | ||
| 1508 | int err = 0; | ||
| 1509 | |||
| 1510 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1511 | if (ch == NULL) { | ||
| 1512 | return -EINVAL; | ||
| 1513 | } | ||
| 1514 | |||
| 1515 | sm_id = args->sm_id; | ||
| 1516 | if (sm_id >= gr->no_of_sm) | ||
| 1517 | return -EINVAL; | ||
| 1518 | |||
| 1519 | nvgpu_speculation_barrier(); | ||
| 1520 | |||
| 1521 | err = gk20a_busy(g); | ||
| 1522 | if (err != 0) { | ||
| 1523 | return err; | ||
| 1524 | } | ||
| 1525 | |||
| 1526 | err = gr_gk20a_elpg_protected_call(g, | ||
| 1527 | g->ops.gr.clear_sm_error_state(g, ch, sm_id)); | ||
| 1528 | |||
| 1529 | gk20a_idle(g); | ||
| 1530 | |||
| 1531 | return err; | ||
| 1532 | } | ||
| 1533 | |||
| 1534 | static int | ||
| 1535 | nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s, | ||
| 1536 | struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args) | ||
| 1537 | { | ||
| 1538 | struct gk20a *g = dbg_s->g; | ||
| 1539 | int err = 0; | ||
| 1540 | int ctx_resident_ch_fd = -1; | ||
| 1541 | |||
| 1542 | err = gk20a_busy(g); | ||
| 1543 | if (err) | ||
| 1544 | return err; | ||
| 1545 | |||
| 1546 | nvgpu_speculation_barrier(); | ||
| 1547 | switch (args->action) { | ||
| 1548 | case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS: | ||
| 1549 | err = g->ops.gr.suspend_contexts(g, dbg_s, | ||
| 1550 | &ctx_resident_ch_fd); | ||
| 1551 | break; | ||
| 1552 | |||
| 1553 | case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS: | ||
| 1554 | err = g->ops.gr.resume_contexts(g, dbg_s, | ||
| 1555 | &ctx_resident_ch_fd); | ||
| 1556 | break; | ||
| 1557 | } | ||
| 1558 | |||
| 1559 | if (ctx_resident_ch_fd < 0) { | ||
| 1560 | args->is_resident_context = 0; | ||
| 1561 | } else { | ||
| 1562 | args->is_resident_context = 1; | ||
| 1563 | args->resident_context_fd = ctx_resident_ch_fd; | ||
| 1564 | } | ||
| 1565 | |||
| 1566 | gk20a_idle(g); | ||
| 1567 | |||
| 1568 | return err; | ||
| 1569 | } | ||
| 1570 | |||
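| | /* | ||
| | * Read or write framebuffer memory backed by a dmabuf. Data is staged | ||
| | * through a SZ_4K kernel bounce buffer, so each loop iteration moves at | ||
| | * most 4K between userspace and vidmem; offset and size must be | ||
| | * word-aligned and lie within the dmabuf. | ||
| | */ | ||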
| 1571 | static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, | ||
| 1572 | struct nvgpu_dbg_gpu_access_fb_memory_args *args) | ||
| 1573 | { | ||
| 1574 | struct gk20a *g = dbg_s->g; | ||
| 1575 | struct dma_buf *dmabuf; | ||
| 1576 | void __user *user_buffer = (void __user *)(uintptr_t)args->buffer; | ||
| 1577 | void *buffer; | ||
| 1578 | u64 size, access_size, offset; | ||
| 1579 | u64 access_limit_size = SZ_4K; | ||
| 1580 | int err = 0; | ||
| 1581 | |||
| 1582 | if ((args->offset & 3) || (!args->size) || (args->size & 3)) | ||
| 1583 | return -EINVAL; | ||
| 1584 | |||
| 1585 | dmabuf = dma_buf_get(args->dmabuf_fd); | ||
| 1586 | if (IS_ERR(dmabuf)) | ||
| 1587 | return -EINVAL; | ||
| 1588 | |||
| 1589 | if ((args->offset > dmabuf->size) || | ||
| 1590 | (args->size > dmabuf->size) || | ||
| 1591 | (args->offset + args->size > dmabuf->size)) { | ||
| 1592 | err = -EINVAL; | ||
| 1593 | goto fail_dmabuf_put; | ||
| 1594 | } | ||
| 1595 | |||
| 1596 | buffer = nvgpu_big_zalloc(g, access_limit_size); | ||
| 1597 | if (!buffer) { | ||
| 1598 | err = -ENOMEM; | ||
| 1599 | goto fail_dmabuf_put; | ||
| 1600 | } | ||
| 1601 | |||
| 1602 | size = args->size; | ||
| 1603 | offset = 0; | ||
| 1604 | |||
| 1605 | err = gk20a_busy(g); | ||
| 1606 | if (err) | ||
| 1607 | goto fail_free_buffer; | ||
| 1608 | |||
| 1609 | while (size) { | ||
| 1610 | /* Access at most access_limit_size bytes per iteration */ | ||
| 1611 | access_size = min(access_limit_size, size); | ||
| 1612 | |||
| 1613 | if (args->cmd == | ||
| 1614 | NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) { | ||
| 1615 | if (copy_from_user(buffer, user_buffer + offset, access_size)) { | ||
| 1616 | err = -EFAULT; /* the bytes-not-copied count is not an errno */ | ||
| 1617 | goto fail_idle; | ||
| 1618 | } | ||
| 1619 | } | ||
| 1620 | |||
| 1621 | err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer, | ||
| 1622 | args->offset + offset, access_size, | ||
| 1623 | args->cmd); | ||
| 1624 | if (err) | ||
| 1625 | goto fail_idle; | ||
| 1626 | |||
| 1627 | if (args->cmd == | ||
| 1628 | NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) { | ||
| 1629 | if (copy_to_user(user_buffer + offset, buffer, access_size)) { | ||
| 1630 | err = -EFAULT; | ||
| 1631 | goto fail_idle; | ||
| 1632 | } | ||
| 1633 | } | ||
| 1634 | |||
| 1635 | size -= access_size; | ||
| 1636 | offset += access_size; | ||
| 1637 | } | ||
| 1638 | nvgpu_speculation_barrier(); | ||
| 1639 | |||
| 1640 | fail_idle: | ||
| 1641 | gk20a_idle(g); | ||
| 1642 | fail_free_buffer: | ||
| 1643 | nvgpu_big_free(g, buffer); | ||
| 1644 | fail_dmabuf_put: | ||
| 1645 | dma_buf_put(dmabuf); | ||
| 1646 | |||
| 1647 | return err; | ||
| 1648 | } | ||
| 1649 | |||
| 1650 | static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, | ||
| 1651 | struct nvgpu_dbg_gpu_profiler_reserve_args *args) | ||
| 1652 | { | ||
| 1653 | if (args->acquire) | ||
| 1654 | return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle); | ||
| 1655 | |||
| 1656 | return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); | ||
| 1657 | } | ||
| 1658 | |||
| 1659 | static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, | ||
| 1660 | struct nvgpu_dbg_gpu_timeout_args *args) | ||
| 1661 | { | ||
| 1662 | bool status; | ||
| 1663 | struct gk20a *g = dbg_s->g; | ||
| 1664 | |||
| 1665 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1666 | status = nvgpu_is_timeouts_enabled(g); | ||
| 1667 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1668 | |||
| 1669 | if (status) | ||
| 1670 | args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE; | ||
| 1671 | else | ||
| 1672 | args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE; | ||
| 1673 | } | ||
| 1674 | |||
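| | /* | ||
| | * Tear down the perf buffer: disable the perfbuffer in hardware, unmap | ||
| | * the buffer, free the instance block and the dedicated VM, and clear | ||
| | * the ownership record. Caller must hold g->dbg_sessions_lock. | ||
| | */ | ||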
| 1675 | static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) | ||
| 1676 | { | ||
| 1677 | struct mm_gk20a *mm = &g->mm; | ||
| 1678 | struct vm_gk20a *vm = mm->perfbuf.vm; | ||
| 1679 | int err; | ||
| 1680 | |||
| 1681 | err = g->ops.dbg_session_ops.perfbuffer_disable(g); | ||
| 1682 | |||
| 1683 | nvgpu_vm_unmap(vm, offset, NULL); | ||
| 1684 | nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); | ||
| 1685 | nvgpu_vm_put(vm); | ||
| 1686 | |||
| 1687 | g->perfbuf.owner = NULL; | ||
| 1688 | g->perfbuf.offset = 0; | ||
| 1689 | return err; | ||
| 1690 | } | ||
| 1691 | |||
| 1692 | static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, | ||
| 1693 | u32 profiler_handle) | ||
| 1694 | { | ||
| 1695 | struct gk20a *g = dbg_s->g; | ||
| 1696 | struct dbg_profiler_object_data *prof_obj; | ||
| 1697 | int err = 0; | ||
| 1698 | |||
| 1699 | nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); | ||
| 1700 | |||
| 1701 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1702 | |||
| 1703 | /* Find matching object. */ | ||
| 1704 | prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); | ||
| 1705 | |||
| 1706 | if (!prof_obj) { | ||
| 1707 | nvgpu_err(g, "object not found"); | ||
| 1708 | err = -EINVAL; | ||
| 1709 | goto exit; | ||
| 1710 | } | ||
| 1711 | |||
| 1712 | if (prof_obj->has_reservation) | ||
| 1713 | g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj); | ||
| 1714 | else { | ||
| 1715 | nvgpu_err(g, "No reservation found"); | ||
| 1716 | err = -EINVAL; | ||
| 1717 | goto exit; | ||
| 1718 | } | ||
| 1719 | exit: | ||
| 1720 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1721 | return err; | ||
| 1722 | } | ||
| 1723 | |||
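| | /* | ||
| | * Reservation policy: a global (channel-less) reservation is granted | ||
| | * only when no other reservation of any kind is held, a per-context | ||
| | * reservation is refused while a global one is in effect, and within a | ||
| | * TSG at most one channel may hold the reservation. | ||
| | */ | ||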
| 1724 | static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, | ||
| 1725 | u32 profiler_handle) | ||
| 1726 | { | ||
| 1727 | struct gk20a *g = dbg_s->g; | ||
| 1728 | struct dbg_profiler_object_data *prof_obj, *my_prof_obj; | ||
| 1729 | int err = 0; | ||
| 1730 | struct tsg_gk20a *tsg; | ||
| 1731 | |||
| 1732 | nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); | ||
| 1733 | |||
| 1734 | if (g->profiler_reservation_count < 0) { | ||
| 1735 | nvgpu_err(g, "Negative reservation count!"); | ||
| 1736 | return -EINVAL; | ||
| 1737 | } | ||
| 1738 | |||
| 1739 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1740 | |||
| 1741 | /* Find matching object. */ | ||
| 1742 | my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); | ||
| 1743 | |||
| 1744 | if (!my_prof_obj) { | ||
| 1745 | nvgpu_err(g, "object not found"); | ||
| 1746 | err = -EINVAL; | ||
| 1747 | goto exit; | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | /* If we already have the reservation, we're done */ | ||
| 1751 | if (my_prof_obj->has_reservation) { | ||
| 1752 | err = 0; | ||
| 1753 | goto exit; | ||
| 1754 | } | ||
| 1755 | |||
| 1756 | if (my_prof_obj->ch == NULL) { | ||
| 1757 | /* Global reservations are only allowed if there are no other | ||
| 1758 | * global or per-context reservations currently held | ||
| 1759 | */ | ||
| 1760 | if (!g->ops.dbg_session_ops.check_and_set_global_reservation( | ||
| 1761 | dbg_s, my_prof_obj)) { | ||
| 1762 | nvgpu_err(g, | ||
| 1763 | "global reserve: have existing reservation"); | ||
| 1764 | err = -EBUSY; | ||
| 1765 | } | ||
| 1766 | } else if (g->global_profiler_reservation_held) { | ||
| 1767 | /* If there's a global reservation, | ||
| 1768 | * we can't take a per-context one. | ||
| 1769 | */ | ||
| 1770 | nvgpu_err(g, | ||
| 1771 | "per-ctxt reserve: global reservation in effect"); | ||
| 1772 | err = -EBUSY; | ||
| 1773 | } else if ((tsg = tsg_gk20a_from_ch(my_prof_obj->ch)) != NULL) { | ||
| 1774 | /* TSG: check that another channel in the TSG | ||
| 1775 | * doesn't already have the reservation | ||
| 1776 | */ | ||
| 1777 | u32 my_tsgid = tsg->tsgid; | ||
| 1778 | |||
| 1779 | nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, | ||
| 1780 | dbg_profiler_object_data, prof_obj_entry) { | ||
| 1781 | if (prof_obj->has_reservation && | ||
| 1782 | (prof_obj->ch->tsgid == my_tsgid)) { | ||
| 1783 | nvgpu_err(g, | ||
| 1784 | "per-ctxt reserve (tsg): already reserved"); | ||
| 1785 | err = -EBUSY; | ||
| 1786 | goto exit; | ||
| 1787 | } | ||
| 1788 | } | ||
| 1789 | |||
| 1790 | if (!g->ops.dbg_session_ops.check_and_set_context_reservation( | ||
| 1791 | dbg_s, my_prof_obj)) { | ||
| 1792 | /* Another guest OS has the global reservation */ | ||
| 1793 | nvgpu_err(g, | ||
| 1794 | "per-ctxt reserve: global reservation in effect"); | ||
| 1795 | err = -EBUSY; | ||
| 1796 | } | ||
| 1797 | } else { | ||
| 1798 | /* channel: check that some other profiler object doesn't | ||
| 1799 | * already have the reservation. | ||
| 1800 | */ | ||
| 1801 | struct channel_gk20a *my_ch = my_prof_obj->ch; | ||
| 1802 | |||
| 1803 | nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, | ||
| 1804 | dbg_profiler_object_data, prof_obj_entry) { | ||
| 1805 | if (prof_obj->has_reservation && | ||
| 1806 | (prof_obj->ch == my_ch)) { | ||
| 1807 | nvgpu_err(g, | ||
| 1808 | "per-ctxt reserve (ch): already reserved"); | ||
| 1809 | err = -EBUSY; | ||
| 1810 | goto exit; | ||
| 1811 | } | ||
| 1812 | } | ||
| 1813 | |||
| 1814 | if (!g->ops.dbg_session_ops.check_and_set_context_reservation( | ||
| 1815 | dbg_s, my_prof_obj)) { | ||
| 1816 | /* Another guest OS has the global reservation */ | ||
| 1817 | nvgpu_err(g, | ||
| 1818 | "per-ctxt reserve: global reservation in effect"); | ||
| 1819 | err = -EBUSY; | ||
| 1820 | } | ||
| 1821 | } | ||
| 1822 | exit: | ||
| 1823 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1824 | return err; | ||
| 1825 | } | ||
| 1826 | |||
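| | /* | ||
| | * Unbind one channel from the session: find the matching ch_data by | ||
| | * chid under ch_list_lock, then perform the unbind under the same | ||
| | * dbg_sessions_lock -> ch_list_lock ordering used at bind time. | ||
| | */ | ||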
| 1827 | static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s, | ||
| 1828 | struct nvgpu_dbg_gpu_unbind_channel_args *args) | ||
| 1829 | { | ||
| 1830 | struct dbg_session_channel_data *ch_data; | ||
| 1831 | struct gk20a *g = dbg_s->g; | ||
| 1832 | bool channel_found = false; | ||
| 1833 | struct channel_gk20a *ch; | ||
| 1834 | int err; | ||
| 1835 | |||
| 1836 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", | ||
| 1837 | g->name, args->channel_fd); | ||
| 1838 | |||
| 1839 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
| 1840 | if (!ch) { | ||
| 1841 | nvgpu_log_fn(g, "no channel found for fd"); | ||
| 1842 | return -EINVAL; | ||
| 1843 | } | ||
| 1844 | |||
| 1845 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
| 1846 | nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list, | ||
| 1847 | dbg_session_channel_data, ch_entry) { | ||
| 1848 | if (ch->chid == ch_data->chid) { | ||
| 1849 | channel_found = true; | ||
| 1850 | break; | ||
| 1851 | } | ||
| 1852 | } | ||
| 1853 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
| 1854 | |||
| 1855 | if (!channel_found) { | ||
| 1856 | nvgpu_log_fn(g, "channel not bounded, fd=%d\n", args->channel_fd); | ||
| 1857 | err = -EINVAL; | ||
| 1858 | goto out; | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1862 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
| 1863 | err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data); | ||
| 1864 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
| 1865 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1866 | |||
| 1867 | out: | ||
| 1868 | gk20a_channel_put(ch); | ||
| 1869 | return err; | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | static int nvgpu_set_sm_exception_type_mask_locked( | ||
| 1873 | struct dbg_session_gk20a *dbg_s, | ||
| 1874 | u32 exception_mask) | ||
| 1875 | { | ||
| 1876 | struct gk20a *g = dbg_s->g; | ||
| 1877 | int err = 0; | ||
| 1878 | struct channel_gk20a *ch = NULL; | ||
| 1879 | |||
| 1880 | /* | ||
| 1881 | * Obtain the first channel from the channel list in the | ||
| 1882 | * dbg session, find the TSG associated with that channel, | ||
| 1883 | * and set the sm_exception_mask_type on that TSG | ||
| 1884 | */ | ||
| 1885 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1886 | if (ch != NULL) { | ||
| 1887 | struct tsg_gk20a *tsg; | ||
| 1888 | |||
| 1889 | tsg = tsg_gk20a_from_ch(ch); | ||
| 1890 | if (tsg != NULL) { | ||
| 1891 | tsg->sm_exception_mask_type = exception_mask; | ||
| 1892 | goto type_mask_end; | ||
| 1893 | } | ||
| 1894 | } | ||
| 1895 | |||
| 1896 | nvgpu_log_fn(g, "unable to find the TSG\n"); | ||
| 1897 | err = -EINVAL; | ||
| 1898 | |||
| 1899 | type_mask_end: | ||
| 1900 | return err; | ||
| 1901 | } | ||
| 1902 | |||
| 1903 | static int nvgpu_dbg_gpu_set_sm_exception_type_mask( | ||
| 1904 | struct dbg_session_gk20a *dbg_s, | ||
| 1905 | struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args *args) | ||
| 1906 | { | ||
| 1907 | int err = 0; | ||
| 1908 | struct gk20a *g = dbg_s->g; | ||
| 1909 | u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
| 1910 | |||
| 1911 | nvgpu_speculation_barrier(); | ||
| 1912 | switch (args->exception_type_mask) { | ||
| 1913 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL: | ||
| 1914 | sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL; | ||
| 1915 | break; | ||
| 1916 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_NONE: | ||
| 1917 | sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
| 1918 | break; | ||
| 1919 | default: | ||
| 1920 | nvgpu_err(g, | ||
| 1921 | "unrecognized dbg sm exception type mask: 0x%x", | ||
| 1922 | args->exception_type_mask); | ||
| 1923 | err = -EINVAL; | ||
| 1924 | break; | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | if (err != 0) { | ||
| 1928 | return err; | ||
| 1929 | } | ||
| 1930 | |||
| 1931 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 1932 | err = nvgpu_set_sm_exception_type_mask_locked(dbg_s, | ||
| 1933 | sm_exception_mask_type); | ||
| 1934 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 1935 | |||
| 1936 | return err; | ||
| 1937 | } | ||
| 1938 | |||
| 1939 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
| 1940 | static int nvgpu_dbg_gpu_cycle_stats(struct dbg_session_gk20a *dbg_s, | ||
| 1941 | struct nvgpu_dbg_gpu_cycle_stats_args *args) | ||
| 1942 | { | ||
| 1943 | struct channel_gk20a *ch = NULL; | ||
| 1944 | int err; | ||
| 1945 | |||
| 1946 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1947 | if (ch == NULL) { | ||
| 1948 | return -EINVAL; | ||
| 1949 | } | ||
| 1950 | |||
| 1951 | err = gk20a_busy(ch->g); | ||
| 1952 | if (err != 0) { | ||
| 1953 | return err; | ||
| 1954 | } | ||
| 1955 | |||
| 1956 | err = gk20a_channel_cycle_stats(ch, args->dmabuf_fd); | ||
| 1957 | |||
| 1958 | gk20a_idle(ch->g); | ||
| 1959 | return err; | ||
| 1960 | } | ||
| 1961 | |||
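| | /* | ||
| | * Cycle stats snapshot ioctl: ATTACH registers the dmabuf as the | ||
| | * snapshot buffer (args->extra is passed in and updated in place), | ||
| | * FLUSH drains pending snapshot data, and DETACH releases the buffer. | ||
| | */ | ||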
| 1962 | static int nvgpu_dbg_gpu_cycle_stats_snapshot(struct dbg_session_gk20a *dbg_s, | ||
| 1963 | struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *args) | ||
| 1964 | { | ||
| 1965 | struct channel_gk20a *ch = NULL; | ||
| 1966 | int err; | ||
| 1967 | |||
| 1968 | if (!args->dmabuf_fd) { | ||
| 1969 | return -EINVAL; | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | nvgpu_speculation_barrier(); | ||
| 1973 | |||
| 1974 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
| 1975 | if (ch == NULL) { | ||
| 1976 | return -EINVAL; | ||
| 1977 | } | ||
| 1978 | |||
| 1979 | /* check whether cycle stats snapshot calls are supported on this GPU */ | ||
| 1980 | if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT)) { | ||
| 1981 | return -ENOSYS; | ||
| 1982 | } | ||
| 1983 | |||
| 1984 | err = gk20a_busy(ch->g); | ||
| 1985 | if (err != 0) { | ||
| 1986 | return err; | ||
| 1987 | } | ||
| 1988 | |||
| 1989 | /* handle the command (most frequent cases first) */ | ||
| 1990 | switch (args->cmd) { | ||
| 1991 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: | ||
| 1992 | err = gk20a_flush_cycle_stats_snapshot(ch); | ||
| 1993 | args->extra = 0; | ||
| 1994 | break; | ||
| 1995 | |||
| 1996 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH: | ||
| 1997 | err = gk20a_attach_cycle_stats_snapshot(ch, | ||
| 1998 | args->dmabuf_fd, | ||
| 1999 | args->extra, | ||
| 2000 | &args->extra); | ||
| 2001 | break; | ||
| 2002 | |||
| 2003 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT_CMD_DETACH: | ||
| 2004 | err = gk20a_channel_free_cycle_stats_snapshot(ch); | ||
| 2005 | args->extra = 0; | ||
| 2006 | break; | ||
| 2007 | |||
| 2008 | default: | ||
| 2009 | pr_err("cyclestats: unknown command %u\n", args->cmd); | ||
| 2010 | err = -EINVAL; | ||
| 2011 | break; | ||
| 2012 | } | ||
| 2013 | |||
| 2014 | gk20a_idle(ch->g); | ||
| 2015 | return err; | ||
| 2016 | } | ||
| 2017 | |||
| 2018 | #endif | ||
| 2019 | |||
| 2020 | int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp) | ||
| 2021 | { | ||
| 2022 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
| 2023 | struct nvgpu_os_linux, dbg.cdev); | ||
| 2024 | struct gk20a *g = &l->g; | ||
| 2025 | |||
| 2026 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 2027 | return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */); | ||
| 2028 | } | ||
| 2029 | |||
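| | /* | ||
| | * Main ioctl dispatcher. Arguments are staged through an on-stack | ||
| | * buffer bounded by NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE after the _IOC | ||
| | * sanity checks, and dbg_s->ioctl_lock serializes concurrent user | ||
| | * threads on the same session. | ||
| | */ | ||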
| 2030 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | ||
| 2031 | unsigned long arg) | ||
| 2032 | { | ||
| 2033 | struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data; | ||
| 2034 | struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; | ||
| 2035 | struct gk20a *g = dbg_s->g; | ||
| 2036 | u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE]; | ||
| 2037 | int err = 0; | ||
| 2038 | |||
| 2039 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
| 2040 | |||
| 2041 | if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) || | ||
| 2042 | (_IOC_NR(cmd) == 0) || | ||
| 2043 | (_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) || | ||
| 2044 | (_IOC_SIZE(cmd) > NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE)) | ||
| 2045 | return -EINVAL; | ||
| 2046 | |||
| 2047 | memset(buf, 0, sizeof(buf)); | ||
| 2048 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
| 2049 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
| 2050 | return -EFAULT; | ||
| 2051 | } | ||
| 2052 | |||
| 2053 | if (!g->sw_ready) { | ||
| 2054 | err = gk20a_busy(g); | ||
| 2055 | if (err) | ||
| 2056 | return err; | ||
| 2057 | |||
| 2058 | gk20a_idle(g); | ||
| 2059 | } | ||
| 2060 | |||
| 2061 | /* protect from threaded user space calls */ | ||
| 2062 | nvgpu_mutex_acquire(&dbg_s->ioctl_lock); | ||
| 2063 | |||
| 2064 | nvgpu_speculation_barrier(); | ||
| 2065 | switch (cmd) { | ||
| 2066 | case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: | ||
| 2067 | err = dbg_bind_channel_gk20a(dbg_s, | ||
| 2068 | (struct nvgpu_dbg_gpu_bind_channel_args *)buf); | ||
| 2069 | break; | ||
| 2070 | |||
| 2071 | case NVGPU_DBG_GPU_IOCTL_REG_OPS: | ||
| 2072 | err = nvgpu_ioctl_channel_reg_ops(dbg_s, | ||
| 2073 | (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); | ||
| 2074 | break; | ||
| 2075 | |||
| 2076 | case NVGPU_DBG_GPU_IOCTL_POWERGATE: | ||
| 2077 | err = nvgpu_ioctl_powergate_gk20a(dbg_s, | ||
| 2078 | (struct nvgpu_dbg_gpu_powergate_args *)buf); | ||
| 2079 | break; | ||
| 2080 | |||
| 2081 | case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: | ||
| 2082 | err = gk20a_dbg_gpu_events_ctrl(dbg_s, | ||
| 2083 | (struct nvgpu_dbg_gpu_events_ctrl_args *)buf); | ||
| 2084 | break; | ||
| 2085 | |||
| 2086 | case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: | ||
| 2087 | err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, | ||
| 2088 | (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); | ||
| 2089 | break; | ||
| 2090 | |||
| 2091 | case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE: | ||
| 2092 | err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s, | ||
| 2093 | (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf); | ||
| 2094 | break; | ||
| 2095 | |||
| 2096 | case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: | ||
| 2097 | err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, | ||
| 2098 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); | ||
| 2099 | break; | ||
| 2100 | |||
| 2101 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: | ||
| 2102 | err = gk20a_perfbuf_map(dbg_s, | ||
| 2103 | (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); | ||
| 2104 | break; | ||
| 2105 | |||
| 2106 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: | ||
| 2107 | err = gk20a_perfbuf_unmap(dbg_s, | ||
| 2108 | (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); | ||
| 2109 | break; | ||
| 2110 | |||
| 2111 | case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING: | ||
| 2112 | err = gk20a_dbg_pc_sampling(dbg_s, | ||
| 2113 | (struct nvgpu_dbg_gpu_pc_sampling_args *)buf); | ||
| 2114 | break; | ||
| 2115 | |||
| 2116 | case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE: | ||
| 2117 | err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s, | ||
| 2118 | (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf); | ||
| 2119 | break; | ||
| 2120 | |||
| 2121 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT: | ||
| 2122 | err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s, | ||
| 2123 | (struct nvgpu_dbg_gpu_timeout_args *)buf); | ||
| 2124 | break; | ||
| 2125 | |||
| 2126 | case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT: | ||
| 2127 | nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s, | ||
| 2128 | (struct nvgpu_dbg_gpu_timeout_args *)buf); | ||
| 2129 | break; | ||
| 2130 | |||
| 2131 | case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: | ||
| 2132 | err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, | ||
| 2133 | (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); | ||
| 2134 | break; | ||
| 2135 | |||
| 2136 | case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE: | ||
| 2137 | err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s, | ||
| 2138 | (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf); | ||
| 2139 | break; | ||
| 2140 | |||
| 2141 | case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL: | ||
| 2142 | err = dbg_unbind_channel_gk20a(dbg_s, | ||
| 2143 | (struct nvgpu_dbg_gpu_unbind_channel_args *)buf); | ||
| 2144 | break; | ||
| 2145 | |||
| 2146 | case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS: | ||
| 2147 | err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s, | ||
| 2148 | (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf); | ||
| 2149 | break; | ||
| 2150 | |||
| 2151 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY: | ||
| 2152 | err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s, | ||
| 2153 | (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf); | ||
| 2154 | break; | ||
| 2155 | |||
| 2156 | case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE: | ||
| 2157 | err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux, | ||
| 2158 | (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); | ||
| 2159 | break; | ||
| 2160 | |||
| 2161 | case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE: | ||
| 2162 | err = nvgpu_ioctl_free_profiler_object(dbg_s_linux, | ||
| 2163 | (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); | ||
| 2164 | break; | ||
| 2165 | |||
| 2166 | case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE: | ||
| 2167 | err = nvgpu_ioctl_profiler_reserve(dbg_s, | ||
| 2168 | (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf); | ||
| 2169 | break; | ||
| 2170 | |||
| 2171 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK: | ||
| 2172 | err = nvgpu_dbg_gpu_set_sm_exception_type_mask(dbg_s, | ||
| 2173 | (struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args *)buf); | ||
| 2174 | break; | ||
| 2175 | |||
| 2176 | case NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE: | ||
| 2177 | err = nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(dbg_s, | ||
| 2178 | (struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *)buf); | ||
| 2179 | break; | ||
| 2180 | |||
| 2181 | #ifdef CONFIG_GK20A_CYCLE_STATS | ||
| 2182 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS: | ||
| 2183 | err = nvgpu_dbg_gpu_cycle_stats(dbg_s, | ||
| 2184 | (struct nvgpu_dbg_gpu_cycle_stats_args *)buf); | ||
| 2185 | break; | ||
| 2186 | |||
| 2187 | case NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT: | ||
| 2188 | err = nvgpu_dbg_gpu_cycle_stats_snapshot(dbg_s, | ||
| 2189 | (struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *)buf); | ||
| 2190 | break; | ||
| 2191 | #endif | ||
| 2192 | |||
| 2193 | default: | ||
| 2194 | nvgpu_err(g, | ||
| 2195 | "unrecognized dbg gpu ioctl cmd: 0x%x", | ||
| 2196 | cmd); | ||
| 2197 | err = -ENOTTY; | ||
| 2198 | break; | ||
| 2199 | } | ||
| 2200 | |||
| 2201 | nvgpu_mutex_release(&dbg_s->ioctl_lock); | ||
| 2202 | |||
| 2203 | nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err); | ||
| 2204 | |||
| 2205 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) { | ||
| 2206 | if (copy_to_user((void __user *)arg, | ||
| 2207 | buf, _IOC_SIZE(cmd))) | ||
| 2207 | err = -EFAULT; | ||
| 2208 | } | ||
| 2208 | |||
| 2209 | return err; | ||
| 2210 | } | ||
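The handler above is the standard Linux ioctl marshalling pattern: validate the command number and size, copy the argument struct in for `_IOC_WRITE` commands, dispatch under `ioctl_lock`, and copy results back out for `_IOC_READ` commands. A hedged userspace sketch of driving it follows; the device node path and the `enable` field name are assumptions from the uapi header, while the command name and direction handling come from the dispatch above.

```c
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* uapi header with the ioctl definitions */

int dbg_get_timeout(void)
{
	struct nvgpu_dbg_gpu_timeout_args args = { 0 };
	int fd = open("/dev/nvhost-dbg-gpu", O_RDWR);	/* assumed path */

	if (fd < 0)
		return -1;

	/* GET_TIMEOUT is _IOC_READ: the handler copies args back out. */
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT, &args) < 0) {
		close(fd);
		return -1;
	}

	printf("timeouts enabled: %u\n", args.enable);	/* field assumed */
	close(fd);
	return 0;
}
```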
diff --git a/include/os/linux/ioctl_dbg.h b/include/os/linux/ioctl_dbg.h new file mode 100644 index 0000000..2e188cc --- /dev/null +++ b/include/os/linux/ioctl_dbg.h | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | /* | ||
| 2 | * Tegra GK20A GPU Debugger Driver | ||
| 3 | * | ||
| 4 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | #ifndef DBG_GPU_IOCTL_GK20A_H | ||
| 19 | #define DBG_GPU_IOCTL_GK20A_H | ||
| 20 | |||
| 21 | struct inode; | ||
| 22 | struct file; | ||
| 23 | typedef struct poll_table_struct poll_table; | ||
| 24 | |||
| 25 | /* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number | ||
| 26 | * of regops */ | ||
| 27 | #define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024 | ||
| 28 | |||
| 29 | /* module debug driver interface */ | ||
| 30 | int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp); | ||
| 31 | int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp); | ||
| 32 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
| 33 | unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait); | ||
| 34 | |||
| 35 | /* used by profiler driver interface */ | ||
| 36 | int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp); | ||
| 37 | |||
| 38 | #endif | ||
diff --git a/include/os/linux/ioctl_tsg.c b/include/os/linux/ioctl_tsg.c new file mode 100644 index 0000000..2f8cb3a --- /dev/null +++ b/include/os/linux/ioctl_tsg.c | |||
| @@ -0,0 +1,750 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/fs.h> | ||
| 18 | #include <linux/file.h> | ||
| 19 | #include <linux/cdev.h> | ||
| 20 | #include <linux/uaccess.h> | ||
| 21 | #include <linux/poll.h> | ||
| 22 | #include <uapi/linux/nvgpu.h> | ||
| 23 | #include <linux/anon_inodes.h> | ||
| 24 | |||
| 25 | #include <nvgpu/kmem.h> | ||
| 26 | #include <nvgpu/log.h> | ||
| 27 | #include <nvgpu/os_sched.h> | ||
| 28 | #include <nvgpu/gk20a.h> | ||
| 29 | #include <nvgpu/channel.h> | ||
| 30 | #include <nvgpu/tsg.h> | ||
| 31 | |||
| 32 | #include "gv11b/fifo_gv11b.h" | ||
| 33 | #include "platform_gk20a.h" | ||
| 34 | #include "ioctl_tsg.h" | ||
| 35 | #include "ioctl_channel.h" | ||
| 36 | #include "os_linux.h" | ||
| 37 | |||
| 38 | struct tsg_private { | ||
| 39 | struct gk20a *g; | ||
| 40 | struct tsg_gk20a *tsg; | ||
| 41 | }; | ||
| 42 | |||
| 43 | static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) | ||
| 44 | { | ||
| 45 | struct channel_gk20a *ch; | ||
| 46 | int err; | ||
| 47 | |||
| 48 | ch = gk20a_get_channel_from_file(ch_fd); | ||
| 49 | if (!ch) | ||
| 50 | return -EINVAL; | ||
| 51 | |||
| 52 | err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); | ||
| 53 | |||
| 54 | gk20a_channel_put(ch); | ||
| 55 | return err; | ||
| 56 | } | ||
| 57 | |||
| 58 | static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, | ||
| 59 | struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) | ||
| 60 | { | ||
| 61 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 62 | struct channel_gk20a *ch; | ||
| 63 | struct gr_gk20a *gr = &g->gr; | ||
| 64 | int err = 0; | ||
| 65 | |||
| 66 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
| 67 | |||
| 68 | nvgpu_mutex_acquire(&sched->control_lock); | ||
| 69 | if (sched->control_locked) { | ||
| 70 | err = -EPERM; | ||
| 71 | goto mutex_release; | ||
| 72 | } | ||
| 73 | err = gk20a_busy(g); | ||
| 74 | if (err) { | ||
| 75 | nvgpu_err(g, "failed to power on gpu"); | ||
| 76 | goto mutex_release; | ||
| 77 | } | ||
| 78 | |||
| 79 | ch = gk20a_get_channel_from_file(arg->channel_fd); | ||
| 80 | if (!ch) { | ||
| 81 | err = -EINVAL; | ||
| 82 | goto idle; | ||
| 83 | } | ||
| 84 | |||
| 85 | if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { | ||
| 86 | if ((arg->num_active_tpcs > gr->max_tpc_count) || | ||
| 87 | !(arg->num_active_tpcs)) { | ||
| 88 | nvgpu_err(g, "Invalid num of active TPCs"); | ||
| 89 | err = -EINVAL; | ||
| 90 | goto ch_put; | ||
| 91 | } | ||
| 92 | tsg->tpc_num_initialized = true; | ||
| 93 | tsg->num_active_tpcs = arg->num_active_tpcs; | ||
| 94 | tsg->tpc_pg_enabled = true; | ||
| 95 | } else { | ||
| 96 | tsg->tpc_pg_enabled = false; | ||
| 96 | nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled"); | ||
| 97 | } | ||
| 98 | |||
| 99 | if (arg->subcontext_id < g->fifo.max_subctx_count) { | ||
| 100 | ch->subctx_id = arg->subcontext_id; | ||
| 101 | } else { | ||
| 102 | err = -EINVAL; | ||
| 103 | goto ch_put; | ||
| 104 | } | ||
| 105 | |||
| 106 | nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", | ||
| 107 | ch->chid, ch->subctx_id); | ||
| 108 | |||
| 109 | /* Use runqueue selector 1 for all ASYNC ids */ | ||
| 110 | if (ch->subctx_id > CHANNEL_INFO_VEID0) | ||
| 111 | ch->runqueue_sel = 1; | ||
| 112 | |||
| 113 | err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); | ||
| 114 | ch_put: | ||
| 115 | gk20a_channel_put(ch); | ||
| 116 | idle: | ||
| 117 | gk20a_idle(g); | ||
| 118 | mutex_release: | ||
| 119 | nvgpu_mutex_release(&sched->control_lock); | ||
| 120 | return err; | ||
| 121 | } | ||
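A hedged caller-side sketch of BIND_CHANNEL_EX: the field names (`channel_fd`, `subcontext_id`, `num_active_tpcs`, `tpc_pg_enabled`) are exactly the ones the handler above reads, while the fd origins and the example values are illustrative.

```c
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int tsg_bind_channel_ex(int tsg_fd, int channel_fd)
{
	struct nvgpu_tsg_bind_channel_ex_args args = { 0 };

	args.channel_fd = channel_fd;
	args.subcontext_id = 0;		/* must be < fifo.max_subctx_count */
	args.tpc_pg_enabled = 1;	/* request dynamic TPC power-gating */
	args.num_active_tpcs = 2;	/* must be in (0, gr.max_tpc_count] */

	return ioctl(tsg_fd, NVGPU_TSG_IOCTL_BIND_CHANNEL_EX, &args);
}
```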
| 122 | |||
| 123 | static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) | ||
| 124 | { | ||
| 125 | struct channel_gk20a *ch; | ||
| 126 | int err = 0; | ||
| 127 | |||
| 128 | ch = gk20a_get_channel_from_file(ch_fd); | ||
| 129 | if (!ch) | ||
| 130 | return -EINVAL; | ||
| 131 | |||
| 132 | if (ch->tsgid != tsg->tsgid) { | ||
| 133 | err = -EINVAL; | ||
| 134 | goto out; | ||
| 135 | } | ||
| 136 | |||
| 137 | err = gk20a_tsg_unbind_channel(ch); | ||
| 138 | |||
| 139 | /* | ||
| 140 | * Mark the channel as timed out: a channel unbound from its TSG | ||
| 141 | * has no context of its own, so it cannot serve any more jobs. | ||
| 142 | */ | ||
| 143 | gk20a_channel_set_timedout(ch); | ||
| 144 | |||
| 145 | out: | ||
| 146 | gk20a_channel_put(ch); | ||
| 147 | return err; | ||
| 148 | } | ||
| 149 | |||
| 150 | static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, | ||
| 151 | unsigned int event_id, | ||
| 152 | struct gk20a_event_id_data **event_id_data) | ||
| 153 | { | ||
| 154 | struct gk20a_event_id_data *local_event_id_data; | ||
| 155 | bool event_found = false; | ||
| 156 | |||
| 157 | nvgpu_mutex_acquire(&tsg->event_id_list_lock); | ||
| 158 | nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list, | ||
| 159 | gk20a_event_id_data, event_id_node) { | ||
| 160 | if (local_event_id_data->event_id == event_id) { | ||
| 161 | event_found = true; | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | } | ||
| 165 | nvgpu_mutex_release(&tsg->event_id_list_lock); | ||
| 166 | |||
| 167 | if (event_found) { | ||
| 168 | *event_id_data = local_event_id_data; | ||
| 169 | return 0; | ||
| 170 | } else { | ||
| 171 | return -1; | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | /* | ||
| 176 | * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific | ||
| 177 | * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs | ||
| 178 | */ | ||
| 179 | static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id) | ||
| 180 | { | ||
| 181 | switch (event_id) { | ||
| 182 | case NVGPU_EVENT_ID_BPT_INT: | ||
| 183 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT; | ||
| 184 | case NVGPU_EVENT_ID_BPT_PAUSE: | ||
| 185 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE; | ||
| 186 | case NVGPU_EVENT_ID_BLOCKING_SYNC: | ||
| 187 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC; | ||
| 188 | case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED: | ||
| 189 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED; | ||
| 190 | case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE: | ||
| 191 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE; | ||
| 192 | case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN: | ||
| 193 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN; | ||
| 194 | } | ||
| 195 | |||
| 196 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX; | ||
| 197 | } | ||
| 198 | |||
| 199 | void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, | ||
| 200 | int __event_id) | ||
| 201 | { | ||
| 202 | struct gk20a_event_id_data *event_id_data; | ||
| 203 | u32 event_id; | ||
| 204 | int err = 0; | ||
| 205 | struct gk20a *g = tsg->g; | ||
| 206 | |||
| 207 | event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id); | ||
| 208 | if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) | ||
| 209 | return; | ||
| 210 | |||
| 211 | err = gk20a_tsg_get_event_data_from_id(tsg, event_id, | ||
| 212 | &event_id_data); | ||
| 213 | if (err) | ||
| 214 | return; | ||
| 215 | |||
| 216 | nvgpu_mutex_acquire(&event_id_data->lock); | ||
| 217 | |||
| 218 | nvgpu_log_info(g, | ||
| 219 | "posting event for event_id=%d on tsg=%d\n", | ||
| 220 | event_id, tsg->tsgid); | ||
| 221 | event_id_data->event_posted = true; | ||
| 222 | |||
| 223 | nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq); | ||
| 224 | |||
| 225 | nvgpu_mutex_release(&event_id_data->lock); | ||
| 226 | } | ||
| 227 | |||
| 228 | static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) | ||
| 229 | { | ||
| 230 | unsigned int mask = 0; | ||
| 231 | struct gk20a_event_id_data *event_id_data = filep->private_data; | ||
| 232 | struct gk20a *g = event_id_data->g; | ||
| 233 | u32 event_id = event_id_data->event_id; | ||
| 234 | struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; | ||
| 235 | |||
| 236 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " "); | ||
| 237 | |||
| 238 | poll_wait(filep, &event_id_data->event_id_wq.wq, wait); | ||
| 239 | |||
| 240 | nvgpu_mutex_acquire(&event_id_data->lock); | ||
| 241 | |||
| 242 | if (event_id_data->event_posted) { | ||
| 243 | nvgpu_log_info(g, | ||
| 244 | "found pending event_id=%d on TSG=%d\n", | ||
| 245 | event_id, tsg->tsgid); | ||
| 246 | mask = (POLLPRI | POLLIN); | ||
| 247 | event_id_data->event_posted = false; | ||
| 248 | } | ||
| 249 | |||
| 250 | nvgpu_mutex_release(&event_id_data->lock); | ||
| 251 | |||
| 252 | return mask; | ||
| 253 | } | ||
| 254 | |||
| 255 | static int gk20a_event_id_release(struct inode *inode, struct file *filp) | ||
| 256 | { | ||
| 257 | struct gk20a_event_id_data *event_id_data = filp->private_data; | ||
| 258 | struct gk20a *g = event_id_data->g; | ||
| 259 | struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; | ||
| 260 | |||
| 261 | nvgpu_mutex_acquire(&tsg->event_id_list_lock); | ||
| 262 | nvgpu_list_del(&event_id_data->event_id_node); | ||
| 263 | nvgpu_mutex_release(&tsg->event_id_list_lock); | ||
| 264 | |||
| 265 | nvgpu_mutex_destroy(&event_id_data->lock); | ||
| 266 | gk20a_put(g); | ||
| 267 | nvgpu_kfree(g, event_id_data); | ||
| 268 | filp->private_data = NULL; | ||
| 269 | |||
| 270 | return 0; | ||
| 271 | } | ||
| 272 | |||
| 273 | const struct file_operations gk20a_event_id_ops = { | ||
| 274 | .owner = THIS_MODULE, | ||
| 275 | .poll = gk20a_event_id_poll, | ||
| 276 | .release = gk20a_event_id_release, | ||
| 277 | }; | ||
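The anon-inode file created for an event id only implements poll() and release(), so a waiter's loop is simply poll(2) on the returned fd. A minimal userspace sketch, assuming only the POLLPRI | POLLIN mask that gk20a_event_id_poll() reports:

```c
#include <poll.h>

/*
 * Wait for a posted TSG event. gk20a_event_id_poll() above reports
 * POLLPRI | POLLIN exactly once per posted event, clearing the
 * event_posted flag as it does so.
 */
int wait_for_tsg_event(int event_fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = event_fd, .events = POLLPRI | POLLIN };
	int ret = poll(&pfd, 1, timeout_ms);

	return ret > 0 ? 1 : ret;	/* 1 = event, 0 = timeout, <0 = error */
}
```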
| 278 | |||
| 279 | static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, | ||
| 280 | int event_id, | ||
| 281 | int *fd) | ||
| 282 | { | ||
| 283 | int err = 0; | ||
| 284 | int local_fd; | ||
| 285 | struct file *file; | ||
| 286 | char name[64]; | ||
| 287 | struct gk20a_event_id_data *event_id_data; | ||
| 288 | struct gk20a *g; | ||
| 289 | |||
| 290 | g = gk20a_get(tsg->g); | ||
| 291 | if (!g) | ||
| 292 | return -ENODEV; | ||
| 293 | |||
| 294 | err = gk20a_tsg_get_event_data_from_id(tsg, | ||
| 295 | event_id, &event_id_data); | ||
| 296 | if (err == 0) { | ||
| 297 | /* This event is already enabled */ | ||
| 298 | err = -EINVAL; | ||
| 299 | goto free_ref; | ||
| 300 | } | ||
| 301 | |||
| 302 | err = get_unused_fd_flags(O_RDWR); | ||
| 303 | if (err < 0) | ||
| 304 | goto free_ref; | ||
| 305 | local_fd = err; | ||
| 306 | |||
| 307 | snprintf(name, sizeof(name), "nvgpu-event%d-fd%d", | ||
| 308 | event_id, local_fd); | ||
| 309 | |||
| 310 | file = anon_inode_getfile(name, &gk20a_event_id_ops, | ||
| 311 | NULL, O_RDWR); | ||
| 312 | if (IS_ERR(file)) { | ||
| 313 | err = PTR_ERR(file); | ||
| 314 | goto clean_up; | ||
| 315 | } | ||
| 316 | |||
| 317 | event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data)); | ||
| 318 | if (!event_id_data) { | ||
| 319 | err = -ENOMEM; | ||
| 320 | goto clean_up_file; | ||
| 321 | } | ||
| 322 | event_id_data->g = g; | ||
| 323 | event_id_data->id = tsg->tsgid; | ||
| 324 | event_id_data->event_id = event_id; | ||
| 325 | |||
| 326 | nvgpu_cond_init(&event_id_data->event_id_wq); | ||
| 327 | err = nvgpu_mutex_init(&event_id_data->lock); | ||
| 328 | if (err) | ||
| 329 | goto clean_up_free; | ||
| 330 | |||
| 331 | nvgpu_init_list_node(&event_id_data->event_id_node); | ||
| 332 | |||
| 333 | nvgpu_mutex_acquire(&tsg->event_id_list_lock); | ||
| 334 | nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); | ||
| 335 | nvgpu_mutex_release(&tsg->event_id_list_lock); | ||
| 336 | |||
| 337 | /* Set private_data before publishing the fd: after fd_install() | ||
| 337 | * the fd is live and user space could race into our fops. */ | ||
| 337 | file->private_data = event_id_data; | ||
| 338 | fd_install(local_fd, file); | ||
| 339 | |||
| 340 | *fd = local_fd; | ||
| 341 | |||
| 342 | return 0; | ||
| 343 | |||
| 344 | clean_up_free: | ||
| 345 | nvgpu_kfree(g, event_id_data); | ||
| 346 | clean_up_file: | ||
| 347 | fput(file); | ||
| 348 | clean_up: | ||
| 349 | put_unused_fd(local_fd); | ||
| 350 | free_ref: | ||
| 351 | gk20a_put(g); | ||
| 352 | return err; | ||
| 353 | } | ||
| 354 | |||
| 355 | static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, | ||
| 356 | struct nvgpu_event_id_ctrl_args *args) | ||
| 357 | { | ||
| 358 | int err = 0; | ||
| 359 | int fd = -1; | ||
| 360 | |||
| 361 | if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) | ||
| 362 | return -EINVAL; | ||
| 363 | |||
| 364 | nvgpu_speculation_barrier(); | ||
| 365 | switch (args->cmd) { | ||
| 366 | case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: | ||
| 367 | err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); | ||
| 368 | if (!err) | ||
| 369 | args->event_fd = fd; | ||
| 370 | break; | ||
| 371 | |||
| 372 | default: | ||
| 373 | nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x", | ||
| 374 | args->cmd); | ||
| 375 | err = -EINVAL; | ||
| 376 | break; | ||
| 377 | } | ||
| 378 | |||
| 379 | return err; | ||
| 380 | } | ||
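A hedged sketch of requesting an event fd from userspace; the args fields (`cmd`, `event_id`, `event_fd`) are the ones gk20a_tsg_event_id_ctrl() reads and writes above, and the returned fd pairs with the poll() sketch earlier.

```c
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int tsg_enable_event(int tsg_fd, unsigned int event_id)
{
	struct nvgpu_event_id_ctrl_args args = { 0 };

	args.cmd = NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE;
	args.event_id = event_id;	/* < NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX */

	if (ioctl(tsg_fd, NVGPU_IOCTL_TSG_EVENT_ID_CTRL, &args) < 0)
		return -1;

	return args.event_fd;	/* poll()-able; see gk20a_event_id_poll() */
}
```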
| 381 | |||
| 382 | int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp) | ||
| 383 | { | ||
| 384 | struct tsg_private *priv; | ||
| 385 | struct tsg_gk20a *tsg; | ||
| 386 | struct device *dev; | ||
| 387 | int err; | ||
| 388 | |||
| 389 | g = gk20a_get(g); | ||
| 390 | if (!g) | ||
| 391 | return -ENODEV; | ||
| 392 | |||
| 393 | dev = dev_from_gk20a(g); | ||
| 394 | |||
| 395 | nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev)); | ||
| 396 | |||
| 397 | priv = nvgpu_kmalloc(g, sizeof(*priv)); | ||
| 398 | if (!priv) { | ||
| 399 | err = -ENOMEM; | ||
| 400 | goto free_ref; | ||
| 401 | } | ||
| 402 | |||
| 403 | err = gk20a_busy(g); | ||
| 404 | if (err) { | ||
| 405 | nvgpu_err(g, "failed to power on, %d", err); | ||
| 406 | goto free_mem; | ||
| 407 | } | ||
| 408 | |||
| 409 | tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); | ||
| 410 | gk20a_idle(g); | ||
| 411 | if (!tsg) { | ||
| 412 | err = -ENOMEM; | ||
| 413 | goto free_mem; | ||
| 414 | } | ||
| 415 | |||
| 416 | priv->g = g; | ||
| 417 | priv->tsg = tsg; | ||
| 418 | filp->private_data = priv; | ||
| 419 | |||
| 420 | gk20a_sched_ctrl_tsg_added(g, tsg); | ||
| 421 | |||
| 422 | return 0; | ||
| 423 | |||
| 424 | free_mem: | ||
| 425 | nvgpu_kfree(g, priv); | ||
| 426 | free_ref: | ||
| 427 | gk20a_put(g); | ||
| 428 | return err; | ||
| 429 | } | ||
| 430 | |||
| 431 | int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp) | ||
| 432 | { | ||
| 433 | struct nvgpu_os_linux *l; | ||
| 434 | struct gk20a *g; | ||
| 435 | int ret; | ||
| 436 | |||
| 437 | l = container_of(inode->i_cdev, | ||
| 438 | struct nvgpu_os_linux, tsg.cdev); | ||
| 439 | g = &l->g; | ||
| 440 | |||
| 441 | nvgpu_log_fn(g, " "); | ||
| 442 | |||
| 443 | ret = gk20a_busy(g); | ||
| 444 | if (ret) { | ||
| 445 | nvgpu_err(g, "failed to power on, %d", ret); | ||
| 446 | return ret; | ||
| 447 | } | ||
| 448 | |||
| 449 | ret = nvgpu_ioctl_tsg_open(&l->g, filp); | ||
| 450 | |||
| 451 | gk20a_idle(g); | ||
| 452 | nvgpu_log_fn(g, "done"); | ||
| 453 | return ret; | ||
| 454 | } | ||
| 455 | |||
| 456 | void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref) | ||
| 457 | { | ||
| 458 | struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); | ||
| 459 | struct gk20a *g = tsg->g; | ||
| 460 | |||
| 461 | gk20a_sched_ctrl_tsg_removed(g, tsg); | ||
| 462 | |||
| 463 | gk20a_tsg_release(ref); | ||
| 464 | gk20a_put(g); | ||
| 465 | } | ||
| 466 | |||
| 467 | int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp) | ||
| 468 | { | ||
| 469 | struct tsg_private *priv = filp->private_data; | ||
| 470 | struct tsg_gk20a *tsg; | ||
| 471 | |||
| 472 | if (!priv) { | ||
| 473 | /* open failed, never got a tsg for this file */ | ||
| 474 | return 0; | ||
| 475 | } | ||
| 476 | |||
| 477 | tsg = priv->tsg; | ||
| 478 | |||
| 479 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
| 480 | nvgpu_kfree(tsg->g, priv); | ||
| 481 | return 0; | ||
| 482 | } | ||
| 483 | |||
| 484 | static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, | ||
| 485 | struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg) | ||
| 486 | { | ||
| 487 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 488 | u32 level = arg->level; | ||
| 489 | int err; | ||
| 490 | |||
| 491 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
| 492 | |||
| 493 | nvgpu_mutex_acquire(&sched->control_lock); | ||
| 494 | if (sched->control_locked) { | ||
| 495 | err = -EPERM; | ||
| 496 | goto done; | ||
| 497 | } | ||
| 498 | err = gk20a_busy(g); | ||
| 499 | if (err) { | ||
| 500 | nvgpu_err(g, "failed to power on gpu"); | ||
| 501 | goto done; | ||
| 502 | } | ||
| 503 | |||
| 504 | level = nvgpu_get_common_runlist_level(level); | ||
| 505 | err = gk20a_tsg_set_runlist_interleave(tsg, level); | ||
| 506 | |||
| 507 | gk20a_idle(g); | ||
| 508 | done: | ||
| 509 | nvgpu_mutex_release(&sched->control_lock); | ||
| 510 | return err; | ||
| 511 | } | ||
| 512 | |||
| 513 | static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, | ||
| 514 | struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) | ||
| 515 | { | ||
| 516 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 517 | int err; | ||
| 518 | |||
| 519 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
| 520 | |||
| 521 | nvgpu_mutex_acquire(&sched->control_lock); | ||
| 522 | if (sched->control_locked) { | ||
| 523 | err = -EPERM; | ||
| 524 | goto done; | ||
| 525 | } | ||
| 526 | err = gk20a_busy(g); | ||
| 527 | if (err) { | ||
| 528 | nvgpu_err(g, "failed to power on gpu"); | ||
| 529 | goto done; | ||
| 530 | } | ||
| 531 | err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); | ||
| 532 | gk20a_idle(g); | ||
| 533 | done: | ||
| 534 | nvgpu_mutex_release(&sched->control_lock); | ||
| 535 | return err; | ||
| 536 | } | ||
| 537 | |||
| 538 | static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g, | ||
| 539 | struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) | ||
| 540 | { | ||
| 541 | arg->timeslice_us = gk20a_tsg_get_timeslice(tsg); | ||
| 542 | return 0; | ||
| 543 | } | ||
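A hedged round-trip through the two timeslice handlers above; `timeslice_us` is the only field either of them touches, and the doubling policy here is purely illustrative.

```c
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int tsg_bump_timeslice(int tsg_fd)
{
	struct nvgpu_timeslice_args args = { 0 };

	if (ioctl(tsg_fd, NVGPU_IOCTL_TSG_GET_TIMESLICE, &args) < 0)
		return -1;

	args.timeslice_us *= 2;		/* illustrative policy */
	return ioctl(tsg_fd, NVGPU_IOCTL_TSG_SET_TIMESLICE, &args);
}
```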
| 544 | |||
| 545 | static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g, | ||
| 546 | struct tsg_gk20a *tsg, | ||
| 547 | struct nvgpu_tsg_read_single_sm_error_state_args *args) | ||
| 548 | { | ||
| 549 | struct gr_gk20a *gr = &g->gr; | ||
| 550 | struct nvgpu_tsg_sm_error_state *sm_error_state; | ||
| 551 | struct nvgpu_tsg_sm_error_state_record sm_error_state_record; | ||
| 552 | u32 sm_id; | ||
| 553 | int err = 0; | ||
| 554 | |||
| 555 | sm_id = args->sm_id; | ||
| 556 | if (sm_id >= gr->no_of_sm) | ||
| 557 | return -EINVAL; | ||
| 558 | |||
| 559 | nvgpu_speculation_barrier(); | ||
| 560 | |||
| 561 | sm_error_state = tsg->sm_error_states + sm_id; | ||
| 562 | sm_error_state_record.global_esr = | ||
| 563 | sm_error_state->hww_global_esr; | ||
| 564 | sm_error_state_record.warp_esr = | ||
| 565 | sm_error_state->hww_warp_esr; | ||
| 566 | sm_error_state_record.warp_esr_pc = | ||
| 567 | sm_error_state->hww_warp_esr_pc; | ||
| 568 | sm_error_state_record.global_esr_report_mask = | ||
| 569 | sm_error_state->hww_global_esr_report_mask; | ||
| 570 | sm_error_state_record.warp_esr_report_mask = | ||
| 571 | sm_error_state->hww_warp_esr_report_mask; | ||
| 572 | |||
| 573 | if (args->record_size > 0) { | ||
| 574 | size_t write_size = sizeof(sm_error_state_record); | ||
| 575 | |||
| 576 | nvgpu_speculation_barrier(); | ||
| 577 | if (write_size > args->record_size) | ||
| 578 | write_size = args->record_size; | ||
| 579 | |||
| 580 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
| 581 | if (copy_to_user((void __user *)(uintptr_t) | ||
| 582 | args->record_mem, | ||
| 583 | &sm_error_state_record, | ||
| 584 | write_size)) | ||
| 584 | err = -EFAULT; | ||
| 585 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
| 586 | if (err) { | ||
| 587 | nvgpu_err(g, "copy_to_user failed!"); | ||
| 588 | return err; | ||
| 589 | } | ||
| 590 | |||
| 591 | args->record_size = write_size; | ||
| 592 | } | ||
| 593 | |||
| 594 | return 0; | ||
| 595 | } | ||
| 596 | |||
| 597 | long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, | ||
| 598 | unsigned long arg) | ||
| 599 | { | ||
| 600 | struct tsg_private *priv = filp->private_data; | ||
| 601 | struct tsg_gk20a *tsg = priv->tsg; | ||
| 602 | struct gk20a *g = tsg->g; | ||
| 603 | u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE]; | ||
| 604 | int err = 0; | ||
| 605 | |||
| 606 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
| 607 | |||
| 608 | if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) || | ||
| 609 | (_IOC_NR(cmd) == 0) || | ||
| 610 | (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) || | ||
| 611 | (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE)) | ||
| 612 | return -EINVAL; | ||
| 613 | |||
| 614 | memset(buf, 0, sizeof(buf)); | ||
| 615 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
| 616 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
| 617 | return -EFAULT; | ||
| 618 | } | ||
| 619 | |||
| 620 | if (!g->sw_ready) { | ||
| 621 | err = gk20a_busy(g); | ||
| 622 | if (err) | ||
| 623 | return err; | ||
| 624 | |||
| 625 | gk20a_idle(g); | ||
| 626 | } | ||
| 627 | |||
| 628 | nvgpu_speculation_barrier(); | ||
| 628 | switch (cmd) { | ||
| 629 | case NVGPU_TSG_IOCTL_BIND_CHANNEL: | ||
| 630 | { | ||
| 631 | int ch_fd = *(int *)buf; | ||
| 632 | if (ch_fd < 0) { | ||
| 633 | err = -EINVAL; | ||
| 634 | break; | ||
| 635 | } | ||
| 636 | err = gk20a_tsg_bind_channel_fd(tsg, ch_fd); | ||
| 637 | break; | ||
| 638 | } | ||
| 639 | |||
| 640 | case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: | ||
| 641 | { | ||
| 642 | err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg, | ||
| 643 | (struct nvgpu_tsg_bind_channel_ex_args *)buf); | ||
| 644 | break; | ||
| 645 | } | ||
| 646 | |||
| 647 | case NVGPU_TSG_IOCTL_UNBIND_CHANNEL: | ||
| 648 | { | ||
| 649 | int ch_fd = *(int *)buf; | ||
| 650 | |||
| 651 | if (ch_fd < 0) { | ||
| 652 | err = -EINVAL; | ||
| 653 | break; | ||
| 654 | } | ||
| 655 | err = gk20a_busy(g); | ||
| 656 | if (err) { | ||
| 657 | nvgpu_err(g, | ||
| 658 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
| 659 | break; | ||
| 660 | } | ||
| 661 | err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd); | ||
| 662 | gk20a_idle(g); | ||
| 663 | break; | ||
| 664 | } | ||
| 665 | |||
| 666 | case NVGPU_IOCTL_TSG_ENABLE: | ||
| 667 | { | ||
| 668 | err = gk20a_busy(g); | ||
| 669 | if (err) { | ||
| 670 | nvgpu_err(g, | ||
| 671 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
| 672 | return err; | ||
| 673 | } | ||
| 674 | g->ops.fifo.enable_tsg(tsg); | ||
| 675 | gk20a_idle(g); | ||
| 676 | break; | ||
| 677 | } | ||
| 678 | |||
| 679 | case NVGPU_IOCTL_TSG_DISABLE: | ||
| 680 | { | ||
| 681 | err = gk20a_busy(g); | ||
| 682 | if (err) { | ||
| 683 | nvgpu_err(g, | ||
| 684 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
| 685 | return err; | ||
| 686 | } | ||
| 687 | g->ops.fifo.disable_tsg(tsg); | ||
| 688 | gk20a_idle(g); | ||
| 689 | break; | ||
| 690 | } | ||
| 691 | |||
| 692 | case NVGPU_IOCTL_TSG_PREEMPT: | ||
| 693 | { | ||
| 694 | err = gk20a_busy(g); | ||
| 695 | if (err) { | ||
| 696 | nvgpu_err(g, | ||
| 697 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
| 698 | return err; | ||
| 699 | } | ||
| 700 | /* preempt TSG */ | ||
| 701 | err = g->ops.fifo.preempt_tsg(g, tsg); | ||
| 702 | gk20a_idle(g); | ||
| 703 | break; | ||
| 704 | } | ||
| 705 | |||
| 706 | case NVGPU_IOCTL_TSG_EVENT_ID_CTRL: | ||
| 707 | { | ||
| 708 | err = gk20a_tsg_event_id_ctrl(g, tsg, | ||
| 709 | (struct nvgpu_event_id_ctrl_args *)buf); | ||
| 710 | break; | ||
| 711 | } | ||
| 712 | |||
| 713 | case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: | ||
| 714 | err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg, | ||
| 715 | (struct nvgpu_runlist_interleave_args *)buf); | ||
| 716 | break; | ||
| 717 | |||
| 718 | case NVGPU_IOCTL_TSG_SET_TIMESLICE: | ||
| 719 | { | ||
| 720 | err = gk20a_tsg_ioctl_set_timeslice(g, tsg, | ||
| 721 | (struct nvgpu_timeslice_args *)buf); | ||
| 722 | break; | ||
| 723 | } | ||
| 724 | case NVGPU_IOCTL_TSG_GET_TIMESLICE: | ||
| 725 | { | ||
| 726 | err = gk20a_tsg_ioctl_get_timeslice(g, tsg, | ||
| 727 | (struct nvgpu_timeslice_args *)buf); | ||
| 728 | break; | ||
| 729 | } | ||
| 730 | |||
| 731 | case NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE: | ||
| 732 | { | ||
| 733 | err = gk20a_tsg_ioctl_read_single_sm_error_state(g, tsg, | ||
| 734 | (struct nvgpu_tsg_read_single_sm_error_state_args *)buf); | ||
| 735 | break; | ||
| 736 | } | ||
| 737 | |||
| 738 | default: | ||
| 739 | nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", | ||
| 740 | cmd); | ||
| 741 | err = -ENOTTY; | ||
| 742 | break; | ||
| 743 | } | ||
| 744 | |||
| 745 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) { | ||
| 746 | if (copy_to_user((void __user *)arg, | ||
| 747 | buf, _IOC_SIZE(cmd))) | ||
| 747 | err = -EFAULT; | ||
| 748 | } | ||
| 748 | |||
| 749 | return err; | ||
| 750 | } | ||
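An end-to-end hedged sketch of TSG setup: the plain-int argument for BIND_CHANNEL matches the `*(int *)buf` read in the dispatcher above, and ENABLE carries no argument. The device node name is an assumption.

```c
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int tsg_setup(int channel_fd)
{
	int tsg_fd = open("/dev/nvhost-tsg-gpu", O_RDWR);	/* assumed path */

	if (tsg_fd < 0)
		return -1;

	if (ioctl(tsg_fd, NVGPU_TSG_IOCTL_BIND_CHANNEL, &channel_fd) < 0 ||
	    ioctl(tsg_fd, NVGPU_IOCTL_TSG_ENABLE) < 0) {
		close(tsg_fd);
		return -1;
	}
	return tsg_fd;
}
```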
diff --git a/include/os/linux/ioctl_tsg.h b/include/os/linux/ioctl_tsg.h new file mode 100644 index 0000000..67399fd --- /dev/null +++ b/include/os/linux/ioctl_tsg.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | #ifndef NVGPU_IOCTL_TSG_H | ||
| 14 | #define NVGPU_IOCTL_TSG_H | ||
| 15 | |||
| 16 | struct inode; | ||
| 17 | struct file; | ||
| 18 | struct gk20a; | ||
| 19 | struct nvgpu_ref; | ||
| 20 | |||
| 21 | int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp); | ||
| 22 | int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp); | ||
| 23 | int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp); | ||
| 24 | long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, | ||
| 25 | unsigned int cmd, unsigned long arg); | ||
| 26 | void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref); | ||
| 27 | |||
| 28 | #endif | ||
diff --git a/include/os/linux/kmem.c b/include/os/linux/kmem.c new file mode 100644 index 0000000..395cc45 --- /dev/null +++ b/include/os/linux/kmem.c | |||
| @@ -0,0 +1,653 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/mm.h> | ||
| 18 | #include <linux/slab.h> | ||
| 19 | #include <linux/debugfs.h> | ||
| 20 | #include <linux/seq_file.h> | ||
| 21 | #include <linux/vmalloc.h> | ||
| 22 | #include <linux/stacktrace.h> | ||
| 23 | |||
| 24 | #include <nvgpu/lock.h> | ||
| 25 | #include <nvgpu/kmem.h> | ||
| 26 | #include <nvgpu/atomic.h> | ||
| 27 | #include <nvgpu/bug.h> | ||
| 28 | #include <nvgpu/gk20a.h> | ||
| 29 | |||
| 30 | #include "kmem_priv.h" | ||
| 31 | |||
| 32 | /* | ||
| 33 | * Statically declared because this needs to be shared across all nvgpu driver | ||
| 34 | * instances. This makes sure that all kmem caches are _definitely_ uniquely | ||
| 35 | * named. | ||
| 36 | */ | ||
| 37 | static atomic_t kmem_cache_id; | ||
| 38 | |||
| 39 | void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear) | ||
| 40 | { | ||
| 41 | void *p; | ||
| 42 | |||
| 43 | if (size > PAGE_SIZE) { | ||
| 44 | if (clear) | ||
| 45 | p = nvgpu_vzalloc(g, size); | ||
| 46 | else | ||
| 47 | p = nvgpu_vmalloc(g, size); | ||
| 48 | } else { | ||
| 49 | if (clear) | ||
| 50 | p = nvgpu_kzalloc(g, size); | ||
| 51 | else | ||
| 52 | p = nvgpu_kmalloc(g, size); | ||
| 53 | } | ||
| 54 | |||
| 55 | return p; | ||
| 56 | } | ||
| 57 | |||
| 58 | void nvgpu_big_free(struct gk20a *g, void *p) | ||
| 59 | { | ||
| 60 | /* | ||
| 61 | * This will have to be fixed eventually. Allocs that use | ||
| 62 | * nvgpu_big_[mz]alloc() will need to remember the size of the alloc | ||
| 63 | * when freeing. | ||
| 64 | */ | ||
| 65 | if (is_vmalloc_addr(p)) | ||
| 66 | nvgpu_vfree(g, p); | ||
| 67 | else | ||
| 68 | nvgpu_kfree(g, p); | ||
| 69 | } | ||
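__nvgpu_big_alloc() picks kmalloc for requests up to a page and vmalloc above that, and nvgpu_big_free() re-derives the path with is_vmalloc_addr(), so callers never need to remember which allocator was used. A minimal kernel-side usage sketch; the caller, element type, and count are illustrative:

```c
static int build_table(struct gk20a *g, size_t nr_entries)
{
	/* 'true' requests zeroed memory (vzalloc/kzalloc path). */
	u64 *table = __nvgpu_big_alloc(g, nr_entries * sizeof(*table), true);

	if (!table)
		return -ENOMEM;

	/* ... fill and consume the table ... */

	nvgpu_big_free(g, table);	/* dispatches on is_vmalloc_addr() */
	return 0;
}
```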
| 70 | |||
| 71 | void *__nvgpu_kmalloc(struct gk20a *g, size_t size, void *ip) | ||
| 72 | { | ||
| 73 | void *alloc; | ||
| 74 | |||
| 75 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 76 | alloc = __nvgpu_track_kmalloc(g, size, ip); | ||
| 77 | #else | ||
| 78 | alloc = kmalloc(size, GFP_KERNEL); | ||
| 79 | #endif | ||
| 80 | |||
| 81 | kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
| 82 | size, alloc, GFP_KERNEL); | ||
| 83 | |||
| 84 | return alloc; | ||
| 85 | } | ||
| 86 | |||
| 87 | void *__nvgpu_kzalloc(struct gk20a *g, size_t size, void *ip) | ||
| 88 | { | ||
| 89 | void *alloc; | ||
| 90 | |||
| 91 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 92 | alloc = __nvgpu_track_kzalloc(g, size, ip); | ||
| 93 | #else | ||
| 94 | alloc = kzalloc(size, GFP_KERNEL); | ||
| 95 | #endif | ||
| 96 | |||
| 97 | kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
| 98 | size, alloc, GFP_KERNEL); | ||
| 99 | |||
| 100 | return alloc; | ||
| 101 | } | ||
| 102 | |||
| 103 | void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, void *ip) | ||
| 104 | { | ||
| 105 | void *alloc; | ||
| 106 | |||
| 107 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 108 | alloc = __nvgpu_track_kcalloc(g, n, size, ip); | ||
| 109 | #else | ||
| 110 | alloc = kcalloc(n, size, GFP_KERNEL); | ||
| 111 | #endif | ||
| 112 | |||
| 113 | kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
| 114 | n * size, alloc, GFP_KERNEL); | ||
| 115 | |||
| 116 | return alloc; | ||
| 117 | } | ||
| 118 | |||
| 119 | void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, void *ip) | ||
| 120 | { | ||
| 121 | void *alloc; | ||
| 122 | |||
| 123 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 124 | alloc = __nvgpu_track_vmalloc(g, size, ip); | ||
| 125 | #else | ||
| 126 | alloc = vmalloc(size); | ||
| 127 | #endif | ||
| 128 | |||
| 129 | kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc); | ||
| 130 | |||
| 131 | return alloc; | ||
| 132 | } | ||
| 133 | |||
| 134 | void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, void *ip) | ||
| 135 | { | ||
| 136 | void *alloc; | ||
| 137 | |||
| 138 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 139 | alloc = __nvgpu_track_vzalloc(g, size, ip); | ||
| 140 | #else | ||
| 141 | alloc = vzalloc(size); | ||
| 142 | #endif | ||
| 143 | |||
| 144 | kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc); | ||
| 145 | |||
| 146 | return alloc; | ||
| 147 | } | ||
| 148 | |||
| 149 | void __nvgpu_kfree(struct gk20a *g, void *addr) | ||
| 150 | { | ||
| 151 | kmem_dbg(g, "kfree: addr=0x%p", addr); | ||
| 152 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 153 | __nvgpu_track_kfree(g, addr); | ||
| 154 | #else | ||
| 155 | kfree(addr); | ||
| 156 | #endif | ||
| 157 | } | ||
| 158 | |||
| 159 | void __nvgpu_vfree(struct gk20a *g, void *addr) | ||
| 160 | { | ||
| 161 | kmem_dbg(g, "vfree: addr=0x%p", addr); | ||
| 162 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 163 | __nvgpu_track_vfree(g, addr); | ||
| 164 | #else | ||
| 165 | vfree(addr); | ||
| 166 | #endif | ||
| 167 | } | ||
| 168 | |||
| 169 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 170 | |||
| 171 | void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) | ||
| 172 | { | ||
| 173 | nvgpu_mutex_acquire(&tracker->lock); | ||
| 174 | } | ||
| 175 | |||
| 176 | void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) | ||
| 177 | { | ||
| 178 | nvgpu_mutex_release(&tracker->lock); | ||
| 179 | } | ||
| 180 | |||
| 181 | void kmem_print_mem_alloc(struct gk20a *g, | ||
| 182 | struct nvgpu_mem_alloc *alloc, | ||
| 183 | struct seq_file *s) | ||
| 184 | { | ||
| 185 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
| 186 | int i; | ||
| 187 | |||
| 188 | __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", | ||
| 189 | alloc->addr, alloc->size); | ||
| 190 | for (i = 0; i < alloc->stack_length; i++) | ||
| 191 | __pstat(s, " %3d [<%p>] %pS\n", i, | ||
| 192 | (void *)alloc->stack[i], | ||
| 193 | (void *)alloc->stack[i]); | ||
| 194 | __pstat(s, "\n"); | ||
| 195 | #else | ||
| 196 | __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", | ||
| 197 | alloc->addr, alloc->size, alloc->ip); | ||
| 198 | #endif | ||
| 199 | } | ||
| 200 | |||
| 201 | static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
| 202 | struct nvgpu_mem_alloc *alloc) | ||
| 203 | { | ||
| 204 | alloc->allocs_entry.key_start = alloc->addr; | ||
| 205 | alloc->allocs_entry.key_end = alloc->addr + alloc->size; | ||
| 206 | |||
| 207 | nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs); | ||
| 208 | return 0; | ||
| 209 | } | ||
| 210 | |||
| 211 | static struct nvgpu_mem_alloc *nvgpu_rem_alloc( | ||
| 212 | struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr) | ||
| 213 | { | ||
| 214 | struct nvgpu_mem_alloc *alloc; | ||
| 215 | struct nvgpu_rbtree_node *node = NULL; | ||
| 216 | |||
| 217 | nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs); | ||
| 218 | if (!node) | ||
| 219 | return NULL; | ||
| 220 | |||
| 221 | alloc = nvgpu_mem_alloc_from_rbtree_node(node); | ||
| 222 | |||
| 223 | nvgpu_rbtree_unlink(node, &tracker->allocs); | ||
| 224 | |||
| 225 | return alloc; | ||
| 226 | } | ||
| 227 | |||
| 228 | static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
| 229 | unsigned long size, unsigned long real_size, | ||
| 230 | u64 addr, void *ip) | ||
| 231 | { | ||
| 232 | int ret; | ||
| 233 | struct nvgpu_mem_alloc *alloc; | ||
| 234 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
| 235 | struct stack_trace stack_trace; | ||
| 236 | #endif | ||
| 237 | |||
| 238 | alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); | ||
| 239 | if (!alloc) | ||
| 240 | return -ENOMEM; | ||
| 241 | |||
| 242 | alloc->owner = tracker; | ||
| 243 | alloc->size = size; | ||
| 244 | alloc->real_size = real_size; | ||
| 245 | alloc->addr = addr; | ||
| 246 | alloc->ip = ip; | ||
| 247 | |||
| 248 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
| 249 | stack_trace.max_entries = MAX_STACK_TRACE; | ||
| 250 | stack_trace.nr_entries = 0; | ||
| 251 | stack_trace.entries = alloc->stack; | ||
| 252 | /* | ||
| 253 | * The skip count of 4 drops the two function calls that happen for | ||
| 254 | * every traced alloc in nvgpu: | ||
| 255 | * | ||
| 256 | * __nvgpu_save_kmem_alloc+0x7c/0x128 | ||
| 257 | * __nvgpu_track_kzalloc+0xcc/0xf8 | ||
| 258 | * | ||
| 259 | * plus the calls made by the stack trace code itself. If the stack | ||
| 260 | * trace saving code changes, this will likely have to change as | ||
| 261 | * well. | ||
| 262 | */ | ||
| 263 | stack_trace.skip = 4; | ||
| 264 | save_stack_trace(&stack_trace); | ||
| 265 | alloc->stack_length = stack_trace.nr_entries; | ||
| 266 | #endif | ||
| 267 | |||
| 268 | nvgpu_lock_tracker(tracker); | ||
| 269 | tracker->bytes_alloced += size; | ||
| 270 | tracker->bytes_alloced_real += real_size; | ||
| 271 | tracker->nr_allocs++; | ||
| 272 | |||
| 273 | /* Keep track of this for building a histogram later on. */ | ||
| 274 | if (tracker->max_alloc < size) | ||
| 275 | tracker->max_alloc = size; | ||
| 276 | if (tracker->min_alloc > size) | ||
| 277 | tracker->min_alloc = size; | ||
| 278 | |||
| 279 | ret = nvgpu_add_alloc(tracker, alloc); | ||
| 280 | if (ret) { | ||
| 281 | WARN(1, "Duplicate alloc??? 0x%llx\n", addr); | ||
| 282 | kfree(alloc); | ||
| 283 | nvgpu_unlock_tracker(tracker); | ||
| 284 | return ret; | ||
| 285 | } | ||
| 286 | nvgpu_unlock_tracker(tracker); | ||
| 287 | |||
| 288 | return 0; | ||
| 289 | } | ||
| 290 | |||
| 291 | static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
| 292 | u64 addr) | ||
| 293 | { | ||
| 294 | struct nvgpu_mem_alloc *alloc; | ||
| 295 | |||
| 296 | nvgpu_lock_tracker(tracker); | ||
| 297 | alloc = nvgpu_rem_alloc(tracker, addr); | ||
| 298 | if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { | ||
| 299 | nvgpu_unlock_tracker(tracker); | ||
| 300 | return -EINVAL; | ||
| 301 | } | ||
| 302 | |||
| 303 | /* Zero the buffer on free to help catch use-after-free bugs. */ | ||
| 303 | memset((void *)alloc->addr, 0, alloc->size); | ||
| 304 | |||
| 305 | tracker->nr_frees++; | ||
| 306 | tracker->bytes_freed += alloc->size; | ||
| 307 | tracker->bytes_freed_real += alloc->real_size; | ||
| 308 | nvgpu_unlock_tracker(tracker); | ||
| 309 | |||
| 310 | return 0; | ||
| 311 | } | ||
| 312 | |||
| 313 | static void __nvgpu_check_valloc_size(unsigned long size) | ||
| 314 | { | ||
| 315 | WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size); | ||
| 316 | } | ||
| 317 | |||
| 318 | static void __nvgpu_check_kalloc_size(size_t size) | ||
| 319 | { | ||
| 320 | WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size); | ||
| 321 | } | ||
| 322 | |||
| 323 | void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, | ||
| 324 | void *ip) | ||
| 325 | { | ||
| 326 | void *alloc = vmalloc(size); | ||
| 327 | |||
| 328 | if (!alloc) | ||
| 329 | return NULL; | ||
| 330 | |||
| 331 | __nvgpu_check_valloc_size(size); | ||
| 332 | |||
| 333 | /* | ||
| 334 | * Ignore the return message. If this fails let's not cause any issues | ||
| 335 | * for the rest of the driver. | ||
| 336 | */ | ||
| 337 | __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), | ||
| 338 | (u64)(uintptr_t)alloc, ip); | ||
| 339 | |||
| 340 | return alloc; | ||
| 341 | } | ||
| 342 | |||
| 343 | void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, | ||
| 344 | void *ip) | ||
| 345 | { | ||
| 346 | void *alloc = vzalloc(size); | ||
| 347 | |||
| 348 | if (!alloc) | ||
| 349 | return NULL; | ||
| 350 | |||
| 351 | __nvgpu_check_valloc_size(size); | ||
| 352 | |||
| 353 | /* | ||
| 354 | * Ignore the return message. If this fails let's not cause any issues | ||
| 355 | * for the rest of the driver. | ||
| 356 | */ | ||
| 357 | __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), | ||
| 358 | (u64)(uintptr_t)alloc, ip); | ||
| 359 | |||
| 360 | return alloc; | ||
| 361 | } | ||
| 362 | |||
| 363 | void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, void *ip) | ||
| 364 | { | ||
| 365 | void *alloc = kmalloc(size, GFP_KERNEL); | ||
| 366 | |||
| 367 | if (!alloc) | ||
| 368 | return NULL; | ||
| 369 | |||
| 370 | __nvgpu_check_kalloc_size(size); | ||
| 371 | |||
| 372 | __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), | ||
| 373 | (u64)(uintptr_t)alloc, ip); | ||
| 374 | |||
| 375 | return alloc; | ||
| 376 | } | ||
| 377 | |||
| 378 | void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, void *ip) | ||
| 379 | { | ||
| 380 | void *alloc = kzalloc(size, GFP_KERNEL); | ||
| 381 | |||
| 382 | if (!alloc) | ||
| 383 | return NULL; | ||
| 384 | |||
| 385 | __nvgpu_check_kalloc_size(size); | ||
| 386 | |||
| 387 | __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), | ||
| 388 | (u64)(uintptr_t)alloc, ip); | ||
| 389 | |||
| 390 | return alloc; | ||
| 391 | } | ||
| 392 | |||
| 393 | void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, | ||
| 394 | void *ip) | ||
| 395 | { | ||
| 396 | void *alloc = kcalloc(n, size, GFP_KERNEL); | ||
| 397 | |||
| 398 | if (!alloc) | ||
| 399 | return NULL; | ||
| 400 | |||
| 401 | __nvgpu_check_kalloc_size(n * size); | ||
| 402 | |||
| 403 | __nvgpu_save_kmem_alloc(g->kmallocs, n * size, | ||
| 404 | roundup_pow_of_two(n * size), | ||
| 405 | (u64)(uintptr_t)alloc, ip); | ||
| 406 | |||
| 407 | return alloc; | ||
| 408 | } | ||
| 409 | |||
| 410 | void __nvgpu_track_vfree(struct gk20a *g, void *addr) | ||
| 411 | { | ||
| 412 | /* | ||
| 413 | * It is accepted practice to pass NULL pointers into free | ||
| 414 | * functions, so silently ignore them here. | ||
| 415 | */ | ||
| 416 | if (!addr) | ||
| 417 | return; | ||
| 418 | |||
| 419 | __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); | ||
| 420 | |||
| 421 | vfree(addr); | ||
| 422 | } | ||
| 423 | |||
| 424 | void __nvgpu_track_kfree(struct gk20a *g, void *addr) | ||
| 425 | { | ||
| 426 | if (!addr) | ||
| 427 | return; | ||
| 428 | |||
| 429 | __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); | ||
| 430 | |||
| 431 | kfree(addr); | ||
| 432 | } | ||
| 433 | |||
| 434 | static int __do_check_for_outstanding_allocs( | ||
| 435 | struct gk20a *g, | ||
| 436 | struct nvgpu_mem_alloc_tracker *tracker, | ||
| 437 | const char *type, bool silent) | ||
| 438 | { | ||
| 439 | struct nvgpu_rbtree_node *node; | ||
| 440 | int count = 0; | ||
| 441 | |||
| 442 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
| 443 | while (node) { | ||
| 444 | struct nvgpu_mem_alloc *alloc = | ||
| 445 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
| 446 | |||
| 447 | if (!silent) | ||
| 448 | kmem_print_mem_alloc(g, alloc, NULL); | ||
| 449 | |||
| 450 | count++; | ||
| 451 | nvgpu_rbtree_enum_next(&node, node); | ||
| 452 | } | ||
| 453 | |||
| 454 | return count; | ||
| 455 | } | ||
| 456 | |||
| 457 | /** | ||
| 458 | * check_for_outstanding_allocs - Count and display outstanding allocs | ||
| 459 | * | ||
| 460 | * @g - The GPU. | ||
| 461 | * @silent - If set don't print anything about the allocs. | ||
| 462 | * | ||
| 463 | * Dump (or just count) the number of allocations left outstanding. | ||
| 464 | */ | ||
| 465 | static int check_for_outstanding_allocs(struct gk20a *g, bool silent) | ||
| 466 | { | ||
| 467 | int count = 0; | ||
| 468 | |||
| 469 | count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", | ||
| 470 | silent); | ||
| 471 | count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", | ||
| 472 | silent); | ||
| 473 | |||
| 474 | return count; | ||
| 475 | } | ||
| 476 | |||
| 477 | static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, | ||
| 478 | void (*force_free_func)(const void *)) | ||
| 479 | { | ||
| 480 | struct nvgpu_rbtree_node *node; | ||
| 481 | |||
| 482 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
| 483 | while (node) { | ||
| 484 | struct nvgpu_mem_alloc *alloc = | ||
| 485 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
| 486 | |||
| 487 | if (force_free_func) | ||
| 488 | force_free_func((void *)alloc->addr); | ||
| 489 | |||
| 490 | nvgpu_rbtree_unlink(node, &tracker->allocs); | ||
| 491 | kfree(alloc); | ||
| 492 | |||
| 493 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
| 497 | /** | ||
| 498 | * nvgpu_kmem_cleanup - Cleanup the kmem tracking | ||
| 499 | * | ||
| 500 | * @g - The GPU. | ||
| 501 | * @force_free - If set will also free leaked objects if possible. | ||
| 502 | * | ||
| 503 | * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free | ||
| 504 | * is non-zero then the allocation made by nvgpu is also freed. This is risky, | ||
| 505 | * though, as it is possible that the memory is still in use by other parts of | ||
| 506 | * the GPU driver not aware that this has happened. | ||
| 507 | * | ||
| 508 | * In theory it should be fine if the GPU driver has been deinitialized and | ||
| 509 | * there are no bugs in that code. However, if there are any bugs in that code | ||
| 510 | * then they could manifest as odd crashes an indeterminate amount of | ||
| 511 | * time in the future. So use @force_free at your own risk. | ||
| 512 | */ | ||
| 513 | static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free) | ||
| 514 | { | ||
| 515 | do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL); | ||
| 516 | do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL); | ||
| 517 | } | ||
| 518 | |||
| 519 | void nvgpu_kmem_fini(struct gk20a *g, int flags) | ||
| 520 | { | ||
| 521 | int count; | ||
| 522 | bool silent, force_free; | ||
| 523 | |||
| 524 | if (!flags) | ||
| 525 | return; | ||
| 526 | |||
| 527 | silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS); | ||
| 528 | force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP); | ||
| 529 | |||
| 530 | count = check_for_outstanding_allocs(g, silent); | ||
| 531 | nvgpu_kmem_cleanup(g, force_free); | ||
| 532 | |||
| 533 | /* | ||
| 534 | * If we leak objects we can either BUG() out or just WARN(). In general | ||
| 535 | * it doesn't make sense to BUG() on here since leaking a few objects | ||
| 536 | * won't crash the kernel but it can be helpful for development. | ||
| 537 | * | ||
| 538 | * If neither flag is set then we just silently do nothing. | ||
| 539 | */ | ||
| 540 | if (count > 0) { | ||
| 541 | if (flags & NVGPU_KMEM_FINI_WARN) { | ||
| 542 | WARN(1, "Letting %d allocs leak!!\n", count); | ||
| 543 | } else if (flags & NVGPU_KMEM_FINI_BUG) { | ||
| 544 | nvgpu_err(g, "Letting %d allocs leak!!", count); | ||
| 545 | BUG(); | ||
| 546 | } | ||
| 547 | } | ||
| 548 | } | ||
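A hedged teardown-time call: dump any leaked allocations and WARN once, but neither force-free nor BUG(). The flag names are the ones tested above; the call site is illustrative.

```c
/* At driver teardown (hypothetical call site): */
nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS | NVGPU_KMEM_FINI_WARN);
```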
| 549 | |||
| 550 | int nvgpu_kmem_init(struct gk20a *g) | ||
| 551 | { | ||
| 552 | int err; | ||
| 553 | |||
| 554 | g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL); | ||
| 555 | g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL); | ||
| 556 | |||
| 557 | if (!g->vmallocs || !g->kmallocs) { | ||
| 558 | err = -ENOMEM; | ||
| 559 | goto fail; | ||
| 560 | } | ||
| 561 | |||
| 562 | g->vmallocs->name = "vmalloc"; | ||
| 563 | g->kmallocs->name = "kmalloc"; | ||
| 564 | |||
| 565 | g->vmallocs->allocs = NULL; | ||
| 566 | g->kmallocs->allocs = NULL; | ||
| 567 | |||
| 568 | nvgpu_mutex_init(&g->vmallocs->lock); | ||
| 569 | nvgpu_mutex_init(&g->kmallocs->lock); | ||
| 570 | |||
| 571 | g->vmallocs->min_alloc = PAGE_SIZE; | ||
| 572 | g->kmallocs->min_alloc = KMALLOC_MIN_SIZE; | ||
| 573 | |||
| 574 | /* | ||
| 575 | * This needs to go after all the other initialization since they use | ||
| 576 | * the nvgpu_kzalloc() API. | ||
| 577 | */ | ||
| 578 | g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, | ||
| 579 | sizeof(struct nvgpu_mem_alloc)); | ||
| 580 | g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, | ||
| 581 | sizeof(struct nvgpu_mem_alloc)); | ||
| 582 | |||
| 583 | if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { | ||
| 584 | err = -ENOMEM; | ||
| 585 | if (g->vmallocs->allocs_cache) | ||
| 586 | nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); | ||
| 587 | if (g->kmallocs->allocs_cache) | ||
| 588 | nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); | ||
| 589 | goto fail; | ||
| 590 | } | ||
| 591 | |||
| 592 | return 0; | ||
| 593 | |||
| 594 | fail: | ||
| 595 | /* kfree() tolerates NULL, so no need to check first. */ | ||
| 595 | kfree(g->vmallocs); | ||
| 596 | kfree(g->kmallocs); | ||
| 599 | return err; | ||
| 600 | } | ||
| 601 | |||
| 602 | #else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
| 603 | |||
| 604 | int nvgpu_kmem_init(struct gk20a *g) | ||
| 605 | { | ||
| 606 | return 0; | ||
| 607 | } | ||
| 608 | |||
| 609 | void nvgpu_kmem_fini(struct gk20a *g, int flags) | ||
| 610 | { | ||
| 611 | } | ||
| 612 | #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
| 613 | |||
| 614 | struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) | ||
| 615 | { | ||
| 616 | struct nvgpu_kmem_cache *cache = | ||
| 617 | nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); | ||
| 618 | |||
| 619 | if (!cache) | ||
| 620 | return NULL; | ||
| 621 | |||
| 622 | cache->g = g; | ||
| 623 | |||
| 624 | snprintf(cache->name, sizeof(cache->name), | ||
| 625 | "nvgpu-cache-0x%p-%d-%d", g, (int)size, | ||
| 626 | atomic_inc_return(&kmem_cache_id)); | ||
| 627 | cache->cache = kmem_cache_create(cache->name, | ||
| 628 | size, size, 0, NULL); | ||
| 629 | if (!cache->cache) { | ||
| 630 | nvgpu_kfree(g, cache); | ||
| 631 | return NULL; | ||
| 632 | } | ||
| 633 | |||
| 634 | return cache; | ||
| 635 | } | ||
| 636 | |||
| 637 | void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) | ||
| 638 | { | ||
| 639 | struct gk20a *g = cache->g; | ||
| 640 | |||
| 641 | kmem_cache_destroy(cache->cache); | ||
| 642 | nvgpu_kfree(g, cache); | ||
| 643 | } | ||
| 644 | |||
| 645 | void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) | ||
| 646 | { | ||
| 647 | return kmem_cache_alloc(cache->cache, GFP_KERNEL); | ||
| 648 | } | ||
| 649 | |||
| 650 | void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr) | ||
| 651 | { | ||
| 652 | kmem_cache_free(cache->cache, ptr); | ||
| 653 | } | ||
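A usage sketch for the cache wrappers above; the caller and object size are illustrative. Because each cache name includes the atomically incremented kmem_cache_id, multiple GPU instances never collide on cache names.

```c
static int cache_demo(struct gk20a *g)
{
	struct nvgpu_kmem_cache *cache = nvgpu_kmem_cache_create(g, 64);
	void *obj;

	if (!cache)
		return -ENOMEM;

	obj = nvgpu_kmem_cache_alloc(cache);
	if (obj)
		nvgpu_kmem_cache_free(cache, obj);

	nvgpu_kmem_cache_destroy(cache);	/* frees the wrapper too */
	return 0;
}
```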
diff --git a/include/os/linux/kmem_priv.h b/include/os/linux/kmem_priv.h new file mode 100644 index 0000000..a41762a --- /dev/null +++ b/include/os/linux/kmem_priv.h | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __KMEM_PRIV_H__ | ||
| 18 | #define __KMEM_PRIV_H__ | ||
| 19 | |||
| 20 | #include <nvgpu/rbtree.h> | ||
| 21 | #include <nvgpu/lock.h> | ||
| 22 | |||
| 23 | struct seq_file; | ||
| 24 | |||
| 25 | #define __pstat(s, fmt, msg...) \ | ||
| 26 | do { \ | ||
| 27 | if (s) \ | ||
| 28 | seq_printf(s, fmt, ##msg); \ | ||
| 29 | else \ | ||
| 30 | pr_info(fmt, ##msg); \ | ||
| 31 | } while (0) | ||
| 32 | |||
| 33 | #define MAX_STACK_TRACE 20 | ||
| 34 | |||
| 35 | /* | ||
| 36 | * Linux specific version of the nvgpu_kmem_cache struct. This type is | ||
| 37 | * completely opaque to the rest of the driver. | ||
| 38 | */ | ||
| 39 | struct nvgpu_kmem_cache { | ||
| 40 | struct gk20a *g; | ||
| 41 | struct kmem_cache *cache; | ||
| 42 | |||
| 43 | /* | ||
| 44 | * Memory to hold the kmem_cache unique name. Only necessary on our | ||
| 45 | * k3.10 kernel when not using the SLUB allocator, but it's easier | ||
| 46 | * to just carry this forward to newer kernels. | ||
| 47 | */ | ||
| 48 | char name[128]; | ||
| 49 | }; | ||
| 50 | |||
| 51 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
| 52 | |||
| 53 | struct nvgpu_mem_alloc { | ||
| 54 | struct nvgpu_mem_alloc_tracker *owner; | ||
| 55 | |||
| 56 | void *ip; | ||
| 57 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
| 58 | unsigned long stack[MAX_STACK_TRACE]; | ||
| 59 | int stack_length; | ||
| 60 | #endif | ||
| 61 | |||
| 62 | u64 addr; | ||
| 63 | |||
| 64 | unsigned long size; | ||
| 65 | unsigned long real_size; | ||
| 66 | |||
| 67 | struct nvgpu_rbtree_node allocs_entry; | ||
| 68 | }; | ||
| 69 | |||
| 70 | static inline struct nvgpu_mem_alloc * | ||
| 71 | nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) | ||
| 72 | { | ||
| 73 | return (struct nvgpu_mem_alloc *) | ||
| 74 | ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry)); | ||
| 75 | } | ||
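The inline helper above open-codes the classic container_of recovery: subtract the member's offset from the embedded node's address to get back to the enclosing struct. A standalone, compilable reduction of the same arithmetic (the types here are stand-ins):

#include <stddef.h>
#include <stdio.h>

struct node { struct node *left, *right; };	/* stand-in rbtree node */
struct alloc { unsigned long addr; struct node entry; };

/* Same pointer arithmetic as nvgpu_mem_alloc_from_rbtree_node(). */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct alloc a = { .addr = 0x1000 };
	struct node *n = &a.entry;
	struct alloc *back = container_of(n, struct alloc, entry);

	printf("0x%lx\n", back->addr);	/* prints 0x1000 */
	return 0;
}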
| 76 | |||
| 77 | /* | ||
| 78 | * Linux specific tracking of vmalloc, kmalloc, etc. | ||
| 79 | */ | ||
| 80 | struct nvgpu_mem_alloc_tracker { | ||
| 81 | const char *name; | ||
| 82 | struct nvgpu_kmem_cache *allocs_cache; | ||
| 83 | struct nvgpu_rbtree_node *allocs; | ||
| 84 | struct nvgpu_mutex lock; | ||
| 85 | |||
| 86 | u64 bytes_alloced; | ||
| 87 | u64 bytes_freed; | ||
| 88 | u64 bytes_alloced_real; | ||
| 89 | u64 bytes_freed_real; | ||
| 90 | u64 nr_allocs; | ||
| 91 | u64 nr_frees; | ||
| 92 | |||
| 93 | unsigned long min_alloc; | ||
| 94 | unsigned long max_alloc; | ||
| 95 | }; | ||
| 96 | |||
| 97 | void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker); | ||
| 98 | void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker); | ||
| 99 | |||
| 100 | void kmem_print_mem_alloc(struct gk20a *g, | ||
| 101 | struct nvgpu_mem_alloc *alloc, | ||
| 102 | struct seq_file *s); | ||
| 103 | #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
| 104 | |||
| 105 | #endif /* __KMEM_PRIV_H__ */ | ||
diff --git a/include/os/linux/linux-channel.c b/include/os/linux/linux-channel.c new file mode 100644 index 0000000..d035baf --- /dev/null +++ b/include/os/linux/linux-channel.c | |||
| @@ -0,0 +1,657 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/enabled.h> | ||
| 18 | #include <nvgpu/debug.h> | ||
| 19 | #include <nvgpu/error_notifier.h> | ||
| 20 | #include <nvgpu/os_sched.h> | ||
| 21 | #include <nvgpu/gk20a.h> | ||
| 22 | #include <nvgpu/channel.h> | ||
| 23 | #include <nvgpu/dma.h> | ||
| 24 | |||
| 25 | /* | ||
| 26 | * This is required for nvgpu_vm_find_buf(), which is used in the tracing | ||
| 27 | * code. Once userspace buffers can be obtained and accessed without | ||
| 28 | * direct dma_buf usage, this can be removed. | ||
| 29 | */ | ||
| 30 | #include <nvgpu/linux/vm.h> | ||
| 31 | |||
| 32 | #include "channel.h" | ||
| 33 | #include "ioctl_channel.h" | ||
| 34 | #include "os_linux.h" | ||
| 35 | #include "dmabuf.h" | ||
| 36 | |||
| 37 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
| 38 | |||
| 39 | #include <linux/uaccess.h> | ||
| 40 | #include <linux/dma-buf.h> | ||
| 41 | #include <trace/events/gk20a.h> | ||
| 42 | #include <uapi/linux/nvgpu.h> | ||
| 43 | |||
| 44 | #include "sync_sema_android.h" | ||
| 45 | |||
| 46 | u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) | ||
| 47 | { | ||
| 48 | u32 flags = 0; | ||
| 49 | |||
| 50 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) | ||
| 51 | flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; | ||
| 52 | |||
| 53 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | ||
| 54 | flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; | ||
| 55 | |||
| 56 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) | ||
| 57 | flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; | ||
| 58 | |||
| 59 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) | ||
| 60 | flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; | ||
| 61 | |||
| 62 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) | ||
| 63 | flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; | ||
| 64 | |||
| 65 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) | ||
| 66 | flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; | ||
| 67 | |||
| 68 | return flags; | ||
| 69 | } | ||
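The function above translates flags with a chain of independent bit tests. An equivalent table-driven form makes the 1:1 mapping explicit; the sketch below compiles in userspace and uses stand-in flag values, not the real uapi constants:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in values; the real flags come from uapi/linux/nvgpu.h. */
#define USER_FENCE_WAIT		(1u << 0)
#define USER_FENCE_GET		(1u << 1)
#define COMMON_FENCE_WAIT	(1u << 4)
#define COMMON_FENCE_GET	(1u << 5)

struct flag_map { uint32_t user; uint32_t common; };

static const struct flag_map map[] = {
	{ USER_FENCE_WAIT, COMMON_FENCE_WAIT },
	{ USER_FENCE_GET,  COMMON_FENCE_GET },
};

static uint32_t translate(uint32_t user_flags)
{
	uint32_t out = 0;
	size_t i;

	/* OR in the common-side bit for every user-side bit set. */
	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
		if (user_flags & map[i].user)
			out |= map[i].common;

	return out;
}

int main(void)
{
	printf("0x%x\n", translate(USER_FENCE_WAIT | USER_FENCE_GET));
	return 0;
}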
| 70 | |||
| 71 | /* | ||
| 72 | * Convert error notifiers from the common-code form, | ||
| 73 | * NVGPU_ERR_NOTIFIER_*, into the Linux-specific form exposed to | ||
| 74 | * user space, NVGPU_CHANNEL_*. | ||
| 75 | */ | ||
| 76 | static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) | ||
| 77 | { | ||
| 78 | switch (error_notifier) { | ||
| 79 | case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: | ||
| 80 | return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; | ||
| 81 | case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: | ||
| 82 | return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; | ||
| 83 | case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: | ||
| 84 | return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; | ||
| 85 | case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: | ||
| 86 | return NVGPU_CHANNEL_GR_EXCEPTION; | ||
| 87 | case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: | ||
| 88 | return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; | ||
| 89 | case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: | ||
| 90 | return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; | ||
| 91 | case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: | ||
| 92 | return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; | ||
| 93 | case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: | ||
| 94 | return NVGPU_CHANNEL_PBDMA_ERROR; | ||
| 95 | case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: | ||
| 96 | return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; | ||
| 97 | case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: | ||
| 98 | return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; | ||
| 99 | case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: | ||
| 100 | return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; | ||
| 101 | } | ||
| 102 | |||
| 103 | pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); | ||
| 104 | |||
| 105 | return error_notifier; | ||
| 106 | } | ||
| 107 | |||
| 108 | /** | ||
| 109 | * nvgpu_set_error_notifier_locked() | ||
| 110 | * Must be called with priv->error_notifier.mutex (ch->os_priv) held. | ||
| 111 | * | ||
| 112 | * error should be of the form NVGPU_ERR_NOTIFIER_* | ||
| 113 | */ | ||
| 114 | void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) | ||
| 115 | { | ||
| 116 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 117 | |||
| 118 | error = nvgpu_error_notifier_to_channel_notifier(error); | ||
| 119 | |||
| 120 | if (priv->error_notifier.dmabuf) { | ||
| 121 | struct nvgpu_notification *notification = | ||
| 122 | priv->error_notifier.notification; | ||
| 123 | struct timespec time_data; | ||
| 124 | u64 nsec; | ||
| 125 | |||
| 126 | getnstimeofday(&time_data); | ||
| 127 | nsec = ((u64)time_data.tv_sec) * 1000000000u + | ||
| 128 | (u64)time_data.tv_nsec; | ||
| 129 | notification->time_stamp.nanoseconds[0] = | ||
| 130 | (u32)nsec; | ||
| 131 | notification->time_stamp.nanoseconds[1] = | ||
| 132 | (u32)(nsec >> 32); | ||
| 133 | notification->info32 = error; | ||
| 134 | notification->status = 0xffff; | ||
| 135 | |||
| 136 | nvgpu_err(ch->g, | ||
| 137 | "error notifier set to %d for ch %d", error, ch->chid); | ||
| 138 | } | ||
| 139 | } | ||
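The notifier timestamp above is a 64-bit nanosecond count split across two u32 fields, low word in nanoseconds[0]. A standalone check that the split-and-reassemble arithmetic is lossless:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t nsec = 1234567890123456789ull;

	/* Split exactly as nanoseconds[0]/nanoseconds[1] above. */
	uint32_t lo = (uint32_t)nsec;
	uint32_t hi = (uint32_t)(nsec >> 32);

	/* A reader reassembles the original value. */
	uint64_t back = ((uint64_t)hi << 32) | (uint64_t)lo;

	printf("%d\n", back == nsec);	/* prints 1 */
	return 0;
}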
| 140 | |||
| 141 | /* error should be of the form NVGPU_ERR_NOTIFIER_* */ | ||
| 142 | void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) | ||
| 143 | { | ||
| 144 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 145 | |||
| 146 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
| 147 | nvgpu_set_error_notifier_locked(ch, error); | ||
| 148 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
| 149 | } | ||
| 150 | |||
| 151 | void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) | ||
| 152 | { | ||
| 153 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 154 | |||
| 155 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
| 156 | if (priv->error_notifier.dmabuf) { | ||
| 157 | struct nvgpu_notification *notification = | ||
| 158 | priv->error_notifier.notification; | ||
| 159 | |||
| 160 | /* Don't overwrite error flag if it is already set */ | ||
| 161 | if (notification->status != 0xffff) | ||
| 162 | nvgpu_set_error_notifier_locked(ch, error); | ||
| 163 | } | ||
| 164 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
| 165 | } | ||
| 166 | |||
| 167 | /* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ | ||
| 168 | bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) | ||
| 169 | { | ||
| 170 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 171 | bool notifier_set = false; | ||
| 172 | |||
| 173 | error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); | ||
| 174 | |||
| 175 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
| 176 | if (priv->error_notifier.dmabuf) { | ||
| 177 | struct nvgpu_notification *notification = | ||
| 178 | priv->error_notifier.notification; | ||
| 179 | u32 err = notification->info32; | ||
| 180 | |||
| 181 | if (err == error_notifier) | ||
| 182 | notifier_set = true; | ||
| 183 | } | ||
| 184 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
| 185 | |||
| 186 | return notifier_set; | ||
| 187 | } | ||
| 188 | |||
| 189 | static void gk20a_channel_update_runcb_fn(struct work_struct *work) | ||
| 190 | { | ||
| 191 | struct nvgpu_channel_completion_cb *completion_cb = | ||
| 192 | container_of(work, struct nvgpu_channel_completion_cb, work); | ||
| 193 | struct nvgpu_channel_linux *priv = | ||
| 194 | container_of(completion_cb, | ||
| 195 | struct nvgpu_channel_linux, completion_cb); | ||
| 196 | struct channel_gk20a *ch = priv->ch; | ||
| 197 | void (*fn)(struct channel_gk20a *, void *); | ||
| 198 | void *user_data; | ||
| 199 | |||
| 200 | nvgpu_spinlock_acquire(&completion_cb->lock); | ||
| 201 | fn = completion_cb->fn; | ||
| 202 | user_data = completion_cb->user_data; | ||
| 203 | nvgpu_spinlock_release(&completion_cb->lock); | ||
| 204 | |||
| 205 | if (fn) | ||
| 206 | fn(ch, user_data); | ||
| 207 | } | ||
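gk20a_channel_update_runcb_fn() snapshots the callback pointer and its argument under the spinlock and invokes the callback only after releasing it, so user code never runs with the lock held. A pthread-based analogue of that pattern (all names hypothetical):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cb_lock = PTHREAD_MUTEX_INITIALIZER;
static void (*cb_fn)(void *);
static void *cb_data;

static void run_callback(void)
{
	void (*fn)(void *);
	void *data;

	/* Snapshot under the lock... */
	pthread_mutex_lock(&cb_lock);
	fn = cb_fn;
	data = cb_data;
	pthread_mutex_unlock(&cb_lock);

	/* ...invoke outside it, so the callback may block or even
	 * re-register itself without deadlocking on cb_lock. */
	if (fn)
		fn(data);
}

static void hello(void *data)
{
	printf("%s\n", (const char *)data);
}

int main(void)
{
	pthread_mutex_lock(&cb_lock);
	cb_fn = hello;
	cb_data = "done";
	pthread_mutex_unlock(&cb_lock);

	run_callback();
	return 0;
}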
| 208 | |||
| 209 | static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) | ||
| 210 | { | ||
| 211 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 212 | |||
| 213 | priv->completion_cb.fn = NULL; | ||
| 214 | priv->completion_cb.user_data = NULL; | ||
| 215 | nvgpu_spinlock_init(&priv->completion_cb.lock); | ||
| 216 | INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); | ||
| 217 | } | ||
| 218 | |||
| 219 | static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) | ||
| 220 | { | ||
| 221 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 222 | |||
| 223 | nvgpu_spinlock_acquire(&priv->completion_cb.lock); | ||
| 224 | priv->completion_cb.fn = NULL; | ||
| 225 | priv->completion_cb.user_data = NULL; | ||
| 226 | nvgpu_spinlock_release(&priv->completion_cb.lock); | ||
| 227 | cancel_work_sync(&priv->completion_cb.work); | ||
| 228 | } | ||
| 229 | |||
| 230 | static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) | ||
| 231 | { | ||
| 232 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 233 | |||
| 234 | if (priv->completion_cb.fn) | ||
| 235 | schedule_work(&priv->completion_cb.work); | ||
| 236 | } | ||
| 237 | |||
| 238 | static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) | ||
| 239 | { | ||
| 240 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 241 | |||
| 242 | if (priv->completion_cb.fn) | ||
| 243 | cancel_work_sync(&priv->completion_cb.work); | ||
| 244 | } | ||
| 245 | |||
| 246 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | ||
| 247 | void (*update_fn)(struct channel_gk20a *, void *), | ||
| 248 | void *update_fn_data, | ||
| 249 | int runlist_id, | ||
| 250 | bool is_privileged_channel) | ||
| 251 | { | ||
| 252 | struct channel_gk20a *ch; | ||
| 253 | struct nvgpu_channel_linux *priv; | ||
| 254 | |||
| 255 | ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, | ||
| 256 | nvgpu_current_pid(g), nvgpu_current_tid(g)); | ||
| 257 | |||
| 258 | if (ch) { | ||
| 259 | priv = ch->os_priv; | ||
| 260 | nvgpu_spinlock_acquire(&priv->completion_cb.lock); | ||
| 261 | priv->completion_cb.fn = update_fn; | ||
| 262 | priv->completion_cb.user_data = update_fn_data; | ||
| 263 | nvgpu_spinlock_release(&priv->completion_cb.lock); | ||
| 264 | } | ||
| 265 | |||
| 266 | return ch; | ||
| 267 | } | ||
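A hypothetical caller of the helper above. The callback, the my_state type, and the runlist id value are assumptions for illustration; only gk20a_open_new_channel_with_cb() itself is from this file.

/* Hypothetical user state (illustrative only). */
struct my_state {
	struct completion done;
	struct channel_gk20a *ch;
};

static void my_work_done(struct channel_gk20a *ch, void *data)
{
	struct my_state *state = data;

	complete(&state->done);
}

static int open_worker_channel(struct gk20a *g, struct my_state *state)
{
	struct channel_gk20a *ch;

	/* runlist_id 0 is an assumption for the target chip; the
	 * channel is opened non-privileged. */
	ch = gk20a_open_new_channel_with_cb(g, my_work_done, state,
					    0, false);
	if (ch == NULL)
		return -EBUSY;

	state->ch = ch;
	return 0;
}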
| 268 | |||
| 269 | static void nvgpu_channel_open_linux(struct channel_gk20a *ch) | ||
| 270 | { | ||
| 271 | } | ||
| 272 | |||
| 273 | static void nvgpu_channel_close_linux(struct channel_gk20a *ch) | ||
| 274 | { | ||
| 275 | nvgpu_channel_work_completion_clear(ch); | ||
| 276 | |||
| 277 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
| 278 | gk20a_channel_free_cycle_stats_buffer(ch); | ||
| 279 | gk20a_channel_free_cycle_stats_snapshot(ch); | ||
| 280 | #endif | ||
| 281 | } | ||
| 282 | |||
| 283 | static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) | ||
| 284 | { | ||
| 285 | struct nvgpu_channel_linux *priv; | ||
| 286 | int err; | ||
| 287 | |||
| 288 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
| 289 | if (!priv) | ||
| 290 | return -ENOMEM; | ||
| 291 | |||
| 292 | ch->os_priv = priv; | ||
| 293 | priv->ch = ch; | ||
| 294 | |||
| 295 | #ifdef CONFIG_SYNC | ||
| 296 | ch->has_os_fence_framework_support = true; | ||
| 297 | #endif | ||
| 298 | |||
| 299 | err = nvgpu_mutex_init(&priv->error_notifier.mutex); | ||
| 300 | if (err) { | ||
| 301 | nvgpu_kfree(g, priv); | ||
| 302 | return err; | ||
| 303 | } | ||
| 304 | |||
| 305 | nvgpu_channel_work_completion_init(ch); | ||
| 306 | |||
| 307 | return 0; | ||
| 308 | } | ||
| 309 | |||
| 310 | static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) | ||
| 311 | { | ||
| 312 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 313 | |||
| 314 | nvgpu_mutex_destroy(&priv->error_notifier.mutex); | ||
| 315 | nvgpu_kfree(g, priv); | ||
| 316 | |||
| 317 | ch->os_priv = NULL; | ||
| 318 | |||
| 319 | #ifdef CONFIG_SYNC | ||
| 320 | ch->has_os_fence_framework_support = false; | ||
| 321 | #endif | ||
| 322 | } | ||
| 323 | |||
| 324 | static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, | ||
| 325 | const char *fmt, ...) | ||
| 326 | { | ||
| 327 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 328 | struct nvgpu_os_fence_framework *fence_framework; | ||
| 329 | char name[30]; | ||
| 330 | va_list args; | ||
| 331 | |||
| 332 | fence_framework = &priv->fence_framework; | ||
| 333 | |||
| 334 | va_start(args, fmt); | ||
| 335 | vsnprintf(name, sizeof(name), fmt, args); | ||
| 336 | va_end(args); | ||
| 337 | |||
| 338 | fence_framework->timeline = gk20a_sync_timeline_create(name); | ||
| 339 | |||
| 340 | if (!fence_framework->timeline) | ||
| 341 | return -EINVAL; | ||
| 342 | |||
| 343 | return 0; | ||
| 344 | } | ||
| | |||
| 345 | static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) | ||
| 346 | { | ||
| 347 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 348 | struct nvgpu_os_fence_framework *fence_framework; | ||
| 349 | |||
| 350 | fence_framework = &priv->fence_framework; | ||
| 351 | |||
| 352 | gk20a_sync_timeline_signal(fence_framework->timeline); | ||
| 353 | } | ||
| 354 | |||
| 355 | static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) | ||
| 356 | { | ||
| 357 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 358 | struct nvgpu_os_fence_framework *fence_framework; | ||
| 359 | |||
| 360 | fence_framework = &priv->fence_framework; | ||
| 361 | |||
| 362 | gk20a_sync_timeline_destroy(fence_framework->timeline); | ||
| 363 | fence_framework->timeline = NULL; | ||
| 364 | } | ||
| 365 | |||
| 366 | static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) | ||
| 367 | { | ||
| 368 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
| 369 | struct nvgpu_os_fence_framework *fence_framework; | ||
| 370 | |||
| 371 | fence_framework = &priv->fence_framework; | ||
| 372 | |||
| 373 | return (fence_framework->timeline != NULL); | ||
| 374 | } | ||
| 375 | |||
| 376 | static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest, | ||
| 377 | struct nvgpu_gpfifo_userdata userdata, u32 start, u32 length) | ||
| 378 | { | ||
| 379 | struct nvgpu_gpfifo_entry __user *user_gpfifo = userdata.entries; | ||
| 380 | unsigned long n; | ||
| 381 | |||
| 382 | n = copy_from_user(dest, user_gpfifo + start, | ||
| 383 | length * sizeof(struct nvgpu_gpfifo_entry)); | ||
| 384 | |||
| 385 | return n == 0 ? 0 : -EFAULT; | ||
| 386 | } | ||
| 387 | |||
| 388 | int nvgpu_usermode_buf_from_dmabuf(struct gk20a *g, int dmabuf_fd, | ||
| 389 | struct nvgpu_mem *mem, struct nvgpu_usermode_buf_linux *buf) | ||
| 390 | { | ||
| 391 | struct device *dev = dev_from_gk20a(g); | ||
| 392 | struct dma_buf *dmabuf; | ||
| 393 | struct sg_table *sgt; | ||
| 394 | struct dma_buf_attachment *attachment; | ||
| 395 | int err; | ||
| 396 | |||
| 397 | dmabuf = dma_buf_get(dmabuf_fd); | ||
| 398 | if (IS_ERR(dmabuf)) { | ||
| 399 | return PTR_ERR(dmabuf); | ||
| 400 | } | ||
| 401 | |||
| 402 | if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) { | ||
| 403 | err = -EINVAL; | ||
| 404 | goto put_dmabuf; | ||
| 405 | } | ||
| 406 | |||
| 407 | err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev); | ||
| 408 | if (err != 0) { | ||
| 409 | goto put_dmabuf; | ||
| 410 | } | ||
| 411 | |||
| 412 | sgt = gk20a_mm_pin(dev, dmabuf, &attachment); | ||
| 413 | if (IS_ERR(sgt)) { | ||
| 414 | nvgpu_warn(g, "Failed to pin dma_buf!"); | ||
| 415 | err = PTR_ERR(sgt); | ||
| 416 | goto put_dmabuf; | ||
| 417 | } | ||
| 418 | |||
| 419 | buf->dmabuf = dmabuf; | ||
| 420 | buf->attachment = attachment; | ||
| 421 | buf->sgt = sgt; | ||
| 422 | |||
| 423 | /* | ||
| 424 | * This mem is unmapped and freed in a common path; on Linux we must | ||
| 425 | * also unref the dmabuf state set up above, but the sgt here is only | ||
| 426 | * borrowed, so it must not be freed by nvgpu_mem_*. | ||
| 427 | */ | ||
| 428 | mem->mem_flags = NVGPU_MEM_FLAG_FOREIGN_SGT; | ||
| 429 | mem->aperture = APERTURE_SYSMEM; | ||
| 430 | mem->skip_wmb = 0; | ||
| 431 | mem->size = dmabuf->size; | ||
| 432 | |||
| 433 | mem->priv.flags = 0; | ||
| 434 | mem->priv.pages = NULL; | ||
| 435 | mem->priv.sgt = sgt; | ||
| 436 | |||
| 437 | return 0; | ||
| 438 | put_dmabuf: | ||
| 439 | dma_buf_put(dmabuf); | ||
| 440 | return err; | ||
| 441 | } | ||
| 442 | |||
| 443 | void nvgpu_channel_free_usermode_buffers(struct channel_gk20a *c) | ||
| 444 | { | ||
| 445 | struct nvgpu_channel_linux *priv = c->os_priv; | ||
| 446 | struct gk20a *g = c->g; | ||
| 447 | struct device *dev = dev_from_gk20a(g); | ||
| 448 | |||
| 449 | if (priv->usermode.gpfifo.dmabuf != NULL) { | ||
| 450 | gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf, | ||
| 451 | priv->usermode.gpfifo.attachment, | ||
| 452 | priv->usermode.gpfifo.sgt); | ||
| 453 | dma_buf_put(priv->usermode.gpfifo.dmabuf); | ||
| 454 | priv->usermode.gpfifo.dmabuf = NULL; | ||
| 455 | } | ||
| 456 | |||
| 457 | if (priv->usermode.userd.dmabuf != NULL) { | ||
| 458 | gk20a_mm_unpin(dev, priv->usermode.userd.dmabuf, | ||
| 459 | priv->usermode.userd.attachment, | ||
| 460 | priv->usermode.userd.sgt); | ||
| 461 | dma_buf_put(priv->usermode.userd.dmabuf); | ||
| 462 | priv->usermode.userd.dmabuf = NULL; | ||
| 463 | } | ||
| 464 | } | ||
| 465 | |||
| 466 | static int nvgpu_channel_alloc_usermode_buffers(struct channel_gk20a *c, | ||
| 467 | struct nvgpu_setup_bind_args *args) | ||
| 468 | { | ||
| 469 | struct nvgpu_channel_linux *priv = c->os_priv; | ||
| 470 | struct gk20a *g = c->g; | ||
| 471 | struct device *dev = dev_from_gk20a(g); | ||
| 472 | size_t gpfifo_size; | ||
| 473 | int err; | ||
| 474 | |||
| 475 | if (args->gpfifo_dmabuf_fd == 0 || args->userd_dmabuf_fd == 0) { | ||
| 476 | return -EINVAL; | ||
| 477 | } | ||
| 478 | |||
| 479 | if (args->gpfifo_dmabuf_offset != 0 || | ||
| 480 | args->userd_dmabuf_offset != 0) { | ||
| 481 | /* TODO - not yet supported */ | ||
| 482 | return -EINVAL; | ||
| 483 | } | ||
| 484 | |||
| 485 | err = nvgpu_usermode_buf_from_dmabuf(g, args->gpfifo_dmabuf_fd, | ||
| 486 | &c->usermode_gpfifo, &priv->usermode.gpfifo); | ||
| 487 | if (err < 0) { | ||
| 488 | return err; | ||
| 489 | } | ||
| 490 | |||
| 491 | gpfifo_size = max_t(u32, SZ_4K, | ||
| 492 | args->num_gpfifo_entries * | ||
| 493 | nvgpu_get_gpfifo_entry_size()); | ||
| 494 | |||
| 495 | if (c->usermode_gpfifo.size < gpfifo_size) { | ||
| 496 | err = -EINVAL; | ||
| 497 | goto free_gpfifo; | ||
| 498 | } | ||
| 499 | |||
| 500 | c->usermode_gpfifo.gpu_va = nvgpu_gmmu_map(c->vm, &c->usermode_gpfifo, | ||
| 501 | c->usermode_gpfifo.size, 0, gk20a_mem_flag_none, | ||
| 502 | false, c->usermode_gpfifo.aperture); | ||
| 503 | |||
| 504 | if (c->usermode_gpfifo.gpu_va == 0) { | ||
| 505 | err = -ENOMEM; | ||
| 506 | goto unmap_free_gpfifo; | ||
| 507 | } | ||
| 508 | |||
| 509 | err = nvgpu_usermode_buf_from_dmabuf(g, args->userd_dmabuf_fd, | ||
| 510 | &c->usermode_userd, &priv->usermode.userd); | ||
| 511 | if (err < 0) { | ||
| 512 | goto unmap_free_gpfifo; | ||
| 513 | } | ||
| 514 | |||
| 515 | args->work_submit_token = g->fifo.channel_base + c->chid; | ||
| 516 | |||
| 517 | return 0; | ||
| 518 | unmap_free_gpfifo: | ||
| 519 | nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo); | ||
| 520 | free_gpfifo: | ||
| 521 | gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf, | ||
| 522 | priv->usermode.gpfifo.attachment, | ||
| 523 | priv->usermode.gpfifo.sgt); | ||
| 524 | dma_buf_put(priv->usermode.gpfifo.dmabuf); | ||
| 525 | priv->usermode.gpfifo.dmabuf = NULL; | ||
| 526 | return err; | ||
| 527 | } | ||
| 528 | |||
| 529 | int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) | ||
| 530 | { | ||
| 531 | struct gk20a *g = &l->g; | ||
| 532 | struct fifo_gk20a *f = &g->fifo; | ||
| 533 | int chid; | ||
| 534 | int err; | ||
| 535 | |||
| 536 | for (chid = 0; chid < (int)f->num_channels; chid++) { | ||
| 537 | struct channel_gk20a *ch = &f->channel[chid]; | ||
| 538 | |||
| 539 | err = nvgpu_channel_alloc_linux(g, ch); | ||
| 540 | if (err) | ||
| 541 | goto err_clean; | ||
| 542 | } | ||
| 543 | |||
| 544 | g->os_channel.open = nvgpu_channel_open_linux; | ||
| 545 | g->os_channel.close = nvgpu_channel_close_linux; | ||
| 546 | g->os_channel.work_completion_signal = | ||
| 547 | nvgpu_channel_work_completion_signal; | ||
| 548 | g->os_channel.work_completion_cancel_sync = | ||
| 549 | nvgpu_channel_work_completion_cancel_sync; | ||
| 550 | |||
| 551 | g->os_channel.os_fence_framework_inst_exists = | ||
| 552 | nvgpu_channel_fence_framework_exists; | ||
| 553 | g->os_channel.init_os_fence_framework = | ||
| 554 | nvgpu_channel_init_os_fence_framework; | ||
| 555 | g->os_channel.signal_os_fence_framework = | ||
| 556 | nvgpu_channel_signal_os_fence_framework; | ||
| 557 | g->os_channel.destroy_os_fence_framework = | ||
| 558 | nvgpu_channel_destroy_os_fence_framework; | ||
| 559 | |||
| 560 | g->os_channel.copy_user_gpfifo = | ||
| 561 | nvgpu_channel_copy_user_gpfifo; | ||
| 562 | |||
| 563 | g->os_channel.alloc_usermode_buffers = | ||
| 564 | nvgpu_channel_alloc_usermode_buffers; | ||
| 565 | |||
| 566 | g->os_channel.free_usermode_buffers = | ||
| 567 | nvgpu_channel_free_usermode_buffers; | ||
| 568 | |||
| 569 | return 0; | ||
| 570 | |||
| 571 | err_clean: | ||
| 572 | for (; chid >= 0; chid--) { | ||
| 573 | struct channel_gk20a *ch = &f->channel[chid]; | ||
| 574 | |||
| 575 | nvgpu_channel_free_linux(g, ch); | ||
| 576 | } | ||
| 577 | return err; | ||
| 578 | } | ||
| 579 | |||
| 580 | void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) | ||
| 581 | { | ||
| 582 | struct gk20a *g = &l->g; | ||
| 583 | struct fifo_gk20a *f = &g->fifo; | ||
| 584 | unsigned int chid; | ||
| 585 | |||
| 586 | for (chid = 0; chid < f->num_channels; chid++) { | ||
| 587 | struct channel_gk20a *ch = &f->channel[chid]; | ||
| 588 | |||
| 589 | nvgpu_channel_free_linux(g, ch); | ||
| 590 | } | ||
| 591 | |||
| 592 | g->os_channel.os_fence_framework_inst_exists = NULL; | ||
| 593 | g->os_channel.init_os_fence_framework = NULL; | ||
| 594 | g->os_channel.signal_os_fence_framework = NULL; | ||
| 595 | g->os_channel.destroy_os_fence_framework = NULL; | ||
| 596 | } | ||
| 597 | |||
| 598 | u32 nvgpu_get_gpfifo_entry_size(void) | ||
| 599 | { | ||
| 600 | return sizeof(struct nvgpu_gpfifo_entry); | ||
| 601 | } | ||
| 602 | |||
| 603 | #ifdef CONFIG_DEBUG_FS | ||
| 604 | static void trace_write_pushbuffer(struct channel_gk20a *c, | ||
| 605 | struct nvgpu_gpfifo_entry *g) | ||
| 606 | { | ||
| 607 | void *mem = NULL; | ||
| 608 | unsigned int words; | ||
| 609 | u64 offset; | ||
| 610 | struct dma_buf *dmabuf = NULL; | ||
| 611 | |||
| 612 | if (gk20a_debug_trace_cmdbuf) { | ||
| 613 | u64 gpu_va = (u64)g->entry0 | | ||
| 614 | (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); | ||
| 615 | int err; | ||
| 616 | |||
| 617 | words = pbdma_gp_entry1_length_v(g->entry1); | ||
| 618 | err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); | ||
| 619 | if (!err) | ||
| 620 | mem = dma_buf_vmap(dmabuf); | ||
| 621 | } | ||
| 622 | |||
| 623 | if (mem) { | ||
| 624 | u32 i; | ||
| 625 | /* | ||
| 626 | * Write in batches of 128 as there seems to be a limit | ||
| 627 | * of how much you can output to ftrace at once. | ||
| 628 | */ | ||
| 629 | for (i = 0; i < words; i += 128U) { | ||
| 630 | trace_gk20a_push_cmdbuf( | ||
| 631 | c->g->name, | ||
| 632 | 0, | ||
| 633 | min(words - i, 128U), | ||
| 634 | offset + i * sizeof(u32), | ||
| 635 | mem); | ||
| 636 | } | ||
| 637 | dma_buf_vunmap(dmabuf, mem); | ||
| 638 | } | ||
| 639 | } | ||
| 640 | |||
| 641 | void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) | ||
| 642 | { | ||
| 643 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; | ||
| 644 | u32 n = c->gpfifo.entry_num; | ||
| 645 | u32 start = c->gpfifo.put; | ||
| 646 | u32 i; | ||
| 647 | |||
| 648 | if (!gk20a_debug_trace_cmdbuf) | ||
| 649 | return; | ||
| 650 | |||
| 651 | if (!gp) | ||
| 652 | return; | ||
| 653 | |||
| 654 | for (i = 0; i < count; i++) | ||
| 655 | trace_write_pushbuffer(c, &gp[(start + i) % n]); | ||
| 656 | } | ||
| 657 | #endif | ||
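trace_write_pushbuffers() above walks count entries of the gpfifo ring starting at the put pointer and wraps with a modulo. A standalone reduction of that traversal:

#include <stdio.h>

int main(void)
{
	int ring[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
	unsigned int n = 8, put = 6, count = 4, i;

	/* Visit `count` slots starting at `put`, wrapping at `n`,
	 * exactly like &gp[(start + i) % n] above. */
	for (i = 0; i < count; i++)
		printf("%d ", ring[(put + i) % n]);	/* 16 17 10 11 */
	printf("\n");

	return 0;
}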
diff --git a/include/os/linux/linux-dma.c b/include/os/linux/linux-dma.c new file mode 100644 index 0000000..d704b2a --- /dev/null +++ b/include/os/linux/linux-dma.c | |||
| @@ -0,0 +1,534 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/dma-mapping.h> | ||
| 18 | #include <linux/version.h> | ||
| 19 | |||
| 20 | #include <nvgpu/log.h> | ||
| 21 | #include <nvgpu/dma.h> | ||
| 22 | #include <nvgpu/lock.h> | ||
| 23 | #include <nvgpu/bug.h> | ||
| 24 | #include <nvgpu/gmmu.h> | ||
| 25 | #include <nvgpu/kmem.h> | ||
| 26 | #include <nvgpu/enabled.h> | ||
| 27 | #include <nvgpu/vidmem.h> | ||
| 28 | #include <nvgpu/gk20a.h> | ||
| 29 | |||
| 30 | #include <nvgpu/linux/dma.h> | ||
| 31 | |||
| 32 | #include "platform_gk20a.h" | ||
| 33 | #include "os_linux.h" | ||
| 34 | #include "dmabuf_vidmem.h" | ||
| 35 | |||
| 36 | #ifdef __DMA_ATTRS_LONGS | ||
| 37 | #define NVGPU_DEFINE_DMA_ATTRS(x) \ | ||
| 38 | struct dma_attrs x = { \ | ||
| 39 | .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \ | ||
| 40 | } | ||
| 41 | #define NVGPU_DMA_ATTR(attrs) &attrs | ||
| 42 | #else | ||
| 43 | #define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0 | ||
| 44 | #define NVGPU_DMA_ATTR(attrs) attrs | ||
| 45 | #endif | ||
| 46 | |||
| 47 | /* | ||
| 48 | * Enough to hold all the possible flags in string form. When a new flag is | ||
| 49 | * added, it must be added here as well. | ||
| 50 | */ | ||
| 51 | #define NVGPU_DMA_STR_SIZE \ | ||
| 52 | sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS") | ||
| 53 | |||
| 54 | /* | ||
| 55 | * The returned string is nvgpu_kzalloc()ed here; the caller must nvgpu_kfree() it. | ||
| 56 | */ | ||
| 57 | static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags) | ||
| 58 | { | ||
| 59 | char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE); | ||
| 60 | int bytes_available = NVGPU_DMA_STR_SIZE; | ||
| 61 | |||
| 62 | /* | ||
| 63 | * Return the empty buffer if there's no flags. Makes it easier on the | ||
| 64 | * calling code to just print it instead of any if (NULL) type logic. | ||
| 65 | */ | ||
| 66 | if (!flags) | ||
| 67 | return buf; | ||
| 68 | |||
| 69 | #define APPEND_FLAG(flag, str_flag) \ | ||
| 70 | do { \ | ||
| 71 | if (flags & flag) { \ | ||
| 72 | strncat(buf, str_flag, bytes_available); \ | ||
| 73 | bytes_available -= strlen(str_flag); \ | ||
| 74 | } \ | ||
| 75 | } while (0) | ||
| 76 | |||
| 77 | APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING "); | ||
| 78 | APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS "); | ||
| 79 | #undef APPEND_FLAG | ||
| 80 | |||
| 81 | return buf; | ||
| 82 | } | ||
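A userspace-compilable analogue of the pretty-printer above, with the same append-and-shrink bookkeeping; the flag names and sizes are stand-ins, and an explicit length guard is added that the kernel version leaves implicit:

#include <stdio.h>
#include <string.h>

#define FLAG_A		(1ul << 0)
#define FLAG_B		(1ul << 1)
#define STR_SIZE	sizeof("A B ")

static void flags_to_str(unsigned long flags, char buf[STR_SIZE])
{
	size_t avail = STR_SIZE - 1;

	buf[0] = '\0';

#define APPEND_FLAG(flag, str)					\
	do {							\
		if ((flags & (flag)) && avail >= strlen(str)) {	\
			strncat(buf, str, avail);		\
			avail -= strlen(str);			\
		}						\
	} while (0)

	APPEND_FLAG(FLAG_A, "A ");
	APPEND_FLAG(FLAG_B, "B ");
#undef APPEND_FLAG
}

int main(void)
{
	char buf[STR_SIZE];

	flags_to_str(FLAG_A | FLAG_B, buf);
	printf("[%s]\n", buf);	/* prints [A B ] */
	return 0;
}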
| 83 | |||
| 84 | /** | ||
| 85 | * __dma_dbg - Debug print for DMA allocs and frees. | ||
| 86 | * | ||
| 87 | * @g - The GPU. | ||
| 88 | * @size - The requested size of the alloc (size_t). | ||
| 89 | * @flags - The flags (unsigned long). | ||
| 90 | * @type - A string describing the type (e.g. sysmem or vidmem). | ||
| 91 | * @what - A string with 'alloc' or 'free'. | ||
| 92 | * | ||
| 93 | * @flags is the DMA flags. If there are none or it doesn't make sense to print | ||
| 94 | * flags just pass 0. | ||
| 95 | * | ||
| 96 | * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function. | ||
| 97 | */ | ||
| 98 | static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags, | ||
| 99 | const char *type, const char *what, | ||
| 100 | const char *func, int line) | ||
| 101 | { | ||
| 102 | char *flags_str = NULL; | ||
| 103 | |||
| 104 | /* | ||
| 105 | * Don't bother making the flags_str if debugging is | ||
| 106 | * not enabled. This saves a malloc and a free. | ||
| 107 | */ | ||
| 108 | if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma)) | ||
| 109 | return; | ||
| 110 | |||
| 111 | flags_str = nvgpu_dma_flags_to_str(g, flags); | ||
| 112 | |||
| 113 | __nvgpu_log_dbg(g, gpu_dbg_dma, | ||
| 114 | func, line, | ||
| 115 | "DMA %s: [%s] size=%-7zu " | ||
| 116 | "aligned=%-7zu total=%-10llukB %s", | ||
| 117 | what, type, | ||
| 118 | size, PAGE_ALIGN(size), | ||
| 119 | g->dma_memory_used >> 10, | ||
| 120 | flags_str); | ||
| 121 | |||
| 122 | if (flags_str) | ||
| 123 | nvgpu_kfree(g, flags_str); | ||
| 124 | } | ||
| 125 | |||
| 126 | #define dma_dbg_alloc(g, size, flags, type) \ | ||
| 127 | __dma_dbg(g, size, flags, type, "alloc", __func__, __LINE__) | ||
| 128 | #define dma_dbg_free(g, size, flags, type) \ | ||
| 129 | __dma_dbg(g, size, flags, type, "free", __func__, __LINE__) | ||
| 130 | |||
| 131 | /* | ||
| 132 | * Debug print emitted after a DMA alloc or free has completed. | ||
| 133 | */ | ||
| 134 | #define __dma_dbg_done(g, size, type, what) \ | ||
| 135 | do { \ | ||
| 136 | nvgpu_log(g, gpu_dbg_dma, \ | ||
| 137 | "DMA %s: [%s] size=%-7zu Done!", \ | ||
| 138 | what, type, size); \ | ||
| 139 | } while (0) | ||
| | |||
| 139 | #define dma_dbg_alloc_done(g, size, type) \ | ||
| 140 | __dma_dbg_done(g, size, type, "alloc") | ||
| 141 | #define dma_dbg_free_done(g, size, type) \ | ||
| 142 | __dma_dbg_done(g, size, type, "free") | ||
| 143 | |||
| 144 | #if defined(CONFIG_GK20A_VIDMEM) | ||
| 145 | static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at, | ||
| 146 | size_t size) | ||
| 147 | { | ||
| 148 | u64 addr = 0; | ||
| 149 | |||
| 150 | if (at) | ||
| 151 | addr = nvgpu_alloc_fixed(allocator, at, size, 0); | ||
| 152 | else | ||
| 153 | addr = nvgpu_alloc(allocator, size); | ||
| 154 | |||
| 155 | return addr; | ||
| 156 | } | ||
| 157 | #endif | ||
| 158 | |||
| 159 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) | ||
| 160 | static void nvgpu_dma_flags_to_attrs(unsigned long *attrs, | ||
| 161 | unsigned long flags) | ||
| 162 | #define ATTR_ARG(x) *x | ||
| 163 | #else | ||
| 164 | static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs, | ||
| 165 | unsigned long flags) | ||
| 166 | #define ATTR_ARG(x) x | ||
| 167 | #endif | ||
| 168 | { | ||
| 169 | if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) | ||
| 170 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs)); | ||
| 171 | if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) | ||
| 172 | dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs)); | ||
| 173 | #undef ATTR_ARG | ||
| 174 | } | ||
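The #if/#define pair above lets one function body serve two kernel API generations: the signature tracks the kernel version while ATTR_ARG adapts how the body reaches the attribute storage. A standalone reduction of the trick; the attribute values and the legacy struct are stand-ins:

#include <stdio.h>

#define NEW_API 1	/* flip to 0 to build the legacy variant */

struct legacy_attrs { unsigned long flags; };

/*
 * One shared function body, two possible signatures; ATTR_ARG makes
 * the body compile either way.
 */
#if NEW_API
static void flags_to_attrs(unsigned long *attrs, unsigned long flags)
#define ATTR_ARG(x) (*(x))
#else
static void flags_to_attrs(struct legacy_attrs *attrs, unsigned long flags)
#define ATTR_ARG(x) ((x)->flags)
#endif
{
	if (flags & 0x1ul)
		ATTR_ARG(attrs) |= 0x100ul;	/* stand-in attribute bit */
}
#undef ATTR_ARG

int main(void)
{
#if NEW_API
	unsigned long a = 0;

	flags_to_attrs(&a, 0x1ul);
	printf("0x%lx\n", a);		/* 0x100 */
#else
	struct legacy_attrs a = { 0 };

	flags_to_attrs(&a, 0x1ul);
	printf("0x%lx\n", a.flags);
#endif
	return 0;
}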
| 175 | |||
| 176 | int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | ||
| 177 | size_t size, struct nvgpu_mem *mem) | ||
| 178 | { | ||
| 179 | struct device *d = dev_from_gk20a(g); | ||
| 180 | int err; | ||
| 181 | dma_addr_t iova; | ||
| 182 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | ||
| 183 | void *alloc_ret; | ||
| 184 | |||
| 185 | if (nvgpu_mem_is_valid(mem)) { | ||
| 186 | nvgpu_warn(g, "memory leak!"); | ||
| 187 | WARN_ON(1); | ||
| 188 | } | ||
| 189 | |||
| 190 | /* | ||
| 191 | * WAR for IO coherent chips: the DMA API does not seem to generate | ||
| 192 | * mappings that work correctly. Unclear why - Bug ID: 2040115. | ||
| 193 | * | ||
| 194 | * Basically we just pass NO_KERNEL_MAPPING to the DMA API and then | ||
| 195 | * create the kernel mapping ourselves with vmap(). | ||
| 196 | */ | ||
| 197 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
| 198 | flags |= NVGPU_DMA_NO_KERNEL_MAPPING; | ||
| 199 | |||
| 200 | /* | ||
| 201 | * Before the debug print so we see this in the total. But during | ||
| 202 | * cleanup in the fail path this has to be subtracted. | ||
| 203 | */ | ||
| 204 | g->dma_memory_used += PAGE_ALIGN(size); | ||
| 205 | |||
| 206 | dma_dbg_alloc(g, size, flags, "sysmem"); | ||
| 207 | |||
| 208 | /* | ||
| 209 | * Save the old size but for actual allocation purposes the size is | ||
| 210 | * going to be page aligned. | ||
| 211 | */ | ||
| 212 | mem->size = size; | ||
| 213 | size = PAGE_ALIGN(size); | ||
| 214 | |||
| 215 | nvgpu_dma_flags_to_attrs(&dma_attrs, flags); | ||
| 216 | |||
| 217 | alloc_ret = dma_alloc_attrs(d, size, &iova, | ||
| 218 | GFP_KERNEL|__GFP_ZERO, | ||
| 219 | NVGPU_DMA_ATTR(dma_attrs)); | ||
| 220 | if (!alloc_ret) | ||
| 221 | return -ENOMEM; | ||
| 222 | |||
| 223 | if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { | ||
| 224 | mem->priv.pages = alloc_ret; | ||
| 225 | err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt, | ||
| 226 | mem->priv.pages, | ||
| 227 | iova, size); | ||
| 228 | } else { | ||
| 229 | mem->cpu_va = alloc_ret; | ||
| 230 | err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va, | ||
| 231 | iova, size, flags); | ||
| 232 | } | ||
| 233 | if (err) | ||
| 234 | goto fail_free_dma; | ||
| 235 | |||
| 236 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { | ||
| 237 | mem->cpu_va = vmap(mem->priv.pages, | ||
| 238 | size >> PAGE_SHIFT, | ||
| 239 | 0, PAGE_KERNEL); | ||
| 240 | if (!mem->cpu_va) { | ||
| 241 | err = -ENOMEM; | ||
| 242 | goto fail_free_sgt; | ||
| 243 | } | ||
| 244 | } | ||
| 245 | |||
| 246 | mem->aligned_size = size; | ||
| 247 | mem->aperture = APERTURE_SYSMEM; | ||
| 248 | mem->priv.flags = flags; | ||
| 249 | |||
| 250 | dma_dbg_alloc_done(g, mem->size, "sysmem"); | ||
| 251 | |||
| 252 | return 0; | ||
| 253 | |||
| 254 | fail_free_sgt: | ||
| 255 | nvgpu_free_sgtable(g, &mem->priv.sgt); | ||
| 256 | fail_free_dma: | ||
| 257 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); | ||
| 258 | mem->cpu_va = NULL; | ||
| 259 | mem->priv.sgt = NULL; | ||
| 260 | mem->size = 0; | ||
| 261 | g->dma_memory_used -= size; /* mem->aligned_size is not set on failure */ | ||
| 262 | return err; | ||
| 263 | } | ||
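A hypothetical usage sketch of the allocator above: one page of DMA'able sysmem with no kernel CPU mapping, freed with its counterpart below. A valid struct gk20a *g is assumed; the demo function is not from the driver.

static int demo_sysmem_alloc(struct gk20a *g)
{
	struct nvgpu_mem mem = { };
	int err;

	err = nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING,
					SZ_4K, &mem);
	if (err != 0)
		return err;

	/* ... hand mem.priv.sgt to the GMMU mapping code ... */

	nvgpu_dma_free_sys(g, &mem);
	return 0;
}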
| 264 | |||
| 265 | int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, | ||
| 266 | size_t size, struct nvgpu_mem *mem, u64 at) | ||
| 267 | { | ||
| 268 | #if defined(CONFIG_GK20A_VIDMEM) | ||
| 269 | u64 addr; | ||
| 270 | int err; | ||
| 271 | struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? | ||
| 272 | &g->mm.vidmem.allocator : | ||
| 273 | &g->mm.vidmem.bootstrap_allocator; | ||
| 274 | u64 before_pending; | ||
| 275 | |||
| 276 | if (nvgpu_mem_is_valid(mem)) { | ||
| 277 | nvgpu_warn(g, "memory leak!"); | ||
| 278 | WARN_ON(1); | ||
| 279 | } | ||
| 280 | |||
| 281 | dma_dbg_alloc(g, size, flags, "vidmem"); | ||
| 282 | |||
| 283 | mem->size = size; | ||
| 284 | size = PAGE_ALIGN(size); | ||
| 285 | |||
| 286 | if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) | ||
| 287 | return -ENOSYS; | ||
| 288 | |||
| 289 | /* | ||
| 290 | * Our own allocator doesn't have any flags yet, and we can't | ||
| 291 | * kernel-map these, so require explicit flags. | ||
| 292 | */ | ||
| 293 | WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); | ||
| 294 | |||
| 295 | nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); | ||
| 296 | before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var); | ||
| 297 | addr = __nvgpu_dma_alloc(vidmem_alloc, at, size); | ||
| 298 | nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); | ||
| 299 | if (!addr) { | ||
| 300 | /* | ||
| 301 | * If memory is known to be freed soon, let the user know that | ||
| 302 | * it may be available after a while. | ||
| 303 | */ | ||
| 304 | if (before_pending) | ||
| 305 | return -EAGAIN; | ||
| 306 | else | ||
| 307 | return -ENOMEM; | ||
| 308 | } | ||
| 309 | |||
| 310 | if (at) | ||
| 311 | mem->mem_flags |= NVGPU_MEM_FLAG_FIXED; | ||
| 312 | |||
| 313 | mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); | ||
| 314 | if (!mem->priv.sgt) { | ||
| 315 | err = -ENOMEM; | ||
| 316 | goto fail_physfree; | ||
| 317 | } | ||
| 318 | |||
| 319 | err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL); | ||
| 320 | if (err) | ||
| 321 | goto fail_kfree; | ||
| 322 | |||
| 323 | nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr); | ||
| 324 | sg_set_page(mem->priv.sgt->sgl, NULL, size, 0); | ||
| 325 | |||
| 326 | mem->aligned_size = size; | ||
| 327 | mem->aperture = APERTURE_VIDMEM; | ||
| 328 | mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr; | ||
| 329 | mem->allocator = vidmem_alloc; | ||
| 330 | mem->priv.flags = flags; | ||
| 331 | |||
| 332 | nvgpu_init_list_node(&mem->clear_list_entry); | ||
| 333 | |||
| 334 | dma_dbg_alloc_done(g, mem->size, "vidmem"); | ||
| 335 | |||
| 336 | return 0; | ||
| 337 | |||
| 338 | fail_kfree: | ||
| 339 | nvgpu_kfree(g, mem->priv.sgt); | ||
| 340 | fail_physfree: | ||
| 341 | nvgpu_free(&g->mm.vidmem.allocator, addr); | ||
| 342 | mem->size = 0; | ||
| 343 | return err; | ||
| 344 | #else | ||
| 345 | return -ENOSYS; | ||
| 346 | #endif | ||
| 347 | } | ||
| 348 | |||
| 349 | void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) | ||
| 350 | { | ||
| 351 | struct device *d = dev_from_gk20a(g); | ||
| 352 | |||
| 353 | g->dma_memory_used -= mem->aligned_size; | ||
| 354 | |||
| 355 | dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem"); | ||
| 356 | |||
| 357 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && | ||
| 358 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && | ||
| 359 | (mem->cpu_va || mem->priv.pages)) { | ||
| 360 | /* | ||
| 361 | * Free side of WAR for bug 2040115. | ||
| 362 | */ | ||
| 363 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
| 364 | vunmap(mem->cpu_va); | ||
| 365 | |||
| 366 | if (mem->priv.flags) { | ||
| 367 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | ||
| 368 | |||
| 369 | nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags); | ||
| 370 | |||
| 371 | if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) { | ||
| 372 | dma_free_attrs(d, mem->aligned_size, mem->priv.pages, | ||
| 373 | sg_dma_address(mem->priv.sgt->sgl), | ||
| 374 | NVGPU_DMA_ATTR(dma_attrs)); | ||
| 375 | } else { | ||
| 376 | dma_free_attrs(d, mem->aligned_size, mem->cpu_va, | ||
| 377 | sg_dma_address(mem->priv.sgt->sgl), | ||
| 378 | NVGPU_DMA_ATTR(dma_attrs)); | ||
| 379 | } | ||
| 380 | } else { | ||
| 381 | dma_free_coherent(d, mem->aligned_size, mem->cpu_va, | ||
| 382 | sg_dma_address(mem->priv.sgt->sgl)); | ||
| 383 | } | ||
| 384 | mem->cpu_va = NULL; | ||
| 385 | mem->priv.pages = NULL; | ||
| 386 | } | ||
| 387 | |||
| 388 | /* | ||
| 389 | * When this flag is set we expect that pages is still populated but not | ||
| 390 | * by the DMA API. | ||
| 391 | */ | ||
| 392 | if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) | ||
| 393 | nvgpu_kfree(g, mem->priv.pages); | ||
| 394 | |||
| 395 | if ((mem->mem_flags & NVGPU_MEM_FLAG_FOREIGN_SGT) == 0 && | ||
| 396 | mem->priv.sgt != NULL) { | ||
| 397 | nvgpu_free_sgtable(g, &mem->priv.sgt); | ||
| 398 | } | ||
| 399 | |||
| 400 | dma_dbg_free_done(g, mem->size, "sysmem"); | ||
| 401 | |||
| 402 | mem->size = 0; | ||
| 403 | mem->aligned_size = 0; | ||
| 404 | mem->aperture = APERTURE_INVALID; | ||
| 405 | } | ||
| 406 | |||
| 407 | void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) | ||
| 408 | { | ||
| 409 | #if defined(CONFIG_GK20A_VIDMEM) | ||
| 410 | size_t mem_size = mem->size; | ||
| 411 | |||
| 412 | dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); | ||
| 413 | |||
| 414 | /* Sanity check - only this supported when allocating. */ | ||
| 415 | WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); | ||
| 416 | |||
| 417 | if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { | ||
| 418 | int err = nvgpu_vidmem_clear_list_enqueue(g, mem); | ||
| 419 | |||
| 420 | /* | ||
| 421 | * If there's an error here then that means we can't clear the | ||
| 422 | * vidmem. That's too bad; however, we still own the nvgpu_mem | ||
| 423 | * buf so we have to free that. | ||
| 424 | * | ||
| 425 | * We don't need to worry about the vidmem allocator itself | ||
| 426 | * since when that gets cleaned up in the driver shutdown path | ||
| 427 | * all the outstanding allocs are force freed. | ||
| 428 | */ | ||
| 429 | if (err) | ||
| 430 | nvgpu_kfree(g, mem); | ||
| 431 | } else { | ||
| 432 | nvgpu_memset(g, mem, 0, 0, mem->aligned_size); | ||
| 433 | nvgpu_free(mem->allocator, | ||
| 434 | (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl)); | ||
| 435 | nvgpu_free_sgtable(g, &mem->priv.sgt); | ||
| 436 | |||
| 437 | mem->size = 0; | ||
| 438 | mem->aligned_size = 0; | ||
| 439 | mem->aperture = APERTURE_INVALID; | ||
| 440 | } | ||
| 441 | |||
| 442 | dma_dbg_free_done(g, mem_size, "vidmem"); | ||
| 443 | #endif | ||
| 444 | } | ||
| 445 | |||
| 446 | int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt, | ||
| 447 | void *cpuva, u64 iova, size_t size, unsigned long flags) | ||
| 448 | { | ||
| 449 | int err = 0; | ||
| 450 | struct sg_table *tbl; | ||
| 451 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | ||
| 452 | |||
| 453 | tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); | ||
| 454 | if (!tbl) { | ||
| 455 | err = -ENOMEM; | ||
| 456 | goto fail; | ||
| 457 | } | ||
| 458 | |||
| 459 | nvgpu_dma_flags_to_attrs(&dma_attrs, flags); | ||
| 460 | err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova, | ||
| 461 | size, NVGPU_DMA_ATTR(dma_attrs)); | ||
| 462 | if (err) | ||
| 463 | goto fail; | ||
| 464 | |||
| 465 | sg_dma_address(tbl->sgl) = iova; | ||
| 466 | *sgt = tbl; | ||
| 467 | |||
| 468 | return 0; | ||
| 469 | |||
| 470 | fail: | ||
| 471 | if (tbl) | ||
| 472 | nvgpu_kfree(g, tbl); | ||
| 473 | |||
| 474 | return err; | ||
| 475 | } | ||
| 476 | |||
| 477 | int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt, | ||
| 478 | void *cpuva, u64 iova, size_t size) | ||
| 479 | { | ||
| 480 | return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0); | ||
| 481 | } | ||
| 482 | |||
| 483 | int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt, | ||
| 484 | struct page **pages, u64 iova, size_t size) | ||
| 485 | { | ||
| 486 | int err = 0; | ||
| 487 | struct sg_table *tbl; | ||
| 488 | |||
| 489 | tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); | ||
| 490 | if (!tbl) { | ||
| 491 | err = -ENOMEM; | ||
| 492 | goto fail; | ||
| 493 | } | ||
| 494 | |||
| 495 | err = sg_alloc_table_from_pages(tbl, pages, | ||
| 496 | DIV_ROUND_UP(size, PAGE_SIZE), | ||
| 497 | 0, size, GFP_KERNEL); | ||
| 498 | if (err) | ||
| 499 | goto fail; | ||
| 500 | |||
| 501 | sg_dma_address(tbl->sgl) = iova; | ||
| 502 | *sgt = tbl; | ||
| 503 | |||
| 504 | return 0; | ||
| 505 | |||
| 506 | fail: | ||
| 507 | if (tbl) | ||
| 508 | nvgpu_kfree(g, tbl); | ||
| 509 | |||
| 510 | return err; | ||
| 511 | } | ||
| 512 | |||
| 513 | void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt) | ||
| 514 | { | ||
| 515 | sg_free_table(*sgt); | ||
| 516 | nvgpu_kfree(g, *sgt); | ||
| 517 | *sgt = NULL; | ||
| 518 | } | ||
| 519 | |||
| 520 | bool nvgpu_iommuable(struct gk20a *g) | ||
| 521 | { | ||
| 522 | #ifdef CONFIG_TEGRA_GK20A | ||
| 523 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 524 | |||
| 525 | /* | ||
| 526 | * Check against the nvgpu device to see if it's been marked as | ||
| 527 | * IOMMU'able. | ||
| 528 | */ | ||
| 529 | if (!device_is_iommuable(l->dev)) | ||
| 530 | return false; | ||
| 531 | #endif | ||
| 532 | |||
| 533 | return true; | ||
| 534 | } | ||
diff --git a/include/os/linux/log.c b/include/os/linux/log.c new file mode 100644 index 0000000..bd9f67d --- /dev/null +++ b/include/os/linux/log.c | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/kernel.h> | ||
| 18 | #include <linux/device.h> | ||
| 19 | |||
| 20 | #include <nvgpu/log.h> | ||
| 21 | #include <nvgpu/gk20a.h> | ||
| 22 | |||
| 23 | #include "platform_gk20a.h" | ||
| 24 | #include "os_linux.h" | ||
| 25 | |||
| 26 | /* | ||
| 27 | * Define a length for log buffers. This is the buffer that the 'fmt, ...' | ||
| 28 | * part of __nvgpu_log_msg() and __nvgpu_log_dbg() prints into. The buffer | ||
| 29 | * lives on the stack, so it must not be overly large given the limited | ||
| 30 | * kernel stack space, but it shouldn't be too restrictive either. | ||
| 31 | */ | ||
| 32 | #define LOG_BUFFER_LENGTH 160 | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Annoying quirk of Linux: this has to be a string literal since the printk() | ||
| 36 | * function and friends use the preprocessor to concatenate stuff to the start | ||
| 37 | * of this string when printing. | ||
| 38 | */ | ||
| 39 | #define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n" | ||
| 40 | |||
| 41 | static const char *log_types[] = { | ||
| 42 | "ERR", | ||
| 43 | "WRN", | ||
| 44 | "DBG", | ||
| 45 | "INFO", | ||
| 46 | }; | ||
| 47 | |||
| 48 | int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask) | ||
| 49 | { | ||
| 50 | return !!(g->log_mask & log_mask); | ||
| 51 | } | ||
| 52 | |||
| 53 | static inline const char *nvgpu_log_name(struct gk20a *g) | ||
| 54 | { | ||
| 55 | return dev_name(dev_from_gk20a(g)); | ||
| 56 | } | ||
| 57 | |||
| 58 | #ifdef CONFIG_GK20A_TRACE_PRINTK | ||
| 59 | static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name, | ||
| 60 | const char *func_name, int line, | ||
| 61 | const char *log_type, const char *log) | ||
| 62 | { | ||
| 63 | trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log); | ||
| 64 | } | ||
| 65 | #endif | ||
| 66 | |||
| 67 | static void __nvgpu_really_print_log(u32 trace, const char *gpu_name, | ||
| 68 | const char *func_name, int line, | ||
| 69 | enum nvgpu_log_type type, const char *log) | ||
| 70 | { | ||
| 71 | const char *name = gpu_name ? gpu_name : ""; | ||
| 72 | const char *log_type = log_types[type]; | ||
| 73 | |||
| 74 | #ifdef CONFIG_GK20A_TRACE_PRINTK | ||
| 75 | if (trace) | ||
| 76 | return __nvgpu_trace_printk_log(trace, name, func_name, | ||
| 77 | line, log_type, log); | ||
| 78 | #endif | ||
| 79 | switch (type) { | ||
| 80 | case NVGPU_DEBUG: | ||
| 81 | /* | ||
| 82 | * We could use pr_debug() here but we control debug enablement | ||
| 83 | * separately from the Linux kernel. Perhaps this is a bug in | ||
| 84 | * nvgpu. | ||
| 85 | */ | ||
| 86 | pr_info(LOG_FMT, name, func_name, line, log_type, log); | ||
| 87 | break; | ||
| 88 | case NVGPU_INFO: | ||
| 89 | pr_info(LOG_FMT, name, func_name, line, log_type, log); | ||
| 90 | break; | ||
| 91 | case NVGPU_WARNING: | ||
| 92 | pr_warn(LOG_FMT, name, func_name, line, log_type, log); | ||
| 93 | break; | ||
| 94 | case NVGPU_ERROR: | ||
| 95 | pr_err(LOG_FMT, name, func_name, line, log_type, log); | ||
| 96 | break; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | __attribute__((format (printf, 5, 6))) | ||
| 101 | void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line, | ||
| 102 | enum nvgpu_log_type type, const char *fmt, ...) | ||
| 103 | { | ||
| 104 | char log[LOG_BUFFER_LENGTH]; | ||
| 105 | va_list args; | ||
| 106 | |||
| 107 | va_start(args, fmt); | ||
| 108 | vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); | ||
| 109 | va_end(args); | ||
| 110 | |||
| 111 | __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "", | ||
| 112 | func_name, line, type, log); | ||
| 113 | } | ||
| 114 | |||
| 115 | __attribute__((format (printf, 5, 6))) | ||
| 116 | void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask, | ||
| 117 | const char *func_name, int line, | ||
| 118 | const char *fmt, ...) | ||
| 119 | { | ||
| 120 | char log[LOG_BUFFER_LENGTH]; | ||
| 121 | va_list args; | ||
| 122 | |||
| 123 | if ((log_mask & g->log_mask) == 0) | ||
| 124 | return; | ||
| 125 | |||
| 126 | va_start(args, fmt); | ||
| 127 | vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); | ||
| 128 | va_end(args); | ||
| 129 | |||
| 130 | __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g), | ||
| 131 | func_name, line, NVGPU_DEBUG, log); | ||
| 132 | } | ||
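Both entry points above share the same shape: format once into a small stack buffer, then hand one finished string to the sink. A standalone analogue (the sink and prefix are stand-ins):

#include <stdarg.h>
#include <stdio.h>

#define LOG_BUFFER_LENGTH 160

static void sink(const char *msg)
{
	fprintf(stderr, "nvgpu: %s\n", msg);
}

__attribute__((format (printf, 1, 2)))
static void log_msg(const char *fmt, ...)
{
	char log[LOG_BUFFER_LENGTH];
	va_list args;

	/* Format into the stack buffer; vsnprintf() truncates safely
	 * if the message exceeds LOG_BUFFER_LENGTH. */
	va_start(args, fmt);
	vsnprintf(log, sizeof(log), fmt, args);
	va_end(args);

	sink(log);
}

int main(void)
{
	log_msg("chid=%d err=%d", 3, -22);
	return 0;
}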
diff --git a/include/os/linux/ltc.c b/include/os/linux/ltc.c new file mode 100644 index 0000000..baeb20b --- /dev/null +++ b/include/os/linux/ltc.c | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| 20 | * DEALINGS IN THE SOFTWARE. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <nvgpu/ltc.h> | ||
| 24 | #include <nvgpu/dma.h> | ||
| 25 | #include <nvgpu/nvgpu_mem.h> | ||
| 26 | #include <nvgpu/gk20a.h> | ||
| 27 | |||
| 28 | #include "gk20a/gr_gk20a.h" | ||
| 29 | |||
| 30 | int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size, | ||
| 31 | bool vidmem_alloc) | ||
| 32 | { | ||
| 33 | struct gr_gk20a *gr = &g->gr; | ||
| 34 | unsigned long flags = 0; | ||
| 35 | |||
| 36 | if (nvgpu_mem_is_valid(&gr->compbit_store.mem)) | ||
| 37 | return 0; | ||
| 38 | |||
| 39 | if (vidmem_alloc) { | ||
| 40 | /* | ||
| 41 | * The backing store MUST be physically contiguous and allocated in | ||
| 42 | * one chunk. The vidmem allocation API does not support a | ||
| 43 | * FORCE_CONTIGUOUS-style flag for requesting contiguous memory, but | ||
| 44 | * this allocation goes through the vidmem bootstrap allocator, | ||
| 45 | * which always allocates contiguous memory. | ||
| 47 | */ | ||
| 48 | return nvgpu_dma_alloc_vid(g, | ||
| 49 | compbit_backing_size, | ||
| 50 | &gr->compbit_store.mem); | ||
| 51 | } else { | ||
| 52 | if (!nvgpu_iommuable(g)) | ||
| 53 | flags = NVGPU_DMA_FORCE_CONTIGUOUS; | ||
| 54 | |||
| 55 | return nvgpu_dma_alloc_flags_sys(g, | ||
| 56 | flags, | ||
| 57 | compbit_backing_size, | ||
| 58 | &gr->compbit_store.mem); | ||
| 59 | } | ||
| 60 | } | ||
diff --git a/include/os/linux/module.c b/include/os/linux/module.c new file mode 100644 index 0000000..807df2c --- /dev/null +++ b/include/os/linux/module.c | |||
| @@ -0,0 +1,1529 @@ | |||
| 1 | /* | ||
| 2 | * GK20A Graphics | ||
| 3 | * | ||
| 4 | * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/module.h> | ||
| 20 | #include <linux/of.h> | ||
| 21 | #include <linux/of_device.h> | ||
| 22 | #include <linux/of_platform.h> | ||
| 23 | #include <linux/of_address.h> | ||
| 24 | #include <linux/interrupt.h> | ||
| 25 | #include <linux/pm_runtime.h> | ||
| 26 | #include <linux/reset.h> | ||
| 27 | #include <linux/reboot.h> | ||
| 28 | #include <linux/notifier.h> | ||
| 29 | #include <linux/platform/tegra/common.h> | ||
| 30 | #include <linux/pci.h> | ||
| 31 | |||
| 32 | #include <uapi/linux/nvgpu.h> | ||
| 33 | #include <dt-bindings/soc/gm20b-fuse.h> | ||
| 34 | #include <dt-bindings/soc/gp10b-fuse.h> | ||
| 35 | #include <dt-bindings/soc/gv11b-fuse.h> | ||
| 36 | |||
| 37 | #include <soc/tegra/fuse.h> | ||
| 38 | |||
| 39 | #include <nvgpu/hal_init.h> | ||
| 40 | #include <nvgpu/dma.h> | ||
| 41 | #include <nvgpu/kmem.h> | ||
| 42 | #include <nvgpu/nvgpu_common.h> | ||
| 43 | #include <nvgpu/soc.h> | ||
| 44 | #include <nvgpu/enabled.h> | ||
| 45 | #include <nvgpu/debug.h> | ||
| 46 | #include <nvgpu/ctxsw_trace.h> | ||
| 47 | #include <nvgpu/vidmem.h> | ||
| 48 | #include <nvgpu/sim.h> | ||
| 49 | #include <nvgpu/clk_arb.h> | ||
| 50 | #include <nvgpu/timers.h> | ||
| 51 | #include <nvgpu/channel.h> | ||
| 52 | |||
| 53 | #include "platform_gk20a.h" | ||
| 54 | #include "sysfs.h" | ||
| 55 | #include "vgpu/vgpu_linux.h" | ||
| 56 | #include "scale.h" | ||
| 57 | #include "pci.h" | ||
| 58 | #include "module.h" | ||
| 59 | #include "module_usermode.h" | ||
| 60 | #include "intr.h" | ||
| 61 | #include "ioctl.h" | ||
| 62 | #include "ioctl_ctrl.h" | ||
| 63 | |||
| 64 | #include "os_linux.h" | ||
| 65 | #include "os_ops.h" | ||
| 66 | #include "ctxsw_trace.h" | ||
| 67 | #include "driver_common.h" | ||
| 68 | #include "channel.h" | ||
| 69 | #include "debug_pmgr.h" | ||
| 70 | |||
| 71 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 72 | #include "cde.h" | ||
| 73 | #endif | ||
| 74 | |||
| 75 | #define CLASS_NAME "nvidia-gpu" | ||
| 76 | /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ | ||
| 77 | |||
| 78 | #define GK20A_WAIT_FOR_IDLE_MS 2000 | ||
| 79 | |||
| 80 | #define CREATE_TRACE_POINTS | ||
| 81 | #include <trace/events/gk20a.h> | ||
| 82 | |||
| 83 | static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb, | ||
| 84 | unsigned long event, void *unused) | ||
| 85 | { | ||
| 86 | struct gk20a *g = container_of(nb, struct gk20a, nvgpu_reboot_nb); | ||
| 87 | |||
| 88 | __nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true); | ||
| 89 | return NOTIFY_DONE; | ||
| 90 | } | ||
| 91 | |||
| 92 | struct device_node *nvgpu_get_node(struct gk20a *g) | ||
| 93 | { | ||
| 94 | struct device *dev = dev_from_gk20a(g); | ||
| 95 | |||
| 96 | if (dev_is_pci(dev)) { | ||
| 97 | struct pci_bus *bus = to_pci_dev(dev)->bus; | ||
| 98 | |||
| 99 | while (!pci_is_root_bus(bus)) | ||
| 100 | bus = bus->parent; | ||
| 101 | |||
| 102 | return bus->bridge->parent->of_node; | ||
| 103 | } | ||
| 104 | |||
| 105 | return dev->of_node; | ||
| 106 | } | ||
| 107 | |||
| 108 | void gk20a_busy_noresume(struct gk20a *g) | ||
| 109 | { | ||
| 110 | pm_runtime_get_noresume(dev_from_gk20a(g)); | ||
| 111 | } | ||
| 112 | |||
| 113 | /* | ||
| 114 | * Check if the device can go busy. | ||
| 115 | */ | ||
| 116 | static int nvgpu_can_busy(struct gk20a *g) | ||
| 117 | { | ||
| 118 | /* Can't do anything if the system is rebooting/shutting down. */ | ||
| 119 | if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING)) | ||
| 120 | return 0; | ||
| 121 | |||
| 122 | /* Can't do anything if the driver is restarting. */ | ||
| 123 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
| 124 | return 0; | ||
| 125 | |||
| 126 | return 1; | ||
| 127 | } | ||
| 128 | |||
| 129 | int gk20a_busy(struct gk20a *g) | ||
| 130 | { | ||
| 131 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 132 | int ret = 0; | ||
| 133 | struct device *dev; | ||
| 134 | |||
| 135 | if (!g) | ||
| 136 | return -ENODEV; | ||
| 137 | |||
| 138 | atomic_inc(&g->usage_count.atomic_var); | ||
| 139 | |||
| 140 | down_read(&l->busy_lock); | ||
| 141 | |||
| 142 | if (!nvgpu_can_busy(g)) { | ||
| 143 | ret = -ENODEV; | ||
| 144 | atomic_dec(&g->usage_count.atomic_var); | ||
| 145 | goto fail; | ||
| 146 | } | ||
| 147 | |||
| 148 | dev = dev_from_gk20a(g); | ||
| 149 | |||
| 150 | if (pm_runtime_enabled(dev)) { | ||
| 151 | /* Increment usage count and attempt to resume device */ | ||
| 152 | ret = pm_runtime_get_sync(dev); | ||
| 153 | if (ret < 0) { | ||
| 154 | /* Mark suspended so runtime pm will retry later */ | ||
| 155 | pm_runtime_set_suspended(dev); | ||
| 156 | pm_runtime_put_noidle(dev); | ||
| 157 | atomic_dec(&g->usage_count.atomic_var); | ||
| 158 | goto fail; | ||
| 159 | } | ||
| 160 | } else { | ||
| 161 | ret = gk20a_gpu_is_virtual(dev) ? | ||
| 162 | vgpu_pm_finalize_poweron(dev) : | ||
| 163 | gk20a_pm_finalize_poweron(dev); | ||
| 164 | if (ret) { | ||
| 165 | atomic_dec(&g->usage_count.atomic_var); | ||
| 166 | goto fail; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | fail: | ||
| 171 | up_read(&l->busy_lock); | ||
| 172 | |||
| 173 | return ret < 0 ? ret : 0; | ||
| 174 | } | ||
| 175 | |||
| 176 | void gk20a_idle_nosuspend(struct gk20a *g) | ||
| 177 | { | ||
| 178 | pm_runtime_put_noidle(dev_from_gk20a(g)); | ||
| 179 | } | ||
| 180 | |||
| 181 | void gk20a_idle(struct gk20a *g) | ||
| 182 | { | ||
| 183 | struct device *dev; | ||
| 184 | |||
| 185 | atomic_dec(&g->usage_count.atomic_var); | ||
| 186 | |||
| 187 | dev = dev_from_gk20a(g); | ||
| 188 | |||
| 189 | if (!(dev && nvgpu_can_busy(g))) | ||
| 190 | return; | ||
| 191 | |||
| 192 | if (pm_runtime_enabled(dev)) { | ||
| 193 | pm_runtime_mark_last_busy(dev); | ||
| 194 | pm_runtime_put_sync_autosuspend(dev); | ||
| 195 | } | ||
| 196 | } | ||
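/*
 * Usage sketch for the busy/idle pair above (hypothetical, not part of
 * this file). gk20a_readl() is assumed to be the driver's BAR0 read
 * accessor and the offset parameter is illustrative; the point is that
 * register access is bracketed so the device stays powered and the
 * usage count stays balanced.
 */
static int example_read_register(struct gk20a *g, u32 offset, u32 *out)
{
	int err = gk20a_busy(g);

	if (err)
		return err;

	*out = gk20a_readl(g, offset);

	gk20a_idle(g);
	return 0;
}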
| 197 | |||
| 198 | /* | ||
| 199 | * Undoes gk20a_lockout_registers(). | ||
| 200 | */ | ||
| 201 | static int gk20a_restore_registers(struct gk20a *g) | ||
| 202 | { | ||
| 203 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 204 | |||
| 205 | l->regs = l->regs_saved; | ||
| 206 | l->bar1 = l->bar1_saved; | ||
| 207 | |||
| 208 | nvgpu_restore_usermode_registers(g); | ||
| 209 | |||
| 210 | return 0; | ||
| 211 | } | ||
| 212 | |||
| 213 | int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l) | ||
| 214 | { | ||
| 215 | struct gk20a *g = &l->g; | ||
| 216 | int err; | ||
| 217 | |||
| 218 | if (l->init_done) | ||
| 219 | return 0; | ||
| 220 | |||
| 221 | err = nvgpu_init_channel_support_linux(l); | ||
| 222 | if (err) { | ||
| 223 | nvgpu_err(g, "failed to init linux channel support"); | ||
| 224 | return err; | ||
| 225 | } | ||
| 226 | |||
| 227 | if (l->ops.clk.init_debugfs) { | ||
| 228 | err = l->ops.clk.init_debugfs(g); | ||
| 229 | if (err) { | ||
| 230 | nvgpu_err(g, "failed to init linux clk debugfs"); | ||
| 231 | return err; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | if (l->ops.therm.init_debugfs) { | ||
| 236 | err = l->ops.therm.init_debugfs(g); | ||
| 237 | if (err) { | ||
| 238 | nvgpu_err(g, "failed to init linux therm debugfs"); | ||
| 239 | return err; | ||
| 240 | } | ||
| 241 | } | ||
| 242 | |||
| 243 | if (l->ops.fecs_trace.init_debugfs) { | ||
| 244 | err = l->ops.fecs_trace.init_debugfs(g); | ||
| 245 | if (err) { | ||
| 246 | nvgpu_err(g, "failed to init linux fecs trace debugfs"); | ||
| 247 | return err; | ||
| 248 | } | ||
| 249 | } | ||
| 250 | |||
| 251 | err = nvgpu_pmgr_init_debugfs_linux(l); | ||
| 252 | if (err) { | ||
| 253 | nvgpu_err(g, "failed to init linux pmgr debugfs"); | ||
| 254 | return err; | ||
| 255 | } | ||
| 256 | |||
| 257 | l->init_done = true; | ||
| 258 | |||
| 259 | return 0; | ||
| 260 | } | ||
| 261 | |||
| 262 | bool gk20a_check_poweron(struct gk20a *g) | ||
| 263 | { | ||
| 264 | bool ret; | ||
| 265 | |||
| 266 | nvgpu_mutex_acquire(&g->power_lock); | ||
| 267 | ret = g->power_on; | ||
| 268 | nvgpu_mutex_release(&g->power_lock); | ||
| 269 | |||
| 270 | return ret; | ||
| 271 | } | ||
| 272 | |||
| 273 | int gk20a_pm_finalize_poweron(struct device *dev) | ||
| 274 | { | ||
| 275 | struct gk20a *g = get_gk20a(dev); | ||
| 276 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 277 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 278 | int err = 0; | ||
| 279 | |||
| 280 | nvgpu_log_fn(g, " "); | ||
| 281 | |||
| 282 | nvgpu_mutex_acquire(&g->power_lock); | ||
| 283 | |||
| 284 | if (g->power_on) | ||
| 285 | goto done; | ||
| 286 | |||
| 287 | trace_gk20a_finalize_poweron(dev_name(dev)); | ||
| 288 | |||
| 289 | /* Increment platform power refcount */ | ||
| 290 | if (platform->busy) { | ||
| 291 | err = platform->busy(dev); | ||
| 292 | if (err < 0) { | ||
| 293 | nvgpu_err(g, "failed to poweron platform dependency"); | ||
| 294 | goto done; | ||
| 295 | } | ||
| 296 | } | ||
| 297 | |||
| 298 | err = gk20a_restore_registers(g); | ||
| 299 | if (err) | ||
| 300 | goto done; | ||
| 301 | |||
| 302 | nvgpu_restore_usermode_for_poweron(g); | ||
| 303 | |||
| 304 | /* Enable interrupt workqueue */ | ||
| 305 | if (!l->nonstall_work_queue) { | ||
| 306 | l->nonstall_work_queue = alloc_workqueue("%s", | ||
| 307 | WQ_HIGHPRI, 1, "mc_nonstall"); | ||
| 308 | INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb); | ||
| 309 | } | ||
| 310 | |||
| 311 | err = nvgpu_detect_chip(g); | ||
| 312 | if (err) | ||
| 313 | goto done; | ||
| 314 | |||
| 315 | if (g->sim) { | ||
| 316 | if (g->sim->sim_init_late) | ||
| 317 | g->sim->sim_init_late(g); | ||
| 318 | } | ||
| 319 | |||
| 320 | err = gk20a_finalize_poweron(g); | ||
| 321 | if (err) | ||
| 322 | goto done; | ||
| 323 | |||
| 324 | err = nvgpu_init_os_linux_ops(l); | ||
| 325 | if (err) | ||
| 326 | goto done; | ||
| 327 | |||
| 328 | err = nvgpu_finalize_poweron_linux(l); | ||
| 329 | if (err) | ||
| 330 | goto done; | ||
| 331 | |||
| 332 | nvgpu_init_mm_ce_context(g); | ||
| 333 | |||
| 334 | nvgpu_vidmem_thread_unpause(&g->mm); | ||
| 335 | |||
| 336 | /* Initialize scaling; the scaling driver is initialized only once */ | ||
| 337 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) && | ||
| 338 | nvgpu_platform_is_silicon(g)) { | ||
| 339 | gk20a_scale_init(dev); | ||
| 340 | if (platform->initscale) | ||
| 341 | platform->initscale(dev); | ||
| 342 | } | ||
| 343 | |||
| 344 | trace_gk20a_finalize_poweron_done(dev_name(dev)); | ||
| 345 | |||
| 346 | enable_irq(g->irq_stall); | ||
| 347 | if (g->irq_stall != g->irq_nonstall) | ||
| 348 | enable_irq(g->irq_nonstall); | ||
| 349 | g->irqs_enabled = 1; | ||
| 350 | |||
| 351 | gk20a_scale_resume(dev_from_gk20a(g)); | ||
| 352 | |||
| 353 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 354 | if (platform->has_cde) | ||
| 355 | gk20a_init_cde_support(l); | ||
| 356 | #endif | ||
| 357 | |||
| 358 | err = gk20a_sched_ctrl_init(g); | ||
| 359 | if (err) { | ||
| 360 | nvgpu_err(g, "failed to init sched control"); | ||
| 361 | goto done; | ||
| 362 | } | ||
| 363 | |||
| 364 | g->sw_ready = true; | ||
| 365 | |||
| 366 | done: | ||
| 367 | if (err) | ||
| 368 | g->power_on = false; | ||
| 369 | |||
| 370 | nvgpu_mutex_release(&g->power_lock); | ||
| 371 | return err; | ||
| 372 | } | ||
| 373 | |||
| 374 | /* | ||
| 375 | * Locks out the driver from accessing GPU registers. This prevents access to | ||
| 376 | * these registers after the GPU has been clock or power gated. This should help | ||
| 377 | * find annoying bugs where register reads and writes are silently dropped | ||
| 378 | * after the GPU has been turned off. On older chips these reads and writes can | ||
| 379 | * also lock the entire CPU up. | ||
| 380 | */ | ||
| 381 | static int gk20a_lockout_registers(struct gk20a *g) | ||
| 382 | { | ||
| 383 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 384 | |||
| 385 | l->regs = NULL; | ||
| 386 | l->bar1 = NULL; | ||
| 387 | |||
| 388 | nvgpu_lockout_usermode_registers(g); | ||
| 389 | |||
| 390 | return 0; | ||
| 391 | } | ||
| 392 | |||
| 393 | static int gk20a_pm_prepare_poweroff(struct device *dev) | ||
| 394 | { | ||
| 395 | struct gk20a *g = get_gk20a(dev); | ||
| 396 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 397 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 398 | #endif | ||
| 399 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 400 | bool irqs_enabled; | ||
| 401 | int ret = 0; | ||
| 402 | |||
| 403 | nvgpu_log_fn(g, " "); | ||
| 404 | |||
| 405 | nvgpu_mutex_acquire(&g->power_lock); | ||
| 406 | |||
| 407 | if (!g->power_on) | ||
| 408 | goto done; | ||
| 409 | |||
| 410 | /* disable IRQs and wait for completion */ | ||
| 411 | irqs_enabled = g->irqs_enabled; | ||
| 412 | if (irqs_enabled) { | ||
| 413 | disable_irq(g->irq_stall); | ||
| 414 | if (g->irq_stall != g->irq_nonstall) | ||
| 415 | disable_irq(g->irq_nonstall); | ||
| 416 | g->irqs_enabled = 0; | ||
| 417 | } | ||
| 418 | |||
| 419 | gk20a_scale_suspend(dev); | ||
| 420 | |||
| 421 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 422 | gk20a_cde_suspend(l); | ||
| 423 | #endif | ||
| 424 | |||
| 425 | ret = gk20a_prepare_poweroff(g); | ||
| 426 | if (ret) | ||
| 427 | goto error; | ||
| 428 | |||
| 429 | /* Decrement platform power refcount */ | ||
| 430 | if (platform->idle) | ||
| 431 | platform->idle(dev); | ||
| 432 | |||
| 433 | /* Stop CPU from accessing the GPU registers. */ | ||
| 434 | gk20a_lockout_registers(g); | ||
| 435 | |||
| 436 | nvgpu_hide_usermode_for_poweroff(g); | ||
| 437 | nvgpu_mutex_release(&g->power_lock); | ||
| 438 | return 0; | ||
| 439 | |||
| 440 | error: | ||
| 441 | /* re-enable IRQs if previously enabled */ | ||
| 442 | if (irqs_enabled) { | ||
| 443 | enable_irq(g->irq_stall); | ||
| 444 | if (g->irq_stall != g->irq_nonstall) | ||
| 445 | enable_irq(g->irq_nonstall); | ||
| 446 | g->irqs_enabled = 1; | ||
| 447 | } | ||
| 448 | |||
| 449 | gk20a_scale_resume(dev); | ||
| 450 | done: | ||
| 451 | nvgpu_mutex_release(&g->power_lock); | ||
| 452 | |||
| 453 | return ret; | ||
| 454 | } | ||
| 455 | |||
| 456 | static struct of_device_id tegra_gk20a_of_match[] = { | ||
| 457 | #ifdef CONFIG_TEGRA_GK20A | ||
| 458 | { .compatible = "nvidia,tegra210-gm20b", | ||
| 459 | .data = &gm20b_tegra_platform }, | ||
| 460 | { .compatible = "nvidia,tegra186-gp10b", | ||
| 461 | .data = &gp10b_tegra_platform }, | ||
| 462 | { .compatible = "nvidia,gv11b", | ||
| 463 | .data = &gv11b_tegra_platform }, | ||
| 464 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
| 465 | { .compatible = "nvidia,gv11b-vgpu", | ||
| 466 | .data = &gv11b_vgpu_tegra_platform}, | ||
| 467 | #endif | ||
| 468 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
| 469 | { .compatible = "nvidia,tegra124-gk20a-vgpu", | ||
| 470 | .data = &vgpu_tegra_platform }, | ||
| 471 | #endif | ||
| 472 | #endif | ||
| 473 | |||
| 474 | { }, | ||
| 475 | }; | ||
| 476 | MODULE_DEVICE_TABLE(of, tegra_gk20a_of_match); | ||
| 477 | |||
| 478 | #ifdef CONFIG_PM | ||
| 479 | /** | ||
| 480 | * __gk20a_do_idle() - force the GPU to idle and railgate | ||
| 481 | * | ||
| 482 | * On success, this call MUST be balanced by the caller with __gk20a_do_unidle() | ||
| 483 | * | ||
| 484 | * Acquires two locks: &l->busy_lock and &platform->railgate_lock | ||
| 485 | * On success, we return with these locks held | ||
| 486 | * On failure, we release these locks and return | ||
| 487 | */ | ||
| 488 | int __gk20a_do_idle(struct gk20a *g, bool force_reset) | ||
| 489 | { | ||
| 490 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 491 | struct device *dev = dev_from_gk20a(g); | ||
| 492 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 493 | struct nvgpu_timeout timeout; | ||
| 494 | int ref_cnt; | ||
| 495 | int target_ref_cnt = 0; | ||
| 496 | bool is_railgated; | ||
| 497 | int err = 0; | ||
| 498 | |||
| 499 | /* | ||
| 500 | * Hold back deterministic submits and changes to deterministic | ||
| 501 | * channels - this must be outside the power busy locks. | ||
| 502 | */ | ||
| 503 | gk20a_channel_deterministic_idle(g); | ||
| 504 | |||
| 505 | /* acquire busy lock to block other busy() calls */ | ||
| 506 | down_write(&l->busy_lock); | ||
| 507 | |||
| 508 | /* acquire railgate lock to prevent unrailgate in midst of do_idle() */ | ||
| 509 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
| 510 | |||
| 511 | /* check if it is already railgated */ | ||
| 512 | if (platform->is_railgated(dev)) | ||
| 513 | return 0; | ||
| 514 | |||
| 515 | /* | ||
| 516 | * release railgate_lock, prevent suspend by incrementing usage counter, | ||
| 517 | * re-acquire railgate_lock | ||
| 518 | */ | ||
| 519 | nvgpu_mutex_release(&platform->railgate_lock); | ||
| 520 | pm_runtime_get_sync(dev); | ||
| 521 | |||
| 522 | /* | ||
| 523 | * One refcount is taken in this API. | ||
| 524 | * If the user has disabled rail gating, one extra | ||
| 525 | * refcount is held on top of that. | ||
| 526 | */ | ||
| 527 | if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) | ||
| 528 | target_ref_cnt = 1; | ||
| 529 | else | ||
| 530 | target_ref_cnt = 2; | ||
| 531 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
| 532 | |||
| 533 | nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, | ||
| 534 | NVGPU_TIMER_CPU_TIMER); | ||
| 535 | |||
| 536 | /* check and wait until GPU is idle (with a timeout) */ | ||
| 537 | do { | ||
| 538 | nvgpu_usleep_range(1000, 1100); | ||
| 539 | ref_cnt = atomic_read(&dev->power.usage_count); | ||
| 540 | } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout)); | ||
| 541 | |||
| 542 | if (ref_cnt != target_ref_cnt) { | ||
| 543 | nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt", | ||
| 544 | ref_cnt); | ||
| 545 | goto fail_drop_usage_count; | ||
| 546 | } | ||
| 547 | |||
| 548 | /* check if global force_reset flag is set */ | ||
| 549 | force_reset |= platform->force_reset_in_do_idle; | ||
| 550 | |||
| 551 | nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, | ||
| 552 | NVGPU_TIMER_CPU_TIMER); | ||
| 553 | |||
| 554 | if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) && !force_reset) { | ||
| 555 | /* | ||
| 556 | * Case 1 : GPU railgate is supported | ||
| 557 | * | ||
| 558 | * if GPU is now idle, we will have only one ref count, | ||
| 559 | * drop this ref which will rail gate the GPU | ||
| 560 | */ | ||
| 561 | pm_runtime_put_sync(dev); | ||
| 562 | |||
| 563 | /* add sufficient delay to allow GPU to rail gate */ | ||
| 564 | nvgpu_msleep(g->railgate_delay); | ||
| 565 | |||
| 566 | /* check in loop if GPU is railgated or not */ | ||
| 567 | do { | ||
| 568 | nvgpu_usleep_range(1000, 1100); | ||
| 569 | is_railgated = platform->is_railgated(dev); | ||
| 570 | } while (!is_railgated && !nvgpu_timeout_expired(&timeout)); | ||
| 571 | |||
| 572 | if (is_railgated) { | ||
| 573 | return 0; | ||
| 574 | } else { | ||
| 575 | nvgpu_err(g, "failed to idle in timeout"); | ||
| 576 | goto fail_timeout; | ||
| 577 | } | ||
| 578 | } else { | ||
| 579 | /* | ||
| 580 | * Case 2 : GPU railgate is not supported or we explicitly | ||
| 581 | * do not want to depend on runtime PM | ||
| 582 | * | ||
| 583 | * if GPU is now idle, call prepare_poweroff() to save the | ||
| 584 | * state and then do explicit railgate | ||
| 585 | * | ||
| 586 | * __gk20a_do_unidle() needs to unrailgate, call | ||
| 587 | * finalize_poweron(), and then call pm_runtime_put_sync() | ||
| 588 | * to balance the GPU usage counter | ||
| 589 | */ | ||
| 590 | |||
| 591 | /* Save the GPU state */ | ||
| 592 | err = gk20a_pm_prepare_poweroff(dev); | ||
| 593 | if (err) | ||
| 594 | goto fail_drop_usage_count; | ||
| 595 | |||
| 596 | /* railgate GPU */ | ||
| 597 | platform->railgate(dev); | ||
| 598 | |||
| 599 | nvgpu_udelay(10); | ||
| 600 | |||
| 601 | g->forced_reset = true; | ||
| 602 | return 0; | ||
| 603 | } | ||
| 604 | |||
| 605 | fail_drop_usage_count: | ||
| 606 | pm_runtime_put_noidle(dev); | ||
| 607 | fail_timeout: | ||
| 608 | nvgpu_mutex_release(&platform->railgate_lock); | ||
| 609 | up_write(&l->busy_lock); | ||
| 610 | gk20a_channel_deterministic_unidle(g); | ||
| 611 | return -EBUSY; | ||
| 612 | } | ||
| 613 | |||
| 614 | /** | ||
| 615 | * gk20a_do_idle() - wrapper for __gk20a_do_idle(), to be called | ||
| 616 | * from outside the GPU driver | ||
| 617 | * | ||
| 618 | * On success, this call MUST be balanced by the caller with gk20a_do_unidle() | ||
| 619 | */ | ||
| 620 | static int gk20a_do_idle(void *_g) | ||
| 621 | { | ||
| 622 | struct gk20a *g = (struct gk20a *)_g; | ||
| 623 | |||
| 624 | return __gk20a_do_idle(g, true); | ||
| 625 | } | ||
| 626 | |||
| 627 | /** | ||
| 628 | * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle() | ||
| 629 | */ | ||
| 630 | int __gk20a_do_unidle(struct gk20a *g) | ||
| 631 | { | ||
| 632 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 633 | struct device *dev = dev_from_gk20a(g); | ||
| 634 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 635 | int err; | ||
| 636 | |||
| 637 | if (g->forced_reset) { | ||
| 638 | /* | ||
| 639 | * If we did a forced-reset/railgate | ||
| 640 | * then unrailgate the GPU here first | ||
| 641 | */ | ||
| 642 | platform->unrailgate(dev); | ||
| 643 | |||
| 644 | /* restore the GPU state */ | ||
| 645 | err = gk20a_pm_finalize_poweron(dev); | ||
| 646 | if (err) | ||
| 647 | return err; | ||
| 648 | |||
| 649 | /* balance GPU usage counter */ | ||
| 650 | pm_runtime_put_sync(dev); | ||
| 651 | |||
| 652 | g->forced_reset = false; | ||
| 653 | } | ||
| 654 | |||
| 655 | /* release the lock and open up all other busy() calls */ | ||
| 656 | nvgpu_mutex_release(&platform->railgate_lock); | ||
| 657 | up_write(&l->busy_lock); | ||
| 658 | |||
| 659 | gk20a_channel_deterministic_unidle(g); | ||
| 660 | |||
| 661 | return 0; | ||
| 662 | } | ||
| 663 | |||
| 664 | /** | ||
| 665 | * gk20a_do_unidle() - wrapper for __gk20a_do_unidle() | ||
| 666 | */ | ||
| 667 | static int gk20a_do_unidle(void *_g) | ||
| 668 | { | ||
| 669 | struct gk20a *g = (struct gk20a *)_g; | ||
| 670 | |||
| 671 | return __gk20a_do_unidle(g); | ||
| 672 | } | ||
| 673 | #endif | ||
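/*
 * Pairing sketch (hypothetical, not part of this file): on success,
 * __gk20a_do_idle() returns with busy_lock and railgate_lock still
 * held, so every successful call must be balanced with
 * __gk20a_do_unidle(), which releases both locks.
 */
static int example_idle_then_reset(struct gk20a *g)
{
	int err = __gk20a_do_idle(g, true /* force_reset */);

	if (err)
		return err;

	/* ... work that requires an idle (railgated/reset) GPU ... */

	return __gk20a_do_unidle(g);
}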
| 674 | |||
| 675 | void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i, | ||
| 676 | struct resource **out) | ||
| 677 | { | ||
| 678 | struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); | ||
| 679 | |||
| 680 | if (!r) | ||
| 681 | return NULL; | ||
| 682 | if (out) | ||
| 683 | *out = r; | ||
| 684 | return devm_ioremap_resource(&dev->dev, r); | ||
| 685 | } | ||
| 686 | |||
| 687 | void __iomem *nvgpu_devm_ioremap(struct device *dev, resource_size_t offset, | ||
| 688 | resource_size_t size) | ||
| 689 | { | ||
| 690 | return devm_ioremap(dev, offset, size); | ||
| 691 | } | ||
| 692 | |||
| 693 | u64 nvgpu_resource_addr(struct platform_device *dev, int i) | ||
| 694 | { | ||
| 695 | struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); | ||
| 696 | |||
| 697 | if (!r) | ||
| 698 | return 0; | ||
| 699 | |||
| 700 | return r->start; | ||
| 701 | } | ||
| 702 | |||
| 703 | static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) | ||
| 704 | { | ||
| 705 | struct gk20a *g = dev_id; | ||
| 706 | |||
| 707 | return nvgpu_intr_stall(g); | ||
| 708 | } | ||
| 709 | |||
| 710 | static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) | ||
| 711 | { | ||
| 712 | struct gk20a *g = dev_id; | ||
| 713 | |||
| 714 | return nvgpu_intr_nonstall(g); | ||
| 715 | } | ||
| 716 | |||
| 717 | static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) | ||
| 718 | { | ||
| 719 | struct gk20a *g = dev_id; | ||
| 720 | |||
| 721 | return nvgpu_intr_thread_stall(g); | ||
| 722 | } | ||
| 723 | |||
| 724 | void gk20a_remove_support(struct gk20a *g) | ||
| 725 | { | ||
| 726 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 727 | struct sim_nvgpu_linux *sim_linux; | ||
| 728 | |||
| 729 | tegra_unregister_idle_unidle(gk20a_do_idle); | ||
| 730 | |||
| 731 | nvgpu_kfree(g, g->dbg_regops_tmp_buf); | ||
| 732 | |||
| 733 | nvgpu_remove_channel_support_linux(l); | ||
| 734 | |||
| 735 | if (g->pmu.remove_support) | ||
| 736 | g->pmu.remove_support(&g->pmu); | ||
| 737 | |||
| 738 | if (g->acr.remove_support != NULL) { | ||
| 739 | g->acr.remove_support(&g->acr); | ||
| 740 | } | ||
| 741 | |||
| 742 | if (g->gr.remove_support) | ||
| 743 | g->gr.remove_support(&g->gr); | ||
| 744 | |||
| 745 | if (g->mm.remove_ce_support) | ||
| 746 | g->mm.remove_ce_support(&g->mm); | ||
| 747 | |||
| 748 | if (g->fifo.remove_support) | ||
| 749 | g->fifo.remove_support(&g->fifo); | ||
| 750 | |||
| 751 | if (g->mm.remove_support) | ||
| 752 | g->mm.remove_support(&g->mm); | ||
| 753 | |||
| 754 | if (g->sim) { | ||
| 755 | sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); | ||
| 756 | if (g->sim->remove_support) | ||
| 757 | g->sim->remove_support(g); | ||
| 758 | if (sim_linux->remove_support_linux) | ||
| 759 | sim_linux->remove_support_linux(g); | ||
| 760 | } | ||
| 761 | |||
| 762 | nvgpu_remove_usermode_support(g); | ||
| 763 | |||
| 764 | nvgpu_free_enabled_flags(g); | ||
| 765 | |||
| 766 | gk20a_lockout_registers(g); | ||
| 767 | } | ||
| 768 | |||
| 769 | static int gk20a_init_support(struct platform_device *pdev) | ||
| 770 | { | ||
| 771 | struct device *dev = &pdev->dev; | ||
| 772 | struct gk20a *g = get_gk20a(dev); | ||
| 773 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 774 | int err = -ENOMEM; | ||
| 775 | |||
| 776 | tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g); | ||
| 777 | |||
| 778 | l->regs = nvgpu_devm_ioremap_resource(pdev, | ||
| 779 | GK20A_BAR0_IORESOURCE_MEM, | ||
| 780 | &l->reg_mem); | ||
| 781 | if (IS_ERR(l->regs)) { | ||
| 782 | nvgpu_err(g, "failed to remap gk20a registers"); | ||
| 783 | err = PTR_ERR(l->regs); | ||
| 784 | goto fail; | ||
| 785 | } | ||
| 786 | |||
| 787 | l->regs_bus_addr = nvgpu_resource_addr(pdev, | ||
| 788 | GK20A_BAR0_IORESOURCE_MEM); | ||
| 789 | if (!l->regs_bus_addr) { | ||
| 790 | nvgpu_err(g, "failed to read register bus offset"); | ||
| 791 | err = -ENODEV; | ||
| 792 | goto fail; | ||
| 793 | } | ||
| 794 | |||
| 795 | l->bar1 = nvgpu_devm_ioremap_resource(pdev, | ||
| 796 | GK20A_BAR1_IORESOURCE_MEM, | ||
| 797 | &l->bar1_mem); | ||
| 798 | if (IS_ERR(l->bar1)) { | ||
| 799 | nvgpu_err(g, "failed to remap gk20a bar1"); | ||
| 800 | err = PTR_ERR(l->bar1); | ||
| 801 | goto fail; | ||
| 802 | } | ||
| 803 | |||
| 804 | err = nvgpu_init_sim_support_linux(g, pdev); | ||
| 805 | if (err) | ||
| 806 | goto fail; | ||
| 807 | err = nvgpu_init_sim_support(g); | ||
| 808 | if (err) | ||
| 809 | goto fail_sim; | ||
| 810 | |||
| 811 | nvgpu_init_usermode_support(g); | ||
| 812 | return 0; | ||
| 813 | |||
| 814 | fail_sim: | ||
| 815 | nvgpu_remove_sim_support_linux(g); | ||
| 816 | fail: | ||
| 817 | if (l->regs) | ||
| 818 | l->regs = NULL; | ||
| 819 | |||
| 820 | if (l->bar1) | ||
| 821 | l->bar1 = NULL; | ||
| 822 | |||
| 823 | return err; | ||
| 824 | } | ||
| 825 | |||
| 826 | static int gk20a_pm_railgate(struct device *dev) | ||
| 827 | { | ||
| 828 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 829 | int ret = 0; | ||
| 830 | struct gk20a *g = get_gk20a(dev); | ||
| 831 | |||
| 832 | /* return early if platform didn't implement railgate */ | ||
| 833 | if (!platform->railgate) | ||
| 834 | return 0; | ||
| 835 | |||
| 836 | /* if platform is already railgated, then just return */ | ||
| 837 | if (platform->is_railgated && platform->is_railgated(dev)) | ||
| 838 | return ret; | ||
| 839 | |||
| 840 | #ifdef CONFIG_DEBUG_FS | ||
| 841 | g->pstats.last_rail_gate_start = jiffies; | ||
| 842 | |||
| 843 | if (g->pstats.railgating_cycle_count >= 1) | ||
| 844 | g->pstats.total_rail_ungate_time_ms = | ||
| 845 | g->pstats.total_rail_ungate_time_ms + | ||
| 846 | jiffies_to_msecs(g->pstats.last_rail_gate_start - | ||
| 847 | g->pstats.last_rail_ungate_complete); | ||
| 848 | #endif | ||
| 849 | |||
| 850 | ret = platform->railgate(dev); | ||
| 851 | if (ret) { | ||
| 852 | nvgpu_err(g, "failed to railgate platform, err=%d", ret); | ||
| 853 | return ret; | ||
| 854 | } | ||
| 855 | |||
| 856 | #ifdef CONFIG_DEBUG_FS | ||
| 857 | g->pstats.last_rail_gate_complete = jiffies; | ||
| 858 | #endif | ||
| 859 | ret = tegra_fuse_clock_disable(); | ||
| 860 | if (ret) | ||
| 861 | nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret); | ||
| 862 | |||
| 863 | return ret; | ||
| 864 | } | ||
| 865 | |||
| 866 | static int gk20a_pm_unrailgate(struct device *dev) | ||
| 867 | { | ||
| 868 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 869 | int ret = 0; | ||
| 870 | struct gk20a *g = get_gk20a(dev); | ||
| 871 | |||
| 872 | /* return early if platform didn't implement unrailgate */ | ||
| 873 | if (!platform->unrailgate) | ||
| 874 | return 0; | ||
| 875 | |||
| 876 | ret = tegra_fuse_clock_enable(); | ||
| 877 | if (ret) { | ||
| 878 | nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret); | ||
| 879 | return ret; | ||
| 880 | } | ||
| 881 | #ifdef CONFIG_DEBUG_FS | ||
| 882 | g->pstats.last_rail_ungate_start = jiffies; | ||
| 883 | if (g->pstats.railgating_cycle_count >= 1) | ||
| 884 | g->pstats.total_rail_gate_time_ms = | ||
| 885 | g->pstats.total_rail_gate_time_ms + | ||
| 886 | jiffies_to_msecs(g->pstats.last_rail_ungate_start - | ||
| 887 | g->pstats.last_rail_gate_complete); | ||
| 888 | |||
| 889 | g->pstats.railgating_cycle_count++; | ||
| 890 | #endif | ||
| 891 | |||
| 892 | trace_gk20a_pm_unrailgate(dev_name(dev)); | ||
| 893 | |||
| 894 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
| 895 | ret = platform->unrailgate(dev); | ||
| 896 | nvgpu_mutex_release(&platform->railgate_lock); | ||
| 897 | |||
| 898 | #ifdef CONFIG_DEBUG_FS | ||
| 899 | g->pstats.last_rail_ungate_complete = jiffies; | ||
| 900 | #endif | ||
| 901 | |||
| 902 | return ret; | ||
| 903 | } | ||
| 904 | |||
| 905 | /* | ||
| 906 | * Remove the driver's association with the OS interrupt handlers | ||
| 907 | */ | ||
| 908 | void nvgpu_free_irq(struct gk20a *g) | ||
| 909 | { | ||
| 910 | struct device *dev = dev_from_gk20a(g); | ||
| 911 | |||
| 912 | devm_free_irq(dev, g->irq_stall, g); | ||
| 913 | if (g->irq_stall != g->irq_nonstall) | ||
| 914 | devm_free_irq(dev, g->irq_nonstall, g); | ||
| 915 | } | ||
| 916 | |||
| 917 | /* | ||
| 918 | * Idle the GPU in preparation for shutdown/remove. | ||
| 919 | * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW | ||
| 920 | * state to prevent further activity on the driver SW side. | ||
| 921 | * On driver removal, quiesce() should be called after start_unload(). | ||
| 922 | */ | ||
| 923 | int nvgpu_quiesce(struct gk20a *g) | ||
| 924 | { | ||
| 925 | int err; | ||
| 926 | struct device *dev = dev_from_gk20a(g); | ||
| 927 | |||
| 928 | if (g->power_on) { | ||
| 929 | err = gk20a_wait_for_idle(g); | ||
| 930 | if (err) { | ||
| 931 | nvgpu_err(g, "failed to idle GPU, err=%d", err); | ||
| 932 | return err; | ||
| 933 | } | ||
| 934 | |||
| 935 | err = gk20a_fifo_disable_all_engine_activity(g, true); | ||
| 936 | if (err) { | ||
| 937 | nvgpu_err(g, | ||
| 938 | "failed to disable engine activity, err=%d", | ||
| 939 | err); | ||
| 940 | return err; | ||
| 941 | } | ||
| 942 | |||
| 943 | err = gk20a_fifo_wait_engine_idle(g); | ||
| 944 | if (err) { | ||
| 945 | nvgpu_err(g, "failed to idle engines, err=%d", | ||
| 946 | err); | ||
| 947 | return err; | ||
| 948 | } | ||
| 949 | } | ||
| 950 | |||
| 951 | if (gk20a_gpu_is_virtual(dev)) | ||
| 952 | err = vgpu_pm_prepare_poweroff(dev); | ||
| 953 | else | ||
| 954 | err = gk20a_pm_prepare_poweroff(dev); | ||
| 955 | |||
| 956 | if (err) | ||
| 957 | nvgpu_err(g, "failed to prepare for poweroff, err=%d", | ||
| 958 | err); | ||
| 959 | |||
| 960 | return err; | ||
| 961 | } | ||
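/*
 * Removal-path ordering sketch (restating the comment above; the calls
 * are real, the sequence is illustrative): stop new SW activity first
 * with gk20a_driver_start_unload(g), then call nvgpu_quiesce(g) to idle
 * the engines and prepare the power-off.
 */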
| 962 | |||
| 963 | static void gk20a_pm_shutdown(struct platform_device *pdev) | ||
| 964 | { | ||
| 965 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
| 966 | struct gk20a *g = platform->g; | ||
| 967 | int err; | ||
| 968 | |||
| 969 | nvgpu_info(g, "shutting down"); | ||
| 970 | |||
| 971 | /* vgpu has nothing to clean up currently */ | ||
| 972 | if (gk20a_gpu_is_virtual(&pdev->dev)) | ||
| 973 | return; | ||
| 974 | |||
| 975 | if (!g->power_on) | ||
| 976 | goto finish; | ||
| 977 | |||
| 978 | gk20a_driver_start_unload(g); | ||
| 979 | |||
| 980 | /* If GPU is already railgated, | ||
| 981 | * just prevent more requests, and return */ | ||
| 982 | if (platform->is_railgated && platform->is_railgated(&pdev->dev)) { | ||
| 983 | __pm_runtime_disable(&pdev->dev, false); | ||
| 984 | nvgpu_info(g, "already railgated, shut down complete"); | ||
| 985 | return; | ||
| 986 | } | ||
| 987 | |||
| 988 | /* Prevent more requests by disabling Runtime PM */ | ||
| 989 | __pm_runtime_disable(&pdev->dev, false); | ||
| 990 | |||
| 991 | err = nvgpu_quiesce(g); | ||
| 992 | if (err) | ||
| 993 | goto finish; | ||
| 994 | |||
| 995 | err = gk20a_pm_railgate(&pdev->dev); | ||
| 996 | if (err) | ||
| 997 | nvgpu_err(g, "failed to railgate, err=%d", err); | ||
| 998 | |||
| 999 | finish: | ||
| 1000 | nvgpu_info(g, "shut down complete"); | ||
| 1001 | } | ||
| 1002 | |||
| 1003 | #ifdef CONFIG_PM | ||
| 1004 | static int gk20a_pm_runtime_resume(struct device *dev) | ||
| 1005 | { | ||
| 1006 | int err = 0; | ||
| 1007 | |||
| 1008 | err = gk20a_pm_unrailgate(dev); | ||
| 1009 | if (err) | ||
| 1010 | goto fail; | ||
| 1011 | |||
| 1012 | if (gk20a_gpu_is_virtual(dev)) | ||
| 1013 | err = vgpu_pm_finalize_poweron(dev); | ||
| 1014 | else | ||
| 1015 | err = gk20a_pm_finalize_poweron(dev); | ||
| 1016 | if (err) | ||
| 1017 | goto fail_poweron; | ||
| 1018 | |||
| 1019 | return 0; | ||
| 1020 | |||
| 1021 | fail_poweron: | ||
| 1022 | gk20a_pm_railgate(dev); | ||
| 1023 | fail: | ||
| 1024 | return err; | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | static int gk20a_pm_runtime_suspend(struct device *dev) | ||
| 1028 | { | ||
| 1029 | int err = 0; | ||
| 1030 | struct gk20a *g = get_gk20a(dev); | ||
| 1031 | |||
| 1032 | if (!g) | ||
| 1033 | return 0; | ||
| 1034 | |||
| 1035 | if (gk20a_gpu_is_virtual(dev)) | ||
| 1036 | err = vgpu_pm_prepare_poweroff(dev); | ||
| 1037 | else | ||
| 1038 | err = gk20a_pm_prepare_poweroff(dev); | ||
| 1039 | if (err) { | ||
| 1040 | nvgpu_err(g, "failed to power off, err=%d", err); | ||
| 1041 | goto fail; | ||
| 1042 | } | ||
| 1043 | |||
| 1044 | err = gk20a_pm_railgate(dev); | ||
| 1045 | if (err) | ||
| 1046 | goto fail; | ||
| 1047 | |||
| 1048 | return 0; | ||
| 1049 | |||
| 1050 | fail: | ||
| 1051 | gk20a_pm_finalize_poweron(dev); | ||
| 1052 | pm_runtime_mark_last_busy(dev); | ||
| 1053 | return err; | ||
| 1054 | } | ||
| 1055 | |||
| 1056 | static int gk20a_pm_suspend(struct device *dev) | ||
| 1057 | { | ||
| 1058 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 1059 | struct gk20a *g = get_gk20a(dev); | ||
| 1060 | int ret = 0; | ||
| 1061 | int usage_count; | ||
| 1062 | struct nvgpu_timeout timeout; | ||
| 1063 | |||
| 1064 | if (!g->power_on) { | ||
| 1065 | if (platform->suspend) | ||
| 1066 | ret = platform->suspend(dev); | ||
| 1067 | |||
| 1068 | if (ret) | ||
| 1069 | return ret; | ||
| 1070 | |||
| 1071 | if (!pm_runtime_enabled(dev)) | ||
| 1072 | ret = gk20a_pm_railgate(dev); | ||
| 1073 | |||
| 1074 | return ret; | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, | ||
| 1078 | NVGPU_TIMER_CPU_TIMER); | ||
| 1079 | /* | ||
| 1080 | * Hold back deterministic submits and changes to deterministic | ||
| 1081 | * channels - this must be outside the power busy locks. | ||
| 1082 | */ | ||
| 1083 | gk20a_channel_deterministic_idle(g); | ||
| 1084 | |||
| 1085 | /* check and wait until GPU is idle (with a timeout) */ | ||
| 1086 | do { | ||
| 1087 | nvgpu_usleep_range(1000, 1100); | ||
| 1088 | usage_count = nvgpu_atomic_read(&g->usage_count); | ||
| 1089 | } while (usage_count != 0 && !nvgpu_timeout_expired(&timeout)); | ||
| 1090 | |||
| 1091 | if (usage_count != 0) { | ||
| 1092 | nvgpu_err(g, "failed to idle - usage_count %d", usage_count); | ||
| 1093 | ret = -EINVAL; | ||
| 1094 | goto fail_idle; | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | ret = gk20a_pm_runtime_suspend(dev); | ||
| 1098 | if (ret) | ||
| 1099 | goto fail_idle; | ||
| 1100 | |||
| 1101 | if (platform->suspend) | ||
| 1102 | ret = platform->suspend(dev); | ||
| 1103 | if (ret) | ||
| 1104 | goto fail_suspend; | ||
| 1105 | |||
| 1106 | g->suspended = true; | ||
| 1107 | |||
| 1108 | return 0; | ||
| 1109 | |||
| 1110 | fail_suspend: | ||
| 1111 | gk20a_pm_runtime_resume(dev); | ||
| 1112 | fail_idle: | ||
| 1113 | gk20a_channel_deterministic_unidle(g); | ||
| 1114 | return ret; | ||
| 1115 | } | ||
| 1116 | |||
| 1117 | static int gk20a_pm_resume(struct device *dev) | ||
| 1118 | { | ||
| 1119 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 1120 | struct gk20a *g = get_gk20a(dev); | ||
| 1121 | int ret = 0; | ||
| 1122 | |||
| 1123 | if (!g->suspended) { | ||
| 1124 | if (platform->resume) | ||
| 1125 | ret = platform->resume(dev); | ||
| 1126 | if (ret) | ||
| 1127 | return ret; | ||
| 1128 | |||
| 1129 | if (!pm_runtime_enabled(dev)) | ||
| 1130 | ret = gk20a_pm_unrailgate(dev); | ||
| 1131 | |||
| 1132 | return ret; | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | if (platform->resume) | ||
| 1136 | ret = platform->resume(dev); | ||
| 1137 | if (ret) | ||
| 1138 | return ret; | ||
| 1139 | |||
| 1140 | ret = gk20a_pm_runtime_resume(dev); | ||
| 1141 | if (ret) | ||
| 1142 | return ret; | ||
| 1143 | |||
| 1144 | g->suspended = false; | ||
| 1145 | |||
| 1146 | gk20a_channel_deterministic_unidle(g); | ||
| 1147 | |||
| 1148 | return ret; | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | static const struct dev_pm_ops gk20a_pm_ops = { | ||
| 1152 | .runtime_resume = gk20a_pm_runtime_resume, | ||
| 1153 | .runtime_suspend = gk20a_pm_runtime_suspend, | ||
| 1154 | .resume = gk20a_pm_resume, | ||
| 1155 | .suspend = gk20a_pm_suspend, | ||
| 1156 | }; | ||
| 1157 | #endif | ||
| 1158 | |||
| 1159 | static int gk20a_pm_init(struct device *dev) | ||
| 1160 | { | ||
| 1161 | struct gk20a *g = get_gk20a(dev); | ||
| 1162 | int err = 0; | ||
| 1163 | |||
| 1164 | nvgpu_log_fn(g, " "); | ||
| 1165 | |||
| 1166 | /* | ||
| 1167 | * Initialize runtime PM. When rail gating is | ||
| 1168 | * disabled, set the autosuspend delay to a negative | ||
| 1169 | * value, which suspends runtime PM. | ||
| 1170 | */ | ||
| 1171 | if (g->railgate_delay && nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) | ||
| 1172 | pm_runtime_set_autosuspend_delay(dev, | ||
| 1173 | g->railgate_delay); | ||
| 1174 | else | ||
| 1175 | pm_runtime_set_autosuspend_delay(dev, -1); | ||
| 1176 | |||
| 1177 | pm_runtime_use_autosuspend(dev); | ||
| 1178 | pm_runtime_enable(dev); | ||
| 1179 | |||
| 1180 | return err; | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | static int gk20a_pm_deinit(struct device *dev) | ||
| 1184 | { | ||
| 1185 | pm_runtime_dont_use_autosuspend(dev); | ||
| 1186 | pm_runtime_disable(dev); | ||
| 1187 | return 0; | ||
| 1188 | } | ||
| 1189 | |||
| 1190 | /* | ||
| 1191 | * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING. | ||
| 1192 | */ | ||
| 1193 | void gk20a_driver_start_unload(struct gk20a *g) | ||
| 1194 | { | ||
| 1195 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 1196 | |||
| 1197 | nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n"); | ||
| 1198 | |||
| 1199 | down_write(&l->busy_lock); | ||
| 1200 | __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); | ||
| 1201 | /* GR SW ready needs to be invalidated at this time with the busy lock | ||
| 1202 | * held to prevent a race condition in the gr/mm code */ | ||
| 1203 | g->gr.sw_ready = false; | ||
| 1204 | g->sw_ready = false; | ||
| 1205 | up_write(&l->busy_lock); | ||
| 1206 | |||
| 1207 | if (g->is_virtual) | ||
| 1208 | return; | ||
| 1209 | |||
| 1210 | gk20a_wait_for_idle(g); | ||
| 1211 | |||
| 1212 | nvgpu_wait_for_deferred_interrupts(g); | ||
| 1213 | |||
| 1214 | if (l->nonstall_work_queue) { | ||
| 1215 | cancel_work_sync(&l->nonstall_fn_work); | ||
| 1216 | destroy_workqueue(l->nonstall_work_queue); | ||
| 1217 | l->nonstall_work_queue = NULL; | ||
| 1218 | } | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a) | ||
| 1222 | { | ||
| 1223 | gk20a_get_platform(&pdev->dev)->g = gk20a; | ||
| 1224 | } | ||
| 1225 | |||
| 1226 | static int nvgpu_read_fuse_overrides(struct gk20a *g) | ||
| 1227 | { | ||
| 1228 | struct device_node *np = nvgpu_get_node(g); | ||
| 1229 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
| 1230 | u32 *fuses; | ||
| 1231 | int count, i; | ||
| 1232 | |||
| 1233 | if (!np) /* may be pcie device */ | ||
| 1234 | return 0; | ||
| 1235 | |||
| 1236 | count = of_property_count_elems_of_size(np, "fuse-overrides", 8); | ||
| 1237 | if (count <= 0) | ||
| 1238 | return count; | ||
| 1239 | |||
| 1240 | fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2); | ||
| 1241 | if (!fuses) | ||
| 1242 | return -ENOMEM; | ||
| 1243 | of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2); | ||
| 1244 | for (i = 0; i < count; i++) { | ||
| 1245 | u32 fuse, value; | ||
| 1246 | |||
| 1247 | fuse = fuses[2 * i]; | ||
| 1248 | value = fuses[2 * i + 1]; | ||
| 1249 | switch (fuse) { | ||
| 1250 | case GM20B_FUSE_OPT_TPC_DISABLE: | ||
| 1251 | g->tpc_fs_mask_user = ~value; | ||
| 1252 | break; | ||
| 1253 | case GP10B_FUSE_OPT_ECC_EN: | ||
| 1254 | g->gr.fecs_feature_override_ecc_val = value; | ||
| 1255 | break; | ||
| 1256 | case GV11B_FUSE_OPT_TPC_DISABLE: | ||
| 1257 | if (platform->set_tpc_pg_mask != NULL) | ||
| 1258 | platform->set_tpc_pg_mask(dev_from_gk20a(g), | ||
| 1259 | value); | ||
| 1260 | break; | ||
| 1261 | default: | ||
| 1262 | nvgpu_err(g, "ignore unknown fuse override %08x", fuse); | ||
| 1263 | break; | ||
| 1264 | } | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | nvgpu_kfree(g, fuses); | ||
| 1268 | |||
| 1269 | return 0; | ||
| 1270 | } | ||
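/*
 * The "fuse-overrides" property parsed above is a flat array of
 * <fuse-id value> u32 pairs. A hypothetical device-tree fragment
 * (node name and values are illustrative only):
 *
 *	gpu {
 *		fuse-overrides = <GM20B_FUSE_OPT_TPC_DISABLE 0x1>,
 *				 <GP10B_FUSE_OPT_ECC_EN 0x0>;
 *	};
 */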
| 1271 | |||
| 1272 | static int gk20a_probe(struct platform_device *dev) | ||
| 1273 | { | ||
| 1274 | struct nvgpu_os_linux *l = NULL; | ||
| 1275 | struct gk20a *gk20a; | ||
| 1276 | int err; | ||
| 1277 | struct gk20a_platform *platform = NULL; | ||
| 1278 | struct device_node *np; | ||
| 1279 | |||
| 1280 | if (dev->dev.of_node) { | ||
| 1281 | const struct of_device_id *match; | ||
| 1282 | |||
| 1283 | match = of_match_device(tegra_gk20a_of_match, &dev->dev); | ||
| 1284 | if (match) | ||
| 1285 | platform = (struct gk20a_platform *)match->data; | ||
| 1286 | } else | ||
| 1287 | platform = (struct gk20a_platform *)dev->dev.platform_data; | ||
| 1288 | |||
| 1289 | if (!platform) { | ||
| 1290 | dev_err(&dev->dev, "no platform data\n"); | ||
| 1291 | return -ENODATA; | ||
| 1292 | } | ||
| 1293 | |||
| 1294 | platform_set_drvdata(dev, platform); | ||
| 1295 | |||
| 1296 | if (gk20a_gpu_is_virtual(&dev->dev)) | ||
| 1297 | return vgpu_probe(dev); | ||
| 1298 | |||
| 1299 | l = kzalloc(sizeof(*l), GFP_KERNEL); | ||
| 1300 | if (!l) { | ||
| 1301 | dev_err(&dev->dev, "couldn't allocate gk20a support"); | ||
| 1302 | return -ENOMEM; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | hash_init(l->ecc_sysfs_stats_htable); | ||
| 1306 | |||
| 1307 | gk20a = &l->g; | ||
| 1308 | |||
| 1309 | nvgpu_log_fn(gk20a, " "); | ||
| 1310 | |||
| 1311 | nvgpu_init_gk20a(gk20a); | ||
| 1312 | set_gk20a(dev, gk20a); | ||
| 1313 | l->dev = &dev->dev; | ||
| 1314 | gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK; | ||
| 1315 | |||
| 1316 | nvgpu_kmem_init(gk20a); | ||
| 1317 | |||
| 1318 | err = nvgpu_init_enabled_flags(gk20a); | ||
| 1319 | if (err) | ||
| 1320 | goto return_err; | ||
| 1321 | |||
| 1322 | np = nvgpu_get_node(gk20a); | ||
| 1323 | if (of_dma_is_coherent(np)) { | ||
| 1324 | __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); | ||
| 1325 | __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | if (nvgpu_platform_is_simulation(gk20a)) | ||
| 1329 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | ||
| 1330 | |||
| 1331 | gk20a->irq_stall = platform_get_irq(dev, 0); | ||
| 1332 | gk20a->irq_nonstall = platform_get_irq(dev, 1); | ||
| 1333 | if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) { | ||
| 1334 | err = -ENXIO; | ||
| 1335 | goto return_err; | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | err = devm_request_threaded_irq(&dev->dev, | ||
| 1339 | gk20a->irq_stall, | ||
| 1340 | gk20a_intr_isr_stall, | ||
| 1341 | gk20a_intr_thread_stall, | ||
| 1342 | 0, "gk20a_stall", gk20a); | ||
| 1343 | if (err) { | ||
| 1344 | dev_err(&dev->dev, | ||
| 1345 | "failed to request stall intr irq @ %d\n", | ||
| 1346 | gk20a->irq_stall); | ||
| 1347 | goto return_err; | ||
| 1348 | } | ||
| 1349 | err = devm_request_irq(&dev->dev, | ||
| 1350 | gk20a->irq_nonstall, | ||
| 1351 | gk20a_intr_isr_nonstall, | ||
| 1352 | 0, "gk20a_nonstall", gk20a); | ||
| 1353 | if (err) { | ||
| 1354 | dev_err(&dev->dev, | ||
| 1355 | "failed to request non-stall intr irq @ %d\n", | ||
| 1356 | gk20a->irq_nonstall); | ||
| 1357 | goto return_err; | ||
| 1358 | } | ||
| 1359 | disable_irq(gk20a->irq_stall); | ||
| 1360 | if (gk20a->irq_stall != gk20a->irq_nonstall) | ||
| 1361 | disable_irq(gk20a->irq_nonstall); | ||
| 1362 | |||
| 1363 | err = gk20a_init_support(dev); | ||
| 1364 | if (err) | ||
| 1365 | goto return_err; | ||
| 1366 | |||
| 1367 | err = nvgpu_read_fuse_overrides(gk20a); | ||
| 1368 | |||
| 1369 | #ifdef CONFIG_RESET_CONTROLLER | ||
| 1370 | platform->reset_control = devm_reset_control_get(&dev->dev, NULL); | ||
| 1371 | if (IS_ERR(platform->reset_control)) | ||
| 1372 | platform->reset_control = NULL; | ||
| 1373 | #endif | ||
| 1374 | |||
| 1375 | err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class); | ||
| 1376 | if (err) | ||
| 1377 | goto return_err; | ||
| 1378 | |||
| 1379 | err = gk20a_pm_init(&dev->dev); | ||
| 1380 | if (err) { | ||
| 1381 | dev_err(&dev->dev, "pm init failed"); | ||
| 1382 | goto return_err; | ||
| 1383 | } | ||
| 1384 | |||
| 1385 | gk20a->nvgpu_reboot_nb.notifier_call = | ||
| 1386 | nvgpu_kernel_shutdown_notification; | ||
| 1387 | err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb); | ||
| 1388 | if (err) | ||
| 1389 | goto return_err; | ||
| 1390 | |||
| 1391 | return 0; | ||
| 1392 | |||
| 1393 | return_err: | ||
| 1394 | nvgpu_free_enabled_flags(gk20a); | ||
| 1395 | |||
| 1396 | /* | ||
| 1397 | * Do this last, since the above allocations may use data structures from here. | ||
| 1398 | */ | ||
| 1399 | nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP); | ||
| 1400 | |||
| 1401 | kfree(l); | ||
| 1402 | |||
| 1403 | return err; | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | int nvgpu_remove(struct device *dev, struct class *class) | ||
| 1407 | { | ||
| 1408 | struct gk20a *g = get_gk20a(dev); | ||
| 1409 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 1410 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 1411 | #endif | ||
| 1412 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 1413 | int err; | ||
| 1414 | |||
| 1415 | nvgpu_log_fn(g, " "); | ||
| 1416 | |||
| 1417 | err = nvgpu_quiesce(g); | ||
| 1418 | WARN(err, "gpu failed to idle during driver removal"); | ||
| 1419 | |||
| 1420 | if (nvgpu_mem_is_valid(&g->syncpt_mem)) | ||
| 1421 | nvgpu_dma_free(g, &g->syncpt_mem); | ||
| 1422 | |||
| 1423 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 1424 | if (platform->has_cde) | ||
| 1425 | gk20a_cde_destroy(l); | ||
| 1426 | #endif | ||
| 1427 | |||
| 1428 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 1429 | gk20a_ctxsw_trace_cleanup(g); | ||
| 1430 | #endif | ||
| 1431 | |||
| 1432 | gk20a_sched_ctrl_cleanup(g); | ||
| 1433 | |||
| 1434 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | ||
| 1435 | gk20a_scale_exit(dev); | ||
| 1436 | |||
| 1437 | nvgpu_clk_arb_cleanup_arbiter(g); | ||
| 1438 | |||
| 1439 | gk20a_user_deinit(dev, class); | ||
| 1440 | |||
| 1441 | gk20a_debug_deinit(g); | ||
| 1442 | |||
| 1443 | nvgpu_remove_sysfs(dev); | ||
| 1444 | |||
| 1445 | if (platform->secure_buffer.destroy) | ||
| 1446 | platform->secure_buffer.destroy(g, | ||
| 1447 | &platform->secure_buffer); | ||
| 1448 | |||
| 1449 | if (platform->remove) | ||
| 1450 | platform->remove(dev); | ||
| 1451 | |||
| 1452 | nvgpu_mutex_destroy(&g->clk_arb_enable_lock); | ||
| 1453 | |||
| 1454 | nvgpu_log_fn(g, "removed"); | ||
| 1455 | |||
| 1456 | return err; | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | static int __exit gk20a_remove(struct platform_device *pdev) | ||
| 1460 | { | ||
| 1461 | int err; | ||
| 1462 | struct device *dev = &pdev->dev; | ||
| 1463 | struct gk20a *g = get_gk20a(dev); | ||
| 1464 | |||
| 1465 | if (gk20a_gpu_is_virtual(dev)) | ||
| 1466 | return vgpu_remove(pdev); | ||
| 1467 | |||
| 1468 | err = nvgpu_remove(dev, &nvgpu_class); | ||
| 1469 | |||
| 1470 | unregister_reboot_notifier(&g->nvgpu_reboot_nb); | ||
| 1471 | |||
| 1472 | set_gk20a(pdev, NULL); | ||
| 1473 | |||
| 1474 | gk20a_put(g); | ||
| 1475 | |||
| 1476 | gk20a_pm_deinit(dev); | ||
| 1477 | |||
| 1478 | return err; | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | static struct platform_driver gk20a_driver = { | ||
| 1482 | .probe = gk20a_probe, | ||
| 1483 | .remove = __exit_p(gk20a_remove), | ||
| 1484 | .shutdown = gk20a_pm_shutdown, | ||
| 1485 | .driver = { | ||
| 1486 | .owner = THIS_MODULE, | ||
| 1487 | .name = "gk20a", | ||
| 1488 | .probe_type = PROBE_PREFER_ASYNCHRONOUS, | ||
| 1489 | #ifdef CONFIG_OF | ||
| 1490 | .of_match_table = tegra_gk20a_of_match, | ||
| 1491 | #endif | ||
| 1492 | #ifdef CONFIG_PM | ||
| 1493 | .pm = &gk20a_pm_ops, | ||
| 1494 | #endif | ||
| 1495 | .suppress_bind_attrs = true, | ||
| 1496 | } | ||
| 1497 | }; | ||
| 1498 | |||
| 1499 | struct class nvgpu_class = { | ||
| 1500 | .owner = THIS_MODULE, | ||
| 1501 | .name = CLASS_NAME, | ||
| 1502 | }; | ||
| 1503 | |||
| 1504 | static int __init gk20a_init(void) | ||
| 1505 | { | ||
| 1507 | int ret; | ||
| 1508 | |||
| 1509 | ret = class_register(&nvgpu_class); | ||
| 1510 | if (ret) | ||
| 1511 | return ret; | ||
| 1512 | |||
| 1513 | ret = nvgpu_pci_init(); | ||
| 1514 | if (ret) | ||
| 1515 | return ret; | ||
| 1516 | |||
| 1517 | return platform_driver_register(&gk20a_driver); | ||
| 1518 | } | ||
| 1519 | |||
| 1520 | static void __exit gk20a_exit(void) | ||
| 1521 | { | ||
| 1522 | nvgpu_pci_exit(); | ||
| 1523 | platform_driver_unregister(&gk20a_driver); | ||
| 1524 | class_unregister(&nvgpu_class); | ||
| 1525 | } | ||
| 1526 | |||
| 1527 | MODULE_LICENSE("GPL v2"); | ||
| 1528 | module_init(gk20a_init); | ||
| 1529 | module_exit(gk20a_exit); | ||
diff --git a/include/os/linux/module.h b/include/os/linux/module.h new file mode 100644 index 0000000..76c7274 --- /dev/null +++ b/include/os/linux/module.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | #ifndef __NVGPU_COMMON_LINUX_MODULE_H__ | ||
| 14 | #define __NVGPU_COMMON_LINUX_MODULE_H__ | ||
| 15 | |||
| 16 | struct gk20a; | ||
| 17 | struct device; | ||
| 18 | struct nvgpu_os_linux; | ||
| 19 | |||
| 20 | int gk20a_pm_finalize_poweron(struct device *dev); | ||
| 21 | int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l); | ||
| 22 | void gk20a_remove_support(struct gk20a *g); | ||
| 23 | void gk20a_driver_start_unload(struct gk20a *g); | ||
| 24 | int nvgpu_quiesce(struct gk20a *g); | ||
| 25 | int nvgpu_remove(struct device *dev, struct class *class); | ||
| 26 | void nvgpu_free_irq(struct gk20a *g); | ||
| 27 | struct device_node *nvgpu_get_node(struct gk20a *g); | ||
| 28 | void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i, | ||
| 29 | struct resource **out); | ||
| 30 | void __iomem *nvgpu_devm_ioremap(struct device *dev, resource_size_t offset, | ||
| 31 | resource_size_t size); | ||
| 32 | u64 nvgpu_resource_addr(struct platform_device *dev, int i); | ||
| 33 | extern struct class nvgpu_class; | ||
| 34 | |||
| 35 | #endif | ||
diff --git a/include/os/linux/module_usermode.c b/include/os/linux/module_usermode.c new file mode 100644 index 0000000..ea01c1b --- /dev/null +++ b/include/os/linux/module_usermode.c | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/types.h> | ||
| 18 | |||
| 19 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
| 20 | |||
| 21 | #include "os_linux.h" | ||
| 22 | |||
| 23 | /* | ||
| 24 | * Locks out the driver from accessing GPU registers. This prevents access to | ||
| 25 | * these registers after the GPU has been clock or power gated. This should help | ||
| 26 | * find annoying bugs where register reads and writes are silently dropped | ||
| 27 | * after the GPU has been turned off. On older chips these reads and writes can | ||
| 28 | * also lock the entire CPU up. | ||
| 29 | */ | ||
| 30 | void nvgpu_lockout_usermode_registers(struct gk20a *g) | ||
| 31 | { | ||
| 32 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 33 | |||
| 34 | l->usermode_regs = NULL; | ||
| 35 | } | ||
| 36 | |||
| 37 | /* | ||
| 38 | * Undoes nvgpu_lockout_usermode_registers(). | ||
| 39 | */ | ||
| 40 | void nvgpu_restore_usermode_registers(struct gk20a *g) | ||
| 41 | { | ||
| 42 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 43 | |||
| 44 | l->usermode_regs = l->usermode_regs_saved; | ||
| 45 | } | ||
| 46 | |||
| 47 | void nvgpu_remove_usermode_support(struct gk20a *g) | ||
| 48 | { | ||
| 49 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 50 | |||
| 51 | if (l->usermode_regs) { | ||
| 52 | l->usermode_regs = NULL; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | void nvgpu_init_usermode_support(struct gk20a *g) | ||
| 57 | { | ||
| 58 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 59 | |||
| 60 | l->usermode_regs = l->regs + usermode_cfg0_r(); | ||
| 61 | l->usermode_regs_saved = l->usermode_regs; | ||
| 62 | } | ||
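/*
 * Sequencing note (derived from module.c above): these helpers mirror
 * the BAR0 lockout/restore pattern - gk20a_lockout_registers() calls
 * nvgpu_lockout_usermode_registers() before the GPU is clock/power
 * gated, and gk20a_restore_registers() calls
 * nvgpu_restore_usermode_registers() on power-on.
 */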
diff --git a/include/os/linux/module_usermode.h b/include/os/linux/module_usermode.h new file mode 100644 index 0000000..b17053c --- /dev/null +++ b/include/os/linux/module_usermode.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __NVGPU_MODULE_USERMODE_H__ | ||
| 18 | #define __NVGPU_MODULE_USERMODE_H__ | ||
| 19 | |||
| 20 | struct gk20a; | ||
| 21 | |||
| 22 | void nvgpu_init_usermode_support(struct gk20a *g); | ||
| 23 | void nvgpu_remove_usermode_support(struct gk20a *g); | ||
| 24 | void nvgpu_lockout_usermode_registers(struct gk20a *g); | ||
| 25 | void nvgpu_restore_usermode_registers(struct gk20a *g); | ||
| 26 | |||
| 27 | #endif | ||
diff --git a/include/os/linux/nvgpu_mem.c b/include/os/linux/nvgpu_mem.c new file mode 100644 index 0000000..d6a3189 --- /dev/null +++ b/include/os/linux/nvgpu_mem.c | |||
| @@ -0,0 +1,348 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/dma.h> | ||
| 18 | #include <nvgpu/gmmu.h> | ||
| 19 | #include <nvgpu/nvgpu_mem.h> | ||
| 20 | #include <nvgpu/page_allocator.h> | ||
| 21 | #include <nvgpu/log.h> | ||
| 22 | #include <nvgpu/bug.h> | ||
| 23 | #include <nvgpu/enabled.h> | ||
| 24 | #include <nvgpu/kmem.h> | ||
| 25 | #include <nvgpu/vidmem.h> | ||
| 26 | #include <nvgpu/gk20a.h> | ||
| 27 | |||
| 28 | #include <nvgpu/linux/dma.h> | ||
| 29 | |||
| 30 | #include <linux/vmalloc.h> | ||
| 31 | #include <linux/dma-mapping.h> | ||
| 32 | |||
| 33 | #include "os_linux.h" | ||
| 34 | #include "dmabuf_vidmem.h" | ||
| 35 | |||
| 36 | #include "gk20a/mm_gk20a.h" | ||
| 37 | #include "platform_gk20a.h" | ||
| 38 | |||
| 39 | static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) | ||
| 40 | { | ||
| 41 | struct device *dev = dev_from_gk20a(g); | ||
| 42 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 43 | u64 ipa = sg_phys((struct scatterlist *)sgl); | ||
| 44 | |||
| 45 | if (platform->phys_addr) | ||
| 46 | return platform->phys_addr(g, ipa); | ||
| 47 | |||
| 48 | return ipa; | ||
| 49 | } | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Obtain a SYSMEM address from a Linux SGL. This should eventually go away | ||
| 53 | * and/or become private to this file once all bad usages of Linux SGLs are | ||
| 54 | * cleaned up in the driver. | ||
| 55 | */ | ||
| 56 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) | ||
| 57 | { | ||
| 58 | if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || | ||
| 59 | !nvgpu_iommuable(g)) | ||
| 60 | return g->ops.mm.gpu_phys_addr(g, NULL, | ||
| 61 | __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); | ||
| 62 | |||
| 63 | if (sg_dma_address(sgl) == 0) | ||
| 64 | return g->ops.mm.gpu_phys_addr(g, NULL, | ||
| 65 | __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); | ||
| 66 | |||
| 67 | if (sg_dma_address(sgl) == DMA_ERROR_CODE) | ||
| 68 | return 0; | ||
| 69 | |||
| 70 | return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl)); | ||
| 71 | } | ||
| 72 | |||
| 73 | /* | ||
| 74 | * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM | ||
| 75 | * allocation. | ||
| 76 | */ | ||
| 77 | static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem) | ||
| 78 | { | ||
| 79 | return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl); | ||
| 80 | } | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM | ||
| 84 | * allocation. | ||
| 85 | * | ||
| 86 | * Note: this API does not make sense to use for _VIDMEM_ buffers with greater | ||
| 87 | * than one scatterlist chunk. If there's more than one scatterlist chunk then | ||
| 88 | * the buffer will not be contiguous. As such the base address probably isn't | ||
| 89 | * very useful. This is true for SYSMEM as well, if there's no IOMMU. | ||
| 90 | * | ||
| 91 | * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's | ||
| 92 | * an IOMMU present and enabled for the GPU. | ||
| 96 | */ | ||
| 97 | u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) | ||
| 98 | { | ||
| 99 | struct nvgpu_page_alloc *alloc; | ||
| 100 | |||
| 101 | if (mem->aperture == APERTURE_SYSMEM) | ||
| 102 | return nvgpu_mem_get_addr_sysmem(g, mem); | ||
| 103 | |||
| 104 | /* | ||
| 105 | * Otherwise get the vidmem address. | ||
| 106 | */ | ||
| 107 | alloc = mem->vidmem_alloc; | ||
| 108 | |||
| 109 | /* This API should not be used with more than one chunk */ | ||
| 110 | WARN_ON(alloc->nr_chunks != 1); | ||
| 111 | |||
| 112 | return alloc->base; | ||
| 113 | } | ||
| 114 | |||
| 115 | /* | ||
| 116 | * This should only be used on contiguous buffers regardless of whether | ||
| 117 | * there's an IOMMU present/enabled. This applies to both SYSMEM and | ||
| 118 | * VIDMEM. | ||
| 119 | */ | ||
| 120 | u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) | ||
| 121 | { | ||
| 122 | /* | ||
| 123 | * For a VIDMEM buf, this is identical to simply get_addr() so just fall | ||
| 124 | * back to that. | ||
| 125 | */ | ||
| 126 | if (mem->aperture == APERTURE_VIDMEM) | ||
| 127 | return nvgpu_mem_get_addr(g, mem); | ||
| 128 | |||
| 129 | return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl); | ||
| 130 | } | ||
| 131 | |||
| 132 | /* | ||
| 133 | * Be careful how you use this! You are responsible for correctly freeing this | ||
| 134 | * memory. | ||
| 135 | */ | ||
| 136 | int nvgpu_mem_create_from_mem(struct gk20a *g, | ||
| 137 | struct nvgpu_mem *dest, struct nvgpu_mem *src, | ||
| 138 | u64 start_page, int nr_pages) | ||
| 139 | { | ||
| 140 | int ret; | ||
| 141 | u64 start = start_page * PAGE_SIZE; | ||
| 142 | u64 size = nr_pages * PAGE_SIZE; | ||
| 143 | dma_addr_t new_iova; | ||
| 144 | |||
| 145 | if (src->aperture != APERTURE_SYSMEM) | ||
| 146 | return -EINVAL; | ||
| 147 | |||
| 148 | /* Some silly things a caller might do... */ | ||
| 149 | if (size > src->size) | ||
| 150 | return -EINVAL; | ||
| 151 | if ((start + size) > src->size) | ||
| 152 | return -EINVAL; | ||
| 153 | |||
| 154 | dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY; | ||
| 155 | dest->aperture = src->aperture; | ||
| 156 | dest->skip_wmb = src->skip_wmb; | ||
| 157 | dest->size = size; | ||
| 158 | |||
| 159 | /* | ||
| 160 | * Re-use the CPU mapping only if the mapping was made by the DMA API. | ||
| 161 | * | ||
| 162 | * Bug 2040115: the DMA API wrapper makes the mapping that we should | ||
| 163 | * re-use. | ||
| 164 | */ | ||
| 165 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || | ||
| 166 | nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
| 167 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); | ||
| 168 | |||
| 169 | dest->priv.pages = src->priv.pages + start_page; | ||
| 170 | dest->priv.flags = src->priv.flags; | ||
| 171 | |||
| 172 | new_iova = sg_dma_address(src->priv.sgt->sgl) ? | ||
| 173 | sg_dma_address(src->priv.sgt->sgl) + start : 0; | ||
| 174 | |||
| 175 | /* | ||
| 176 | * Make a new SG table that is based only on the subset of pages that | ||
| 177 | * is passed to us. This table gets freed by the dma free routines. | ||
| 178 | */ | ||
| 179 | if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) | ||
| 180 | ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt, | ||
| 181 | src->priv.pages + start_page, | ||
| 182 | new_iova, size); | ||
| 183 | else | ||
| 184 | ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va, | ||
| 185 | new_iova, size); | ||
| 186 | |||
| 187 | return ret; | ||
| 188 | } | ||
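A minimal usage sketch for the helper above (the example_* name is hypothetical; `src` is assumed to be a SYSMEM allocation of at least four pages): carve out a two-page shadow window starting at page two, which the caller must later free per the warning above.

static int example_make_window(struct gk20a *g, struct nvgpu_mem *src,
                               struct nvgpu_mem *window)
{
        /* A two-page sub-buffer aliasing pages 2..3 of src. */
        int err = nvgpu_mem_create_from_mem(g, window, src, 2, 2);

        if (err)
                return err;

        /*
         * window now shares the pages, flags and (when the DMA API made
         * one) the CPU mapping of src, shifted by two pages.
         */
        return 0;
}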
| 189 | |||
| 190 | int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, | ||
| 191 | struct page **pages, int nr_pages) | ||
| 192 | { | ||
| 193 | struct sg_table *sgt; | ||
| 194 | struct page **our_pages = | ||
| 195 | nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); | ||
| 196 | |||
| 197 | if (!our_pages) | ||
| 198 | return -ENOMEM; | ||
| 199 | |||
| 200 | memcpy(our_pages, pages, sizeof(struct page *) * nr_pages); | ||
| 201 | |||
| 202 | if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0, | ||
| 203 | nr_pages * PAGE_SIZE)) { | ||
| 204 | nvgpu_kfree(g, our_pages); | ||
| 205 | return -ENOMEM; | ||
| 206 | } | ||
| 207 | |||
| 208 | /* | ||
| 209 | * If we are making an SGT from physical pages we can be reasonably | ||
| 210 | * certain that this should bypass the SMMU - thus we set the DMA (aka | ||
| 211 | * IOVA) address to 0. This tells the GMMU mapping code to not make a | ||
| 212 | * mapping directed to the SMMU. | ||
| 213 | */ | ||
| 214 | sg_dma_address(sgt->sgl) = 0; | ||
| 215 | |||
| 216 | dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA; | ||
| 217 | dest->aperture = APERTURE_SYSMEM; | ||
| 218 | dest->skip_wmb = 0; | ||
| 219 | dest->size = PAGE_SIZE * nr_pages; | ||
| 220 | |||
| 221 | dest->priv.flags = 0; | ||
| 222 | dest->priv.pages = our_pages; | ||
| 223 | dest->priv.sgt = sgt; | ||
| 224 | |||
| 225 | return 0; | ||
| 226 | } | ||
| 227 | |||
| 228 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 229 | int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, | ||
| 230 | u64 src_phys, int nr_pages) | ||
| 231 | { | ||
| 232 | struct page **pages = | ||
| 233 | nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); | ||
| 234 | int i, ret = 0; | ||
| 235 | |||
| 236 | if (!pages) | ||
| 237 | return -ENOMEM; | ||
| 238 | |||
| 239 | for (i = 0; i < nr_pages; i++) | ||
| 240 | pages[i] = phys_to_page(src_phys + PAGE_SIZE * i); | ||
| 241 | |||
| 242 | ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages); | ||
| 243 | nvgpu_kfree(g, pages); | ||
| 244 | |||
| 245 | return ret; | ||
| 246 | } | ||
| 247 | #endif | ||
| 248 | |||
| 249 | static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl) | ||
| 250 | { | ||
| 251 | return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl); | ||
| 252 | } | ||
| 253 | |||
| 254 | static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) | ||
| 255 | { | ||
| 256 | return (u64)__nvgpu_sgl_phys(g, sgl); | ||
| 257 | } | ||
| 258 | |||
| 259 | static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl) | ||
| 260 | { | ||
| 261 | return (u64)sg_dma_address((struct scatterlist *)sgl); | ||
| 262 | } | ||
| 263 | |||
| 264 | static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl) | ||
| 265 | { | ||
| 266 | return (u64)((struct scatterlist *)sgl)->length; | ||
| 267 | } | ||
| 268 | |||
| 269 | static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, | ||
| 270 | struct nvgpu_sgl *sgl, | ||
| 271 | struct nvgpu_gmmu_attrs *attrs) | ||
| 272 | { | ||
| 273 | if (sg_dma_address((struct scatterlist *)sgl) == 0) | ||
| 274 | return g->ops.mm.gpu_phys_addr(g, attrs, | ||
| 275 | __nvgpu_sgl_phys(g, sgl)); | ||
| 276 | |||
| 277 | if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) | ||
| 278 | return 0; | ||
| 279 | |||
| 280 | return nvgpu_mem_iommu_translate(g, | ||
| 281 | sg_dma_address((struct scatterlist *)sgl)); | ||
| 282 | } | ||
| 283 | |||
| 284 | static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g, | ||
| 285 | struct nvgpu_sgt *sgt) | ||
| 286 | { | ||
| 287 | if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG)) | ||
| 288 | return false; | ||
| 289 | return true; | ||
| 290 | } | ||
| 291 | |||
| 292 | static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
| 293 | { | ||
| 294 | /* | ||
| 295 | * Free only the nvgpu_sgt wrapper. The underlying Linux | ||
| 296 | * SGT/SGL needs to be freed separately. | ||
| 297 | */ | ||
| 298 | nvgpu_kfree(g, sgt); | ||
| 299 | } | ||
| 300 | |||
| 301 | static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { | ||
| 302 | .sgl_next = nvgpu_mem_linux_sgl_next, | ||
| 303 | .sgl_phys = nvgpu_mem_linux_sgl_phys, | ||
| 304 | .sgl_dma = nvgpu_mem_linux_sgl_dma, | ||
| 305 | .sgl_length = nvgpu_mem_linux_sgl_length, | ||
| 306 | .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, | ||
| 307 | .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable, | ||
| 308 | .sgt_free = nvgpu_mem_linux_sgl_free, | ||
| 309 | }; | ||
| 310 | |||
| 311 | static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( | ||
| 312 | struct gk20a *g, | ||
| 313 | struct scatterlist *linux_sgl) | ||
| 314 | { | ||
| 315 | struct nvgpu_page_alloc *vidmem_alloc; | ||
| 316 | |||
| 317 | vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl); | ||
| 318 | if (!vidmem_alloc) | ||
| 319 | return NULL; | ||
| 320 | |||
| 321 | return &vidmem_alloc->sgt; | ||
| 322 | } | ||
| 323 | |||
| 324 | struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) | ||
| 325 | { | ||
| 326 | struct nvgpu_sgt *nvgpu_sgt; | ||
| 327 | struct scatterlist *linux_sgl = sgt->sgl; | ||
| 328 | |||
| 329 | if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl))) | ||
| 330 | return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); | ||
| 331 | |||
| 332 | nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); | ||
| 333 | if (!nvgpu_sgt) | ||
| 334 | return NULL; | ||
| 335 | |||
| 336 | nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); | ||
| 337 | |||
| 338 | nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl; | ||
| 339 | nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; | ||
| 340 | |||
| 341 | return nvgpu_sgt; | ||
| 342 | } | ||
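Because callers only ever see the abstract nvgpu_sgt, traversal goes through the ops table installed above. A hedged sketch (the example_* name is hypothetical) that sums the chunk lengths of any SGT, whether Linux-backed or vidmem-backed:

static u64 example_sgt_total_bytes(struct gk20a *g, struct nvgpu_sgt *sgt)
{
        struct nvgpu_sgl *sgl;
        u64 total = 0;

        /* sgl_next() returns NULL once the last chunk has been seen. */
        for (sgl = sgt->sgl; sgl != NULL; sgl = sgt->ops->sgl_next(sgl))
                total += sgt->ops->sgl_length(sgl);

        return total;
}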
| 343 | |||
| 344 | struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, | ||
| 345 | struct nvgpu_mem *mem) | ||
| 346 | { | ||
| 347 | return nvgpu_linux_sgt_create(g, mem->priv.sgt); | ||
| 348 | } | ||
diff --git a/include/os/linux/nvhost.c b/include/os/linux/nvhost.c new file mode 100644 index 0000000..a9341c7 --- /dev/null +++ b/include/os/linux/nvhost.c | |||
| @@ -0,0 +1,295 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/nvhost.h> | ||
| 18 | #include <linux/nvhost_t194.h> | ||
| 19 | #include <uapi/linux/nvhost_ioctl.h> | ||
| 20 | #include <linux/of_platform.h> | ||
| 21 | |||
| 22 | #include <nvgpu/gk20a.h> | ||
| 23 | #include <nvgpu/nvhost.h> | ||
| 24 | #include <nvgpu/enabled.h> | ||
| 25 | |||
| 26 | #include "nvhost_priv.h" | ||
| 27 | |||
| 28 | #include "os_linux.h" | ||
| 29 | #include "module.h" | ||
| 30 | |||
| 31 | int nvgpu_get_nvhost_dev(struct gk20a *g) | ||
| 32 | { | ||
| 33 | struct device_node *np = nvgpu_get_node(g); | ||
| 34 | struct platform_device *host1x_pdev = NULL; | ||
| 35 | const __be32 *host1x_ptr; | ||
| 36 | |||
| 37 | host1x_ptr = of_get_property(np, "nvidia,host1x", NULL); | ||
| 38 | if (host1x_ptr) { | ||
| 39 | struct device_node *host1x_node = | ||
| 40 | of_find_node_by_phandle(be32_to_cpup(host1x_ptr)); | ||
| 41 | |||
| 42 | host1x_pdev = of_find_device_by_node(host1x_node); | ||
| 43 | if (!host1x_pdev) { | ||
| 44 | nvgpu_warn(g, "host1x device not available"); | ||
| 45 | return -EPROBE_DEFER; | ||
| 46 | } | ||
| 47 | |||
| 48 | } else { | ||
| 49 | if (nvgpu_has_syncpoints(g)) { | ||
| 50 | nvgpu_warn(g, "host1x reference not found. assuming no syncpoints support"); | ||
| 51 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false); | ||
| 52 | } | ||
| 53 | return 0; | ||
| 54 | } | ||
| 55 | |||
| 56 | g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev)); | ||
| 57 | if (!g->nvhost_dev) | ||
| 58 | return -ENOMEM; | ||
| 59 | |||
| 60 | g->nvhost_dev->host1x_pdev = host1x_pdev; | ||
| 61 | |||
| 62 | return 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | void nvgpu_free_nvhost_dev(struct gk20a *g) | ||
| 66 | { | ||
| 67 | nvgpu_kfree(g, g->nvhost_dev); | ||
| 68 | } | ||
| 69 | |||
| 70 | int nvgpu_nvhost_module_busy_ext( | ||
| 71 | struct nvgpu_nvhost_dev *nvhost_dev) | ||
| 72 | { | ||
| 73 | return nvhost_module_busy_ext(nvhost_dev->host1x_pdev); | ||
| 74 | } | ||
| 75 | |||
| 76 | void nvgpu_nvhost_module_idle_ext( | ||
| 77 | struct nvgpu_nvhost_dev *nvhost_dev) | ||
| 78 | { | ||
| 79 | nvhost_module_idle_ext(nvhost_dev->host1x_pdev); | ||
| 80 | } | ||
| 81 | |||
| 82 | void nvgpu_nvhost_debug_dump_device( | ||
| 83 | struct nvgpu_nvhost_dev *nvhost_dev) | ||
| 84 | { | ||
| 85 | nvhost_debug_dump_device(nvhost_dev->host1x_pdev); | ||
| 86 | } | ||
| 87 | |||
| 88 | const char *nvgpu_nvhost_syncpt_get_name( | ||
| 89 | struct nvgpu_nvhost_dev *nvhost_dev, int id) | ||
| 90 | { | ||
| 91 | return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id); | ||
| 92 | } | ||
| 93 | |||
| 94 | bool nvgpu_nvhost_syncpt_is_valid_pt_ext( | ||
| 95 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
| 96 | { | ||
| 97 | return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id); | ||
| 98 | } | ||
| 99 | |||
| 100 | int nvgpu_nvhost_syncpt_is_expired_ext( | ||
| 101 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) | ||
| 102 | { | ||
| 103 | return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev, | ||
| 104 | id, thresh); | ||
| 105 | } | ||
| 106 | |||
| 107 | u32 nvgpu_nvhost_syncpt_incr_max_ext( | ||
| 108 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs) | ||
| 109 | { | ||
| 110 | return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs); | ||
| 111 | } | ||
| 112 | |||
| 113 | int nvgpu_nvhost_intr_register_notifier( | ||
| 114 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, | ||
| 115 | void (*callback)(void *, int), void *private_data) | ||
| 116 | { | ||
| 117 | return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev, | ||
| 118 | id, thresh, | ||
| 119 | callback, private_data); | ||
| 120 | } | ||
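A hedged sketch of how the notifier wrapper above might be used to wake a waiter once a syncpoint threshold is crossed. The example_* names are hypothetical and the callback assumes the nvgpu_cond API from <nvgpu/cond.h>:

static void example_syncpt_done(void *priv, int nr_completed)
{
        struct nvgpu_cond *cond = priv;

        /* Wake whoever is waiting on this syncpoint threshold. */
        nvgpu_cond_signal(cond);
}

static int example_wait_async(struct nvgpu_nvhost_dev *ndev, u32 id,
                              u32 thresh, struct nvgpu_cond *cond)
{
        return nvgpu_nvhost_intr_register_notifier(ndev, id, thresh,
                                                   example_syncpt_done, cond);
}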
| 121 | |||
| 122 | void nvgpu_nvhost_syncpt_set_min_eq_max_ext( | ||
| 123 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
| 124 | { | ||
| 125 | nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id); | ||
| 126 | } | ||
| 127 | |||
| 128 | void nvgpu_nvhost_syncpt_put_ref_ext( | ||
| 129 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
| 130 | { | ||
| 131 | nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id); | ||
| 132 | } | ||
| 133 | |||
| 134 | u32 nvgpu_nvhost_get_syncpt_host_managed( | ||
| 135 | struct nvgpu_nvhost_dev *nvhost_dev, | ||
| 136 | u32 param, const char *syncpt_name) | ||
| 137 | { | ||
| 138 | return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev, | ||
| 139 | param, syncpt_name); | ||
| 140 | } | ||
| 141 | |||
| 142 | u32 nvgpu_nvhost_get_syncpt_client_managed( | ||
| 143 | struct nvgpu_nvhost_dev *nvhost_dev, | ||
| 144 | const char *syncpt_name) | ||
| 145 | { | ||
| 146 | return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev, | ||
| 147 | syncpt_name); | ||
| 148 | } | ||
| 149 | |||
| 150 | int nvgpu_nvhost_syncpt_wait_timeout_ext( | ||
| 151 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, | ||
| 152 | u32 thresh, u32 timeout, u32 *value, struct timespec *ts) | ||
| 153 | { | ||
| 154 | return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev, | ||
| 155 | id, thresh, timeout, value, ts); | ||
| 156 | } | ||
| 157 | |||
| 158 | int nvgpu_nvhost_syncpt_read_ext_check( | ||
| 159 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val) | ||
| 160 | { | ||
| 161 | return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val); | ||
| 162 | } | ||
| 163 | |||
| 164 | u32 nvgpu_nvhost_syncpt_read_maxval( | ||
| 165 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
| 166 | { | ||
| 167 | return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); | ||
| 168 | } | ||
| 169 | |||
| 170 | void nvgpu_nvhost_syncpt_set_safe_state( | ||
| 171 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
| 172 | { | ||
| 173 | u32 val; | ||
| 174 | |||
| 175 | /* | ||
| 176 | * Add a large number of increments to the current value | ||
| 177 | * so that all waiters on this syncpoint are released. | ||
| 178 | * | ||
| 179 | * We don't expect any case where more than 0x10000 increments | ||
| 180 | * are pending. | ||
| 181 | */ | ||
| 182 | val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id); | ||
| 183 | val += 0x10000; | ||
| 184 | |||
| 185 | nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val); | ||
| 186 | nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val); | ||
| 187 | } | ||
| 188 | |||
| 189 | int nvgpu_nvhost_create_symlink(struct gk20a *g) | ||
| 190 | { | ||
| 191 | struct device *dev = dev_from_gk20a(g); | ||
| 192 | int err = 0; | ||
| 193 | |||
| 194 | if (g->nvhost_dev && | ||
| 195 | (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { | ||
| 196 | err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj, | ||
| 197 | &dev->kobj, | ||
| 198 | dev_name(dev)); | ||
| 199 | } | ||
| 200 | |||
| 201 | return err; | ||
| 202 | } | ||
| 203 | |||
| 204 | void nvgpu_nvhost_remove_symlink(struct gk20a *g) | ||
| 205 | { | ||
| 206 | struct device *dev = dev_from_gk20a(g); | ||
| 207 | |||
| 208 | if (g->nvhost_dev && | ||
| 209 | (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { | ||
| 210 | sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj, | ||
| 211 | dev_name(dev)); | ||
| 212 | } | ||
| 213 | } | ||
| 214 | |||
| 215 | #ifdef CONFIG_SYNC | ||
| 216 | u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt) | ||
| 217 | { | ||
| 218 | return nvhost_sync_pt_id(pt); | ||
| 219 | } | ||
| 220 | |||
| 221 | u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt) | ||
| 222 | { | ||
| 223 | return nvhost_sync_pt_thresh(pt); | ||
| 224 | } | ||
| 225 | |||
| 226 | struct sync_fence *nvgpu_nvhost_sync_fdget(int fd) | ||
| 227 | { | ||
| 228 | return nvhost_sync_fdget(fd); | ||
| 229 | } | ||
| 230 | |||
| 231 | int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence) | ||
| 232 | { | ||
| 233 | return nvhost_sync_num_pts(fence); | ||
| 234 | } | ||
| 235 | |||
| 236 | struct sync_fence *nvgpu_nvhost_sync_create_fence( | ||
| 237 | struct nvgpu_nvhost_dev *nvhost_dev, | ||
| 238 | u32 id, u32 thresh, const char *name) | ||
| 239 | { | ||
| 240 | struct nvhost_ctrl_sync_fence_info pt = { | ||
| 241 | .id = id, | ||
| 242 | .thresh = thresh, | ||
| 243 | }; | ||
| 244 | |||
| 245 | return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name); | ||
| 246 | } | ||
| 247 | #endif /* CONFIG_SYNC */ | ||
| 248 | |||
| 249 | #ifdef CONFIG_TEGRA_T19X_GRHOST | ||
| 250 | int nvgpu_nvhost_syncpt_unit_interface_get_aperture( | ||
| 251 | struct nvgpu_nvhost_dev *nvhost_dev, | ||
| 252 | u64 *base, size_t *size) | ||
| 253 | { | ||
| 254 | return nvhost_syncpt_unit_interface_get_aperture( | ||
| 255 | nvhost_dev->host1x_pdev, (phys_addr_t *)base, size); | ||
| 256 | } | ||
| 257 | |||
| 258 | u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id) | ||
| 259 | { | ||
| 260 | return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id); | ||
| 261 | } | ||
| 262 | |||
| 263 | int nvgpu_nvhost_syncpt_init(struct gk20a *g) | ||
| 264 | { | ||
| 265 | int err = 0; | ||
| 266 | |||
| 267 | if (!nvgpu_has_syncpoints(g)) | ||
| 268 | return -ENOSYS; | ||
| 269 | |||
| 270 | err = nvgpu_get_nvhost_dev(g); | ||
| 271 | if (err) { | ||
| 272 | nvgpu_err(g, "host1x device not available"); | ||
| 273 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false); | ||
| 274 | return -ENOSYS; | ||
| 275 | } | ||
| 276 | |||
| 277 | err = nvgpu_nvhost_syncpt_unit_interface_get_aperture( | ||
| 278 | g->nvhost_dev, | ||
| 279 | &g->syncpt_unit_base, | ||
| 280 | &g->syncpt_unit_size); | ||
| 281 | if (err) { | ||
| 282 | nvgpu_err(g, "Failed to get syncpt interface"); | ||
| 283 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false); | ||
| 284 | return -ENOSYS; | ||
| 285 | } | ||
| 286 | |||
| 287 | g->syncpt_size = | ||
| 288 | nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); | ||
| 289 | nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", | ||
| 290 | g->syncpt_unit_base, g->syncpt_unit_size, | ||
| 291 | g->syncpt_size); | ||
| 292 | |||
| 293 | return 0; | ||
| 294 | } | ||
| 295 | #endif | ||
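Given the aperture queried in nvgpu_nvhost_syncpt_init(), the MMIO address a channel would use for a particular syncpoint falls out of the unit base plus the per-ID byte offset. A hedged sketch (hypothetical example_* name, only meaningful where CONFIG_TEGRA_T19X_GRHOST provides the byte-offset helper):

static u64 example_syncpt_address(struct gk20a *g, u32 syncpt_id)
{
        return g->syncpt_unit_base +
                nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
}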
diff --git a/include/os/linux/nvhost_priv.h b/include/os/linux/nvhost_priv.h new file mode 100644 index 0000000..c03390a --- /dev/null +++ b/include/os/linux/nvhost_priv.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __NVGPU_NVHOST_PRIV_H__ | ||
| 18 | #define __NVGPU_NVHOST_PRIV_H__ | ||
| 19 | |||
| 20 | struct nvgpu_nvhost_dev { | ||
| 21 | struct platform_device *host1x_pdev; | ||
| 22 | }; | ||
| 23 | |||
| 24 | #endif /* __NVGPU_NVHOST_PRIV_H__ */ | ||
diff --git a/include/os/linux/nvidia_p2p.c b/include/os/linux/nvidia_p2p.c new file mode 100644 index 0000000..87db8c5 --- /dev/null +++ b/include/os/linux/nvidia_p2p.c | |||
| @@ -0,0 +1,299 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| 20 | * DEALINGS IN THE SOFTWARE. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/slab.h> | ||
| 24 | #include <linux/nv-p2p.h> | ||
| 25 | |||
| 26 | static void nvidia_p2p_mn_release(struct mmu_notifier *mn, | ||
| 27 | struct mm_struct *mm) | ||
| 28 | { | ||
| 29 | struct nvidia_p2p_page_table *page_table = container_of(mn, | ||
| 30 | struct nvidia_p2p_page_table, | ||
| 31 | mn); | ||
| 32 | |||
| 33 | page_table->free_callback(page_table->data); | ||
| 34 | } | ||
| 35 | |||
| 36 | static void nvidia_p2p_mn_invl_range_start(struct mmu_notifier *mn, | ||
| 37 | struct mm_struct *mm, unsigned long start, unsigned long end) | ||
| 38 | { | ||
| 39 | struct nvidia_p2p_page_table *page_table = container_of(mn, | ||
| 40 | struct nvidia_p2p_page_table, | ||
| 41 | mn); | ||
| 42 | u64 vaddr = 0; | ||
| 43 | u64 size = 0; | ||
| 44 | |||
| 45 | vaddr = page_table->vaddr; | ||
| 46 | size = page_table->size; | ||
| 47 | |||
| 48 | if (vaddr >= start && vaddr <= end) { | ||
| 49 | mmu_notifier_unregister_no_release(&page_table->mn, page_table->mm); | ||
| 50 | page_table->free_callback(page_table->data); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | static struct mmu_notifier_ops nvidia_p2p_mmu_ops = { | ||
| 55 | .release = nvidia_p2p_mn_release, | ||
| 56 | .invalidate_range_start = nvidia_p2p_mn_invl_range_start, | ||
| 57 | }; | ||
| 58 | |||
| 59 | int nvidia_p2p_get_pages(u64 vaddr, u64 size, | ||
| 60 | struct nvidia_p2p_page_table **page_table, | ||
| 61 | void (*free_callback)(void *data), void *data) | ||
| 62 | { | ||
| 63 | int ret = 0; | ||
| 64 | int user_pages = 0; | ||
| 65 | int locked = 0; | ||
| 66 | int nr_pages = size >> PAGE_SHIFT; | ||
| 67 | struct page **pages; | ||
| 68 | |||
| 69 | if (nr_pages <= 0) { | ||
| 70 | return -EINVAL; | ||
| 71 | } | ||
| 72 | |||
| 73 | *page_table = kzalloc(sizeof(**page_table), GFP_KERNEL); | ||
| 74 | if (!*page_table) { | ||
| 75 | return -ENOMEM; | ||
| 76 | } | ||
| 77 | |||
| 78 | pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); | ||
| 79 | if (!pages) { | ||
| 80 | ret = -ENOMEM; | ||
| 81 | goto free_page_table; | ||
| 82 | } | ||
| 83 | down_read(¤t->mm->mmap_sem); | ||
| 84 | locked = 1; | ||
| 85 | user_pages = get_user_pages_locked(vaddr & PAGE_MASK, nr_pages, | ||
| 86 | FOLL_WRITE | FOLL_FORCE, | ||
| 87 | pages, &locked); | ||
| 88 | up_read(¤t->mm->mmap_sem); | ||
| 89 | if (user_pages != nr_pages) { | ||
| 90 | ret = user_pages < 0 ? user_pages : -ENOMEM; | ||
| 91 | goto free_pages; | ||
| 92 | } | ||
| 93 | |||
| 94 | (*page_table)->version = NVIDIA_P2P_PAGE_TABLE_VERSION; | ||
| 95 | (*page_table)->pages = pages; | ||
| 96 | (*page_table)->entries = user_pages; | ||
| 97 | (*page_table)->page_size = NVIDIA_P2P_PAGE_SIZE_4KB; | ||
| 98 | (*page_table)->size = size; | ||
| 99 | |||
| 100 | (*page_table)->mn.ops = &nvidia_p2p_mmu_ops; | ||
| 101 | (*page_table)->mm = current->mm; | ||
| 102 | (*page_table)->free_callback = free_callback; | ||
| 103 | (*page_table)->data = data; | ||
| 104 | (*page_table)->vaddr = vaddr; | ||
| 105 | mutex_init(&(*page_table)->lock); | ||
| 106 | (*page_table)->mapped = NVIDIA_P2P_PINNED; | ||
| 107 | |||
| 108 | ret = mmu_notifier_register(&(*page_table)->mn, (*page_table)->mm); | ||
| 109 | if (ret) { | ||
| 110 | goto free_pages; | ||
| 111 | } | ||
| 112 | |||
| 113 | return 0; | ||
| 114 | free_pages: | ||
| 115 | while (--user_pages >= 0) { | ||
| 116 | put_page(pages[user_pages]); | ||
| 117 | } | ||
| 118 | kfree(pages); | ||
| 119 | free_page_table: | ||
| 120 | kfree(*page_table); | ||
| 121 | *page_table = NULL; | ||
| 122 | return ret; | ||
| 123 | } | ||
| 124 | EXPORT_SYMBOL(nvidia_p2p_get_pages); | ||
| 125 | |||
| 126 | int nvidia_p2p_put_pages(struct nvidia_p2p_page_table *page_table) | ||
| 127 | { | ||
| 128 | if (!page_table) { | ||
| 129 | return -EINVAL; | ||
| 130 | } | ||
| 131 | |||
| 132 | mmu_notifier_unregister(&page_table->mn, page_table->mm); | ||
| 133 | |||
| 134 | return 0; | ||
| 135 | } | ||
| 136 | EXPORT_SYMBOL(nvidia_p2p_put_pages); | ||
| 137 | |||
| 138 | int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table) | ||
| 139 | { | ||
| 140 | int user_pages = 0; | ||
| 141 | struct page **pages = NULL; | ||
| 142 | |||
| 143 | if (!page_table) { | ||
| 144 | return 0; | ||
| 145 | } | ||
| 146 | |||
| 147 | mutex_lock(&page_table->lock); | ||
| 148 | |||
| 149 | if (page_table->mapped & NVIDIA_P2P_MAPPED) { | ||
| 150 | WARN(1, "Attempting to free unmapped pages"); | ||
| 151 | } | ||
| 152 | |||
| 153 | if (page_table->mapped & NVIDIA_P2P_PINNED) { | ||
| 154 | pages = page_table->pages; | ||
| 155 | user_pages = page_table->entries; | ||
| 156 | |||
| 157 | while (--user_pages >= 0) { | ||
| 158 | put_page(pages[user_pages]); | ||
| 159 | } | ||
| 160 | |||
| 161 | kfree(pages); | ||
| 162 | page_table->mapped &= (u32)~NVIDIA_P2P_PINNED; | ||
| 163 | } | ||
| 164 | |||
| 165 | mutex_unlock(&page_table->lock); | ||
| 166 | |||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | EXPORT_SYMBOL(nvidia_p2p_free_page_table); | ||
| 170 | |||
| 171 | int nvidia_p2p_dma_map_pages(struct device *dev, | ||
| 172 | struct nvidia_p2p_page_table *page_table, | ||
| 173 | struct nvidia_p2p_dma_mapping **dma_mapping, | ||
| 174 | enum dma_data_direction direction) | ||
| 175 | { | ||
| 176 | struct sg_table *sgt = NULL; | ||
| 177 | struct scatterlist *sg; | ||
| 178 | struct page **pages = NULL; | ||
| 179 | u32 nr_pages = 0; | ||
| 180 | int ret = 0; | ||
| 181 | int i, count; | ||
| 182 | |||
| 183 | if (!page_table) { | ||
| 184 | return -EINVAL; | ||
| 185 | } | ||
| 186 | |||
| 187 | mutex_lock(&page_table->lock); | ||
| 188 | |||
| 189 | pages = page_table->pages; | ||
| 190 | nr_pages = page_table->entries; | ||
| 191 | if (nr_pages <= 0) { | ||
| 192 | mutex_unlock(&page_table->lock); | ||
| 193 | return -EINVAL; | ||
| 194 | } | ||
| 195 | |||
| 196 | *dma_mapping = kzalloc(sizeof(**dma_mapping), GFP_KERNEL); | ||
| 197 | if (!*dma_mapping) { | ||
| 198 | mutex_unlock(&page_table->lock); | ||
| 199 | return -ENOMEM; | ||
| 200 | } | ||
| 201 | sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); | ||
| 202 | if (!sgt) { | ||
| 203 | ret = -ENOMEM; | ||
| 204 | goto free_dma_mapping; | ||
| 205 | } | ||
| 206 | ret = sg_alloc_table_from_pages(sgt, pages, | ||
| 207 | nr_pages, 0, page_table->size, GFP_KERNEL); | ||
| 208 | if (ret) { | ||
| 209 | goto free_sgt; | ||
| 210 | } | ||
| 211 | |||
| 212 | (*dma_mapping)->version = NVIDIA_P2P_DMA_MAPPING_VERSION; | ||
| 213 | (*dma_mapping)->sgt = sgt; | ||
| 214 | (*dma_mapping)->dev = dev; | ||
| 215 | (*dma_mapping)->direction = direction; | ||
| 216 | (*dma_mapping)->page_table = page_table; | ||
| 217 | |||
| 218 | count = dma_map_sg(dev, sgt->sgl, sgt->nents, direction); | ||
| 219 | if (count < 1) { | ||
| 220 | /* dma_map_sg() returns 0 on failure; don't return success. */ | ||
| 221 | ret = -EINVAL; | ||
| 222 | goto free_sg_table; | ||
| 223 | } | ||
| 222 | |||
| 223 | (*dma_mapping)->entries = count; | ||
| 224 | |||
| 225 | (*dma_mapping)->hw_address = kcalloc(count, sizeof(u64), GFP_KERNEL); | ||
| 226 | if (!((*dma_mapping)->hw_address)) { | ||
| 227 | ret = -ENOMEM; | ||
| 228 | goto unmap_sg; | ||
| 229 | } | ||
| 230 | (*dma_mapping)->hw_len = kcalloc(count, sizeof(u64), GFP_KERNEL); | ||
| 231 | if (!((*dma_mapping)->hw_len)) { | ||
| 232 | ret = -ENOMEM; | ||
| 233 | goto free_hw_address; | ||
| 234 | } | ||
| 235 | |||
| 236 | for_each_sg(sgt->sgl, sg, count, i) { | ||
| 237 | (*dma_mapping)->hw_address[i] = sg_dma_address(sg); | ||
| 238 | (*dma_mapping)->hw_len[i] = sg_dma_len(sg); | ||
| 239 | } | ||
| 240 | (*dma_mapping)->page_table->mapped |= NVIDIA_P2P_MAPPED; | ||
| 241 | mutex_unlock(&page_table->lock); | ||
| 242 | |||
| 243 | return 0; | ||
| 244 | free_hw_address: | ||
| 245 | kfree((*dma_mapping)->hw_address); | ||
| 246 | unmap_sg: | ||
| 247 | dma_unmap_sg(dev, sgt->sgl, | ||
| 248 | sgt->nents, direction); | ||
| 249 | free_sg_table: | ||
| 250 | sg_free_table(sgt); | ||
| 251 | free_sgt: | ||
| 252 | kfree(sgt); | ||
| 253 | free_dma_mapping: | ||
| 254 | kfree(*dma_mapping); | ||
| 255 | *dma_mapping = NULL; | ||
| 256 | mutex_unlock(&page_table->lock); | ||
| 257 | |||
| 258 | return ret; | ||
| 259 | } | ||
| 260 | EXPORT_SYMBOL(nvidia_p2p_dma_map_pages); | ||
| 261 | |||
| 262 | int nvidia_p2p_dma_unmap_pages(struct nvidia_p2p_dma_mapping *dma_mapping) | ||
| 263 | { | ||
| 264 | struct nvidia_p2p_page_table *page_table = NULL; | ||
| 265 | |||
| 266 | if (!dma_mapping) { | ||
| 267 | return -EINVAL; | ||
| 268 | } | ||
| 269 | |||
| 270 | page_table = dma_mapping->page_table; | ||
| 271 | if (!page_table) { | ||
| 272 | return -EFAULT; | ||
| 273 | } | ||
| 274 | |||
| 275 | mutex_lock(&page_table->lock); | ||
| 276 | if (page_table->mapped & NVIDIA_P2P_MAPPED) { | ||
| 277 | kfree(dma_mapping->hw_len); | ||
| 278 | kfree(dma_mapping->hw_address); | ||
| 279 | if (dma_mapping->entries) | ||
| 280 | dma_unmap_sg(dma_mapping->dev, | ||
| 281 | dma_mapping->sgt->sgl, | ||
| 282 | dma_mapping->sgt->nents, | ||
| 283 | dma_mapping->direction); | ||
| 284 | sg_free_table(dma_mapping->sgt); | ||
| 285 | kfree(dma_mapping->sgt); | ||
| 286 | kfree(dma_mapping); | ||
| 287 | page_table->mapped &= (u32)~NVIDIA_P2P_MAPPED; | ||
| 288 | } | ||
| 289 | mutex_unlock(&page_table->lock); | ||
| 290 | |||
| 291 | return 0; | ||
| 292 | } | ||
| 293 | EXPORT_SYMBOL(nvidia_p2p_dma_unmap_pages); | ||
| 294 | |||
| 295 | int nvidia_p2p_free_dma_mapping(struct nvidia_p2p_dma_mapping *dma_mapping) | ||
| 296 | { | ||
| 297 | return nvidia_p2p_dma_unmap_pages(dma_mapping); | ||
| 298 | } | ||
| 299 | EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping); | ||
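Putting the exported API together, a hedged lifecycle sketch (not part of this file; the example_* names, the peer device pointer, and the address range are illustrative, and the usual <linux/device.h> and <linux/dma-mapping.h> headers are assumed): pin a user range, map it for a peer device, read back the bus addresses, then tear everything down in reverse order.

static void example_p2p_free_cb(void *data)
{
        /*
         * Invoked by the MMU notifier if the pinned range goes away
         * underneath us. A real client would drop its references here.
         */
}

static int example_p2p(struct device *peer, u64 vaddr, u64 size)
{
        struct nvidia_p2p_page_table *pt = NULL;
        struct nvidia_p2p_dma_mapping *map = NULL;
        int i, err;

        err = nvidia_p2p_get_pages(vaddr, size, &pt,
                                   example_p2p_free_cb, NULL);
        if (err)
                return err;

        err = nvidia_p2p_dma_map_pages(peer, pt, &map, DMA_BIDIRECTIONAL);
        if (err)
                goto put;

        /* Each mapped chunk is exposed as a bus address/length pair. */
        for (i = 0; i < map->entries; i++)
                dev_info(peer, "chunk %d: %llx+%llu\n", i,
                         map->hw_address[i], map->hw_len[i]);

        /* Unmapping also frees the dma_mapping struct itself. */
        nvidia_p2p_dma_unmap_pages(map);
put:
        nvidia_p2p_put_pages(pt);
        nvidia_p2p_free_page_table(pt);
        return err;
}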
diff --git a/include/os/linux/nvlink.c b/include/os/linux/nvlink.c new file mode 100644 index 0000000..dd7c02c --- /dev/null +++ b/include/os/linux/nvlink.c | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifdef CONFIG_TEGRA_NVLINK | ||
| 18 | #include <linux/platform/tegra/tegra-nvlink.h> | ||
| 19 | #endif | ||
| 20 | |||
| 21 | #include <nvgpu/gk20a.h> | ||
| 22 | #include <nvgpu/nvlink.h> | ||
| 23 | #include <nvgpu/enabled.h> | ||
| 24 | #include "module.h" | ||
| 25 | |||
| 26 | #ifdef CONFIG_TEGRA_NVLINK | ||
| 27 | int nvgpu_nvlink_read_dt_props(struct gk20a *g) | ||
| 28 | { | ||
| 29 | struct device_node *np; | ||
| 30 | struct nvlink_device *ndev = g->nvlink.priv; | ||
| 31 | u32 local_dev_id; | ||
| 32 | u32 local_link_id; | ||
| 33 | u32 remote_dev_id; | ||
| 34 | u32 remote_link_id; | ||
| 35 | bool is_master; | ||
| 36 | |||
| 37 | /* Parse DT */ | ||
| 38 | np = nvgpu_get_node(g); | ||
| 39 | if (!np) | ||
| 40 | goto fail; | ||
| 41 | |||
| 42 | np = of_get_child_by_name(np, "nvidia,nvlink"); | ||
| 43 | if (!np) | ||
| 44 | goto fail; | ||
| 45 | |||
| 46 | np = of_get_child_by_name(np, "endpoint"); | ||
| 47 | if (!np) | ||
| 48 | goto fail; | ||
| 49 | |||
| 50 | /* Parse DT structure to detect endpoint topology */ | ||
| 51 | of_property_read_u32(np, "local_dev_id", &local_dev_id); | ||
| 52 | of_property_read_u32(np, "local_link_id", &local_link_id); | ||
| 53 | of_property_read_u32(np, "remote_dev_id", &remote_dev_id); | ||
| 54 | of_property_read_u32(np, "remote_link_id", &remote_link_id); | ||
| 55 | is_master = of_property_read_bool(np, "is_master"); | ||
| 56 | |||
| 57 | /* Check that we are in dGPU mode */ | ||
| 58 | if (local_dev_id != NVLINK_ENDPT_GV100) { | ||
| 59 | nvgpu_err(g, "Local nvlink device is not dGPU"); | ||
| 60 | return -EINVAL; | ||
| 61 | } | ||
| 62 | |||
| 63 | ndev->is_master = is_master; | ||
| 64 | ndev->device_id = local_dev_id; | ||
| 65 | ndev->link.link_id = local_link_id; | ||
| 66 | ndev->link.remote_dev_info.device_id = remote_dev_id; | ||
| 67 | ndev->link.remote_dev_info.link_id = remote_link_id; | ||
| 68 | |||
| 69 | return 0; | ||
| 70 | |||
| 71 | fail: | ||
| 72 | nvgpu_info(g, "nvlink endpoint not found or invalid in DT"); | ||
| 73 | return -ENODEV; | ||
| 74 | } | ||
| 75 | #endif /* CONFIG_TEGRA_NVLINK */ | ||
| 76 | |||
| 77 | void nvgpu_mss_nvlink_init_credits(struct gk20a *g) | ||
| 78 | { | ||
| 79 | /* MSS_NVLINK_1_BASE */ | ||
| 80 | void __iomem *soc1 = ioremap(0x01f20010, 4096); | ||
| 81 | /* MSS_NVLINK_2_BASE */ | ||
| 82 | void __iomem *soc2 = ioremap(0x01f40010, 4096); | ||
| 83 | /* MSS_NVLINK_3_BASE */ | ||
| 84 | void __iomem *soc3 = ioremap(0x01f60010, 4096); | ||
| 85 | /* MSS_NVLINK_4_BASE */ | ||
| 86 | void __iomem *soc4 = ioremap(0x01f80010, 4096); | ||
| 87 | u32 val; | ||
| 88 | |||
| 89 | nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits"); | ||
| 90 | |||
| 91 | val = readl_relaxed(soc1); | ||
| 92 | writel_relaxed(val, soc1); | ||
| 93 | val = readl_relaxed(soc1 + 4); | ||
| 94 | writel_relaxed(val, soc1 + 4); | ||
| 95 | |||
| 96 | val = readl_relaxed(soc2); | ||
| 97 | writel_relaxed(val, soc2); | ||
| 98 | val = readl_relaxed(soc2 + 4); | ||
| 99 | writel_relaxed(val, soc2 + 4); | ||
| 100 | |||
| 101 | val = readl_relaxed(soc3); | ||
| 102 | writel_relaxed(val, soc3); | ||
| 103 | val = readl_relaxed(soc3 + 4); | ||
| 104 | writel_relaxed(val, soc3 + 4); | ||
| 105 | |||
| 106 | val = readl_relaxed(soc4); | ||
| 107 | writel_relaxed(val, soc4); | ||
| 108 | val = readl_relaxed(soc4 + 4); | ||
| 109 | writel_relaxed(val, soc4 + 4); | ||
| 110 | } | ||
| 111 | |||
| 112 | int nvgpu_nvlink_deinit(struct gk20a *g) | ||
| 113 | { | ||
| 114 | #ifdef CONFIG_TEGRA_NVLINK | ||
| 115 | struct nvlink_device *ndev = g->nvlink.priv; | ||
| 116 | int err; | ||
| 117 | |||
| 118 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) | ||
| 119 | return -ENODEV; | ||
| 120 | |||
| 121 | err = nvlink_shutdown(ndev); | ||
| 122 | if (err) { | ||
| 123 | nvgpu_err(g, "failed to shut down nvlink"); | ||
| 124 | return err; | ||
| 125 | } | ||
| 126 | |||
| 127 | nvgpu_nvlink_remove(g); | ||
| 128 | |||
| 129 | return 0; | ||
| 130 | #endif | ||
| 131 | return -ENODEV; | ||
| 132 | } | ||
diff --git a/include/os/linux/nvlink.h b/include/os/linux/nvlink.h new file mode 100644 index 0000000..4dc54f6 --- /dev/null +++ b/include/os/linux/nvlink.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef NVGPU_OS_LINUX_NVLINK_H | ||
| 18 | #define NVGPU_OS_LINUX_NVLINK_H | ||
| 19 | struct gk20a; | ||
| 20 | int nvgpu_nvlink_deinit(struct gk20a *g); | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/os_fence_android.c b/include/os/linux/os_fence_android.c new file mode 100644 index 0000000..013989e --- /dev/null +++ b/include/os/linux/os_fence_android.c | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | #include <nvgpu/types.h> | ||
| 17 | #include <nvgpu/os_fence.h> | ||
| 18 | #include <nvgpu/linux/os_fence_android.h> | ||
| 19 | #include <nvgpu/gk20a.h> | ||
| 20 | #include <nvgpu/channel.h> | ||
| 21 | |||
| 22 | #include "../drivers/staging/android/sync.h" | ||
| 23 | |||
| 24 | inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s) | ||
| 25 | { | ||
| 26 | struct sync_fence *fence = (struct sync_fence *)s->priv; | ||
| 27 | return fence; | ||
| 28 | } | ||
| 29 | |||
| 30 | static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out) | ||
| 31 | { | ||
| 32 | fence_out->priv = NULL; | ||
| 33 | fence_out->g = NULL; | ||
| 34 | fence_out->ops = NULL; | ||
| 35 | } | ||
| 36 | |||
| 37 | void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out, | ||
| 38 | struct gk20a *g, const struct nvgpu_os_fence_ops *fops, | ||
| 39 | struct sync_fence *fence) | ||
| 40 | { | ||
| 41 | fence_out->g = g; | ||
| 42 | fence_out->ops = fops; | ||
| 43 | fence_out->priv = (void *)fence; | ||
| 44 | } | ||
| 45 | |||
| 46 | void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s) | ||
| 47 | { | ||
| 48 | struct sync_fence *fence = nvgpu_get_sync_fence(s); | ||
| 49 | |||
| 50 | sync_fence_put(fence); | ||
| 51 | |||
| 52 | nvgpu_os_fence_clear(s); | ||
| 53 | } | ||
| 54 | |||
| 55 | void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd) | ||
| 56 | { | ||
| 57 | struct sync_fence *fence = nvgpu_get_sync_fence(s); | ||
| 58 | |||
| 59 | sync_fence_get(fence); | ||
| 60 | sync_fence_install(fence, fd); | ||
| 61 | } | ||
| 62 | |||
| 63 | int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out, | ||
| 64 | struct channel_gk20a *c, int fd) | ||
| 65 | { | ||
| 66 | int err = -ENOSYS; | ||
| 67 | |||
| 68 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 69 | err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd); | ||
| 70 | #endif | ||
| 71 | |||
| 72 | if (err) | ||
| 73 | err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); | ||
| 74 | |||
| 75 | if (err) | ||
| 76 | nvgpu_err(c->g, "error obtaining fence from fd %d", fd); | ||
| 77 | |||
| 78 | return err; | ||
| 79 | } | ||
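Callers consume the resulting fence purely through its ops table, so the syncpoint and semaphore backends stay interchangeable. A hedged sketch (hypothetical example_* name) that turns a sync file descriptor from userspace into wait commands on a channel using the fdget helper above:

static int example_wait_on_fd(struct channel_gk20a *c, int fd,
                              struct priv_cmd_entry *wait_cmd)
{
        struct nvgpu_os_fence fence;
        int err = nvgpu_os_fence_fdget(&fence, c, fd);

        if (err)
                return err;

        /* 0 here means "no limit" on the number of wait commands. */
        err = fence.ops->program_waits(&fence, wait_cmd, c, 0);

        /* Release the sync_fence reference taken by fdget. */
        fence.ops->drop_ref(&fence);
        return err;
}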
diff --git a/include/os/linux/os_fence_android_sema.c b/include/os/linux/os_fence_android_sema.c new file mode 100644 index 0000000..eb60600 --- /dev/null +++ b/include/os/linux/os_fence_android_sema.c | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/errno.h> | ||
| 18 | |||
| 19 | #include <nvgpu/types.h> | ||
| 20 | #include <nvgpu/os_fence.h> | ||
| 21 | #include <nvgpu/linux/os_fence_android.h> | ||
| 22 | #include <nvgpu/semaphore.h> | ||
| 23 | #include <nvgpu/gk20a.h> | ||
| 24 | #include <nvgpu/channel.h> | ||
| 25 | #include <nvgpu/channel_sync.h> | ||
| 26 | |||
| 27 | #include "gk20a/mm_gk20a.h" | ||
| 28 | |||
| 29 | #include "sync_sema_android.h" | ||
| 30 | |||
| 31 | #include "../drivers/staging/android/sync.h" | ||
| 32 | |||
| 33 | int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s, | ||
| 34 | struct priv_cmd_entry *wait_cmd, | ||
| 35 | struct channel_gk20a *c, | ||
| 36 | int max_wait_cmds) | ||
| 37 | { | ||
| 38 | int err; | ||
| 39 | int wait_cmd_size; | ||
| 40 | int num_wait_cmds; | ||
| 41 | int i; | ||
| 42 | struct nvgpu_semaphore *sema; | ||
| 43 | struct sync_fence *sync_fence = nvgpu_get_sync_fence(s); | ||
| 44 | |||
| 45 | wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size(); | ||
| 46 | |||
| 47 | num_wait_cmds = sync_fence->num_fences; | ||
| 48 | if (num_wait_cmds == 0) | ||
| 49 | return 0; | ||
| 50 | |||
| 51 | if (max_wait_cmds && num_wait_cmds > max_wait_cmds) | ||
| 52 | return -EINVAL; | ||
| 53 | |||
| 54 | err = gk20a_channel_alloc_priv_cmdbuf(c, | ||
| 55 | wait_cmd_size * num_wait_cmds, | ||
| 56 | wait_cmd); | ||
| 57 | if (err) { | ||
| 58 | return err; | ||
| 59 | } | ||
| 60 | |||
| 61 | for (i = 0; i < num_wait_cmds; i++) { | ||
| 62 | struct sync_pt *pt = sync_pt_from_fence( | ||
| 63 | sync_fence->cbs[i].sync_pt); | ||
| 64 | |||
| 65 | sema = gk20a_sync_pt_sema(pt); | ||
| 66 | channel_sync_semaphore_gen_wait_cmd(c, sema, wait_cmd, | ||
| 67 | wait_cmd_size, i); | ||
| 68 | } | ||
| 69 | |||
| 70 | return 0; | ||
| 71 | } | ||
| 72 | |||
| 73 | static const struct nvgpu_os_fence_ops sema_ops = { | ||
| 74 | .program_waits = nvgpu_os_fence_sema_wait_gen_cmd, | ||
| 75 | .drop_ref = nvgpu_os_fence_android_drop_ref, | ||
| 76 | .install_fence = nvgpu_os_fence_android_install_fd, | ||
| 77 | }; | ||
| 78 | |||
| 79 | int nvgpu_os_fence_sema_create( | ||
| 80 | struct nvgpu_os_fence *fence_out, | ||
| 81 | struct channel_gk20a *c, | ||
| 82 | struct nvgpu_semaphore *sema) | ||
| 83 | { | ||
| 84 | struct sync_fence *fence; | ||
| 85 | |||
| 86 | fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x", | ||
| 87 | nvgpu_semaphore_gpu_ro_va(sema)); | ||
| 88 | |||
| 89 | if (!fence) { | ||
| 90 | nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x", | ||
| 91 | (u32)nvgpu_semaphore_gpu_ro_va(sema)); | ||
| 92 | |||
| 93 | return -ENOMEM; | ||
| 94 | } | ||
| 95 | |||
| 96 | nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); | ||
| 97 | |||
| 98 | return 0; | ||
| 99 | } | ||
| 100 | |||
| 101 | int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out, | ||
| 102 | struct channel_gk20a *c, int fd) | ||
| 103 | { | ||
| 104 | struct sync_fence *fence = gk20a_sync_fence_fdget(fd); | ||
| 105 | |||
| 106 | if (!fence) | ||
| 107 | return -EINVAL; | ||
| 108 | |||
| 109 | nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); | ||
| 110 | |||
| 111 | return 0; | ||
| 112 | } | ||
diff --git a/include/os/linux/os_fence_android_syncpt.c b/include/os/linux/os_fence_android_syncpt.c new file mode 100644 index 0000000..368a03c --- /dev/null +++ b/include/os/linux/os_fence_android_syncpt.c | |||
| @@ -0,0 +1,121 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/err.h> | ||
| 18 | #include <nvgpu/errno.h> | ||
| 19 | |||
| 20 | #include <nvgpu/types.h> | ||
| 21 | #include <nvgpu/os_fence.h> | ||
| 22 | #include <nvgpu/linux/os_fence_android.h> | ||
| 23 | #include <nvgpu/nvhost.h> | ||
| 24 | #include <nvgpu/atomic.h> | ||
| 25 | #include <nvgpu/gk20a.h> | ||
| 26 | #include <nvgpu/channel.h> | ||
| 27 | #include <nvgpu/channel_sync.h> | ||
| 28 | |||
| 29 | #include "gk20a/mm_gk20a.h" | ||
| 30 | |||
| 31 | #include "../drivers/staging/android/sync.h" | ||
| 32 | |||
| 33 | int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s, | ||
| 34 | struct priv_cmd_entry *wait_cmd, | ||
| 35 | struct channel_gk20a *c, | ||
| 36 | int max_wait_cmds) | ||
| 37 | { | ||
| 38 | int err; | ||
| 39 | int wait_cmd_size; | ||
| 40 | int num_wait_cmds; | ||
| 41 | int i; | ||
| 42 | u32 wait_id; | ||
| 43 | struct sync_pt *pt; | ||
| 44 | |||
| 45 | struct sync_fence *sync_fence = (struct sync_fence *)s->priv; | ||
| 46 | |||
| 47 | if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) | ||
| 48 | return -EINVAL; | ||
| 49 | |||
| 50 | /* validate syncpt ids */ | ||
| 51 | for (i = 0; i < sync_fence->num_fences; i++) { | ||
| 52 | pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); | ||
| 53 | wait_id = nvgpu_nvhost_sync_pt_id(pt); | ||
| 54 | if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext( | ||
| 55 | c->g->nvhost_dev, wait_id)) { | ||
| 56 | return -EINVAL; | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); | ||
| 61 | if (num_wait_cmds == 0) | ||
| 62 | return 0; | ||
| 63 | |||
| 64 | wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); | ||
| 65 | err = gk20a_channel_alloc_priv_cmdbuf(c, | ||
| 66 | wait_cmd_size * num_wait_cmds, wait_cmd); | ||
| 67 | if (err) { | ||
| 68 | return err; | ||
| 69 | } | ||
| 70 | |||
| 71 | for (i = 0; i < sync_fence->num_fences; i++) { | ||
| 72 | struct sync_pt *pt = sync_pt_from_fence( | ||
| 73 | sync_fence->cbs[i].sync_pt); | ||
| 74 | u32 wait_id = nvgpu_nvhost_sync_pt_id(pt); | ||
| 75 | u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt); | ||
| 76 | |||
| 77 | err = channel_sync_syncpt_gen_wait_cmd(c, wait_id, wait_value, | ||
| 78 | wait_cmd, wait_cmd_size, i, true); | ||
| 79 | } | ||
| 80 | |||
| 81 | WARN_ON(i != num_wait_cmds); | ||
| 82 | |||
| 83 | return 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | static const struct nvgpu_os_fence_ops syncpt_ops = { | ||
| 87 | .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd, | ||
| 88 | .drop_ref = nvgpu_os_fence_android_drop_ref, | ||
| 89 | .install_fence = nvgpu_os_fence_android_install_fd, | ||
| 90 | }; | ||
| 91 | |||
| 92 | int nvgpu_os_fence_syncpt_create( | ||
| 93 | struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, | ||
| 94 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) | ||
| 95 | { | ||
| 96 | struct sync_fence *fence = nvgpu_nvhost_sync_create_fence( | ||
| 97 | nvhost_dev, id, thresh, "fence"); | ||
| 98 | |||
| 99 | if (IS_ERR(fence)) { | ||
| 100 | nvgpu_err(c->g, "error %d during construction of fence.", (int)PTR_ERR(fence)); | ||
| 101 | return PTR_ERR(fence); | ||
| 102 | } | ||
| 103 | |||
| 104 | nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); | ||
| 105 | |||
| 106 | return 0; | ||
| 107 | } | ||
| 108 | |||
| 109 | int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out, | ||
| 110 | struct channel_gk20a *c, int fd) | ||
| 111 | { | ||
| 112 | struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd); | ||
| 113 | |||
| 114 | if (fence == NULL) { | ||
| 115 | return -ENOMEM; | ||
| 116 | } | ||
| 117 | |||
| 118 | nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); | ||
| 119 | |||
| 120 | return 0; | ||
| 121 | } | ||
diff --git a/include/os/linux/os_linux.h b/include/os/linux/os_linux.h new file mode 100644 index 0000000..25c6c03 --- /dev/null +++ b/include/os/linux/os_linux.h | |||
| @@ -0,0 +1,187 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef NVGPU_OS_LINUX_H | ||
| 18 | #define NVGPU_OS_LINUX_H | ||
| 19 | |||
| 20 | #include <linux/cdev.h> | ||
| 21 | #include <linux/iommu.h> | ||
| 22 | #include <linux/hashtable.h> | ||
| 23 | |||
| 24 | #include <nvgpu/gk20a.h> | ||
| 25 | |||
| 26 | #include "cde.h" | ||
| 27 | #include "sched.h" | ||
| 28 | |||
| 29 | struct nvgpu_os_linux_ops { | ||
| 30 | struct { | ||
| 31 | void (*get_program_numbers)(struct gk20a *g, | ||
| 32 | u32 block_height_log2, | ||
| 33 | u32 shader_parameter, | ||
| 34 | int *hprog, int *vprog); | ||
| 35 | bool (*need_scatter_buffer)(struct gk20a *g); | ||
| 36 | int (*populate_scatter_buffer)(struct gk20a *g, | ||
| 37 | struct sg_table *sgt, | ||
| 38 | size_t surface_size, | ||
| 39 | void *scatter_buffer_ptr, | ||
| 40 | size_t scatter_buffer_size); | ||
| 41 | } cde; | ||
| 42 | |||
| 43 | struct { | ||
| 44 | int (*init_debugfs)(struct gk20a *g); | ||
| 45 | } clk; | ||
| 46 | |||
| 47 | struct { | ||
| 48 | int (*init_debugfs)(struct gk20a *g); | ||
| 49 | } therm; | ||
| 50 | |||
| 51 | struct { | ||
| 52 | int (*init_debugfs)(struct gk20a *g); | ||
| 53 | } fecs_trace; | ||
| 54 | }; | ||
| 55 | |||
| 56 | struct nvgpu_os_linux { | ||
| 57 | struct gk20a g; | ||
| 58 | struct device *dev; | ||
| 59 | |||
| 60 | struct { | ||
| 61 | struct cdev cdev; | ||
| 62 | struct device *node; | ||
| 63 | } channel; | ||
| 64 | |||
| 65 | struct { | ||
| 66 | struct cdev cdev; | ||
| 67 | struct device *node; | ||
| 68 | /* see gk20a_ctrl_priv */ | ||
| 69 | struct nvgpu_list_node privs; | ||
| 70 | /* guards modifications to the list and its contents */ | ||
| 71 | struct nvgpu_mutex privs_lock; | ||
| 72 | } ctrl; | ||
| 73 | |||
| 74 | struct { | ||
| 75 | struct cdev cdev; | ||
| 76 | struct device *node; | ||
| 77 | } as_dev; | ||
| 78 | |||
| 79 | struct { | ||
| 80 | struct cdev cdev; | ||
| 81 | struct device *node; | ||
| 82 | } dbg; | ||
| 83 | |||
| 84 | struct { | ||
| 85 | struct cdev cdev; | ||
| 86 | struct device *node; | ||
| 87 | } prof; | ||
| 88 | |||
| 89 | struct { | ||
| 90 | struct cdev cdev; | ||
| 91 | struct device *node; | ||
| 92 | } tsg; | ||
| 93 | |||
| 94 | struct { | ||
| 95 | struct cdev cdev; | ||
| 96 | struct device *node; | ||
| 97 | } ctxsw; | ||
| 98 | |||
| 99 | struct { | ||
| 100 | struct cdev cdev; | ||
| 101 | struct device *node; | ||
| 102 | } sched; | ||
| 103 | |||
| 104 | dev_t cdev_region; | ||
| 105 | |||
| 106 | struct devfreq *devfreq; | ||
| 107 | |||
| 108 | struct device_dma_parameters dma_parms; | ||
| 109 | |||
| 110 | atomic_t hw_irq_stall_count; | ||
| 111 | atomic_t hw_irq_nonstall_count; | ||
| 112 | |||
| 113 | struct nvgpu_cond sw_irq_stall_last_handled_wq; | ||
| 114 | atomic_t sw_irq_stall_last_handled; | ||
| 115 | |||
| 116 | atomic_t nonstall_ops; | ||
| 117 | |||
| 118 | struct nvgpu_cond sw_irq_nonstall_last_handled_wq; | ||
| 119 | atomic_t sw_irq_nonstall_last_handled; | ||
| 120 | |||
| 121 | struct work_struct nonstall_fn_work; | ||
| 122 | struct workqueue_struct *nonstall_work_queue; | ||
| 123 | |||
| 124 | struct resource *reg_mem; | ||
| 125 | void __iomem *regs; | ||
| 126 | void __iomem *regs_saved; | ||
| 127 | |||
| 128 | struct resource *bar1_mem; | ||
| 129 | void __iomem *bar1; | ||
| 130 | void __iomem *bar1_saved; | ||
| 131 | |||
| 132 | void __iomem *usermode_regs; | ||
| 133 | void __iomem *usermode_regs_saved; | ||
| 134 | |||
| 135 | u64 regs_bus_addr; | ||
| 136 | |||
| 137 | struct nvgpu_os_linux_ops ops; | ||
| 138 | |||
| 139 | #ifdef CONFIG_DEBUG_FS | ||
| 140 | struct dentry *debugfs; | ||
| 141 | struct dentry *debugfs_alias; | ||
| 142 | |||
| 143 | struct dentry *debugfs_ltc_enabled; | ||
| 144 | struct dentry *debugfs_timeouts_enabled; | ||
| 145 | struct dentry *debugfs_gr_idle_timeout_default; | ||
| 146 | struct dentry *debugfs_disable_bigpage; | ||
| 147 | struct dentry *debugfs_gr_default_attrib_cb_size; | ||
| 148 | |||
| 149 | struct dentry *debugfs_timeslice_low_priority_us; | ||
| 150 | struct dentry *debugfs_timeslice_medium_priority_us; | ||
| 151 | struct dentry *debugfs_timeslice_high_priority_us; | ||
| 152 | struct dentry *debugfs_runlist_interleave; | ||
| 153 | struct dentry *debugfs_allocators; | ||
| 154 | struct dentry *debugfs_xve; | ||
| 155 | struct dentry *debugfs_kmem; | ||
| 156 | struct dentry *debugfs_hal; | ||
| 157 | struct dentry *debugfs_ltc; | ||
| 158 | |||
| 159 | struct dentry *debugfs_force_preemption_cilp; | ||
| 160 | struct dentry *debugfs_force_preemption_gfxp; | ||
| 161 | struct dentry *debugfs_dump_ctxsw_stats; | ||
| 162 | #endif | ||
| 163 | DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5); | ||
| 164 | struct dev_ext_attribute *ecc_attrs; | ||
| 165 | |||
| 166 | struct gk20a_cde_app cde_app; | ||
| 167 | |||
| 168 | struct rw_semaphore busy_lock; | ||
| 169 | |||
| 170 | bool init_done; | ||
| 171 | }; | ||
| 172 | |||
| 173 | static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) | ||
| 174 | { | ||
| 175 | return container_of(g, struct nvgpu_os_linux, g); | ||
| 176 | } | ||
| 177 | |||
| 178 | static inline struct device *dev_from_gk20a(struct gk20a *g) | ||
| 179 | { | ||
| 180 | return nvgpu_os_linux_from_gk20a(g)->dev; | ||
| 181 | } | ||
| 182 | |||
| 183 | #define INTERFACE_NAME "nvhost%s-gpu" | ||
| 184 | |||
| 185 | #define totalram_size_in_mb (totalram_pages >> (10 - (PAGE_SHIFT - 10))) | ||
| 186 | |||
| 187 | #endif | ||
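
Two small points in this header are worth unpacking. First, the totalram_size_in_mb shift, 10 - (PAGE_SHIFT - 10), equals 20 - PAGE_SHIFT, so the macro simply converts a page count into MiB. Second, nvgpu_os_linux_from_gk20a() relies on struct gk20a being embedded (not pointed to) inside struct nvgpu_os_linux, so container_of() can recover the wrapper from a pointer to the member. A minimal, self-contained sketch of that trick, with hypothetical stand-in types:

    #include <stdio.h>
    #include <stddef.h>

    /* Userspace stand-in for the kernel's container_of() macro. */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct gpu { int id; };             /* plays the role of struct gk20a */

    struct os_wrapper {                 /* plays the role of nvgpu_os_linux */
        struct gpu g;                   /* embedded, not a pointer */
        const char *name;
    };

    static struct os_wrapper *wrapper_from_gpu(struct gpu *g)
    {
        return container_of(g, struct os_wrapper, g);
    }

    int main(void)
    {
        struct os_wrapper w = { .g = { .id = 7 }, .name = "demo" };
        struct gpu *g = &w.g;           /* only the member pointer is passed around */

        printf("recovered wrapper: %s (gpu id %d)\n",
               wrapper_from_gpu(g)->name, wrapper_from_gpu(g)->id);
        return 0;
    }

Because the member offset is folded in at compile time, the conversion costs nothing at runtime; core code can pass struct gk20a * everywhere while the Linux layer still reaches its OS-specific state.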
diff --git a/include/os/linux/os_ops.c b/include/os/linux/os_ops.c new file mode 100644 index 0000000..f1ab4b1 --- /dev/null +++ b/include/os/linux/os_ops.c | |||
| @@ -0,0 +1,61 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include "os_linux.h" | ||
| 18 | |||
| 19 | #include "os_ops_gm20b.h" | ||
| 20 | #include "os_ops_gp10b.h" | ||
| 21 | #include "os_ops_gp106.h" | ||
| 22 | #include "os_ops_gv11b.h" | ||
| 23 | #include "os_ops_gv100.h" | ||
| 24 | |||
| 25 | #if defined(CONFIG_TEGRA_GPU_NEXT) | ||
| 26 | #include "nvgpu_gpuid_next.h" | ||
| 27 | #endif | ||
| 28 | |||
| 29 | int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l) | ||
| 30 | { | ||
| 31 | struct gk20a *g = &l->g; | ||
| 32 | u32 ver = g->params.gpu_arch + g->params.gpu_impl; | ||
| 33 | |||
| 34 | switch (ver) { | ||
| 35 | case GK20A_GPUID_GM20B: | ||
| 36 | case GK20A_GPUID_GM20B_B: | ||
| 37 | nvgpu_gm20b_init_os_ops(l); | ||
| 38 | break; | ||
| 39 | case NVGPU_GPUID_GP10B: | ||
| 40 | nvgpu_gp10b_init_os_ops(l); | ||
| 41 | break; | ||
| 42 | case NVGPU_GPUID_GP106: | ||
| 43 | nvgpu_gp106_init_os_ops(l); | ||
| 44 | break; | ||
| 45 | case NVGPU_GPUID_GV100: | ||
| 46 | nvgpu_gv100_init_os_ops(l); | ||
| 47 | break; | ||
| 48 | case NVGPU_GPUID_GV11B: | ||
| 49 | nvgpu_gv11b_init_os_ops(l); | ||
| 50 | break; | ||
| 51 | #if defined(CONFIG_TEGRA_GPU_NEXT) | ||
| 52 | case NVGPU_GPUID_NEXT: | ||
| 53 | NVGPU_NEXT_INIT_OS_OPS(l); | ||
| 54 | break; | ||
| 55 | #endif | ||
| 56 | default: | ||
| 57 | break; | ||
| 58 | } | ||
| 59 | |||
| 60 | return 0; | ||
| 61 | } | ||
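
The switch above keys on gpu_arch + gpu_impl, the composed-id scheme behind the GK20A_GPUID_*/NVGPU_GPUID_* constants; unknown chips fall through silently with no chip-specific ops installed. A hedged sketch of that keying scheme, using made-up id values rather than the real ones:

    #include <stdio.h>

    /* Illustrative ids only: arch in the high bits, impl in the low byte. */
    #define DEMO_ARCH_A   0x0100u
    #define DEMO_ARCH_B   0x0200u
    #define DEMO_GPUID_A1 (DEMO_ARCH_A + 0x0Bu)
    #define DEMO_GPUID_B1 (DEMO_ARCH_B + 0x0Bu)

    static const char *ops_for(unsigned int arch, unsigned int impl)
    {
        switch (arch + impl) {
        case DEMO_GPUID_A1:
            return "A1 chip ops";
        case DEMO_GPUID_B1:
            return "B1 chip ops";
        default:
            return "no chip-specific ops";  /* mirrors the silent default above */
        }
    }

    int main(void)
    {
        printf("%s\n", ops_for(DEMO_ARCH_A, 0x0B));  /* A1 chip ops */
        printf("%s\n", ops_for(DEMO_ARCH_B, 0xFF));  /* no chip-specific ops */
        return 0;
    }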
diff --git a/include/os/linux/os_ops.h b/include/os/linux/os_ops.h new file mode 100644 index 0000000..af3ce0a --- /dev/null +++ b/include/os/linux/os_ops.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __LINUX_OS_OPS_H | ||
| 18 | #define __LINUX_OS_OPS_H | ||
| 19 | |||
| 20 | int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l); | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/os_ops_gm20b.c b/include/os/linux/os_ops_gm20b.c new file mode 100644 index 0000000..77aee39 --- /dev/null +++ b/include/os/linux/os_ops_gm20b.c | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include "os_linux.h" | ||
| 18 | |||
| 19 | #include "cde_gm20b.h" | ||
| 20 | #include "debug_clk_gm20b.h" | ||
| 21 | #include "debug_fecs_trace.h" | ||
| 22 | |||
| 23 | |||
| 24 | static struct nvgpu_os_linux_ops gm20b_os_linux_ops = { | ||
| 25 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 26 | .cde = { | ||
| 27 | .get_program_numbers = gm20b_cde_get_program_numbers, | ||
| 28 | }, | ||
| 29 | #endif | ||
| 30 | .clk = { | ||
| 31 | .init_debugfs = gm20b_clk_init_debugfs, | ||
| 32 | }, | ||
| 33 | |||
| 34 | .fecs_trace = { | ||
| 35 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
| 36 | }, | ||
| 37 | }; | ||
| 38 | |||
| 39 | void nvgpu_gm20b_init_os_ops(struct nvgpu_os_linux *l) | ||
| 40 | { | ||
| 41 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 42 | l->ops.cde = gm20b_os_linux_ops.cde; | ||
| 43 | #endif | ||
| 44 | l->ops.clk = gm20b_os_linux_ops.clk; | ||
| 45 | |||
| 46 | l->ops.fecs_trace = gm20b_os_linux_ops.fecs_trace; | ||
| 47 | } | ||
diff --git a/include/os/linux/os_ops_gm20b.h b/include/os/linux/os_ops_gm20b.h new file mode 100644 index 0000000..7d27e40 --- /dev/null +++ b/include/os/linux/os_ops_gm20b.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __LINUX_OS_OPS_GM20B_H | ||
| 18 | #define __LINUX_OS_OPS_GM20B_H | ||
| 19 | |||
| 20 | void nvgpu_gm20b_init_os_ops(struct nvgpu_os_linux *l); | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/os_ops_gp106.c b/include/os/linux/os_ops_gp106.c new file mode 100644 index 0000000..14f1b00 --- /dev/null +++ b/include/os/linux/os_ops_gp106.c | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include "os_linux.h" | ||
| 18 | |||
| 19 | #include "debug_clk_gp106.h" | ||
| 20 | #include "debug_therm_gp106.h" | ||
| 21 | #include "debug_fecs_trace.h" | ||
| 22 | |||
| 23 | static struct nvgpu_os_linux_ops gp106_os_linux_ops = { | ||
| 24 | .clk = { | ||
| 25 | .init_debugfs = gp106_clk_init_debugfs, | ||
| 26 | }, | ||
| 27 | .therm = { | ||
| 28 | .init_debugfs = gp106_therm_init_debugfs, | ||
| 29 | }, | ||
| 30 | .fecs_trace = { | ||
| 31 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
| 32 | }, | ||
| 33 | }; | ||
| 34 | |||
| 35 | void nvgpu_gp106_init_os_ops(struct nvgpu_os_linux *l) | ||
| 36 | { | ||
| 37 | l->ops.clk = gp106_os_linux_ops.clk; | ||
| 38 | l->ops.therm = gp106_os_linux_ops.therm; | ||
| 39 | l->ops.fecs_trace = gp106_os_linux_ops.fecs_trace; | ||
| 40 | } | ||
diff --git a/include/os/linux/os_ops_gp106.h b/include/os/linux/os_ops_gp106.h new file mode 100644 index 0000000..7d423d5 --- /dev/null +++ b/include/os/linux/os_ops_gp106.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __LINUX_OS_OPS_GP106_H | ||
| 18 | #define __LINUX_OS_OPS_GP106_H | ||
| 19 | |||
| 20 | void nvgpu_gp106_init_os_ops(struct nvgpu_os_linux *l); | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/os_ops_gp10b.c b/include/os/linux/os_ops_gp10b.c new file mode 100644 index 0000000..e2891f7 --- /dev/null +++ b/include/os/linux/os_ops_gp10b.c | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include "os_linux.h" | ||
| 18 | |||
| 19 | #include "cde_gp10b.h" | ||
| 20 | #include "debug_fecs_trace.h" | ||
| 21 | |||
| 22 | static struct nvgpu_os_linux_ops gp10b_os_linux_ops = { | ||
| 23 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 24 | .cde = { | ||
| 25 | .get_program_numbers = gp10b_cde_get_program_numbers, | ||
| 26 | .need_scatter_buffer = gp10b_need_scatter_buffer, | ||
| 27 | .populate_scatter_buffer = gp10b_populate_scatter_buffer, | ||
| 28 | }, | ||
| 29 | #endif | ||
| 30 | .fecs_trace = { | ||
| 31 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
| 32 | }, | ||
| 33 | }; | ||
| 34 | |||
| 35 | void nvgpu_gp10b_init_os_ops(struct nvgpu_os_linux *l) | ||
| 36 | { | ||
| 37 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 38 | l->ops.cde = gp10b_os_linux_ops.cde; | ||
| 39 | #endif | ||
| 40 | l->ops.fecs_trace = gp10b_os_linux_ops.fecs_trace; | ||
| 41 | } | ||
diff --git a/include/os/linux/os_ops_gp10b.h b/include/os/linux/os_ops_gp10b.h new file mode 100644 index 0000000..0be1bca --- /dev/null +++ b/include/os/linux/os_ops_gp10b.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __LINUX_OS_OPS_GP10B_H | ||
| 18 | #define __LINUX_OS_OPS_GP10B_H | ||
| 19 | |||
| 20 | void nvgpu_gp10b_init_os_ops(struct nvgpu_os_linux *l); | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/os_ops_gv100.c b/include/os/linux/os_ops_gv100.c new file mode 100644 index 0000000..9d92bdf --- /dev/null +++ b/include/os/linux/os_ops_gv100.c | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include "os_linux.h" | ||
| 18 | |||
| 19 | #include "debug_clk_gv100.h" | ||
| 20 | #include "debug_therm_gp106.h" | ||
| 21 | #include "debug_fecs_trace.h" | ||
| 22 | |||
| 23 | static struct nvgpu_os_linux_ops gv100_os_linux_ops = { | ||
| 24 | .clk = { | ||
| 25 | .init_debugfs = gv100_clk_init_debugfs, | ||
| 26 | }, | ||
| 27 | .therm = { | ||
| 28 | .init_debugfs = gp106_therm_init_debugfs, | ||
| 29 | }, | ||
| 30 | .fecs_trace = { | ||
| 31 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
| 32 | }, | ||
| 33 | }; | ||
| 34 | |||
| 35 | void nvgpu_gv100_init_os_ops(struct nvgpu_os_linux *l) | ||
| 36 | { | ||
| 37 | l->ops.clk = gv100_os_linux_ops.clk; | ||
| 38 | l->ops.therm = gv100_os_linux_ops.therm; | ||
| 39 | l->ops.fecs_trace = gv100_os_linux_ops.fecs_trace; | ||
| 40 | } | ||
diff --git a/include/os/linux/os_ops_gv100.h b/include/os/linux/os_ops_gv100.h new file mode 100644 index 0000000..43923b2 --- /dev/null +++ b/include/os/linux/os_ops_gv100.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef __LINUX_OS_OPS_GV100_H | ||
| 18 | #define __LINUX_OS_OPS_GV100_H | ||
| 19 | |||
| 20 | void nvgpu_gv100_init_os_ops(struct nvgpu_os_linux *l); | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/os_ops_gv11b.c b/include/os/linux/os_ops_gv11b.c new file mode 100644 index 0000000..a82ad0a --- /dev/null +++ b/include/os/linux/os_ops_gv11b.c | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include "os_linux.h" | ||
| 18 | |||
| 19 | #include "debug_fecs_trace.h" | ||
| 20 | |||
| 21 | static struct nvgpu_os_linux_ops gv11b_os_linux_ops = { | ||
| 22 | .fecs_trace = { | ||
| 23 | .init_debugfs = nvgpu_fecs_trace_init_debugfs, | ||
| 24 | }, | ||
| 25 | }; | ||
| 26 | |||
| 27 | void nvgpu_gv11b_init_os_ops(struct nvgpu_os_linux *l) | ||
| 28 | { | ||
| 29 | l->ops.fecs_trace = gv11b_os_linux_ops.fecs_trace; | ||
| 30 | } | ||
diff --git a/include/os/linux/os_ops_gv11b.h b/include/os/linux/os_ops_gv11b.h new file mode 100644 index 0000000..eef6c4a --- /dev/null +++ b/include/os/linux/os_ops_gv11b.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef LINUX_OS_OPS_GV11B_H | ||
| 18 | #define LINUX_OS_OPS_GV11B_H | ||
| 19 | |||
| 20 | struct nvgpu_os_linux; | ||
| 21 | |||
| 22 | void nvgpu_gv11b_init_os_ops(struct nvgpu_os_linux *l); | ||
| 23 | |||
| 24 | #endif | ||
diff --git a/include/os/linux/os_sched.c b/include/os/linux/os_sched.c new file mode 100644 index 0000000..9a25da1 --- /dev/null +++ b/include/os/linux/os_sched.c | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <nvgpu/os_sched.h> | ||
| 15 | |||
| 16 | #include <linux/sched.h> | ||
| 17 | |||
| 18 | int nvgpu_current_tid(struct gk20a *g) | ||
| 19 | { | ||
| 20 | return current->pid; | ||
| 21 | } | ||
| 22 | |||
| 23 | int nvgpu_current_pid(struct gk20a *g) | ||
| 24 | { | ||
| 25 | return current->tgid; | ||
| 26 | } | ||
| 27 | |||
| 28 | void __nvgpu_print_current(struct gk20a *g, const char *func_name, int line, | ||
| 29 | void *ctx, enum nvgpu_log_type type) | ||
| 30 | { | ||
| 31 | __nvgpu_log_msg(g, func_name, line, type, current->comm); | ||
| 32 | } | ||
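
The tid/pid mapping above looks inverted but is correct: in the kernel, current->pid is the per-thread id and current->tgid is the thread-group (process) id. A userspace analogue, assuming a glibc new enough (2.30 or later) to provide gettid():

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* getpid() reports the kernel's tgid; gettid() reports the kernel's
         * per-task pid. In the main thread the two values coincide. */
        printf("process id (tgid) = %d, thread id (pid) = %d\n",
               (int)getpid(), (int)gettid());
        return 0;
    }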
diff --git a/include/os/linux/pci.c b/include/os/linux/pci.c new file mode 100644 index 0000000..07071d1 --- /dev/null +++ b/include/os/linux/pci.c | |||
| @@ -0,0 +1,854 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/pci.h> | ||
| 18 | #include <linux/interrupt.h> | ||
| 19 | #include <linux/pm_runtime.h> | ||
| 20 | #include <linux/of_platform.h> | ||
| 21 | #include <linux/of_address.h> | ||
| 22 | |||
| 23 | #include <nvgpu/nvhost.h> | ||
| 24 | #include <nvgpu/nvgpu_common.h> | ||
| 25 | #include <nvgpu/kmem.h> | ||
| 26 | #include <nvgpu/enabled.h> | ||
| 27 | #include <nvgpu/nvlink.h> | ||
| 28 | #include <nvgpu/soc.h> | ||
| 29 | #include <nvgpu/sim.h> | ||
| 30 | #include <nvgpu/gk20a.h> | ||
| 31 | |||
| 32 | #include "nvlink.h" | ||
| 33 | #include "clk/clk.h" | ||
| 34 | #include "clk/clk_mclk.h" | ||
| 35 | #include "module.h" | ||
| 36 | #include "intr.h" | ||
| 37 | #include "sysfs.h" | ||
| 38 | #include "os_linux.h" | ||
| 39 | #include "platform_gk20a.h" | ||
| 40 | |||
| 41 | #include "pci.h" | ||
| 42 | #include "pci_usermode.h" | ||
| 43 | |||
| 44 | #include "driver_common.h" | ||
| 45 | |||
| 46 | #define PCI_INTERFACE_NAME "card-%s%%s" | ||
| 47 | |||
| 48 | static int nvgpu_pci_tegra_probe(struct device *dev) | ||
| 49 | { | ||
| 50 | return 0; | ||
| 51 | } | ||
| 52 | |||
| 53 | static int nvgpu_pci_tegra_remove(struct device *dev) | ||
| 54 | { | ||
| 55 | return 0; | ||
| 56 | } | ||
| 57 | |||
| 58 | static bool nvgpu_pci_tegra_is_railgated(struct device *pdev) | ||
| 59 | { | ||
| 60 | return false; | ||
| 61 | } | ||
| 62 | |||
| 63 | static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate) | ||
| 64 | { | ||
| 65 | long ret = (long)rate; | ||
| 66 | |||
| 67 | if (rate == UINT_MAX) | ||
| 68 | ret = BOOT_GPC2CLK_MHZ * 1000000UL; | ||
| 69 | |||
| 70 | return ret; | ||
| 71 | } | ||
| 72 | |||
| 73 | static struct gk20a_platform nvgpu_pci_device[] = { | ||
| 74 | { /* DEVICE=0x1c35 */ | ||
| 75 | /* ptimer src frequency in hz */ | ||
| 76 | .ptimer_src_freq = 31250000, | ||
| 77 | |||
| 78 | .probe = nvgpu_pci_tegra_probe, | ||
| 79 | .remove = nvgpu_pci_tegra_remove, | ||
| 80 | |||
| 81 | /* power management configuration */ | ||
| 82 | .railgate_delay_init = 500, | ||
| 83 | .can_railgate_init = false, | ||
| 84 | .can_elpg_init = true, | ||
| 85 | .enable_elpg = true, | ||
| 86 | .enable_elcg = false, | ||
| 87 | .enable_slcg = true, | ||
| 88 | .enable_blcg = true, | ||
| 89 | .enable_mscg = true, | ||
| 90 | .can_slcg = true, | ||
| 91 | .can_blcg = true, | ||
| 92 | .can_elcg = true, | ||
| 93 | |||
| 94 | .disable_aspm = true, | ||
| 95 | |||
| 96 | /* power management callbacks */ | ||
| 97 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 98 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 99 | |||
| 100 | .ch_wdt_timeout_ms = 7000, | ||
| 101 | |||
| 102 | .honors_aperture = true, | ||
| 103 | .dma_mask = DMA_BIT_MASK(40), | ||
| 104 | .vbios_min_version = 0x86063000, | ||
| 105 | .hardcode_sw_threshold = true, | ||
| 106 | .ina3221_dcb_index = 0, | ||
| 107 | .ina3221_i2c_address = 0x84, | ||
| 108 | .ina3221_i2c_port = 0x2, | ||
| 109 | }, | ||
| 110 | { /* DEVICE=0x1c36 */ | ||
| 111 | /* ptimer src frequency in hz */ | ||
| 112 | .ptimer_src_freq = 31250000, | ||
| 113 | |||
| 114 | .probe = nvgpu_pci_tegra_probe, | ||
| 115 | .remove = nvgpu_pci_tegra_remove, | ||
| 116 | |||
| 117 | /* power management configuration */ | ||
| 118 | .railgate_delay_init = 500, | ||
| 119 | .can_railgate_init = false, | ||
| 120 | .can_elpg_init = true, | ||
| 121 | .enable_elpg = true, | ||
| 122 | .enable_elcg = false, | ||
| 123 | .enable_slcg = true, | ||
| 124 | .enable_blcg = true, | ||
| 125 | .enable_mscg = true, | ||
| 126 | .can_slcg = true, | ||
| 127 | .can_blcg = true, | ||
| 128 | .can_elcg = true, | ||
| 129 | |||
| 130 | .disable_aspm = true, | ||
| 131 | |||
| 132 | /* power management callbacks */ | ||
| 133 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 134 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 135 | |||
| 136 | .ch_wdt_timeout_ms = 7000, | ||
| 137 | |||
| 138 | .honors_aperture = true, | ||
| 139 | .dma_mask = DMA_BIT_MASK(40), | ||
| 140 | .vbios_min_version = 0x86062d00, | ||
| 141 | .hardcode_sw_threshold = true, | ||
| 142 | .ina3221_dcb_index = 0, | ||
| 143 | .ina3221_i2c_address = 0x84, | ||
| 144 | .ina3221_i2c_port = 0x2, | ||
| 145 | }, | ||
| 146 | { /* DEVICE=0x1c37 */ | ||
| 147 | /* ptimer src frequency in hz */ | ||
| 148 | .ptimer_src_freq = 31250000, | ||
| 149 | |||
| 150 | .probe = nvgpu_pci_tegra_probe, | ||
| 151 | .remove = nvgpu_pci_tegra_remove, | ||
| 152 | |||
| 153 | /* power management configuration */ | ||
| 154 | .railgate_delay_init = 500, | ||
| 155 | .can_railgate_init = false, | ||
| 156 | .can_elpg_init = true, | ||
| 157 | .enable_elpg = true, | ||
| 158 | .enable_elcg = false, | ||
| 159 | .enable_slcg = true, | ||
| 160 | .enable_blcg = true, | ||
| 161 | .enable_mscg = true, | ||
| 162 | .can_slcg = true, | ||
| 163 | .can_blcg = true, | ||
| 164 | .can_elcg = true, | ||
| 165 | |||
| 166 | .disable_aspm = true, | ||
| 167 | |||
| 168 | /* power management callbacks */ | ||
| 169 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 170 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 171 | |||
| 172 | .ch_wdt_timeout_ms = 7000, | ||
| 173 | |||
| 174 | .honors_aperture = true, | ||
| 175 | .dma_mask = DMA_BIT_MASK(40), | ||
| 176 | .vbios_min_version = 0x86063000, | ||
| 177 | .hardcode_sw_threshold = true, | ||
| 178 | .ina3221_dcb_index = 0, | ||
| 179 | .ina3221_i2c_address = 0x84, | ||
| 180 | .ina3221_i2c_port = 0x2, | ||
| 181 | }, | ||
| 182 | { /* DEVICE=0x1c75 */ | ||
| 183 | /* ptimer src frequency in hz */ | ||
| 184 | .ptimer_src_freq = 31250000, | ||
| 185 | |||
| 186 | .probe = nvgpu_pci_tegra_probe, | ||
| 187 | .remove = nvgpu_pci_tegra_remove, | ||
| 188 | |||
| 189 | /* power management configuration */ | ||
| 190 | .railgate_delay_init = 500, | ||
| 191 | .can_railgate_init = false, | ||
| 192 | .can_elpg_init = true, | ||
| 193 | .enable_elpg = true, | ||
| 194 | .enable_elcg = false, | ||
| 195 | .enable_slcg = true, | ||
| 196 | .enable_blcg = true, | ||
| 197 | .enable_mscg = true, | ||
| 198 | .can_slcg = true, | ||
| 199 | .can_blcg = true, | ||
| 200 | .can_elcg = true, | ||
| 201 | |||
| 202 | .disable_aspm = true, | ||
| 203 | |||
| 204 | /* power management callbacks */ | ||
| 205 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 206 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 207 | |||
| 208 | .ch_wdt_timeout_ms = 7000, | ||
| 209 | |||
| 210 | .honors_aperture = true, | ||
| 211 | .dma_mask = DMA_BIT_MASK(40), | ||
| 212 | .vbios_min_version = 0x86065300, | ||
| 213 | .hardcode_sw_threshold = false, | ||
| 214 | .ina3221_dcb_index = 1, | ||
| 215 | .ina3221_i2c_address = 0x80, | ||
| 216 | .ina3221_i2c_port = 0x1, | ||
| 217 | }, | ||
| 218 | { /* DEVICE=PG503 SKU 201 */ | ||
| 219 | /* ptimer src frequency in hz */ | ||
| 220 | .ptimer_src_freq = 31250000, | ||
| 221 | |||
| 222 | .probe = nvgpu_pci_tegra_probe, | ||
| 223 | .remove = nvgpu_pci_tegra_remove, | ||
| 224 | |||
| 225 | /* power management configuration */ | ||
| 226 | .railgate_delay_init = 500, | ||
| 227 | .can_railgate_init = false, | ||
| 228 | .can_elpg_init = false, | ||
| 229 | .enable_elpg = false, | ||
| 230 | .enable_elcg = false, | ||
| 231 | .enable_slcg = false, | ||
| 232 | .enable_blcg = false, | ||
| 233 | .enable_mscg = false, | ||
| 234 | .can_slcg = false, | ||
| 235 | .can_blcg = false, | ||
| 236 | .can_elcg = false, | ||
| 237 | |||
| 238 | .disable_aspm = true, | ||
| 239 | |||
| 240 | /* power management callbacks */ | ||
| 241 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 242 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 243 | |||
| 244 | .ch_wdt_timeout_ms = 7000, | ||
| 245 | |||
| 246 | .honors_aperture = true, | ||
| 247 | .dma_mask = DMA_BIT_MASK(40), | ||
| 248 | .vbios_min_version = 0x88001e00, | ||
| 249 | .hardcode_sw_threshold = false, | ||
| 250 | .run_preos = true, | ||
| 251 | }, | ||
| 252 | { /* DEVICE=PG503 SKU 200 ES */ | ||
| 253 | /* ptimer src frequency in hz */ | ||
| 254 | .ptimer_src_freq = 31250000, | ||
| 255 | |||
| 256 | .probe = nvgpu_pci_tegra_probe, | ||
| 257 | .remove = nvgpu_pci_tegra_remove, | ||
| 258 | |||
| 259 | /* power management configuration */ | ||
| 260 | .railgate_delay_init = 500, | ||
| 261 | .can_railgate_init = false, | ||
| 262 | .can_elpg_init = false, | ||
| 263 | .enable_elpg = false, | ||
| 264 | .enable_elcg = false, | ||
| 265 | .enable_slcg = false, | ||
| 266 | .enable_blcg = false, | ||
| 267 | .enable_mscg = false, | ||
| 268 | .can_slcg = false, | ||
| 269 | .can_blcg = false, | ||
| 270 | .can_elcg = false, | ||
| 271 | |||
| 272 | .disable_aspm = true, | ||
| 273 | |||
| 274 | /* power management callbacks */ | ||
| 275 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 276 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 277 | |||
| 278 | .ch_wdt_timeout_ms = 7000, | ||
| 279 | |||
| 280 | .honors_aperture = true, | ||
| 281 | .dma_mask = DMA_BIT_MASK(40), | ||
| 282 | .vbios_min_version = 0x88001e00, | ||
| 283 | .hardcode_sw_threshold = false, | ||
| 284 | .run_preos = true, | ||
| 285 | }, | ||
| 286 | { | ||
| 287 | /* ptimer src frequency in hz */ | ||
| 288 | .ptimer_src_freq = 31250000, | ||
| 289 | |||
| 290 | .probe = nvgpu_pci_tegra_probe, | ||
| 291 | .remove = nvgpu_pci_tegra_remove, | ||
| 292 | |||
| 293 | /* power management configuration */ | ||
| 294 | .railgate_delay_init = 500, | ||
| 295 | .can_railgate_init = false, | ||
| 296 | .can_elpg_init = false, | ||
| 297 | .enable_elpg = false, | ||
| 298 | .enable_elcg = false, | ||
| 299 | .enable_slcg = false, | ||
| 300 | .enable_blcg = false, | ||
| 301 | .enable_mscg = false, | ||
| 302 | .can_slcg = false, | ||
| 303 | .can_blcg = false, | ||
| 304 | .can_elcg = false, | ||
| 305 | |||
| 306 | .disable_aspm = true, | ||
| 307 | |||
| 308 | /* power management callbacks */ | ||
| 309 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 310 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 311 | |||
| 312 | .ch_wdt_timeout_ms = 7000, | ||
| 313 | |||
| 314 | .honors_aperture = true, | ||
| 315 | .dma_mask = DMA_BIT_MASK(40), | ||
| 316 | .vbios_min_version = 0x88000126, | ||
| 317 | .hardcode_sw_threshold = false, | ||
| 318 | .run_preos = true, | ||
| 319 | .has_syncpoints = true, | ||
| 320 | }, | ||
| 321 | { /* SKU250 */ | ||
| 322 | /* ptimer src frequency in hz */ | ||
| 323 | .ptimer_src_freq = 31250000, | ||
| 324 | |||
| 325 | .probe = nvgpu_pci_tegra_probe, | ||
| 326 | .remove = nvgpu_pci_tegra_remove, | ||
| 327 | |||
| 328 | /* power management configuration */ | ||
| 329 | .railgate_delay_init = 500, | ||
| 330 | .can_railgate_init = false, | ||
| 331 | .can_elpg_init = false, | ||
| 332 | .enable_elpg = false, | ||
| 333 | .enable_elcg = false, | ||
| 334 | .enable_slcg = true, | ||
| 335 | .enable_blcg = true, | ||
| 336 | .enable_mscg = false, | ||
| 337 | .can_slcg = true, | ||
| 338 | .can_blcg = true, | ||
| 339 | .can_elcg = false, | ||
| 340 | |||
| 341 | .disable_aspm = true, | ||
| 342 | |||
| 343 | /* power management callbacks */ | ||
| 344 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 345 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 346 | |||
| 347 | .ch_wdt_timeout_ms = 7000, | ||
| 348 | |||
| 349 | .honors_aperture = true, | ||
| 350 | .dma_mask = DMA_BIT_MASK(40), | ||
| 351 | .vbios_min_version = 0x1, | ||
| 352 | .hardcode_sw_threshold = false, | ||
| 353 | .run_preos = true, | ||
| 354 | .has_syncpoints = true, | ||
| 355 | }, | ||
| 356 | { /* SKU 0x1e3f */ | ||
| 357 | /* ptimer src frequency in hz */ | ||
| 358 | .ptimer_src_freq = 31250000, | ||
| 359 | |||
| 360 | .probe = nvgpu_pci_tegra_probe, | ||
| 361 | .remove = nvgpu_pci_tegra_remove, | ||
| 362 | |||
| 363 | /* power management configuration */ | ||
| 364 | .railgate_delay_init = 500, | ||
| 365 | .can_railgate_init = false, | ||
| 366 | .can_elpg_init = false, | ||
| 367 | .enable_elpg = false, | ||
| 368 | .enable_elcg = false, | ||
| 369 | .enable_slcg = false, | ||
| 370 | .enable_blcg = false, | ||
| 371 | .enable_mscg = false, | ||
| 372 | .can_slcg = false, | ||
| 373 | .can_blcg = false, | ||
| 374 | .can_elcg = false, | ||
| 375 | |||
| 376 | .disable_aspm = true, | ||
| 377 | |||
| 378 | /* power management callbacks */ | ||
| 379 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
| 380 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
| 381 | |||
| 382 | /* | ||
| 383 | * WAR: PCIe x1 is very slow; use a very high timeout until NVLINK is up. | ||
| 384 | */ | ||
| 385 | .ch_wdt_timeout_ms = 30000, | ||
| 386 | |||
| 387 | .honors_aperture = true, | ||
| 388 | .dma_mask = DMA_BIT_MASK(40), | ||
| 389 | .vbios_min_version = 0x1, | ||
| 390 | .hardcode_sw_threshold = false, | ||
| 391 | .unified_memory = false, | ||
| 392 | }, | ||
| 394 | }; | ||
| 395 | |||
| 396 | static const struct pci_device_id nvgpu_pci_table[] = { | ||
| 397 | { | ||
| 398 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35), | ||
| 399 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 400 | .class_mask = 0xff << 16, | ||
| 401 | .driver_data = 0, | ||
| 402 | }, | ||
| 403 | { | ||
| 404 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36), | ||
| 405 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 406 | .class_mask = 0xff << 16, | ||
| 407 | .driver_data = 1, | ||
| 408 | }, | ||
| 409 | { | ||
| 410 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37), | ||
| 411 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 412 | .class_mask = 0xff << 16, | ||
| 413 | .driver_data = 2, | ||
| 414 | }, | ||
| 415 | { | ||
| 416 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75), | ||
| 417 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 418 | .class_mask = 0xff << 16, | ||
| 419 | .driver_data = 3, | ||
| 420 | }, | ||
| 421 | { | ||
| 422 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1), | ||
| 423 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 424 | .class_mask = 0xff << 16, | ||
| 425 | .driver_data = 4, | ||
| 426 | }, | ||
| 427 | { | ||
| 428 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0), | ||
| 429 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 430 | .class_mask = 0xff << 16, | ||
| 431 | .driver_data = 5, | ||
| 432 | }, | ||
| 433 | { | ||
| 434 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe), | ||
| 435 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 436 | .class_mask = 0xff << 16, | ||
| 437 | .driver_data = 6, | ||
| 438 | }, | ||
| 439 | { | ||
| 440 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1), | ||
| 441 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 442 | .class_mask = 0xff << 16, | ||
| 443 | .driver_data = 7, | ||
| 444 | }, | ||
| 445 | { | ||
| 446 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f), | ||
| 447 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
| 448 | .class_mask = 0xff << 16, | ||
| 449 | .driver_data = 8, | ||
| 450 | }, | ||
| 451 | {} | ||
| 452 | }; | ||
| 453 | |||
| 454 | static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id) | ||
| 455 | { | ||
| 456 | struct gk20a *g = dev_id; | ||
| 457 | irqreturn_t ret_stall; | ||
| 458 | irqreturn_t ret_nonstall; | ||
| 459 | |||
| 460 | ret_stall = nvgpu_intr_stall(g); | ||
| 461 | ret_nonstall = nvgpu_intr_nonstall(g); | ||
| 462 | |||
| 463 | #if defined(CONFIG_PCI_MSI) | ||
| 464 | /* Send MSI EOI */ | ||
| 465 | if (g->ops.xve.rearm_msi && g->msi_enabled) | ||
| 466 | g->ops.xve.rearm_msi(g); | ||
| 467 | #endif | ||
| 468 | |||
| 469 | return (ret_stall == IRQ_NONE) ? ret_nonstall : IRQ_WAKE_THREAD; | ||
| 470 | } | ||
| 471 | |||
| 472 | static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id) | ||
| 473 | { | ||
| 474 | struct gk20a *g = dev_id; | ||
| 475 | |||
| 476 | return nvgpu_intr_thread_stall(g); | ||
| 477 | } | ||
| 478 | |||
| 479 | static int nvgpu_pci_init_support(struct pci_dev *pdev) | ||
| 480 | { | ||
| 481 | int err = 0; | ||
| 482 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
| 483 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 484 | struct device *dev = &pdev->dev; | ||
| 485 | |||
| 486 | l->regs = nvgpu_devm_ioremap(dev, pci_resource_start(pdev, 0), | ||
| 487 | pci_resource_len(pdev, 0)); | ||
| 488 | if (IS_ERR(l->regs)) { | ||
| 489 | nvgpu_err(g, "failed to remap gk20a registers"); | ||
| 490 | err = PTR_ERR(l->regs); | ||
| 491 | goto fail; | ||
| 492 | } | ||
| 493 | |||
| 494 | l->regs_bus_addr = pci_resource_start(pdev, 0); | ||
| 495 | if (!l->regs_bus_addr) { | ||
| 496 | nvgpu_err(g, "failed to read register bus offset"); | ||
| 497 | err = -ENODEV; | ||
| 498 | goto fail; | ||
| 499 | } | ||
| 500 | |||
| 501 | l->bar1 = nvgpu_devm_ioremap(dev, pci_resource_start(pdev, 1), | ||
| 502 | pci_resource_len(pdev, 1)); | ||
| 503 | if (IS_ERR(l->bar1)) { | ||
| 504 | nvgpu_err(g, "failed to remap gk20a bar1"); | ||
| 505 | err = PTR_ERR(l->bar1); | ||
| 506 | goto fail; | ||
| 507 | } | ||
| 508 | |||
| 509 | err = nvgpu_init_sim_support_linux_pci(g); | ||
| 510 | if (err) | ||
| 511 | goto fail; | ||
| 512 | err = nvgpu_init_sim_support_pci(g); | ||
| 513 | if (err) | ||
| 514 | goto fail_sim; | ||
| 515 | |||
| 516 | nvgpu_pci_init_usermode_support(l); | ||
| 517 | |||
| 518 | return 0; | ||
| 519 | |||
| 520 | fail_sim: | ||
| 521 | nvgpu_remove_sim_support_linux_pci(g); | ||
| 522 | fail: | ||
| 523 | l->regs = NULL; | ||
| 524 | l->bar1 = NULL; | ||
| 528 | |||
| 529 | return err; | ||
| 530 | } | ||
| 531 | |||
| 532 | static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode) | ||
| 533 | { | ||
| 534 | if (mode) | ||
| 535 | *mode = S_IRUGO | S_IWUGO; | ||
| 536 | return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev)); | ||
| 537 | } | ||
| 538 | |||
| 539 | static struct class nvgpu_pci_class = { | ||
| 540 | .owner = THIS_MODULE, | ||
| 541 | .name = "nvidia-pci-gpu", | ||
| 542 | .devnode = nvgpu_pci_devnode, | ||
| 543 | }; | ||
| 544 | |||
| 545 | #ifdef CONFIG_PM | ||
| 546 | static int nvgpu_pci_pm_runtime_resume(struct device *dev) | ||
| 547 | { | ||
| 548 | return gk20a_pm_finalize_poweron(dev); | ||
| 549 | } | ||
| 550 | |||
| 551 | static int nvgpu_pci_pm_runtime_suspend(struct device *dev) | ||
| 552 | { | ||
| 553 | return 0; | ||
| 554 | } | ||
| 555 | |||
| 556 | static int nvgpu_pci_pm_resume(struct device *dev) | ||
| 557 | { | ||
| 558 | return gk20a_pm_finalize_poweron(dev); | ||
| 559 | } | ||
| 560 | |||
| 561 | static int nvgpu_pci_pm_suspend(struct device *dev) | ||
| 562 | { | ||
| 563 | return 0; | ||
| 564 | } | ||
| 565 | |||
| 566 | static const struct dev_pm_ops nvgpu_pci_pm_ops = { | ||
| 567 | .runtime_resume = nvgpu_pci_pm_runtime_resume, | ||
| 568 | .runtime_suspend = nvgpu_pci_pm_runtime_suspend, | ||
| 569 | .resume = nvgpu_pci_pm_resume, | ||
| 570 | .suspend = nvgpu_pci_pm_suspend, | ||
| 571 | }; | ||
| 572 | #endif | ||
| 573 | |||
| 574 | static int nvgpu_pci_pm_init(struct device *dev) | ||
| 575 | { | ||
| 576 | #ifdef CONFIG_PM | ||
| 577 | struct gk20a *g = get_gk20a(dev); | ||
| 578 | |||
| 579 | if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) { | ||
| 580 | pm_runtime_disable(dev); | ||
| 581 | } else { | ||
| 582 | if (g->railgate_delay) | ||
| 583 | pm_runtime_set_autosuspend_delay(dev, | ||
| 584 | g->railgate_delay); | ||
| 585 | |||
| 586 | /* | ||
| 587 | * set gpu dev's use_autosuspend flag to allow | ||
| 588 | * runtime power management of GPU | ||
| 589 | */ | ||
| 590 | pm_runtime_use_autosuspend(dev); | ||
| 591 | |||
| 592 | /* | ||
| 593 | * runtime PM for PCI devices is forbidden | ||
| 594 | * by default, so unblock RTPM of GPU | ||
| 595 | */ | ||
| 596 | pm_runtime_put_noidle(dev); | ||
| 597 | pm_runtime_allow(dev); | ||
| 598 | } | ||
| 599 | #endif | ||
| 600 | return 0; | ||
| 601 | } | ||
| 602 | |||
| 603 | static int nvgpu_pci_pm_deinit(struct device *dev) | ||
| 604 | { | ||
| 605 | #ifdef CONFIG_PM | ||
| 606 | struct gk20a *g = get_gk20a(dev); | ||
| 607 | |||
| 608 | if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) | ||
| 609 | pm_runtime_enable(dev); | ||
| 610 | else | ||
| 611 | pm_runtime_forbid(dev); | ||
| 612 | #endif | ||
| 613 | return 0; | ||
| 614 | } | ||
| 615 | |||
| 616 | static int nvgpu_pci_probe(struct pci_dev *pdev, | ||
| 617 | const struct pci_device_id *pent) | ||
| 618 | { | ||
| 619 | struct gk20a_platform *platform = NULL; | ||
| 620 | struct nvgpu_os_linux *l; | ||
| 621 | struct gk20a *g; | ||
| 622 | int err; | ||
| 623 | char nodefmt[64]; | ||
| 624 | struct device_node *np; | ||
| 625 | |||
| 626 | /* make sure driver_data is a sane index */ | ||
| 627 | if (pent->driver_data >= ARRAY_SIZE(nvgpu_pci_device)) { | ||
| 629 | return -EINVAL; | ||
| 630 | } | ||
| 631 | |||
| 632 | l = kzalloc(sizeof(*l), GFP_KERNEL); | ||
| 633 | if (!l) { | ||
| 634 | dev_err(&pdev->dev, "couldn't allocate gk20a support"); | ||
| 635 | return -ENOMEM; | ||
| 636 | } | ||
| 637 | |||
| 638 | hash_init(l->ecc_sysfs_stats_htable); | ||
| 639 | |||
| 640 | g = &l->g; | ||
| 641 | |||
| 642 | g->log_mask = NVGPU_DEFAULT_DBG_MASK; | ||
| 643 | |||
| 644 | nvgpu_init_gk20a(g); | ||
| 645 | |||
| 646 | nvgpu_kmem_init(g); | ||
| 647 | |||
| 648 | /* Allocate memory to hold the platform data */ | ||
| 649 | platform = nvgpu_kzalloc(g, sizeof(*platform)); | ||
| 651 | if (!platform) { | ||
| 652 | dev_err(&pdev->dev, "couldn't allocate platform data"); | ||
| 653 | err = -ENOMEM; | ||
| 654 | goto err_free_l; | ||
| 655 | } | ||
| 656 | |||
| 657 | /* copy the detected device's data into the allocated platform space */ | ||
| 658 | memcpy(platform, &nvgpu_pci_device[pent->driver_data], | ||
| 659 | sizeof(*platform)); | ||
| 660 | |||
| 661 | pci_set_drvdata(pdev, platform); | ||
| 662 | |||
| 663 | err = nvgpu_init_enabled_flags(g); | ||
| 664 | if (err) | ||
| 665 | goto err_free_platform; | ||
| 666 | |||
| 667 | platform->g = g; | ||
| 668 | l->dev = &pdev->dev; | ||
| 669 | |||
| 670 | np = nvgpu_get_node(g); | ||
| 671 | if (of_dma_is_coherent(np)) { | ||
| 672 | __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); | ||
| 673 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
| 674 | } | ||
| 675 | |||
| 676 | err = pci_enable_device(pdev); | ||
| 677 | if (err) | ||
| 678 | goto err_free_platform; | ||
| 679 | pci_set_master(pdev); | ||
| 680 | |||
| 681 | g->pci_vendor_id = pdev->vendor; | ||
| 682 | g->pci_device_id = pdev->device; | ||
| 683 | g->pci_subsystem_vendor_id = pdev->subsystem_vendor; | ||
| 684 | g->pci_subsystem_device_id = pdev->subsystem_device; | ||
| 685 | g->pci_class = (pdev->class >> 8) & 0xFFFFU; // we only want base/sub | ||
| 686 | g->pci_revision = pdev->revision; | ||
| 687 | |||
| 688 | g->ina3221_dcb_index = platform->ina3221_dcb_index; | ||
| 689 | g->ina3221_i2c_address = platform->ina3221_i2c_address; | ||
| 690 | g->ina3221_i2c_port = platform->ina3221_i2c_port; | ||
| 691 | g->hardcode_sw_threshold = platform->hardcode_sw_threshold; | ||
| 692 | |||
| 693 | #if defined(CONFIG_PCI_MSI) | ||
| 694 | err = pci_enable_msi(pdev); | ||
| 695 | if (err) { | ||
| 696 | nvgpu_err(g, | ||
| 697 | "MSI could not be enabled, falling back to legacy"); | ||
| 698 | g->msi_enabled = false; | ||
| 699 | } else | ||
| 700 | g->msi_enabled = true; | ||
| 701 | #endif | ||
| 702 | |||
| 703 | g->irq_stall = pdev->irq; | ||
| 704 | g->irq_nonstall = pdev->irq; | ||
| 705 | if (g->irq_stall < 0) { | ||
| 706 | err = -ENXIO; | ||
| 707 | goto err_disable_msi; | ||
| 708 | } | ||
| 709 | |||
| 710 | err = devm_request_threaded_irq(&pdev->dev, | ||
| 711 | g->irq_stall, | ||
| 712 | nvgpu_pci_isr, | ||
| 713 | nvgpu_pci_intr_thread, | ||
| 714 | #if defined(CONFIG_PCI_MSI) | ||
| 715 | g->msi_enabled ? 0 : | ||
| 716 | #endif | ||
| 717 | IRQF_SHARED, "nvgpu", g); | ||
| 718 | if (err) { | ||
| 719 | nvgpu_err(g, | ||
| 720 | "failed to request irq @ %d", g->irq_stall); | ||
| 721 | goto err_disable_msi; | ||
| 722 | } | ||
| 723 | disable_irq(g->irq_stall); | ||
| 724 | |||
| 725 | err = nvgpu_pci_init_support(pdev); | ||
| 726 | if (err) | ||
| 727 | goto err_free_irq; | ||
| 728 | |||
| 729 | if (strchr(dev_name(&pdev->dev), '%')) { | ||
| 730 | nvgpu_err(g, "illegal character in device name"); | ||
| 731 | err = -EINVAL; | ||
| 732 | goto err_free_irq; | ||
| 733 | } | ||
| 734 | |||
| 735 | snprintf(nodefmt, sizeof(nodefmt), | ||
| 736 | PCI_INTERFACE_NAME, dev_name(&pdev->dev)); | ||
| 737 | |||
| 738 | err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class); | ||
| 739 | if (err) | ||
| 740 | goto err_free_irq; | ||
| 741 | |||
| 742 | err = nvgpu_pci_pm_init(&pdev->dev); | ||
| 743 | if (err) { | ||
| 744 | nvgpu_err(g, "pm init failed"); | ||
| 745 | goto err_free_irq; | ||
| 746 | } | ||
| 747 | |||
| 748 | err = nvgpu_nvlink_probe(g); | ||
| 749 | /* | ||
| 750 | * ENODEV is a legal error: it means there is no NVLINK. | ||
| 751 | * Any other error is fatal. | ||
| 752 | */ | ||
| 753 | if (err) { | ||
| 754 | if (err != -ENODEV) { | ||
| 755 | nvgpu_err(g, "fatal error probing nvlink, bailing out"); | ||
| 756 | goto err_free_irq; | ||
| 757 | } | ||
| 758 | /* The semaphore SHIM is enabled on NVLINK only for now, so clear both flags. */ | ||
| 759 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false); | ||
| 760 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false); | ||
| 761 | } else { | ||
| 762 | err = nvgpu_nvhost_syncpt_init(g); | ||
| 763 | if (err && err != -ENOSYS) { | ||
| 764 | nvgpu_err(g, "syncpt init failed"); | ||
| 765 | goto err_free_irq; | ||
| 766 | } | ||
| 769 | } | ||
| 770 | |||
| 771 | return 0; | ||
| 772 | |||
| 773 | err_free_irq: | ||
| 774 | nvgpu_free_irq(g); | ||
| 775 | err_disable_msi: | ||
| 776 | #if defined(CONFIG_PCI_MSI) | ||
| 777 | if (g->msi_enabled) | ||
| 778 | pci_disable_msi(pdev); | ||
| 779 | #endif | ||
| 780 | err_free_platform: | ||
| 781 | nvgpu_kfree(g, platform); | ||
| 782 | err_free_l: | ||
| 783 | kfree(l); | ||
| 784 | return err; | ||
| 785 | } | ||
| 786 | |||
| 787 | static void nvgpu_pci_remove(struct pci_dev *pdev) | ||
| 788 | { | ||
| 789 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
| 790 | struct device *dev = dev_from_gk20a(g); | ||
| 791 | int err; | ||
| 792 | |||
| 793 | /* no support yet for unbind if DGPU is in VGPU mode */ | ||
| 794 | if (gk20a_gpu_is_virtual(dev)) | ||
| 795 | return; | ||
| 796 | |||
| 797 | err = nvgpu_nvlink_deinit(g); | ||
| 798 | WARN(err, "gpu failed to remove nvlink"); | ||
| 799 | |||
| 800 | gk20a_driver_start_unload(g); | ||
| 801 | |||
| 802 | err = nvgpu_quiesce(g); | ||
| 803 | /* TODO: handle failure to idle */ | ||
| 804 | WARN(err, "gpu failed to idle during driver removal"); | ||
| 805 | |||
| 806 | nvgpu_free_irq(g); | ||
| 807 | |||
| 808 | nvgpu_remove(dev, &nvgpu_pci_class); | ||
| 809 | |||
| 810 | #if defined(CONFIG_PCI_MSI) | ||
| 811 | if (g->msi_enabled) | ||
| 812 | pci_disable_msi(pdev); | ||
| 813 | else { | ||
| 814 | /* The IRQ does not need to be re-enabled in MSI mode, as the | ||
| 815 | * line is not shared. | ||
| 816 | */ | ||
| 817 | enable_irq(g->irq_stall); | ||
| 818 | } | ||
| 819 | #endif | ||
| 820 | nvgpu_pci_pm_deinit(&pdev->dev); | ||
| 821 | |||
| 822 | /* free allocated platform data space */ | ||
| 823 | gk20a_get_platform(&pdev->dev)->g = NULL; | ||
| 824 | nvgpu_kfree(g, gk20a_get_platform(&pdev->dev)); | ||
| 825 | |||
| 826 | gk20a_put(g); | ||
| 827 | } | ||
| 828 | |||
| 829 | static struct pci_driver nvgpu_pci_driver = { | ||
| 830 | .name = "nvgpu", | ||
| 831 | .id_table = nvgpu_pci_table, | ||
| 832 | .probe = nvgpu_pci_probe, | ||
| 833 | .remove = nvgpu_pci_remove, | ||
| 834 | #ifdef CONFIG_PM | ||
| 835 | .driver.pm = &nvgpu_pci_pm_ops, | ||
| 836 | #endif | ||
| 837 | }; | ||
| 838 | |||
| 839 | int __init nvgpu_pci_init(void) | ||
| 840 | { | ||
| 841 | int ret; | ||
| 842 | |||
| 843 | ret = class_register(&nvgpu_pci_class); | ||
| 844 | if (ret) | ||
| 845 | return ret; | ||
| 846 | |||
| 847 | ret = pci_register_driver(&nvgpu_pci_driver); | ||
| 848 | if (ret) | ||
| 849 | class_unregister(&nvgpu_pci_class); | ||
| 850 | |||
| 851 | return ret; | ||
| 848 | } | ||
| 849 | |||
| 850 | void __exit nvgpu_pci_exit(void) | ||
| 851 | { | ||
| 852 | pci_unregister_driver(&nvgpu_pci_driver); | ||
| 853 | class_unregister(&nvgpu_pci_class); | ||
| 854 | } | ||
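
nvgpu_pci_init()/nvgpu_pci_exit() above follow the usual register-in-order, unwind-in-reverse pattern: class first, driver second; teardown reverses it, and a failed driver registration unwinds the class. A generic, self-contained sketch of that idiom with hypothetical helpers:

    #include <stdio.h>

    static int  register_class(void)    { printf("class registered\n"); return 0; }
    static void unregister_class(void)  { printf("class unregistered\n"); }
    static int  register_driver(void)   { printf("driver registered\n"); return 0; }
    static void unregister_driver(void) { printf("driver unregistered\n"); }

    static int demo_init(void)
    {
        int ret = register_class();

        if (ret)
            return ret;

        ret = register_driver();
        if (ret)
            unregister_class();   /* unwind the earlier step on failure */
        return ret;
    }

    static void demo_exit(void)
    {
        /* release in exactly the reverse order of acquisition */
        unregister_driver();
        unregister_class();
    }

    int main(void)
    {
        if (demo_init() == 0)
            demo_exit();
        return 0;
    }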
diff --git a/include/os/linux/pci.h b/include/os/linux/pci.h new file mode 100644 index 0000000..cc6b77b --- /dev/null +++ b/include/os/linux/pci.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | #ifndef NVGPU_PCI_H | ||
| 17 | #define NVGPU_PCI_H | ||
| 18 | |||
| 19 | #ifdef CONFIG_GK20A_PCI | ||
| 20 | int nvgpu_pci_init(void); | ||
| 21 | void nvgpu_pci_exit(void); | ||
| 22 | #else | ||
| 23 | static inline int nvgpu_pci_init(void) { return 0; } | ||
| 24 | static inline void nvgpu_pci_exit(void) {} | ||
| 25 | #endif | ||
| 26 | |||
| 27 | #endif | ||
diff --git a/include/os/linux/pci_usermode.c b/include/os/linux/pci_usermode.c new file mode 100644 index 0000000..270b834 --- /dev/null +++ b/include/os/linux/pci_usermode.c | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <nvgpu/types.h> | ||
| 15 | |||
| 16 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
| 17 | |||
| 18 | #include "os_linux.h" | ||
| 19 | |||
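/*
 * On dGPUs the usermode register region lives at a fixed offset inside
 * the BAR0 mapping (l->regs); user space submits work through this
 * region. Point the live pointer at it and keep a saved copy so the
 * live pointer can be restored if it is swapped out later.
 */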
| 20 | void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l) | ||
| 21 | { | ||
| 22 | l->usermode_regs = l->regs + usermode_cfg0_r(); | ||
| 23 | l->usermode_regs_saved = l->usermode_regs; | ||
| 24 | } | ||
diff --git a/include/os/linux/pci_usermode.h b/include/os/linux/pci_usermode.h new file mode 100644 index 0000000..25a08d2 --- /dev/null +++ b/include/os/linux/pci_usermode.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | #ifndef __NVGPU_PCI_USERMODE_H__ | ||
| 17 | #define __NVGPU_PCI_USERMODE_H__ | ||
| 18 | |||
| 19 | struct nvgpu_os_linux; | ||
| 20 | |||
| 21 | void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l); | ||
| 22 | |||
| 23 | #endif | ||
diff --git a/include/os/linux/platform_gk20a.h b/include/os/linux/platform_gk20a.h new file mode 100644 index 0000000..adec860 --- /dev/null +++ b/include/os/linux/platform_gk20a.h | |||
| @@ -0,0 +1,329 @@ | |||
| 1 | /* | ||
| 2 | * GK20A Platform (SoC) Interface | ||
| 3 | * | ||
| 4 | * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #ifndef _GK20A_PLATFORM_H_ | ||
| 17 | #define _GK20A_PLATFORM_H_ | ||
| 18 | |||
| 19 | #include <linux/device.h> | ||
| 20 | |||
| 21 | #include <nvgpu/lock.h> | ||
| 22 | #include <nvgpu/gk20a.h> | ||
| 23 | |||
| 24 | #define GK20A_CLKS_MAX 4 | ||
| 25 | |||
| 26 | struct gk20a; | ||
| 27 | struct channel_gk20a; | ||
| 28 | struct gr_ctx_buffer_desc; | ||
| 29 | struct gk20a_scale_profile; | ||
| 30 | |||
| 31 | struct secure_page_buffer { | ||
| 32 | void (*destroy)(struct gk20a *, struct secure_page_buffer *); | ||
| 33 | size_t size; | ||
| 34 | dma_addr_t phys; | ||
| 35 | size_t used; | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct gk20a_platform { | ||
| 39 | /* Populated by the gk20a driver before probing the platform. */ | ||
| 40 | struct gk20a *g; | ||
| 41 | |||
| 42 | /* Should be populated at probe. */ | ||
| 43 | bool can_railgate_init; | ||
| 44 | |||
| 45 | /* Should be populated at probe. */ | ||
| 46 | bool can_tpc_powergate; | ||
| 47 | |||
| 48 | /* Should be populated at probe. */ | ||
| 49 | bool can_elpg_init; | ||
| 50 | |||
| 51 | /* Should be populated at probe. */ | ||
| 52 | bool has_syncpoints; | ||
| 53 | |||
| 54 | /* channel limit after which to start aggressive sync destroy */ | ||
| 55 | unsigned int aggressive_sync_destroy_thresh; | ||
| 56 | |||
| 57 | /* flag to set sync destroy aggressiveness */ | ||
| 58 | bool aggressive_sync_destroy; | ||
| 59 | |||
| 60 | /* set if ASPM should be disabled on boot; only makes sense for PCI */ | ||
| 61 | bool disable_aspm; | ||
| 62 | |||
| 63 | /* Set if the platform can unify the small/large address spaces. */ | ||
| 64 | bool unify_address_spaces; | ||
| 65 | |||
| 66 | /* Clock configuration is stored here. Platform probe is responsible | ||
| 67 | * for filling this data. */ | ||
| 68 | struct clk *clk[GK20A_CLKS_MAX]; | ||
| 69 | int num_clks; | ||
| 70 | int maxmin_clk_id; | ||
| 71 | |||
| 72 | #ifdef CONFIG_RESET_CONTROLLER | ||
| 73 | /* Reset control for device */ | ||
| 74 | struct reset_control *reset_control; | ||
| 75 | #endif | ||
| 76 | /* valid TPC-MASK */ | ||
| 77 | u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS]; | ||
| 78 | |||
| 79 | /* Delay before rail gated */ | ||
| 80 | int railgate_delay_init; | ||
| 81 | |||
| 82 | /* init value for slowdown factor */ | ||
| 83 | u8 ldiv_slowdown_factor_init; | ||
| 84 | |||
| 85 | /* Second Level Clock Gating: true = enable false = disable */ | ||
| 86 | bool enable_slcg; | ||
| 87 | |||
| 88 | /* Block Level Clock Gating: true = enable false = disable */ | ||
| 89 | bool enable_blcg; | ||
| 90 | |||
| 91 | /* Engine Level Clock Gating: true = enable false = disable */ | ||
| 92 | bool enable_elcg; | ||
| 93 | |||
| 94 | /* Should be populated at probe. */ | ||
| 95 | bool can_slcg; | ||
| 96 | |||
| 97 | /* Should be populated at probe. */ | ||
| 98 | bool can_blcg; | ||
| 99 | |||
| 100 | /* Should be populated at probe. */ | ||
| 101 | bool can_elcg; | ||
| 102 | |||
| 103 | /* Engine Level Power Gating: true = enable false = disable */ | ||
| 104 | bool enable_elpg; | ||
| 105 | |||
| 106 | /* Adaptive ELPG: true = enable false = disable */ | ||
| 107 | bool enable_aelpg; | ||
| 108 | |||
| 109 | /* PMU Perfmon: true = enable false = disable */ | ||
| 110 | bool enable_perfmon; | ||
| 111 | |||
| 112 | /* Memory System Clock Gating: true = enable false = disable */ | ||
| 113 | bool enable_mscg; | ||
| 114 | |||
| 115 | /* Timeout for per-channel watchdog (in ms) */ | ||
| 116 | u32 ch_wdt_timeout_ms; | ||
| 117 | |||
| 118 | /* Disable big page support */ | ||
| 119 | bool disable_bigpage; | ||
| 120 | |||
| 121 | /* | ||
| 122 | * gk20a_do_idle() API can take GPU either into rail gate or CAR reset | ||
| 123 | * This flag can be used to force CAR reset case instead of rail gate | ||
| 124 | */ | ||
| 125 | bool force_reset_in_do_idle; | ||
| 126 | |||
| 127 | /* guest/vm id, needed for IPA to PA translation */ | ||
| 128 | int vmid; | ||
| 129 | |||
| 130 | /* Initialize the platform interface of the gk20a driver. | ||
| 131 | * | ||
| 132 | * The platform implementation of this function must | ||
| 133 | * - set the power and clocks of the gk20a device to a known | ||
| 134 | * state, and | ||
| 135 | * - populate the gk20a_platform structure (a pointer to the | ||
| 136 | * structure can be obtained by calling gk20a_get_platform). | ||
| 137 | * | ||
| 138 | * After this function is finished, the driver will initialise | ||
| 139 | * pm runtime and genpd based on the platform configuration. | ||
| 140 | */ | ||
| 141 | int (*probe)(struct device *dev); | ||
| 142 | |||
| 143 | /* Second stage initialisation - called once all power management | ||
| 144 | * initialisations are done. | ||
| 145 | */ | ||
| 146 | int (*late_probe)(struct device *dev); | ||
| 147 | |||
| 148 | /* Remove device after power management has been done | ||
| 149 | */ | ||
| 150 | int (*remove)(struct device *dev); | ||
| 151 | |||
| 152 | /* Poweron platform dependencies */ | ||
| 153 | int (*busy)(struct device *dev); | ||
| 154 | |||
| 155 | /* Powerdown platform dependencies */ | ||
| 156 | void (*idle)(struct device *dev); | ||
| 157 | |||
| 158 | /* Preallocated VPR buffer for kernel */ | ||
| 159 | size_t secure_buffer_size; | ||
| 160 | struct secure_page_buffer secure_buffer; | ||
| 161 | |||
| 162 | /* Device is going to be suspended */ | ||
| 163 | int (*suspend)(struct device *); | ||
| 164 | |||
| 165 | /* Device is going to be resumed */ | ||
| 166 | int (*resume)(struct device *); | ||
| 167 | |||
| 168 | /* Called to turn off the device */ | ||
| 169 | int (*railgate)(struct device *dev); | ||
| 170 | |||
| 171 | /* Called to turn on the device */ | ||
| 172 | int (*unrailgate)(struct device *dev); | ||
| 173 | struct nvgpu_mutex railgate_lock; | ||
| 174 | |||
| 175 | /* Called to check state of device */ | ||
| 176 | bool (*is_railgated)(struct device *dev); | ||
| 177 | |||
| 178 | /* get supported frequency list */ | ||
| 179 | int (*get_clk_freqs)(struct device *pdev, | ||
| 180 | unsigned long **freqs, int *num_freqs); | ||
| 181 | |||
| 182 | /* clk related supported functions */ | ||
| 183 | long (*clk_round_rate)(struct device *dev, | ||
| 184 | unsigned long rate); | ||
| 185 | |||
| 186 | /* Called to register GPCPLL with common clk framework */ | ||
| 187 | int (*clk_register)(struct gk20a *g); | ||
| 188 | |||
| 189 | /* platform specific scale init quirks */ | ||
| 190 | void (*initscale)(struct device *dev); | ||
| 191 | |||
| 192 | /* Postscale callback is called after frequency change */ | ||
| 193 | void (*postscale)(struct device *dev, | ||
| 194 | unsigned long freq); | ||
| 195 | |||
| 196 | /* Pre callback is called before frequency change */ | ||
| 197 | void (*prescale)(struct device *dev); | ||
| 198 | |||
| 199 | /* Set TPC_PG_MASK during probe */ | ||
| 200 | void (*set_tpc_pg_mask)(struct device *dev, u32 tpc_pg_mask); | ||
| 201 | |||
| 202 | /* Devfreq governor name. If scaling is enabled, we request | ||
| 203 | * this governor to be used in scaling */ | ||
| 204 | const char *devfreq_governor; | ||
| 205 | |||
| 206 | /* Quality of service notifier callback. If this is set, the scaling | ||
| 207 | * routines will register a callback to Qos. Each time we receive | ||
| 208 | * a new value, this callback gets called. */ | ||
| 209 | int (*qos_notify)(struct notifier_block *nb, | ||
| 210 | unsigned long n, void *p); | ||
| 211 | |||
| 212 | /* Called as part of debug dump. If the gpu gets hung, this function | ||
| 213 | * is responsible for delivering all necessary debug data of other | ||
| 214 | * hw units which may interact with the gpu without direct supervision | ||
| 215 | * of the CPU. | ||
| 216 | */ | ||
| 217 | void (*dump_platform_dependencies)(struct device *dev); | ||
| 218 | |||
| 219 | /* Defined when SMMU stage-2 is enabled, and we need to use physical | ||
| 220 | * addresses (not IPA). This is the case for GV100 nvlink in HV+L | ||
| 221 | * configuration, when dGPU is in pass-through mode. | ||
| 222 | */ | ||
| 223 | u64 (*phys_addr)(struct gk20a *g, u64 ipa); | ||
| 224 | |||
| 225 | /* Callbacks to assert/deassert GPU reset */ | ||
| 226 | int (*reset_assert)(struct device *dev); | ||
| 227 | int (*reset_deassert)(struct device *dev); | ||
| 228 | struct clk *clk_reset; | ||
| 229 | struct dvfs_rail *gpu_rail; | ||
| 230 | |||
| 231 | bool virtual_dev; | ||
| 232 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
| 233 | void *vgpu_priv; | ||
| 234 | #endif | ||
| 235 | /* source frequency for ptimer in Hz */ | ||
| 236 | u32 ptimer_src_freq; | ||
| 237 | |||
| 238 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 239 | bool has_cde; | ||
| 240 | #endif | ||
| 241 | |||
| 242 | /* soc name for finding firmware files */ | ||
| 243 | const char *soc_name; | ||
| 244 | |||
| 245 | /* false if vidmem aperture actually points to sysmem */ | ||
| 246 | bool honors_aperture; | ||
| 247 | /* unified or split memory with separate vidmem? */ | ||
| 248 | bool unified_memory; | ||
| 249 | /* WAR for gm20b chips. */ | ||
| 250 | bool force_128K_pmu_vm; | ||
| 251 | |||
| 252 | /* | ||
| 253 | * DMA mask for Linux (both coherent and non-coherent). If not set, | ||
| 254 | * defaults to 0x3ffffffff (i.e., a 34-bit mask). | ||
| 255 | */ | ||
| 256 | u64 dma_mask; | ||
| 257 | |||
| 258 | /* minimum supported VBIOS version */ | ||
| 259 | u32 vbios_min_version; | ||
| 260 | |||
| 261 | /* true if we run preos microcode on this board */ | ||
| 262 | bool run_preos; | ||
| 263 | |||
| 264 | /* true if we need to program sw threshold for | ||
| 265 | * power limits | ||
| 266 | */ | ||
| 267 | bool hardcode_sw_threshold; | ||
| 268 | |||
| 269 | /* i2c device index, port and address for INA3221 */ | ||
| 270 | u32 ina3221_dcb_index; | ||
| 271 | u32 ina3221_i2c_address; | ||
| 272 | u32 ina3221_i2c_port; | ||
| 273 | |||
| 274 | /* stream id to use */ | ||
| 275 | u32 ltc_streamid; | ||
| 276 | |||
| 277 | /* synchronized access to platform->clk_get_freqs */ | ||
| 278 | struct nvgpu_mutex clk_get_freq_lock; | ||
| 279 | }; | ||
| 280 | |||
| 281 | static inline struct gk20a_platform *gk20a_get_platform( | ||
| 282 | struct device *dev) | ||
| 283 | { | ||
| 284 | return (struct gk20a_platform *)dev_get_drvdata(dev); | ||
| 285 | } | ||
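The probe contract documented in the struct above (bring power and clocks to a known state, then populate the platform data before the driver sets up runtime PM) can be made concrete with a sketch. Every name and value below is an illustrative assumption, not taken from any real board file:

static int example_board_probe(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);

	/* 1. Board-specific power/clock bring-up would go here. */

	/* 2. Populate the platform data the driver consumes after
	 *    probe, before it initialises runtime PM and genpd. */
	platform->can_railgate_init = false;
	platform->railgate_delay_init = 500;
	platform->ch_wdt_timeout_ms = 5000;

	return 0;
}

static struct gk20a_platform example_board_platform = {
	.probe = example_board_probe,
	.enable_slcg = true,
	.enable_blcg = true,
	.ptimer_src_freq = 19200000,
};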
| 286 | |||
| 287 | #ifdef CONFIG_TEGRA_GK20A | ||
| 288 | extern struct gk20a_platform gm20b_tegra_platform; | ||
| 289 | extern struct gk20a_platform gp10b_tegra_platform; | ||
| 290 | extern struct gk20a_platform gv11b_tegra_platform; | ||
| 291 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
| 292 | extern struct gk20a_platform vgpu_tegra_platform; | ||
| 293 | extern struct gk20a_platform gv11b_vgpu_tegra_platform; | ||
| 294 | #endif | ||
| 295 | #endif | ||
| 296 | |||
| 297 | int gk20a_tegra_busy(struct device *dev); | ||
| 298 | void gk20a_tegra_idle(struct device *dev); | ||
| 299 | void gk20a_tegra_debug_dump(struct device *pdev); | ||
| 300 | |||
| 301 | static inline struct gk20a *get_gk20a(struct device *dev) | ||
| 302 | { | ||
| 303 | return gk20a_get_platform(dev)->g; | ||
| 304 | } | ||
| 305 | static inline struct gk20a *gk20a_from_dev(struct device *dev) | ||
| 306 | { | ||
| 307 | if (!dev) | ||
| 308 | return NULL; | ||
| 309 | |||
| 310 | return ((struct gk20a_platform *)dev_get_drvdata(dev))->g; | ||
| 311 | } | ||
| 312 | static inline bool gk20a_gpu_is_virtual(struct device *dev) | ||
| 313 | { | ||
| 314 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 315 | |||
| 316 | return platform->virtual_dev; | ||
| 317 | } | ||
| 318 | |||
| 319 | static inline int support_gk20a_pmu(struct device *dev) | ||
| 320 | { | ||
| 321 | if (IS_ENABLED(CONFIG_GK20A_PMU)) { | ||
| 322 | /* gPMU is not supported for vgpu */ | ||
| 323 | return !gk20a_gpu_is_virtual(dev); | ||
| 324 | } | ||
| 325 | |||
| 326 | return 0; | ||
| 327 | } | ||
| 328 | |||
| 329 | #endif | ||
diff --git a/include/os/linux/platform_gk20a_tegra.c b/include/os/linux/platform_gk20a_tegra.c new file mode 100644 index 0000000..c39e4f0 --- /dev/null +++ b/include/os/linux/platform_gk20a_tegra.c | |||
| @@ -0,0 +1,966 @@ | |||
| 1 | /* | ||
| 2 | * GK20A Tegra Platform Interface | ||
| 3 | * | ||
| 4 | * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include <linux/clkdev.h> | ||
| 17 | #include <linux/of_platform.h> | ||
| 18 | #include <linux/debugfs.h> | ||
| 19 | #include <linux/platform_data/tegra_edp.h> | ||
| 20 | #include <linux/delay.h> | ||
| 21 | #include <uapi/linux/nvgpu.h> | ||
| 22 | #include <linux/dma-buf.h> | ||
| 23 | #include <linux/dma-attrs.h> | ||
| 24 | #include <linux/nvmap.h> | ||
| 25 | #include <linux/reset.h> | ||
| 26 | #if defined(CONFIG_TEGRA_DVFS) | ||
| 27 | #include <linux/tegra_soctherm.h> | ||
| 28 | #endif | ||
| 29 | #include <linux/platform/tegra/common.h> | ||
| 30 | #include <linux/platform/tegra/mc.h> | ||
| 31 | #include <linux/clk/tegra.h> | ||
| 32 | #if defined(CONFIG_COMMON_CLK) | ||
| 33 | #include <soc/tegra/tegra-dvfs.h> | ||
| 34 | #endif | ||
| 35 | #ifdef CONFIG_TEGRA_BWMGR | ||
| 36 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
| 37 | #endif | ||
| 38 | |||
| 39 | #include <linux/platform/tegra/tegra_emc.h> | ||
| 40 | #include <soc/tegra/chip-id.h> | ||
| 41 | |||
| 42 | #include <nvgpu/kmem.h> | ||
| 43 | #include <nvgpu/bug.h> | ||
| 44 | #include <nvgpu/enabled.h> | ||
| 45 | #include <nvgpu/gk20a.h> | ||
| 46 | #include <nvgpu/nvhost.h> | ||
| 47 | |||
| 48 | #include <nvgpu/linux/dma.h> | ||
| 49 | |||
| 50 | #include "gm20b/clk_gm20b.h" | ||
| 51 | |||
| 52 | #include "scale.h" | ||
| 53 | #include "platform_gk20a.h" | ||
| 54 | #include "clk.h" | ||
| 55 | #include "os_linux.h" | ||
| 56 | |||
| 57 | #include "../../../arch/arm/mach-tegra/iomap.h" | ||
| 58 | #include <soc/tegra/pmc.h> | ||
| 59 | |||
| 60 | #define TEGRA_GK20A_BW_PER_FREQ 32 | ||
| 61 | #define TEGRA_GM20B_BW_PER_FREQ 64 | ||
| 62 | #define TEGRA_DDR3_BW_PER_FREQ 16 | ||
| 63 | #define TEGRA_DDR4_BW_PER_FREQ 16 | ||
| 64 | #define MC_CLIENT_GPU 34 | ||
| 65 | #define PMC_GPU_RG_CNTRL_0 0x2d4 | ||
| 66 | |||
| 67 | #ifdef CONFIG_COMMON_CLK | ||
| 68 | #define GPU_RAIL_NAME "vdd-gpu" | ||
| 69 | #else | ||
| 70 | #define GPU_RAIL_NAME "vdd_gpu" | ||
| 71 | #endif | ||
| 72 | |||
| 73 | extern struct device tegra_vpr_dev; | ||
| 74 | |||
| 75 | #ifdef CONFIG_TEGRA_BWMGR | ||
| 76 | struct gk20a_emc_params { | ||
| 77 | unsigned long bw_ratio; | ||
| 78 | unsigned long freq_last_set; | ||
| 79 | struct tegra_bwmgr_client *bwmgr_cl; | ||
| 80 | }; | ||
| 81 | #else | ||
| 82 | struct gk20a_emc_params { | ||
| 83 | unsigned long bw_ratio; | ||
| 84 | unsigned long freq_last_set; | ||
| 85 | }; | ||
| 86 | #endif | ||
| 87 | |||
| 88 | #define MHZ_TO_HZ(x) ((x) * 1000000) | ||
| 89 | #define HZ_TO_MHZ(x) ((x) / 1000000) | ||
| 90 | |||
| 91 | static void gk20a_tegra_secure_page_destroy(struct gk20a *g, | ||
| 92 | struct secure_page_buffer *secure_buffer) | ||
| 93 | { | ||
| 94 | DEFINE_DMA_ATTRS(attrs); | ||
| 95 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); | ||
| 96 | dma_free_attrs(&tegra_vpr_dev, secure_buffer->size, | ||
| 97 | (void *)(uintptr_t)secure_buffer->phys, | ||
| 98 | secure_buffer->phys, __DMA_ATTR(attrs)); | ||
| 99 | |||
| 100 | secure_buffer->destroy = NULL; | ||
| 101 | } | ||
| 102 | |||
| 103 | static int gk20a_tegra_secure_alloc(struct gk20a *g, | ||
| 104 | struct gr_ctx_buffer_desc *desc, | ||
| 105 | size_t size) | ||
| 106 | { | ||
| 107 | struct device *dev = dev_from_gk20a(g); | ||
| 108 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 109 | struct secure_page_buffer *secure_buffer = &platform->secure_buffer; | ||
| 110 | dma_addr_t phys; | ||
| 111 | struct sg_table *sgt; | ||
| 112 | struct page *page; | ||
| 113 | int err = 0; | ||
| 114 | size_t aligned_size = PAGE_ALIGN(size); | ||
| 115 | |||
| 116 | if (nvgpu_mem_is_valid(&desc->mem)) | ||
| 117 | return 0; | ||
| 118 | |||
| 119 | /* We ran out of preallocated memory */ | ||
| 120 | if (secure_buffer->used + aligned_size > secure_buffer->size) { | ||
| 121 | nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used", | ||
| 122 | size, secure_buffer->used, secure_buffer->size); | ||
| 123 | return -ENOMEM; | ||
| 124 | } | ||
| 125 | |||
| 126 | phys = secure_buffer->phys + secure_buffer->used; | ||
| 127 | |||
| 128 | sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt)); | ||
| 129 | if (!sgt) { | ||
| 130 | nvgpu_err(platform->g, "failed to allocate memory"); | ||
| 131 | return -ENOMEM; | ||
| 132 | } | ||
| 133 | err = sg_alloc_table(sgt, 1, GFP_KERNEL); | ||
| 134 | if (err) { | ||
| 135 | nvgpu_err(platform->g, "failed to allocate sg_table"); | ||
| 136 | goto fail_sgt; | ||
| 137 | } | ||
| 138 | page = phys_to_page(phys); | ||
| 139 | sg_set_page(sgt->sgl, page, size, 0); | ||
| 140 | /* This bypasses SMMU for VPR during gmmu_map. */ | ||
| 141 | sg_dma_address(sgt->sgl) = 0; | ||
| 142 | |||
| 143 | desc->destroy = NULL; | ||
| 144 | |||
| 145 | desc->mem.priv.sgt = sgt; | ||
| 146 | desc->mem.size = size; | ||
| 147 | desc->mem.aperture = APERTURE_SYSMEM; | ||
| 148 | |||
| 149 | secure_buffer->used += aligned_size; | ||
| 150 | |||
| 151 | return err; | ||
| 152 | |||
| 153 | fail_sgt: | ||
| 154 | nvgpu_kfree(platform->g, sgt); | ||
| 155 | return err; | ||
| 156 | } | ||
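/*
 * Design note: the allocator above is a simple bump pointer over the
 * preallocated VPR carve-out. Allocations advance secure_buffer->used
 * and are never freed individually (desc->destroy stays NULL); the
 * whole carve-out is released at once by
 * gk20a_tegra_secure_page_destroy().
 */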
| 157 | |||
| 158 | /* | ||
| 159 | * gk20a_tegra_get_emc_rate() | ||
| 160 | * | ||
| 161 | * This function returns the minimum emc clock based on gpu frequency | ||
| 162 | */ | ||
| 163 | |||
| 164 | static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, | ||
| 165 | struct gk20a_emc_params *emc_params) | ||
| 166 | { | ||
| 167 | unsigned long gpu_freq, gpu_fmax_at_vmin; | ||
| 168 | unsigned long emc_rate, emc_scale; | ||
| 169 | |||
| 170 | gpu_freq = clk_get_rate(g->clk.tegra_clk); | ||
| 171 | gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t( | ||
| 172 | clk_get_parent(g->clk.tegra_clk)); | ||
| 173 | |||
| 174 | /* When scaling emc, account for the gpu load when the | ||
| 175 | * gpu frequency is less than or equal to fmax@vmin. */ | ||
| 176 | if (gpu_freq <= gpu_fmax_at_vmin) | ||
| 177 | emc_scale = min(g->pmu.load_avg, g->emc3d_ratio); | ||
| 178 | else | ||
| 179 | emc_scale = g->emc3d_ratio; | ||
| 180 | |||
| 181 | emc_rate = | ||
| 182 | (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000; | ||
| 183 | |||
| 184 | return MHZ_TO_HZ(emc_rate); | ||
| 185 | } | ||
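/*
 * Worked example with illustrative numbers, reading emc_scale as
 * per-mille (which the final division by 1000 implies): gpu_freq =
 * 998 MHz, bw_ratio = 4 and emc_scale = 750 give
 *
 *     emc_rate = 998 * 4 * 750 / 1000 = 2994 (MHz)
 *
 * which MHZ_TO_HZ() then returns as 2994000000 Hz.
 */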
| 186 | |||
| 187 | /* | ||
| 188 | * gk20a_tegra_prescale(dev) | ||
| 189 | * | ||
| 190 | * This function informs EDP about changed constraints. | ||
| 191 | */ | ||
| 192 | |||
| 193 | static void gk20a_tegra_prescale(struct device *dev) | ||
| 194 | { | ||
| 195 | struct gk20a *g = get_gk20a(dev); | ||
| 196 | u32 avg = 0; | ||
| 197 | |||
| 198 | nvgpu_pmu_load_norm(g, &avg); | ||
| 199 | tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk)); | ||
| 200 | } | ||
| 201 | |||
| 202 | /* | ||
| 203 | * gk20a_tegra_calibrate_emc() | ||
| 204 | * Compute the GPU to EMC bandwidth ratio for the current SoC. | ||
| 205 | */ | ||
| 206 | |||
| 207 | static void gk20a_tegra_calibrate_emc(struct device *dev, | ||
| 208 | struct gk20a_emc_params *emc_params) | ||
| 209 | { | ||
| 210 | enum tegra_chipid cid = tegra_get_chip_id(); | ||
| 211 | long gpu_bw, emc_bw; | ||
| 212 | |||
| 213 | /* store gpu bw based on soc */ | ||
| 214 | switch (cid) { | ||
| 215 | case TEGRA210: | ||
| 216 | gpu_bw = TEGRA_GM20B_BW_PER_FREQ; | ||
| 217 | break; | ||
| 218 | case TEGRA124: | ||
| 219 | case TEGRA132: | ||
| 220 | gpu_bw = TEGRA_GK20A_BW_PER_FREQ; | ||
| 221 | break; | ||
| 222 | default: | ||
| 223 | gpu_bw = 0; | ||
| 224 | break; | ||
| 225 | } | ||
| 226 | |||
| 227 | /* TODO detect DDR type. | ||
| 228 | * Okay for now since DDR3 and DDR4 have the same BW ratio */ | ||
| 229 | emc_bw = TEGRA_DDR3_BW_PER_FREQ; | ||
| 230 | |||
| 231 | /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq | ||
| 232 | * NOTE the ratio must come out as an integer */ | ||
| 233 | emc_params->bw_ratio = (gpu_bw / emc_bw); | ||
| 234 | } | ||
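/*
 * E.g. on TEGRA210 (GM20B) with DDR4: 64 / 16 = 4, an exact integer
 * as the NOTE above requires.
 */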
| 235 | |||
| 236 | #ifdef CONFIG_TEGRA_BWMGR | ||
| 237 | #ifdef CONFIG_TEGRA_DVFS | ||
| 238 | static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb) | ||
| 239 | { | ||
| 240 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 241 | struct gk20a_emc_params *params; | ||
| 242 | unsigned long rate; | ||
| 243 | |||
| 244 | if (!profile || !profile->private_data) | ||
| 245 | return; | ||
| 246 | |||
| 247 | params = (struct gk20a_emc_params *)profile->private_data; | ||
| 248 | rate = (enb) ? params->freq_last_set : 0; | ||
| 249 | tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
| 250 | } | ||
| 251 | #endif | ||
| 252 | |||
| 253 | static void gm20b_tegra_postscale(struct device *dev, unsigned long freq) | ||
| 254 | { | ||
| 255 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 256 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 257 | struct gk20a_emc_params *emc_params; | ||
| 258 | unsigned long emc_rate; | ||
| 259 | |||
| 260 | if (!profile || !profile->private_data) | ||
| 261 | return; | ||
| 262 | |||
| 263 | emc_params = profile->private_data; | ||
| 264 | emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params); | ||
| 265 | |||
| 266 | if (emc_rate > tegra_bwmgr_get_max_emc_rate()) | ||
| 267 | emc_rate = tegra_bwmgr_get_max_emc_rate(); | ||
| 268 | |||
| 269 | emc_params->freq_last_set = emc_rate; | ||
| 270 | if (platform->is_railgated && platform->is_railgated(dev)) | ||
| 271 | return; | ||
| 272 | |||
| 273 | tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate, | ||
| 274 | TEGRA_BWMGR_SET_EMC_FLOOR); | ||
| 275 | |||
| 276 | } | ||
| 277 | |||
| 278 | #endif | ||
| 279 | |||
| 280 | #if defined(CONFIG_TEGRA_DVFS) | ||
| 281 | /* | ||
| 282 | * gk20a_tegra_is_railgated() | ||
| 283 | * | ||
| 284 | * Check status of gk20a power rail | ||
| 285 | */ | ||
| 286 | |||
| 287 | static bool gk20a_tegra_is_railgated(struct device *dev) | ||
| 288 | { | ||
| 289 | struct gk20a *g = get_gk20a(dev); | ||
| 290 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 291 | bool ret = false; | ||
| 292 | |||
| 293 | if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
| 294 | ret = !tegra_dvfs_is_rail_up(platform->gpu_rail); | ||
| 295 | |||
| 296 | return ret; | ||
| 297 | } | ||
| 298 | |||
| 299 | /* | ||
| 300 | * gm20b_tegra_railgate() | ||
| 301 | * | ||
| 302 | * Gate (disable) gm20b power rail | ||
| 303 | */ | ||
| 304 | |||
| 305 | static int gm20b_tegra_railgate(struct device *dev) | ||
| 306 | { | ||
| 307 | struct gk20a *g = get_gk20a(dev); | ||
| 308 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 309 | int ret = 0; | ||
| 310 | |||
| 311 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) || | ||
| 312 | !tegra_dvfs_is_rail_up(platform->gpu_rail)) | ||
| 313 | return 0; | ||
| 314 | |||
| 315 | tegra_mc_flush(MC_CLIENT_GPU); | ||
| 316 | |||
| 317 | udelay(10); | ||
| 318 | |||
| 319 | /* enable clamp */ | ||
| 320 | tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0); | ||
| 321 | tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); | ||
| 322 | |||
| 323 | udelay(10); | ||
| 324 | |||
| 325 | platform->reset_assert(dev); | ||
| 326 | |||
| 327 | udelay(10); | ||
| 328 | |||
| 329 | /* | ||
| 330 | * GPCPLL is already disabled before entering this function; reference | ||
| 331 | * clocks are enabled until now - disable them just before rail gating | ||
| 332 | */ | ||
| 333 | clk_disable_unprepare(platform->clk_reset); | ||
| 334 | clk_disable_unprepare(platform->clk[0]); | ||
| 335 | clk_disable_unprepare(platform->clk[1]); | ||
| 336 | if (platform->clk[3]) | ||
| 337 | clk_disable_unprepare(platform->clk[3]); | ||
| 338 | |||
| 339 | udelay(10); | ||
| 340 | |||
| 341 | tegra_soctherm_gpu_tsens_invalidate(1); | ||
| 342 | |||
| 343 | if (tegra_dvfs_is_rail_up(platform->gpu_rail)) { | ||
| 344 | ret = tegra_dvfs_rail_power_down(platform->gpu_rail); | ||
| 345 | if (ret) | ||
| 346 | goto err_power_off; | ||
| 347 | } else | ||
| 348 | pr_info("No GPU regulator?\n"); | ||
| 349 | |||
| 350 | #ifdef CONFIG_TEGRA_BWMGR | ||
| 351 | gm20b_bwmgr_set_rate(platform, false); | ||
| 352 | #endif | ||
| 353 | |||
| 354 | return 0; | ||
| 355 | |||
| 356 | err_power_off: | ||
| 357 | nvgpu_err(platform->g, "Could not railgate GPU"); | ||
| 358 | return ret; | ||
| 359 | } | ||
| 360 | |||
| 361 | |||
| 362 | /* | ||
| 363 | * gm20b_tegra_unrailgate() | ||
| 364 | * | ||
| 365 | * Ungate (enable) gm20b power rail | ||
| 366 | */ | ||
| 367 | |||
| 368 | static int gm20b_tegra_unrailgate(struct device *dev) | ||
| 369 | { | ||
| 370 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 371 | struct gk20a *g = platform->g; | ||
| 372 | int ret = 0; | ||
| 373 | bool first = false; | ||
| 374 | |||
| 375 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
| 376 | return 0; | ||
| 377 | |||
| 378 | ret = tegra_dvfs_rail_power_up(platform->gpu_rail); | ||
| 379 | if (ret) | ||
| 380 | return ret; | ||
| 381 | |||
| 382 | #ifdef CONFIG_TEGRA_BWMGR | ||
| 383 | gm20b_bwmgr_set_rate(platform, true); | ||
| 384 | #endif | ||
| 385 | |||
| 386 | tegra_soctherm_gpu_tsens_invalidate(0); | ||
| 387 | |||
| 388 | if (!platform->clk_reset) { | ||
| 389 | platform->clk_reset = clk_get(dev, "gpu_gate"); | ||
| 390 | if (IS_ERR(platform->clk_reset)) { | ||
| 391 | nvgpu_err(g, "fail to get gpu reset clk"); | ||
| 392 | goto err_clk_on; | ||
| 393 | } | ||
| 394 | } | ||
| 395 | |||
| 396 | if (!first) { | ||
| 397 | ret = clk_prepare_enable(platform->clk_reset); | ||
| 398 | if (ret) { | ||
| 399 | nvgpu_err(g, "could not turn on gpu_gate"); | ||
| 400 | goto err_clk_on; | ||
| 401 | } | ||
| 402 | |||
| 403 | ret = clk_prepare_enable(platform->clk[0]); | ||
| 404 | if (ret) { | ||
| 405 | nvgpu_err(g, "could not turn on gpu pll"); | ||
| 406 | goto err_clk_on; | ||
| 407 | } | ||
| 408 | ret = clk_prepare_enable(platform->clk[1]); | ||
| 409 | if (ret) { | ||
| 410 | nvgpu_err(g, "could not turn on pwr clock"); | ||
| 411 | goto err_clk_on; | ||
| 412 | } | ||
| 413 | |||
| 414 | if (platform->clk[3]) { | ||
| 415 | ret = clk_prepare_enable(platform->clk[3]); | ||
| 416 | if (ret) { | ||
| 417 | nvgpu_err(g, "could not turn on fuse clock"); | ||
| 418 | goto err_clk_on; | ||
| 419 | } | ||
| 420 | } | ||
| 421 | } | ||
| 422 | |||
| 423 | udelay(10); | ||
| 424 | |||
| 425 | platform->reset_assert(dev); | ||
| 426 | |||
| 427 | udelay(10); | ||
| 428 | |||
| 429 | tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0); | ||
| 430 | tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); | ||
| 431 | |||
| 432 | udelay(10); | ||
| 433 | |||
| 434 | clk_disable(platform->clk_reset); | ||
| 435 | platform->reset_deassert(dev); | ||
| 436 | clk_enable(platform->clk_reset); | ||
| 437 | |||
| 438 | /* Flush MC after boot/railgate/SC7 */ | ||
| 439 | tegra_mc_flush(MC_CLIENT_GPU); | ||
| 440 | |||
| 441 | udelay(10); | ||
| 442 | |||
| 443 | tegra_mc_flush_done(MC_CLIENT_GPU); | ||
| 444 | |||
| 445 | udelay(10); | ||
| 446 | |||
| 447 | return 0; | ||
| 448 | |||
| 449 | err_clk_on: | ||
| 450 | tegra_dvfs_rail_power_down(platform->gpu_rail); | ||
| 451 | |||
| 452 | return ret; | ||
| 453 | } | ||
| 454 | #endif | ||
| 455 | |||
| 456 | |||
| 457 | static struct { | ||
| 458 | char *name; | ||
| 459 | unsigned long default_rate; | ||
| 460 | } tegra_gk20a_clocks[] = { | ||
| 461 | {"gpu_ref", UINT_MAX}, | ||
| 462 | {"pll_p_out5", 204000000}, | ||
| 463 | {"emc", UINT_MAX}, | ||
| 464 | {"fuse", UINT_MAX}, | ||
| 465 | }; | ||
| 466 | |||
| 467 | |||
| 468 | |||
| 469 | /* | ||
| 470 | * gk20a_tegra_get_clocks() | ||
| 471 | * | ||
| 472 | * This function looks up the clocks on the tegra platform and | ||
| 473 | * populates the gk20a platform data with their information. | ||
| 474 | */ | ||
| 475 | |||
| 476 | static int gk20a_tegra_get_clocks(struct device *dev) | ||
| 477 | { | ||
| 478 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 479 | char devname[16]; | ||
| 480 | unsigned int i; | ||
| 481 | int ret = 0; | ||
| 482 | |||
| 483 | BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks)); | ||
| 484 | |||
| 485 | snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev)); | ||
| 486 | |||
| 487 | platform->num_clks = 0; | ||
| 488 | for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) { | ||
| 489 | long rate = tegra_gk20a_clocks[i].default_rate; | ||
| 490 | struct clk *c; | ||
| 491 | |||
| 492 | c = clk_get_sys(devname, tegra_gk20a_clocks[i].name); | ||
| 493 | if (IS_ERR(c)) { | ||
| 494 | ret = PTR_ERR(c); | ||
| 495 | goto err_get_clock; | ||
| 496 | } | ||
| 497 | rate = clk_round_rate(c, rate); | ||
| 498 | clk_set_rate(c, rate); | ||
| 499 | platform->clk[i] = c; | ||
| 500 | } | ||
| 501 | platform->num_clks = i; | ||
| 502 | |||
| 503 | return 0; | ||
| 504 | |||
| 505 | err_get_clock: | ||
| 506 | |||
| 507 | while (i--) | ||
| 508 | clk_put(platform->clk[i]); | ||
| 509 | return ret; | ||
| 510 | } | ||
| 511 | |||
| 512 | #if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) | ||
| 513 | static int gm20b_tegra_reset_assert(struct device *dev) | ||
| 514 | { | ||
| 515 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 516 | |||
| 517 | if (!platform->reset_control) { | ||
| 518 | WARN(1, "Reset control not initialized\n"); | ||
| 519 | return -ENOSYS; | ||
| 520 | } | ||
| 521 | |||
| 522 | return reset_control_assert(platform->reset_control); | ||
| 523 | } | ||
| 524 | |||
| 525 | static int gm20b_tegra_reset_deassert(struct device *dev) | ||
| 526 | { | ||
| 527 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 528 | |||
| 529 | if (!platform->reset_control) { | ||
| 530 | WARN(1, "Reset control not initialized\n"); | ||
| 531 | return -ENOSYS; | ||
| 532 | } | ||
| 533 | |||
| 534 | return reset_control_deassert(platform->reset_control); | ||
| 535 | } | ||
| 536 | #endif | ||
| 537 | |||
| 538 | static void gk20a_tegra_scale_init(struct device *dev) | ||
| 539 | { | ||
| 540 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 541 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 542 | struct gk20a_emc_params *emc_params; | ||
| 543 | struct gk20a *g = platform->g; | ||
| 544 | |||
| 545 | if (!profile) | ||
| 546 | return; | ||
| 547 | |||
| 548 | if (profile->private_data) | ||
| 549 | return; | ||
| 550 | |||
| 551 | emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params)); | ||
| 552 | if (!emc_params) | ||
| 553 | return; | ||
| 554 | |||
| 555 | emc_params->freq_last_set = -1; | ||
| 556 | gk20a_tegra_calibrate_emc(dev, emc_params); | ||
| 557 | |||
| 558 | #ifdef CONFIG_TEGRA_BWMGR | ||
| 559 | emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); | ||
| 560 | if (!emc_params->bwmgr_cl) { | ||
| 561 | nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__); | ||
| 562 | return; | ||
| 563 | } | ||
| 564 | #endif | ||
| 565 | |||
| 566 | profile->private_data = emc_params; | ||
| 567 | } | ||
| 568 | |||
| 569 | static void gk20a_tegra_scale_exit(struct device *dev) | ||
| 570 | { | ||
| 571 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 572 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 573 | struct gk20a_emc_params *emc_params; | ||
| 574 | |||
| 575 | if (!profile) | ||
| 576 | return; | ||
| 577 | |||
| 578 | emc_params = profile->private_data; | ||
| 579 | #ifdef CONFIG_TEGRA_BWMGR | ||
| 580 | tegra_bwmgr_unregister(emc_params->bwmgr_cl); | ||
| 581 | #endif | ||
| 582 | |||
| 583 | nvgpu_kfree(platform->g, profile->private_data); | ||
| 584 | } | ||
| 585 | |||
| 586 | void gk20a_tegra_debug_dump(struct device *dev) | ||
| 587 | { | ||
| 588 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 589 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 590 | struct gk20a *g = platform->g; | ||
| 591 | |||
| 592 | if (g->nvhost_dev) | ||
| 593 | nvgpu_nvhost_debug_dump_device(g->nvhost_dev); | ||
| 594 | #endif | ||
| 595 | } | ||
| 596 | |||
| 597 | int gk20a_tegra_busy(struct device *dev) | ||
| 598 | { | ||
| 599 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 600 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 601 | struct gk20a *g = platform->g; | ||
| 602 | |||
| 603 | if (g->nvhost_dev) | ||
| 604 | return nvgpu_nvhost_module_busy_ext(g->nvhost_dev); | ||
| 605 | #endif | ||
| 606 | return 0; | ||
| 607 | } | ||
| 608 | |||
| 609 | void gk20a_tegra_idle(struct device *dev) | ||
| 610 | { | ||
| 611 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 612 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 613 | struct gk20a *g = platform->g; | ||
| 614 | |||
| 615 | if (g->nvhost_dev) | ||
| 616 | nvgpu_nvhost_module_idle_ext(g->nvhost_dev); | ||
| 617 | #endif | ||
| 618 | } | ||
| 619 | |||
| 620 | int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform) | ||
| 621 | { | ||
| 622 | struct gk20a *g = platform->g; | ||
| 623 | struct secure_page_buffer *secure_buffer = &platform->secure_buffer; | ||
| 624 | DEFINE_DMA_ATTRS(attrs); | ||
| 625 | dma_addr_t iova; | ||
| 626 | |||
| 627 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
| 628 | return 0; | ||
| 629 | |||
| 630 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); | ||
| 631 | (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova, | ||
| 632 | GFP_KERNEL, __DMA_ATTR(attrs)); | ||
| 633 | /* Some platforms disable VPR. In that case VPR allocations always | ||
| 634 | * fail. Just disable VPR usage in nvgpu in that case. */ | ||
| 635 | if (dma_mapping_error(&tegra_vpr_dev, iova)) | ||
| 636 | return 0; | ||
| 637 | |||
| 638 | secure_buffer->size = platform->secure_buffer_size; | ||
| 639 | secure_buffer->phys = iova; | ||
| 640 | secure_buffer->destroy = gk20a_tegra_secure_page_destroy; | ||
| 641 | |||
| 642 | g->ops.secure_alloc = gk20a_tegra_secure_alloc; | ||
| 643 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true); | ||
| 644 | |||
| 645 | return 0; | ||
| 646 | } | ||
| 647 | |||
| 648 | #ifdef CONFIG_COMMON_CLK | ||
| 649 | static struct clk *gk20a_clk_get(struct gk20a *g) | ||
| 650 | { | ||
| 651 | if (!g->clk.tegra_clk) { | ||
| 652 | struct clk *clk, *clk_parent; | ||
| 653 | char clk_dev_id[32]; | ||
| 654 | struct device *dev = dev_from_gk20a(g); | ||
| 655 | |||
| 656 | snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev)); | ||
| 657 | |||
| 658 | clk = clk_get_sys(clk_dev_id, "gpu"); | ||
| 659 | if (IS_ERR(clk)) { | ||
| 660 | nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n", | ||
| 661 | clk_dev_id); | ||
| 662 | return NULL; | ||
| 663 | } | ||
| 664 | |||
| 665 | clk_parent = clk_get_parent(clk); | ||
| 666 | if (IS_ERR_OR_NULL(clk_parent)) { | ||
| 667 | nvgpu_err(g, "fail to get tegra gpu clk parent %s/gpu\n", | ||
| 668 | clk_dev_id); | ||
| 669 | return NULL; | ||
| 670 | } | ||
| 671 | |||
| 672 | g->clk.tegra_clk = clk; | ||
| 673 | g->clk.tegra_clk_parent = clk_parent; | ||
| 674 | } | ||
| 675 | |||
| 676 | return g->clk.tegra_clk; | ||
| 677 | } | ||
| 678 | |||
| 679 | static int gm20b_clk_prepare_ops(struct clk_hw *hw) | ||
| 680 | { | ||
| 681 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
| 682 | return gm20b_clk_prepare(clk); | ||
| 683 | } | ||
| 684 | |||
| 685 | static void gm20b_clk_unprepare_ops(struct clk_hw *hw) | ||
| 686 | { | ||
| 687 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
| 688 | gm20b_clk_unprepare(clk); | ||
| 689 | } | ||
| 690 | |||
| 691 | static int gm20b_clk_is_prepared_ops(struct clk_hw *hw) | ||
| 692 | { | ||
| 693 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
| 694 | return gm20b_clk_is_prepared(clk); | ||
| 695 | } | ||
| 696 | |||
| 697 | static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate) | ||
| 698 | { | ||
| 699 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
| 700 | return gm20b_recalc_rate(clk, parent_rate); | ||
| 701 | } | ||
| 702 | |||
| 703 | static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate, | ||
| 704 | unsigned long parent_rate) | ||
| 705 | { | ||
| 706 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
| 707 | return gm20b_gpcclk_set_rate(clk, rate, parent_rate); | ||
| 708 | } | ||
| 709 | |||
| 710 | static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate, | ||
| 711 | unsigned long *parent_rate) | ||
| 712 | { | ||
| 713 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
| 714 | return gm20b_round_rate(clk, rate, parent_rate); | ||
| 715 | } | ||
| 716 | |||
| 717 | static const struct clk_ops gm20b_clk_ops = { | ||
| 718 | .prepare = gm20b_clk_prepare_ops, | ||
| 719 | .unprepare = gm20b_clk_unprepare_ops, | ||
| 720 | .is_prepared = gm20b_clk_is_prepared_ops, | ||
| 721 | .recalc_rate = gm20b_recalc_rate_ops, | ||
| 722 | .set_rate = gm20b_gpcclk_set_rate_ops, | ||
| 723 | .round_rate = gm20b_round_rate_ops, | ||
| 724 | }; | ||
| 725 | |||
| 726 | static int gm20b_register_gpcclk(struct gk20a *g) | ||
| 727 | { | ||
| 728 | const char *parent_name = "pllg_ref"; | ||
| 729 | struct clk_gk20a *clk = &g->clk; | ||
| 730 | struct clk_init_data init; | ||
| 731 | struct clk *c; | ||
| 732 | int err = 0; | ||
| 733 | |||
| 734 | /* make sure the clock is available */ | ||
| 735 | if (!gk20a_clk_get(g)) | ||
| 736 | return -ENOSYS; | ||
| 737 | |||
| 738 | err = gm20b_init_clk_setup_sw(g); | ||
| 739 | if (err) | ||
| 740 | return err; | ||
| 741 | |||
| 742 | init.name = "gpcclk"; | ||
| 743 | init.ops = &gm20b_clk_ops; | ||
| 744 | init.parent_names = &parent_name; | ||
| 745 | init.num_parents = 1; | ||
| 746 | init.flags = 0; | ||
| 747 | |||
| 748 | /* Data in .init is copied by clk_register(), so stack variable OK */ | ||
| 749 | clk->hw.init = &init; | ||
| 750 | c = clk_register(dev_from_gk20a(g), &clk->hw); | ||
| 751 | if (IS_ERR(c)) { | ||
| 752 | nvgpu_err(g, "Failed to register GPCPLL clock"); | ||
| 753 | return -EINVAL; | ||
| 754 | } | ||
| 755 | |||
| 756 | clk->g = g; | ||
| 757 | clk_register_clkdev(c, "gpcclk", "gpcclk"); | ||
| 758 | |||
| 759 | return err; | ||
| 760 | } | ||
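Once registered, the clock is reachable through the clkdev entry created above ("gpcclk"/"gpcclk"). A hypothetical consumer (the function name is illustrative) could drive it through the standard CCF calls:

#include <linux/clk.h>
#include <linux/err.h>

/* Hypothetical consumer of the "gpcclk" clkdev entry created above. */
static int example_set_gpcclk_rate(unsigned long hz)
{
	struct clk *c = clk_get_sys("gpcclk", "gpcclk");

	if (IS_ERR(c))
		return PTR_ERR(c);

	clk_set_rate(c, clk_round_rate(c, hz));
	clk_put(c);
	return 0;
}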
| 761 | #endif /* CONFIG_COMMON_CLK */ | ||
| 762 | |||
| 763 | static int gk20a_tegra_probe(struct device *dev) | ||
| 764 | { | ||
| 765 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 766 | struct device_node *np = dev->of_node; | ||
| 767 | bool joint_xpu_rail = false; | ||
| 768 | int ret; | ||
| 769 | struct gk20a *g = platform->g; | ||
| 770 | |||
| 771 | #ifdef CONFIG_COMMON_CLK | ||
| 772 | /* DVFS is not guaranteed to be initialized at the time of probe on | ||
| 773 | * kernels with Common Clock Framework enabled. | ||
| 774 | */ | ||
| 775 | if (!platform->gpu_rail) { | ||
| 776 | platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME); | ||
| 777 | if (!platform->gpu_rail) { | ||
| 778 | nvgpu_log_info(g, "deferring probe no gpu_rail"); | ||
| 779 | return -EPROBE_DEFER; | ||
| 780 | } | ||
| 781 | } | ||
| 782 | |||
| 783 | if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) { | ||
| 784 | nvgpu_log_info(g, "deferring probe gpu_rail not ready"); | ||
| 785 | return -EPROBE_DEFER; | ||
| 786 | } | ||
| 787 | #endif | ||
| 788 | |||
| 789 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 790 | ret = nvgpu_get_nvhost_dev(platform->g); | ||
| 791 | if (ret) | ||
| 792 | return ret; | ||
| 793 | #endif | ||
| 794 | |||
| 795 | #ifdef CONFIG_OF | ||
| 796 | joint_xpu_rail = of_property_read_bool(of_chosen, | ||
| 797 | "nvidia,tegra-joint_xpu_rail"); | ||
| 798 | #endif | ||
| 799 | |||
| 800 | if (joint_xpu_rail) { | ||
| 801 | nvgpu_log_info(g, "XPU rails are joint\n"); | ||
| 802 | platform->can_railgate_init = false; | ||
| 803 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
| 804 | } | ||
| 805 | |||
| 806 | platform->g->clk.gpc_pll.id = GK20A_GPC_PLL; | ||
| 807 | if (tegra_get_chip_id() == TEGRA210) { | ||
| 808 | /* WAR for bug 1547668: Disable railgating and scaling | ||
| 809 | irrespective of platform data if the rework was not made. */ | ||
| 810 | np = of_find_node_by_path("/gpu-dvfs-rework"); | ||
| 811 | if (!(np && of_device_is_available(np))) { | ||
| 812 | platform->devfreq_governor = ""; | ||
| 813 | dev_warn(dev, "board does not support scaling"); | ||
| 814 | } | ||
| 815 | platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1; | ||
| 816 | if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p) | ||
| 817 | platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1; | ||
| 818 | } | ||
| 819 | |||
| 820 | if (tegra_get_chip_id() == TEGRA132) | ||
| 821 | platform->soc_name = "tegra13x"; | ||
| 822 | |||
| 823 | gk20a_tegra_get_clocks(dev); | ||
| 824 | nvgpu_linux_init_clk_support(platform->g); | ||
| 825 | ret = gk20a_tegra_init_secure_alloc(platform); | ||
| 826 | if (ret) | ||
| 827 | return ret; | ||
| 828 | |||
| 829 | if (platform->clk_register) { | ||
| 830 | ret = platform->clk_register(platform->g); | ||
| 831 | if (ret) | ||
| 832 | return ret; | ||
| 833 | } | ||
| 834 | |||
| 835 | return 0; | ||
| 836 | } | ||
| 837 | |||
| 838 | static int gk20a_tegra_late_probe(struct device *dev) | ||
| 839 | { | ||
| 840 | return 0; | ||
| 841 | } | ||
| 842 | |||
| 843 | static int gk20a_tegra_remove(struct device *dev) | ||
| 844 | { | ||
| 845 | /* deinitialise tegra specific scaling quirks */ | ||
| 846 | gk20a_tegra_scale_exit(dev); | ||
| 847 | |||
| 848 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 849 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
| 850 | #endif | ||
| 851 | |||
| 852 | return 0; | ||
| 853 | } | ||
| 854 | |||
| 855 | static int gk20a_tegra_suspend(struct device *dev) | ||
| 856 | { | ||
| 857 | tegra_edp_notify_gpu_load(0, 0); | ||
| 858 | return 0; | ||
| 859 | } | ||
| 860 | |||
| 861 | #if defined(CONFIG_COMMON_CLK) | ||
| 862 | static long gk20a_round_clk_rate(struct device *dev, unsigned long rate) | ||
| 863 | { | ||
| 864 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 865 | struct gk20a *g = platform->g; | ||
| 866 | |||
| 867 | /* make sure the clock is available */ | ||
| 868 | if (!gk20a_clk_get(g)) | ||
| 869 | return rate; | ||
| 870 | |||
| 871 | return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate); | ||
| 872 | } | ||
| 873 | |||
| 874 | static int gk20a_clk_get_freqs(struct device *dev, | ||
| 875 | unsigned long **freqs, int *num_freqs) | ||
| 876 | { | ||
| 877 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 878 | struct gk20a *g = platform->g; | ||
| 879 | |||
| 880 | /* make sure the clock is available */ | ||
| 881 | if (!gk20a_clk_get(g)) | ||
| 882 | return -ENOSYS; | ||
| 883 | |||
| 884 | return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), | ||
| 885 | freqs, num_freqs); | ||
| 886 | } | ||
| 887 | #endif | ||
| 888 | |||
| 889 | struct gk20a_platform gm20b_tegra_platform = { | ||
| 890 | .has_syncpoints = true, | ||
| 891 | .aggressive_sync_destroy_thresh = 64, | ||
| 892 | |||
| 893 | /* power management configuration */ | ||
| 894 | .railgate_delay_init = 500, | ||
| 895 | .can_railgate_init = true, | ||
| 896 | .can_elpg_init = true, | ||
| 897 | .enable_slcg = true, | ||
| 898 | .enable_blcg = true, | ||
| 899 | .enable_elcg = true, | ||
| 900 | .can_slcg = true, | ||
| 901 | .can_blcg = true, | ||
| 902 | .can_elcg = true, | ||
| 903 | .enable_elpg = true, | ||
| 904 | .enable_aelpg = true, | ||
| 905 | .enable_perfmon = true, | ||
| 906 | .ptimer_src_freq = 19200000, | ||
| 907 | |||
| 908 | .force_reset_in_do_idle = false, | ||
| 909 | |||
| 910 | .ch_wdt_timeout_ms = 5000, | ||
| 911 | |||
| 912 | .probe = gk20a_tegra_probe, | ||
| 913 | .late_probe = gk20a_tegra_late_probe, | ||
| 914 | .remove = gk20a_tegra_remove, | ||
| 915 | /* power management callbacks */ | ||
| 916 | .suspend = gk20a_tegra_suspend, | ||
| 917 | |||
| 918 | #if defined(CONFIG_TEGRA_DVFS) | ||
| 919 | .railgate = gm20b_tegra_railgate, | ||
| 920 | .unrailgate = gm20b_tegra_unrailgate, | ||
| 921 | .is_railgated = gk20a_tegra_is_railgated, | ||
| 922 | #endif | ||
| 923 | |||
| 924 | .busy = gk20a_tegra_busy, | ||
| 925 | .idle = gk20a_tegra_idle, | ||
| 926 | |||
| 927 | #if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) | ||
| 928 | .reset_assert = gm20b_tegra_reset_assert, | ||
| 929 | .reset_deassert = gm20b_tegra_reset_deassert, | ||
| 930 | #else | ||
| 931 | .reset_assert = gk20a_tegra_reset_assert, | ||
| 932 | .reset_deassert = gk20a_tegra_reset_deassert, | ||
| 933 | #endif | ||
| 934 | |||
| 935 | #if defined(CONFIG_COMMON_CLK) | ||
| 936 | .clk_round_rate = gk20a_round_clk_rate, | ||
| 937 | .get_clk_freqs = gk20a_clk_get_freqs, | ||
| 938 | #endif | ||
| 939 | |||
| 940 | #ifdef CONFIG_COMMON_CLK | ||
| 941 | .clk_register = gm20b_register_gpcclk, | ||
| 942 | #endif | ||
| 943 | |||
| 944 | /* frequency scaling configuration */ | ||
| 945 | .initscale = gk20a_tegra_scale_init, | ||
| 946 | .prescale = gk20a_tegra_prescale, | ||
| 947 | #ifdef CONFIG_TEGRA_BWMGR | ||
| 948 | .postscale = gm20b_tegra_postscale, | ||
| 949 | #endif | ||
| 950 | .devfreq_governor = "nvhost_podgov", | ||
| 951 | .qos_notify = gk20a_scale_qos_notify, | ||
| 952 | |||
| 953 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | ||
| 954 | |||
| 955 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 956 | .has_cde = true, | ||
| 957 | #endif | ||
| 958 | |||
| 959 | .soc_name = "tegra21x", | ||
| 960 | |||
| 961 | .unified_memory = true, | ||
| 962 | .dma_mask = DMA_BIT_MASK(34), | ||
| 963 | .force_128K_pmu_vm = true, | ||
| 964 | |||
| 965 | .secure_buffer_size = 335872, | ||
| 966 | }; | ||
diff --git a/include/os/linux/platform_gk20a_tegra.h b/include/os/linux/platform_gk20a_tegra.h new file mode 100644 index 0000000..f7d5040 --- /dev/null +++ b/include/os/linux/platform_gk20a_tegra.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | /* | ||
| 2 | * GK20A Platform (SoC) Interface | ||
| 3 | * | ||
| 4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_ | ||
| 17 | #define _NVGPU_PLATFORM_GK20A_TEGRA_H_ | ||
| 18 | |||
| 19 | struct gk20a_platform; | ||
| 20 | |||
| 21 | int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform); | ||
| 22 | |||
| 23 | #endif | ||
diff --git a/include/os/linux/platform_gp10b.h b/include/os/linux/platform_gp10b.h new file mode 100644 index 0000000..d256d12 --- /dev/null +++ b/include/os/linux/platform_gp10b.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | /* | ||
| 2 | * GP10B Platform (SoC) Interface | ||
| 3 | * | ||
| 4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 7 | * copy of this software and associated documentation files (the "Software"), | ||
| 8 | * to deal in the Software without restriction, including without limitation | ||
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 11 | * Software is furnished to do so, subject to the following conditions: | ||
| 12 | * | ||
| 13 | * The above copyright notice and this permission notice shall be included in | ||
| 14 | * all copies or substantial portions of the Software. | ||
| 15 | * | ||
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| 22 | * DEALINGS IN THE SOFTWARE. | ||
| 23 | */ | ||
| 24 | |||
| 25 | #ifndef _GP10B_PLATFORM_H_ | ||
| 26 | #define _GP10B_PLATFORM_H_ | ||
| 27 | |||
| 28 | struct device; | ||
| 29 | |||
| 30 | int gp10b_tegra_get_clocks(struct device *dev); | ||
| 31 | int gp10b_tegra_reset_assert(struct device *dev); | ||
| 32 | int gp10b_tegra_reset_deassert(struct device *dev); | ||
| 33 | void gp10b_tegra_scale_init(struct device *dev); | ||
| 34 | long gp10b_round_clk_rate(struct device *dev, unsigned long rate); | ||
| 35 | int gp10b_clk_get_freqs(struct device *dev, | ||
| 36 | unsigned long **freqs, int *num_freqs); | ||
| 37 | void gp10b_tegra_prescale(struct device *dev); | ||
| 38 | void gp10b_tegra_postscale(struct device *pdev, unsigned long freq); | ||
| 39 | #endif | ||
diff --git a/include/os/linux/platform_gp10b_tegra.c b/include/os/linux/platform_gp10b_tegra.c new file mode 100644 index 0000000..9bf8d63 --- /dev/null +++ b/include/os/linux/platform_gp10b_tegra.c | |||
| @@ -0,0 +1,510 @@ | |||
| 1 | /* | ||
| 2 | * GP10B Tegra Platform Interface | ||
| 3 | * | ||
| 4 | * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include <linux/of_platform.h> | ||
| 17 | #include <linux/debugfs.h> | ||
| 18 | #include <linux/dma-buf.h> | ||
| 19 | #include <linux/nvmap.h> | ||
| 20 | #include <linux/reset.h> | ||
| 21 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
| 22 | |||
| 23 | #include <uapi/linux/nvgpu.h> | ||
| 24 | |||
| 25 | #include <soc/tegra/tegra_bpmp.h> | ||
| 26 | #include <soc/tegra/tegra_powergate.h> | ||
| 27 | #include <soc/tegra/tegra-bpmp-dvfs.h> | ||
| 28 | |||
| 29 | #include <dt-bindings/memory/tegra-swgroup.h> | ||
| 30 | |||
| 31 | #include <nvgpu/kmem.h> | ||
| 32 | #include <nvgpu/bug.h> | ||
| 33 | #include <nvgpu/enabled.h> | ||
| 34 | #include <nvgpu/hashtable.h> | ||
| 35 | #include <nvgpu/gk20a.h> | ||
| 36 | #include <nvgpu/nvhost.h> | ||
| 37 | |||
| 38 | #include "os_linux.h" | ||
| 39 | |||
| 40 | #include "clk.h" | ||
| 41 | |||
| 42 | #include "platform_gk20a.h" | ||
| 43 | #include "platform_gk20a_tegra.h" | ||
| 44 | #include "platform_gp10b.h" | ||
| 45 | #include "platform_gp10b_tegra.h" | ||
| 46 | #include "scale.h" | ||
| 47 | |||
| 48 | /* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */ | ||
| 49 | #define GP10B_FREQ_SELECT_STEP 8 | ||
| 50 | /* Allow limited set of frequencies to be available */ | ||
| 51 | #define GP10B_NUM_SUPPORTED_FREQS 15 | ||
| 52 | /* Max number of freq supported in h/w */ | ||
| 53 | #define GP10B_MAX_SUPPORTED_FREQS 120 | ||
| 54 | static unsigned long | ||
| 55 | gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; | ||
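/* 120 / 8 = 15 slots, matching GP10B_NUM_SUPPORTED_FREQS above. */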
| 56 | |||
| 57 | static bool freq_table_init_complete; | ||
| 58 | static int num_supported_freq; | ||
| 59 | |||
| 60 | #define TEGRA_GP10B_BW_PER_FREQ 64 | ||
| 61 | #define TEGRA_DDR4_BW_PER_FREQ 16 | ||
| 62 | |||
| 63 | #define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ) | ||
| 64 | |||
| 65 | #define GPCCLK_INIT_RATE 1000000000 | ||
| 66 | |||
| 67 | static struct { | ||
| 68 | char *name; | ||
| 69 | unsigned long default_rate; | ||
| 70 | } tegra_gp10b_clocks[] = { | ||
| 71 | {"gpu", GPCCLK_INIT_RATE}, | ||
| 72 | {"gpu_sys", 204000000} }; | ||
| 73 | |||
| 74 | /* | ||
| 75 | * gp10b_tegra_get_clocks() | ||
| 76 | * | ||
| 77 | * This function looks up the clocks on the tegra platform and | ||
| 78 | * populates the gp10b platform data with their information. | ||
| 79 | */ | ||
| 80 | |||
| 81 | int gp10b_tegra_get_clocks(struct device *dev) | ||
| 82 | { | ||
| 83 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 84 | unsigned int i; | ||
| 85 | |||
| 86 | platform->num_clks = 0; | ||
| 87 | for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) { | ||
| 88 | long rate = tegra_gp10b_clocks[i].default_rate; | ||
| 89 | struct clk *c; | ||
| 90 | |||
| 91 | c = clk_get(dev, tegra_gp10b_clocks[i].name); | ||
| 92 | if (IS_ERR(c)) { | ||
| 93 | nvgpu_err(platform->g, "cannot get clock %s", | ||
| 94 | tegra_gp10b_clocks[i].name); | ||
| 95 | } else { | ||
| 96 | clk_set_rate(c, rate); | ||
| 97 | platform->clk[i] = c; | ||
| 98 | } | ||
| 99 | } | ||
| 100 | platform->num_clks = i; | ||
| 101 | |||
| 102 | if (platform->clk[0]) { | ||
| 103 | /* use a signed local so a negative error is not taken as an id */ | ||
| 104 | int clk_id = tegra_bpmp_dvfs_get_clk_id(dev->of_node, | ||
| 105 | tegra_gp10b_clocks[0].name); | ||
| 106 | if (clk_id > 0) | ||
| 107 | platform->maxmin_clk_id = clk_id; | ||
| 108 | } | ||
| 108 | |||
| 109 | return 0; | ||
| 110 | } | ||
| 111 | |||
| 112 | void gp10b_tegra_scale_init(struct device *dev) | ||
| 113 | { | ||
| 114 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 115 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 116 | struct tegra_bwmgr_client *bwmgr_handle; | ||
| 117 | |||
| 118 | if (!profile) | ||
| 119 | return; | ||
| 120 | |||
| 121 | if ((struct tegra_bwmgr_client *)profile->private_data) | ||
| 122 | return; | ||
| 123 | |||
| 124 | bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); | ||
| 125 | if (!bwmgr_handle) | ||
| 126 | return; | ||
| 127 | |||
| 128 | profile->private_data = (void *)bwmgr_handle; | ||
| 129 | } | ||
| 130 | |||
| 131 | static void gp10b_tegra_scale_exit(struct device *dev) | ||
| 132 | { | ||
| 133 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 134 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 135 | |||
| 136 | if (profile && profile->private_data) | ||
| 137 | tegra_bwmgr_unregister( | ||
| 138 | (struct tegra_bwmgr_client *)profile->private_data); | ||
| 139 | } | ||
| 140 | |||
| 141 | static int gp10b_tegra_probe(struct device *dev) | ||
| 142 | { | ||
| 143 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 144 | bool joint_xpu_rail = false; | ||
| 145 | struct gk20a *g = platform->g; | ||
| 146 | int ret; | ||
| 147 | | ||
| 148 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 149 | ret = nvgpu_get_nvhost_dev(platform->g); | ||
| 150 | if (ret) | ||
| 151 | return ret; | ||
| 152 | #endif | ||
| 153 | |||
| 154 | ret = gk20a_tegra_init_secure_alloc(platform); | ||
| 155 | if (ret) | ||
| 156 | return ret; | ||
| 157 | |||
| 158 | platform->disable_bigpage = !device_is_iommuable(dev); | ||
| 159 | |||
| 160 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
| 161 | = false; | ||
| 164 | |||
| 165 | platform->g->gr.ctx_vars.force_preemption_gfxp = false; | ||
| 166 | platform->g->gr.ctx_vars.force_preemption_cilp = false; | ||
| 167 | |||
| 168 | #ifdef CONFIG_OF | ||
| 169 | joint_xpu_rail = of_property_read_bool(of_chosen, | ||
| 170 | "nvidia,tegra-joint_xpu_rail"); | ||
| 171 | #endif | ||
| 172 | |||
| 173 | if (joint_xpu_rail) { | ||
| 174 | nvgpu_log_info(g, "XPU rails are joint\n"); | ||
| 175 | platform->can_railgate_init = false; | ||
| 176 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
| 177 | } | ||
| 178 | |||
| 179 | gp10b_tegra_get_clocks(dev); | ||
| 180 | nvgpu_linux_init_clk_support(platform->g); | ||
| 181 | |||
| 182 | nvgpu_mutex_init(&platform->clk_get_freq_lock); | ||
| 183 | |||
| 184 | platform->g->ops.clk.support_clk_freq_controller = true; | ||
| 185 | |||
| 186 | return 0; | ||
| 187 | } | ||
| 188 | |||
| 189 | static int gp10b_tegra_late_probe(struct device *dev) | ||
| 190 | { | ||
| 191 | return 0; | ||
| 192 | } | ||
| 193 | |||
| 194 | static int gp10b_tegra_remove(struct device *dev) | ||
| 195 | { | ||
| 196 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 197 | |||
| 198 | /* deinitialise tegra specific scaling quirks */ | ||
| 199 | gp10b_tegra_scale_exit(dev); | ||
| 200 | |||
| 201 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 202 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
| 203 | #endif | ||
| 204 | |||
| 205 | nvgpu_mutex_destroy(&platform->clk_get_freq_lock); | ||
| 206 | |||
| 207 | return 0; | ||
| 208 | } | ||
| 209 | |||
| 210 | static bool gp10b_tegra_is_railgated(struct device *dev) | ||
| 211 | { | ||
| 212 | bool ret = false; | ||
| 213 | |||
| 214 | if (tegra_bpmp_running()) | ||
| 215 | ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU); | ||
| 216 | |||
| 217 | return ret; | ||
| 218 | } | ||
| 219 | |||
| 220 | static int gp10b_tegra_railgate(struct device *dev) | ||
| 221 | { | ||
| 222 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 223 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 224 | |||
| 225 | /* remove emc frequency floor */ | ||
| 226 | if (profile) | ||
| 227 | tegra_bwmgr_set_emc( | ||
| 228 | (struct tegra_bwmgr_client *)profile->private_data, | ||
| 229 | 0, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
| 230 | |||
| 231 | if (tegra_bpmp_running() && | ||
| 232 | tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) { | ||
| 233 | int i; | ||
| 234 | for (i = 0; i < platform->num_clks; i++) { | ||
| 235 | if (platform->clk[i]) | ||
| 236 | clk_disable_unprepare(platform->clk[i]); | ||
| 237 | } | ||
| 238 | tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU); | ||
| 239 | } | ||
| 240 | return 0; | ||
| 241 | } | ||
| 242 | |||
| 243 | static int gp10b_tegra_unrailgate(struct device *dev) | ||
| 244 | { | ||
| 245 | int ret = 0; | ||
| 246 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 247 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 248 | |||
| 249 | if (tegra_bpmp_running()) { | ||
| 250 | int i; | ||
| 251 | ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU); | ||
| 252 | for (i = 0; i < platform->num_clks; i++) { | ||
| 253 | if (platform->clk[i]) | ||
| 254 | clk_prepare_enable(platform->clk[i]); | ||
| 255 | } | ||
| 256 | } | ||
| 257 | |||
| 258 | /* to start with, set emc frequency floor to max rate */ | ||
| 259 | if (profile) | ||
| 260 | tegra_bwmgr_set_emc( | ||
| 261 | (struct tegra_bwmgr_client *)profile->private_data, | ||
| 262 | tegra_bwmgr_get_max_emc_rate(), | ||
| 263 | TEGRA_BWMGR_SET_EMC_FLOOR); | ||
| 264 | return ret; | ||
| 265 | } | ||
| 266 | |||
| 267 | static int gp10b_tegra_suspend(struct device *dev) | ||
| 268 | { | ||
| 269 | return 0; | ||
| 270 | } | ||
| 271 | |||
| 272 | int gp10b_tegra_reset_assert(struct device *dev) | ||
| 273 | { | ||
| 274 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 275 | int ret = 0; | ||
| 276 | |||
| 277 | if (!platform->reset_control) | ||
| 278 | return -EINVAL; | ||
| 279 | |||
| 280 | ret = reset_control_assert(platform->reset_control); | ||
| 281 | |||
| 282 | return ret; | ||
| 283 | } | ||
| 284 | |||
| 285 | int gp10b_tegra_reset_deassert(struct device *dev) | ||
| 286 | { | ||
| 287 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 288 | int ret = 0; | ||
| 289 | |||
| 290 | if (!platform->reset_control) | ||
| 291 | return -EINVAL; | ||
| 292 | |||
| 293 | ret = reset_control_deassert(platform->reset_control); | ||
| 294 | |||
| 295 | return ret; | ||
| 296 | } | ||
| 297 | |||
| 298 | void gp10b_tegra_prescale(struct device *dev) | ||
| 299 | { | ||
| 300 | struct gk20a *g = get_gk20a(dev); | ||
| 301 | u32 avg = 0; | ||
| 302 | |||
| 303 | nvgpu_log_fn(g, " "); | ||
| 304 | |||
| 305 | nvgpu_pmu_load_norm(g, &avg); | ||
| 306 | |||
| 307 | nvgpu_log_fn(g, "done"); | ||
| 308 | } | ||
| 309 | |||
| 310 | void gp10b_tegra_postscale(struct device *pdev, | ||
| 311 | unsigned long freq) | ||
| 312 | { | ||
| 313 | struct gk20a_platform *platform = gk20a_get_platform(pdev); | ||
| 314 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 315 | struct gk20a *g = get_gk20a(pdev); | ||
| 316 | unsigned long emc_rate; | ||
| 317 | |||
| 318 | nvgpu_log_fn(g, " "); | ||
| 319 | if (profile && profile->private_data && | ||
| 320 | !platform->is_railgated(pdev)) { | ||
| 321 | unsigned long emc_scale; | ||
| 322 | |||
| 323 | if (freq <= gp10b_freq_table[0]) | ||
| 324 | emc_scale = 0; | ||
| 325 | else | ||
| 326 | emc_scale = g->emc3d_ratio; | ||
| 327 | |||
| 328 | emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; | ||
| 329 | |||
| 330 | if (emc_rate > tegra_bwmgr_get_max_emc_rate()) | ||
| 331 | emc_rate = tegra_bwmgr_get_max_emc_rate(); | ||
| 332 | |||
| 333 | tegra_bwmgr_set_emc( | ||
| 334 | (struct tegra_bwmgr_client *)profile->private_data, | ||
| 335 | emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
| 336 | } | ||
| 337 | nvgpu_log_fn(g, "done"); | ||
| 338 | } | ||
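The EMC floor request above is plain fixed-point arithmetic: gpcclk frequency times the bandwidth ratio times emc3d_ratio (a ratio scaled by 1000). A minimal user-space sketch with illustrative values (and assuming a 64-bit unsigned long, as on the target SoC) reproduces the calculation:

#include <stdio.h>

#define TEGRA_GP10B_BW_PER_FREQ 64
#define TEGRA_DDR4_BW_PER_FREQ 16
#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)

int main(void)
{
	unsigned long freq = 1000000000UL;	/* 1 GHz gpcclk */
	unsigned long emc_scale = 500;		/* emc3d_ratio, x1000 fixed point */

	/* same expression as gp10b_tegra_postscale() */
	unsigned long emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000;

	printf("emc floor = %lu Hz\n", emc_rate);	/* prints 2000000000 */
	return 0;
}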
| 339 | |||
| 340 | long gp10b_round_clk_rate(struct device *dev, unsigned long rate) | ||
| 341 | { | ||
| 342 | struct gk20a *g = get_gk20a(dev); | ||
| 343 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
| 344 | unsigned long *freq_table = profile->devfreq_profile.freq_table; | ||
| 345 | int max_states = profile->devfreq_profile.max_state; | ||
| 346 | int i; | ||
| 347 | |||
| 348 | for (i = 0; i < max_states; ++i) | ||
| 349 | if (freq_table[i] >= rate) | ||
| 350 | return freq_table[i]; | ||
| 351 | |||
| 352 | return freq_table[max_states - 1]; | ||
| 353 | } | ||
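gp10b_round_clk_rate() rounds a request up to the next supported table entry and falls back to the table maximum when the request exceeds every entry. A standalone sketch of the same scan, with made-up rates:

#include <stdio.h>

int main(void)
{
	unsigned long table[] = { 100000000UL, 200000000UL, 300000000UL };
	unsigned long rate = 150000000UL;	/* request between two entries */
	int i, n = 3;

	for (i = 0; i < n; ++i)
		if (table[i] >= rate)
			break;

	/* rounds 150 MHz up to 200 MHz; anything above 300 MHz clips to 300 MHz */
	printf("%lu\n", i < n ? table[i] : table[n - 1]);
	return 0;
}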
| 354 | |||
| 355 | int gp10b_clk_get_freqs(struct device *dev, | ||
| 356 | unsigned long **freqs, int *num_freqs) | ||
| 357 | { | ||
| 358 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 359 | struct gk20a *g = platform->g; | ||
| 360 | unsigned long max_rate; | ||
| 361 | unsigned long new_rate = 0, prev_rate = 0; | ||
| 362 | int i, freq_counter = 0; | ||
| 363 | int sel_freq_cnt; | ||
| 364 | unsigned long loc_freq_table[GP10B_MAX_SUPPORTED_FREQS]; | ||
| 365 | |||
| 366 | nvgpu_mutex_acquire(&platform->clk_get_freq_lock); | ||
| 367 | |||
| 368 | if (freq_table_init_complete) { | ||
| 369 | |||
| 370 | *freqs = gp10b_freq_table; | ||
| 371 | *num_freqs = num_supported_freq; | ||
| 372 | |||
| 373 | nvgpu_mutex_release(&platform->clk_get_freq_lock); | ||
| 374 | |||
| 375 | return 0; | ||
| 376 | } | ||
| 377 | |||
| 378 | max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); | ||
| 379 | |||
| 380 | /* | ||
| 381 | * Walk the h/w frequency table and update the local table | ||
| 382 | */ | ||
| 383 | for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { | ||
| 384 | prev_rate = new_rate; | ||
| 385 | new_rate = clk_round_rate(platform->clk[0], | ||
| 386 | prev_rate + 1); | ||
| 387 | loc_freq_table[i] = new_rate; | ||
| 388 | if (new_rate == max_rate) | ||
| 389 | break; | ||
| 390 | } | ||
| 391 | freq_counter = i + 1; | ||
| 392 | WARN_ON(freq_counter >= GP10B_MAX_SUPPORTED_FREQS); | ||
| 393 | |||
| 394 | /* | ||
| 395 | * If the number of achievable frequencies is less than or | ||
| 396 | * equal to GP10B_NUM_SUPPORTED_FREQS, select all frequencies | ||
| 397 | * else, select one out of every 8 frequencies | ||
| 398 | */ | ||
| 399 | if (freq_counter <= GP10B_NUM_SUPPORTED_FREQS) { | ||
| 400 | for (sel_freq_cnt = 0; sel_freq_cnt < freq_counter; ++sel_freq_cnt) | ||
| 401 | gp10b_freq_table[sel_freq_cnt] = | ||
| 402 | loc_freq_table[sel_freq_cnt]; | ||
| 403 | } else { | ||
| 404 | /* | ||
| 405 | * Walk the h/w frequency table and only select | ||
| 406 | * GP10B_FREQ_SELECT_STEP'th frequencies and | ||
| 407 | * add MAX freq to last | ||
| 408 | */ | ||
| 409 | sel_freq_cnt = 0; | ||
| 410 | for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { | ||
| 411 | new_rate = loc_freq_table[i]; | ||
| 412 | |||
| 413 | if (i % GP10B_FREQ_SELECT_STEP == 0 || | ||
| 414 | new_rate == max_rate) { | ||
| 415 | gp10b_freq_table[sel_freq_cnt++] = | ||
| 416 | new_rate; | ||
| 417 | |||
| 418 | if (new_rate == max_rate) | ||
| 419 | break; | ||
| 420 | } | ||
| 421 | } | ||
| 422 | WARN_ON(sel_freq_cnt > ARRAY_SIZE(gp10b_freq_table)); | ||
| 423 | } | ||
| 424 | |||
| 425 | /* Fill freq table */ | ||
| 426 | *freqs = gp10b_freq_table; | ||
| 427 | *num_freqs = sel_freq_cnt; | ||
| 428 | num_supported_freq = sel_freq_cnt; | ||
| 429 | |||
| 430 | freq_table_init_complete = true; | ||
| 431 | |||
| 432 | nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n", | ||
| 433 | gp10b_freq_table[0], max_rate, *num_freqs); | ||
| 434 | |||
| 435 | nvgpu_mutex_release(&platform->clk_get_freq_lock); | ||
| 436 | |||
| 437 | return 0; | ||
| 438 | } | ||
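The selection policy in gp10b_clk_get_freqs() — keep every GP10B_FREQ_SELECT_STEP'th rate and always keep the maximum — can be exercised outside the driver. A minimal sketch with made-up rates:

#include <stdio.h>

#define STEP 8
#define MAX_FREQS 120

int main(void)
{
	unsigned long hw[MAX_FREQS], sel[MAX_FREQS];
	int i, n = 40, count = 0;	/* pretend h/w exposes 40 rates */

	for (i = 0; i < n; i++)
		hw[i] = 100000000UL + i * 25000000UL;

	/* mirrors the decimation loop: every STEP'th rate, plus the max */
	for (i = 0; i < n; i++) {
		if (i % STEP == 0 || i == n - 1) {
			sel[count++] = hw[i];
			if (i == n - 1)
				break;
		}
	}

	for (i = 0; i < count; i++)
		printf("%lu\n", sel[i]);	/* indices 0,8,16,24,32,39 */
	return 0;
}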
| 439 | |||
| 440 | struct gk20a_platform gp10b_tegra_platform = { | ||
| 441 | .has_syncpoints = true, | ||
| 442 | |||
| 443 | /* power management configuration */ | ||
| 444 | .railgate_delay_init = 500, | ||
| 445 | |||
| 446 | /* ldiv slowdown factor */ | ||
| 447 | .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16, | ||
| 448 | |||
| 449 | /* power management configuration */ | ||
| 450 | .can_railgate_init = true, | ||
| 451 | .enable_elpg = true, | ||
| 452 | .can_elpg_init = true, | ||
| 453 | .enable_blcg = true, | ||
| 454 | .enable_slcg = true, | ||
| 455 | .enable_elcg = true, | ||
| 456 | .can_slcg = true, | ||
| 457 | .can_blcg = true, | ||
| 458 | .can_elcg = true, | ||
| 459 | .enable_aelpg = true, | ||
| 460 | .enable_perfmon = true, | ||
| 461 | |||
| 462 | /* ptimer src frequency in hz*/ | ||
| 463 | .ptimer_src_freq = 31250000, | ||
| 464 | |||
| 465 | .ch_wdt_timeout_ms = 5000, | ||
| 466 | |||
| 467 | .probe = gp10b_tegra_probe, | ||
| 468 | .late_probe = gp10b_tegra_late_probe, | ||
| 469 | .remove = gp10b_tegra_remove, | ||
| 470 | |||
| 471 | /* power management callbacks */ | ||
| 472 | .suspend = gp10b_tegra_suspend, | ||
| 473 | .railgate = gp10b_tegra_railgate, | ||
| 474 | .unrailgate = gp10b_tegra_unrailgate, | ||
| 475 | .is_railgated = gp10b_tegra_is_railgated, | ||
| 476 | |||
| 477 | .busy = gk20a_tegra_busy, | ||
| 478 | .idle = gk20a_tegra_idle, | ||
| 479 | |||
| 480 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | ||
| 481 | |||
| 482 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
| 483 | .has_cde = true, | ||
| 484 | #endif | ||
| 485 | |||
| 486 | .clk_round_rate = gp10b_round_clk_rate, | ||
| 487 | .get_clk_freqs = gp10b_clk_get_freqs, | ||
| 488 | |||
| 489 | /* frequency scaling configuration */ | ||
| 490 | .initscale = gp10b_tegra_scale_init, | ||
| 491 | .prescale = gp10b_tegra_prescale, | ||
| 492 | .postscale = gp10b_tegra_postscale, | ||
| 493 | .devfreq_governor = "nvhost_podgov", | ||
| 494 | |||
| 495 | .qos_notify = gk20a_scale_qos_notify, | ||
| 496 | |||
| 497 | .reset_assert = gp10b_tegra_reset_assert, | ||
| 498 | .reset_deassert = gp10b_tegra_reset_deassert, | ||
| 499 | |||
| 500 | .force_reset_in_do_idle = false, | ||
| 501 | |||
| 502 | .soc_name = "tegra18x", | ||
| 503 | |||
| 504 | .unified_memory = true, | ||
| 505 | .dma_mask = DMA_BIT_MASK(36), | ||
| 506 | |||
| 507 | .ltc_streamid = TEGRA_SID_GPUB, | ||
| 508 | |||
| 509 | .secure_buffer_size = 401408, | ||
| 510 | }; | ||
diff --git a/include/os/linux/platform_gp10b_tegra.h b/include/os/linux/platform_gp10b_tegra.h new file mode 100644 index 0000000..85b46b9 --- /dev/null +++ b/include/os/linux/platform_gp10b_tegra.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef _PLATFORM_GP10B_TEGRA_H_ | ||
| 18 | #define _PLATFORM_GP10B_TEGRA_H_ | ||
| 19 | |||
| 20 | #include "gp10b/gr_gp10b.h" | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/include/os/linux/platform_gv11b_tegra.c b/include/os/linux/platform_gv11b_tegra.c new file mode 100644 index 0000000..6c9d0f5 --- /dev/null +++ b/include/os/linux/platform_gv11b_tegra.c | |||
| @@ -0,0 +1,331 @@ | |||
| 1 | /* | ||
| 2 | * GV11B Tegra Platform Interface | ||
| 3 | * | ||
| 4 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/of_platform.h> | ||
| 20 | #include <linux/debugfs.h> | ||
| 21 | #include <linux/dma-buf.h> | ||
| 22 | #include <linux/nvmap.h> | ||
| 23 | #include <linux/reset.h> | ||
| 24 | #include <linux/hashtable.h> | ||
| 25 | #include <linux/clk.h> | ||
| 26 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
| 27 | |||
| 28 | #include <nvgpu/gk20a.h> | ||
| 29 | #include <nvgpu/nvhost.h> | ||
| 30 | |||
| 31 | #include <uapi/linux/nvgpu.h> | ||
| 32 | |||
| 33 | #include <soc/tegra/tegra_bpmp.h> | ||
| 34 | #include <soc/tegra/tegra_powergate.h> | ||
| 35 | |||
| 36 | #include "platform_gk20a.h" | ||
| 37 | #include "clk.h" | ||
| 38 | #include "scale.h" | ||
| 39 | |||
| 40 | #include "platform_gp10b.h" | ||
| 41 | #include "platform_gp10b_tegra.h" | ||
| 42 | |||
| 43 | #include "os_linux.h" | ||
| 44 | #include "platform_gk20a_tegra.h" | ||
| 45 | #include "gv11b/gr_gv11b.h" | ||
| 46 | |||
| 47 | #define EMC3D_GV11B_RATIO 500 | ||
| 48 | |||
| 49 | void gv11b_tegra_scale_init(struct device *dev) | ||
| 50 | { | ||
| 51 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 52 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 53 | |||
| 54 | if (!profile) | ||
| 55 | return; | ||
| 56 | |||
| 57 | platform->g->emc3d_ratio = EMC3D_GV11B_RATIO; | ||
| 58 | |||
| 59 | gp10b_tegra_scale_init(dev); | ||
| 60 | } | ||
| 61 | |||
| 62 | static void gv11b_tegra_scale_exit(struct device *dev) | ||
| 63 | { | ||
| 64 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 65 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 66 | |||
| 67 | if (profile && profile->private_data) | ||
| 68 | tegra_bwmgr_unregister( | ||
| 69 | (struct tegra_bwmgr_client *)profile->private_data); | ||
| 70 | } | ||
| 71 | |||
| 72 | static int gv11b_tegra_probe(struct device *dev) | ||
| 73 | { | ||
| 74 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 75 | int err; | ||
| 76 | bool joint_xpu_rail = false; | ||
| 77 | struct gk20a *g = platform->g; | ||
| 78 | |||
| 79 | err = nvgpu_nvhost_syncpt_init(platform->g); | ||
| 80 | if (err) { | ||
| 81 | if (err != -ENOSYS) | ||
| 82 | return err; | ||
| 83 | } | ||
| 84 | |||
| 85 | err = gk20a_tegra_init_secure_alloc(platform); | ||
| 86 | if (err) | ||
| 87 | return err; | ||
| 88 | |||
| 89 | platform->disable_bigpage = !device_is_iommuable(dev); | ||
| 90 | |||
| 91 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
| 92 | = false; | ||
| 95 | |||
| 96 | platform->g->gr.ctx_vars.force_preemption_gfxp = false; | ||
| 97 | platform->g->gr.ctx_vars.force_preemption_cilp = false; | ||
| 98 | |||
| 99 | #ifdef CONFIG_OF | ||
| 100 | joint_xpu_rail = of_property_read_bool(of_chosen, | ||
| 101 | "nvidia,tegra-joint_xpu_rail"); | ||
| 102 | #endif | ||
| 103 | |||
| 104 | if (joint_xpu_rail) { | ||
| 105 | nvgpu_log_info(g, "XPU rails are joint\n"); | ||
| 106 | platform->can_railgate_init = false; | ||
| 107 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
| 108 | } | ||
| 109 | |||
| 110 | gp10b_tegra_get_clocks(dev); | ||
| 111 | nvgpu_linux_init_clk_support(platform->g); | ||
| 112 | |||
| 113 | nvgpu_mutex_init(&platform->clk_get_freq_lock); | ||
| 114 | |||
| 115 | platform->g->ops.clk.support_clk_freq_controller = true; | ||
| 116 | |||
| 117 | return 0; | ||
| 118 | } | ||
| 119 | |||
| 120 | static int gv11b_tegra_late_probe(struct device *dev) | ||
| 121 | { | ||
| 122 | return 0; | ||
| 123 | } | ||
| 124 | |||
| 125 | |||
| 126 | static int gv11b_tegra_remove(struct device *dev) | ||
| 127 | { | ||
| 128 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 129 | |||
| 130 | gv11b_tegra_scale_exit(dev); | ||
| 131 | |||
| 132 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 133 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
| 134 | #endif | ||
| 135 | |||
| 136 | nvgpu_mutex_destroy(&platform->clk_get_freq_lock); | ||
| 137 | |||
| 138 | return 0; | ||
| 139 | } | ||
| 140 | |||
| 141 | static bool gv11b_tegra_is_railgated(struct device *dev) | ||
| 142 | { | ||
| 143 | bool ret = false; | ||
| 144 | #ifdef TEGRA194_POWER_DOMAIN_GPU | ||
| 145 | struct gk20a *g = get_gk20a(dev); | ||
| 146 | |||
| 147 | if (tegra_bpmp_running()) { | ||
| 148 | nvgpu_log(g, gpu_dbg_info, "bpmp running"); | ||
| 149 | ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU); | ||
| 150 | |||
| 151 | nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no"); | ||
| 152 | } else { | ||
| 153 | nvgpu_log(g, gpu_dbg_info, "bpmp not running"); | ||
| 154 | } | ||
| 155 | #endif | ||
| 156 | return ret; | ||
| 157 | } | ||
| 158 | |||
| 159 | static int gv11b_tegra_railgate(struct device *dev) | ||
| 160 | { | ||
| 161 | #ifdef TEGRA194_POWER_DOMAIN_GPU | ||
| 162 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 163 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 164 | struct gk20a *g = get_gk20a(dev); | ||
| 165 | int i; | ||
| 166 | |||
| 167 | /* remove emc frequency floor */ | ||
| 168 | if (profile) | ||
| 169 | tegra_bwmgr_set_emc( | ||
| 170 | (struct tegra_bwmgr_client *)profile->private_data, | ||
| 171 | 0, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
| 172 | |||
| 173 | if (tegra_bpmp_running()) { | ||
| 174 | nvgpu_log(g, gpu_dbg_info, "bpmp running"); | ||
| 175 | if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) { | ||
| 176 | nvgpu_log(g, gpu_dbg_info, "powergate is not powered"); | ||
| 177 | return 0; | ||
| 178 | } | ||
| 179 | nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare"); | ||
| 180 | for (i = 0; i < platform->num_clks; i++) { | ||
| 181 | if (platform->clk[i]) | ||
| 182 | clk_disable_unprepare(platform->clk[i]); | ||
| 183 | } | ||
| 184 | nvgpu_log(g, gpu_dbg_info, "powergate_partition"); | ||
| 185 | tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU); | ||
| 186 | } else { | ||
| 187 | nvgpu_log(g, gpu_dbg_info, "bpmp not running"); | ||
| 188 | } | ||
| 189 | #endif | ||
| 190 | return 0; | ||
| 191 | } | ||
| 192 | |||
| 193 | static int gv11b_tegra_unrailgate(struct device *dev) | ||
| 194 | { | ||
| 195 | int ret = 0; | ||
| 196 | #ifdef TEGRA194_POWER_DOMAIN_GPU | ||
| 197 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 198 | struct gk20a *g = get_gk20a(dev); | ||
| 199 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 200 | int i; | ||
| 201 | |||
| 202 | if (tegra_bpmp_running()) { | ||
| 203 | nvgpu_log(g, gpu_dbg_info, "bpmp running"); | ||
| 204 | ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU); | ||
| 205 | if (ret) { | ||
| 206 | nvgpu_log(g, gpu_dbg_info, | ||
| 207 | "unpowergate partition failed"); | ||
| 208 | return ret; | ||
| 209 | } | ||
| 210 | nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable"); | ||
| 211 | for (i = 0; i < platform->num_clks; i++) { | ||
| 212 | if (platform->clk[i]) | ||
| 213 | clk_prepare_enable(platform->clk[i]); | ||
| 214 | } | ||
| 215 | } else { | ||
| 216 | nvgpu_log(g, gpu_dbg_info, "bpmp not running"); | ||
| 217 | } | ||
| 218 | |||
| 219 | /* to start with, set emc frequency floor to max rate */ | ||
| 220 | if (profile) | ||
| 221 | tegra_bwmgr_set_emc( | ||
| 222 | (struct tegra_bwmgr_client *)profile->private_data, | ||
| 223 | tegra_bwmgr_get_max_emc_rate(), | ||
| 224 | TEGRA_BWMGR_SET_EMC_FLOOR); | ||
| 225 | #endif | ||
| 226 | return ret; | ||
| 227 | } | ||
| 228 | |||
| 229 | static int gv11b_tegra_suspend(struct device *dev) | ||
| 230 | { | ||
| 231 | return 0; | ||
| 232 | } | ||
| 233 | |||
| 234 | static bool is_tpc_mask_valid(struct gk20a_platform *platform, u32 tpc_pg_mask) | ||
| 235 | { | ||
| 236 | u32 i; | ||
| 237 | bool valid = false; | ||
| 238 | |||
| 239 | for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) { | ||
| 240 | if (tpc_pg_mask == platform->valid_tpc_mask[i]) { | ||
| 241 | valid = true; | ||
| 242 | break; | ||
| 243 | } | ||
| 244 | } | ||
| 245 | return valid; | ||
| 246 | } | ||
| 247 | |||
| 248 | static void gv11b_tegra_set_tpc_pg_mask(struct device *dev, u32 tpc_pg_mask) | ||
| 249 | { | ||
| 250 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 251 | struct gk20a *g = get_gk20a(dev); | ||
| 252 | |||
| 253 | if (is_tpc_mask_valid(platform, tpc_pg_mask)) { | ||
| 254 | g->tpc_pg_mask = tpc_pg_mask; | ||
| 255 | } | ||
| 256 | } | ||
| 258 | |||
| 259 | struct gk20a_platform gv11b_tegra_platform = { | ||
| 260 | .has_syncpoints = true, | ||
| 261 | |||
| 262 | /* ptimer src frequency in hz*/ | ||
| 263 | .ptimer_src_freq = 31250000, | ||
| 264 | |||
| 265 | .ch_wdt_timeout_ms = 5000, | ||
| 266 | |||
| 267 | .probe = gv11b_tegra_probe, | ||
| 268 | .late_probe = gv11b_tegra_late_probe, | ||
| 269 | .remove = gv11b_tegra_remove, | ||
| 270 | .railgate_delay_init = 500, | ||
| 271 | .can_railgate_init = true, | ||
| 272 | |||
| 273 | .can_tpc_powergate = true, | ||
| 274 | .valid_tpc_mask[0] = 0x0, | ||
| 275 | .valid_tpc_mask[1] = 0x1, | ||
| 276 | .valid_tpc_mask[2] = 0x2, | ||
| 277 | .valid_tpc_mask[3] = 0x4, | ||
| 278 | .valid_tpc_mask[4] = 0x8, | ||
| 279 | .valid_tpc_mask[5] = 0x5, | ||
| 280 | .valid_tpc_mask[6] = 0x6, | ||
| 281 | .valid_tpc_mask[7] = 0x9, | ||
| 282 | .valid_tpc_mask[8] = 0xa, | ||
| 283 | |||
| 284 | .set_tpc_pg_mask = gv11b_tegra_set_tpc_pg_mask, | ||
| 285 | |||
| 286 | .can_slcg = true, | ||
| 287 | .can_blcg = true, | ||
| 288 | .can_elcg = true, | ||
| 289 | .enable_slcg = true, | ||
| 290 | .enable_blcg = true, | ||
| 291 | .enable_elcg = true, | ||
| 292 | .enable_perfmon = true, | ||
| 293 | |||
| 294 | /* power management configuration */ | ||
| 295 | .enable_elpg = true, | ||
| 296 | .can_elpg_init = true, | ||
| 297 | .enable_aelpg = true, | ||
| 298 | |||
| 299 | /* power management callbacks */ | ||
| 300 | .suspend = gv11b_tegra_suspend, | ||
| 301 | .railgate = gv11b_tegra_railgate, | ||
| 302 | .unrailgate = gv11b_tegra_unrailgate, | ||
| 303 | .is_railgated = gv11b_tegra_is_railgated, | ||
| 304 | |||
| 305 | .busy = gk20a_tegra_busy, | ||
| 306 | .idle = gk20a_tegra_idle, | ||
| 307 | |||
| 308 | .clk_round_rate = gp10b_round_clk_rate, | ||
| 309 | .get_clk_freqs = gp10b_clk_get_freqs, | ||
| 310 | |||
| 311 | /* frequency scaling configuration */ | ||
| 312 | .initscale = gv11b_tegra_scale_init, | ||
| 313 | .prescale = gp10b_tegra_prescale, | ||
| 314 | .postscale = gp10b_tegra_postscale, | ||
| 315 | .devfreq_governor = "nvhost_podgov", | ||
| 316 | |||
| 317 | .qos_notify = gk20a_scale_qos_notify, | ||
| 318 | |||
| 319 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | ||
| 320 | |||
| 321 | .soc_name = "tegra19x", | ||
| 322 | |||
| 323 | .honors_aperture = true, | ||
| 324 | .unified_memory = true, | ||
| 325 | .dma_mask = DMA_BIT_MASK(36), | ||
| 326 | |||
| 327 | .reset_assert = gp10b_tegra_reset_assert, | ||
| 328 | .reset_deassert = gp10b_tegra_reset_deassert, | ||
| 329 | |||
| 330 | .secure_buffer_size = 667648, | ||
| 331 | }; | ||
diff --git a/include/os/linux/rwsem.c b/include/os/linux/rwsem.c new file mode 100644 index 0000000..297ddf1 --- /dev/null +++ b/include/os/linux/rwsem.c | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <nvgpu/rwsem.h> | ||
| 15 | |||
| 16 | void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem) | ||
| 17 | { | ||
| 18 | init_rwsem(&rwsem->rwsem); | ||
| 19 | } | ||
| 20 | |||
| 21 | void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem) | ||
| 22 | { | ||
| 23 | up_read(&rwsem->rwsem); | ||
| 24 | } | ||
| 25 | |||
| 26 | void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem) | ||
| 27 | { | ||
| 28 | down_read(&rwsem->rwsem); | ||
| 29 | } | ||
| 30 | |||
| 31 | void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem) | ||
| 32 | { | ||
| 33 | up_write(&rwsem->rwsem); | ||
| 34 | } | ||
| 35 | |||
| 36 | void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem) | ||
| 37 | { | ||
| 38 | down_write(&rwsem->rwsem); | ||
| 39 | } | ||
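These wrappers map one-to-one onto the kernel's rw_semaphore primitives. A hypothetical caller (all names except the nvgpu_rwsem_* API are invented for illustration) takes the read side for lookups and the write side for updates:

#include <nvgpu/rwsem.h>

struct demo_table {
	struct nvgpu_rwsem lock;
	int value;
};

static void demo_init(struct demo_table *t)
{
	nvgpu_rwsem_init(&t->lock);	/* must run before any down/up call */
}

static int demo_lookup(struct demo_table *t)
{
	int v;

	nvgpu_rwsem_down_read(&t->lock);	/* readers may run concurrently */
	v = t->value;
	nvgpu_rwsem_up_read(&t->lock);
	return v;
}

static void demo_update(struct demo_table *t, int v)
{
	nvgpu_rwsem_down_write(&t->lock);	/* writers are exclusive */
	t->value = v;
	nvgpu_rwsem_up_write(&t->lock);
}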
diff --git a/include/os/linux/scale.c b/include/os/linux/scale.c new file mode 100644 index 0000000..388e168 --- /dev/null +++ b/include/os/linux/scale.c | |||
| @@ -0,0 +1,435 @@ | |||
| 1 | /* | ||
| 2 | * gk20a clock scaling profile | ||
| 3 | * | ||
| 4 | * Copyright (c) 2013-2020, NVIDIA Corporation. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/devfreq.h> | ||
| 20 | #include <linux/export.h> | ||
| 21 | #include <soc/tegra/chip-id.h> | ||
| 22 | #include <linux/pm_qos.h> | ||
| 23 | |||
| 24 | #include <governor.h> | ||
| 25 | |||
| 26 | #include <nvgpu/kmem.h> | ||
| 27 | #include <nvgpu/log.h> | ||
| 28 | #include <nvgpu/gk20a.h> | ||
| 29 | #include <nvgpu/clk_arb.h> | ||
| 30 | |||
| 31 | #include "platform_gk20a.h" | ||
| 32 | #include "scale.h" | ||
| 33 | #include "os_linux.h" | ||
| 34 | |||
| 35 | /* | ||
| 36 | * gk20a_scale_qos_notify() | ||
| 37 | * | ||
| 38 | * This function is called when the minimum QoS requirement for the device | ||
| 39 | * has changed. The function calls the postscaling callback if it is defined. | ||
| 40 | */ | ||
| 41 | |||
| 42 | #if defined(CONFIG_GK20A_PM_QOS) && defined(CONFIG_COMMON_CLK) | ||
| 43 | int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
| 44 | unsigned long n, void *p) | ||
| 45 | { | ||
| 46 | struct gk20a_scale_profile *profile = | ||
| 47 | container_of(nb, struct gk20a_scale_profile, | ||
| 48 | qos_notify_block); | ||
| 49 | struct gk20a *g = get_gk20a(profile->dev); | ||
| 50 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 51 | struct devfreq *devfreq = l->devfreq; | ||
| 52 | |||
| 53 | if (!devfreq) | ||
| 54 | return NOTIFY_OK; | ||
| 55 | |||
| 56 | mutex_lock(&devfreq->lock); | ||
| 57 | /* check for pm_qos min and max frequency requirement */ | ||
| 58 | profile->qos_min_freq = | ||
| 59 | (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; | ||
| 60 | profile->qos_max_freq = | ||
| 61 | (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; | ||
| 62 | |||
| 63 | if (profile->qos_min_freq > profile->qos_max_freq) { | ||
| 64 | nvgpu_err(g, | ||
| 65 | "QoS: setting invalid limit, min_freq=%lu max_freq=%lu", | ||
| 66 | profile->qos_min_freq, profile->qos_max_freq); | ||
| 67 | profile->qos_min_freq = profile->qos_max_freq; | ||
| 68 | } | ||
| 69 | |||
| 70 | update_devfreq(devfreq); | ||
| 71 | mutex_unlock(&devfreq->lock); | ||
| 72 | |||
| 73 | return NOTIFY_OK; | ||
| 74 | } | ||
| 75 | #elif defined(CONFIG_GK20A_PM_QOS) | ||
| 76 | int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
| 77 | unsigned long n, void *p) | ||
| 78 | { | ||
| 79 | struct gk20a_scale_profile *profile = | ||
| 80 | container_of(nb, struct gk20a_scale_profile, | ||
| 81 | qos_notify_block); | ||
| 82 | struct gk20a_platform *platform = dev_get_drvdata(profile->dev); | ||
| 83 | struct gk20a *g = get_gk20a(profile->dev); | ||
| 84 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 85 | unsigned long freq; | ||
| 86 | |||
| 87 | if (!platform->postscale) | ||
| 88 | return NOTIFY_OK; | ||
| 89 | |||
| 90 | /* get the frequency requirement. if devfreq is enabled, check if it | ||
| 91 | * has higher demand than qos */ | ||
| 92 | freq = platform->clk_round_rate(profile->dev, | ||
| 93 | (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS)); | ||
| 94 | if (l->devfreq) | ||
| 95 | freq = max(l->devfreq->previous_freq, freq); | ||
| 96 | |||
| 97 | /* Update gpu load because we may scale the emc target | ||
| 98 | * if the gpu load changed. */ | ||
| 99 | nvgpu_pmu_load_update(g); | ||
| 100 | platform->postscale(profile->dev, freq); | ||
| 101 | |||
| 102 | return NOTIFY_OK; | ||
| 103 | } | ||
| 104 | #else | ||
| 105 | int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
| 106 | unsigned long n, void *p) | ||
| 107 | { | ||
| 108 | return 0; | ||
| 109 | } | ||
| 110 | #endif | ||
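In the CONFIG_COMMON_CLK path above, the PM QoS bounds arrive in kHz and are converted to Hz, and an inverted request (floor above cap) is clamped so the cap wins. A small sketch of that sanitisation, with illustrative numbers:

#include <stdio.h>

int main(void)
{
	/* pm_qos_read_*_bound() values are in kHz (numbers are illustrative) */
	unsigned long qos_min_freq = 1300500UL * 1000UL;	/* 1.3005 GHz floor */
	unsigned long qos_max_freq = 114750UL * 1000UL;		/* 114.75 MHz cap */

	if (qos_min_freq > qos_max_freq) {
		/* invalid limit: collapse the floor onto the cap */
		qos_min_freq = qos_max_freq;
	}

	printf("min=%lu Hz max=%lu Hz\n", qos_min_freq, qos_max_freq);
	return 0;
}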
| 111 | |||
| 112 | /* | ||
| 113 | * gk20a_scale_make_freq_table(profile) | ||
| 114 | * | ||
| 115 | * This function initialises the frequency table for the given device profile | ||
| 116 | */ | ||
| 117 | |||
| 118 | static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) | ||
| 119 | { | ||
| 120 | struct gk20a_platform *platform = dev_get_drvdata(profile->dev); | ||
| 121 | int num_freqs, err; | ||
| 122 | unsigned long *freqs; | ||
| 123 | |||
| 124 | if (platform->get_clk_freqs) { | ||
| 125 | /* get gpu frequency table */ | ||
| 126 | err = platform->get_clk_freqs(profile->dev, &freqs, | ||
| 127 | &num_freqs); | ||
| 128 | |||
| 129 | if (err) | ||
| 130 | return -ENOSYS; | ||
| 131 | } else | ||
| 132 | return -ENOSYS; | ||
| 133 | |||
| 134 | profile->devfreq_profile.freq_table = freqs; | ||
| 135 | profile->devfreq_profile.max_state = num_freqs; | ||
| 136 | |||
| 137 | return 0; | ||
| 138 | } | ||
| 139 | |||
| 140 | /* | ||
| 141 | * gk20a_scale_target(dev, *freq, flags) | ||
| 142 | * | ||
| 143 | * This function scales the clock | ||
| 144 | */ | ||
| 145 | |||
| 146 | static int gk20a_scale_target(struct device *dev, unsigned long *freq, | ||
| 147 | u32 flags) | ||
| 148 | { | ||
| 149 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 150 | struct gk20a *g = platform->g; | ||
| 151 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 152 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
| 153 | struct devfreq *devfreq = l->devfreq; | ||
| 154 | unsigned long local_freq = *freq; | ||
| 155 | unsigned long rounded_rate; | ||
| 156 | unsigned long min_freq = 0, max_freq = 0; | ||
| 157 | |||
| 158 | if (nvgpu_clk_arb_has_active_req(g)) | ||
| 159 | return 0; | ||
| 160 | /* | ||
| 161 | * Calculate floor and cap frequency values | ||
| 162 | * | ||
| 163 | * Policy : | ||
| 164 | * We have two APIs to clip the frequency | ||
| 165 | * 1. devfreq | ||
| 166 | * 2. pm_qos | ||
| 167 | * | ||
| 168 | * To calculate floor (min) freq, we select MAX of floor frequencies | ||
| 169 | * requested from both APIs | ||
| 170 | * To get cap (max) freq, we select MIN of max frequencies | ||
| 171 | * | ||
| 172 | * In case we have conflict (min_freq > max_freq) after above | ||
| 173 | * steps, we ensure that max_freq wins over min_freq | ||
| 174 | */ | ||
| 175 | min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq); | ||
| 176 | max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq); | ||
| 177 | |||
| 178 | if (min_freq > max_freq) | ||
| 179 | min_freq = max_freq; | ||
| 180 | |||
| 181 | /* Clip requested frequency */ | ||
| 182 | if (local_freq < min_freq) | ||
| 183 | local_freq = min_freq; | ||
| 184 | |||
| 185 | if (local_freq > max_freq) | ||
| 186 | local_freq = max_freq; | ||
| 187 | |||
| 188 | /* set the final frequency */ | ||
| 189 | rounded_rate = platform->clk_round_rate(dev, local_freq); | ||
| 190 | |||
| 191 | /* Check for duplicate request */ | ||
| 192 | if (rounded_rate == g->last_freq) | ||
| 193 | return 0; | ||
| 194 | |||
| 195 | if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate) | ||
| 196 | *freq = rounded_rate; | ||
| 197 | else { | ||
| 198 | g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate); | ||
| 199 | *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
| 200 | } | ||
| 201 | |||
| 202 | g->last_freq = *freq; | ||
| 203 | |||
| 204 | /* postscale will only scale emc (dram clock) if evaluating | ||
| 205 | * gk20a_tegra_get_emc_rate() produces a new or different emc | ||
| 206 | * target because the load and/or gpufreq has changed */ | ||
| 207 | if (platform->postscale) | ||
| 208 | platform->postscale(dev, rounded_rate); | ||
| 209 | |||
| 210 | return 0; | ||
| 211 | } | ||
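The floor/cap policy documented inside gk20a_scale_target() — take the MAX of the requested floors, the MIN of the requested caps, and let the cap win on conflict — is captured by this standalone sketch:

#include <stdio.h>

static unsigned long clip_freq(unsigned long req,
			       unsigned long devfreq_min, unsigned long devfreq_max,
			       unsigned long qos_min, unsigned long qos_max)
{
	unsigned long min_freq = devfreq_min > qos_min ? devfreq_min : qos_min;
	unsigned long max_freq = devfreq_max < qos_max ? devfreq_max : qos_max;

	if (min_freq > max_freq)
		min_freq = max_freq;	/* cap wins over floor */

	if (req < min_freq)
		req = min_freq;
	if (req > max_freq)
		req = max_freq;
	return req;
}

int main(void)
{
	/* devfreq asks for at least 300 MHz, QoS caps at 600 MHz */
	printf("%lu\n", clip_freq(800000000UL, 300000000UL, 1300000000UL,
				  0UL, 600000000UL));	/* -> 600000000 */
	return 0;
}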
| 212 | |||
| 213 | /* | ||
| 214 | * update_load_estimate_busy_cycles(dev) | ||
| 215 | * | ||
| 216 | * Update load estimate using pmu idle counters. Result is normalised | ||
| 217 | * based on the time it was asked last time. | ||
| 218 | */ | ||
| 219 | |||
| 220 | static void update_load_estimate_busy_cycles(struct device *dev) | ||
| 221 | { | ||
| 222 | struct gk20a *g = get_gk20a(dev); | ||
| 223 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
| 224 | unsigned long dt; | ||
| 225 | u32 busy_cycles_norm; | ||
| 226 | ktime_t t; | ||
| 227 | |||
| 228 | t = ktime_get(); | ||
| 229 | dt = ktime_us_delta(t, profile->last_event_time); | ||
| 230 | |||
| 231 | profile->dev_stat.total_time = dt; | ||
| 232 | profile->last_event_time = t; | ||
| 233 | nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm); | ||
| 234 | profile->dev_stat.busy_time = | ||
| 235 | (busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX; | ||
| 236 | } | ||
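The normalisation above scales the PMU busy counter into a busy-time share of the elapsed window. A worked example, assuming PMU_BUSY_CYCLES_NORM_MAX is the counter's full-scale value (the constant here is illustrative):

#include <stdio.h>

#define PMU_BUSY_CYCLES_NORM_MAX 1000UL	/* assumed full-scale value */

int main(void)
{
	unsigned long dt = 25000UL;		/* 25 ms polling window, in us */
	unsigned long busy_cycles_norm = 400UL;	/* PMU reports 40% busy */

	/* same expression as update_load_estimate_busy_cycles() */
	unsigned long busy_time =
		(busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX;

	printf("busy %lu of %lu us\n", busy_time, dt);	/* 10000 of 25000 */
	return 0;
}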
| 237 | |||
| 238 | /* | ||
| 239 | * gk20a_scale_suspend(dev) | ||
| 240 | * | ||
| 241 | * This function informs devfreq of suspend | ||
| 242 | */ | ||
| 243 | |||
| 244 | void gk20a_scale_suspend(struct device *dev) | ||
| 245 | { | ||
| 246 | struct gk20a *g = get_gk20a(dev); | ||
| 247 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 248 | struct devfreq *devfreq = l->devfreq; | ||
| 249 | |||
| 250 | if (!devfreq) | ||
| 251 | return; | ||
| 252 | |||
| 253 | devfreq_suspend_device(devfreq); | ||
| 254 | } | ||
| 255 | |||
| 256 | /* | ||
| 257 | * gk20a_scale_resume(dev) | ||
| 258 | * | ||
| 259 | * This function informs devfreq of resume | ||
| 260 | */ | ||
| 261 | |||
| 262 | void gk20a_scale_resume(struct device *dev) | ||
| 263 | { | ||
| 264 | struct gk20a *g = get_gk20a(dev); | ||
| 265 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 266 | struct devfreq *devfreq = l->devfreq; | ||
| 267 | |||
| 268 | if (!devfreq) | ||
| 269 | return; | ||
| 270 | |||
| 271 | g->last_freq = 0; | ||
| 272 | devfreq_resume_device(devfreq); | ||
| 273 | } | ||
| 274 | |||
| 275 | /* | ||
| 276 | * gk20a_scale_get_dev_status(dev, *stat) | ||
| 277 | * | ||
| 278 | * This function queries the current device status. | ||
| 279 | */ | ||
| 280 | |||
| 281 | static int gk20a_scale_get_dev_status(struct device *dev, | ||
| 282 | struct devfreq_dev_status *stat) | ||
| 283 | { | ||
| 284 | struct gk20a *g = get_gk20a(dev); | ||
| 285 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
| 286 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 287 | |||
| 288 | /* inform edp about new constraint */ | ||
| 289 | if (platform->prescale) | ||
| 290 | platform->prescale(dev); | ||
| 291 | |||
| 292 | /* Make sure there are correct values for the current frequency */ | ||
| 293 | profile->dev_stat.current_frequency = | ||
| 294 | g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
| 295 | |||
| 296 | /* Update load estimate */ | ||
| 297 | update_load_estimate_busy_cycles(dev); | ||
| 298 | |||
| 299 | /* Copy the contents of the current device status */ | ||
| 300 | *stat = profile->dev_stat; | ||
| 301 | |||
| 302 | /* Finally, clear out the local values */ | ||
| 303 | profile->dev_stat.total_time = 0; | ||
| 304 | profile->dev_stat.busy_time = 0; | ||
| 305 | |||
| 306 | return 0; | ||
| 307 | } | ||
| 308 | |||
| 309 | /* | ||
| 310 | * get_cur_freq(struct device *dev, unsigned long *freq) | ||
| 311 | * | ||
| 312 | * This function gets the current GPU clock rate. | ||
| 313 | */ | ||
| 314 | |||
| 315 | static int get_cur_freq(struct device *dev, unsigned long *freq) | ||
| 316 | { | ||
| 317 | struct gk20a *g = get_gk20a(dev); | ||
| 318 | *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
| 319 | return 0; | ||
| 320 | } | ||
| 321 | |||
| 322 | |||
| 323 | /* | ||
| 324 | * gk20a_scale_init(dev) | ||
| 325 | */ | ||
| 326 | |||
| 327 | void gk20a_scale_init(struct device *dev) | ||
| 328 | { | ||
| 329 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 330 | struct gk20a *g = platform->g; | ||
| 331 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 332 | struct gk20a_scale_profile *profile; | ||
| 333 | int err; | ||
| 334 | |||
| 335 | if (g->scale_profile) | ||
| 336 | return; | ||
| 337 | |||
| 338 | if (!platform->devfreq_governor && !platform->qos_notify) | ||
| 339 | return; | ||
| 340 | |||
| 341 | profile = nvgpu_kzalloc(g, sizeof(*profile)); | ||
| 342 | if (!profile) | ||
| 343 | return; | ||
| 344 | |||
| 345 | profile->dev = dev; | ||
| 346 | profile->dev_stat.busy = false; | ||
| 347 | |||
| 348 | /* Create frequency table */ | ||
| 349 | err = gk20a_scale_make_freq_table(profile); | ||
| 350 | if (err || !profile->devfreq_profile.max_state) | ||
| 351 | goto err_get_freqs; | ||
| 352 | |||
| 353 | profile->qos_min_freq = 0; | ||
| 354 | profile->qos_max_freq = UINT_MAX; | ||
| 355 | |||
| 356 | /* Store device profile so we can access it if devfreq governor | ||
| 357 | * init needs that */ | ||
| 358 | g->scale_profile = profile; | ||
| 359 | |||
| 360 | if (platform->devfreq_governor) { | ||
| 361 | struct devfreq *devfreq; | ||
| 362 | |||
| 363 | profile->devfreq_profile.initial_freq = | ||
| 364 | profile->devfreq_profile.freq_table[0]; | ||
| 365 | profile->devfreq_profile.target = gk20a_scale_target; | ||
| 366 | profile->devfreq_profile.get_dev_status = | ||
| 367 | gk20a_scale_get_dev_status; | ||
| 368 | profile->devfreq_profile.get_cur_freq = get_cur_freq; | ||
| 369 | profile->devfreq_profile.polling_ms = 25; | ||
| 370 | |||
| 371 | devfreq = devm_devfreq_add_device(dev, | ||
| 372 | &profile->devfreq_profile, | ||
| 373 | platform->devfreq_governor, NULL); | ||
| 374 | |||
| 375 | if (IS_ERR_OR_NULL(devfreq)) | ||
| 376 | devfreq = NULL; | ||
| 377 | |||
| 378 | l->devfreq = devfreq; | ||
| 379 | } | ||
| 380 | |||
| 381 | #ifdef CONFIG_GK20A_PM_QOS | ||
| 382 | /* Should we register QoS callback for this device? */ | ||
| 383 | if (platform->qos_notify) { | ||
| 384 | profile->qos_notify_block.notifier_call = | ||
| 385 | platform->qos_notify; | ||
| 386 | |||
| 387 | pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
| 388 | &profile->qos_notify_block); | ||
| 389 | pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
| 390 | &profile->qos_notify_block); | ||
| 391 | } | ||
| 392 | #endif | ||
| 393 | |||
| 394 | return; | ||
| 395 | |||
| 396 | err_get_freqs: | ||
| 397 | nvgpu_kfree(g, profile); | ||
| 398 | } | ||
| 399 | |||
| 400 | void gk20a_scale_exit(struct device *dev) | ||
| 401 | { | ||
| 402 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 403 | struct gk20a *g = platform->g; | ||
| 404 | |||
| 405 | #ifdef CONFIG_GK20A_PM_QOS | ||
| 406 | if (platform->qos_notify) { | ||
| 407 | pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
| 408 | &g->scale_profile->qos_notify_block); | ||
| 409 | pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
| 410 | &g->scale_profile->qos_notify_block); | ||
| 411 | } | ||
| 412 | #endif | ||
| 413 | |||
| 414 | nvgpu_kfree(g, g->scale_profile); | ||
| 415 | g->scale_profile = NULL; | ||
| 416 | } | ||
| 417 | |||
| 418 | /* | ||
| 419 | * gk20a_scale_hw_init(dev) | ||
| 420 | * | ||
| 421 | * Initialize hardware portion of the device | ||
| 422 | */ | ||
| 423 | |||
| 424 | void gk20a_scale_hw_init(struct device *dev) | ||
| 425 | { | ||
| 426 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 427 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
| 428 | |||
| 429 | /* make sure that scaling has been initialised */ | ||
| 430 | if (!profile) | ||
| 431 | return; | ||
| 432 | |||
| 433 | profile->dev_stat.total_time = 0; | ||
| 434 | profile->last_event_time = ktime_get(); | ||
| 435 | } | ||
diff --git a/include/os/linux/scale.h b/include/os/linux/scale.h new file mode 100644 index 0000000..c1e6fe8 --- /dev/null +++ b/include/os/linux/scale.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | /* | ||
| 2 | * gk20a clock scaling profile | ||
| 3 | * | ||
| 4 | * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef GK20A_SCALE_H | ||
| 20 | #define GK20A_SCALE_H | ||
| 21 | |||
| 22 | #include <linux/devfreq.h> | ||
| 23 | |||
| 24 | struct clk; | ||
| 25 | |||
| 26 | struct gk20a_scale_profile { | ||
| 27 | struct device *dev; | ||
| 28 | ktime_t last_event_time; | ||
| 29 | struct devfreq_dev_profile devfreq_profile; | ||
| 30 | struct devfreq_dev_status dev_stat; | ||
| 31 | struct notifier_block qos_notify_block; | ||
| 32 | unsigned long qos_min_freq; | ||
| 33 | unsigned long qos_max_freq; | ||
| 34 | void *private_data; | ||
| 35 | }; | ||
| 36 | |||
| 37 | /* Initialization and de-initialization for module */ | ||
| 38 | void gk20a_scale_init(struct device *); | ||
| 39 | void gk20a_scale_exit(struct device *); | ||
| 40 | void gk20a_scale_hw_init(struct device *dev); | ||
| 41 | |||
| 42 | #if defined(CONFIG_GK20A_DEVFREQ) | ||
| 43 | /* | ||
| 44 | * call when performing submit to notify scaling mechanism that the module is | ||
| 45 | * in use | ||
| 46 | */ | ||
| 47 | void gk20a_scale_notify_busy(struct device *); | ||
| 48 | void gk20a_scale_notify_idle(struct device *); | ||
| 49 | |||
| 50 | void gk20a_scale_suspend(struct device *); | ||
| 51 | void gk20a_scale_resume(struct device *); | ||
| 52 | int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
| 53 | unsigned long n, void *p); | ||
| 54 | #else | ||
| 55 | static inline void gk20a_scale_notify_busy(struct device *dev) {} | ||
| 56 | static inline void gk20a_scale_notify_idle(struct device *dev) {} | ||
| 57 | static inline void gk20a_scale_suspend(struct device *dev) {} | ||
| 58 | static inline void gk20a_scale_resume(struct device *dev) {} | ||
| 59 | static inline int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
| 60 | unsigned long n, void *p) | ||
| 61 | { | ||
| 62 | return -ENOSYS; | ||
| 63 | } | ||
| 64 | #endif | ||
| 65 | |||
| 66 | #endif | ||
diff --git a/include/os/linux/sched.c b/include/os/linux/sched.c new file mode 100644 index 0000000..30c58a1 --- /dev/null +++ b/include/os/linux/sched.c | |||
| @@ -0,0 +1,666 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | #include <asm/barrier.h> | ||
| 17 | #include <linux/wait.h> | ||
| 18 | #include <linux/uaccess.h> | ||
| 19 | #include <linux/poll.h> | ||
| 20 | #include <uapi/linux/nvgpu.h> | ||
| 21 | |||
| 22 | #include <nvgpu/kmem.h> | ||
| 23 | #include <nvgpu/log.h> | ||
| 24 | #include <nvgpu/bug.h> | ||
| 25 | #include <nvgpu/barrier.h> | ||
| 26 | #include <nvgpu/gk20a.h> | ||
| 27 | |||
| 28 | #include "gk20a/gr_gk20a.h" | ||
| 29 | #include "sched.h" | ||
| 30 | #include "os_linux.h" | ||
| 31 | #include "ioctl_tsg.h" | ||
| 32 | |||
| 33 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
| 34 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
| 35 | |||
| 36 | ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf, | ||
| 37 | size_t size, loff_t *off) | ||
| 38 | { | ||
| 39 | struct gk20a *g = filp->private_data; | ||
| 40 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 41 | struct nvgpu_sched_event_arg event = { 0 }; | ||
| 42 | int err; | ||
| 43 | |||
| 44 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, | ||
| 45 | "filp=%p buf=%p size=%zu", filp, buf, size); | ||
| 46 | |||
| 47 | if (size < sizeof(event)) | ||
| 48 | return -EINVAL; | ||
| 49 | size = sizeof(event); | ||
| 50 | |||
| 51 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 52 | while (!sched->status) { | ||
| 53 | nvgpu_mutex_release(&sched->status_lock); | ||
| 54 | if (filp->f_flags & O_NONBLOCK) | ||
| 55 | return -EAGAIN; | ||
| 56 | err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq, | ||
| 57 | sched->status, 0); | ||
| 58 | if (err) | ||
| 59 | return err; | ||
| 60 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 61 | } | ||
| 62 | |||
| 63 | event.reserved = 0; | ||
| 64 | event.status = sched->status; | ||
| 65 | |||
| 66 | if (copy_to_user(buf, &event, size)) { | ||
| 67 | nvgpu_mutex_release(&sched->status_lock); | ||
| 68 | return -EFAULT; | ||
| 69 | } | ||
| 70 | |||
| 71 | sched->status = 0; | ||
| 72 | |||
| 73 | nvgpu_mutex_release(&sched->status_lock); | ||
| 74 | |||
| 75 | return size; | ||
| 76 | } | ||
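From user space, the read() side of this interface behaves like a blocking event queue: the call sleeps until the driver posts a status word, then returns it and clears it. A hypothetical consumer sketch — the device node path is assumed, and the struct mirrors the layout implied by the code above (the real definition lives in uapi/linux/nvgpu.h):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

struct nvgpu_sched_event_arg {	/* mirrors the assumed uapi layout */
	uint64_t reserved;
	uint64_t status;
};

int main(void)
{
	struct nvgpu_sched_event_arg event;
	int fd = open("/dev/nvgpu/igpu0/sched", O_RDONLY);	/* path assumed */

	if (fd < 0)
		return 1;

	/* blocks until the driver posts a status bit, then clears it */
	if (read(fd, &event, sizeof(event)) == (ssize_t)sizeof(event))
		printf("sched status: 0x%llx\n",
		       (unsigned long long)event.status);

	close(fd);
	return 0;
}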
| 77 | |||
| 78 | unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait) | ||
| 79 | { | ||
| 80 | struct gk20a *g = filp->private_data; | ||
| 81 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 82 | unsigned int mask = 0; | ||
| 83 | |||
| 84 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
| 85 | |||
| 86 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 87 | poll_wait(filp, &sched->readout_wq.wq, wait); | ||
| 88 | if (sched->status) | ||
| 89 | mask |= POLLIN | POLLRDNORM; | ||
| 90 | nvgpu_mutex_release(&sched->status_lock); | ||
| 91 | |||
| 92 | return mask; | ||
| 93 | } | ||
| 94 | |||
| 95 | static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a *g, | ||
| 96 | struct nvgpu_sched_get_tsgs_args *arg) | ||
| 97 | { | ||
| 98 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 99 | |||
| 100 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", | ||
| 101 | arg->size, arg->buffer); | ||
| 102 | |||
| 103 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
| 104 | arg->size = sched->bitmap_size; | ||
| 105 | return -ENOSPC; | ||
| 106 | } | ||
| 107 | |||
| 108 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 109 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
| 110 | sched->active_tsg_bitmap, sched->bitmap_size)) { | ||
| 111 | nvgpu_mutex_release(&sched->status_lock); | ||
| 112 | return -EFAULT; | ||
| 113 | } | ||
| 114 | nvgpu_mutex_release(&sched->status_lock); | ||
| 115 | |||
| 116 | return 0; | ||
| 117 | } | ||
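The bitmaps copied out by the get_tsgs ioctls are dense bit arrays indexed by tsgid. A hypothetical user-space helper (assuming 64-bit words, mirroring the NVGPU_SCHED_ISSET convention) walks them like this:

#include <stdint.h>
#include <stdio.h>

/* hypothetical helper mirroring the assumed NVGPU_SCHED_ISSET semantics */
static int tsg_isset(const uint64_t *bitmap, unsigned int tsgid)
{
	return (bitmap[tsgid / 64] >> (tsgid % 64)) & 1;
}

int main(void)
{
	uint64_t bitmap[2] = { 0x5, 0x1 };	/* TSGs 0, 2 and 64 active */
	unsigned int id;

	for (id = 0; id < 128; id++)
		if (tsg_isset(bitmap, id))
			printf("tsg %u active\n", id);
	return 0;
}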
| 118 | |||
| 119 | static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a *g, | ||
| 120 | struct nvgpu_sched_get_tsgs_args *arg) | ||
| 121 | { | ||
| 122 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 123 | |||
| 124 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", | ||
| 125 | arg->size, arg->buffer); | ||
| 126 | |||
| 127 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
| 128 | arg->size = sched->bitmap_size; | ||
| 129 | return -ENOSPC; | ||
| 130 | } | ||
| 131 | |||
| 132 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 133 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
| 134 | sched->recent_tsg_bitmap, sched->bitmap_size)) { | ||
| 135 | nvgpu_mutex_release(&sched->status_lock); | ||
| 136 | return -EFAULT; | ||
| 137 | } | ||
| 138 | |||
| 139 | memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); | ||
| 140 | nvgpu_mutex_release(&sched->status_lock); | ||
| 141 | |||
| 142 | return 0; | ||
| 143 | } | ||
| 144 | |||
| 145 | static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a *g, | ||
| 146 | struct nvgpu_sched_get_tsgs_by_pid_args *arg) | ||
| 147 | { | ||
| 148 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 149 | struct fifo_gk20a *f = &g->fifo; | ||
| 150 | struct tsg_gk20a *tsg; | ||
| 151 | u64 *bitmap; | ||
| 152 | unsigned int tsgid; | ||
| 153 | /* pid at user level corresponds to kernel tgid */ | ||
| 154 | pid_t tgid = (pid_t)arg->pid; | ||
| 155 | int err = 0; | ||
| 156 | |||
| 157 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx", | ||
| 158 | (pid_t)arg->pid, arg->size, arg->buffer); | ||
| 159 | |||
| 160 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
| 161 | arg->size = sched->bitmap_size; | ||
| 162 | return -ENOSPC; | ||
| 163 | } | ||
| 164 | |||
| 165 | bitmap = nvgpu_kzalloc(g, sched->bitmap_size); | ||
| 166 | if (!bitmap) | ||
| 167 | return -ENOMEM; | ||
| 168 | |||
| 169 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 170 | for (tsgid = 0; tsgid < f->num_channels; tsgid++) { | ||
| 171 | if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { | ||
| 172 | tsg = &f->tsg[tsgid]; | ||
| 173 | if (tsg->tgid == tgid) | ||
| 174 | NVGPU_SCHED_SET(tsgid, bitmap); | ||
| 175 | } | ||
| 176 | } | ||
| 177 | nvgpu_mutex_release(&sched->status_lock); | ||
| 178 | |||
| 179 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
| 180 | bitmap, sched->bitmap_size)) | ||
| 181 | err = -EFAULT; | ||
| 182 | |||
| 183 | nvgpu_kfree(g, bitmap); | ||
| 184 | |||
| 185 | return err; | ||
| 186 | } | ||
| 187 | |||
| 188 | static int gk20a_sched_dev_ioctl_get_params(struct gk20a *g, | ||
| 189 | struct nvgpu_sched_tsg_get_params_args *arg) | ||
| 190 | { | ||
| 191 | struct fifo_gk20a *f = &g->fifo; | ||
| 192 | struct tsg_gk20a *tsg; | ||
| 193 | u32 tsgid = arg->tsgid; | ||
| 194 | |||
| 195 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
| 196 | |||
| 197 | if (tsgid >= f->num_channels) | ||
| 198 | return -EINVAL; | ||
| 199 | |||
| 200 | nvgpu_speculation_barrier(); | ||
| 201 | |||
| 202 | tsg = &f->tsg[tsgid]; | ||
| 203 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
| 204 | return -ENXIO; | ||
| 205 | |||
| 206 | arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */ | ||
| 207 | arg->runlist_interleave = tsg->interleave_level; | ||
| 208 | arg->timeslice = gk20a_tsg_get_timeslice(tsg); | ||
| 209 | |||
| 210 | arg->graphics_preempt_mode = | ||
| 211 | tsg->gr_ctx.graphics_preempt_mode; | ||
| 212 | arg->compute_preempt_mode = | ||
| 213 | tsg->gr_ctx.compute_preempt_mode; | ||
| 214 | |||
| 215 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
| 216 | |||
| 217 | return 0; | ||
| 218 | } | ||
| 219 | |||
| 220 | static int gk20a_sched_dev_ioctl_tsg_set_timeslice( | ||
| 221 | struct gk20a *g, | ||
| 222 | struct nvgpu_sched_tsg_timeslice_args *arg) | ||
| 223 | { | ||
| 224 | struct fifo_gk20a *f = &g->fifo; | ||
| 225 | struct tsg_gk20a *tsg; | ||
| 226 | u32 tsgid = arg->tsgid; | ||
| 227 | int err; | ||
| 228 | |||
| 229 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
| 230 | |||
| 231 | if (tsgid >= f->num_channels) | ||
| 232 | return -EINVAL; | ||
| 233 | |||
| 234 | nvgpu_speculation_barrier(); | ||
| 235 | |||
| 236 | tsg = &f->tsg[tsgid]; | ||
| 237 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
| 238 | return -ENXIO; | ||
| 239 | |||
| 240 | err = gk20a_busy(g); | ||
| 241 | if (err) | ||
| 242 | goto done; | ||
| 243 | |||
| 244 | err = gk20a_tsg_set_timeslice(tsg, arg->timeslice); | ||
| 245 | |||
| 246 | gk20a_idle(g); | ||
| 247 | |||
| 248 | done: | ||
| 249 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
| 250 | |||
| 251 | return err; | ||
| 252 | } | ||
| 253 | |||
| 254 | static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave( | ||
| 255 | struct gk20a *g, | ||
| 256 | struct nvgpu_sched_tsg_runlist_interleave_args *arg) | ||
| 257 | { | ||
| 258 | struct fifo_gk20a *f = &g->fifo; | ||
| 259 | struct tsg_gk20a *tsg; | ||
| 260 | u32 tsgid = arg->tsgid; | ||
| 261 | int err; | ||
| 262 | |||
| 263 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
| 264 | |||
| 265 | if (tsgid >= f->num_channels) | ||
| 266 | return -EINVAL; | ||
| 267 | |||
| 268 | nvgpu_speculation_barrier(); | ||
| 269 | |||
| 270 | tsg = &f->tsg[tsgid]; | ||
| 271 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
| 272 | return -ENXIO; | ||
| 273 | |||
| 274 | err = gk20a_busy(g); | ||
| 275 | if (err) | ||
| 276 | goto done; | ||
| 277 | |||
| 278 | err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave); | ||
| 279 | |||
| 280 | gk20a_idle(g); | ||
| 281 | |||
| 282 | done: | ||
| 283 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
| 284 | |||
| 285 | return err; | ||
| 286 | } | ||
| 287 | |||
| 288 | static int gk20a_sched_dev_ioctl_lock_control(struct gk20a *g) | ||
| 289 | { | ||
| 290 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 291 | |||
| 292 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
| 293 | |||
| 294 | nvgpu_mutex_acquire(&sched->control_lock); | ||
| 295 | sched->control_locked = true; | ||
| 296 | nvgpu_mutex_release(&sched->control_lock); | ||
| 297 | return 0; | ||
| 298 | } | ||
| 299 | |||
| 300 | static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a *g) | ||
| 301 | { | ||
| 302 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 303 | |||
| 304 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
| 305 | |||
| 306 | nvgpu_mutex_acquire(&sched->control_lock); | ||
| 307 | sched->control_locked = false; | ||
| 308 | nvgpu_mutex_release(&sched->control_lock); | ||
| 309 | return 0; | ||
| 310 | } | ||
| 311 | |||
| 312 | static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a *g, | ||
| 313 | struct nvgpu_sched_api_version_args *args) | ||
| 314 | { | ||
| 315 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
| 316 | |||
| 317 | args->version = NVGPU_SCHED_API_VERSION; | ||
| 318 | return 0; | ||
| 319 | } | ||
| 320 | |||
| 321 | static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a *g, | ||
| 322 | struct nvgpu_sched_tsg_refcount_args *arg) | ||
| 323 | { | ||
| 324 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 325 | struct fifo_gk20a *f = &g->fifo; | ||
| 326 | struct tsg_gk20a *tsg; | ||
| 327 | u32 tsgid = arg->tsgid; | ||
| 328 | |||
| 329 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
| 330 | |||
| 331 | if (tsgid >= f->num_channels) | ||
| 332 | return -EINVAL; | ||
| 333 | |||
| 334 | nvgpu_speculation_barrier(); | ||
| 335 | |||
| 336 | tsg = &f->tsg[tsgid]; | ||
| 337 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
| 338 | return -ENXIO; | ||
| 339 | |||
| 340 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 341 | if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { | ||
| 342 | nvgpu_warn(g, "tsgid=%d already referenced", tsgid); | ||
| 343 | /* unlock status_lock as nvgpu_ioctl_tsg_release locks it */ | ||
| 344 | nvgpu_mutex_release(&sched->status_lock); | ||
| 345 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
| 346 | return -ENXIO; | ||
| 347 | } | ||
| 348 | |||
| 349 | /* keep a reference on the TSG; it will be released by the | ||
| 350 | * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or on close | ||
| 351 | */ | ||
| 352 | NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); | ||
| 353 | nvgpu_mutex_release(&sched->status_lock); | ||
| 354 | |||
| 355 | return 0; | ||
| 356 | } | ||
| 357 | |||
| 358 | static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a *g, | ||
| 359 | struct nvgpu_sched_tsg_refcount_args *arg) | ||
| 360 | { | ||
| 361 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 362 | struct fifo_gk20a *f = &g->fifo; | ||
| 363 | struct tsg_gk20a *tsg; | ||
| 364 | u32 tsgid = arg->tsgid; | ||
| 365 | |||
| 366 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
| 367 | |||
| 368 | if (tsgid >= f->num_channels) | ||
| 369 | return -EINVAL; | ||
| 370 | |||
| 371 | nvgpu_speculation_barrier(); | ||
| 372 | |||
| 373 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 374 | if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { | ||
| 375 | nvgpu_mutex_release(&sched->status_lock); | ||
| 376 | nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid); | ||
| 377 | return -ENXIO; | ||
| 378 | } | ||
| 379 | NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); | ||
| 380 | nvgpu_mutex_release(&sched->status_lock); | ||
| 381 | |||
| 382 | tsg = &f->tsg[tsgid]; | ||
| 383 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
| 384 | |||
| 385 | return 0; | ||
| 386 | } | ||
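
Taken together, GET_TSG and PUT_TSG let a scheduling daemon pin a TSG while it
adjusts that TSG's parameters. A minimal user-space sketch of the pattern (not
part of this diff; the ioctl names and argument structs are taken from the
handlers above, while the helper itself, its includes, and the header path are
assumptions):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>   /* assumed install path of the nvgpu uapi header */

    /* Pin a TSG, set its timeslice, then drop the reference. */
    static int set_tsg_timeslice(int fd, uint32_t tsgid, uint32_t slice_us)
    {
            struct nvgpu_sched_tsg_refcount_args ref = { .tsgid = tsgid };
            struct nvgpu_sched_tsg_timeslice_args ts = {
                    .tsgid = tsgid,
                    .timeslice = slice_us,
            };
            int err;

            /* Hold a reference so the TSG cannot vanish mid-update. */
            if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSG, &ref))
                    return -1;

            err = ioctl(fd, NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE, &ts);

            /* References are also dropped implicitly on close(). */
            (void)ioctl(fd, NVGPU_SCHED_IOCTL_PUT_TSG, &ref);
            return err;
    }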
| 387 | |||
| 388 | int gk20a_sched_dev_open(struct inode *inode, struct file *filp) | ||
| 389 | { | ||
| 390 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
| 391 | struct nvgpu_os_linux, sched.cdev); | ||
| 392 | struct gk20a *g; | ||
| 393 | struct nvgpu_sched_ctrl *sched; | ||
| 394 | int err = 0; | ||
| 395 | |||
| 396 | g = gk20a_get(&l->g); | ||
| 397 | if (!g) | ||
| 398 | return -ENODEV; | ||
| 399 | sched = &g->sched_ctrl; | ||
| 400 | |||
| 401 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g); | ||
| 402 | |||
| 403 | if (!sched->sw_ready) { | ||
| 404 | err = gk20a_busy(g); | ||
| 405 | if (err) | ||
| 406 | goto free_ref; | ||
| 407 | |||
| 408 | gk20a_idle(g); | ||
| 409 | } | ||
| 410 | |||
| 411 | if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) { | ||
| 412 | err = -EBUSY; | ||
| 413 | goto free_ref; | ||
| 414 | } | ||
| 415 | |||
| 416 | memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, | ||
| 417 | sched->bitmap_size); | ||
| 418 | memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size); | ||
| 419 | |||
| 420 | filp->private_data = g; | ||
| 421 | nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched); | ||
| 422 | |||
| 423 | free_ref: | ||
| 424 | if (err) | ||
| 425 | gk20a_put(g); | ||
| 426 | return err; | ||
| 427 | } | ||
| 428 | |||
| 429 | long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, | ||
| 430 | unsigned long arg) | ||
| 431 | { | ||
| 432 | struct gk20a *g = filp->private_data; | ||
| 433 | u8 buf[NVGPU_SCHED_IOCTL_MAX_ARG_SIZE]; | ||
| 434 | int err = 0; | ||
| 435 | |||
| 436 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd)); | ||
| 437 | |||
| 438 | if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) || | ||
| 439 | (_IOC_NR(cmd) == 0) || | ||
| 440 | (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) || | ||
| 441 | (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE)) | ||
| 442 | return -EINVAL; | ||
| 443 | |||
| 444 | memset(buf, 0, sizeof(buf)); | ||
| 445 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
| 446 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
| 447 | return -EFAULT; | ||
| 448 | } | ||
| 449 | |||
| 450 | nvgpu_speculation_barrier(); | ||
| 451 | switch (cmd) { | ||
| 452 | case NVGPU_SCHED_IOCTL_GET_TSGS: | ||
| 453 | err = gk20a_sched_dev_ioctl_get_tsgs(g, | ||
| 454 | (struct nvgpu_sched_get_tsgs_args *)buf); | ||
| 455 | break; | ||
| 456 | case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS: | ||
| 457 | err = gk20a_sched_dev_ioctl_get_recent_tsgs(g, | ||
| 458 | (struct nvgpu_sched_get_tsgs_args *)buf); | ||
| 459 | break; | ||
| 460 | case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID: | ||
| 461 | err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(g, | ||
| 462 | (struct nvgpu_sched_get_tsgs_by_pid_args *)buf); | ||
| 463 | break; | ||
| 464 | case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS: | ||
| 465 | err = gk20a_sched_dev_ioctl_get_params(g, | ||
| 466 | (struct nvgpu_sched_tsg_get_params_args *)buf); | ||
| 467 | break; | ||
| 468 | case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE: | ||
| 469 | err = gk20a_sched_dev_ioctl_tsg_set_timeslice(g, | ||
| 470 | (struct nvgpu_sched_tsg_timeslice_args *)buf); | ||
| 471 | break; | ||
| 472 | case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: | ||
| 473 | err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(g, | ||
| 474 | (struct nvgpu_sched_tsg_runlist_interleave_args *)buf); | ||
| 475 | break; | ||
| 476 | case NVGPU_SCHED_IOCTL_LOCK_CONTROL: | ||
| 477 | err = gk20a_sched_dev_ioctl_lock_control(g); | ||
| 478 | break; | ||
| 479 | case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: | ||
| 480 | err = gk20a_sched_dev_ioctl_unlock_control(g); | ||
| 481 | break; | ||
| 482 | case NVGPU_SCHED_IOCTL_GET_API_VERSION: | ||
| 483 | err = gk20a_sched_dev_ioctl_get_api_version(g, | ||
| 484 | (struct nvgpu_sched_api_version_args *)buf); | ||
| 485 | break; | ||
| 486 | case NVGPU_SCHED_IOCTL_GET_TSG: | ||
| 487 | err = gk20a_sched_dev_ioctl_get_tsg(g, | ||
| 488 | (struct nvgpu_sched_tsg_refcount_args *)buf); | ||
| 489 | break; | ||
| 490 | case NVGPU_SCHED_IOCTL_PUT_TSG: | ||
| 491 | err = gk20a_sched_dev_ioctl_put_tsg(g, | ||
| 492 | (struct nvgpu_sched_tsg_refcount_args *)buf); | ||
| 493 | break; | ||
| 494 | default: | ||
| 495 | nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); | ||
| 496 | err = -ENOTTY; | ||
| 497 | } | ||
| 498 | |||
| 499 | /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS may be called on | ||
| 500 | * purpose with a NULL buffer and/or zero size to discover the TSG | ||
| 501 | * bitmap size. The updated arguments must be copied back to user | ||
| 502 | * space in that case too, even though an error is returned. | ||
| 503 | */ | ||
| 504 | if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) { | ||
| 505 | if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) | ||
| 506 | err = -EFAULT; | ||
| 507 | } | ||
| 508 | |||
| 509 | return err; | ||
| 510 | } | ||
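
As the comment above notes, a caller can discover the bitmap size by issuing
the ioctl with size = 0: the handler fails with -ENOSPC but writes the required
size back into the arguments. A minimal user-space sketch of the two-call
pattern (not part of this diff; the includes, header path, and error handling
are assumptions):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>   /* assumed install path of the nvgpu uapi header */

    /* Returns a calloc'd copy of the active-TSG bitmap, or NULL on error. */
    static uint64_t *read_active_tsgs(int fd, uint32_t *size_out)
    {
            struct nvgpu_sched_get_tsgs_args args = { 0 };
            uint64_t *bitmap;

            /* First call: size = 0, so the kernel reports the needed size. */
            if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) == 0 ||
                errno != ENOSPC)
                    return NULL;

            bitmap = calloc(1, args.size);
            if (!bitmap)
                    return NULL;

            args.buffer = (uint64_t)(uintptr_t)bitmap;
            if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args)) {
                    free(bitmap);
                    return NULL;
            }
            *size_out = args.size;
            return bitmap;
    }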
| 511 | |||
| 512 | int gk20a_sched_dev_release(struct inode *inode, struct file *filp) | ||
| 513 | { | ||
| 514 | struct gk20a *g = filp->private_data; | ||
| 515 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 516 | struct fifo_gk20a *f = &g->fifo; | ||
| 517 | struct tsg_gk20a *tsg; | ||
| 518 | unsigned int tsgid; | ||
| 519 | |||
| 520 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched); | ||
| 521 | |||
| 522 | /* release any reference to TSGs */ | ||
| 523 | for (tsgid = 0; tsgid < f->num_channels; tsgid++) { | ||
| 524 | if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { | ||
| 525 | tsg = &f->tsg[tsgid]; | ||
| 526 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
| 527 | } | ||
| 528 | } | ||
| 529 | |||
| 530 | /* unlock control */ | ||
| 531 | nvgpu_mutex_acquire(&sched->control_lock); | ||
| 532 | sched->control_locked = false; | ||
| 533 | nvgpu_mutex_release(&sched->control_lock); | ||
| 534 | |||
| 535 | nvgpu_mutex_release(&sched->busy_lock); | ||
| 536 | gk20a_put(g); | ||
| 537 | return 0; | ||
| 538 | } | ||
| 539 | |||
| 540 | void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) | ||
| 541 | { | ||
| 542 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 543 | int err; | ||
| 544 | |||
| 545 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
| 546 | |||
| 547 | if (!sched->sw_ready) { | ||
| 548 | err = gk20a_busy(g); | ||
| 549 | if (err) { | ||
| 550 | WARN_ON(err); | ||
| 551 | return; | ||
| 552 | } | ||
| 553 | |||
| 554 | gk20a_idle(g); | ||
| 555 | } | ||
| 556 | |||
| 557 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 558 | NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap); | ||
| 559 | NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap); | ||
| 560 | sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN; | ||
| 561 | nvgpu_mutex_release(&sched->status_lock); | ||
| 562 | nvgpu_cond_signal_interruptible(&sched->readout_wq); | ||
| 563 | } | ||
| 564 | |||
| 565 | void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg) | ||
| 566 | { | ||
| 567 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 568 | |||
| 569 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
| 570 | |||
| 571 | nvgpu_mutex_acquire(&sched->status_lock); | ||
| 572 | NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap); | ||
| 573 | |||
| 574 | /* clear recent_tsg_bitmap as well: if the app manager did not | ||
| 575 | * notice that the TSG was previously added, there is no need to | ||
| 576 | * notify it now that the TSG has been released in the meantime. | ||
| 577 | * If the TSG id gets reallocated, the app manager is notified as usual. | ||
| 578 | */ | ||
| 579 | NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap); | ||
| 580 | |||
| 581 | /* do not set event_pending: we only want to notify the app manager | ||
| 582 | * when TSGs are added, so that it can apply sched params | ||
| 583 | */ | ||
| 584 | nvgpu_mutex_release(&sched->status_lock); | ||
| 585 | } | ||
| 586 | |||
| 587 | int gk20a_sched_ctrl_init(struct gk20a *g) | ||
| 588 | { | ||
| 589 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 590 | struct fifo_gk20a *f = &g->fifo; | ||
| 591 | int err; | ||
| 592 | |||
| 593 | if (sched->sw_ready) | ||
| 594 | return 0; | ||
| 595 | |||
| 596 | sched->bitmap_size = roundup(f->num_channels, 64) / 8; | ||
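/* One bit per possible TSG id, rounded up to whole u64 words: e.g.
 * num_channels = 512 -> roundup(512, 64) / 8 = 64 bytes, and
 * num_channels = 100 -> roundup(100, 64) / 8 = 16 bytes.
 */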
| 597 | sched->status = 0; | ||
| 598 | |||
| 599 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu", | ||
| 600 | g, sched, sched->bitmap_size); | ||
| 601 | |||
| 602 | sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); | ||
| 603 | if (!sched->active_tsg_bitmap) | ||
| 604 | return -ENOMEM; | ||
| 605 | |||
| 606 | sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); | ||
| 607 | if (!sched->recent_tsg_bitmap) { | ||
| 608 | err = -ENOMEM; | ||
| 609 | goto free_active; | ||
| 610 | } | ||
| 611 | |||
| 612 | sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); | ||
| 613 | if (!sched->ref_tsg_bitmap) { | ||
| 614 | err = -ENOMEM; | ||
| 615 | goto free_recent; | ||
| 616 | } | ||
| 617 | |||
| 618 | nvgpu_cond_init(&sched->readout_wq); | ||
| 619 | |||
| 620 | err = nvgpu_mutex_init(&sched->status_lock); | ||
| 621 | if (err) | ||
| 622 | goto free_ref; | ||
| 623 | |||
| 624 | err = nvgpu_mutex_init(&sched->control_lock); | ||
| 625 | if (err) | ||
| 626 | goto free_status_lock; | ||
| 627 | |||
| 628 | err = nvgpu_mutex_init(&sched->busy_lock); | ||
| 629 | if (err) | ||
| 630 | goto free_control_lock; | ||
| 631 | |||
| 632 | sched->sw_ready = true; | ||
| 633 | |||
| 634 | return 0; | ||
| 635 | |||
| 636 | free_control_lock: | ||
| 637 | nvgpu_mutex_destroy(&sched->control_lock); | ||
| 638 | free_status_lock: | ||
| 639 | nvgpu_mutex_destroy(&sched->status_lock); | ||
| 640 | free_ref: | ||
| 641 | nvgpu_kfree(g, sched->ref_tsg_bitmap); | ||
| 642 | free_recent: | ||
| 643 | nvgpu_kfree(g, sched->recent_tsg_bitmap); | ||
| 644 | free_active: | ||
| 645 | nvgpu_kfree(g, sched->active_tsg_bitmap); | ||
| 646 | |||
| 647 | return err; | ||
| 648 | } | ||
| 649 | |||
| 650 | void gk20a_sched_ctrl_cleanup(struct gk20a *g) | ||
| 651 | { | ||
| 652 | struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; | ||
| 653 | |||
| 654 | nvgpu_kfree(g, sched->active_tsg_bitmap); | ||
| 655 | nvgpu_kfree(g, sched->recent_tsg_bitmap); | ||
| 656 | nvgpu_kfree(g, sched->ref_tsg_bitmap); | ||
| 657 | sched->active_tsg_bitmap = NULL; | ||
| 658 | sched->recent_tsg_bitmap = NULL; | ||
| 659 | sched->ref_tsg_bitmap = NULL; | ||
| 660 | |||
| 661 | nvgpu_mutex_destroy(&sched->status_lock); | ||
| 662 | nvgpu_mutex_destroy(&sched->control_lock); | ||
| 663 | nvgpu_mutex_destroy(&sched->busy_lock); | ||
| 664 | |||
| 665 | sched->sw_ready = false; | ||
| 666 | } | ||
diff --git a/include/os/linux/sched.h b/include/os/linux/sched.h new file mode 100644 index 0000000..e88f37f --- /dev/null +++ b/include/os/linux/sched.h | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | #ifndef __NVGPU_SCHED_H | ||
| 17 | #define __NVGPU_SCHED_H | ||
| 18 | |||
| 19 | struct gk20a; | ||
| 20 | struct gpu_ops; | ||
| 21 | struct tsg_gk20a; | ||
| 22 | struct poll_table_struct; | ||
| 23 | |||
| 24 | int gk20a_sched_dev_release(struct inode *inode, struct file *filp); | ||
| 25 | int gk20a_sched_dev_open(struct inode *inode, struct file *filp); | ||
| 26 | long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long); | ||
| 27 | ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *); | ||
| 28 | unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *); | ||
| 29 | |||
| 30 | void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *); | ||
| 31 | void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *); | ||
| 32 | int gk20a_sched_ctrl_init(struct gk20a *); | ||
| 33 | |||
| 34 | void gk20a_sched_ctrl_cleanup(struct gk20a *g); | ||
| 35 | |||
| 36 | #endif /* __NVGPU_SCHED_H */ | ||
diff --git a/include/os/linux/sim.c b/include/os/linux/sim.c new file mode 100644 index 0000000..792ce80 --- /dev/null +++ b/include/os/linux/sim.c | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/io.h> | ||
| 18 | #include <linux/highmem.h> | ||
| 19 | #include <linux/platform_device.h> | ||
| 20 | |||
| 21 | #include <nvgpu/log.h> | ||
| 22 | #include <nvgpu/linux/vm.h> | ||
| 23 | #include <nvgpu/bitops.h> | ||
| 24 | #include <nvgpu/nvgpu_mem.h> | ||
| 25 | #include <nvgpu/dma.h> | ||
| 26 | #include <nvgpu/soc.h> | ||
| 27 | #include <nvgpu/hw_sim.h> | ||
| 28 | #include <nvgpu/sim.h> | ||
| 29 | #include <nvgpu/gk20a.h> | ||
| 30 | |||
| 31 | #include "platform_gk20a.h" | ||
| 32 | #include "os_linux.h" | ||
| 33 | #include "module.h" | ||
| 34 | |||
| 35 | void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v) | ||
| 36 | { | ||
| 37 | struct sim_nvgpu_linux *sim_linux = | ||
| 38 | container_of(sim, struct sim_nvgpu_linux, sim); | ||
| 39 | |||
| 40 | writel(v, sim_linux->regs + r); | ||
| 41 | } | ||
| 42 | |||
| 43 | u32 sim_readl(struct sim_nvgpu *sim, u32 r) | ||
| 44 | { | ||
| 45 | struct sim_nvgpu_linux *sim_linux = | ||
| 46 | container_of(sim, struct sim_nvgpu_linux, sim); | ||
| 47 | |||
| 48 | return readl(sim_linux->regs + r); | ||
| 49 | } | ||
| 50 | |||
| 51 | void nvgpu_remove_sim_support_linux(struct gk20a *g) | ||
| 52 | { | ||
| 53 | struct sim_nvgpu_linux *sim_linux; | ||
| 54 | |||
| 55 | if (!g->sim) | ||
| 56 | return; | ||
| 57 | |||
| 58 | sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); | ||
| 59 | if (sim_linux->regs) { | ||
| 60 | sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); | ||
| 61 | iounmap(sim_linux->regs); | ||
| 62 | sim_linux->regs = NULL; | ||
| 63 | } | ||
| 64 | nvgpu_kfree(g, sim_linux); | ||
| 65 | g->sim = NULL; | ||
| 66 | } | ||
| 67 | |||
| 68 | int nvgpu_init_sim_support_linux(struct gk20a *g, | ||
| 69 | struct platform_device *dev) | ||
| 70 | { | ||
| 71 | struct sim_nvgpu_linux *sim_linux; | ||
| 72 | int err = -ENOMEM; | ||
| 73 | |||
| 74 | if (!nvgpu_platform_is_simulation(g)) | ||
| 75 | return 0; | ||
| 76 | |||
| 77 | sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); | ||
| 78 | if (!sim_linux) | ||
| 79 | return err; | ||
| 80 | g->sim = &sim_linux->sim; | ||
| 81 | g->sim->g = g; | ||
| 82 | sim_linux->regs = nvgpu_devm_ioremap_resource(dev, | ||
| 83 | GK20A_SIM_IORESOURCE_MEM, | ||
| 84 | &sim_linux->reg_mem); | ||
| 85 | if (IS_ERR(sim_linux->regs)) { | ||
| 86 | nvgpu_err(g, "failed to remap gk20a sim regs"); | ||
| 87 | err = PTR_ERR(sim_linux->regs); | ||
| 88 | goto fail; | ||
| 89 | } | ||
| 90 | sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux; | ||
| 91 | return 0; | ||
| 92 | |||
| 93 | fail: | ||
| 94 | nvgpu_remove_sim_support_linux(g); | ||
| 95 | return err; | ||
| 96 | } | ||
diff --git a/include/os/linux/sim_pci.c b/include/os/linux/sim_pci.c new file mode 100644 index 0000000..340f1fa --- /dev/null +++ b/include/os/linux/sim_pci.c | |||
| @@ -0,0 +1,93 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/io.h> | ||
| 18 | #include <linux/highmem.h> | ||
| 19 | #include <linux/platform_device.h> | ||
| 20 | |||
| 21 | #include <nvgpu/log.h> | ||
| 22 | #include <nvgpu/linux/vm.h> | ||
| 23 | #include <nvgpu/bitops.h> | ||
| 24 | #include <nvgpu/nvgpu_mem.h> | ||
| 25 | #include <nvgpu/dma.h> | ||
| 26 | #include <nvgpu/hw_sim_pci.h> | ||
| 27 | #include <nvgpu/sim.h> | ||
| 28 | #include <nvgpu/io.h> | ||
| 29 | #include <nvgpu/gk20a.h> | ||
| 30 | |||
| 31 | #include "os_linux.h" | ||
| 32 | #include "module.h" | ||
| 33 | |||
| 34 | static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base) | ||
| 35 | { | ||
| 36 | u32 cfg; | ||
| 37 | bool is_simulation = false; | ||
| 38 | |||
| 39 | cfg = nvgpu_readl(g, sim_base + sim_config_r()); | ||
| 40 | if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v()) | ||
| 41 | is_simulation = true; | ||
| 42 | |||
| 43 | return is_simulation; | ||
| 44 | } | ||
| 45 | |||
| 46 | void nvgpu_remove_sim_support_linux_pci(struct gk20a *g) | ||
| 47 | { | ||
| 48 | struct sim_nvgpu_linux *sim_linux; | ||
| 49 | bool is_simulation; | ||
| 50 | |||
| 51 | is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); | ||
| 52 | |||
| 53 | if (!is_simulation) { | ||
| 54 | return; | ||
| 55 | } | ||
| 56 | |||
| 57 | if (!g->sim) { | ||
| 58 | nvgpu_warn(g, "sim_gk20a not allocated"); | ||
| 59 | return; | ||
| 60 | } | ||
| 61 | sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); | ||
| 62 | |||
| 63 | if (sim_linux->regs) { | ||
| 64 | sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); | ||
| 65 | sim_linux->regs = NULL; | ||
| 66 | } | ||
| 67 | nvgpu_kfree(g, sim_linux); | ||
| 68 | g->sim = NULL; | ||
| 69 | } | ||
| 70 | |||
| 71 | int nvgpu_init_sim_support_linux_pci(struct gk20a *g) | ||
| 72 | { | ||
| 73 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 74 | struct sim_nvgpu_linux *sim_linux; | ||
| 75 | int err = -ENOMEM; | ||
| 76 | bool is_simulation; | ||
| 77 | |||
| 78 | is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); | ||
| 79 | __nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation); | ||
| 80 | |||
| 81 | if (!is_simulation) | ||
| 82 | return 0; | ||
| 83 | |||
| 84 | sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); | ||
| 85 | if (!sim_linux) | ||
| 86 | return err; | ||
| 87 | g->sim = &sim_linux->sim; | ||
| 88 | g->sim->g = g; | ||
| 89 | sim_linux->regs = l->regs + sim_r(); | ||
| 90 | sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci; | ||
| 91 | |||
| 92 | return 0; | ||
| 93 | } | ||
diff --git a/include/os/linux/soc.c b/include/os/linux/soc.c new file mode 100644 index 0000000..1b27d6f --- /dev/null +++ b/include/os/linux/soc.c | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <soc/tegra/chip-id.h> | ||
| 15 | #include <soc/tegra/fuse.h> | ||
| 16 | #include <soc/tegra/tegra_bpmp.h> | ||
| 17 | #ifdef CONFIG_TEGRA_HV_MANAGER | ||
| 18 | #include <soc/tegra/virt/syscalls.h> | ||
| 19 | #endif | ||
| 20 | |||
| 21 | #include <nvgpu/soc.h> | ||
| 22 | #include "os_linux.h" | ||
| 23 | #include "platform_gk20a.h" | ||
| 24 | |||
| 25 | bool nvgpu_platform_is_silicon(struct gk20a *g) | ||
| 26 | { | ||
| 27 | return tegra_platform_is_silicon(); | ||
| 28 | } | ||
| 29 | |||
| 30 | bool nvgpu_platform_is_simulation(struct gk20a *g) | ||
| 31 | { | ||
| 32 | return tegra_platform_is_vdk(); | ||
| 33 | } | ||
| 34 | |||
| 35 | bool nvgpu_platform_is_fpga(struct gk20a *g) | ||
| 36 | { | ||
| 37 | return tegra_platform_is_fpga(); | ||
| 38 | } | ||
| 39 | |||
| 40 | bool nvgpu_is_hypervisor_mode(struct gk20a *g) | ||
| 41 | { | ||
| 42 | return is_tegra_hypervisor_mode(); | ||
| 43 | } | ||
| 44 | |||
| 45 | bool nvgpu_is_bpmp_running(struct gk20a *g) | ||
| 46 | { | ||
| 47 | return tegra_bpmp_running(); | ||
| 48 | } | ||
| 49 | |||
| 50 | bool nvgpu_is_soc_t194_a01(struct gk20a *g) | ||
| 51 | { | ||
| 52 | return ((tegra_get_chip_id() == TEGRA194 && | ||
| 53 | tegra_chip_get_revision() == TEGRA194_REVISION_A01) ? | ||
| 54 | true : false); | ||
| 55 | } | ||
| 56 | |||
| 57 | #ifdef CONFIG_TEGRA_HV_MANAGER | ||
| 58 | /* When nvlink is enabled on dGPU, we need to use physical memory addresses. | ||
| 59 | * There is no SMMU translation. However, the device initially enumerates as a | ||
| 60 | * PCIe device. As such, when allocating memory for this PCIe device, the DMA | ||
| 61 | * framework ends up allocating memory using SMMU (if enabled in device tree). | ||
| 62 | * As a result, when we switch to nvlink, we need to use underlying physical | ||
| 63 | * addresses, even if memory mappings exist in SMMU. | ||
| 64 | * In addition, when stage-2 SMMU translation is enabled (for instance when HV | ||
| 65 | * is enabled), the addresses we get from dma_alloc are IPAs. We need to | ||
| 66 | * convert them to PA. | ||
| 67 | */ | ||
| 68 | static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa) | ||
| 69 | { | ||
| 70 | struct device *dev = dev_from_gk20a(g); | ||
| 71 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 72 | struct hyp_ipa_pa_info info; | ||
| 73 | int err; | ||
| 74 | u64 pa = 0ULL; | ||
| 75 | |||
| 76 | err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa); | ||
| 77 | if (err < 0) { | ||
| 78 | /* WAR for bug 2096877: | ||
| 79 | * hyp_read_ipa_pa_info only looks up RAM mappings, so | ||
| 80 | * assume a one-to-one IPA:PA mapping for the syncpt aperture. | ||
| 81 | */ | ||
| 82 | u64 start = g->syncpt_unit_base; | ||
| 83 | u64 end = g->syncpt_unit_base + g->syncpt_unit_size; | ||
| 84 | if ((ipa >= start) && (ipa < end)) { | ||
| 85 | pa = ipa; | ||
| 86 | nvgpu_log(g, gpu_dbg_map_v, | ||
| 87 | "ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n", | ||
| 88 | ipa, platform->vmid, pa); | ||
| 89 | } else { | ||
| 90 | nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d", | ||
| 91 | ipa, platform->vmid, err); | ||
| 92 | } | ||
| 93 | } else { | ||
| 94 | pa = info.base + info.offset; | ||
| 95 | nvgpu_log(g, gpu_dbg_map_v, | ||
| 96 | "ipa=%llx vmid=%d -> pa=%llx " | ||
| 97 | "base=%llx offset=%llx size=%llx\n", | ||
| 98 | ipa, platform->vmid, pa, info.base, | ||
| 99 | info.offset, info.size); | ||
| 100 | } | ||
| 101 | return pa; | ||
| 102 | } | ||
| 103 | #endif | ||
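
For illustration (all numeric values hypothetical): if hyp_read_ipa_pa_info()
succeeds with info.base = 0x80000000 and info.offset = 0x2000, the function
returns pa = 0x80002000. If the lookup fails but the IPA falls inside
[syncpt_unit_base, syncpt_unit_base + syncpt_unit_size), the IPA is returned
unchanged as the PA, per the bug 2096877 workaround; any other failure
returns 0.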
| 104 | |||
| 105 | int nvgpu_init_soc_vars(struct gk20a *g) | ||
| 106 | { | ||
| 107 | #ifdef CONFIG_TEGRA_HV_MANAGER | ||
| 108 | struct device *dev = dev_from_gk20a(g); | ||
| 109 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 110 | int err; | ||
| 111 | |||
| 112 | if (nvgpu_is_hypervisor_mode(g)) { | ||
| 113 | err = hyp_read_gid(&platform->vmid); | ||
| 114 | if (err) { | ||
| 115 | nvgpu_err(g, "failed to read vmid"); | ||
| 116 | return err; | ||
| 117 | } | ||
| 118 | platform->phys_addr = nvgpu_tegra_hv_ipa_pa; | ||
| 119 | } | ||
| 120 | #endif | ||
| 121 | return 0; | ||
| 122 | } | ||
diff --git a/include/os/linux/sync_sema_android.c b/include/os/linux/sync_sema_android.c new file mode 100644 index 0000000..59e3b7a --- /dev/null +++ b/include/os/linux/sync_sema_android.c | |||
| @@ -0,0 +1,418 @@ | |||
| 1 | /* | ||
| 2 | * Semaphore Sync Framework Integration | ||
| 3 | * | ||
| 4 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | #include <linux/file.h> | ||
| 19 | #include <linux/fs.h> | ||
| 20 | #include <linux/hrtimer.h> | ||
| 21 | #include <linux/module.h> | ||
| 22 | #include <nvgpu/lock.h> | ||
| 23 | |||
| 24 | #include <nvgpu/kmem.h> | ||
| 25 | #include <nvgpu/semaphore.h> | ||
| 26 | #include <nvgpu/bug.h> | ||
| 27 | #include <nvgpu/kref.h> | ||
| 28 | #include <nvgpu/channel.h> | ||
| 29 | #include "../linux/channel.h" | ||
| 30 | |||
| 31 | #include "../drivers/staging/android/sync.h" | ||
| 32 | |||
| 33 | #include "sync_sema_android.h" | ||
| 34 | |||
| 35 | static const struct sync_timeline_ops gk20a_sync_timeline_ops; | ||
| 36 | |||
| 37 | struct gk20a_sync_timeline { | ||
| 38 | struct sync_timeline obj; | ||
| 39 | u32 max; | ||
| 40 | u32 min; | ||
| 41 | }; | ||
| 42 | |||
| 43 | /** | ||
| 44 | * The sync framework dups pts when merging fences. We share a single | ||
| 45 | * refcounted gk20a_sync_pt for each duped pt. | ||
| 46 | */ | ||
| 47 | struct gk20a_sync_pt { | ||
| 48 | struct gk20a *g; | ||
| 49 | struct nvgpu_ref refcount; | ||
| 50 | u32 thresh; | ||
| 51 | struct nvgpu_semaphore *sema; | ||
| 52 | struct gk20a_sync_timeline *obj; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Use a spin lock here since it will have better performance | ||
| 56 | * than a mutex - there should be very little contention on this | ||
| 57 | * lock. | ||
| 58 | */ | ||
| 59 | struct nvgpu_spinlock lock; | ||
| 60 | }; | ||
| 61 | |||
| 62 | struct gk20a_sync_pt_inst { | ||
| 63 | struct sync_pt pt; | ||
| 64 | struct gk20a_sync_pt *shared; | ||
| 65 | }; | ||
| 66 | |||
| 67 | /** | ||
| 68 | * Compares sync pt values a and b, both of which will trigger either before | ||
| 69 | * or after ref (i.e. a and b trigger before ref, or a and b trigger after | ||
| 70 | * ref). Supplying ref allows us to handle wrapping correctly. | ||
| 71 | * | ||
| 72 | * Returns -1 if a < b (a triggers before b) | ||
| 73 | * 0 if a = b (a and b trigger at the same time) | ||
| 74 | * 1 if a > b (b triggers before a) | ||
| 75 | */ | ||
| 76 | static int __gk20a_sync_pt_compare_ref( | ||
| 77 | u32 ref, | ||
| 78 | u32 a, | ||
| 79 | u32 b) | ||
| 80 | { | ||
| 81 | /* | ||
| 82 | * We normalize both a and b by subtracting ref from them. | ||
| 83 | * Denote the normalized values by a_n and b_n. Note that because | ||
| 84 | * of wrapping, a_n and/or b_n may be negative. | ||
| 85 | * | ||
| 86 | * The normalized values a_n and b_n satisfy: | ||
| 87 | * - a positive value triggers before a negative value | ||
| 88 | * - a smaller positive value triggers before a greater positive value | ||
| 89 | * - a smaller negative value (greater in absolute value) triggers | ||
| 90 | * before a greater negative value (smaller in absolute value). | ||
| 91 | * | ||
| 92 | * Thus we can just stick to unsigned arithmetic and compare | ||
| 93 | * (u32)a_n to (u32)b_n. | ||
| 94 | * | ||
| 95 | * Just to reiterate the possible cases: | ||
| 96 | * | ||
| 97 | * 1A) ...ref..a....b.... | ||
| 98 | * 1B) ...ref..b....a.... | ||
| 99 | * 2A) ...b....ref..a.... b_n < 0 | ||
| 100 | * 2B) ...a....ref..b.... a_n > 0 | ||
| 101 | * 3A) ...a....b....ref.. a_n < 0, b_n < 0 | ||
| 102 | * 3B) ...b....a....ref.. a_n < 0, b_n < 0 | ||
| 103 | */ | ||
| 104 | u32 a_n = a - ref; | ||
| 105 | u32 b_n = b - ref; | ||
| 106 | if (a_n < b_n) | ||
| 107 | return -1; | ||
| 108 | else if (a_n > b_n) | ||
| 109 | return 1; | ||
| 110 | else | ||
| 111 | return 0; | ||
| 112 | } | ||
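
A worked example of the wrapping behaviour, with illustrative values: for
ref = 0xFFFFFFF0, a = 0xFFFFFFF8 (just before the 32-bit wrap) and
b = 0x00000008 (just after it), a_n = a - ref = 0x8 and b_n = b - ref = 0x18,
so a_n < b_n and the function returns -1: a triggers before b even though
a > b when compared as raw u32 values.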
| 113 | |||
| 114 | static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt) | ||
| 115 | { | ||
| 116 | struct gk20a_sync_pt_inst *pti = | ||
| 117 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
| 118 | return pti->shared; | ||
| 119 | } | ||
| 120 | static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj) | ||
| 121 | { | ||
| 122 | if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops)) | ||
| 123 | return NULL; | ||
| 124 | return (struct gk20a_sync_timeline *)obj; | ||
| 125 | } | ||
| 126 | |||
| 127 | static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) | ||
| 128 | { | ||
| 129 | struct gk20a_sync_pt *pt = | ||
| 130 | container_of(ref, struct gk20a_sync_pt, refcount); | ||
| 131 | struct gk20a *g = pt->g; | ||
| 132 | |||
| 133 | if (pt->sema) | ||
| 134 | nvgpu_semaphore_put(pt->sema); | ||
| 135 | nvgpu_kfree(g, pt); | ||
| 136 | } | ||
| 137 | |||
| 138 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | ||
| 139 | struct gk20a *g, | ||
| 140 | struct gk20a_sync_timeline *obj, | ||
| 141 | struct nvgpu_semaphore *sema) | ||
| 142 | { | ||
| 143 | struct gk20a_sync_pt *shared; | ||
| 144 | |||
| 145 | shared = nvgpu_kzalloc(g, sizeof(*shared)); | ||
| 146 | if (!shared) | ||
| 147 | return NULL; | ||
| 148 | |||
| 149 | nvgpu_ref_init(&shared->refcount); | ||
| 150 | shared->g = g; | ||
| 151 | shared->obj = obj; | ||
| 152 | shared->sema = sema; | ||
| 153 | shared->thresh = ++obj->max; /* sync framework has a lock */ | ||
| 154 | |||
| 155 | nvgpu_spinlock_init(&shared->lock); | ||
| 156 | |||
| 157 | nvgpu_semaphore_get(sema); | ||
| 158 | |||
| 159 | return shared; | ||
| 160 | } | ||
| 161 | |||
| 162 | static struct sync_pt *gk20a_sync_pt_create_inst( | ||
| 163 | struct gk20a *g, | ||
| 164 | struct gk20a_sync_timeline *obj, | ||
| 165 | struct nvgpu_semaphore *sema) | ||
| 166 | { | ||
| 167 | struct gk20a_sync_pt_inst *pti; | ||
| 168 | |||
| 169 | pti = (struct gk20a_sync_pt_inst *) | ||
| 170 | sync_pt_create(&obj->obj, sizeof(*pti)); | ||
| 171 | if (!pti) | ||
| 172 | return NULL; | ||
| 173 | |||
| 174 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); | ||
| 175 | if (!pti->shared) { | ||
| 176 | sync_pt_free(&pti->pt); | ||
| 177 | return NULL; | ||
| 178 | } | ||
| 179 | return &pti->pt; | ||
| 180 | } | ||
| 181 | |||
| 182 | static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) | ||
| 183 | { | ||
| 184 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
| 185 | if (pt) | ||
| 186 | nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); | ||
| 187 | } | ||
| 188 | |||
| 189 | static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) | ||
| 190 | { | ||
| 191 | struct gk20a_sync_pt_inst *pti; | ||
| 192 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
| 193 | |||
| 194 | pti = (struct gk20a_sync_pt_inst *) | ||
| 195 | sync_pt_create(&pt->obj->obj, sizeof(*pti)); | ||
| 196 | if (!pti) | ||
| 197 | return NULL; | ||
| 198 | pti->shared = pt; | ||
| 199 | nvgpu_ref_get(&pt->refcount); | ||
| 200 | return &pti->pt; | ||
| 201 | } | ||
| 202 | |||
| 203 | /* | ||
| 204 | * This function may be called concurrently on the same sync_pt, so a lock | ||
| 205 | * is required to protect the sync_pt's internal data structures, which | ||
| 206 | * are modified as a side effect of calling this function. | ||
| 207 | */ | ||
| 208 | static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | ||
| 209 | { | ||
| 210 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
| 211 | struct gk20a_sync_timeline *obj = pt->obj; | ||
| 212 | bool signaled = true; | ||
| 213 | |||
| 214 | nvgpu_spinlock_acquire(&pt->lock); | ||
| 215 | if (!pt->sema) | ||
| 216 | goto done; | ||
| 217 | |||
| 218 | /* Acquired == not released yet == active == not signaled. */ | ||
| 219 | signaled = !nvgpu_semaphore_is_acquired(pt->sema); | ||
| 220 | |||
| 221 | if (signaled) { | ||
| 222 | /* Update min if necessary. */ | ||
| 223 | if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh, | ||
| 224 | obj->min) == 1) | ||
| 225 | obj->min = pt->thresh; | ||
| 226 | |||
| 227 | /* Release the semaphore to the pool. */ | ||
| 228 | nvgpu_semaphore_put(pt->sema); | ||
| 229 | pt->sema = NULL; | ||
| 230 | } | ||
| 231 | done: | ||
| 232 | nvgpu_spinlock_release(&pt->lock); | ||
| 233 | |||
| 234 | return signaled; | ||
| 235 | } | ||
| 236 | |||
| 237 | static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) | ||
| 238 | { | ||
| 239 | bool a_expired; | ||
| 240 | bool b_expired; | ||
| 241 | struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a); | ||
| 242 | struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b); | ||
| 243 | |||
| 244 | if (WARN_ON(pt_a->obj != pt_b->obj)) | ||
| 245 | return 0; | ||
| 246 | |||
| 247 | /* Early out */ | ||
| 248 | if (a == b) | ||
| 249 | return 0; | ||
| 250 | |||
| 251 | a_expired = gk20a_sync_pt_has_signaled(a); | ||
| 252 | b_expired = gk20a_sync_pt_has_signaled(b); | ||
| 253 | if (a_expired && !b_expired) { | ||
| 254 | /* Easy, a was earlier */ | ||
| 255 | return -1; | ||
| 256 | } else if (!a_expired && b_expired) { | ||
| 257 | /* Easy, b was earlier */ | ||
| 258 | return 1; | ||
| 259 | } | ||
| 260 | |||
| 261 | /* Both a and b are expired (trigger before min) or not | ||
| 262 | * expired (trigger after min), so we can use min | ||
| 263 | * as a reference value for __gk20a_sync_pt_compare_ref. | ||
| 264 | */ | ||
| 265 | return __gk20a_sync_pt_compare_ref(pt_a->obj->min, | ||
| 266 | pt_a->thresh, pt_b->thresh); | ||
| 267 | } | ||
| 268 | |||
| 269 | static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj) | ||
| 270 | { | ||
| 271 | return obj->min; | ||
| 272 | } | ||
| 273 | |||
| 274 | static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, | ||
| 275 | char *str, int size) | ||
| 276 | { | ||
| 277 | struct gk20a_sync_timeline *obj = | ||
| 278 | (struct gk20a_sync_timeline *)timeline; | ||
| 279 | snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); | ||
| 280 | } | ||
| 281 | |||
| 282 | static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, | ||
| 283 | char *str, int size) | ||
| 284 | { | ||
| 285 | struct nvgpu_semaphore *s = pt->sema; | ||
| 286 | |||
| 287 | snprintf(str, size, "S: pool=%llu [v=%u,r_v=%u]", | ||
| 288 | s->location.pool->page_idx, | ||
| 289 | nvgpu_semaphore_get_value(s), | ||
| 290 | nvgpu_semaphore_read(s)); | ||
| 291 | } | ||
| 292 | |||
| 293 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | ||
| 294 | int size) | ||
| 295 | { | ||
| 296 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
| 297 | |||
| 298 | if (pt->sema) { | ||
| 299 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | ||
| 300 | return; | ||
| 301 | } | ||
| 302 | |||
| 303 | snprintf(str, size, "%d", pt->thresh); | ||
| 304 | } | ||
| 305 | |||
| 306 | static const struct sync_timeline_ops gk20a_sync_timeline_ops = { | ||
| 307 | .driver_name = "nvgpu_semaphore", | ||
| 308 | .dup = gk20a_sync_pt_dup_inst, | ||
| 309 | .has_signaled = gk20a_sync_pt_has_signaled, | ||
| 310 | .compare = gk20a_sync_pt_compare, | ||
| 311 | .free_pt = gk20a_sync_pt_free_inst, | ||
| 312 | .timeline_value_str = gk20a_sync_timeline_value_str, | ||
| 313 | .pt_value_str = gk20a_sync_pt_value_str, | ||
| 314 | }; | ||
| 315 | |||
| 316 | /* Public API */ | ||
| 317 | |||
| 318 | struct sync_fence *gk20a_sync_fence_fdget(int fd) | ||
| 319 | { | ||
| 320 | struct sync_fence *fence = sync_fence_fdget(fd); | ||
| 321 | int i; | ||
| 322 | |||
| 323 | if (!fence) | ||
| 324 | return NULL; | ||
| 325 | |||
| 326 | for (i = 0; i < fence->num_fences; i++) { | ||
| 327 | struct sync_pt *spt = sync_pt_from_fence(fence->cbs[i].sync_pt); | ||
| 328 | struct sync_timeline *t; | ||
| 329 | |||
| 330 | if (spt == NULL) { | ||
| 331 | sync_fence_put(fence); | ||
| 332 | return NULL; | ||
| 333 | } | ||
| 334 | |||
| 335 | t = sync_pt_parent(spt); | ||
| 336 | if (t->ops != &gk20a_sync_timeline_ops) { | ||
| 337 | sync_fence_put(fence); | ||
| 338 | return NULL; | ||
| 339 | } | ||
| 340 | } | ||
| 341 | |||
| 342 | return fence; | ||
| 343 | } | ||
| 344 | |||
| 345 | struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) | ||
| 346 | { | ||
| 347 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); | ||
| 348 | struct nvgpu_semaphore *sema; | ||
| 349 | |||
| 350 | nvgpu_spinlock_acquire(&pt->lock); | ||
| 351 | sema = pt->sema; | ||
| 352 | if (sema) | ||
| 353 | nvgpu_semaphore_get(sema); | ||
| 354 | nvgpu_spinlock_release(&pt->lock); | ||
| 355 | |||
| 356 | return sema; | ||
| 357 | } | ||
| 358 | |||
| 359 | void gk20a_sync_timeline_signal(struct sync_timeline *timeline) | ||
| 360 | { | ||
| 361 | sync_timeline_signal(timeline, 0); | ||
| 362 | } | ||
| 363 | |||
| 364 | void gk20a_sync_timeline_destroy(struct sync_timeline *timeline) | ||
| 365 | { | ||
| 366 | sync_timeline_destroy(timeline); | ||
| 367 | } | ||
| 368 | |||
| 369 | struct sync_timeline *gk20a_sync_timeline_create( | ||
| 370 | const char *name) | ||
| 371 | { | ||
| 372 | struct gk20a_sync_timeline *obj; | ||
| 373 | |||
| 374 | obj = (struct gk20a_sync_timeline *) | ||
| 375 | sync_timeline_create(&gk20a_sync_timeline_ops, | ||
| 376 | sizeof(struct gk20a_sync_timeline), | ||
| 377 | name); | ||
| 378 | if (!obj) | ||
| 379 | return NULL; | ||
| 380 | obj->max = 0; | ||
| 381 | obj->min = 0; | ||
| 382 | return &obj->obj; | ||
| 383 | } | ||
| 384 | |||
| 385 | struct sync_fence *gk20a_sync_fence_create( | ||
| 386 | struct channel_gk20a *c, | ||
| 387 | struct nvgpu_semaphore *sema, | ||
| 388 | const char *fmt, ...) | ||
| 389 | { | ||
| 390 | char name[30]; | ||
| 391 | va_list args; | ||
| 392 | struct sync_pt *pt; | ||
| 393 | struct sync_fence *fence; | ||
| 394 | struct gk20a *g = c->g; | ||
| 395 | |||
| 396 | struct nvgpu_channel_linux *os_channel_priv = c->os_priv; | ||
| 397 | struct nvgpu_os_fence_framework *fence_framework = NULL; | ||
| 398 | struct gk20a_sync_timeline *timeline = NULL; | ||
| 399 | |||
| 400 | fence_framework = &os_channel_priv->fence_framework; | ||
| 401 | |||
| 402 | timeline = to_gk20a_timeline(fence_framework->timeline); | ||
| 403 | |||
| 404 | pt = gk20a_sync_pt_create_inst(g, timeline, sema); | ||
| 405 | if (pt == NULL) | ||
| 406 | return NULL; | ||
| 407 | |||
| 408 | va_start(args, fmt); | ||
| 409 | vsnprintf(name, sizeof(name), fmt, args); | ||
| 410 | va_end(args); | ||
| 411 | |||
| 412 | fence = sync_fence_create(name, pt); | ||
| 413 | if (fence == NULL) { | ||
| 414 | sync_pt_free(pt); | ||
| 415 | return NULL; | ||
| 416 | } | ||
| 417 | return fence; | ||
| 418 | } | ||
diff --git a/include/os/linux/sync_sema_android.h b/include/os/linux/sync_sema_android.h new file mode 100644 index 0000000..4fca7be --- /dev/null +++ b/include/os/linux/sync_sema_android.h | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | /* | ||
| 2 | * Semaphore Sync Framework Integration | ||
| 3 | * | ||
| 4 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef _GK20A_SYNC_H_ | ||
| 20 | #define _GK20A_SYNC_H_ | ||
| 21 | |||
| 22 | struct sync_timeline; | ||
| 23 | struct sync_fence; | ||
| 24 | struct sync_pt; | ||
| 25 | struct nvgpu_semaphore; | ||
| 26 | struct fence; | ||
| 27 | |||
| 28 | #ifdef CONFIG_SYNC | ||
| 29 | struct sync_timeline *gk20a_sync_timeline_create(const char *name); | ||
| 30 | void gk20a_sync_timeline_destroy(struct sync_timeline *); | ||
| 31 | void gk20a_sync_timeline_signal(struct sync_timeline *); | ||
| 32 | struct sync_fence *gk20a_sync_fence_create( | ||
| 33 | struct channel_gk20a *c, | ||
| 34 | struct nvgpu_semaphore *, | ||
| 35 | const char *fmt, ...); | ||
| 36 | struct sync_fence *gk20a_sync_fence_fdget(int fd); | ||
| 37 | struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt); | ||
| 38 | #else | ||
| 39 | static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {} | ||
| 40 | static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {} | ||
| 41 | static inline struct sync_fence *gk20a_sync_fence_fdget(int fd) | ||
| 42 | { | ||
| 43 | return NULL; | ||
| 44 | } | ||
| 45 | static inline struct sync_timeline *gk20a_sync_timeline_create( | ||
| 46 | const char *name) { | ||
| 47 | return NULL; | ||
| 48 | } | ||
| 49 | #endif | ||
| 50 | |||
| 51 | #endif | ||
diff --git a/include/os/linux/sysfs.c b/include/os/linux/sysfs.c new file mode 100644 index 0000000..221ea0c --- /dev/null +++ b/include/os/linux/sysfs.c | |||
| @@ -0,0 +1,1275 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/device.h> | ||
| 18 | #include <linux/pm_runtime.h> | ||
| 19 | #include <linux/fb.h> | ||
| 20 | |||
| 21 | #include <nvgpu/kmem.h> | ||
| 22 | #include <nvgpu/nvhost.h> | ||
| 23 | #include <nvgpu/ptimer.h> | ||
| 24 | #include <nvgpu/power_features/cg.h> | ||
| 25 | #include <nvgpu/power_features/pg.h> | ||
| 26 | |||
| 27 | #include "os_linux.h" | ||
| 28 | #include "sysfs.h" | ||
| 29 | #include "platform_gk20a.h" | ||
| 30 | #include "gk20a/gr_gk20a.h" | ||
| 31 | #include "gv11b/gr_gv11b.h" | ||
| 32 | |||
| 33 | #define PTIMER_FP_FACTOR 1000000 | ||
| 34 | |||
| 35 | #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) | ||
| 36 | |||
| 37 | #define TPC_MASK_FOR_ALL_ACTIVE_TPCs (u32) 0x0 | ||
| 38 | |||
| 39 | static ssize_t elcg_enable_store(struct device *dev, | ||
| 40 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 41 | { | ||
| 42 | struct gk20a *g = get_gk20a(dev); | ||
| 43 | unsigned long val = 0; | ||
| 44 | int err; | ||
| 45 | |||
| 46 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 47 | return -EINVAL; | ||
| 48 | |||
| 49 | err = gk20a_busy(g); | ||
| 50 | if (err) | ||
| 51 | return err; | ||
| 52 | |||
| 53 | if (val) { | ||
| 54 | nvgpu_cg_elcg_set_elcg_enabled(g, true); | ||
| 55 | } else { | ||
| 56 | nvgpu_cg_elcg_set_elcg_enabled(g, false); | ||
| 57 | } | ||
| 58 | |||
| 59 | gk20a_idle(g); | ||
| 60 | |||
| 61 | nvgpu_info(g, "ELCG is %s.", val ? "enabled" : | ||
| 62 | "disabled"); | ||
| 63 | |||
| 64 | return count; | ||
| 65 | } | ||
| 66 | |||
| 67 | static ssize_t elcg_enable_read(struct device *dev, | ||
| 68 | struct device_attribute *attr, char *buf) | ||
| 69 | { | ||
| 70 | struct gk20a *g = get_gk20a(dev); | ||
| 71 | |||
| 72 | return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0); | ||
| 73 | } | ||
| 74 | |||
| 75 | static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store); | ||
| 76 | |||
| 77 | static ssize_t blcg_enable_store(struct device *dev, | ||
| 78 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 79 | { | ||
| 80 | struct gk20a *g = get_gk20a(dev); | ||
| 81 | unsigned long val = 0; | ||
| 82 | int err; | ||
| 83 | |||
| 84 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 85 | return -EINVAL; | ||
| 86 | |||
| 87 | err = gk20a_busy(g); | ||
| 88 | if (err) | ||
| 89 | return err; | ||
| 90 | |||
| 91 | if (val) { | ||
| 92 | nvgpu_cg_blcg_set_blcg_enabled(g, true); | ||
| 93 | } else { | ||
| 94 | nvgpu_cg_blcg_set_blcg_enabled(g, false); | ||
| 95 | } | ||
| 96 | |||
| 97 | gk20a_idle(g); | ||
| 98 | |||
| 99 | nvgpu_info(g, "BLCG is %s.", val ? "enabled" : | ||
| 100 | "disabled"); | ||
| 101 | |||
| 102 | return count; | ||
| 103 | } | ||
| 104 | |||
| 105 | static ssize_t blcg_enable_read(struct device *dev, | ||
| 106 | struct device_attribute *attr, char *buf) | ||
| 107 | { | ||
| 108 | struct gk20a *g = get_gk20a(dev); | ||
| 109 | |||
| 110 | return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0); | ||
| 111 | } | ||
| 112 | |||
| 113 | |||
| 114 | static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store); | ||
| 115 | |||
| 116 | static ssize_t slcg_enable_store(struct device *dev, | ||
| 117 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 118 | { | ||
| 119 | struct gk20a *g = get_gk20a(dev); | ||
| 120 | unsigned long val = 0; | ||
| 121 | int err; | ||
| 122 | |||
| 123 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 124 | return -EINVAL; | ||
| 125 | |||
| 126 | err = gk20a_busy(g); | ||
| 127 | if (err) { | ||
| 128 | return err; | ||
| 129 | } | ||
| 130 | |||
| 131 | if (val) { | ||
| 132 | nvgpu_cg_slcg_set_slcg_enabled(g, true); | ||
| 133 | } else { | ||
| 134 | nvgpu_cg_slcg_set_slcg_enabled(g, false); | ||
| 135 | } | ||
| 136 | |||
| 137 | /* | ||
| 138 | * TODO: slcg_therm_load_gating is not enabled anywhere during | ||
| 139 | * init. Therefore, it would be incongruous to add it here. Once | ||
| 140 | * it is added to init, we should add it here too. | ||
| 141 | */ | ||
| 142 | gk20a_idle(g); | ||
| 143 | |||
| 144 | nvgpu_info(g, "SLCG is %s.", val ? "enabled" : | ||
| 145 | "disabled"); | ||
| 146 | |||
| 147 | return count; | ||
| 148 | } | ||
| 149 | |||
| 150 | static ssize_t slcg_enable_read(struct device *dev, | ||
| 151 | struct device_attribute *attr, char *buf) | ||
| 152 | { | ||
| 153 | struct gk20a *g = get_gk20a(dev); | ||
| 154 | |||
| 155 | return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 1 : 0); | ||
| 156 | } | ||
| 157 | |||
| 158 | static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store); | ||
| 159 | |||
| 160 | static ssize_t ptimer_scale_factor_show(struct device *dev, | ||
| 161 | struct device_attribute *attr, | ||
| 162 | char *buf) | ||
| 163 | { | ||
| 164 | struct gk20a *g = get_gk20a(dev); | ||
| 165 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 166 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
| 167 | u32 scaling_factor_fp; | ||
| 168 | ssize_t res; | ||
| 169 | |||
| 170 | if (!src_freq_hz) { | ||
| 171 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
| 172 | return -EINVAL; | ||
| 173 | } | ||
| 174 | |||
| 175 | scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) / | ||
| 176 | ((u32)(src_freq_hz) / | ||
| 177 | (u32)(PTIMER_FP_FACTOR)); | ||
| 178 | res = snprintf(buf, | ||
| 179 | PAGE_SIZE, | ||
| 180 | "%u.%u\n", | ||
| 181 | scaling_factor_fp / PTIMER_FP_FACTOR, | ||
| 182 | scaling_factor_fp % PTIMER_FP_FACTOR); | ||
| 183 | |||
| 184 | return res; | ||
| 185 | |||
| 186 | } | ||
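
The value is rendered as a fixed-point number with six fractional digits. For
illustration (both frequencies assumed, not taken from this code): with
PTIMER_REF_FREQ_HZ = 31250000 and src_freq_hz = 19200000, the divisor
src_freq_hz / PTIMER_FP_FACTOR truncates to 19, giving scaling_factor_fp =
31250000 / 19 = 1644736, which is printed as "1.644736". Note that the early
truncation to whole MHz costs precision: the exact ratio is about 1.627604.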
| 187 | |||
| 188 | static DEVICE_ATTR(ptimer_scale_factor, | ||
| 189 | S_IRUGO, | ||
| 190 | ptimer_scale_factor_show, | ||
| 191 | NULL); | ||
| 192 | |||
| 193 | static ssize_t ptimer_ref_freq_show(struct device *dev, | ||
| 194 | struct device_attribute *attr, | ||
| 195 | char *buf) | ||
| 196 | { | ||
| 197 | struct gk20a *g = get_gk20a(dev); | ||
| 198 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 199 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
| 200 | ssize_t res; | ||
| 201 | |||
| 202 | if (!src_freq_hz) { | ||
| 203 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
| 204 | return -EINVAL; | ||
| 205 | } | ||
| 206 | |||
| 207 | res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ); | ||
| 208 | |||
| 209 | return res; | ||
| 210 | |||
| 211 | } | ||
| 212 | |||
| 213 | static DEVICE_ATTR(ptimer_ref_freq, | ||
| 214 | S_IRUGO, | ||
| 215 | ptimer_ref_freq_show, | ||
| 216 | NULL); | ||
| 217 | |||
| 218 | static ssize_t ptimer_src_freq_show(struct device *dev, | ||
| 219 | struct device_attribute *attr, | ||
| 220 | char *buf) | ||
| 221 | { | ||
| 222 | struct gk20a *g = get_gk20a(dev); | ||
| 223 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 224 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
| 225 | ssize_t res; | ||
| 226 | |||
| 227 | if (!src_freq_hz) { | ||
| 228 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
| 229 | return -EINVAL; | ||
| 230 | } | ||
| 231 | |||
| 232 | res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz); | ||
| 233 | |||
| 234 | return res; | ||
| 235 | |||
| 236 | } | ||
| 237 | |||
| 238 | static DEVICE_ATTR(ptimer_src_freq, | ||
| 239 | S_IRUGO, | ||
| 240 | ptimer_src_freq_show, | ||
| 241 | NULL); | ||
| 242 | |||
| 243 | |||
| 244 | static ssize_t gpu_powered_on_show(struct device *dev, | ||
| 245 | struct device_attribute *attr, | ||
| 246 | char *buf) | ||
| 247 | { | ||
| 248 | struct gk20a *g = get_gk20a(dev); | ||
| 249 | |||
| 250 | return snprintf(buf, PAGE_SIZE, "%u\n", g->power_on); | ||
| 251 | } | ||
| 252 | |||
| 253 | static DEVICE_ATTR(gpu_powered_on, S_IRUGO, gpu_powered_on_show, NULL); | ||
| 254 | |||
| 255 | #if defined(CONFIG_PM) | ||
| 256 | static ssize_t railgate_enable_store(struct device *dev, | ||
| 257 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 258 | { | ||
| 259 | unsigned long railgate_enable = 0; | ||
| 260 | 	/* dev is guaranteed to be valid here; OK to dereference */ | ||
| 261 | struct gk20a *g = get_gk20a(dev); | ||
| 262 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 263 | bool enabled = nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE); | ||
| 264 | int err; | ||
| 265 | |||
| 266 | if (kstrtoul(buf, 10, &railgate_enable) < 0) | ||
| 267 | return -EINVAL; | ||
| 268 | |||
| 269 | /* convert to boolean */ | ||
| 270 | railgate_enable = !!railgate_enable; | ||
| 271 | |||
| 272 | /* writing same value should be treated as nop and successful */ | ||
| 273 | if (railgate_enable == enabled) | ||
| 274 | goto out; | ||
| 275 | |||
| 276 | if (!platform->can_railgate_init) { | ||
| 277 | nvgpu_err(g, "Railgating is not supported"); | ||
| 278 | return -EINVAL; | ||
| 279 | } | ||
| 280 | |||
| 281 | if (railgate_enable) { | ||
| 282 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, true); | ||
| 283 | pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); | ||
| 284 | } else { | ||
| 285 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
| 286 | pm_runtime_set_autosuspend_delay(dev, -1); | ||
| 287 | } | ||
| 288 | /* wake-up system to make rail-gating setting effective */ | ||
| 289 | err = gk20a_busy(g); | ||
| 290 | if (err) | ||
| 291 | return err; | ||
| 292 | gk20a_idle(g); | ||
| 293 | |||
| 294 | out: | ||
| 295 | nvgpu_info(g, "railgate is %s.", | ||
| 296 | nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ? | ||
| 297 | "enabled" : "disabled"); | ||
| 298 | |||
| 299 | return count; | ||
| 300 | } | ||
| 301 | |||
| 302 | static ssize_t railgate_enable_read(struct device *dev, | ||
| 303 | struct device_attribute *attr, char *buf) | ||
| 304 | { | ||
| 305 | struct gk20a *g = get_gk20a(dev); | ||
| 306 | |||
| 307 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
| 308 | nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ? 1 : 0); | ||
| 309 | } | ||
| 310 | |||
| 311 | static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read, | ||
| 312 | railgate_enable_store); | ||
| 313 | #endif | ||
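
Editor's note: a minimal userspace sketch of driving this knob. The sysfs path is an assumption (the real location depends on how the device is registered), and root is required since the attribute is created with ROOTRW:

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; adjust for the actual device node. */
	FILE *f = fopen("/sys/devices/gpu.0/railgate_enable", "w");

	if (!f)
		return 1;
	fputs("1", f);          /* enable railgating */
	return fclose(f) ? 1 : 0;
}

Reading is_railgated afterwards reports whether the rail is actually gated at that moment, which also depends on the autosuspend delay set via railgate_delay.
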
| 314 | |||
| 315 | static ssize_t railgate_delay_store(struct device *dev, | ||
| 316 | struct device_attribute *attr, | ||
| 317 | const char *buf, size_t count) | ||
| 318 | { | ||
| 319 | int railgate_delay = 0, ret = 0; | ||
| 320 | struct gk20a *g = get_gk20a(dev); | ||
| 321 | int err; | ||
| 322 | |||
| 323 | if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) { | ||
| 324 | 		nvgpu_info(g, "power-gating is not supported"); | ||
| 325 | return count; | ||
| 326 | } | ||
| 327 | |||
| 328 | ret = sscanf(buf, "%d", &railgate_delay); | ||
| 329 | if (ret == 1 && railgate_delay >= 0) { | ||
| 330 | g->railgate_delay = railgate_delay; | ||
| 331 | pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); | ||
| 332 | } else | ||
| 333 | nvgpu_err(g, "Invalid powergate delay"); | ||
| 334 | |||
| 335 | /* wake-up system to make rail-gating delay effective immediately */ | ||
| 336 | err = gk20a_busy(g); | ||
| 337 | if (err) | ||
| 338 | return err; | ||
| 339 | gk20a_idle(g); | ||
| 340 | |||
| 341 | return count; | ||
| 342 | } | ||
| 343 | static ssize_t railgate_delay_show(struct device *dev, | ||
| 344 | struct device_attribute *attr, char *buf) | ||
| 345 | { | ||
| 346 | struct gk20a *g = get_gk20a(dev); | ||
| 347 | |||
| 348 | return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay); | ||
| 349 | } | ||
| 350 | static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show, | ||
| 351 | railgate_delay_store); | ||
| 352 | |||
| 353 | static ssize_t is_railgated_show(struct device *dev, | ||
| 354 | struct device_attribute *attr, char *buf) | ||
| 355 | { | ||
| 356 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 357 | 	bool is_railgated = false; | ||
| 358 | |||
| 359 | if (platform->is_railgated) | ||
| 360 | is_railgated = platform->is_railgated(dev); | ||
| 361 | |||
| 362 | return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no"); | ||
| 363 | } | ||
| 364 | static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL); | ||
| 365 | |||
| 366 | static ssize_t counters_show(struct device *dev, | ||
| 367 | struct device_attribute *attr, char *buf) | ||
| 368 | { | ||
| 369 | struct gk20a *g = get_gk20a(dev); | ||
| 370 | u32 busy_cycles, total_cycles; | ||
| 371 | ssize_t res; | ||
| 372 | |||
| 373 | nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles); | ||
| 374 | |||
| 375 | res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); | ||
| 376 | |||
| 377 | return res; | ||
| 378 | } | ||
| 379 | static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); | ||
| 380 | |||
| 381 | static ssize_t counters_show_reset(struct device *dev, | ||
| 382 | struct device_attribute *attr, char *buf) | ||
| 383 | { | ||
| 384 | ssize_t res = counters_show(dev, attr, buf); | ||
| 385 | struct gk20a *g = get_gk20a(dev); | ||
| 386 | |||
| 387 | nvgpu_pmu_reset_load_counters(g); | ||
| 388 | |||
| 389 | return res; | ||
| 390 | } | ||
| 391 | static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); | ||
| 392 | |||
| 393 | static ssize_t gk20a_load_show(struct device *dev, | ||
| 394 | struct device_attribute *attr, | ||
| 395 | char *buf) | ||
| 396 | { | ||
| 397 | struct gk20a *g = get_gk20a(dev); | ||
| 398 | u32 busy_time; | ||
| 399 | ssize_t res; | ||
| 400 | int err; | ||
| 401 | |||
| 402 | if (!g->power_on) { | ||
| 403 | busy_time = 0; | ||
| 404 | } else { | ||
| 405 | err = gk20a_busy(g); | ||
| 406 | if (err) | ||
| 407 | return err; | ||
| 408 | |||
| 409 | nvgpu_pmu_load_update(g); | ||
| 410 | nvgpu_pmu_load_norm(g, &busy_time); | ||
| 411 | gk20a_idle(g); | ||
| 412 | } | ||
| 413 | |||
| 414 | res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); | ||
| 415 | |||
| 416 | return res; | ||
| 417 | } | ||
| 418 | static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL); | ||
| 419 | |||
| 420 | static ssize_t elpg_enable_store(struct device *dev, | ||
| 421 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 422 | { | ||
| 423 | struct gk20a *g = get_gk20a(dev); | ||
| 424 | unsigned long val = 0; | ||
| 425 | int err; | ||
| 426 | |||
| 427 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 428 | return -EINVAL; | ||
| 429 | |||
| 430 | if (!g->power_on) { | ||
| 431 | return -EINVAL; | ||
| 432 | } else { | ||
| 433 | err = gk20a_busy(g); | ||
| 434 | if (err) | ||
| 435 | return -EAGAIN; | ||
| 436 | 		/* | ||
| 437 | 		 * Since ELPG is refcounted, avoid redundant enable/disable | ||
| 438 | 		 * calls when it is already in the requested state. | ||
| 439 | 		 */ | ||
| 440 | if (val != 0) { | ||
| 441 | nvgpu_pg_elpg_set_elpg_enabled(g, true); | ||
| 442 | } else { | ||
| 443 | nvgpu_pg_elpg_set_elpg_enabled(g, false); | ||
| 444 | } | ||
| 445 | gk20a_idle(g); | ||
| 446 | } | ||
| 447 | nvgpu_info(g, "ELPG is %s.", val ? "enabled" : | ||
| 448 | "disabled"); | ||
| 449 | |||
| 450 | return count; | ||
| 451 | } | ||
| 452 | |||
| 453 | static ssize_t elpg_enable_read(struct device *dev, | ||
| 454 | struct device_attribute *attr, char *buf) | ||
| 455 | { | ||
| 456 | struct gk20a *g = get_gk20a(dev); | ||
| 457 | |||
| 458 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
| 459 | nvgpu_pg_elpg_is_enabled(g) ? 1 : 0); | ||
| 460 | } | ||
| 461 | |||
| 462 | static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store); | ||
| 463 | |||
| 464 | static ssize_t ldiv_slowdown_factor_store(struct device *dev, | ||
| 465 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 466 | { | ||
| 467 | struct gk20a *g = get_gk20a(dev); | ||
| 468 | unsigned long val = 0; | ||
| 469 | int err; | ||
| 470 | |||
| 471 | if (kstrtoul(buf, 10, &val) < 0) { | ||
| 472 | 		nvgpu_err(g, "parse error for input SLOWDOWN factor"); | ||
| 473 | return -EINVAL; | ||
| 474 | } | ||
| 475 | |||
| 476 | if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) { | ||
| 477 | 		nvgpu_err(g, "Invalid SLOWDOWN factor"); | ||
| 478 | return -EINVAL; | ||
| 479 | } | ||
| 480 | |||
| 481 | if (val == g->ldiv_slowdown_factor) | ||
| 482 | return count; | ||
| 483 | |||
| 484 | if (!g->power_on) { | ||
| 485 | g->ldiv_slowdown_factor = val; | ||
| 486 | } else { | ||
| 487 | err = gk20a_busy(g); | ||
| 488 | if (err) | ||
| 489 | return -EAGAIN; | ||
| 490 | |||
| 491 | g->ldiv_slowdown_factor = val; | ||
| 492 | |||
| 493 | if (g->ops.pmu.pmu_pg_init_param) | ||
| 494 | g->ops.pmu.pmu_pg_init_param(g, | ||
| 495 | PMU_PG_ELPG_ENGINE_ID_GRAPHICS); | ||
| 496 | |||
| 497 | gk20a_idle(g); | ||
| 498 | } | ||
| 499 | |||
| 500 | 	nvgpu_info(g, "ldiv_slowdown_factor is %x", g->ldiv_slowdown_factor); | ||
| 501 | |||
| 502 | return count; | ||
| 503 | } | ||
| 504 | |||
| 505 | static ssize_t ldiv_slowdown_factor_read(struct device *dev, | ||
| 506 | struct device_attribute *attr, char *buf) | ||
| 507 | { | ||
| 508 | struct gk20a *g = get_gk20a(dev); | ||
| 509 | |||
| 510 | return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor); | ||
| 511 | } | ||
| 512 | |||
| 513 | static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW, | ||
| 514 | ldiv_slowdown_factor_read, ldiv_slowdown_factor_store); | ||
| 515 | |||
| 516 | static ssize_t mscg_enable_store(struct device *dev, | ||
| 517 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 518 | { | ||
| 519 | struct gk20a *g = get_gk20a(dev); | ||
| 520 | struct nvgpu_pmu *pmu = &g->pmu; | ||
| 521 | unsigned long val = 0; | ||
| 522 | int err; | ||
| 523 | |||
| 524 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 525 | return -EINVAL; | ||
| 526 | |||
| 527 | if (!g->power_on) { | ||
| 528 | g->mscg_enabled = val ? true : false; | ||
| 529 | } else { | ||
| 530 | err = gk20a_busy(g); | ||
| 531 | if (err) | ||
| 532 | return -EAGAIN; | ||
| 533 | 		/* | ||
| 534 | 		 * Avoid redundant MSCG enable/disable calls when it is | ||
| 535 | 		 * already in the requested state. | ||
| 536 | 		 */ | ||
| 537 | if (val && !g->mscg_enabled) { | ||
| 538 | g->mscg_enabled = true; | ||
| 539 | if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, | ||
| 540 | PMU_PG_LPWR_FEATURE_MSCG)) { | ||
| 541 | if (!ACCESS_ONCE(pmu->mscg_stat)) { | ||
| 542 | WRITE_ONCE(pmu->mscg_stat, | ||
| 543 | PMU_MSCG_ENABLED); | ||
| 544 | /* make status visible */ | ||
| 545 | smp_mb(); | ||
| 546 | } | ||
| 547 | } | ||
| 548 | |||
| 549 | } else if (!val && g->mscg_enabled) { | ||
| 550 | if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, | ||
| 551 | PMU_PG_LPWR_FEATURE_MSCG)) { | ||
| 552 | nvgpu_pmu_pg_global_enable(g, false); | ||
| 553 | WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); | ||
| 554 | /* make status visible */ | ||
| 555 | smp_mb(); | ||
| 556 | g->mscg_enabled = false; | ||
| 557 | if (nvgpu_pg_elpg_is_enabled(g)) { | ||
| 558 | nvgpu_pg_elpg_enable(g); | ||
| 559 | } | ||
| 560 | } | ||
| 561 | g->mscg_enabled = false; | ||
| 562 | } | ||
| 563 | gk20a_idle(g); | ||
| 564 | } | ||
| 565 | nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? "enabled" : | ||
| 566 | "disabled"); | ||
| 567 | |||
| 568 | return count; | ||
| 569 | } | ||
| 570 | |||
| 571 | static ssize_t mscg_enable_read(struct device *dev, | ||
| 572 | struct device_attribute *attr, char *buf) | ||
| 573 | { | ||
| 574 | struct gk20a *g = get_gk20a(dev); | ||
| 575 | |||
| 576 | return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0); | ||
| 577 | } | ||
| 578 | |||
| 579 | static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store); | ||
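
Editor's note: the WRITE_ONCE()/smp_mb() pairing above publishes mscg_stat so that readers on other CPUs observe the new status before any subsequent accesses. A hypothetical userspace analogue of the same publish/consume pattern using C11 atomics (smp_mb() is a full barrier, strictly stronger than the release ordering shown here):

#include <stdatomic.h>

static _Atomic int mscg_stat;

/* Publisher: make the status update visible to other threads. */
void publish_status(int status)
{
	atomic_store_explicit(&mscg_stat, status, memory_order_release);
}

/* Consumer: subsequent reads are ordered after observing the status. */
int read_status(void)
{
	return atomic_load_explicit(&mscg_stat, memory_order_acquire);
}
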
| 580 | |||
| 581 | static ssize_t aelpg_param_store(struct device *dev, | ||
| 582 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 583 | { | ||
| 584 | struct gk20a *g = get_gk20a(dev); | ||
| 585 | int status = 0; | ||
| 586 | union pmu_ap_cmd ap_cmd; | ||
| 587 | int *paramlist = (int *)g->pmu.aelpg_param; | ||
| 588 | u32 defaultparam[5] = { | ||
| 589 | APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US, | ||
| 590 | APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US, | ||
| 591 | APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US, | ||
| 592 | APCTRL_POWER_BREAKEVEN_DEFAULT_US, | ||
| 593 | APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT | ||
| 594 | }; | ||
| 595 | |||
| 596 | 	/* Get each parameter value from the input string */ | ||
| 597 | 	if (sscanf(buf, "%d %d %d %d %d", &paramlist[0], &paramlist[1], | ||
| 598 | 		&paramlist[2], &paramlist[3], &paramlist[4]) != 5) | ||
| 598 | 		return -EINVAL; | ||
| 599 | |||
| 600 | 	/* If all parameter values are 0 then reset to SW default values */ | ||
| 601 | if ((paramlist[0] | paramlist[1] | paramlist[2] | ||
| 602 | | paramlist[3] | paramlist[4]) == 0x00) { | ||
| 603 | memcpy(paramlist, defaultparam, sizeof(defaultparam)); | ||
| 604 | } | ||
| 605 | |||
| 606 | 	/* If AELPG is enabled and the PMU is ready, post the values to | ||
| 607 | 	 * the PMU now; otherwise they are stored and posted later. | ||
| 608 | 	 */ | ||
| 609 | if (g->aelpg_enabled && g->pmu.pmu_ready) { | ||
| 610 | /* Disable AELPG */ | ||
| 611 | ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; | ||
| 612 | ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; | ||
| 613 | status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); | ||
| 614 | |||
| 615 | /* Enable AELPG */ | ||
| 616 | nvgpu_aelpg_init(g); | ||
| 617 | nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); | ||
| 618 | } | ||
| 619 | |||
| 620 | return count; | ||
| 621 | } | ||
| 622 | |||
| 623 | static ssize_t aelpg_param_read(struct device *dev, | ||
| 624 | struct device_attribute *attr, char *buf) | ||
| 625 | { | ||
| 626 | struct gk20a *g = get_gk20a(dev); | ||
| 627 | |||
| 628 | return snprintf(buf, PAGE_SIZE, | ||
| 629 | "%d %d %d %d %d\n", g->pmu.aelpg_param[0], | ||
| 630 | g->pmu.aelpg_param[1], g->pmu.aelpg_param[2], | ||
| 631 | g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]); | ||
| 632 | } | ||
| 633 | |||
| 634 | static DEVICE_ATTR(aelpg_param, ROOTRW, | ||
| 635 | aelpg_param_read, aelpg_param_store); | ||
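
Editor's note: the store() above expects five space-separated integers, and writing all zeros restores the APCTRL_* defaults. A small userspace sketch (the sysfs path is hypothetical, and the node is root-only given ROOTRW):

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; adjust for the actual device node. */
	FILE *f = fopen("/sys/devices/gpu.0/aelpg_param", "w");

	if (!f)
		return 1;
	fprintf(f, "0 0 0 0 0"); /* all zeros -> reset to SW defaults */
	return fclose(f) ? 1 : 0;
}
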
| 636 | |||
| 637 | static ssize_t aelpg_enable_store(struct device *dev, | ||
| 638 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 639 | { | ||
| 640 | struct gk20a *g = get_gk20a(dev); | ||
| 641 | unsigned long val = 0; | ||
| 642 | int status = 0; | ||
| 643 | union pmu_ap_cmd ap_cmd; | ||
| 644 | int err; | ||
| 645 | |||
| 646 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 647 | return -EINVAL; | ||
| 648 | |||
| 649 | err = gk20a_busy(g); | ||
| 650 | if (err) | ||
| 651 | return err; | ||
| 652 | |||
| 653 | if (g->pmu.pmu_ready) { | ||
| 654 | if (val && !g->aelpg_enabled) { | ||
| 655 | g->aelpg_enabled = true; | ||
| 656 | /* Enable AELPG */ | ||
| 657 | ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL; | ||
| 658 | ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; | ||
| 659 | status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); | ||
| 660 | } else if (!val && g->aelpg_enabled) { | ||
| 661 | g->aelpg_enabled = false; | ||
| 662 | /* Disable AELPG */ | ||
| 663 | ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; | ||
| 664 | ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; | ||
| 665 | status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); | ||
| 666 | } | ||
| 667 | } else { | ||
| 668 | nvgpu_info(g, "PMU is not ready, AELPG request failed"); | ||
| 669 | } | ||
| 670 | gk20a_idle(g); | ||
| 671 | |||
| 672 | nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" : | ||
| 673 | "disabled"); | ||
| 674 | |||
| 675 | return count; | ||
| 676 | } | ||
| 677 | |||
| 678 | static ssize_t aelpg_enable_read(struct device *dev, | ||
| 679 | struct device_attribute *attr, char *buf) | ||
| 680 | { | ||
| 681 | struct gk20a *g = get_gk20a(dev); | ||
| 682 | |||
| 683 | return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 1 : 0); | ||
| 684 | } | ||
| 685 | |||
| 686 | static DEVICE_ATTR(aelpg_enable, ROOTRW, | ||
| 687 | aelpg_enable_read, aelpg_enable_store); | ||
| 688 | |||
| 689 | |||
| 690 | static ssize_t allow_all_enable_read(struct device *dev, | ||
| 691 | struct device_attribute *attr, char *buf) | ||
| 692 | { | ||
| 693 | struct gk20a *g = get_gk20a(dev); | ||
| 694 | |||
| 695 | return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0); | ||
| 696 | } | ||
| 697 | |||
| 698 | static ssize_t allow_all_enable_store(struct device *dev, | ||
| 699 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 700 | { | ||
| 701 | struct gk20a *g = get_gk20a(dev); | ||
| 702 | unsigned long val = 0; | ||
| 703 | int err; | ||
| 704 | |||
| 705 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 706 | return -EINVAL; | ||
| 707 | |||
| 708 | 	err = gk20a_busy(g); | ||
| 708 | 	if (err) | ||
| 708 | 		return err; | ||
| 709 | 	g->allow_all = (val ? true : false); | ||
| 710 | 	gk20a_idle(g); | ||
| 711 | |||
| 712 | return count; | ||
| 713 | } | ||
| 714 | |||
| 715 | static DEVICE_ATTR(allow_all, ROOTRW, | ||
| 716 | allow_all_enable_read, allow_all_enable_store); | ||
| 717 | |||
| 718 | static ssize_t emc3d_ratio_store(struct device *dev, | ||
| 719 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 720 | { | ||
| 721 | struct gk20a *g = get_gk20a(dev); | ||
| 722 | unsigned long val = 0; | ||
| 723 | |||
| 724 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 725 | return -EINVAL; | ||
| 726 | |||
| 727 | g->emc3d_ratio = val; | ||
| 728 | |||
| 729 | return count; | ||
| 730 | } | ||
| 731 | |||
| 732 | static ssize_t emc3d_ratio_read(struct device *dev, | ||
| 733 | struct device_attribute *attr, char *buf) | ||
| 734 | { | ||
| 735 | struct gk20a *g = get_gk20a(dev); | ||
| 736 | |||
| 737 | return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio); | ||
| 738 | } | ||
| 739 | |||
| 740 | static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); | ||
| 741 | |||
| 742 | static ssize_t fmax_at_vmin_safe_read(struct device *dev, | ||
| 743 | struct device_attribute *attr, char *buf) | ||
| 744 | { | ||
| 745 | struct gk20a *g = get_gk20a(dev); | ||
| 746 | unsigned long gpu_fmax_at_vmin_hz = 0; | ||
| 747 | |||
| 748 | if (g->ops.clk.get_fmax_at_vmin_safe) | ||
| 749 | gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g); | ||
| 750 | |||
| 751 | return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz)); | ||
| 752 | } | ||
| 753 | |||
| 754 | static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL); | ||
| 755 | |||
| 756 | #ifdef CONFIG_PM | ||
| 757 | static ssize_t force_idle_store(struct device *dev, | ||
| 758 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 759 | { | ||
| 760 | struct gk20a *g = get_gk20a(dev); | ||
| 761 | unsigned long val = 0; | ||
| 762 | int err = 0; | ||
| 763 | |||
| 764 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 765 | return -EINVAL; | ||
| 766 | |||
| 767 | 	if (val) { | ||
| 768 | 		if (g->forced_idle) | ||
| 769 | 			return count; /* already forced idle, nothing to do */ | ||
| 770 | 		err = __gk20a_do_idle(g, false); | ||
| 771 | 		if (!err) { | ||
| 772 | 			g->forced_idle = 1; | ||
| 773 | 			nvgpu_info(g, "gpu is idle : %d", g->forced_idle); | ||
| 774 | 		} | ||
| 775 | 	} else { | ||
| 776 | 		if (!g->forced_idle) | ||
| 777 | 			return count; /* not forced idle, nothing to do */ | ||
| 778 | 		err = __gk20a_do_unidle(g); | ||
| 779 | 		if (!err) { | ||
| 780 | 			g->forced_idle = 0; | ||
| 781 | 			nvgpu_info(g, "gpu is idle : %d", g->forced_idle); | ||
| 782 | 		} | ||
| 783 | 	} | ||
| 790 | |||
| 791 | return count; | ||
| 792 | } | ||
| 793 | |||
| 794 | static ssize_t force_idle_read(struct device *dev, | ||
| 795 | struct device_attribute *attr, char *buf) | ||
| 796 | { | ||
| 797 | struct gk20a *g = get_gk20a(dev); | ||
| 798 | |||
| 799 | return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 1 : 0); | ||
| 800 | } | ||
| 801 | |||
| 802 | static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store); | ||
| 803 | #endif | ||
| 804 | |||
| 805 | static bool is_tpc_mask_valid(struct gk20a *g, u32 tpc_mask) | ||
| 806 | { | ||
| 807 | u32 i; | ||
| 808 | bool valid = false; | ||
| 809 | |||
| 810 | for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) { | ||
| 811 | if (tpc_mask == g->valid_tpc_mask[i]) { | ||
| 812 | valid = true; | ||
| 813 | break; | ||
| 814 | } | ||
| 815 | } | ||
| 816 | return valid; | ||
| 817 | } | ||
| 818 | |||
| 819 | static ssize_t tpc_pg_mask_read(struct device *dev, | ||
| 820 | struct device_attribute *attr, char *buf) | ||
| 821 | { | ||
| 822 | struct gk20a *g = get_gk20a(dev); | ||
| 823 | |||
| 824 | return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask); | ||
| 825 | } | ||
| 826 | |||
| 827 | static ssize_t tpc_pg_mask_store(struct device *dev, | ||
| 828 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 829 | { | ||
| 830 | struct gk20a *g = get_gk20a(dev); | ||
| 831 | struct gr_gk20a *gr = &g->gr; | ||
| 832 | unsigned long val = 0; | ||
| 833 | |||
| 834 | nvgpu_mutex_acquire(&g->tpc_pg_lock); | ||
| 835 | |||
| 836 | if (kstrtoul(buf, 10, &val) < 0) { | ||
| 837 | nvgpu_err(g, "invalid value"); | ||
| 838 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
| 839 | return -EINVAL; | ||
| 840 | } | ||
| 841 | |||
| 842 | if (val == g->tpc_pg_mask) { | ||
| 843 | nvgpu_info(g, "no value change, same mask already set"); | ||
| 844 | goto exit; | ||
| 845 | } | ||
| 846 | |||
| 847 | if (gr->ctx_vars.golden_image_size) { | ||
| 848 | nvgpu_err(g, "golden image size already initialized"); | ||
| 849 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
| 850 | return -ENODEV; | ||
| 851 | } | ||
| 852 | |||
| 853 | 	/* Check that the value from userspace is one of the | ||
| 854 | 	 * valid TPC configurations. | ||
| 855 | 	 */ | ||
| 856 | if (is_tpc_mask_valid(g, (u32)val)) { | ||
| 857 | g->tpc_pg_mask = val; | ||
| 858 | } else { | ||
| 859 | nvgpu_err(g, "TPC-PG mask is invalid"); | ||
| 860 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
| 861 | return -EINVAL; | ||
| 862 | } | ||
| 863 | exit: | ||
| 864 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
| 865 | |||
| 866 | return count; | ||
| 867 | } | ||
| 868 | |||
| 869 | static DEVICE_ATTR(tpc_pg_mask, ROOTRW, tpc_pg_mask_read, tpc_pg_mask_store); | ||
| 870 | |||
| 871 | static ssize_t tpc_fs_mask_store(struct device *dev, | ||
| 872 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 873 | { | ||
| 874 | struct gk20a *g = get_gk20a(dev); | ||
| 875 | unsigned long val = 0; | ||
| 876 | |||
| 877 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 878 | return -EINVAL; | ||
| 879 | |||
| 880 | if (!g->gr.gpc_tpc_mask) | ||
| 881 | return -ENODEV; | ||
| 882 | |||
| 883 | if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { | ||
| 884 | g->gr.gpc_tpc_mask[0] = val; | ||
| 885 | g->tpc_fs_mask_user = val; | ||
| 886 | |||
| 887 | g->ops.gr.set_gpc_tpc_mask(g, 0); | ||
| 888 | |||
| 889 | nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image); | ||
| 890 | g->gr.ctx_vars.local_golden_image = NULL; | ||
| 891 | g->gr.ctx_vars.golden_image_initialized = false; | ||
| 892 | g->gr.ctx_vars.golden_image_size = 0; | ||
| 893 | /* Cause next poweron to reinit just gr */ | ||
| 894 | g->gr.sw_ready = false; | ||
| 895 | } | ||
| 896 | |||
| 897 | return count; | ||
| 898 | } | ||
| 899 | |||
| 900 | static ssize_t tpc_fs_mask_read(struct device *dev, | ||
| 901 | struct device_attribute *attr, char *buf) | ||
| 902 | { | ||
| 903 | struct gk20a *g = get_gk20a(dev); | ||
| 904 | struct gr_gk20a *gr = &g->gr; | ||
| 905 | u32 gpc_index; | ||
| 906 | u32 tpc_fs_mask = 0; | ||
| 907 | int err = 0; | ||
| 908 | |||
| 909 | err = gk20a_busy(g); | ||
| 910 | if (err) | ||
| 911 | return err; | ||
| 912 | |||
| 913 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
| 914 | if (g->ops.gr.get_gpc_tpc_mask) | ||
| 915 | tpc_fs_mask |= | ||
| 916 | g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << | ||
| 917 | (gr->max_tpc_per_gpc_count * gpc_index); | ||
| 918 | } | ||
| 919 | |||
| 920 | gk20a_idle(g); | ||
| 921 | |||
| 922 | return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask); | ||
| 923 | } | ||
| 924 | |||
| 925 | static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store); | ||
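
Editor's note: tpc_fs_mask_read() packs one TPC mask per GPC into a single word, shifting each GPC's mask left by max_tpc_per_gpc_count * gpc_index. A self-contained sketch of that packing with made-up counts:

#include <stdio.h>

int main(void)
{
	unsigned int gpc_count = 2, max_tpc_per_gpc = 4; /* illustrative */
	unsigned int per_gpc_mask[2] = { 0xf, 0xf };     /* 4 TPCs per GPC */
	unsigned int packed = 0, i;

	for (i = 0; i < gpc_count; i++)
		packed |= per_gpc_mask[i] << (max_tpc_per_gpc * i);

	printf("0x%x\n", packed); /* prints 0xff */
	return 0;
}
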
| 926 | |||
| 927 | static ssize_t min_timeslice_us_read(struct device *dev, | ||
| 928 | struct device_attribute *attr, char *buf) | ||
| 929 | { | ||
| 930 | struct gk20a *g = get_gk20a(dev); | ||
| 931 | |||
| 932 | return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us); | ||
| 933 | } | ||
| 934 | |||
| 935 | static ssize_t min_timeslice_us_store(struct device *dev, | ||
| 936 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 937 | { | ||
| 938 | struct gk20a *g = get_gk20a(dev); | ||
| 939 | unsigned long val; | ||
| 940 | |||
| 941 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 942 | return -EINVAL; | ||
| 943 | |||
| 944 | if (val > g->max_timeslice_us) | ||
| 945 | return -EINVAL; | ||
| 946 | |||
| 947 | g->min_timeslice_us = val; | ||
| 948 | |||
| 949 | return count; | ||
| 950 | } | ||
| 951 | |||
| 952 | static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read, | ||
| 953 | min_timeslice_us_store); | ||
| 954 | |||
| 955 | static ssize_t max_timeslice_us_read(struct device *dev, | ||
| 956 | struct device_attribute *attr, char *buf) | ||
| 957 | { | ||
| 958 | struct gk20a *g = get_gk20a(dev); | ||
| 959 | |||
| 960 | return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us); | ||
| 961 | } | ||
| 962 | |||
| 963 | static ssize_t max_timeslice_us_store(struct device *dev, | ||
| 964 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 965 | { | ||
| 966 | struct gk20a *g = get_gk20a(dev); | ||
| 967 | unsigned long val; | ||
| 968 | |||
| 969 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 970 | return -EINVAL; | ||
| 971 | |||
| 972 | if (val < g->min_timeslice_us) | ||
| 973 | return -EINVAL; | ||
| 974 | |||
| 975 | g->max_timeslice_us = val; | ||
| 976 | |||
| 977 | return count; | ||
| 978 | } | ||
| 979 | |||
| 980 | static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read, | ||
| 981 | max_timeslice_us_store); | ||
| 982 | |||
| 983 | static ssize_t czf_bypass_store(struct device *dev, | ||
| 984 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 985 | { | ||
| 986 | struct gk20a *g = get_gk20a(dev); | ||
| 987 | unsigned long val; | ||
| 988 | |||
| 989 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 990 | return -EINVAL; | ||
| 991 | |||
| 992 | if (val >= 4) | ||
| 993 | return -EINVAL; | ||
| 994 | |||
| 995 | g->gr.czf_bypass = val; | ||
| 996 | |||
| 997 | return count; | ||
| 998 | } | ||
| 999 | |||
| 1000 | static ssize_t czf_bypass_read(struct device *dev, | ||
| 1001 | struct device_attribute *attr, char *buf) | ||
| 1002 | { | ||
| 1003 | struct gk20a *g = get_gk20a(dev); | ||
| 1004 | |||
| 1005 | 	return snprintf(buf, PAGE_SIZE, "%d\n", g->gr.czf_bypass); | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); | ||
| 1009 | |||
| 1010 | static ssize_t pd_max_batches_store(struct device *dev, | ||
| 1011 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 1012 | { | ||
| 1013 | struct gk20a *g = get_gk20a(dev); | ||
| 1014 | unsigned long val; | ||
| 1015 | |||
| 1016 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 1017 | return -EINVAL; | ||
| 1018 | |||
| 1019 | if (val > 64) | ||
| 1020 | return -EINVAL; | ||
| 1021 | |||
| 1022 | g->gr.pd_max_batches = val; | ||
| 1023 | |||
| 1024 | return count; | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | static ssize_t pd_max_batches_read(struct device *dev, | ||
| 1028 | struct device_attribute *attr, char *buf) | ||
| 1029 | { | ||
| 1030 | struct gk20a *g = get_gk20a(dev); | ||
| 1031 | |||
| 1032 | 	return snprintf(buf, PAGE_SIZE, "%d\n", g->gr.pd_max_batches); | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store); | ||
| 1036 | |||
| 1037 | static ssize_t gfxp_wfi_timeout_count_store(struct device *dev, | ||
| 1038 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 1039 | { | ||
| 1040 | struct gk20a *g = get_gk20a(dev); | ||
| 1041 | struct gr_gk20a *gr = &g->gr; | ||
| 1042 | unsigned long val = 0; | ||
| 1043 | int err = -1; | ||
| 1044 | |||
| 1045 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 1046 | return -EINVAL; | ||
| 1047 | |||
| 1048 | if (g->ops.gr.get_max_gfxp_wfi_timeout_count) { | ||
| 1049 | if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g)) | ||
| 1050 | return -EINVAL; | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | gr->gfxp_wfi_timeout_count = val; | ||
| 1054 | |||
| 1055 | if (g->ops.gr.init_preemption_state && g->power_on) { | ||
| 1056 | err = gk20a_busy(g); | ||
| 1057 | if (err) | ||
| 1058 | return err; | ||
| 1059 | |||
| 1060 | err = gr_gk20a_elpg_protected_call(g, | ||
| 1061 | g->ops.gr.init_preemption_state(g)); | ||
| 1062 | |||
| 1063 | gk20a_idle(g); | ||
| 1064 | |||
| 1065 | if (err) | ||
| 1066 | return err; | ||
| 1067 | } | ||
| 1068 | return count; | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev, | ||
| 1072 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 1073 | { | ||
| 1074 | struct gk20a *g = get_gk20a(dev); | ||
| 1075 | struct gr_gk20a *gr = &g->gr; | ||
| 1076 | int err = -1; | ||
| 1077 | |||
| 1078 | if (count > 0 && buf[0] == 's') | ||
| 1079 | /* sysclk */ | ||
| 1080 | gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK; | ||
| 1081 | else | ||
| 1082 | /* usec */ | ||
| 1083 | gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC; | ||
| 1084 | |||
| 1085 | if (g->ops.gr.init_preemption_state && g->power_on) { | ||
| 1086 | err = gk20a_busy(g); | ||
| 1087 | if (err) | ||
| 1088 | return err; | ||
| 1089 | |||
| 1090 | err = gr_gk20a_elpg_protected_call(g, | ||
| 1091 | g->ops.gr.init_preemption_state(g)); | ||
| 1092 | |||
| 1093 | gk20a_idle(g); | ||
| 1094 | |||
| 1095 | if (err) | ||
| 1096 | return err; | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | return count; | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | static ssize_t gfxp_wfi_timeout_count_read(struct device *dev, | ||
| 1103 | struct device_attribute *attr, char *buf) | ||
| 1104 | { | ||
| 1105 | struct gk20a *g = get_gk20a(dev); | ||
| 1106 | struct gr_gk20a *gr = &g->gr; | ||
| 1107 | u32 val = gr->gfxp_wfi_timeout_count; | ||
| 1108 | |||
| 1109 | return snprintf(buf, PAGE_SIZE, "%d\n", val); | ||
| 1110 | } | ||
| 1111 | |||
| 1112 | static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev, | ||
| 1113 | struct device_attribute *attr, char *buf) | ||
| 1114 | { | ||
| 1115 | struct gk20a *g = get_gk20a(dev); | ||
| 1116 | struct gr_gk20a *gr = &g->gr; | ||
| 1117 | |||
| 1118 | if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC) | ||
| 1119 | return snprintf(buf, PAGE_SIZE, "usec\n"); | ||
| 1120 | else | ||
| 1121 | return snprintf(buf, PAGE_SIZE, "sysclk\n"); | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH), | ||
| 1125 | gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store); | ||
| 1126 | |||
| 1127 | static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH), | ||
| 1128 | gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store); | ||
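
Editor's note: per the store() above, any input beginning with 's' selects sysclk units and anything else selects microseconds. A userspace sketch (hypothetical path, root-only given the permission bits):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical path; adjust for the actual device node. */
	int fd = open("/sys/devices/gpu.0/gfxp_wfi_timeout_unit", O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return 1;
	n = write(fd, "sysclk", 6); /* anything not starting with 's' -> usec */
	close(fd);
	return n == 6 ? 0 : 1;
}
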
| 1129 | |||
| 1130 | static ssize_t comptag_mem_deduct_store(struct device *dev, | ||
| 1131 | struct device_attribute *attr, | ||
| 1132 | const char *buf, size_t count) | ||
| 1133 | { | ||
| 1134 | struct gk20a *g = get_gk20a(dev); | ||
| 1135 | unsigned long val; | ||
| 1136 | |||
| 1137 | if (kstrtoul(buf, 10, &val) < 0) | ||
| 1138 | return -EINVAL; | ||
| 1139 | |||
| 1140 | if (val >= totalram_size_in_mb) { | ||
| 1141 | 		dev_err(dev, "comptag_mem_deduct cannot be set above %lu", | ||
| 1142 | totalram_size_in_mb); | ||
| 1143 | return -EINVAL; | ||
| 1144 | } | ||
| 1145 | |||
| 1146 | g->gr.comptag_mem_deduct = val; | ||
| 1147 | /* Deduct the part taken by the running system */ | ||
| 1148 | g->gr.max_comptag_mem -= val; | ||
| 1149 | |||
| 1150 | return count; | ||
| 1151 | } | ||
| 1152 | |||
| 1153 | static ssize_t comptag_mem_deduct_show(struct device *dev, | ||
| 1154 | struct device_attribute *attr, char *buf) | ||
| 1155 | { | ||
| 1156 | struct gk20a *g = get_gk20a(dev); | ||
| 1157 | |||
| 1158 | 	return snprintf(buf, PAGE_SIZE, "%d\n", g->gr.comptag_mem_deduct); | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | static DEVICE_ATTR(comptag_mem_deduct, ROOTRW, | ||
| 1162 | comptag_mem_deduct_show, comptag_mem_deduct_store); | ||
| 1163 | |||
| 1164 | void nvgpu_remove_sysfs(struct device *dev) | ||
| 1165 | { | ||
| 1166 | device_remove_file(dev, &dev_attr_elcg_enable); | ||
| 1167 | device_remove_file(dev, &dev_attr_blcg_enable); | ||
| 1168 | device_remove_file(dev, &dev_attr_slcg_enable); | ||
| 1169 | device_remove_file(dev, &dev_attr_ptimer_scale_factor); | ||
| 1170 | device_remove_file(dev, &dev_attr_ptimer_ref_freq); | ||
| 1171 | device_remove_file(dev, &dev_attr_ptimer_src_freq); | ||
| 1172 | device_remove_file(dev, &dev_attr_elpg_enable); | ||
| 1173 | device_remove_file(dev, &dev_attr_mscg_enable); | ||
| 1174 | device_remove_file(dev, &dev_attr_emc3d_ratio); | ||
| 1175 | device_remove_file(dev, &dev_attr_ldiv_slowdown_factor); | ||
| 1176 | |||
| 1177 | device_remove_file(dev, &dev_attr_fmax_at_vmin_safe); | ||
| 1178 | |||
| 1179 | device_remove_file(dev, &dev_attr_counters); | ||
| 1180 | device_remove_file(dev, &dev_attr_counters_reset); | ||
| 1181 | device_remove_file(dev, &dev_attr_load); | ||
| 1182 | device_remove_file(dev, &dev_attr_railgate_delay); | ||
| 1183 | device_remove_file(dev, &dev_attr_is_railgated); | ||
| 1184 | #ifdef CONFIG_PM | ||
| 1185 | device_remove_file(dev, &dev_attr_force_idle); | ||
| 1186 | device_remove_file(dev, &dev_attr_railgate_enable); | ||
| 1187 | #endif | ||
| 1188 | device_remove_file(dev, &dev_attr_aelpg_param); | ||
| 1189 | device_remove_file(dev, &dev_attr_aelpg_enable); | ||
| 1190 | device_remove_file(dev, &dev_attr_allow_all); | ||
| 1191 | device_remove_file(dev, &dev_attr_tpc_fs_mask); | ||
| 1192 | device_remove_file(dev, &dev_attr_tpc_pg_mask); | ||
| 1193 | device_remove_file(dev, &dev_attr_min_timeslice_us); | ||
| 1194 | device_remove_file(dev, &dev_attr_max_timeslice_us); | ||
| 1195 | |||
| 1196 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 1197 | nvgpu_nvhost_remove_symlink(get_gk20a(dev)); | ||
| 1198 | #endif | ||
| 1199 | |||
| 1200 | device_remove_file(dev, &dev_attr_czf_bypass); | ||
| 1201 | device_remove_file(dev, &dev_attr_pd_max_batches); | ||
| 1202 | device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count); | ||
| 1203 | device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit); | ||
| 1204 | device_remove_file(dev, &dev_attr_gpu_powered_on); | ||
| 1205 | |||
| 1206 | device_remove_file(dev, &dev_attr_comptag_mem_deduct); | ||
| 1207 | |||
| 1208 | if (strcmp(dev_name(dev), "gpu.0")) { | ||
| 1209 | struct kobject *kobj = &dev->kobj; | ||
| 1210 | struct device *parent = container_of((kobj->parent), | ||
| 1211 | struct device, kobj); | ||
| 1212 | sysfs_remove_link(&parent->kobj, "gpu.0"); | ||
| 1213 | } | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | int nvgpu_create_sysfs(struct device *dev) | ||
| 1217 | { | ||
| 1218 | struct gk20a *g = get_gk20a(dev); | ||
| 1219 | int error = 0; | ||
| 1220 | |||
| 1221 | error |= device_create_file(dev, &dev_attr_elcg_enable); | ||
| 1222 | error |= device_create_file(dev, &dev_attr_blcg_enable); | ||
| 1223 | error |= device_create_file(dev, &dev_attr_slcg_enable); | ||
| 1224 | error |= device_create_file(dev, &dev_attr_ptimer_scale_factor); | ||
| 1225 | error |= device_create_file(dev, &dev_attr_ptimer_ref_freq); | ||
| 1226 | error |= device_create_file(dev, &dev_attr_ptimer_src_freq); | ||
| 1227 | error |= device_create_file(dev, &dev_attr_elpg_enable); | ||
| 1228 | error |= device_create_file(dev, &dev_attr_mscg_enable); | ||
| 1229 | error |= device_create_file(dev, &dev_attr_emc3d_ratio); | ||
| 1230 | error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor); | ||
| 1231 | |||
| 1232 | error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe); | ||
| 1233 | |||
| 1234 | error |= device_create_file(dev, &dev_attr_counters); | ||
| 1235 | error |= device_create_file(dev, &dev_attr_counters_reset); | ||
| 1236 | error |= device_create_file(dev, &dev_attr_load); | ||
| 1237 | error |= device_create_file(dev, &dev_attr_railgate_delay); | ||
| 1238 | error |= device_create_file(dev, &dev_attr_is_railgated); | ||
| 1239 | #ifdef CONFIG_PM | ||
| 1240 | error |= device_create_file(dev, &dev_attr_force_idle); | ||
| 1241 | error |= device_create_file(dev, &dev_attr_railgate_enable); | ||
| 1242 | #endif | ||
| 1243 | error |= device_create_file(dev, &dev_attr_aelpg_param); | ||
| 1244 | error |= device_create_file(dev, &dev_attr_aelpg_enable); | ||
| 1245 | error |= device_create_file(dev, &dev_attr_allow_all); | ||
| 1246 | error |= device_create_file(dev, &dev_attr_tpc_fs_mask); | ||
| 1247 | error |= device_create_file(dev, &dev_attr_tpc_pg_mask); | ||
| 1248 | error |= device_create_file(dev, &dev_attr_min_timeslice_us); | ||
| 1249 | error |= device_create_file(dev, &dev_attr_max_timeslice_us); | ||
| 1250 | |||
| 1251 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 1252 | error |= nvgpu_nvhost_create_symlink(g); | ||
| 1253 | #endif | ||
| 1254 | |||
| 1255 | error |= device_create_file(dev, &dev_attr_czf_bypass); | ||
| 1256 | error |= device_create_file(dev, &dev_attr_pd_max_batches); | ||
| 1257 | error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count); | ||
| 1258 | error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit); | ||
| 1259 | error |= device_create_file(dev, &dev_attr_gpu_powered_on); | ||
| 1260 | |||
| 1261 | error |= device_create_file(dev, &dev_attr_comptag_mem_deduct); | ||
| 1262 | |||
| 1263 | if (strcmp(dev_name(dev), "gpu.0")) { | ||
| 1264 | struct kobject *kobj = &dev->kobj; | ||
| 1265 | struct device *parent = container_of((kobj->parent), | ||
| 1266 | struct device, kobj); | ||
| 1267 | error |= sysfs_create_link(&parent->kobj, | ||
| 1268 | &dev->kobj, "gpu.0"); | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | if (error) | ||
| 1272 | 		nvgpu_err(g, "Failed to create sysfs attributes!"); | ||
| 1273 | |||
| 1274 | return error; | ||
| 1275 | } | ||
diff --git a/include/os/linux/sysfs.h b/include/os/linux/sysfs.h new file mode 100644 index 0000000..8092584 --- /dev/null +++ b/include/os/linux/sysfs.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | #ifndef NVGPU_SYSFS_H | ||
| 17 | #define NVGPU_SYSFS_H | ||
| 18 | |||
| 19 | struct device; | ||
| 20 | |||
| 21 | int nvgpu_create_sysfs(struct device *dev); | ||
| 22 | void nvgpu_remove_sysfs(struct device *dev); | ||
| 23 | |||
| 24 | #endif | ||
diff --git a/include/os/linux/thread.c b/include/os/linux/thread.c new file mode 100644 index 0000000..c56bff6 --- /dev/null +++ b/include/os/linux/thread.c | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/kthread.h> | ||
| 18 | |||
| 19 | #include <nvgpu/thread.h> | ||
| 20 | #include <nvgpu/timers.h> | ||
| 21 | |||
| 22 | int nvgpu_thread_proxy(void *threaddata) | ||
| 23 | { | ||
| 24 | struct nvgpu_thread *thread = threaddata; | ||
| 25 | int ret = thread->fn(thread->data); | ||
| 26 | |||
| 27 | thread->running = false; | ||
| 28 | return ret; | ||
| 29 | } | ||
| 30 | |||
| 31 | int nvgpu_thread_create(struct nvgpu_thread *thread, | ||
| 32 | void *data, | ||
| 33 | int (*threadfn)(void *data), const char *name) | ||
| 34 | { | ||
| 35 | struct task_struct *task = kthread_create(nvgpu_thread_proxy, | ||
| 36 | thread, name); | ||
| 37 | if (IS_ERR(task)) | ||
| 38 | return PTR_ERR(task); | ||
| 39 | |||
| 40 | thread->task = task; | ||
| 41 | thread->fn = threadfn; | ||
| 42 | thread->data = data; | ||
| 43 | thread->running = true; | ||
| 44 | wake_up_process(task); | ||
| 45 | return 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | void nvgpu_thread_stop(struct nvgpu_thread *thread) | ||
| 49 | { | ||
| 50 | if (thread->task) { | ||
| 51 | kthread_stop(thread->task); | ||
| 52 | thread->task = NULL; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) | ||
| 57 | { | ||
| 58 | return kthread_should_stop(); | ||
| 59 | } | ||
| 60 | |||
| 61 | bool nvgpu_thread_is_running(struct nvgpu_thread *thread) | ||
| 62 | { | ||
| 63 | return ACCESS_ONCE(thread->running); | ||
| 64 | } | ||
| 65 | |||
| 66 | void nvgpu_thread_join(struct nvgpu_thread *thread) | ||
| 67 | { | ||
| 68 | while (ACCESS_ONCE(thread->running)) | ||
| 69 | nvgpu_msleep(10); | ||
| 70 | } | ||
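
Editor's note: a sketch of how this wrapper is typically used; the worker function polls nvgpu_thread_should_stop() so that nvgpu_thread_stop() can terminate it cleanly. struct my_ctx, my_poll_once() and the thread name are illustrative stand-ins:

#include <nvgpu/thread.h>
#include <nvgpu/timers.h>

struct my_ctx {
	struct nvgpu_thread worker;
};

static void my_poll_once(struct my_ctx *ctx)
{
	/* one unit of work */
}

static int my_worker_fn(void *data)
{
	struct my_ctx *ctx = data;

	while (!nvgpu_thread_should_stop(&ctx->worker)) {
		my_poll_once(ctx);
		nvgpu_msleep(10);
	}
	return 0;
}

static int my_start(struct my_ctx *ctx)
{
	return nvgpu_thread_create(&ctx->worker, ctx, my_worker_fn,
				   "my_worker");
}

Teardown is nvgpu_thread_stop(&ctx->worker), which makes kthread_should_stop() return true inside the loop and then joins the task.
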
diff --git a/include/os/linux/timers.c b/include/os/linux/timers.c new file mode 100644 index 0000000..018fd2d --- /dev/null +++ b/include/os/linux/timers.c | |||
| @@ -0,0 +1,269 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/ktime.h> | ||
| 18 | #include <linux/delay.h> | ||
| 19 | |||
| 20 | #include <nvgpu/timers.h> | ||
| 21 | #include <nvgpu/soc.h> | ||
| 22 | #include <nvgpu/gk20a.h> | ||
| 23 | |||
| 24 | #include "platform_gk20a.h" | ||
| 25 | |||
| 26 | /* | ||
| 27 |  * Returns 1 if the platform is pre-silicon and should skip timeout checking. | ||
| 28 |  * Setting %NVGPU_TIMER_NO_PRE_SI makes this always return 0 (i.e., the | ||
| 29 |  * timeout check is done regardless of platform). | ||
| 30 |  */ | ||
| 31 | static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout) | ||
| 32 | { | ||
| 33 | if (timeout->flags & NVGPU_TIMER_NO_PRE_SI) | ||
| 34 | return 0; | ||
| 35 | |||
| 36 | return !nvgpu_platform_is_silicon(timeout->g); | ||
| 37 | } | ||
| 38 | |||
| 39 | /** | ||
| 40 | * nvgpu_timeout_init - Init timer. | ||
| 41 | * | ||
| 42 | * @g - nvgpu device. | ||
| 43 | * @timeout - The timer. | ||
| 44 | * @duration - Timeout in milliseconds or number of retries. | ||
| 45 | * @flags - Flags for timer. | ||
| 46 | * | ||
| 47 |  * This starts the timeout duration now, i.e., when this function is | ||
| 48 |  * called. Available flags to pass in @flags: | ||
| 49 | * | ||
| 50 | * %NVGPU_TIMER_CPU_TIMER | ||
| 51 | * %NVGPU_TIMER_RETRY_TIMER | ||
| 52 | * %NVGPU_TIMER_NO_PRE_SI | ||
| 53 | * %NVGPU_TIMER_SILENT_TIMEOUT | ||
| 54 | * | ||
| 55 |  * If neither %NVGPU_TIMER_CPU_TIMER nor %NVGPU_TIMER_RETRY_TIMER is passed, | ||
| 56 |  * a CPU timer is used by default. | ||
| 57 | */ | ||
| 58 | int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout, | ||
| 59 | u32 duration, unsigned long flags) | ||
| 60 | { | ||
| 61 | if (flags & ~NVGPU_TIMER_FLAG_MASK) | ||
| 62 | return -EINVAL; | ||
| 63 | |||
| 64 | memset(timeout, 0, sizeof(*timeout)); | ||
| 65 | |||
| 66 | timeout->g = g; | ||
| 67 | timeout->flags = flags; | ||
| 68 | |||
| 69 | if (flags & NVGPU_TIMER_RETRY_TIMER) | ||
| 70 | timeout->retries.max = duration; | ||
| 71 | else | ||
| 72 | timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(), | ||
| 73 | (s64)NSEC_PER_MSEC * duration)); | ||
| 74 | |||
| 75 | return 0; | ||
| 76 | } | ||
| 77 | |||
| 78 | static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout, | ||
| 79 | void *caller, | ||
| 80 | const char *fmt, va_list args) | ||
| 81 | { | ||
| 82 | struct gk20a *g = timeout->g; | ||
| 83 | ktime_t now = ktime_get(); | ||
| 84 | |||
| 85 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
| 86 | return 0; | ||
| 87 | |||
| 88 | if (ktime_after(now, ns_to_ktime(timeout->time))) { | ||
| 89 | if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { | ||
| 90 | char buf[128]; | ||
| 91 | |||
| 92 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
| 93 | |||
| 94 | nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf); | ||
| 95 | } | ||
| 96 | |||
| 97 | return -ETIMEDOUT; | ||
| 98 | } | ||
| 99 | |||
| 100 | return 0; | ||
| 101 | } | ||
| 102 | |||
| 103 | static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout, | ||
| 104 | void *caller, | ||
| 105 | const char *fmt, va_list args) | ||
| 106 | { | ||
| 107 | struct gk20a *g = timeout->g; | ||
| 108 | |||
| 109 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
| 110 | return 0; | ||
| 111 | |||
| 112 | if (timeout->retries.attempted >= timeout->retries.max) { | ||
| 113 | if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { | ||
| 114 | char buf[128]; | ||
| 115 | |||
| 116 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
| 117 | |||
| 118 | nvgpu_err(g, "No more retries @ %pF %s", caller, buf); | ||
| 119 | } | ||
| 120 | |||
| 121 | return -ETIMEDOUT; | ||
| 122 | } | ||
| 123 | |||
| 124 | timeout->retries.attempted++; | ||
| 125 | |||
| 126 | return 0; | ||
| 127 | } | ||
| 128 | |||
| 129 | /** | ||
| 130 | * __nvgpu_timeout_expired_msg - Check if a timeout has expired. | ||
| 131 | * | ||
| 132 | * @timeout - The timeout to check. | ||
| 133 | * @caller - Address of the caller of this function. | ||
| 134 | * @fmt - The fmt string. | ||
| 135 | * | ||
| 136 | * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise. | ||
| 137 | * | ||
| 138 | * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout | ||
| 139 | * then a message is printed based on %fmt. | ||
| 140 | */ | ||
| 141 | int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout, | ||
| 142 | void *caller, const char *fmt, ...) | ||
| 143 | { | ||
| 144 | int ret; | ||
| 145 | va_list args; | ||
| 146 | |||
| 147 | va_start(args, fmt); | ||
| 148 | if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) | ||
| 149 | ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt, | ||
| 150 | args); | ||
| 151 | else | ||
| 152 | ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt, | ||
| 153 | args); | ||
| 154 | va_end(args); | ||
| 155 | |||
| 156 | return ret; | ||
| 157 | } | ||
| 158 | |||
| 159 | /** | ||
| 160 | * nvgpu_timeout_peek_expired - Check the status of a timeout. | ||
| 161 | * | ||
| 162 | * @timeout - The timeout to check. | ||
| 163 | * | ||
| 164 |  * Returns non-zero if the timeout has expired, zero otherwise. For retry | ||
| 165 |  * timers this does not increment the underlying retry count, and no | ||
| 166 |  * message is printed on expiry. | ||
| 167 | * | ||
| 168 | * This function honors the pre-Si check as well. | ||
| 169 | */ | ||
| 170 | int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout) | ||
| 171 | { | ||
| 172 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
| 173 | return 0; | ||
| 174 | |||
| 175 | if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) | ||
| 176 | return timeout->retries.attempted >= timeout->retries.max; | ||
| 177 | else | ||
| 178 | return ktime_after(ktime_get(), ns_to_ktime(timeout->time)); | ||
| 179 | } | ||
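
Editor's note: a sketch of the intended polling pattern with a retry timer. my_condition_met() is an illustrative stand-in; in the driver a wrapper macro typically supplies the caller address, here it is passed directly:

#include <nvgpu/timers.h>

static bool my_condition_met(struct gk20a *g); /* stand-in */

static int wait_for_condition(struct gk20a *g)
{
	struct nvgpu_timeout timeout;

	/* up to 1000 retries, 10 us apart */
	nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);

	do {
		if (my_condition_met(g))
			return 0;
		nvgpu_udelay(10);
	} while (!__nvgpu_timeout_expired_msg(&timeout,
			__builtin_return_address(0),
			"waiting for condition"));

	return -ETIMEDOUT;
}
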
| 180 | |||
| 181 | /** | ||
| 182 | * nvgpu_udelay - Delay for some number of microseconds. | ||
| 183 | * | ||
| 184 | * @usecs - Microseconds to wait for. | ||
| 185 | * | ||
| 186 |  * Wait for at least @usecs microseconds. This is not guaranteed to be | ||
| 187 |  * perfectly accurate. It is normally backed by a busy-loop, so waits should | ||
| 188 |  * be kept short, below 100 us. If longer delays are necessary then | ||
| 189 | * nvgpu_msleep() should be preferred. | ||
| 190 | * | ||
| 191 | * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This | ||
| 192 | * function will attempt to not use a busy-loop. | ||
| 193 | */ | ||
| 194 | void nvgpu_udelay(unsigned int usecs) | ||
| 195 | { | ||
| 196 | udelay(usecs); | ||
| 197 | } | ||
| 198 | |||
| 199 | /** | ||
| 200 | * nvgpu_usleep_range - Sleep for a range of microseconds. | ||
| 201 | * | ||
| 202 | * @min_us - Minimum wait time. | ||
| 203 | * @max_us - Maximum wait time. | ||
| 204 | * | ||
| 205 | * Wait for some number of microseconds between @min_us and @max_us. This, | ||
| 206 | * unlike nvgpu_udelay(), will attempt to sleep for the passed number of | ||
| 207 | * microseconds instead of busy looping. Not all platforms support this, | ||
| 208 | * and in that case this reduces to nvgpu_udelay(min_us). | ||
| 209 | * | ||
| 210 | * Linux note: this is not safe to use in atomic context. If you are in | ||
| 211 | * atomic context you must use nvgpu_udelay(). | ||
| 212 | */ | ||
| 213 | void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us) | ||
| 214 | { | ||
| 215 | usleep_range(min_us, max_us); | ||
| 216 | } | ||
| 217 | |||
| 218 | /** | ||
| 219 | * nvgpu_msleep - Sleep for some milliseconds. | ||
| 220 | * | ||
| 221 | * @msecs - Sleep for at least this many milliseconds. | ||
| 222 | * | ||
| 223 |  * Sleep for at least @msecs milliseconds. For small @msecs (less than 20 ms | ||
| 224 |  * or so) the sleep can be significantly longer than requested due to | ||
| 225 |  * scheduling overhead and timer granularity. | ||
| 226 | */ | ||
| 227 | void nvgpu_msleep(unsigned int msecs) | ||
| 228 | { | ||
| 229 | msleep(msecs); | ||
| 230 | } | ||
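
Editor's note: taken together, the comments above imply a simple decision rule for delays. The thresholds below are illustrative, not normative:

#include <nvgpu/timers.h>

static void my_delay(unsigned int usecs, bool can_sleep)
{
	if (!can_sleep || usecs < 100)
		nvgpu_udelay(usecs);            /* atomic-safe busy-wait */
	else if (usecs < 20 * 1000)
		nvgpu_usleep_range(usecs, usecs + usecs / 2);
	else
		nvgpu_msleep(usecs / 1000);     /* long waits: sleep in ms */
}
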
| 231 | |||
| 232 | /** | ||
| 233 | * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock. | ||
| 234 | * | ||
| 235 | * Return a clock in millisecond units. The start time of the clock is | ||
| 236 | * unspecified; the time returned can be compared with older ones to measure | ||
| 237 | * durations. The source clock does not jump when the system clock is adjusted. | ||
| 238 | */ | ||
| 239 | s64 nvgpu_current_time_ms(void) | ||
| 240 | { | ||
| 241 | return ktime_to_ms(ktime_get()); | ||
| 242 | } | ||
| 243 | |||
| 244 | /** | ||
| 245 | * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock. | ||
| 246 | * | ||
| 247 | * Return a clock in nanosecond units. The start time of the clock is | ||
| 248 | * unspecified; the time returned can be compared with older ones to measure | ||
| 249 | * durations. The source clock does not jump when the system clock is adjusted. | ||
| 250 | */ | ||
| 251 | s64 nvgpu_current_time_ns(void) | ||
| 252 | { | ||
| 253 | return ktime_to_ns(ktime_get()); | ||
| 254 | } | ||
| 255 | |||
| 256 | /** | ||
| 257 | * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp. | ||
| 258 | * | ||
| 259 |  * Return a "high resolution" time stamp. Exactly what it is does not really | ||
| 260 |  * matter, so long as it generally returns unique, monotonically increasing | ||
| 261 |  * values; wrap-around _is_ possible in a system running for long | ||
| 262 |  * enough. | ||
| 263 | * | ||
| 264 | * Note: what high resolution means is system dependent. | ||
| 265 | */ | ||
| 266 | u64 nvgpu_hr_timestamp(void) | ||
| 267 | { | ||
| 268 | return get_cycles(); | ||
| 269 | } | ||
diff --git a/include/os/linux/vgpu/fecs_trace_vgpu.c b/include/os/linux/vgpu/fecs_trace_vgpu.c new file mode 100644 index 0000000..02a381e --- /dev/null +++ b/include/os/linux/vgpu/fecs_trace_vgpu.c | |||
| @@ -0,0 +1,225 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <uapi/linux/nvgpu.h> | ||
| 18 | |||
| 19 | #include <nvgpu/kmem.h> | ||
| 20 | #include <nvgpu/bug.h> | ||
| 21 | #include <nvgpu/enabled.h> | ||
| 22 | #include <nvgpu/ctxsw_trace.h> | ||
| 23 | #include <nvgpu/vgpu/vgpu_ivm.h> | ||
| 24 | #include <nvgpu/vgpu/tegra_vgpu.h> | ||
| 25 | #include <nvgpu/vgpu/vgpu.h> | ||
| 26 | #include <nvgpu/gk20a.h> | ||
| 27 | |||
| 28 | #include "os/linux/os_linux.h" | ||
| 29 | #include "gk20a/fecs_trace_gk20a.h" | ||
| 30 | #include "vgpu/fecs_trace_vgpu.h" | ||
| 31 | |||
| 32 | struct vgpu_fecs_trace { | ||
| 33 | struct tegra_hv_ivm_cookie *cookie; | ||
| 34 | struct nvgpu_ctxsw_ring_header *header; | ||
| 35 | struct nvgpu_gpu_ctxsw_trace_entry *entries; | ||
| 36 | int num_entries; | ||
| 37 | bool enabled; | ||
| 38 | void *buf; | ||
| 39 | }; | ||
| 40 | |||
| 41 | int vgpu_fecs_trace_init(struct gk20a *g) | ||
| 42 | { | ||
| 43 | struct device *dev = dev_from_gk20a(g); | ||
| 44 | struct device_node *np = dev->of_node; | ||
| 45 | struct of_phandle_args args; | ||
| 46 | struct vgpu_fecs_trace *vcst; | ||
| 47 | u32 mempool; | ||
| 48 | int err; | ||
| 49 | |||
| 50 | nvgpu_log_fn(g, " "); | ||
| 51 | |||
| 52 | vcst = nvgpu_kzalloc(g, sizeof(*vcst)); | ||
| 53 | if (!vcst) | ||
| 54 | return -ENOMEM; | ||
| 55 | |||
| 56 | err = of_parse_phandle_with_fixed_args(np, | ||
| 57 | "mempool-fecs-trace", 1, 0, &args); | ||
| 58 | if (err) { | ||
| 59 | nvgpu_info(g, "does not support fecs trace"); | ||
| 60 | goto fail; | ||
| 61 | } | ||
| 62 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | ||
| 63 | |||
| 64 | mempool = args.args[0]; | ||
| 65 | vcst->cookie = vgpu_ivm_mempool_reserve(mempool); | ||
| 66 | if (IS_ERR(vcst->cookie)) { | ||
| 67 | nvgpu_info(g, | ||
| 68 | "mempool %u reserve failed", mempool); | ||
| 69 | vcst->cookie = NULL; | ||
| 70 | err = -EINVAL; | ||
| 71 | goto fail; | ||
| 72 | } | ||
| 73 | |||
| 74 | vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie), | ||
| 75 | vgpu_ivm_get_size(vcst->cookie)); | ||
| 76 | if (!vcst->buf) { | ||
| 77 | nvgpu_info(g, "ioremap_cache failed"); | ||
| 78 | err = -EINVAL; | ||
| 79 | goto fail; | ||
| 80 | } | ||
| 81 | vcst->header = vcst->buf; | ||
| 82 | vcst->num_entries = vcst->header->num_ents; | ||
| 83 | if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { | ||
| 84 | nvgpu_err(g, "entry size mismatch"); | ||
| 85 | err = -EINVAL; goto fail; /* err was still 0 here: don't return success */ | ||
| 86 | } | ||
| 87 | vcst->entries = vcst->buf + sizeof(*vcst->header); | ||
| 88 | g->fecs_trace = (struct gk20a_fecs_trace *)vcst; | ||
| 89 | |||
| 90 | return 0; | ||
| 91 | fail: | ||
| 92 | if (vcst->buf) iounmap(vcst->buf); /* buf may be NULL on early failure */ | ||
| 93 | if (vcst->cookie) | ||
| 94 | vgpu_ivm_mempool_unreserve(vcst->cookie); | ||
| 95 | nvgpu_kfree(g, vcst); | ||
| 96 | return err; | ||
| 97 | } | ||
| 98 | |||
| 99 | int vgpu_fecs_trace_deinit(struct gk20a *g) | ||
| 100 | { | ||
| 101 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
| 102 | |||
| 103 | iounmap(vcst->buf); | ||
| 104 | vgpu_ivm_mempool_unreserve(vcst->cookie); | ||
| 105 | nvgpu_kfree(g, vcst); | ||
| 106 | return 0; | ||
| 107 | } | ||
| 108 | |||
| 109 | int vgpu_fecs_trace_enable(struct gk20a *g) | ||
| 110 | { | ||
| 111 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
| 112 | struct tegra_vgpu_cmd_msg msg = { | ||
| 113 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, | ||
| 114 | .handle = vgpu_get_handle(g), | ||
| 115 | }; | ||
| 116 | int err; | ||
| 117 | |||
| 118 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
| 119 | err = err ? err : msg.ret; | ||
| 120 | WARN_ON(err); | ||
| 121 | vcst->enabled = !err; | ||
| 122 | return err; | ||
| 123 | } | ||
| 124 | |||
| 125 | int vgpu_fecs_trace_disable(struct gk20a *g) | ||
| 126 | { | ||
| 127 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
| 128 | struct tegra_vgpu_cmd_msg msg = { | ||
| 129 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, | ||
| 130 | .handle = vgpu_get_handle(g), | ||
| 131 | }; | ||
| 132 | int err; | ||
| 133 | |||
| 134 | vcst->enabled = false; | ||
| 135 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
| 136 | err = err ? err : msg.ret; | ||
| 137 | WARN_ON(err); | ||
| 138 | return err; | ||
| 139 | } | ||
| 140 | |||
| 141 | bool vgpu_fecs_trace_is_enabled(struct gk20a *g) | ||
| 142 | { | ||
| 143 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
| 144 | |||
| 145 | return (vcst && vcst->enabled); | ||
| 146 | } | ||
| 147 | |||
| 148 | int vgpu_fecs_trace_poll(struct gk20a *g) | ||
| 149 | { | ||
| 150 | struct tegra_vgpu_cmd_msg msg = { | ||
| 151 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, | ||
| 152 | .handle = vgpu_get_handle(g), | ||
| 153 | }; | ||
| 154 | int err; | ||
| 155 | |||
| 156 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
| 157 | err = err ? err : msg.ret; | ||
| 158 | WARN_ON(err); | ||
| 159 | return err; | ||
| 160 | } | ||
| 161 | |||
| 162 | int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size) | ||
| 163 | { | ||
| 164 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
| 165 | |||
| 166 | *buf = vcst->buf; | ||
| 167 | *size = vgpu_ivm_get_size(vcst->cookie); | ||
| 168 | return 0; | ||
| 169 | } | ||
| 170 | |||
| 171 | int vgpu_free_user_buffer(struct gk20a *g) | ||
| 172 | { | ||
| 173 | return 0; | ||
| 174 | } | ||
| 175 | |||
| 176 | int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma) | ||
| 177 | { | ||
| 178 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
| 179 | unsigned long size = vgpu_ivm_get_size(vcst->cookie); | ||
| 180 | unsigned long vsize = vma->vm_end - vma->vm_start; | ||
| 181 | |||
| 182 | size = min(size, vsize); | ||
| 183 | size = round_up(size, PAGE_SIZE); | ||
| 184 | |||
| 185 | return remap_pfn_range(vma, vma->vm_start, | ||
| 186 | vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT, | ||
| 187 | size, | ||
| 188 | vma->vm_page_prot); | ||
| 189 | } | ||
| 190 | |||
| 191 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 192 | int vgpu_fecs_trace_max_entries(struct gk20a *g, | ||
| 193 | struct nvgpu_gpu_ctxsw_trace_filter *filter) | ||
| 194 | { | ||
| 195 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
| 196 | |||
| 197 | return vcst->header->num_ents; | ||
| 198 | } | ||
| 199 | |||
| 200 | #if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE | ||
| 201 | #error "FECS trace filter size mismatch!" | ||
| 202 | #endif | ||
| 203 | |||
| 204 | int vgpu_fecs_trace_set_filter(struct gk20a *g, | ||
| 205 | struct nvgpu_gpu_ctxsw_trace_filter *filter) | ||
| 206 | { | ||
| 207 | struct tegra_vgpu_cmd_msg msg = { | ||
| 208 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, | ||
| 209 | .handle = vgpu_get_handle(g), | ||
| 210 | }; | ||
| 211 | struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; | ||
| 212 | int err; | ||
| 213 | |||
| 214 | memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); | ||
| 215 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
| 216 | err = err ? err : msg.ret; | ||
| 217 | WARN_ON(err); | ||
| 218 | return err; | ||
| 219 | } | ||
| 220 | |||
| 221 | void vgpu_fecs_trace_data_update(struct gk20a *g) | ||
| 222 | { | ||
| 223 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
| 224 | } | ||
| 225 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ | ||
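As a reading aid, here is a hedged sketch of how a consumer could drain the
shared ring that vgpu_fecs_trace_init() maps above. The read_idx/write_idx
field names follow nvgpu_ctxsw_ring_header as used by the native FECS trace
code and are assumptions here; consume_entry() is a hypothetical sink.

static void vgpu_fecs_trace_drain_example(struct vgpu_fecs_trace *vcst)
{
        struct nvgpu_ctxsw_ring_header *hdr = vcst->header;

        while (hdr->read_idx != hdr->write_idx) {
                struct nvgpu_gpu_ctxsw_trace_entry *e =
                        &vcst->entries[hdr->read_idx];

                consume_entry(e);       /* hypothetical sink */
                hdr->read_idx = (hdr->read_idx + 1) % hdr->num_ents;
        }
}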
diff --git a/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c new file mode 100644 index 0000000..0304bcc --- /dev/null +++ b/include/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | |||
| @@ -0,0 +1,103 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/platform_device.h> | ||
| 18 | |||
| 19 | #include <nvgpu/nvhost.h> | ||
| 20 | #include <nvgpu/gk20a.h> | ||
| 21 | |||
| 22 | #include "vgpu/clk_vgpu.h" | ||
| 23 | #include "os/linux/platform_gk20a.h" | ||
| 24 | #include "os/linux/os_linux.h" | ||
| 25 | #include "os/linux/vgpu/vgpu_linux.h" | ||
| 26 | #include "os/linux/vgpu/platform_vgpu_tegra.h" | ||
| 27 | |||
| 28 | static int gv11b_vgpu_probe(struct device *dev) | ||
| 29 | { | ||
| 30 | struct platform_device *pdev = to_platform_device(dev); | ||
| 31 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 32 | struct resource *r; | ||
| 33 | void __iomem *regs; | ||
| 34 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g); | ||
| 35 | struct gk20a *g = platform->g; | ||
| 36 | int ret; | ||
| 37 | |||
| 38 | r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode"); | ||
| 39 | if (!r) { | ||
| 40 | nvgpu_err(g, "failed to get usermode regs"); | ||
| 41 | return -ENXIO; | ||
| 42 | } | ||
| 43 | regs = devm_ioremap_resource(dev, r); | ||
| 44 | if (IS_ERR(regs)) { | ||
| 45 | nvgpu_err(g, "failed to map usermode regs"); | ||
| 46 | return PTR_ERR(regs); | ||
| 47 | } | ||
| 48 | l->usermode_regs = regs; | ||
| 49 | |||
| 50 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 51 | ret = nvgpu_get_nvhost_dev(g); | ||
| 52 | if (ret) { | ||
| 53 | l->usermode_regs = NULL; | ||
| 54 | return ret; | ||
| 55 | } | ||
| 56 | |||
| 57 | ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev, | ||
| 58 | &g->syncpt_unit_base, | ||
| 59 | &g->syncpt_unit_size); | ||
| 60 | if (ret) { | ||
| 61 | nvgpu_err(g, "Failed to get syncpt interface"); | ||
| 62 | return -ENOSYS; | ||
| 63 | } | ||
| 64 | g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); | ||
| 65 | nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x", | ||
| 66 | g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size); | ||
| 67 | #endif | ||
| 68 | vgpu_init_clk_support(platform->g); | ||
| 69 | |||
| 70 | return 0; | ||
| 71 | } | ||
| 72 | |||
| 73 | struct gk20a_platform gv11b_vgpu_tegra_platform = { | ||
| 74 | .has_syncpoints = true, | ||
| 75 | |||
| 76 | /* power management configuration */ | ||
| 77 | .can_railgate_init = false, | ||
| 78 | .can_elpg_init = false, | ||
| 79 | .enable_slcg = false, | ||
| 80 | .enable_blcg = false, | ||
| 81 | .enable_elcg = false, | ||
| 82 | .enable_elpg = false, | ||
| 83 | .enable_aelpg = false, | ||
| 84 | .can_slcg = false, | ||
| 85 | .can_blcg = false, | ||
| 86 | .can_elcg = false, | ||
| 87 | |||
| 88 | .ch_wdt_timeout_ms = 5000, | ||
| 89 | |||
| 90 | .probe = gv11b_vgpu_probe, | ||
| 91 | |||
| 92 | .clk_round_rate = vgpu_plat_clk_round_rate, | ||
| 93 | .get_clk_freqs = vgpu_plat_clk_get_freqs, | ||
| 94 | |||
| 95 | /* frequency scaling configuration */ | ||
| 96 | .devfreq_governor = "userspace", | ||
| 97 | |||
| 98 | .virtual_dev = true, | ||
| 99 | |||
| 100 | /* power management callbacks */ | ||
| 101 | .suspend = vgpu_tegra_suspend, | ||
| 102 | .resume = vgpu_tegra_resume, | ||
| 103 | }; | ||
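The probe above only records syncpt_unit_base and the per-syncpoint stride; as
a sketch (assuming the stride semantics match what the gv11b syncpoint code
expects), the aperture address backing a given syncpoint id is then:

static u64 syncpt_aperture_addr_example(struct gk20a *g, u32 syncpt_id)
{
        return g->syncpt_unit_base +
               nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
}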
diff --git a/include/os/linux/vgpu/platform_vgpu_tegra.c b/include/os/linux/vgpu/platform_vgpu_tegra.c new file mode 100644 index 0000000..948323e --- /dev/null +++ b/include/os/linux/vgpu/platform_vgpu_tegra.c | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | /* | ||
| 2 | * Tegra Virtualized GPU Platform Interface | ||
| 3 | * | ||
| 4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <nvgpu/nvhost.h> | ||
| 20 | #include <nvgpu/gk20a.h> | ||
| 21 | |||
| 22 | #include "os/linux/platform_gk20a.h" | ||
| 23 | #include "vgpu/clk_vgpu.h" | ||
| 24 | #include "vgpu_linux.h" | ||
| 25 | |||
| 26 | static int gk20a_tegra_probe(struct device *dev) | ||
| 27 | { | ||
| 28 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
| 29 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
| 30 | int ret; | ||
| 31 | |||
| 32 | ret = nvgpu_get_nvhost_dev(platform->g); | ||
| 33 | if (ret) | ||
| 34 | return ret; | ||
| 35 | |||
| 36 | vgpu_init_clk_support(platform->g); | ||
| 37 | return 0; | ||
| 38 | #else | ||
| 39 | return 0; | ||
| 40 | #endif | ||
| 41 | } | ||
| 42 | |||
| 43 | long vgpu_plat_clk_round_rate(struct device *dev, unsigned long rate) | ||
| 44 | { | ||
| 45 | /* server will handle frequency rounding */ | ||
| 46 | return rate; | ||
| 47 | } | ||
| 48 | |||
| 49 | int vgpu_plat_clk_get_freqs(struct device *dev, unsigned long **freqs, | ||
| 50 | int *num_freqs) | ||
| 51 | { | ||
| 52 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 53 | struct gk20a *g = platform->g; | ||
| 54 | |||
| 55 | return vgpu_clk_get_freqs(g, freqs, num_freqs); | ||
| 56 | } | ||
| 57 | |||
| 58 | int vgpu_plat_clk_cap_rate(struct device *dev, unsigned long rate) | ||
| 59 | { | ||
| 60 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 61 | struct gk20a *g = platform->g; | ||
| 62 | |||
| 63 | return vgpu_clk_cap_rate(g, rate); | ||
| 64 | } | ||
| 65 | |||
| 66 | struct gk20a_platform vgpu_tegra_platform = { | ||
| 67 | .has_syncpoints = true, | ||
| 68 | .aggressive_sync_destroy_thresh = 64, | ||
| 69 | |||
| 70 | /* power management configuration */ | ||
| 71 | .can_railgate_init = false, | ||
| 72 | .can_elpg_init = false, | ||
| 73 | .enable_slcg = false, | ||
| 74 | .enable_blcg = false, | ||
| 75 | .enable_elcg = false, | ||
| 76 | .enable_elpg = false, | ||
| 77 | .enable_aelpg = false, | ||
| 78 | .can_slcg = false, | ||
| 79 | .can_blcg = false, | ||
| 80 | .can_elcg = false, | ||
| 81 | |||
| 82 | .ch_wdt_timeout_ms = 5000, | ||
| 83 | |||
| 84 | .probe = gk20a_tegra_probe, | ||
| 85 | |||
| 86 | .clk_round_rate = vgpu_plat_clk_round_rate, | ||
| 87 | .get_clk_freqs = vgpu_plat_clk_get_freqs, | ||
| 88 | |||
| 89 | /* frequency scaling configuration */ | ||
| 90 | .devfreq_governor = "userspace", | ||
| 91 | |||
| 92 | .virtual_dev = true, | ||
| 93 | |||
| 94 | /* power management callbacks */ | ||
| 95 | .suspend = vgpu_tegra_suspend, | ||
| 96 | .resume = vgpu_tegra_resume, | ||
| 97 | }; | ||
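For context, this descriptor is not used directly; it is selected through the
driver's device-tree match table. A hedged sketch of such an entry follows
(the compatible string is an assumption for illustration):

static const struct of_device_id tegra_vgpu_of_match_example[] = {
        { .compatible = "nvidia,tegra124-gk20a-vgpu",
          .data = &vgpu_tegra_platform },
        { },
};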
diff --git a/include/os/linux/vgpu/platform_vgpu_tegra.h b/include/os/linux/vgpu/platform_vgpu_tegra.h new file mode 100644 index 0000000..fef346d --- /dev/null +++ b/include/os/linux/vgpu/platform_vgpu_tegra.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef _VGPU_PLATFORM_H_ | ||
| 18 | #define _VGPU_PLATFORM_H_ | ||
| 19 | |||
| 20 | long vgpu_plat_clk_round_rate(struct device *dev, unsigned long rate); | ||
| 21 | int vgpu_plat_clk_get_freqs(struct device *dev, unsigned long **freqs, | ||
| 22 | int *num_freqs); | ||
| 23 | int vgpu_plat_clk_cap_rate(struct device *dev, unsigned long rate); | ||
| 24 | #endif | ||
diff --git a/include/os/linux/vgpu/sysfs_vgpu.c b/include/os/linux/vgpu/sysfs_vgpu.c new file mode 100644 index 0000000..ade5d82 --- /dev/null +++ b/include/os/linux/vgpu/sysfs_vgpu.c | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/device.h> | ||
| 18 | #include <nvgpu/vgpu/vgpu.h> | ||
| 19 | |||
| 20 | #include "os/linux/platform_gk20a.h" | ||
| 21 | #include "os/linux/os_linux.h" | ||
| 22 | #include "vgpu/ecc_vgpu.h" | ||
| 23 | |||
| 24 | static ssize_t vgpu_load_show(struct device *dev, | ||
| 25 | struct device_attribute *attr, | ||
| 26 | char *buf) | ||
| 27 | { | ||
| 28 | struct gk20a *g = get_gk20a(dev); | ||
| 29 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
| 30 | struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load; | ||
| 31 | int err; | ||
| 32 | |||
| 33 | msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD; | ||
| 34 | msg.handle = vgpu_get_handle(g); | ||
| 35 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
| 36 | err = err ? err : msg.ret; /* also surface server-side status */ | ||
| 37 | if (err) return err; | ||
| 38 | |||
| 39 | return snprintf(buf, PAGE_SIZE, "%u\n", p->load); | ||
| 40 | } | ||
| 41 | static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); | ||
| 42 | |||
| 43 | static ssize_t vgpu_ecc_stat_show(struct device *dev, | ||
| 44 | struct device_attribute *attr, | ||
| 45 | char *buf) | ||
| 46 | { | ||
| 47 | struct gk20a *g = get_gk20a(dev); | ||
| 48 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
| 49 | struct tegra_vgpu_ecc_counter_params *p = &msg.params.ecc_counter; | ||
| 50 | struct dev_ext_attribute *ext_attr = container_of(attr, | ||
| 51 | struct dev_ext_attribute, attr); | ||
| 52 | struct vgpu_ecc_stat *ecc_stat = ext_attr->var; | ||
| 53 | int err; | ||
| 54 | |||
| 55 | p->ecc_id = ecc_stat->ecc_id; | ||
| 56 | |||
| 57 | msg.cmd = TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE; | ||
| 58 | msg.handle = vgpu_get_handle(g); | ||
| 59 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
| 60 | err = err ? err : msg.ret; | ||
| 61 | if (unlikely(err)) { | ||
| 62 | nvgpu_err(g, "ecc: cannot get ECC counter value: %d", err); | ||
| 63 | return err; | ||
| 64 | } | ||
| 65 | |||
| 66 | return snprintf(buf, PAGE_SIZE, "%u\n", p->value); | ||
| 67 | } | ||
| 68 | |||
| 69 | static int vgpu_create_ecc_sysfs(struct device *dev) | ||
| 70 | { | ||
| 71 | struct gk20a *g = get_gk20a(dev); | ||
| 72 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 73 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
| 74 | struct vgpu_ecc_stat *stats; | ||
| 75 | struct dev_ext_attribute *attrs; | ||
| 76 | int err, i, count; | ||
| 77 | |||
| 78 | err = vgpu_ecc_get_info(g); | ||
| 79 | if (unlikely(err)) { | ||
| 80 | nvgpu_err(g, "ecc: cannot get ECC info: %d", err); | ||
| 81 | return err; | ||
| 82 | } | ||
| 83 | |||
| 84 | stats = priv->ecc_stats; | ||
| 85 | count = priv->ecc_stats_count; | ||
| 86 | |||
| 87 | attrs = nvgpu_kzalloc(g, count * sizeof(*attrs)); | ||
| 88 | if (unlikely(!attrs)) { | ||
| 89 | nvgpu_err(g, "ecc: no memory"); | ||
| 90 | vgpu_ecc_remove_info(g); | ||
| 91 | return -ENOMEM; | ||
| 92 | } | ||
| 93 | |||
| 94 | for (i = 0; i < count; i++) { | ||
| 95 | sysfs_attr_init(&attrs[i].attr.attr); | ||
| 96 | attrs[i].attr.attr.name = stats[i].name; | ||
| 97 | attrs[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO); | ||
| 98 | attrs[i].attr.show = vgpu_ecc_stat_show; | ||
| 99 | attrs[i].attr.store = NULL; | ||
| 100 | attrs[i].var = &stats[i]; | ||
| 101 | |||
| 102 | err = device_create_file(dev, &attrs[i].attr); | ||
| 103 | if (unlikely(err)) { | ||
| 104 | nvgpu_warn(g, "ecc: cannot create file \"%s\": %d", | ||
| 105 | stats[i].name, err); | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | l->ecc_attrs = attrs; | ||
| 110 | return 0; | ||
| 111 | } | ||
| 112 | |||
| 113 | static void vgpu_remove_ecc_sysfs(struct device *dev) | ||
| 114 | { | ||
| 115 | struct gk20a *g = get_gk20a(dev); | ||
| 116 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 117 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
| 118 | int i; | ||
| 119 | |||
| 120 | if (l->ecc_attrs) { | ||
| 121 | for (i = 0; i < priv->ecc_stats_count; i++) | ||
| 122 | device_remove_file(dev, &l->ecc_attrs[i].attr); | ||
| 123 | |||
| 124 | nvgpu_kfree(g, l->ecc_attrs); | ||
| 125 | l->ecc_attrs = NULL; | ||
| 126 | } | ||
| 127 | |||
| 128 | vgpu_ecc_remove_info(g); | ||
| 129 | } | ||
| 130 | |||
| 131 | void vgpu_create_sysfs(struct device *dev) | ||
| 132 | { | ||
| 133 | if (device_create_file(dev, &dev_attr_load)) | ||
| 134 | dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); | ||
| 135 | |||
| 136 | vgpu_create_ecc_sysfs(dev); | ||
| 137 | } | ||
| 138 | |||
| 139 | void vgpu_remove_sysfs(struct device *dev) | ||
| 140 | { | ||
| 141 | device_remove_file(dev, &dev_attr_load); | ||
| 142 | vgpu_remove_ecc_sysfs(dev); | ||
| 143 | } | ||
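From userspace, the "load" attribute created above surfaces as a plain sysfs
file. A minimal sketch of reading it (the device path is an assumption and
varies per board):

#include <stdio.h>

int main(void)
{
        unsigned int load;
        FILE *f = fopen("/sys/devices/platform/gpu.0/load", "r");

        if (!f)
                return 1;
        if (fscanf(f, "%u", &load) == 1)
                printf("GPU load: %u\n", load);
        fclose(f);
        return 0;
}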
diff --git a/include/os/linux/vgpu/vgpu_ivc.c b/include/os/linux/vgpu/vgpu_ivc.c new file mode 100644 index 0000000..950f0d4 --- /dev/null +++ b/include/os/linux/vgpu/vgpu_ivc.c | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/types.h> | ||
| 18 | #include <linux/tegra_gr_comm.h> | ||
| 19 | |||
| 20 | #include "os/linux/os_linux.h" | ||
| 21 | |||
| 22 | int vgpu_ivc_init(struct gk20a *g, u32 elems, | ||
| 23 | const size_t *queue_sizes, u32 queue_start, u32 num_queues) | ||
| 24 | { | ||
| 25 | struct platform_device *pdev = to_platform_device(dev_from_gk20a(g)); | ||
| 26 | |||
| 27 | return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start, | ||
| 28 | num_queues); | ||
| 29 | } | ||
| 30 | |||
| 31 | void vgpu_ivc_deinit(u32 queue_start, u32 num_queues) | ||
| 32 | { | ||
| 33 | tegra_gr_comm_deinit(queue_start, num_queues); | ||
| 34 | } | ||
| 35 | |||
| 36 | void vgpu_ivc_release(void *handle) | ||
| 37 | { | ||
| 38 | tegra_gr_comm_release(handle); | ||
| 39 | } | ||
| 40 | |||
| 41 | u32 vgpu_ivc_get_server_vmid(void) | ||
| 42 | { | ||
| 43 | return tegra_gr_comm_get_server_vmid(); | ||
| 44 | } | ||
| 45 | |||
| 46 | int vgpu_ivc_recv(u32 index, void **handle, void **data, | ||
| 47 | size_t *size, u32 *sender) | ||
| 48 | { | ||
| 49 | return tegra_gr_comm_recv(index, handle, data, size, sender); | ||
| 50 | } | ||
| 51 | |||
| 52 | int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size) | ||
| 53 | { | ||
| 54 | return tegra_gr_comm_send(peer, index, data, size); | ||
| 55 | } | ||
| 56 | |||
| 57 | int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle, | ||
| 58 | void **data, size_t *size) | ||
| 59 | { | ||
| 60 | return tegra_gr_comm_sendrecv(peer, index, handle, data, size); | ||
| 61 | } | ||
| 62 | |||
| 63 | u32 vgpu_ivc_get_peer_self(void) | ||
| 64 | { | ||
| 65 | return TEGRA_GR_COMM_ID_SELF; | ||
| 66 | } | ||
| 67 | |||
| 68 | void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr, | ||
| 69 | size_t *size) | ||
| 70 | { | ||
| 71 | return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size); | ||
| 72 | } | ||
| 73 | |||
| 74 | void vgpu_ivc_oob_put_ptr(void *handle) | ||
| 75 | { | ||
| 76 | tegra_gr_comm_oob_put_ptr(handle); | ||
| 77 | } | ||
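These wrappers are thin shims over tegra_gr_comm. A sketch of a typical
command/response exchange with the server peer, using only the wrappers above
(the queue index is an illustrative assumption):

static int vgpu_ivc_roundtrip_example(u32 queue, void *req, size_t req_size)
{
        void *handle, *resp;
        size_t resp_size;
        u32 sender;
        int err;

        err = vgpu_ivc_send(vgpu_ivc_get_server_vmid(), queue, req, req_size);
        if (err)
                return err;

        err = vgpu_ivc_recv(queue, &handle, &resp, &resp_size, &sender);
        if (err)
                return err;

        /* ... interpret resp, then release the receive handle ... */
        vgpu_ivc_release(handle);
        return 0;
}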
diff --git a/include/os/linux/vgpu/vgpu_ivm.c b/include/os/linux/vgpu/vgpu_ivm.c new file mode 100644 index 0000000..bbd444d --- /dev/null +++ b/include/os/linux/vgpu/vgpu_ivm.c | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/vgpu/vgpu_ivm.h> | ||
| 18 | |||
| 19 | #include <linux/tegra-ivc.h> | ||
| 20 | |||
| 21 | #include "os/linux/os_linux.h" | ||
| 22 | |||
| 23 | struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id) | ||
| 24 | { | ||
| 25 | return tegra_hv_mempool_reserve(id); | ||
| 26 | } | ||
| 27 | |||
| 28 | int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie) | ||
| 29 | { | ||
| 30 | return tegra_hv_mempool_unreserve(cookie); | ||
| 31 | } | ||
| 32 | |||
| 33 | u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie) | ||
| 34 | { | ||
| 35 | return cookie->ipa; | ||
| 36 | } | ||
| 37 | |||
| 38 | u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie) | ||
| 39 | { | ||
| 40 | return cookie->size; | ||
| 41 | } | ||
| 42 | |||
| 43 | void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie) | ||
| 44 | { | ||
| 45 | return ioremap_cache(vgpu_ivm_get_ipa(cookie), | ||
| 46 | vgpu_ivm_get_size(cookie)); | ||
| 47 | } | ||
| 48 | |||
| 49 | void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie, | ||
| 50 | void *addr) | ||
| 51 | { | ||
| 52 | iounmap(addr); | ||
| 53 | } | ||
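The expected lifecycle of an IVM mempool with these helpers (mirroring what
fecs_trace_vgpu.c does by hand) is reserve, map, use, unmap, unreserve; a
minimal sketch:

static int vgpu_ivm_use_example(unsigned int mempool_id)
{
        struct tegra_hv_ivm_cookie *cookie;
        void *buf;

        cookie = vgpu_ivm_mempool_reserve(mempool_id);
        if (IS_ERR(cookie))
                return PTR_ERR(cookie);

        buf = vgpu_ivm_mempool_map(cookie);
        if (!buf) {
                vgpu_ivm_mempool_unreserve(cookie);
                return -ENOMEM;
        }

        /* ... use buf[0 .. vgpu_ivm_get_size(cookie) - 1] ... */

        vgpu_ivm_mempool_unmap(cookie, buf);
        vgpu_ivm_mempool_unreserve(cookie);
        return 0;
}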
diff --git a/include/os/linux/vgpu/vgpu_linux.c b/include/os/linux/vgpu/vgpu_linux.c new file mode 100644 index 0000000..80bcfff --- /dev/null +++ b/include/os/linux/vgpu/vgpu_linux.c | |||
| @@ -0,0 +1,525 @@ | |||
| 1 | /* | ||
| 2 | * Virtualized GPU for Linux | ||
| 3 | * | ||
| 4 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/mm.h> | ||
| 20 | #include <linux/slab.h> | ||
| 21 | #include <linux/dma-mapping.h> | ||
| 22 | #include <linux/pm_runtime.h> | ||
| 23 | #include <linux/pm_qos.h> | ||
| 24 | #include <linux/platform_device.h> | ||
| 25 | #include <soc/tegra/chip-id.h> | ||
| 26 | |||
| 27 | #include <nvgpu/kmem.h> | ||
| 28 | #include <nvgpu/bug.h> | ||
| 29 | #include <nvgpu/enabled.h> | ||
| 30 | #include <nvgpu/debug.h> | ||
| 31 | #include <nvgpu/soc.h> | ||
| 32 | #include <nvgpu/ctxsw_trace.h> | ||
| 33 | #include <nvgpu/defaults.h> | ||
| 34 | #include <nvgpu/ltc.h> | ||
| 35 | #include <nvgpu/channel.h> | ||
| 36 | #include <nvgpu/clk_arb.h> | ||
| 37 | |||
| 38 | #include "vgpu_linux.h" | ||
| 39 | #include "vgpu/fecs_trace_vgpu.h" | ||
| 40 | #include "vgpu/clk_vgpu.h" | ||
| 41 | #include "gk20a/regops_gk20a.h" | ||
| 42 | #include "gm20b/hal_gm20b.h" | ||
| 43 | |||
| 44 | #include "os/linux/module.h" | ||
| 45 | #include "os/linux/os_linux.h" | ||
| 46 | #include "os/linux/ioctl.h" | ||
| 47 | #include "os/linux/scale.h" | ||
| 48 | #include "os/linux/driver_common.h" | ||
| 49 | #include "os/linux/platform_gk20a.h" | ||
| 50 | #include "os/linux/vgpu/platform_vgpu_tegra.h" | ||
| 51 | |||
| 52 | struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g) | ||
| 53 | { | ||
| 54 | struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g)); | ||
| 55 | |||
| 56 | return (struct vgpu_priv_data *)plat->vgpu_priv; | ||
| 57 | } | ||
| 58 | |||
| 59 | static void vgpu_remove_support(struct gk20a *g) | ||
| 60 | { | ||
| 61 | vgpu_remove_support_common(g); | ||
| 62 | } | ||
| 63 | |||
| 64 | static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform) | ||
| 65 | { | ||
| 66 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 67 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
| 68 | |||
| 69 | nvgpu_mutex_init(&g->power_lock); | ||
| 70 | nvgpu_mutex_init(&g->ctxsw_disable_lock); | ||
| 71 | nvgpu_mutex_init(&g->clk_arb_enable_lock); | ||
| 72 | nvgpu_mutex_init(&g->cg_pg_lock); | ||
| 73 | |||
| 74 | nvgpu_mutex_init(&priv->vgpu_clk_get_freq_lock); | ||
| 75 | |||
| 76 | nvgpu_mutex_init(&l->ctrl.privs_lock); | ||
| 77 | nvgpu_init_list_node(&l->ctrl.privs); | ||
| 78 | |||
| 79 | l->regs_saved = l->regs; | ||
| 80 | l->bar1_saved = l->bar1; | ||
| 81 | |||
| 82 | nvgpu_atomic_set(&g->clk_arb_global_nr, 0); | ||
| 83 | |||
| 84 | g->aggressive_sync_destroy = platform->aggressive_sync_destroy; | ||
| 85 | g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; | ||
| 86 | __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints); | ||
| 87 | g->ptimer_src_freq = platform->ptimer_src_freq; | ||
| 88 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); | ||
| 89 | g->railgate_delay = platform->railgate_delay_init; | ||
| 90 | |||
| 91 | __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, | ||
| 92 | platform->unify_address_spaces); | ||
| 93 | } | ||
| 94 | |||
| 95 | static int vgpu_init_support(struct platform_device *pdev) | ||
| 96 | { | ||
| 97 | struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
| 98 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
| 99 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 100 | void __iomem *regs; | ||
| 101 | int err = 0; | ||
| 102 | |||
| 103 | if (!r) { | ||
| 104 | nvgpu_err(g, "failed to get gk20a bar1"); | ||
| 105 | err = -ENXIO; | ||
| 106 | goto fail; | ||
| 107 | } | ||
| 108 | |||
| 109 | if (r->name && !strcmp(r->name, "/vgpu")) { | ||
| 110 | regs = devm_ioremap_resource(&pdev->dev, r); | ||
| 111 | if (IS_ERR(regs)) { | ||
| 112 | nvgpu_err(g, "failed to remap gk20a bar1"); | ||
| 113 | err = PTR_ERR(regs); | ||
| 114 | goto fail; | ||
| 115 | } | ||
| 116 | l->bar1 = regs; | ||
| 117 | l->bar1_mem = r; | ||
| 118 | } | ||
| 119 | |||
| 120 | nvgpu_mutex_init(&g->dbg_sessions_lock); | ||
| 121 | nvgpu_mutex_init(&g->client_lock); | ||
| 122 | |||
| 123 | nvgpu_init_list_node(&g->profiler_objects); | ||
| 124 | |||
| 125 | g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); | ||
| 126 | if (!g->dbg_regops_tmp_buf) { | ||
| 127 | nvgpu_err(g, "couldn't allocate regops tmp buf"); | ||
| 128 | return -ENOMEM; | ||
| 129 | } | ||
| 130 | g->dbg_regops_tmp_buf_ops = | ||
| 131 | SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); | ||
| 132 | |||
| 133 | g->remove_support = vgpu_remove_support; | ||
| 134 | return 0; | ||
| 135 | |||
| 136 | fail: | ||
| 137 | vgpu_remove_support(g); | ||
| 138 | return err; | ||
| 139 | } | ||
| 140 | |||
| 141 | int vgpu_pm_prepare_poweroff(struct device *dev) | ||
| 142 | { | ||
| 143 | struct gk20a *g = get_gk20a(dev); | ||
| 144 | int ret = 0; | ||
| 145 | |||
| 146 | nvgpu_log_fn(g, " "); | ||
| 147 | |||
| 148 | nvgpu_mutex_acquire(&g->power_lock); | ||
| 149 | |||
| 150 | if (!g->power_on) | ||
| 151 | goto done; | ||
| 152 | |||
| 153 | if (g->ops.fifo.channel_suspend) | ||
| 154 | ret = g->ops.fifo.channel_suspend(g); | ||
| 155 | if (ret) | ||
| 156 | goto done; | ||
| 157 | |||
| 158 | g->power_on = false; | ||
| 159 | done: | ||
| 160 | nvgpu_mutex_release(&g->power_lock); | ||
| 161 | |||
| 162 | return ret; | ||
| 163 | } | ||
| 164 | |||
| 165 | int vgpu_pm_finalize_poweron(struct device *dev) | ||
| 166 | { | ||
| 167 | struct gk20a *g = get_gk20a(dev); | ||
| 168 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 169 | int err = 0; | ||
| 170 | |||
| 171 | nvgpu_log_fn(g, " "); | ||
| 172 | |||
| 173 | nvgpu_mutex_acquire(&g->power_lock); | ||
| 174 | |||
| 175 | if (g->power_on) | ||
| 176 | goto done; | ||
| 177 | |||
| 178 | g->power_on = true; | ||
| 179 | |||
| 180 | vgpu_detect_chip(g); | ||
| 181 | err = vgpu_init_hal(g); | ||
| 182 | if (err) | ||
| 183 | goto done; | ||
| 184 | |||
| 185 | if (g->ops.ltc.init_fs_state) | ||
| 186 | g->ops.ltc.init_fs_state(g); | ||
| 187 | |||
| 188 | err = nvgpu_init_ltc_support(g); | ||
| 189 | if (err) { | ||
| 190 | nvgpu_err(g, "failed to init ltc"); | ||
| 191 | goto done; | ||
| 192 | } | ||
| 193 | |||
| 194 | err = vgpu_init_mm_support(g); | ||
| 195 | if (err) { | ||
| 196 | nvgpu_err(g, "failed to init gk20a mm"); | ||
| 197 | goto done; | ||
| 198 | } | ||
| 199 | |||
| 200 | err = vgpu_init_fifo_support(g); | ||
| 201 | if (err) { | ||
| 202 | nvgpu_err(g, "failed to init gk20a fifo"); | ||
| 203 | goto done; | ||
| 204 | } | ||
| 205 | |||
| 206 | err = vgpu_init_gr_support(g); | ||
| 207 | if (err) { | ||
| 208 | nvgpu_err(g, "failed to init gk20a gr"); | ||
| 209 | goto done; | ||
| 210 | } | ||
| 211 | |||
| 212 | err = nvgpu_clk_arb_init_arbiter(g); | ||
| 213 | if (err) { | ||
| 214 | nvgpu_err(g, "failed to init clk arb"); | ||
| 215 | goto done; | ||
| 216 | } | ||
| 217 | |||
| 218 | err = g->ops.chip_init_gpu_characteristics(g); | ||
| 219 | if (err) { | ||
| 220 | nvgpu_err(g, "failed to init gk20a gpu characteristics"); | ||
| 221 | goto done; | ||
| 222 | } | ||
| 223 | |||
| 224 | err = nvgpu_finalize_poweron_linux(l); | ||
| 225 | if (err) | ||
| 226 | goto done; | ||
| 227 | |||
| 228 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
| 229 | gk20a_ctxsw_trace_init(g); | ||
| 230 | #endif | ||
| 231 | gk20a_sched_ctrl_init(g); | ||
| 232 | gk20a_channel_resume(g); | ||
| 233 | |||
| 234 | g->sw_ready = true; | ||
| 235 | |||
| 236 | done: | ||
| 237 | if (err) | ||
| 238 | g->power_on = false; | ||
| 239 | |||
| 240 | nvgpu_mutex_release(&g->power_lock); | ||
| 241 | return err; | ||
| 242 | } | ||
| 243 | |||
| 244 | static int vgpu_qos_notify(struct notifier_block *nb, | ||
| 245 | unsigned long n, void *data) | ||
| 246 | { | ||
| 247 | struct gk20a_scale_profile *profile = | ||
| 248 | container_of(nb, struct gk20a_scale_profile, | ||
| 249 | qos_notify_block); | ||
| 250 | struct gk20a *g = get_gk20a(profile->dev); | ||
| 251 | u32 max_freq; | ||
| 252 | int err; | ||
| 253 | |||
| 254 | nvgpu_log_fn(g, " "); | ||
| 255 | |||
| 256 | max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS); | ||
| 257 | err = vgpu_plat_clk_cap_rate(profile->dev, max_freq); | ||
| 258 | if (err) | ||
| 259 | nvgpu_err(g, "%s failed, err=%d", __func__, err); | ||
| 260 | |||
| 261 | return NOTIFY_OK; /* allow further notifier calls */ | ||
| 262 | } | ||
| 263 | |||
| 264 | static int vgpu_pm_qos_init(struct device *dev) | ||
| 265 | { | ||
| 266 | struct gk20a *g = get_gk20a(dev); | ||
| 267 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
| 268 | |||
| 269 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) { | ||
| 270 | if (!profile) | ||
| 271 | return -EINVAL; | ||
| 272 | } else { | ||
| 273 | profile = nvgpu_kzalloc(g, sizeof(*profile)); | ||
| 274 | if (!profile) | ||
| 275 | return -ENOMEM; | ||
| 276 | g->scale_profile = profile; | ||
| 277 | } | ||
| 278 | |||
| 279 | profile->dev = dev; | ||
| 280 | profile->qos_notify_block.notifier_call = vgpu_qos_notify; | ||
| 281 | pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
| 282 | &profile->qos_notify_block); | ||
| 283 | return 0; | ||
| 284 | } | ||
| 285 | |||
| 286 | static void vgpu_pm_qos_remove(struct device *dev) | ||
| 287 | { | ||
| 288 | struct gk20a *g = get_gk20a(dev); | ||
| 289 | |||
| 290 | pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
| 291 | &g->scale_profile->qos_notify_block); | ||
| 292 | nvgpu_kfree(g, g->scale_profile); | ||
| 293 | g->scale_profile = NULL; | ||
| 294 | } | ||
| 295 | |||
| 296 | static int vgpu_pm_init(struct device *dev) | ||
| 297 | { | ||
| 298 | struct gk20a *g = get_gk20a(dev); | ||
| 299 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 300 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 301 | unsigned long *freqs; | ||
| 302 | int num_freqs; | ||
| 303 | int err = 0; | ||
| 304 | |||
| 305 | nvgpu_log_fn(g, " "); | ||
| 306 | |||
| 307 | if (nvgpu_platform_is_simulation(g)) | ||
| 308 | return 0; | ||
| 309 | |||
| 310 | __pm_runtime_disable(dev, false); | ||
| 311 | |||
| 312 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | ||
| 313 | gk20a_scale_init(dev); | ||
| 314 | |||
| 315 | if (l->devfreq) { | ||
| 316 | /* set min/max frequency based on frequency table */ | ||
| 317 | err = platform->get_clk_freqs(dev, &freqs, &num_freqs); | ||
| 318 | if (err) | ||
| 319 | return err; | ||
| 320 | |||
| 321 | if (num_freqs < 1) | ||
| 322 | return -EINVAL; | ||
| 323 | |||
| 324 | l->devfreq->min_freq = freqs[0]; | ||
| 325 | l->devfreq->max_freq = freqs[num_freqs - 1]; | ||
| 326 | } | ||
| 327 | |||
| 328 | err = vgpu_pm_qos_init(dev); | ||
| 329 | if (err) | ||
| 330 | return err; | ||
| 331 | |||
| 332 | return err; | ||
| 333 | } | ||
| 334 | |||
| 335 | int vgpu_probe(struct platform_device *pdev) | ||
| 336 | { | ||
| 337 | struct nvgpu_os_linux *l; | ||
| 338 | struct gk20a *gk20a; | ||
| 339 | int err; | ||
| 340 | struct device *dev = &pdev->dev; | ||
| 341 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
| 342 | struct vgpu_priv_data *priv; | ||
| 343 | |||
| 344 | if (!platform) { | ||
| 345 | dev_err(dev, "no platform data\n"); | ||
| 346 | return -ENODATA; | ||
| 347 | } | ||
| 348 | |||
| 349 | l = kzalloc(sizeof(*l), GFP_KERNEL); | ||
| 350 | if (!l) { | ||
| 351 | dev_err(dev, "couldn't allocate gk20a support\n"); | ||
| 352 | return -ENOMEM; | ||
| 353 | } | ||
| 354 | gk20a = &l->g; | ||
| 355 | |||
| 356 | nvgpu_log_fn(gk20a, " "); | ||
| 357 | |||
| 358 | nvgpu_init_gk20a(gk20a); | ||
| 359 | |||
| 360 | nvgpu_kmem_init(gk20a); | ||
| 361 | |||
| 362 | err = nvgpu_init_enabled_flags(gk20a); | ||
| 363 | if (err) { | ||
| 364 | kfree(gk20a); | ||
| 365 | return err; | ||
| 366 | } | ||
| 367 | |||
| 368 | l->dev = dev; | ||
| 369 | if (tegra_platform_is_vdk()) | ||
| 370 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | ||
| 371 | |||
| 372 | gk20a->is_virtual = true; | ||
| 373 | |||
| 374 | priv = nvgpu_kzalloc(gk20a, sizeof(*priv)); | ||
| 375 | if (!priv) { | ||
| 376 | kfree(gk20a); | ||
| 377 | return -ENOMEM; | ||
| 378 | } | ||
| 379 | |||
| 380 | platform->g = gk20a; | ||
| 381 | platform->vgpu_priv = priv; | ||
| 382 | |||
| 383 | err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); | ||
| 384 | if (err) | ||
| 385 | return err; | ||
| 386 | |||
| 387 | err = vgpu_init_support(pdev); /* was ignored: init can fail */ | ||
| 388 | if (err) return err; | ||
| 389 | vgpu_init_vars(gk20a, platform); | ||
| 390 | |||
| 391 | init_rwsem(&l->busy_lock); | ||
| 392 | |||
| 393 | nvgpu_spinlock_init(&gk20a->mc_enable_lock); | ||
| 394 | |||
| 395 | gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; | ||
| 396 | |||
| 397 | /* Initialize the platform interface. */ | ||
| 398 | err = platform->probe(dev); | ||
| 399 | if (err) { | ||
| 400 | if (err == -EPROBE_DEFER) | ||
| 401 | nvgpu_info(gk20a, "platform probe failed"); | ||
| 402 | else | ||
| 403 | nvgpu_err(gk20a, "platform probe failed"); | ||
| 404 | return err; | ||
| 405 | } | ||
| 406 | |||
| 407 | if (platform->late_probe) { | ||
| 408 | err = platform->late_probe(dev); | ||
| 409 | if (err) { | ||
| 410 | nvgpu_err(gk20a, "late probe failed"); | ||
| 411 | return err; | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | err = vgpu_comm_init(gk20a); | ||
| 416 | if (err) { | ||
| 417 | nvgpu_err(gk20a, "failed to init comm interface"); | ||
| 418 | return -ENOSYS; | ||
| 419 | } | ||
| 420 | |||
| 421 | priv->virt_handle = vgpu_connect(); | ||
| 422 | if (!priv->virt_handle) { | ||
| 423 | nvgpu_err(gk20a, "failed to connect to server node"); | ||
| 424 | vgpu_comm_deinit(); | ||
| 425 | return -ENOSYS; | ||
| 426 | } | ||
| 427 | |||
| 428 | err = vgpu_get_constants(gk20a); | ||
| 429 | if (err) { | ||
| 430 | vgpu_comm_deinit(); | ||
| 431 | return err; | ||
| 432 | } | ||
| 433 | |||
| 434 | err = vgpu_pm_init(dev); | ||
| 435 | if (err) { | ||
| 436 | nvgpu_err(gk20a, "pm init failed"); | ||
| 437 | return err; | ||
| 438 | } | ||
| 439 | |||
| 440 | err = nvgpu_thread_create(&priv->intr_handler, gk20a, | ||
| 441 | vgpu_intr_thread, "gk20a"); | ||
| 442 | if (err) | ||
| 443 | return err; | ||
| 444 | |||
| 445 | gk20a_debug_init(gk20a, "gpu.0"); | ||
| 446 | |||
| 447 | /* Set DMA parameters to allow larger sgt lists */ | ||
| 448 | dev->dma_parms = &l->dma_parms; | ||
| 449 | dma_set_max_seg_size(dev, UINT_MAX); | ||
| 450 | |||
| 451 | gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; | ||
| 452 | gk20a->timeouts_disabled_by_user = false; | ||
| 453 | nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0); | ||
| 454 | |||
| 455 | vgpu_create_sysfs(dev); | ||
| 456 | gk20a_init_gr(gk20a); | ||
| 457 | |||
| 458 | nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages); | ||
| 459 | gk20a->gr.max_comptag_mem = totalram_size_in_mb; | ||
| 460 | |||
| 461 | nvgpu_ref_init(&gk20a->refcount); | ||
| 462 | |||
| 463 | return 0; | ||
| 464 | } | ||
| 465 | |||
| 466 | int vgpu_remove(struct platform_device *pdev) | ||
| 467 | { | ||
| 468 | struct device *dev = &pdev->dev; | ||
| 469 | struct gk20a *g = get_gk20a(dev); | ||
| 470 | |||
| 471 | nvgpu_log_fn(g, " "); | ||
| 472 | |||
| 473 | vgpu_pm_qos_remove(dev); | ||
| 474 | if (g->remove_support) | ||
| 475 | g->remove_support(g); | ||
| 476 | |||
| 477 | vgpu_comm_deinit(); | ||
| 478 | gk20a_sched_ctrl_cleanup(g); | ||
| 479 | gk20a_user_deinit(dev, &nvgpu_class); | ||
| 480 | vgpu_remove_sysfs(dev); | ||
| 481 | gk20a_get_platform(dev)->g = NULL; | ||
| 482 | gk20a_put(g); | ||
| 483 | |||
| 484 | return 0; | ||
| 485 | } | ||
| 486 | |||
| 487 | bool vgpu_is_reduced_bar1(struct gk20a *g) | ||
| 488 | { | ||
| 489 | struct fifo_gk20a *f = &g->fifo; | ||
| 490 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
| 491 | |||
| 492 | return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size; | ||
| 493 | } | ||
| 494 | |||
| 495 | int vgpu_tegra_suspend(struct device *dev) | ||
| 496 | { | ||
| 497 | struct tegra_vgpu_cmd_msg msg = {}; | ||
| 498 | struct gk20a *g = get_gk20a(dev); | ||
| 499 | int err = 0; | ||
| 500 | |||
| 501 | msg.cmd = TEGRA_VGPU_CMD_SUSPEND; | ||
| 502 | msg.handle = vgpu_get_handle(g); | ||
| 503 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
| 504 | err = err ? err : msg.ret; | ||
| 505 | if (err) | ||
| 506 | nvgpu_err(g, "vGPU suspend failed"); | ||
| 507 | |||
| 508 | return err; | ||
| 509 | } | ||
| 510 | |||
| 511 | int vgpu_tegra_resume(struct device *dev) | ||
| 512 | { | ||
| 513 | struct tegra_vgpu_cmd_msg msg = {}; | ||
| 514 | struct gk20a *g = get_gk20a(dev); | ||
| 515 | int err = 0; | ||
| 516 | |||
| 517 | msg.cmd = TEGRA_VGPU_CMD_RESUME; | ||
| 518 | msg.handle = vgpu_get_handle(g); | ||
| 519 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
| 520 | err = err ? err : msg.ret; | ||
| 521 | if (err) | ||
| 522 | nvgpu_err(g, "vGPU resume failed"); | ||
| 523 | |||
| 524 | return err; | ||
| 525 | } | ||
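The "err = err ? err : msg.ret;" folding that suspend/resume (and most vgpu
code) repeats could be captured once; a hypothetical helper sketch:

static inline int vgpu_send_cmd_example(struct gk20a *g, u32 cmd)
{
        struct tegra_vgpu_cmd_msg msg = {
                .cmd = cmd,
                .handle = vgpu_get_handle(g),
        };
        int err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        return err ? err : msg.ret;     /* local error wins, else server's */
}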
diff --git a/include/os/linux/vgpu/vgpu_linux.h b/include/os/linux/vgpu/vgpu_linux.h new file mode 100644 index 0000000..ff7d3a6 --- /dev/null +++ b/include/os/linux/vgpu/vgpu_linux.h | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | /* | ||
| 2 | * Virtualized GPU Linux Interfaces | ||
| 3 | * | ||
| 4 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __VGPU_LINUX_H__ | ||
| 20 | #define __VGPU_LINUX_H__ | ||
| 21 | |||
| 22 | struct device; | ||
| 23 | struct platform_device; | ||
| 24 | |||
| 25 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
| 26 | |||
| 27 | #include <nvgpu/vgpu/vgpu.h> | ||
| 28 | |||
| 29 | int vgpu_pm_prepare_poweroff(struct device *dev); | ||
| 30 | int vgpu_pm_finalize_poweron(struct device *dev); | ||
| 31 | int vgpu_probe(struct platform_device *dev); | ||
| 32 | int vgpu_remove(struct platform_device *dev); | ||
| 33 | |||
| 34 | void vgpu_create_sysfs(struct device *dev); | ||
| 35 | void vgpu_remove_sysfs(struct device *dev); | ||
| 36 | |||
| 37 | int vgpu_tegra_suspend(struct device *dev); | ||
| 38 | int vgpu_tegra_resume(struct device *dev); | ||
| 39 | #else | ||
| 40 | /* define placeholders for functions used outside of vgpu */ | ||
| 41 | |||
| 42 | static inline int vgpu_pm_prepare_poweroff(struct device *dev) | ||
| 43 | { | ||
| 44 | return -ENOSYS; | ||
| 45 | } | ||
| 46 | static inline int vgpu_pm_finalize_poweron(struct device *dev) | ||
| 47 | { | ||
| 48 | return -ENOSYS; | ||
| 49 | } | ||
| 50 | static inline int vgpu_probe(struct platform_device *dev) | ||
| 51 | { | ||
| 52 | return -ENOSYS; | ||
| 53 | } | ||
| 54 | static inline int vgpu_remove(struct platform_device *dev) | ||
| 55 | { | ||
| 56 | return -ENOSYS; | ||
| 57 | } | ||
| 58 | static inline int vgpu_tegra_suspend(struct device *dev) | ||
| 59 | { | ||
| 60 | return -ENOSYS; | ||
| 61 | } | ||
| 62 | static inline int vgpu_tegra_resume(struct device *dev) | ||
| 63 | { | ||
| 64 | return -ENOSYS; | ||
| 65 | } | ||
| 66 | #endif | ||
| 67 | |||
| 68 | #endif | ||
diff --git a/include/os/linux/vm.c b/include/os/linux/vm.c new file mode 100644 index 0000000..dc807ab --- /dev/null +++ b/include/os/linux/vm.c | |||
| @@ -0,0 +1,356 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/dma-buf.h> | ||
| 18 | #include <linux/scatterlist.h> | ||
| 19 | #include <uapi/linux/nvgpu.h> | ||
| 20 | |||
| 21 | #include <nvgpu/log.h> | ||
| 22 | #include <nvgpu/lock.h> | ||
| 23 | #include <nvgpu/rbtree.h> | ||
| 24 | #include <nvgpu/vm_area.h> | ||
| 25 | #include <nvgpu/nvgpu_mem.h> | ||
| 26 | #include <nvgpu/page_allocator.h> | ||
| 27 | #include <nvgpu/vidmem.h> | ||
| 28 | #include <nvgpu/utils.h> | ||
| 29 | #include <nvgpu/gk20a.h> | ||
| 30 | |||
| 31 | #include <nvgpu/linux/vm.h> | ||
| 32 | #include <nvgpu/linux/nvgpu_mem.h> | ||
| 33 | |||
| 34 | #include "gk20a/mm_gk20a.h" | ||
| 35 | |||
| 36 | #include "platform_gk20a.h" | ||
| 37 | #include "os_linux.h" | ||
| 38 | #include "dmabuf.h" | ||
| 39 | #include "dmabuf_vidmem.h" | ||
| 40 | |||
| 41 | static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) | ||
| 42 | { | ||
| 43 | u32 core_flags = 0; | ||
| 44 | |||
| 45 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) | ||
| 46 | core_flags |= NVGPU_VM_MAP_FIXED_OFFSET; | ||
| 47 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) | ||
| 48 | core_flags |= NVGPU_VM_MAP_CACHEABLE; | ||
| 49 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT) | ||
| 50 | core_flags |= NVGPU_VM_MAP_IO_COHERENT; | ||
| 51 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE) | ||
| 52 | core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE; | ||
| 53 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC) | ||
| 54 | core_flags |= NVGPU_VM_MAP_L3_ALLOC; | ||
| 55 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) | ||
| 56 | core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; | ||
| 57 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC) | ||
| 58 | core_flags |= NVGPU_VM_MAP_PLATFORM_ATOMIC; | ||
| 59 | |||
| 60 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) | ||
| 61 | nvgpu_warn(g, "Ignoring deprecated flag: " | ||
| 62 | "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS"); | ||
| 63 | |||
| 64 | return core_flags; | ||
| 65 | } | ||
| 66 | |||
| 67 | static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( | ||
| 68 | struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind) | ||
| 69 | { | ||
| 70 | struct nvgpu_rbtree_node *node = NULL; | ||
| 71 | struct nvgpu_rbtree_node *root = vm->mapped_buffers; | ||
| 72 | |||
| 73 | nvgpu_rbtree_enum_start(0, &node, root); | ||
| 74 | |||
| 75 | while (node) { | ||
| 76 | struct nvgpu_mapped_buf *mapped_buffer = | ||
| 77 | mapped_buffer_from_rbtree_node(node); | ||
| 78 | |||
| 79 | if (mapped_buffer->os_priv.dmabuf == dmabuf && | ||
| 80 | mapped_buffer->kind == kind) | ||
| 81 | return mapped_buffer; | ||
| 82 | |||
| 83 | nvgpu_rbtree_enum_next(&node, node); | ||
| 84 | } | ||
| 85 | |||
| 86 | return NULL; | ||
| 87 | } | ||
| 88 | |||
| 89 | int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, | ||
| 90 | struct dma_buf **dmabuf, | ||
| 91 | u64 *offset) | ||
| 92 | { | ||
| 93 | struct nvgpu_mapped_buf *mapped_buffer; | ||
| 94 | struct gk20a *g = gk20a_from_vm(vm); | ||
| 95 | |||
| 96 | nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va); | ||
| 97 | |||
| 98 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | ||
| 99 | |||
| 100 | mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va); | ||
| 101 | if (!mapped_buffer) { | ||
| 102 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
| 103 | return -EINVAL; | ||
| 104 | } | ||
| 105 | |||
| 106 | *dmabuf = mapped_buffer->os_priv.dmabuf; | ||
| 107 | *offset = gpu_va - mapped_buffer->addr; | ||
| 108 | |||
| 109 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
| 110 | |||
| 111 | return 0; | ||
| 112 | } | ||
| 113 | |||
| 114 | u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf) | ||
| 115 | { | ||
| 116 | return os_buf->dmabuf->size; | ||
| 117 | } | ||
| 118 | |||
| 119 | /* | ||
| 120 | * vm->update_gmmu_lock must be held. This checks to see if we already have | ||
| 121 | * mapped the passed buffer into this VM. If so, just return the existing | ||
| 122 | * mapping address. | ||
| 123 | */ | ||
| 124 | struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | ||
| 125 | struct nvgpu_os_buffer *os_buf, | ||
| 126 | u64 map_addr, | ||
| 127 | u32 flags, | ||
| 128 | int kind) | ||
| 129 | { | ||
| 130 | struct gk20a *g = gk20a_from_vm(vm); | ||
| 131 | struct nvgpu_mapped_buf *mapped_buffer = NULL; | ||
| 132 | |||
| 133 | if (flags & NVGPU_VM_MAP_FIXED_OFFSET) { | ||
| 134 | mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr); | ||
| 135 | if (!mapped_buffer) | ||
| 136 | return NULL; | ||
| 137 | |||
| 138 | if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf || | ||
| 139 | mapped_buffer->kind != (u32)kind) | ||
| 140 | return NULL; | ||
| 141 | } else { | ||
| 142 | mapped_buffer = | ||
| 143 | __nvgpu_vm_find_mapped_buf_reverse(vm, | ||
| 144 | os_buf->dmabuf, | ||
| 145 | kind); | ||
| 146 | if (!mapped_buffer) | ||
| 147 | return NULL; | ||
| 148 | } | ||
| 149 | |||
| 150 | if (mapped_buffer->flags != flags) | ||
| 151 | return NULL; | ||
| 152 | |||
| 153 | /* | ||
| 154 | * If we find the mapping here then that means we have mapped it already | ||
| 155 | * and the prior pin and get must be undone. | ||
| 156 | */ | ||
| 157 | gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment, | ||
| 158 | mapped_buffer->os_priv.sgt); | ||
| 159 | dma_buf_put(os_buf->dmabuf); | ||
| 160 | |||
| 161 | nvgpu_log(g, gpu_dbg_map, | ||
| 162 | "gv: 0x%04x_%08x + 0x%-7zu " | ||
| 163 | "[dma: 0x%010llx, pa: 0x%010llx] " | ||
| 164 | "pgsz=%-3dKb as=%-2d " | ||
| 165 | "flags=0x%x apt=%s (reused)", | ||
| 166 | u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), | ||
| 167 | os_buf->dmabuf->size, | ||
| 168 | (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl), | ||
| 169 | (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), | ||
| 170 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | ||
| 171 | vm_aspace_id(vm), | ||
| 172 | mapped_buffer->flags, | ||
| 173 | nvgpu_aperture_str(g, | ||
| 174 | gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | ||
| 175 | |||
| 176 | return mapped_buffer; | ||
| 177 | } | ||
| 178 | |||
| 179 | int nvgpu_vm_map_linux(struct vm_gk20a *vm, | ||
| 180 | struct dma_buf *dmabuf, | ||
| 181 | u64 map_addr, | ||
| 182 | u32 flags, | ||
| 183 | u32 page_size, | ||
| 184 | s16 compr_kind, | ||
| 185 | s16 incompr_kind, | ||
| 186 | int rw_flag, | ||
| 187 | u64 buffer_offset, | ||
| 188 | u64 mapping_size, | ||
| 189 | struct vm_gk20a_mapping_batch *batch, | ||
| 190 | u64 *gpu_va) | ||
| 191 | { | ||
| 192 | struct gk20a *g = gk20a_from_vm(vm); | ||
| 193 | struct device *dev = dev_from_gk20a(g); | ||
| 194 | struct nvgpu_os_buffer os_buf; | ||
| 195 | struct sg_table *sgt; | ||
| 196 | struct nvgpu_sgt *nvgpu_sgt = NULL; | ||
| 197 | struct nvgpu_mapped_buf *mapped_buffer = NULL; | ||
| 198 | struct dma_buf_attachment *attachment; | ||
| 199 | int err = 0; | ||
| 200 | |||
| 201 | sgt = gk20a_mm_pin(dev, dmabuf, &attachment); | ||
| 202 | if (IS_ERR(sgt)) { | ||
| 203 | nvgpu_warn(g, "Failed to pin dma_buf!"); | ||
| 204 | return PTR_ERR(sgt); | ||
| 205 | } | ||
| 206 | os_buf.dmabuf = dmabuf; | ||
| 207 | os_buf.attachment = attachment; | ||
| 208 | os_buf.dev = dev; | ||
| 209 | |||
| 210 | if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) { | ||
| 211 | err = -EINVAL; | ||
| 212 | goto clean_up; | ||
| 213 | } | ||
| 214 | |||
| 215 | nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt); | ||
| 216 | if (!nvgpu_sgt) { | ||
| 217 | err = -ENOMEM; | ||
| 218 | goto clean_up; | ||
| 219 | } | ||
| 220 | |||
| 221 | mapped_buffer = nvgpu_vm_map(vm, | ||
| 222 | &os_buf, | ||
| 223 | nvgpu_sgt, | ||
| 224 | map_addr, | ||
| 225 | mapping_size, | ||
| 226 | buffer_offset, | ||
| 227 | rw_flag, | ||
| 228 | flags, | ||
| 229 | compr_kind, | ||
| 230 | incompr_kind, | ||
| 231 | batch, | ||
| 232 | gk20a_dmabuf_aperture(g, dmabuf)); | ||
| 233 | |||
| 234 | nvgpu_sgt_free(g, nvgpu_sgt); | ||
| 235 | |||
| 236 | if (IS_ERR(mapped_buffer)) { | ||
| 237 | err = PTR_ERR(mapped_buffer); | ||
| 238 | goto clean_up; | ||
| 239 | } | ||
| 240 | |||
| 241 | mapped_buffer->os_priv.dmabuf = dmabuf; | ||
| 242 | mapped_buffer->os_priv.attachment = attachment; | ||
| 243 | mapped_buffer->os_priv.sgt = sgt; | ||
| 244 | |||
| 245 | *gpu_va = mapped_buffer->addr; | ||
| 246 | return 0; | ||
| 247 | |||
| 248 | clean_up: | ||
| 249 | gk20a_mm_unpin(dev, dmabuf, attachment, sgt); | ||
| 250 | |||
| 251 | return err; | ||
| 252 | } | ||
| 253 | |||
| 254 | int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | ||
| 255 | int dmabuf_fd, | ||
| 256 | u64 *map_addr, | ||
| 257 | u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ | ||
| 258 | u32 page_size, | ||
| 259 | s16 compr_kind, | ||
| 260 | s16 incompr_kind, | ||
| 261 | u64 buffer_offset, | ||
| 262 | u64 mapping_size, | ||
| 263 | struct vm_gk20a_mapping_batch *batch) | ||
| 264 | { | ||
| 265 | struct gk20a *g = gk20a_from_vm(vm); | ||
| 266 | struct dma_buf *dmabuf; | ||
| 267 | u64 ret_va; | ||
| 268 | int err = 0; | ||
| 269 | |||
| 270 | /* get ref to the mem handle (released on unmap_locked) */ | ||
| 271 | dmabuf = dma_buf_get(dmabuf_fd); | ||
| 272 | if (IS_ERR(dmabuf)) { | ||
| 273 | nvgpu_warn(g, "%s: fd %d is not a dmabuf", | ||
| 274 | __func__, dmabuf_fd); | ||
| 275 | return PTR_ERR(dmabuf); | ||
| 276 | } | ||
| 277 | |||
| 278 | /* | ||
| 279 | * For regular (non-fixed-offset) maps we accept neither an input | ||
| 280 | * address nor a buffer_offset. | ||
| 281 | */ | ||
| 282 | if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) && | ||
| 283 | (buffer_offset || *map_addr)) { | ||
| 284 | nvgpu_err(g, | ||
| 285 | "Regular map with addr/buf offset is not supported!"); | ||
| 286 | dma_buf_put(dmabuf); | ||
| 287 | return -EINVAL; | ||
| 288 | } | ||
| 289 | |||
| 290 | /* | ||
| 291 | * For non-fixed mappings the map size is always the buffer size; | ||
| 292 | * userspace must therefore leave mapping_size as zero. | ||
| 293 | */ | ||
| 294 | if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { | ||
| 295 | nvgpu_err(g, "map_size && non-fixed-mapping!"); | ||
| 296 | dma_buf_put(dmabuf); | ||
| 297 | return -EINVAL; | ||
| 298 | } | ||
| 299 | |||
| 300 | /* verify that the mapping does not overflow the buffer, i.e. that | ||
| 301 | * (buffer_offset + mapping_size) <= dmabuf->size. | ||
| 302 | * | ||
| 303 | * Since buffer_offset + mapping_size could itself overflow u64, first | ||
| 304 | * check that mapping_size <= dmabuf->size, at which point we can | ||
| 305 | * subtract mapping_size from both sides for the final comparison. | ||
| 306 | */ | ||
| 307 | if ((mapping_size > dmabuf->size) || | ||
| 308 | (buffer_offset > (dmabuf->size - mapping_size))) { | ||
| 309 | nvgpu_err(g, | ||
| 310 | "buf size %llx < (offset(%llx) + map_size(%llx))", | ||
| 311 | (u64)dmabuf->size, buffer_offset, mapping_size); | ||
| 312 | dma_buf_put(dmabuf); | ||
| 313 | return -EINVAL; | ||
| 314 | } | ||
| 315 | |||
| 316 | err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); | ||
| 317 | if (err) { | ||
| 318 | dma_buf_put(dmabuf); | ||
| 319 | return err; | ||
| 320 | } | ||
| 321 | |||
| 322 | err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr, | ||
| 323 | nvgpu_vm_translate_linux_flags(g, flags), | ||
| 324 | page_size, | ||
| 325 | compr_kind, incompr_kind, | ||
| 326 | gk20a_mem_flag_none, | ||
| 327 | buffer_offset, | ||
| 328 | mapping_size, | ||
| 329 | batch, | ||
| 330 | &ret_va); | ||
| 331 | |||
| 332 | if (!err) | ||
| 333 | *map_addr = ret_va; | ||
| 334 | else | ||
| 335 | dma_buf_put(dmabuf); | ||
| 336 | |||
| 337 | return err; | ||
| 338 | } | ||
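The bounds check in nvgpu_vm_map_buffer() deliberately never computes buffer_offset + mapping_size, since that sum can wrap around in u64. The same idiom as a self-contained helper (illustrative only, not part of the driver):

	#include <linux/types.h>

	/*
	 * Returns true when [offset, offset + size) fits inside a buffer of
	 * buf_size bytes. Logically "offset + size <= buf_size", but the
	 * comparison is rearranged so the addition cannot wrap in u64.
	 */
	static bool range_fits(u64 offset, u64 size, u64 buf_size)
	{
		return size <= buf_size && offset <= buf_size - size;
	}

For example, offset = 16 with size = U64_MAX would slip past a naive "offset + size <= buf_size" check after wrap-around, but fails here because size already exceeds buf_size.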
| 339 | |||
| 340 | /* | ||
| 341 | * This is the callback for freeing the OS-specific components of an | ||
| 342 | * nvgpu_mapped_buf. It should never be called outside of the core MM | ||
| 343 | * framework! | ||
| 344 | * | ||
| 345 | * Note: the VM lock will be held. | ||
| 346 | */ | ||
| 347 | void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer) | ||
| 348 | { | ||
| 349 | struct vm_gk20a *vm = mapped_buffer->vm; | ||
| 350 | |||
| 351 | gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf, | ||
| 352 | mapped_buffer->os_priv.attachment, | ||
| 353 | mapped_buffer->os_priv.sgt); | ||
| 354 | |||
| 355 | dma_buf_put(mapped_buffer->os_priv.dmabuf); | ||
| 356 | } | ||
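nvgpu_vm_unmap_system() is the mirror image of the map path: it releases exactly the OS resources the map path acquired. The pairing, as inferred from the code above:

	/*
	 *   map path                           unmap path
	 *   --------                           ----------
	 *   dma_buf_get(dmabuf_fd)      <-->   dma_buf_put(os_priv.dmabuf)
	 *   gk20a_mm_pin(...)           <-->   gk20a_mm_unpin(...)
	 *
	 * (the sgt, attachment and dmabuf pointers are stashed in
	 *  mapped_buffer->os_priv at map time for exactly this purpose)
	 */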
diff --git a/include/os/linux/vpr.c b/include/os/linux/vpr.c
new file mode 100644
index 0000000..3a98125
--- /dev/null
+++ b/include/os/linux/vpr.c
@@ -0,0 +1,22 @@
| 1 | /* | ||
| 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <nvgpu/vpr.h> | ||
| 15 | |||
| 16 | #include <linux/init.h> | ||
| 17 | #include <linux/platform/tegra/common.h> | ||
| 18 | |||
| 19 | bool nvgpu_is_vpr_resize_enabled(void) | ||
| 20 | { | ||
| 21 | return tegra_is_vpr_resize_supported(); | ||
| 22 | } | ||
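vpr.c is a one-function shim that forwards to the Tegra platform layer, keeping the Linux-only <linux/platform/tegra/common.h> dependency out of common nvgpu code. A hypothetical caller sketch (the helper name and error choice are illustrative, not from the driver):

	#include <linux/errno.h>
	#include <nvgpu/vpr.h>

	/* Illustrative helper: gate a VPR resize path on platform support. */
	static int try_vpr_resize(void)
	{
		if (!nvgpu_is_vpr_resize_enabled())
			return -ENOSYS;	/* platform cannot resize VPR */

		/* ... proceed with the VPR resize sequence ... */
		return 0;
	}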
