Diffstat (limited to 'drivers/gpu'):

 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 95
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c |  1
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  6
 -rw-r--r--  drivers/gpu/nvgpu/vgpu/gr_vgpu.c   | 21

 4 files changed, 37 insertions, 86 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 49b70767..293ccd97 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -597,16 +597,16 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx)
 {
 	/* being defensive still... */
-	if (WARN_ON(ch_ctx->patch_ctx.cpu_va)) {
+	if (WARN_ON(ch_ctx->patch_ctx.mem.cpu_va)) {
 		gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?");
 		return -EBUSY;
 	}
 
-	ch_ctx->patch_ctx.cpu_va = vmap(ch_ctx->patch_ctx.pages,
-			PAGE_ALIGN(ch_ctx->patch_ctx.size) >> PAGE_SHIFT,
+	ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages,
+			PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT,
 			0, pgprot_writecombine(PAGE_KERNEL));
 
-	if (!ch_ctx->patch_ctx.cpu_va)
+	if (!ch_ctx->patch_ctx.mem.cpu_va)
 		return -ENOMEM;
 
 	return 0;
@@ -616,13 +616,13 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx)
 {
 	/* being defensive still... */
-	if (!ch_ctx->patch_ctx.cpu_va) {
+	if (!ch_ctx->patch_ctx.mem.cpu_va) {
 		gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?");
 		return -EINVAL;
 	}
 
-	vunmap(ch_ctx->patch_ctx.cpu_va);
-	ch_ctx->patch_ctx.cpu_va = NULL;
+	vunmap(ch_ctx->patch_ctx.mem.cpu_va);
+	ch_ctx->patch_ctx.mem.cpu_va = NULL;
 	return 0;
 }
 
@@ -642,7 +642,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 	/* we added an optimization prolog, epilog
 	 * to get rid of unnecessary maps and l2 invals.
 	 * but be defensive still... */
-	if (!ch_ctx->patch_ctx.cpu_va) {
+	if (!ch_ctx->patch_ctx.mem.cpu_va) {
 		int err;
 		gk20a_err(dev_from_gk20a(g),
 			"per-write ctx patch begin?");
@@ -654,7 +654,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 	} else
 		mapped_here = false;
 
-	patch_ptr = ch_ctx->patch_ctx.cpu_va;
+	patch_ptr = ch_ctx->patch_ctx.mem.cpu_va;
 	patch_slot = ch_ctx->patch_ctx.data_count * 2;
 
 	gk20a_mem_wr32(patch_ptr, patch_slot++, addr);
@@ -1622,8 +1622,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
 
-	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
-	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
+	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
+	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0,
 		ch_ctx->patch_ctx.data_count);
@@ -2568,82 +2568,33 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
 		struct channel_gk20a *c)
 {
 	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
-	struct device *d = dev_from_gk20a(g);
 	struct vm_gk20a *ch_vm = c->vm;
-	DEFINE_DMA_ATTRS(attrs);
-	struct sg_table *sgt;
 	int err = 0;
-	dma_addr_t iova;
 
 	gk20a_dbg_fn("");
 
-	patch_ctx->size = 128 * sizeof(u32);
-	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-	patch_ctx->pages = dma_alloc_attrs(d, patch_ctx->size,
-				&iova, GFP_KERNEL,
-				&attrs);
-	if (!patch_ctx->pages)
-		return -ENOMEM;
-
-	patch_ctx->iova = iova;
-	err = gk20a_get_sgtable_from_pages(d, &sgt, patch_ctx->pages,
-			patch_ctx->iova, patch_ctx->size);
+	err = gk20a_gmmu_alloc_map_attr(ch_vm, DMA_ATTR_NO_KERNEL_MAPPING,
+			128 * sizeof(u32), &patch_ctx->mem);
 	if (err)
-		goto err_free;
-
-	patch_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, patch_ctx->size,
-			0, gk20a_mem_flag_none);
-	if (!patch_ctx->gpu_va)
-		goto err_free_sgtable;
-
-	gk20a_free_sgtable(&sgt);
+		return err;
 
 	gk20a_dbg_fn("done");
 	return 0;
-
-err_free_sgtable:
-	gk20a_free_sgtable(&sgt);
-err_free:
-	dma_free_attrs(d, patch_ctx->size,
-		patch_ctx->pages, patch_ctx->iova, &attrs);
-	patch_ctx->pages = NULL;
-	patch_ctx->iova = 0;
-	gk20a_err(dev_from_gk20a(g), "fail");
-	return err;
-}
-
-static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
-{
-	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
-	struct vm_gk20a *ch_vm = c->vm;
-
-	gk20a_dbg_fn("");
-
-	if (patch_ctx->gpu_va)
-		gk20a_gmmu_unmap(ch_vm, patch_ctx->gpu_va,
-			patch_ctx->size, gk20a_mem_flag_none);
-	patch_ctx->gpu_va = 0;
-	patch_ctx->data_count = 0;
 }
 
 static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
 {
 	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
 	struct gk20a *g = c->g;
-	struct device *d = dev_from_gk20a(g);
-	DEFINE_DMA_ATTRS(attrs);
 
 	gk20a_dbg_fn("");
 
-	gr_gk20a_unmap_channel_patch_ctx(c);
+	if (patch_ctx->mem.gpu_va)
+		gk20a_gmmu_unmap(c->vm, patch_ctx->mem.gpu_va,
+				patch_ctx->mem.size, gk20a_mem_flag_none);
 
-	if (patch_ctx->pages) {
-		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-		dma_free_attrs(d, patch_ctx->size,
-			patch_ctx->pages, patch_ctx->iova, &attrs);
-		patch_ctx->pages = NULL;
-		patch_ctx->iova = 0;
-	}
+	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &patch_ctx->mem);
+	patch_ctx->data_count = 0;
 }
 
 void gk20a_free_channel_ctx(struct channel_gk20a *c)
@@ -2757,7 +2708,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 	}
 
 	/* allocate patch buffer */
-	if (ch_ctx->patch_ctx.pages == NULL) {
+	if (ch_ctx->patch_ctx.mem.sgt == NULL) {
 		err = gr_gk20a_alloc_channel_patch_ctx(g, c);
 		if (err) {
 			gk20a_err(dev_from_gk20a(g),
@@ -2883,7 +2834,7 @@ int gk20a_free_obj_ctx(struct channel_gk20a *c,
 		gk20a_disable_channel(c,
 			!c->has_timedout,
 			timeout);
-		gr_gk20a_unmap_channel_patch_ctx(c);
+		gr_gk20a_free_channel_patch_ctx(c);
 	}
 
 	return 0;
@@ -6081,8 +6032,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 					 addr, data, true);
 
-		vaddr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
-		vaddr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
+		vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
+		vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
 		gk20a_mem_wr32(context +
 			ctxsw_prog_main_image_patch_count_o(),
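
Note: the net effect of the gr_gk20a.c hunks is that the hand-rolled dma_alloc_attrs() / gk20a_get_sgtable_from_pages() / gk20a_gmmu_map() sequence collapses into one gk20a_gmmu_alloc_map_attr() call, with gk20a_gmmu_free_attr() as its counterpart. Below is a hedged sketch of the resulting lifetime, assembled only from calls visible in the hunks above; example_patch_buf_lifetime() itself is hypothetical.

/*
 * Sketch only: illustrates the alloc -> begin/end -> implicit-vmap flow
 * introduced by this change, not actual driver code.
 */
static int example_patch_buf_lifetime(struct gk20a *g, struct channel_gk20a *c)
{
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	int err;

	/* One call replaces dma_alloc_attrs() + gk20a_get_sgtable_from_pages()
	 * + gk20a_gmmu_map().  With DMA_ATTR_NO_KERNEL_MAPPING only
	 * mem.pages is populated; mem.cpu_va stays NULL. */
	err = gk20a_gmmu_alloc_map_attr(c->vm, DMA_ATTR_NO_KERNEL_MAPPING,
					128 * sizeof(u32),
					&ch_ctx->patch_ctx.mem);
	if (err)
		return err;

	/* CPU writes still go through the begin/end pair, which vmap()s
	 * mem.pages temporarily precisely because there is no permanent
	 * kernel mapping. */
	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
	if (err)
		return err;
	/* ... gk20a_mem_wr32() into ch_ctx->patch_ctx.mem.cpu_va here ... */
	return gr_gk20a_ctx_patch_write_end(g, ch_ctx);
}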
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index fa84337b..bde8afae 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1616,6 +1616,7 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr,
 	}
 
 	mem->cpu_va = NULL;
+	mem->pages = NULL;
 	}
 
 	if (mem->sgt)
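
Note: this one-line addition is load-bearing. The native path gates reallocation on patch_ctx.mem.sgt and the vgpu path on patch_ctx.mem.pages, so a pages pointer left dangling after gk20a_gmmu_free_attr() would make a freed buffer look still-allocated. A sketch of the guard it keeps honest follows; the wrapper name is hypothetical, the guard mirrors gk20a_alloc_obj_ctx() above.

/* Hypothetical wrapper showing the allocate-once guard pattern. */
static int example_ensure_patch_ctx(struct gk20a *g, struct channel_gk20a *c)
{
	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;

	if (patch_ctx->mem.sgt)
		return 0;	/* already allocated and GMMU-mapped */

	/* Relies on gk20a_gmmu_free_attr() having NULLed the mem fields,
	 * otherwise a stale pointer would suppress this reallocation. */
	return gk20a_gmmu_alloc_map_attr(c->vm, DMA_ATTR_NO_KERNEL_MAPPING,
					 128 * sizeof(u32), &patch_ctx->mem);
}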
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 91659537..df9ebec3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -68,11 +68,7 @@ struct gpfifo_desc {
 };
 
 struct patch_desc {
-	struct page **pages;
-	u64 iova;
-	size_t size;
-	void *cpu_va;
-	u64 gpu_va;
+	struct mem_desc mem;
 	u32 data_count;
 };
 
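
Note: the five ad-hoc fields collapse into the driver's common mem_desc. Inferred purely from the members this patch reads and writes, mem_desc carries at least the following; this shape is an assumption for illustration, and the real definition lives elsewhere in mm_gk20a.h and may have more members.

/* Inferred sketch only -- not the actual struct mem_desc definition. */
struct mem_desc_sketch {
	struct page **pages;	/* backing pages (DMA_ATTR_NO_KERNEL_MAPPING) */
	struct sg_table *sgt;	/* scatter-gather table behind the GMMU map */
	void *cpu_va;		/* kernel VA, NULL until vmap()ed on demand */
	u64 gpu_va;		/* channel GMMU virtual address */
	size_t size;		/* allocation size in bytes */
};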
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index fd8bb81b..8d0bb6cf 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -321,18 +321,20 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
-	patch_ctx->size = 128 * sizeof(u32);
-	patch_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, patch_ctx->size, 0);
-	if (!patch_ctx->gpu_va)
+	patch_ctx->mem.size = 128 * sizeof(u32);
+	patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm,
+					patch_ctx->mem.size, 0);
+	if (!patch_ctx->mem.gpu_va)
 		return -ENOMEM;
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX;
 	msg.handle = platform->virt_handle;
 	p->handle = c->virt_ctx;
-	p->patch_ctx_va = patch_ctx->gpu_va;
+	p->patch_ctx_va = patch_ctx->mem.gpu_va;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	if (err || msg.ret) {
-		gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
+		gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+				patch_ctx->mem.size, 0);
 		err = -ENOMEM;
 	}
 
@@ -347,7 +349,7 @@ static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
 
 	gk20a_dbg_fn("");
 
-	if (patch_ctx->gpu_va) {
+	if (patch_ctx->mem.gpu_va) {
 		struct tegra_vgpu_cmd_msg msg;
 		struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
 		int err;
@@ -358,8 +360,9 @@ static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
 		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 		WARN_ON(err || msg.ret);
 
-		gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
-		patch_ctx->gpu_va = 0;
+		gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+				patch_ctx->mem.size, 0);
+		patch_ctx->mem.gpu_va = 0;
 	}
 }
 
@@ -438,7 +441,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
 	}
 
 	/* allocate patch buffer */
-	if (ch_ctx->patch_ctx.pages == NULL) {
+	if (ch_ctx->patch_ctx.mem.pages == NULL) {
 		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
 		if (err) {
 			gk20a_err(dev_from_gk20a(g),
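
Note: on the virtualized path no guest-side memory is allocated at all -- the guest only reserves a window in the channel VM and asks the host to back it over the tegra_vgpu IPC channel, rolling the reservation back if the host refuses. A condensed, hypothetical rendering of that reserve/request/rollback shape follows; the IPC plumbing is elided and represented by a placeholder, and the full version is vgpu_gr_alloc_channel_patch_ctx() above.

/* Sketch only: condensed from vgpu_gr_alloc_channel_patch_ctx(). */
static int example_vgpu_patch_alloc(struct vm_gk20a *ch_vm,
				    struct patch_desc *patch_ctx)
{
	int err;

	/* Guest side only reserves GPU VA; the host supplies the memory. */
	patch_ctx->mem.size = 128 * sizeof(u32);
	patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm,
						  patch_ctx->mem.size, 0);
	if (!patch_ctx->mem.gpu_va)
		return -ENOMEM;

	/* Placeholder for vgpu_comm_sendrecv() carrying
	 * TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX with
	 * p->patch_ctx_va = patch_ctx->mem.gpu_va. */
	err = 0;

	if (err) {
		/* Host refused: roll the VA reservation back for reuse. */
		gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
				 patch_ctx->mem.size, 0);
		patch_ctx->mem.gpu_va = 0;
		return -ENOMEM;
	}
	return 0;
}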