diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 66 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 74 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 14 |
9 files changed, 105 insertions, 82 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 18f2e896..ad452919 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -143,8 +143,8 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c) | |||
143 | 143 | ||
144 | gk20a_mem_wr32(g, &c->inst_block, | 144 | gk20a_mem_wr32(g, &c->inst_block, |
145 | ram_in_ramfc_w() + ram_fc_userd_w(), | 145 | ram_in_ramfc_w() + ram_fc_userd_w(), |
146 | (g->mm.vidmem_is_vidmem ? | 146 | gk20a_aperture_mask(g, &g->fifo.userd, |
147 | pbdma_userd_target_sys_mem_ncoh_f() : | 147 | pbdma_userd_target_sys_mem_ncoh_f(), |
148 | pbdma_userd_target_vid_mem_f()) | | 148 | pbdma_userd_target_vid_mem_f()) | |
149 | pbdma_userd_addr_f(addr_lo)); | 149 | pbdma_userd_addr_f(addr_lo)); |
150 | 150 | ||
@@ -360,8 +360,8 @@ static void channel_gk20a_bind(struct channel_gk20a *c) | |||
360 | 360 | ||
361 | gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), | 361 | gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), |
362 | ccsr_channel_inst_ptr_f(inst_ptr) | | 362 | ccsr_channel_inst_ptr_f(inst_ptr) | |
363 | (g->mm.vidmem_is_vidmem ? | 363 | gk20a_aperture_mask(g, &c->inst_block, |
364 | ccsr_channel_inst_target_sys_mem_ncoh_f() : | 364 | ccsr_channel_inst_target_sys_mem_ncoh_f(), |
365 | ccsr_channel_inst_target_vid_mem_f()) | | 365 | ccsr_channel_inst_target_vid_mem_f()) | |
366 | ccsr_channel_inst_bind_true_f()); | 366 | ccsr_channel_inst_bind_true_f()); |
367 | 367 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 04695d11..0e006cb6 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -2751,8 +2751,8 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2751 | if (count != 0) { | 2751 | if (count != 0) { |
2752 | gk20a_writel(g, fifo_runlist_base_r(), | 2752 | gk20a_writel(g, fifo_runlist_base_r(), |
2753 | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | | 2753 | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | |
2754 | (g->mm.vidmem_is_vidmem ? | 2754 | gk20a_aperture_mask(g, &runlist->mem[new_buf], |
2755 | fifo_runlist_base_target_sys_mem_ncoh_f() : | 2755 | fifo_runlist_base_target_sys_mem_ncoh_f(), |
2756 | fifo_runlist_base_target_vid_mem_f())); | 2756 | fifo_runlist_base_target_vid_mem_f())); |
2757 | } | 2757 | } |
2758 | 2758 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 7bd9775e..45e16ad9 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -521,11 +521,11 @@ struct gpu_ops { | |||
521 | void (*remove_bar2_vm)(struct gk20a *g); | 521 | void (*remove_bar2_vm)(struct gk20a *g); |
522 | const struct gk20a_mmu_level * | 522 | const struct gk20a_mmu_level * |
523 | (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); | 523 | (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); |
524 | void (*init_pdb)(struct gk20a *g, struct mem_desc *mem, | 524 | void (*init_pdb)(struct gk20a *g, struct mem_desc *inst_block, |
525 | u64 pdb_addr); | 525 | struct vm_gk20a *vm); |
526 | u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl, | 526 | u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl, |
527 | u32 flags); | 527 | u32 flags); |
528 | int (*bar1_bind)(struct gk20a *g, u64 bar1_iova); | 528 | int (*bar1_bind)(struct gk20a *g, struct mem_desc *bar1_inst); |
529 | size_t (*get_vidmem_size)(struct gk20a *g); | 529 | size_t (*get_vidmem_size)(struct gk20a *g); |
530 | } mm; | 530 | } mm; |
531 | struct { | 531 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 9790af05..bdc65cab 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -674,11 +674,24 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g, | |||
674 | } | 674 | } |
675 | } | 675 | } |
676 | 676 | ||
677 | static u32 fecs_current_ctx_data(struct gk20a *g, struct mem_desc *inst_block) | ||
678 | { | ||
679 | u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block) | ||
680 | >> ram_in_base_shift_v()); | ||
681 | u32 aperture = gk20a_aperture_mask(g, inst_block, | ||
682 | gr_fecs_current_ctx_target_sys_mem_ncoh_f(), | ||
683 | gr_fecs_current_ctx_target_vid_mem_f()); | ||
684 | |||
685 | return gr_fecs_current_ctx_ptr_f(ptr) | aperture | | ||
686 | gr_fecs_current_ctx_valid_f(1); | ||
687 | } | ||
688 | |||
677 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | 689 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, |
678 | struct channel_gk20a *c) | 690 | struct channel_gk20a *c) |
679 | { | 691 | { |
680 | u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) | 692 | u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) |
681 | >> ram_in_base_shift_v()); | 693 | >> ram_in_base_shift_v()); |
694 | u32 data = fecs_current_ctx_data(g, &c->inst_block); | ||
682 | u32 ret; | 695 | u32 ret; |
683 | 696 | ||
684 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", | 697 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", |
@@ -687,11 +700,7 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | |||
687 | ret = gr_gk20a_submit_fecs_method_op(g, | 700 | ret = gr_gk20a_submit_fecs_method_op(g, |
688 | (struct fecs_method_op_gk20a) { | 701 | (struct fecs_method_op_gk20a) { |
689 | .method.addr = gr_fecs_method_push_adr_bind_pointer_v(), | 702 | .method.addr = gr_fecs_method_push_adr_bind_pointer_v(), |
690 | .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | | 703 | .method.data = data, |
691 | (g->mm.vidmem_is_vidmem ? | ||
692 | gr_fecs_current_ctx_target_sys_mem_ncoh_f() : | ||
693 | gr_fecs_current_ctx_target_vid_mem_f()) | | ||
694 | gr_fecs_current_ctx_valid_f(1)), | ||
695 | .mailbox = { .id = 0, .data = 0, | 704 | .mailbox = { .id = 0, .data = 0, |
696 | .clr = 0x30, | 705 | .clr = 0x30, |
697 | .ret = NULL, | 706 | .ret = NULL, |
@@ -1392,21 +1401,12 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) | |||
1392 | struct gk20a *g = c->g; | 1401 | struct gk20a *g = c->g; |
1393 | int ret; | 1402 | int ret; |
1394 | 1403 | ||
1395 | u32 inst_base_ptr = | ||
1396 | u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) | ||
1397 | >> ram_in_base_shift_v()); | ||
1398 | |||
1399 | |||
1400 | gk20a_dbg_fn(""); | 1404 | gk20a_dbg_fn(""); |
1401 | 1405 | ||
1402 | ret = gr_gk20a_submit_fecs_method_op(g, | 1406 | ret = gr_gk20a_submit_fecs_method_op(g, |
1403 | (struct fecs_method_op_gk20a) { | 1407 | (struct fecs_method_op_gk20a) { |
1404 | .method.addr = save_type, | 1408 | .method.addr = save_type, |
1405 | .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | | 1409 | .method.data = fecs_current_ctx_data(g, &c->inst_block), |
1406 | (g->mm.vidmem_is_vidmem ? | ||
1407 | gr_fecs_current_ctx_target_sys_mem_ncoh_f() : | ||
1408 | gr_fecs_current_ctx_target_vid_mem_f()) | | ||
1409 | gr_fecs_current_ctx_valid_f(1)), | ||
1410 | .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL, | 1410 | .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL, |
1411 | .ok = 1, .fail = 2, | 1411 | .ok = 1, .fail = 2, |
1412 | }, | 1412 | }, |
@@ -1987,18 +1987,11 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1987 | gk20a_mem_end(g, mem); | 1987 | gk20a_mem_end(g, mem); |
1988 | 1988 | ||
1989 | if (tegra_platform_is_linsim()) { | 1989 | if (tegra_platform_is_linsim()) { |
1990 | u32 inst_base_ptr = | 1990 | u32 mdata = fecs_current_ctx_data(g, &c->inst_block); |
1991 | u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) | ||
1992 | >> ram_in_base_shift_v()); | ||
1993 | 1991 | ||
1994 | ret = gr_gk20a_submit_fecs_method_op(g, | 1992 | ret = gr_gk20a_submit_fecs_method_op(g, |
1995 | (struct fecs_method_op_gk20a) { | 1993 | (struct fecs_method_op_gk20a) { |
1996 | .method.data = | 1994 | .method.data = mdata, |
1997 | (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | | ||
1998 | (g->mm.vidmem_is_vidmem ? | ||
1999 | gr_fecs_current_ctx_target_sys_mem_ncoh_f() : | ||
2000 | gr_fecs_current_ctx_target_vid_mem_f()) | | ||
2001 | gr_fecs_current_ctx_valid_f(1)), | ||
2002 | .method.addr = | 1995 | .method.addr = |
2003 | gr_fecs_method_push_adr_restore_golden_v(), | 1996 | gr_fecs_method_push_adr_restore_golden_v(), |
2004 | .mailbox = { | 1997 | .mailbox = { |
@@ -4507,8 +4500,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4507 | addr >>= fb_mmu_debug_wr_addr_alignment_v(); | 4500 | addr >>= fb_mmu_debug_wr_addr_alignment_v(); |
4508 | 4501 | ||
4509 | gk20a_writel(g, fb_mmu_debug_wr_r(), | 4502 | gk20a_writel(g, fb_mmu_debug_wr_r(), |
4510 | (g->mm.vidmem_is_vidmem ? | 4503 | gk20a_aperture_mask(g, &gr->mmu_wr_mem, |
4511 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f() : | 4504 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), |
4512 | fb_mmu_debug_wr_aperture_vid_mem_f()) | | 4505 | fb_mmu_debug_wr_aperture_vid_mem_f()) | |
4513 | fb_mmu_debug_wr_vol_false_f() | | 4506 | fb_mmu_debug_wr_vol_false_f() | |
4514 | fb_mmu_debug_wr_addr_f(addr)); | 4507 | fb_mmu_debug_wr_addr_f(addr)); |
@@ -4517,8 +4510,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4517 | addr >>= fb_mmu_debug_rd_addr_alignment_v(); | 4510 | addr >>= fb_mmu_debug_rd_addr_alignment_v(); |
4518 | 4511 | ||
4519 | gk20a_writel(g, fb_mmu_debug_rd_r(), | 4512 | gk20a_writel(g, fb_mmu_debug_rd_r(), |
4520 | (g->mm.vidmem_is_vidmem ? | 4513 | gk20a_aperture_mask(g, &gr->mmu_rd_mem, |
4521 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f() : | 4514 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), |
4522 | fb_mmu_debug_rd_aperture_vid_mem_f()) | | 4515 | fb_mmu_debug_rd_aperture_vid_mem_f()) | |
4523 | fb_mmu_debug_rd_vol_false_f() | | 4516 | fb_mmu_debug_rd_vol_false_f() | |
4524 | fb_mmu_debug_rd_addr_f(addr)); | 4517 | fb_mmu_debug_rd_addr_f(addr)); |
@@ -4966,8 +4959,7 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g) | |||
4966 | } | 4959 | } |
4967 | 4960 | ||
4968 | 4961 | ||
4969 | err = gr_gk20a_fecs_set_reglist_bind_inst(g, | 4962 | err = gr_gk20a_fecs_set_reglist_bind_inst(g, &mm->pmu.inst_block); |
4970 | gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block)); | ||
4971 | if (err) { | 4963 | if (err) { |
4972 | gk20a_err(dev_from_gk20a(g), | 4964 | gk20a_err(dev_from_gk20a(g), |
4973 | "fail to bind pmu inst to gr"); | 4965 | "fail to bind pmu inst to gr"); |
@@ -5245,8 +5237,7 @@ int gk20a_gr_reset(struct gk20a *g) | |||
5245 | return err; | 5237 | return err; |
5246 | } | 5238 | } |
5247 | 5239 | ||
5248 | err = gr_gk20a_fecs_set_reglist_bind_inst(g, | 5240 | err = gr_gk20a_fecs_set_reglist_bind_inst(g, &g->mm.pmu.inst_block); |
5249 | gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)); | ||
5250 | if (err) { | 5241 | if (err) { |
5251 | gk20a_err(dev_from_gk20a(g), | 5242 | gk20a_err(dev_from_gk20a(g), |
5252 | "fail to bind pmu inst to gr"); | 5243 | "fail to bind pmu inst to gr"); |
@@ -6346,16 +6337,15 @@ int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size) | |||
6346 | .mailbox.fail = 0}, false); | 6337 | .mailbox.fail = 0}, false); |
6347 | } | 6338 | } |
6348 | 6339 | ||
6349 | int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) | 6340 | int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, |
6341 | struct mem_desc *inst_block) | ||
6350 | { | 6342 | { |
6343 | u32 data = fecs_current_ctx_data(g, inst_block); | ||
6344 | |||
6351 | return gr_gk20a_submit_fecs_method_op(g, | 6345 | return gr_gk20a_submit_fecs_method_op(g, |
6352 | (struct fecs_method_op_gk20a){ | 6346 | (struct fecs_method_op_gk20a){ |
6353 | .mailbox.id = 4, | 6347 | .mailbox.id = 4, |
6354 | .mailbox.data = (gr_fecs_current_ctx_ptr_f(addr >> 12) | | 6348 | .mailbox.data = data, |
6355 | gr_fecs_current_ctx_valid_f(1) | | ||
6356 | (g->mm.vidmem_is_vidmem ? | ||
6357 | gr_fecs_current_ctx_target_sys_mem_ncoh_f() : | ||
6358 | gr_fecs_current_ctx_target_vid_mem_f())), | ||
6359 | .mailbox.clr = ~0, | 6349 | .mailbox.clr = ~0, |
6360 | .method.data = 1, | 6350 | .method.data = 1, |
6361 | .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(), | 6351 | .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(), |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index ad6d8049..aa83f85a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -476,7 +476,8 @@ int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr); | |||
476 | 476 | ||
477 | /* pmu */ | 477 | /* pmu */ |
478 | int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size); | 478 | int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size); |
479 | int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr); | 479 | int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, |
480 | struct mem_desc *inst_block); | ||
480 | int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va); | 481 | int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va); |
481 | 482 | ||
482 | void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine); | 483 | void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index e0b9a720..a274820a 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -92,10 +92,10 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) | |||
92 | u32 hi = (u32)((addr & ~(u64)0xfffff) | 92 | u32 hi = (u32)((addr & ~(u64)0xfffff) |
93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | 93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); |
94 | u32 lo = (u32)(addr & 0xfffff); | 94 | u32 lo = (u32)(addr & 0xfffff); |
95 | u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ? | 95 | u32 win = gk20a_aperture_mask(g, mem, |
96 | bus_bar0_window_target_sys_mem_noncoherent_f() : | 96 | bus_bar0_window_target_sys_mem_noncoherent_f(), |
97 | bus_bar0_window_target_vid_mem_f()) | | 97 | bus_bar0_window_target_vid_mem_f()) | |
98 | bus_bar0_window_base_f(hi); | 98 | bus_bar0_window_base_f(hi); |
99 | 99 | ||
100 | gk20a_dbg(gpu_dbg_mem, | 100 | gk20a_dbg(gpu_dbg_mem, |
101 | "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)", | 101 | "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)", |
@@ -817,8 +817,6 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
817 | int gk20a_init_mm_setup_hw(struct gk20a *g) | 817 | int gk20a_init_mm_setup_hw(struct gk20a *g) |
818 | { | 818 | { |
819 | struct mm_gk20a *mm = &g->mm; | 819 | struct mm_gk20a *mm = &g->mm; |
820 | struct mem_desc *inst_block = &mm->bar1.inst_block; | ||
821 | u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block); | ||
822 | int err; | 820 | int err; |
823 | 821 | ||
824 | gk20a_dbg_fn(""); | 822 | gk20a_dbg_fn(""); |
@@ -832,7 +830,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) | |||
832 | g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) | 830 | g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) |
833 | >> 8); | 831 | >> 8); |
834 | 832 | ||
835 | g->ops.mm.bar1_bind(g, inst_pa); | 833 | g->ops.mm.bar1_bind(g, &mm->bar1.inst_block); |
836 | 834 | ||
837 | if (g->ops.mm.init_bar2_mm_hw_setup) { | 835 | if (g->ops.mm.init_bar2_mm_hw_setup) { |
838 | err = g->ops.mm.init_bar2_mm_hw_setup(g); | 836 | err = g->ops.mm.init_bar2_mm_hw_setup(g); |
@@ -847,17 +845,19 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) | |||
847 | return 0; | 845 | return 0; |
848 | } | 846 | } |
849 | 847 | ||
850 | static int gk20a_mm_bar1_bind(struct gk20a *g, u64 bar1_iova) | 848 | static int gk20a_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst) |
851 | { | 849 | { |
852 | u64 inst_pa = (u32)(bar1_iova >> bar1_instance_block_shift_gk20a()); | 850 | u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); |
853 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); | 851 | u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); |
852 | |||
853 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); | ||
854 | 854 | ||
855 | gk20a_writel(g, bus_bar1_block_r(), | 855 | gk20a_writel(g, bus_bar1_block_r(), |
856 | (g->mm.vidmem_is_vidmem ? | 856 | gk20a_aperture_mask(g, bar1_inst, |
857 | bus_bar1_block_target_sys_mem_ncoh_f() : | 857 | bus_bar1_block_target_sys_mem_ncoh_f(), |
858 | bus_bar1_block_target_vid_mem_f()) | | 858 | bus_bar1_block_target_vid_mem_f()) | |
859 | bus_bar1_block_mode_virtual_f() | | 859 | bus_bar1_block_mode_virtual_f() | |
860 | bus_bar1_block_ptr_f(inst_pa)); | 860 | bus_bar1_block_ptr_f(ptr_v)); |
861 | 861 | ||
862 | return 0; | 862 | return 0; |
863 | } | 863 | } |
@@ -2559,6 +2559,29 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | |||
2559 | return gk20a_gmmu_free_attr(g, 0, mem); | 2559 | return gk20a_gmmu_free_attr(g, 0, mem); |
2560 | } | 2560 | } |
2561 | 2561 | ||
2562 | u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture, | ||
2563 | u32 sysmem_mask, u32 vidmem_mask) | ||
2564 | { | ||
2565 | switch (aperture) { | ||
2566 | case APERTURE_SYSMEM: | ||
2567 | /* sysmem for dgpus; some igpus consider system memory vidmem */ | ||
2568 | return g->mm.vidmem_is_vidmem ? sysmem_mask : vidmem_mask; | ||
2569 | case APERTURE_VIDMEM: | ||
2570 | /* for dgpus only */ | ||
2571 | return vidmem_mask; | ||
2572 | case APERTURE_INVALID: | ||
2573 | WARN_ON("Bad aperture"); | ||
2574 | } | ||
2575 | return 0; | ||
2576 | } | ||
2577 | |||
2578 | u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem, | ||
2579 | u32 sysmem_mask, u32 vidmem_mask) | ||
2580 | { | ||
2581 | return __gk20a_aperture_mask(g, mem->aperture, | ||
2582 | sysmem_mask, vidmem_mask); | ||
2583 | } | ||
2584 | |||
2562 | int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) | 2585 | int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) |
2563 | { | 2586 | { |
2564 | return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem); | 2587 | return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem); |
@@ -4049,19 +4072,23 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm) | |||
4049 | false, false, "cde"); | 4072 | false, false, "cde"); |
4050 | } | 4073 | } |
4051 | 4074 | ||
4052 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr) | 4075 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block, |
4076 | struct vm_gk20a *vm) | ||
4053 | { | 4077 | { |
4078 | u64 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); | ||
4054 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 4079 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
4055 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | 4080 | u32 pdb_addr_hi = u64_hi32(pdb_addr); |
4056 | 4081 | ||
4057 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), | 4082 | gk20a_dbg_info("pde pa=0x%llx", pdb_addr); |
4058 | (g->mm.vidmem_is_vidmem ? | 4083 | |
4059 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : | 4084 | gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), |
4085 | gk20a_aperture_mask(g, &vm->pdb.mem, | ||
4086 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), | ||
4060 | ram_in_page_dir_base_target_vid_mem_f()) | | 4087 | ram_in_page_dir_base_target_vid_mem_f()) | |
4061 | ram_in_page_dir_base_vol_true_f() | | 4088 | ram_in_page_dir_base_vol_true_f() | |
4062 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | 4089 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); |
4063 | 4090 | ||
4064 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), | 4091 | gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), |
4065 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 4092 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
4066 | } | 4093 | } |
4067 | 4094 | ||
@@ -4069,14 +4096,11 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, | |||
4069 | u32 big_page_size) | 4096 | u32 big_page_size) |
4070 | { | 4097 | { |
4071 | struct gk20a *g = gk20a_from_vm(vm); | 4098 | struct gk20a *g = gk20a_from_vm(vm); |
4072 | u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); | ||
4073 | 4099 | ||
4074 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", | 4100 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", |
4075 | gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); | 4101 | gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); |
4076 | 4102 | ||
4077 | gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); | 4103 | g->ops.mm.init_pdb(g, inst_block, vm); |
4078 | |||
4079 | g->ops.mm.init_pdb(g, inst_block, pde_addr); | ||
4080 | 4104 | ||
4081 | gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), | 4105 | gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), |
4082 | u64_lo32(vm->va_limit - 1) & ~0xfff); | 4106 | u64_lo32(vm->va_limit - 1) & ~0xfff); |
@@ -4311,7 +4335,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | |||
4311 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | 4335 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) |
4312 | { | 4336 | { |
4313 | struct gk20a *g = gk20a_from_vm(vm); | 4337 | struct gk20a *g = gk20a_from_vm(vm); |
4314 | u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(vm->mm->g, | 4338 | u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(g, |
4315 | vm->pdb.mem.sgt->sgl, 0) >> 12); | 4339 | vm->pdb.mem.sgt->sgl, 0) >> 12); |
4316 | u32 data; | 4340 | u32 data; |
4317 | s32 retry = 2000; | 4341 | s32 retry = 2000; |
@@ -4348,8 +4372,8 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | |||
4348 | 4372 | ||
4349 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), | 4373 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), |
4350 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | | 4374 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | |
4351 | (g->mm.vidmem_is_vidmem ? | 4375 | gk20a_aperture_mask(g, &vm->pdb.mem, |
4352 | fb_mmu_invalidate_pdb_aperture_sys_mem_f() : | 4376 | fb_mmu_invalidate_pdb_aperture_sys_mem_f(), |
4353 | fb_mmu_invalidate_pdb_aperture_vid_mem_f())); | 4377 | fb_mmu_invalidate_pdb_aperture_vid_mem_f())); |
4354 | 4378 | ||
4355 | gk20a_writel(g, fb_mmu_invalidate_r(), | 4379 | gk20a_writel(g, fb_mmu_invalidate_r(), |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7bb4d011..7d3b371c 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -600,6 +600,11 @@ static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem) | |||
600 | return 0; | 600 | return 0; |
601 | } | 601 | } |
602 | 602 | ||
603 | u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture, | ||
604 | u32 sysmem_mask, u32 vidmem_mask); | ||
605 | u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem, | ||
606 | u32 sysmem_mask, u32 vidmem_mask); | ||
607 | |||
603 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 608 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
604 | u64 map_offset, | 609 | u64 map_offset, |
605 | struct sg_table *sgt, | 610 | struct sg_table *sgt, |
@@ -767,7 +772,8 @@ struct gpu_ops; | |||
767 | void gk20a_init_mm(struct gpu_ops *gops); | 772 | void gk20a_init_mm(struct gpu_ops *gops); |
768 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, | 773 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, |
769 | u32 big_page_size); | 774 | u32 big_page_size); |
770 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr); | 775 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, |
776 | struct vm_gk20a *vm); | ||
771 | 777 | ||
772 | void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); | 778 | void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); |
773 | 779 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 00db510a..26306efc 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -35,8 +35,8 @@ static void channel_gm20b_bind(struct channel_gk20a *c) | |||
35 | 35 | ||
36 | gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), | 36 | gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), |
37 | ccsr_channel_inst_ptr_f(inst_ptr) | | 37 | ccsr_channel_inst_ptr_f(inst_ptr) | |
38 | (g->mm.vidmem_is_vidmem ? | 38 | gk20a_aperture_mask(g, &c->inst_block, |
39 | ccsr_channel_inst_target_sys_mem_ncoh_f() : | 39 | ccsr_channel_inst_target_sys_mem_ncoh_f(), |
40 | ccsr_channel_inst_target_vid_mem_f()) | | 40 | ccsr_channel_inst_target_vid_mem_f()) | |
41 | ccsr_channel_inst_bind_true_f()); | 41 | ccsr_channel_inst_bind_true_f()); |
42 | 42 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 726d73ed..8aa14662 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -135,18 +135,20 @@ static bool gm20b_mm_support_sparse(struct gk20a *g) | |||
135 | return true; | 135 | return true; |
136 | } | 136 | } |
137 | 137 | ||
138 | static int gm20b_mm_bar1_bind(struct gk20a *g, u64 bar1_iova) | 138 | static int gm20b_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst) |
139 | { | 139 | { |
140 | int retry = 1000; | 140 | int retry = 1000; |
141 | u64 inst_pa = (u32)(bar1_iova >> bar1_instance_block_shift_gk20a()); | 141 | u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); |
142 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); | 142 | u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); |
143 | |||
144 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); | ||
143 | 145 | ||
144 | gk20a_writel(g, bus_bar1_block_r(), | 146 | gk20a_writel(g, bus_bar1_block_r(), |
145 | (g->mm.vidmem_is_vidmem ? | 147 | gk20a_aperture_mask(g, bar1_inst, |
146 | bus_bar1_block_target_sys_mem_ncoh_f() : | 148 | bus_bar1_block_target_sys_mem_ncoh_f(), |
147 | bus_bar1_block_target_vid_mem_f()) | | 149 | bus_bar1_block_target_vid_mem_f()) | |
148 | bus_bar1_block_mode_virtual_f() | | 150 | bus_bar1_block_mode_virtual_f() | |
149 | bus_bar1_block_ptr_f(inst_pa)); | 151 | bus_bar1_block_ptr_f(ptr_v)); |
150 | do { | 152 | do { |
151 | u32 val = gk20a_readl(g, bus_bind_status_r()); | 153 | u32 val = gk20a_readl(g, bus_bind_status_r()); |
152 | u32 pending = bus_bind_status_bar1_pending_v(val); | 154 | u32 pending = bus_bind_status_bar1_pending_v(val); |