author     Konsta Holtta <kholtta@nvidia.com>        2016-06-17 08:51:02 -0400
committer  Terje Bergstrom <tbergstrom@nvidia.com>   2016-07-05 02:10:59 -0400
commit     e12c5c8594c429357427130389da632284d79bcc (patch)
tree       453043237ef411370a02ec03c6857b63480c019b /drivers/gpu
parent     cd5a1dc315abd0a7db4136ee0e6b0c03f0882937 (diff)
gpu: nvgpu: initial support for vidmem apertures
add gk20a_aperture_mask() for memory target selection now that buffers
can actually be allocated from vidmem, and use it in all cases that
have a mem_desc available.

Jira DNVGPU-76

Change-Id: I4353cdc6e1e79488f0875581cfaf2a5cfb8c976a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169306
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
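Illustration only (not part of this change): a minimal sketch of the call pattern this commit converts, assuming a hypothetical struct mem_desc named my_mem that the caller has already allocated. gk20a_aperture_mask() selects the sysmem or vidmem register field from the buffer's own aperture instead of the global vidmem_is_vidmem flag:

	/* Old pattern: target chosen from the global flag only. */
	u32 target_old = g->mm.vidmem_is_vidmem ?
			pbdma_userd_target_sys_mem_ncoh_f() :
			pbdma_userd_target_vid_mem_f();

	/* New pattern: target chosen per buffer from mem_desc::aperture.
	 * my_mem is a hypothetical buffer used only for this sketch. */
	u32 target_new = gk20a_aperture_mask(g, &my_mem,
			pbdma_userd_target_sys_mem_ncoh_f(), /* if APERTURE_SYSMEM */
			pbdma_userd_target_vid_mem_f());     /* if APERTURE_VIDMEM */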
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c   8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c      4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h           6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c       66
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h        3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c       74
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h        8
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c      4
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c       14
9 files changed, 105 insertions(+), 82 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 18f2e896..ad452919 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -143,8 +143,8 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
 
 	gk20a_mem_wr32(g, &c->inst_block,
 			ram_in_ramfc_w() + ram_fc_userd_w(),
-		 (g->mm.vidmem_is_vidmem ?
-		  pbdma_userd_target_sys_mem_ncoh_f() :
-		  pbdma_userd_target_vid_mem_f()) |
+		 gk20a_aperture_mask(g, &g->fifo.userd,
+			pbdma_userd_target_sys_mem_ncoh_f(),
+			pbdma_userd_target_vid_mem_f()) |
 		 pbdma_userd_addr_f(addr_lo));
 
@@ -360,8 +360,8 @@ static void channel_gk20a_bind(struct channel_gk20a *c)
 
 	gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
 		ccsr_channel_inst_ptr_f(inst_ptr) |
-		(g->mm.vidmem_is_vidmem ?
-		 ccsr_channel_inst_target_sys_mem_ncoh_f() :
-		 ccsr_channel_inst_target_vid_mem_f()) |
+		gk20a_aperture_mask(g, &c->inst_block,
+			ccsr_channel_inst_target_sys_mem_ncoh_f(),
+			ccsr_channel_inst_target_vid_mem_f()) |
 		ccsr_channel_inst_bind_true_f());
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 04695d11..0e006cb6 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -2751,8 +2751,8 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	if (count != 0) {
 		gk20a_writel(g, fifo_runlist_base_r(),
 			fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
-			(g->mm.vidmem_is_vidmem ?
-			 fifo_runlist_base_target_sys_mem_ncoh_f() :
-			 fifo_runlist_base_target_vid_mem_f()));
+			gk20a_aperture_mask(g, &runlist->mem[new_buf],
+				fifo_runlist_base_target_sys_mem_ncoh_f(),
+				fifo_runlist_base_target_vid_mem_f()));
 	}
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 7bd9775e..45e16ad9 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -521,11 +521,11 @@ struct gpu_ops {
 		void (*remove_bar2_vm)(struct gk20a *g);
 		const struct gk20a_mmu_level *
 			(*get_mmu_levels)(struct gk20a *g, u32 big_page_size);
-		void (*init_pdb)(struct gk20a *g, struct mem_desc *mem,
-				u64 pdb_addr);
+		void (*init_pdb)(struct gk20a *g, struct mem_desc *inst_block,
+				struct vm_gk20a *vm);
 		u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl,
 				u32 flags);
-		int (*bar1_bind)(struct gk20a *g, u64 bar1_iova);
+		int (*bar1_bind)(struct gk20a *g, struct mem_desc *bar1_inst);
 		size_t (*get_vidmem_size)(struct gk20a *g);
 	} mm;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 9790af05..bdc65cab 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -674,11 +674,24 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
 	}
 }
 
+static u32 fecs_current_ctx_data(struct gk20a *g, struct mem_desc *inst_block)
+{
+	u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block)
+			>> ram_in_base_shift_v());
+	u32 aperture = gk20a_aperture_mask(g, inst_block,
+			gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
+			gr_fecs_current_ctx_target_vid_mem_f());
+
+	return gr_fecs_current_ctx_ptr_f(ptr) | aperture |
+		gr_fecs_current_ctx_valid_f(1);
+}
+
 static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
 					struct channel_gk20a *c)
 {
 	u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
 				     >> ram_in_base_shift_v());
+	u32 data = fecs_current_ctx_data(g, &c->inst_block);
 	u32 ret;
 
 	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
@@ -687,11 +700,7 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
 	ret = gr_gk20a_submit_fecs_method_op(g,
 		     (struct fecs_method_op_gk20a) {
 		     .method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
-		     .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
-				     (g->mm.vidmem_is_vidmem ?
-				      gr_fecs_current_ctx_target_sys_mem_ncoh_f() :
-				      gr_fecs_current_ctx_target_vid_mem_f()) |
-				     gr_fecs_current_ctx_valid_f(1)),
+		     .method.data = data,
 		     .mailbox = { .id = 0, .data = 0,
 				  .clr = 0x30,
 				  .ret = NULL,
@@ -1392,21 +1401,12 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
 	struct gk20a *g = c->g;
 	int ret;
 
-	u32 inst_base_ptr =
-		u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
-			 >> ram_in_base_shift_v());
-
-
 	gk20a_dbg_fn("");
 
 	ret = gr_gk20a_submit_fecs_method_op(g,
 		(struct fecs_method_op_gk20a) {
 		.method.addr = save_type,
-		.method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
-				(g->mm.vidmem_is_vidmem ?
-				 gr_fecs_current_ctx_target_sys_mem_ncoh_f() :
-				 gr_fecs_current_ctx_target_vid_mem_f()) |
-				gr_fecs_current_ctx_valid_f(1)),
+		.method.data = fecs_current_ctx_data(g, &c->inst_block),
 		.mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
 			    .ok = 1, .fail = 2,
 		},
@@ -1987,18 +1987,11 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	gk20a_mem_end(g, mem);
 
 	if (tegra_platform_is_linsim()) {
-		u32 inst_base_ptr =
-			u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
-				 >> ram_in_base_shift_v());
+		u32 mdata = fecs_current_ctx_data(g, &c->inst_block);
 
 		ret = gr_gk20a_submit_fecs_method_op(g,
 			  (struct fecs_method_op_gk20a) {
-				  .method.data =
-					  (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
-					   (g->mm.vidmem_is_vidmem ?
-					    gr_fecs_current_ctx_target_sys_mem_ncoh_f() :
-					    gr_fecs_current_ctx_target_vid_mem_f()) |
-					   gr_fecs_current_ctx_valid_f(1)),
+				  .method.data = mdata,
 				  .method.addr =
 					  gr_fecs_method_push_adr_restore_golden_v(),
 				  .mailbox = {
@@ -4507,8 +4500,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 	addr >>= fb_mmu_debug_wr_addr_alignment_v();
 
 	gk20a_writel(g, fb_mmu_debug_wr_r(),
-		     (g->mm.vidmem_is_vidmem ?
-		      fb_mmu_debug_wr_aperture_sys_mem_ncoh_f() :
-		      fb_mmu_debug_wr_aperture_vid_mem_f()) |
+		     gk20a_aperture_mask(g, &gr->mmu_wr_mem,
+			     fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
+			     fb_mmu_debug_wr_aperture_vid_mem_f()) |
 		     fb_mmu_debug_wr_vol_false_f() |
 		     fb_mmu_debug_wr_addr_f(addr));
@@ -4517,8 +4510,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 	addr >>= fb_mmu_debug_rd_addr_alignment_v();
 
 	gk20a_writel(g, fb_mmu_debug_rd_r(),
-		     (g->mm.vidmem_is_vidmem ?
-		      fb_mmu_debug_wr_aperture_sys_mem_ncoh_f() :
-		      fb_mmu_debug_rd_aperture_vid_mem_f()) |
+		     gk20a_aperture_mask(g, &gr->mmu_rd_mem,
+			     fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
+			     fb_mmu_debug_rd_aperture_vid_mem_f()) |
 		     fb_mmu_debug_rd_vol_false_f() |
 		     fb_mmu_debug_rd_addr_f(addr));
@@ -4966,8 +4959,7 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
 	}
 
 
-	err = gr_gk20a_fecs_set_reglist_bind_inst(g,
-			gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block));
+	err = gr_gk20a_fecs_set_reglist_bind_inst(g, &mm->pmu.inst_block);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to bind pmu inst to gr");
@@ -5245,8 +5237,7 @@ int gk20a_gr_reset(struct gk20a *g)
 		return err;
 	}
 
-	err = gr_gk20a_fecs_set_reglist_bind_inst(g,
-			gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block));
+	err = gr_gk20a_fecs_set_reglist_bind_inst(g, &g->mm.pmu.inst_block);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to bind pmu inst to gr");
@@ -6346,16 +6337,15 @@ int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
 			.mailbox.fail = 0}, false);
 }
 
-int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
+int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
+		struct mem_desc *inst_block)
 {
+	u32 data = fecs_current_ctx_data(g, inst_block);
+
 	return gr_gk20a_submit_fecs_method_op(g,
 		(struct fecs_method_op_gk20a){
 			.mailbox.id = 4,
-			.mailbox.data = (gr_fecs_current_ctx_ptr_f(addr >> 12) |
-					 gr_fecs_current_ctx_valid_f(1) |
-					 (g->mm.vidmem_is_vidmem ?
-					  gr_fecs_current_ctx_target_sys_mem_ncoh_f() :
-					  gr_fecs_current_ctx_target_vid_mem_f())),
+			.mailbox.data = data,
 			.mailbox.clr = ~0,
 			.method.data = 1,
 			.method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(),
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index ad6d8049..aa83f85a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -476,7 +476,8 @@ int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);
 
 /* pmu */
 int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
-int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr);
+int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
+		struct mem_desc *inst_block);
 int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va);
 
 void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index e0b9a720..a274820a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -92,10 +92,10 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 	u32 hi = (u32)((addr & ~(u64)0xfffff)
 		>> bus_bar0_window_target_bar0_window_base_shift_v());
 	u32 lo = (u32)(addr & 0xfffff);
-	u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ?
-		bus_bar0_window_target_sys_mem_noncoherent_f() :
-		bus_bar0_window_target_vid_mem_f()) |
+	u32 win = gk20a_aperture_mask(g, mem,
+		bus_bar0_window_target_sys_mem_noncoherent_f(),
+		bus_bar0_window_target_vid_mem_f()) |
 		bus_bar0_window_base_f(hi);
 
 	gk20a_dbg(gpu_dbg_mem,
 			"0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
@@ -817,8 +817,6 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct mem_desc *inst_block = &mm->bar1.inst_block;
-	u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block);
 	int err;
 
 	gk20a_dbg_fn("");
@@ -832,7 +830,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 		g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
 		>> 8);
 
-	g->ops.mm.bar1_bind(g, inst_pa);
+	g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
 
 	if (g->ops.mm.init_bar2_mm_hw_setup) {
 		err = g->ops.mm.init_bar2_mm_hw_setup(g);
@@ -847,17 +845,19 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-static int gk20a_mm_bar1_bind(struct gk20a *g, u64 bar1_iova)
+static int gk20a_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst)
 {
-	u64 inst_pa = (u32)(bar1_iova >> bar1_instance_block_shift_gk20a());
-	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
+	u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
+	u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a());
+
+	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
 
 	gk20a_writel(g, bus_bar1_block_r(),
-		     (g->mm.vidmem_is_vidmem ?
-		      bus_bar1_block_target_sys_mem_ncoh_f() :
-		      bus_bar1_block_target_vid_mem_f()) |
+		     gk20a_aperture_mask(g, bar1_inst,
+			     bus_bar1_block_target_sys_mem_ncoh_f(),
+			     bus_bar1_block_target_vid_mem_f()) |
 		     bus_bar1_block_mode_virtual_f() |
-		     bus_bar1_block_ptr_f(inst_pa));
+		     bus_bar1_block_ptr_f(ptr_v));
 
 	return 0;
 }
@@ -2559,6 +2559,29 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
 	return gk20a_gmmu_free_attr(g, 0, mem);
 }
 
+u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
+		u32 sysmem_mask, u32 vidmem_mask)
+{
+	switch (aperture) {
+	case APERTURE_SYSMEM:
+		/* sysmem for dgpus; some igpus consider system memory vidmem */
+		return g->mm.vidmem_is_vidmem ? sysmem_mask : vidmem_mask;
+	case APERTURE_VIDMEM:
+		/* for dgpus only */
+		return vidmem_mask;
+	case APERTURE_INVALID:
+		WARN_ON("Bad aperture");
+	}
+	return 0;
+}
+
+u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem,
+		u32 sysmem_mask, u32 vidmem_mask)
+{
+	return __gk20a_aperture_mask(g, mem->aperture,
+			sysmem_mask, vidmem_mask);
+}
+
 int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem)
 {
 	return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem);
@@ -4049,19 +4072,23 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 			false, false, "cde");
 }
 
-void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr)
+void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block,
+		struct vm_gk20a *vm)
 {
+	u64 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
-		(g->mm.vidmem_is_vidmem ?
-		 ram_in_page_dir_base_target_sys_mem_ncoh_f() :
+	gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
+
+	gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
+		gk20a_aperture_mask(g, &vm->pdb.mem,
+			ram_in_page_dir_base_target_sys_mem_ncoh_f(),
 			ram_in_page_dir_base_target_vid_mem_f()) |
 		ram_in_page_dir_base_vol_true_f() |
 		ram_in_page_dir_base_lo_f(pdb_addr_lo));
 
-	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
+	gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
 
@@ -4069,14 +4096,11 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
 		u32 big_page_size)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
 		gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
 
-	gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr);
-
-	g->ops.mm.init_pdb(g, inst_block, pde_addr);
+	g->ops.mm.init_pdb(g, inst_block, vm);
 
 	gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
 		u64_lo32(vm->va_limit - 1) & ~0xfff);
@@ -4311,7 +4335,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(vm->mm->g,
+	u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(g,
 			vm->pdb.mem.sgt->sgl, 0) >> 12);
 	u32 data;
 	s32 retry = 2000;
@@ -4348,8 +4372,8 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 
 	gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
 		fb_mmu_invalidate_pdb_addr_f(addr_lo) |
-		(g->mm.vidmem_is_vidmem ?
-		 fb_mmu_invalidate_pdb_aperture_sys_mem_f() :
-		 fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
+		gk20a_aperture_mask(g, &vm->pdb.mem,
+			fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
+			fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
 
 	gk20a_writel(g, fb_mmu_invalidate_r(),
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7bb4d011..7d3b371c 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -600,6 +600,11 @@ static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem)
 	return 0;
 }
 
+u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
+		u32 sysmem_mask, u32 vidmem_mask);
+u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem,
+		u32 sysmem_mask, u32 vidmem_mask);
+
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		u64 map_offset,
 		struct sg_table *sgt,
@@ -767,7 +772,8 @@ struct gpu_ops;
 void gk20a_init_mm(struct gpu_ops *gops);
 const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
 		u32 big_page_size);
-void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr);
+void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
+		struct vm_gk20a *vm);
 
 void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
 
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index 00db510a..26306efc 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -35,8 +35,8 @@ static void channel_gm20b_bind(struct channel_gk20a *c)
 
 	gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
 		ccsr_channel_inst_ptr_f(inst_ptr) |
-		(g->mm.vidmem_is_vidmem ?
-		 ccsr_channel_inst_target_sys_mem_ncoh_f() :
-		 ccsr_channel_inst_target_vid_mem_f()) |
+		gk20a_aperture_mask(g, &c->inst_block,
+			ccsr_channel_inst_target_sys_mem_ncoh_f(),
+			ccsr_channel_inst_target_vid_mem_f()) |
 		ccsr_channel_inst_bind_true_f());
 
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 726d73ed..8aa14662 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -135,18 +135,20 @@ static bool gm20b_mm_support_sparse(struct gk20a *g)
 	return true;
 }
 
-static int gm20b_mm_bar1_bind(struct gk20a *g, u64 bar1_iova)
+static int gm20b_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst)
 {
 	int retry = 1000;
-	u64 inst_pa = (u32)(bar1_iova >> bar1_instance_block_shift_gk20a());
-	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
+	u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
+	u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a());
+
+	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
 
 	gk20a_writel(g, bus_bar1_block_r(),
-		     (g->mm.vidmem_is_vidmem ?
-		      bus_bar1_block_target_sys_mem_ncoh_f() :
-		      bus_bar1_block_target_vid_mem_f()) |
+		gk20a_aperture_mask(g, bar1_inst,
+			bus_bar1_block_target_sys_mem_ncoh_f(),
+			bus_bar1_block_target_vid_mem_f()) |
 		bus_bar1_block_mode_virtual_f() |
-		     bus_bar1_block_ptr_f(inst_pa));
+		bus_bar1_block_ptr_f(ptr_v));
 	do {
 		u32 val = gk20a_readl(g, bus_bind_status_r());
 		u32 pending = bus_bind_status_bar1_pending_v(val);