Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r--  drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 206
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c  |  26
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c  |   6
3 files changed, 103 insertions(+), 135 deletions(-)
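
The theme of the patch: gm20b's ACR, GR, and MM code stops doing CPU-pointer arithmetic on raw mappings (cpu_va, vmap) and instead goes through the offset-based gk20a_mem_* accessors, which take the gk20a device and a struct mem_desc. For orientation, a sketch of the accessor shapes as they can be inferred from the call sites below; the authoritative declarations live in the gk20a headers, not in this diff:

	/* Sketch only: prototypes inferred from the call sites in this patch. */
	int  gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem); /* map for CPU access */
	void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem);   /* unmap */
	u32  gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset);  /* byte offset */
	void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);
	u32  gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w);     /* 32-bit word index */
	void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data);
	void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
			void *src, u32 size);  /* bulk copy, size in bytes */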
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 0e6e715d..3ac2cec8 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -43,8 +43,8 @@ static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
 static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img);
 static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img);
 static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm);
-static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
-	void *nonwpr_addr);
+static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
+	struct mem_desc *nonwpr);
 static int acr_ucode_patch_sig(struct gk20a *g,
 	unsigned int *p_img,
 	unsigned int *p_prod_sig,
@@ -355,7 +355,7 @@ int prepare_ucode_blob(struct gk20a *g)
 
 		gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
 			plsfm->managed_flcn_cnt, plsfm->wpr_size);
-		lsfm_init_wpr_contents(g, plsfm, g->acr.ucode_blob.cpu_va);
+		lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob);
 	} else {
 		gm20b_dbg_pmu("LSFM is managing no falcons.\n");
 	}
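
The call-site change above is what enables everything that follows: lsfm_init_wpr_contents now receives the mem_desc itself instead of its CPU mapping, so the helper can let the accessors choose the access path (and the NULL-pointer check inside it becomes unnecessary, which is why its return type can drop to void in the next hunk). Restating the two call shapes:

	/* Before: the helper only ever sees a raw CPU pointer */
	lsfm_init_wpr_contents(g, plsfm, g->acr.ucode_blob.cpu_va);

	/* After: the helper receives the descriptor and uses gk20a_mem_* on it */
	lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob);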
@@ -613,120 +613,91 @@ static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
 }
 
 /* Initialize WPR contents */
-static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
-	void *nonwpr_addr)
+static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
+	struct mem_desc *ucode)
 {
+	struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
+	u32 i;
 
-	int status = 0;
-	union flcn_bl_generic_desc *nonwpr_bl_gen_desc;
-	if (nonwpr_addr == NULL) {
-		status = -ENOMEM;
-	} else {
-		struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
-		struct lsf_wpr_header *wpr_hdr;
-		struct lsf_lsb_header *lsb_hdr;
-		void *ucode_off;
-		u32 i;
-
-		/* The WPR array is at the base of the WPR */
-		wpr_hdr = (struct lsf_wpr_header *)nonwpr_addr;
-		pnode = plsfm->ucode_img_list;
-		i = 0;
+	/* The WPR array is at the base of the WPR */
+	pnode = plsfm->ucode_img_list;
+	i = 0;
 
-		/*
-		 * Walk the managed falcons, flush WPR and LSB headers to FB.
-		 * flush any bl args to the storage area relative to the
-		 * ucode image (appended on the end as a DMEM area).
-		 */
-		while (pnode) {
-			/* Flush WPR header to memory*/
-			memcpy(&wpr_hdr[i], &pnode->wpr_header,
-				sizeof(struct lsf_wpr_header));
-			gm20b_dbg_pmu("wpr header as in memory and pnode\n");
-			gm20b_dbg_pmu("falconid :%d %d\n",
-				pnode->wpr_header.falcon_id,
-				wpr_hdr[i].falcon_id);
-			gm20b_dbg_pmu("lsb_offset :%x %x\n",
-				pnode->wpr_header.lsb_offset,
-				wpr_hdr[i].lsb_offset);
-			gm20b_dbg_pmu("bootstrap_owner :%d %d\n",
-				pnode->wpr_header.bootstrap_owner,
-				wpr_hdr[i].bootstrap_owner);
-			gm20b_dbg_pmu("lazy_bootstrap :%d %d\n",
-				pnode->wpr_header.lazy_bootstrap,
-				wpr_hdr[i].lazy_bootstrap);
-			gm20b_dbg_pmu("status :%d %d\n",
-				pnode->wpr_header.status, wpr_hdr[i].status);
-
-			/*Flush LSB header to memory*/
-			lsb_hdr = (struct lsf_lsb_header *)((u8 *)nonwpr_addr +
-				pnode->wpr_header.lsb_offset);
-			memcpy(lsb_hdr, &pnode->lsb_header,
-				sizeof(struct lsf_lsb_header));
-			gm20b_dbg_pmu("lsb header as in memory and pnode\n");
-			gm20b_dbg_pmu("ucode_off :%x %x\n",
-				pnode->lsb_header.ucode_off,
-				lsb_hdr->ucode_off);
-			gm20b_dbg_pmu("ucode_size :%x %x\n",
-				pnode->lsb_header.ucode_size,
-				lsb_hdr->ucode_size);
-			gm20b_dbg_pmu("data_size :%x %x\n",
-				pnode->lsb_header.data_size,
-				lsb_hdr->data_size);
-			gm20b_dbg_pmu("bl_code_size :%x %x\n",
-				pnode->lsb_header.bl_code_size,
-				lsb_hdr->bl_code_size);
-			gm20b_dbg_pmu("bl_imem_off :%x %x\n",
-				pnode->lsb_header.bl_imem_off,
-				lsb_hdr->bl_imem_off);
-			gm20b_dbg_pmu("bl_data_off :%x %x\n",
-				pnode->lsb_header.bl_data_off,
-				lsb_hdr->bl_data_off);
-			gm20b_dbg_pmu("bl_data_size :%x %x\n",
-				pnode->lsb_header.bl_data_size,
-				lsb_hdr->bl_data_size);
-			gm20b_dbg_pmu("app_code_off :%x %x\n",
-				pnode->lsb_header.app_code_off,
-				lsb_hdr->app_code_off);
-			gm20b_dbg_pmu("app_code_size :%x %x\n",
-				pnode->lsb_header.app_code_size,
-				lsb_hdr->app_code_size);
-			gm20b_dbg_pmu("app_data_off :%x %x\n",
-				pnode->lsb_header.app_data_off,
-				lsb_hdr->app_data_off);
-			gm20b_dbg_pmu("app_data_size :%x %x\n",
-				pnode->lsb_header.app_data_size,
-				lsb_hdr->app_data_size);
-			gm20b_dbg_pmu("flags :%x %x\n",
-				pnode->lsb_header.flags, lsb_hdr->flags);
-
-			/*If this falcon has a boot loader and related args,
-			 * flush them.*/
-			if (!pnode->ucode_img.header) {
-				nonwpr_bl_gen_desc =
-					(union flcn_bl_generic_desc *)
-					((u8 *)nonwpr_addr +
-					pnode->lsb_header.bl_data_off);
-
-				/*Populate gen bl and flush to memory*/
-				lsfm_fill_flcn_bl_gen_desc(g, pnode);
-				memcpy(nonwpr_bl_gen_desc, &pnode->bl_gen_desc,
-					pnode->bl_gen_desc_size);
-			}
-			ucode_off = (void *)(pnode->lsb_header.ucode_off +
-				(u8 *)nonwpr_addr);
-			/*Copying of ucode*/
-			memcpy(ucode_off, pnode->ucode_img.data,
-				pnode->ucode_img.data_size);
-			pnode = pnode->next;
-			i++;
-		}
-
-		/* Tag the terminator WPR header with an invalid falcon ID. */
-		gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id,
-			0, LSF_FALCON_ID_INVALID);
-	}
-	return status;
+	/*
+	 * Walk the managed falcons, flush WPR and LSB headers to FB.
+	 * flush any bl args to the storage area relative to the
+	 * ucode image (appended on the end as a DMEM area).
+	 */
+	while (pnode) {
+		/* Flush WPR header to memory*/
+		gk20a_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header),
+				&pnode->wpr_header, sizeof(pnode->wpr_header));
+
+		gm20b_dbg_pmu("wpr header");
+		gm20b_dbg_pmu("falconid :%d",
+				pnode->wpr_header.falcon_id);
+		gm20b_dbg_pmu("lsb_offset :%x",
+				pnode->wpr_header.lsb_offset);
+		gm20b_dbg_pmu("bootstrap_owner :%d",
+				pnode->wpr_header.bootstrap_owner);
+		gm20b_dbg_pmu("lazy_bootstrap :%d",
+				pnode->wpr_header.lazy_bootstrap);
+		gm20b_dbg_pmu("status :%d",
+				pnode->wpr_header.status);
+
+		/*Flush LSB header to memory*/
+		gk20a_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset,
+				&pnode->lsb_header, sizeof(pnode->lsb_header));
+
+		gm20b_dbg_pmu("lsb header");
+		gm20b_dbg_pmu("ucode_off :%x",
+				pnode->lsb_header.ucode_off);
+		gm20b_dbg_pmu("ucode_size :%x",
+				pnode->lsb_header.ucode_size);
+		gm20b_dbg_pmu("data_size :%x",
+				pnode->lsb_header.data_size);
+		gm20b_dbg_pmu("bl_code_size :%x",
+				pnode->lsb_header.bl_code_size);
+		gm20b_dbg_pmu("bl_imem_off :%x",
+				pnode->lsb_header.bl_imem_off);
+		gm20b_dbg_pmu("bl_data_off :%x",
+				pnode->lsb_header.bl_data_off);
+		gm20b_dbg_pmu("bl_data_size :%x",
+				pnode->lsb_header.bl_data_size);
+		gm20b_dbg_pmu("app_code_off :%x",
+				pnode->lsb_header.app_code_off);
+		gm20b_dbg_pmu("app_code_size :%x",
+				pnode->lsb_header.app_code_size);
+		gm20b_dbg_pmu("app_data_off :%x",
+				pnode->lsb_header.app_data_off);
+		gm20b_dbg_pmu("app_data_size :%x",
+				pnode->lsb_header.app_data_size);
+		gm20b_dbg_pmu("flags :%x",
+				pnode->lsb_header.flags);
+
+		/*If this falcon has a boot loader and related args,
+		 * flush them.*/
+		if (!pnode->ucode_img.header) {
+			/*Populate gen bl and flush to memory*/
+			lsfm_fill_flcn_bl_gen_desc(g, pnode);
+			gk20a_mem_wr_n(g, ucode,
+					pnode->lsb_header.bl_data_off,
+					&pnode->bl_gen_desc,
+					pnode->bl_gen_desc_size);
+		}
+		/*Copying of ucode*/
+		gk20a_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off,
+				pnode->ucode_img.data,
+				pnode->ucode_img.data_size);
+		pnode = pnode->next;
+		i++;
+	}
+
+	/* Tag the terminator WPR header with an invalid falcon ID. */
+	gk20a_mem_wr32(g, ucode,
+		plsfm->managed_flcn_cnt * sizeof(struct lsf_wpr_header) +
+		offsetof(struct lsf_wpr_header, falcon_id),
+		LSF_FALCON_ID_INVALID);
 }
 
 /*!
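
The conversion in this hunk is mechanical: each memcpy into the CPU mapping of the blob becomes a gk20a_mem_wr_n at the equivalent byte offset, and the debug prints drop their second value since there is no longer a CPU-visible copy to read back. The WPR header flush is representative (a restatement of lines already in the hunk):

	/* Before: index into a CPU-mapped array of headers */
	memcpy(&wpr_hdr[i], &pnode->wpr_header, sizeof(struct lsf_wpr_header));

	/* After: write the same bytes at the same offset via the descriptor */
	gk20a_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header),
			&pnode->wpr_header, sizeof(pnode->wpr_header));

The terminator write at the end computes its target the same way, as the byte offset of the falcon_id field in the header slot after the last managed falcon, using offsetof instead of taking a field's address.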
@@ -1000,7 +971,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = &mm->pmu.vm;
-	int i, err = 0;
+	int err = 0;
 	u64 *acr_dmem;
 	u32 img_size_in_bytes = 0;
 	u32 status, size;
@@ -1066,10 +1037,8 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
 	((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2;
 	((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
 
-	for (i = 0; i < (img_size_in_bytes/4); i++) {
-		gk20a_mem_wr32(acr->acr_ucode.cpu_va, i,
-				acr_ucode_data_t210_load[i]);
-	}
+	gk20a_mem_wr_n(g, &acr->acr_ucode, 0,
+			acr_ucode_data_t210_load, img_size_in_bytes);
 	/*
 	 * In order to execute this binary, we will be using
 	 * a bootloader which will load this image into PMU IMEM/DMEM.
@@ -1323,7 +1292,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = &mm->pmu.vm;
 	struct device *d = dev_from_gk20a(g);
-	int i, err = 0;
+	int err = 0;
 	u32 bl_sz;
 	struct acr_gm20b *acr = &g->acr;
 	const struct firmware *hsbl_fw = acr->hsbl_fw;
@@ -1369,8 +1338,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
 			goto err_free_ucode;
 		}
 
-		for (i = 0; i < (bl_sz) >> 2; i++)
-			gk20a_mem_wr32(acr->hsbl_ucode.cpu_va, i, pmu_bl_gm10x[i]);
+		gk20a_mem_wr_n(g, &acr->hsbl_ucode, 0, pmu_bl_gm10x, bl_sz);
 		gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n");
 	}
 	/*
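
Both loop removals in this file follow one pattern: a per-word gk20a_mem_wr32 loop over cpu_va collapses into a single gk20a_mem_wr_n with the size in bytes. The two forms should match because the old loops iterated over size/4 32-bit words; restating the first instance:

	/* Before: one 32-bit write per iteration */
	for (i = 0; i < (img_size_in_bytes / 4); i++)
		gk20a_mem_wr32(acr->acr_ucode.cpu_va, i,
				acr_ucode_data_t210_load[i]);

	/* After: one bulk copy of the whole image */
	gk20a_mem_wr_n(g, &acr->acr_ucode, 0,
			acr_ucode_data_t210_load, img_size_in_bytes);

This is also why both functions can drop their loop counter i.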
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index b9a1e685..2197bae5 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -849,7 +849,7 @@ static int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
 
 static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
-		void *ctx_ptr)
+		struct mem_desc *mem)
 {
 	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	u32 cta_preempt_option =
@@ -859,7 +859,8 @@ static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
 		gk20a_dbg_info("CTA: %x", cta_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_preemption_options_o(),
 			cta_preempt_option);
 	}
 
@@ -1005,7 +1006,7 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
 		bool enable)
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	void *ctx_ptr = NULL;
+	struct mem_desc *mem;
 	u32 v;
 
 	gk20a_dbg_fn("");
@@ -1013,18 +1014,17 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
 	if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
 		return -EINVAL;
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr)
+	mem = &ch_ctx->gr_ctx->mem;
+
+	if (gk20a_mem_begin(c->g, mem))
 		return -ENOMEM;
 
-	v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+	v = gk20a_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
 	v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
 	v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v);
+	gk20a_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);
 
-	vunmap(ctx_ptr);
+	gk20a_mem_end(c->g, mem);
 
 	gk20a_dbg_fn("done");
 
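
This hunk shows the mapping discipline that replaces the explicit vmap/vunmap: bracket the accesses with gk20a_mem_begin/gk20a_mem_end and do the read-modify-write through byte-offset accessors. The pattern, reduced to its shape (same calls as above; the X_* macros are hypothetical stand-ins for the pm/pc_sampling register accessors):

	if (gk20a_mem_begin(c->g, mem))      /* maps the buffer; nonzero on failure */
		return -ENOMEM;
	v = gk20a_mem_rd(c->g, mem, X_o());  /* read the word at byte offset X_o() */
	v = (v & ~X_m()) | X_f(enable);      /* modify only the field of interest */
	gk20a_mem_wr(c->g, mem, X_o(), v);   /* write it back */
	gk20a_mem_end(c->g, mem);            /* unmap */

gr_gm20b_enable_cde_in_fecs below is the same read-modify-write shape; it only gains the g and mem parameters, so the begin/end bracketing is presumably handled by its caller.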
@@ -1089,13 +1089,13 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g)
 #endif
 }
 
-static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr)
+static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct mem_desc *mem)
 {
 	u32 cde_v;
 
-	cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0);
+	cde_v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o());
 	cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f();
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v);
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v);
 }
 
 static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index ac73b5c8..726d73ed 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -106,14 +106,14 @@ static void gm20b_mm_mmu_set_debug_mode(struct gk20a *g, bool enable)
 }
 
 static void gm20b_mm_set_big_page_size(struct gk20a *g,
-				       void *inst_ptr, int size)
+				       struct mem_desc *mem, int size)
 {
 	u32 val;
 
 	gk20a_dbg_fn("");
 
 	gk20a_dbg_info("big page size %d\n", size);
-	val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w());
+	val = gk20a_mem_rd32(g, mem, ram_in_big_page_size_w());
 	val &= ~ram_in_big_page_size_m();
 
 	if (size == SZ_64K)
@@ -121,7 +121,7 @@ static void gm20b_mm_set_big_page_size(struct gk20a *g,
 	else
 		val |= ram_in_big_page_size_128kb_f();
 
-	gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val);
+	gk20a_mem_wr32(g, mem, ram_in_big_page_size_w(), val);
 	gk20a_dbg_fn("done");
 }
 
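
A final note on offsets across the three files: registers generated with a _w() suffix (ram_in_big_page_size_w()) are 32-bit word indices and pair with gk20a_mem_rd32/wr32, while _o() offsets (ctxsw_prog_main_image_pm_o()) are byte offsets and pair with gk20a_mem_rd/wr. Presumably the byte-offset family is a thin wrapper over the word family, along these lines (an assumption from the naming convention, not something this diff shows):

	/* Assumed relationship between the two accessor families. */
	u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
	{
		WARN_ON(offset & 3);  /* byte offset must be word aligned */
		return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
	}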