summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2016-05-12 02:32:05 -0400
committerKen Adams <kadams@nvidia.com>2016-05-13 10:11:33 -0400
commit6eebc87d99f9f04b2b68e0bc0142c161ab3e669d (patch)
tree08e437890869d76072f291ea66f709f05ea07c8a /drivers/gpu/nvgpu/gm20b/acr_gm20b.c
parent14ef0dacc94077bc3dae4c942ff8c279cc4c92ba (diff)
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor functions. This allows the functions to select the memory access method based on the buffer aperture instead of using the cpu pointer directly (like until now). The selection and aperture support will be in another patch; this patch only refactors these accessors, but keeps the underlying functionality as-is. gk20a_mem_{rd,wr}32() work as previously; add also gk20a_mem_{rd,wr}() for byte-indexed accesses, gk20a_mem_{rd,wr}_n() for memcpy()-like functionality, and gk20a_memset() for filling buffers with a constant. The 8 and 16 bit accessor functions are removed. vmap()/vunmap() pairs are abstracted to gk20a_mem_{begin,end}() to support other types of mappings or conditions where mapping the buffer is unnecessary or different. Several function arguments that would access these buffers are also changed to take a mem_desc instead of a plain cpu pointer. Some relevant occasions are changed to use the accessor functions instead of cpu pointers without them (e.g., memcpying to and from), but the majority of direct accesses will be adjusted later, when the buffers are moved to support vidmem. JIRA DNVGPU-23 Change-Id: I3dd22e14290c4ab742d42e2dd327ebeb5cd3f25a Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/1121143 Reviewed-by: Ken Adams <kadams@nvidia.com> Tested-by: Ken Adams <kadams@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/acr_gm20b.c')
-rw-r--r--drivers/gpu/nvgpu/gm20b/acr_gm20b.c206
1 files changed, 87 insertions, 119 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 0e6e715d..3ac2cec8 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -43,8 +43,8 @@ static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
43static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img); 43static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img);
44static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img); 44static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img);
45static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm); 45static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm);
46static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, 46static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
47 void *nonwpr_addr); 47 struct mem_desc *nonwpr);
48static int acr_ucode_patch_sig(struct gk20a *g, 48static int acr_ucode_patch_sig(struct gk20a *g,
49 unsigned int *p_img, 49 unsigned int *p_img,
50 unsigned int *p_prod_sig, 50 unsigned int *p_prod_sig,
@@ -355,7 +355,7 @@ int prepare_ucode_blob(struct gk20a *g)
355 355
356 gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", 356 gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
357 plsfm->managed_flcn_cnt, plsfm->wpr_size); 357 plsfm->managed_flcn_cnt, plsfm->wpr_size);
358 lsfm_init_wpr_contents(g, plsfm, g->acr.ucode_blob.cpu_va); 358 lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob);
359 } else { 359 } else {
360 gm20b_dbg_pmu("LSFM is managing no falcons.\n"); 360 gm20b_dbg_pmu("LSFM is managing no falcons.\n");
361 } 361 }
@@ -613,120 +613,91 @@ static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
613} 613}
614 614
615/* Initialize WPR contents */ 615/* Initialize WPR contents */
616static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, 616static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
617 void *nonwpr_addr) 617 struct mem_desc *ucode)
618{ 618{
619 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
620 u32 i;
619 621
620 int status = 0; 622 /* The WPR array is at the base of the WPR */
621 union flcn_bl_generic_desc *nonwpr_bl_gen_desc; 623 pnode = plsfm->ucode_img_list;
622 if (nonwpr_addr == NULL) { 624 i = 0;
623 status = -ENOMEM;
624 } else {
625 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
626 struct lsf_wpr_header *wpr_hdr;
627 struct lsf_lsb_header *lsb_hdr;
628 void *ucode_off;
629 u32 i;
630
631 /* The WPR array is at the base of the WPR */
632 wpr_hdr = (struct lsf_wpr_header *)nonwpr_addr;
633 pnode = plsfm->ucode_img_list;
634 i = 0;
635 625
636 /* 626 /*
637 * Walk the managed falcons, flush WPR and LSB headers to FB. 627 * Walk the managed falcons, flush WPR and LSB headers to FB.
638 * flush any bl args to the storage area relative to the 628 * flush any bl args to the storage area relative to the
639 * ucode image (appended on the end as a DMEM area). 629 * ucode image (appended on the end as a DMEM area).
640 */ 630 */
641 while (pnode) { 631 while (pnode) {
642 /* Flush WPR header to memory*/ 632 /* Flush WPR header to memory*/
643 memcpy(&wpr_hdr[i], &pnode->wpr_header, 633 gk20a_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header),
644 sizeof(struct lsf_wpr_header)); 634 &pnode->wpr_header, sizeof(pnode->wpr_header));
645 gm20b_dbg_pmu("wpr header as in memory and pnode\n"); 635
646 gm20b_dbg_pmu("falconid :%d %d\n", 636 gm20b_dbg_pmu("wpr header");
647 pnode->wpr_header.falcon_id, 637 gm20b_dbg_pmu("falconid :%d",
648 wpr_hdr[i].falcon_id); 638 pnode->wpr_header.falcon_id);
649 gm20b_dbg_pmu("lsb_offset :%x %x\n", 639 gm20b_dbg_pmu("lsb_offset :%x",
650 pnode->wpr_header.lsb_offset, 640 pnode->wpr_header.lsb_offset);
651 wpr_hdr[i].lsb_offset); 641 gm20b_dbg_pmu("bootstrap_owner :%d",
652 gm20b_dbg_pmu("bootstrap_owner :%d %d\n", 642 pnode->wpr_header.bootstrap_owner);
653 pnode->wpr_header.bootstrap_owner, 643 gm20b_dbg_pmu("lazy_bootstrap :%d",
654 wpr_hdr[i].bootstrap_owner); 644 pnode->wpr_header.lazy_bootstrap);
655 gm20b_dbg_pmu("lazy_bootstrap :%d %d\n", 645 gm20b_dbg_pmu("status :%d",
656 pnode->wpr_header.lazy_bootstrap, 646 pnode->wpr_header.status);
657 wpr_hdr[i].lazy_bootstrap); 647
658 gm20b_dbg_pmu("status :%d %d\n", 648 /*Flush LSB header to memory*/
659 pnode->wpr_header.status, wpr_hdr[i].status); 649 gk20a_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset,
660 650 &pnode->lsb_header, sizeof(pnode->lsb_header));
661 /*Flush LSB header to memory*/ 651
662 lsb_hdr = (struct lsf_lsb_header *)((u8 *)nonwpr_addr + 652 gm20b_dbg_pmu("lsb header");
663 pnode->wpr_header.lsb_offset); 653 gm20b_dbg_pmu("ucode_off :%x",
664 memcpy(lsb_hdr, &pnode->lsb_header, 654 pnode->lsb_header.ucode_off);
665 sizeof(struct lsf_lsb_header)); 655 gm20b_dbg_pmu("ucode_size :%x",
666 gm20b_dbg_pmu("lsb header as in memory and pnode\n"); 656 pnode->lsb_header.ucode_size);
667 gm20b_dbg_pmu("ucode_off :%x %x\n", 657 gm20b_dbg_pmu("data_size :%x",
668 pnode->lsb_header.ucode_off, 658 pnode->lsb_header.data_size);
669 lsb_hdr->ucode_off); 659 gm20b_dbg_pmu("bl_code_size :%x",
670 gm20b_dbg_pmu("ucode_size :%x %x\n", 660 pnode->lsb_header.bl_code_size);
671 pnode->lsb_header.ucode_size, 661 gm20b_dbg_pmu("bl_imem_off :%x",
672 lsb_hdr->ucode_size); 662 pnode->lsb_header.bl_imem_off);
673 gm20b_dbg_pmu("data_size :%x %x\n", 663 gm20b_dbg_pmu("bl_data_off :%x",
674 pnode->lsb_header.data_size, 664 pnode->lsb_header.bl_data_off);
675 lsb_hdr->data_size); 665 gm20b_dbg_pmu("bl_data_size :%x",
676 gm20b_dbg_pmu("bl_code_size :%x %x\n", 666 pnode->lsb_header.bl_data_size);
677 pnode->lsb_header.bl_code_size, 667 gm20b_dbg_pmu("app_code_off :%x",
678 lsb_hdr->bl_code_size); 668 pnode->lsb_header.app_code_off);
679 gm20b_dbg_pmu("bl_imem_off :%x %x\n", 669 gm20b_dbg_pmu("app_code_size :%x",
680 pnode->lsb_header.bl_imem_off, 670 pnode->lsb_header.app_code_size);
681 lsb_hdr->bl_imem_off); 671 gm20b_dbg_pmu("app_data_off :%x",
682 gm20b_dbg_pmu("bl_data_off :%x %x\n", 672 pnode->lsb_header.app_data_off);
683 pnode->lsb_header.bl_data_off, 673 gm20b_dbg_pmu("app_data_size :%x",
684 lsb_hdr->bl_data_off); 674 pnode->lsb_header.app_data_size);
685 gm20b_dbg_pmu("bl_data_size :%x %x\n", 675 gm20b_dbg_pmu("flags :%x",
686 pnode->lsb_header.bl_data_size, 676 pnode->lsb_header.flags);
687 lsb_hdr->bl_data_size); 677
688 gm20b_dbg_pmu("app_code_off :%x %x\n", 678 /*If this falcon has a boot loader and related args,
689 pnode->lsb_header.app_code_off, 679 * flush them.*/
690 lsb_hdr->app_code_off); 680 if (!pnode->ucode_img.header) {
691 gm20b_dbg_pmu("app_code_size :%x %x\n", 681 /*Populate gen bl and flush to memory*/
692 pnode->lsb_header.app_code_size, 682 lsfm_fill_flcn_bl_gen_desc(g, pnode);
693 lsb_hdr->app_code_size); 683 gk20a_mem_wr_n(g, ucode,
694 gm20b_dbg_pmu("app_data_off :%x %x\n", 684 pnode->lsb_header.bl_data_off,
695 pnode->lsb_header.app_data_off, 685 &pnode->bl_gen_desc,
696 lsb_hdr->app_data_off);
697 gm20b_dbg_pmu("app_data_size :%x %x\n",
698 pnode->lsb_header.app_data_size,
699 lsb_hdr->app_data_size);
700 gm20b_dbg_pmu("flags :%x %x\n",
701 pnode->lsb_header.flags, lsb_hdr->flags);
702
703 /*If this falcon has a boot loader and related args,
704 * flush them.*/
705 if (!pnode->ucode_img.header) {
706 nonwpr_bl_gen_desc =
707 (union flcn_bl_generic_desc *)
708 ((u8 *)nonwpr_addr +
709 pnode->lsb_header.bl_data_off);
710
711 /*Populate gen bl and flush to memory*/
712 lsfm_fill_flcn_bl_gen_desc(g, pnode);
713 memcpy(nonwpr_bl_gen_desc, &pnode->bl_gen_desc,
714 pnode->bl_gen_desc_size); 686 pnode->bl_gen_desc_size);
715 }
716 ucode_off = (void *)(pnode->lsb_header.ucode_off +
717 (u8 *)nonwpr_addr);
718 /*Copying of ucode*/
719 memcpy(ucode_off, pnode->ucode_img.data,
720 pnode->ucode_img.data_size);
721 pnode = pnode->next;
722 i++;
723 } 687 }
724 688 /*Copying of ucode*/
725 /* Tag the terminator WPR header with an invalid falcon ID. */ 689 gk20a_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off,
726 gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id, 690 pnode->ucode_img.data,
727 0, LSF_FALCON_ID_INVALID); 691 pnode->ucode_img.data_size);
692 pnode = pnode->next;
693 i++;
728 } 694 }
729 return status; 695
696 /* Tag the terminator WPR header with an invalid falcon ID. */
697 gk20a_mem_wr32(g, ucode,
698 plsfm->managed_flcn_cnt * sizeof(struct lsf_wpr_header) +
699 offsetof(struct lsf_wpr_header, falcon_id),
700 LSF_FALCON_ID_INVALID);
730} 701}
731 702
732/*! 703/*!
@@ -1000,7 +971,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
1000{ 971{
1001 struct mm_gk20a *mm = &g->mm; 972 struct mm_gk20a *mm = &g->mm;
1002 struct vm_gk20a *vm = &mm->pmu.vm; 973 struct vm_gk20a *vm = &mm->pmu.vm;
1003 int i, err = 0; 974 int err = 0;
1004 u64 *acr_dmem; 975 u64 *acr_dmem;
1005 u32 img_size_in_bytes = 0; 976 u32 img_size_in_bytes = 0;
1006 u32 status, size; 977 u32 status, size;
@@ -1066,10 +1037,8 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
1066 ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2; 1037 ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2;
1067 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; 1038 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
1068 1039
1069 for (i = 0; i < (img_size_in_bytes/4); i++) { 1040 gk20a_mem_wr_n(g, &acr->acr_ucode, 0,
1070 gk20a_mem_wr32(acr->acr_ucode.cpu_va, i, 1041 acr_ucode_data_t210_load, img_size_in_bytes);
1071 acr_ucode_data_t210_load[i]);
1072 }
1073 /* 1042 /*
1074 * In order to execute this binary, we will be using 1043 * In order to execute this binary, we will be using
1075 * a bootloader which will load this image into PMU IMEM/DMEM. 1044 * a bootloader which will load this image into PMU IMEM/DMEM.
@@ -1323,7 +1292,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1323 struct mm_gk20a *mm = &g->mm; 1292 struct mm_gk20a *mm = &g->mm;
1324 struct vm_gk20a *vm = &mm->pmu.vm; 1293 struct vm_gk20a *vm = &mm->pmu.vm;
1325 struct device *d = dev_from_gk20a(g); 1294 struct device *d = dev_from_gk20a(g);
1326 int i, err = 0; 1295 int err = 0;
1327 u32 bl_sz; 1296 u32 bl_sz;
1328 struct acr_gm20b *acr = &g->acr; 1297 struct acr_gm20b *acr = &g->acr;
1329 const struct firmware *hsbl_fw = acr->hsbl_fw; 1298 const struct firmware *hsbl_fw = acr->hsbl_fw;
@@ -1369,8 +1338,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1369 goto err_free_ucode; 1338 goto err_free_ucode;
1370 } 1339 }
1371 1340
1372 for (i = 0; i < (bl_sz) >> 2; i++) 1341 gk20a_mem_wr_n(g, &acr->hsbl_ucode, 0, pmu_bl_gm10x, bl_sz);
1373 gk20a_mem_wr32(acr->hsbl_ucode.cpu_va, i, pmu_bl_gm10x[i]);
1374 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); 1342 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n");
1375 } 1343 }
1376 /* 1344 /*