diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-02-26 17:37:43 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:59:26 -0400 |
commit | 7290a6cbd5d03145d6f1ca4c3eacba40f6d4f93c (patch) | |
tree | de452c09f5eef76af273041dc64997fdc351dbd6 /drivers/gpu/nvgpu/gm20b | |
parent | bb51cf9ec6482b50f3020179965ef82f58d91a0a (diff) |
gpu: nvgpu: Implement common allocator and mem_desc

Introduce mem_desc, which holds all information needed for a buffer.
Implement helper functions for allocation and freeing that use this
data type.

Change-Id: I82c88595d058d4fb8c5c5fbf19d13269e48e422f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/712699
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 107 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/acr_gm20b.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 2 |
4 files changed, 25 insertions, 90 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index a58f726a..ecb0f8ab 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c | |||
@@ -194,7 +194,7 @@ int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img) | |||
194 | g->ctxsw_ucode_info.fecs.code.offset; | 194 | g->ctxsw_ucode_info.fecs.code.offset; |
195 | p_img->desc->app_resident_data_size = | 195 | p_img->desc->app_resident_data_size = |
196 | g->ctxsw_ucode_info.fecs.data.size; | 196 | g->ctxsw_ucode_info.fecs.data.size; |
197 | p_img->data = g->ctxsw_ucode_info.surface_desc.cpuva; | 197 | p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va; |
198 | p_img->data_size = p_img->desc->image_size; | 198 | p_img->data_size = p_img->desc->image_size; |
199 | 199 | ||
200 | p_img->fw_ver = NULL; | 200 | p_img->fw_ver = NULL; |
@@ -874,11 +874,8 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
874 | { | 874 | { |
875 | struct mm_gk20a *mm = &g->mm; | 875 | struct mm_gk20a *mm = &g->mm; |
876 | struct vm_gk20a *vm = &mm->pmu.vm; | 876 | struct vm_gk20a *vm = &mm->pmu.vm; |
877 | struct device *d = dev_from_gk20a(g); | ||
878 | int i, err = 0; | 877 | int i, err = 0; |
879 | struct sg_table *sgt_pmu_ucode = NULL; | 878 | u64 *acr_dmem; |
880 | dma_addr_t iova; | ||
881 | u64 *pacr_ucode_cpuva = NULL, pacr_ucode_pmu_va = 0, *acr_dmem; | ||
882 | u32 img_size_in_bytes = 0; | 879 | u32 img_size_in_bytes = 0; |
883 | u32 status, size; | 880 | u32 status, size; |
884 | u64 start; | 881 | u64 start; |
@@ -924,36 +921,18 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
924 | err = -1; | 921 | err = -1; |
925 | goto err_release_acr_fw; | 922 | goto err_release_acr_fw; |
926 | } | 923 | } |
927 | pacr_ucode_cpuva = dma_alloc_coherent(d, img_size_in_bytes, | 924 | err = gk20a_gmmu_alloc_map(vm, img_size_in_bytes, |
928 | &iova, GFP_KERNEL); | 925 | &acr->acr_ucode); |
929 | if (!pacr_ucode_cpuva) { | 926 | if (err) { |
930 | err = -ENOMEM; | 927 | err = -ENOMEM; |
931 | goto err_release_acr_fw; | 928 | goto err_release_acr_fw; |
932 | } | 929 | } |
933 | 930 | ||
934 | err = gk20a_get_sgtable(d, &sgt_pmu_ucode, | ||
935 | pacr_ucode_cpuva, | ||
936 | iova, | ||
937 | img_size_in_bytes); | ||
938 | if (err) { | ||
939 | gk20a_err(d, "failed to allocate sg table\n"); | ||
940 | err = -ENOMEM; | ||
941 | goto err_free_acr_buf; | ||
942 | } | ||
943 | pacr_ucode_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, | ||
944 | img_size_in_bytes, | ||
945 | 0, /* flags */ | ||
946 | gk20a_mem_flag_read_only); | ||
947 | if (!pacr_ucode_pmu_va) { | ||
948 | gk20a_err(d, "failed to map pmu ucode memory!!"); | ||
949 | err = -ENOMEM; | ||
950 | goto err_free_ucode_sgt; | ||
951 | } | ||
952 | acr_dmem = (u64 *) | 931 | acr_dmem = (u64 *) |
953 | &(((u8 *)acr_ucode_data_t210_load)[ | 932 | &(((u8 *)acr_ucode_data_t210_load)[ |
954 | acr_ucode_header_t210_load[2]]); | 933 | acr_ucode_header_t210_load[2]]); |
955 | acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)( | 934 | acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)( |
956 | pacr_ucode_cpuva) + acr_ucode_header_t210_load[2]); | 935 | acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]); |
957 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start = | 936 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start = |
958 | start; | 937 | start; |
959 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size = | 938 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size = |
@@ -962,13 +941,9 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
962 | ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; | 941 | ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; |
963 | 942 | ||
964 | for (i = 0; i < (img_size_in_bytes/4); i++) { | 943 | for (i = 0; i < (img_size_in_bytes/4); i++) { |
965 | gk20a_mem_wr32(pacr_ucode_cpuva, i, | 944 | gk20a_mem_wr32(acr->acr_ucode.cpu_va, i, |
966 | acr_ucode_data_t210_load[i]); | 945 | acr_ucode_data_t210_load[i]); |
967 | } | 946 | } |
968 | acr->acr_ucode.cpuva = pacr_ucode_cpuva; | ||
969 | acr->acr_ucode.iova = iova; | ||
970 | acr->acr_ucode.pmu_va = pacr_ucode_pmu_va; | ||
971 | acr->acr_ucode.size = img_size_in_bytes; | ||
972 | /* | 947 | /* |
973 | * In order to execute this binary, we will be using | 948 | * In order to execute this binary, we will be using |
974 | * a bootloader which will load this image into PMU IMEM/DMEM. | 949 | * a bootloader which will load this image into PMU IMEM/DMEM. |
@@ -983,7 +958,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
983 | bl_dmem_desc->signature[3] = 0; | 958 | bl_dmem_desc->signature[3] = 0; |
984 | bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT; | 959 | bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT; |
985 | bl_dmem_desc->code_dma_base = | 960 | bl_dmem_desc->code_dma_base = |
986 | (unsigned int)(((u64)pacr_ucode_pmu_va >> 8)); | 961 | (unsigned int)(((u64)acr->acr_ucode.gpu_va >> 8)); |
987 | bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0]; | 962 | bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0]; |
988 | bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1]; | 963 | bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1]; |
989 | bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5]; | 964 | bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5]; |
@@ -993,8 +968,6 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
993 | bl_dmem_desc->code_dma_base + | 968 | bl_dmem_desc->code_dma_base + |
994 | ((acr_ucode_header_t210_load[2]) >> 8); | 969 | ((acr_ucode_header_t210_load[2]) >> 8); |
995 | bl_dmem_desc->data_size = acr_ucode_header_t210_load[3]; | 970 | bl_dmem_desc->data_size = acr_ucode_header_t210_load[3]; |
996 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
997 | sgt_pmu_ucode = NULL; | ||
998 | } else | 971 | } else |
999 | acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0; | 972 | acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0; |
1000 | status = pmu_exec_gen_bl(g, bl_dmem_desc, 1); | 973 | status = pmu_exec_gen_bl(g, bl_dmem_desc, 1); |
@@ -1004,17 +977,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
1004 | } | 977 | } |
1005 | return 0; | 978 | return 0; |
1006 | err_free_ucode_map: | 979 | err_free_ucode_map: |
1007 | gk20a_gmmu_unmap(vm, pacr_ucode_pmu_va, | 980 | gk20a_gmmu_unmap_free(vm, &acr->acr_ucode); |
1008 | img_size_in_bytes, gk20a_mem_flag_none); | ||
1009 | acr->acr_ucode.pmu_va = 0; | ||
1010 | err_free_ucode_sgt: | ||
1011 | if (sgt_pmu_ucode) | ||
1012 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
1013 | err_free_acr_buf: | ||
1014 | dma_free_coherent(d, img_size_in_bytes, | ||
1015 | pacr_ucode_cpuva, iova); | ||
1016 | acr->acr_ucode.cpuva = NULL; | ||
1017 | acr->acr_ucode.iova = 0; | ||
1018 | err_release_acr_fw: | 981 | err_release_acr_fw: |
1019 | release_firmware(acr_fw); | 982 | release_firmware(acr_fw); |
1020 | acr->acr_fw = NULL; | 983 | acr->acr_fw = NULL; |
@@ -1078,7 +1041,7 @@ static int bl_bootstrap(struct pmu_gk20a *pmu, | |||
1078 | pwr_falcon_itfen_ctxen_enable_f()); | 1041 | pwr_falcon_itfen_ctxen_enable_f()); |
1079 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | 1042 | gk20a_writel(g, pwr_pmu_new_instblk_r(), |
1080 | pwr_pmu_new_instblk_ptr_f( | 1043 | pwr_pmu_new_instblk_ptr_f( |
1081 | mm->pmu.inst_block.cpu_pa >> 12) | | 1044 | sg_phys(mm->pmu.inst_block.sgt->sgl) >> 12) | |
1082 | pwr_pmu_new_instblk_valid_f(1) | | 1045 | pwr_pmu_new_instblk_valid_f(1) | |
1083 | pwr_pmu_new_instblk_target_sys_coh_f()); | 1046 | pwr_pmu_new_instblk_target_sys_coh_f()); |
1084 | 1047 | ||
@@ -1104,7 +1067,7 @@ static int bl_bootstrap(struct pmu_gk20a *pmu, | |||
1104 | pwr_falcon_imemc_aincw_f(1)); | 1067 | pwr_falcon_imemc_aincw_f(1)); |
1105 | virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8; | 1068 | virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8; |
1106 | tag = virt_addr >> 8; /* tag is always 256B aligned */ | 1069 | tag = virt_addr >> 8; /* tag is always 256B aligned */ |
1107 | bl_ucode = (u32 *)(acr->hsbl_ucode.cpuva); | 1070 | bl_ucode = (u32 *)(acr->hsbl_ucode.cpu_va); |
1108 | for (index = 0; index < bl_sz/4; index++) { | 1071 | for (index = 0; index < bl_sz/4; index++) { |
1109 | if ((index % 64) == 0) { | 1072 | if ((index % 64) == 0) { |
1110 | gk20a_writel(g, pwr_falcon_imemt_r(0), | 1073 | gk20a_writel(g, pwr_falcon_imemt_r(0), |
@@ -1198,16 +1161,11 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | |||
1198 | struct vm_gk20a *vm = &mm->pmu.vm; | 1161 | struct vm_gk20a *vm = &mm->pmu.vm; |
1199 | struct device *d = dev_from_gk20a(g); | 1162 | struct device *d = dev_from_gk20a(g); |
1200 | int i, err = 0; | 1163 | int i, err = 0; |
1201 | struct sg_table *sgt_pmu_ucode = NULL; | ||
1202 | dma_addr_t iova; | ||
1203 | u32 bl_sz; | 1164 | u32 bl_sz; |
1204 | void *bl_cpuva; | ||
1205 | u64 bl_pmu_va; | ||
1206 | struct acr_gm20b *acr = &g->acr; | 1165 | struct acr_gm20b *acr = &g->acr; |
1207 | const struct firmware *hsbl_fw = acr->hsbl_fw; | 1166 | const struct firmware *hsbl_fw = acr->hsbl_fw; |
1208 | struct hsflcn_bl_desc *pmu_bl_gm10x_desc; | 1167 | struct hsflcn_bl_desc *pmu_bl_gm10x_desc; |
1209 | u32 *pmu_bl_gm10x = NULL; | 1168 | u32 *pmu_bl_gm10x = NULL; |
1210 | DEFINE_DMA_ATTRS(attrs); | ||
1211 | gm20b_dbg_pmu(""); | 1169 | gm20b_dbg_pmu(""); |
1212 | 1170 | ||
1213 | if (!hsbl_fw) { | 1171 | if (!hsbl_fw) { |
@@ -1232,44 +1190,25 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | |||
1232 | /*TODO in code verify that enable PMU is done, | 1190 | /*TODO in code verify that enable PMU is done, |
1233 | scrubbing etc is done*/ | 1191 | scrubbing etc is done*/ |
1234 | /*TODO in code verify that gmmu vm init is done*/ | 1192 | /*TODO in code verify that gmmu vm init is done*/ |
1235 | dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); | 1193 | err = gk20a_gmmu_alloc_attr(g, |
1236 | bl_cpuva = dma_alloc_attrs(d, bl_sz, | 1194 | DMA_ATTR_READ_ONLY, bl_sz, &acr->hsbl_ucode); |
1237 | &iova, | 1195 | if (err) { |
1238 | GFP_KERNEL, | ||
1239 | &attrs); | ||
1240 | gm20b_dbg_pmu("bl size is %x\n", bl_sz); | ||
1241 | if (!bl_cpuva) { | ||
1242 | gk20a_err(d, "failed to allocate memory\n"); | 1196 | gk20a_err(d, "failed to allocate memory\n"); |
1243 | err = -ENOMEM; | ||
1244 | goto err_done; | 1197 | goto err_done; |
1245 | } | 1198 | } |
1246 | acr->hsbl_ucode.cpuva = bl_cpuva; | ||
1247 | acr->hsbl_ucode.iova = iova; | ||
1248 | |||
1249 | err = gk20a_get_sgtable(d, &sgt_pmu_ucode, | ||
1250 | bl_cpuva, | ||
1251 | iova, | ||
1252 | bl_sz); | ||
1253 | if (err) { | ||
1254 | gk20a_err(d, "failed to allocate sg table\n"); | ||
1255 | goto err_free_cpu_va; | ||
1256 | } | ||
1257 | 1199 | ||
1258 | bl_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, | 1200 | acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm, &acr->hsbl_ucode.sgt, |
1259 | bl_sz, | 1201 | bl_sz, |
1260 | 0, /* flags */ | 1202 | 0, /* flags */ |
1261 | gk20a_mem_flag_read_only); | 1203 | gk20a_mem_flag_read_only); |
1262 | if (!bl_pmu_va) { | 1204 | if (!acr->hsbl_ucode.gpu_va) { |
1263 | gk20a_err(d, "failed to map pmu ucode memory!!"); | 1205 | gk20a_err(d, "failed to map pmu ucode memory!!"); |
1264 | goto err_free_ucode_sgt; | 1206 | goto err_free_ucode; |
1265 | } | 1207 | } |
1266 | acr->hsbl_ucode.pmu_va = bl_pmu_va; | ||
1267 | 1208 | ||
1268 | for (i = 0; i < (bl_sz) >> 2; i++) | 1209 | for (i = 0; i < (bl_sz) >> 2; i++) |
1269 | gk20a_mem_wr32(bl_cpuva, i, pmu_bl_gm10x[i]); | 1210 | gk20a_mem_wr32(acr->hsbl_ucode.cpu_va, i, pmu_bl_gm10x[i]); |
1270 | gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); | 1211 | gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); |
1271 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
1272 | sgt_pmu_ucode = NULL; | ||
1273 | } | 1212 | } |
1274 | /* | 1213 | /* |
1275 | * Disable interrupts to avoid kernel hitting breakpoint due | 1214 | * Disable interrupts to avoid kernel hitting breakpoint due |
@@ -1306,14 +1245,10 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | |||
1306 | start_gm20b_pmu(g); | 1245 | start_gm20b_pmu(g); |
1307 | return 0; | 1246 | return 0; |
1308 | err_unmap_bl: | 1247 | err_unmap_bl: |
1309 | gk20a_gmmu_unmap(vm, acr->hsbl_ucode.pmu_va, | 1248 | gk20a_gmmu_unmap(vm, acr->hsbl_ucode.gpu_va, |
1310 | acr->hsbl_ucode.size, gk20a_mem_flag_none); | 1249 | acr->hsbl_ucode.size, gk20a_mem_flag_none); |
1311 | err_free_ucode_sgt: | 1250 | err_free_ucode: |
1312 | if (sgt_pmu_ucode) | 1251 | gk20a_gmmu_free(g, &acr->hsbl_ucode); |
1313 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
1314 | err_free_cpu_va: | ||
1315 | dma_free_attrs(d, acr->hsbl_ucode.size, | ||
1316 | acr->hsbl_ucode.cpuva, acr->hsbl_ucode.iova, &attrs); | ||
1317 | err_done: | 1252 | err_done: |
1318 | release_firmware(hsbl_fw); | 1253 | release_firmware(hsbl_fw); |
1319 | return err; | 1254 | return err; |
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h index e5d126f8..d26f91ff 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h | |||
@@ -381,9 +381,9 @@ struct acr_gm20b { | |||
381 | u32 pmu_args; | 381 | u32 pmu_args; |
382 | const struct firmware *acr_fw; | 382 | const struct firmware *acr_fw; |
383 | struct flcn_acr_desc *acr_dmem_desc; | 383 | struct flcn_acr_desc *acr_dmem_desc; |
384 | struct pmu_mem_desc acr_ucode; | 384 | struct mem_desc acr_ucode; |
385 | const struct firmware *hsbl_fw; | 385 | const struct firmware *hsbl_fw; |
386 | struct pmu_mem_desc hsbl_ucode; | 386 | struct mem_desc hsbl_ucode; |
387 | struct flcn_bl_dmem_desc bl_dmem_desc; | 387 | struct flcn_bl_dmem_desc bl_dmem_desc; |
388 | const struct firmware *pmu_fw; | 388 | const struct firmware *pmu_fw; |
389 | const struct firmware *pmu_desc; | 389 | const struct firmware *pmu_desc; |
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 6d186c10..10d2a13e 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -24,7 +24,7 @@ static void channel_gm20b_bind(struct channel_gk20a *ch_gk20a) | |||
24 | { | 24 | { |
25 | struct gk20a *g = ch_gk20a->g; | 25 | struct gk20a *g = ch_gk20a->g; |
26 | 26 | ||
27 | u32 inst_ptr = ch_gk20a->inst_block.cpu_pa | 27 | u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl) |
28 | >> ram_in_base_shift_v(); | 28 | >> ram_in_base_shift_v(); |
29 | 29 | ||
30 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", | 30 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index c199964f..5ade9e6c 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -656,7 +656,7 @@ static u32 gr_gm20b_get_tpc_num(u32 addr) | |||
656 | static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) | 656 | static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) |
657 | { | 657 | { |
658 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | 658 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; |
659 | u64 addr_base = ucode_info->ucode_gpuva; | 659 | u64 addr_base = ucode_info->surface_desc.gpu_va; |
660 | 660 | ||
661 | gr_gk20a_load_falcon_bind_instblk(g); | 661 | gr_gk20a_load_falcon_bind_instblk(g); |
662 | 662 | ||