Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 289
1 file changed, 2 insertions(+), 287 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3030c170..2d69a2aa 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -296,19 +296,11 @@ int gk20a_alloc_comptags(struct gk20a *g,
 			 struct device *dev,
 			 struct dma_buf *dmabuf,
 			 struct gk20a_comptag_allocator *allocator,
-			 u32 lines, bool user_mappable,
-			 u64 *ctag_map_win_size,
-			 u32 *ctag_map_win_ctagline)
+			 u32 lines)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 ctaglines_allocsize;
-	u32 ctagline_align;
 	u32 offset;
-	u32 alignment_lines;
-	const u32 aggregate_cacheline_sz =
-		g->gr.cacheline_size * g->gr.slices_per_ltc *
-		g->ltc_count;
-	const u32 small_pgsz = 4096;
 	int err;
 
 	if (!priv)
@@ -317,42 +309,7 @@ int gk20a_alloc_comptags(struct gk20a *g,
 	if (!lines)
 		return -EINVAL;
 
-	if (!user_mappable) {
-		ctaglines_allocsize = lines;
-		ctagline_align = 1;
-	} else {
-		/*
-		 * For security, align the allocation on a page, and reserve
-		 * whole pages. Unfortunately, we cannot ask the allocator to
-		 * align here, since compbits per cacheline is not always a
-		 * power of two. So, we just have to allocate enough extra that
-		 * we're guaranteed to find a ctagline inside the allocation so
-		 * that: 1) it is the first ctagline in a cacheline that starts
-		 * at a page boundary, and 2) we can add enough overallocation
-		 * that the ctaglines of the succeeding allocation are on
-		 * different page than ours.
-		 */
-
-		ctagline_align =
-			(lcm(aggregate_cacheline_sz, small_pgsz) /
-			 aggregate_cacheline_sz) *
-			g->gr.comptags_per_cacheline;
-
-		ctaglines_allocsize =
-			/* for alignment */
-			ctagline_align +
-
-			/* lines rounded up to cachelines */
-			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
-			g->gr.comptags_per_cacheline +
-
-			/* trail-padding */
-			DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
-			g->gr.comptags_per_cacheline;
-
-		if (ctaglines_allocsize < lines)
-			return -EINVAL; /* integer overflow */
-	}
+	ctaglines_allocsize = lines;
 
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
@@ -361,53 +318,9 @@ int gk20a_alloc_comptags(struct gk20a *g,
 	if (err)
 		return err;
 
-	/*
-	 * offset needs to be at the start of a page/cacheline boundary;
-	 * prune the preceding ctaglines that were allocated for alignment.
-	 */
-	alignment_lines =
-		DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - offset;
-	if (alignment_lines) {
-		gk20a_comptaglines_free(allocator, offset, alignment_lines);
-		offset += alignment_lines;
-		ctaglines_allocsize -= alignment_lines;
-	}
-
-	/*
-	 * check if we can prune the trailing, too; we just need to reserve
-	 * whole pages and ctagcachelines.
-	 */
-	if (user_mappable) {
-		u32 needed_cachelines =
-			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
-		u32 needed_bytes = round_up(needed_cachelines *
-					    aggregate_cacheline_sz,
-					    small_pgsz);
-		u32 first_unneeded_cacheline =
-			DIV_ROUND_UP(needed_bytes, aggregate_cacheline_sz);
-		u32 needed_ctaglines = first_unneeded_cacheline *
-			g->gr.comptags_per_cacheline;
-		u64 win_size;
-
-		if (needed_ctaglines < ctaglines_allocsize) {
-			gk20a_comptaglines_free(allocator,
-				offset + needed_ctaglines,
-				ctaglines_allocsize - needed_ctaglines);
-			ctaglines_allocsize = needed_ctaglines;
-		}
-
-		*ctag_map_win_ctagline = offset;
-		win_size =
-			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
-			aggregate_cacheline_sz;
-
-		*ctag_map_win_size = round_up(win_size, small_pgsz);
-	}
-
 	priv->comptags.offset = offset;
 	priv->comptags.lines = lines;
 	priv->comptags.allocated_lines = ctaglines_allocsize;
-	priv->comptags.user_mappable = user_mappable;
 
 	return 0;
 }
@@ -1202,191 +1115,6 @@ int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 #endif
 }
 
-int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
-			       u64 mapping_gva,
-			       u64 *compbits_win_size,
-			       u32 *compbits_win_ctagline,
-			       u32 *mapping_ctagline,
-			       u32 *flags)
-{
-	struct nvgpu_mapped_buf *mapped_buffer;
-	struct gk20a *g = vm->mm->g;
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva);
-
-	if (!mapped_buffer || !mapped_buffer->user_mapped)
-	{
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "%s: bad offset 0x%llx", __func__, mapping_gva);
-		return -EFAULT;
-	}
-
-	*compbits_win_size = 0;
-	*compbits_win_ctagline = 0;
-	*mapping_ctagline = 0;
-	*flags = 0;
-
-	if (mapped_buffer->ctag_offset)
-		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS;
-
-	if (mapped_buffer->ctags_mappable)
-	{
-		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE;
-		*compbits_win_size = mapped_buffer->ctag_map_win_size;
-		*compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline;
-		*mapping_ctagline = mapped_buffer->ctag_offset;
-	}
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-	return 0;
-}
-
-
-int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
-			  u64 mapping_gva,
-			  u64 *compbits_win_gva,
-			  u64 *mapping_iova,
-			  u32 flags)
-{
-	struct nvgpu_mapped_buf *mapped_buffer;
-	struct gk20a *g = gk20a_from_vm(vm);
-	const bool fixed_mapping =
-		(flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
-
-	if (vm->userspace_managed && !fixed_mapping) {
-		nvgpu_err(g,
-			  "%s: non-fixed-offset mapping is not available on userspace managed address spaces",
-			  __func__);
-		return -EFAULT;
-	}
-
-	if (fixed_mapping && !vm->userspace_managed) {
-		nvgpu_err(g,
-			  "%s: fixed-offset mapping is available only on userspace managed address spaces",
-			  __func__);
-		return -EFAULT;
-	}
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva);
-
-	if (!mapped_buffer || !mapped_buffer->user_mapped) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "%s: bad offset 0x%llx", __func__, mapping_gva);
-		return -EFAULT;
-	}
-
-	if (!mapped_buffer->ctags_mappable) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "%s: comptags not mappable, offset 0x%llx",
-			  __func__, mapping_gva);
-		return -EFAULT;
-	}
-
-	if (!mapped_buffer->ctag_map_win_addr) {
-		const u32 small_pgsz_index = 0; /* small pages, 4K */
-		const u32 aggregate_cacheline_sz =
-			g->gr.cacheline_size * g->gr.slices_per_ltc *
-			g->ltc_count;
-
-		/* first aggregate cacheline to map */
-		u32 cacheline_start; /* inclusive */
-
-		/* offset of the start cacheline (will be page aligned) */
-		u64 cacheline_offset_start;
-
-		if (!mapped_buffer->ctag_map_win_size) {
-			nvgpu_mutex_release(&vm->update_gmmu_lock);
-			nvgpu_err(g,
-				  "%s: mapping 0x%llx does not have "
-				  "mappable comptags",
-				  __func__, mapping_gva);
-			return -EFAULT;
-		}
-
-		cacheline_start = mapped_buffer->ctag_offset /
-			g->gr.comptags_per_cacheline;
-		cacheline_offset_start =
-			(u64)cacheline_start * aggregate_cacheline_sz;
-
-		if (fixed_mapping) {
-			struct buffer_attrs bfr;
-			int err;
-			struct nvgpu_vm_area *vm_area = NULL;
-
-			memset(&bfr, 0, sizeof(bfr));
-
-			bfr.pgsz_idx = small_pgsz_index;
-
-			err = nvgpu_vm_area_validate_buffer(
-				vm, *compbits_win_gva, mapped_buffer->ctag_map_win_size,
-				bfr.pgsz_idx, &vm_area);
-
-			if (err) {
-				nvgpu_mutex_release(&vm->update_gmmu_lock);
-				return err;
-			}
-
-			if (vm_area) {
-				/* this would create a dangling GPU VA
-				 * pointer if the space is freed
-				 * before before the buffer is
-				 * unmapped */
-				nvgpu_mutex_release(&vm->update_gmmu_lock);
-				nvgpu_err(g,
-					  "%s: comptags cannot be mapped into allocated space",
-					  __func__);
-				return -EINVAL;
-			}
-		}
-
-		mapped_buffer->ctag_map_win_addr =
-			g->ops.mm.gmmu_map(
-				vm,
-				!fixed_mapping ? 0 : *compbits_win_gva, /* va */
-				g->gr.compbit_store.mem.priv.sgt,
-				cacheline_offset_start, /* sg offset */
-				mapped_buffer->ctag_map_win_size, /* size */
-				small_pgsz_index,
-				0, /* kind */
-				0, /* ctag_offset */
-				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_read_only,
-				false, /* clear_ctags */
-				false, /* sparse */
-				false, /* priv */
-				NULL, /* mapping_batch handle */
-				g->gr.compbit_store.mem.aperture);
-
-		if (!mapped_buffer->ctag_map_win_addr) {
-			nvgpu_mutex_release(&vm->update_gmmu_lock);
-			nvgpu_err(g,
-				  "%s: failed to map comptags for mapping 0x%llx",
-				  __func__, mapping_gva);
-			return -ENOMEM;
-		}
-	} else if (fixed_mapping && *compbits_win_gva &&
-		   mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g,
-			  "%s: re-requesting comptags map into mismatching address. buffer offset 0x"
-			  "%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
-			  __func__, mapping_gva,
-			  mapped_buffer->ctag_map_win_addr, *compbits_win_gva);
-		return -EINVAL;
-	}
-
-	*mapping_iova = nvgpu_mem_get_addr_sgl(g, mapped_buffer->sgt->sgl);
-	*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	return 0;
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 {
@@ -1688,19 +1416,6 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
-	if (mapped_buffer->ctag_map_win_addr) {
-		/* unmap compbits */
-
-		g->ops.mm.gmmu_unmap(vm,
-				     mapped_buffer->ctag_map_win_addr,
-				     mapped_buffer->ctag_map_win_size,
-				     0,       /* page size 4k */
-				     true,    /* va allocated */
-				     gk20a_mem_flag_none,
-				     false,   /* not sparse */
-				     batch);  /* batch handle */
-	}
-
 	g->ops.mm.gmmu_unmap(vm,
 		mapped_buffer->addr,
 		mapped_buffer->size,