Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c    |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 289
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h |   7
3 files changed, 2 insertions, 295 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 639ec4b5..0cd314d6 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -444,7 +444,6 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 
 	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
 	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
-	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;
 
 	if (g->ops.clk_arb.get_arbiter_clk_domains)
 		gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3030c170..2d69a2aa 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -296,19 +296,11 @@ int gk20a_alloc_comptags(struct gk20a *g,
 			struct device *dev,
 			struct dma_buf *dmabuf,
 			struct gk20a_comptag_allocator *allocator,
-			u32 lines, bool user_mappable,
-			u64 *ctag_map_win_size,
-			u32 *ctag_map_win_ctagline)
+			u32 lines)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 ctaglines_allocsize;
-	u32 ctagline_align;
 	u32 offset;
-	u32 alignment_lines;
-	const u32 aggregate_cacheline_sz =
-		g->gr.cacheline_size * g->gr.slices_per_ltc *
-		g->ltc_count;
-	const u32 small_pgsz = 4096;
 	int err;
 
 	if (!priv)
@@ -317,42 +309,7 @@ int gk20a_alloc_comptags(struct gk20a *g,
 	if (!lines)
 		return -EINVAL;
 
-	if (!user_mappable) {
-		ctaglines_allocsize = lines;
-		ctagline_align = 1;
-	} else {
-		/*
-		 * For security, align the allocation on a page, and reserve
-		 * whole pages. Unfortunately, we cannot ask the allocator to
-		 * align here, since compbits per cacheline is not always a
-		 * power of two. So, we just have to allocate enough extra that
-		 * we're guaranteed to find a ctagline inside the allocation so
-		 * that: 1) it is the first ctagline in a cacheline that starts
-		 * at a page boundary, and 2) we can add enough overallocation
-		 * that the ctaglines of the succeeding allocation are on
-		 * different page than ours.
-		 */
-
-		ctagline_align =
-			(lcm(aggregate_cacheline_sz, small_pgsz) /
-			 aggregate_cacheline_sz) *
-			g->gr.comptags_per_cacheline;
-
-		ctaglines_allocsize =
-			/* for alignment */
-			ctagline_align +
-
-			/* lines rounded up to cachelines */
-			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
-			g->gr.comptags_per_cacheline +
-
-			/* trail-padding */
-			DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
-			g->gr.comptags_per_cacheline;
-
-		if (ctaglines_allocsize < lines)
-			return -EINVAL; /* integer overflow */
-	}
+	ctaglines_allocsize = lines;
 
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
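Note on the hunk above: the removed else-branch over-allocated ctaglines so that a page-aligned ctagline could always be found inside the allocation, and the key quantity was ctagline_align, derived from the LCM of the aggregate cacheline size and the 4 KiB small page size. A minimal standalone sketch of that arithmetic follows; the hardware values (512-byte compbit cacheline, 2 slices per LTC, 2 LTCs, 4 comptags per cacheline) are illustrative assumptions, not necessarily real gk20a numbers.

	/* Worked example of the removed ctagline_align computation. */
	#include <stdio.h>

	static unsigned int gcd(unsigned int a, unsigned int b)
	{
		while (b) {
			unsigned int t = a % b;
			a = b;
			b = t;
		}
		return a;
	}

	int main(void)
	{
		const unsigned int cacheline_size = 512;	/* assumed */
		const unsigned int slices_per_ltc = 2;		/* assumed */
		const unsigned int ltc_count = 2;		/* assumed */
		const unsigned int comptags_per_cacheline = 4;	/* assumed */
		const unsigned int small_pgsz = 4096;

		const unsigned int aggregate_cacheline_sz =
			cacheline_size * slices_per_ltc * ltc_count;	/* 2048 */
		const unsigned int lcm = aggregate_cacheline_sz /
			gcd(aggregate_cacheline_sz, small_pgsz) * small_pgsz; /* 4096 */

		/* same formula as the removed code: cachelines per page-aligned
		 * period, expressed in ctaglines */
		const unsigned int ctagline_align =
			(lcm / aggregate_cacheline_sz) * comptags_per_cacheline; /* 8 */

		printf("ctagline_align = %u\n", ctagline_align);
		return 0;
	}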
@@ -361,53 +318,9 @@ int gk20a_alloc_comptags(struct gk20a *g,
 	if (err)
 		return err;
 
-	/*
-	 * offset needs to be at the start of a page/cacheline boundary;
-	 * prune the preceding ctaglines that were allocated for alignment.
-	 */
-	alignment_lines =
-		DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - offset;
-	if (alignment_lines) {
-		gk20a_comptaglines_free(allocator, offset, alignment_lines);
-		offset += alignment_lines;
-		ctaglines_allocsize -= alignment_lines;
-	}
-
-	/*
-	 * check if we can prune the trailing, too; we just need to reserve
-	 * whole pages and ctagcachelines.
-	 */
-	if (user_mappable) {
-		u32 needed_cachelines =
-			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
-		u32 needed_bytes = round_up(needed_cachelines *
-					    aggregate_cacheline_sz,
-					    small_pgsz);
-		u32 first_unneeded_cacheline =
-			DIV_ROUND_UP(needed_bytes, aggregate_cacheline_sz);
-		u32 needed_ctaglines = first_unneeded_cacheline *
-			g->gr.comptags_per_cacheline;
-		u64 win_size;
-
-		if (needed_ctaglines < ctaglines_allocsize) {
-			gk20a_comptaglines_free(allocator,
-				offset + needed_ctaglines,
-				ctaglines_allocsize - needed_ctaglines);
-			ctaglines_allocsize = needed_ctaglines;
-		}
-
-		*ctag_map_win_ctagline = offset;
-		win_size =
-			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
-			aggregate_cacheline_sz;
-
-		*ctag_map_win_size = round_up(win_size, small_pgsz);
-	}
-
 	priv->comptags.offset = offset;
 	priv->comptags.lines = lines;
 	priv->comptags.allocated_lines = ctaglines_allocsize;
-	priv->comptags.user_mappable = user_mappable;
 
 	return 0;
 }
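For reference, the window size that the removed code reported back through *ctag_map_win_size was simply the buffer's comptag cachelines rounded up to whole 4 KiB pages. A small sketch of that computation, with assumed example values (64 ctaglines, 4 comptags per cacheline, 2048-byte aggregate cacheline), follows; the helper name is hypothetical, not a driver API.

	/* Sketch of the removed ctag_map_win_size computation; values are assumptions. */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define ROUND_UP(x, a)		(DIV_ROUND_UP(x, a) * (a))

	static unsigned long compbits_win_size(unsigned int lines,
					       unsigned int comptags_per_cacheline,
					       unsigned int aggregate_cacheline_sz,
					       unsigned int small_pgsz)
	{
		/* lines rounded up to whole cachelines, in bytes */
		unsigned long win_size =
			(unsigned long)DIV_ROUND_UP(lines, comptags_per_cacheline) *
			aggregate_cacheline_sz;

		/* mapped as whole small pages */
		return ROUND_UP(win_size, small_pgsz);
	}

	/* e.g. compbits_win_size(64, 4, 2048, 4096) = round_up(16 * 2048, 4096) = 32768 */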
@@ -1202,191 +1115,6 @@ int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 #endif
 }
 
-int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
-			       u64 mapping_gva,
-			       u64 *compbits_win_size,
-			       u32 *compbits_win_ctagline,
-			       u32 *mapping_ctagline,
-			       u32 *flags)
-{
-	struct nvgpu_mapped_buf *mapped_buffer;
-	struct gk20a *g = vm->mm->g;
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva);
-
-	if (!mapped_buffer || !mapped_buffer->user_mapped)
-	{
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "%s: bad offset 0x%llx", __func__, mapping_gva);
-		return -EFAULT;
-	}
-
-	*compbits_win_size = 0;
-	*compbits_win_ctagline = 0;
-	*mapping_ctagline = 0;
-	*flags = 0;
-
-	if (mapped_buffer->ctag_offset)
-		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS;
-
-	if (mapped_buffer->ctags_mappable)
-	{
-		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE;
-		*compbits_win_size = mapped_buffer->ctag_map_win_size;
-		*compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline;
-		*mapping_ctagline = mapped_buffer->ctag_offset;
-	}
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-	return 0;
-}
-
-
-int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
-			  u64 mapping_gva,
-			  u64 *compbits_win_gva,
-			  u64 *mapping_iova,
-			  u32 flags)
-{
-	struct nvgpu_mapped_buf *mapped_buffer;
-	struct gk20a *g = gk20a_from_vm(vm);
-	const bool fixed_mapping =
-		(flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
-
-	if (vm->userspace_managed && !fixed_mapping) {
-		nvgpu_err(g,
-			  "%s: non-fixed-offset mapping is not available on userspace managed address spaces",
-			  __func__);
-		return -EFAULT;
-	}
-
-	if (fixed_mapping && !vm->userspace_managed) {
-		nvgpu_err(g,
-			  "%s: fixed-offset mapping is available only on userspace managed address spaces",
-			  __func__);
-		return -EFAULT;
-	}
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva);
-
-	if (!mapped_buffer || !mapped_buffer->user_mapped) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "%s: bad offset 0x%llx", __func__, mapping_gva);
-		return -EFAULT;
-	}
-
-	if (!mapped_buffer->ctags_mappable) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "%s: comptags not mappable, offset 0x%llx",
-			  __func__, mapping_gva);
-		return -EFAULT;
-	}
-
-	if (!mapped_buffer->ctag_map_win_addr) {
-		const u32 small_pgsz_index = 0; /* small pages, 4K */
-		const u32 aggregate_cacheline_sz =
-			g->gr.cacheline_size * g->gr.slices_per_ltc *
-			g->ltc_count;
-
-		/* first aggregate cacheline to map */
-		u32 cacheline_start; /* inclusive */
-
-		/* offset of the start cacheline (will be page aligned) */
-		u64 cacheline_offset_start;
-
-		if (!mapped_buffer->ctag_map_win_size) {
-			nvgpu_mutex_release(&vm->update_gmmu_lock);
-			nvgpu_err(g,
-				  "%s: mapping 0x%llx does not have "
-				  "mappable comptags",
-				  __func__, mapping_gva);
-			return -EFAULT;
-		}
-
-		cacheline_start = mapped_buffer->ctag_offset /
-			g->gr.comptags_per_cacheline;
-		cacheline_offset_start =
-			(u64)cacheline_start * aggregate_cacheline_sz;
-
-		if (fixed_mapping) {
-			struct buffer_attrs bfr;
-			int err;
-			struct nvgpu_vm_area *vm_area = NULL;
-
-			memset(&bfr, 0, sizeof(bfr));
-
-			bfr.pgsz_idx = small_pgsz_index;
-
-			err = nvgpu_vm_area_validate_buffer(
-				vm, *compbits_win_gva, mapped_buffer->ctag_map_win_size,
-				bfr.pgsz_idx, &vm_area);
-
-			if (err) {
-				nvgpu_mutex_release(&vm->update_gmmu_lock);
-				return err;
-			}
-
-			if (vm_area) {
-				/* this would create a dangling GPU VA
-				 * pointer if the space is freed
-				 * before before the buffer is
-				 * unmapped */
-				nvgpu_mutex_release(&vm->update_gmmu_lock);
-				nvgpu_err(g,
-					  "%s: comptags cannot be mapped into allocated space",
-					  __func__);
-				return -EINVAL;
-			}
-		}
-
-		mapped_buffer->ctag_map_win_addr =
-			g->ops.mm.gmmu_map(
-				vm,
-				!fixed_mapping ? 0 : *compbits_win_gva, /* va */
-				g->gr.compbit_store.mem.priv.sgt,
-				cacheline_offset_start, /* sg offset */
-				mapped_buffer->ctag_map_win_size, /* size */
-				small_pgsz_index,
-				0, /* kind */
-				0, /* ctag_offset */
-				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_read_only,
-				false, /* clear_ctags */
-				false, /* sparse */
-				false, /* priv */
-				NULL, /* mapping_batch handle */
-				g->gr.compbit_store.mem.aperture);
-
-		if (!mapped_buffer->ctag_map_win_addr) {
-			nvgpu_mutex_release(&vm->update_gmmu_lock);
-			nvgpu_err(g,
-				  "%s: failed to map comptags for mapping 0x%llx",
-				  __func__, mapping_gva);
-			return -ENOMEM;
-		}
-	} else if (fixed_mapping && *compbits_win_gva &&
-		   mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g,
-			  "%s: re-requesting comptags map into mismatching address. buffer offset 0x"
-			  "%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
-			  __func__, mapping_gva,
-			  mapped_buffer->ctag_map_win_addr, *compbits_win_gva);
-		return -EINVAL;
-	}
-
-	*mapping_iova = nvgpu_mem_get_addr_sgl(g, mapped_buffer->sgt->sgl);
-	*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	return 0;
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 {
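The removed nvgpu_vm_map_compbits() located a buffer's window inside the global compbit backing store by converting its first ctagline into the byte offset of a whole aggregate cacheline, and passed that offset as the SG offset to gmmu_map(). A sketch of just that offset calculation, under the same assumed example values as above; the helper name is hypothetical, not a driver API.

	/* Hypothetical helper mirroring the removed offset math. */
	static unsigned long long compbit_win_sg_offset(unsigned int ctag_offset,
							unsigned int comptags_per_cacheline,
							unsigned int aggregate_cacheline_sz)
	{
		/* first aggregate cacheline covering the buffer's comptags (inclusive) */
		unsigned int cacheline_start = ctag_offset / comptags_per_cacheline;

		/* byte offset of that cacheline within the compbit store; page
		 * aligned when the allocation started on a page-aligned ctagline */
		return (unsigned long long)cacheline_start * aggregate_cacheline_sz;
	}

	/* e.g. compbit_win_sg_offset(40, 4, 2048) = 10 * 2048 = 20480 (five 4 KiB pages) */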
@@ -1688,19 +1416,6 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
-	if (mapped_buffer->ctag_map_win_addr) {
-		/* unmap compbits */
-
-		g->ops.mm.gmmu_unmap(vm,
-				     mapped_buffer->ctag_map_win_addr,
-				     mapped_buffer->ctag_map_win_size,
-				     0,       /* page size 4k */
-				     true,    /* va allocated */
-				     gk20a_mem_flag_none,
-				     false,   /* not sparse */
-				     batch);  /* batch handle */
-	}
-
 	g->ops.mm.gmmu_unmap(vm,
 			     mapped_buffer->addr,
 			     mapped_buffer->size,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 82a4ee85..c77bebf8 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -389,13 +389,6 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
 		    struct sg_table *sgt);
 
-int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
-			       u64 mapping_gva,
-			       u64 *compbits_win_size,
-			       u32 *compbits_win_ctagline,
-			       u32 *mapping_ctagline,
-			       u32 *flags);
-
 /* vm-as interface */
 struct nvgpu_as_alloc_space_args;
 struct nvgpu_as_free_space_args;