Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 622
1 file changed, 59 insertions(+), 563 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 72a3ee13..84919d50 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -25,6 +25,7 @@
 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/vm.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
@@ -121,25 +122,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl)
  *
  */
 
-static inline int vm_aspace_id(struct vm_gk20a *vm)
-{
-	/* -1 is bar1 or pmu, etc. */
-	return vm->as_share ? vm->as_share->id : -1;
-}
-static inline u32 hi32(u64 f)
-{
-	return (u32)(f >> 32);
-}
-static inline u32 lo32(u64 f)
-{
-	return (u32)(f & 0xffffffff);
-}
-
-static struct mapped_buffer_node *find_mapped_buffer_locked(
-			struct nvgpu_rbtree_node *root, u64 addr);
-static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
-			struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
-			u32 kind);
 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			enum gmmu_pgsz_gk20a pgsz_idx,
 			struct sg_table *sgt, u64 buffer_offset,
@@ -316,13 +298,13 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
 	*comptags = priv->comptags;
 }
 
-static int gk20a_alloc_comptags(struct gk20a *g,
+int gk20a_alloc_comptags(struct gk20a *g,
 			struct device *dev,
 			struct dma_buf *dmabuf,
 			struct gk20a_comptag_allocator *allocator,
 			u32 lines, bool user_mappable,
 			u64 *ctag_map_win_size,
 			u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 ctaglines_allocsize;
@@ -493,9 +475,9 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 		g->ops.mm.remove_bar2_vm(g);
 
 	if (g->ops.mm.is_bar1_supported(g))
-		gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
+		nvgpu_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
 
-	gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
+	nvgpu_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
 	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
 	nvgpu_vm_remove_support_nofree(&mm->cde.vm);
 
@@ -1097,11 +1079,11 @@ static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
 }
 
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
-			 struct mapped_buffer_node ***mapped_buffers,
+			 struct nvgpu_mapped_buf ***mapped_buffers,
 			 int *num_buffers)
 {
-	struct mapped_buffer_node *mapped_buffer;
-	struct mapped_buffer_node **buffer_list;
+	struct nvgpu_mapped_buf *mapped_buffer;
+	struct nvgpu_mapped_buf **buffer_list;
 	struct nvgpu_rbtree_node *node = NULL;
 	int i = 0;
 
@@ -1141,15 +1123,15 @@ int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 	return 0;
 }
 
-static void gk20a_vm_unmap_locked_kref(struct kref *ref)
+void gk20a_vm_unmap_locked_kref(struct kref *ref)
 {
-	struct mapped_buffer_node *mapped_buffer =
-		container_of(ref, struct mapped_buffer_node, ref);
+	struct nvgpu_mapped_buf *mapped_buffer =
+		container_of(ref, struct nvgpu_mapped_buf, ref);
 	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
 }
 
 void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
-			  struct mapped_buffer_node **mapped_buffers,
+			  struct nvgpu_mapped_buf **mapped_buffers,
 			  int num_buffers)
 {
 	int i;
@@ -1177,11 +1159,11 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 			       struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = vm->mm->g;
-	struct mapped_buffer_node *mapped_buffer;
+	struct nvgpu_mapped_buf *mapped_buffer;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset);
+	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
 		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
@@ -1273,100 +1255,10 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 	return 0;
 }
 
-
-static void remove_mapped_buffer(struct vm_gk20a *vm,
-			struct mapped_buffer_node *mapped_buffer)
-{
-	nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
-}
-
-static int insert_mapped_buffer(struct vm_gk20a *vm,
-			struct mapped_buffer_node *mapped_buffer)
-{
-	mapped_buffer->node.key_start = mapped_buffer->addr;
-	mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
-
-	nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
-
-	return 0;
-}
-
-static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
-			struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
-			u32 kind)
-{
-	struct nvgpu_rbtree_node *node = NULL;
-
-	nvgpu_rbtree_enum_start(0, &node, root);
-
-	while (node) {
-		struct mapped_buffer_node *mapped_buffer =
-			mapped_buffer_from_rbtree_node(node);
-
-		if (mapped_buffer->dmabuf == dmabuf &&
-		    kind == mapped_buffer->kind)
-			return mapped_buffer;
-
-		nvgpu_rbtree_enum_next(&node, node);
-	}
-
-	return NULL;
-}
-
-static struct mapped_buffer_node *find_mapped_buffer_locked(
-			struct nvgpu_rbtree_node *root, u64 addr)
-{
-	struct nvgpu_rbtree_node *node = NULL;
-
-	nvgpu_rbtree_search(addr, &node, root);
-	if (!node)
-		return NULL;
-
-	return mapped_buffer_from_rbtree_node(node);
-}
-
-static struct mapped_buffer_node *find_mapped_buffer_range_locked(
-			struct nvgpu_rbtree_node *root, u64 addr)
-{
-	struct nvgpu_rbtree_node *node = NULL;
-
-	nvgpu_rbtree_range_search(addr, &node, root);
-	if (!node)
-		return NULL;
-
-	return mapped_buffer_from_rbtree_node(node);
-}
-
-/* find the first mapped buffer with GPU VA less than addr */
-static struct mapped_buffer_node *find_mapped_buffer_less_than_locked(
-			struct nvgpu_rbtree_node *root, u64 addr)
-{
-	struct nvgpu_rbtree_node *node = NULL;
-
-	nvgpu_rbtree_less_than_search(addr, &node, root);
-	if (!node)
-		return NULL;
-
-	return mapped_buffer_from_rbtree_node(node);
-}
-
-struct buffer_attrs {
-	struct sg_table *sgt;
-	u64 size;
-	u64 align;
-	u32 ctag_offset;
-	u32 ctag_lines;
-	u32 ctag_allocated_lines;
-	int pgsz_idx;
-	u8 kind_v;
-	u8 uc_kind_v;
-	bool ctag_user_mappable;
-};
-
-static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
-			u32 flags,
-			struct buffer_attrs *bfr,
-			enum gmmu_pgsz_gk20a pgsz_idx)
+int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
+			u32 flags,
+			struct buffer_attrs *bfr,
+			enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	bool kind_compressible;
 	struct gk20a *g = gk20a_from_vm(vm);
@@ -1409,14 +1301,14 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 	return 0;
 }
 
-static int validate_fixed_buffer(struct vm_gk20a *vm,
+int validate_fixed_buffer(struct vm_gk20a *vm,
 			struct buffer_attrs *bfr,
 			u64 map_offset, u64 map_size,
 			struct vm_reserved_va_node **pva_node)
 {
 	struct gk20a *g = vm->mm->g;
 	struct vm_reserved_va_node *va_node;
-	struct mapped_buffer_node *buffer;
+	struct nvgpu_mapped_buf *buffer;
 	u64 map_end = map_offset + map_size;
 
 	/* can wrap around with insane map_size; zero is disallowed too */
@@ -1448,8 +1340,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 	/* check that this mapping does not collide with existing
 	 * mappings by checking the buffer with the highest GPU VA
 	 * that is less than our buffer end */
-	buffer = find_mapped_buffer_less_than_locked(
-		vm->mapped_buffers, map_offset + map_size);
+	buffer = __nvgpu_vm_find_mapped_buf_less_than(
+		vm, map_offset + map_size);
 	if (buffer && buffer->addr + buffer->size > map_offset) {
 		nvgpu_warn(g, "overlapping buffer map requested");
 		return -EINVAL;
@@ -1499,11 +1391,11 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		"[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
 		"pgsz=%-3dKb as=%-2d ctags=%d start=%d "
 		"kind=0x%x flags=0x%x apt=%s",
-		hi32(map_offset), lo32(map_offset), size,
-		sgt ? hi32((u64)sg_dma_address(sgt->sgl)) : 0,
-		sgt ? lo32((u64)sg_dma_address(sgt->sgl)) : 0,
-		sgt ? hi32((u64)sg_phys(sgt->sgl)) : 0,
-		sgt ? lo32((u64)sg_phys(sgt->sgl)) : 0,
+		u64_hi32(map_offset), u64_lo32(map_offset), size,
+		sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
+		sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
+		sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
+		sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
 		vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
 		ctag_lines, ctag_offset,
 		kind_v, flags, nvgpu_aperture_str(aperture));
@@ -1595,8 +1487,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 	}
 }
 
-static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
+enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
 			struct dma_buf *dmabuf)
 {
 	struct gk20a *buf_owner = gk20a_vidmem_buf_owner(dmabuf);
 	if (buf_owner == NULL) {
@@ -1617,80 +1509,6 @@ static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
 	}
 }
 
-static u64 nvgpu_vm_map_duplicate_locked(struct vm_gk20a *vm,
-			struct dma_buf *dmabuf,
-			u64 offset_align,
-			u32 flags,
-			int kind,
-			struct sg_table **sgt,
-			bool user_mapped,
-			int rw_flag)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	struct mapped_buffer_node *mapped_buffer = NULL;
-
-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers,
-							  offset_align);
-		if (!mapped_buffer)
-			return 0;
-
-		if (mapped_buffer->dmabuf != dmabuf ||
-		    mapped_buffer->kind != (u32)kind)
-			return 0;
-	} else {
-		mapped_buffer =
-			find_mapped_buffer_reverse_locked(vm->mapped_buffers,
-							  dmabuf, kind);
-		if (!mapped_buffer)
-			return 0;
-	}
-
-	if (mapped_buffer->flags != flags)
-		return 0;
-
-	BUG_ON(mapped_buffer->vm != vm);
-
-	/* mark the buffer as used */
-	if (user_mapped) {
-		if (mapped_buffer->user_mapped == 0)
-			vm->num_user_mapped_buffers++;
-		mapped_buffer->user_mapped++;
-
-		/* If the mapping comes from user space, we own
-		 * the handle ref. Since we reuse an
-		 * existing mapping here, we need to give back those
-		 * refs once in order not to leak.
-		 */
-		if (mapped_buffer->own_mem_ref)
-			dma_buf_put(mapped_buffer->dmabuf);
-		else
-			mapped_buffer->own_mem_ref = true;
-	}
-	kref_get(&mapped_buffer->ref);
-
-	gk20a_dbg(gpu_dbg_map,
-		  "gv: 0x%04x_%08x + 0x%-7zu "
-		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
-		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
-		  "flags=0x%x apt=%s (reused)",
-		  hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
-		  dmabuf->size,
-		  hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
-		  lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
-		  hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
-		  lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
-		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
-		  vm_aspace_id(vm),
-		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
-		  mapped_buffer->flags,
-		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
-
-	if (sgt)
-		*sgt = mapped_buffer->sgt;
-	return mapped_buffer->addr;
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static struct sg_table *gk20a_vidbuf_map_dma_buf(
 	struct dma_buf_attachment *attach, enum dma_data_direction dir)
@@ -1919,310 +1737,6 @@ int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 #endif
 }
 
-static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl,
-			enum nvgpu_aperture aperture)
-{
-	u64 align = 0, chunk_align = 0;
-	u64 buf_addr;
-
-	if (aperture == APERTURE_VIDMEM) {
-		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
-		struct page_alloc_chunk *chunk = NULL;
-
-		nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-					  page_alloc_chunk, list_entry) {
-			chunk_align = 1ULL << __ffs(chunk->base | chunk->length);
-
-			if (align)
-				align = min(align, chunk_align);
-			else
-				align = chunk_align;
-		}
-
-		return align;
-	}
-
-	buf_addr = (u64)sg_dma_address(sgl);
-
-	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
-		while (sgl) {
-			buf_addr = (u64)sg_phys(sgl);
-			chunk_align = 1ULL << __ffs(buf_addr | (u64)sgl->length);
-
-			if (align)
-				align = min(align, chunk_align);
-			else
-				align = chunk_align;
-			sgl = sg_next(sgl);
-		}
-
-		return align;
-	}
-
-	align = 1ULL << __ffs(buf_addr);
-
-	return align;
-}
-
-u64 nvgpu_vm_map(struct vm_gk20a *vm,
-		 struct dma_buf *dmabuf,
-		 u64 offset_align,
-		 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
-		 int kind,
-		 struct sg_table **sgt,
-		 bool user_mapped,
-		 int rw_flag,
-		 u64 buffer_offset,
-		 u64 mapping_size,
-		 struct vm_gk20a_mapping_batch *batch)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
-	struct device *d = dev_from_vm(vm);
-	struct mapped_buffer_node *mapped_buffer = NULL;
-	bool inserted = false, va_allocated = false;
-	u64 map_offset = 0;
-	int err = 0;
-	struct buffer_attrs bfr = {NULL};
-	struct gk20a_comptags comptags;
-	bool clear_ctags = false;
-	struct scatterlist *sgl;
-	u64 ctag_map_win_size = 0;
-	u32 ctag_map_win_ctagline = 0;
-	struct vm_reserved_va_node *va_node = NULL;
-	u32 ctag_offset;
-	enum nvgpu_aperture aperture;
-
-	if (user_mapped && vm->userspace_managed &&
-	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
-		nvgpu_err(g,
-			  "%s: non-fixed-offset mapping not available on userspace managed address spaces",
-			  __func__);
-		return -EFAULT;
-	}
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	/* check if this buffer is already mapped */
-	if (!vm->userspace_managed) {
-		map_offset = nvgpu_vm_map_duplicate_locked(
-			vm, dmabuf, offset_align,
-			flags, kind, sgt,
-			user_mapped, rw_flag);
-		if (map_offset) {
-			nvgpu_mutex_release(&vm->update_gmmu_lock);
-			return map_offset;
-		}
-	}
-
-	/* pin buffer to get phys/iovmm addr */
-	bfr.sgt = gk20a_mm_pin(d, dmabuf);
-	if (IS_ERR(bfr.sgt)) {
-		/* Falling back to physical is actually possible
-		 * here in many cases if we use 4K phys pages in the
-		 * gmmu. However we have some regions which require
-		 * contig regions to work properly (either phys-contig
-		 * or contig through smmu io_vaspace). Until we can
-		 * track the difference between those two cases we have
-		 * to fail the mapping when we run out of SMMU space.
-		 */
-		nvgpu_warn(g, "oom allocating tracking buffer");
-		goto clean_up;
-	}
-
-	if (sgt)
-		*sgt = bfr.sgt;
-
-	bfr.kind_v = kind;
-	bfr.size = dmabuf->size;
-	sgl = bfr.sgt->sgl;
-
-	aperture = gk20a_dmabuf_aperture(g, dmabuf);
-	if (aperture == APERTURE_INVALID) {
-		err = -EINVAL;
-		goto clean_up;
-	}
-
-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
-		map_offset = offset_align;
-
-	bfr.align = gk20a_mm_get_align(g, sgl, aperture);
-	bfr.pgsz_idx = __get_pte_size(vm, map_offset,
-				      min_t(u64, bfr.size, bfr.align));
-	mapping_size = mapping_size ? mapping_size : bfr.size;
-
-	/* Check if we should use a fixed offset for mapping this buffer */
-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		err = validate_fixed_buffer(vm, &bfr,
-					    offset_align, mapping_size,
-					    &va_node);
-		if (err)
-			goto clean_up;
-
-		map_offset = offset_align;
-		va_allocated = false;
-	} else
-		va_allocated = true;
-
-	if (sgt)
-		*sgt = bfr.sgt;
-
-	err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
-	if (unlikely(err)) {
-		nvgpu_err(g, "failure setting up kind and compression");
-		goto clean_up;
-	}
-
-	/* bar1 and pmu vm don't need ctag */
-	if (!vm->enable_ctag)
-		bfr.ctag_lines = 0;
-
-	gk20a_get_comptags(d, dmabuf, &comptags);
-
-	/* ensure alignment to compression page size if compression enabled */
-	if (bfr.ctag_offset)
-		mapping_size = ALIGN(mapping_size,
-				     g->ops.fb.compression_page_size(g));
-
-	if (bfr.ctag_lines && !comptags.lines) {
-		const bool user_mappable =
-			!!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
-
-		/* allocate compression resources if needed */
-		err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
-					   bfr.ctag_lines, user_mappable,
-					   &ctag_map_win_size,
-					   &ctag_map_win_ctagline);
-		if (err) {
-			/* ok to fall back here if we ran out */
-			/* TBD: we can partially alloc ctags as well... */
-			bfr.kind_v = bfr.uc_kind_v;
-		} else {
-			gk20a_get_comptags(d, dmabuf, &comptags);
-
-			if (g->ops.ltc.cbc_ctrl)
-				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-						    comptags.offset,
-						    comptags.offset +
-						    comptags.allocated_lines - 1);
-			else
-				clear_ctags = true;
-		}
-	}
-
-	/* store the comptag info */
-	bfr.ctag_offset = comptags.offset;
-	bfr.ctag_lines = comptags.lines;
-	bfr.ctag_allocated_lines = comptags.allocated_lines;
-	bfr.ctag_user_mappable = comptags.user_mappable;
-
-	/*
-	 * Calculate comptag index for this mapping. Differs in
-	 * case of partial mapping.
-	 */
-	ctag_offset = comptags.offset;
-	if (ctag_offset)
-		ctag_offset += buffer_offset >>
-			       ilog2(g->ops.fb.compression_page_size(g));
-
-	/* update gmmu ptes */
-	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
-					bfr.sgt,
-					buffer_offset, /* sg offset */
-					mapping_size,
-					bfr.pgsz_idx,
-					bfr.kind_v,
-					ctag_offset,
-					flags, rw_flag,
-					clear_ctags,
-					false,
-					false,
-					batch,
-					aperture);
-	if (!map_offset)
-		goto clean_up;
-
-#if defined(NVHOST_DEBUG)
-	{
-		int i;
-		struct scatterlist *sg = NULL;
-		gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
-		for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) {
-			u64 da = sg_dma_address(sg);
-			u64 pa = sg_phys(sg);
-			u64 len = sg->length;
-			gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
-				  i, hi32(pa), lo32(pa), hi32(da), lo32(da),
-				  hi32(len), lo32(len));
-		}
-	}
-#endif
-
-	/* keep track of the buffer for unmapping */
-	/* TBD: check for multiple mapping of same buffer */
-	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
-	if (!mapped_buffer) {
-		nvgpu_warn(g, "oom allocating tracking buffer");
-		goto clean_up;
-	}
-	mapped_buffer->dmabuf = dmabuf;
-	mapped_buffer->sgt = bfr.sgt;
-	mapped_buffer->addr = map_offset;
-	mapped_buffer->size = mapping_size;
-	mapped_buffer->pgsz_idx = bfr.pgsz_idx;
-	mapped_buffer->ctag_offset = bfr.ctag_offset;
-	mapped_buffer->ctag_lines = bfr.ctag_lines;
-	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
-	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
-	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
-	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
-	mapped_buffer->vm = vm;
-	mapped_buffer->flags = flags;
-	mapped_buffer->kind = kind;
-	mapped_buffer->va_allocated = va_allocated;
-	mapped_buffer->user_mapped = user_mapped ? 1 : 0;
-	mapped_buffer->own_mem_ref = user_mapped;
-	nvgpu_init_list_node(&mapped_buffer->va_buffers_list);
-	kref_init(&mapped_buffer->ref);
-
-	err = insert_mapped_buffer(vm, mapped_buffer);
-	if (err) {
-		nvgpu_err(g, "failed to insert into mapped buffer tree");
-		goto clean_up;
-	}
-	inserted = true;
-	if (user_mapped)
-		vm->num_user_mapped_buffers++;
-
-	gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
-
-	if (va_node) {
-		nvgpu_list_add_tail(&mapped_buffer->va_buffers_list,
-				    &va_node->va_buffers_list);
-		mapped_buffer->va_node = va_node;
-	}
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	return map_offset;
-
-clean_up:
-	if (inserted) {
-		remove_mapped_buffer(vm, mapped_buffer);
-		if (user_mapped)
-			vm->num_user_mapped_buffers--;
-	}
-	nvgpu_kfree(g, mapped_buffer);
-	if (va_allocated)
-		gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
-	if (!IS_ERR(bfr.sgt))
-		gk20a_mm_unpin(d, dmabuf, bfr.sgt);
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-	gk20a_dbg_info("err=%d\n", err);
-	return 0;
-}
-
 int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
 			       u64 mapping_gva,
 			       u64 *compbits_win_size,
@@ -2230,12 +1744,12 @@ int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
 			       u32 *mapping_ctagline,
 			       u32 *flags)
 {
-	struct mapped_buffer_node *mapped_buffer;
+	struct nvgpu_mapped_buf *mapped_buffer;
 	struct gk20a *g = vm->mm->g;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva);
+	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva);
 
 	if (!mapped_buffer || !mapped_buffer->user_mapped)
 	{
@@ -2271,7 +1785,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
 			 u64 *mapping_iova,
 			 u32 flags)
 {
-	struct mapped_buffer_node *mapped_buffer;
+	struct nvgpu_mapped_buf *mapped_buffer;
 	struct gk20a *g = gk20a_from_vm(vm);
 	const bool fixed_mapping =
 		(flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
@@ -2292,8 +1806,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer =
-		find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva);
+	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva);
 
 	if (!mapped_buffer || !mapped_buffer->user_mapped) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -2537,12 +2050,12 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work)
 
 dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 {
-	struct mapped_buffer_node *buffer;
+	struct nvgpu_mapped_buf *buffer;
 	dma_addr_t addr = 0;
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	buffer = find_mapped_buffer_locked(vm->mapped_buffers, gpu_vaddr);
+	buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
 	if (buffer)
 		addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl,
 					       buffer->flags);
@@ -3026,7 +2539,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 }
 
 /* NOTE! mapped_buffers lock must be held */
-void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 			   struct vm_gk20a_mapping_batch *batch)
 {
 	struct vm_gk20a *vm = mapped_buffer->vm;
@@ -3057,7 +2570,7 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 
 	gk20a_dbg(gpu_dbg_map,
 		  "gv: 0x%04x_%08x pgsz=%-3dKb as=%-2d own_mem_ref=%d",
-		  hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
+		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
 		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
 		  vm_aspace_id(vm),
 		  mapped_buffer->own_mem_ref);
@@ -3066,9 +2579,9 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 			mapped_buffer->sgt);
 
 	/* remove from mapped buffer tree and remove list, free */
-	remove_mapped_buffer(vm, mapped_buffer);
-	if (!nvgpu_list_empty(&mapped_buffer->va_buffers_list))
-		nvgpu_list_del(&mapped_buffer->va_buffers_list);
+	nvgpu_remove_mapped_buf(vm, mapped_buffer);
+	if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
+		nvgpu_list_del(&mapped_buffer->buffer_list);
 
 	/* keep track of mapped buffers */
 	if (mapped_buffer->user_mapped)
@@ -3082,22 +2595,6 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 	return;
 }
 
-void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
-{
-	struct gk20a *g = vm->mm->g;
-	struct mapped_buffer_node *mapped_buffer;
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset);
-	if (!mapped_buffer) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
-		return;
-	}
-
-	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-}
 
 static void gk20a_vm_free_entries(struct vm_gk20a *vm,
 				  struct gk20a_mm_entry *parent,
@@ -3659,7 +3156,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	va_node->vaddr_start = vaddr_start;
 	va_node->size = (u64)args->page_size * (u64)args->pages;
 	va_node->pgsz_idx = pgsz_idx;
-	nvgpu_init_list_node(&va_node->va_buffers_list);
+	nvgpu_init_list_node(&va_node->buffer_list_head);
 	nvgpu_init_list_node(&va_node->reserved_va_list);
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
@@ -3723,15 +3220,15 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	va_node = addr_to_reservation(vm, args->offset);
 	if (va_node) {
-		struct mapped_buffer_node *buffer, *n;
+		struct nvgpu_mapped_buf *buffer, *n;
 
 		/* Decrement the ref count on all buffers in this va_node. This
 		 * allows userspace to let the kernel free mappings that are
 		 * only used by this va_node. */
 		nvgpu_list_for_each_entry_safe(buffer, n,
-			&va_node->va_buffers_list,
-			mapped_buffer_node, va_buffers_list) {
-			nvgpu_list_del(&buffer->va_buffers_list);
+			&va_node->buffer_list_head,
+			nvgpu_mapped_buf, buffer_list) {
+			nvgpu_list_del(&buffer->buffer_list);
 			kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref);
 		}
 
@@ -3887,7 +3384,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 	}
 
 	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
-			      flags, kind, NULL, true,
+			      flags, kind, true,
 			      gk20a_mem_flag_none,
 			      buffer_offset,
 			      mapping_size,
@@ -4296,18 +3793,17 @@ hw_was_off:
 	gk20a_idle_nosuspend(g->dev);
 }
 
-int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
+int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 			 struct dma_buf **dmabuf,
 			 u64 *offset)
 {
-	struct mapped_buffer_node *mapped_buffer;
+	struct nvgpu_mapped_buf *mapped_buffer;
 
 	gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_range_locked(vm->mapped_buffers,
-							gpu_va);
+	mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
 		return -EINVAL;