path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author     Kirill Artamonov <kartamonov@nvidia.com>    2014-02-26 15:58:43 -0500
committer  Dan Willemsen <dwillemsen@nvidia.com>       2015-03-18 15:09:59 -0400
commit     d364553f7c94b4caa649f5e83f709c1c31bc45bb (patch)
tree       97b9349f3bab734ef7e1533156cbcfa4ee93feec /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     5dc7fd024305f736bd920bdc12d116d1228ad2db (diff)
gpu: nvgpu: implement mapping for sparse allocation
Implement support for partial buffer mappings.

Whitelist gr_pri_bes_crop_hww_esr accessed by fec during sparse
texture initialization.

bug 1456562
bug 1369014
bug 1361532

Change-Id: Ib0d1ec6438257ac14b40c8466b37856b67e7e34d
Signed-off-by: Kirill Artamonov <kartamonov@nvidia.com>
Reviewed-on: http://git-master/r/375012
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.c   72
1 file changed, 51 insertions(+), 21 deletions(-)
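
The heart of the change is the skip loop added to update_gmmu_ptes_locked() in the diff below: before any PTEs are written, the code walks the buffer's scatterlist one page at a time until buffer_offset bytes have been skipped, so that only a window of the buffer gets mapped. The following is a minimal userspace sketch of that walk, not driver code: struct chunk and skip_into_buffer() are made-up stand-ins for struct scatterlist and the in-kernel loop, and the chunk sizes and addresses are purely illustrative.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for struct scatterlist: one contiguous IOVA chunk. */
struct chunk {
        uint64_t addr;          /* IOVA of this chunk */
        uint64_t length;        /* chunk length in bytes */
        struct chunk *next;     /* next chunk, or NULL */
};

/*
 * Skip buffer_offset bytes into the chunk list, page by page, and
 * report the chunk and intra-chunk offset where mapping would start.
 * Mirrors the space_to_skip/cur_offset walk in the diff below.
 */
static struct chunk *skip_into_buffer(struct chunk *cur, uint64_t buffer_offset,
                                      uint64_t page_size, uint64_t *cur_offset)
{
        uint64_t space_to_skip = buffer_offset;

        /* the driver BUG_ONs here: the skip must be page aligned */
        assert((space_to_skip & (page_size - 1)) == 0);

        *cur_offset = 0;
        while (space_to_skip > 0 && cur) {
                *cur_offset += page_size;
                /* once this chunk is consumed, move to the next one */
                while (cur && *cur_offset >= cur->length) {
                        *cur_offset -= cur->length;
                        cur = cur->next;
                }
                space_to_skip -= page_size;
        }
        return cur;
}

int main(void)
{
        struct chunk c1 = { 0x100000, 0x2000, NULL };   /* 8 KiB chunk */
        struct chunk c0 = { 0x200000, 0x1000, &c1 };    /* 4 KiB chunk */
        uint64_t off;
        struct chunk *start = skip_into_buffer(&c0, 0x2000, 0x1000, &off);

        /* skipping 8 KiB lands 4 KiB into the second chunk */
        printf("start chunk addr=0x%llx, intra-chunk offset=0x%llx\n",
               (unsigned long long)start->addr, (unsigned long long)off);
        return 0;
}

The real loop also recomputes the running IOVA (addr) as it steps, which the sketch omits; the chunk pointer plus intra-chunk offset carries the same information.
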
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 579ff568..ebd829d3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -107,7 +107,7 @@ static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
                             u32 kind);
 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                                    enum gmmu_pgsz_gk20a pgsz_idx,
-                                   struct sg_table *sgt,
+                                   struct sg_table *sgt, u64 buffer_offset,
                                    u64 first_vaddr, u64 last_vaddr,
                                    u8 kind_v, u32 ctag_offset, bool cacheable,
                                    int rw_flag);
@@ -1057,7 +1057,7 @@ static int setup_buffer_kind_and_compression(struct device *d,
 
 static int validate_fixed_buffer(struct vm_gk20a *vm,
                         struct buffer_attrs *bfr,
-                        u64 map_offset)
+                        u64 map_offset, u64 map_size)
 {
         struct device *dev = dev_from_vm(vm);
         struct vm_reserved_va_node *va_node;
@@ -1084,7 +1084,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
                         &va_node->va_buffers_list, va_buffers_list) {
                 s64 begin = max(buffer->addr, map_offset);
                 s64 end = min(buffer->addr +
-                        buffer->size, map_offset + bfr->size);
+                        buffer->size, map_offset + map_size);
                 if (end - begin > 0) {
                         gk20a_warn(dev, "overlapping buffer map requested");
                         return -EINVAL;
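
(Aside, not part of the patch.) The check above, now computed against the requested map_size instead of the full bfr->size, is the usual interval-intersection test: two ranges [start, start + size) overlap exactly when the minimum of the ends minus the maximum of the starts is positive. A standalone C illustration with made-up addresses:

#include <stdint.h>
#include <stdio.h>

/* Same overlap test as validate_fixed_buffer(): end - begin > 0. */
static int ranges_overlap(int64_t a_start, int64_t a_size,
                          int64_t b_start, int64_t b_size)
{
        int64_t begin = a_start > b_start ? a_start : b_start;  /* max of starts */
        int64_t a_end = a_start + a_size, b_end = b_start + b_size;
        int64_t end = a_end < b_end ? a_end : b_end;            /* min of ends */

        return end - begin > 0;
}

int main(void)
{
        /* existing 64 KiB mapping at 0x10000 vs. two fixed-offset requests */
        printf("%d\n", ranges_overlap(0x10000, 0x10000, 0x18000, 0x1000)); /* 1: overlaps */
        printf("%d\n", ranges_overlap(0x10000, 0x10000, 0x20000, 0x1000)); /* 0: adjacent, no overlap */
        return 0;
}
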
@@ -1097,6 +1097,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 static u64 __locked_gmmu_map(struct vm_gk20a *vm,
                                 u64 map_offset,
                                 struct sg_table *sgt,
+                                u64 buffer_offset,
                                 u64 size,
                                 int pgsz_idx,
                                 u8 kind_v,
@@ -1139,6 +1140,7 @@ static u64 __locked_gmmu_map(struct vm_gk20a *vm,
 
         err = update_gmmu_ptes_locked(vm, pgsz_idx,
                                       sgt,
+                                      buffer_offset,
                                       map_offset, map_offset + size - 1,
                                       kind_v,
                                       ctag_offset,
@@ -1182,6 +1184,7 @@ static void __locked_gmmu_unmap(struct vm_gk20a *vm,
         err = update_gmmu_ptes_locked(vm,
                                 pgsz_idx,
                                 0, /* n/a for unmap */
+                                0,
                                 vaddr,
                                 vaddr + size - 1,
                                 0, 0, false /* n/a for unmap */,
@@ -1274,7 +1277,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                         int kind,
                         struct sg_table **sgt,
                         bool user_mapped,
-                        int rw_flag)
+                        int rw_flag,
+                        u64 buffer_offset,
+                        u64 mapping_size)
 {
         struct gk20a *g = gk20a_from_vm(vm);
         struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
@@ -1324,6 +1329,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         buf_addr = (u64)sg_phys(bfr.sgt->sgl);
         bfr.align = 1 << __ffs(buf_addr);
         bfr.pgsz_idx = -1;
+        mapping_size = mapping_size ? mapping_size : bfr.size;
 
         /* If FIX_OFFSET is set, pgsz is determined. Otherwise, select
          * page size according to memory alignment */
@@ -1352,8 +1358,10 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
 
         /* Check if we should use a fixed offset for mapping this buffer */
+
         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-                err = validate_fixed_buffer(vm, &bfr, offset_align);
+                err = validate_fixed_buffer(vm, &bfr,
+                        offset_align, mapping_size);
                 if (err)
                         goto clean_up;
 
@@ -1402,11 +1410,13 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         /* update gmmu ptes */
         map_offset = __locked_gmmu_map(vm, map_offset,
                                         bfr.sgt,
-                                        bfr.size,
+                                        buffer_offset, /* sg offset */
+                                        mapping_size,
                                         bfr.pgsz_idx,
                                         bfr.kind_v,
                                         bfr.ctag_offset,
                                         flags, rw_flag);
+
         if (!map_offset)
                 goto clean_up;
 
@@ -1449,7 +1459,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         mapped_buffer->dmabuf      = dmabuf;
         mapped_buffer->sgt         = bfr.sgt;
         mapped_buffer->addr        = map_offset;
-        mapped_buffer->size        = bfr.size;
+        mapped_buffer->size        = mapping_size;
         mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
         mapped_buffer->ctag_offset = bfr.ctag_offset;
         mapped_buffer->ctag_lines  = bfr.ctag_lines;
@@ -1520,6 +1530,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
         mutex_lock(&vm->update_gmmu_lock);
         vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
                                 *sgt, /* sg table */
+                                0, /* sg offset */
                                 size,
                                 0, /* page size index = 0 i.e. SZ_4K */
                                 0, /* kind */
@@ -1649,6 +1660,7 @@ u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                                    enum gmmu_pgsz_gk20a pgsz_idx,
                                    struct sg_table *sgt,
+                                   u64 buffer_offset,
                                    u64 first_vaddr, u64 last_vaddr,
                                    u8 kind_v, u32 ctag_offset,
                                    bool cacheable,
@@ -1663,6 +1675,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
         u32 ctag_incr;
         u32 page_size = gmmu_page_sizes[pgsz_idx];
         u64 addr = 0;
+        u64 space_to_skip = buffer_offset;
 
         pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
                                    &pde_lo, &pde_hi);
@@ -1675,13 +1688,31 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
          * comptags are active) is 128KB. We have checks elsewhere for that. */
         ctag_incr = !!ctag_offset;
 
-        if (sgt)
+        cur_offset = 0;
+        if (sgt) {
                 cur_chunk = sgt->sgl;
+                /* space_to_skip must be page aligned */
+                BUG_ON(space_to_skip & (page_size - 1));
+
+                while (space_to_skip > 0 && cur_chunk) {
+                        u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
+                        if (new_addr) {
+                                addr = new_addr;
+                                addr += cur_offset;
+                        }
+                        cur_offset += page_size;
+                        addr += page_size;
+                        while (cur_chunk &&
+                                cur_offset >= cur_chunk->length) {
+                                cur_offset -= cur_chunk->length;
+                                cur_chunk = sg_next(cur_chunk);
+                        }
+                        space_to_skip -= page_size;
+                }
+        }
         else
                 cur_chunk = NULL;
 
-        cur_offset = 0;
-
         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
                 u32 pte_lo, pte_hi;
                 u32 pte_cur;
@@ -1713,14 +1744,12 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 
                 gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
                 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
-
                         if (likely(sgt)) {
                                 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
                                 if (new_addr) {
                                         addr = new_addr;
                                         addr += cur_offset;
                                 }
-
                                 pte_w[0] = gmmu_pte_valid_true_f() |
                                         gmmu_pte_address_sys_f(addr
                                                 >> gmmu_pte_address_shift_v());
@@ -1737,20 +1766,16 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                                         pte_w[1] |=
                                                 gmmu_pte_read_disable_true_f();
                                 }
-
                                 if (!cacheable)
                                         pte_w[1] |= gmmu_pte_vol_true_f();
 
                                 pte->ref_cnt++;
-
-                                gk20a_dbg(gpu_dbg_pte,
-                                        "pte_cur=%d addr=0x%x,%08x kind=%d"
+                                gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
                                         " ctag=%d vol=%d refs=%d"
                                         " [0x%08x,0x%08x]",
                                         pte_cur, hi32(addr), lo32(addr),
                                         kind_v, ctag, !cacheable,
                                         pte->ref_cnt, pte_w[1], pte_w[0]);
-
                                 ctag += ctag_incr;
                                 cur_offset += page_size;
                                 addr += page_size;
@@ -1926,7 +1951,7 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 
         for (i = 0; i < num_pages; i++) {
                 u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
-                        vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
+                        vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
                         NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
                         gk20a_mem_flag_none);
 
@@ -2012,6 +2037,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
                 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
                 return;
         }
+
         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
         mutex_unlock(&vm->update_gmmu_lock);
 }
@@ -2301,7 +2327,6 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 
                 va_node->sparse = true;
         }
-
         list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
 
         mutex_unlock(&vm->update_gmmu_lock);
@@ -2441,7 +2466,9 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
                         int dmabuf_fd,
                         u64 *offset_align,
                         u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
-                        int kind)
+                        int kind,
+                        u64 buffer_offset,
+                        u64 mapping_size)
 {
         int err = 0;
         struct vm_gk20a *vm = as_share->vm;
@@ -2466,7 +2493,10 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
 
         ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
                         flags, kind, NULL, true,
-                        gk20a_mem_flag_none);
+                        gk20a_mem_flag_none,
+                        buffer_offset,
+                        mapping_size);
+
         *offset_align = ret_va;
         if (!ret_va) {
                 dma_buf_put(dmabuf);
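
Taken together, the signature changes above mean gk20a_vm_map_buffer() can map a page-aligned window of a dma-buf instead of the whole buffer; passing buffer_offset = 0 and mapping_size = 0 keeps the old behaviour, since gk20a_vm_map() falls back to bfr.size. A hypothetical caller-side sketch (kernel context, not from the patch; as_share and dmabuf_fd are assumed to exist in the caller, and the address, kind and sizes are illustrative only):

        /* Map a 64 KiB window starting 1 MiB into the dma-buf at a fixed GPU VA. */
        u64 offset_align = 0x100000000ULL;  /* requested VA in; resulting VA out */
        int err;

        err = gk20a_vm_map_buffer(as_share, dmabuf_fd, &offset_align,
                                  NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET,
                                  0,        /* kind */
                                  SZ_1M,    /* buffer_offset into the dma-buf */
                                  SZ_64K);  /* mapping_size of the window */
        if (err)
                return err;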