Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 168
1 file changed, 166 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5d1ff563..d896d783 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -221,7 +221,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			       struct device *dev,
 			       struct dma_buf *dmabuf,
 			       struct gk20a_allocator *allocator,
-			       u32 lines, bool user_mappable)
+			       u32 lines, bool user_mappable,
+			       u64 *ctag_map_win_size,
+			       u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 offset = 0;
@@ -313,6 +315,8 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 				first_unneeded_cacheline *
 				g->gr.comptags_per_cacheline;
 
+			u64 win_size;
+
 			if (needed_ctaglines < ctaglines_to_allocate) {
 				/* free alignment lines */
 				int tmp=
@@ -326,6 +330,14 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 			ctaglines_to_allocate = needed_ctaglines;
 		}
+
+		*ctag_map_win_ctagline = offset;
+		win_size =
+			DIV_ROUND_UP(lines,
+				     g->gr.comptags_per_cacheline) *
+			aggregate_cacheline_sz;
+
+		*ctag_map_win_size = round_up(win_size, small_pgsz);
 	}
 
 	priv->comptags.offset = offset;
@@ -1374,6 +1386,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	bool clear_ctags = false;
 	struct scatterlist *sgl;
 	u64 buf_addr;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1501,7 +1515,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	/* allocate compression resources if needed */
 	err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
-				   bfr.ctag_lines, user_mappable);
+				   bfr.ctag_lines, user_mappable,
+				   &ctag_map_win_size,
+				   &ctag_map_win_ctagline);
 	if (err) {
 		/* ok to fall back here if we ran out */
 		/* TBD: we can partially alloc ctags as well... */
@@ -1588,6 +1604,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->ctag_lines = bfr.ctag_lines;
 	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
 	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
 	mapped_buffer->vm = vm;
 	mapped_buffer->flags = flags;
 	mapped_buffer->kind = kind;
@@ -1640,6 +1658,140 @@ clean_up:
 	return 0;
 }
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct device *d = dev_from_vm(vm);
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped)
+	{
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	*compbits_win_size = 0;
+	*compbits_win_ctagline = 0;
+	*mapping_ctagline = 0;
+	*flags = 0;
+
+	if (mapped_buffer->ctag_offset)
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS;
+
+	if (mapped_buffer->ctags_mappable)
+	{
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE;
+		*compbits_win_size = mapped_buffer->ctag_map_win_size;
+		*compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline;
+		*mapping_ctagline = mapped_buffer->ctag_offset;
+	}
+
+	mutex_unlock(&vm->update_gmmu_lock);
+	return 0;
+}
+
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct device *d = dev_from_vm(vm);
+
+	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
+		/* This will be implemented later */
+		gk20a_err(d,
+			  "%s: fixed-offset compbits mapping not yet supported",
+			  __func__);
+		return -EFAULT;
+	}
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctags_mappable) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctag_map_win_addr) {
+		const u32 small_pgsz_index = 0; /* small pages, 4K */
+		const u32 aggregate_cacheline_sz =
+			g->gr.cacheline_size * g->gr.slices_per_ltc *
+			g->ltc_count;
+
+		/* first aggregate cacheline to map */
+		u32 cacheline_start; /* inclusive */
+
+		/* offset of the start cacheline (will be page aligned) */
+		u64 cacheline_offset_start;
+
+		if (!mapped_buffer->ctag_map_win_size) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: mapping 0x%llx does not have "
+				  "mappable comptags",
+				  __func__, mapping_gva);
+			return -EFAULT;
+		}
+
+		cacheline_start = mapped_buffer->ctag_offset /
+			g->gr.comptags_per_cacheline;
+		cacheline_offset_start =
+			cacheline_start * aggregate_cacheline_sz;
+
+		mapped_buffer->ctag_map_win_addr =
+			g->ops.mm.gmmu_map(
+				vm,
+				0,
+				g->gr.compbit_store.mem.sgt,
+				cacheline_offset_start, /* sg offset */
+				mapped_buffer->ctag_map_win_size, /* size */
+				small_pgsz_index,
+				0, /* kind */
+				0, /* ctag_offset */
+				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				gk20a_mem_flag_read_only,
+				false,
+				false);
+
+		if (!mapped_buffer->ctag_map_win_addr) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: failed to map comptags for mapping 0x%llx",
+				  __func__, mapping_gva);
+			return -ENOMEM;
+		}
+	}
+
+	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
+	*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
+
+	mutex_unlock(&vm->update_gmmu_lock);
+
+	return 0;
+}
+
 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		   struct sg_table **sgt,
 		   u64 size,
@@ -2276,6 +2428,18 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
+	if (mapped_buffer->ctag_map_win_addr) {
+		/* unmap compbits */
+
+		g->ops.mm.gmmu_unmap(vm,
+				     mapped_buffer->ctag_map_win_addr,
+				     mapped_buffer->ctag_map_win_size,
+				     0,     /* page size 4k */
+				     true,  /* va allocated */
+				     gk20a_mem_flag_none,
+				     false); /* not sparse */
+	}
+
 	g->ops.mm.gmmu_unmap(vm,
 			     mapped_buffer->addr,
 			     mapped_buffer->size,
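
For reference, the compbits-window sizing that the new gk20a_alloc_comptags() code performs above can be reproduced in a small standalone C sketch. Only the formula (whole aggregate cachelines covering the allocated ctag lines, rounded up to the small page size) comes from the diff; the numeric constants below are illustrative assumptions, not values read from real gk20a hardware.

/* Standalone sketch of the ctag_map_win_size computation from the diff.
 * All hardware-looking constants here are assumed example values. */
#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))
#define ROUND_UP(x, a)      (DIV_ROUND_UP(x, a) * (a))

int main(void)
{
	uint32_t lines = 1000;                /* ctag lines needed for the buffer (example) */
	uint32_t comptags_per_cacheline = 32; /* stands in for g->gr.comptags_per_cacheline */
	uint64_t cacheline_size = 512;        /* stands in for g->gr.cacheline_size */
	uint32_t slices_per_ltc = 2;          /* stands in for g->gr.slices_per_ltc */
	uint32_t ltc_count = 2;               /* stands in for g->ltc_count */
	uint64_t small_pgsz = 4096;           /* small (4K) page size */

	/* aggregate cacheline across all L2 slices, as in gk20a_vm_map_compbits() */
	uint64_t aggregate_cacheline_sz =
		cacheline_size * slices_per_ltc * ltc_count;

	/* window spans whole aggregate cachelines, rounded up to small pages */
	uint64_t win_size =
		DIV_ROUND_UP(lines, comptags_per_cacheline) * aggregate_cacheline_sz;
	uint64_t ctag_map_win_size = ROUND_UP(win_size, small_pgsz);

	printf("ctag_map_win_size = %llu bytes\n",
	       (unsigned long long)ctag_map_win_size);
	return 0;
}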