author    Sami Kiminki <skiminki@nvidia.com>    2015-03-19 15:28:34 -0400
committer Ishan Mittal <imittal@nvidia.com>     2015-05-18 02:03:19 -0400
commit    520ff00e870eadc98a50f58ecd514ced53a8612f
tree      6822d8ad2a51f98c2df421ba9cc7727e06757fcb
parent    069accc8571716dc616c9f96776d54bf657afaee
gpu: nvgpu: Implement compbits mapping
Implement NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO for requesting info on
compbits-mappable buffers, and NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS, which
maps the compbits of such buffers into the GPU address space. This, in
turn, enables moving the comptag swizzling into the CDEH/CDEV formats
from the GPU to userspace.

Compbits mapping is conservative and may map more than is strictly
needed, for two reasons: 1) the mapping must be aligned to the small
page size (4 kB), and 2) GPU comptags are swizzled across the whole
aggregate cache line, so the entire cache line must be visible even if
only some of its comptag lines are required. The cache line size is not
necessarily a multiple of the small page size.

Bug 200077571

Change-Id: I5ae88fe6b616e5ea37d3bff0dff46c07e9c9267e
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/719710
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
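[Editor's note] The conservative window sizing described above reduces to a small amount of arithmetic. The standalone C sketch below mirrors the DIV_ROUND_UP()/round_up() computation that gk20a_alloc_comptags() performs in the diff; the function name and the example parameter values are illustrative assumptions, not part of the driver.

#include <stdint.h>
#include <stdio.h>

/*
 * Conservative compbits window size: round the buffer's comptag lines up
 * to whole aggregate cache lines, then round the resulting byte size up
 * to the small page size. Mirrors the DIV_ROUND_UP()/round_up()
 * arithmetic in gk20a_alloc_comptags(); names and values here are
 * illustrative only.
 */
static uint64_t compbits_win_size(uint32_t ctag_lines,
				  uint32_t comptags_per_cacheline,
				  uint64_t aggregate_cacheline_sz,
				  uint64_t small_pgsz)
{
	uint64_t cachelines = (ctag_lines + comptags_per_cacheline - 1) /
			      comptags_per_cacheline;
	uint64_t win_size = cachelines * aggregate_cacheline_sz;

	/* round_up() equivalent for a power-of-two page size */
	return (win_size + small_pgsz - 1) & ~(small_pgsz - 1);
}

int main(void)
{
	/* e.g. 100 comptag lines, 32 comptags per aggregate cache line,
	 * an 8 kB aggregate cache line, and 4 kB small pages */
	printf("compbits window: %llu bytes\n",
	       (unsigned long long)compbits_win_size(100, 32, 8192, 4096));
	return 0;
}

The final page-size round-up is what makes the window cover more compbits than the buffer strictly needs.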
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 168 ++++++++++++++++++++++++++++++++-
 1 file changed, 166 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5d1ff563..d896d783 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -221,7 +221,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 				struct device *dev,
 				struct dma_buf *dmabuf,
 				struct gk20a_allocator *allocator,
-				u32 lines, bool user_mappable)
+				u32 lines, bool user_mappable,
+				u64 *ctag_map_win_size,
+				u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 offset = 0;
@@ -313,6 +315,8 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			first_unneeded_cacheline *
 			g->gr.comptags_per_cacheline;
 
+		u64 win_size;
+
 		if (needed_ctaglines < ctaglines_to_allocate) {
 			/* free alignment lines */
 			int tmp=
@@ -326,6 +330,14 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 			ctaglines_to_allocate = needed_ctaglines;
 		}
+
+		*ctag_map_win_ctagline = offset;
+		win_size =
+			DIV_ROUND_UP(lines,
+				     g->gr.comptags_per_cacheline) *
+			aggregate_cacheline_sz;
+
+		*ctag_map_win_size = round_up(win_size, small_pgsz);
 	}
 
 	priv->comptags.offset = offset;
@@ -1374,6 +1386,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	bool clear_ctags = false;
 	struct scatterlist *sgl;
 	u64 buf_addr;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1501,7 +1515,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	/* allocate compression resources if needed */
 	err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
-				   bfr.ctag_lines, user_mappable);
+				   bfr.ctag_lines, user_mappable,
+				   &ctag_map_win_size,
+				   &ctag_map_win_ctagline);
 	if (err) {
 		/* ok to fall back here if we ran out */
 		/* TBD: we can partially alloc ctags as well... */
@@ -1588,6 +1604,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->ctag_lines = bfr.ctag_lines;
 	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
 	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
 	mapped_buffer->vm = vm;
 	mapped_buffer->flags = flags;
 	mapped_buffer->kind = kind;
@@ -1640,6 +1658,140 @@ clean_up:
 	return 0;
 }
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct device *d = dev_from_vm(vm);
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped)
+	{
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	*compbits_win_size = 0;
+	*compbits_win_ctagline = 0;
+	*mapping_ctagline = 0;
+	*flags = 0;
+
+	if (mapped_buffer->ctag_offset)
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS;
+
+	if (mapped_buffer->ctags_mappable)
+	{
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE;
+		*compbits_win_size = mapped_buffer->ctag_map_win_size;
+		*compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline;
+		*mapping_ctagline = mapped_buffer->ctag_offset;
+	}
+
+	mutex_unlock(&vm->update_gmmu_lock);
+	return 0;
+}
+
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct device *d = dev_from_vm(vm);
+
+	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
+		/* This will be implemented later */
+		gk20a_err(d,
+			  "%s: fixed-offset compbits mapping not yet supported",
+			  __func__);
+		return -EFAULT;
+	}
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctags_mappable) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctag_map_win_addr) {
+		const u32 small_pgsz_index = 0; /* small pages, 4K */
+		const u32 aggregate_cacheline_sz =
+			g->gr.cacheline_size * g->gr.slices_per_ltc *
+			g->ltc_count;
+
+		/* first aggregate cacheline to map */
+		u32 cacheline_start; /* inclusive */
+
+		/* offset of the start cacheline (will be page aligned) */
+		u64 cacheline_offset_start;
+
+		if (!mapped_buffer->ctag_map_win_size) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: mapping 0x%llx does not have "
+				  "mappable comptags",
+				  __func__, mapping_gva);
+			return -EFAULT;
+		}
+
+		cacheline_start = mapped_buffer->ctag_offset /
+			g->gr.comptags_per_cacheline;
+		cacheline_offset_start =
+			cacheline_start * aggregate_cacheline_sz;
+
+		mapped_buffer->ctag_map_win_addr =
+			g->ops.mm.gmmu_map(
+				vm,
+				0,
+				g->gr.compbit_store.mem.sgt,
+				cacheline_offset_start, /* sg offset */
+				mapped_buffer->ctag_map_win_size, /* size */
+				small_pgsz_index,
+				0, /* kind */
+				0, /* ctag_offset */
+				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				gk20a_mem_flag_read_only,
+				false,
+				false);
+
+		if (!mapped_buffer->ctag_map_win_addr) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: failed to map comptags for mapping 0x%llx",
+				  __func__, mapping_gva);
+			return -ENOMEM;
+		}
+	}
+
+	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
+	*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
+
+	mutex_unlock(&vm->update_gmmu_lock);
+
+	return 0;
+}
+
 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		   struct sg_table **sgt,
 		   u64 size,
@@ -2276,6 +2428,18 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
+	if (mapped_buffer->ctag_map_win_addr) {
+		/* unmap compbits */
+
+		g->ops.mm.gmmu_unmap(vm,
+				     mapped_buffer->ctag_map_win_addr,
+				     mapped_buffer->ctag_map_win_size,
+				     0,       /* page size 4k */
+				     true,    /* va allocated */
+				     gk20a_mem_flag_none,
+				     false);  /* not sparse */
+	}
+
 	g->ops.mm.gmmu_unmap(vm,
 			     mapped_buffer->addr,
 			     mapped_buffer->size,
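
[Editor's note] The two VM-level entry points added above are intended to back the NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO and NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS ioctls named in the commit message. The sketch below shows how an ioctl handler might chain them; only the helper signatures and the MAPPABLE flag come from this patch, while the wrapper function itself and its error policy are assumptions.

/*
 * Illustrative only: query a mapping's compbits window and, if it is
 * mappable, map it read-only into the same address space. The
 * gk20a_vm_get_compbits_info()/gk20a_vm_map_compbits() signatures and
 * NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE are taken from this
 * patch; the wrapper name and error handling are assumptions.
 */
static int example_map_compbits_window(struct vm_gk20a *vm,
					u64 mapping_gva,
					u64 *compbits_gva_out)
{
	u64 win_size = 0, mapping_iova = 0;
	u32 win_ctagline = 0, mapping_ctagline = 0, info_flags = 0;
	int err;

	/* NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO path */
	err = gk20a_vm_get_compbits_info(vm, mapping_gva, &win_size,
					 &win_ctagline, &mapping_ctagline,
					 &info_flags);
	if (err)
		return err;

	if (!(info_flags & NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE))
		return -EINVAL;

	/* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS path, no fixed GVA requested */
	return gk20a_vm_map_compbits(vm, mapping_gva, compbits_gva_out,
				     &mapping_iova, 0);
}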