diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 168 |
1 files changed, 166 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 5d1ff563..d896d783 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -221,7 +221,9 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
221 | struct device *dev, | 221 | struct device *dev, |
222 | struct dma_buf *dmabuf, | 222 | struct dma_buf *dmabuf, |
223 | struct gk20a_allocator *allocator, | 223 | struct gk20a_allocator *allocator, |
224 | u32 lines, bool user_mappable) | 224 | u32 lines, bool user_mappable, |
225 | u64 *ctag_map_win_size, | ||
226 | u32 *ctag_map_win_ctagline) | ||
225 | { | 227 | { |
226 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | 228 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); |
227 | u32 offset = 0; | 229 | u32 offset = 0; |
@@ -313,6 +315,8 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
313 | first_unneeded_cacheline * | 315 | first_unneeded_cacheline * |
314 | g->gr.comptags_per_cacheline; | 316 | g->gr.comptags_per_cacheline; |
315 | 317 | ||
318 | u64 win_size; | ||
319 | |||
316 | if (needed_ctaglines < ctaglines_to_allocate) { | 320 | if (needed_ctaglines < ctaglines_to_allocate) { |
317 | /* free alignment lines */ | 321 | /* free alignment lines */ |
318 | int tmp= | 322 | int tmp= |
@@ -326,6 +330,14 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
326 | 330 | ||
327 | ctaglines_to_allocate = needed_ctaglines; | 331 | ctaglines_to_allocate = needed_ctaglines; |
328 | } | 332 | } |
333 | |||
334 | *ctag_map_win_ctagline = offset; | ||
335 | win_size = | ||
336 | DIV_ROUND_UP(lines, | ||
337 | g->gr.comptags_per_cacheline) * | ||
338 | aggregate_cacheline_sz; | ||
339 | |||
340 | *ctag_map_win_size = round_up(win_size, small_pgsz); | ||
329 | } | 341 | } |
330 | 342 | ||
331 | priv->comptags.offset = offset; | 343 | priv->comptags.offset = offset; |
@@ -1374,6 +1386,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1374 | bool clear_ctags = false; | 1386 | bool clear_ctags = false; |
1375 | struct scatterlist *sgl; | 1387 | struct scatterlist *sgl; |
1376 | u64 buf_addr; | 1388 | u64 buf_addr; |
1389 | u64 ctag_map_win_size = 0; | ||
1390 | u32 ctag_map_win_ctagline = 0; | ||
1377 | 1391 | ||
1378 | mutex_lock(&vm->update_gmmu_lock); | 1392 | mutex_lock(&vm->update_gmmu_lock); |
1379 | 1393 | ||
@@ -1501,7 +1515,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1501 | 1515 | ||
1502 | /* allocate compression resources if needed */ | 1516 | /* allocate compression resources if needed */ |
1503 | err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator, | 1517 | err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator, |
1504 | bfr.ctag_lines, user_mappable); | 1518 | bfr.ctag_lines, user_mappable, |
1519 | &ctag_map_win_size, | ||
1520 | &ctag_map_win_ctagline); | ||
1505 | if (err) { | 1521 | if (err) { |
1506 | /* ok to fall back here if we ran out */ | 1522 | /* ok to fall back here if we ran out */ |
1507 | /* TBD: we can partially alloc ctags as well... */ | 1523 | /* TBD: we can partially alloc ctags as well... */ |
@@ -1588,6 +1604,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1588 | mapped_buffer->ctag_lines = bfr.ctag_lines; | 1604 | mapped_buffer->ctag_lines = bfr.ctag_lines; |
1589 | mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; | 1605 | mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; |
1590 | mapped_buffer->ctags_mappable = bfr.ctag_user_mappable; | 1606 | mapped_buffer->ctags_mappable = bfr.ctag_user_mappable; |
1607 | mapped_buffer->ctag_map_win_size = ctag_map_win_size; | ||
1608 | mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline; | ||
1591 | mapped_buffer->vm = vm; | 1609 | mapped_buffer->vm = vm; |
1592 | mapped_buffer->flags = flags; | 1610 | mapped_buffer->flags = flags; |
1593 | mapped_buffer->kind = kind; | 1611 | mapped_buffer->kind = kind; |
@@ -1640,6 +1658,140 @@ clean_up: | |||
1640 | return 0; | 1658 | return 0; |
1641 | } | 1659 | } |
1642 | 1660 | ||
1661 | int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, | ||
1662 | u64 mapping_gva, | ||
1663 | u64 *compbits_win_size, | ||
1664 | u32 *compbits_win_ctagline, | ||
1665 | u32 *mapping_ctagline, | ||
1666 | u32 *flags) | ||
1667 | { | ||
1668 | struct mapped_buffer_node *mapped_buffer; | ||
1669 | struct device *d = dev_from_vm(vm); | ||
1670 | |||
1671 | mutex_lock(&vm->update_gmmu_lock); | ||
1672 | |||
1673 | mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva); | ||
1674 | |||
1675 | if (!mapped_buffer | !mapped_buffer->user_mapped) | ||
1676 | { | ||
1677 | mutex_unlock(&vm->update_gmmu_lock); | ||
1678 | gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva); | ||
1679 | return -EFAULT; | ||
1680 | } | ||
1681 | |||
1682 | *compbits_win_size = 0; | ||
1683 | *compbits_win_ctagline = 0; | ||
1684 | *mapping_ctagline = 0; | ||
1685 | *flags = 0; | ||
1686 | |||
1687 | if (mapped_buffer->ctag_offset) | ||
1688 | *flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS; | ||
1689 | |||
1690 | if (mapped_buffer->ctags_mappable) | ||
1691 | { | ||
1692 | *flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE; | ||
1693 | *compbits_win_size = mapped_buffer->ctag_map_win_size; | ||
1694 | *compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline; | ||
1695 | *mapping_ctagline = mapped_buffer->ctag_offset; | ||
1696 | } | ||
1697 | |||
1698 | mutex_unlock(&vm->update_gmmu_lock); | ||
1699 | return 0; | ||
1700 | } | ||
1701 | |||
1702 | |||
1703 | int gk20a_vm_map_compbits(struct vm_gk20a *vm, | ||
1704 | u64 mapping_gva, | ||
1705 | u64 *compbits_win_gva, | ||
1706 | u64 *mapping_iova, | ||
1707 | u32 flags) | ||
1708 | { | ||
1709 | struct mapped_buffer_node *mapped_buffer; | ||
1710 | struct gk20a *g = gk20a_from_vm(vm); | ||
1711 | struct device *d = dev_from_vm(vm); | ||
1712 | |||
1713 | if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) { | ||
1714 | /* This will be implemented later */ | ||
1715 | gk20a_err(d, | ||
1716 | "%s: fixed-offset compbits mapping not yet supported", | ||
1717 | __func__); | ||
1718 | return -EFAULT; | ||
1719 | } | ||
1720 | |||
1721 | mutex_lock(&vm->update_gmmu_lock); | ||
1722 | |||
1723 | mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva); | ||
1724 | |||
1725 | if (!mapped_buffer || !mapped_buffer->user_mapped) { | ||
1726 | mutex_unlock(&vm->update_gmmu_lock); | ||
1727 | gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva); | ||
1728 | return -EFAULT; | ||
1729 | } | ||
1730 | |||
1731 | if (!mapped_buffer->ctags_mappable) { | ||
1732 | mutex_unlock(&vm->update_gmmu_lock); | ||
1733 | gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva); | ||
1734 | return -EFAULT; | ||
1735 | } | ||
1736 | |||
1737 | if (!mapped_buffer->ctag_map_win_addr) { | ||
1738 | const u32 small_pgsz_index = 0; /* small pages, 4K */ | ||
1739 | const u32 aggregate_cacheline_sz = | ||
1740 | g->gr.cacheline_size * g->gr.slices_per_ltc * | ||
1741 | g->ltc_count; | ||
1742 | |||
1743 | /* first aggregate cacheline to map */ | ||
1744 | u32 cacheline_start; /* inclusive */ | ||
1745 | |||
1746 | /* offset of the start cacheline (will be page aligned) */ | ||
1747 | u64 cacheline_offset_start; | ||
1748 | |||
1749 | if (!mapped_buffer->ctag_map_win_size) { | ||
1750 | mutex_unlock(&vm->update_gmmu_lock); | ||
1751 | gk20a_err(d, | ||
1752 | "%s: mapping 0x%llx does not have " | ||
1753 | "mappable comptags", | ||
1754 | __func__, mapping_gva); | ||
1755 | return -EFAULT; | ||
1756 | } | ||
1757 | |||
1758 | cacheline_start = mapped_buffer->ctag_offset / | ||
1759 | g->gr.comptags_per_cacheline; | ||
1760 | cacheline_offset_start = | ||
1761 | cacheline_start * aggregate_cacheline_sz; | ||
1762 | |||
1763 | mapped_buffer->ctag_map_win_addr = | ||
1764 | g->ops.mm.gmmu_map( | ||
1765 | vm, | ||
1766 | 0, | ||
1767 | g->gr.compbit_store.mem.sgt, | ||
1768 | cacheline_offset_start, /* sg offset */ | ||
1769 | mapped_buffer->ctag_map_win_size, /* size */ | ||
1770 | small_pgsz_index, | ||
1771 | 0, /* kind */ | ||
1772 | 0, /* ctag_offset */ | ||
1773 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
1774 | gk20a_mem_flag_read_only, | ||
1775 | false, | ||
1776 | false); | ||
1777 | |||
1778 | if (!mapped_buffer->ctag_map_win_addr) { | ||
1779 | mutex_unlock(&vm->update_gmmu_lock); | ||
1780 | gk20a_err(d, | ||
1781 | "%s: failed to map comptags for mapping 0x%llx", | ||
1782 | __func__, mapping_gva); | ||
1783 | return -ENOMEM; | ||
1784 | } | ||
1785 | } | ||
1786 | |||
1787 | *mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0); | ||
1788 | *compbits_win_gva = mapped_buffer->ctag_map_win_addr; | ||
1789 | |||
1790 | mutex_unlock(&vm->update_gmmu_lock); | ||
1791 | |||
1792 | return 0; | ||
1793 | } | ||
1794 | |||
1643 | u64 gk20a_gmmu_map(struct vm_gk20a *vm, | 1795 | u64 gk20a_gmmu_map(struct vm_gk20a *vm, |
1644 | struct sg_table **sgt, | 1796 | struct sg_table **sgt, |
1645 | u64 size, | 1797 | u64 size, |
@@ -2276,6 +2428,18 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | |||
2276 | struct vm_gk20a *vm = mapped_buffer->vm; | 2428 | struct vm_gk20a *vm = mapped_buffer->vm; |
2277 | struct gk20a *g = vm->mm->g; | 2429 | struct gk20a *g = vm->mm->g; |
2278 | 2430 | ||
2431 | if (mapped_buffer->ctag_map_win_addr) { | ||
2432 | /* unmap compbits */ | ||
2433 | |||
2434 | g->ops.mm.gmmu_unmap(vm, | ||
2435 | mapped_buffer->ctag_map_win_addr, | ||
2436 | mapped_buffer->ctag_map_win_size, | ||
2437 | 0, /* page size 4k */ | ||
2438 | true, /* va allocated */ | ||
2439 | gk20a_mem_flag_none, | ||
2440 | false); /* not sparse */ | ||
2441 | } | ||
2442 | |||
2279 | g->ops.mm.gmmu_unmap(vm, | 2443 | g->ops.mm.gmmu_unmap(vm, |
2280 | mapped_buffer->addr, | 2444 | mapped_buffer->addr, |
2281 | mapped_buffer->size, | 2445 | mapped_buffer->size, |