author		Sami Kiminki <skiminki@nvidia.com>	2015-05-04 11:41:23 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-11-18 12:45:07 -0500
commit		9d2c9072c8b9a7742db3974d6027df9d44e0953f (patch)
tree		15ed7e5a3495db6032b43381641d102837decfad /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent		503d3a0b1002685e65efb8b99a2362117ee62104 (diff)
gpu: nvgpu: User-space managed address space support
Implement NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED, which enables
creating userspace-managed GPU address spaces.

When an address space is marked as userspace-managed, the following changes
are in effect:

- Only fixed-address mappings are allowed.
- VA space allocation for fixed-address mappings is not required, except
  to mark space as sparse.
- Maps and unmaps are always immediate. In particular, the mapping ref
  increments at kickoffs and decrements at job completion are skipped.

Bug 1614735
Bug 1623949
Bug 1660392

Change-Id: I834fe19b3f65e9b02c268952383eddee0e465759
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/738558
Reviewed-on: http://git-master/r/833253
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
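From userspace, the new flag travels in through the address-space allocation
ioctl and lands in gk20a_vm_alloc_share() in the diff below. The following is
a minimal, hedged sketch of how a client might request a userspace-managed
address space; the nvgpu_alloc_as_args layout, the /dev/nvhost-ctrl-gpu device
path, and the header name are assumptions based on the nvgpu UAPI of this
period and should be checked against include/uapi/linux/nvgpu.h.

/* Hedged sketch: allocate a userspace-managed GPU address space.
 * Struct layout and device path are assumptions; verify against the
 * nvgpu UAPI headers of the target kernel. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* NVGPU_GPU_IOCTL_ALLOC_AS and flag definitions */

static int alloc_userspace_managed_as(void)
{
	struct nvgpu_alloc_as_args args = {0};
	int ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (ctrl_fd < 0)
		return -1;

	args.big_page_size = 0;	/* 0: let the driver pick its default */
	args.flags = NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_AS, &args) != 0) {
		perror("NVGPU_GPU_IOCTL_ALLOC_AS");
		return -1;
	}

	/* args.as_fd now refers to an address space in which only
	 * fixed-offset mappings (NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
	 * will be accepted, per the rules listed above. */
	return args.as_fd;
}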
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	131
1 file changed, 103 insertions, 28 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 141a37af..a9bca317 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -767,6 +767,12 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 	struct rb_node *node;
 	int i = 0;
 
+	if (vm->userspace_managed) {
+		*mapped_buffers = NULL;
+		*num_buffers = 0;
+		return 0;
+	}
+
 	mutex_lock(&vm->update_gmmu_lock);
 
 	buffer_list = nvgpu_alloc(sizeof(*buffer_list) *
@@ -1135,7 +1141,8 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 
 static int validate_fixed_buffer(struct vm_gk20a *vm,
 				 struct buffer_attrs *bfr,
-				 u64 map_offset, u64 map_size)
+				 u64 map_offset, u64 map_size,
+				 struct vm_reserved_va_node **pva_node)
 {
 	struct device *dev = dev_from_vm(vm);
 	struct vm_reserved_va_node *va_node;
@@ -1154,15 +1161,16 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 		return -EINVAL;
 	}
 
-	/* find the space reservation */
+	/* Find the space reservation, but it's ok to have none for
+	 * userspace-managed address spaces */
 	va_node = addr_to_reservation(vm, map_offset);
-	if (!va_node) {
+	if (!va_node && !vm->userspace_managed) {
 		gk20a_warn(dev, "fixed offset mapping without space allocation");
 		return -EINVAL;
 	}
 
-	/* mapped area should fit inside va */
-	if (map_end > va_node->vaddr_start + va_node->size) {
+	/* Mapped area should fit inside va, if there's one */
+	if (va_node && map_end > va_node->vaddr_start + va_node->size) {
 		gk20a_warn(dev, "fixed offset mapping size overflows va node");
 		return -EINVAL;
 	}
@@ -1177,6 +1185,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 		return -EINVAL;
 	}
 
+	*pva_node = va_node;
+
 	return 0;
 }
 
@@ -1411,16 +1421,28 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	u64 buf_addr;
 	u64 ctag_map_win_size = 0;
 	u32 ctag_map_win_ctagline = 0;
+	struct vm_reserved_va_node *va_node = NULL;
+
+	if (user_mapped && vm->userspace_managed &&
+	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		gk20a_err(d,
+			  "%s: non-fixed-offset mapping not available on userspace managed address spaces",
+			  __func__);
+		return -EFAULT;
+	}
 
 	mutex_lock(&vm->update_gmmu_lock);
 
 	/* check if this buffer is already mapped */
-	map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
-						   flags, kind, sgt,
-						   user_mapped, rw_flag);
-	if (map_offset) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return map_offset;
+	if (!vm->userspace_managed) {
+		map_offset = gk20a_vm_map_duplicate_locked(
+			vm, dmabuf, offset_align,
+			flags, kind, sgt,
+			user_mapped, rw_flag);
+		if (map_offset) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			return map_offset;
+		}
 	}
 
 	/* pin buffer to get phys/iovmm addr */
@@ -1504,7 +1526,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
 		err = validate_fixed_buffer(vm, &bfr,
-					    offset_align, mapping_size);
+					    offset_align, mapping_size,
+					    &va_node);
 		if (err)
 			goto clean_up;
 
@@ -1671,11 +1694,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
 
-	if (!va_allocated) {
-		struct vm_reserved_va_node *va_node;
-
-		/* find the space reservation */
-		va_node = addr_to_reservation(vm, map_offset);
+	if (va_node) {
 		list_add_tail(&mapped_buffer->va_buffers_list,
 			      &va_node->va_buffers_list);
 		mapped_buffer->va_node = va_node;
@@ -1753,18 +1772,27 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 	struct mapped_buffer_node *mapped_buffer;
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct device *d = dev_from_vm(vm);
+	const bool fixed_mapping =
+		(flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
+
+	if (vm->userspace_managed && !fixed_mapping) {
+		gk20a_err(d,
+			  "%s: non-fixed-offset mapping is not available on userspace managed address spaces",
+			  __func__);
+		return -EFAULT;
+	}
 
-	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
-		/* This will be implemented later */
+	if (fixed_mapping && !vm->userspace_managed) {
 		gk20a_err(d,
-			  "%s: fixed-offset compbits mapping not yet supported",
+			  "%s: fixed-offset mapping is available only on userspace managed address spaces",
 			  __func__);
 		return -EFAULT;
 	}
 
 	mutex_lock(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+	mapped_buffer =
+		find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
 
 	if (!mapped_buffer || !mapped_buffer->user_mapped) {
 		mutex_unlock(&vm->update_gmmu_lock);
@@ -1774,7 +1802,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 
 	if (!mapped_buffer->ctags_mappable) {
 		mutex_unlock(&vm->update_gmmu_lock);
-		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
+		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx",
+			  __func__, mapping_gva);
 		return -EFAULT;
 	}
 
@@ -1804,10 +1833,41 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 		cacheline_offset_start =
 			cacheline_start * aggregate_cacheline_sz;
 
+		if (fixed_mapping) {
+			struct buffer_attrs bfr;
+			int err;
+			struct vm_reserved_va_node *va_node = NULL;
+
+			memset(&bfr, 0, sizeof(bfr));
+
+			bfr.pgsz_idx = small_pgsz_index;
+
+			err = validate_fixed_buffer(
+				vm, &bfr, *compbits_win_gva,
+				mapped_buffer->ctag_map_win_size, &va_node);
+
+			if (err) {
+				mutex_unlock(&vm->update_gmmu_lock);
+				return err;
+			}
+
+			if (va_node) {
+				/* this would create a dangling GPU VA
+				 * pointer if the space is freed
+				 * before the buffer is
+				 * unmapped */
+				mutex_unlock(&vm->update_gmmu_lock);
+				gk20a_err(d,
+					  "%s: comptags cannot be mapped into allocated space",
+					  __func__);
+				return -EINVAL;
+			}
+		}
+
 		mapped_buffer->ctag_map_win_addr =
 			g->ops.mm.gmmu_map(
 				vm,
-				0,
+				!fixed_mapping ? 0 : *compbits_win_gva, /* va */
 				g->gr.compbit_store.mem.sgt,
 				cacheline_offset_start, /* sg offset */
 				mapped_buffer->ctag_map_win_size, /* size */
@@ -1828,6 +1888,15 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 				  __func__, mapping_gva);
 			return -ENOMEM;
 		}
+	} else if (fixed_mapping && *compbits_win_gva &&
+		   mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d,
+			  "%s: re-requesting comptags map into mismatching address. buffer offset 0x"
+			  "%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
+			  __func__, mapping_gva,
+			  mapped_buffer->ctag_map_win_addr, *compbits_win_gva);
+		return -EINVAL;
 	}
 
 	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
@@ -2662,6 +2731,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		 u64 kernel_reserved,
 		 u64 aperture_size,
 		 bool big_pages,
+		 bool userspace_managed,
 		 char *name)
 {
 	int err, i;
@@ -2685,6 +2755,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
 
+	vm->userspace_managed = userspace_managed;
+
 	vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,
 							  vm->big_page_size);
 
@@ -2821,7 +2893,8 @@ clean_up_pdes:
 }
 
 /* address space interfaces for the gk20a module */
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
+			 u32 flags)
 {
 	struct gk20a_as *as = as_share->as;
 	struct gk20a *g = gk20a_from_as(as);
@@ -2829,6 +2902,8 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 	struct vm_gk20a *vm;
 	char name[32];
 	int err;
+	const bool userspace_managed =
+		(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
 
 	gk20a_dbg_fn("");
 
@@ -2856,7 +2931,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
 			    mm->channel.kernel_size,
 			    mm->channel.user_size + mm->channel.kernel_size,
-			    !mm->disable_bigpage, name);
+			    !mm->disable_bigpage, userspace_managed, name);
 
 	return err;
 }
@@ -3235,7 +3310,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
 	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
 		      mm->bar1.aperture_size - SZ_4K,
-		      mm->bar1.aperture_size, false, "bar1");
+		      mm->bar1.aperture_size, false, false, "bar1");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
@@ -3263,7 +3338,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 
 	gk20a_init_vm(mm, vm, big_page_size,
 		      SZ_4K * 16, GK20A_PMU_VA_SIZE,
-		      GK20A_PMU_VA_SIZE * 2, false,
+		      GK20A_PMU_VA_SIZE * 2, false, false,
 		      "system");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
@@ -3303,7 +3378,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 		      SZ_4K * 16,
 		      NV_MM_DEFAULT_KERNEL_SIZE,
 		      NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-		      false, "cde");
+		      false, false, "cde");
 }
 
 void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)