author		Deepak Nibade <dnibade@nvidia.com>	2017-03-31 02:46:33 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-04-06 13:57:28 -0400
commit		ce3c30f14f1eed9ace2028b48c7e8f6cdd6b65cb (patch)
tree		f4fcc21aeccd99aa56d19ca67fdbee38dc4e311a /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent		6dda47a114d1ecbef4f5fa77e8100d795ee23ff1 (diff)
gpu: nvgpu: use nvgpu rbtree to store mapped buffers
Use nvgpu rbtree instead of linux rbtree to store mapped buffers for
each VM.

Move to use "struct nvgpu_rbtree_node" instead of "struct rb_node",
and similarly use the rbtree APIs from <nvgpu/rbtree.h> instead of the
linux APIs.

Jira NVGPU-13

Change-Id: Id96ba76e20fa9ecad016cd5d5a6a7d40579a70f2
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1453043
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	157
1 files changed, 66 insertions, 91 deletions
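
Before the diff itself, a minimal sketch of the nvgpu rbtree usage pattern this change adopts may help readers who only know the linux rbtree. It is illustrative and not part of the commit: the demo_* names are invented, and only the <nvgpu/rbtree.h> calls and the key_start/key_end node fields are taken from the hunks below. The key difference is that the caller no longer writes a comparison walk or rebalancing calls; each node carries its own key range, and the tree root is a plain "struct nvgpu_rbtree_node *" that starts out NULL (see the gk20a_init_vm hunk below).

/* Illustrative sketch only -- the demo_* names are invented here. */
#include <linux/types.h>	/* u64, uintptr_t */
#include <linux/stddef.h>	/* offsetof */
#include <nvgpu/rbtree.h>

struct demo_buffer {
	u64 addr;
	u64 size;
	struct nvgpu_rbtree_node node;	/* embedded tree node */
};

/* container_of-style recovery of the wrapping struct from its node */
static struct demo_buffer *demo_buffer_from_rbtree_node(
		struct nvgpu_rbtree_node *node)
{
	return (struct demo_buffer *)
		((uintptr_t)node - offsetof(struct demo_buffer, node));
}

static void demo_insert(struct nvgpu_rbtree_node **root,
			struct demo_buffer *buf)
{
	/* Each node carries its own key range; no comparator callback. */
	buf->node.key_start = buf->addr;
	buf->node.key_end = buf->addr + buf->size;
	nvgpu_rbtree_insert(&buf->node, root);
}

static struct demo_buffer *demo_lookup(struct nvgpu_rbtree_node *root,
				       u64 addr)
{
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_search(addr, &node, root);	/* exact key_start match */
	return node ? demo_buffer_from_rbtree_node(node) : NULL;
}

static void demo_remove(struct nvgpu_rbtree_node **root,
			struct demo_buffer *buf)
{
	nvgpu_rbtree_unlink(&buf->node, root);
}

static void demo_walk_all(struct nvgpu_rbtree_node *root)
{
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_enum_start(0, &node, root);
	while (node) {
		struct demo_buffer *buf = demo_buffer_from_rbtree_node(node);

		(void)buf;	/* visit buf here */
		nvgpu_rbtree_enum_next(&node, node);
	}
}

In the driver itself the same pattern appears with struct mapped_buffer_node and the existing mapped_buffer_from_rbtree_node() helper used throughout the hunks below.
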
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 461cf324..2fe76d80 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -311,10 +311,10 @@ static inline u32 lo32(u64 f)
 }
 
 static struct mapped_buffer_node *find_mapped_buffer_locked(
-					struct rb_root *root, u64 addr);
+					struct nvgpu_rbtree_node *root, u64 addr);
 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
-					struct rb_root *root, struct dma_buf *dmabuf,
+					struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
 					u32 kind);
 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   enum gmmu_pgsz_gk20a pgsz_idx,
 				   struct sg_table *sgt, u64 buffer_offset,
@@ -1289,7 +1289,7 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 {
 	struct mapped_buffer_node *mapped_buffer;
 	struct mapped_buffer_node **buffer_list;
-	struct rb_node *node;
+	struct nvgpu_rbtree_node *node = NULL;
 	int i = 0;
 
 	if (vm->userspace_managed) {
@@ -1307,16 +1307,15 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 		return -ENOMEM;
 	}
 
-	node = rb_first(&vm->mapped_buffers);
+	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	while (node) {
-		mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
+		mapped_buffer = mapped_buffer_from_rbtree_node(node);
 		if (mapped_buffer->user_mapped) {
 			buffer_list[i] = mapped_buffer;
 			kref_get(&mapped_buffer->ref);
 			i++;
 		}
-		node = rb_next(&mapped_buffer->node);
+		nvgpu_rbtree_enum_next(&node, node);
 	}
 
 	BUG_ON(i != vm->num_user_mapped_buffers);
@@ -1396,7 +1395,7 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
+	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
 		gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
@@ -1488,104 +1487,81 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 	return 0;
 }
 
-static int insert_mapped_buffer(struct rb_root *root,
+
+static void remove_mapped_buffer(struct vm_gk20a *vm,
 				struct mapped_buffer_node *mapped_buffer)
 {
-	struct rb_node **new_node = &(root->rb_node), *parent = NULL;
-
-	/* Figure out where to put new node */
-	while (*new_node) {
-		struct mapped_buffer_node *cmp_with =
-			container_of(*new_node, struct mapped_buffer_node,
-				     node);
-
-		parent = *new_node;
+	nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
+}
 
-		if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
-			new_node = &((*new_node)->rb_left);
-		else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
-			new_node = &((*new_node)->rb_right);
-		else
-			return -EINVAL; /* no fair dup'ing */
-	}
+static int insert_mapped_buffer(struct vm_gk20a *vm,
+				struct mapped_buffer_node *mapped_buffer)
+{
+	mapped_buffer->node.key_start = mapped_buffer->addr;
+	mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
 
-	/* Add new node and rebalance tree. */
-	rb_link_node(&mapped_buffer->node, parent, new_node);
-	rb_insert_color(&mapped_buffer->node, root);
+	nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
 
 	return 0;
 }
 
 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
-				struct rb_root *root, struct dma_buf *dmabuf,
+				struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
 				u32 kind)
 {
-	struct rb_node *node = rb_first(root);
+	struct nvgpu_rbtree_node *node = NULL;
+
+	nvgpu_rbtree_enum_start(0, &node, root);
+
 	while (node) {
 		struct mapped_buffer_node *mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
+			mapped_buffer_from_rbtree_node(node);
+
 		if (mapped_buffer->dmabuf == dmabuf &&
 		    kind == mapped_buffer->kind)
 			return mapped_buffer;
-		node = rb_next(&mapped_buffer->node);
+
+		nvgpu_rbtree_enum_next(&node, node);
 	}
+
 	return NULL;
 }
 
 static struct mapped_buffer_node *find_mapped_buffer_locked(
-			struct rb_root *root, u64 addr)
+			struct nvgpu_rbtree_node *root, u64 addr)
 {
+	struct nvgpu_rbtree_node *node = NULL;
 
-	struct rb_node *node = root->rb_node;
-	while (node) {
-		struct mapped_buffer_node *mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
-		if (mapped_buffer->addr > addr) /* u64 cmp */
-			node = node->rb_left;
-		else if (mapped_buffer->addr != addr) /* u64 cmp */
-			node = node->rb_right;
-		else
-			return mapped_buffer;
-	}
-	return NULL;
+	nvgpu_rbtree_search(addr, &node, root);
+	if (!node)
+		return NULL;
+
+	return mapped_buffer_from_rbtree_node(node);
 }
 
 static struct mapped_buffer_node *find_mapped_buffer_range_locked(
-			struct rb_root *root, u64 addr)
+			struct nvgpu_rbtree_node *root, u64 addr)
 {
-	struct rb_node *node = root->rb_node;
-	while (node) {
-		struct mapped_buffer_node *m =
-			container_of(node, struct mapped_buffer_node, node);
-		if (m->addr <= addr && m->addr + m->size > addr)
-			return m;
-		else if (m->addr > addr) /* u64 cmp */
-			node = node->rb_left;
-		else
-			node = node->rb_right;
-	}
-	return NULL;
+	struct nvgpu_rbtree_node *node = NULL;
+
+	nvgpu_rbtree_range_search(addr, &node, root);
+	if (!node)
+		return NULL;
+
+	return mapped_buffer_from_rbtree_node(node);
 }
 
 /* find the first mapped buffer with GPU VA less than addr */
 static struct mapped_buffer_node *find_mapped_buffer_less_than_locked(
-			struct rb_root *root, u64 addr)
+			struct nvgpu_rbtree_node *root, u64 addr)
 {
-	struct rb_node *node = root->rb_node;
-	struct mapped_buffer_node *ret = NULL;
+	struct nvgpu_rbtree_node *node = NULL;
 
-	while (node) {
-		struct mapped_buffer_node *mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
-		if (mapped_buffer->addr >= addr)
-			node = node->rb_left;
-		else {
-			ret = mapped_buffer;
-			node = node->rb_right;
-		}
-	}
+	nvgpu_rbtree_less_than_search(addr, &node, root);
+	if (!node)
+		return NULL;
 
-	return ret;
+	return mapped_buffer_from_rbtree_node(node);
 }
 
 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
@@ -1693,7 +1669,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 	 * mappings by checking the buffer with the highest GPU VA
 	 * that is less than our buffer end */
 	buffer = find_mapped_buffer_less_than_locked(
-			&vm->mapped_buffers, map_offset + map_size);
+			vm->mapped_buffers, map_offset + map_size);
 	if (buffer && buffer->addr + buffer->size > map_offset) {
 		gk20a_warn(dev, "overlapping buffer map requested");
 		return -EINVAL;
@@ -1877,7 +1853,7 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
 	struct mapped_buffer_node *mapped_buffer = NULL;
 
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers,
+		mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers,
 							  offset_align);
 		if (!mapped_buffer)
 			return 0;
@@ -1887,7 +1863,7 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
 			return 0;
 	} else {
 		mapped_buffer =
-			find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
+			find_mapped_buffer_reverse_locked(vm->mapped_buffers,
 							  dmabuf, kind);
 		if (!mapped_buffer)
 			return 0;
@@ -2433,7 +2409,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	nvgpu_init_list_node(&mapped_buffer->va_buffers_list);
 	kref_init(&mapped_buffer->ref);
 
-	err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
+	err = insert_mapped_buffer(vm, mapped_buffer);
 	if (err) {
 		gk20a_err(d, "failed to insert into mapped buffer tree");
 		goto clean_up;
@@ -2456,7 +2432,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 clean_up:
 	if (inserted) {
-		rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
+		remove_mapped_buffer(vm, mapped_buffer);
 		if (user_mapped)
 			vm->num_user_mapped_buffers--;
 	}
@@ -2483,7 +2459,7 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva);
 
 	if (!mapped_buffer || !mapped_buffer->user_mapped)
 	{
@@ -2542,7 +2518,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
 	mapped_buffer =
-		find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+		find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva);
 
 	if (!mapped_buffer || !mapped_buffer->user_mapped) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -3274,7 +3250,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr);
+	buffer = find_mapped_buffer_locked(vm->mapped_buffers, gpu_vaddr);
 	if (buffer)
 		addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl,
 					       buffer->flags);
@@ -3886,7 +3862,7 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 				mapped_buffer->sgt);
 
 	/* remove from mapped buffer tree and remove list, free */
-	rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
+	remove_mapped_buffer(vm, mapped_buffer);
 	if (!nvgpu_list_empty(&mapped_buffer->va_buffers_list))
 		nvgpu_list_del(&mapped_buffer->va_buffers_list);
 
@@ -3908,7 +3884,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
 	struct mapped_buffer_node *mapped_buffer;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
+	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
 		gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
@@ -3939,7 +3915,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 {
 	struct mapped_buffer_node *mapped_buffer;
 	struct vm_reserved_va_node *va_node, *va_node_tmp;
-	struct rb_node *node;
+	struct nvgpu_rbtree_node *node = NULL;
 	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
@@ -3961,12 +3937,11 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	/* TBD: add a flag here for the unmap code to recognize teardown
 	 * and short-circuit any otherwise expensive operations. */
 
-	node = rb_first(&vm->mapped_buffers);
+	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	while (node) {
-		mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
+		mapped_buffer = mapped_buffer_from_rbtree_node(node);
 		gk20a_vm_unmap_locked(mapped_buffer, NULL);
-		node = rb_first(&vm->mapped_buffers);
+		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	}
 
 	/* destroy remaining reserved memory areas */
@@ -4402,7 +4377,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (err)
 		goto clean_up_allocators;
 
-	vm->mapped_buffers = RB_ROOT;
+	vm->mapped_buffers = NULL;
 
 	nvgpu_mutex_init(&vm->update_gmmu_lock);
 	kref_init(&vm->ref);
@@ -5199,7 +5174,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
+	mapped_buffer = find_mapped_buffer_range_locked(vm->mapped_buffers,
 							gpu_va);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);