path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author    Terje Bergstrom <tbergstrom@nvidia.com>  2015-02-09 23:54:01 -0500
committer Dan Willemsen <dwillemsen@nvidia.com>    2015-04-04 21:07:35 -0400
commit    f9fd5bbabe0d188a06d25bacdb18b91ef65a147d (patch)
tree      ecf651164e5fbdbba48eec53291f2cef9ac715e7 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent    9bf82585aa24b6052855c119855abef92671d502 (diff)
gpu: nvgpu: Unify PDE & PTE structs
Introduce a new struct gk20a_mm_entry. Allocate and store PDE and PTE
arrays using the same structure, and pass a pointer to this struct
between functions in the memory code wherever possible.

Change-Id: Ia4a2a6abdac9ab7ba522dafbf73fc3a3d5355c5f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/696414
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  317
1 file changed, 138 insertions(+), 179 deletions(-)
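
Before reading the diff, it helps to know roughly what the new gk20a_mm_entry looks like. The sketch below is reconstructed only from the fields the diff touches (cpu_va, sgt, pages, size, pgsz, entries); the authoritative definition lives in mm_gk20a.h, which is not part of this diff, so field order and any members not used here are assumptions:

	/* Hypothetical sketch of the unified PDE/PTE backing descriptor. */
	struct gk20a_mm_entry {
		void *cpu_va;                   /* kernel mapping, if any */
		struct sg_table *sgt;           /* description of the backing storage */
		struct page **pages;            /* arm32 path: page array without a kernel mapping */
		size_t size;                    /* allocation size in bytes; 0 means "not allocated" */
		int pgsz;                       /* gmmu_page_size_small or gmmu_page_size_big */
		struct gk20a_mm_entry *entries; /* for the PDB: one child entry per PDE */
	};

With this in place, the alloc/map/unmap/free helpers all take a single struct gk20a_mm_entry * instead of the old (handle, sgt, size) argument triples, as the diff below shows.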
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3f71fc5e..10bdc076 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -377,9 +377,7 @@ int gk20a_init_mm_support(struct gk20a *g)
 }
 
 static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
-				 void **handle,
-				 struct sg_table **sgt,
-				 size_t *size)
+				 struct gk20a_mm_entry *entry)
 {
 	u32 num_pages = 1 << order;
 	u32 len = num_pages * PAGE_SIZE;
@@ -393,76 +391,81 @@ static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
 		gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n");
 		goto err_out;
 	}
-	*sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
-	if (!sgt) {
+	entry->sgt = kzalloc(sizeof(*entry->sgt), GFP_KERNEL);
+	if (!entry->sgt) {
 		gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
 		goto err_alloced;
 	}
-	err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
+	err = sg_alloc_table(entry->sgt, 1, GFP_KERNEL);
 	if (err) {
 		gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n");
 		goto err_sg_table;
 	}
-	sg_set_page((*sgt)->sgl, pages, len, 0);
-	*handle = page_address(pages);
-	memset(*handle, 0, len);
-	*size = len;
-	FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
+	sg_set_page(entry->sgt->sgl, pages, len, 0);
+	entry->cpu_va = page_address(pages);
+	memset(entry->cpu_va, 0, len);
+	entry->size = len;
+	FLUSH_CPU_DCACHE(entry->cpu_va, sg_phys(entry->sgt->sgl), len);
 
 	return 0;
 
 err_sg_table:
-	kfree(*sgt);
+	kfree(entry->sgt);
 err_alloced:
 	__free_pages(pages, order);
 err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_phys_pages(struct vm_gk20a *vm, void *handle,
-			    struct sg_table *sgt, u32 order,
-			    size_t size)
+static void free_gmmu_phys_pages(struct vm_gk20a *vm,
+			    struct gk20a_mm_entry *entry)
 {
 	gk20a_dbg_fn("");
-	free_pages((unsigned long)handle, order);
-	sg_free_table(sgt);
-	kfree(sgt);
+	free_pages((unsigned long)entry->cpu_va, get_order(entry->size));
+	entry->cpu_va = NULL;
+
+	sg_free_table(entry->sgt);
+	kfree(entry->sgt);
+	entry->sgt = NULL;
 }
 
-static int map_gmmu_phys_pages(void *handle, struct sg_table *sgt,
-			  void **va, size_t size)
+static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry)
 {
-	FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
-	*va = handle;
+	FLUSH_CPU_DCACHE(entry->cpu_va,
+			 sg_phys(entry->sgt->sgl),
+			 entry->sgt->sgl->length);
 	return 0;
 }
 
-static void unmap_gmmu_phys_pages(void *handle, struct sg_table *sgt, void *va)
+static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry)
 {
-	FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
+	FLUSH_CPU_DCACHE(entry->cpu_va,
+			 sg_phys(entry->sgt->sgl),
+			 entry->sgt->sgl->length);
 }
 
 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
-			    void **handle,
-			    struct sg_table **sgt,
-			    size_t *size)
+			    struct gk20a_mm_entry *entry)
 {
 	struct device *d = dev_from_vm(vm);
 	u32 num_pages = 1 << order;
 	u32 len = num_pages * PAGE_SIZE;
 	dma_addr_t iova;
 	DEFINE_DMA_ATTRS(attrs);
-	struct page **pages;
 	void *cpuva;
 	int err = 0;
 
 	gk20a_dbg_fn("");
 
 	if (tegra_platform_is_linsim())
-		return alloc_gmmu_phys_pages(vm, order, handle, sgt, size);
+		return alloc_gmmu_phys_pages(vm, order, entry);
 
-	*size = len;
+	entry->size = len;
 
+	/*
+	 * On arm32 we're limited by vmalloc space, so we do not map pages by
+	 * default.
+	 */
 	if (IS_ENABLED(CONFIG_ARM64)) {
 		cpuva = dma_zalloc_coherent(d, len, &iova, GFP_KERNEL);
 		if (!cpuva) {
@@ -470,14 +473,16 @@ static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
 			goto err_out;
 		}
 
-		err = gk20a_get_sgtable(d, sgt, cpuva, iova, len);
+		err = gk20a_get_sgtable(d, &entry->sgt, cpuva, iova, len);
 		if (err) {
 			gk20a_err(d, "sgt allocation failed\n");
 			goto err_free;
 		}
 
-		*handle = cpuva;
+		entry->cpu_va = cpuva;
 	} else {
+		struct page **pages;
+
 		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
 		pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs);
 		if (!pages) {
@@ -485,99 +490,106 @@ static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
 			goto err_out;
 		}
 
-		err = gk20a_get_sgtable_from_pages(d, sgt, pages,
+		err = gk20a_get_sgtable_from_pages(d, &entry->sgt, pages,
 					iova, len);
 		if (err) {
 			gk20a_err(d, "sgt allocation failed\n");
 			goto err_free;
 		}
 
-		*handle = (void *)pages;
+		entry->pages = pages;
 	}
 
 	return 0;
 
 err_free:
 	if (IS_ENABLED(CONFIG_ARM64)) {
-		dma_free_coherent(d, len, handle, iova);
+		dma_free_coherent(d, len, entry->cpu_va, iova);
 		cpuva = NULL;
 	} else {
-		dma_free_attrs(d, len, pages, iova, &attrs);
-		pages = NULL;
+		dma_free_attrs(d, len, entry->pages, iova, &attrs);
+		entry->pages = NULL;
 	}
 	iova = 0;
 err_out:
 	return -ENOMEM;
 }
 
-void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
-		     struct sg_table *sgt, u32 order,
-		     size_t size)
+void free_gmmu_pages(struct vm_gk20a *vm,
+		     struct gk20a_mm_entry *entry)
 {
 	struct device *d = dev_from_vm(vm);
 	u64 iova;
 	DEFINE_DMA_ATTRS(attrs);
-	struct page **pages;
 
 	gk20a_dbg_fn("");
-	BUG_ON(sgt == NULL);
+	BUG_ON(entry->sgt == NULL);
 
 	if (tegra_platform_is_linsim()) {
-		free_gmmu_phys_pages(vm, handle, sgt, order, size);
+		free_gmmu_phys_pages(vm, entry);
 		return;
 	}
 
-	iova = sg_dma_address(sgt->sgl);
+	iova = sg_dma_address(entry->sgt->sgl);
 
-	gk20a_free_sgtable(&sgt);
+	gk20a_free_sgtable(&entry->sgt);
 
+	/*
+	 * On arm32 we're limited by vmalloc space, so we do not map pages by
+	 * default.
+	 */
 	if (IS_ENABLED(CONFIG_ARM64)) {
-		dma_free_coherent(d, size, handle, iova);
+		dma_free_coherent(d, entry->size, entry->cpu_va, iova);
+		entry->cpu_va = NULL;
 	} else {
-		pages = (struct page **)handle;
 		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-		dma_free_attrs(d, size, pages, iova, &attrs);
-		pages = NULL;
+		dma_free_attrs(d, entry->size, entry->pages, iova, &attrs);
+		entry->pages = NULL;
 	}
-
-	handle = NULL;
-	iova = 0;
+	entry->size = 0;
 }
 
-int map_gmmu_pages(void *handle, struct sg_table *sgt,
-		   void **kva, size_t size)
+int map_gmmu_pages(struct gk20a_mm_entry *entry)
 {
-	int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	int count = PAGE_ALIGN(entry->size) >> PAGE_SHIFT;
 	struct page **pages;
 	gk20a_dbg_fn("");
 
 	if (tegra_platform_is_linsim())
-		return map_gmmu_phys_pages(handle, sgt, kva, size);
+		return map_gmmu_phys_pages(entry);
 
 	if (IS_ENABLED(CONFIG_ARM64)) {
-		*kva = handle;
+		FLUSH_CPU_DCACHE(entry->cpu_va,
+				 sg_phys(entry->sgt->sgl),
+				 entry->size);
 	} else {
-		pages = (struct page **)handle;
-		*kva = vmap(pages, count, 0, pgprot_writecombine(PAGE_KERNEL));
-		if (!(*kva))
+		pages = entry->pages;
+		entry->cpu_va = vmap(pages, count, 0,
+				     pgprot_writecombine(PAGE_KERNEL));
+		if (!entry->cpu_va)
 			return -ENOMEM;
 	}
 
 	return 0;
 }
 
-void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
+void unmap_gmmu_pages(struct gk20a_mm_entry *entry)
 {
 	gk20a_dbg_fn("");
 
 	if (tegra_platform_is_linsim()) {
-		unmap_gmmu_phys_pages(handle, sgt, va);
+		unmap_gmmu_phys_pages(entry);
 		return;
 	}
 
-	if (!IS_ENABLED(CONFIG_ARM64))
-		vunmap(va);
-	va = NULL;
+	if (IS_ENABLED(CONFIG_ARM64)) {
+		FLUSH_CPU_DCACHE(entry->cpu_va,
+				 sg_phys(entry->sgt->sgl),
+				 entry->size);
+	} else {
+		vunmap(entry->cpu_va);
+		entry->cpu_va = NULL;
+	}
 }
 
 /* allocate a phys contig region big enough for a full
@@ -585,33 +597,25 @@ void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
  * the whole range is zeroed so it's "invalid"/will fault
  */
 
-int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
-				 enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
-				 struct page_table_gk20a *pte)
+static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm,
+					enum gmmu_pgsz_gk20a pgsz_idx,
+					struct gk20a_mm_entry *entry)
 {
 	int err;
 	u32 pte_order;
-	void *handle = NULL;
-	struct sg_table *sgt;
-	size_t size;
 
 	gk20a_dbg_fn("");
 
 	/* allocate enough pages for the table */
-	pte_order = vm->page_table_sizing[gmmu_pgsz_idx].order;
+	pte_order = vm->page_table_sizing[pgsz_idx].order;
 
-	err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
-	if (err)
-		return err;
-
-	gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
-		  pte, gk20a_mm_iova_addr(vm->mm->g, sgt->sgl), pte_order);
+	err = alloc_gmmu_pages(vm, pte_order, entry);
+	gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d",
+		  entry, gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl),
+		  pte_order);
+	entry->pgsz = pgsz_idx;
 
-	pte->ref = handle;
-	pte->sgt = sgt;
-	pte->size = size;
-
-	return 0;
+	return err;
 }
 
 /* given address range (inclusive) determine the pdes crossed */
@@ -629,7 +633,7 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
 
 u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
 {
-	return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
+	return (u32 *) (((u8 *)vm->pdb.cpu_va) + i*gmmu_pde__size_v());
 }
 
 u32 pte_index_from_vaddr(struct vm_gk20a *vm,
@@ -671,25 +675,30 @@ int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
 			u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 {
 	int err;
-	struct page_table_gk20a *pte =
-		vm->pdes.ptes[gmmu_pgsz_idx] + i;
+	struct gk20a_mm_entry *entry = vm->pdb.entries + i;
 
 	gk20a_dbg_fn("");
 
 	/* if it's already in place it's valid */
-	if (pte->ref)
+	if (entry->size)
 		return 0;
 
 	gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
 		  vm->gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
 
-	err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
+	err = gk20a_zalloc_gmmu_page_table(vm, gmmu_pgsz_idx, entry);
 	if (err)
 		return err;
 
 	/* rewrite pde */
+	err = map_gmmu_pages(&vm->pdb);
+	if (err)
+		return err;
+
 	update_gmmu_pde_locked(vm, i);
 
+	unmap_gmmu_pages(&vm->pdb);
+
 	return 0;
 }
 
@@ -1791,9 +1800,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
 		u32 pte_lo, pte_hi;
 		u32 pte_cur;
-		void *pte_kv_cur;
 
-		struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
+		struct gk20a_mm_entry *entry = vm->pdb.entries + pde_i;
 
 		if (pde_i == pde_lo)
 			pte_lo = pte_index_from_vaddr(vm, first_vaddr,
@@ -1808,8 +1816,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 						      pgsz_idx);
 
 		/* get cpu access to the ptes */
-		err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur,
-				     pte->size);
+		err = map_gmmu_pages(entry);
 		if (err) {
 			gk20a_err(dev_from_vm(vm),
 				   "couldn't map ptes for update as=%d",
@@ -1817,8 +1824,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			goto clean_up;
 		}
 
-		BUG_ON(!pte_kv_cur);
-
 		gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
 		for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
 			if (likely(sgt)) {
@@ -1869,11 +1874,11 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 					   pte_cur);
 			}
 
-			gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
-			gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
+			gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 0, pte_w[0]);
+			gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 1, pte_w[1]);
 		}
 
-		unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
+		unmap_gmmu_pages(entry);
 	}
 
 	smp_mb();
@@ -1917,23 +1922,22 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 {
 	bool small_valid, big_valid;
 	u64 pte_addr[2] = {0, 0};
-	struct page_table_gk20a *small_pte =
-		vm->pdes.ptes[gmmu_page_size_small] + i;
-	struct page_table_gk20a *big_pte =
-		vm->pdes.ptes[gmmu_page_size_big] + i;
+	struct gk20a_mm_entry *entry = vm->pdb.entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 *pde;
 
-	small_valid = small_pte && small_pte->ref;
-	big_valid = big_pte && big_pte->ref;
+	gk20a_dbg_fn("");
+
+	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
+	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
 		pte_addr[gmmu_page_size_small] =
-			gk20a_mm_iova_addr(vm->mm->g, small_pte->sgt->sgl);
+			gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl);
 
 	if (big_valid)
 		pte_addr[gmmu_page_size_big] =
-			gk20a_mm_iova_addr(vm->mm->g, big_pte->sgt->sgl);
+			gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl);
 
 	pde_v[0] = gmmu_pde_size_full_f();
 	pde_v[0] |= big_valid ?
@@ -1959,12 +1963,13 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	smp_mb();
 
 	FLUSH_CPU_DCACHE(pde,
-			 sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
+			 sg_phys(vm->pdb.sgt->sgl) + (i*gmmu_pde__size_v()),
 			 sizeof(u32)*2);
 
 	gk20a_mm_l2_invalidate(vm->mm->g);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
+	gk20a_dbg_fn("done");
 }
 
 /* NOTE! mapped_buffers lock must be held */
@@ -2046,6 +2051,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	struct vm_reserved_va_node *va_node, *va_node_tmp;
 	struct rb_node *node;
 	int i;
+	u32 pde_lo, pde_hi;
 
 	gk20a_dbg_fn("");
 	mutex_lock(&vm->update_gmmu_lock);
@@ -2070,29 +2076,18 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 
 	/* unmapping all buffers above may not actually free
 	 * all vm ptes. jettison them here for certain... */
-	for (i = 0; i < vm->pdes.num_pdes; i++) {
-		struct page_table_gk20a *pte =
-			&vm->pdes.ptes[gmmu_page_size_small][i];
-		if (pte->ref) {
-			free_gmmu_pages(vm, pte->ref, pte->sgt,
-				vm->page_table_sizing[gmmu_page_size_small].order,
-				pte->size);
-			pte->ref = NULL;
-		}
-		pte = &vm->pdes.ptes[gmmu_page_size_big][i];
-		if (pte->ref) {
-			free_gmmu_pages(vm, pte->ref, pte->sgt,
-				vm->page_table_sizing[gmmu_page_size_big].order,
-				pte->size);
-			pte->ref = NULL;
-		}
+	pde_range_from_vaddr_range(vm, 0, vm->va_limit-1,
+			&pde_lo, &pde_hi);
+	for (i = 0; i < pde_hi + 1; i++) {
+		struct gk20a_mm_entry *entry = &vm->pdb.entries[i];
+		if (entry->size)
+			free_gmmu_pages(vm, entry);
 	}
 
-	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size);
+	unmap_gmmu_pages(&vm->pdb);
+	free_gmmu_pages(vm, &vm->pdb);
 
-	kfree(vm->pdes.ptes[gmmu_page_size_small]);
-	kfree(vm->pdes.ptes[gmmu_page_size_big]);
+	kfree(vm->pdb.entries);
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 	if (vm->big_pages)
 		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
@@ -2136,7 +2131,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	u32 num_small_pages, num_large_pages, low_hole_pages;
 	char alloc_name[32];
 	u64 small_vma_size, large_vma_size;
-	u32 pde_pages;
+	u32 pde_lo, pde_hi;
 
 	/* note: keep the page sizes sorted lowest to highest here */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };
@@ -2181,52 +2176,24 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		(vm->page_table_sizing[gmmu_page_size_big].num_ptes *
 		 gmmu_pte__size_v()) >> 10);
 
-	{
-		u32 pde_lo, pde_hi;
-		pde_range_from_vaddr_range(vm,
-					   0, vm->va_limit-1,
-					   &pde_lo, &pde_hi);
-		vm->pdes.num_pdes = pde_hi + 1;
-	}
-
-	vm->pdes.ptes[gmmu_page_size_small] =
-		kzalloc(sizeof(struct page_table_gk20a) *
-			vm->pdes.num_pdes, GFP_KERNEL);
-
-	if (!vm->pdes.ptes[gmmu_page_size_small]) {
-		err = -ENOMEM;
-		goto clean_up_pdes;
-	}
-
-	vm->pdes.ptes[gmmu_page_size_big] =
-		kzalloc(sizeof(struct page_table_gk20a) *
-			vm->pdes.num_pdes, GFP_KERNEL);
+	pde_range_from_vaddr_range(vm,
+				   0, vm->va_limit-1,
+				   &pde_lo, &pde_hi);
+	vm->pdb.entries = kzalloc(sizeof(struct gk20a_mm_entry) *
+				  (pde_hi + 1), GFP_KERNEL);
 
-	if (!vm->pdes.ptes[gmmu_page_size_big]) {
+	if (!vm->pdb.entries) {
 		err = -ENOMEM;
 		goto clean_up_pdes;
 	}
 
 	gk20a_dbg_info("init space for %s va_limit=0x%llx num_pdes=%d",
-		   name, vm->va_limit, vm->pdes.num_pdes);
+		   name, vm->va_limit, pde_hi + 1);
 
 	/* allocate the page table directory */
-	pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512);
-
-	gk20a_dbg(gpu_dbg_pte, "Allocating %d ** 2 PDE pages\n", pde_pages);
-	err = alloc_gmmu_pages(vm, pde_pages, &vm->pdes.ref,
-			       &vm->pdes.sgt, &vm->pdes.size);
+	err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->pdb);
 	if (err)
-		goto clean_up_pdes;
-
-	err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
-			 vm->pdes.size);
-	if (err) {
 		goto clean_up_ptes;
-	}
-	gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
-			vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl));
-	/* we could release vm->pdes.kv but it's only one page... */
 
 	/* First 16GB of the address space goes towards small pages. What ever
 	 * remains is allocated to large pages. */
@@ -2279,13 +2246,11 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 clean_up_small_allocator:
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 clean_up_map_pde:
-	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
+	unmap_gmmu_pages(&vm->pdb);
 clean_up_ptes:
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages,
-			vm->pdes.size);
+	free_gmmu_pages(vm, &vm->pdb);
 clean_up_pdes:
-	kfree(vm->pdes.ptes[gmmu_page_size_small]);
-	kfree(vm->pdes.ptes[gmmu_page_size_big]);
+	kfree(vm->pdb.entries);
 	return err;
 }
2291 2256
@@ -2657,18 +2622,12 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset)
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-	u32 pde_pages;
-
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 
-	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
-
-	pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512);
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages,
-			vm->pdes.size);
-	kfree(vm->pdes.ptes[gmmu_page_size_small]);
-	kfree(vm->pdes.ptes[gmmu_page_size_big]);
+	unmap_gmmu_pages(&vm->pdb);
+	free_gmmu_pages(vm, &vm->pdb);
+	kfree(vm->pdb.entries);
 }
 
 int gk20a_alloc_inst_block(struct gk20a *g, struct inst_desc *inst_block)
@@ -2765,7 +2724,7 @@ void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm,
 			 u32 big_page_size)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl);
+	u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl);
 	u32 pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v());
 	u32 pde_addr_hi = u64_hi32(pde_addr);
 	phys_addr_t inst_pa = inst_block->cpu_pa;
@@ -2967,7 +2926,7 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->mm->g,
-						  vm->pdes.sgt->sgl) >> 12);
+						  vm->pdb.sgt->sgl) >> 12);
 	u32 data;
 	s32 retry = 2000;
 	static DEFINE_MUTEX(tlb_lock);