summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2016-06-17 08:53:23 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-07-05 02:12:29 -0400
commit71478a031c3c42a737be5cfd7450414e58de849b (patch)
tree782d9cfdf4daf5889e713e7c846fba18e7896bab /drivers
parente12c5c8594c429357427130389da632284d79bcc (diff)
gpu: nvgpu: add support for vidmem in page tables
Modify page table updates to take an aperture flag (up until gk20a_locked_gmmu_map()), don't hard-assume sysmem and propagate it to hardware.

Jira DNVGPU-76

Change-Id: Ifcb22900c96db993068edd110e09368f72b06f69
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169307
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c83
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h3
3 files changed, 56 insertions, 38 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
index 9b444036..0a21b6ca 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
@@ -182,6 +182,14 @@ static inline u32 gmmu_pte_address_sys_w(void)
182{ 182{
183 return 0; 183 return 0;
184} 184}
185static inline u32 gmmu_pte_address_vid_f(u32 v)
186{
187 return (v & 0x1ffffff) << 4;
188}
189static inline u32 gmmu_pte_address_vid_w(void)
190{
191 return 0;
192}
185static inline u32 gmmu_pte_vol_w(void) 193static inline u32 gmmu_pte_vol_w(void)
186{ 194{
187 return 1; 195 return 1;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a274820a..6fdfacdd 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -387,7 +387,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
387 u8 kind_v, u32 ctag_offset, bool cacheable, 387 u8 kind_v, u32 ctag_offset, bool cacheable,
388 bool umapped_pte, int rw_flag, 388 bool umapped_pte, int rw_flag,
389 bool sparse, 389 bool sparse,
390 bool priv); 390 bool priv,
391 enum gk20a_aperture aperture);
391static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); 392static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
392static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); 393static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
393static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); 394static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
@@ -1640,7 +1641,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
1640 NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE, 1641 NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE,
1641 rw_flag, 1642 rw_flag,
1642 sparse, 1643 sparse,
1643 priv); 1644 priv,
1645 APERTURE_SYSMEM); /* no vidmem bufs yet */
1644 if (err) { 1646 if (err) {
1645 gk20a_err(d, "failed to update ptes on map"); 1647 gk20a_err(d, "failed to update ptes on map");
1646 goto fail_validate; 1648 goto fail_validate;
@@ -1690,7 +1692,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
1690 vaddr + size, 1692 vaddr + size,
1691 0, 0, false /* n/a for unmap */, 1693 0, 0, false /* n/a for unmap */,
1692 false, rw_flag, 1694 false, rw_flag,
1693 sparse, 0); 1695 sparse, 0,
1696 APERTURE_INVALID); /* don't care for unmap */
1694 if (err) 1697 if (err)
1695 dev_err(dev_from_vm(vm), 1698 dev_err(dev_from_vm(vm),
1696 "failed to update gmmu ptes on unmap"); 1699 "failed to update gmmu ptes on unmap");
@@ -2784,26 +2787,32 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
2784} 2787}
2785 2788
2786/* for gk20a the "video memory" apertures here are misnomers. */ 2789/* for gk20a the "video memory" apertures here are misnomers. */
2787static inline u32 big_valid_pde0_bits(struct gk20a *g, u64 pte_addr) 2790static inline u32 big_valid_pde0_bits(struct gk20a *g,
2791 struct mem_desc *entry_mem)
2788{ 2792{
2793 u64 pte_addr = g->ops.mm.get_iova_addr(g, entry_mem->sgt->sgl, 0);
2789 u32 pde0_bits = 2794 u32 pde0_bits =
2790 (g->mm.vidmem_is_vidmem ? 2795 gk20a_aperture_mask(g, entry_mem,
2791 gmmu_pde_aperture_big_sys_mem_ncoh_f() : 2796 gmmu_pde_aperture_big_sys_mem_ncoh_f(),
2792 gmmu_pde_aperture_big_video_memory_f()) | 2797 gmmu_pde_aperture_big_video_memory_f()) |
2793 gmmu_pde_address_big_sys_f( 2798 gmmu_pde_address_big_sys_f(
2794 (u32)(pte_addr >> gmmu_pde_address_shift_v())); 2799 (u32)(pte_addr >> gmmu_pde_address_shift_v()));
2795 return pde0_bits; 2800
2801 return pde0_bits;
2796} 2802}
2797 2803
2798static inline u32 small_valid_pde1_bits(struct gk20a *g, u64 pte_addr) 2804static inline u32 small_valid_pde1_bits(struct gk20a *g,
2805 struct mem_desc *entry_mem)
2799{ 2806{
2807 u64 pte_addr = g->ops.mm.get_iova_addr(g, entry_mem->sgt->sgl, 0);
2800 u32 pde1_bits = 2808 u32 pde1_bits =
2801 (g->mm.vidmem_is_vidmem ? 2809 gk20a_aperture_mask(g, entry_mem,
2802 gmmu_pde_aperture_small_sys_mem_ncoh_f() : 2810 gmmu_pde_aperture_small_sys_mem_ncoh_f(),
2803 gmmu_pde_aperture_small_video_memory_f()) | 2811 gmmu_pde_aperture_small_video_memory_f()) |
2804 gmmu_pde_vol_small_true_f() | /* tbd: why? */ 2812 gmmu_pde_vol_small_true_f() | /* tbd: why? */
2805 gmmu_pde_address_small_sys_f( 2813 gmmu_pde_address_small_sys_f(
2806 (u32)(pte_addr >> gmmu_pde_address_shift_v())); 2814 (u32)(pte_addr >> gmmu_pde_address_shift_v()));
2815
2807 return pde1_bits; 2816 return pde1_bits;
2808} 2817}
2809 2818
@@ -2821,11 +2830,11 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
2821 u64 *iova, 2830 u64 *iova,
2822 u32 kind_v, u64 *ctag, 2831 u32 kind_v, u64 *ctag,
2823 bool cacheable, bool unammped_pte, 2832 bool cacheable, bool unammped_pte,
2824 int rw_flag, bool sparse, bool priv) 2833 int rw_flag, bool sparse, bool priv,
2834 enum gk20a_aperture aperture)
2825{ 2835{
2826 struct gk20a *g = gk20a_from_vm(vm); 2836 struct gk20a *g = gk20a_from_vm(vm);
2827 bool small_valid, big_valid; 2837 bool small_valid, big_valid;
2828 u64 pte_addr_small = 0, pte_addr_big = 0;
2829 struct gk20a_mm_entry *entry = vm->pdb.entries + i; 2838 struct gk20a_mm_entry *entry = vm->pdb.entries + i;
2830 u32 pde_v[2] = {0, 0}; 2839 u32 pde_v[2] = {0, 0};
2831 u32 pde; 2840 u32 pde;
@@ -2835,18 +2844,13 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
2835 small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small; 2844 small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small;
2836 big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big; 2845 big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big;
2837 2846
2838 if (small_valid)
2839 pte_addr_small = g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0);
2840
2841 if (big_valid)
2842 pte_addr_big = g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0);
2843
2844 pde_v[0] = gmmu_pde_size_full_f(); 2847 pde_v[0] = gmmu_pde_size_full_f();
2845 pde_v[0] |= big_valid ? big_valid_pde0_bits(g, pte_addr_big) : 2848 pde_v[0] |= big_valid ?
2846 (gmmu_pde_aperture_big_invalid_f()); 2849 big_valid_pde0_bits(g, &entry->mem) :
2850 gmmu_pde_aperture_big_invalid_f();
2847 2851
2848 pde_v[1] |= (small_valid ? 2852 pde_v[1] |= (small_valid ?
2849 small_valid_pde1_bits(g, pte_addr_small) : 2853 small_valid_pde1_bits(g, &entry->mem) :
2850 (gmmu_pde_aperture_small_invalid_f() | 2854 (gmmu_pde_aperture_small_invalid_f() |
2851 gmmu_pde_vol_small_false_f())) 2855 gmmu_pde_vol_small_false_f()))
2852 | 2856 |
@@ -2871,7 +2875,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
2871 u64 *iova, 2875 u64 *iova,
2872 u32 kind_v, u64 *ctag, 2876 u32 kind_v, u64 *ctag,
2873 bool cacheable, bool unmapped_pte, 2877 bool cacheable, bool unmapped_pte,
2874 int rw_flag, bool sparse, bool priv) 2878 int rw_flag, bool sparse, bool priv,
2879 enum gk20a_aperture aperture)
2875{ 2880{
2876 struct gk20a *g = gk20a_from_vm(vm); 2881 struct gk20a *g = gk20a_from_vm(vm);
2877 int ctag_shift = ilog2(g->ops.fb.compression_page_size(g)); 2882 int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));
@@ -2879,20 +2884,21 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
2879 u32 pte_w[2] = {0, 0}; /* invalid pte */ 2884 u32 pte_w[2] = {0, 0}; /* invalid pte */
2880 2885
2881 if (*iova) { 2886 if (*iova) {
2882 if (unmapped_pte) 2887 u32 pte_valid = unmapped_pte ?
2883 pte_w[0] = gmmu_pte_valid_false_f() | 2888 gmmu_pte_valid_false_f() :
2884 gmmu_pte_address_sys_f(*iova 2889 gmmu_pte_valid_true_f();
2885 >> gmmu_pte_address_shift_v()); 2890 u32 iova_v = *iova >> gmmu_pte_address_shift_v();
2886 else 2891 u32 pte_addr = aperture == APERTURE_SYSMEM ?
2887 pte_w[0] = gmmu_pte_valid_true_f() | 2892 gmmu_pte_address_sys_f(iova_v) :
2888 gmmu_pte_address_sys_f(*iova 2893 gmmu_pte_address_vid_f(iova_v);
2889 >> gmmu_pte_address_shift_v()); 2894
2895 pte_w[0] = pte_valid | pte_addr;
2890 2896
2891 if (priv) 2897 if (priv)
2892 pte_w[0] |= gmmu_pte_privilege_true_f(); 2898 pte_w[0] |= gmmu_pte_privilege_true_f();
2893 2899
2894 pte_w[1] = (g->mm.vidmem_is_vidmem ? 2900 pte_w[1] = __gk20a_aperture_mask(g, aperture,
2895 gmmu_pte_aperture_sys_mem_ncoh_f() : 2901 gmmu_pte_aperture_sys_mem_ncoh_f(),
2896 gmmu_pte_aperture_video_memory_f()) | 2902 gmmu_pte_aperture_video_memory_f()) |
2897 gmmu_pte_kind_f(kind_v) | 2903 gmmu_pte_kind_f(kind_v) |
2898 gmmu_pte_comptagline_f((u32)(*ctag >> ctag_shift)); 2904 gmmu_pte_comptagline_f((u32)(*ctag >> ctag_shift));
@@ -2973,7 +2979,8 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
2973 int rw_flag, 2979 int rw_flag,
2974 bool sparse, 2980 bool sparse,
2975 int lvl, 2981 int lvl,
2976 bool priv) 2982 bool priv,
2983 enum gk20a_aperture aperture)
2977{ 2984{
2978 const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; 2985 const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
2979 const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1]; 2986 const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
@@ -3021,7 +3028,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
3021 err = l->update_entry(vm, pte, pde_i, pgsz_idx, 3028 err = l->update_entry(vm, pte, pde_i, pgsz_idx,
3022 sgl, offset, iova, 3029 sgl, offset, iova,
3023 kind_v, ctag, cacheable, unmapped_pte, 3030 kind_v, ctag, cacheable, unmapped_pte,
3024 rw_flag, sparse, priv); 3031 rw_flag, sparse, priv, aperture);
3025 if (err) 3032 if (err)
3026 return err; 3033 return err;
3027 3034
@@ -3042,7 +3049,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
3042 gpu_va, 3049 gpu_va,
3043 next, 3050 next,
3044 kind_v, ctag, cacheable, unmapped_pte, 3051 kind_v, ctag, cacheable, unmapped_pte,
3045 rw_flag, sparse, lvl+1, priv); 3052 rw_flag, sparse, lvl+1, priv, aperture);
3046 unmap_gmmu_pages(next_pte); 3053 unmap_gmmu_pages(next_pte);
3047 3054
3048 if (err) 3055 if (err)
@@ -3067,7 +3074,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
3067 bool cacheable, bool unmapped_pte, 3074 bool cacheable, bool unmapped_pte,
3068 int rw_flag, 3075 int rw_flag,
3069 bool sparse, 3076 bool sparse,
3070 bool priv) 3077 bool priv,
3078 enum gk20a_aperture aperture)
3071{ 3079{
3072 struct gk20a *g = gk20a_from_vm(vm); 3080 struct gk20a *g = gk20a_from_vm(vm);
3073 int ctag_granularity = g->ops.fb.compression_page_size(g); 3081 int ctag_granularity = g->ops.fb.compression_page_size(g);
@@ -3130,7 +3138,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
3130 &iova, 3138 &iova,
3131 gpu_va, gpu_end, 3139 gpu_va, gpu_end,
3132 kind_v, &ctag, 3140 kind_v, &ctag,
3133 cacheable, unmapped_pte, rw_flag, sparse, 0, priv); 3141 cacheable, unmapped_pte, rw_flag, sparse, 0, priv,
3142 aperture);
3134 unmap_gmmu_pages(&vm->pdb); 3143 unmap_gmmu_pages(&vm->pdb);
3135 3144
3136 smp_mb(); 3145 smp_mb();
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7d3b371c..a697e520 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -234,7 +234,8 @@ struct gk20a_mmu_level {
234 u64 *iova, 234 u64 *iova,
235 u32 kind_v, u64 *ctag, 235 u32 kind_v, u64 *ctag,
236 bool cacheable, bool unmapped_pte, 236 bool cacheable, bool unmapped_pte,
237 int rw_flag, bool sparse, bool priv); 237 int rw_flag, bool sparse, bool priv,
238 enum gk20a_aperture aperture);
238 size_t entry_size; 239 size_t entry_size;
239}; 240};
240 241