diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-06-17 08:53:23 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-07-05 02:12:29 -0400 |
commit | 71478a031c3c42a737be5cfd7450414e58de849b (patch) | |
tree | 782d9cfdf4daf5889e713e7c846fba18e7896bab | |
parent | e12c5c8594c429357427130389da632284d79bcc (diff) |
gpu: nvgpu: add support for vidmem in page tables
Modify page table updates to take an aperture flag (up until
gk20a_locked_gmmu_map()), don't hard-assume sysmem and propagate it to
hardware.
Jira DNVGPU-76
Change-Id: Ifcb22900c96db993068edd110e09368f72b06f69
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169307
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 83 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 3 |
3 files changed, 56 insertions, 38 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h index 9b444036..0a21b6ca 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h | |||
@@ -182,6 +182,14 @@ static inline u32 gmmu_pte_address_sys_w(void) | |||
182 | { | 182 | { |
183 | return 0; | 183 | return 0; |
184 | } | 184 | } |
185 | static inline u32 gmmu_pte_address_vid_f(u32 v) | ||
186 | { | ||
187 | return (v & 0x1ffffff) << 4; | ||
188 | } | ||
189 | static inline u32 gmmu_pte_address_vid_w(void) | ||
190 | { | ||
191 | return 0; | ||
192 | } | ||
185 | static inline u32 gmmu_pte_vol_w(void) | 193 | static inline u32 gmmu_pte_vol_w(void) |
186 | { | 194 | { |
187 | return 1; | 195 | return 1; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index a274820a..6fdfacdd 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -387,7 +387,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
387 | u8 kind_v, u32 ctag_offset, bool cacheable, | 387 | u8 kind_v, u32 ctag_offset, bool cacheable, |
388 | bool umapped_pte, int rw_flag, | 388 | bool umapped_pte, int rw_flag, |
389 | bool sparse, | 389 | bool sparse, |
390 | bool priv); | 390 | bool priv, |
391 | enum gk20a_aperture aperture); | ||
391 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); | 392 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); |
392 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); | 393 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); |
393 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); | 394 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); |
@@ -1640,7 +1641,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
1640 | NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE, | 1641 | NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE, |
1641 | rw_flag, | 1642 | rw_flag, |
1642 | sparse, | 1643 | sparse, |
1643 | priv); | 1644 | priv, |
1645 | APERTURE_SYSMEM); /* no vidmem bufs yet */ | ||
1644 | if (err) { | 1646 | if (err) { |
1645 | gk20a_err(d, "failed to update ptes on map"); | 1647 | gk20a_err(d, "failed to update ptes on map"); |
1646 | goto fail_validate; | 1648 | goto fail_validate; |
@@ -1690,7 +1692,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
1690 | vaddr + size, | 1692 | vaddr + size, |
1691 | 0, 0, false /* n/a for unmap */, | 1693 | 0, 0, false /* n/a for unmap */, |
1692 | false, rw_flag, | 1694 | false, rw_flag, |
1693 | sparse, 0); | 1695 | sparse, 0, |
1696 | APERTURE_INVALID); /* don't care for unmap */ | ||
1694 | if (err) | 1697 | if (err) |
1695 | dev_err(dev_from_vm(vm), | 1698 | dev_err(dev_from_vm(vm), |
1696 | "failed to update gmmu ptes on unmap"); | 1699 | "failed to update gmmu ptes on unmap"); |
@@ -2784,26 +2787,32 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl, | |||
2784 | } | 2787 | } |
2785 | 2788 | ||
2786 | /* for gk20a the "video memory" apertures here are misnomers. */ | 2789 | /* for gk20a the "video memory" apertures here are misnomers. */ |
2787 | static inline u32 big_valid_pde0_bits(struct gk20a *g, u64 pte_addr) | 2790 | static inline u32 big_valid_pde0_bits(struct gk20a *g, |
2791 | struct mem_desc *entry_mem) | ||
2788 | { | 2792 | { |
2793 | u64 pte_addr = g->ops.mm.get_iova_addr(g, entry_mem->sgt->sgl, 0); | ||
2789 | u32 pde0_bits = | 2794 | u32 pde0_bits = |
2790 | (g->mm.vidmem_is_vidmem ? | 2795 | gk20a_aperture_mask(g, entry_mem, |
2791 | gmmu_pde_aperture_big_sys_mem_ncoh_f() : | 2796 | gmmu_pde_aperture_big_sys_mem_ncoh_f(), |
2792 | gmmu_pde_aperture_big_video_memory_f()) | | 2797 | gmmu_pde_aperture_big_video_memory_f()) | |
2793 | gmmu_pde_address_big_sys_f( | 2798 | gmmu_pde_address_big_sys_f( |
2794 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); | 2799 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); |
2795 | return pde0_bits; | 2800 | |
2801 | return pde0_bits; | ||
2796 | } | 2802 | } |
2797 | 2803 | ||
2798 | static inline u32 small_valid_pde1_bits(struct gk20a *g, u64 pte_addr) | 2804 | static inline u32 small_valid_pde1_bits(struct gk20a *g, |
2805 | struct mem_desc *entry_mem) | ||
2799 | { | 2806 | { |
2807 | u64 pte_addr = g->ops.mm.get_iova_addr(g, entry_mem->sgt->sgl, 0); | ||
2800 | u32 pde1_bits = | 2808 | u32 pde1_bits = |
2801 | (g->mm.vidmem_is_vidmem ? | 2809 | gk20a_aperture_mask(g, entry_mem, |
2802 | gmmu_pde_aperture_small_sys_mem_ncoh_f() : | 2810 | gmmu_pde_aperture_small_sys_mem_ncoh_f(), |
2803 | gmmu_pde_aperture_small_video_memory_f()) | | 2811 | gmmu_pde_aperture_small_video_memory_f()) | |
2804 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ | 2812 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ |
2805 | gmmu_pde_address_small_sys_f( | 2813 | gmmu_pde_address_small_sys_f( |
2806 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); | 2814 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); |
2815 | |||
2807 | return pde1_bits; | 2816 | return pde1_bits; |
2808 | } | 2817 | } |
2809 | 2818 | ||
@@ -2821,11 +2830,11 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2821 | u64 *iova, | 2830 | u64 *iova, |
2822 | u32 kind_v, u64 *ctag, | 2831 | u32 kind_v, u64 *ctag, |
2823 | bool cacheable, bool unammped_pte, | 2832 | bool cacheable, bool unammped_pte, |
2824 | int rw_flag, bool sparse, bool priv) | 2833 | int rw_flag, bool sparse, bool priv, |
2834 | enum gk20a_aperture aperture) | ||
2825 | { | 2835 | { |
2826 | struct gk20a *g = gk20a_from_vm(vm); | 2836 | struct gk20a *g = gk20a_from_vm(vm); |
2827 | bool small_valid, big_valid; | 2837 | bool small_valid, big_valid; |
2828 | u64 pte_addr_small = 0, pte_addr_big = 0; | ||
2829 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; | 2838 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; |
2830 | u32 pde_v[2] = {0, 0}; | 2839 | u32 pde_v[2] = {0, 0}; |
2831 | u32 pde; | 2840 | u32 pde; |
@@ -2835,18 +2844,13 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2835 | small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small; | 2844 | small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small; |
2836 | big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big; | 2845 | big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big; |
2837 | 2846 | ||
2838 | if (small_valid) | ||
2839 | pte_addr_small = g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0); | ||
2840 | |||
2841 | if (big_valid) | ||
2842 | pte_addr_big = g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0); | ||
2843 | |||
2844 | pde_v[0] = gmmu_pde_size_full_f(); | 2847 | pde_v[0] = gmmu_pde_size_full_f(); |
2845 | pde_v[0] |= big_valid ? big_valid_pde0_bits(g, pte_addr_big) : | 2848 | pde_v[0] |= big_valid ? |
2846 | (gmmu_pde_aperture_big_invalid_f()); | 2849 | big_valid_pde0_bits(g, &entry->mem) : |
2850 | gmmu_pde_aperture_big_invalid_f(); | ||
2847 | 2851 | ||
2848 | pde_v[1] |= (small_valid ? | 2852 | pde_v[1] |= (small_valid ? |
2849 | small_valid_pde1_bits(g, pte_addr_small) : | 2853 | small_valid_pde1_bits(g, &entry->mem) : |
2850 | (gmmu_pde_aperture_small_invalid_f() | | 2854 | (gmmu_pde_aperture_small_invalid_f() | |
2851 | gmmu_pde_vol_small_false_f())) | 2855 | gmmu_pde_vol_small_false_f())) |
2852 | | | 2856 | | |
@@ -2871,7 +2875,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2871 | u64 *iova, | 2875 | u64 *iova, |
2872 | u32 kind_v, u64 *ctag, | 2876 | u32 kind_v, u64 *ctag, |
2873 | bool cacheable, bool unmapped_pte, | 2877 | bool cacheable, bool unmapped_pte, |
2874 | int rw_flag, bool sparse, bool priv) | 2878 | int rw_flag, bool sparse, bool priv, |
2879 | enum gk20a_aperture aperture) | ||
2875 | { | 2880 | { |
2876 | struct gk20a *g = gk20a_from_vm(vm); | 2881 | struct gk20a *g = gk20a_from_vm(vm); |
2877 | int ctag_shift = ilog2(g->ops.fb.compression_page_size(g)); | 2882 | int ctag_shift = ilog2(g->ops.fb.compression_page_size(g)); |
@@ -2879,20 +2884,21 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2879 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | 2884 | u32 pte_w[2] = {0, 0}; /* invalid pte */ |
2880 | 2885 | ||
2881 | if (*iova) { | 2886 | if (*iova) { |
2882 | if (unmapped_pte) | 2887 | u32 pte_valid = unmapped_pte ? |
2883 | pte_w[0] = gmmu_pte_valid_false_f() | | 2888 | gmmu_pte_valid_false_f() : |
2884 | gmmu_pte_address_sys_f(*iova | 2889 | gmmu_pte_valid_true_f(); |
2885 | >> gmmu_pte_address_shift_v()); | 2890 | u32 iova_v = *iova >> gmmu_pte_address_shift_v(); |
2886 | else | 2891 | u32 pte_addr = aperture == APERTURE_SYSMEM ? |
2887 | pte_w[0] = gmmu_pte_valid_true_f() | | 2892 | gmmu_pte_address_sys_f(iova_v) : |
2888 | gmmu_pte_address_sys_f(*iova | 2893 | gmmu_pte_address_vid_f(iova_v); |
2889 | >> gmmu_pte_address_shift_v()); | 2894 | |
2895 | pte_w[0] = pte_valid | pte_addr; | ||
2890 | 2896 | ||
2891 | if (priv) | 2897 | if (priv) |
2892 | pte_w[0] |= gmmu_pte_privilege_true_f(); | 2898 | pte_w[0] |= gmmu_pte_privilege_true_f(); |
2893 | 2899 | ||
2894 | pte_w[1] = (g->mm.vidmem_is_vidmem ? | 2900 | pte_w[1] = __gk20a_aperture_mask(g, aperture, |
2895 | gmmu_pte_aperture_sys_mem_ncoh_f() : | 2901 | gmmu_pte_aperture_sys_mem_ncoh_f(), |
2896 | gmmu_pte_aperture_video_memory_f()) | | 2902 | gmmu_pte_aperture_video_memory_f()) | |
2897 | gmmu_pte_kind_f(kind_v) | | 2903 | gmmu_pte_kind_f(kind_v) | |
2898 | gmmu_pte_comptagline_f((u32)(*ctag >> ctag_shift)); | 2904 | gmmu_pte_comptagline_f((u32)(*ctag >> ctag_shift)); |
@@ -2973,7 +2979,8 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
2973 | int rw_flag, | 2979 | int rw_flag, |
2974 | bool sparse, | 2980 | bool sparse, |
2975 | int lvl, | 2981 | int lvl, |
2976 | bool priv) | 2982 | bool priv, |
2983 | enum gk20a_aperture aperture) | ||
2977 | { | 2984 | { |
2978 | const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; | 2985 | const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; |
2979 | const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1]; | 2986 | const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1]; |
@@ -3021,7 +3028,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
3021 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | 3028 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, |
3022 | sgl, offset, iova, | 3029 | sgl, offset, iova, |
3023 | kind_v, ctag, cacheable, unmapped_pte, | 3030 | kind_v, ctag, cacheable, unmapped_pte, |
3024 | rw_flag, sparse, priv); | 3031 | rw_flag, sparse, priv, aperture); |
3025 | if (err) | 3032 | if (err) |
3026 | return err; | 3033 | return err; |
3027 | 3034 | ||
@@ -3042,7 +3049,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
3042 | gpu_va, | 3049 | gpu_va, |
3043 | next, | 3050 | next, |
3044 | kind_v, ctag, cacheable, unmapped_pte, | 3051 | kind_v, ctag, cacheable, unmapped_pte, |
3045 | rw_flag, sparse, lvl+1, priv); | 3052 | rw_flag, sparse, lvl+1, priv, aperture); |
3046 | unmap_gmmu_pages(next_pte); | 3053 | unmap_gmmu_pages(next_pte); |
3047 | 3054 | ||
3048 | if (err) | 3055 | if (err) |
@@ -3067,7 +3074,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
3067 | bool cacheable, bool unmapped_pte, | 3074 | bool cacheable, bool unmapped_pte, |
3068 | int rw_flag, | 3075 | int rw_flag, |
3069 | bool sparse, | 3076 | bool sparse, |
3070 | bool priv) | 3077 | bool priv, |
3078 | enum gk20a_aperture aperture) | ||
3071 | { | 3079 | { |
3072 | struct gk20a *g = gk20a_from_vm(vm); | 3080 | struct gk20a *g = gk20a_from_vm(vm); |
3073 | int ctag_granularity = g->ops.fb.compression_page_size(g); | 3081 | int ctag_granularity = g->ops.fb.compression_page_size(g); |
@@ -3130,7 +3138,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
3130 | &iova, | 3138 | &iova, |
3131 | gpu_va, gpu_end, | 3139 | gpu_va, gpu_end, |
3132 | kind_v, &ctag, | 3140 | kind_v, &ctag, |
3133 | cacheable, unmapped_pte, rw_flag, sparse, 0, priv); | 3141 | cacheable, unmapped_pte, rw_flag, sparse, 0, priv, |
3142 | aperture); | ||
3134 | unmap_gmmu_pages(&vm->pdb); | 3143 | unmap_gmmu_pages(&vm->pdb); |
3135 | 3144 | ||
3136 | smp_mb(); | 3145 | smp_mb(); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7d3b371c..a697e520 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -234,7 +234,8 @@ struct gk20a_mmu_level { | |||
234 | u64 *iova, | 234 | u64 *iova, |
235 | u32 kind_v, u64 *ctag, | 235 | u32 kind_v, u64 *ctag, |
236 | bool cacheable, bool unmapped_pte, | 236 | bool cacheable, bool unmapped_pte, |
237 | int rw_flag, bool sparse, bool priv); | 237 | int rw_flag, bool sparse, bool priv, |
238 | enum gk20a_aperture aperture); | ||
238 | size_t entry_size; | 239 | size_t entry_size; |
239 | }; | 240 | }; |
240 | 241 | ||