diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 213 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 82 |
2 files changed, 7 insertions, 288 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 2fe76d80..e78eb941 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -56,21 +56,18 @@ | |||
56 | static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); | 56 | static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); |
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | static inline void | 59 | void set_vidmem_page_alloc(struct scatterlist *sgl, u64 addr) |
60 | set_vidmem_page_alloc(struct scatterlist *sgl, u64 addr) | ||
61 | { | 60 | { |
62 | /* set bit 0 to indicate vidmem allocation */ | 61 | /* set bit 0 to indicate vidmem allocation */ |
63 | sg_dma_address(sgl) = (addr | 1ULL); | 62 | sg_dma_address(sgl) = (addr | 1ULL); |
64 | } | 63 | } |
65 | 64 | ||
66 | static inline bool | 65 | bool is_vidmem_page_alloc(u64 addr) |
67 | is_vidmem_page_alloc(u64 addr) | ||
68 | { | 66 | { |
69 | return !!(addr & 1ULL); | 67 | return !!(addr & 1ULL); |
70 | } | 68 | } |
71 | 69 | ||
72 | struct nvgpu_page_alloc * | 70 | struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl) |
73 | get_vidmem_page_alloc(struct scatterlist *sgl) | ||
74 | { | 71 | { |
75 | u64 addr; | 72 | u64 addr; |
76 | 73 | ||
@@ -84,187 +81,6 @@ get_vidmem_page_alloc(struct scatterlist *sgl) | |||
84 | return (struct nvgpu_page_alloc *)(uintptr_t)addr; | 81 | return (struct nvgpu_page_alloc *)(uintptr_t)addr; |
85 | } | 82 | } |
86 | 83 | ||
87 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | ||
88 | { | ||
89 | void *cpu_va; | ||
90 | |||
91 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | ||
92 | return 0; | ||
93 | |||
94 | if (WARN_ON(mem->cpu_va)) { | ||
95 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); | ||
96 | return -EBUSY; | ||
97 | } | ||
98 | |||
99 | cpu_va = vmap(mem->pages, | ||
100 | PAGE_ALIGN(mem->size) >> PAGE_SHIFT, | ||
101 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
102 | |||
103 | if (WARN_ON(!cpu_va)) | ||
104 | return -ENOMEM; | ||
105 | |||
106 | mem->cpu_va = cpu_va; | ||
107 | return 0; | ||
108 | } | ||
109 | |||
110 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | ||
111 | { | ||
112 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | ||
113 | return; | ||
114 | |||
115 | vunmap(mem->cpu_va); | ||
116 | mem->cpu_va = NULL; | ||
117 | } | ||
118 | |||
119 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | ||
120 | { | ||
121 | u32 data = 0; | ||
122 | |||
123 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
124 | u32 *ptr = mem->cpu_va; | ||
125 | |||
126 | WARN_ON(!ptr); | ||
127 | data = ptr[w]; | ||
128 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
129 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | ||
130 | #endif | ||
131 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
132 | u32 value; | ||
133 | u32 *p = &value; | ||
134 | |||
135 | nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), | ||
136 | sizeof(u32), pramin_access_batch_rd_n, &p); | ||
137 | |||
138 | data = value; | ||
139 | |||
140 | } else { | ||
141 | WARN_ON("Accessing unallocated mem_desc"); | ||
142 | } | ||
143 | |||
144 | return data; | ||
145 | } | ||
146 | |||
147 | u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset) | ||
148 | { | ||
149 | WARN_ON(offset & 3); | ||
150 | return gk20a_mem_rd32(g, mem, offset / sizeof(u32)); | ||
151 | } | ||
152 | |||
153 | void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, | ||
154 | u32 offset, void *dest, u32 size) | ||
155 | { | ||
156 | WARN_ON(offset & 3); | ||
157 | WARN_ON(size & 3); | ||
158 | |||
159 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
160 | u8 *src = (u8 *)mem->cpu_va + offset; | ||
161 | |||
162 | WARN_ON(!mem->cpu_va); | ||
163 | memcpy(dest, src, size); | ||
164 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
165 | if (size) | ||
166 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", | ||
167 | src, *dest, size); | ||
168 | #endif | ||
169 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
170 | u32 *dest_u32 = dest; | ||
171 | |||
172 | nvgpu_pramin_access_batched(g, mem, offset, size, | ||
173 | pramin_access_batch_rd_n, &dest_u32); | ||
174 | } else { | ||
175 | WARN_ON("Accessing unallocated mem_desc"); | ||
176 | } | ||
177 | } | ||
178 | |||
179 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | ||
180 | { | ||
181 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
182 | u32 *ptr = mem->cpu_va; | ||
183 | |||
184 | WARN_ON(!ptr); | ||
185 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
186 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | ||
187 | #endif | ||
188 | ptr[w] = data; | ||
189 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
190 | u32 value = data; | ||
191 | u32 *p = &value; | ||
192 | |||
193 | nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), | ||
194 | sizeof(u32), pramin_access_batch_wr_n, &p); | ||
195 | if (!mem->skip_wmb) | ||
196 | wmb(); | ||
197 | } else { | ||
198 | WARN_ON("Accessing unallocated mem_desc"); | ||
199 | } | ||
200 | } | ||
201 | |||
202 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) | ||
203 | { | ||
204 | WARN_ON(offset & 3); | ||
205 | gk20a_mem_wr32(g, mem, offset / sizeof(u32), data); | ||
206 | } | ||
207 | |||
208 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
209 | void *src, u32 size) | ||
210 | { | ||
211 | WARN_ON(offset & 3); | ||
212 | WARN_ON(size & 3); | ||
213 | |||
214 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
215 | u8 *dest = (u8 *)mem->cpu_va + offset; | ||
216 | |||
217 | WARN_ON(!mem->cpu_va); | ||
218 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
219 | if (size) | ||
220 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", | ||
221 | dest, *src, size); | ||
222 | #endif | ||
223 | memcpy(dest, src, size); | ||
224 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
225 | u32 *src_u32 = src; | ||
226 | |||
227 | nvgpu_pramin_access_batched(g, mem, offset, size, | ||
228 | pramin_access_batch_wr_n, &src_u32); | ||
229 | if (!mem->skip_wmb) | ||
230 | wmb(); | ||
231 | } else { | ||
232 | WARN_ON("Accessing unallocated mem_desc"); | ||
233 | } | ||
234 | } | ||
235 | |||
236 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
237 | u32 c, u32 size) | ||
238 | { | ||
239 | WARN_ON(offset & 3); | ||
240 | WARN_ON(size & 3); | ||
241 | WARN_ON(c & ~0xff); | ||
242 | |||
243 | c &= 0xff; | ||
244 | |||
245 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
246 | u8 *dest = (u8 *)mem->cpu_va + offset; | ||
247 | |||
248 | WARN_ON(!mem->cpu_va); | ||
249 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
250 | if (size) | ||
251 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x [times %d]", | ||
252 | dest, c, size); | ||
253 | #endif | ||
254 | memset(dest, c, size); | ||
255 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
256 | u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); | ||
257 | u32 *p = &repeat_value; | ||
258 | |||
259 | nvgpu_pramin_access_batched(g, mem, offset, size, | ||
260 | pramin_access_batch_set, &p); | ||
261 | if (!mem->skip_wmb) | ||
262 | wmb(); | ||
263 | } else { | ||
264 | WARN_ON("Accessing unallocated mem_desc"); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | /* | 84 | /* |
269 | * GPU mapping life cycle | 85 | * GPU mapping life cycle |
270 | * ====================== | 86 | * ====================== |
@@ -3121,29 +2937,6 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) | |||
3121 | } | 2937 | } |
3122 | #endif | 2938 | #endif |
3123 | 2939 | ||
3124 | u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture, | ||
3125 | u32 sysmem_mask, u32 vidmem_mask) | ||
3126 | { | ||
3127 | switch (aperture) { | ||
3128 | case APERTURE_SYSMEM: | ||
3129 | /* sysmem for dgpus; some igpus consider system memory vidmem */ | ||
3130 | return g->mm.vidmem_is_vidmem ? sysmem_mask : vidmem_mask; | ||
3131 | case APERTURE_VIDMEM: | ||
3132 | /* for dgpus only */ | ||
3133 | return vidmem_mask; | ||
3134 | case APERTURE_INVALID: | ||
3135 | WARN_ON("Bad aperture"); | ||
3136 | } | ||
3137 | return 0; | ||
3138 | } | ||
3139 | |||
3140 | u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem, | ||
3141 | u32 sysmem_mask, u32 vidmem_mask) | ||
3142 | { | ||
3143 | return __gk20a_aperture_mask(g, mem->aperture, | ||
3144 | sysmem_mask, vidmem_mask); | ||
3145 | } | ||
3146 | |||
3147 | int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, | 2940 | int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, |
3148 | struct mem_desc *mem) | 2941 | struct mem_desc *mem) |
3149 | { | 2942 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b425ec5c..da8bbb0a 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/dma-iommu.h> | 24 | #include <asm/dma-iommu.h> |
25 | #include <asm/cacheflush.h> | 25 | #include <asm/cacheflush.h> |
26 | 26 | ||
27 | #include <nvgpu/mem_desc.h> | ||
27 | #include <nvgpu/allocator.h> | 28 | #include <nvgpu/allocator.h> |
28 | #include <nvgpu/list.h> | 29 | #include <nvgpu/list.h> |
29 | #include <nvgpu/rbtree.h> | 30 | #include <nvgpu/rbtree.h> |
@@ -39,60 +40,12 @@ | |||
39 | outer_flush_range(pa, pa + (size_t)(size)); \ | 40 | outer_flush_range(pa, pa + (size_t)(size)); \ |
40 | } while (0) | 41 | } while (0) |
41 | 42 | ||
42 | /* | ||
43 | * Real location of a buffer - gk20a_aperture_mask() will deduce what will be | ||
44 | * told to the gpu about the aperture, but this flag designates where the | ||
45 | * memory actually was allocated from. | ||
46 | */ | ||
47 | enum gk20a_aperture { | ||
48 | APERTURE_INVALID, /* unallocated or N/A */ | ||
49 | APERTURE_SYSMEM, | ||
50 | APERTURE_VIDMEM | ||
51 | }; | ||
52 | |||
53 | enum gk20a_mem_rw_flag { | 43 | enum gk20a_mem_rw_flag { |
54 | gk20a_mem_flag_none = 0, | 44 | gk20a_mem_flag_none = 0, |
55 | gk20a_mem_flag_read_only = 1, | 45 | gk20a_mem_flag_read_only = 1, |
56 | gk20a_mem_flag_write_only = 2, | 46 | gk20a_mem_flag_write_only = 2, |
57 | }; | 47 | }; |
58 | 48 | ||
59 | static inline const char *gk20a_aperture_str(enum gk20a_aperture aperture) | ||
60 | { | ||
61 | switch (aperture) { | ||
62 | case APERTURE_INVALID: return "invalid"; | ||
63 | case APERTURE_SYSMEM: return "sysmem"; | ||
64 | case APERTURE_VIDMEM: return "vidmem"; | ||
65 | }; | ||
66 | return "UNKNOWN"; | ||
67 | } | ||
68 | |||
69 | struct mem_desc { | ||
70 | void *cpu_va; /* sysmem only */ | ||
71 | struct page **pages; /* sysmem only */ | ||
72 | struct sg_table *sgt; | ||
73 | enum gk20a_aperture aperture; | ||
74 | size_t size; | ||
75 | u64 gpu_va; | ||
76 | bool fixed; /* vidmem only */ | ||
77 | bool user_mem; /* vidmem only */ | ||
78 | struct nvgpu_allocator *allocator; /* vidmem only */ | ||
79 | struct nvgpu_list_node clear_list_entry; /* vidmem only */ | ||
80 | bool skip_wmb; | ||
81 | unsigned long flags; | ||
82 | }; | ||
83 | |||
84 | static inline struct mem_desc * | ||
85 | mem_desc_from_clear_list_entry(struct nvgpu_list_node *node) | ||
86 | { | ||
87 | return (struct mem_desc *) | ||
88 | ((uintptr_t)node - offsetof(struct mem_desc, clear_list_entry)); | ||
89 | }; | ||
90 | |||
91 | struct mem_desc_sub { | ||
92 | u32 offset; | ||
93 | u32 size; | ||
94 | }; | ||
95 | |||
96 | struct gpfifo_desc { | 49 | struct gpfifo_desc { |
97 | struct mem_desc mem; | 50 | struct mem_desc mem; |
98 | u32 entry_num; | 51 | u32 entry_num; |
@@ -511,36 +464,9 @@ enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, | |||
511 | u64 base, u64 size); | 464 | u64 base, u64 size); |
512 | enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); | 465 | enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); |
513 | 466 | ||
514 | struct nvgpu_page_alloc * | 467 | void set_vidmem_page_alloc(struct scatterlist *sgl, u64 addr); |
515 | get_vidmem_page_alloc(struct scatterlist *sgl); | 468 | bool is_vidmem_page_alloc(u64 addr); |
516 | 469 | struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl); | |
517 | /* | ||
518 | * Buffer accessors - wrap between begin() and end() if there is no permanent | ||
519 | * kernel mapping for this buffer. | ||
520 | */ | ||
521 | |||
522 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem); | ||
523 | /* nop for null mem, like with free() or vunmap() */ | ||
524 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem); | ||
525 | |||
526 | /* word-indexed offset */ | ||
527 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w); | ||
528 | /* byte offset (32b-aligned) */ | ||
529 | u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset); | ||
530 | /* memcpy to cpu, offset and size in bytes (32b-aligned) */ | ||
531 | void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
532 | void *dest, u32 size); | ||
533 | |||
534 | /* word-indexed offset */ | ||
535 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data); | ||
536 | /* byte offset (32b-aligned) */ | ||
537 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data); | ||
538 | /* memcpy from cpu, offset and size in bytes (32b-aligned) */ | ||
539 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
540 | void *src, u32 size); | ||
541 | /* size and offset in bytes (32b-aligned), filled with the constant byte c */ | ||
542 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
543 | u32 c, u32 size); | ||
544 | 470 | ||
545 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ | 471 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ |
546 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ | 472 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ |