Diffstat (limited to 'include/nvgpu/nvgpu_mem.h')
-rw-r--r--  include/nvgpu/nvgpu_mem.h  359
1 file changed, 359 insertions, 0 deletions
diff --git a/include/nvgpu/nvgpu_mem.h b/include/nvgpu/nvgpu_mem.h
new file mode 100644
index 0000000..4e84f2a
--- /dev/null
+++ b/include/nvgpu/nvgpu_mem.h
@@ -0,0 +1,359 @@
/*
 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_MEM_H
#define NVGPU_MEM_H

#include <nvgpu/types.h>
#include <nvgpu/list.h>
#include <nvgpu/enabled.h>

#ifdef __KERNEL__
#include <nvgpu/linux/nvgpu_mem.h>
#elif defined(__NVGPU_POSIX__)
#include <nvgpu/posix/nvgpu_mem.h>
#else
#include <nvgpu_rmos/include/nvgpu_mem.h>
#endif

struct page;
struct sg_table;
struct nvgpu_sgt;

struct gk20a;
struct nvgpu_allocator;
struct nvgpu_gmmu_attrs;
struct nvgpu_page_alloc;

#define NVGPU_MEM_DMA_ERROR (~0ULL)

/*
 * Real location of a buffer: nvgpu_aperture_mask() deduces what the GPU is
 * told about the aperture, but this enum records where the memory was
 * actually allocated from.
 */
enum nvgpu_aperture {
        APERTURE_INVALID = 0, /* unallocated or N/A */
        APERTURE_SYSMEM,

        /* Don't use directly. Use APERTURE_SYSMEM; this is used internally. */
        APERTURE_SYSMEM_COH,

        APERTURE_VIDMEM
};

/*
 * Forward-declared opaque placeholder type that has no real definition, but
 * lets the compiler enforce type correctness. In reality, implementors of
 * nvgpu_sgt_ops will have some concrete type in place of this.
 */
struct nvgpu_sgl;

struct nvgpu_sgt_ops {
        struct nvgpu_sgl *(*sgl_next)(struct nvgpu_sgl *sgl);
        u64 (*sgl_phys)(struct gk20a *g, struct nvgpu_sgl *sgl);
        u64 (*sgl_dma)(struct nvgpu_sgl *sgl);
        u64 (*sgl_length)(struct nvgpu_sgl *sgl);
        u64 (*sgl_gpu_addr)(struct gk20a *g, struct nvgpu_sgl *sgl,
                            struct nvgpu_gmmu_attrs *attrs);
        /*
         * If left NULL then iommuable is assumed to be false.
         */
        bool (*sgt_iommuable)(struct gk20a *g, struct nvgpu_sgt *sgt);

        /*
         * Note: this operates on the whole SGT, not a specific SGL entry.
         */
        void (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt);
};

/*
 * Scatter gather table: this is a list of scatter list entries and the ops
 * for interacting with those entries.
 */
struct nvgpu_sgt {
        /*
         * Ops for interacting with the underlying scatter gather list entries.
         */
        const struct nvgpu_sgt_ops *ops;

        /*
         * The first node in the scatter gather list.
         */
        struct nvgpu_sgl *sgl;
};

/*
 * This struct holds the necessary information for describing a struct
 * nvgpu_mem's scatter gather list.
 *
 * This is one underlying implementation for nvgpu_sgl. Not all nvgpu_sgts
 * use this particular implementation. Nor is a given OS required to use this
 * at all.
 */
struct nvgpu_mem_sgl {
        /*
         * Internally this is implemented as a singly linked list.
         */
        struct nvgpu_mem_sgl *next;

        /*
         * There is both a phys address and a DMA address since some systems,
         * for example ones with an IOMMU, may see these as different
         * addresses.
         */
        u64 phys;
        u64 dma;
        u64 length;
};

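/*
 * Purely as an illustration (not part of this header): an nvgpu_sgt backed
 * by the nvgpu_mem_sgl list above could implement the basic nvgpu_sgt_ops
 * roughly as sketched here. The casts are what make the opaque nvgpu_sgl
 * type work; the helper names are hypothetical.
 *
 *   static struct nvgpu_sgl *mem_sgl_next(struct nvgpu_sgl *sgl)
 *   {
 *           struct nvgpu_mem_sgl *mem_sgl = (struct nvgpu_mem_sgl *)sgl;
 *
 *           return (struct nvgpu_sgl *)mem_sgl->next;
 *   }
 *
 *   static u64 mem_sgl_length(struct nvgpu_sgl *sgl)
 *   {
 *           return ((struct nvgpu_mem_sgl *)sgl)->length;
 *   }
 *
 *   static const struct nvgpu_sgt_ops mem_sgl_ops = {
 *           .sgl_next   = mem_sgl_next,
 *           .sgl_length = mem_sgl_length,
 *           // ... remaining ops elided ...
 *   };
 */
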
/*
 * Iterate over the SGL entries in an SGT.
 */
#define nvgpu_sgt_for_each_sgl(__sgl__, __sgt__) \
        for ((__sgl__) = (__sgt__)->sgl; \
             (__sgl__) != NULL; \
             (__sgl__) = nvgpu_sgt_get_next(__sgt__, __sgl__))

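/*
 * For example, a loop like the following sketch walks every entry of an
 * existing SGT and sums the mapped lengths using the accessors declared
 * further below (total_bytes is a local here, not an API):
 *
 *   struct nvgpu_sgl *sgl;
 *   u64 total_bytes = 0ULL;
 *
 *   nvgpu_sgt_for_each_sgl(sgl, sgt) {
 *           total_bytes += nvgpu_sgt_get_length(sgt, sgl);
 *   }
 */
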
struct nvgpu_mem {
        /*
         * Populated for all nvgpu_mem structs - vidmem or system.
         */
        enum nvgpu_aperture aperture;
        size_t size;
        size_t aligned_size;
        u64 gpu_va;
        bool skip_wmb;
        bool free_gpu_va;

        /*
         * Set when an nvgpu_mem struct is not a "real" nvgpu_mem struct.
         * Instead the struct is just a copy of another nvgpu_mem struct.
         */
#define NVGPU_MEM_FLAG_SHADOW_COPY (1 << 0)

        /*
         * Specify that the GVA mapping is a fixed mapping - that is, the
         * caller chose the GPU VA, not the GMMU mapping function. Only
         * relevant for VIDMEM.
         */
#define NVGPU_MEM_FLAG_FIXED (1 << 1)

        /*
         * Set for user-generated VIDMEM allocations. This triggers a special
         * cleanup path that clears the vidmem on free. Given that VIDMEM is
         * zeroed on boot, this means all user vidmem allocations are
         * therefore zeroed (to prevent leaking information in VIDMEM
         * buffers).
         */
#define NVGPU_MEM_FLAG_USER_MEM (1 << 2)

        /*
         * Internal flag that specifies this struct has not been made with DMA
         * memory and as a result should not try to use the DMA routines for
         * freeing the backing memory.
         *
         * However, this will not stop the DMA API from freeing other parts of
         * nvgpu_mem in a system specific way.
         */
#define __NVGPU_MEM_FLAG_NO_DMA (1 << 3)

        /*
         * Some nvgpu_mem objects act as facades to memory buffers owned by
         * someone else. This internal flag specifies that the sgt field is
         * "borrowed", and it must not be freed by us.
         *
         * Of course the caller will have to make sure that the sgt owner
         * outlives the nvgpu_mem.
         */
#define NVGPU_MEM_FLAG_FOREIGN_SGT (1 << 4)
        unsigned long mem_flags;

        /*
         * Only populated for a sysmem allocation.
         */
        void *cpu_va;

        /*
         * Fields only populated for vidmem allocations.
         */
        struct nvgpu_page_alloc *vidmem_alloc;
        struct nvgpu_allocator *allocator;
        struct nvgpu_list_node clear_list_entry;

        /*
         * This is defined by the system specific header. It can be empty if
         * there's no system specific stuff for a given system.
         */
        struct nvgpu_mem_priv priv;
};

static inline struct nvgpu_mem *
nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
{
        return (struct nvgpu_mem *)
                ((uintptr_t)node - offsetof(struct nvgpu_mem,
                                            clear_list_entry));
}

static inline const char *nvgpu_aperture_str(struct gk20a *g,
                                             enum nvgpu_aperture aperture)
{
        switch (aperture) {
        case APERTURE_INVALID:
                return "INVAL";
        case APERTURE_SYSMEM:
                return "SYSMEM";
        case APERTURE_SYSMEM_COH:
                return "SYSCOH";
        case APERTURE_VIDMEM:
                return "VIDMEM";
        }
        return "UNKNOWN";
}

bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap);
bool nvgpu_mem_is_sysmem(struct nvgpu_mem *mem);

/*
 * Returns true if the passed nvgpu_mem has been allocated (i.e. it is valid
 * for subsequent use).
 */
static inline bool nvgpu_mem_is_valid(struct nvgpu_mem *mem)
{
        /*
         * Internally the DMA APIs must set/unset the aperture flag when
         * allocating/freeing the buffer. So check that to see if the *mem
         * has been allocated or not.
         *
         * This relies on nvgpu_mem structs being zeroed before being
         * initialized, since APERTURE_INVALID is equal to 0.
         */
        return mem->aperture != APERTURE_INVALID;
}

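/*
 * A minimal usage sketch, assuming the DMA API from <nvgpu/dma.h>
 * (nvgpu_dma_alloc()/nvgpu_dma_free() belong to that API, not this header):
 *
 *   struct nvgpu_mem mem = { };
 *
 *   if (nvgpu_dma_alloc(g, 4096U, &mem) != 0)
 *           return -ENOMEM;
 *
 *   // mem.aperture is now != APERTURE_INVALID, so this returns true.
 *   if (nvgpu_mem_is_valid(&mem)) {
 *           // ... use the buffer ...
 *   }
 *
 *   nvgpu_dma_free(g, &mem);
 *   // The free path resets the aperture; nvgpu_mem_is_valid() is now false.
 */
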
/**
 * nvgpu_sgt_create_from_mem - Create a scatter list from an nvgpu_mem.
 *
 * @g   - The GPU.
 * @mem - The source memory allocation to use.
 *
 * Create a scatter gather table from the passed @mem struct. This list lets
 * the calling code iterate across each chunk of a DMA allocation for when
 * that DMA allocation is not completely contiguous.
 */
struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
                                            struct nvgpu_mem *mem);

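/*
 * Typical lifecycle, sketched with error handling elided ("mem" is a valid
 * nvgpu_mem):
 *
 *   struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
 *
 *   if (sgt == NULL)
 *           return -ENOMEM;
 *
 *   // ... walk the entries with nvgpu_sgt_for_each_sgl() ...
 *
 *   nvgpu_sgt_free(g, sgt);
 */
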
struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
                                     struct nvgpu_sgl *sgl);
u64 nvgpu_sgt_get_phys(struct gk20a *g, struct nvgpu_sgt *sgt,
                       struct nvgpu_sgl *sgl);
u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, struct nvgpu_sgl *sgl);
u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, struct nvgpu_sgl *sgl);
u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt,
                           struct nvgpu_sgl *sgl,
                           struct nvgpu_gmmu_attrs *attrs);
void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt);

bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt);
u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt);

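/*
 * These wrappers presumably just dispatch through sgt->ops; e.g.
 * nvgpu_sgt_get_next() would be little more than:
 *
 *   return sgt->ops->sgl_next(sgl);
 *
 * which is also why a NULL sgt_iommuable op has to be handled by the
 * nvgpu_sgt_iommuable() wrapper (it then reports false, per the ops struct
 * comment above).
 */
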
/**
 * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one.
 *
 * @g          - The GPU.
 * @dest       - Destination nvgpu_mem to hold resulting memory description.
 * @src        - Source memory. Must be valid.
 * @start_page - Starting page to use.
 * @nr_pages   - Number of pages to place in the new nvgpu_mem.
 *
 * Create a new nvgpu_mem struct describing a subsection of the @src
 * nvgpu_mem. This will create an nvgpu_mem object starting at @start_page
 * and spanning @nr_pages pages. This currently only works on SYSMEM
 * nvgpu_mems; if called on a VIDMEM nvgpu_mem it will return an error.
 *
 * There is a _major_ caveat to this API: if the source buffer is freed
 * before the copy is freed, then the copy becomes invalid. This results from
 * how typical DMA APIs work: we can't call free on the buffer multiple
 * times, nor can we call free on parts of a buffer. Thus the only way to
 * ensure that the entire buffer is actually freed is to call free once on
 * the source buffer. Since these nvgpu_mem structs are not ref-counted in
 * any way, it is up to the caller of this API to _ensure_ that the resulting
 * nvgpu_mem buffer from this API is freed before the source buffer.
 * Otherwise there can and will be memory corruption.
 *
 * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or
 * the nvgpu_dma_unmap_free() function depending on whether or not the
 * resulting nvgpu_mem has been mapped.
 *
 * This will return 0 on success. An error is returned if the resulting
 * nvgpu_mem would not make sense or if a new scatter gather table cannot be
 * created.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
                              struct nvgpu_mem *dest, struct nvgpu_mem *src,
                              u64 start_page, int nr_pages);

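/*
 * A sketch of the intended usage and, more importantly, the required free
 * ordering (nvgpu_dma_free() is from <nvgpu/dma.h>):
 *
 *   struct nvgpu_mem sub = { };
 *
 *   // Describe pages [2, 2 + 4) of src as a new nvgpu_mem.
 *   if (nvgpu_mem_create_from_mem(g, &sub, src, 2, 4) != 0)
 *           return -EINVAL;
 *
 *   // ... use sub ...
 *
 *   nvgpu_dma_free(g, &sub);    // Free the copy first...
 *   nvgpu_dma_free(g, src);     // ...and only then the source buffer.
 */
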
/*
 * Really free a vidmem buffer. There's a fair amount of work involved in
 * freeing vidmem buffers in the DMA API. This handles none of that - it only
 * frees the underlying vidmem specific structures used in vidmem buffers.
 *
 * This is implemented in the OS specific code. If it's not necessary it can
 * be a noop. But the symbol must at least be present.
 */
void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem);

/*
 * Buffer accessors. Sysmem buffers always have a CPU mapping and vidmem
 * buffers are accessed via PRAMIN.
 */

/* word-indexed offset */
u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w);
/* byte offset (32b-aligned) */
u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
/* memcpy to cpu, offset and size in bytes (32b-aligned) */
void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
                    void *dest, u32 size);

/* word-indexed offset */
void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data);
/* byte offset (32b-aligned) */
void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data);
/* memcpy from cpu, offset and size in bytes (32b-aligned) */
void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
                    void *src, u32 size);
/* size and offset in bytes (32b-aligned), filled with the constant byte c */
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
                  u32 c, u32 size);

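/*
 * For instance, a read-modify-write of the second word of a buffer, and a
 * clear of its first 256 bytes, would look roughly like:
 *
 *   u32 v = nvgpu_mem_rd32(g, mem, 1);      // word index 1 == byte offset 4
 *
 *   nvgpu_mem_wr32(g, mem, 1, v | 0x1U);
 *   nvgpu_memset(g, mem, 0, 0, 256);        // offset 0, byte 0x0, 256 bytes
 */
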
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);

u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture,
                            u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
                        u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);

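/*
 * Sketch of how callers use these masks: the three mask arguments are the
 * hardware field values for each aperture, and the helper picks the one
 * matching where mem actually lives (per the enum nvgpu_aperture comment
 * above). The *_f() register field names here are hypothetical:
 *
 *   u32 ap = nvgpu_aperture_mask(g, mem,
 *                                hw_aperture_sysmem_ncoh_f(),
 *                                hw_aperture_sysmem_coh_f(),
 *                                hw_aperture_vidmem_f());
 */
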
u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys);

#endif /* NVGPU_MEM_H */