/*
 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_MEM_H
#define NVGPU_MEM_H

#include <nvgpu/types.h>
#include <nvgpu/list.h>
#include <nvgpu/utils.h>

#ifdef __KERNEL__
#include <nvgpu/linux/nvgpu_mem.h>
#elif defined(__NVGPU_POSIX__)
#include <nvgpu/posix/nvgpu_mem.h>
#else
#include <nvgpu_rmos/include/nvgpu_mem.h>
#endif

struct page;
struct sg_table;

struct nvgpu_sgt;
struct gk20a;
struct nvgpu_allocator;
struct nvgpu_gmmu_attrs;
struct nvgpu_page_alloc;

#define NVGPU_MEM_DMA_ERROR		(~0ULL)

/*
 * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be
 * told to the GPU about the aperture, but this enum designates where the
 * memory was actually allocated from.
 */
enum nvgpu_aperture {
	APERTURE_INVALID = 0,	/* unallocated or N/A */
	APERTURE_SYSMEM,

	/* Don't use directly. Use APERTURE_SYSMEM; this is used internally. */
	APERTURE_SYSMEM_COH,

	APERTURE_VIDMEM
};

/*
 * Forward declared opaque placeholder type that does not really exist, but
 * helps the compiler catch type errors. In reality, implementors of
 * nvgpu_sgt_ops will have some concrete type in place of this.
 */
struct nvgpu_sgl;

struct nvgpu_sgt_ops {
	struct nvgpu_sgl *(*sgl_next)(struct nvgpu_sgl *sgl);
	u64 (*sgl_phys)(struct gk20a *g, struct nvgpu_sgl *sgl);
	u64 (*sgl_dma)(struct nvgpu_sgl *sgl);
	u64 (*sgl_length)(struct nvgpu_sgl *sgl);
	u64 (*sgl_gpu_addr)(struct gk20a *g, struct nvgpu_sgl *sgl,
			    struct nvgpu_gmmu_attrs *attrs);

	/*
	 * If left NULL then iommuable is assumed to be false.
	 */
	bool (*sgt_iommuable)(struct gk20a *g, struct nvgpu_sgt *sgt);

	/*
	 * Note: this operates on the whole SGT, not a specific SGL entry.
	 */
	void (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt);
};

/*
 * Scatter gather table: this is a list of scatter list entries and the ops
 * for interacting with those entries.
 */
struct nvgpu_sgt {
	/*
	 * Ops for interacting with the underlying scatter gather list entries.
	 */
	const struct nvgpu_sgt_ops *ops;

	/*
	 * The first node in the scatter gather list.
	 */
	struct nvgpu_sgl *sgl;
};

/*
 * This struct holds the necessary information for describing a struct
 * nvgpu_mem's scatter gather list.
 *
 * This is one underlying implementation for nvgpu_sgl. Not all nvgpu_sgts
 * use this particular implementation. Nor is a given OS required to use this
 * at all.
 */
struct nvgpu_mem_sgl {
	/*
	 * Internally this is implemented as a singly linked list.
	 */
	struct nvgpu_mem_sgl	*next;

	/*
	 * There is both a phys address and a DMA address since some systems,
	 * for example ones with an IOMMU, may see these as different
	 * addresses.
	 */
	u64			 phys;
	u64			 dma;
	u64			 length;
};
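
/*
 * Illustrative sketch, not part of the original API: resolving a byte offset
 * to a physical address by walking a nvgpu_mem_sgl chain. The helper name is
 * ours, chosen for this example; real backends expose this kind of logic via
 * their nvgpu_sgt_ops instead.
 */
static inline u64 nvgpu_mem_sgl_example_phys_of(struct nvgpu_mem_sgl *sgl,
						u64 offset)
{
	/* Follow the singly linked list to the chunk containing offset. */
	while (sgl != NULL) {
		if (offset < sgl->length)
			return sgl->phys + offset;
		offset -= sgl->length;
		sgl = sgl->next;
	}

	/* Offset past the end of the chain. */
	return NVGPU_MEM_DMA_ERROR;
}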
/*
 * Iterate over the SGL entries in an SGT.
 */
#define nvgpu_sgt_for_each_sgl(__sgl__, __sgt__)		\
	for ((__sgl__) = (__sgt__)->sgl;			\
	     (__sgl__) != NULL;					\
	     (__sgl__) = nvgpu_sgt_get_next(__sgt__, __sgl__))

struct nvgpu_mem {
	/*
	 * Populated for all nvgpu_mem structs - vidmem or system.
	 */
	enum nvgpu_aperture		 aperture;
	size_t				 size;
	size_t				 aligned_size;
	u64				 gpu_va;
	bool				 skip_wmb;
	bool				 free_gpu_va;

	/*
	 * Set when an nvgpu_mem struct is not a "real" nvgpu_mem struct.
	 * Instead the struct is just a copy of another nvgpu_mem struct.
	 */
#define NVGPU_MEM_FLAG_SHADOW_COPY		(1 << 0)

	/*
	 * Specify that the GVA mapping is a fixed mapping - that is, the
	 * caller chose the GPU VA, not the GMMU mapping function. Only
	 * relevant for VIDMEM.
	 */
#define NVGPU_MEM_FLAG_FIXED			(1 << 1)

	/*
	 * Set for user generated VIDMEM allocations. This triggers a special
	 * cleanup path that clears the vidmem on free. Given that VIDMEM is
	 * zeroed on boot, this means that all user vidmem allocations are
	 * therefore zeroed (to prevent leaking information in VIDMEM
	 * buffers).
	 */
#define NVGPU_MEM_FLAG_USER_MEM			(1 << 2)

	/*
	 * Internal flag that specifies this struct has not been made with
	 * DMA memory and as a result should not try to use the DMA routines
	 * for freeing the backing memory.
	 *
	 * However, this will not stop the DMA API from freeing other parts
	 * of nvgpu_mem in a system specific way.
	 */
#define __NVGPU_MEM_FLAG_NO_DMA			(1 << 3)

	/*
	 * Some nvgpu_mem objects act as facades to memory buffers owned by
	 * someone else. This internal flag specifies that the sgt field is
	 * "borrowed" and must not be freed by us.
	 *
	 * Of course the caller will have to make sure that the sgt owner
	 * outlives the nvgpu_mem.
	 */
#define NVGPU_MEM_FLAG_FOREIGN_SGT		(1 << 4)
	unsigned long			 mem_flags;

	/*
	 * Only populated for a sysmem allocation.
	 */
	void				*cpu_va;

	/*
	 * Fields only populated for vidmem allocations.
	 */
	struct nvgpu_page_alloc		*vidmem_alloc;
	struct nvgpu_allocator		*allocator;
	struct nvgpu_list_node		 clear_list_entry;

	/*
	 * This is defined by the system specific header. It can be empty if
	 * there's no system specific stuff for a given system.
	 */
	struct nvgpu_mem_priv		 priv;
};

static inline struct nvgpu_mem *
nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
{
	return (struct nvgpu_mem *)
		((uintptr_t)node - offsetof(struct nvgpu_mem,
					    clear_list_entry));
}

static inline const char *nvgpu_aperture_str(struct gk20a *g,
					     enum nvgpu_aperture aperture)
{
	switch (aperture) {
	case APERTURE_INVALID:
		return "INVAL";
	case APERTURE_SYSMEM:
		return "SYSMEM";
	case APERTURE_SYSMEM_COH:
		return "SYSCOH";
	case APERTURE_VIDMEM:
		return "VIDMEM";
	}
	return "UNKNOWN";
}

bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap);
bool nvgpu_mem_is_sysmem(struct nvgpu_mem *mem);

/*
 * Returns true if the passed nvgpu_mem has been allocated (i.e. it's valid
 * for subsequent use).
 */
static inline bool nvgpu_mem_is_valid(struct nvgpu_mem *mem)
{
	/*
	 * Internally the DMA APIs must set/unset the aperture flag when
	 * allocating/freeing the buffer. So check that to see if the *mem
	 * has been allocated or not.
	 *
	 * This relies on nvgpu_mem structs being zeroed before being
	 * initialized since APERTURE_INVALID is equal to 0.
	 */
	return mem->aperture != APERTURE_INVALID;
}
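
/*
 * Illustrative sketch, not part of the original API: a caller-side guard
 * built from the predicates above. The helper name is ours; it simply shows
 * that cpu_va is only meaningful for valid SYSMEM allocations.
 */
static inline void *nvgpu_mem_example_cpu_va(struct nvgpu_mem *mem)
{
	/* VIDMEM has no CPU mapping; unallocated mems have nothing at all. */
	if (!nvgpu_mem_is_valid(mem) || !nvgpu_mem_is_sysmem(mem))
		return NULL;

	return mem->cpu_va;
}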
/**
 * nvgpu_sgt_create_from_mem - Create a scatter gather table from an
 *                             nvgpu_mem.
 *
 * @g   - The GPU.
 * @mem - The source memory allocation to use.
 *
 * Create a scatter gather table from the passed @mem struct. This table lets
 * the calling code iterate across each chunk of a DMA allocation for when
 * that DMA allocation is not completely contiguous.
 */
struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
					    struct nvgpu_mem *mem);

struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
				     struct nvgpu_sgl *sgl);
u64 nvgpu_sgt_get_phys(struct gk20a *g, struct nvgpu_sgt *sgt,
		       struct nvgpu_sgl *sgl);
u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, struct nvgpu_sgl *sgl);
u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, struct nvgpu_sgl *sgl);
u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt,
			   struct nvgpu_sgl *sgl,
			   struct nvgpu_gmmu_attrs *attrs);
void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt);

bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt);
u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt);

/**
 * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one.
 *
 * @g          - The GPU.
 * @dest       - Destination nvgpu_mem to hold resulting memory description.
 * @src        - Source memory. Must be valid.
 * @start_page - Starting page to use.
 * @nr_pages   - Number of pages to place in the new nvgpu_mem.
 *
 * Create a new nvgpu_mem struct describing a subsection of the @src
 * nvgpu_mem. The resulting nvgpu_mem object starts at @start_page and is
 * @nr_pages long. This currently only works on SYSMEM nvgpu_mems; if called
 * on a VIDMEM nvgpu_mem it returns an error.
 *
 * There is a _major_ caveat to this API: if the source buffer is freed
 * before the copy, then the copy becomes invalid. This is a result of how
 * typical DMA APIs work: we can't call free on a buffer multiple times, nor
 * can we call free on parts of a buffer. Thus the only way to ensure that
 * the entire buffer is actually freed is to call free once on the source
 * buffer. Since these nvgpu_mem structs are not ref-counted in any way, it
 * is up to the caller of this API to _ensure_ that the resulting nvgpu_mem
 * buffer from this API is freed before the source buffer. Otherwise there
 * can and will be memory corruption.
 *
 * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or
 * the nvgpu_dma_unmap_free() function depending on whether or not the
 * resulting nvgpu_mem has been mapped.
 *
 * This will return 0 on success. An error is returned if the resulting
 * nvgpu_mem would not make sense or if a new scatter gather table cannot be
 * created.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
			      struct nvgpu_mem *dest, struct nvgpu_mem *src,
			      u64 start_page, int nr_pages);

/*
 * Really free a vidmem buffer. There's a fair amount of work involved in
 * freeing vidmem buffers in the DMA API. This handles none of that - it only
 * frees the underlying vidmem specific structures used in vidmem buffers.
 *
 * This is implemented in the OS specific code. If it's not necessary it can
 * be a noop. But the symbol must at least be present.
 */
void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem);
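
/*
 * Illustrative sketch, not part of the original API: typical use of
 * nvgpu_sgt_for_each_sgl() together with the SGL accessors above. The helper
 * name is ours; it totals the length of every chunk an SGT describes.
 */
static inline u64 nvgpu_sgt_example_total_length(struct nvgpu_sgt *sgt)
{
	struct nvgpu_sgl *sgl;
	u64 total = 0;

	/* Visit each scatter list entry in turn and accumulate its length. */
	nvgpu_sgt_for_each_sgl(sgl, sgt)
		total += nvgpu_sgt_get_length(sgt, sgl);

	return total;
}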
/*
 * Buffer accessors. Sysmem buffers always have a CPU mapping and vidmem
 * buffers are accessed via PRAMIN.
 */

/* word-indexed offset */
u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w);
/* byte offset (32b-aligned) */
u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
/* memcpy to cpu, offset and size in bytes (32b-aligned) */
void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		    void *dest, u32 size);

/* word-indexed offset */
void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data);
/* byte offset (32b-aligned) */
void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		  u32 data);
/* memcpy from cpu, offset and size in bytes (32b-aligned) */
void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		    void *src, u32 size);
/* size and offset in bytes (32b-aligned), filled with the constant byte c */
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		  u32 c, u32 size);

u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);

u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture,
			    u32 sysmem_mask, u32 sysmem_coh_mask,
			    u32 vidmem_mask);
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
			u32 sysmem_mask, u32 sysmem_coh_mask,
			u32 vidmem_mask);

u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys);
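
/*
 * Illustrative sketch, not part of the original API: writing and reading
 * back a word through the accessors above. The function and values are
 * ours. Note the offset convention: nvgpu_mem_wr32()/nvgpu_mem_rd32() take a
 * word index, while nvgpu_mem_wr()/nvgpu_mem_rd() take a byte offset.
 */
static inline bool nvgpu_mem_example_rw_check(struct gk20a *g,
					      struct nvgpu_mem *mem)
{
	const u32 word = 4;			/* word index */
	const u32 byte_offset = word * 4;	/* same spot as a byte offset */

	nvgpu_mem_wr32(g, mem, word, 0xcafef00d);

	/* Both reads address the same 32-bit word. */
	return nvgpu_mem_rd32(g, mem, word) ==
	       nvgpu_mem_rd(g, mem, byte_offset);
}

#endif /* NVGPU_MEM_H */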