diff options
author | Alex Waterman <alexw@nvidia.com> | 2017-01-12 21:50:34 -0500 |
---|---|---|
committer | Varun Colbert <vcolbert@nvidia.com> | 2017-02-13 21:14:45 -0500 |
commit | aa36d3786aeed6755b9744fed37aad000b582322 (patch) | |
tree | d68d71632a01062e00fc2b057c5a0c37dfda4fb8 /drivers/gpu/nvgpu/include | |
parent | b9194a1c3300e505d22fba97136dd305300397f0 (diff) |
gpu: nvgpu: Organize semaphore_gk20a.[ch]
Move semaphore_gk20a.c to drivers/gpu/nvgpu/common/ since the semaphore
code is common to all chips.
Move the semaphore_gk20a.h header file to drivers/gpu/nvgpu/include/nvgpu
and rename it to semaphore.h. Also update all places where the header
is included to use the new path.
This revealed an odd location for the enum gk20a_mem_rw_flag. This should
be in the mm headers. As a result many places that did not need anything
semaphore related had to include the semaphore header file. Fixing this
oddity allowed the semaphore include to be removed from many C files that
did not need it.
Bug 1799159
Change-Id: Ie017219acf34c4c481747323b9f3ac33e76e064c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1284627
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/include')
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h new file mode 100644 index 00000000..07a27584 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h | |||
@@ -0,0 +1,312 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #ifndef SEMAPHORE_GK20A_H | ||
15 | #define SEMAPHORE_GK20A_H | ||
16 | |||
17 | #include <linux/kref.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <linux/delay.h> | ||
20 | |||
21 | #include <nvgpu/allocator.h> | ||
22 | |||
23 | #include "gk20a/gk20a.h" | ||
24 | #include "gk20a/mm_gk20a.h" | ||
25 | #include "gk20a/channel_gk20a.h" | ||
26 | |||
27 | #define gpu_sema_dbg(fmt, args...) \ | ||
28 | gk20a_dbg(gpu_dbg_sema, fmt, ##args) | ||
29 | #define gpu_sema_verbose_dbg(fmt, args...) \ | ||
30 | gk20a_dbg(gpu_dbg_sema_v, fmt, ##args) | ||
31 | |||
32 | /* | ||
33 | * Max number of channels that can be used is 512. This of course needs to be | ||
34 | * fixed to be dynamic but still fast. | ||
35 | */ | ||
36 | #define SEMAPHORE_POOL_COUNT 512 | ||
37 | #define SEMAPHORE_SIZE 16 | ||
38 | #define SEMAPHORE_SEA_GROWTH_RATE 32 | ||
39 | |||
40 | struct gk20a_semaphore_sea; | ||
41 | |||
42 | /* | ||
43 | * Underlying semaphore data structure. This semaphore can be shared amongst | ||
44 | * other semaphore instances. | ||
45 | */ | ||
46 | struct gk20a_semaphore_int { | ||
47 | int idx; /* Semaphore index. */ | ||
48 | u32 offset; /* Offset into the pool. */ | ||
49 | atomic_t next_value; /* Next available value. */ | ||
50 | u32 *value; /* Current value (access w/ readl()). */ | ||
51 | u32 nr_incrs; /* Number of increments programmed. */ | ||
52 | struct gk20a_semaphore_pool *p; /* Pool that owns this sema. */ | ||
53 | struct channel_gk20a *ch; /* Channel that owns this sema. */ | ||
54 | struct list_head hw_sema_list; /* List of HW semaphores. */ | ||
55 | }; | ||
56 | |||
57 | /* | ||
58 | * A semaphore which the rest of the driver actually uses. This consists of a | ||
59 | * pointer to a real semaphore and a value to wait for. This allows one physical | ||
60 | * semaphore to be shared among an essentially infinite number of submits. | ||
61 | */ | ||
62 | struct gk20a_semaphore { | ||
63 | struct gk20a_semaphore_int *hw_sema; | ||
64 | |||
65 | atomic_t value; | ||
66 | int incremented; | ||
67 | |||
68 | struct kref ref; | ||
69 | }; | ||
70 | |||
71 | /* | ||
72 | * A semaphore pool. Each address space will own exactly one of these. | ||
73 | */ | ||
74 | struct gk20a_semaphore_pool { | ||
75 | struct page *page; /* This pool's page of memory */ | ||
76 | struct list_head pool_list_entry; /* Node for list of pools. */ | ||
77 | void *cpu_va; /* CPU access to the pool. */ | ||
78 | u64 gpu_va; /* GPU access to the pool. */ | ||
79 | u64 gpu_va_ro; /* GPU access to the pool. */ | ||
80 | int page_idx; /* Index into sea bitmap. */ | ||
81 | |||
82 | struct list_head hw_semas; /* List of HW semas. */ | ||
83 | DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE); | ||
84 | |||
85 | struct gk20a_semaphore_sea *sema_sea; /* Sea that owns this pool. */ | ||
86 | |||
87 | struct mutex pool_lock; | ||
88 | |||
89 | /* | ||
90 | * This is the address spaces's personal RW table. Other channels will | ||
91 | * ultimately map this page as RO. | ||
92 | */ | ||
93 | struct sg_table *rw_sg_table; | ||
94 | |||
95 | /* | ||
96 | * This is to keep track of whether the pool has had its sg_table | ||
97 | * updated during sea resizing. | ||
98 | */ | ||
99 | struct sg_table *ro_sg_table; | ||
100 | |||
101 | int mapped; | ||
102 | |||
103 | /* | ||
104 | * Sometimes a channel can be released before other channels are | ||
105 | * done waiting on it. This ref count ensures that the pool doesn't | ||
106 | * go away until all semaphores using this pool are cleaned up first. | ||
107 | */ | ||
108 | struct kref ref; | ||
109 | }; | ||
110 | |||
111 | /* | ||
112 | * A sea of semaphores pools. Each pool is owned by a single VM. Since multiple | ||
113 | * channels can share a VM each channel gets it's own HW semaphore from the | ||
114 | * pool. Channels then allocate regular semaphores - basically just a value that | ||
115 | * signifies when a particular job is done. | ||
116 | */ | ||
117 | struct gk20a_semaphore_sea { | ||
118 | struct list_head pool_list; /* List of pools in this sea. */ | ||
119 | struct gk20a *gk20a; | ||
120 | |||
121 | size_t size; /* Number of pages available. */ | ||
122 | u64 gpu_va; /* GPU virtual address of sema sea. */ | ||
123 | u64 map_size; /* Size of the mapping. */ | ||
124 | |||
125 | /* | ||
126 | * TODO: | ||
127 | * List of pages that we use to back the pools. The number of pages | ||
128 | * can grow dynamically since allocating 512 pages for all channels at | ||
129 | * once would be a tremendous waste. | ||
130 | */ | ||
131 | int page_count; /* Pages allocated to pools. */ | ||
132 | |||
133 | struct sg_table *ro_sg_table; | ||
134 | /* | ||
135 | struct page *pages[SEMAPHORE_POOL_COUNT]; | ||
136 | */ | ||
137 | |||
138 | struct mem_desc sea_mem; | ||
139 | |||
140 | /* | ||
141 | * Can't use a regular allocator here since the full range of pools are | ||
142 | * not always allocated. Instead just use a bitmap. | ||
143 | */ | ||
144 | DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT); | ||
145 | |||
146 | struct mutex sea_lock; /* Lock alloc/free calls. */ | ||
147 | }; | ||
148 | |||
149 | /* | ||
150 | * Semaphore sea functions. | ||
151 | */ | ||
152 | struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a); | ||
153 | int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea, | ||
154 | struct vm_gk20a *vm); | ||
155 | void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea, | ||
156 | struct vm_gk20a *vm); | ||
157 | struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g); | ||
158 | |||
159 | /* | ||
160 | * Semaphore pool functions. | ||
161 | */ | ||
162 | struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc( | ||
163 | struct gk20a_semaphore_sea *sea); | ||
164 | int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool, | ||
165 | struct vm_gk20a *vm); | ||
166 | void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool, | ||
167 | struct vm_gk20a *vm); | ||
168 | u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global); | ||
169 | void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p); | ||
170 | void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p); | ||
171 | |||
172 | /* | ||
173 | * Semaphore functions. | ||
174 | */ | ||
175 | struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch); | ||
176 | void gk20a_semaphore_put(struct gk20a_semaphore *s); | ||
177 | void gk20a_semaphore_get(struct gk20a_semaphore *s); | ||
178 | void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch); | ||
179 | |||
180 | /* | ||
181 | * Return the address of a specific semaphore. | ||
182 | * | ||
183 | * Don't call this on a semaphore you don't own - the VA returned will make no | ||
184 | * sense in your specific channel's VM. | ||
185 | */ | ||
186 | static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s) | ||
187 | { | ||
188 | return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) + | ||
189 | s->hw_sema->offset; | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * Get the global RO address for the semaphore. Can be called on any semaphore | ||
194 | * regardless of whether you own it. | ||
195 | */ | ||
196 | static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s) | ||
197 | { | ||
198 | return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) + | ||
199 | s->hw_sema->offset; | ||
200 | } | ||
201 | |||
202 | static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema) | ||
203 | { | ||
204 | return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) + | ||
205 | hw_sema->offset; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * TODO: handle wrap around... Hmm, how to do this? | ||
210 | */ | ||
211 | static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s) | ||
212 | { | ||
213 | u32 sema_val = readl(s->hw_sema->value); | ||
214 | |||
215 | /* | ||
216 | * If the underlying semaphore value is greater than or equal to | ||
217 | * the value of the semaphore then the semaphore has been signaled | ||
218 | * (a.k.a. released). | ||
219 | */ | ||
220 | return (int)sema_val >= atomic_read(&s->value); | ||
221 | } | ||
222 | |||
223 | static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s) | ||
224 | { | ||
225 | return !gk20a_semaphore_is_released(s); | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * Read the underlying value from a semaphore. | ||
230 | */ | ||
231 | static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s) | ||
232 | { | ||
233 | return readl(s->hw_sema->value); | ||
234 | } | ||
235 | |||
236 | static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s) | ||
237 | { | ||
238 | return (u32)atomic_read(&s->value); | ||
239 | } | ||
240 | |||
241 | static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s) | ||
242 | { | ||
243 | return (u32)atomic_read(&s->hw_sema->next_value); | ||
244 | } | ||
245 | |||
246 | /* | ||
247 | * If @force is set then this will not wait for the underlying semaphore to | ||
248 | * catch up to the passed semaphore. | ||
249 | */ | ||
250 | static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s, | ||
251 | bool force) | ||
252 | { | ||
253 | u32 current_val; | ||
254 | u32 val = gk20a_semaphore_get_value(s); | ||
255 | int attempts = 0; | ||
256 | |||
257 | /* | ||
258 | * Wait until the sema value is 1 less than the write value. That | ||
259 | * way this function is essentially an increment. | ||
260 | * | ||
261 | * TODO: tune the wait a little better. | ||
262 | */ | ||
263 | while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) { | ||
264 | if (force) | ||
265 | break; | ||
266 | msleep(100); | ||
267 | attempts += 1; | ||
268 | if (attempts > 100) { | ||
269 | WARN(1, "Stall on sema release!"); | ||
270 | return; | ||
271 | } | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * If the semaphore has already passed the value we would write then | ||
276 | * this is really just a NO-OP. | ||
277 | */ | ||
278 | if (current_val >= val) | ||
279 | return; | ||
280 | |||
281 | writel(val, s->hw_sema->value); | ||
282 | |||
283 | gpu_sema_verbose_dbg("(c=%d) WRITE %u", | ||
284 | s->hw_sema->ch->hw_chid, val); | ||
285 | } | ||
286 | |||
287 | static inline void gk20a_semaphore_release(struct gk20a_semaphore *s) | ||
288 | { | ||
289 | __gk20a_semaphore_release(s, false); | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * Configure a software based increment on this semaphore. This is useful for | ||
294 | * when we want the GPU to wait on a SW event before processing a channel. | ||
295 | * Another way to describe this is when the GPU needs to wait on a SW pre-fence. | ||
296 | * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which | ||
297 | * then allows the GPU to continue. | ||
298 | * | ||
299 | * Also used to prep a semaphore for an INCR by the GPU. | ||
300 | */ | ||
301 | static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s) | ||
302 | { | ||
303 | BUG_ON(s->incremented); | ||
304 | |||
305 | atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); | ||
306 | s->incremented = 1; | ||
307 | |||
308 | gpu_sema_verbose_dbg("INCR sema for c=%d (%u)", | ||
309 | s->hw_sema->ch->hw_chid, | ||
310 | gk20a_semaphore_next_value(s)); | ||
311 | } | ||
312 | #endif | ||