path: root/drivers/gpu/nvgpu/include
author	Alex Waterman <alexw@nvidia.com>	2017-01-12 21:50:34 -0500
committer	Varun Colbert <vcolbert@nvidia.com>	2017-02-13 21:14:45 -0500
commit	aa36d3786aeed6755b9744fed37aad000b582322 (patch)
tree	d68d71632a01062e00fc2b057c5a0c37dfda4fb8 /drivers/gpu/nvgpu/include
parent	b9194a1c3300e505d22fba97136dd305300397f0 (diff)
gpu: nvgpu: Organize semaphore_gk20a.[ch]
Move semaphore_gk20a.c to drivers/gpu/nvgpu/common/ since the semaphore
code is common to all chips. Move the semaphore_gk20a.h header file to
drivers/gpu/nvgpu/include/nvgpu and rename it to semaphore.h. Also update
all places where the header is included to use the new path.

This revealed an odd location for the enum gk20a_mem_rw_flag. This should
be in the mm headers. As a result many places that did not need anything
semaphore related had to include the semaphore header file. Fixing this
oddity allowed the semaphore include to be removed from many C files that
did not need it.

Bug 1799159

Change-Id: Ie017219acf34c4c481747323b9f3ac33e76e064c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1284627
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
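As an illustration of the include-path update the message describes, consumers of the header change roughly as follows (a minimal sketch; the old include line assumes the header previously lived under gk20a/, which this page does not show):

	/* Before the move (assumed old location under gk20a/): */
	#include "gk20a/semaphore_gk20a.h"

	/* After the move and rename described in the commit message: */
	#include <nvgpu/semaphore.h>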
Diffstat (limited to 'drivers/gpu/nvgpu/include')
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/semaphore.h	312
1 file changed, 312 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
new file mode 100644
index 00000000..07a27584
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -0,0 +1,312 @@
/*
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#ifndef SEMAPHORE_GK20A_H
#define SEMAPHORE_GK20A_H

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/delay.h>

#include <nvgpu/allocator.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "gk20a/channel_gk20a.h"

#define gpu_sema_dbg(fmt, args...) \
	gk20a_dbg(gpu_dbg_sema, fmt, ##args)
#define gpu_sema_verbose_dbg(fmt, args...) \
	gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)

/*
 * Max number of channels that can be used is 512. This of course needs to be
 * fixed to be dynamic but still fast.
 */
#define SEMAPHORE_POOL_COUNT		512
#define SEMAPHORE_SIZE			16
#define SEMAPHORE_SEA_GROWTH_RATE	32

struct gk20a_semaphore_sea;

/*
 * Underlying semaphore data structure. This semaphore can be shared amongst
 * other semaphore instances.
 */
struct gk20a_semaphore_int {
	int idx;			/* Semaphore index. */
	u32 offset;			/* Offset into the pool. */
	atomic_t next_value;		/* Next available value. */
	u32 *value;			/* Current value (access w/ readl()). */
	u32 nr_incrs;			/* Number of increments programmed. */
	struct gk20a_semaphore_pool *p;	/* Pool that owns this sema. */
	struct channel_gk20a *ch;	/* Channel that owns this sema. */
	struct list_head hw_sema_list;	/* List of HW semaphores. */
};

/*
 * A semaphore which the rest of the driver actually uses. This consists of a
 * pointer to a real semaphore and a value to wait for. This allows one physical
 * semaphore to be shared among an essentially infinite number of submits.
 */
struct gk20a_semaphore {
	struct gk20a_semaphore_int *hw_sema;

	atomic_t value;
	int incremented;

	struct kref ref;
};

/*
 * A semaphore pool. Each address space will own exactly one of these.
 */
struct gk20a_semaphore_pool {
	struct page *page;			/* This pool's page of memory */
	struct list_head pool_list_entry;	/* Node for list of pools. */
	void *cpu_va;				/* CPU access to the pool. */
	u64 gpu_va;				/* GPU access to the pool. */
	u64 gpu_va_ro;				/* GPU access to the pool. */
	int page_idx;				/* Index into sea bitmap. */

	struct list_head hw_semas;		/* List of HW semas. */
	DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);

	struct gk20a_semaphore_sea *sema_sea;	/* Sea that owns this pool. */

	struct mutex pool_lock;

	/*
	 * This is the address space's personal RW table. Other channels will
	 * ultimately map this page as RO.
	 */
	struct sg_table *rw_sg_table;

	/*
	 * This is to keep track of whether the pool has had its sg_table
	 * updated during sea resizing.
	 */
	struct sg_table *ro_sg_table;

	int mapped;

	/*
	 * Sometimes a channel can be released before other channels are
	 * done waiting on it. This ref count ensures that the pool doesn't
	 * go away until all semaphores using this pool are cleaned up first.
	 */
	struct kref ref;
};

/*
 * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
 * channels can share a VM, each channel gets its own HW semaphore from the
 * pool. Channels then allocate regular semaphores - basically just a value
 * that signifies when a particular job is done.
 */
struct gk20a_semaphore_sea {
	struct list_head pool_list;	/* List of pools in this sea. */
	struct gk20a *gk20a;

	size_t size;			/* Number of pages available. */
	u64 gpu_va;			/* GPU virtual address of sema sea. */
	u64 map_size;			/* Size of the mapping. */

	/*
	 * TODO:
	 * List of pages that we use to back the pools. The number of pages
	 * can grow dynamically since allocating 512 pages for all channels at
	 * once would be a tremendous waste.
	 */
	int page_count;			/* Pages allocated to pools. */

	struct sg_table *ro_sg_table;
	/*
	struct page *pages[SEMAPHORE_POOL_COUNT];
	*/

	struct mem_desc sea_mem;

	/*
	 * Can't use a regular allocator here since the full range of pools are
	 * not always allocated. Instead just use a bitmap.
	 */
	DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);

	struct mutex sea_lock;		/* Lock alloc/free calls. */
};

/*
 * Semaphore sea functions.
 */
struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
			    struct vm_gk20a *vm);
void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
			       struct vm_gk20a *vm);
struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);

/*
 * Semaphore pool functions.
 */
struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
		struct gk20a_semaphore_sea *sea);
int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
			     struct vm_gk20a *vm);
void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
				struct vm_gk20a *vm);
u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);

/*
 * Semaphore functions.
 */
struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
void gk20a_semaphore_put(struct gk20a_semaphore *s);
void gk20a_semaphore_get(struct gk20a_semaphore *s);
void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch);

/*
 * Return the address of a specific semaphore.
 *
 * Don't call this on a semaphore you don't own - the VA returned will make no
 * sense in your specific channel's VM.
 */
static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
{
	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
		s->hw_sema->offset;
}

/*
 * Get the global RO address for the semaphore. Can be called on any semaphore
 * regardless of whether you own it.
 */
static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
{
	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
		s->hw_sema->offset;
}

static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
{
	return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
		hw_sema->offset;
}

/*
 * TODO: handle wrap around... Hmm, how to do this?
 */
static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
{
	u32 sema_val = readl(s->hw_sema->value);

	/*
	 * If the underlying semaphore value is greater than or equal to
	 * the value of the semaphore then the semaphore has been signaled
	 * (a.k.a. released).
	 */
	return (int)sema_val >= atomic_read(&s->value);
}

static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
{
	return !gk20a_semaphore_is_released(s);
}

/*
 * Read the underlying value from a semaphore.
 */
static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
{
	return readl(s->hw_sema->value);
}

static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
{
	return (u32)atomic_read(&s->value);
}

static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
{
	return (u32)atomic_read(&s->hw_sema->next_value);
}

/*
 * If @force is set then this will not wait for the underlying semaphore to
 * catch up to the passed semaphore.
 */
static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s,
					     bool force)
{
	u32 current_val;
	u32 val = gk20a_semaphore_get_value(s);
	int attempts = 0;

	/*
	 * Wait until the sema value is 1 less than the write value. That
	 * way this function is essentially an increment.
	 *
	 * TODO: tune the wait a little better.
	 */
	while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
		if (force)
			break;
		msleep(100);
		attempts += 1;
		if (attempts > 100) {
			WARN(1, "Stall on sema release!");
			return;
		}
	}

	/*
	 * If the semaphore has already passed the value we would write then
	 * this is really just a NO-OP.
	 */
	if (current_val >= val)
		return;

	writel(val, s->hw_sema->value);

	gpu_sema_verbose_dbg("(c=%d) WRITE %u",
			     s->hw_sema->ch->hw_chid, val);
}

static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
{
	__gk20a_semaphore_release(s, false);
}

/*
 * Configure a software based increment on this semaphore. This is useful for
 * when we want the GPU to wait on a SW event before processing a channel.
 * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
 * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
 * then allows the GPU to continue.
 *
 * Also used to prep a semaphore for an INCR by the GPU.
 */
static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
{
	BUG_ON(s->incremented);

	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
	s->incremented = 1;

	gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
			     s->hw_sema->ch->hw_chid,
			     gk20a_semaphore_next_value(s));
}
#endif
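For orientation, a minimal usage sketch of the API declared above, assuming a valid struct channel_gk20a *ch and a successful allocation (illustrative only, not part of this patch):

	static void sema_usage_example(struct channel_gk20a *ch)
	{
		struct gk20a_semaphore *s;

		/* Allocate a semaphore tied to this channel's HW sema (holds a ref). */
		s = gk20a_semaphore_alloc(ch);
		if (!s)
			return;

		/* Pick the value this semaphore will be released at. */
		gk20a_semaphore_incr(s);

		/* Either the GPU releases it, or SW does (e.g. a SW pre-fence): */
		gk20a_semaphore_release(s);

		WARN_ON(!gk20a_semaphore_is_released(s));

		/* Drop the reference when done. */
		gk20a_semaphore_put(s);
	}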