Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h'):
-rw-r--r-- drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | 303
1 file changed, 250 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
index 1f12e262..58081b56 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -15,17 +15,128 @@
 #define SEMAPHORE_GK20A_H
 
 #include <linux/kref.h>
-#include "gk20a_allocator.h"
+#include <linux/list.h>
+#include <linux/delay.h>
+
+#include "gk20a.h"
 #include "mm_gk20a.h"
+#include "channel_gk20a.h"
+
+/*
+ * Max number of channels that can be used is 512. This of course needs to be
+ * fixed to be dynamic but still fast.
+ */
+#define SEMAPHORE_POOL_COUNT		512
+#define SEMAPHORE_SIZE			16
+#define SEMAPHORE_SEA_GROWTH_RATE	32
+
+struct gk20a_semaphore_sea;
+
+/*
+ * Underlying semaphore data structure. This semaphore can be shared amongst
+ * other semaphore instances.
+ */
+struct gk20a_semaphore_int {
+	int idx;			/* Semaphore index. */
+	u32 offset;			/* Offset into the pool. */
+	atomic_t next_value;		/* Next available value. */
+	u32 *value;			/* Current value (access w/ readl()). */
+	u32 nr_incrs;			/* Number of increments programmed. */
+	struct gk20a_semaphore_pool *p;	/* Pool that owns this sema. */
+	struct channel_gk20a *ch;	/* Channel that owns this sema. */
+	struct list_head hw_sema_list;	/* List of HW semaphores. */
+};
+
+/*
+ * A semaphore which the rest of the driver actually uses. This consists of a
+ * pointer to a real semaphore and a value to wait for. This allows one physical
+ * semaphore to be shared among an essentially infinite number of submits.
+ */
+struct gk20a_semaphore {
+	struct gk20a_semaphore_int *hw_sema;
 
-/* A memory pool for holding semaphores. */
+	atomic_t value;
+	int incremented;
+
+	struct kref ref;
+};
+
+/*
+ * A semaphore pool. Each address space will own exactly one of these.
+ */
 struct gk20a_semaphore_pool {
-	struct mem_desc mem;
-	struct gk20a *g;
-	struct list_head maps;
-	struct mutex maps_mutex;
+	struct page *page;		/* This pool's page of memory. */
+	struct list_head pool_list_entry;	/* Node for list of pools. */
+	void *cpu_va;			/* CPU access to the pool. */
+	u64 gpu_va;			/* GPU access to the pool. */
+	u64 gpu_va_ro;			/* GPU read-only access to the pool. */
+	int page_idx;			/* Index into sea bitmap. */
+
+	struct list_head hw_semas;	/* List of HW semas. */
+	DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
+
+	struct gk20a_semaphore_sea *sema_sea;	/* Sea that owns this pool. */
+
+	struct mutex pool_lock;
+
+	/*
+	 * This is the address space's personal RW table. Other channels will
+	 * ultimately map this page as RO.
+	 */
+	struct sg_table *rw_sg_table;
+
+	/*
+	 * This is to keep track of whether the pool has had its sg_table
+	 * updated during sea resizing.
+	 */
+	struct sg_table *ro_sg_table;
+
+	int mapped;
+
+	/*
+	 * Sometimes a channel can be released before other channels are
+	 * done waiting on it. This ref count ensures that the pool doesn't
+	 * go away until all semaphores using this pool are cleaned up first.
+	 */
 	struct kref ref;
-	struct gk20a_allocator alloc;
+};
+
+/*
+ * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
+ * channels can share a VM, each channel gets its own HW semaphore from the
+ * pool. Channels then allocate regular semaphores - basically just a value
+ * that signifies when a particular job is done.
+ */
+struct gk20a_semaphore_sea {
+	struct list_head pool_list;	/* List of pools in this sea. */
+	struct gk20a *gk20a;
+
+	size_t size;			/* Number of pages available. */
+	u64 gpu_va;			/* GPU virtual address of sema sea. */
+	u64 map_size;			/* Size of the mapping. */
+
+	/*
+	 * TODO:
+	 * List of pages that we use to back the pools. The number of pages
+	 * can grow dynamically since allocating 512 pages for all channels at
+	 * once would be a tremendous waste.
+	 */
+	int page_count;			/* Pages allocated to pools. */
+
+	struct sg_table *ro_sg_table;
+	/*
+	struct page *pages[SEMAPHORE_POOL_COUNT];
+	*/
+
+	struct mem_desc sea_mem;
+
+	/*
+	 * Can't use a regular allocator here since the full range of pools is
+	 * not always allocated. Instead just use a bitmap.
+	 */
+	DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
+
+	struct mutex sea_lock;		/* Lock alloc/free calls. */
 };
 
 enum gk20a_mem_rw_flag {
@@ -34,64 +145,150 @@ enum gk20a_mem_rw_flag {
 	gk20a_mem_flag_write_only = 2,
 };
 
-/* A semaphore pool can be mapped to multiple GPU address spaces. */
-struct gk20a_semaphore_pool_map {
-	u64 gpu_va;
-	enum gk20a_mem_rw_flag rw_flag;
-	struct vm_gk20a *vm;
-	struct list_head list;
-};
-
-/* A semaphore that lives inside a semaphore pool. */
-struct gk20a_semaphore {
-	struct gk20a_semaphore_pool *pool;
-	/*
-	 * value exists within the pool's memory at the specified offset.
-	 * 0=acquired, 1=released.
-	 */
-	u32 offset; /* byte offset within pool */
-	struct kref ref;
-};
-
-/* Create a semaphore pool that can hold at most 'capacity' semaphores. */
-struct gk20a_semaphore_pool *
-gk20a_semaphore_pool_alloc(struct gk20a *, const char *unique_name,
-			   size_t capacity);
-void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *);
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *,
-			     struct vm_gk20a *,
-			     enum gk20a_mem_rw_flag);
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *,
-				struct vm_gk20a *);
-u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *,
-				struct vm_gk20a *);
-
-/* Allocate a semaphore from the semaphore pool. The newly allocated
- * semaphore will be in acquired state (value=0). */
-struct gk20a_semaphore *
-gk20a_semaphore_alloc(struct gk20a_semaphore_pool *);
-void gk20a_semaphore_put(struct gk20a_semaphore *);
-void gk20a_semaphore_get(struct gk20a_semaphore *);
-
-static inline u64 gk20a_semaphore_gpu_va(struct gk20a_semaphore *s,
-					 struct vm_gk20a *vm)
-{
-	return gk20a_semaphore_pool_gpu_va(s->pool, vm) + s->offset;
-}
-
-static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
-{
-	u32 v = gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset);
-
-	/* When often block on value reaching a certain threshold. We must make
-	 * sure that if we get unblocked, we haven't read anything too early. */
-	smp_rmb();
-	return v == 0;
-}
-
-static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
-{
-	smp_wmb();
-	gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 1);
-}
+/*
+ * Semaphore sea functions.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
+int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
+			    struct vm_gk20a *vm);
+void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
+			       struct vm_gk20a *vm);
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);
+
+/*
+ * Semaphore pool functions.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+	struct gk20a_semaphore_sea *sea);
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
+			     struct vm_gk20a *vm);
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
+				struct vm_gk20a *vm);
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);
+
+/*
+ * Semaphore functions.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
+void gk20a_semaphore_put(struct gk20a_semaphore *s);
+void gk20a_semaphore_get(struct gk20a_semaphore *s);
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
+{
+	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
+		s->hw_sema->offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
+{
+	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
+		s->hw_sema->offset;
+}
+
+static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
+{
+	return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
+		hw_sema->offset;
+}
+
+/*
+ * TODO: handle wrap around... Hmm, how to do this?
+ */
+static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
+{
+	u32 sema_val = readl(s->hw_sema->value);
+
+	/*
+	 * If the underlying semaphore value is greater than or equal to
+	 * the value of the semaphore then the semaphore has been signaled
+	 * (a.k.a. released).
+	 */
+	return sema_val >= atomic_read(&s->value);
+}
+
+static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
+{
+	return !gk20a_semaphore_is_released(s);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
+{
+	return readl(s->hw_sema->value);
+}
+
+static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
+{
+	return atomic_read(&s->value);
+}
+
+static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
+{
+	return atomic_read(&s->hw_sema->next_value);
+}
+
+/*
+ * Note - if you call this then any prior semaphores will also be released.
+ */
+static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
+{
+	u32 current_val;
+	u32 val = gk20a_semaphore_get_value(s);
+	int attempts = 0;
+
+	/*
+	 * Wait until the sema value is 1 less than the write value. That
+	 * way this function is essentially an increment.
+	 *
+	 * TODO: tune the wait a little better.
+	 */
+	while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
+		msleep(100);
+		attempts += 1;
+		if (attempts > 100) {
+			WARN(1, "Stall on sema release!");
+			return;
+		}
+	}
+
+	/*
+	 * If the semaphore has already passed the value we would write then
+	 * this is really just a NO-OP.
+	 */
+	if (current_val >= val)
+		return;
+
+	writel(val, s->hw_sema->value);
+}
+
+/*
+ * Configure a software based increment on this semaphore. This is useful for
+ * when we want the GPU to wait on a SW event before processing a channel.
+ * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
+ * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
+ * then allows the GPU to continue.
+ *
+ * Also used to prep a semaphore for an INCR by the GPU.
+ */
+static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
+{
+	BUG_ON(s->incremented);
+
+	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
+	s->incremented = 1;
+}
 #endif
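
The constants at the top of the new header imply the capacity below, assuming 4 KiB pages: each pool is a single page holding PAGE_SIZE / SEMAPHORE_SIZE = 256 HW semaphores, and the sea's bitmap caps the number of pools (one per address space) at SEMAPHORE_POOL_COUNT = 512. The EXAMPLE_* names are illustrative only, not part of the patch.

/* Capacity implied by the constants above, assuming 4 KiB pages. */
#define EXAMPLE_PAGE_SIZE	4096u
#define EXAMPLE_SEMAS_PER_POOL	(EXAMPLE_PAGE_SIZE / SEMAPHORE_SIZE)	/* 256 */
#define EXAMPLE_MAX_POOLS	SEMAPHORE_POOL_COUNT			/* 512 */
#define EXAMPLE_MAX_HW_SEMAS	(EXAMPLE_SEMAS_PER_POOL * EXAMPLE_MAX_POOLS) /* 131072 */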
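
The new layout splits semaphore memory across three levels: one global sea, one page-sized pool per address space, and one HW semaphore per channel. The sketch below, example_vm_sema_init(), shows the per-VM setup implied by the prototypes above; the function name, the error values, and the assumption that gk20a_semaphore_pool_alloc() reports failure with NULL are mine, since only the prototypes are visible in this header.

/*
 * Sketch of per-VM pool setup implied by the prototypes above. The exact
 * call site, error convention and ordering in the real driver are assumed.
 */
static int example_vm_sema_init(struct gk20a *g, struct vm_gk20a *vm,
				struct gk20a_semaphore_pool **out)
{
	struct gk20a_semaphore_sea *sea;
	struct gk20a_semaphore_pool *p;
	int err;

	sea = gk20a_semaphore_get_sea(g);	/* Global sea, shared by all VMs. */
	if (!sea)
		return -ENOMEM;

	p = gk20a_semaphore_pool_alloc(sea);	/* One page-sized pool per VM. */
	if (!p)
		return -ENOMEM;

	err = gk20a_semaphore_pool_map(p, vm);	/* RW mapping for the owner VM. */
	if (err) {
		gk20a_semaphore_pool_put(p);
		return err;
	}

	*out = p;
	return 0;
}

Teardown would mirror this: gk20a_semaphore_pool_unmap() followed by gk20a_semaphore_pool_put().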
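
Each submit then gets its own gk20a_semaphore, which layers a target payload value on top of the channel's shared gk20a_semaphore_int. A minimal SW pre-fence flow, based only on the functions declared above, might look like the following; example_sw_prefence() is an illustrative name, and the pushbuffer programming of the GPU-side semaphore ACQUIRE is elided.

/*
 * Illustrative SW pre-fence flow using only the API declared above.
 * GPU-side method programming and real error handling are omitted.
 */
static int example_sw_prefence(struct channel_gk20a *ch)
{
	struct gk20a_semaphore *s;

	s = gk20a_semaphore_alloc(ch);		/* Backed by ch's HW semaphore. */
	if (!s)
		return -ENOMEM;

	gk20a_semaphore_incr(s);		/* Reserve the next payload value. */

	/*
	 * The GPU would be told to wait for the memory at
	 * gk20a_semaphore_gpu_ro_va(s) to reach gk20a_semaphore_get_value(s).
	 */

	gk20a_semaphore_release(s);		/* SW signals; the GPU may proceed. */
	WARN_ON(!gk20a_semaphore_is_released(s));

	gk20a_semaphore_put(s);			/* Drop the submit's reference. */
	return 0;
}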
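
gk20a_semaphore_is_released() carries a TODO about wrap-around: the plain unsigned >= comparison misbehaves once the 32-bit payload counter wraps. One conventional fix (not what this patch does) is the signed-difference comparison used by constructs such as the kernel's time_after(); sema_payload_reached() below is a hypothetical helper showing the idea.

/*
 * Hypothetical wrap-tolerant replacement for the unsigned compare in
 * gk20a_semaphore_is_released(). Correct as long as the HW value and the
 * target are within 2^31 of each other.
 */
static inline bool sema_payload_reached(u32 hw_val, u32 target)
{
	return (s32)(hw_val - target) >= 0;
}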