Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h')

 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | 303 +++++++++++++++++++++-----
 1 file changed, 250 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
index 1f12e262..58081b56 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -15,17 +15,128 @@
 #define SEMAPHORE_GK20A_H
 
 #include <linux/kref.h>
-#include "gk20a_allocator.h"
+#include <linux/list.h>
+#include <linux/delay.h>
+
+#include "gk20a.h"
 #include "mm_gk20a.h"
+#include "channel_gk20a.h"
+
+/*
+ * Max number of channels that can be used is 512. This of course needs to be
+ * fixed to be dynamic but still fast.
+ */
+#define SEMAPHORE_POOL_COUNT            512
+#define SEMAPHORE_SIZE                  16
+#define SEMAPHORE_SEA_GROWTH_RATE       32
+
+struct gk20a_semaphore_sea;
+
+/*
+ * Underlying semaphore data structure. This semaphore can be shared amongst
+ * other semaphore instances.
+ */
+struct gk20a_semaphore_int {
+        int idx;                        /* Semaphore index. */
+        u32 offset;                     /* Offset into the pool. */
+        atomic_t next_value;            /* Next available value. */
+        u32 *value;                     /* Current value (access w/ readl()). */
+        u32 nr_incrs;                   /* Number of increments programmed. */
+        struct gk20a_semaphore_pool *p; /* Pool that owns this sema. */
+        struct channel_gk20a *ch;       /* Channel that owns this sema. */
+        struct list_head hw_sema_list;  /* List of HW semaphores. */
+};
+
+/*
+ * A semaphore which the rest of the driver actually uses. This consists of a
+ * pointer to a real semaphore and a value to wait for. This allows one physical
+ * semaphore to be shared among an essentially infinite number of submits.
+ */
+struct gk20a_semaphore {
+        struct gk20a_semaphore_int *hw_sema;
 
-/* A memory pool for holding semaphores. */
+        atomic_t value;
+        int incremented;
+
+        struct kref ref;
+};
+
+/*
+ * A semaphore pool. Each address space will own exactly one of these.
+ */
 struct gk20a_semaphore_pool {
-        struct mem_desc mem;
-        struct gk20a *g;
-        struct list_head maps;
-        struct mutex maps_mutex;
+        struct page *page;              /* This pool's page of memory. */
+        struct list_head pool_list_entry; /* Node for list of pools. */
+        void *cpu_va;                   /* CPU access to the pool. */
+        u64 gpu_va;                     /* GPU access to the pool (RW). */
+        u64 gpu_va_ro;                  /* GPU access to the pool (RO). */
+        int page_idx;                   /* Index into sea bitmap. */
+
+        struct list_head hw_semas;      /* List of HW semas. */
+        DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
+
+        struct gk20a_semaphore_sea *sema_sea; /* Sea that owns this pool. */
+
+        struct mutex pool_lock;
+
+        /*
+         * This is the address space's personal RW table. Other channels will
+         * ultimately map this page as RO.
+         */
+        struct sg_table *rw_sg_table;
+
+        /*
+         * This is to keep track of whether the pool has had its sg_table
+         * updated during sea resizing.
+         */
+        struct sg_table *ro_sg_table;
+
+        int mapped;
+
+        /*
+         * Sometimes a channel can be released before other channels are
+         * done waiting on it. This ref count ensures that the pool doesn't
+         * go away until all semaphores using this pool are cleaned up first.
+         */
         struct kref ref;
-        struct gk20a_allocator alloc;
+};
+
+/*
+ * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
+ * channels can share a VM, each channel gets its own HW semaphore from the
+ * pool. Channels then allocate regular semaphores - basically just a value
+ * that signifies when a particular job is done.
+ */
+struct gk20a_semaphore_sea {
+        struct list_head pool_list;     /* List of pools in this sea. */
+        struct gk20a *gk20a;
+
+        size_t size;                    /* Number of pages available. */
+        u64 gpu_va;                     /* GPU virtual address of sema sea. */
+        u64 map_size;                   /* Size of the mapping. */
+
+        /*
+         * TODO:
+         * List of pages that we use to back the pools. The number of pages
+         * can grow dynamically since allocating 512 pages for all channels at
+         * once would be a tremendous waste.
+         */
+        int page_count;                 /* Pages allocated to pools. */
+
+        struct sg_table *ro_sg_table;
+        /*
+        struct page *pages[SEMAPHORE_POOL_COUNT];
+        */
+
+        struct mem_desc sea_mem;
+
+        /*
+         * Can't use a regular allocator here since the full range of pools
+         * is not always allocated. Instead just use a bitmap.
+         */
+        DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
+
+        struct mutex sea_lock;          /* Lock alloc/free calls. */
 };
 
 enum gk20a_mem_rw_flag {
@@ -34,64 +145,150 @@ enum gk20a_mem_rw_flag {
         gk20a_mem_flag_write_only = 2,
 };
 
-/* A semaphore pool can be mapped to multiple GPU address spaces. */
-struct gk20a_semaphore_pool_map {
-        u64 gpu_va;
-        enum gk20a_mem_rw_flag rw_flag;
-        struct vm_gk20a *vm;
-        struct list_head list;
-};
+/*
+ * Semaphore sea functions.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
+int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
+                            struct vm_gk20a *vm);
+void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
+                               struct vm_gk20a *vm);
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);
+
+/*
+ * Semaphore pool functions.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+                                struct gk20a_semaphore_sea *sea);
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
+                             struct vm_gk20a *vm);
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
+                                struct vm_gk20a *vm);
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);
+
+/*
+ * Semaphore functions.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
+void gk20a_semaphore_put(struct gk20a_semaphore *s);
+void gk20a_semaphore_get(struct gk20a_semaphore *s);
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
+{
+        return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
+                s->hw_sema->offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
+{
+        return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
+                s->hw_sema->offset;
+}
+
+static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
+{
+        return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
+                hw_sema->offset;
+}
+
+/*
+ * TODO: handle wrap around... Hmm, how to do this?
+ */
+static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
+{
+        u32 sema_val = readl(s->hw_sema->value);
 
-/* A semaphore that lives inside a semaphore pool. */
-struct gk20a_semaphore {
-        struct gk20a_semaphore_pool *pool;
         /*
-         * value exists within the pool's memory at the specified offset.
-         * 0=acquired, 1=released.
+         * If the underlying semaphore value is greater than or equal to
+         * the value of the semaphore, then the semaphore has been signaled
+         * (a.k.a. released).
          */
-        u32 offset; /* byte offset within pool */
-        struct kref ref;
-};
+        return sema_val >= atomic_read(&s->value);
+}
 
-/* Create a semaphore pool that can hold at most 'capacity' semaphores. */
-struct gk20a_semaphore_pool *
-gk20a_semaphore_pool_alloc(struct gk20a *, const char *unique_name,
-                           size_t capacity);
-void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *);
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *,
-                             struct vm_gk20a *,
-                             enum gk20a_mem_rw_flag);
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *,
-                                struct vm_gk20a *);
-u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *,
-                                struct vm_gk20a *);
-
-/* Allocate a semaphore from the semaphore pool. The newly allocated
- * semaphore will be in acquired state (value=0). */
-struct gk20a_semaphore *
-gk20a_semaphore_alloc(struct gk20a_semaphore_pool *);
-void gk20a_semaphore_put(struct gk20a_semaphore *);
-void gk20a_semaphore_get(struct gk20a_semaphore *);
-
-static inline u64 gk20a_semaphore_gpu_va(struct gk20a_semaphore *s,
-                                         struct vm_gk20a *vm)
+static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
 {
-        return gk20a_semaphore_pool_gpu_va(s->pool, vm) + s->offset;
+        return !gk20a_semaphore_is_released(s);
 }
 
-static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
+/*
+ * Read the underlying value from a semaphore.
+ */
+static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
 {
-        u32 v = gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset);
+        return readl(s->hw_sema->value);
+}
 
-        /* When often block on value reaching a certain threshold. We must make
-         * sure that if we get unblocked, we haven't read anything too early. */
-        smp_rmb();
-        return v == 0;
+static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
+{
+        return atomic_read(&s->value);
 }
 
+static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
+{
+        return atomic_read(&s->hw_sema->next_value);
+}
+
+/*
+ * Note - if you call this then any prior semaphores will also be released.
+ */
 static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
 {
-        smp_wmb();
-        gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 1);
+        u32 current_val;
+        u32 val = gk20a_semaphore_get_value(s);
+        int attempts = 0;
+
+        /*
+         * Wait until the sema value is 1 less than the write value. That
+         * way this function is essentially an increment.
+         *
+         * TODO: tune the wait a little better.
+         */
+        while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
+                msleep(100);
+                attempts += 1;
+                if (attempts > 100) {
+                        WARN(1, "Stall on sema release!");
+                        return;
+                }
+        }
+
+        /*
+         * If the semaphore has already passed the value we would write then
+         * this is really just a NO-OP.
+         */
+        if (current_val >= val)
+                return;
+
+        writel(val, s->hw_sema->value);
+}
+
+/*
+ * Configure a software-based increment on this semaphore. This is useful for
+ * when we want the GPU to wait on a SW event before processing a channel.
+ * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
+ * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
+ * then allows the GPU to continue.
+ *
+ * Also used to prep a semaphore for an INCR by the GPU.
+ */
+static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
+{
+        BUG_ON(s->incremented);
+
+        atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
+        s->incremented = 1;
 }
 #endif
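
Putting the new API together: the sea is per-GPU, a pool is per-VM (per address space), and each channel draws one HW semaphore from its VM's pool. Below is a minimal sketch of the intended call sequence, assuming already-initialized g/vm/ch pointers; example_submit_with_sema() and its call site are hypothetical, not part of this header, and error handling is elided.

/*
 * Hypothetical usage sketch - not driver code. Assumes vm and ch are
 * already set up; error paths and cleanup are elided for brevity.
 */
static int example_submit_with_sema(struct gk20a *g, struct vm_gk20a *vm,
                                    struct channel_gk20a *ch)
{
        struct gk20a_semaphore_sea *sea = gk20a_semaphore_get_sea(g);
        struct gk20a_semaphore_pool *p = gk20a_semaphore_pool_alloc(sea);
        struct gk20a_semaphore *s;

        /* RW mapping for the owning VM; other VMs see the page RO. */
        if (gk20a_semaphore_pool_map(p, vm))
                return -ENOMEM;

        s = gk20a_semaphore_alloc(ch);  /* Uses ch's HW sema from the pool. */
        gk20a_semaphore_incr(s);        /* Reserve the payload to signal. */

        /*
         * A GPU ACQUIRE method would poll gk20a_semaphore_gpu_ro_va(s);
         * SW signals it by writing the reserved payload:
         */
        gk20a_semaphore_release(s);

        WARN_ON(!gk20a_semaphore_is_released(s));
        gk20a_semaphore_put(s);
        return 0;
}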
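
The release path deserves a worked example, since gk20a_semaphore_release() acts as an increment rather than a plain write. Tracing the values through (numbers illustrative only):

/*
 * Worked example (illustrative, not driver code). Three submits share one
 * HW semaphore, so gk20a_semaphore_incr() hands out payloads in order:
 *
 *   incr() for job A: next_value 0 -> 1, A's s->value = 1
 *   incr() for job B: next_value 1 -> 2, B's s->value = 2
 *   incr() for job C: next_value 2 -> 3, C's s->value = 3
 *
 * If B's release() runs first (val == 2), it spins until the HW value
 * reads 1 (val - 1), i.e. until A has signaled, then writes 2 - an
 * increment. When A's release() runs afterwards it sees current_val (2)
 * >= val (1) and returns early, which is exactly the "any prior
 * semaphores will also be released" behaviour noted above. And
 * is_released() is the same comparison from the reader's side:
 * readl(value) >= s->value.
 */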
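
As for the wrap-around TODO on gk20a_semaphore_is_released(): one standard fix, not what this header does, is the kernel's time_after()-style signed-distance compare, which stays correct across u32 wrap as long as the in-flight distance stays under 2^31.

/*
 * Hypothetical wrap-safe variant of the plain >= check above; a sketch
 * of the signed-distance idiom, not the driver's implementation.
 */
static inline bool example_sema_released_wrapsafe(u32 sema_val, u32 wait_val)
{
        return (s32)(sema_val - wait_val) >= 0;
}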