/*
 * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#ifndef SEMAPHORE_GK20A_H
#define SEMAPHORE_GK20A_H

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/delay.h>

#include "gk20a.h"
#include "mm_gk20a.h"
#include "channel_gk20a.h"
#include "gk20a_allocator.h"

#define gpu_sema_dbg(fmt, args...)		\
	gk20a_dbg(gpu_dbg_sema, fmt, ##args)
#define gpu_sema_verbose_dbg(fmt, args...)	\
	gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)

/*
 * At most 512 channels can currently be used. This of course needs to be
 * made dynamic eventually, while remaining fast.
 */
#define SEMAPHORE_POOL_COUNT		512
#define SEMAPHORE_SIZE			16
#define SEMAPHORE_SEA_GROWTH_RATE	32
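
/*
 * Illustrative geometry, assuming 4 KiB pages: each pool page holds
 * PAGE_SIZE / SEMAPHORE_SIZE = 4096 / 16 = 256 semaphore slots, so a
 * fully populated sea of SEMAPHORE_POOL_COUNT (512) pools spans
 * 512 * 4096 bytes = 2 MiB of semaphore memory.
 */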

struct gk20a_semaphore_sea;

/*
 * Underlying semaphore data structure. This semaphore can be shared amongst
 * other semaphore instances.
 */
struct gk20a_semaphore_int {
	int idx;			/* Semaphore index. */
	u32 offset;			/* Offset into the pool. */
	atomic_t next_value;		/* Next available value. */
	u32 *value;			/* Current value (access w/ readl()). */
	u32 nr_incrs;			/* Number of increments programmed. */
	struct gk20a_semaphore_pool *p;	/* Pool that owns this sema. */
	struct channel_gk20a *ch;	/* Channel that owns this sema. */
	struct list_head hw_sema_list;	/* List of HW semaphores. */
};

/*
 * A semaphore which the rest of the driver actually uses. This consists of a
 * pointer to a real semaphore and a value to wait for. This allows one physical
 * semaphore to be shared among an essentially infinite number of submits.
 */
struct gk20a_semaphore {
	struct gk20a_semaphore_int *hw_sema;

	atomic_t value;
	int incremented;

	struct kref ref;
};
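
/*
 * A minimal lifecycle sketch (hypothetical caller; locking and error
 * handling omitted), using only functions declared in this header:
 *
 *	struct gk20a_semaphore *s = gk20a_semaphore_alloc(ch);
 *
 *	if (!s)
 *		return -ENOMEM;
 *
 *	gk20a_semaphore_incr(s);	// Pick the value this sema signals at.
 *	 ...				// Program waits on the sema's GPU VA.
 *	gk20a_semaphore_release(s);	// SW signals; waiters may proceed.
 *	gk20a_semaphore_put(s);		// Drop this submit's reference.
 */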

/*
 * A semaphore pool. Each address space will own exactly one of these.
 */
struct gk20a_semaphore_pool {
	struct page *page;			/* This pool's page of memory. */
	struct list_head pool_list_entry;	/* Node for list of pools. */
	void *cpu_va;				/* CPU access to the pool. */
	u64 gpu_va;				/* GPU access to the pool (RW mapping). */
	u64 gpu_va_ro;				/* GPU access to the pool (RO mapping). */
	int page_idx;				/* Index into sea bitmap. */

	struct list_head hw_semas;		/* List of HW semas. */
	DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);

	struct gk20a_semaphore_sea *sema_sea;	/* Sea that owns this pool. */

	struct mutex pool_lock;

	/*
	 * This is the address space's personal RW table. Other channels will
	 * ultimately map this page as RO.
	 */
	struct sg_table *rw_sg_table;

	/*
	 * This is to keep track of whether the pool has had its sg_table
	 * updated during sea resizing.
	 */
	struct sg_table *ro_sg_table;

	int mapped;				/* Nonzero once the pool is mapped. */

	/*
	 * Sometimes a channel can be released before other channels are
	 * done waiting on it. This ref count ensures that the pool doesn't
	 * go away until all semaphores using this pool are cleaned up first.
	 */
	struct kref ref;
};

/*
 * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
 * channels can share a VM, each channel gets its own HW semaphore from its
 * VM's pool. Channels then allocate regular semaphores - basically just a
 * value that signifies when a particular job is done.
 */
struct gk20a_semaphore_sea {
	struct list_head pool_list;	/* List of pools in this sea. */
	struct gk20a *gk20a;

	size_t size;			/* Number of pages available. */
	u64 gpu_va;			/* GPU virtual address of sema sea. */
	u64 map_size;			/* Size of the mapping. */

	/*
	 * TODO:
	 * List of pages that we use to back the pools. The number of pages
	 * can grow dynamically since allocating 512 pages for all channels at
	 * once would be a tremendous waste.
	 */
	int page_count;			/* Pages allocated to pools. */

	struct sg_table *ro_sg_table;
	/*
	struct page *pages[SEMAPHORE_POOL_COUNT];
	*/

	struct mem_desc sea_mem;

	/*
	 * Can't use a regular allocator here since the full range of pools is
	 * not always allocated. Instead just use a bitmap.
	 */
	DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);

	struct mutex sea_lock;		/* Lock alloc/free calls. */
};
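
/*
 * Ownership sketch (illustrative only), pieced together from the comments
 * on the structures above:
 *
 *	gk20a_semaphore_sea		one per GPU
 *	  gk20a_semaphore_pool		one per VM (address space)
 *	    gk20a_semaphore_int		one HW sema per channel in that VM
 *	      gk20a_semaphore		one per submit; refcounted
 */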

enum gk20a_mem_rw_flag {
	gk20a_mem_flag_none = 0,
	gk20a_mem_flag_read_only = 1,
	gk20a_mem_flag_write_only = 2,
};

/*
 * Semaphore sea functions.
 */
struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
			    struct vm_gk20a *vm);
void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
			       struct vm_gk20a *vm);
struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);

/*
 * Semaphore pool functions.
 */
struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
	struct gk20a_semaphore_sea *sea);
int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
			     struct vm_gk20a *vm);
void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
				struct vm_gk20a *vm);
u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);

/*
 * Semaphore functions.
 */
struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
void gk20a_semaphore_put(struct gk20a_semaphore *s);
void gk20a_semaphore_get(struct gk20a_semaphore *s);
void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch);

/*
 * Return the address of a specific semaphore.
 *
 * Don't call this on a semaphore you don't own - the VA returned will make no
 * sense in your specific channel's VM.
 */
static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
{
	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
		s->hw_sema->offset;
}

/*
 * Get the global RO address for the semaphore. Can be called on any semaphore
 * regardless of whether you own it.
 */
static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
{
	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
		s->hw_sema->offset;
}

static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
{
	return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
		hw_sema->offset;
}

/*
 * TODO: handle wrap around... Hmm, how to do this?
 */
static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
{
	u32 sema_val = readl(s->hw_sema->value);

	/*
	 * If the underlying HW semaphore value is greater than or equal to
	 * this semaphore instance's trigger value, then the semaphore has
	 * been signaled (a.k.a. released).
	 */
	return (int)sema_val >= atomic_read(&s->value);
}
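
/*
 * A wrap-safe variant of the check above, sketched for reference only
 * (not what the driver currently does): compare through a signed
 * difference, as the kernel's time_after() jiffies helpers do, so the
 * result stays correct across u32 wrap-around provided the two values
 * are within 2^31 of each other:
 *
 *	return (s32)(sema_val - (u32)atomic_read(&s->value)) >= 0;
 */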

static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
{
	return !gk20a_semaphore_is_released(s);
}
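
/*
 * A waiter-side sketch (hypothetical; a real caller would hook into the
 * driver's fence machinery rather than busy-wait like this):
 *
 *	gk20a_semaphore_get(s);			// Hold a ref while waiting.
 *	while (!gk20a_semaphore_is_released(s))
 *		msleep(10);
 *	gk20a_semaphore_put(s);
 */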

/*
 * Read the underlying value from a semaphore.
 */
static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
{
	return readl(s->hw_sema->value);
}

static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
{
	return (u32)atomic_read(&s->value);
}

static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
{
	return (u32)atomic_read(&s->hw_sema->next_value);
}

/*
 * If @force is set then this will not wait for the underlying semaphore to
 * catch up to the passed semaphore.
 */
static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s,
					     bool force)
{
	u32 current_val;
	u32 val = gk20a_semaphore_get_value(s);
	int attempts = 0;

	/*
	 * Wait until the sema value is at most 1 behind the value we are
	 * about to write, so that the write behaves as an increment.
	 *
	 * TODO: tune the wait a little better.
	 */
	while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
		if (force)
			break;
		msleep(100);
		attempts += 1;
		if (attempts > 100) {
			WARN(1, "Stall on sema release!");
			return;
		}
	}

	/*
	 * If the semaphore has already passed the value we would write then
	 * this is really just a NO-OP.
	 */
	if (current_val >= val)
		return;

	writel(val, s->hw_sema->value);

	gpu_sema_verbose_dbg("(c=%d) WRITE %u",
			     s->hw_sema->ch->hw_chid, val);
}

static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
{
	__gk20a_semaphore_release(s, false);
}

/*
 * Configure a software based increment on this semaphore. This is useful for
 * when we want the GPU to wait on a SW event before processing a channel.
 * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
 * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
 * then allows the GPU to continue.
 *
 * Also used to prep a semaphore for an INCR by the GPU.
 */
static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
{
	BUG_ON(s->incremented);

	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
	s->incremented = 1;

	gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
			     s->hw_sema->ch->hw_chid,
			     gk20a_semaphore_next_value(s));
}
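
/*
 * Pre-fence flow sketch (hypothetical ordering, per the comment above):
 *
 *	gk20a_semaphore_incr(s);	// 1. SW picks the release value.
 *	 ...				// 2. GPU job ACQUIREs on the sema VA.
 *	 ...				// 3. The SW pre-fence signals, then:
 *	gk20a_semaphore_release(s);	// 4. Write the value; GPU proceeds.
 */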
#endif