Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h')

 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | 303 +++++++++++++++++++++-----
 1 file changed, 250 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
index 1f12e262..58081b56 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -15,17 +15,128 @@
 #define SEMAPHORE_GK20A_H
 
 #include <linux/kref.h>
-#include "gk20a_allocator.h"
+#include <linux/list.h>
+#include <linux/delay.h>
+
+#include "gk20a.h"
 #include "mm_gk20a.h"
+#include "channel_gk20a.h"
+
+/*
+ * Max number of channels that can be used is 512. This of course needs to be
+ * fixed to be dynamic but still fast.
+ */
+#define SEMAPHORE_POOL_COUNT            512
+#define SEMAPHORE_SIZE                  16
+#define SEMAPHORE_SEA_GROWTH_RATE       32
+
+struct gk20a_semaphore_sea;
+
+/*
+ * Underlying semaphore data structure. This semaphore can be shared amongst
+ * other semaphore instances.
+ */
+struct gk20a_semaphore_int {
+        int idx;                        /* Semaphore index. */
+        u32 offset;                     /* Offset into the pool. */
+        atomic_t next_value;            /* Next available value. */
+        u32 *value;                     /* Current value (access w/ readl()). */
+        u32 nr_incrs;                   /* Number of increments programmed. */
+        struct gk20a_semaphore_pool *p; /* Pool that owns this sema. */
+        struct channel_gk20a *ch;       /* Channel that owns this sema. */
+        struct list_head hw_sema_list;  /* List of HW semaphores. */
+};
+
+/*
+ * A semaphore which the rest of the driver actually uses. This consists of a
+ * pointer to a real semaphore and a value to wait for. This allows one physical
+ * semaphore to be shared among an essentially infinite number of submits.
+ */
+struct gk20a_semaphore {
+        struct gk20a_semaphore_int *hw_sema;
 
-/* A memory pool for holding semaphores. */
+        atomic_t value;
+        int incremented;
+
+        struct kref ref;
+};
+
+/*
+ * A semaphore pool. Each address space will own exactly one of these.
+ */
 struct gk20a_semaphore_pool {
-        struct mem_desc mem;
-        struct gk20a *g;
-        struct list_head maps;
-        struct mutex maps_mutex;
+        struct page *page;              /* This pool's page of memory. */
+        struct list_head pool_list_entry; /* Node for list of pools. */
+        void *cpu_va;                   /* CPU access to the pool. */
+        u64 gpu_va;                     /* GPU access to the pool (RW). */
+        u64 gpu_va_ro;                  /* GPU access to the pool (RO). */
+        int page_idx;                   /* Index into sea bitmap. */
+
+        struct list_head hw_semas;      /* List of HW semas. */
+        DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
+
+        struct gk20a_semaphore_sea *sema_sea; /* Sea that owns this pool. */
+
+        struct mutex pool_lock;
+
+        /*
+         * This is the address space's personal RW table. Other channels will
+         * ultimately map this page as RO.
+         */
+        struct sg_table *rw_sg_table;
+
+        /*
+         * This is to keep track of whether the pool has had its sg_table
+         * updated during sea resizing.
+         */
+        struct sg_table *ro_sg_table;
+
+        int mapped;
+
+        /*
+         * Sometimes a channel can be released before other channels are
+         * done waiting on it. This ref count ensures that the pool doesn't
+         * go away until all semaphores using this pool are cleaned up first.
+         */
         struct kref ref;
-        struct gk20a_allocator alloc;
+};
+
+/*
+ * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
+ * channels can share a VM, each channel gets its own HW semaphore from the
+ * pool. Channels then allocate regular semaphores - basically just a value
+ * that signifies when a particular job is done.
+ */
+struct gk20a_semaphore_sea {
+        struct list_head pool_list;     /* List of pools in this sea. */
+        struct gk20a *gk20a;
+
+        size_t size;                    /* Number of pages available. */
+        u64 gpu_va;                     /* GPU virtual address of sema sea. */
+        u64 map_size;                   /* Size of the mapping. */
+
+        /*
+         * TODO:
+         * List of pages that we use to back the pools. The number of pages
+         * can grow dynamically since allocating 512 pages for all channels at
+         * once would be a tremendous waste.
+         */
+        int page_count;                 /* Pages allocated to pools. */
+
+        struct sg_table *ro_sg_table;
+        /*
+        struct page *pages[SEMAPHORE_POOL_COUNT];
+        */
+
+        struct mem_desc sea_mem;
+
+        /*
+         * Can't use a regular allocator here since the full range of pools
+         * is not always allocated. Instead just use a bitmap.
+         */
+        DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
+
+        struct mutex sea_lock;          /* Lock alloc/free calls. */
 };
 
 enum gk20a_mem_rw_flag {
@@ -34,64 +145,150 @@ enum gk20a_mem_rw_flag {
         gk20a_mem_flag_write_only = 2,
 };
 
-/* A semaphore pool can be mapped to multiple GPU address spaces. */
-struct gk20a_semaphore_pool_map {
-        u64 gpu_va;
-        enum gk20a_mem_rw_flag rw_flag;
-        struct vm_gk20a *vm;
-        struct list_head list;
-};
+/*
+ * Semaphore sea functions.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
+int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
+                            struct vm_gk20a *vm);
+void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
+                               struct vm_gk20a *vm);
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);
+
+/*
+ * Semaphore pool functions.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+                                struct gk20a_semaphore_sea *sea);
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
+                             struct vm_gk20a *vm);
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
+                                struct vm_gk20a *vm);
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);
+
+/*
+ * Semaphore functions.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
+void gk20a_semaphore_put(struct gk20a_semaphore *s);
+void gk20a_semaphore_get(struct gk20a_semaphore *s);
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
+{
+        return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
+                s->hw_sema->offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
+{
+        return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
+                s->hw_sema->offset;
+}
+
+static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
+{
+        return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
+                hw_sema->offset;
+}
+
+/*
+ * TODO: handle wrap around... Hmm, how to do this?
+ */
+static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
+{
+        u32 sema_val = readl(s->hw_sema->value);
 
-/* A semaphore that lives inside a semaphore pool. */
-struct gk20a_semaphore {
-        struct gk20a_semaphore_pool *pool;
         /*
-         * value exists within the pool's memory at the specified offset.
-         * 0=acquired, 1=released.
+         * If the underlying semaphore value is greater than or equal to
+         * the value of the semaphore, then the semaphore has been signaled
+         * (a.k.a. released).
          */
-        u32 offset; /* byte offset within pool */
-        struct kref ref;
-};
+        return sema_val >= atomic_read(&s->value);
+}
 
-/* Create a semaphore pool that can hold at most 'capacity' semaphores. */
-struct gk20a_semaphore_pool *
-gk20a_semaphore_pool_alloc(struct gk20a *, const char *unique_name,
-                           size_t capacity);
-void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *);
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *,
-                             struct vm_gk20a *,
-                             enum gk20a_mem_rw_flag);
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *,
-                                struct vm_gk20a *);
-u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *,
-                                struct vm_gk20a *);
-
-/* Allocate a semaphore from the semaphore pool. The newly allocated
- * semaphore will be in acquired state (value=0). */
-struct gk20a_semaphore *
-gk20a_semaphore_alloc(struct gk20a_semaphore_pool *);
-void gk20a_semaphore_put(struct gk20a_semaphore *);
-void gk20a_semaphore_get(struct gk20a_semaphore *);
-
-static inline u64 gk20a_semaphore_gpu_va(struct gk20a_semaphore *s,
-                                         struct vm_gk20a *vm)
+static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
 {
-        return gk20a_semaphore_pool_gpu_va(s->pool, vm) + s->offset;
+        return !gk20a_semaphore_is_released(s);
 }
 
-static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
+/*
+ * Read the underlying value from a semaphore.
+ */
+static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
 {
-        u32 v = gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset);
+        return readl(s->hw_sema->value);
+}
 
-        /* When often block on value reaching a certain threshold. We must make
-         * sure that if we get unblocked, we haven't read anything too early. */
-        smp_rmb();
-        return v == 0;
+static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
+{
+        return atomic_read(&s->value);
 }
 
+static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
+{
+        return atomic_read(&s->hw_sema->next_value);
+}
+
+/*
+ * Note - if you call this then any prior semaphores will also be released.
+ */
 static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
 {
-        smp_wmb();
-        gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 1);
+        u32 current_val;
+        u32 val = gk20a_semaphore_get_value(s);
+        int attempts = 0;
+
+        /*
+         * Wait until the sema value is 1 less than the write value. That
+         * way this function is essentially an increment.
+         *
+         * TODO: tune the wait a little better.
+         */
+        while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
+                msleep(100);
+                attempts += 1;
+                if (attempts > 100) {
+                        WARN(1, "Stall on sema release!");
+                        return;
+                }
+        }
+
+        /*
+         * If the semaphore has already passed the value we would write then
+         * this is really just a NO-OP.
+         */
+        if (current_val >= val)
+                return;
+
+        writel(val, s->hw_sema->value);
+}
+
+/*
+ * Configure a software-based increment on this semaphore. This is useful for
+ * when we want the GPU to wait on a SW event before processing a channel.
+ * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
+ * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
+ * then allows the GPU to continue.
+ *
+ * Also used to prep a semaphore for an INCR by the GPU.
+ */
+static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
+{
+        BUG_ON(s->incremented);
+
+        atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
+        s->incremented = 1;
 }
 #endif
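
Putting the new API together: the sea is per-GPU, a pool is per-VM (per address space), and each channel draws one HW semaphore from its VM's pool. Below is a minimal sketch of the intended call sequence, assuming already-initialized g/vm/ch pointers; example_submit_with_sema() and its call site are hypothetical, not part of this header, and error handling is elided.

/*
 * Hypothetical usage sketch - not driver code. Assumes vm and ch are
 * already set up; error paths and cleanup are elided for brevity.
 */
static int example_submit_with_sema(struct gk20a *g, struct vm_gk20a *vm,
                                    struct channel_gk20a *ch)
{
        struct gk20a_semaphore_sea *sea = gk20a_semaphore_get_sea(g);
        struct gk20a_semaphore_pool *p = gk20a_semaphore_pool_alloc(sea);
        struct gk20a_semaphore *s;

        /* RW mapping for the owning VM; other VMs see the page RO. */
        if (gk20a_semaphore_pool_map(p, vm))
                return -ENOMEM;

        s = gk20a_semaphore_alloc(ch);  /* Uses ch's HW sema from the pool. */
        gk20a_semaphore_incr(s);        /* Reserve the payload to signal. */

        /*
         * A GPU ACQUIRE method would poll gk20a_semaphore_gpu_ro_va(s);
         * SW signals it by writing the reserved payload:
         */
        gk20a_semaphore_release(s);

        WARN_ON(!gk20a_semaphore_is_released(s));
        gk20a_semaphore_put(s);
        return 0;
}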
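
The release path deserves a worked example, since gk20a_semaphore_release() acts as an increment rather than a plain write. Tracing the values through (numbers illustrative only):

/*
 * Worked example (illustrative, not driver code). Three submits share one
 * HW semaphore, so gk20a_semaphore_incr() hands out payloads in order:
 *
 *   incr() for job A: next_value 0 -> 1, A's s->value = 1
 *   incr() for job B: next_value 1 -> 2, B's s->value = 2
 *   incr() for job C: next_value 2 -> 3, C's s->value = 3
 *
 * If B's release() runs first (val == 2), it spins until the HW value
 * reads 1 (val - 1), i.e. until A has signaled, then writes 2 - an
 * increment. When A's release() runs afterwards it sees current_val (2)
 * >= val (1) and returns early, which is exactly the "any prior
 * semaphores will also be released" behaviour noted above. And
 * is_released() is the same comparison from the reader's side:
 * readl(value) >= s->value.
 */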
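
As for the wrap-around TODO on gk20a_semaphore_is_released(): one standard fix, not what this header does, is the kernel's time_after()-style signed-distance compare, which stays correct across u32 wrap as long as the in-flight distance stays under 2^31.

/*
 * Hypothetical wrap-safe variant of the plain >= check above; a sketch
 * of the signed-distance idiom, not the driver's implementation.
 */
static inline bool example_sema_released_wrapsafe(u32 sema_val, u32 wait_val)
{
        return (s32)(sema_val - wait_val) >= 0;
}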