From e6b3d1e87f7f2d91c97bf260d4609f17d62ff8bc Mon Sep 17 00:00:00 2001
From: Lauri Peltonen <lpeltonen@nvidia.com>
Date: Tue, 25 Feb 2014 14:44:44 +0200
Subject: gpu: nvgpu: Add gk20a semaphore APIs

Add semaphore_gk20a.c/h that implement a new semaphore management API
for the gk20a driver. The API introduces two entities, 'semaphore pools'
and 'semaphores'.

Semaphore pools are memory areas dedicated to hosting one or more
semaphores. Typically, one pool equals one 4K page. A semaphore pool
is always mapped into kernel memory, and it can be mapped to and
unmapped from GPU address spaces using gk20a_semaphore_pool_map/unmap.

Semaphores are backed by 16 bytes of memory allocated from a semaphore
pool. The value of a semaphore can be 0=acquired or 1=released. When
allocated, the semaphores are initialized to the acquired state. They
can be released, or their releasing can be waited for by the CPU or GPU.

Semaphores are intended to be used only once, and after they are
released they should be freed so that the slot within the semaphore
pool can be reused. However, GPU jobs must take references to the
semaphores that they use (just as they take references on the memory
buffers that they use) so that the semaphore backing memory is not
reused too soon.

Bug 1450122
Bug 1445450

Change-Id: I3fd35f34ca55035decc3e06a9c0ede20c1d48db9
Signed-off-by: Lauri Peltonen <lpeltonen@nvidia.com>
Reviewed-on: http://git-master/r/374842
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/Makefile          |   1 +
 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 191 ++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h |  97 +++++++++++++++
 3 files changed, 289 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
 create mode 100644 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h

diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
index 81ae027e..e5eb817d 100644
--- a/drivers/gpu/nvgpu/gk20a/Makefile
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -19,6 +19,7 @@ nvgpu-y := \
 	mm_gk20a.o \
 	pmu_gk20a.o \
 	priv_ring_gk20a.o \
+	semaphore_gk20a.o \
 	clk_gk20a.o \
 	therm_gk20a.o \
 	gr_ctx_gk20a_sim.o \
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
new file mode 100644
index 00000000..55fa0e32
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -0,0 +1,191 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+ *
+ * GK20A Semaphores
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "semaphore_gk20a.h"
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include "gk20a.h"
+#include "mm_gk20a.h"
+
+static const int SEMAPHORE_SIZE = 16;
+
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d,
+		const char *unique_name, size_t capacity)
+{
+	struct gk20a_semaphore_pool *p;
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	kref_init(&p->ref);
+	INIT_LIST_HEAD(&p->maps);
+	mutex_init(&p->maps_mutex);
+	p->dev = d;
+
+	/* Alloc one 4k page of semaphores per channel. */
+	p->size = roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE);
+	p->cpu_va = dma_alloc_coherent(d, p->size, &p->iova, GFP_KERNEL);
+	if (!p->cpu_va)
+		goto clean_up;
+	if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size))
+		goto clean_up;
+
+	if (gk20a_allocator_init(&p->alloc, unique_name, 0,
+			     p->size, SEMAPHORE_SIZE))
+		goto clean_up;
+
+	gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va,
+		(u64)sg_dma_address(p->sgt->sgl), (u64)sg_phys(p->sgt->sgl));
+	return p;
+clean_up:
+	if (p->cpu_va)
+		dma_free_coherent(d, p->size, p->cpu_va, p->iova);
+	if (p->sgt)
+		gk20a_free_sgtable(&p->sgt);
+	kfree(p);
+	return NULL;
+}
+
+static void gk20a_semaphore_pool_free(struct kref *ref)
+{
+	struct gk20a_semaphore_pool *p =
+		container_of(ref, struct gk20a_semaphore_pool, ref);
+	mutex_lock(&p->maps_mutex);
+	WARN_ON(!list_empty(&p->maps));
+	mutex_unlock(&p->maps_mutex);
+	gk20a_free_sgtable(&p->sgt);
+	dma_free_coherent(p->dev, p->size, p->cpu_va, p->iova);
+	gk20a_allocator_destroy(&p->alloc);
+	kfree(p);
+}
+
+static void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
+{
+	kref_get(&p->ref);
+}
+
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
+{
+	kref_put(&p->ref, gk20a_semaphore_pool_free);
+}
+
+static struct gk20a_semaphore_pool_map *
+gk20a_semaphore_pool_find_map(struct gk20a_semaphore_pool *p,
+			      struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_pool_map *map, *found = NULL;
+	mutex_lock(&p->maps_mutex);
+	list_for_each_entry(map, &p->maps, list) {
+		if (map->vm == vm) {
+			found = map;
+			break;
+		}
+	}
+	mutex_unlock(&p->maps_mutex);
+	return found;
+}
+
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
+			     struct vm_gk20a *vm,
+			     enum gk20a_mem_rw_flag rw_flag)
+{
+	struct gk20a_semaphore_pool_map *map;
+
+	WARN_ON(gk20a_semaphore_pool_find_map(p, vm));
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+	map->vm = vm;
+	map->rw_flag = rw_flag;
+	map->gpu_va = gk20a_gmmu_map(vm, &p->sgt, p->size,
+				     0/*uncached*/, rw_flag);
+	if (!map->gpu_va) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	mutex_lock(&p->maps_mutex);
+	list_add(&map->list, &p->maps);
+	mutex_unlock(&p->maps_mutex);
+	return 0;
+}
+
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
+		struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_pool_map *map =
+		gk20a_semaphore_pool_find_map(p, vm);
+	if (!map)
+		return;
+	gk20a_gmmu_unmap(vm, map->gpu_va, p->size, map->rw_flag);
+	list_del(&map->list);
+	kfree(map);
+}
+
+u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p,
+		struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_pool_map *map =
+		gk20a_semaphore_pool_find_map(p, vm);
+	if (!map)
+		return 0;
+	return map->gpu_va;
+}
+
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
+{
+	struct gk20a_semaphore *s;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return NULL;
+
+	if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE)) {
+		gk20a_err(pool->dev, "failed to allocate semaphore");
+		kfree(s);
+		return NULL;
+	}
+
+	gk20a_semaphore_pool_get(pool);
+	s->pool = pool;
+
+	kref_init(&s->ref);
+	s->value = (volatile u32 *)((uintptr_t)pool->cpu_va + s->offset);
+	*s->value = 0; /* Initially acquired. */
+	gk20a_dbg_info("created semaphore offset=%d, value_cpu=%p, value=%d",
+			s->offset, s->value, *s->value);
+	return s;
+}
+
+static void gk20a_semaphore_free(struct kref *ref)
+{
+	struct gk20a_semaphore *s =
+		container_of(ref, struct gk20a_semaphore, ref);
+
+	s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE);
+	gk20a_semaphore_pool_put(s->pool);
+	kfree(s);
+}
+
+void gk20a_semaphore_put(struct gk20a_semaphore *s)
+{
+	kref_put(&s->ref, gk20a_semaphore_free);
+}
+
+void gk20a_semaphore_get(struct gk20a_semaphore *s)
+{
+	kref_get(&s->ref);
+}
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
new file mode 100644
index 00000000..214db398
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -0,0 +1,97 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+ *
+ * GK20A Semaphores
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GK20A_SEMAPHORE_H_
+#define _GK20A_SEMAPHORE_H_
+
+#include <linux/kref.h>
+#include "gk20a_allocator.h"
+#include "mm_gk20a.h"
+
+/* A memory pool for holding semaphores. */
+struct gk20a_semaphore_pool {
+	void *cpu_va;
+	dma_addr_t iova;
+	size_t size;
+	struct device *dev;
+	struct sg_table *sgt;
+	struct list_head maps;
+	struct mutex maps_mutex;
+	struct kref ref;
+	struct gk20a_allocator alloc;
+};
+
+/* A semaphore pool can be mapped to multiple GPU address spaces. */
+struct gk20a_semaphore_pool_map {
+	u64 gpu_va;
+	enum gk20a_mem_rw_flag rw_flag;
+	struct vm_gk20a *vm;
+	struct list_head list;
+};
+
+/* A semaphore that lives inside a semaphore pool. */
+struct gk20a_semaphore {
+	struct gk20a_semaphore_pool *pool;
+	u32 offset; /* byte offset within pool */
+	struct kref ref;
+	/* value is a pointer within the pool's coherent cpu_va.
+	 * It is shared between CPU and GPU, hence volatile. */
+	volatile u32 *value; /* 0=acquired, 1=released */
+};
+
+/* Create a semaphore pool that can hold at most 'capacity' semaphores. */
+struct gk20a_semaphore_pool *
+gk20a_semaphore_pool_alloc(struct device *, const char *unique_name,
+			   size_t capacity);
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *);
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *,
+			     struct vm_gk20a *,
+			     enum gk20a_mem_rw_flag);
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *,
+				struct vm_gk20a *);
+u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *,
+				struct vm_gk20a *);
+
+/* Allocate a semaphore from the semaphore pool. The newly allocated
+ * semaphore will be in acquired state (value=0). */
+struct gk20a_semaphore *
+gk20a_semaphore_alloc(struct gk20a_semaphore_pool *);
+void gk20a_semaphore_put(struct gk20a_semaphore *);
+void gk20a_semaphore_get(struct gk20a_semaphore *);
+
+static inline u64 gk20a_semaphore_gpu_va(struct gk20a_semaphore *s,
+					 struct vm_gk20a *vm)
+{
+	return gk20a_semaphore_pool_gpu_va(s->pool, vm) + s->offset;
+}
+
+static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
+{
+	u32 v = *s->value;
+
+	/* We often block on value reaching a certain threshold. We must make
+	 * sure that if we get unblocked, we haven't read anything too early. */
+	smp_rmb();
+	return v == 0;
+}
+
+static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
+{
+	smp_wmb();
+	*s->value = 1;
+}
+#endif
-- 
cgit v1.2.2