From aa36d3786aeed6755b9744fed37aad000b582322 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Thu, 12 Jan 2017 18:50:34 -0800
Subject: gpu: nvgpu: Organize semaphore_gk20a.[ch]

Move semaphore_gk20a.c to drivers/gpu/nvgpu/common/ since the semaphore
code is common to all chips.

Move the semaphore_gk20a.h header file to drivers/gpu/nvgpu/include/nvgpu
and rename it to semaphore.h. Also update all places where the header is
included to use the new path.

This revealed an odd location for the enum gk20a_mem_rw_flag. This should
be in the mm headers. As a result many places that did not need anything
semaphore related had to include the semaphore header file. Fixing this
oddity allowed the semaphore include to be removed from many C files that
did not need it.

Bug 1799159

Change-Id: Ie017219acf34c4c481747323b9f3ac33e76e064c
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1284627
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 312 ++++++++++++++++++++++++++++
 1 file changed, 312 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/semaphore.h

diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
new file mode 100644
index 00000000..07a27584
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef SEMAPHORE_GK20A_H
+#define SEMAPHORE_GK20A_H
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+
+#include <nvgpu/allocator.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+#include "gk20a/channel_gk20a.h"
+
+#define gpu_sema_dbg(fmt, args...)		\
+	gk20a_dbg(gpu_dbg_sema, fmt, ##args)
+#define gpu_sema_verbose_dbg(fmt, args...)	\
+	gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)
+
+/*
+ * Max number of channels that can be used is 512. This of course needs to be
+ * fixed to be dynamic but still fast.
+ */
+#define SEMAPHORE_POOL_COUNT		512
+#define SEMAPHORE_SIZE			16
+#define SEMAPHORE_SEA_GROWTH_RATE	32
+
+struct gk20a_semaphore_sea;
+
+/*
+ * Underlying semaphore data structure. This semaphore can be shared amongst
+ * other semaphore instances.
+ */
+struct gk20a_semaphore_int {
+	int idx;			/* Semaphore index. */
+	u32 offset;			/* Offset into the pool. */
+	atomic_t next_value;		/* Next available value. */
+	u32 *value;			/* Current value (access w/ readl()). */
+	u32 nr_incrs;			/* Number of increments programmed. */
+	struct gk20a_semaphore_pool *p;	/* Pool that owns this sema. */
+	struct channel_gk20a *ch;	/* Channel that owns this sema. */
+	struct list_head hw_sema_list;	/* List of HW semaphores. */
+};
+
+/*
+ * A semaphore which the rest of the driver actually uses. This consists of a
+ * pointer to a real semaphore and a value to wait for. This allows one
+ * physical semaphore to be shared among an essentially infinite number of
+ * submits.
+ */
+struct gk20a_semaphore {
+	struct gk20a_semaphore_int *hw_sema;
+
+	atomic_t value;
+	int incremented;
+
+	struct kref ref;
+};
+
+/*
+ * A semaphore pool. Each address space will own exactly one of these.
+ */
+struct gk20a_semaphore_pool {
+	struct page *page;			/* This pool's page of memory. */
+	struct list_head pool_list_entry;	/* Node for list of pools. */
+	void *cpu_va;				/* CPU access to the pool. */
+	u64 gpu_va;				/* GPU access to the pool. */
+	u64 gpu_va_ro;				/* GPU read-only access to the pool. */
+	int page_idx;				/* Index into sea bitmap. */
+
+	struct list_head hw_semas;		/* List of HW semas. */
+	DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
+
+	struct gk20a_semaphore_sea *sema_sea;	/* Sea that owns this pool. */
+
+	struct mutex pool_lock;
+
+	/*
+	 * This is the address space's personal RW table. Other channels will
+	 * ultimately map this page as RO.
+	 */
+	struct sg_table *rw_sg_table;
+
+	/*
+	 * This is to keep track of whether the pool has had its sg_table
+	 * updated during sea resizing.
+	 */
+	struct sg_table *ro_sg_table;
+
+	int mapped;
+
+	/*
+	 * Sometimes a channel can be released before other channels are
+	 * done waiting on it. This ref count ensures that the pool doesn't
+	 * go away until all semaphores using this pool are cleaned up first.
+	 */
+	struct kref ref;
+};
+
+/*
+ * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
+ * channels can share a VM, each channel gets its own HW semaphore from the
+ * pool. Channels then allocate regular semaphores - basically just a value
+ * that signifies when a particular job is done.
+ */
+struct gk20a_semaphore_sea {
+	struct list_head pool_list;	/* List of pools in this sea. */
+	struct gk20a *gk20a;
+
+	size_t size;			/* Number of pages available. */
+	u64 gpu_va;			/* GPU virtual address of sema sea. */
+	u64 map_size;			/* Size of the mapping. */
+
+	/*
+	 * TODO:
+	 * List of pages that we use to back the pools. The number of pages
+	 * can grow dynamically since allocating 512 pages for all channels
+	 * at once would be a tremendous waste.
+	 */
+	int page_count;			/* Pages allocated to pools. */
+
+	struct sg_table *ro_sg_table;
+	/*
+	struct page *pages[SEMAPHORE_POOL_COUNT];
+	*/
+
+	struct mem_desc sea_mem;
+
+	/*
+	 * Can't use a regular allocator here since the full range of pools
+	 * are not always allocated. Instead just use a bitmap.
+	 */
+	DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
+
+	struct mutex sea_lock;		/* Lock alloc/free calls. */
+};
+
+/*
+ * Semaphore sea functions.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
+int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
+			    struct vm_gk20a *vm);
+void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
+			       struct vm_gk20a *vm);
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);
+
+/*
+ * Semaphore pool functions.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+	struct gk20a_semaphore_sea *sea);
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
+			     struct vm_gk20a *vm);
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
+				struct vm_gk20a *vm);
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);
+
+/*
+ * Semaphore functions.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
+void gk20a_semaphore_put(struct gk20a_semaphore *s);
+void gk20a_semaphore_get(struct gk20a_semaphore *s);
+void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch);
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
+{
+	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
+		s->hw_sema->offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
+{
+	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
+		s->hw_sema->offset;
+}
+
+static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
+{
+	return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
+		hw_sema->offset;
+}
+
+/*
+ * TODO: handle wrap around... Hmm, how to do this?
+ */
+static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
+{
+	u32 sema_val = readl(s->hw_sema->value);
+
+	/*
+	 * If the underlying value (what the GPU sees) is greater than or
+	 * equal to the value this semaphore instance waits for, then the
+	 * semaphore has been signaled (a.k.a. released).
+	 */
+	return (int)sema_val >= atomic_read(&s->value);
+}
+
+static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
+{
+	return !gk20a_semaphore_is_released(s);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
+{
+	return readl(s->hw_sema->value);
+}
+
+static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
+{
+	return (u32)atomic_read(&s->value);
+}
+
+static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
+{
+	return (u32)atomic_read(&s->hw_sema->next_value);
+}
+
+/*
+ * If @force is set then this will not wait for the underlying semaphore to
+ * catch up to the passed semaphore.
+ */
+static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s,
+					     bool force)
+{
+	u32 current_val;
+	u32 val = gk20a_semaphore_get_value(s);
+	int attempts = 0;
+
+	/*
+	 * Wait until the sema value is 1 less than the write value. That
+	 * way this function is essentially an increment.
+	 *
+	 * TODO: tune the wait a little better.
+	 */
+	while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
+		if (force)
+			break;
+		msleep(100);
+		attempts += 1;
+		if (attempts > 100) {
+			WARN(1, "Stall on sema release!");
+			return;
+		}
+	}
+
+	/*
+	 * If the semaphore has already passed the value we would write then
+	 * this is really just a NO-OP.
+	 */
+	if (current_val >= val)
+		return;
+
+	writel(val, s->hw_sema->value);
+
+	gpu_sema_verbose_dbg("(c=%d) WRITE %u",
+			     s->hw_sema->ch->hw_chid, val);
+}
+
+static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
+{
+	__gk20a_semaphore_release(s, false);
+}
+
+/*
+ * Configure a software based increment on this semaphore. This is useful
+ * for when we want the GPU to wait on a SW event before processing a
+ * channel. Another way to describe this is when the GPU needs to wait on
+ * a SW pre-fence. The pre-fence signals SW which in turn calls
+ * gk20a_semaphore_release() which then allows the GPU to continue.
+ *
+ * Also used to prep a semaphore for an INCR by the GPU.
+ */
+static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
+{
+	BUG_ON(s->incremented);
+
+	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
+	s->incremented = 1;
+
+	gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
+			     s->hw_sema->ch->hw_chid,
+			     gk20a_semaphore_next_value(s));
+}
+#endif
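
A note on the sea's bitmap allocation: since pools come and go independently,
a regular allocator would fit poorly, so a slot is claimed by scanning
pools_alloced under sea_lock, as the comment in struct gk20a_semaphore_sea
says. A sketch of what such a claim can look like; the function name is
illustrative, not from this patch, and the real allocation path lives in the
semaphore .c file this change moves:

/* Illustrative only: claim the first free pool slot, or -ENOMEM. */
static int sema_sea_alloc_pool_idx(struct gk20a_semaphore_sea *sea)
{
	int idx;

	mutex_lock(&sea->sea_lock);
	/* find_first_zero_bit() returns the bitmap size when full. */
	idx = find_first_zero_bit(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
	if (idx >= SEMAPHORE_POOL_COUNT) {
		mutex_unlock(&sea->sea_lock);
		return -ENOMEM;
	}
	set_bit(idx, sea->pools_alloced);
	mutex_unlock(&sea->sea_lock);

	return idx;
}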
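
On the wrap-around TODO in gk20a_semaphore_is_released(): the plain
`(int)sema_val >= atomic_read(&s->value)` comparison stops being meaningful
once the 32-bit counter wraps. A common remedy for monotonic counters - shown
below as a standalone user-space model, not something this header implements -
is to compare the signed difference of the two values, which stays correct as
long as they remain within 2^31 of each other:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "has the counter reached the threshold?" check. */
static bool counter_reached(uint32_t hw_value, uint32_t threshold)
{
	return (int32_t)(hw_value - threshold) >= 0;
}

int main(void)
{
	/* 0xfffffffe is four steps short of threshold 2 after wrap,
	 * so the check correctly reports "not released yet". */
	printf("%d\n", counter_reached(0xfffffffeu, 2)); /* prints 0 */

	/* Once the counter wraps past the threshold it reads released. */
	printf("%d\n", counter_reached(3, 2)); /* prints 1 */
	return 0;
}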
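
Finally, the pre-fence comment above gk20a_semaphore_incr() implies a call
sequence along the following lines. Only the gk20a_semaphore_*() calls are
declared in this header; the function name, error handling, and the omitted
pushbuffer step are stand-ins, not code from the driver:

/* Hypothetical SW pre-fence flow built from this header's API. */
static int submit_with_sw_prefence(struct channel_gk20a *ch)
{
	struct gk20a_semaphore *s = gk20a_semaphore_alloc(ch);

	if (!s)
		return -ENOMEM;

	/* Reserve the next value on the channel's HW semaphore. */
	gk20a_semaphore_incr(s);

	/*
	 * Program the GPU to ACQUIRE gk20a_semaphore_gpu_ro_va(s) at
	 * gk20a_semaphore_next_value(s) here (pushbuffer setup omitted).
	 * Later, when the SW pre-fence signals, release unblocks the GPU:
	 */
	gk20a_semaphore_release(s);

	gk20a_semaphore_put(s);
	return 0;
}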