summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDebarshi Dutta <ddutta@nvidia.com>2018-04-23 07:56:51 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-05-10 11:40:11 -0400
commit70e69e2686527990865b221a60e0ec1e9a53d316 (patch)
tree623d0c2612162227370f7b6eb5388fc5b0e11326
parent4dfd6e43cfe303c8b23421ef32738db2ee52e291 (diff)
gpu: nvgpu: adapt gk20a_channel_syncpt to use os_fence
This patch adapts gk20a_channel_syncpt to use os_fence for both the post-fence and the pre-fence (wait) use cases.

Jira NVGPU-66

Change-Id: I49627d1f88d52a53511a02f5de60fed6df8350de
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1676631
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/Makefile4
-rw-r--r--drivers/gpu/nvgpu/common/linux/os_fence_android.c14
-rw-r--r--drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c121
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c148
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h6
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/os_fence.h19
7 files changed, 213 insertions, 103 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 9a46f12c..5af7de35 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -164,6 +164,10 @@ nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o \
164 common/linux/os_fence_android.o \ 164 common/linux/os_fence_android.o \
165 common/linux/os_fence_android_sema.o 165 common/linux/os_fence_android_sema.o
166 166
167ifeq ($(CONFIG_TEGRA_GK20A_NVHOST), y)
168nvgpu-$(CONFIG_SYNC) += common/linux/os_fence_android_syncpt.o
169endif
170
167nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o \ 171nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o \
168 common/linux/pci_usermode.o \ 172 common/linux/pci_usermode.o \
169 173
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android.c b/drivers/gpu/nvgpu/common/linux/os_fence_android.c
index d689a2a8..d8f70639 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android.c
@@ -55,15 +55,15 @@ void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
55int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out, 55int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
56 struct channel_gk20a *c, int fd) 56 struct channel_gk20a *c, int fd)
57{ 57{
58 int err; 58 int err = -ENOSYS;
59 59
60 err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); 60#ifdef CONFIG_TEGRA_GK20A_NVHOST
61 err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
62#endif
63
64 if (err)
65 err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
61 66
62 /* TO-DO
63 * check if fence is empty and if CONFIG_TEGRA_GK20A_NVHOST
64 * is enabled, try to get a sync_fence using
65 * corresponding nvhost method.
66 */
67 if (err) 67 if (err)
68 nvgpu_err(c->g, "error obtaining fence from fd %d", fd); 68 nvgpu_err(c->g, "error obtaining fence from fd %d", fd);
69 69
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
new file mode 100644
index 00000000..76def831
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
@@ -0,0 +1,121 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/errno.h>
18
19#include <nvgpu/types.h>
20#include <nvgpu/os_fence.h>
21#include <nvgpu/linux/os_fence_android.h>
22#include <nvgpu/nvhost.h>
23#include <nvgpu/atomic.h>
24
25#include "gk20a/gk20a.h"
26#include "gk20a/channel_gk20a.h"
27#include "gk20a/sync_gk20a.h"
28#include "gk20a/channel_sync_gk20a.h"
29#include "gk20a/mm_gk20a.h"
30
31#include "../drivers/staging/android/sync.h"
32
33int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s,
34 struct priv_cmd_entry *wait_cmd,
35 struct channel_gk20a *c,
36 int max_wait_cmds)
37{
38 int err;
39 int wait_cmd_size;
40 int num_wait_cmds;
41 int i;
42 u32 wait_id;
43 struct sync_pt *pt;
44
45 struct sync_fence *sync_fence = (struct sync_fence *)s->priv;
46
47 if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds)
48 return -EINVAL;
49
50 /* validate syncpt ids */
51 for (i = 0; i < sync_fence->num_fences; i++) {
52 pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
53 wait_id = nvgpu_nvhost_sync_pt_id(pt);
54 if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
55 c->g->nvhost_dev, wait_id)) {
56 return -EINVAL;
57 }
58 }
59
60 num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
61 if (num_wait_cmds == 0)
62 return 0;
63
64 wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
65 err = gk20a_channel_alloc_priv_cmdbuf(c,
66 wait_cmd_size * num_wait_cmds, wait_cmd);
67 if (err) {
68 nvgpu_err(c->g,
69 "not enough priv cmd buffer space");
70 return err;
71 }
72
73 for (i = 0; i < sync_fence->num_fences; i++) {
74 struct fence *f = sync_fence->cbs[i].sync_pt;
75 struct sync_pt *pt = sync_pt_from_fence(f);
76 u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
77 u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);
78
79 err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value,
80 wait_cmd, wait_cmd_size, i, true);
81 }
82
83 WARN_ON(i != num_wait_cmds);
84
85 return 0;
86}
87
88static const struct nvgpu_os_fence_ops syncpt_ops = {
89 .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd,
90 .drop_ref = nvgpu_os_fence_android_drop_ref,
91};
92
93int nvgpu_os_fence_syncpt_create(
94 struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
95 struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
96{
97 struct sync_fence *fence = nvgpu_nvhost_sync_create_fence(
98 nvhost_dev, id, thresh, "fence");
99
100 if (!fence) {
101 nvgpu_err(c->g, "error constructing fence %s", "fence");
102 return -ENOMEM;
103 }
104
105 nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
106
107 return 0;
108}
109
110int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out,
111 struct channel_gk20a *c, int fd)
112{
113 struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd);
114
115 if (!fence)
116 return -ENOMEM;
117
118 nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
119
120 return 0;
121} \ No newline at end of file
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index a0b0ac1e..16898593 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -51,6 +51,39 @@ struct gk20a_channel_syncpt {
51 struct nvgpu_mem syncpt_buf; 51 struct nvgpu_mem syncpt_buf;
52}; 52};
53 53
54int gk20a_channel_gen_syncpt_wait_cmd(struct channel_gk20a *c,
55 u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
56 u32 wait_cmd_size, int pos, bool preallocated)
57{
58 int err = 0;
59 bool is_expired = nvgpu_nvhost_syncpt_is_expired_ext(
60 c->g->nvhost_dev, id, thresh);
61
62 if (is_expired) {
63 if (preallocated) {
64 nvgpu_memset(c->g, wait_cmd->mem,
65 (wait_cmd->off + pos * wait_cmd_size) * sizeof(u32),
66 0, wait_cmd_size * sizeof(u32));
67 }
68 } else {
69 if (!preallocated) {
70 err = gk20a_channel_alloc_priv_cmdbuf(c,
71 c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
72 if (err) {
73 nvgpu_err(c->g, "not enough priv cmd buffer space");
74 return err;
75 }
76 }
77 nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
78 id, c->vm->syncpt_ro_map_gpu_va);
79 c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
80 pos * wait_cmd_size, id, thresh,
81 c->vm->syncpt_ro_map_gpu_va);
82 }
83
84 return 0;
85}
86
54static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, 87static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
55 u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd) 88 u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd)
56{ 89{
@@ -58,108 +91,36 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
58 container_of(s, struct gk20a_channel_syncpt, ops); 91 container_of(s, struct gk20a_channel_syncpt, ops);
59 struct channel_gk20a *c = sp->c; 92 struct channel_gk20a *c = sp->c;
60 int err = 0; 93 int err = 0;
94 u32 wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
61 95
62 if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(sp->nvhost_dev, id)) 96 if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(sp->nvhost_dev, id))
63 return -EINVAL; 97 return -EINVAL;
64 98
65 if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev, id, thresh)) 99 err = gk20a_channel_gen_syncpt_wait_cmd(c, id, thresh,
66 return 0; 100 wait_cmd, wait_cmd_size, 0, false);
67 101
68 err = gk20a_channel_alloc_priv_cmdbuf(c, 102 return err;
69 c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
70 if (err) {
71 nvgpu_err(c->g,
72 "not enough priv cmd buffer space");
73 return err;
74 }
75
76 nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
77 id, sp->c->vm->syncpt_ro_map_gpu_va);
78 c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id,
79 thresh, c->vm->syncpt_ro_map_gpu_va);
80
81 return 0;
82} 103}
83 104
84static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, 105static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
85 struct priv_cmd_entry *wait_cmd, int max_wait_cmds) 106 struct priv_cmd_entry *wait_cmd, int max_wait_cmds)
86{ 107{
87#ifdef CONFIG_SYNC 108 struct nvgpu_os_fence os_fence = {0};
88 int i;
89 int num_wait_cmds;
90 struct sync_fence *sync_fence;
91 struct sync_pt *pt;
92 struct gk20a_channel_syncpt *sp = 109 struct gk20a_channel_syncpt *sp =
93 container_of(s, struct gk20a_channel_syncpt, ops); 110 container_of(s, struct gk20a_channel_syncpt, ops);
94 struct channel_gk20a *c = sp->c; 111 struct channel_gk20a *c = sp->c;
95 u32 wait_id;
96 int err = 0; 112 int err = 0;
97 u32 wait_cmd_size = 0;
98
99 sync_fence = nvgpu_nvhost_sync_fdget(fd);
100 if (!sync_fence)
101 return -EINVAL;
102 113
103 if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) { 114 err = nvgpu_os_fence_fdget(&os_fence, c, fd);
104 sync_fence_put(sync_fence); 115 if (err)
105 return -EINVAL; 116 return -EINVAL;
106 }
107
108 /* validate syncpt ids */
109 for (i = 0; i < sync_fence->num_fences; i++) {
110 pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
111 wait_id = nvgpu_nvhost_sync_pt_id(pt);
112 if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
113 sp->nvhost_dev, wait_id)) {
114 sync_fence_put(sync_fence);
115 return -EINVAL;
116 }
117 }
118 117
119 num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); 118 err = os_fence.ops->program_waits(&os_fence,
120 if (num_wait_cmds == 0) { 119 wait_cmd, c, max_wait_cmds);
121 sync_fence_put(sync_fence);
122 return 0;
123 }
124 wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
125 err = gk20a_channel_alloc_priv_cmdbuf(c,
126 wait_cmd_size * num_wait_cmds,
127 wait_cmd);
128 if (err) {
129 nvgpu_err(c->g,
130 "not enough priv cmd buffer space");
131 sync_fence_put(sync_fence);
132 return err;
133 }
134
135 i = 0;
136 for (i = 0; i < sync_fence->num_fences; i++) {
137 struct fence *f = sync_fence->cbs[i].sync_pt;
138 struct sync_pt *pt = sync_pt_from_fence(f);
139 u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
140 u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);
141
142 if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev,
143 wait_id, wait_value)) {
144 nvgpu_memset(c->g, wait_cmd->mem,
145 (wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
146 0, wait_cmd_size * sizeof(u32));
147 } else {
148 nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
149 wait_id, sp->syncpt_buf.gpu_va);
150 c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
151 i * wait_cmd_size, wait_id, wait_value,
152 c->vm->syncpt_ro_map_gpu_va);
153 }
154 }
155 120
156 WARN_ON(i != num_wait_cmds); 121 os_fence.ops->drop_ref(&os_fence);
157 sync_fence_put(sync_fence);
158 122
159 return 0; 123 return err;
160#else
161 return -ENODEV;
162#endif
163} 124}
164 125
165static void gk20a_channel_syncpt_update(void *priv, int nr_completed) 126static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
@@ -185,6 +146,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
185 container_of(s, struct gk20a_channel_syncpt, ops); 146 container_of(s, struct gk20a_channel_syncpt, ops);
186 struct channel_gk20a *c = sp->c; 147 struct channel_gk20a *c = sp->c;
187 struct sync_fence *sync_fence = NULL; 148 struct sync_fence *sync_fence = NULL;
149 struct nvgpu_os_fence os_fence = {0};
188 150
189 err = gk20a_channel_alloc_priv_cmdbuf(c, 151 err = gk20a_channel_alloc_priv_cmdbuf(c,
190 c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd), 152 c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd),
@@ -226,26 +188,22 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
226 } 188 }
227 } 189 }
228 190
229#ifdef CONFIG_SYNC
230 if (need_sync_fence) { 191 if (need_sync_fence) {
231 sync_fence = nvgpu_nvhost_sync_create_fence(sp->nvhost_dev, 192 err = nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost_dev,
232 sp->id, thresh, "fence"); 193 sp->id, thresh);
233 194
234 if (IS_ERR(sync_fence)) { 195 if (err)
235 err = PTR_ERR(sync_fence);
236 goto clean_up_priv_cmd; 196 goto clean_up_priv_cmd;
237 } 197
198 sync_fence = (struct sync_fence *)os_fence.priv;
238 } 199 }
239#endif
240 200
241 err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev, 201 err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev,
242 sp->id, thresh, sync_fence); 202 sp->id, thresh, sync_fence);
243 203
244 if (err) { 204 if (err) {
245#ifdef CONFIG_SYNC 205 if (nvgpu_os_fence_is_initialized(&os_fence))
246 if (sync_fence) 206 os_fence.ops->drop_ref(&os_fence);
247 sync_fence_put(sync_fence);
248#endif
249 goto clean_up_priv_cmd; 207 goto clean_up_priv_cmd;
250 } 208 }
251 209
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 565b4f86..5c6e91c9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -108,6 +108,10 @@ void gk20a_channel_gen_sema_wait_cmd(struct channel_gk20a *c,
108 struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd, 108 struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd,
109 u32 wait_cmd_size, int pos); 109 u32 wait_cmd_size, int pos);
110 110
111int gk20a_channel_gen_syncpt_wait_cmd(struct channel_gk20a *c,
112 u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
113 u32 wait_cmd_size, int pos, bool preallocated);
114
111void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync, 115void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync,
112 bool set_safe_state); 116 bool set_safe_state);
113struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c, 117struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h b/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h
index 79cc51ea..39d08339 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h
@@ -39,4 +39,8 @@ void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
39 struct gk20a *g, const struct nvgpu_os_fence_ops *fops, 39 struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
40 struct sync_fence *fence); 40 struct sync_fence *fence);
41 41
42#endif \ No newline at end of file 42int nvgpu_os_fence_syncpt_fdget(
43 struct nvgpu_os_fence *fence_out,
44 struct channel_gk20a *c, int fd);
45
46#endif /* __NVGPU_OS_FENCE_ANDROID_H__ */ \ No newline at end of file
diff --git a/drivers/gpu/nvgpu/include/nvgpu/os_fence.h b/drivers/gpu/nvgpu/include/nvgpu/os_fence.h
index c8d24fc2..a22140da 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/os_fence.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/os_fence.h
@@ -28,6 +28,7 @@
28struct nvgpu_semaphore; 28struct nvgpu_semaphore;
29struct channel_gk20a; 29struct channel_gk20a;
30struct priv_cmd_entry; 30struct priv_cmd_entry;
31struct nvgpu_nvhost_dev;
31 32
32/* 33/*
33 * struct nvgpu_os_fence adds an abstraction to the earlier Android Sync 34 * struct nvgpu_os_fence adds an abstraction to the earlier Android Sync
@@ -108,4 +109,22 @@ static inline int nvgpu_os_fence_fdget(
108 109
109#endif /* CONFIG_SYNC */ 110#endif /* CONFIG_SYNC */
110 111
112#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_SYNC)
113
114int nvgpu_os_fence_syncpt_create(struct nvgpu_os_fence *fence_out,
115 struct channel_gk20a *c, struct nvgpu_nvhost_dev *nvhost_dev,
116 u32 id, u32 thresh);
117
118#else
119
120static inline int nvgpu_os_fence_syncpt_create(
121 struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
122 struct nvgpu_nvhost_dev *nvhost_dev,
123 u32 id, u32 thresh)
124{
125 return -ENOSYS;
126}
127
128#endif /* CONFIG_TEGRA_GK20A_NVHOST && CONFIG_SYNC */
129
111#endif /* __NVGPU_OS_FENCE__ */ 130#endif /* __NVGPU_OS_FENCE__ */