author     Sachit Kadle <skadle@nvidia.com>                      2016-08-22 21:06:30 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2016-10-20 11:14:04 -0400
commit     63e8592e06939e20c7b9e56b430353ebbee31ad6 (patch)
tree       b91247eebf886f4e987d38eb4069aceace284ecf /drivers/gpu/nvgpu/gk20a/fence_gk20a.c
parent     3c2656c8c6ebf7cef7376d3a28451249643121c4 (diff)
gpu: nvgpu: use inplace allocation in sync framework
This change is the first of a series of changes to
support the usage of pre-allocated job tracking resources
in the submit path. With this change, we still maintain a
dynamically-allocated joblist, but make the necessary changes
in the channel_sync & fence framework to use in-place
allocations. Specifically, we:
1) Update channel sync framework routines to take in
pre-allocated priv_cmd_entry(s) & gk20a_fence(s) rather
than allocating them dynamically themselves
2) Move allocation of priv_cmd_entry(s) & gk20a_fence(s)
to gk20a_submit_prepare_syncs
3) Modify fence framework to have separate allocation
and init APIs. We expose allocation as a separate API, so
the client can allocate the object before passing it into
the channel sync framework (see the sketch after this list).
4) Fix clean_up logic in channel sync framework
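As a rough illustration of the intended caller-side flow: the wrapper
function below is only a sketch and is not part of this patch, and
gk20a_fence_put() is assumed to be the existing kref-based release
helper in fence_gk20a.h.

	/* Sketch only: assumes the post-patch fence_gk20a.h declarations. */
	static int example_prepare_fence(struct channel_gk20a *c,
					 struct platform_device *host1x_pdev,
					 u32 id, u32 value)
	{
		struct gk20a_fence *fence;
		int err;

		/* allocate the bare fence object up front, e.g. together
		 * with the pre-allocated job tracking resources */
		fence = gk20a_alloc_fence(c);
		if (!fence)
			return -ENOMEM;

		/* later, the fence framework initializes it in place
		 * instead of allocating internally */
		err = gk20a_fence_from_syncpt(fence, host1x_pdev, id, value,
					      false /* wfi */,
					      true /* need_sync_fence */);
		if (err) {
			gk20a_fence_put(fence); /* assumed release helper */
			return err;
		}
		return 0;
	}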
Bug 1795076
Change-Id: I96db457683cd207fd029c31c45f548f98055e844
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1206725
(cherry picked from commit 9d196fd10db6c2f934c2a53b1fc0500eb4626624)
Reviewed-on: http://git-master/r/1223933
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fence_gk20a.c')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 88
1 file changed, 61 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 596dc549..f788829f 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -63,16 +63,27 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
 	return f;
 }
 
+static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
+{
+	bool valid = f->valid;
+
+	rmb();
+	return valid;
+}
+
 int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
 {
-	if (!tegra_platform_is_silicon())
-		timeout = (u32)MAX_SCHEDULE_TIMEOUT;
-	return f->ops->wait(f, timeout);
+	if (f && gk20a_fence_is_valid(f)) {
+		if (!tegra_platform_is_silicon())
+			timeout = (u32)MAX_SCHEDULE_TIMEOUT;
+		return f->ops->wait(f, timeout);
+	}
+	return 0;
 }
 
 bool gk20a_fence_is_expired(struct gk20a_fence *f)
 {
-	if (f && f->ops)
+	if (f && gk20a_fence_is_valid(f) && f->ops)
 		return f->ops->is_expired(f);
 	else
 		return true;
@@ -83,7 +94,7 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #ifdef CONFIG_SYNC
 	int fd;
 
-	if (!f->sync_fence)
+	if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence)
 		return -EINVAL;
 
 	fd = get_unused_fd_flags(O_RDWR);
@@ -98,18 +109,28 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #endif
 }
 
-struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops,
-				      struct sync_fence *sync_fence, bool wfi)
+struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
 {
-	struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);
-	if (!f)
+	struct gk20a_fence *fence;
+
+	fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
+	if (!fence)
 		return NULL;
-	kref_init(&f->ref);
+
+	kref_init(&fence->ref);
+	return fence;
+}
+
+void gk20a_init_fence(struct gk20a_fence *f,
+		const struct gk20a_fence_ops *ops,
+		struct sync_fence *sync_fence, bool wfi)
+{
+	if (!f)
+		return;
 	f->ops = ops;
 	f->sync_fence = sync_fence;
 	f->wfi = wfi;
 	f->syncpt_id = -1;
-	return f;
 }
 
 /* Fences that are backed by GPU semaphores: */
@@ -143,14 +164,15 @@ static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
 };
 
 /* This function takes ownership of the semaphore */
-struct gk20a_fence *gk20a_fence_from_semaphore(
+int gk20a_fence_from_semaphore(
+		struct gk20a_fence *fence_out,
 		struct sync_timeline *timeline,
 		struct gk20a_semaphore *semaphore,
 		wait_queue_head_t *semaphore_wq,
 		struct sync_fence *dependency,
 		bool wfi, bool need_sync_fence)
 {
-	struct gk20a_fence *f;
+	struct gk20a_fence *f = fence_out;
 	struct sync_fence *sync_fence = NULL;
 
 #ifdef CONFIG_SYNC
@@ -159,21 +181,26 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
 			dependency, "f-gk20a-0x%04x",
 			gk20a_semaphore_gpu_ro_va(semaphore));
 		if (!sync_fence)
-			return NULL;
+			return -1;
 	}
 #endif
 
-	f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi);
+	gk20a_init_fence(f, &gk20a_semaphore_fence_ops, sync_fence, wfi);
 	if (!f) {
 #ifdef CONFIG_SYNC
 		sync_fence_put(sync_fence);
 #endif
-		return NULL;
+		return -EINVAL;
 	}
 
 	f->semaphore = semaphore;
 	f->semaphore_wq = semaphore_wq;
-	return f;
+
+	/* commit previous writes before setting the valid flag */
+	wmb();
+	f->valid = true;
+
+	return 0;
 }
 
 #ifdef CONFIG_TEGRA_GK20A
@@ -197,11 +224,13 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
 	.is_expired = &gk20a_syncpt_fence_is_expired,
 };
 
-struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
-					    u32 id, u32 value, bool wfi,
-					    bool need_sync_fence)
+int gk20a_fence_from_syncpt(
+		struct gk20a_fence *fence_out,
+		struct platform_device *host1x_pdev,
+		u32 id, u32 value, bool wfi,
+		bool need_sync_fence)
 {
-	struct gk20a_fence *f;
+	struct gk20a_fence *f = fence_out;
 	struct sync_fence *sync_fence = NULL;
 
 #ifdef CONFIG_SYNC
@@ -214,27 +243,32 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
 		sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
 						      "fence");
 		if (IS_ERR(sync_fence))
-			return NULL;
+			return -1;
 	}
 #endif
 
-	f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
+	gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi);
 	if (!f) {
 #ifdef CONFIG_SYNC
 		if (sync_fence)
 			sync_fence_put(sync_fence);
 #endif
-		return NULL;
+		return -EINVAL;
 	}
 	f->host1x_pdev = host1x_pdev;
 	f->syncpt_id = id;
 	f->syncpt_value = value;
-	return f;
+
+	/* commit previous writes before setting the valid flag */
+	wmb();
+	f->valid = true;
+
+	return 0;
 }
 #else
-struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
+int gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
 			u32 id, u32 value, bool wfi)
 {
-	return NULL;
+	return -EINVAL;
 }
 #endif