diff options
author | Richard Zhao <rizhao@nvidia.com> | 2016-03-07 17:23:12 -0500 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-11 18:38:12 -0400 |
commit | 60b715e85600a6be283e54c610c2a3db3b552059 (patch) | |
tree | aaf2332b8a03c5869a1be74843eae5ea5ccb99be /drivers/gpu/nvgpu/gk20a | |
parent | 6eeabfbdd08e48f924885952c80ff41aa2b534b7 (diff) |
gpu: nvgpu: vgpu: add fecs trace support
Bug 1648908
Change-Id: I7901e7bce5f7aa124a188101dd0736241d87bd53
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1031861
Reviewed-on: http://git-master/r/1121261
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 90 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 7 |
4 files changed, 74 insertions, 26 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 9e7c04ad..6a61d744 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | |||
@@ -130,42 +130,55 @@ static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) | |||
130 | { | 130 | { |
131 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); | 131 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); |
132 | dev->write_enabled = true; | 132 | dev->write_enabled = true; |
133 | dev->g->ops.fecs_trace.enable(dev->g); | ||
133 | return 0; | 134 | return 0; |
134 | } | 135 | } |
135 | 136 | ||
136 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) | 137 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) |
137 | { | 138 | { |
138 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); | 139 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); |
140 | dev->g->ops.fecs_trace.disable(dev->g); | ||
139 | dev->write_enabled = false; | 141 | dev->write_enabled = false; |
140 | return 0; | 142 | return 0; |
141 | } | 143 | } |
142 | 144 | ||
143 | static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev, | 145 | static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, |
144 | size_t size) | 146 | size_t size) |
145 | { | 147 | { |
146 | struct nvgpu_ctxsw_ring_header *hdr; | 148 | struct gk20a *g = dev->g; |
147 | 149 | void *buf; | |
148 | if (atomic_read(&dev->vma_ref)) | 150 | int err; |
149 | return -EBUSY; | ||
150 | 151 | ||
151 | if ((dev->write_enabled) || (atomic_read(&dev->vma_ref))) | 152 | if ((dev->write_enabled) || (atomic_read(&dev->vma_ref))) |
152 | return -EBUSY; | 153 | return -EBUSY; |
153 | 154 | ||
154 | size = roundup(size, PAGE_SIZE); | 155 | err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); |
155 | hdr = vmalloc_user(size); | 156 | if (err) |
156 | if (!hdr) | 157 | return err; |
157 | return -ENOMEM; | ||
158 | 158 | ||
159 | if (dev->hdr) | ||
160 | vfree(dev->hdr); | ||
161 | 159 | ||
162 | dev->hdr = hdr; | 160 | dev->hdr = buf; |
163 | dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); | 161 | dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); |
164 | dev->size = size; | 162 | dev->size = size; |
165 | 163 | ||
164 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", | ||
165 | dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); | ||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | static int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, | ||
170 | void **buf, size_t *size) | ||
171 | { | ||
172 | struct nvgpu_ctxsw_ring_header *hdr; | ||
173 | |||
174 | *size = roundup(*size, PAGE_SIZE); | ||
175 | hdr = vmalloc_user(*size); | ||
176 | if (!hdr) | ||
177 | return -ENOMEM; | ||
178 | |||
166 | hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; | 179 | hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; |
167 | hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; | 180 | hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; |
168 | hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header)) | 181 | hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) |
169 | / sizeof(struct nvgpu_ctxsw_trace_entry); | 182 | / sizeof(struct nvgpu_ctxsw_trace_entry); |
170 | hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); | 183 | hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); |
171 | hdr->drop_count = 0; | 184 | hdr->drop_count = 0; |
@@ -173,8 +186,15 @@ static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev, | |||
173 | hdr->write_idx = 0; | 186 | hdr->write_idx = 0; |
174 | hdr->write_seqno = 0; | 187 | hdr->write_seqno = 0; |
175 | 188 | ||
176 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", | 189 | *buf = hdr; |
177 | dev->size, dev->hdr, dev->ents, hdr->num_ents); | 190 | return 0; |
191 | } | ||
192 | |||
193 | static int gk20a_ctxsw_dev_ring_free(struct gk20a *g) | ||
194 | { | ||
195 | struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; | ||
196 | |||
197 | vfree(dev->hdr); | ||
178 | return 0; | 198 | return 0; |
179 | } | 199 | } |
180 | 200 | ||
@@ -188,13 +208,17 @@ static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, | |||
188 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) | 208 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) |
189 | return -EINVAL; | 209 | return -EINVAL; |
190 | 210 | ||
191 | return gk20a_ctxsw_dev_ring_alloc(dev, size); | 211 | return gk20a_ctxsw_dev_alloc_buffer(dev, size); |
192 | } | 212 | } |
193 | 213 | ||
194 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | 214 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, |
195 | struct nvgpu_ctxsw_trace_filter_args *args) | 215 | struct nvgpu_ctxsw_trace_filter_args *args) |
196 | { | 216 | { |
217 | struct gk20a *g = dev->g; | ||
218 | |||
197 | dev->filter = args->filter; | 219 | dev->filter = args->filter; |
220 | if (g->ops.fecs_trace.set_filter) | ||
221 | g->ops.fecs_trace.set_filter(g, &dev->filter); | ||
198 | return 0; | 222 | return 0; |
199 | } | 223 | } |
200 | 224 | ||
@@ -276,15 +300,13 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) | |||
276 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", | 300 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", |
277 | size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); | 301 | size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); |
278 | 302 | ||
279 | err = gk20a_ctxsw_dev_ring_alloc(dev, size); | 303 | err = gk20a_ctxsw_dev_alloc_buffer(dev, size); |
280 | if (!err) { | 304 | if (!err) { |
281 | filp->private_data = dev; | 305 | filp->private_data = dev; |
282 | gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", | 306 | gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", |
283 | filp, dev, size); | 307 | filp, dev, size); |
284 | } | 308 | } |
285 | 309 | ||
286 | err = g->ops.fecs_trace.enable(g); | ||
287 | |||
288 | done: | 310 | done: |
289 | mutex_unlock(&dev->lock); | 311 | mutex_unlock(&dev->lock); |
290 | 312 | ||
@@ -297,19 +319,18 @@ idle: | |||
297 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) | 319 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) |
298 | { | 320 | { |
299 | struct gk20a_ctxsw_dev *dev = filp->private_data; | 321 | struct gk20a_ctxsw_dev *dev = filp->private_data; |
300 | struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev); | ||
301 | 322 | ||
302 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); | 323 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); |
303 | 324 | ||
304 | mutex_lock(&dev->lock); | 325 | mutex_lock(&dev->lock); |
305 | dev->write_enabled = false; | 326 | if (dev->write_enabled) |
327 | gk20a_ctxsw_dev_ioctl_trace_disable(dev); | ||
328 | |||
306 | if (dev->hdr) { | 329 | if (dev->hdr) { |
307 | vfree(dev->hdr); | 330 | dev->g->ops.fecs_trace.free_user_buffer(dev->g); |
308 | dev->hdr = NULL; | 331 | dev->hdr = NULL; |
309 | } | 332 | } |
310 | 333 | ||
311 | g->ops.fecs_trace.disable(g); | ||
312 | |||
313 | mutex_unlock(&dev->lock); | 334 | mutex_unlock(&dev->lock); |
314 | 335 | ||
315 | return 0; | 336 | return 0; |
@@ -417,6 +438,12 @@ static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { | |||
417 | .close = gk20a_ctxsw_dev_vma_close, | 438 | .close = gk20a_ctxsw_dev_vma_close, |
418 | }; | 439 | }; |
419 | 440 | ||
441 | static int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, | ||
442 | struct vm_area_struct *vma) | ||
443 | { | ||
444 | return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); | ||
445 | } | ||
446 | |||
420 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | 447 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) |
421 | { | 448 | { |
422 | struct gk20a_ctxsw_dev *dev = filp->private_data; | 449 | struct gk20a_ctxsw_dev *dev = filp->private_data; |
@@ -425,7 +452,7 @@ int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | |||
425 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", | 452 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", |
426 | vma->vm_start, vma->vm_end); | 453 | vma->vm_start, vma->vm_end); |
427 | 454 | ||
428 | ret = remap_vmalloc_range(vma, dev->hdr, 0); | 455 | ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); |
429 | if (likely(!ret)) { | 456 | if (likely(!ret)) { |
430 | vma->vm_private_data = dev; | 457 | vma->vm_private_data = dev; |
431 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; | 458 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; |
@@ -482,6 +509,7 @@ int gk20a_ctxsw_trace_init(struct gk20a *g) | |||
482 | return 0; | 509 | return 0; |
483 | 510 | ||
484 | fail: | 511 | fail: |
512 | memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); | ||
485 | kfree(trace); | 513 | kfree(trace); |
486 | g->ctxsw_trace = NULL; | 514 | g->ctxsw_trace = NULL; |
487 | return err; | 515 | return err; |
@@ -493,6 +521,9 @@ fail: | |||
493 | void gk20a_ctxsw_trace_cleanup(struct gk20a *g) | 521 | void gk20a_ctxsw_trace_cleanup(struct gk20a *g) |
494 | { | 522 | { |
495 | #ifdef CONFIG_GK20A_CTXSW_TRACE | 523 | #ifdef CONFIG_GK20A_CTXSW_TRACE |
524 | if (!g->ctxsw_trace) | ||
525 | return; | ||
526 | |||
496 | kfree(g->ctxsw_trace); | 527 | kfree(g->ctxsw_trace); |
497 | g->ctxsw_trace = NULL; | 528 | g->ctxsw_trace = NULL; |
498 | 529 | ||
@@ -584,3 +615,10 @@ void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) | |||
584 | 615 | ||
585 | wake_up_interruptible(&dev->readout_wq); | 616 | wake_up_interruptible(&dev->readout_wq); |
586 | } | 617 | } |
618 | |||
619 | void gk20a_ctxsw_trace_init_ops(struct gpu_ops *ops) | ||
620 | { | ||
621 | ops->fecs_trace.alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc; | ||
622 | ops->fecs_trace.free_user_buffer = gk20a_ctxsw_dev_ring_free; | ||
623 | ops->fecs_trace.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer; | ||
624 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h index c57d95d1..7a2f4aeb 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 | 17 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 |
18 | 18 | ||
19 | struct gk20a; | 19 | struct gk20a; |
20 | struct gpu_ops; | ||
20 | struct nvgpu_ctxsw_trace_entry; | 21 | struct nvgpu_ctxsw_trace_entry; |
21 | struct channel_gk20a; | 22 | struct channel_gk20a; |
22 | struct channel_ctx_gk20a; | 23 | struct channel_ctx_gk20a; |
@@ -37,5 +38,6 @@ int gk20a_ctxsw_trace_setup(struct gk20a *, void *ctx_ptr); | |||
37 | void gk20a_ctxsw_trace_cleanup(struct gk20a *); | 38 | void gk20a_ctxsw_trace_cleanup(struct gk20a *); |
38 | int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); | 39 | int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); |
39 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); | 40 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); |
41 | void gk20a_ctxsw_trace_init_ops(struct gpu_ops *ops); | ||
40 | 42 | ||
41 | #endif /* __CTXSW_TRACE_GK20A_H */ | 43 | #endif /* __CTXSW_TRACE_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index aef0c9b3..58d8fda1 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -745,6 +745,7 @@ static int gk20a_fecs_trace_disable(struct gk20a *g) | |||
745 | 745 | ||
746 | void gk20a_init_fecs_trace_ops(struct gpu_ops *ops) | 746 | void gk20a_init_fecs_trace_ops(struct gpu_ops *ops) |
747 | { | 747 | { |
748 | gk20a_ctxsw_trace_init_ops(ops); | ||
748 | ops->fecs_trace.init = gk20a_fecs_trace_init; | 749 | ops->fecs_trace.init = gk20a_fecs_trace_init; |
749 | ops->fecs_trace.deinit = gk20a_fecs_trace_deinit; | 750 | ops->fecs_trace.deinit = gk20a_fecs_trace_deinit; |
750 | ops->fecs_trace.enable = gk20a_fecs_trace_enable; | 751 | ops->fecs_trace.enable = gk20a_fecs_trace_enable; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index d9cc3d4f..fb149f56 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -391,6 +391,13 @@ struct gpu_ops { | |||
391 | int (*bind_channel)(struct gk20a *, struct channel_gk20a *); | 391 | int (*bind_channel)(struct gk20a *, struct channel_gk20a *); |
392 | int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); | 392 | int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); |
393 | int (*deinit)(struct gk20a *g); | 393 | int (*deinit)(struct gk20a *g); |
394 | int (*alloc_user_buffer)(struct gk20a *g, | ||
395 | void **buf, size_t *size); | ||
396 | int (*free_user_buffer)(struct gk20a *g); | ||
397 | int (*mmap_user_buffer)(struct gk20a *g, | ||
398 | struct vm_area_struct *vma); | ||
399 | int (*set_filter)(struct gk20a *g, | ||
400 | struct nvgpu_ctxsw_trace_filter *filter); | ||
394 | } fecs_trace; | 401 | } fecs_trace; |
395 | struct { | 402 | struct { |
396 | bool (*support_sparse)(struct gk20a *g); | 403 | bool (*support_sparse)(struct gk20a *g); |