diff options
author | Richard Zhao <rizhao@nvidia.com> | 2016-03-07 17:23:12 -0500 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-11 18:38:12 -0400 |
commit | 60b715e85600a6be283e54c610c2a3db3b552059 (patch) | |
tree | aaf2332b8a03c5869a1be74843eae5ea5ccb99be | |
parent | 6eeabfbdd08e48f924885952c80ff41aa2b534b7 (diff) |
gpu: nvgpu: vgpu: add fecs trace support
Bug 1648908
Change-Id: I7901e7bce5f7aa124a188101dd0736241d87bd53
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1031861
Reviewed-on: http://git-master/r/1121261
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 90 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c | 196 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/vgpu.c | 8 | ||||
-rw-r--r-- | include/linux/tegra_vgpu.h | 27 |
8 files changed, 300 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 9e7c04ad..6a61d744 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | |||
@@ -130,42 +130,55 @@ static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) | |||
130 | { | 130 | { |
131 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); | 131 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); |
132 | dev->write_enabled = true; | 132 | dev->write_enabled = true; |
133 | dev->g->ops.fecs_trace.enable(dev->g); | ||
133 | return 0; | 134 | return 0; |
134 | } | 135 | } |
135 | 136 | ||
136 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) | 137 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) |
137 | { | 138 | { |
138 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); | 139 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); |
140 | dev->g->ops.fecs_trace.disable(dev->g); | ||
139 | dev->write_enabled = false; | 141 | dev->write_enabled = false; |
140 | return 0; | 142 | return 0; |
141 | } | 143 | } |
142 | 144 | ||
143 | static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev, | 145 | static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, |
144 | size_t size) | 146 | size_t size) |
145 | { | 147 | { |
146 | struct nvgpu_ctxsw_ring_header *hdr; | 148 | struct gk20a *g = dev->g; |
147 | 149 | void *buf; | |
148 | if (atomic_read(&dev->vma_ref)) | 150 | int err; |
149 | return -EBUSY; | ||
150 | 151 | ||
151 | if ((dev->write_enabled) || (atomic_read(&dev->vma_ref))) | 152 | if ((dev->write_enabled) || (atomic_read(&dev->vma_ref))) |
152 | return -EBUSY; | 153 | return -EBUSY; |
153 | 154 | ||
154 | size = roundup(size, PAGE_SIZE); | 155 | err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); |
155 | hdr = vmalloc_user(size); | 156 | if (err) |
156 | if (!hdr) | 157 | return err; |
157 | return -ENOMEM; | ||
158 | 158 | ||
159 | if (dev->hdr) | ||
160 | vfree(dev->hdr); | ||
161 | 159 | ||
162 | dev->hdr = hdr; | 160 | dev->hdr = buf; |
163 | dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); | 161 | dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); |
164 | dev->size = size; | 162 | dev->size = size; |
165 | 163 | ||
164 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", | ||
165 | dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); | ||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | static int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, | ||
170 | void **buf, size_t *size) | ||
171 | { | ||
172 | struct nvgpu_ctxsw_ring_header *hdr; | ||
173 | |||
174 | *size = roundup(*size, PAGE_SIZE); | ||
175 | hdr = vmalloc_user(*size); | ||
176 | if (!hdr) | ||
177 | return -ENOMEM; | ||
178 | |||
166 | hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; | 179 | hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; |
167 | hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; | 180 | hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; |
168 | hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header)) | 181 | hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) |
169 | / sizeof(struct nvgpu_ctxsw_trace_entry); | 182 | / sizeof(struct nvgpu_ctxsw_trace_entry); |
170 | hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); | 183 | hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); |
171 | hdr->drop_count = 0; | 184 | hdr->drop_count = 0; |
@@ -173,8 +186,15 @@ static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev, | |||
173 | hdr->write_idx = 0; | 186 | hdr->write_idx = 0; |
174 | hdr->write_seqno = 0; | 187 | hdr->write_seqno = 0; |
175 | 188 | ||
176 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", | 189 | *buf = hdr; |
177 | dev->size, dev->hdr, dev->ents, hdr->num_ents); | 190 | return 0; |
191 | } | ||
192 | |||
193 | static int gk20a_ctxsw_dev_ring_free(struct gk20a *g) | ||
194 | { | ||
195 | struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; | ||
196 | |||
197 | vfree(dev->hdr); | ||
178 | return 0; | 198 | return 0; |
179 | } | 199 | } |
180 | 200 | ||
@@ -188,13 +208,17 @@ static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, | |||
188 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) | 208 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) |
189 | return -EINVAL; | 209 | return -EINVAL; |
190 | 210 | ||
191 | return gk20a_ctxsw_dev_ring_alloc(dev, size); | 211 | return gk20a_ctxsw_dev_alloc_buffer(dev, size); |
192 | } | 212 | } |
193 | 213 | ||
194 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | 214 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, |
195 | struct nvgpu_ctxsw_trace_filter_args *args) | 215 | struct nvgpu_ctxsw_trace_filter_args *args) |
196 | { | 216 | { |
217 | struct gk20a *g = dev->g; | ||
218 | |||
197 | dev->filter = args->filter; | 219 | dev->filter = args->filter; |
220 | if (g->ops.fecs_trace.set_filter) | ||
221 | g->ops.fecs_trace.set_filter(g, &dev->filter); | ||
198 | return 0; | 222 | return 0; |
199 | } | 223 | } |
200 | 224 | ||
@@ -276,15 +300,13 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) | |||
276 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", | 300 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", |
277 | size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); | 301 | size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); |
278 | 302 | ||
279 | err = gk20a_ctxsw_dev_ring_alloc(dev, size); | 303 | err = gk20a_ctxsw_dev_alloc_buffer(dev, size); |
280 | if (!err) { | 304 | if (!err) { |
281 | filp->private_data = dev; | 305 | filp->private_data = dev; |
282 | gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", | 306 | gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", |
283 | filp, dev, size); | 307 | filp, dev, size); |
284 | } | 308 | } |
285 | 309 | ||
286 | err = g->ops.fecs_trace.enable(g); | ||
287 | |||
288 | done: | 310 | done: |
289 | mutex_unlock(&dev->lock); | 311 | mutex_unlock(&dev->lock); |
290 | 312 | ||
@@ -297,19 +319,18 @@ idle: | |||
297 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) | 319 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) |
298 | { | 320 | { |
299 | struct gk20a_ctxsw_dev *dev = filp->private_data; | 321 | struct gk20a_ctxsw_dev *dev = filp->private_data; |
300 | struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev); | ||
301 | 322 | ||
302 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); | 323 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); |
303 | 324 | ||
304 | mutex_lock(&dev->lock); | 325 | mutex_lock(&dev->lock); |
305 | dev->write_enabled = false; | 326 | if (dev->write_enabled) |
327 | gk20a_ctxsw_dev_ioctl_trace_disable(dev); | ||
328 | |||
306 | if (dev->hdr) { | 329 | if (dev->hdr) { |
307 | vfree(dev->hdr); | 330 | dev->g->ops.fecs_trace.free_user_buffer(dev->g); |
308 | dev->hdr = NULL; | 331 | dev->hdr = NULL; |
309 | } | 332 | } |
310 | 333 | ||
311 | g->ops.fecs_trace.disable(g); | ||
312 | |||
313 | mutex_unlock(&dev->lock); | 334 | mutex_unlock(&dev->lock); |
314 | 335 | ||
315 | return 0; | 336 | return 0; |
@@ -417,6 +438,12 @@ static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { | |||
417 | .close = gk20a_ctxsw_dev_vma_close, | 438 | .close = gk20a_ctxsw_dev_vma_close, |
418 | }; | 439 | }; |
419 | 440 | ||
441 | static int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, | ||
442 | struct vm_area_struct *vma) | ||
443 | { | ||
444 | return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); | ||
445 | } | ||
446 | |||
420 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | 447 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) |
421 | { | 448 | { |
422 | struct gk20a_ctxsw_dev *dev = filp->private_data; | 449 | struct gk20a_ctxsw_dev *dev = filp->private_data; |
@@ -425,7 +452,7 @@ int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | |||
425 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", | 452 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", |
426 | vma->vm_start, vma->vm_end); | 453 | vma->vm_start, vma->vm_end); |
427 | 454 | ||
428 | ret = remap_vmalloc_range(vma, dev->hdr, 0); | 455 | ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); |
429 | if (likely(!ret)) { | 456 | if (likely(!ret)) { |
430 | vma->vm_private_data = dev; | 457 | vma->vm_private_data = dev; |
431 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; | 458 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; |
@@ -482,6 +509,7 @@ int gk20a_ctxsw_trace_init(struct gk20a *g) | |||
482 | return 0; | 509 | return 0; |
483 | 510 | ||
484 | fail: | 511 | fail: |
512 | memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); | ||
485 | kfree(trace); | 513 | kfree(trace); |
486 | g->ctxsw_trace = NULL; | 514 | g->ctxsw_trace = NULL; |
487 | return err; | 515 | return err; |
@@ -493,6 +521,9 @@ fail: | |||
493 | void gk20a_ctxsw_trace_cleanup(struct gk20a *g) | 521 | void gk20a_ctxsw_trace_cleanup(struct gk20a *g) |
494 | { | 522 | { |
495 | #ifdef CONFIG_GK20A_CTXSW_TRACE | 523 | #ifdef CONFIG_GK20A_CTXSW_TRACE |
524 | if (!g->ctxsw_trace) | ||
525 | return; | ||
526 | |||
496 | kfree(g->ctxsw_trace); | 527 | kfree(g->ctxsw_trace); |
497 | g->ctxsw_trace = NULL; | 528 | g->ctxsw_trace = NULL; |
498 | 529 | ||
@@ -584,3 +615,10 @@ void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) | |||
584 | 615 | ||
585 | wake_up_interruptible(&dev->readout_wq); | 616 | wake_up_interruptible(&dev->readout_wq); |
586 | } | 617 | } |
618 | |||
619 | void gk20a_ctxsw_trace_init_ops(struct gpu_ops *ops) | ||
620 | { | ||
621 | ops->fecs_trace.alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc; | ||
622 | ops->fecs_trace.free_user_buffer = gk20a_ctxsw_dev_ring_free; | ||
623 | ops->fecs_trace.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer; | ||
624 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h index c57d95d1..7a2f4aeb 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 | 17 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 |
18 | 18 | ||
19 | struct gk20a; | 19 | struct gk20a; |
20 | struct gpu_ops; | ||
20 | struct nvgpu_ctxsw_trace_entry; | 21 | struct nvgpu_ctxsw_trace_entry; |
21 | struct channel_gk20a; | 22 | struct channel_gk20a; |
22 | struct channel_ctx_gk20a; | 23 | struct channel_ctx_gk20a; |
@@ -37,5 +38,6 @@ int gk20a_ctxsw_trace_setup(struct gk20a *, void *ctx_ptr); | |||
37 | void gk20a_ctxsw_trace_cleanup(struct gk20a *); | 38 | void gk20a_ctxsw_trace_cleanup(struct gk20a *); |
38 | int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); | 39 | int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); |
39 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); | 40 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); |
41 | void gk20a_ctxsw_trace_init_ops(struct gpu_ops *ops); | ||
40 | 42 | ||
41 | #endif /* __CTXSW_TRACE_GK20A_H */ | 43 | #endif /* __CTXSW_TRACE_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index aef0c9b3..58d8fda1 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -745,6 +745,7 @@ static int gk20a_fecs_trace_disable(struct gk20a *g) | |||
745 | 745 | ||
746 | void gk20a_init_fecs_trace_ops(struct gpu_ops *ops) | 746 | void gk20a_init_fecs_trace_ops(struct gpu_ops *ops) |
747 | { | 747 | { |
748 | gk20a_ctxsw_trace_init_ops(ops); | ||
748 | ops->fecs_trace.init = gk20a_fecs_trace_init; | 749 | ops->fecs_trace.init = gk20a_fecs_trace_init; |
749 | ops->fecs_trace.deinit = gk20a_fecs_trace_deinit; | 750 | ops->fecs_trace.deinit = gk20a_fecs_trace_deinit; |
750 | ops->fecs_trace.enable = gk20a_fecs_trace_enable; | 751 | ops->fecs_trace.enable = gk20a_fecs_trace_enable; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index d9cc3d4f..fb149f56 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -391,6 +391,13 @@ struct gpu_ops { | |||
391 | int (*bind_channel)(struct gk20a *, struct channel_gk20a *); | 391 | int (*bind_channel)(struct gk20a *, struct channel_gk20a *); |
392 | int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); | 392 | int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); |
393 | int (*deinit)(struct gk20a *g); | 393 | int (*deinit)(struct gk20a *g); |
394 | int (*alloc_user_buffer)(struct gk20a *g, | ||
395 | void **buf, size_t *size); | ||
396 | int (*free_user_buffer)(struct gk20a *g); | ||
397 | int (*mmap_user_buffer)(struct gk20a *g, | ||
398 | struct vm_area_struct *vma); | ||
399 | int (*set_filter)(struct gk20a *g, | ||
400 | struct nvgpu_ctxsw_trace_filter *filter); | ||
394 | } fecs_trace; | 401 | } fecs_trace; |
395 | struct { | 402 | struct { |
396 | bool (*support_sparse)(struct gk20a *g); | 403 | bool (*support_sparse)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c index cb955811..568f3784 100644 --- a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c | |||
@@ -12,10 +12,204 @@ | |||
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/string.h> | 14 | #include <linux/string.h> |
15 | #include <linux/tegra-ivc.h> | ||
16 | #include <linux/tegra_vgpu.h> | ||
17 | |||
15 | #include "gk20a/gk20a.h" | 18 | #include "gk20a/gk20a.h" |
19 | #include "gk20a/ctxsw_trace_gk20a.h" | ||
20 | #include "vgpu.h" | ||
16 | #include "fecs_trace_vgpu.h" | 21 | #include "fecs_trace_vgpu.h" |
17 | 22 | ||
23 | struct vgpu_fecs_trace { | ||
24 | struct tegra_hv_ivm_cookie *cookie; | ||
25 | struct nvgpu_ctxsw_ring_header *header; | ||
26 | struct nvgpu_ctxsw_trace_entry *entries; | ||
27 | int num_entries; | ||
28 | void *buf; | ||
29 | }; | ||
30 | |||
31 | static int vgpu_fecs_trace_init(struct gk20a *g) | ||
32 | { | ||
33 | struct device *dev = g->dev; | ||
34 | struct device_node *np = dev->of_node; | ||
35 | struct of_phandle_args args; | ||
36 | struct device_node *hv_np; | ||
37 | struct vgpu_fecs_trace *vcst; | ||
38 | u32 mempool; | ||
39 | int err; | ||
40 | |||
41 | gk20a_dbg_fn(""); | ||
42 | |||
43 | vcst = kzalloc(sizeof(*vcst), GFP_KERNEL); | ||
44 | if (!vcst) | ||
45 | return -ENOMEM; | ||
46 | |||
47 | err = of_parse_phandle_with_fixed_args(np, | ||
48 | "mempool-fecs-trace", 1, 0, &args); | ||
49 | if (err) { | ||
50 | dev_info(dev_from_gk20a(g), "does not support fecs trace\n"); | ||
51 | goto fail; | ||
52 | } | ||
53 | |||
54 | hv_np = args.np; | ||
55 | mempool = args.args[0]; | ||
56 | vcst->cookie = tegra_hv_mempool_reserve(hv_np, mempool); | ||
57 | if (IS_ERR(vcst->cookie)) { | ||
58 | dev_info(dev_from_gk20a(g), | ||
59 | "mempool %u reserve failed\n", mempool); | ||
60 | err = -EINVAL; | ||
61 | goto fail; | ||
62 | } | ||
63 | |||
64 | vcst->buf = ioremap_cache(vcst->cookie->ipa, vcst->cookie->size); | ||
65 | vcst->header = vcst->buf; | ||
66 | vcst->num_entries = vcst->header->num_ents; | ||
67 | if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { | ||
68 | dev_err(dev_from_gk20a(g), | ||
69 | "entry size mismatch\n"); | ||
70 | goto fail; | ||
71 | } | ||
72 | vcst->entries = vcst->buf + sizeof(*vcst->header); | ||
73 | g->fecs_trace = (struct gk20a_fecs_trace *)vcst; | ||
74 | |||
75 | return 0; | ||
76 | fail: | ||
77 | iounmap(vcst->buf); | ||
78 | if (vcst->cookie) | ||
79 | tegra_hv_mempool_unreserve(vcst->cookie); | ||
80 | kfree(vcst); | ||
81 | return err; | ||
82 | } | ||
83 | |||
84 | static int vgpu_fecs_trace_deinit(struct gk20a *g) | ||
85 | { | ||
86 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
87 | |||
88 | iounmap(vcst->buf); | ||
89 | tegra_hv_mempool_unreserve(vcst->cookie); | ||
90 | kfree(vcst); | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static int vgpu_fecs_trace_enable(struct gk20a *g) | ||
95 | { | ||
96 | struct tegra_vgpu_cmd_msg msg = { | ||
97 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, | ||
98 | .handle = gk20a_get_platform(g->dev)->virt_handle, | ||
99 | }; | ||
100 | int err; | ||
101 | |||
102 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
103 | err = err ? err : msg.ret; | ||
104 | WARN_ON(err); | ||
105 | return err; | ||
106 | } | ||
107 | |||
108 | static int vgpu_fecs_trace_disable(struct gk20a *g) | ||
109 | { | ||
110 | struct tegra_vgpu_cmd_msg msg = { | ||
111 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, | ||
112 | .handle = gk20a_get_platform(g->dev)->virt_handle, | ||
113 | }; | ||
114 | int err; | ||
115 | |||
116 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
117 | err = err ? err : msg.ret; | ||
118 | WARN_ON(err); | ||
119 | return err; | ||
120 | } | ||
121 | |||
122 | static int vgpu_fecs_trace_poll(struct gk20a *g) | ||
123 | { | ||
124 | struct tegra_vgpu_cmd_msg msg = { | ||
125 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, | ||
126 | .handle = gk20a_get_platform(g->dev)->virt_handle, | ||
127 | }; | ||
128 | int err; | ||
129 | |||
130 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
131 | err = err ? err : msg.ret; | ||
132 | WARN_ON(err); | ||
133 | return err; | ||
134 | } | ||
135 | |||
136 | static int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size) | ||
137 | { | ||
138 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
139 | |||
140 | *buf = vcst->buf; | ||
141 | *size = vcst->cookie->size; | ||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static int vgpu_free_user_buffer(struct gk20a *g) | ||
146 | { | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | static int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma) | ||
151 | { | ||
152 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
153 | unsigned long size = vcst->cookie->size; | ||
154 | unsigned long vsize = vma->vm_end - vma->vm_start; | ||
155 | |||
156 | size = min(size, vsize); | ||
157 | size = round_up(size, PAGE_SIZE); | ||
158 | |||
159 | return remap_pfn_range(vma, vma->vm_start, | ||
160 | vcst->cookie->ipa >> PAGE_SHIFT, | ||
161 | size, | ||
162 | vma->vm_page_prot); | ||
163 | } | ||
164 | |||
165 | static int vgpu_fecs_trace_max_entries(struct gk20a *g, | ||
166 | struct nvgpu_ctxsw_trace_filter *filter) | ||
167 | { | ||
168 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
169 | |||
170 | return vcst->header->num_ents; | ||
171 | } | ||
172 | |||
173 | #if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE | ||
174 | #error "FECS trace filter size mismatch!" | ||
175 | #endif | ||
176 | |||
177 | static int vgpu_fecs_trace_set_filter(struct gk20a *g, | ||
178 | struct nvgpu_ctxsw_trace_filter *filter) | ||
179 | { | ||
180 | struct tegra_vgpu_cmd_msg msg = { | ||
181 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, | ||
182 | .handle = gk20a_get_platform(g->dev)->virt_handle, | ||
183 | }; | ||
184 | struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; | ||
185 | int err; | ||
186 | |||
187 | memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); | ||
188 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
189 | err = err ? err : msg.ret; | ||
190 | WARN_ON(err); | ||
191 | return err; | ||
192 | } | ||
193 | |||
18 | void vgpu_init_fecs_trace_ops(struct gpu_ops *ops) | 194 | void vgpu_init_fecs_trace_ops(struct gpu_ops *ops) |
19 | { | 195 | { |
20 | memset(&ops->fecs_trace, 0, sizeof(ops->fecs_trace)); | 196 | ops->fecs_trace.init = vgpu_fecs_trace_init; |
197 | ops->fecs_trace.deinit = vgpu_fecs_trace_deinit; | ||
198 | ops->fecs_trace.enable = vgpu_fecs_trace_enable; | ||
199 | ops->fecs_trace.disable = vgpu_fecs_trace_disable; | ||
200 | ops->fecs_trace.reset = NULL; | ||
201 | ops->fecs_trace.flush = NULL; | ||
202 | ops->fecs_trace.poll = vgpu_fecs_trace_poll; | ||
203 | ops->fecs_trace.bind_channel = NULL; | ||
204 | ops->fecs_trace.unbind_channel = NULL; | ||
205 | ops->fecs_trace.max_entries = vgpu_fecs_trace_max_entries; | ||
206 | ops->fecs_trace.alloc_user_buffer = vgpu_alloc_user_buffer; | ||
207 | ops->fecs_trace.free_user_buffer = vgpu_free_user_buffer; | ||
208 | ops->fecs_trace.mmap_user_buffer = vgpu_mmap_user_buffer; | ||
209 | ops->fecs_trace.set_filter = vgpu_fecs_trace_set_filter; | ||
210 | } | ||
211 | |||
212 | void vgpu_fecs_trace_data_update(struct gk20a *g) | ||
213 | { | ||
214 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
21 | } | 215 | } |
diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h index 1aace1fe..93c44b71 100644 --- a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h +++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h | |||
@@ -16,5 +16,6 @@ | |||
16 | 16 | ||
17 | struct gpu_ops; | 17 | struct gpu_ops; |
18 | void vgpu_init_fecs_trace_ops(struct gpu_ops *ops); | 18 | void vgpu_init_fecs_trace_ops(struct gpu_ops *ops); |
19 | void vgpu_fecs_trace_data_update(struct gk20a *g); | ||
19 | 20 | ||
20 | #endif /* __FECS_TRACE_VGPU_H */ | 21 | #endif /* __FECS_TRACE_VGPU_H */ |
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index b67f4d95..90197ab4 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "gk20a/debug_gk20a.h" | 22 | #include "gk20a/debug_gk20a.h" |
23 | #include "gk20a/hal_gk20a.h" | 23 | #include "gk20a/hal_gk20a.h" |
24 | #include "gk20a/hw_mc_gk20a.h" | 24 | #include "gk20a/hw_mc_gk20a.h" |
25 | #include "gk20a/ctxsw_trace_gk20a.h" | ||
25 | #include "gm20b/hal_gm20b.h" | 26 | #include "gm20b/hal_gm20b.h" |
26 | 27 | ||
27 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | 28 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC |
@@ -120,6 +121,12 @@ static int vgpu_intr_thread(void *dev_id) | |||
120 | break; | 121 | break; |
121 | } | 122 | } |
122 | 123 | ||
124 | if (msg->event == TEGRA_VGPU_EVENT_FECS_TRACE) { | ||
125 | vgpu_fecs_trace_data_update(g); | ||
126 | tegra_gr_comm_release(handle); | ||
127 | continue; | ||
128 | } | ||
129 | |||
123 | if (msg->unit == TEGRA_VGPU_INTR_GR) | 130 | if (msg->unit == TEGRA_VGPU_INTR_GR) |
124 | vgpu_gr_isr(g, &msg->info.gr_intr); | 131 | vgpu_gr_isr(g, &msg->info.gr_intr); |
125 | else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_GR) | 132 | else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_GR) |
@@ -334,6 +341,7 @@ int vgpu_pm_finalize_poweron(struct device *dev) | |||
334 | 341 | ||
335 | g->gpu_characteristics.flags &= ~NVGPU_GPU_FLAGS_SUPPORT_TSG; | 342 | g->gpu_characteristics.flags &= ~NVGPU_GPU_FLAGS_SUPPORT_TSG; |
336 | 343 | ||
344 | gk20a_ctxsw_trace_init(g); | ||
337 | gk20a_channel_resume(g); | 345 | gk20a_channel_resume(g); |
338 | 346 | ||
339 | done: | 347 | done: |
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h index 979d454e..67bd0d76 100644 --- a/include/linux/tegra_vgpu.h +++ b/include/linux/tegra_vgpu.h | |||
@@ -77,10 +77,10 @@ enum { | |||
77 | TEGRA_VGPU_CMD_CHANNEL_SET_PRIORITY, | 77 | TEGRA_VGPU_CMD_CHANNEL_SET_PRIORITY, |
78 | TEGRA_VGPU_CMD_CHANNEL_SET_RUNLIST_INTERLEAVE, | 78 | TEGRA_VGPU_CMD_CHANNEL_SET_RUNLIST_INTERLEAVE, |
79 | TEGRA_VGPU_CMD_CHANNEL_SET_TIMESLICE, | 79 | TEGRA_VGPU_CMD_CHANNEL_SET_TIMESLICE, |
80 | RESVD1, | 80 | TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, |
81 | RESVD2, | 81 | TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, |
82 | RESVD3, | 82 | TEGRA_VGPU_CMD_FECS_TRACE_POLL, |
83 | RESVD4, | 83 | TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, |
84 | TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE, | 84 | TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE, |
85 | TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE, | 85 | TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE, |
86 | TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX, | 86 | TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX, |
@@ -319,6 +319,11 @@ struct tegra_vgpu_channel_timeslice_params { | |||
319 | u32 timeslice_us; | 319 | u32 timeslice_us; |
320 | }; | 320 | }; |
321 | 321 | ||
322 | #define TEGRA_VGPU_FECS_TRACE_FILTER_SIZE 256 | ||
323 | struct tegra_vgpu_fecs_trace_filter { | ||
324 | u64 tag_bits[(TEGRA_VGPU_FECS_TRACE_FILTER_SIZE + 63) / 64]; | ||
325 | }; | ||
326 | |||
322 | enum { | 327 | enum { |
323 | TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0, | 328 | TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0, |
324 | TEGRA_VGPU_CTXSW_MODE_CTXSW, | 329 | TEGRA_VGPU_CTXSW_MODE_CTXSW, |
@@ -363,6 +368,7 @@ struct tegra_vgpu_cmd_msg { | |||
363 | struct tegra_vgpu_channel_priority_params channel_priority; | 368 | struct tegra_vgpu_channel_priority_params channel_priority; |
364 | struct tegra_vgpu_channel_runlist_interleave_params channel_interleave; | 369 | struct tegra_vgpu_channel_runlist_interleave_params channel_interleave; |
365 | struct tegra_vgpu_channel_timeslice_params channel_timeslice; | 370 | struct tegra_vgpu_channel_timeslice_params channel_timeslice; |
371 | struct tegra_vgpu_fecs_trace_filter fecs_trace_filter; | ||
366 | struct tegra_vgpu_channel_set_ctxsw_mode set_ctxsw_mode; | 372 | struct tegra_vgpu_channel_set_ctxsw_mode set_ctxsw_mode; |
367 | struct tegra_vgpu_channel_free_hwpm_ctx free_hwpm_ctx; | 373 | struct tegra_vgpu_channel_free_hwpm_ctx free_hwpm_ctx; |
368 | char padding[192]; | 374 | char padding[192]; |
@@ -412,6 +418,15 @@ struct tegra_vgpu_ce2_nonstall_intr_info { | |||
412 | }; | 418 | }; |
413 | 419 | ||
414 | enum { | 420 | enum { |
421 | TEGRA_VGPU_FECS_TRACE_DATA_UPDATE = 0 | ||
422 | }; | ||
423 | |||
424 | struct tegra_vgpu_fecs_trace_event_info { | ||
425 | u32 type; | ||
426 | }; | ||
427 | |||
428 | enum { | ||
429 | |||
415 | TEGRA_VGPU_INTR_GR = 0, | 430 | TEGRA_VGPU_INTR_GR = 0, |
416 | TEGRA_VGPU_INTR_FIFO, | 431 | TEGRA_VGPU_INTR_FIFO, |
417 | TEGRA_VGPU_INTR_CE2, | 432 | TEGRA_VGPU_INTR_CE2, |
@@ -422,7 +437,8 @@ enum { | |||
422 | 437 | ||
423 | enum { | 438 | enum { |
424 | TEGRA_VGPU_EVENT_INTR = 0, | 439 | TEGRA_VGPU_EVENT_INTR = 0, |
425 | TEGRA_VGPU_EVENT_ABORT | 440 | TEGRA_VGPU_EVENT_ABORT, |
441 | TEGRA_VGPU_EVENT_FECS_TRACE | ||
426 | }; | 442 | }; |
427 | 443 | ||
428 | struct tegra_vgpu_intr_msg { | 444 | struct tegra_vgpu_intr_msg { |
@@ -434,6 +450,7 @@ struct tegra_vgpu_intr_msg { | |||
434 | struct tegra_vgpu_fifo_intr_info fifo_intr; | 450 | struct tegra_vgpu_fifo_intr_info fifo_intr; |
435 | struct tegra_vgpu_fifo_nonstall_intr_info fifo_nonstall_intr; | 451 | struct tegra_vgpu_fifo_nonstall_intr_info fifo_nonstall_intr; |
436 | struct tegra_vgpu_ce2_nonstall_intr_info ce2_nonstall_intr; | 452 | struct tegra_vgpu_ce2_nonstall_intr_info ce2_nonstall_intr; |
453 | struct tegra_vgpu_fecs_trace_event_info fecs_trace; | ||
437 | char padding[32]; | 454 | char padding[32]; |
438 | } info; | 455 | } info; |
439 | }; | 456 | }; |