summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorThomas Fleury <tfleury@nvidia.com>2017-04-27 14:28:27 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-06-15 14:53:10 -0400
commit741e5c45179db066ddf5bed0be6f36e4d0d4010e (patch)
tree815f83070ae0fcf37a7b234caf8a2f86997f99bb /drivers/gpu
parent77e2cbab237637f71367df25384164b8c936a31a (diff)
gpu: nvgpu: hal for timestamps correlation
In order to perform timestamp correlation for FECS traces, we need to collect CPU / GPU timestamp samples. In the virtualization case, it is possible for a guest to get GPU timestamps by using read_ptimer. However, if the CPU timestamp is read on the guest side and the GPU timestamp is read on the vm-server side, this introduces some latency that creates an artificial offset for GPU timestamps (~2 us on average). For better CPU / GPU timestamp correlation, add a command to collect all timestamps on the vm-server side. Bug 1900475 Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6 Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: http://git-master/r/1472447 (cherry picked from commit 56f56b5cd9d2e75cf7d2613b5e115bfebdbee0ce) Reviewed-on: http://git-master/r/1489183 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c78
-rw-r--r--drivers/gpu/nvgpu/gk20a/bus_gk20a.c61
-rw-r--r--drivers/gpu/nvgpu/gk20a/bus_gk20a.h9
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gm20b/bus_gm20b.c1
-rw-r--r--drivers/gpu/nvgpu/vgpu/vgpu.c43
6 files changed, 136 insertions, 60 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
index 90a3fbd4..025a30fe 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -533,76 +533,34 @@ static int gk20a_ctrl_get_buffer_info(
533 &args->out.id, &args->out.length); 533 &args->out.id, &args->out.length);
534} 534}
535 535
536static inline u64 get_cpu_timestamp_tsc(void)
537{
538 return ((u64) get_cycles());
539}
540
541static inline u64 get_cpu_timestamp_jiffies(void)
542{
543 return (get_jiffies_64() - INITIAL_JIFFIES);
544}
545
546static inline u64 get_cpu_timestamp_timeofday(void)
547{
548 struct timeval tv;
549
550 do_gettimeofday(&tv);
551 return timeval_to_jiffies(&tv);
552}
553
554static inline int get_timestamps_zipper(struct gk20a *g,
555 u64 (*get_cpu_timestamp)(void),
556 struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
557{
558 int err = 0;
559 unsigned int i = 0;
560
561 if (gk20a_busy(g)) {
562 nvgpu_err(g, "GPU not powered on");
563 err = -EINVAL;
564 goto end;
565 }
566
567 for (i = 0; i < args->count; i++) {
568 err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp);
569 if (err)
570 return err;
571
572 args->samples[i].cpu_timestamp = get_cpu_timestamp();
573 }
574
575end:
576 gk20a_idle(g);
577 return err;
578}
579
580static int nvgpu_gpu_get_cpu_time_correlation_info( 536static int nvgpu_gpu_get_cpu_time_correlation_info(
581 struct gk20a *g, 537 struct gk20a *g,
582 struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) 538 struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
583{ 539{
584 int err = 0; 540 struct nvgpu_cpu_time_correlation_sample *samples;
585 u64 (*get_cpu_timestamp)(void) = NULL; 541 int err;
542 u32 i;
586 543
587 if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT) 544 if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT)
588 return -EINVAL; 545 return -EINVAL;
589 546
590 switch (args->source_id) { 547 samples = nvgpu_kzalloc(g, args->count *
591 case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC: 548 sizeof(struct nvgpu_cpu_time_correlation_sample));
592 get_cpu_timestamp = get_cpu_timestamp_tsc; 549 if (!samples) {
593 break; 550 return -ENOMEM;
594 case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES:
595 get_cpu_timestamp = get_cpu_timestamp_jiffies;
596 break;
597 case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY:
598 get_cpu_timestamp = get_cpu_timestamp_timeofday;
599 break;
600 default:
601 nvgpu_err(g, "invalid cpu clock source id");
602 return -EINVAL;
603 } 551 }
604 552
605 err = get_timestamps_zipper(g, get_cpu_timestamp, args); 553 err = g->ops.bus.get_timestamps_zipper(g,
554 args->source_id, args->count, samples);
555 if (!err) {
556 for (i = 0; i < args->count; i++) {
557 args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
558 args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
559 }
560 }
561
562 nvgpu_kfree(g, samples);
563
606 return err; 564 return err;
607} 565}
608 566
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
index 52ef08e4..7f0ca013 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
@@ -128,6 +128,66 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value)
128 return -EBUSY; 128 return -EBUSY;
129} 129}
130 130
131static inline u64 get_cpu_timestamp_tsc(void)
132{
133 return ((u64) get_cycles());
134}
135
136static inline u64 get_cpu_timestamp_jiffies(void)
137{
138 return (get_jiffies_64() - INITIAL_JIFFIES);
139}
140
141static inline u64 get_cpu_timestamp_timeofday(void)
142{
143 struct timeval tv;
144
145 do_gettimeofday(&tv);
146 return timeval_to_jiffies(&tv);
147}
148
149int gk20a_get_timestamps_zipper(struct gk20a *g,
150 u32 source_id, u32 count,
151 struct nvgpu_cpu_time_correlation_sample *samples)
152{
153 int err = 0;
154 unsigned int i = 0;
155 u64 (*get_cpu_timestamp)(void) = NULL;
156
157 switch (source_id) {
158 case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
159 get_cpu_timestamp = get_cpu_timestamp_tsc;
160 break;
161 case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES:
162 get_cpu_timestamp = get_cpu_timestamp_jiffies;
163 break;
164 case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY:
165 get_cpu_timestamp = get_cpu_timestamp_timeofday;
166 break;
167 default:
168 nvgpu_err(g, "invalid cpu clock source id\n");
169 return -EINVAL;
170 }
171
172 if (gk20a_busy(g)) {
173 nvgpu_err(g, "GPU not powered on\n");
174 err = -EINVAL;
175 goto end;
176 }
177
178 for (i = 0; i < count; i++) {
179 err = g->ops.bus.read_ptimer(g, &samples[i].gpu_timestamp);
180 if (err)
181 return err;
182
183 samples[i].cpu_timestamp = get_cpu_timestamp();
184 }
185
186end:
187 gk20a_idle(g);
188 return err;
189}
190
131static int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) 191static int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
132{ 192{
133 u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); 193 u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
@@ -150,5 +210,6 @@ void gk20a_init_bus(struct gpu_ops *gops)
150 gops->bus.init_hw = gk20a_bus_init_hw; 210 gops->bus.init_hw = gk20a_bus_init_hw;
151 gops->bus.isr = gk20a_bus_isr; 211 gops->bus.isr = gk20a_bus_isr;
152 gops->bus.read_ptimer = gk20a_read_ptimer; 212 gops->bus.read_ptimer = gk20a_read_ptimer;
213 gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
153 gops->bus.bar1_bind = gk20a_bus_bar1_bind; 214 gops->bus.bar1_bind = gk20a_bus_bar1_bind;
154} 215}
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/bus_gk20a.h
index 72710c35..088c385e 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.h
@@ -22,10 +22,19 @@ struct gk20a;
22struct gpu_ops; 22struct gpu_ops;
23struct nvgpu_mem; 23struct nvgpu_mem;
24 24
/*
 * One CPU/GPU timestamp pair, as filled in by the get_timestamps_zipper
 * implementations: gpu_timestamp comes from read_ptimer, cpu_timestamp
 * from the selected CPU clock source.
 */
25struct nvgpu_cpu_time_correlation_sample {
26 u64 cpu_timestamp;
27 u64 gpu_timestamp;
28};
29
25void gk20a_init_bus(struct gpu_ops *gops); 30void gk20a_init_bus(struct gpu_ops *gops);
26 31
27void gk20a_bus_isr(struct gk20a *g); 32void gk20a_bus_isr(struct gk20a *g);
28int gk20a_read_ptimer(struct gk20a *g, u64 *value); 33int gk20a_read_ptimer(struct gk20a *g, u64 *value);
29void gk20a_bus_init_hw(struct gk20a *g); 34void gk20a_bus_init_hw(struct gk20a *g);
30 35
36int gk20a_get_timestamps_zipper(struct gk20a *g,
37 u32 source_id, u32 count,
38 struct nvgpu_cpu_time_correlation_sample *samples);
39
31#endif /* GK20A_H */ 40#endif /* GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 34f89fc8..d1a55104 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -63,6 +63,7 @@ struct nvgpu_nvhost_dev;
63#include "priv_ring_gk20a.h" 63#include "priv_ring_gk20a.h"
64#include "therm_gk20a.h" 64#include "therm_gk20a.h"
65#include "gm20b/acr_gm20b.h" 65#include "gm20b/acr_gm20b.h"
66#include "gk20a/bus_gk20a.h"
66#include "cde_gk20a.h" 67#include "cde_gk20a.h"
67#include "sched_gk20a.h" 68#include "sched_gk20a.h"
68#ifdef CONFIG_ARCH_TEGRA_18x_SOC 69#ifdef CONFIG_ARCH_TEGRA_18x_SOC
@@ -905,6 +906,9 @@ struct gpu_ops {
905 void (*init_hw)(struct gk20a *g); 906 void (*init_hw)(struct gk20a *g);
906 void (*isr)(struct gk20a *g); 907 void (*isr)(struct gk20a *g);
907 int (*read_ptimer)(struct gk20a *g, u64 *value); 908 int (*read_ptimer)(struct gk20a *g, u64 *value);
909 int (*get_timestamps_zipper)(struct gk20a *g,
910 u32 source_id, u32 count,
911 struct nvgpu_cpu_time_correlation_sample *);
908 int (*bar1_bind)(struct gk20a *g, struct nvgpu_mem *bar1_inst); 912 int (*bar1_bind)(struct gk20a *g, struct nvgpu_mem *bar1_inst);
909 } bus; 913 } bus;
910 914
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
index 7b414196..39778c55 100644
--- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
@@ -58,5 +58,6 @@ void gm20b_init_bus(struct gpu_ops *gops)
58 gops->bus.init_hw = gk20a_bus_init_hw; 58 gops->bus.init_hw = gk20a_bus_init_hw;
59 gops->bus.isr = gk20a_bus_isr; 59 gops->bus.isr = gk20a_bus_isr;
60 gops->bus.read_ptimer = gk20a_read_ptimer; 60 gops->bus.read_ptimer = gk20a_read_ptimer;
61 gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
61 gops->bus.bar1_bind = gm20b_bus_bar1_bind; 62 gops->bus.bar1_bind = gm20b_bus_bar1_bind;
62} 63}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index cdd0d378..a8c28826 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -367,6 +367,48 @@ static int vgpu_read_ptimer(struct gk20a *g, u64 *value)
367 return err; 367 return err;
368} 368}
369 369
370int vgpu_get_timestamps_zipper(struct gk20a *g,
371 u32 source_id, u32 count,
372 struct nvgpu_cpu_time_correlation_sample *samples)
373{
374 struct tegra_vgpu_cmd_msg msg = {0};
375 struct tegra_vgpu_get_timestamps_zipper_params *p =
376 &msg.params.get_timestamps_zipper;
377 int err;
378 u32 i;
379
380 gk20a_dbg_fn("");
381
382 if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) {
383 nvgpu_err(g, "count %u overflow", count);
384 return -EINVAL;
385 }
386
387 if (source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) {
388 nvgpu_err(g, "source_id %u not supported", source_id);
389 return -EINVAL;
390 }
391
392 msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER;
393 msg.handle = vgpu_get_handle(g);
394 p->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC;
395 p->count = count;
396
397 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
398 err = err ? err : msg.ret;
399 if (err) {
400 nvgpu_err(g, "vgpu get timestamps zipper failed, err=%d", err);
401 return err;
402 }
403
404 for (i = 0; i < count; i++) {
405 samples[i].cpu_timestamp = p->samples[i].cpu_timestamp;
406 samples[i].gpu_timestamp = p->samples[i].gpu_timestamp;
407 }
408
409 return err;
410}
411
370void vgpu_init_hal_common(struct gk20a *g) 412void vgpu_init_hal_common(struct gk20a *g)
371{ 413{
372 struct gpu_ops *gops = &g->ops; 414 struct gpu_ops *gops = &g->ops;
@@ -384,6 +426,7 @@ void vgpu_init_hal_common(struct gk20a *g)
384#endif 426#endif
385 gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics; 427 gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
386 gops->bus.read_ptimer = vgpu_read_ptimer; 428 gops->bus.read_ptimer = vgpu_read_ptimer;
429 gops->bus.get_timestamps_zipper = vgpu_get_timestamps_zipper;
387} 430}
388 431
389static int vgpu_init_hal(struct gk20a *g) 432static int vgpu_init_hal(struct gk20a *g)