From 741e5c45179db066ddf5bed0be6f36e4d0d4010e Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Thu, 27 Apr 2017 11:28:27 -0700 Subject: gpu: nvgpu: hal for timestamps correlation In order to perform timestamp correlation for FECS traces, we need to collect CPU / GPU timestamp samples. In the virtualization case, it is possible for a guest to get GPU timestamps by using read_ptimer. However, if the CPU timestamp is read on the guest side, and the GPU timestamp is read on the vm-server side, then it introduces some latency that will create an artificial offset for GPU timestamps (~2 us on average). For better CPU / GPU timestamp correlation, add a command to collect all timestamps on the vm-server side. Bug 1900475 Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6 Signed-off-by: Thomas Fleury Reviewed-on: http://git-master/r/1472447 (cherry picked from commit 56f56b5cd9d2e75cf7d2613b5e115bfebdbee0ce) Reviewed-on: http://git-master/r/1489183 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 78 +++++++---------------------- drivers/gpu/nvgpu/gk20a/bus_gk20a.c | 61 ++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/bus_gk20a.h | 9 ++++ drivers/gpu/nvgpu/gk20a/gk20a.h | 4 ++ drivers/gpu/nvgpu/gm20b/bus_gm20b.c | 1 + drivers/gpu/nvgpu/vgpu/vgpu.c | 43 ++++++++++++++++ include/linux/tegra_vgpu.h | 18 +++++++ 7 files changed, 154 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c index 90a3fbd4..025a30fe 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c @@ -533,76 +533,34 @@ static int gk20a_ctrl_get_buffer_info( &args->out.id, &args->out.length); } -static inline u64 get_cpu_timestamp_tsc(void) -{ - return ((u64) get_cycles()); -} - -static inline u64 get_cpu_timestamp_jiffies(void) -{ - return (get_jiffies_64() - INITIAL_JIFFIES); -} - -static inline u64 
get_cpu_timestamp_timeofday(void) -{ - struct timeval tv; - - do_gettimeofday(&tv); - return timeval_to_jiffies(&tv); -} - -static inline int get_timestamps_zipper(struct gk20a *g, - u64 (*get_cpu_timestamp)(void), - struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) -{ - int err = 0; - unsigned int i = 0; - - if (gk20a_busy(g)) { - nvgpu_err(g, "GPU not powered on"); - err = -EINVAL; - goto end; - } - - for (i = 0; i < args->count; i++) { - err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp); - if (err) - return err; - - args->samples[i].cpu_timestamp = get_cpu_timestamp(); - } - -end: - gk20a_idle(g); - return err; -} - static int nvgpu_gpu_get_cpu_time_correlation_info( struct gk20a *g, struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) { - int err = 0; - u64 (*get_cpu_timestamp)(void) = NULL; + struct nvgpu_cpu_time_correlation_sample *samples; + int err; + u32 i; if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT) return -EINVAL; - switch (args->source_id) { - case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC: - get_cpu_timestamp = get_cpu_timestamp_tsc; - break; - case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES: - get_cpu_timestamp = get_cpu_timestamp_jiffies; - break; - case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY: - get_cpu_timestamp = get_cpu_timestamp_timeofday; - break; - default: - nvgpu_err(g, "invalid cpu clock source id"); - return -EINVAL; + samples = nvgpu_kzalloc(g, args->count * + sizeof(struct nvgpu_cpu_time_correlation_sample)); + if (!samples) { + return -ENOMEM; } - err = get_timestamps_zipper(g, get_cpu_timestamp, args); + err = g->ops.bus.get_timestamps_zipper(g, + args->source_id, args->count, samples); + if (!err) { + for (i = 0; i < args->count; i++) { + args->samples[i].cpu_timestamp = samples[i].cpu_timestamp; + args->samples[i].gpu_timestamp = samples[i].gpu_timestamp; + } + } + + nvgpu_kfree(g, samples); + return err; } diff --git 
a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c index 52ef08e4..7f0ca013 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c @@ -128,6 +128,66 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value) return -EBUSY; } +static inline u64 get_cpu_timestamp_tsc(void) +{ + return ((u64) get_cycles()); +} + +static inline u64 get_cpu_timestamp_jiffies(void) +{ + return (get_jiffies_64() - INITIAL_JIFFIES); +} + +static inline u64 get_cpu_timestamp_timeofday(void) +{ + struct timeval tv; + + do_gettimeofday(&tv); + return timeval_to_jiffies(&tv); +} + +int gk20a_get_timestamps_zipper(struct gk20a *g, + u32 source_id, u32 count, + struct nvgpu_cpu_time_correlation_sample *samples) +{ + int err = 0; + unsigned int i = 0; + u64 (*get_cpu_timestamp)(void) = NULL; + + switch (source_id) { + case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC: + get_cpu_timestamp = get_cpu_timestamp_tsc; + break; + case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES: + get_cpu_timestamp = get_cpu_timestamp_jiffies; + break; + case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY: + get_cpu_timestamp = get_cpu_timestamp_timeofday; + break; + default: + nvgpu_err(g, "invalid cpu clock source id\n"); + return -EINVAL; + } + + if (gk20a_busy(g)) { + nvgpu_err(g, "GPU not powered on\n"); + err = -EINVAL; + goto end; + } + + for (i = 0; i < count; i++) { + err = g->ops.bus.read_ptimer(g, &samples[i].gpu_timestamp); + if (err) + return err; + + samples[i].cpu_timestamp = get_cpu_timestamp(); + } + +end: + gk20a_idle(g); + return err; +} + static int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) { u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); @@ -150,5 +210,6 @@ void gk20a_init_bus(struct gpu_ops *gops) gops->bus.init_hw = gk20a_bus_init_hw; gops->bus.isr = gk20a_bus_isr; gops->bus.read_ptimer = gk20a_read_ptimer; + gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper; 
gops->bus.bar1_bind = gk20a_bus_bar1_bind; } diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/bus_gk20a.h index 72710c35..088c385e 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.h @@ -22,10 +22,19 @@ struct gk20a; struct gpu_ops; struct nvgpu_mem; +struct nvgpu_cpu_time_correlation_sample { + u64 cpu_timestamp; + u64 gpu_timestamp; +}; + void gk20a_init_bus(struct gpu_ops *gops); void gk20a_bus_isr(struct gk20a *g); int gk20a_read_ptimer(struct gk20a *g, u64 *value); void gk20a_bus_init_hw(struct gk20a *g); +int gk20a_get_timestamps_zipper(struct gk20a *g, + u32 source_id, u32 count, + struct nvgpu_cpu_time_correlation_sample *samples); + #endif /* GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 34f89fc8..d1a55104 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -63,6 +63,7 @@ struct nvgpu_nvhost_dev; #include "priv_ring_gk20a.h" #include "therm_gk20a.h" #include "gm20b/acr_gm20b.h" +#include "gk20a/bus_gk20a.h" #include "cde_gk20a.h" #include "sched_gk20a.h" #ifdef CONFIG_ARCH_TEGRA_18x_SOC @@ -905,6 +906,9 @@ struct gpu_ops { void (*init_hw)(struct gk20a *g); void (*isr)(struct gk20a *g); int (*read_ptimer)(struct gk20a *g, u64 *value); + int (*get_timestamps_zipper)(struct gk20a *g, + u32 source_id, u32 count, + struct nvgpu_cpu_time_correlation_sample *); int (*bar1_bind)(struct gk20a *g, struct nvgpu_mem *bar1_inst); } bus; diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c index 7b414196..39778c55 100644 --- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c @@ -58,5 +58,6 @@ void gm20b_init_bus(struct gpu_ops *gops) gops->bus.init_hw = gk20a_bus_init_hw; gops->bus.isr = gk20a_bus_isr; gops->bus.read_ptimer = gk20a_read_ptimer; + gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper; gops->bus.bar1_bind = gm20b_bus_bar1_bind; } diff 
--git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index cdd0d378..a8c28826 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -367,6 +367,48 @@ static int vgpu_read_ptimer(struct gk20a *g, u64 *value) return err; } +int vgpu_get_timestamps_zipper(struct gk20a *g, + u32 source_id, u32 count, + struct nvgpu_cpu_time_correlation_sample *samples) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_get_timestamps_zipper_params *p = + &msg.params.get_timestamps_zipper; + int err; + u32 i; + + gk20a_dbg_fn(""); + + if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) { + nvgpu_err(g, "count %u overflow", count); + return -EINVAL; + } + + if (source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) { + nvgpu_err(g, "source_id %u not supported", source_id); + return -EINVAL; + } + + msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER; + msg.handle = vgpu_get_handle(g); + p->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC; + p->count = count; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + if (err) { + nvgpu_err(g, "vgpu get timestamps zipper failed, err=%d", err); + return err; + } + + for (i = 0; i < count; i++) { + samples[i].cpu_timestamp = p->samples[i].cpu_timestamp; + samples[i].gpu_timestamp = p->samples[i].gpu_timestamp; + } + + return err; +} + void vgpu_init_hal_common(struct gk20a *g) { struct gpu_ops *gops = &g->ops; @@ -384,6 +426,7 @@ void vgpu_init_hal_common(struct gk20a *g) #endif gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics; gops->bus.read_ptimer = vgpu_read_ptimer; + gops->bus.get_timestamps_zipper = vgpu_get_timestamps_zipper; } static int vgpu_init_hal(struct gk20a *g) diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h index daef7d98..67f51806 100644 --- a/include/linux/tegra_vgpu.h +++ b/include/linux/tegra_vgpu.h @@ -101,6 +101,7 @@ enum { TEGRA_VGPU_CMD_RESUME_CONTEXTS = 67, TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE = 68, TEGRA_VGPU_CMD_PROF_MGT = 72, + TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74, }; struct tegra_vgpu_connect_params { @@ -389,6 +390,22 @@ struct tegra_vgpu_read_ptimer_params { u64 time; }; +#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT 16 +#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC 1 +struct tegra_vgpu_get_timestamps_zipper_params { + /* timestamp pairs */ + struct { + /* cpu timestamp value */ + u64 cpu_timestamp; + /* raw GPU counter (PTIMER) value */ + u64 gpu_timestamp; + } samples[TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT]; + /* number of pairs to read */ + u32 count; + /* cpu clock source id */ + u32 source_id; +}; + struct tegra_vgpu_set_powergate_params { u32 mode; }; @@ -518,6 +535,7 @@ struct tegra_vgpu_cmd_msg { struct tegra_vgpu_suspend_resume_contexts resume_contexts; struct tegra_vgpu_clear_sm_error_state clear_sm_error_state; struct tegra_vgpu_prof_mgt_params prof_management; + struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper; char padding[192]; } params; }; -- cgit v1.2.2