diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 43 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 45 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 |
6 files changed, 47 insertions, 50 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 39581eb2..3e34b6b8 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -626,50 +626,17 @@ static int nvgpu_gpu_get_gpu_time( | |||
626 | struct gk20a *g, | 626 | struct gk20a *g, |
627 | struct nvgpu_gpu_get_gpu_time_args *args) | 627 | struct nvgpu_gpu_get_gpu_time_args *args) |
628 | { | 628 | { |
629 | int err = 0; | 629 | u64 time; |
630 | const unsigned int max_iterations = 3; | 630 | int err; |
631 | unsigned int i = 0; | ||
632 | u32 gpu_timestamp_hi_prev = 0; | ||
633 | 631 | ||
634 | err = gk20a_busy(g->dev); | 632 | err = gk20a_busy(g->dev); |
635 | if (err) | 633 | if (err) |
636 | return err; | 634 | return err; |
637 | 635 | ||
638 | /* Note. The GPU nanosecond timer consists of two 32-bit | 636 | err = g->ops.read_ptimer(g, &time); |
639 | * registers (high & low). To detect a possible low register | 637 | if (!err) |
640 | * wrap-around between the reads, we need to read the high | 638 | args->gpu_timestamp = time; |
641 | * register before and after low. The wraparound happens | ||
642 | * approximately once per 4 secs. */ | ||
643 | 639 | ||
644 | /* get initial gpu_timestamp_hi value */ | ||
645 | gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r()); | ||
646 | |||
647 | for (i = 0; i < max_iterations; ++i) { | ||
648 | u32 gpu_timestamp_hi = 0; | ||
649 | u32 gpu_timestamp_lo = 0; | ||
650 | |||
651 | rmb(); /* maintain read order */ | ||
652 | gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r()); | ||
653 | rmb(); /* maintain read order */ | ||
654 | gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r()); | ||
655 | |||
656 | if (gpu_timestamp_hi == gpu_timestamp_hi_prev) { | ||
657 | args->gpu_timestamp = | ||
658 | (((u64)gpu_timestamp_hi) << 32) | | ||
659 | gpu_timestamp_lo; | ||
660 | goto clean_up; | ||
661 | } | ||
662 | |||
663 | /* wrap-around detected, retry */ | ||
664 | gpu_timestamp_hi_prev = gpu_timestamp_hi; | ||
665 | } | ||
666 | |||
667 | /* too many iterations, bail out */ | ||
668 | gk20a_err(dev_from_gk20a(g), | ||
669 | "Failed to read GPU time. Clock or bus unstable?\n"); | ||
670 | err = -EBUSY; | ||
671 | |||
672 | clean_up: | ||
673 | gk20a_idle(g->dev); | 640 | gk20a_idle(g->dev); |
674 | return err; | 641 | return err; |
675 | } | 642 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 3682d904..aadbf7af 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | |||
@@ -656,7 +656,6 @@ void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) | |||
656 | struct nvgpu_ctxsw_trace_entry entry = { | 656 | struct nvgpu_ctxsw_trace_entry entry = { |
657 | .vmid = 0, | 657 | .vmid = 0, |
658 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, | 658 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, |
659 | .timestamp = gk20a_read_ptimer(g), | ||
660 | .context_id = 0, | 659 | .context_id = 0, |
661 | .pid = ch->pid, | 660 | .pid = ch->pid, |
662 | }; | 661 | }; |
@@ -664,6 +663,7 @@ void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) | |||
664 | if (!g->ctxsw_trace) | 663 | if (!g->ctxsw_trace) |
665 | return; | 664 | return; |
666 | 665 | ||
666 | g->ops.read_ptimer(g, &entry.timestamp); | ||
667 | gk20a_ctxsw_trace_write(g, &entry); | 667 | gk20a_ctxsw_trace_write(g, &entry); |
668 | gk20a_ctxsw_trace_wake_up(g, 0); | 668 | gk20a_ctxsw_trace_wake_up(g, 0); |
669 | #endif | 669 | #endif |
@@ -676,7 +676,6 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) | |||
676 | struct nvgpu_ctxsw_trace_entry entry = { | 676 | struct nvgpu_ctxsw_trace_entry entry = { |
677 | .vmid = 0, | 677 | .vmid = 0, |
678 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, | 678 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, |
679 | .timestamp = gk20a_read_ptimer(g), | ||
680 | .context_id = 0, | 679 | .context_id = 0, |
681 | .pid = 0, | 680 | .pid = 0, |
682 | }; | 681 | }; |
@@ -685,6 +684,7 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) | |||
685 | if (!g->ctxsw_trace) | 684 | if (!g->ctxsw_trace) |
686 | return; | 685 | return; |
687 | 686 | ||
687 | g->ops.read_ptimer(g, &entry.timestamp); | ||
688 | mutex_lock(&tsg->ch_list_lock); | 688 | mutex_lock(&tsg->ch_list_lock); |
689 | if (!list_empty(&tsg->ch_list)) { | 689 | if (!list_empty(&tsg->ch_list)) { |
690 | ch = list_entry(tsg->ch_list.next, | 690 | ch = list_entry(tsg->ch_list.next, |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index d21d0527..db77d40f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -2227,18 +2227,45 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name) | |||
2227 | return fw; | 2227 | return fw; |
2228 | } | 2228 | } |
2229 | 2229 | ||
2230 | 2230 | int gk20a_read_ptimer(struct gk20a *g, u64 *value) | |
2231 | u64 gk20a_read_ptimer(struct gk20a *g) | ||
2232 | { | 2231 | { |
2233 | u32 time_hi0 = gk20a_readl(g, timer_time_1_r()); | 2232 | const unsigned int max_iterations = 3; |
2234 | u32 time_lo = gk20a_readl(g, timer_time_0_r()); | 2233 | unsigned int i = 0; |
2235 | u32 time_hi1 = gk20a_readl(g, timer_time_1_r()); | 2234 | u32 gpu_timestamp_hi_prev = 0; |
2236 | u32 time_hi = (time_lo & (1L << 31)) ? time_hi0 : time_hi1; | ||
2237 | u64 time = ((u64)time_hi << 32) | time_lo; | ||
2238 | 2235 | ||
2239 | return time; | 2236 | if (!value) |
2240 | } | 2237 | return -EINVAL; |
2238 | |||
2239 | /* Note. The GPU nanosecond timer consists of two 32-bit | ||
2240 | * registers (high & low). To detect a possible low register | ||
2241 | * wrap-around between the reads, we need to read the high | ||
2242 | * register before and after low. The wraparound happens | ||
2243 | * approximately once per 4 secs. */ | ||
2244 | |||
2245 | /* get initial gpu_timestamp_hi value */ | ||
2246 | gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r()); | ||
2247 | |||
2248 | for (i = 0; i < max_iterations; ++i) { | ||
2249 | u32 gpu_timestamp_hi = 0; | ||
2250 | u32 gpu_timestamp_lo = 0; | ||
2251 | |||
2252 | gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r()); | ||
2253 | gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r()); | ||
2241 | 2254 | ||
2255 | if (gpu_timestamp_hi == gpu_timestamp_hi_prev) { | ||
2256 | *value = (((u64)gpu_timestamp_hi) << 32) | | ||
2257 | gpu_timestamp_lo; | ||
2258 | return 0; | ||
2259 | } | ||
2260 | |||
2261 | /* wrap-around detected, retry */ | ||
2262 | gpu_timestamp_hi_prev = gpu_timestamp_hi; | ||
2263 | } | ||
2264 | |||
2265 | /* too many iterations, bail out */ | ||
2266 | gk20a_err(dev_from_gk20a(g), "failed to read ptimer"); | ||
2267 | return -EBUSY; | ||
2268 | } | ||
2242 | 2269 | ||
2243 | MODULE_LICENSE("GPL v2"); | 2270 | MODULE_LICENSE("GPL v2"); |
2244 | module_init(gk20a_init); | 2271 | module_init(gk20a_init); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 4ede8982..e639ace4 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -620,6 +620,7 @@ struct gpu_ops { | |||
620 | 620 | ||
621 | int (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value value); | 621 | int (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value value); |
622 | int (*chip_init_gpu_characteristics)(struct gk20a *g); | 622 | int (*chip_init_gpu_characteristics)(struct gk20a *g); |
623 | int (*read_ptimer)(struct gk20a *g, u64 *value); | ||
623 | 624 | ||
624 | struct { | 625 | struct { |
625 | int (*init)(struct gk20a *g); | 626 | int (*init)(struct gk20a *g); |
@@ -1111,7 +1112,7 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x) | |||
1111 | return (timeout * 10) / scale10x; | 1112 | return (timeout * 10) / scale10x; |
1112 | } | 1113 | } |
1113 | 1114 | ||
1114 | u64 gk20a_read_ptimer(struct gk20a *g); | 1115 | int gk20a_read_ptimer(struct gk20a *g, u64 *value); |
1115 | extern struct class nvgpu_class; | 1116 | extern struct class nvgpu_class; |
1116 | 1117 | ||
1117 | #define INTERFACE_NAME "nvhost%s-gpu" | 1118 | #define INTERFACE_NAME "nvhost%s-gpu" |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index bf58d298..99c55992 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -150,6 +150,7 @@ int gk20a_init_hal(struct gk20a *g) | |||
150 | gops->name = "gk20a"; | 150 | gops->name = "gk20a"; |
151 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; | 151 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; |
152 | gops->get_litter_value = gk20a_get_litter_value; | 152 | gops->get_litter_value = gk20a_get_litter_value; |
153 | gops->read_ptimer = gk20a_read_ptimer; | ||
153 | 154 | ||
154 | c->twod_class = FERMI_TWOD_A; | 155 | c->twod_class = FERMI_TWOD_A; |
155 | c->threed_class = KEPLER_C; | 156 | c->threed_class = KEPLER_C; |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index cbd3f50b..4f2fffc8 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -218,6 +218,7 @@ int gm20b_init_hal(struct gk20a *g) | |||
218 | gops->name = "gm20b"; | 218 | gops->name = "gm20b"; |
219 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; | 219 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; |
220 | gops->get_litter_value = gm20b_get_litter_value; | 220 | gops->get_litter_value = gm20b_get_litter_value; |
221 | gops->read_ptimer = gk20a_read_ptimer; | ||
221 | 222 | ||
222 | c->twod_class = FERMI_TWOD_A; | 223 | c->twod_class = FERMI_TWOD_A; |
223 | c->threed_class = MAXWELL_B; | 224 | c->threed_class = MAXWELL_B; |