summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
author: Richard Zhao <rizhao@nvidia.com> 2016-06-02 20:17:14 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2016-06-16 17:06:46 -0400
commit: 86225cb04eb040a83400d5ad6619b3c2318a53f8 (patch)
tree: 3c61b7b6831ee2f01c9977d77b3ac9bbaf0f3407 /drivers/gpu/nvgpu
parent: 03164b0f4abeb06d4b5207531b97f11fe4a860dd (diff)
gpu: nvgpu: add read_ptimer to gops
Move all places that read ptimer to use the callback. This is needed in order to add a vgpu implementation of read ptimer. Bug 1395833 Change-Id: Ia339f2f08d75ca4969a443fffc9a61cff1d3d2b7 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: http://git-master/r/1159587 (cherry picked from commit a01f804684f875c9cffc31eb2c1038f2f29ec66f) Reviewed-on: http://git-master/r/1158449 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c 43
-rw-r--r-- drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c 4
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.c 45
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/hal_gk20a.c 1
-rw-r--r-- drivers/gpu/nvgpu/gm20b/hal_gm20b.c 1
6 files changed, 47 insertions, 50 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 39581eb2..3e34b6b8 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -626,50 +626,17 @@ static int nvgpu_gpu_get_gpu_time(
626 struct gk20a *g, 626 struct gk20a *g,
627 struct nvgpu_gpu_get_gpu_time_args *args) 627 struct nvgpu_gpu_get_gpu_time_args *args)
628{ 628{
629 int err = 0; 629 u64 time;
630 const unsigned int max_iterations = 3; 630 int err;
631 unsigned int i = 0;
632 u32 gpu_timestamp_hi_prev = 0;
633 631
634 err = gk20a_busy(g->dev); 632 err = gk20a_busy(g->dev);
635 if (err) 633 if (err)
636 return err; 634 return err;
637 635
638 /* Note. The GPU nanosecond timer consists of two 32-bit 636 err = g->ops.read_ptimer(g, &time);
639 * registers (high & low). To detect a possible low register 637 if (!err)
640 * wrap-around between the reads, we need to read the high 638 args->gpu_timestamp = time;
641 * register before and after low. The wraparound happens
642 * approximately once per 4 secs. */
643 639
644 /* get initial gpu_timestamp_hi value */
645 gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r());
646
647 for (i = 0; i < max_iterations; ++i) {
648 u32 gpu_timestamp_hi = 0;
649 u32 gpu_timestamp_lo = 0;
650
651 rmb(); /* maintain read order */
652 gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
653 rmb(); /* maintain read order */
654 gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r());
655
656 if (gpu_timestamp_hi == gpu_timestamp_hi_prev) {
657 args->gpu_timestamp =
658 (((u64)gpu_timestamp_hi) << 32) |
659 gpu_timestamp_lo;
660 goto clean_up;
661 }
662
663 /* wrap-around detected, retry */
664 gpu_timestamp_hi_prev = gpu_timestamp_hi;
665 }
666
667 /* too many iterations, bail out */
668 gk20a_err(dev_from_gk20a(g),
669 "Failed to read GPU time. Clock or bus unstable?\n");
670 err = -EBUSY;
671
672clean_up:
673 gk20a_idle(g->dev); 640 gk20a_idle(g->dev);
674 return err; 641 return err;
675} 642}
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 3682d904..aadbf7af 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -656,7 +656,6 @@ void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
656 struct nvgpu_ctxsw_trace_entry entry = { 656 struct nvgpu_ctxsw_trace_entry entry = {
657 .vmid = 0, 657 .vmid = 0,
658 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, 658 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
659 .timestamp = gk20a_read_ptimer(g),
660 .context_id = 0, 659 .context_id = 0,
661 .pid = ch->pid, 660 .pid = ch->pid,
662 }; 661 };
@@ -664,6 +663,7 @@ void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
664 if (!g->ctxsw_trace) 663 if (!g->ctxsw_trace)
665 return; 664 return;
666 665
666 g->ops.read_ptimer(g, &entry.timestamp);
667 gk20a_ctxsw_trace_write(g, &entry); 667 gk20a_ctxsw_trace_write(g, &entry);
668 gk20a_ctxsw_trace_wake_up(g, 0); 668 gk20a_ctxsw_trace_wake_up(g, 0);
669#endif 669#endif
@@ -676,7 +676,6 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
676 struct nvgpu_ctxsw_trace_entry entry = { 676 struct nvgpu_ctxsw_trace_entry entry = {
677 .vmid = 0, 677 .vmid = 0,
678 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, 678 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
679 .timestamp = gk20a_read_ptimer(g),
680 .context_id = 0, 679 .context_id = 0,
681 .pid = 0, 680 .pid = 0,
682 }; 681 };
@@ -685,6 +684,7 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
685 if (!g->ctxsw_trace) 684 if (!g->ctxsw_trace)
686 return; 685 return;
687 686
687 g->ops.read_ptimer(g, &entry.timestamp);
688 mutex_lock(&tsg->ch_list_lock); 688 mutex_lock(&tsg->ch_list_lock);
689 if (!list_empty(&tsg->ch_list)) { 689 if (!list_empty(&tsg->ch_list)) {
690 ch = list_entry(tsg->ch_list.next, 690 ch = list_entry(tsg->ch_list.next,
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index d21d0527..db77d40f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2227,18 +2227,45 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name)
2227 return fw; 2227 return fw;
2228} 2228}
2229 2229
2230 2230int gk20a_read_ptimer(struct gk20a *g, u64 *value)
2231u64 gk20a_read_ptimer(struct gk20a *g)
2232{ 2231{
2233 u32 time_hi0 = gk20a_readl(g, timer_time_1_r()); 2232 const unsigned int max_iterations = 3;
2234 u32 time_lo = gk20a_readl(g, timer_time_0_r()); 2233 unsigned int i = 0;
2235 u32 time_hi1 = gk20a_readl(g, timer_time_1_r()); 2234 u32 gpu_timestamp_hi_prev = 0;
2236 u32 time_hi = (time_lo & (1L << 31)) ? time_hi0 : time_hi1;
2237 u64 time = ((u64)time_hi << 32) | time_lo;
2238 2235
2239 return time; 2236 if (!value)
2240} 2237 return -EINVAL;
2238
2239 /* Note. The GPU nanosecond timer consists of two 32-bit
2240 * registers (high & low). To detect a possible low register
2241 * wrap-around between the reads, we need to read the high
2242 * register before and after low. The wraparound happens
2243 * approximately once per 4 secs. */
2244
2245 /* get initial gpu_timestamp_hi value */
2246 gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r());
2247
2248 for (i = 0; i < max_iterations; ++i) {
2249 u32 gpu_timestamp_hi = 0;
2250 u32 gpu_timestamp_lo = 0;
2251
2252 gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
2253 gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r());
2241 2254
2255 if (gpu_timestamp_hi == gpu_timestamp_hi_prev) {
2256 *value = (((u64)gpu_timestamp_hi) << 32) |
2257 gpu_timestamp_lo;
2258 return 0;
2259 }
2260
2261 /* wrap-around detected, retry */
2262 gpu_timestamp_hi_prev = gpu_timestamp_hi;
2263 }
2264
2265 /* too many iterations, bail out */
2266 gk20a_err(dev_from_gk20a(g), "failed to read ptimer");
2267 return -EBUSY;
2268}
2242 2269
2243MODULE_LICENSE("GPL v2"); 2270MODULE_LICENSE("GPL v2");
2244module_init(gk20a_init); 2271module_init(gk20a_init);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 4ede8982..e639ace4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -620,6 +620,7 @@ struct gpu_ops {
620 620
621 int (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value value); 621 int (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value value);
622 int (*chip_init_gpu_characteristics)(struct gk20a *g); 622 int (*chip_init_gpu_characteristics)(struct gk20a *g);
623 int (*read_ptimer)(struct gk20a *g, u64 *value);
623 624
624 struct { 625 struct {
625 int (*init)(struct gk20a *g); 626 int (*init)(struct gk20a *g);
@@ -1111,7 +1112,7 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
1111 return (timeout * 10) / scale10x; 1112 return (timeout * 10) / scale10x;
1112} 1113}
1113 1114
1114u64 gk20a_read_ptimer(struct gk20a *g); 1115int gk20a_read_ptimer(struct gk20a *g, u64 *value);
1115extern struct class nvgpu_class; 1116extern struct class nvgpu_class;
1116 1117
1117#define INTERFACE_NAME "nvhost%s-gpu" 1118#define INTERFACE_NAME "nvhost%s-gpu"
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index bf58d298..99c55992 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -150,6 +150,7 @@ int gk20a_init_hal(struct gk20a *g)
150 gops->name = "gk20a"; 150 gops->name = "gk20a";
151 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; 151 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
152 gops->get_litter_value = gk20a_get_litter_value; 152 gops->get_litter_value = gk20a_get_litter_value;
153 gops->read_ptimer = gk20a_read_ptimer;
153 154
154 c->twod_class = FERMI_TWOD_A; 155 c->twod_class = FERMI_TWOD_A;
155 c->threed_class = KEPLER_C; 156 c->threed_class = KEPLER_C;
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index cbd3f50b..4f2fffc8 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -218,6 +218,7 @@ int gm20b_init_hal(struct gk20a *g)
218 gops->name = "gm20b"; 218 gops->name = "gm20b";
219 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; 219 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
220 gops->get_litter_value = gm20b_get_litter_value; 220 gops->get_litter_value = gm20b_get_litter_value;
221 gops->read_ptimer = gk20a_read_ptimer;
221 222
222 c->twod_class = FERMI_TWOD_A; 223 c->twod_class = FERMI_TWOD_A;
223 c->threed_class = MAXWELL_B; 224 c->threed_class = MAXWELL_B;