diff options
author | Rob Clark <robdclark@gmail.com> | 2014-05-30 14:49:43 -0400 |
---|---|---|
committer | Rob Clark <robdclark@gmail.com> | 2014-06-02 07:36:21 -0400 |
commit | 70c70f091b1ffd16b3e1a439bd595f7d539b1d5d (patch) | |
tree | 658865a8a0b0b18a57a967989b034af84f0b0765 | |
parent | a7d3c9509b2fecf8e593f3c933ab302cbe987d2e (diff) |
drm/msm: add perf logging debugfs
Signed-off-by: Rob Clark <robdclark@gmail.com>
-rw-r--r-- | drivers/gpu/drm/msm/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 20 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_drv.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_drv.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 103 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.h | 31 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_perf.c | 275 |
7 files changed, 436 insertions, 5 deletions
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 9a506b5921e5..93ca49c8df44 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile | |||
@@ -34,6 +34,7 @@ msm-y := \ | |||
34 | msm_gem_submit.o \ | 34 | msm_gem_submit.o \ |
35 | msm_gpu.o \ | 35 | msm_gpu.o \ |
36 | msm_iommu.o \ | 36 | msm_iommu.o \ |
37 | msm_perf.o \ | ||
37 | msm_rd.o \ | 38 | msm_rd.o \ |
38 | msm_ringbuffer.o | 39 | msm_ringbuffer.o |
39 | 40 | ||
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index f20fbde5dc49..942e09d898a8 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c | |||
@@ -207,11 +207,11 @@ static int a3xx_hw_init(struct msm_gpu *gpu) | |||
207 | /* Turn on performance counters: */ | 207 | /* Turn on performance counters: */ |
208 | gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01); | 208 | gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01); |
209 | 209 | ||
210 | /* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS | 210 | /* Enable the perfcntrs that we use.. */ |
211 | * we will use this to augment our hang detection: | 211 | for (i = 0; i < gpu->num_perfcntrs; i++) { |
212 | */ | 212 | const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i]; |
213 | gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT, | 213 | gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val); |
214 | SP_FS_FULL_ALU_INSTRUCTIONS); | 214 | } |
215 | 215 | ||
216 | gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK); | 216 | gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK); |
217 | 217 | ||
@@ -465,6 +465,13 @@ static const struct adreno_gpu_funcs funcs = { | |||
465 | }, | 465 | }, |
466 | }; | 466 | }; |
467 | 467 | ||
468 | static const struct msm_gpu_perfcntr perfcntrs[] = { | ||
469 | { REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO, | ||
470 | SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" }, | ||
471 | { REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO, | ||
472 | SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" }, | ||
473 | }; | ||
474 | |||
468 | struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) | 475 | struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) |
469 | { | 476 | { |
470 | struct a3xx_gpu *a3xx_gpu = NULL; | 477 | struct a3xx_gpu *a3xx_gpu = NULL; |
@@ -504,6 +511,9 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) | |||
504 | DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u", | 511 | DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u", |
505 | gpu->fast_rate, gpu->slow_rate, gpu->bus_freq); | 512 | gpu->fast_rate, gpu->slow_rate, gpu->bus_freq); |
506 | 513 | ||
514 | gpu->perfcntrs = perfcntrs; | ||
515 | gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); | ||
516 | |||
507 | ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev); | 517 | ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev); |
508 | if (ret) | 518 | if (ret) |
509 | goto fail; | 519 | goto fail; |
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 929f57343440..1d5151ab81e7 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c | |||
@@ -548,6 +548,12 @@ static int late_init_minor(struct drm_minor *minor) | |||
548 | return ret; | 548 | return ret; |
549 | } | 549 | } |
550 | 550 | ||
551 | ret = msm_perf_debugfs_init(minor); | ||
552 | if (ret) { | ||
553 | dev_err(minor->dev->dev, "could not install perf debugfs\n"); | ||
554 | return ret; | ||
555 | } | ||
556 | |||
551 | return 0; | 557 | return 0; |
552 | } | 558 | } |
553 | 559 | ||
@@ -588,6 +594,7 @@ static void msm_debugfs_cleanup(struct drm_minor *minor) | |||
588 | if (!minor->dev->dev_private) | 594 | if (!minor->dev->dev_private) |
589 | return; | 595 | return; |
590 | msm_rd_debugfs_cleanup(minor); | 596 | msm_rd_debugfs_cleanup(minor); |
597 | msm_perf_debugfs_cleanup(minor); | ||
591 | } | 598 | } |
592 | #endif | 599 | #endif |
593 | 600 | ||
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index c9c71a0c3e6f..8a2c5fd0893e 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h | |||
@@ -56,6 +56,7 @@ struct msm_kms; | |||
56 | struct msm_gpu; | 56 | struct msm_gpu; |
57 | struct msm_mmu; | 57 | struct msm_mmu; |
58 | struct msm_rd_state; | 58 | struct msm_rd_state; |
59 | struct msm_perf_state; | ||
59 | struct msm_gem_submit; | 60 | struct msm_gem_submit; |
60 | 61 | ||
61 | #define NUM_DOMAINS 2 /* one for KMS, then one per gpu core (?) */ | 62 | #define NUM_DOMAINS 2 /* one for KMS, then one per gpu core (?) */ |
@@ -85,6 +86,7 @@ struct msm_drm_private { | |||
85 | wait_queue_head_t fence_event; | 86 | wait_queue_head_t fence_event; |
86 | 87 | ||
87 | struct msm_rd_state *rd; | 88 | struct msm_rd_state *rd; |
89 | struct msm_perf_state *perf; | ||
88 | 90 | ||
89 | /* list of GEM objects: */ | 91 | /* list of GEM objects: */ |
90 | struct list_head inactive_list; | 92 | struct list_head inactive_list; |
@@ -212,6 +214,8 @@ int msm_debugfs_late_init(struct drm_device *dev); | |||
212 | int msm_rd_debugfs_init(struct drm_minor *minor); | 214 | int msm_rd_debugfs_init(struct drm_minor *minor); |
213 | void msm_rd_debugfs_cleanup(struct drm_minor *minor); | 215 | void msm_rd_debugfs_cleanup(struct drm_minor *minor); |
214 | void msm_rd_dump_submit(struct msm_gem_submit *submit); | 216 | void msm_rd_dump_submit(struct msm_gem_submit *submit); |
217 | int msm_perf_debugfs_init(struct drm_minor *minor); | ||
218 | void msm_perf_debugfs_cleanup(struct drm_minor *minor); | ||
215 | #else | 219 | #else |
216 | static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } | 220 | static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } |
217 | static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {} | 221 | static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {} |
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 67371f3ddf99..c6322197db8c 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c | |||
@@ -320,6 +320,101 @@ static void hangcheck_handler(unsigned long data) | |||
320 | } | 320 | } |
321 | 321 | ||
322 | /* | 322 | /* |
323 | * Performance Counters: | ||
324 | */ | ||
325 | |||
326 | /* called under perf_lock */ | ||
327 | static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs) | ||
328 | { | ||
329 | uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)]; | ||
330 | int i, n = min(ncntrs, gpu->num_perfcntrs); | ||
331 | |||
332 | /* read current values: */ | ||
333 | for (i = 0; i < gpu->num_perfcntrs; i++) | ||
334 | current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg); | ||
335 | |||
336 | /* update cntrs: */ | ||
337 | for (i = 0; i < n; i++) | ||
338 | cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i]; | ||
339 | |||
340 | /* save current values: */ | ||
341 | for (i = 0; i < gpu->num_perfcntrs; i++) | ||
342 | gpu->last_cntrs[i] = current_cntrs[i]; | ||
343 | |||
344 | return n; | ||
345 | } | ||
346 | |||
347 | static void update_sw_cntrs(struct msm_gpu *gpu) | ||
348 | { | ||
349 | ktime_t time; | ||
350 | uint32_t elapsed; | ||
351 | unsigned long flags; | ||
352 | |||
353 | spin_lock_irqsave(&gpu->perf_lock, flags); | ||
354 | if (!gpu->perfcntr_active) | ||
355 | goto out; | ||
356 | |||
357 | time = ktime_get(); | ||
358 | elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time)); | ||
359 | |||
360 | gpu->totaltime += elapsed; | ||
361 | if (gpu->last_sample.active) | ||
362 | gpu->activetime += elapsed; | ||
363 | |||
364 | gpu->last_sample.active = msm_gpu_active(gpu); | ||
365 | gpu->last_sample.time = time; | ||
366 | |||
367 | out: | ||
368 | spin_unlock_irqrestore(&gpu->perf_lock, flags); | ||
369 | } | ||
370 | |||
371 | void msm_gpu_perfcntr_start(struct msm_gpu *gpu) | ||
372 | { | ||
373 | unsigned long flags; | ||
374 | |||
375 | spin_lock_irqsave(&gpu->perf_lock, flags); | ||
376 | /* we could dynamically enable/disable perfcntr registers too.. */ | ||
377 | gpu->last_sample.active = msm_gpu_active(gpu); | ||
378 | gpu->last_sample.time = ktime_get(); | ||
379 | gpu->activetime = gpu->totaltime = 0; | ||
380 | gpu->perfcntr_active = true; | ||
381 | update_hw_cntrs(gpu, 0, NULL); | ||
382 | spin_unlock_irqrestore(&gpu->perf_lock, flags); | ||
383 | } | ||
384 | |||
385 | void msm_gpu_perfcntr_stop(struct msm_gpu *gpu) | ||
386 | { | ||
387 | gpu->perfcntr_active = false; | ||
388 | } | ||
389 | |||
390 | /* returns -errno or # of cntrs sampled */ | ||
391 | int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, | ||
392 | uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) | ||
393 | { | ||
394 | unsigned long flags; | ||
395 | int ret; | ||
396 | |||
397 | spin_lock_irqsave(&gpu->perf_lock, flags); | ||
398 | |||
399 | if (!gpu->perfcntr_active) { | ||
400 | ret = -EINVAL; | ||
401 | goto out; | ||
402 | } | ||
403 | |||
404 | *activetime = gpu->activetime; | ||
405 | *totaltime = gpu->totaltime; | ||
406 | |||
407 | gpu->activetime = gpu->totaltime = 0; | ||
408 | |||
409 | ret = update_hw_cntrs(gpu, ncntrs, cntrs); | ||
410 | |||
411 | out: | ||
412 | spin_unlock_irqrestore(&gpu->perf_lock, flags); | ||
413 | |||
414 | return ret; | ||
415 | } | ||
416 | |||
417 | /* | ||
323 | * Cmdstream submission/retirement: | 418 | * Cmdstream submission/retirement: |
324 | */ | 419 | */ |
325 | 420 | ||
@@ -361,6 +456,7 @@ void msm_gpu_retire(struct msm_gpu *gpu) | |||
361 | { | 456 | { |
362 | struct msm_drm_private *priv = gpu->dev->dev_private; | 457 | struct msm_drm_private *priv = gpu->dev->dev_private; |
363 | queue_work(priv->wq, &gpu->retire_work); | 458 | queue_work(priv->wq, &gpu->retire_work); |
459 | update_sw_cntrs(gpu); | ||
364 | } | 460 | } |
365 | 461 | ||
366 | /* add bo's to gpu's ring, and kick gpu: */ | 462 | /* add bo's to gpu's ring, and kick gpu: */ |
@@ -381,6 +477,8 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, | |||
381 | 477 | ||
382 | gpu->submitted_fence = submit->fence; | 478 | gpu->submitted_fence = submit->fence; |
383 | 479 | ||
480 | update_sw_cntrs(gpu); | ||
481 | |||
384 | ret = gpu->funcs->submit(gpu, submit, ctx); | 482 | ret = gpu->funcs->submit(gpu, submit, ctx); |
385 | priv->lastctx = ctx; | 483 | priv->lastctx = ctx; |
386 | 484 | ||
@@ -433,6 +531,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, | |||
433 | struct iommu_domain *iommu; | 531 | struct iommu_domain *iommu; |
434 | int i, ret; | 532 | int i, ret; |
435 | 533 | ||
534 | if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) | ||
535 | gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); | ||
536 | |||
436 | gpu->dev = drm; | 537 | gpu->dev = drm; |
437 | gpu->funcs = funcs; | 538 | gpu->funcs = funcs; |
438 | gpu->name = name; | 539 | gpu->name = name; |
@@ -448,6 +549,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, | |||
448 | setup_timer(&gpu->hangcheck_timer, hangcheck_handler, | 549 | setup_timer(&gpu->hangcheck_timer, hangcheck_handler, |
449 | (unsigned long)gpu); | 550 | (unsigned long)gpu); |
450 | 551 | ||
552 | spin_lock_init(&gpu->perf_lock); | ||
553 | |||
451 | BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks)); | 554 | BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks)); |
452 | 555 | ||
453 | /* Map registers: */ | 556 | /* Map registers: */ |
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index fad27008922f..9b579b792840 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "msm_ringbuffer.h" | 25 | #include "msm_ringbuffer.h" |
26 | 26 | ||
27 | struct msm_gem_submit; | 27 | struct msm_gem_submit; |
28 | struct msm_gpu_perfcntr; | ||
28 | 29 | ||
29 | /* So far, with hardware that I've seen to date, we can have: | 30 | /* So far, with hardware that I've seen to date, we can have: |
30 | * + zero, one, or two z180 2d cores | 31 | * + zero, one, or two z180 2d cores |
@@ -64,6 +65,18 @@ struct msm_gpu { | |||
64 | struct drm_device *dev; | 65 | struct drm_device *dev; |
65 | const struct msm_gpu_funcs *funcs; | 66 | const struct msm_gpu_funcs *funcs; |
66 | 67 | ||
68 | /* performance counters (hw & sw): */ | ||
69 | spinlock_t perf_lock; | ||
70 | bool perfcntr_active; | ||
71 | struct { | ||
72 | bool active; | ||
73 | ktime_t time; | ||
74 | } last_sample; | ||
75 | uint32_t totaltime, activetime; /* sw counters */ | ||
76 | uint32_t last_cntrs[5]; /* hw counters */ | ||
77 | const struct msm_gpu_perfcntr *perfcntrs; | ||
78 | uint32_t num_perfcntrs; | ||
79 | |||
67 | struct msm_ringbuffer *rb; | 80 | struct msm_ringbuffer *rb; |
68 | uint32_t rb_iova; | 81 | uint32_t rb_iova; |
69 | 82 | ||
@@ -113,6 +126,19 @@ static inline bool msm_gpu_active(struct msm_gpu *gpu) | |||
113 | return gpu->submitted_fence > gpu->funcs->last_fence(gpu); | 126 | return gpu->submitted_fence > gpu->funcs->last_fence(gpu); |
114 | } | 127 | } |
115 | 128 | ||
/* Perf-Counters:
 * Describes one hardware performance counter.  select_reg and select_val
 * exist for the gpu-specific child class, which programs them to enable
 * the counter; the msm_gpu base class only samples and displays it.
 *
 * Field order matters: counter tables may use positional initializers.
 */

struct msm_gpu_perfcntr {
	uint32_t select_reg;	/* register that selects what is counted */
	uint32_t sample_reg;	/* register the current count is read from */
	uint32_t select_val;	/* value written to select_reg */
	const char *name;	/* label for this counter */
};
141 | |||
116 | static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) | 142 | static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) |
117 | { | 143 | { |
118 | msm_writel(data, gpu->mmio + (reg << 2)); | 144 | msm_writel(data, gpu->mmio + (reg << 2)); |
@@ -126,6 +152,11 @@ static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg) | |||
126 | int msm_gpu_pm_suspend(struct msm_gpu *gpu); | 152 | int msm_gpu_pm_suspend(struct msm_gpu *gpu); |
127 | int msm_gpu_pm_resume(struct msm_gpu *gpu); | 153 | int msm_gpu_pm_resume(struct msm_gpu *gpu); |
128 | 154 | ||
155 | void msm_gpu_perfcntr_start(struct msm_gpu *gpu); | ||
156 | void msm_gpu_perfcntr_stop(struct msm_gpu *gpu); | ||
157 | int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, | ||
158 | uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs); | ||
159 | |||
129 | void msm_gpu_retire(struct msm_gpu *gpu); | 160 | void msm_gpu_retire(struct msm_gpu *gpu); |
130 | int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, | 161 | int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, |
131 | struct msm_file_private *ctx); | 162 | struct msm_file_private *ctx); |
diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c new file mode 100644 index 000000000000..830857c47c86 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_perf.c | |||
@@ -0,0 +1,275 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2013 Red Hat | ||
3 | * Author: Rob Clark <robdclark@gmail.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | /* For profiling, userspace can: | ||
19 | * | ||
20 | * tail -f /sys/kernel/debug/dri/<minor>/perf | ||
21 | * | ||
22 | * This will enable performance counters/profiling to track the busy time | ||
23 | * and any gpu specific performance counters that are supported. | ||
24 | */ | ||
25 | |||
26 | #ifdef CONFIG_DEBUG_FS | ||
27 | |||
28 | #include <linux/debugfs.h> | ||
29 | |||
30 | #include "msm_drv.h" | ||
31 | #include "msm_gpu.h" | ||
32 | |||
33 | struct msm_perf_state { | ||
34 | struct drm_device *dev; | ||
35 | |||
36 | bool open; | ||
37 | int cnt; | ||
38 | struct mutex read_lock; | ||
39 | |||
40 | char buf[256]; | ||
41 | int buftot, bufpos; | ||
42 | |||
43 | unsigned long next_jiffies; | ||
44 | |||
45 | struct dentry *ent; | ||
46 | struct drm_info_node *node; | ||
47 | }; | ||
48 | |||
49 | #define SAMPLE_TIME (HZ/4) | ||
50 | |||
51 | /* wait for next sample time: */ | ||
52 | static int wait_sample(struct msm_perf_state *perf) | ||
53 | { | ||
54 | unsigned long start_jiffies = jiffies; | ||
55 | |||
56 | if (time_after(perf->next_jiffies, start_jiffies)) { | ||
57 | unsigned long remaining_jiffies = | ||
58 | perf->next_jiffies - start_jiffies; | ||
59 | int ret = schedule_timeout_interruptible(remaining_jiffies); | ||
60 | if (ret > 0) { | ||
61 | /* interrupted */ | ||
62 | return -ERESTARTSYS; | ||
63 | } | ||
64 | } | ||
65 | perf->next_jiffies += SAMPLE_TIME; | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static int refill_buf(struct msm_perf_state *perf) | ||
70 | { | ||
71 | struct msm_drm_private *priv = perf->dev->dev_private; | ||
72 | struct msm_gpu *gpu = priv->gpu; | ||
73 | char *ptr = perf->buf; | ||
74 | int rem = sizeof(perf->buf); | ||
75 | int i, n; | ||
76 | |||
77 | if ((perf->cnt++ % 32) == 0) { | ||
78 | /* Header line: */ | ||
79 | n = snprintf(ptr, rem, "%%BUSY"); | ||
80 | ptr += n; | ||
81 | rem -= n; | ||
82 | |||
83 | for (i = 0; i < gpu->num_perfcntrs; i++) { | ||
84 | const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i]; | ||
85 | n = snprintf(ptr, rem, "\t%s", perfcntr->name); | ||
86 | ptr += n; | ||
87 | rem -= n; | ||
88 | } | ||
89 | } else { | ||
90 | /* Sample line: */ | ||
91 | uint32_t activetime = 0, totaltime = 0; | ||
92 | uint32_t cntrs[5]; | ||
93 | uint32_t val; | ||
94 | int ret; | ||
95 | |||
96 | /* sleep until next sample time: */ | ||
97 | ret = wait_sample(perf); | ||
98 | if (ret) | ||
99 | return ret; | ||
100 | |||
101 | ret = msm_gpu_perfcntr_sample(gpu, &activetime, &totaltime, | ||
102 | ARRAY_SIZE(cntrs), cntrs); | ||
103 | if (ret < 0) | ||
104 | return ret; | ||
105 | |||
106 | val = totaltime ? 1000 * activetime / totaltime : 0; | ||
107 | n = snprintf(ptr, rem, "%3d.%d%%", val / 10, val % 10); | ||
108 | ptr += n; | ||
109 | rem -= n; | ||
110 | |||
111 | for (i = 0; i < ret; i++) { | ||
112 | /* cycle counters (I think).. convert to MHz.. */ | ||
113 | val = cntrs[i] / 10000; | ||
114 | n = snprintf(ptr, rem, "\t%5d.%02d", | ||
115 | val / 100, val % 100); | ||
116 | ptr += n; | ||
117 | rem -= n; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | n = snprintf(ptr, rem, "\n"); | ||
122 | ptr += n; | ||
123 | rem -= n; | ||
124 | |||
125 | perf->bufpos = 0; | ||
126 | perf->buftot = ptr - perf->buf; | ||
127 | |||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | static ssize_t perf_read(struct file *file, char __user *buf, | ||
132 | size_t sz, loff_t *ppos) | ||
133 | { | ||
134 | struct msm_perf_state *perf = file->private_data; | ||
135 | int n = 0, ret; | ||
136 | |||
137 | mutex_lock(&perf->read_lock); | ||
138 | |||
139 | if (perf->bufpos >= perf->buftot) { | ||
140 | ret = refill_buf(perf); | ||
141 | if (ret) | ||
142 | goto out; | ||
143 | } | ||
144 | |||
145 | n = min((int)sz, perf->buftot - perf->bufpos); | ||
146 | ret = copy_to_user(buf, &perf->buf[perf->bufpos], n); | ||
147 | if (ret) | ||
148 | goto out; | ||
149 | |||
150 | perf->bufpos += n; | ||
151 | *ppos += n; | ||
152 | |||
153 | out: | ||
154 | mutex_unlock(&perf->read_lock); | ||
155 | if (ret) | ||
156 | return ret; | ||
157 | return n; | ||
158 | } | ||
159 | |||
160 | static int perf_open(struct inode *inode, struct file *file) | ||
161 | { | ||
162 | struct msm_perf_state *perf = inode->i_private; | ||
163 | struct drm_device *dev = perf->dev; | ||
164 | struct msm_drm_private *priv = dev->dev_private; | ||
165 | struct msm_gpu *gpu = priv->gpu; | ||
166 | int ret = 0; | ||
167 | |||
168 | mutex_lock(&dev->struct_mutex); | ||
169 | |||
170 | if (perf->open || !gpu) { | ||
171 | ret = -EBUSY; | ||
172 | goto out; | ||
173 | } | ||
174 | |||
175 | file->private_data = perf; | ||
176 | perf->open = true; | ||
177 | perf->cnt = 0; | ||
178 | perf->buftot = 0; | ||
179 | perf->bufpos = 0; | ||
180 | msm_gpu_perfcntr_start(gpu); | ||
181 | perf->next_jiffies = jiffies + SAMPLE_TIME; | ||
182 | |||
183 | out: | ||
184 | mutex_unlock(&dev->struct_mutex); | ||
185 | return ret; | ||
186 | } | ||
187 | |||
188 | static int perf_release(struct inode *inode, struct file *file) | ||
189 | { | ||
190 | struct msm_perf_state *perf = inode->i_private; | ||
191 | struct msm_drm_private *priv = perf->dev->dev_private; | ||
192 | msm_gpu_perfcntr_stop(priv->gpu); | ||
193 | perf->open = false; | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | |||
198 | static const struct file_operations perf_debugfs_fops = { | ||
199 | .owner = THIS_MODULE, | ||
200 | .open = perf_open, | ||
201 | .read = perf_read, | ||
202 | .llseek = no_llseek, | ||
203 | .release = perf_release, | ||
204 | }; | ||
205 | |||
206 | int msm_perf_debugfs_init(struct drm_minor *minor) | ||
207 | { | ||
208 | struct msm_drm_private *priv = minor->dev->dev_private; | ||
209 | struct msm_perf_state *perf; | ||
210 | |||
211 | /* only create on first minor: */ | ||
212 | if (priv->perf) | ||
213 | return 0; | ||
214 | |||
215 | perf = kzalloc(sizeof(*perf), GFP_KERNEL); | ||
216 | if (!perf) | ||
217 | return -ENOMEM; | ||
218 | |||
219 | perf->dev = minor->dev; | ||
220 | |||
221 | mutex_init(&perf->read_lock); | ||
222 | priv->perf = perf; | ||
223 | |||
224 | perf->node = kzalloc(sizeof(*perf->node), GFP_KERNEL); | ||
225 | if (!perf->node) | ||
226 | goto fail; | ||
227 | |||
228 | perf->ent = debugfs_create_file("perf", S_IFREG | S_IRUGO, | ||
229 | minor->debugfs_root, perf, &perf_debugfs_fops); | ||
230 | if (!perf->ent) { | ||
231 | DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/perf\n", | ||
232 | minor->debugfs_root->d_name.name); | ||
233 | goto fail; | ||
234 | } | ||
235 | |||
236 | perf->node->minor = minor; | ||
237 | perf->node->dent = perf->ent; | ||
238 | perf->node->info_ent = NULL; | ||
239 | |||
240 | mutex_lock(&minor->debugfs_lock); | ||
241 | list_add(&perf->node->list, &minor->debugfs_list); | ||
242 | mutex_unlock(&minor->debugfs_lock); | ||
243 | |||
244 | return 0; | ||
245 | |||
246 | fail: | ||
247 | msm_perf_debugfs_cleanup(minor); | ||
248 | return -1; | ||
249 | } | ||
250 | |||
251 | void msm_perf_debugfs_cleanup(struct drm_minor *minor) | ||
252 | { | ||
253 | struct msm_drm_private *priv = minor->dev->dev_private; | ||
254 | struct msm_perf_state *perf = priv->perf; | ||
255 | |||
256 | if (!perf) | ||
257 | return; | ||
258 | |||
259 | priv->perf = NULL; | ||
260 | |||
261 | debugfs_remove(perf->ent); | ||
262 | |||
263 | if (perf->node) { | ||
264 | mutex_lock(&minor->debugfs_lock); | ||
265 | list_del(&perf->node->list); | ||
266 | mutex_unlock(&minor->debugfs_lock); | ||
267 | kfree(perf->node); | ||
268 | } | ||
269 | |||
270 | mutex_destroy(&perf->read_lock); | ||
271 | |||
272 | kfree(perf); | ||
273 | } | ||
274 | |||
275 | #endif | ||