summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com> 2016-04-26 17:16:37 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2016-05-26 13:00:31 -0400
commit: 394545acf63319e27d5dea2eeb22fddd26e9416a (patch)
tree: f397ba98f6447a4afd9b9f3aec2a2a904dae2c0b /drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
parent: 0bd76f6c80d8556514c66d21778f2db10b6ce1d6 (diff)
gpu: nvgpu: Use kernel bitmap ops for perfmon ids
Bug 200192125

Change-Id: I44418fbbe393d5b9463dc3c9e62f3673da2a06c5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1132967
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 73
1 files changed, 14 insertions, 59 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 1d05c902..5edb07a7 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -131,28 +131,12 @@ struct gk20a_cs_snapshot_client {
131/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ 131/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
132#define CSS_MAX_PERFMON_IDS 256 132#define CSS_MAX_PERFMON_IDS 256
133 133
134
135/* this type is used for storing bits in perfmon mask */
136typedef u32 css_perfmon_t;
137
138/* local definitions to avoid hardcodes sizes and shifts */ 134/* local definitions to avoid hardcodes sizes and shifts */
139#define PM_BITS (sizeof(css_perfmon_t) * BITS_PER_BYTE) 135#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
140#define PM_BITS_MASK (PM_BITS - 1)
141
142#define PM_BITMAP_SIZE ((CSS_MAX_PERFMON_IDS + PM_BITS - 1) / PM_BITS)
143
144#define PM_SLOT(i) ((i) / PM_BITS)
145#define PM_SHIFT(i) ((i) & PM_BITS_MASK)
146#define PM_BIT(i) (1u << PM_SHIFT(i))
147
148#define CSS_PERFMON_GET(p, i) (1 == ((p[PM_SLOT(i)] >> PM_SHIFT(i)) & 1))
149#define CSS_PERFMON_USE(p, i) (p[PM_SLOT(i)] |= PM_BIT(i))
150#define CSS_PERFMON_REL(p, i) (p[PM_SLOT(i)] &= ~PM_BIT(i))
151
152 136
153/* cycle stats snapshot control structure for one HW entry and many clients */ 137/* cycle stats snapshot control structure for one HW entry and many clients */
154struct gk20a_cs_snapshot { 138struct gk20a_cs_snapshot {
155 css_perfmon_t perfmon_ids[PM_BITMAP_SIZE]; 139 unsigned long perfmon_ids[PM_BITMAP_SIZE];
156 struct list_head clients; 140 struct list_head clients;
157 struct mem_desc hw_memdesc; 141 struct mem_desc hw_memdesc;
158 /* pointer to allocated cpu_va memory where GPU place data */ 142 /* pointer to allocated cpu_va memory where GPU place data */
@@ -497,59 +481,30 @@ next_hw_fifo_entry:
497static u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, 481static u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
498 u32 count) 482 u32 count)
499{ 483{
500 u32 *pids = data->perfmon_ids; 484 unsigned long *pids = data->perfmon_ids;
501 u32 f; 485 unsigned int f;
502 u32 e = CSS_MAX_PERFMON_IDS - count;
503
504 if (!count || count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID)
505 return 0;
506 486
507 for (f = CSS_FIRST_PERFMON_ID; f <= e; f++) { 487 f = bitmap_find_next_zero_area(pids, CSS_MAX_PERFMON_IDS,
508 u32 slots; 488 CSS_FIRST_PERFMON_ID, count, 0);
509 u32 cur; 489 if (f > CSS_MAX_PERFMON_IDS)
510 u32 end; 490 f = 0;
511 491 else
512 if (CSS_PERFMON_GET(pids, f)) 492 bitmap_set(pids, f, count);
513 continue;
514
515 /* lookup for continuous hole [f, f+count) of unused bits */
516 slots = 0;
517 end = f + count;
518 for (cur = f; cur < end; cur++) {
519 if (CSS_PERFMON_GET(pids, cur))
520 break;
521 slots++;
522 }
523
524 if (count == slots) {
525 /* we found of hole of unused bits with required */
526 /* length -> can occupy it for our perfmon IDs */
527 for (cur = f; cur < end; cur++)
528 CSS_PERFMON_USE(pids, cur);
529
530 return f;
531 }
532 }
533 493
534 return 0; 494 return f;
535} 495}
536 496
537static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, 497static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
538 u32 start, 498 u32 start,
539 u32 count) 499 u32 count)
540{ 500{
541 u32 *pids = data->perfmon_ids; 501 unsigned long *pids = data->perfmon_ids;
542 u32 end = start + count; 502 u32 end = start + count;
543 u32 cnt = 0; 503 u32 cnt = 0;
544 504
545 if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) { 505 if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) {
546 u32 i; 506 bitmap_clear(pids, start, count);
547 for (i = start; i < end; i++) { 507 cnt = count;
548 if (CSS_PERFMON_GET(pids, i)) {
549 CSS_PERFMON_REL(pids, i);
550 cnt++;
551 }
552 }
553 } 508 }
554 509
555 return cnt; 510 return cnt;