diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-26 17:16:37 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-26 13:00:31 -0400 |
commit | 394545acf63319e27d5dea2eeb22fddd26e9416a (patch) | |
tree | f397ba98f6447a4afd9b9f3aec2a2a904dae2c0b /drivers/gpu/nvgpu/gk20a | |
parent | 0bd76f6c80d8556514c66d21778f2db10b6ce1d6 (diff) |
gpu: nvgpu: Use kernel bitmap ops for perfmon ids
Bug 200192125
Change-Id: I44418fbbe393d5b9463dc3c9e62f3673da2a06c5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1132967
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 73 |
1 files changed, 14 insertions, 59 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 1d05c902..5edb07a7 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | |||
@@ -131,28 +131,12 @@ struct gk20a_cs_snapshot_client { | |||
131 | /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ | 131 | /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ |
132 | #define CSS_MAX_PERFMON_IDS 256 | 132 | #define CSS_MAX_PERFMON_IDS 256 |
133 | 133 | ||
134 | |||
135 | /* this type is used for storing bits in perfmon mask */ | ||
136 | typedef u32 css_perfmon_t; | ||
137 | |||
138 | /* local definitions to avoid hardcodes sizes and shifts */ | 134 | /* local definitions to avoid hardcodes sizes and shifts */ |
139 | #define PM_BITS (sizeof(css_perfmon_t) * BITS_PER_BYTE) | 135 | #define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG) |
140 | #define PM_BITS_MASK (PM_BITS - 1) | ||
141 | |||
142 | #define PM_BITMAP_SIZE ((CSS_MAX_PERFMON_IDS + PM_BITS - 1) / PM_BITS) | ||
143 | |||
144 | #define PM_SLOT(i) ((i) / PM_BITS) | ||
145 | #define PM_SHIFT(i) ((i) & PM_BITS_MASK) | ||
146 | #define PM_BIT(i) (1u << PM_SHIFT(i)) | ||
147 | |||
148 | #define CSS_PERFMON_GET(p, i) (1 == ((p[PM_SLOT(i)] >> PM_SHIFT(i)) & 1)) | ||
149 | #define CSS_PERFMON_USE(p, i) (p[PM_SLOT(i)] |= PM_BIT(i)) | ||
150 | #define CSS_PERFMON_REL(p, i) (p[PM_SLOT(i)] &= ~PM_BIT(i)) | ||
151 | |||
152 | 136 | ||
153 | /* cycle stats snapshot control structure for one HW entry and many clients */ | 137 | /* cycle stats snapshot control structure for one HW entry and many clients */ |
154 | struct gk20a_cs_snapshot { | 138 | struct gk20a_cs_snapshot { |
155 | css_perfmon_t perfmon_ids[PM_BITMAP_SIZE]; | 139 | unsigned long perfmon_ids[PM_BITMAP_SIZE]; |
156 | struct list_head clients; | 140 | struct list_head clients; |
157 | struct mem_desc hw_memdesc; | 141 | struct mem_desc hw_memdesc; |
158 | /* pointer to allocated cpu_va memory where GPU place data */ | 142 | /* pointer to allocated cpu_va memory where GPU place data */ |
@@ -497,59 +481,30 @@ next_hw_fifo_entry: | |||
497 | static u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, | 481 | static u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, |
498 | u32 count) | 482 | u32 count) |
499 | { | 483 | { |
500 | u32 *pids = data->perfmon_ids; | 484 | unsigned long *pids = data->perfmon_ids; |
501 | u32 f; | 485 | unsigned int f; |
502 | u32 e = CSS_MAX_PERFMON_IDS - count; | ||
503 | |||
504 | if (!count || count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID) | ||
505 | return 0; | ||
506 | 486 | ||
507 | for (f = CSS_FIRST_PERFMON_ID; f <= e; f++) { | 487 | f = bitmap_find_next_zero_area(pids, CSS_MAX_PERFMON_IDS, |
508 | u32 slots; | 488 | CSS_FIRST_PERFMON_ID, count, 0); |
509 | u32 cur; | 489 | if (f > CSS_MAX_PERFMON_IDS) |
510 | u32 end; | 490 | f = 0; |
511 | 491 | else | |
512 | if (CSS_PERFMON_GET(pids, f)) | 492 | bitmap_set(pids, f, count); |
513 | continue; | ||
514 | |||
515 | /* lookup for continuous hole [f, f+count) of unused bits */ | ||
516 | slots = 0; | ||
517 | end = f + count; | ||
518 | for (cur = f; cur < end; cur++) { | ||
519 | if (CSS_PERFMON_GET(pids, cur)) | ||
520 | break; | ||
521 | slots++; | ||
522 | } | ||
523 | |||
524 | if (count == slots) { | ||
525 | /* we found of hole of unused bits with required */ | ||
526 | /* length -> can occupy it for our perfmon IDs */ | ||
527 | for (cur = f; cur < end; cur++) | ||
528 | CSS_PERFMON_USE(pids, cur); | ||
529 | |||
530 | return f; | ||
531 | } | ||
532 | } | ||
533 | 493 | ||
534 | return 0; | 494 | return f; |
535 | } | 495 | } |
536 | 496 | ||
537 | static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, | 497 | static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, |
538 | u32 start, | 498 | u32 start, |
539 | u32 count) | 499 | u32 count) |
540 | { | 500 | { |
541 | u32 *pids = data->perfmon_ids; | 501 | unsigned long *pids = data->perfmon_ids; |
542 | u32 end = start + count; | 502 | u32 end = start + count; |
543 | u32 cnt = 0; | 503 | u32 cnt = 0; |
544 | 504 | ||
545 | if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) { | 505 | if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) { |
546 | u32 i; | 506 | bitmap_clear(pids, start, count); |
547 | for (i = start; i < end; i++) { | 507 | cnt = count; |
548 | if (CSS_PERFMON_GET(pids, i)) { | ||
549 | CSS_PERFMON_REL(pids, i); | ||
550 | cnt++; | ||
551 | } | ||
552 | } | ||
553 | } | 508 | } |
554 | 509 | ||
555 | return cnt; | 510 | return cnt; |