diff options
author | Peter Daifuku <pdaifuku@nvidia.com> | 2016-08-31 20:04:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-08 19:04:09 -0400 |
commit | 9aa7de15c2a644e9c7e9c157e49087e66d4ac3d0 (patch) | |
tree | e5080886f09aa75c6a3cc83e5b27f8f7553678a4 /drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | |
parent | 70cad5fbb593602a49f91e57c04d1da0334b3a49 (diff) |
gpu: nvgpu: vgpu: cyclestat snapshot support
Add support for cyclestats snapshots in the virtual case
Bug 1700143
JIRA EVLR-278
Change-Id: I376a8804d57324f43eb16452d857a3b7bb0ecc90
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1211547
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h new file mode 100644 index 00000000..be638abf --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). | ||
3 | * | ||
4 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef CSS_GR_GK20A_H | ||
20 | #define CSS_GR_GK20A_H | ||
21 | |||
22 | /* minimum size of the HW buffer (8 MiB) - should be enough to avoid HW overflows */ | ||
23 | #define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) | ||
24 | |||
25 | /* cycle stats fifo header (must match NvSnapshotBufferFifo): sixteen u32 fields laid out as four groups of four */ | ||
26 | struct gk20a_cs_snapshot_fifo { | ||
27 | /* layout description of the buffer (units of start/end are not stated in this header) */ | ||
28 | u32 start; | ||
29 | u32 end; | ||
30 | |||
31 | /* overflow indicators, one for HW and one for SW overflow events */ | ||
32 | u32 hw_overflow_events_occured; | ||
33 | u32 sw_overflow_events_occured; | ||
34 | |||
35 | /* the kernel copies new entries to put and then | ||
36 | * increments put (put++); if put == get then an | ||
37 | * overflow is recorded (overflowEventsOccured++) | ||
38 | */ | ||
39 | u32 put; | ||
40 | u32 _reserved10; | ||
41 | u32 _reserved11; | ||
42 | u32 _reserved12; | ||
43 | |||
44 | /* the driver/client reads from get, incrementing | ||
45 | * get (get++), until put == get */ | ||
46 | u32 get; | ||
47 | u32 _reserved20; | ||
48 | u32 _reserved21; | ||
49 | u32 _reserved22; | ||
50 | |||
51 | /* unused; brings the header to sixteen u32 fields in total */ | ||
52 | u32 _reserved30; | ||
53 | u32 _reserved31; | ||
54 | u32 _reserved32; | ||
55 | u32 _reserved33; | ||
56 | }; | ||
57 | |||
58 | /* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry); the declared bit-field widths sum to 256 bits */ | ||
59 | struct gk20a_cs_snapshot_fifo_entry { | ||
60 | /* global timestamp; NOTE(review): originally described as "48" bits, but the two fields below only hold bits 39:0 (32 + 8) - confirm against NvSnapshotBufferFifoEntry */ | ||
61 | u32 timestamp31_00:32; | ||
62 | u32 timestamp39_32:8; | ||
63 | |||
64 | /* id of perfmon; the 8-bit width should correlate with CSS_MAX_PERFMON_IDS (256) */ | ||
65 | u32 perfmon_id:8; | ||
66 | |||
67 | /* typically samples_counter is wired to the #pmtrigger count */ | ||
68 | u32 samples_counter:12; | ||
69 | |||
70 | /* DS=Delay Sample, SZ=Size (0=32B, 1=16B); with zero0/zero1 the fields since timestamp39_32 add up to 32 bits */ | ||
71 | u32 ds:1; | ||
72 | u32 sz:1; | ||
73 | u32 zero0:1; | ||
74 | u32 zero1:1; | ||
75 | |||
76 | /* counter results, four full 32-bit values */ | ||
77 | u32 event_cnt:32; | ||
78 | u32 trigger0_cnt:32; | ||
79 | u32 trigger1_cnt:32; | ||
80 | u32 sample_cnt:32; | ||
81 | |||
82 | /* Local PmTrigger results for Maxwell+ or padding otherwise (four 16-bit fields) */ | ||
83 | u16 local_trigger_b_count:16; | ||
84 | u16 book_mark_b:16; | ||
85 | u16 local_trigger_a_count:16; | ||
86 | u16 book_mark_a:16; | ||
87 | }; | ||
88 | |||
89 | /* cycle stats snapshot client data (e.g. associated with channel); holds a list node, the client's dma-buf fd/handle, a pointer to its snapshot fifo header with its size, and a perfmon start/count pair (presumably the perfmon id range owned by this client - confirm in the implementation) */ | ||
90 | struct gk20a_cs_snapshot_client { | ||
91 | struct list_head list; | ||
92 | u32 dmabuf_fd; | ||
93 | struct dma_buf *dma_handler; | ||
94 | struct gk20a_cs_snapshot_fifo *snapshot; | ||
95 | u32 snapshot_size; | ||
96 | u32 perfmon_start; | ||
97 | u32 perfmon_count; | ||
98 | }; | ||
99 | |||
100 | /* should correlate with the bit width of gk20a_cs_snapshot_fifo_entry::perfmon_id (8 bits -> 256 ids) */ | ||
101 | #define CSS_MAX_PERFMON_IDS 256 | ||
102 | |||
103 | /* local definitions to avoid hardcoded sizes and shifts */ | ||
104 | #define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG) | ||
105 | |||
106 | /* cycle stats snapshot control structure for one HW entry and many clients; perfmon_ids is a bitmap with room for CSS_MAX_PERFMON_IDS bits (PM_BITMAP_SIZE unsigned longs) */ | ||
107 | struct gk20a_cs_snapshot { | ||
108 | unsigned long perfmon_ids[PM_BITMAP_SIZE]; | ||
109 | struct list_head clients; | ||
110 | struct mem_desc hw_memdesc; | ||
111 | /* pointer to the allocated cpu_va memory where the GPU places data; hw_end and hw_get presumably mark the end of that memory and the current read position - confirm in the implementation */ | ||
112 | struct gk20a_cs_snapshot_fifo_entry *hw_snapshot; | ||
113 | struct gk20a_cs_snapshot_fifo_entry *hw_end; | ||
114 | struct gk20a_cs_snapshot_fifo_entry *hw_get; | ||
115 | }; | ||
116 | |||
117 | void gk20a_init_css_ops(struct gpu_ops *gops); | ||
118 | |||
119 | #endif /* CSS_GR_GK20A_H */ | ||