diff options
Diffstat (limited to 'include/gk20a/css_gr_gk20a.h')
-rw-r--r-- | include/gk20a/css_gr_gk20a.h | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/include/gk20a/css_gr_gk20a.h b/include/gk20a/css_gr_gk20a.h new file mode 100644 index 0000000..bf8890b --- /dev/null +++ b/include/gk20a/css_gr_gk20a.h | |||
@@ -0,0 +1,151 @@ | |||
1 | /* | ||
2 | * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). | ||
3 | * | ||
4 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef CSS_GR_GK20A_H | ||
26 | #define CSS_GR_GK20A_H | ||
27 | |||
28 | #include <nvgpu/nvgpu_mem.h> | ||
29 | #include <nvgpu/list.h> | ||
30 | |||
/*
 * Minimal size of the HW snapshot buffer. It should be large enough to
 * avoid HW overflows.
 */
#define CSS_MIN_HW_SNAPSHOT_SIZE	(8 * 1024 * 1024)
33 | |||
/*
 * Forward declarations: this header only uses pointers to these types,
 * so their full definitions are not needed here.
 */
struct gk20a;
struct gr_gk20a;
struct channel_gk20a;
37 | |||
38 | /* cycle stats fifo header (must match NvSnapshotBufferFifo) */ | ||
39 | struct gk20a_cs_snapshot_fifo { | ||
40 | /* layout description of the buffer */ | ||
41 | u32 start; | ||
42 | u32 end; | ||
43 | |||
44 | /* snafu bits */ | ||
45 | u32 hw_overflow_events_occured; | ||
46 | u32 sw_overflow_events_occured; | ||
47 | |||
48 | /* the kernel copies new entries to put and | ||
49 | * increment the put++. if put == get then | ||
50 | * overflowEventsOccured++ | ||
51 | */ | ||
52 | u32 put; | ||
53 | u32 _reserved10; | ||
54 | u32 _reserved11; | ||
55 | u32 _reserved12; | ||
56 | |||
57 | /* the driver/client reads from get until | ||
58 | * put==get, get++ */ | ||
59 | u32 get; | ||
60 | u32 _reserved20; | ||
61 | u32 _reserved21; | ||
62 | u32 _reserved22; | ||
63 | |||
64 | /* unused */ | ||
65 | u32 _reserved30; | ||
66 | u32 _reserved31; | ||
67 | u32 _reserved32; | ||
68 | u32 _reserved33; | ||
69 | }; | ||
70 | |||
71 | /* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */ | ||
72 | struct gk20a_cs_snapshot_fifo_entry { | ||
73 | /* global 48 timestamp */ | ||
74 | u32 timestamp31_00:32; | ||
75 | u32 timestamp39_32:8; | ||
76 | |||
77 | /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */ | ||
78 | u32 perfmon_id:8; | ||
79 | |||
80 | /* typically samples_counter is wired to #pmtrigger count */ | ||
81 | u32 samples_counter:12; | ||
82 | |||
83 | /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */ | ||
84 | u32 ds:1; | ||
85 | u32 sz:1; | ||
86 | u32 zero0:1; | ||
87 | u32 zero1:1; | ||
88 | |||
89 | /* counter results */ | ||
90 | u32 event_cnt:32; | ||
91 | u32 trigger0_cnt:32; | ||
92 | u32 trigger1_cnt:32; | ||
93 | u32 sample_cnt:32; | ||
94 | |||
95 | /* Local PmTrigger results for Maxwell+ or padding otherwise */ | ||
96 | u16 local_trigger_b_count:16; | ||
97 | u16 book_mark_b:16; | ||
98 | u16 local_trigger_a_count:16; | ||
99 | u16 book_mark_a:16; | ||
100 | }; | ||
101 | |||
102 | /* cycle stats snapshot client data (e.g. associated with channel) */ | ||
103 | struct gk20a_cs_snapshot_client { | ||
104 | struct nvgpu_list_node list; | ||
105 | struct gk20a_cs_snapshot_fifo *snapshot; | ||
106 | u32 snapshot_size; | ||
107 | u32 perfmon_start; | ||
108 | u32 perfmon_count; | ||
109 | }; | ||
110 | |||
111 | static inline struct gk20a_cs_snapshot_client * | ||
112 | gk20a_cs_snapshot_client_from_list(struct nvgpu_list_node *node) | ||
113 | { | ||
114 | return (struct gk20a_cs_snapshot_client *) | ||
115 | ((uintptr_t)node - offsetof(struct gk20a_cs_snapshot_client, list)); | ||
116 | }; | ||
117 | |||
/*
 * Number of distinct perfmon ids. Should correlate with the width of
 * gk20a_cs_snapshot_fifo_entry::perfmon_id (8 bits => 256 values).
 */
#define CSS_MAX_PERFMON_IDS	256

/*
 * Number of unsigned longs needed for a bitmap holding one bit per perfmon
 * id; defined here to avoid hard-coded sizes and shifts.
 */
#define PM_BITMAP_SIZE	DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
123 | |||
124 | /* cycle stats snapshot control structure for one HW entry and many clients */ | ||
125 | struct gk20a_cs_snapshot { | ||
126 | unsigned long perfmon_ids[PM_BITMAP_SIZE]; | ||
127 | struct nvgpu_list_node clients; | ||
128 | struct nvgpu_mem hw_memdesc; | ||
129 | /* pointer to allocated cpu_va memory where GPU place data */ | ||
130 | struct gk20a_cs_snapshot_fifo_entry *hw_snapshot; | ||
131 | struct gk20a_cs_snapshot_fifo_entry *hw_end; | ||
132 | struct gk20a_cs_snapshot_fifo_entry *hw_get; | ||
133 | }; | ||
134 | |||
135 | bool css_hw_get_overflow_status(struct gk20a *g); | ||
136 | u32 css_hw_get_pending_snapshots(struct gk20a *g); | ||
137 | void css_hw_set_handled_snapshots(struct gk20a *g, u32 done); | ||
138 | int css_hw_enable_snapshot(struct channel_gk20a *ch, | ||
139 | struct gk20a_cs_snapshot_client *cs_client); | ||
140 | void css_hw_disable_snapshot(struct gr_gk20a *gr); | ||
141 | u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, | ||
142 | u32 count); | ||
143 | u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, | ||
144 | u32 start, | ||
145 | u32 count); | ||
146 | int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, | ||
147 | bool *hw_overflow); | ||
148 | struct gk20a_cs_snapshot_client* | ||
149 | css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon); | ||
150 | |||
151 | #endif /* CSS_GR_GK20A_H */ | ||