aboutsummaryrefslogtreecommitdiffstats
path: root/include/gk20a/css_gr_gk20a.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/gk20a/css_gr_gk20a.h')
-rw-r--r--include/gk20a/css_gr_gk20a.h151
1 files changed, 151 insertions, 0 deletions
diff --git a/include/gk20a/css_gr_gk20a.h b/include/gk20a/css_gr_gk20a.h
new file mode 100644
index 0000000..bf8890b
--- /dev/null
+++ b/include/gk20a/css_gr_gk20a.h
@@ -0,0 +1,151 @@
/*
 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
 *
 * Copyright (c) 2016-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
24
25#ifndef CSS_GR_GK20A_H
26#define CSS_GR_GK20A_H
27
28#include <nvgpu/nvgpu_mem.h>
29#include <nvgpu/list.h>
30
/*
 * The minimal size of the HW snapshot buffer (8 * 1024 * 1024; presumably
 * a byte count - confirm against the allocation site). It should be large
 * enough to avoid HW overflows.
 */
#define CSS_MIN_HW_SNAPSHOT_SIZE	(8 * 1024 * 1024)
33
/*
 * Forward declarations: this header only passes these types around by
 * pointer, so their full definitions are not needed here.
 */
struct gk20a;
struct gr_gk20a;
struct channel_gk20a;
37
38/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
39struct gk20a_cs_snapshot_fifo {
40 /* layout description of the buffer */
41 u32 start;
42 u32 end;
43
44 /* snafu bits */
45 u32 hw_overflow_events_occured;
46 u32 sw_overflow_events_occured;
47
48 /* the kernel copies new entries to put and
49 * increment the put++. if put == get then
50 * overflowEventsOccured++
51 */
52 u32 put;
53 u32 _reserved10;
54 u32 _reserved11;
55 u32 _reserved12;
56
57 /* the driver/client reads from get until
58 * put==get, get++ */
59 u32 get;
60 u32 _reserved20;
61 u32 _reserved21;
62 u32 _reserved22;
63
64 /* unused */
65 u32 _reserved30;
66 u32 _reserved31;
67 u32 _reserved32;
68 u32 _reserved33;
69};
70
71/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
72struct gk20a_cs_snapshot_fifo_entry {
73 /* global 48 timestamp */
74 u32 timestamp31_00:32;
75 u32 timestamp39_32:8;
76
77 /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
78 u32 perfmon_id:8;
79
80 /* typically samples_counter is wired to #pmtrigger count */
81 u32 samples_counter:12;
82
83 /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
84 u32 ds:1;
85 u32 sz:1;
86 u32 zero0:1;
87 u32 zero1:1;
88
89 /* counter results */
90 u32 event_cnt:32;
91 u32 trigger0_cnt:32;
92 u32 trigger1_cnt:32;
93 u32 sample_cnt:32;
94
95 /* Local PmTrigger results for Maxwell+ or padding otherwise */
96 u16 local_trigger_b_count:16;
97 u16 book_mark_b:16;
98 u16 local_trigger_a_count:16;
99 u16 book_mark_a:16;
100};
101
102/* cycle stats snapshot client data (e.g. associated with channel) */
103struct gk20a_cs_snapshot_client {
104 struct nvgpu_list_node list;
105 struct gk20a_cs_snapshot_fifo *snapshot;
106 u32 snapshot_size;
107 u32 perfmon_start;
108 u32 perfmon_count;
109};
110
111static inline struct gk20a_cs_snapshot_client *
112gk20a_cs_snapshot_client_from_list(struct nvgpu_list_node *node)
113{
114 return (struct gk20a_cs_snapshot_client *)
115 ((uintptr_t)node - offsetof(struct gk20a_cs_snapshot_client, list));
116};
117
/*
 * Should correlate with the size of
 * gk20a_cs_snapshot_fifo_entry::perfmon_id (an 8-bit field, i.e. 256 ids).
 */
#define CSS_MAX_PERFMON_IDS	256

/*
 * Local definition to avoid hardcoded sizes and shifts: the length of the
 * unsigned long array used as the perfmon id bitmap. DIV_ROUND_UP and
 * BITS_PER_LONG are expected to be provided by the including environment.
 */
#define PM_BITMAP_SIZE	DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
123
124/* cycle stats snapshot control structure for one HW entry and many clients */
125struct gk20a_cs_snapshot {
126 unsigned long perfmon_ids[PM_BITMAP_SIZE];
127 struct nvgpu_list_node clients;
128 struct nvgpu_mem hw_memdesc;
129 /* pointer to allocated cpu_va memory where GPU place data */
130 struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
131 struct gk20a_cs_snapshot_fifo_entry *hw_end;
132 struct gk20a_cs_snapshot_fifo_entry *hw_get;
133};
134
135bool css_hw_get_overflow_status(struct gk20a *g);
136u32 css_hw_get_pending_snapshots(struct gk20a *g);
137void css_hw_set_handled_snapshots(struct gk20a *g, u32 done);
138int css_hw_enable_snapshot(struct channel_gk20a *ch,
139 struct gk20a_cs_snapshot_client *cs_client);
140void css_hw_disable_snapshot(struct gr_gk20a *gr);
141u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
142 u32 count);
143u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
144 u32 start,
145 u32 count);
146int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
147 bool *hw_overflow);
148struct gk20a_cs_snapshot_client*
149css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon);
150
151#endif /* CSS_GR_GK20A_H */