summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | 142
1 files changed, 142 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
new file mode 100644
index 00000000..f0ad6044
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
@@ -0,0 +1,142 @@
/*
 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
 *
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
24
25#ifndef CSS_GR_GK20A_H
26#define CSS_GR_GK20A_H
27
/*
 * Minimal size of the HW snapshot buffer (8 * 1024 * 1024; presumably bytes,
 * i.e. 8 MiB). It should be large enough to avoid HW overflows.
 */
#define CSS_MIN_HW_SNAPSHOT_SIZE	(8 * 1024 * 1024)
30
31/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
32struct gk20a_cs_snapshot_fifo {
33 /* layout description of the buffer */
34 u32 start;
35 u32 end;
36
37 /* snafu bits */
38 u32 hw_overflow_events_occured;
39 u32 sw_overflow_events_occured;
40
41 /* the kernel copies new entries to put and
42 * increment the put++. if put == get then
43 * overflowEventsOccured++
44 */
45 u32 put;
46 u32 _reserved10;
47 u32 _reserved11;
48 u32 _reserved12;
49
50 /* the driver/client reads from get until
51 * put==get, get++ */
52 u32 get;
53 u32 _reserved20;
54 u32 _reserved21;
55 u32 _reserved22;
56
57 /* unused */
58 u32 _reserved30;
59 u32 _reserved31;
60 u32 _reserved32;
61 u32 _reserved33;
62};
63
64/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
65struct gk20a_cs_snapshot_fifo_entry {
66 /* global 48 timestamp */
67 u32 timestamp31_00:32;
68 u32 timestamp39_32:8;
69
70 /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
71 u32 perfmon_id:8;
72
73 /* typically samples_counter is wired to #pmtrigger count */
74 u32 samples_counter:12;
75
76 /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
77 u32 ds:1;
78 u32 sz:1;
79 u32 zero0:1;
80 u32 zero1:1;
81
82 /* counter results */
83 u32 event_cnt:32;
84 u32 trigger0_cnt:32;
85 u32 trigger1_cnt:32;
86 u32 sample_cnt:32;
87
88 /* Local PmTrigger results for Maxwell+ or padding otherwise */
89 u16 local_trigger_b_count:16;
90 u16 book_mark_b:16;
91 u16 local_trigger_a_count:16;
92 u16 book_mark_a:16;
93};
94
95/* cycle stats snapshot client data (e.g. associated with channel) */
96struct gk20a_cs_snapshot_client {
97 struct nvgpu_list_node list;
98 u32 dmabuf_fd;
99 struct dma_buf *dma_handler;
100 struct gk20a_cs_snapshot_fifo *snapshot;
101 u32 snapshot_size;
102 u32 perfmon_start;
103 u32 perfmon_count;
104};
105
106static inline struct gk20a_cs_snapshot_client *
107gk20a_cs_snapshot_client_from_list(struct nvgpu_list_node *node)
108{
109 return (struct gk20a_cs_snapshot_client *)
110 ((uintptr_t)node - offsetof(struct gk20a_cs_snapshot_client, list));
111};
112
/*
 * Number of distinct perfmon ids. It should correlate with the width of
 * gk20a_cs_snapshot_fifo_entry::perfmon_id (8 bits, i.e. 256 values).
 */
#define CSS_MAX_PERFMON_IDS	256

/*
 * Local definition to avoid hard-coded sizes and shifts: the number of
 * unsigned longs needed for a bitmap with one bit per perfmon id.
 */
#define PM_BITMAP_SIZE	DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
118
119/* cycle stats snapshot control structure for one HW entry and many clients */
120struct gk20a_cs_snapshot {
121 unsigned long perfmon_ids[PM_BITMAP_SIZE];
122 struct nvgpu_list_node clients;
123 struct nvgpu_mem hw_memdesc;
124 /* pointer to allocated cpu_va memory where GPU place data */
125 struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
126 struct gk20a_cs_snapshot_fifo_entry *hw_end;
127 struct gk20a_cs_snapshot_fifo_entry *hw_get;
128};
129
130inline void css_hw_set_handled_snapshots(struct gk20a *g, u32 done);
131int css_hw_enable_snapshot(struct channel_gk20a *ch,
132 struct gk20a_cs_snapshot_client *cs_client);
133void css_hw_disable_snapshot(struct gr_gk20a *gr);
134u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
135 u32 count);
136u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
137 u32 start,
138 u32 count);
139int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
140 bool *hw_overflow);
141
142#endif /* CSS_GR_GK20A_H */