diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 105 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 759 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 32 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h | 88 |
7 files changed, 997 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index c83da8b4..643adca5 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -486,6 +486,95 @@ static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, | |||
486 | return -EINVAL; | 486 | return -EINVAL; |
487 | } | 487 | } |
488 | } | 488 | } |
489 | |||
490 | |||
491 | static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) | ||
492 | { | ||
493 | int ret; | ||
494 | |||
495 | mutex_lock(&ch->cs_client_mutex); | ||
496 | if (ch->cs_client) | ||
497 | ret = gr_gk20a_css_flush(ch->g, ch->cs_client); | ||
498 | else | ||
499 | ret = -EBADF; | ||
500 | mutex_unlock(&ch->cs_client_mutex); | ||
501 | |||
502 | return ret; | ||
503 | } | ||
504 | |||
505 | static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, | ||
506 | u32 dmabuf_fd, | ||
507 | u32 perfmon_id_count, | ||
508 | u32 *perfmon_id_start) | ||
509 | { | ||
510 | int ret; | ||
511 | |||
512 | mutex_lock(&ch->cs_client_mutex); | ||
513 | if (ch->cs_client) { | ||
514 | ret = -EEXIST; | ||
515 | } else { | ||
516 | ret = gr_gk20a_css_attach(ch->g, | ||
517 | dmabuf_fd, | ||
518 | perfmon_id_count, | ||
519 | perfmon_id_start, | ||
520 | &ch->cs_client); | ||
521 | } | ||
522 | mutex_unlock(&ch->cs_client_mutex); | ||
523 | |||
524 | return ret; | ||
525 | } | ||
526 | |||
527 | static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch) | ||
528 | { | ||
529 | int ret; | ||
530 | |||
531 | mutex_lock(&ch->cs_client_mutex); | ||
532 | if (ch->cs_client) { | ||
533 | ret = gr_gk20a_css_detach(ch->g, ch->cs_client); | ||
534 | ch->cs_client = NULL; | ||
535 | } else { | ||
536 | ret = 0; | ||
537 | } | ||
538 | mutex_unlock(&ch->cs_client_mutex); | ||
539 | |||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch, | ||
544 | struct nvgpu_cycle_stats_snapshot_args *args) | ||
545 | { | ||
546 | int ret; | ||
547 | |||
548 | if (!args->dmabuf_fd) | ||
549 | return -EINVAL; | ||
550 | |||
551 | /* handle the command (most frequent cases first) */ | ||
552 | switch (args->cmd) { | ||
553 | case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: | ||
554 | ret = gk20a_flush_cycle_stats_snapshot(ch); | ||
555 | args->extra = 0; | ||
556 | break; | ||
557 | |||
558 | case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH: | ||
559 | ret = gk20a_attach_cycle_stats_snapshot(ch, | ||
560 | args->dmabuf_fd, | ||
561 | args->extra, | ||
562 | &args->extra); | ||
563 | break; | ||
564 | |||
565 | case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH: | ||
566 | ret = gk20a_free_cycle_stats_snapshot(ch); | ||
567 | args->extra = 0; | ||
568 | break; | ||
569 | |||
570 | default: | ||
571 | pr_err("cyclestats: unknown command %u\n", args->cmd); | ||
572 | ret = -EINVAL; | ||
573 | break; | ||
574 | } | ||
575 | |||
576 | return ret; | ||
577 | } | ||
489 | #endif | 578 | #endif |
490 | 579 | ||
491 | static int gk20a_init_error_notifier(struct channel_gk20a *ch, | 580 | static int gk20a_init_error_notifier(struct channel_gk20a *ch, |
@@ -602,6 +691,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish) | |||
602 | 691 | ||
603 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 692 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
604 | gk20a_free_cycle_stats_buffer(ch); | 693 | gk20a_free_cycle_stats_buffer(ch); |
694 | gk20a_free_cycle_stats_snapshot(ch); | ||
605 | #endif | 695 | #endif |
606 | 696 | ||
607 | channel_gk20a_free_priv_cmdbuf(ch); | 697 | channel_gk20a_free_priv_cmdbuf(ch); |
@@ -1639,6 +1729,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) | |||
1639 | INIT_LIST_HEAD(&c->jobs); | 1729 | INIT_LIST_HEAD(&c->jobs); |
1640 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 1730 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
1641 | mutex_init(&c->cyclestate.cyclestate_buffer_mutex); | 1731 | mutex_init(&c->cyclestate.cyclestate_buffer_mutex); |
1732 | mutex_init(&c->cs_client_mutex); | ||
1642 | #endif | 1733 | #endif |
1643 | INIT_LIST_HEAD(&c->dbg_s_list); | 1734 | INIT_LIST_HEAD(&c->dbg_s_list); |
1644 | mutex_init(&c->dbg_s_lock); | 1735 | mutex_init(&c->dbg_s_lock); |
@@ -2335,6 +2426,20 @@ long gk20a_channel_ioctl(struct file *filp, | |||
2335 | err = gk20a_channel_events_ctrl(ch, | 2426 | err = gk20a_channel_events_ctrl(ch, |
2336 | (struct nvgpu_channel_events_ctrl_args *)buf); | 2427 | (struct nvgpu_channel_events_ctrl_args *)buf); |
2337 | break; | 2428 | break; |
2429 | #ifdef CONFIG_GK20A_CYCLE_STATS | ||
2430 | case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT: | ||
2431 | err = gk20a_busy(dev); | ||
2432 | if (err) { | ||
2433 | dev_err(&dev->dev, | ||
2434 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
2435 | __func__, cmd); | ||
2436 | break; | ||
2437 | } | ||
2438 | err = gk20a_channel_cycle_stats_snapshot(ch, | ||
2439 | (struct nvgpu_cycle_stats_snapshot_args *)buf); | ||
2440 | gk20a_idle(dev); | ||
2441 | break; | ||
2442 | #endif | ||
2338 | default: | 2443 | default: |
2339 | dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd); | 2444 | dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd); |
2340 | err = -ENOTTY; | 2445 | err = -ENOTTY; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 5fe03cef..f022fe36 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -133,6 +133,9 @@ struct channel_gk20a { | |||
133 | struct dma_buf *cyclestate_buffer_handler; | 133 | struct dma_buf *cyclestate_buffer_handler; |
134 | struct mutex cyclestate_buffer_mutex; | 134 | struct mutex cyclestate_buffer_mutex; |
135 | } cyclestate; | 135 | } cyclestate; |
136 | |||
137 | struct mutex cs_client_mutex; | ||
138 | struct gk20a_cs_snapshot_client *cs_client; | ||
136 | #endif | 139 | #endif |
137 | struct mutex dbg_s_lock; | 140 | struct mutex dbg_s_lock; |
138 | struct list_head dbg_s_list; | 141 | struct list_head dbg_s_list; |
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c new file mode 100644 index 00000000..7509acd7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | |||
@@ -0,0 +1,759 @@ | |||
1 | /* | ||
2 | * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). | ||
3 | * | ||
4 | * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/bitops.h> | ||
20 | #include <linux/dma-mapping.h> | ||
21 | #include <linux/dma-buf.h> | ||
22 | #include <linux/mutex.h> | ||
23 | #include <linux/vmalloc.h> | ||
24 | |||
25 | #include "gk20a.h" | ||
26 | #include "hw_perf_gk20a.h" | ||
27 | #include "hw_mc_gk20a.h" | ||
28 | |||
29 | |||
30 | |||
/*
 * cycle stats fifo header (must match NvSnapshotBufferFifo)
 *
 * This header sits at the start of the client's mapped dma-buf. All
 * offsets stored in it are byte offsets from the start of the header
 * (they are resolved with CSS_FIFO_ENTRY()).
 */
struct gk20a_cs_snapshot_fifo {
	/* layout description of the buffer: byte offsets of the first
	 * entry and of the end of the last entry */
	u32 start;
	u32 end;

	/* snafu bits: incremented by the kernel on HW buffer overflow
	 * and on client fifo overflow respectively */
	u32 hw_overflow_events_occured;
	u32 sw_overflow_events_occured;

	/* the kernel copies new entries to put and
	 * increment the put++. if put == get then
	 * overflowEventsOccured++
	 */
	u32 put;
	u32 _reserved10;
	u32 _reserved11;
	u32 _reserved12;

	/* the driver/client reads from get until
	 * put==get, get++ */
	u32 get;
	u32 _reserved20;
	u32 _reserved21;
	u32 _reserved22;

	/* unused */
	u32 _reserved30;
	u32 _reserved31;
	u32 _reserved32;
	u32 _reserved33;
};
63 | |||
/*
 * cycle stats fifo entry (must match NvSnapshotBufferFifoEntry)
 *
 * The same layout is used for entries in the HW buffer and for entries
 * copied into a client fifo.
 */
struct gk20a_cs_snapshot_fifo_entry {
	/* global timestamp; the two fields below carry 40 bits of it
	 * (bits 31..0 and bits 39..32) */
	u32 timestamp31_00:32;
	u32 timestamp39_32:8;

	/* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
	u32 perfmon_id:8;

	/* typically samples_counter is wired to #pmtrigger count */
	u32 samples_counter:12;

	/* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
	u32 ds:1;
	u32 sz:1;
	/* the flush loop treats zero0 == 0 as "record completed"; the HW
	 * buffer is pre-filled with 0xff so untouched slots read as 1 */
	u32 zero0:1;
	u32 zero1:1;

	/* counter results */
	u32 event_cnt:32;
	u32 trigger0_cnt:32;
	u32 trigger1_cnt:32;
	u32 sample_cnt:32;

	/* Local PmTrigger results for Maxwell+ or padding otherwise */
	u16 local_trigger_b_count:16;
	u16 book_mark_b:16;
	u16 local_trigger_a_count:16;
	u16 book_mark_a:16;
};
94 | |||
95 | |||
/* cycle stats snapshot client data (e.g. associated with channel) */
struct gk20a_cs_snapshot_client {
	/* node in gk20a_cs_snapshot::clients */
	struct list_head list;
	/* dma-buf fd supplied by the client and the dma_buf obtained
	 * from it */
	u32 dmabuf_fd;
	struct dma_buf *dma_handler;
	/* kernel mapping of the client buffer (fifo header at its start)
	 * and the buffer size in bytes */
	struct gk20a_cs_snapshot_fifo *snapshot;
	u32 snapshot_size;
	/* perfmon ID range [perfmon_start, perfmon_start + perfmon_count)
	 * owned by this client; see CONTAINS_PERFMON() */
	u32 perfmon_start;
	u32 perfmon_count;
};
106 | |||
/*
 * true when perfmon ID pm falls inside the range owned by client cl,
 * i.e. [perfmon_start, perfmon_start + perfmon_count)
 */
#define CONTAINS_PERFMON(cl, pm)				\
	((pm) >= (cl)->perfmon_start &&				\
	 (pm) - (cl)->perfmon_start < (cl)->perfmon_count)
111 | |||
/* the minimal size of HW buffer - should be enough to avoid HW overflows */
#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)

/* the minimal size of client buffer: fifo header plus 256 entries */
#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
	(sizeof(struct gk20a_cs_snapshot_fifo) + \
	sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)

/* address of fifo entry by offset (offs is in bytes from the fifo header) */
#define CSS_FIFO_ENTRY(fifo, offs) \
	((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))

/* calculate area capacity in number of fifo entries: buffer size s in
 * bytes minus the fifo header, in whole entries */
#define CSS_FIFO_ENTRY_CAPACITY(s) \
	(((s) - sizeof(struct gk20a_cs_snapshot_fifo)) \
	/ sizeof(struct gk20a_cs_snapshot_fifo_entry))

/* first perfmon ID handed out to clients; lower IDs are
 * reserved to indicate failures with data */
#define CSS_FIRST_PERFMON_ID 32
/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
#define CSS_MAX_PERFMON_IDS 256
133 | |||
134 | |||
/* this type is used for storing bits in perfmon mask (one word of the
 * perfmon ID bitmap) */
typedef u32 css_perfmon_t;
137 | |||
/*
 * Local helpers for the perfmon ID bitmap. Word size and shifts are
 * derived from css_perfmon_t so nothing is hard-coded.
 */
#define PM_BITS			(sizeof(css_perfmon_t) * BITS_PER_BYTE)
#define PM_BITS_MASK		(PM_BITS - 1)

/* number of css_perfmon_t words needed for CSS_MAX_PERFMON_IDS bits */
#define PM_BITMAP_SIZE	((CSS_MAX_PERFMON_IDS + PM_BITS - 1) / PM_BITS)

/* word index, bit position inside the word, and single-bit mask for ID i */
#define PM_SLOT(i)		((i) / PM_BITS)
#define PM_SHIFT(i)		((i) & PM_BITS_MASK)
#define PM_BIT(i)		(1u << PM_SHIFT(i))

/*
 * Test / set / clear bit i in bitmap p. The bitmap argument is
 * parenthesized so that expression arguments (e.g. "base + 1") index
 * the intended word instead of binding to the subscript.
 */
#define CSS_PERFMON_GET(p, i)	(1 == (((p)[PM_SLOT(i)] >> PM_SHIFT(i)) & 1))
#define CSS_PERFMON_USE(p, i)	((p)[PM_SLOT(i)] |= PM_BIT(i))
#define CSS_PERFMON_REL(p, i)	((p)[PM_SLOT(i)] &= ~PM_BIT(i))
151 | |||
152 | |||
/* cycle stats snapshot control structure for one HW entry and many clients */
struct gk20a_cs_snapshot {
	/* bitmap of perfmon IDs currently handed out to clients */
	css_perfmon_t perfmon_ids[PM_BITMAP_SIZE];
	/* list of attached gk20a_cs_snapshot_client objects */
	struct list_head clients;
	/* backing allocation of the HW snapshot buffer */
	struct mem_desc hw_memdesc;
	/* pointer to allocated cpu_va memory where GPU place data */
	struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
	/* one past the last entry of the HW buffer */
	struct gk20a_cs_snapshot_fifo_entry *hw_end;
	/* next HW entry the flush code will consume */
	struct gk20a_cs_snapshot_fifo_entry *hw_get;
};
163 | |||
164 | /* reports whether the hw queue overflowed */ | ||
165 | static inline bool css_hw_get_overflow_status(struct gk20a *g) | ||
166 | { | ||
167 | const u32 st = perf_pmasys_control_membuf_status_overflowed_f(); | ||
168 | return st == (gk20a_readl(g, perf_pmasys_control_r()) & st); | ||
169 | } | ||
170 | |||
171 | /* returns how many pending snapshot entries are pending */ | ||
172 | static inline u32 css_hw_get_pending_snapshots(struct gk20a *g) | ||
173 | { | ||
174 | return gk20a_readl(g, perf_pmasys_mem_bytes_r()) / | ||
175 | sizeof(struct gk20a_cs_snapshot_fifo_entry); | ||
176 | } | ||
177 | |||
178 | /* informs hw how many snapshots have been processed (frees up fifo space) */ | ||
179 | static inline void css_hw_set_handled_snapshots(struct gk20a *g, u32 done) | ||
180 | { | ||
181 | if (done > 0) { | ||
182 | gk20a_writel(g, perf_pmasys_mem_bump_r(), | ||
183 | done * sizeof(struct gk20a_cs_snapshot_fifo_entry)); | ||
184 | } | ||
185 | } | ||
186 | |||
/*
 * disable streaming to memory
 *
 * Pulses the perfmon enable bit in mc_enable, checks that the engine
 * reports an empty record buffer, writes the membuf clear-status value
 * to the pmasys control register and finally acknowledges every entry
 * still reported as pending. The order of the register accesses is
 * significant and must be kept.
 */
static void css_hw_reset_streaming(struct gk20a *g)
{
	u32 engine_status;
	u32 old_pmc = gk20a_readl(g, mc_enable_r());

	/* reset the perfmon: clear its enable bit, then restore the
	 * previous mc_enable value */
	gk20a_writel(g, mc_enable_r(),
				old_pmc & ~mc_enable_perfmon_enabled_f());
	gk20a_writel(g, mc_enable_r(), old_pmc);

	/* RBUFEMPTY must be set -- otherwise we'll pick up */
	/* snapshot that have been queued up from earlier */
	engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
	WARN_ON(0 == (engine_status
			& perf_pmasys_enginestatus_rbufempty_empty_f()));

	/* turn off writes */
	gk20a_writel(g, perf_pmasys_control_r(),
			perf_pmasys_control_membuf_clear_status_doit_f());

	/* pointing all pending snapshots as handled */
	css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g));
}
211 | |||
212 | /* | ||
213 | * WARNING: all css_gr_XXX functions are local and expected to be called | ||
214 | * from locked context (protected by cs_lock) | ||
215 | */ | ||
216 | |||
217 | static int css_gr_create_shared_data(struct gr_gk20a *gr) | ||
218 | { | ||
219 | struct gk20a_cs_snapshot *data; | ||
220 | |||
221 | if (gr->cs_data) | ||
222 | return 0; | ||
223 | |||
224 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
225 | if (!data) | ||
226 | return -ENOMEM; | ||
227 | |||
228 | INIT_LIST_HEAD(&data->clients); | ||
229 | gr->cs_data = data; | ||
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
/*
 * Allocate the HW snapshot buffer and point the perfmon streaming
 * registers at it.
 *
 * @snapshot_size: requested size in bytes; raised to
 *                 CSS_MIN_HW_SNAPSHOT_SIZE when smaller.
 *
 * Does nothing and returns 0 when the buffer is already set up. Returns
 * the allocator's error, or -EFAULT when the allocation is unusable
 * (no CPU mapping, too small, or ending above 4GB in GPU VA).
 * Expects gr->cs_data to be valid.
 */
static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size)
{
	struct gk20a *g = gr->g;
	struct gk20a_cs_snapshot *data = gr->cs_data;
	int ret;

	u32 virt_addr_lo;
	u32 virt_addr_hi;
	u32 inst_pa_page;

	/* already enabled by an earlier client */
	if (data->hw_snapshot)
		return 0;

	if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
		snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;

	ret = gk20a_gmmu_alloc_map(&g->mm.pmu.vm, snapshot_size,
							&data->hw_memdesc);
	if (ret)
		return ret;

	/* perf output buffer may not cross a 4GB boundary - with a separate */
	/* va smaller than that, it won't but check anyway */
	if (!data->hw_memdesc.cpu_va ||
	    data->hw_memdesc.size < snapshot_size ||
	    data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) {
		ret = -EFAULT;
		goto failed_allocation;
	}

	/* set up the ring pointers and pre-fill the buffer with 0xff so
	 * that slots not yet written by HW can be told apart */
	data->hw_snapshot =
		(struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va;
	data->hw_end = data->hw_snapshot +
		snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
	data->hw_get = data->hw_snapshot;
	memset(data->hw_snapshot, 0xff, snapshot_size);

	/* address and size are aligned to 32 bytes, the lowest bits read back
	 * as zeros */
	virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va);
	virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va);

	css_hw_reset_streaming(g);

	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
	gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);

	/* this field is aligned to 4K */
	inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12;

	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
	 * should be written last */
	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(inst_pa_page) |
			perf_pmasys_mem_block_valid_true_f() |
			perf_pmasys_mem_block_target_lfb_f());

	gk20a_dbg_info("cyclestats: buffer for hardware snapshots enabled\n");

	return 0;

failed_allocation:
	/* undo the allocation and leave the descriptor zeroed */
	if (data->hw_memdesc.size) {
		gk20a_gmmu_unmap_free(&g->mm.pmu.vm, &data->hw_memdesc);
		memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
	}
	data->hw_snapshot = NULL;

	return ret;
}
306 | |||
/*
 * Stop streaming, clear the output buffer registers and release the HW
 * snapshot buffer. No-op when the buffer was never enabled.
 * Expects gr->cs_data to be valid.
 */
static void css_hw_disable_snapshot(struct gr_gk20a *gr)
{
	struct gk20a *g = gr->g;
	struct gk20a_cs_snapshot *data = gr->cs_data;

	if (!data->hw_snapshot)
		return;

	css_hw_reset_streaming(g);

	/* zero the output base/size registers */
	gk20a_writel(g, perf_pmasys_outbase_r(), 0);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(0));
	gk20a_writel(g, perf_pmasys_outsize_r(), 0);

	/* mark the memory block as not valid */
	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(0) |
			perf_pmasys_mem_block_valid_false_f() |
			perf_pmasys_mem_block_target_f(0));

	/* free the buffer only after the registers no longer refer to it */
	gk20a_gmmu_unmap_free(&g->mm.pmu.vm, &data->hw_memdesc);
	memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
	data->hw_snapshot = NULL;

	gk20a_dbg_info("cyclestats: buffer for hardware snapshots disabled\n");
}
333 | |||
334 | static void css_gr_free_shared_data(struct gr_gk20a *gr) | ||
335 | { | ||
336 | if (gr->cs_data) { | ||
337 | /* the clients list is expected to be empty */ | ||
338 | css_hw_disable_snapshot(gr); | ||
339 | |||
340 | /* release the objects */ | ||
341 | kfree(gr->cs_data); | ||
342 | gr->cs_data = NULL; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | |||
347 | static struct gk20a_cs_snapshot_client* | ||
348 | css_gr_search_client(struct list_head *clients, u32 perfmon) | ||
349 | { | ||
350 | struct list_head *pos; | ||
351 | |||
352 | list_for_each(pos, clients) { | ||
353 | struct gk20a_cs_snapshot_client *client = | ||
354 | container_of(pos, | ||
355 | struct gk20a_cs_snapshot_client, list); | ||
356 | if (CONTAINS_PERFMON(client, perfmon)) | ||
357 | return client; | ||
358 | } | ||
359 | |||
360 | return NULL; | ||
361 | } | ||
362 | |||
363 | static int css_gr_flush_snapshots(struct gr_gk20a *gr) | ||
364 | { | ||
365 | struct gk20a *g = gr->g; | ||
366 | struct gk20a_cs_snapshot *css = gr->cs_data; | ||
367 | struct gk20a_cs_snapshot_client *cur; | ||
368 | u32 pending; | ||
369 | |||
370 | /* variables for iterating over HW entries */ | ||
371 | u32 sid; | ||
372 | struct gk20a_cs_snapshot_fifo_entry *src; | ||
373 | |||
374 | /* due to data sharing with userspace we allowed update only */ | ||
375 | /* overflows and put field in the fifo header */ | ||
376 | struct gk20a_cs_snapshot_fifo *dst; | ||
377 | struct gk20a_cs_snapshot_fifo_entry *dst_get; | ||
378 | struct gk20a_cs_snapshot_fifo_entry *dst_put; | ||
379 | struct gk20a_cs_snapshot_fifo_entry *dst_head; | ||
380 | struct gk20a_cs_snapshot_fifo_entry *dst_tail; | ||
381 | |||
382 | if (!css) | ||
383 | return -EINVAL; | ||
384 | |||
385 | if (!css->hw_snapshot) | ||
386 | return -EINVAL; | ||
387 | |||
388 | if (list_empty(&css->clients)) | ||
389 | return -EBADF; | ||
390 | |||
391 | /* check data available */ | ||
392 | pending = css_hw_get_pending_snapshots(g); | ||
393 | if (!pending) | ||
394 | return 0; | ||
395 | |||
396 | if (css_hw_get_overflow_status(g)) { | ||
397 | struct list_head *pos; | ||
398 | |||
399 | list_for_each(pos, &css->clients) { | ||
400 | cur = container_of(pos, | ||
401 | struct gk20a_cs_snapshot_client, list); | ||
402 | cur->snapshot->hw_overflow_events_occured++; | ||
403 | } | ||
404 | |||
405 | gk20a_warn(dev_from_gk20a(g), | ||
406 | "cyclestats: hardware overflow detected\n"); | ||
407 | } | ||
408 | |||
409 | /* proceed all items in HW buffer */ | ||
410 | sid = 0; | ||
411 | cur = NULL; | ||
412 | dst = NULL; | ||
413 | dst_put = NULL; | ||
414 | src = css->hw_get; | ||
415 | |||
416 | /* proceed all completed records */ | ||
417 | while (sid < pending && 0 == src->zero0) { | ||
418 | /* we may have a new perfmon_id which required to */ | ||
419 | /* switch to a new client -> let's forget current */ | ||
420 | if (cur && !CONTAINS_PERFMON(cur, src->perfmon_id)) { | ||
421 | dst->put = (char *)dst_put - (char *)dst; | ||
422 | dst = NULL; | ||
423 | cur = NULL; | ||
424 | } | ||
425 | |||
426 | /* now we have to select a new current client */ | ||
427 | /* the client selection rate depends from experiment */ | ||
428 | /* activity but on Android usually happened 1-2 times */ | ||
429 | if (!cur) { | ||
430 | cur = css_gr_search_client(&css->clients, | ||
431 | src->perfmon_id); | ||
432 | if (cur) { | ||
433 | /* found - setup all required data */ | ||
434 | dst = cur->snapshot; | ||
435 | dst_get = CSS_FIFO_ENTRY(dst, dst->get); | ||
436 | dst_put = CSS_FIFO_ENTRY(dst, dst->put); | ||
437 | dst_head = CSS_FIFO_ENTRY(dst, dst->start); | ||
438 | dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1; | ||
439 | } else { | ||
440 | /* client not found - skipping this entry */ | ||
441 | gk20a_warn(dev_from_gk20a(g), | ||
442 | "cyclestats: orphaned perfmon %u\n", | ||
443 | src->perfmon_id); | ||
444 | goto next_hw_fifo_entry; | ||
445 | } | ||
446 | } | ||
447 | |||
448 | /* check for software overflows */ | ||
449 | if (dst_put + 1 == dst_get || | ||
450 | (dst_put == dst_tail && dst_get == dst_head)) { | ||
451 | /* no data copy, no pointer updates */ | ||
452 | dst->sw_overflow_events_occured++; | ||
453 | gk20a_warn(dev_from_gk20a(g), | ||
454 | "cyclestats: perfmon %u soft overflow\n", | ||
455 | src->perfmon_id); | ||
456 | } else { | ||
457 | *dst_put = *src; | ||
458 | if (dst_put == dst_tail) | ||
459 | dst_put = dst_head; | ||
460 | else | ||
461 | dst_put++; | ||
462 | } | ||
463 | |||
464 | next_hw_fifo_entry: | ||
465 | sid++; | ||
466 | if (++src >= css->hw_end) | ||
467 | src = css->hw_snapshot; | ||
468 | } | ||
469 | |||
470 | /* update client put pointer if necessary */ | ||
471 | if (cur && dst) | ||
472 | dst->put = (char *)dst_put - (char *)dst; | ||
473 | |||
474 | /* re-set HW buffer after processing taking wrapping into account */ | ||
475 | if (css->hw_get < src) { | ||
476 | memset(css->hw_get, 0xff, (src - css->hw_get) * sizeof(*src)); | ||
477 | } else { | ||
478 | memset(css->hw_snapshot, 0xff, | ||
479 | (src - css->hw_snapshot) * sizeof(*src)); | ||
480 | memset(css->hw_get, 0xff, | ||
481 | (css->hw_end - css->hw_get) * sizeof(*src)); | ||
482 | } | ||
483 | gr->cs_data->hw_get = src; | ||
484 | css_hw_set_handled_snapshots(g, sid); | ||
485 | if (pending != sid) { | ||
486 | /* not all entries proceed correctly. some of problems */ | ||
487 | /* reported as overflows, some as orphaned perfmons, */ | ||
488 | /* but it will be better notify with summary about it */ | ||
489 | gk20a_warn(dev_from_gk20a(g), | ||
490 | "cyclestats: done %u from %u entries\n", | ||
491 | sid, pending); | ||
492 | } | ||
493 | |||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | static u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, | ||
498 | u32 count) | ||
499 | { | ||
500 | u32 *pids = data->perfmon_ids; | ||
501 | u32 f; | ||
502 | u32 e = CSS_MAX_PERFMON_IDS - count; | ||
503 | |||
504 | if (!count || count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID) | ||
505 | return 0; | ||
506 | |||
507 | for (f = CSS_FIRST_PERFMON_ID; f < e; f++) { | ||
508 | u32 slots = 0; | ||
509 | u32 cur; | ||
510 | u32 end = f + count; | ||
511 | |||
512 | /* lookup for continuous hole [f, f+count) of unused bits */ | ||
513 | for (cur = f; cur < end; cur++) { | ||
514 | if (CSS_PERFMON_GET(pids, cur)) | ||
515 | break; | ||
516 | slots++; | ||
517 | } | ||
518 | |||
519 | if (count == slots) { | ||
520 | /* we found of hole of unused bits with required */ | ||
521 | /* length -> can occupy it for our perfmon IDs */ | ||
522 | for (cur = f; cur < end; cur++) | ||
523 | CSS_PERFMON_USE(pids, cur); | ||
524 | |||
525 | return f; | ||
526 | } | ||
527 | } | ||
528 | |||
529 | return 0; | ||
530 | } | ||
531 | |||
532 | static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, | ||
533 | u32 start, | ||
534 | u32 count) | ||
535 | { | ||
536 | u32 *pids = data->perfmon_ids; | ||
537 | u32 end = start + count; | ||
538 | u32 cnt = 0; | ||
539 | |||
540 | if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) { | ||
541 | u32 i; | ||
542 | for (i = start; i < end; i++) { | ||
543 | if (CSS_PERFMON_GET(pids, i)) { | ||
544 | CSS_PERFMON_REL(pids, i); | ||
545 | cnt++; | ||
546 | } | ||
547 | } | ||
548 | } | ||
549 | |||
550 | return cnt; | ||
551 | } | ||
552 | |||
553 | |||
554 | static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, | ||
555 | struct gk20a_cs_snapshot_client *client) | ||
556 | { | ||
557 | int ret = 0; | ||
558 | |||
559 | list_del(&client->list); | ||
560 | if (client->perfmon_start && client->perfmon_count) { | ||
561 | if (client->perfmon_count != css_gr_release_perfmon_ids(data, | ||
562 | client->perfmon_start, client->perfmon_count)) | ||
563 | ret = -EINVAL; | ||
564 | } | ||
565 | if (client->dma_handler) { | ||
566 | dma_buf_vunmap(client->dma_handler, client->snapshot); | ||
567 | dma_buf_put(client->dma_handler); | ||
568 | } | ||
569 | |||
570 | kfree(client); | ||
571 | |||
572 | return ret; | ||
573 | } | ||
574 | |||
575 | static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, | ||
576 | u32 dmabuf_fd, u32 perfmon_count, | ||
577 | struct gk20a_cs_snapshot_client **client) | ||
578 | { | ||
579 | struct gk20a_cs_snapshot_client *cur; | ||
580 | int ret = 0; | ||
581 | |||
582 | cur = kzalloc(sizeof(*cur), GFP_KERNEL); | ||
583 | if (!cur) { | ||
584 | ret = -ENOMEM; | ||
585 | goto failed; | ||
586 | } | ||
587 | |||
588 | cur->dmabuf_fd = dmabuf_fd; | ||
589 | cur->dma_handler = dma_buf_get(cur->dmabuf_fd); | ||
590 | if (IS_ERR(cur->dma_handler)) { | ||
591 | ret = PTR_ERR(cur->dma_handler); | ||
592 | cur->dma_handler = NULL; | ||
593 | goto failed; | ||
594 | } | ||
595 | |||
596 | cur->snapshot = (struct gk20a_cs_snapshot_fifo *) | ||
597 | dma_buf_vmap(cur->dma_handler); | ||
598 | if (!cur->snapshot) { | ||
599 | ret = -ENOMEM; | ||
600 | goto failed; | ||
601 | } | ||
602 | |||
603 | cur->snapshot_size = cur->dma_handler->size; | ||
604 | if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { | ||
605 | ret = -ENOMEM; | ||
606 | goto failed; | ||
607 | } | ||
608 | |||
609 | memset(cur->snapshot, 0, sizeof(*cur->snapshot)); | ||
610 | cur->snapshot->start = sizeof(*cur->snapshot); | ||
611 | /* we should be ensure that can fit all fifo entries here */ | ||
612 | cur->snapshot->end = | ||
613 | CSS_FIFO_ENTRY_CAPACITY(cur->snapshot_size) | ||
614 | * sizeof(struct gk20a_cs_snapshot_fifo_entry) | ||
615 | + sizeof(struct gk20a_cs_snapshot_fifo); | ||
616 | cur->snapshot->get = cur->snapshot->start; | ||
617 | cur->snapshot->put = cur->snapshot->start; | ||
618 | |||
619 | cur->perfmon_count = perfmon_count; | ||
620 | if (cur->perfmon_count) { | ||
621 | cur->perfmon_start = css_gr_allocate_perfmon_ids(data, | ||
622 | cur->perfmon_count); | ||
623 | if (!cur->perfmon_start) { | ||
624 | ret = -ENOENT; | ||
625 | goto failed; | ||
626 | } | ||
627 | } | ||
628 | |||
629 | list_add_tail(&cur->list, &data->clients); | ||
630 | *client = cur; | ||
631 | |||
632 | return 0; | ||
633 | |||
634 | failed: | ||
635 | *client = NULL; | ||
636 | if (cur) | ||
637 | css_gr_free_client_data(data, cur); | ||
638 | |||
639 | return ret; | ||
640 | } | ||
641 | |||
642 | |||
643 | int gr_gk20a_css_attach(struct gk20a *g, | ||
644 | u32 dmabuf_fd, | ||
645 | u32 perfmon_count, | ||
646 | u32 *perfmon_start, | ||
647 | struct gk20a_cs_snapshot_client **cs_client) | ||
648 | { | ||
649 | int ret = 0; | ||
650 | struct gr_gk20a *gr; | ||
651 | |||
652 | if (!g->allow_all) | ||
653 | return -EACCES; | ||
654 | /* we must have a placeholder to store pointer to client structure */ | ||
655 | if (!cs_client) | ||
656 | return -EINVAL; | ||
657 | |||
658 | gr = &g->gr; | ||
659 | *cs_client = NULL; | ||
660 | |||
661 | mutex_lock(&gr->cs_lock); | ||
662 | |||
663 | ret = css_gr_create_shared_data(gr); | ||
664 | if (ret) | ||
665 | goto failed; | ||
666 | |||
667 | ret = css_gr_create_client_data(gr->cs_data, | ||
668 | dmabuf_fd, | ||
669 | perfmon_count, | ||
670 | cs_client); | ||
671 | if (ret) | ||
672 | goto failed; | ||
673 | |||
674 | ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size); | ||
675 | if (ret) | ||
676 | goto failed; | ||
677 | |||
678 | if (perfmon_start) | ||
679 | *perfmon_start = (*cs_client)->perfmon_start; | ||
680 | |||
681 | mutex_unlock(&gr->cs_lock); | ||
682 | |||
683 | return 0; | ||
684 | |||
685 | failed: | ||
686 | if (gr->cs_data) { | ||
687 | if (*cs_client) { | ||
688 | css_gr_free_client_data(gr->cs_data, *cs_client); | ||
689 | *cs_client = NULL; | ||
690 | } | ||
691 | |||
692 | if (list_empty(&gr->cs_data->clients)) | ||
693 | css_gr_free_shared_data(gr); | ||
694 | } | ||
695 | mutex_unlock(&gr->cs_lock); | ||
696 | |||
697 | if (perfmon_start) | ||
698 | *perfmon_start = 0; | ||
699 | |||
700 | return ret; | ||
701 | } | ||
702 | |||
703 | int gr_gk20a_css_detach(struct gk20a *g, | ||
704 | struct gk20a_cs_snapshot_client *cs_client) | ||
705 | { | ||
706 | int ret = 0; | ||
707 | struct gr_gk20a *gr; | ||
708 | |||
709 | if (!g->allow_all) | ||
710 | return -EACCES; | ||
711 | |||
712 | if (!cs_client) | ||
713 | return -EINVAL; | ||
714 | |||
715 | gr = &g->gr; | ||
716 | mutex_lock(&gr->cs_lock); | ||
717 | if (gr->cs_data) { | ||
718 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
719 | |||
720 | ret = css_gr_free_client_data(data, cs_client); | ||
721 | if (list_empty(&data->clients)) | ||
722 | css_gr_free_shared_data(gr); | ||
723 | } else { | ||
724 | ret = -EBADF; | ||
725 | } | ||
726 | mutex_unlock(&gr->cs_lock); | ||
727 | |||
728 | return ret; | ||
729 | } | ||
730 | |||
731 | int gr_gk20a_css_flush(struct gk20a *g, | ||
732 | struct gk20a_cs_snapshot_client *cs_client) | ||
733 | { | ||
734 | int ret = 0; | ||
735 | struct gr_gk20a *gr; | ||
736 | |||
737 | if (!g->allow_all) | ||
738 | return -EACCES; | ||
739 | |||
740 | if (!cs_client) | ||
741 | return -EINVAL; | ||
742 | |||
743 | gr = &g->gr; | ||
744 | mutex_lock(&gr->cs_lock); | ||
745 | ret = css_gr_flush_snapshots(gr); | ||
746 | mutex_unlock(&gr->cs_lock); | ||
747 | |||
748 | return ret; | ||
749 | } | ||
750 | |||
751 | /* helper function with locking to cleanup snapshot code code in gr_gk20a.c */ | ||
752 | void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) | ||
753 | { | ||
754 | struct gr_gk20a *gr = &g->gr; | ||
755 | |||
756 | mutex_lock(&gr->cs_lock); | ||
757 | css_gr_free_shared_data(gr); | ||
758 | mutex_unlock(&gr->cs_lock); | ||
759 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index cc1b221d..9c201f32 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -2011,8 +2011,10 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
2011 | gk20a_platform_has_syncpoints(g->dev)) | 2011 | gk20a_platform_has_syncpoints(g->dev)) |
2012 | gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; | 2012 | gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; |
2013 | 2013 | ||
2014 | if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS)) | 2014 | if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS)) { |
2015 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; | 2015 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; |
2016 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT; | ||
2017 | } | ||
2016 | 2018 | ||
2017 | gpu->gpc_mask = 1; | 2019 | gpu->gpc_mask = 1; |
2018 | 2020 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 7c007622..85d1a886 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -2847,6 +2847,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
2847 | 2847 | ||
2848 | gk20a_dbg_fn(""); | 2848 | gk20a_dbg_fn(""); |
2849 | 2849 | ||
2850 | gr_gk20a_free_cyclestats_snapshot_data(g); | ||
2851 | |||
2850 | gr_gk20a_free_global_ctx_buffers(g); | 2852 | gr_gk20a_free_global_ctx_buffers(g); |
2851 | 2853 | ||
2852 | gk20a_gmmu_free(g, &gr->mmu_wr_mem); | 2854 | gk20a_gmmu_free(g, &gr->mmu_wr_mem); |
@@ -4523,6 +4525,11 @@ int gk20a_init_gr_support(struct gk20a *g) | |||
4523 | 4525 | ||
4524 | gk20a_dbg_fn(""); | 4526 | gk20a_dbg_fn(""); |
4525 | 4527 | ||
4528 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
4529 | mutex_init(&g->gr.cs_lock); | ||
4530 | g->gr.cs_data = NULL; | ||
4531 | #endif | ||
4532 | |||
4526 | /* this is required before gr_gk20a_init_ctx_state */ | 4533 | /* this is required before gr_gk20a_init_ctx_state */ |
4527 | mutex_init(&g->gr.fecs_mutex); | 4534 | mutex_init(&g->gr.fecs_mutex); |
4528 | 4535 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index b2213739..0909b660 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -170,6 +170,11 @@ struct sm_info { | |||
170 | u8 tpc_index; | 170 | u8 tpc_index; |
171 | }; | 171 | }; |
172 | 172 | ||
173 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
174 | struct gk20a_cs_snapshot_client; /* forward declaration; passed by pointer to the css attach/detach/flush calls */ | ||
175 | struct gk20a_cs_snapshot; /* forward declaration; pointee type of gr_gk20a.cs_data */ | ||
176 | #endif /* CONFIG_GK20A_CYCLE_STATS */ | ||
177 | |||
173 | struct gr_gk20a { | 178 | struct gr_gk20a { |
174 | struct gk20a *g; | 179 | struct gk20a *g; |
175 | struct { | 180 | struct { |
@@ -294,6 +299,10 @@ struct gr_gk20a { | |||
294 | u32 fbp_en_mask; | 299 | u32 fbp_en_mask; |
295 | u32 no_of_sm; | 300 | u32 no_of_sm; |
296 | struct sm_info *sm_to_cluster; | 301 | struct sm_info *sm_to_cluster; |
302 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
303 | struct mutex cs_lock; | ||
304 | struct gk20a_cs_snapshot *cs_data; | ||
305 | #endif | ||
297 | }; | 306 | }; |
298 | 307 | ||
299 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); | 308 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); |
@@ -497,4 +506,27 @@ void gr_gk20a_free_gr_ctx(struct gk20a *g, | |||
497 | int gr_gk20a_halt_pipe(struct gk20a *g); | 506 | int gr_gk20a_halt_pipe(struct gk20a *g); |
498 | int gr_gk20a_debugfs_init(struct gk20a *g); | 507 | int gr_gk20a_debugfs_init(struct gk20a *g); |
499 | 508 | ||
509 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
510 | int gr_gk20a_css_attach(struct gk20a *g, /* in - main hw structure */ | ||
511 | u32 dmabuf_fd, /* in - dma mapped memory */ | ||
512 | u32 perfmon_id_count, /* in - number of perfmons */ | ||
513 | u32 *perfmon_id_start, /* out - index of first pm */ | ||
514 | /* out - pointer to client data, passed to the later detach/flush calls */ | ||
515 | struct gk20a_cs_snapshot_client **css_client); | ||
516 | |||
517 | int gr_gk20a_css_detach(struct gk20a *g, | ||
518 | struct gk20a_cs_snapshot_client *css_client); /* in - client to detach */ | ||
519 | int gr_gk20a_css_flush(struct gk20a *g, | ||
520 | struct gk20a_cs_snapshot_client *css_client); /* in - client obtained from attach */ | ||
521 | |||
522 | void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g); | ||
523 | |||
524 | #else /* !CONFIG_GK20A_CYCLE_STATS */ | ||
525 | /* no-op stub of the cleanup function, used when cycle stats snapshot support is compiled out */ | ||
526 | static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) | ||
527 | { | ||
528 | (void)g; | ||
529 | } | ||
530 | #endif /* CONFIG_GK20A_CYCLE_STATS */ | ||
531 | |||
500 | #endif /*__GR_GK20A_H__*/ | 532 | #endif /*__GR_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h index 65d91de6..1ca80d29 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h | |||
@@ -50,6 +50,38 @@ | |||
50 | #ifndef _hw_perf_gk20a_h_ | 50 | #ifndef _hw_perf_gk20a_h_ |
51 | #define _hw_perf_gk20a_h_ | 51 | #define _hw_perf_gk20a_h_ |
52 | 52 | ||
53 | static inline u32 perf_pmasys_control_r(void) | ||
54 | { | ||
55 | return 0x001b4000; | ||
56 | } | ||
57 | static inline u32 perf_pmasys_control_membuf_status_v(u32 r) | ||
58 | { | ||
59 | return (r >> 4) & 0x1; | ||
60 | } | ||
61 | static inline u32 perf_pmasys_control_membuf_status_overflowed_v(void) | ||
62 | { | ||
63 | return 0x00000001; | ||
64 | } | ||
65 | static inline u32 perf_pmasys_control_membuf_status_overflowed_f(void) | ||
66 | { | ||
67 | return 0x10; | ||
68 | } | ||
69 | static inline u32 perf_pmasys_control_membuf_clear_status_f(u32 v) | ||
70 | { | ||
71 | return (v & 0x1) << 5; | ||
72 | } | ||
73 | static inline u32 perf_pmasys_control_membuf_clear_status_v(u32 r) | ||
74 | { | ||
75 | return (r >> 5) & 0x1; | ||
76 | } | ||
77 | static inline u32 perf_pmasys_control_membuf_clear_status_doit_v(void) | ||
78 | { | ||
79 | return 0x00000001; | ||
80 | } | ||
81 | static inline u32 perf_pmasys_control_membuf_clear_status_doit_f(void) | ||
82 | { | ||
83 | return 0x20; | ||
84 | } | ||
53 | static inline u32 perf_pmasys_mem_block_r(void) | 85 | static inline u32 perf_pmasys_mem_block_r(void) |
54 | { | 86 | { |
55 | return 0x001b4070; | 87 | return 0x001b4070; |
@@ -74,6 +106,22 @@ static inline u32 perf_pmasys_mem_block_target_lfb_f(void) | |||
74 | { | 106 | { |
75 | return 0x0; | 107 | return 0x0; |
76 | } | 108 | } |
109 | static inline u32 perf_pmasys_mem_block_target_sys_coh_v(void) | ||
110 | { | ||
111 | return 0x00000002; | ||
112 | } | ||
113 | static inline u32 perf_pmasys_mem_block_target_sys_coh_f(void) | ||
114 | { | ||
115 | return 0x20000000; | ||
116 | } | ||
117 | static inline u32 perf_pmasys_mem_block_target_sys_ncoh_v(void) | ||
118 | { | ||
119 | return 0x00000003; | ||
120 | } | ||
121 | static inline u32 perf_pmasys_mem_block_target_sys_ncoh_f(void) | ||
122 | { | ||
123 | return 0x30000000; | ||
124 | } | ||
77 | static inline u32 perf_pmasys_mem_block_valid_f(u32 v) | 125 | static inline u32 perf_pmasys_mem_block_valid_f(u32 v) |
78 | { | 126 | { |
79 | return (v & 0x1) << 31; | 127 | return (v & 0x1) << 31; |
@@ -102,6 +150,10 @@ static inline u32 perf_pmasys_outbase_r(void) | |||
102 | { | 150 | { |
103 | return 0x001b4074; | 151 | return 0x001b4074; |
104 | } | 152 | } |
153 | static inline u32 perf_pmasys_outbase_ptr_f(u32 v) | ||
154 | { | ||
155 | return (v & 0x7ffffff) << 5; | ||
156 | } | ||
105 | static inline u32 perf_pmasys_outbaseupper_r(void) | 157 | static inline u32 perf_pmasys_outbaseupper_r(void) |
106 | { | 158 | { |
107 | return 0x001b4078; | 159 | return 0x001b4078; |
@@ -114,4 +166,40 @@ static inline u32 perf_pmasys_outsize_r(void) | |||
114 | { | 166 | { |
115 | return 0x001b407c; | 167 | return 0x001b407c; |
116 | } | 168 | } |
169 | static inline u32 perf_pmasys_outsize_numbytes_f(u32 v) | ||
170 | { | ||
171 | return (v & 0x7ffffff) << 5; | ||
172 | } | ||
173 | static inline u32 perf_pmasys_mem_bytes_r(void) | ||
174 | { | ||
175 | return 0x001b4084; | ||
176 | } | ||
177 | static inline u32 perf_pmasys_mem_bytes_numbytes_f(u32 v) | ||
178 | { | ||
179 | return (v & 0xfffffff) << 4; | ||
180 | } | ||
181 | static inline u32 perf_pmasys_mem_bump_r(void) | ||
182 | { | ||
183 | return 0x001b4088; | ||
184 | } | ||
185 | static inline u32 perf_pmasys_mem_bump_numbytes_f(u32 v) | ||
186 | { | ||
187 | return (v & 0xfffffff) << 4; | ||
188 | } | ||
189 | static inline u32 perf_pmasys_enginestatus_r(void) | ||
190 | { | ||
191 | return 0x001b40a4; | ||
192 | } | ||
193 | static inline u32 perf_pmasys_enginestatus_rbufempty_f(u32 v) | ||
194 | { | ||
195 | return (v & 0x1) << 4; | ||
196 | } | ||
197 | static inline u32 perf_pmasys_enginestatus_rbufempty_empty_v(void) | ||
198 | { | ||
199 | return 0x00000001; | ||
200 | } | ||
201 | static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void) | ||
202 | { | ||
203 | return 0x10; | ||
204 | } | ||
117 | #endif | 205 | #endif |